Coverage for brodata / gmw.py: 74%
331 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-13 12:57 +0000
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-13 12:57 +0000
1import json
2import logging
3import os
4from functools import partial
5from zipfile import ZipFile
7import numpy as np
8import pandas as pd
10from . import bro, gld, gar, frd, gmn, util
12logger = logging.getLogger(__name__)
def get_well_code(bro_id):
    """
    Look up the well code that belongs to a BRO-ID.

    The well code is requested from the `well-code` endpoint below the REST-url of
    `GroundwaterMonitoringWell`, using `bro.util.get_with_rate_limit`. Every status
    code above 200 is treated as a failure: the reason of the response is logged as
    an error and nothing is returned.

    Parameters
    ----------
    bro_id : str
        The BRO-ID for which the well code is requested.

    Returns
    -------
    well_code : str or None
        The text of the response (the well code) when the request succeeds, or
        `None` when the request fails.
    """
    response = bro.util.get_with_rate_limit(
        f"{GroundwaterMonitoringWell._rest_url}/well-code/{bro_id}"
    )
    if response.status_code <= 200:
        return response.text
    logger.error(response.reason)
    return None
class GroundwaterMonitoringWell(bro.FileOrUrl):
    """
    A Groundwater Monitoring Well (GMW) from the BRO.

    The XML-description of a groundwater monitoring well is parsed and its contents
    (for example the locations, the monitoring tubes and the well history) are stored
    as attributes of the instance.

    Notes
    -----
    This class extends `bro.FileOrUrl`. After parsing, the attribute `monitoringTube`
    (when present) is a DataFrame with the tube number as index, and the attribute
    `intermediateEvent` (when present) is a DataFrame with one row per event.
    """

    _rest_url = "https://publiek.broservices.nl/gm/gmw/v1"
    _xmlns = "http://www.broservices.nl/xsd/dsgmw/1.1"
    _char = "GMW_C"

    def _read_contents(self, tree):
        """Store the contents of the main GMW-element in `tree` as attributes."""
        namespaces = {
            "brocom": "http://www.broservices.nl/xsd/brocommon/3.0",
            "xmlns": self._xmlns,
        }
        gmw = self._get_main_object(tree, ["GMW_PO", "GMW_PPO", "BRO_DO"], namespaces)

        # properties of a monitoring tube that are converted to floats
        tube_floats = (
            "tubeTopDiameter",
            "tubeTopPosition",
            "screenLength",
            "screenTopPosition",
            "screenBottomPosition",
            "plainTubePartLength",
        )

        # attributes of the main element: drop the "{namespace}" part of the name
        for name, value in gmw.attrib.items():
            setattr(self, name.split("}", 1)[1], value)

        for child in gmw:
            tag = self._get_tag(child)
            if len(child) == 0:
                # an element without children: store its text as it is
                setattr(self, tag, child.text)
            elif tag == "standardizedLocation":
                self._read_standardized_location(child)
            elif tag == "deliveredLocation":
                self._read_delivered_location(child)
            elif tag == "wellHistory":
                for grandchild in child:
                    sub_tag = self._get_tag(grandchild)
                    if sub_tag in ("wellConstructionDate", "wellRemovalDate"):
                        setattr(self, sub_tag, self._read_date(grandchild))
                    elif sub_tag == "intermediateEvent":
                        # events are collected in a list, created at the first event
                        if not hasattr(self, "intermediateEvent"):
                            self.intermediateEvent = []
                        event = self._read_intermediate_event(grandchild)
                        self.intermediateEvent.append(event)
                    else:
                        self._warn_unknown_tag(sub_tag)
            elif tag in ("deliveredVerticalPosition", "registrationHistory"):
                self._read_children_of_children(
                    child, to_float=["offset", "groundLevelPosition"]
                )
            elif tag == "monitoringTube":
                # tubes are collected in a list, created at the first tube
                if not hasattr(self, "monitoringTube"):
                    self.monitoringTube = []
                tube = {}
                self._read_children_of_children(
                    child, tube, to_float=list(tube_floats)
                )
                self.monitoringTube.append(tube)
            else:
                self._warn_unknown_tag(tag)

        # turn the collected lists of dictionaries into DataFrames
        if hasattr(self, "monitoringTube"):
            self.monitoringTube = pd.DataFrame(self.monitoringTube)
            self.monitoringTube["tubeNumber"] = self.monitoringTube[
                "tubeNumber"
            ].astype(int)
            self.monitoringTube = self.monitoringTube.set_index("tubeNumber")
        if hasattr(self, "intermediateEvent"):
            self.intermediateEvent = pd.DataFrame(self.intermediateEvent)

    def _read_intermediate_event(self, node):
        """Return the name and the date of an intermediate event as a dictionary."""
        event = {}
        for child in node:
            tag = self._get_tag(child)
            if tag == "eventDate":
                event[tag] = self._read_date(child)
            elif tag == "eventName":
                event[tag] = child.text
            else:
                self._warn_unknown_tag(tag)
        return event
def get_observations(
    bro_ids,
    kind="gld",
    drop_references=True,
    silent=False,
    tmin=None,
    tmax=None,
    as_csv=False,
    tube_number=None,
    status=None,
    observation_type=None,
    qualifier=None,
    to_path=None,
    to_zip=None,
    redownload=False,
    zipfile=None,
    continue_on_error=False,
    sort=True,
    drop_duplicates=True,
    progress_callback=None,
    _files=None,
):
    """
    Retrieve groundwater observations for the specified monitoring wells (bro_ids).

    For every unique monitoring well the relations of the well (which dossiers or
    reports belong to which monitoring tube) are requested, after which each
    referenced object of the requested `kind` is downloaded or read from a previously
    saved file.

    Parameters
    ----------
    bro_ids : str or list or pd.DataFrame
        The BRO IDs of the monitoring wells for which to retrieve the data. If a
        DataFrame is provided, its index is used as the list of BRO IDs. If a single
        string is provided, the progress bar is disabled.
    kind : str, optional
        The type of observations to retrieve. Can be one of {'gmn', 'gld', 'gar', 'frd'}.
        Defaults to 'gld' (groundwater level dossier).
    drop_references : bool or list of str, optional
        Specifies which reference fields are dropped from the returned rows. Defaults
        to True, in which case 'gmnReferences', 'gldReferences', and 'garReferences'
        are removed. Only applied to the rows that are generated when `as_csv` is
        True.
    silent : bool, optional
        If True, suppresses the progress bar. Defaults to False.
    tmin : str or datetime, optional
        The minimum time filter for the observations. Only used if `kind` is 'gld'.
        Defaults to None.
    tmax : str or datetime, optional
        The maximum time filter for the observations. Only used if `kind` is 'gld'.
        Defaults to None.
    as_csv : bool, optional
        If True, requests the observations as CSV files instead of XML-files. Only valid
        if `kind` is 'gld'. Defaults to False.
    tube_number : int, optional
        Filters observations to a specific tube number. Defaults to None.
    status : str, optional
        A status string for additional filtering. Possible values are
        "volledigBeoordeeld", "voorlopig" and "onbekend". Only used if `kind` is 'gld'.
        Defaults to None.
    observation_type : str, optional
        An observation type string for additional filtering. Possible values are
        "reguliereMeting" and "controleMeting". Only used if `kind` is 'gld'. Defaults
        to None.
    qualifier : str or list of str, optional
        A qualifier string for additional filtering. Only valid if `kind` is 'gld'.
        Defaults to None.
    to_path : str, optional
        If not None, save the downloaded files in the directory named to_path. The
        default is None.
    to_zip : str, optional
        If not None, save the downloaded files in a zip-file named to_zip. The default
        is None.
    redownload : bool, optional
        When downloaded files exist in to_path, read from these files when redownload
        is False. If redownload is True, download the data again from the
        BRO-servers. The default is False.
    zipfile : zipfile.ZipFile, optional
        A zipfile-object. When not None, zipfile is used to read previously downloaded
        data from. The default is None.
    continue_on_error : bool, optional
        If True, continue after an error occurs during downloading or processing of
        individual observation data. Defaults to False.
    sort : bool, optional
        If True, sort the observations. Only used if `kind` is 'gld'. Defaults to True.
    drop_duplicates : bool, optional
        If True, drop duplicate observations based on their timestamp. Only used if
        `kind` is 'gld'. Defaults to True.
    progress_callback : function, optional
        A callback function that takes two arguments (current, total) to report
        progress, where total is the number of unique BRO IDs. If None, no progress
        reporting is done. Defaults to None.

    Returns
    -------
    pd.DataFrame or None
        A DataFrame in which each row corresponds to one downloaded object (for
        example a groundwater level dossier). None is returned when the request for
        the relations of a well fails; the error message is logged in that case.

    Raises
    ------
    Exception
        If `as_csv=True` and `kind` is not 'gld', or if `qualifier` is provided for
        a kind other than 'gld'.
    ValueError
        If `kind` is not one of the supported kinds.
    NotImplementedError
        If `to_zip` already exists and `redownload` is False.
    """
    tubes = []

    if isinstance(bro_ids, str):
        bro_ids = [bro_ids]
        silent = True

    if isinstance(bro_ids, pd.DataFrame):
        bro_ids = bro_ids.index

    if isinstance(drop_references, bool):
        if drop_references:
            drop_references = ["gmnReferences", "gldReferences", "garReferences"]
        else:
            drop_references = []

    if to_zip is not None:
        if not redownload and os.path.isfile(to_zip):
            raise (NotImplementedError("Redownload=False is not suppported yet"))
        if to_path is None:
            to_path = os.path.splitext(to_zip)[0]
        # a directory that is only created to fill the zip-file is removed afterwards
        remove_path_again = not os.path.isdir(to_path)
        if _files is None:
            _files = []

    desc = f"Downloading {kind}-observations"
    if as_csv and kind != "gld":
        raise (Exception("as_csv=True is only supported for kind=='gld'"))
    if qualifier is not None and kind != "gld":
        raise (Exception("A qualifier is only supported for kind=='gld'"))
    if to_path is not None and not os.path.isdir(to_path):
        os.makedirs(to_path)

    if kind == "gld":
        meas_cl = gld.GroundwaterLevelDossier
    elif kind == "gar":
        meas_cl = gar.GroundwaterAnalysisReport
    elif kind == "frd":
        meas_cl = frd.FormationResistanceDossier
    elif kind == "gmn":
        meas_cl = gmn.GroundwaterMonitoringNetwork
    else:
        raise (ValueError(f"kind='{kind}' not supported"))

    gld_kwargs = _get_gld_kwargs(
        kind, tmin, tmax, qualifier, status, observation_type, sort, drop_duplicates
    )

    # the loop runs over the unique ids, so progress is reported relative to these
    unique_ids = np.unique(bro_ids)
    n_wells = len(unique_ids)
    ref_key = f"{kind}References"

    for igmw, bro_id in enumerate(util.tqdm(unique_ids, disable=silent, desc=desc)):
        to_rel_file = util._get_to_file(
            f"gmw_relations_{bro_id}.json", zipfile, to_path, _files
        )
        if zipfile is None and (
            redownload or to_rel_file is None or not os.path.isfile(to_rel_file)
        ):
            # download the relations of this well
            url = f"https://publiek.broservices.nl/gm/v1/gmw-relations/{bro_id}"
            req = bro.util.get_with_rate_limit(url)
            if req.status_code > 200:
                logger.error(req.json()["errors"][0]["message"])
                return
            if to_rel_file is not None:
                with open(to_rel_file, "w") as f:
                    f.write(req.text)
            data = req.json()
        elif zipfile is not None:
            # read the relations from the supplied zip-file
            with zipfile.open(to_rel_file) as f:
                data = json.load(f)
        else:
            # read the relations from a previously downloaded file
            with open(to_rel_file) as f:
                data = json.load(f)

        for tube_ref in data["monitoringTubeReferences"]:
            tube_ref["groundwaterMonitoringWell"] = data["gmwBroId"]
            if tube_number is not None and tube_ref["tubeNumber"] != tube_number:
                continue
            for ref in tube_ref[ref_key]:
                obsdata = _download_observations_for_bro_id(
                    ref["broId"],
                    meas_cl,
                    as_csv,
                    zipfile,
                    to_path,
                    _files,
                    gld_kwargs,
                    redownload=redownload,
                    continue_on_error=continue_on_error,
                )
                if as_csv:
                    # Every referenced object gets its own row. Copying the tube
                    # properties prevents rows of the same tube from sharing (and
                    # overwriting) a single dictionary.
                    row = dict(tube_ref)
                    row["observation"] = obsdata
                    for key in drop_references:
                        if key in row:
                            row.pop(key)
                        else:
                            logger.warning(
                                "{} not defined for {}, filter {}".format(
                                    key,
                                    tube_ref["groundwaterMonitoringWell"],
                                    tube_ref["tubeNumber"],
                                )
                            )
                    row["broId"] = ref["broId"]
                    tubes.append(row)
                elif obsdata is not None:
                    # without a result (e.g. a download that failed and was skipped)
                    # there is no object to convert into a row
                    tubes.append(obsdata.to_dict())

        if progress_callback is not None:
            progress_callback(igmw + 1, n_wells)
    if to_zip is not None:
        util._save_data_to_zip(to_zip, _files, remove_path_again, to_path)
    return pd.DataFrame(tubes)
def _download_observations_for_bro_id(
    bro_id,
    meas_cl,
    as_csv,
    zipfile,
    to_path,
    _files,
    gld_kwargs,
    redownload=False,
    continue_on_error=False,
):
    """
    Download the observations of one BRO-object, or read them from a saved file.

    The data is downloaded when no zip-file is supplied and redownload is True, no
    target file is known or the target file does not exist yet. Otherwise the data is
    read from the file in `to_path` or from the supplied zip-file.

    Parameters
    ----------
    bro_id : str
        The BRO-ID of the object (e.g. a groundwater level dossier) to retrieve.
    meas_cl : class
        The class of the object. It is called as `meas_cl.from_bro_id(bro_id, ...)`
        to download xml-data and as `meas_cl(to_file, zipfile=zipfile, ...)` to read
        xml-data from a file.
    as_csv : bool
        If True, the data is retrieved as csv, using `gld.get_objects_as_csv` and
        `gld.read_gld_csv`.
    zipfile : zipfile.ZipFile or None
        A zip-file to read previously downloaded data from.
    to_path : str or None
        The directory in which downloaded files are saved.
    _files : list or None
        Passed on to `util._get_to_file`, together with the name of the file.
    gld_kwargs : dict
        Extra keyword arguments that are passed on to the download- and
        read-methods. The values of 'status' and 'observation_type' also determine
        the `observatietype` that is requested for csv-data.
    redownload : bool, optional
        If True, download the data even when the target file exists. Defaults to
        False.
    continue_on_error : bool, optional
        If True, an error during downloading is logged instead of raised. Defaults to
        False.

    Returns
    -------
    data
        The result of the download- or read-method, or None when downloading failed
        and `continue_on_error` is True.
    """
    # stays None when a download error is logged instead of raised
    data = None

    if as_csv:
        fname = f"{bro_id}.csv"
        # translate the requested status or observation type to the csv-request
        observatietype = None
        status = gld_kwargs.get("status")
        if status == "voorlopig":
            observatietype = "regulier_voorlopig"
        elif status == "volledigBeoordeeld":
            observatietype = "regulier_beoordeeld"
        elif status == "onbekend":
            observatietype = "onbekend"
        elif gld_kwargs.get("observation_type") == "controleMeting":
            observatietype = "controle"
    else:
        fname = f"{bro_id}.xml"

    to_file = util._get_to_file(fname, zipfile, to_path, _files)
    download = zipfile is None and (
        redownload or to_file is None or not os.path.isfile(to_file)
    )
    if download:
        try:
            if as_csv:
                data = gld.get_objects_as_csv(
                    bro_id,
                    observatietype=observatietype,
                    to_file=to_file,
                    **gld_kwargs,
                )
            else:
                data = meas_cl.from_bro_id(bro_id, to_file=to_file, **gld_kwargs)
        except Exception as e:
            if not continue_on_error:
                # bare raise keeps the original traceback
                raise
            logger.error(
                "Error processing %s %s for broid %s: %s",
                meas_cl.__name__,
                "csv" if as_csv else "xml",
                bro_id,
                e,
            )
            data = None
    elif as_csv:
        # read the csv-data from a file
        if zipfile is not None:
            # close the member of the zip-file again after reading
            with zipfile.open(to_file) as f:
                data = gld.read_gld_csv(
                    f,
                    bro_id,
                    observatietype=observatietype,
                    **gld_kwargs,
                )
        else:
            data = gld.read_gld_csv(
                to_file,
                bro_id,
                observatietype=observatietype,
                **gld_kwargs,
            )
    else:
        # read the xml-data from a file
        data = meas_cl(to_file, zipfile=zipfile, **gld_kwargs)
    return data
430def _get_gld_kwargs(
431 kind, tmin, tmax, qualifier, status, observation_type, sort, drop_duplicates
432):
433 gld_kwargs = {}
434 if kind == "gld":
435 if tmin is not None:
436 gld_kwargs["tmin"] = tmin
437 if tmax is not None:
438 gld_kwargs["tmax"] = tmax
439 if qualifier is not None:
440 gld_kwargs["qualifier"] = qualifier
441 if status is not None:
442 gld_kwargs["status"] = status
443 if observation_type is not None:
444 gld_kwargs["observation_type"] = observation_type
445 gld_kwargs["sort"] = sort
446 gld_kwargs["drop_duplicates"] = drop_duplicates
447 return gld_kwargs
def get_tube_observations(
    gwm_id, tube_number, kind="gld", sort=True, drop_duplicates=True, **kwargs
):
    """
    Get the observations of a single groundwater monitoring tube.

    The observations of all objects (e.g. groundwater level dossiers) that belong to
    the tube are retrieved with `get_observations` and combined into one DataFrame.

    Parameters
    ----------
    gwm_id : str
        The bro_id of the groundwater monitoring well.
    tube_number : int
        The tube number.
    kind : str, optional
        The type of observations to retrieve. Defaults to 'gld' (groundwater level
        dossier). Combining the observations is only implemented for 'gld' and 'gar'.
    sort : bool, optional
        If True, sort the combined observations. Only used if `kind` is 'gld'.
        Defaults to True.
    drop_duplicates : bool, optional
        If True, drop duplicate observations from the combined observations. Only
        used if `kind` is 'gld'. Defaults to True.
    **kwargs : dict
        Kwargs are passed onto get_observations.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the observations.

    """
    # Sorting and dropping duplicates is switched off for the individual objects:
    # both are applied once, to the combined observations.
    per_object = get_observations(
        gwm_id,
        tube_number=tube_number,
        kind=kind,
        sort=False,
        drop_duplicates=False,
        **kwargs,
    )
    if per_object.empty:
        return _get_empty_observation_df(kind)

    column = _get_data_column(kind)
    return _combine_observations(
        per_object[column],
        kind=kind,
        bro_id=f"{gwm_id}_{tube_number}",
        sort=sort,
        drop_duplicates=drop_duplicates,
    )
def get_tube_gdf(gmws, index=None):
    """
    Create a GeoDataFrame of tube properties combined with well metadata.

    Every monitoring tube of every well becomes one record, which contains the
    properties of the well (except the column 'monitoringTube') followed by the
    properties of the tube. The records are converted with `bro.objects_to_gdf` and
    sorted by their index.

    Parameters
    ----------
    gmws : list or dict of GroundwaterMonitoringWell, or pd.DataFrame
        Well and tube data in one of the following formats: a list of
        `GroundwaterMonitoringWell` objects, a dictionary of these objects, or a
        DataFrame with the bro-ids of the GroundwaterMonitoringWells as the index and
        the column monitoringTube containing a DataFrame of tube properties for each
        well. Wells for which monitoringTube is not a DataFrame are skipped.
    index : str or list of str, optional
        The column or columns to use for indexing the resulting GeoDataFrame. Defaults
        to ['groundwaterMonitoringWell', 'tubeNumber'] if not provided.

    Returns
    -------
    gdf : gpd.GeoDataFrame
        The combined well and tube properties, as returned by `bro.objects_to_gdf`.

    Notes
    -----
    Creating geometries is left to `bro.objects_to_gdf`; presumably this uses the
    'x' and 'y' columns when these are present (to be confirmed in `bro`).
    """
    # objects are first converted to a DataFrame with one row per well
    if isinstance(gmws, dict):
        gmws = list(gmws.values())
    if isinstance(gmws, list):
        gmws = pd.DataFrame([well.to_dict() for well in gmws])
        if "broId" in gmws.columns:
            gmws = gmws.set_index("broId")

    records = []
    for bro_id in gmws.index:
        tube_df = gmws.loc[bro_id, "monitoringTube"]
        if not isinstance(tube_df, pd.DataFrame):
            continue
        well_properties = gmws.loc[bro_id].drop("monitoringTube")
        for tube_number in tube_df.index:
            # combine properties of well and tube
            record = pd.concat((well_properties, tube_df.loc[tube_number]))
            record["groundwaterMonitoringWell"] = bro_id
            record["tubeNumber"] = tube_number
            records.append(record)

    if index is None:
        index = ["groundwaterMonitoringWell", "tubeNumber"]
    return bro.objects_to_gdf(records, index=index).sort_index()
def get_data_in_extent(
    extent,
    kind="gld",
    tmin=None,
    tmax=None,
    combine=None,
    index=None,
    as_csv=False,
    qualifier=None,
    to_zip=None,
    to_path=None,
    redownload=False,
    silent=False,
    continue_on_error=False,
    sort=True,
    drop_duplicates=True,
    progress_callback=None,
):
    """
    Retrieve metadata and observations within a specified spatial extent.

    This function fetches monitoring well characteristics, groundwater observations,
    and tube properties within the given spatial extent. It can combine the data
    for specific observation types and return either individual dataframes or a
    combined dataframe.

    Parameters
    ----------
    extent : str or sequence
        The spatial extent ([xmin, xmax, ymin, ymax]) to filter the data. When extent
        is a string, it is interpreted as the name of a zip-file (see `to_zip`), and
        `redownload` is set to False.
    kind : str, optional
        The type of observations to retrieve. Valid values are {'gld', 'gar'} for
        groundwater level dossier or groundwater analysis report. When kind is None, no
        observations are downloaded. Defaults to 'gld'.
    tmin : str or datetime, optional
        The minimum time for filtering observations. Defaults to None.
    tmax : str or datetime, optional
        The maximum time for filtering observations. Defaults to None.
    combine : bool, optional
        If True, combines the metadata, tube properties, and observations into a single
        dataframe. Defaults to None, which is treated as False and logs a warning that
        the default will change to True in a future version.
    index : str or list of str, optional
        The column(s) to use for indexing in the resulting dataframe. Defaults to None.
    as_csv : bool, optional
        If True, the measurement data is requested as CSV files instead of XML files
        (only supported for 'gld'). Defaults to False.
    qualifier : str or list of str, optional
        A string or list of strings used to filter the observations. Only valid if
        `kind` is 'gld'. Defaults to None.
    to_zip : str, optional
        If not None, save the downloaded files in a zip-file named to_zip. When this
        file already exists and redownload is False, the data is read from it. The
        default is None.
    to_path : str, optional
        If not None, save the downloaded files in the directory named to_path. The
        default is None.
    redownload : bool, optional
        When downloaded files exist in to_path or to_zip, read from these files when
        redownload is False. If redownload is True, download the data again from the
        BRO-server. The default is False.
    silent : bool, optional
        If True, suppresses progress logging. Defaults to False.
    continue_on_error : bool, optional
        If True, continue after an error occurs during downloading or processing of
        individual observation data. Defaults to False.
    sort : bool, optional
        If True, sort the observations. Only used if `kind` is 'gld'. Defaults to True.
    drop_duplicates : bool, optional
        If True, drop duplicate observations based on their timestamp. Only used if
        `kind` is 'gld'. Defaults to True.
    progress_callback : function, optional
        A callback function that takes two arguments (current, total) to report
        progress. If None, no progress reporting is done. Defaults to None.

    Returns
    -------
    gdf : pd.DataFrame
        A dataframe containing tube properties and metadata within the specified extent.
        When the data is combined, it also contains a column with the combined
        observations and a column with the bro-ids of the objects that contain them.
    obs_df : pd.DataFrame, optional
        A dataframe containing the observations for the specified wells. Only returned
        (as the second item of a tuple) when `kind` is not None and the data is not
        combined.

    Raises
    ------
    Exception
        If extent is a string and `to_zip` is supplied as well, or if
        `get_observations` rejects the combination of parameters (e.g. `as_csv=True`
        while `kind` is not 'gld').
    """
    if combine is None:
        logger.warning(
            "The default of `combine=False` will change to True in a future version of "
            "brodata. Pass combine=False to retain current behavior or combine=True to "
            "adopt the future default and silence this warning."
        )
        combine = False
    if isinstance(extent, str):
        if to_zip is not None:
            raise (Exception("When extent is a string, do not supply to_zip"))
        to_zip = extent
        extent = None
        redownload = False

    zipfile = None
    _files = None
    if to_zip is not None:
        if not redownload and os.path.isfile(to_zip):
            logger.info(f"Reading data from {to_zip}")
            zipfile = ZipFile(to_zip)
        else:
            if to_path is None:
                to_path = os.path.splitext(to_zip)[0]
            # a directory that is only created to fill the zip-file is removed again
            remove_path_again = not os.path.isdir(to_path)
            _files = []

    # the zip-file is closed again in the finally-clause, also when an error occurs
    try:
        if to_path is not None and not os.path.isdir(to_path):
            os.makedirs(to_path)

        # get gwm characteristics
        logger.info(f"Getting gmw-characteristics in extent: {extent}")

        to_file = util._get_to_file(
            "gmw_characteristics.xml", zipfile, to_path, _files
        )
        gmw = get_characteristics(
            extent=extent, to_file=to_file, redownload=redownload, zipfile=zipfile
        )

        if kind is None:
            obs_df = pd.DataFrame()
            combine = False
        else:
            # get observations
            logger.info(f"Downloading {kind}-observations")
            obs_df = get_observations(
                gmw,
                kind=kind,
                tmin=tmin,
                tmax=tmax,
                as_csv=as_csv,
                qualifier=qualifier,
                to_path=to_path,
                redownload=redownload,
                zipfile=zipfile,
                _files=_files,
                silent=silent,
                continue_on_error=continue_on_error,
                sort=sort,
                drop_duplicates=drop_duplicates,
                progress_callback=progress_callback,
            )

            # only keep wells with observations
            if "groundwaterMonitoringWell" in obs_df.columns:
                gmw = gmw[gmw.index.isin(obs_df["groundwaterMonitoringWell"])]

        logger.info("Downloading tube-properties")

        # get the properties of the monitoringTubes
        gdf = get_tube_gdf_from_characteristics(
            gmw,
            index=index,
            to_path=to_path,
            redownload=redownload,
            zipfile=zipfile,
            _files=_files,
            silent=silent,
        )
    finally:
        if zipfile is not None:
            zipfile.close()

    if zipfile is None and to_zip is not None:
        util._save_data_to_zip(to_zip, _files, remove_path_again, to_path)

    if not obs_df.empty:
        obs_df = obs_df.set_index(
            ["groundwaterMonitoringWell", "tubeNumber"]
        ).sort_index()

    if combine and kind in ["gld", "gar"]:
        idcol = {
            "gld": "groundwaterLevelDossier",
            "gar": "groundwaterAnalysisReport",
        }[kind]
        datcol = _get_data_column(kind)

        logger.info("Combining well-properties, tube-properties and observations")

        data = {}
        ids = {}
        # each key is expected to be a (groundwaterMonitoringWell, tubeNumber) pair
        for tube_key in gdf.index:
            if tube_key not in obs_df.index:
                continue

            data[tube_key] = _combine_observations(
                obs_df.loc[[tube_key], datcol],
                kind=kind,
                bro_id=f"{tube_key[0]}_{tube_key[1]}",
                sort=sort,
                drop_duplicates=drop_duplicates,
            )
            ids[tube_key] = list(obs_df.loc[[tube_key], "broId"])
        gdf[datcol] = data
        gdf[idcol] = ids
        return gdf

    if kind is None:
        return gdf
    return gdf, obs_df
773def _get_data_column(kind):
774 if kind == "gld":
775 return "observation"
776 elif kind == "gar":
777 return "laboratoryAnalysis"
778 else:
779 raise (NotImplementedError(f"Measurement-kind {kind} not supported yet"))
782def _get_empty_observation_df(kind):
783 if kind == "gld":
784 return gld._get_empty_observation_df()
785 elif kind == "gar":
786 return gar._get_empty_observation_df()
787 else:
788 raise (NotImplementedError(f"Measurement-kind {kind} not supported yet"))
791def _combine_observations(
792 observations, kind, bro_id=None, sort=True, drop_duplicates=True
793):
794 obslist = []
795 for observation in observations:
796 if not isinstance(observation, pd.DataFrame) or observation.empty:
797 continue
798 obslist.append(observation)
799 if len(obslist) == 0:
800 return _get_empty_observation_df(kind)
801 else:
802 df = pd.concat(obslist).sort_index()
803 if kind == "gld":
804 if sort:
805 df = gld.sort_observations(df)
806 if drop_duplicates:
807 df = gld.drop_duplicate_observations(df, bro_id=bro_id)
808 return df
def get_tube_gdf_from_characteristics(characteristics_gdf, **kwargs):
    """
    Generate a GeoDataFrame of tube properties based on well characteristics.

    The unique values of the index of `characteristics_gdf` are used as the bro-ids
    of the wells, and are passed on to `get_tube_gdf_from_bro_ids`.

    Parameters
    ----------
    characteristics_gdf : gpd.GeoDataFrame
        GeoDataFrame of well characteristics with bro-ids of the
        GroundwaterMonitoringWells as the index, retrieved with
        `brodata.gmw.get_characteristics`.
    **kwargs : dict
        Kwargs are passed onto `get_tube_gdf_from_bro_ids`, for example `index`: the
        column(s) to use as the index for the resulting GeoDataFrame.

    Returns
    -------
    gpd.GeoDataFrame
        GeoDataFrame of combined well and tube properties
    """
    return get_tube_gdf_from_bro_ids(characteristics_gdf.index.unique(), **kwargs)
def get_tube_gdf_from_bro_ids(
    bro_ids,
    index=None,
    **kwargs,
):
    """
    Generate a GeoDataFrame of tube properties based on an iterable of gmw bro-ids.

    The GroundwaterMonitoringWell-objects are retrieved with
    `bro._get_data_for_bro_ids`, after which the properties of the wells and their
    monitoring tubes are combined by `get_tube_gdf`.

    Parameters
    ----------
    bro_ids : iterable of str
        The bro-ids of the GroundwaterMonitoringWells.
    index : str or list of str, optional
        Column(s) to use as the index for the resulting GeoDataFrame. Defaults
        to ['groundwaterMonitoringWell', 'tubeNumber'] if not provided.
    **kwargs : dict
        Kwargs are passed onto `bro._get_data_for_bro_ids`.

    Returns
    -------
    gpd.GeoDataFrame
        GeoDataFrame of combined well and tube properties
    """
    wells = bro._get_data_for_bro_ids(
        GroundwaterMonitoringWell,
        bro_ids,
        desc="Downloading Groundwater Monitoring Wells",
        **kwargs,
    )
    return get_tube_gdf(wells, index=index)
# The BRO-object class of this module, bound below as the first argument of the
# generic functions of `bro`.
cl = GroundwaterMonitoringWell

# module-level versions of the generic functions, specialised for this class
get_bro_ids_of_bronhouder = partial(bro._get_bro_ids_of_bronhouder, cl)
get_data_for_bro_ids = partial(bro._get_data_for_bro_ids, cl)
get_characteristics = partial(bro._get_characteristics, cl)

# a partial-object does not take over the docstring of the function it wraps
get_bro_ids_of_bronhouder.__doc__ = bro._get_bro_ids_of_bronhouder.__doc__
get_data_for_bro_ids.__doc__ = bro._get_data_for_bro_ids.__doc__
get_characteristics.__doc__ = bro._get_characteristics.__doc__