Coverage for brodata / gmw.py: 74%
332 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-20 14:37 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-20 14:37 +0000
1import json
2import logging
3import os
4from functools import partial
5from zipfile import ZipFile
7import numpy as np
8import pandas as pd
9import requests
11from . import bro, gld, gar, frd, gmn, util
13logger = logging.getLogger(__name__)
def get_well_code(bro_id):
    """
    Look up the well code that belongs to a BRO-ID.

    A GET request is sent to the ``well-code`` endpoint of the GMW REST service.
    When the status code of the response is larger than 200, the reason of the
    response is logged as an error and `None` is returned.

    Parameters
    ----------
    bro_id : str
        The BRO-ID for which to retrieve the associated well code.

    Returns
    -------
    well_code : str or None
        The body of the response as plain text, or `None` when the request failed.
    """
    response = requests.get(
        f"{GroundwaterMonitoringWell._rest_url}/well-code/{bro_id}"
    )
    if response.status_code <= 200:
        return response.text
    # anything above 200 is treated as a failure: report it and return nothing
    logger.error(response.reason)
    return None
class GroundwaterMonitoringWell(bro.FileOrUrl):
    """
    Class to represent a Groundwater Monitoring Well (GMW) from the BRO.

    This class parses XML data related to a groundwater monitoring well (GMW).
    It extracts details such as location, monitoring tube data, and well history
    and stores these in attributes.

    Notes
    -----
    This class extends `bro.FileOrUrl` and is designed to work with GMW XML data,
    either from a file or URL.

    After parsing, `monitoringTube` (when present) is a DataFrame indexed by the
    integer `tubeNumber`, and `intermediateEvent` (when present) is a DataFrame
    with one row per event.
    """

    _rest_url = "https://publiek.broservices.nl/gm/gmw/v1"
    _xmlns = "http://www.broservices.nl/xsd/dsgmw/1.1"
    _char = "GMW_C"

    def _read_contents(self, tree):
        """
        Read the GMW-object from `tree` and store its properties as attributes.

        Parameters
        ----------
        tree : xml element tree
            The parsed XML-document that contains the GMW-object.
        """
        ns = {
            "brocom": "http://www.broservices.nl/xsd/brocommon/3.0",
            "xmlns": self._xmlns,
        }

        object_names = ["GMW_PO", "GMW_PPO", "BRO_DO"]
        gmw = self._get_main_object(tree, object_names, ns)

        for key, value in gmw.attrib.items():
            # strip a leading "{namespace}" when present; attributes without a
            # namespace are stored under their plain name instead of raising
            setattr(self, key.split("}", 1)[-1], value)
        for child in gmw:
            key = self._get_tag(child)
            if len(child) == 0:
                # a leaf element: store its text directly
                setattr(self, key, child.text)
            elif key == "standardizedLocation":
                self._read_standardized_location(child)
            elif key == "deliveredLocation":
                self._read_delivered_location(child)
            elif key == "wellHistory":
                for grandchild in child:
                    tag = self._get_tag(grandchild)
                    if tag in ["wellConstructionDate", "wellRemovalDate"]:
                        setattr(self, tag, self._read_date(grandchild))
                    elif tag == "intermediateEvent":
                        # events are collected in a list first and converted to
                        # a DataFrame once all children have been read
                        if not hasattr(self, tag):
                            self.intermediateEvent = []
                        event = self._read_intermediate_event(grandchild)
                        self.intermediateEvent.append(event)
                    else:
                        self._warn_unknown_tag(tag)

            elif key in ["deliveredVerticalPosition", "registrationHistory"]:
                to_float = ["offset", "groundLevelPosition"]
                self._read_children_of_children(child, to_float=to_float)
            elif key in ["monitoringTube"]:
                # every monitoringTube-element becomes one dictionary in a list
                if not hasattr(self, key):
                    self.monitoringTube = []
                tube = {}
                to_float = [
                    "tubeTopDiameter",
                    "tubeTopPosition",
                    "screenLength",
                    "screenTopPosition",
                    "screenBottomPosition",
                    "plainTubePartLength",
                ]
                self._read_children_of_children(child, tube, to_float=to_float)
                self.monitoringTube.append(tube)
            else:
                self._warn_unknown_tag(key)
        if hasattr(self, "monitoringTube"):
            self.monitoringTube = pd.DataFrame(self.monitoringTube)
            tubeNumber = self.monitoringTube["tubeNumber"].astype(int)
            self.monitoringTube["tubeNumber"] = tubeNumber
            self.monitoringTube = self.monitoringTube.set_index("tubeNumber")
        if hasattr(self, "intermediateEvent"):
            self.intermediateEvent = pd.DataFrame(self.intermediateEvent)

    def _read_intermediate_event(self, node):
        """
        Read a single intermediateEvent-element.

        Parameters
        ----------
        node : xml element
            The intermediateEvent-element.

        Returns
        -------
        d : dict
            Dictionary with the keys "eventName" and "eventDate", as far as these
            are present in `node`. Other tags are reported as unknown.
        """
        d = {}
        for child in node:
            key = self._get_tag(child)
            if key == "eventName":
                d[key] = child.text
            elif key == "eventDate":
                d[key] = self._read_date(child)
            else:
                self._warn_unknown_tag(key)
        return d
def get_observations(
    bro_ids,
    kind="gld",
    drop_references=True,
    silent=False,
    tmin=None,
    tmax=None,
    as_csv=False,
    tube_number=None,
    status=None,
    observation_type=None,
    qualifier=None,
    to_path=None,
    to_zip=None,
    redownload=False,
    zipfile=None,
    continue_on_error=False,
    sort=True,
    drop_duplicates=True,
    progress_callback=None,
    _files=None,
):
    """
    Retrieve groundwater observations for the specified monitoring wells (bro_ids).

    For every unique well the gmw-relations are requested (or read from a previously
    saved file), after which the objects referenced by every monitoring tube are
    downloaded or read.

    Parameters
    ----------
    bro_ids : str or list or pd.DataFrame
        The BRO IDs of the monitoring wells for which to retrieve the data. If a
        DataFrame is provided, its index is used as the list of BRO IDs. If a single
        string is provided, the progress bar is disabled.
    kind : str, optional
        The type of observations to retrieve. Can be one of {'gmn', 'gld', 'gar', 'frd'}.
        Defaults to 'gld' (groundwater level dossier).
    drop_references : bool or list of str, optional
        Specifies which reference fields to drop in the returned data. Only used when
        `as_csv` is True. Defaults to True, in which case 'gmnReferences',
        'gldReferences', and 'garReferences' are removed.
    silent : bool, optional
        If True, suppresses the progress bar. Defaults to False.
    tmin : str or datetime, optional
        The minimum time filter for the observations. Only used if `kind` is 'gld'.
        Defaults to None.
    tmax : str or datetime, optional
        The maximum time filter for the observations. Only used if `kind` is 'gld'.
        Defaults to None.
    as_csv : bool, optional
        If True, requests the observations as CSV files instead of XML-files. Only valid
        if `kind` is 'gld'. Defaults to False.
    tube_number : int, optional
        Filters observations to a specific tube number. Defaults to None.
    status : str, optional
        A status string for additional filtering. Possible values are
        "volledigBeoordeeld", "voorlopig" and "onbekend". Only used if `kind` is 'gld'.
        Defaults to None.
    observation_type : str, optional
        An observation type string for additional filtering. Possible values are
        "reguliereMeting" and "controleMeting". Only used if `kind` is 'gld'. Defaults
        to None.
    qualifier : str or list of str, optional
        A qualifier string for additional filtering. Only valid if `kind` is 'gld'.
        Defaults to None.
    to_path : str, optional
        If not None, save the downloaded files in the directory named to_path. The
        default is None.
    to_zip : str, optional
        If not None, save the downloaded files in a zip-file named to_zip. The default
        is None.
    redownload : bool, optional
        When downloaded files exist in to_path, read from these files when redownload
        is False. If redownload is True, download the data again from the
        BRO-servers. The default is False.
    zipfile : zipfile.ZipFile, optional
        A zipfile-object. When not None, zipfile is used to read previously downloaded
        data from. The default is None.
    continue_on_error : bool, optional
        If True, continue after an error occurs during downloading or processing of
        individual observation data. Defaults to False.
    sort : bool, optional
        If True, sort the observations. Only used if `kind` is 'gld'. Defaults to True.
    drop_duplicates : bool, optional
        If True, drop duplicate observations based on their timestamp. Only used if
        `kind` is 'gld'. Defaults to True.
    progress_callback : function, optional
        A callback function that takes two arguments (current, total) to report
        progress, where total is the number of unique wells. If None, no progress
        reporting is done. Defaults to None.

    Returns
    -------
    pd.DataFrame or None
        A DataFrame with one row for every downloaded object. When `as_csv` is True,
        a row contains the tube reference, the bro-id of the dossier and the
        measurements in the column 'observation'. `None` is returned when the request
        for the gmw-relations of a well fails (the error is logged).

    Raises
    ------
    Exception
        If `as_csv=True` and `kind` is not 'gld', or if `qualifier` is provided for
        a kind other than 'gld'.
    ValueError
        If `kind` is not one of the supported kinds.
    NotImplementedError
        If `to_zip` is an existing file and `redownload` is False.
    """
    tubes = []

    if isinstance(bro_ids, str):
        bro_ids = [bro_ids]
        silent = True

    if isinstance(bro_ids, pd.DataFrame):
        bro_ids = bro_ids.index

    if isinstance(drop_references, bool):
        if drop_references:
            drop_references = [
                "gmnReferences",
                "gldReferences",
                "garReferences",
                # "frdReferences",
            ]
        else:
            drop_references = []

    if to_zip is not None:
        if not redownload and os.path.isfile(to_zip):
            raise (NotImplementedError("Redownload=False is not suppported yet"))
        if to_path is None:
            to_path = os.path.splitext(to_zip)[0]
        # remember whether the directory is only a temporary one for the zip-file
        remove_path_again = not os.path.isdir(to_path)
        if _files is None:
            _files = []

    desc = f"Downloading {kind}-observations"
    if as_csv and kind != "gld":
        raise (Exception("as_csv=True is only supported for kind=='gld'"))
    if qualifier is not None and kind != "gld":
        raise (Exception("A qualifier is only supported for kind=='gld'"))
    if to_path is not None and not os.path.isdir(to_path):
        os.makedirs(to_path)

    if kind == "gld":
        meas_cl = gld.GroundwaterLevelDossier
    elif kind == "gar":
        meas_cl = gar.GroundwaterAnalysisReport
    elif kind == "frd":
        meas_cl = frd.FormationResistanceDossier
    elif kind == "gmn":
        meas_cl = gmn.GroundwaterMonitoringNetwork
    else:
        raise (ValueError(f"kind='{kind}' not supported"))

    gld_kwargs = _get_gld_kwargs(
        kind, tmin, tmax, qualifier, status, observation_type, sort, drop_duplicates
    )

    # determine the unique wells once, so that the total that is reported to
    # progress_callback matches the number of iterations of the loop
    unique_bro_ids = np.unique(bro_ids)
    n_wells = len(unique_bro_ids)
    ref_key = f"{kind}References"

    for igmw, bro_id in enumerate(
        util.tqdm(unique_bro_ids, disable=silent, desc=desc)
    ):
        to_rel_file = util._get_to_file(
            f"gmw_relations_{bro_id}.json", zipfile, to_path, _files
        )
        if zipfile is None and (
            redownload or to_rel_file is None or not os.path.isfile(to_rel_file)
        ):
            url = f"https://publiek.broservices.nl/gm/v1/gmw-relations/{bro_id}"
            req = requests.get(url)
            if req.status_code > 200:
                logger.error(req.json()["errors"][0]["message"])
                return
            if to_rel_file is not None:
                with open(to_rel_file, "w") as f:
                    f.write(req.text)
            data = req.json()
        else:
            if zipfile is not None:
                with zipfile.open(to_rel_file) as f:
                    data = json.load(f)
            else:
                with open(to_rel_file) as f:
                    data = json.load(f)
        for tube_ref in data["monitoringTubeReferences"]:
            tube_ref["groundwaterMonitoringWell"] = data["gmwBroId"]
            if tube_number is not None:
                if tube_ref["tubeNumber"] != tube_number:
                    continue
            for ref in tube_ref[ref_key]:
                obsdata = _download_observations_for_bro_id(
                    ref["broId"],
                    meas_cl,
                    as_csv,
                    zipfile,
                    to_path,
                    _files,
                    gld_kwargs,
                    redownload=redownload,
                    continue_on_error=continue_on_error,
                )
                if as_csv:
                    # build a separate row for every reference: appending the
                    # shared tube_ref would make all rows of a tube point to the
                    # same dictionary, holding only the data of the last reference
                    row = dict(tube_ref)
                    row["observation"] = obsdata
                    for key in drop_references:
                        if key in row:
                            row.pop(key)
                        else:
                            logger.warning(
                                "{} not defined for {}, filter {}".format(
                                    key,
                                    row["groundwaterMonitoringWell"],
                                    row["tubeNumber"],
                                )
                            )

                    row["broId"] = ref["broId"]
                    tubes.append(row)
                else:
                    if obsdata is None:
                        # nothing was returned for this reference, so there is
                        # nothing to convert to a row
                        continue
                    tubes.append(obsdata.to_dict())

        if progress_callback is not None:
            progress_callback(igmw + 1, n_wells)
    if to_zip is not None:
        util._save_data_to_zip(to_zip, _files, remove_path_again, to_path)
    return pd.DataFrame(tubes)
def _download_observations_for_bro_id(
    bro_id,
    meas_cl,
    as_csv,
    zipfile,
    to_path,
    _files,
    gld_kwargs,
    redownload=False,
    continue_on_error=False,
):
    """
    Download or read the data of a single object with observations.

    The data is downloaded when no zipfile is supplied and the target file is
    unknown, does not exist yet or `redownload` is True. In all other cases the data
    is read from the previously saved file (in `zipfile` or on disk).

    Parameters
    ----------
    bro_id : str
        The bro-id of the object with observations.
    meas_cl : class
        The class that is used to download or read the object when `as_csv` is
        False. Its name is also used in error messages.
    as_csv : bool
        If True, the data is requested or read as a csv-file, otherwise as xml.
    zipfile : zipfile.ZipFile or None
        When not None, previously downloaded data is read from this zipfile.
    to_path : str or None
        Passed to `util._get_to_file` to determine the name of the file.
    _files : list or None
        Passed to `util._get_to_file`.
    gld_kwargs : dict
        Extra keyword arguments for downloading or reading. The entries "status"
        and "observation_type" determine the `observatietype` of csv-requests.
    redownload : bool, optional
        If True, download the data even when the file already exists. The default
        is False.
    continue_on_error : bool, optional
        If True, an error during downloading is logged instead of raised. The
        default is False.

    Returns
    -------
    data : object or None
        The result of the download- or read-method. `None` when downloading failed
        and `continue_on_error` is True.
    """
    observatietype = None
    if as_csv:
        fname = f"{bro_id}.csv"
        # translate the filters to the observatietype of the csv-service; a status
        # takes precedence over the observation_type
        status_filter = gld_kwargs.get("status")
        if status_filter == "voorlopig":
            observatietype = "regulier_voorlopig"
        elif status_filter == "volledigBeoordeeld":
            observatietype = "regulier_beoordeeld"
        elif status_filter == "onbekend":
            observatietype = "onbekend"
        elif gld_kwargs.get("observation_type") == "controleMeting":
            observatietype = "controle"
    else:
        fname = f"{bro_id}.xml"
    to_file = util._get_to_file(fname, zipfile, to_path, _files)

    # make sure there is a value to return when a failed download is only logged
    data = None
    if zipfile is None and (
        redownload or to_file is None or not os.path.isfile(to_file)
    ):  # download the data
        if as_csv:
            try:
                data = gld.get_objects_as_csv(
                    bro_id,
                    observatietype=observatietype,
                    to_file=to_file,
                    **gld_kwargs,
                )
            except Exception as e:
                if not continue_on_error:
                    raise
                logger.error(
                    "Error processing %s csv for broid %s: %s",
                    meas_cl.__name__,
                    bro_id,
                    e,
                )
        else:
            try:
                data = meas_cl.from_bro_id(bro_id, to_file=to_file, **gld_kwargs)
            except Exception as e:
                if not continue_on_error:
                    raise
                logger.error(
                    "Error processing %s xml for broid %s: %s",
                    meas_cl.__name__,
                    bro_id,
                    e,
                )
    else:
        # read the data from a file
        if as_csv:
            if zipfile is not None:
                # close the member of the zipfile again after reading
                with zipfile.open(to_file) as stream:
                    data = gld.read_gld_csv(
                        stream,
                        bro_id,
                        observatietype=observatietype,
                        **gld_kwargs,
                    )
            else:
                data = gld.read_gld_csv(
                    to_file,
                    bro_id,
                    observatietype=observatietype,
                    **gld_kwargs,
                )
        else:
            data = meas_cl(to_file, zipfile=zipfile, **gld_kwargs)
    return data
431def _get_gld_kwargs(
432 kind, tmin, tmax, qualifier, status, observation_type, sort, drop_duplicates
433):
434 gld_kwargs = {}
435 if kind == "gld":
436 if tmin is not None:
437 gld_kwargs["tmin"] = tmin
438 if tmax is not None:
439 gld_kwargs["tmax"] = tmax
440 if qualifier is not None:
441 gld_kwargs["qualifier"] = qualifier
442 if status is not None:
443 gld_kwargs["status"] = status
444 if observation_type is not None:
445 gld_kwargs["observation_type"] = observation_type
446 gld_kwargs["sort"] = sort
447 gld_kwargs["drop_duplicates"] = drop_duplicates
448 return gld_kwargs
def get_tube_observations(
    gwm_id, tube_number, kind="gld", sort=True, drop_duplicates=True, **kwargs
):
    """
    Get the observations of a single groundwater monitoring tube.

    Parameters
    ----------
    gwm_id : str
        The bro_id of the groundwater monitoring well.
    tube_number : int
        The tube number.
    kind : str, optional
        The type of observations to retrieve. The observations can only be combined
        for the kinds 'gld' and 'gar'. Defaults to 'gld' (groundwater level
        dossier).
    sort : bool, optional
        If True, sort the observations. Only used if `kind` is 'gld'. Defaults to True.
    drop_duplicates : bool, optional
        If True, drop duplicate observations based on their timestamp. Only used if
        `kind` is 'gld'. Defaults to True.
    **kwargs : dict
        Kwargs are passed onto get_observations.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the observations. An empty DataFrame is returned when
        no observations were found or retrieved.

    Raises
    ------
    NotImplementedError
        If `kind` is not 'gld' or 'gar'.
    """
    # sorting and dropping duplicates is done after combining the observations
    # to avoid doing this multiple times
    df = get_observations(
        gwm_id,
        tube_number=tube_number,
        kind=kind,
        sort=False,
        drop_duplicates=False,
        **kwargs,
    )
    # get_observations returns None when the request for the relations of the well
    # fails; treat that the same as a well without observations
    if df is None or df.empty:
        return _get_empty_observation_df(kind)
    data_column = _get_data_column(kind)
    return _combine_observations(
        df[data_column],
        kind=kind,
        bro_id=f"{gwm_id}_{tube_number}",
        sort=sort,
        drop_duplicates=drop_duplicates,
    )
def get_tube_gdf(gmws, index=None):
    """
    Build a GeoDataFrame with one row per monitoring tube.

    The properties of every tube are combined with the properties of the well the
    tube belongs to. Wells for which the column monitoringTube does not contain a
    DataFrame are skipped.

    Parameters
    ----------
    gmws : list or dict of GroundwaterMonitoringWell, or pd.DataFrame
        Well and tube data in one of the following formats: a list of
        `GroundwaterMonitoringWell` objects, a dictionary of these objects, or a
        DataFrame with the bro-ids of the GroundwaterMonitoringWells as the index
        and the column monitoringTube containing tube properties.
    index : str or list of str, optional
        The column or columns to use for indexing the resulting GeoDataFrame.
        Defaults to ['groundwaterMonitoringWell', 'tubeNumber'] if not provided.

    Returns
    -------
    gdf : gpd.GeoDataFrame
        The combined well and tube properties, created by `bro.objects_to_gdf` and
        sorted by its index.
    """
    if isinstance(gmws, (list, dict)):
        # lists and dictionaries of wells are converted to a DataFrame first
        wells = gmws.values() if isinstance(gmws, dict) else gmws
        gmws = pd.DataFrame([well.to_dict() for well in wells])
        if "broId" in gmws.columns:
            gmws = gmws.set_index("broId")

    if index is None:
        index = ["groundwaterMonitoringWell", "tubeNumber"]

    tubes = []
    for bro_id in gmws.index:
        tube_df = gmws.loc[bro_id, "monitoringTube"]
        if not isinstance(tube_df, pd.DataFrame):
            continue
        well_properties = gmws.loc[bro_id].drop("monitoringTube")
        for tube_number in tube_df.index:
            # combine properties of well and tube
            tube = pd.concat((well_properties, tube_df.loc[tube_number]))
            tube["groundwaterMonitoringWell"] = bro_id
            tube["tubeNumber"] = tube_number
            tubes.append(tube)

    return bro.objects_to_gdf(tubes, index=index).sort_index()
def get_data_in_extent(
    extent,
    kind="gld",
    tmin=None,
    tmax=None,
    combine=None,
    index=None,
    as_csv=False,
    qualifier=None,
    to_zip=None,
    to_path=None,
    redownload=False,
    silent=False,
    continue_on_error=False,
    sort=True,
    drop_duplicates=True,
    progress_callback=None,
):
    """
    Retrieve metadata and observations within a specified spatial extent.

    This function fetches monitoring well characteristics, groundwater observations,
    and tube properties within the given spatial extent. It can combine the data
    for specific observation types and return either individual dataframes or a
    combined dataframe.

    Parameters
    ----------
    extent : str or sequence
        The spatial extent ([xmin, xmax, ymin, ymax]) to filter the data. When extent
        is a string, it is used as the name of a zip-file (like `to_zip`) and
        `redownload` is set to False.
    kind : str, optional
        The type of observations to retrieve. Valid values are {'gld', 'gar'} for
        groundwater level dossier or groundwater analysis report. When kind is None, no
        observations are downloaded. Defaults to 'gld'.
    tmin : str or datetime, optional
        The minimum time for filtering observations. Defaults to None.
    tmax : str or datetime, optional
        The maximum time for filtering observations. Defaults to None.
    combine : bool, optional
        If True, combines the metadata, tube properties, and observations into a single
        dataframe. Defaults to None, which logs a warning and behaves like False (this
        default will change to True in a future version).
    index : str, optional
        The column to use for indexing in the resulting dataframe. Defaults to None.
    as_csv : bool, optional
        If True, the measurement data is requested as CSV files instead of XML files
        (only supported for 'gld'). Defaults to False.
    qualifier : str or list of str, optional
        A string or list of strings used to filter the observations. Only valid if
        `kind` is 'gld'. Defaults to None.
    to_zip : str, optional
        If not None, save the downloaded files in a zip-file named to_zip. The default
        is None.
    to_path : str, optional
        If not None, save the downloaded files in the directory named to_path. The
        default is None.
    redownload : bool, optional
        When downloaded files exist in to_path or to_zip, read from these files when
        redownload is False. If redownload is True, download the data again from the
        BRO-server. The default is False.
    silent : bool, optional
        If True, suppresses progress logging. Defaults to False.
    continue_on_error : bool, optional
        If True, continue after an error occurs during downloading or processing of
        individual observation data. Defaults to False.
    sort : bool, optional
        If True, sort the observations. Only used if `kind` is 'gld'. Defaults to True.
    drop_duplicates : bool, optional
        If True, drop duplicate observations based on their timestamp. Only used if
        `kind` is 'gld'. Defaults to True.
    progress_callback : function, optional
        A callback function that takes two arguments (current, total) to report
        progress. If None, no progress reporting is done. Defaults to None.

    Returns
    -------
    gdf : pd.DataFrame
        A dataframe containing tube properties and metadata within the specified extent.
    obs_df : pd.DataFrame, optional
        A dataframe containing the observations for the specified wells. Not returned
        when `kind` is None, or when the observations are combined with `gdf`
        (`combine` is True and `kind` is 'gld' or 'gar').

    Raises
    ------
    Exception
        If extent is a string and `to_zip` is supplied as well, if `as_csv=True` and
        `kind` is not 'gld', or if other parameters are invalid.
    """
    if combine is None:
        logger.warning(
            "The default of `combine=False` will change to True in a future version of "
            "brodata. Pass combine=False to retain current behavior or combine=True to "
            "adopt the future default and silence this warning."
        )
        combine = False
    if isinstance(extent, str):
        if to_zip is not None:
            raise (Exception("When extent is a string, do not supply to_zip"))
        to_zip = extent
        extent = None
        redownload = False

    zipfile = None
    _files = None
    if to_zip is not None:
        if not redownload and os.path.isfile(to_zip):
            logger.info(f"Reading data from {to_zip}")
            zipfile = ZipFile(to_zip)
        else:
            if to_path is None:
                to_path = os.path.splitext(to_zip)[0]
            remove_path_again = not os.path.isdir(to_path)
            _files = []

    try:
        if to_path is not None and not os.path.isdir(to_path):
            os.makedirs(to_path)

        # get gwm characteristics
        logger.info(f"Getting gmw-characteristics in extent: {extent}")

        to_file = util._get_to_file(
            "gmw_characteristics.xml", zipfile, to_path, _files
        )
        gmw = get_characteristics(
            extent=extent, to_file=to_file, redownload=redownload, zipfile=zipfile
        )

        if kind is None:
            obs_df = pd.DataFrame()
            combine = False
        else:
            # get observations
            logger.info(f"Downloading {kind}-observations")
            obs_df = get_observations(
                gmw,
                kind=kind,
                tmin=tmin,
                tmax=tmax,
                as_csv=as_csv,
                qualifier=qualifier,
                to_path=to_path,
                redownload=redownload,
                zipfile=zipfile,
                _files=_files,
                silent=silent,
                continue_on_error=continue_on_error,
                sort=sort,
                drop_duplicates=drop_duplicates,
                progress_callback=progress_callback,
            )

            # only keep wells with observations
            if "groundwaterMonitoringWell" in obs_df.columns:
                gmw = gmw[gmw.index.isin(obs_df["groundwaterMonitoringWell"])]

        logger.info("Downloading tube-properties")

        # get the properties of the monitoringTubes
        gdf = get_tube_gdf_from_characteristics(
            gmw,
            index=index,
            to_path=to_path,
            redownload=redownload,
            zipfile=zipfile,
            _files=_files,
            silent=silent,
        )
    finally:
        # always release the zip-file, also when reading or downloading failed
        if zipfile is not None:
            zipfile.close()

    if zipfile is None and to_zip is not None:
        util._save_data_to_zip(to_zip, _files, remove_path_again, to_path)

    if not obs_df.empty:
        obs_df = obs_df.set_index(
            ["groundwaterMonitoringWell", "tubeNumber"]
        ).sort_index()

    if combine and kind in ["gld", "gar"]:
        if kind == "gld":
            idcol = "groundwaterLevelDossier"
        else:
            idcol = "groundwaterAnalysisReport"
        datcol = _get_data_column(kind)

        logger.info("Combining well-properties, tube-properties and observations")

        data = {}
        ids = {}
        # tube_index is a (well, tube number) combination of the index of gdf
        for tube_index in gdf.index:
            if tube_index not in obs_df.index:
                continue

            data[tube_index] = _combine_observations(
                obs_df.loc[[tube_index], datcol],
                kind=kind,
                bro_id=f"{tube_index[0]}_{tube_index[1]}",
            )
            ids[tube_index] = list(obs_df.loc[[tube_index], "broId"])
        gdf[datcol] = data
        gdf[idcol] = ids
        return gdf
    if kind is None:
        return gdf
    return gdf, obs_df
774def _get_data_column(kind):
775 if kind == "gld":
776 return "observation"
777 elif kind == "gar":
778 return "laboratoryAnalysis"
779 else:
780 raise (NotImplementedError(f"Measurement-kind {kind} not supported yet"))
783def _get_empty_observation_df(kind):
784 if kind == "gld":
785 return gld._get_empty_observation_df()
786 elif kind == "gar":
787 return gar._get_empty_observation_df()
788 else:
789 raise (NotImplementedError(f"Measurement-kind {kind} not supported yet"))
792def _combine_observations(
793 observations, kind, bro_id=None, sort=True, drop_duplicates=True
794):
795 obslist = []
796 for observation in observations:
797 if not isinstance(observation, pd.DataFrame) or observation.empty:
798 continue
799 obslist.append(observation)
800 if len(obslist) == 0:
801 return _get_empty_observation_df(kind)
802 else:
803 df = pd.concat(obslist).sort_index()
804 if kind == "gld":
805 if sort:
806 df = gld.sort_observations(df)
807 if drop_duplicates:
808 df = gld.drop_duplicate_observations(df, bro_id=bro_id)
809 return df
def get_tube_gdf_from_characteristics(characteristics_gdf, **kwargs):
    """
    Generate a GeoDataFrame of tube properties based on well characteristics.

    The unique values of the index of `characteristics_gdf` are used as the bro-ids
    of the wells, and are passed on to `get_tube_gdf_from_bro_ids`.

    Parameters
    ----------
    characteristics_gdf : gpd.GeoDataFrame
        GeoDataFrame of well characteristics with bro-ids of the
        GroundwaterMonitoringWells as the index, retrieved with
        `brodata.gmw.get_characteristics`.
    **kwargs : dict
        Kwargs are passed onto `get_tube_gdf_from_bro_ids`, for example `index`.

    Returns
    -------
    gpd.GeoDataFrame
        GeoDataFrame of combined well and tube properties
    """
    return get_tube_gdf_from_bro_ids(characteristics_gdf.index.unique(), **kwargs)
def get_tube_gdf_from_bro_ids(
    bro_ids,
    index=None,
    **kwargs,
):
    """
    Generate a GeoDataFrame of tube properties based on an iterable of gmw bro-ids.

    The GroundwaterMonitoringWell-objects are retrieved with
    `bro._get_data_for_bro_ids`, after which the properties of the wells and their
    monitoring tubes are combined by `get_tube_gdf`.

    Parameters
    ----------
    bro_ids : iterable of str
        The bro-ids of the GroundwaterMonitoringWells.
    index : str or list of str, optional
        Column(s) to use as the index for the resulting GeoDataFrame. Defaults
        to ['groundwaterMonitoringWell', 'tubeNumber'] if not provided.
    **kwargs : dict
        Kwargs are passed onto `bro._get_data_for_bro_ids`.

    Returns
    -------
    gpd.GeoDataFrame
        GeoDataFrame of combined well and tube properties
    """
    gmws = bro._get_data_for_bro_ids(
        GroundwaterMonitoringWell,
        bro_ids,
        desc="Downloading Groundwater Monitoring Wells",
        **kwargs,
    )
    return get_tube_gdf(gmws, index=index)
# the class that the generic functions below are bound to
cl = GroundwaterMonitoringWell

# module-level versions of the generic bro-functions, with the class filled in
get_bro_ids_of_bronhouder = partial(bro._get_bro_ids_of_bronhouder, cl)
get_data_for_bro_ids = partial(bro._get_data_for_bro_ids, cl)
get_characteristics = partial(bro._get_characteristics, cl)

# partial-objects do not take over the docstring, so copy it from the originals
get_bro_ids_of_bronhouder.__doc__ = bro._get_bro_ids_of_bronhouder.__doc__
get_data_for_bro_ids.__doc__ = bro._get_data_for_bro_ids.__doc__
get_characteristics.__doc__ = bro._get_characteristics.__doc__