Coverage for brodata/gmw.py: 74%

1import json

2import logging

3import os

4from functools import partial

5from zipfile import ZipFile

7import numpy as np

8import pandas as pd

10from . import bro, gld, gar, frd, gmn, util

12logger = logging.getLogger(__name__)

def get_well_code(bro_id):
    """
    Look up the well code that belongs to a BRO-ID.

    A GET request is sent to the ``well-code`` endpoint below
    ``GroundwaterMonitoringWell._rest_url``. When the response has a status code
    above 200, the reason of the response is logged as an error and None is returned.

    Parameters
    ----------
    bro_id : str
        The BRO-ID for which the well code is requested.

    Returns
    -------
    well_code : str or None
        The body of the response as plain text, or None when the request failed.
    """
    response = bro.util.get_with_rate_limit(
        f"{GroundwaterMonitoringWell._rest_url}/well-code/{bro_id}"
    )
    if response.status_code <= 200:
        return response.text
    # any status code above 200 is treated as a failed request
    logger.error(response.reason)
    return None

class GroundwaterMonitoringWell(bro.FileOrUrl):
    """
    A Groundwater Monitoring Well (GMW) from the BRO.

    The XML data of a groundwater monitoring well is parsed and stored in attributes
    of the instance: elements without children become plain attributes, the
    monitoring tubes are collected in ``monitoringTube`` and the intermediate events
    of the well history in ``intermediateEvent``.

    Notes
    -----
    This class extends `bro.FileOrUrl`. The parsing helpers that are called on
    ``self`` but are not defined in this class body (such as ``_get_main_object`` and
    ``_read_children_of_children``) are expected to be provided by that base class.
    """

    _rest_url = "https://publiek.broservices.nl/gm/gmw/v1"
    _xmlns = "http://www.broservices.nl/xsd/dsgmw/1.1"
    _char = "GMW_C"

    def _read_contents(self, tree):
        """
        Parse the GMW element of an XML tree and store its contents on the instance.

        Parameters
        ----------
        tree : element tree
            The parsed XML data, passed on to ``self._get_main_object``.
        """
        ns = {
            "brocom": "http://www.broservices.nl/xsd/brocommon/3.0",
            "xmlns": self._xmlns,
        }
        gmw = self._get_main_object(tree, ["GMW_PO", "GMW_PPO", "BRO_DO"], ns)

        # attribute names are namespaced as "{namespace}name": keep only the name
        for name, value in gmw.attrib.items():
            setattr(self, name.split("}", 1)[1], value)

        for child in gmw:
            tag = self._get_tag(child)
            if len(child) == 0:
                # an element without children: store its text as-is
                setattr(self, tag, child.text)
            elif tag == "standardizedLocation":
                self._read_standardized_location(child)
            elif tag == "deliveredLocation":
                self._read_delivered_location(child)
            elif tag == "wellHistory":
                for grandchild in child:
                    subtag = self._get_tag(grandchild)
                    if subtag in ["wellConstructionDate", "wellRemovalDate"]:
                        setattr(self, subtag, self._read_date(grandchild))
                    elif subtag == "intermediateEvent":
                        # a well can have several events: collect them in a list
                        if not hasattr(self, "intermediateEvent"):
                            self.intermediateEvent = []
                        self.intermediateEvent.append(
                            self._read_intermediate_event(grandchild)
                        )
                    else:
                        self._warn_unknown_tag(subtag)
            elif tag in ["deliveredVerticalPosition", "registrationHistory"]:
                self._read_children_of_children(
                    child, to_float=["offset", "groundLevelPosition"]
                )
            elif tag in ["monitoringTube"]:
                # a well can have several tubes: collect one dictionary per tube
                if not hasattr(self, "monitoringTube"):
                    self.monitoringTube = []
                tube = {}
                numeric_fields = [
                    "tubeTopDiameter",
                    "tubeTopPosition",
                    "screenLength",
                    "screenTopPosition",
                    "screenBottomPosition",
                    "plainTubePartLength",
                ]
                self._read_children_of_children(child, tube, to_float=numeric_fields)
                self.monitoringTube.append(tube)
            else:
                self._warn_unknown_tag(tag)

        # turn the collected lists into DataFrames
        if hasattr(self, "monitoringTube"):
            self.monitoringTube = pd.DataFrame(self.monitoringTube)
            self.monitoringTube["tubeNumber"] = self.monitoringTube[
                "tubeNumber"
            ].astype(int)
            self.monitoringTube = self.monitoringTube.set_index("tubeNumber")
        if hasattr(self, "intermediateEvent"):
            self.intermediateEvent = pd.DataFrame(self.intermediateEvent)

    def _read_intermediate_event(self, node):
        """
        Read the name and the date of a single intermediate event.

        Parameters
        ----------
        node : element
            The ``intermediateEvent`` element.

        Returns
        -------
        dict
            A dictionary with the key ``eventName`` (text of the element) and/or
            ``eventDate`` (result of ``self._read_date``), for the children that are
            present. Other children only trigger a warning about an unknown tag.
        """
        event = {}
        for child in node:
            tag = self._get_tag(child)
            if tag == "eventName":
                event[tag] = child.text
            elif tag == "eventDate":
                event[tag] = self._read_date(child)
            else:
                self._warn_unknown_tag(tag)
        return event

131

132

def get_observations(
    bro_ids,
    kind="gld",
    drop_references=True,
    silent=False,
    tmin=None,
    tmax=None,
    as_csv=False,
    tube_number=None,
    status=None,
    observation_type=None,
    qualifier=None,
    to_path=None,
    to_zip=None,
    redownload=False,
    zipfile=None,
    continue_on_error=False,
    sort=True,
    drop_duplicates=True,
    progress_callback=None,
    _files=None,
):
    """
    Retrieve groundwater observations for the specified monitoring wells (bro_ids).

    For every unique well the relations of the well (the dossiers/reports that are
    linked to each monitoring tube) are requested, after which the referenced objects
    of the requested kind are downloaded, or read from previously downloaded files.

    Parameters
    ----------
    bro_ids : str or list or pd.DataFrame
        The BRO IDs of the monitoring wells for which to retrieve the data. If a
        DataFrame is provided, its index is used as the list of BRO IDs. If a single
        string is provided, the progress bar is disabled.
    kind : str, optional
        The type of observations to retrieve. Can be one of {'gmn', 'gld', 'gar', 'frd'}.
        Defaults to 'gld' (groundwater level dossier).
    drop_references : bool or list of str, optional
        The reference fields to drop from the rows that are returned when `as_csv` is
        True. When True (the default), 'gmnReferences', 'gldReferences' and
        'garReferences' are removed. When False, nothing is removed. A list of keys
        can be supplied as well.
    silent : bool, optional
        If True, suppresses the progress bar. Defaults to False.
    tmin : str or datetime, optional
        The minimum time filter for the observations. Only used if `kind` is 'gld'.
        Defaults to None.
    tmax : str or datetime, optional
        The maximum time filter for the observations. Only used if `kind` is 'gld'.
        Defaults to None.
    as_csv : bool, optional
        If True, requests the observations as CSV files instead of XML-files. Only valid
        if `kind` is 'gld'. Defaults to False.
    tube_number : int, optional
        Filters observations to a specific tube number. Defaults to None.
    status : str, optional
        A status string for additional filtering. Possible values are
        "volledigBeoordeeld", "voorlopig" and "onbekend". Only used if `kind` is 'gld'.
        Defaults to None.
    observation_type : str, optional
        An observation type string for additional filtering. Possible values are
        "reguliereMeting" and "controleMeting". Only used if `kind` is 'gld'. Defaults
        to None.
    qualifier : str or list of str, optional
        A qualifier string for additional filtering. Only valid if `kind` is 'gld'.
        Defaults to None.
    to_path : str, optional
        If not None, save the downloaded files in the directory named to_path. The
        default is None.
    to_zip : str, optional
        If not None, save the downloaded files in a zip-file named to_zip. The default
        is None.
    redownload : bool, optional
        When downloaded files exist in to_path, read from these files when redownload
        is False. If redownload is True, download the data again from the
        BRO-servers. The default is False.
    zipfile : zipfile.ZipFile, optional
        A zipfile-object. When not None, zipfile is used to read previously downloaded
        data from. The default is None.
    continue_on_error : bool, optional
        If True, continue after an error occurs during downloading or processing of
        individual observation data. Defaults to False.
    sort : bool, optional
        If True, sort the observations. Only used if `kind` is 'gld'. Defaults to True.
    drop_duplicates : bool, optional
        If True, drop duplicate observations based on their timestamp. Only used if
        `kind` is 'gld'. Defaults to True.
    progress_callback : function, optional
        A callback function that takes two arguments (current, total) to report
        progress, where total is the number of unique wells. If None, no progress
        reporting is done. Defaults to None.
    _files : list, optional
        Internal bookkeeping of the files that are written, passed on to the helpers
        in `util`. Defaults to None.

    Returns
    -------
    pd.DataFrame or None
        A DataFrame with one row for every downloaded object (dossier or report).
        None is returned when the request for the relations of a well fails; the
        error message is logged in that case.

    Raises
    ------
    NotImplementedError
        If `to_zip` is an existing file and `redownload` is False.
    ValueError
        If `kind` is not supported.
    Exception
        If `as_csv=True` and `kind` is not 'gld', or if `qualifier` is provided for
        a kind other than 'gld'.
    """
    tubes = []

    if isinstance(bro_ids, str):
        bro_ids = [bro_ids]
        silent = True

    if isinstance(bro_ids, pd.DataFrame):
        bro_ids = bro_ids.index

    if isinstance(drop_references, bool):
        if drop_references:
            drop_references = [
                "gmnReferences",
                "gldReferences",
                "garReferences",
                # "frdReferences",
            ]
        else:
            drop_references = []

    if to_zip is not None:
        if not redownload and os.path.isfile(to_zip):
            raise (NotImplementedError("Redownload=False is not suppported yet"))
        if to_path is None:
            to_path = os.path.splitext(to_zip)[0]
        # remember whether the directory has to be removed after zipping
        remove_path_again = not os.path.isdir(to_path)
        if _files is None:
            _files = []

    desc = f"Downloading {kind}-observations"
    if as_csv and kind != "gld":
        raise (Exception("as_csv=True is only supported for kind=='gld'"))
    if qualifier is not None and kind != "gld":
        raise (Exception("A qualifier is only supported for kind=='gld'"))
    if to_path is not None and not os.path.isdir(to_path):
        os.makedirs(to_path)

    if kind == "gld":
        meas_cl = gld.GroundwaterLevelDossier
    elif kind == "gar":
        meas_cl = gar.GroundwaterAnalysisReport
    elif kind == "frd":
        meas_cl = frd.FormationResistanceDossier
    elif kind == "gmn":
        meas_cl = gmn.GroundwaterMonitoringNetwork
    else:
        raise (ValueError(f"kind='{kind}' not supported"))

    gld_kwargs = _get_gld_kwargs(
        kind, tmin, tmax, qualifier, status, observation_type, sort, drop_duplicates
    )

    # determine the unique wells once, so the total that is reported to
    # progress_callback matches the number of iterations of the loop
    unique_ids = np.unique(bro_ids)
    n_wells = len(unique_ids)

    for igmw, bro_id in enumerate(util.tqdm(unique_ids, disable=silent, desc=desc)):
        to_rel_file = util._get_to_file(
            f"gmw_relations_{bro_id}.json", zipfile, to_path, _files
        )
        if zipfile is None and (
            redownload or to_rel_file is None or not os.path.isfile(to_rel_file)
        ):
            # download the relations of the well
            url = f"https://publiek.broservices.nl/gm/v1/gmw-relations/{bro_id}"
            req = bro.util.get_with_rate_limit(url)
            if req.status_code > 200:
                logger.error(req.json()["errors"][0]["message"])
                return
            if to_rel_file is not None:
                with open(to_rel_file, "w") as f:
                    f.write(req.text)
            data = req.json()
        else:
            # read the relations of the well from a previously downloaded file
            if zipfile is not None:
                with zipfile.open(to_rel_file) as f:
                    data = json.load(f)
            else:
                with open(to_rel_file) as f:
                    data = json.load(f)

        for tube_ref in data["monitoringTubeReferences"]:
            tube_ref["groundwaterMonitoringWell"] = data["gmwBroId"]
            if tube_number is not None:
                if tube_ref["tubeNumber"] != tube_number:
                    continue
            ref_key = f"{kind}References"
            for ref in tube_ref[ref_key]:
                obsdata = _download_observations_for_bro_id(
                    ref["broId"],
                    meas_cl,
                    as_csv,
                    zipfile,
                    to_path,
                    _files,
                    gld_kwargs,
                    redownload=redownload,
                    continue_on_error=continue_on_error,
                )
                if obsdata is None:
                    # no data could be obtained for this reference: skip it
                    continue
                if as_csv:
                    # use a copy for every reference: a tube can refer to several
                    # dossiers, and each of them needs its own row
                    row = dict(tube_ref)
                    row["observation"] = obsdata
                    for key in drop_references:
                        if key in row:
                            row.pop(key)
                        else:
                            logger.warning(
                                "{} not defined for {}, filter {}".format(
                                    key,
                                    row["groundwaterMonitoringWell"],
                                    row["tubeNumber"],
                                )
                            )

                    row["broId"] = ref["broId"]
                    tubes.append(row)
                else:
                    tubes.append(obsdata.to_dict())

        if progress_callback is not None:
            progress_callback(igmw + 1, n_wells)

    if to_zip is not None:
        util._save_data_to_zip(to_zip, _files, remove_path_again, to_path)
    return pd.DataFrame(tubes)

352

353

def _download_observations_for_bro_id(
    bro_id,
    meas_cl,
    as_csv,
    zipfile,
    to_path,
    _files,
    gld_kwargs,
    redownload=False,
    continue_on_error=False,
):
    """
    Download a single object with observations, or read it from a saved file.

    Parameters
    ----------
    bro_id : str
        The BRO-ID of the object (dossier or report) with the observations.
    meas_cl : class
        The class that is used to download (``meas_cl.from_bro_id``) or read
        (``meas_cl(...)``) the object when `as_csv` is False.
    as_csv : bool
        If True, the data is requested or read as csv, using the functions in `gld`.
    zipfile : zipfile.ZipFile or None
        When not None, the data is read from this zipfile instead of downloaded.
    to_path : str or None
        The directory in which downloaded files are stored.
    _files : list or None
        Internal bookkeeping of written files, passed on to ``util._get_to_file``.
    gld_kwargs : dict
        Keyword arguments that are passed on to the reader.
    redownload : bool, optional
        If True, download the data even if the file already exists. The default is
        False.
    continue_on_error : bool, optional
        If True, an exception during downloading is logged instead of raised. The
        default is False.

    Returns
    -------
    data
        The result of the reader, or None when downloading failed and
        `continue_on_error` is True.
    """
    # make sure there is a defined result when an error is logged instead of raised
    data = None
    observatietype = None
    if as_csv:
        fname = f"{bro_id}.csv"
        # translate the filters to the observation type of the csv request;
        # the status takes precedence over the observation_type
        status = gld_kwargs.get("status")
        if status == "voorlopig":
            observatietype = "regulier_voorlopig"
        elif status == "volledigBeoordeeld":
            observatietype = "regulier_beoordeeld"
        elif status == "onbekend":
            observatietype = "onbekend"
        elif gld_kwargs.get("observation_type") == "controleMeting":
            observatietype = "controle"
    else:
        fname = f"{bro_id}.xml"
    to_file = util._get_to_file(fname, zipfile, to_path, _files)
    if zipfile is None and (
        redownload or to_file is None or not os.path.isfile(to_file)
    ):
        # download the data
        if as_csv:
            try:
                data = gld.get_objects_as_csv(
                    bro_id,
                    observatietype=observatietype,
                    to_file=to_file,
                    **gld_kwargs,
                )
            except Exception as e:
                if not continue_on_error:
                    raise
                logger.error(
                    "Error processing %s csv for broid %s: %s",
                    meas_cl.__name__,
                    bro_id,
                    e,
                )
        else:
            try:
                data = meas_cl.from_bro_id(bro_id, to_file=to_file, **gld_kwargs)
            except Exception as e:
                if not continue_on_error:
                    raise
                logger.error(
                    "Error processing %s xml for broid %s: %s",
                    meas_cl.__name__,
                    bro_id,
                    e,
                )
    elif as_csv:
        # read the csv-data from a file
        if zipfile is not None:
            # close the member of the zipfile again after reading
            with zipfile.open(to_file) as f:
                data = gld.read_gld_csv(
                    f,
                    bro_id,
                    observatietype=observatietype,
                    **gld_kwargs,
                )
        else:
            data = gld.read_gld_csv(
                to_file,
                bro_id,
                observatietype=observatietype,
                **gld_kwargs,
            )
    else:
        # read the xml-data from a file
        data = meas_cl(to_file, zipfile=zipfile, **gld_kwargs)
    return data

428

429

430def _get_gld_kwargs(

431 kind, tmin, tmax, qualifier, status, observation_type, sort, drop_duplicates

432):

433 gld_kwargs = {}

434 if kind == "gld":

435 if tmin is not None:

436 gld_kwargs["tmin"] = tmin

437 if tmax is not None:

438 gld_kwargs["tmax"] = tmax

439 if qualifier is not None:

440 gld_kwargs["qualifier"] = qualifier

441 if status is not None:

442 gld_kwargs["status"] = status

443 if observation_type is not None:

444 gld_kwargs["observation_type"] = observation_type

445 gld_kwargs["sort"] = sort

446 gld_kwargs["drop_duplicates"] = drop_duplicates

447 return gld_kwargs

448

449

def get_tube_observations(
    gwm_id, tube_number, kind="gld", sort=True, drop_duplicates=True, **kwargs
):
    """
    Get the combined observations of a single groundwater monitoring tube.

    Parameters
    ----------
    gwm_id : str
        The bro_id of the groundwater monitoring well.
    tube_number : int
        The tube number.
    kind : str, optional
        The type of observations to retrieve, passed on to get_observations.
        Combining the observations is only implemented for 'gld' and 'gar'. Defaults
        to 'gld' (groundwater level dossier).
    sort : bool, optional
        If True, sort the observations. Only used if `kind` is 'gld'. Defaults to True.
    drop_duplicates : bool, optional
        If True, drop duplicate observations based on their timestamp. Only used if
        `kind` is 'gld'. Defaults to True.
    **kwargs : dict
        Kwargs are passed onto get_observations.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the observations of the tube. An empty DataFrame for
        the requested kind is returned when no objects are found.
    """
    # Sorting and dropping of duplicates is switched off here: it is done once,
    # after the observations of all objects of the tube are combined.
    objects = get_observations(
        gwm_id,
        tube_number=tube_number,
        kind=kind,
        sort=False,
        drop_duplicates=False,
        **kwargs,
    )
    if objects.empty:
        return _get_empty_observation_df(kind)

    column = _get_data_column(kind)
    return _combine_observations(
        objects[column],
        kind=kind,
        bro_id=f"{gwm_id}_{tube_number}",
        sort=sort,
        drop_duplicates=drop_duplicates,
    )

500

501

def get_tube_gdf(gmws, index=None):
    """
    Combine the properties of wells and their tubes, with one row for every tube.

    For every well that has a DataFrame in the column ``monitoringTube``, the
    properties of the well are joined with the properties of each of its tubes. The
    resulting records are converted by `bro.objects_to_gdf` and sorted on the index.

    Parameters
    ----------
    gmws : list or dict of GroundwaterMonitoringWell, or pd.DataFrame
        Well and tube data in one of the following formats: a list of
        `GroundwaterMonitoringWell` objects, a dictionary of these objects, or a
        DataFrame with the bro-ids of the GroundwaterMonitoringWells as the index and
        the column monitoringTube containing tube properties.
    index : str or list of str, optional
        The column or columns to use for indexing the result. Defaults to
        ['groundwaterMonitoringWell', 'tubeNumber'] if not provided.

    Returns
    -------
    gdf : gpd.GeoDataFrame
        The result of `bro.objects_to_gdf` for the combined well and tube
        properties, sorted on its index.
    """
    if isinstance(gmws, (list, dict)):
        # lists and dictionaries of wells are turned into a DataFrame first
        wells = gmws.values() if isinstance(gmws, dict) else gmws
        gmws = pd.DataFrame([well.to_dict() for well in wells])
        if "broId" in gmws.columns:
            gmws = gmws.set_index("broId")

    tubes = []
    for bro_id in gmws.index:
        tube_df = gmws.loc[bro_id, "monitoringTube"]
        if not isinstance(tube_df, pd.DataFrame):
            # no tube properties are available for this well
            continue
        well_properties = gmws.loc[bro_id].drop("monitoringTube")
        for tube_number in tube_df.index:
            # combine properties of well and tube
            tube = pd.concat((well_properties, tube_df.loc[tube_number]))
            tube["groundwaterMonitoringWell"] = bro_id
            tube["tubeNumber"] = tube_number
            tubes.append(tube)

    if index is None:
        index = ["groundwaterMonitoringWell", "tubeNumber"]
    return bro.objects_to_gdf(tubes, index=index).sort_index()

567

568

def get_data_in_extent(
    extent,
    kind="gld",
    tmin=None,
    tmax=None,
    combine=None,
    index=None,
    as_csv=False,
    qualifier=None,
    to_zip=None,
    to_path=None,
    redownload=False,
    silent=False,
    continue_on_error=False,
    sort=True,
    drop_duplicates=True,
    progress_callback=None,
):
    """
    Retrieve metadata and observations within a specified spatial extent.

    This function fetches monitoring well characteristics, groundwater observations,
    and tube properties within the given spatial extent. It can combine the data
    for specific observation types and return either individual dataframes or a
    combined dataframe.

    Parameters
    ----------
    extent : str or sequence
        The spatial extent ([xmin, xmax, ymin, ymax]) to filter the data. When a
        string is supplied, it is used as the name of the zip-file (`to_zip`), the
        extent is set to None and `redownload` is set to False.
    kind : str, optional
        The type of observations to retrieve. Valid values are {'gld', 'gar'} for
        groundwater level dossier or groundwater analysis report. When kind is None, no
        observations are downloaded. Defaults to 'gld'.
    tmin : str or datetime, optional
        The minimum time for filtering observations. Defaults to None.
    tmax : str or datetime, optional
        The maximum time for filtering observations. Defaults to None.
    combine : bool, optional
        If True, combines the metadata, tube properties, and observations into a single
        dataframe. Defaults to False, which will change to True in a future version.
        A warning is logged when combine is not specified.
    index : str, optional
        The column to use for indexing in the resulting dataframe. Defaults to None.
    as_csv : bool, optional
        If True, the measurement data is requested as CSV files instead of XML files
        (only supported for 'gld'). Defaults to False.
    qualifier : str or list of str, optional
        A string or list of strings used to filter the observations. Only valid if
        `kind` is 'gld'. Defaults to None.
    to_zip : str, optional
        If not None, save the downloaded files in a zip-file named to_zip. When this
        file already exists and redownload is False, the data is read from it. The
        default is None.
    to_path : str, optional
        If not None, save the downloaded files in the directory named to_path. The
        default is None.
    redownload : bool, optional
        When downloaded files exist in to_path or to_zip, read from these files when
        redownload is False. If redownload is True, download the data again from the
        BRO-server. The default is False.
    silent : bool, optional
        If True, suppresses progress logging. Defaults to False.
    continue_on_error : bool, optional
        If True, continue after an error occurs during downloading or processing of
        individual observation data. Defaults to False.
    sort : bool, optional
        If True, sort the observations, also when the observations are combined. Only
        used if `kind` is 'gld'. Defaults to True.
    drop_duplicates : bool, optional
        If True, drop duplicate observations based on their timestamp, also when the
        observations are combined. Only used if `kind` is 'gld'. Defaults to True.
    progress_callback : function, optional
        A callback function that takes two arguments (current, total) to report
        progress. If None, no progress reporting is done. Defaults to None.

    Returns
    -------
    gdf : pd.DataFrame
        A dataframe containing tube properties and metadata within the specified extent.
        When `combine` is True and `kind` is 'gld' or 'gar', it also contains a
        column with the combined observations and a column with the ids of the
        dossiers or reports of each tube.

    obs_df : pd.DataFrame, optional
        A dataframe containing the observations for the specified wells. Returned only
        if `kind` is not None and the data is not combined.

    Raises
    ------
    Exception
        If `extent` is a string and `to_zip` is supplied as well, or if `as_csv=True`
        and `kind` is not 'gld'.
    """
    if combine is None:
        logger.warning(
            "The default of `combine=False` will change to True in a future version of "
            "brodata. Pass combine=False to retain current behavior or combine=True to "
            "adopt the future default and silence this warning."
        )
        combine = False
    if isinstance(extent, str):
        if to_zip is not None:
            raise (Exception("When extent is a string, do not supply to_zip"))
        to_zip = extent
        extent = None
        redownload = False

    zipfile = None
    _files = None
    if to_zip is not None:
        if not redownload and os.path.isfile(to_zip):
            logger.info(f"Reading data from {to_zip}")
            zipfile = ZipFile(to_zip)
        else:
            if to_path is None:
                to_path = os.path.splitext(to_zip)[0]
            # remember whether the directory has to be removed after zipping
            remove_path_again = not os.path.isdir(to_path)
            _files = []

    try:
        if to_path is not None and not os.path.isdir(to_path):
            os.makedirs(to_path)

        # get gwm characteristics
        logger.info(f"Getting gmw-characteristics in extent: {extent}")

        to_file = util._get_to_file(
            "gmw_characteristics.xml", zipfile, to_path, _files
        )
        gmw = get_characteristics(
            extent=extent, to_file=to_file, redownload=redownload, zipfile=zipfile
        )

        if kind is None:
            obs_df = pd.DataFrame()
            combine = False
        else:
            # get observations
            logger.info(f"Downloading {kind}-observations")
            obs_df = get_observations(
                gmw,
                kind=kind,
                tmin=tmin,
                tmax=tmax,
                as_csv=as_csv,
                qualifier=qualifier,
                to_path=to_path,
                redownload=redownload,
                zipfile=zipfile,
                _files=_files,
                silent=silent,
                continue_on_error=continue_on_error,
                sort=sort,
                drop_duplicates=drop_duplicates,
                progress_callback=progress_callback,
            )

            # only keep wells with observations
            if "groundwaterMonitoringWell" in obs_df.columns:
                gmw = gmw[gmw.index.isin(obs_df["groundwaterMonitoringWell"])]

        logger.info("Downloading tube-properties")

        # get the properties of the monitoringTubes
        gdf = get_tube_gdf_from_characteristics(
            gmw,
            index=index,
            to_path=to_path,
            redownload=redownload,
            zipfile=zipfile,
            _files=_files,
            silent=silent,
        )
    finally:
        # also close the zipfile when reading the data fails
        if zipfile is not None:
            zipfile.close()

    if zipfile is None and to_zip is not None:
        util._save_data_to_zip(to_zip, _files, remove_path_again, to_path)

    if not obs_df.empty:
        obs_df = obs_df.set_index(
            ["groundwaterMonitoringWell", "tubeNumber"]
        ).sort_index()

    if combine and kind in ["gld", "gar"]:
        if kind == "gld":
            idcol = "groundwaterLevelDossier"
        elif kind == "gar":
            idcol = "groundwaterAnalysisReport"
        datcol = _get_data_column(kind)

        logger.info("Combining well-properties, tube-properties and observations")

        data = {}
        ids = {}
        for tube_index in gdf.index:
            if tube_index not in obs_df.index:
                continue

            # a tube can have several dossiers or reports: combine them, honouring
            # the sort and drop_duplicates settings of the caller
            data[tube_index] = _combine_observations(
                obs_df.loc[[tube_index], datcol],
                kind=kind,
                bro_id=f"{tube_index[0]}_{tube_index[1]}",
                sort=sort,
                drop_duplicates=drop_duplicates,
            )
            ids[tube_index] = list(obs_df.loc[[tube_index], "broId"])
        gdf[datcol] = data
        gdf[idcol] = ids
        return gdf
    else:
        if kind is None:
            return gdf
        else:
            return gdf, obs_df

771

772

773def _get_data_column(kind):

774 if kind == "gld":

775 return "observation"

776 elif kind == "gar":

777 return "laboratoryAnalysis"

778 else:

779 raise (NotImplementedError(f"Measurement-kind {kind} not supported yet"))

780

781

782def _get_empty_observation_df(kind):

783 if kind == "gld":

784 return gld._get_empty_observation_df()

785 elif kind == "gar":

786 return gar._get_empty_observation_df()

787 else:

788 raise (NotImplementedError(f"Measurement-kind {kind} not supported yet"))

789

790

791def _combine_observations(

792 observations, kind, bro_id=None, sort=True, drop_duplicates=True

793):

794 obslist = []

795 for observation in observations:

796 if not isinstance(observation, pd.DataFrame) or observation.empty:

797 continue

798 obslist.append(observation)

799 if len(obslist) == 0:

800 return _get_empty_observation_df(kind)

801 else:

802 df = pd.concat(obslist).sort_index()

803 if kind == "gld":

804 if sort:

805 df = gld.sort_observations(df)

806 if drop_duplicates:

807 df = gld.drop_duplicate_observations(df, bro_id=bro_id)

808 return df

809

810

def get_tube_gdf_from_characteristics(characteristics_gdf, **kwargs):
    """
    Generate a GeoDataFrame of tube properties based on well characteristics.

    The unique values of the index of `characteristics_gdf` are used as the bro-ids
    of the wells, and are passed on to `get_tube_gdf_from_bro_ids`.

    Parameters
    ----------
    characteristics_gdf : gpd.GeoDataFrame
        GeoDataFrame of well characteristics with bro-ids of the
        GroundwaterMonitoringWells as the index, retrieved with
        `brodata.gmw.get_characteristics`.
    **kwargs : dict
        Kwargs are passed onto `get_tube_gdf_from_bro_ids`, for example `index`.

    Returns
    -------
    gpd.GeoDataFrame
        GeoDataFrame of combined well and tube properties.
    """
    return get_tube_gdf_from_bro_ids(characteristics_gdf.index.unique(), **kwargs)

836

837

def get_tube_gdf_from_bro_ids(
    bro_ids,
    index=None,
    **kwargs,
):
    """
    Generate a GeoDataFrame of tube properties based on an iterable of gmw bro-ids.

    The GroundwaterMonitoringWell-objects are retrieved with
    `bro._get_data_for_bro_ids`, after which the properties of the wells and their
    monitoring tubes are combined by `get_tube_gdf`.

    Parameters
    ----------
    bro_ids : iterable of str
        The bro-ids of the GroundwaterMonitoringWells.
    index : str or list of str, optional
        Column(s) to use as the index for the resulting GeoDataFrame. Defaults
        to ['groundwaterMonitoringWell', 'tubeNumber'] if not provided.
    **kwargs : dict
        Kwargs are passed onto `bro._get_data_for_bro_ids`.

    Returns
    -------
    gpd.GeoDataFrame
        GeoDataFrame of combined well and tube properties.
    """
    wells = bro._get_data_for_bro_ids(
        GroundwaterMonitoringWell,
        bro_ids,
        desc="Downloading Groundwater Monitoring Wells",
        **kwargs,
    )
    return get_tube_gdf(wells, index=index)

871

872

# Module-level convenience functions: the generic functions of `bro`, with the
# GroundwaterMonitoringWell class filled in as their first argument.
cl = GroundwaterMonitoringWell

# partial-objects do not take over the docstring of the wrapped function, so the
# docstrings are copied explicitly
get_bro_ids_of_bronhouder = partial(bro._get_bro_ids_of_bronhouder, cl)
get_bro_ids_of_bronhouder.__doc__ = bro._get_bro_ids_of_bronhouder.__doc__

get_data_for_bro_ids = partial(bro._get_data_for_bro_ids, cl)
get_data_for_bro_ids.__doc__ = bro._get_data_for_bro_ids.__doc__

get_characteristics = partial(bro._get_characteristics, cl)
get_characteristics.__doc__ = bro._get_characteristics.__doc__

Coverage for brodata / gmw.py: 74%

331 statements