Coverage for brodata / gmw.py: 74%

331 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-13 12:57 +0000

1import json 

2import logging 

3import os 

4from functools import partial 

5from zipfile import ZipFile 

6 

7import numpy as np 

8import pandas as pd 

9 

10from . import bro, gld, gar, frd, gmn, util 

11 

12logger = logging.getLogger(__name__) 

13 

14 

def get_well_code(bro_id):
    """
    Look up the well code that belongs to a BRO-ID.

    A GET request is sent to the ``well-code`` endpoint below
    ``GroundwaterMonitoringWell._rest_url``. The body of the response is returned
    unchanged as text.

    Parameters
    ----------
    bro_id : str
        The BRO-ID for which the well code is requested.

    Returns
    -------
    well_code : str or None
        The response text when the status code is 200 or lower. When the status
        code is higher than 200, the reason of the response is logged as an error
        and None is returned.
    """
    response = bro.util.get_with_rate_limit(
        f"{GroundwaterMonitoringWell._rest_url}/well-code/{bro_id}"
    )
    if response.status_code > 200:
        # the request failed: report why and signal the failure with None
        logger.error(response.reason)
        return None
    return response.text

41 

42 

class GroundwaterMonitoringWell(bro.FileOrUrl):
    """
    Class to represent a Groundwater Monitoring Well (GMW) from the BRO.

    This class parses XML data related to a groundwater monitoring well (GMW).
    It extracts details such as location, monitoring tube data, and well history
    and stores these in attributes.

    Attributes
    ----------
    monitoringTube : pd.DataFrame
        Only set when the XML contains monitoringTube elements. One row per tube,
        indexed by the integer tubeNumber.
    intermediateEvent : pd.DataFrame
        Only set when the well history contains intermediateEvent elements. One
        row per event, with the columns eventName and eventDate.

    Notes
    -----
    This class extends `bro.FileOrUrl` and is designed to work with GMW XML data,
    either from a file or URL.
    """

    _rest_url = "https://publiek.broservices.nl/gm/gmw/v1"
    _xmlns = "http://www.broservices.nl/xsd/dsgmw/1.1"
    _char = "GMW_C"

    def _read_contents(self, tree):
        """
        Copy the contents of the main GMW element of `tree` to attributes.

        Parameters
        ----------
        tree : xml element tree
            The parsed XML document. It is handed to `self._get_main_object`
            (defined outside this class body) to locate the GMW element.
        """
        ns = {
            "brocom": "http://www.broservices.nl/xsd/brocommon/3.0",
            "xmlns": self._xmlns,
        }

        object_names = ["GMW_PO", "GMW_PPO", "BRO_DO"]
        gmw = self._get_main_object(tree, object_names, ns)

        for key, value in gmw.attrib.items():
            # Attribute names look like "{namespace}name". Keep the part after the
            # last "}", and keep the full name when there is no namespace (the
            # original indexing raised an IndexError for such names).
            setattr(self, key.rsplit("}", 1)[-1], value)

        for child in gmw:
            key = self._get_tag(child)
            if len(child) == 0:
                # a leaf element: store its text as-is
                setattr(self, key, child.text)
            elif key == "standardizedLocation":
                self._read_standardized_location(child)
            elif key == "deliveredLocation":
                self._read_delivered_location(child)
            elif key == "wellHistory":
                self._read_well_history(child)
            elif key in ["deliveredVerticalPosition", "registrationHistory"]:
                to_float = ["offset", "groundLevelPosition"]
                self._read_children_of_children(child, to_float=to_float)
            elif key in ["monitoringTube"]:
                if not hasattr(self, key):
                    self.monitoringTube = []
                tube = {}
                to_float = [
                    "tubeTopDiameter",
                    "tubeTopPosition",
                    "screenLength",
                    "screenTopPosition",
                    "screenBottomPosition",
                    "plainTubePartLength",
                ]
                self._read_children_of_children(child, tube, to_float=to_float)
                self.monitoringTube.append(tube)
            else:
                self._warn_unknown_tag(key)

        # the lists that were collected above are converted to DataFrames
        if hasattr(self, "monitoringTube"):
            self.monitoringTube = pd.DataFrame(self.monitoringTube)
            tubeNumber = self.monitoringTube["tubeNumber"].astype(int)
            self.monitoringTube["tubeNumber"] = tubeNumber
            self.monitoringTube = self.monitoringTube.set_index("tubeNumber")
        if hasattr(self, "intermediateEvent"):
            self.intermediateEvent = pd.DataFrame(self.intermediateEvent)

    def _read_well_history(self, node):
        """Store the construction/removal dates and the events of a wellHistory."""
        for child in node:
            key = self._get_tag(child)
            if key in ["wellConstructionDate", "wellRemovalDate"]:
                setattr(self, key, self._read_date(child))
            elif key == "intermediateEvent":
                if not hasattr(self, key):
                    self.intermediateEvent = []
                event = self._read_intermediate_event(child)
                self.intermediateEvent.append(event)
            else:
                self._warn_unknown_tag(key)

    def _read_intermediate_event(self, node):
        """
        Read one intermediateEvent element.

        Returns
        -------
        dict
            Contains "eventName" (the text of the element) and "eventDate" (the
            result of `self._read_date`), for as far as these children are
            present. Other children only trigger `self._warn_unknown_tag`.
        """
        d = {}
        for child in node:
            key = self._get_tag(child)
            if key == "eventName":
                d[key] = child.text
            elif key == "eventDate":
                d[key] = self._read_date(child)
            else:
                self._warn_unknown_tag(key)
        return d

131 

132 

def get_observations(
    bro_ids,
    kind="gld",
    drop_references=True,
    silent=False,
    tmin=None,
    tmax=None,
    as_csv=False,
    tube_number=None,
    status=None,
    observation_type=None,
    qualifier=None,
    to_path=None,
    to_zip=None,
    redownload=False,
    zipfile=None,
    continue_on_error=False,
    sort=True,
    drop_duplicates=True,
    progress_callback=None,
    _files=None,
):
    """
    Retrieve groundwater observations for the specified monitoring wells (bro_ids).

    For every unique well the gmw-relations are requested (or read from a
    previously saved json-file). For every tube of the well, each referenced
    object of the requested kind is then downloaded or read from file.

    Parameters
    ----------
    bro_ids : str or list or pd.DataFrame
        The BRO IDs of the monitoring wells for which to retrieve the data. If a
        DataFrame is provided, its index is used as the list of BRO IDs. When a
        single string is passed, the progress bar is disabled.
    kind : str, optional
        The type of observations to retrieve. Can be one of {'gmn', 'gld', 'gar',
        'frd'}. Defaults to 'gld' (groundwater level dossier).
    drop_references : bool or list of str, optional
        Specifies which reference fields are dropped from the returned rows. When
        True (the default), 'gmnReferences', 'gldReferences' and 'garReferences'
        are removed. When False, nothing is removed. Only applied when `as_csv` is
        True, as only then the rows are built from the tube references.
    silent : bool, optional
        If True, suppresses the progress bar. Defaults to False.
    tmin : str or datetime, optional
        The minimum time filter for the observations. Defaults to None.
    tmax : str or datetime, optional
        The maximum time filter for the observations. Defaults to None.
    as_csv : bool, optional
        If True, requests the observations as CSV files instead of XML-files. Only
        valid if `kind` is 'gld'. Defaults to False.
    tube_number : int, optional
        Filters observations to a specific tube number. Defaults to None.
    status : str, optional
        A status string for additional filtering. Possible values are
        "volledigBeoordeeld", "voorlopig" and "onbekend". Only used if `kind` is
        'gld'. Defaults to None.
    observation_type : str, optional
        An observation type string for additional filtering. Possible values are
        "reguliereMeting" and "controleMeting". Only used if `kind` is 'gld'.
        Defaults to None.
    qualifier : str or list of str, optional
        A qualifier string for additional filtering. Only valid if `kind` is 'gld'.
        Defaults to None.
    to_path : str, optional
        If not None, save the downloaded files in the directory named to_path. The
        default is None.
    to_zip : str, optional
        If not None, save the downloaded files in a zip-file named to_zip. The
        default is None.
    redownload : bool, optional
        When downloaded files exist in to_path, read from these files when
        redownload is False. If redownload is True, download the data again from
        the BRO-servers. The default is False.
    zipfile : zipfile.ZipFile, optional
        A zipfile-object. When not None, zipfile is used to read previously
        downloaded data from. The default is None.
    continue_on_error : bool, optional
        If True, continue after an error occurs during downloading or processing of
        individual observation data. Defaults to False.
    sort : bool, optional
        If True, sort the observations. Only used if `kind` is 'gld'. Defaults to
        True.
    drop_duplicates : bool, optional
        If True, drop duplicate observations based on their timestamp. Only used if
        `kind` is 'gld'. Defaults to True.
    progress_callback : function, optional
        A callback function that takes two arguments (current, total) to report
        progress, where total is the number of unique wells. If None, no progress
        reporting is done. Defaults to None.

    Returns
    -------
    pd.DataFrame or None
        A DataFrame with one row for each downloaded object. None is returned when
        the request for the relations of a well returns a status code above 200;
        the error message of the response is logged in that case.

    Raises
    ------
    NotImplementedError
        If `to_zip` is an existing file and `redownload` is False.
    ValueError
        If `kind` is not one of the supported kinds.
    Exception
        If `as_csv=True` and `kind` is not 'gld', or if `qualifier` is provided for
        a kind other than 'gld'.
    """
    tubes = []

    if isinstance(bro_ids, str):
        bro_ids = [bro_ids]
        silent = True

    if isinstance(bro_ids, pd.DataFrame):
        bro_ids = bro_ids.index

    if isinstance(drop_references, bool):
        if drop_references:
            drop_references = [
                "gmnReferences",
                "gldReferences",
                "garReferences",
                # "frdReferences",
            ]
        else:
            drop_references = []

    if to_zip is not None:
        if not redownload and os.path.isfile(to_zip):
            raise NotImplementedError("Redownload=False is not suppported yet")
        if to_path is None:
            to_path = os.path.splitext(to_zip)[0]
        # remember whether the directory has to be removed after zipping
        remove_path_again = not os.path.isdir(to_path)
        if _files is None:
            _files = []

    desc = f"Downloading {kind}-observations"
    if as_csv and kind != "gld":
        raise Exception("as_csv=True is only supported for kind=='gld'")
    if qualifier is not None and kind != "gld":
        raise Exception("A qualifier is only supported for kind=='gld'")
    if to_path is not None and not os.path.isdir(to_path):
        os.makedirs(to_path)

    if kind == "gld":
        meas_cl = gld.GroundwaterLevelDossier
    elif kind == "gar":
        meas_cl = gar.GroundwaterAnalysisReport
    elif kind == "frd":
        meas_cl = frd.FormationResistanceDossier
    elif kind == "gmn":
        meas_cl = gmn.GroundwaterMonitoringNetwork
    else:
        raise ValueError(f"kind='{kind}' not supported")

    gld_kwargs = _get_gld_kwargs(
        kind, tmin, tmax, qualifier, status, observation_type, sort, drop_duplicates
    )

    # The loop runs over the unique ids, so the total that is reported to
    # progress_callback has to be the number of unique ids as well.
    unique_ids = np.unique(bro_ids)
    n_wells = len(unique_ids)
    ref_key = f"{kind}References"

    for igmw, bro_id in enumerate(util.tqdm(unique_ids, disable=silent, desc=desc)):
        to_rel_file = util._get_to_file(
            f"gmw_relations_{bro_id}.json", zipfile, to_path, _files
        )
        if zipfile is None and (
            redownload or to_rel_file is None or not os.path.isfile(to_rel_file)
        ):
            url = f"https://publiek.broservices.nl/gm/v1/gmw-relations/{bro_id}"
            req = bro.util.get_with_rate_limit(url)
            if req.status_code > 200:
                logger.error(req.json()["errors"][0]["message"])
                return
            if to_rel_file is not None:
                with open(to_rel_file, "w") as f:
                    f.write(req.text)
            data = req.json()
        elif zipfile is not None:
            with zipfile.open(to_rel_file) as f:
                data = json.load(f)
        else:
            with open(to_rel_file) as f:
                data = json.load(f)

        for tube_ref in data["monitoringTubeReferences"]:
            tube_ref["groundwaterMonitoringWell"] = data["gmwBroId"]
            if tube_number is not None and tube_ref["tubeNumber"] != tube_number:
                continue
            for ref in tube_ref[ref_key]:
                obsdata = _download_observations_for_bro_id(
                    ref["broId"],
                    meas_cl,
                    as_csv,
                    zipfile,
                    to_path,
                    _files,
                    gld_kwargs,
                    redownload=redownload,
                    continue_on_error=continue_on_error,
                )
                if obsdata is None:
                    # nothing was returned for this reference (a failed download
                    # with continue_on_error=True), so there is no row to add
                    continue
                if as_csv:
                    # Every reference gets its own copy of the tube properties.
                    # Appending the same dict for every reference would make all
                    # rows of a tube show the data of the last reference.
                    row = dict(tube_ref)
                    row["observation"] = obsdata
                    for key in drop_references:
                        if key in row:
                            row.pop(key)
                        else:
                            logger.warning(
                                "{} not defined for {}, filter {}".format(
                                    key,
                                    row["groundwaterMonitoringWell"],
                                    row["tubeNumber"],
                                )
                            )
                    row["broId"] = ref["broId"]
                    tubes.append(row)
                else:
                    tubes.append(obsdata.to_dict())

        if progress_callback is not None:
            progress_callback(igmw + 1, n_wells)
    if to_zip is not None:
        util._save_data_to_zip(to_zip, _files, remove_path_again, to_path)
    return pd.DataFrame(tubes)

352 

353 

def _download_observations_for_bro_id(
    bro_id,
    meas_cl,
    as_csv,
    zipfile,
    to_path,
    _files,
    gld_kwargs,
    redownload=False,
    continue_on_error=False,
):
    """
    Download the data of one object, or read it from a previously saved file.

    Parameters
    ----------
    bro_id : str
        The bro-id of the object (for example a groundwater level dossier).
    meas_cl : class
        The class that is used to read xml-data, via `meas_cl.from_bro_id` or by
        calling `meas_cl` on a file.
    as_csv : bool
        If True, csv-data is requested/read through the gld-module. Otherwise
        xml-data is requested/read through `meas_cl`.
    zipfile : zipfile.ZipFile or None
        When not None, nothing is downloaded and the data is read from this
        archive.
    to_path : str or None
        Passed to `util._get_to_file` to determine the file name.
    _files : list or None
        Passed to `util._get_to_file`.
    gld_kwargs : dict
        Extra keyword arguments for the reader. The entries "status" and
        "observation_type" also determine the `observatietype` of a csv-request.
    redownload : bool, optional
        If True, download the data even when the file already exists. The default
        is False.
    continue_on_error : bool, optional
        If True, an exception during downloading is logged instead of raised. The
        default is False.

    Returns
    -------
    data : object or None
        The result of the reader. None when downloading failed and
        `continue_on_error` is True.
    """
    observatietype = None
    if as_csv:
        fname = f"{bro_id}.csv"
        # translate the requested status / observation type to the observatietype
        # of the csv-request; the status takes precedence
        status = gld_kwargs.get("status")
        if status == "voorlopig":
            observatietype = "regulier_voorlopig"
        elif status == "volledigBeoordeeld":
            observatietype = "regulier_beoordeeld"
        elif status == "onbekend":
            observatietype = "onbekend"
        elif gld_kwargs.get("observation_type") == "controleMeting":
            observatietype = "controle"
    else:
        fname = f"{bro_id}.xml"
    to_file = util._get_to_file(fname, zipfile, to_path, _files)

    # Start from None, so a failed download with continue_on_error=True returns
    # None instead of raising an UnboundLocalError at the return statement.
    data = None
    if zipfile is None and (
        redownload or to_file is None or not os.path.isfile(to_file)
    ):  # download the data
        try:
            if as_csv:
                data = gld.get_objects_as_csv(
                    bro_id,
                    observatietype=observatietype,
                    to_file=to_file,
                    **gld_kwargs,
                )
            else:
                data = meas_cl.from_bro_id(bro_id, to_file=to_file, **gld_kwargs)
        except Exception as e:
            if not continue_on_error:
                raise
            if as_csv:
                msg = "Error processing %s csv for broid %s: %s"
            else:
                msg = "Error processing %s xml for broid %s: %s"
            logger.error(msg, meas_cl.__name__, bro_id, e)
    elif as_csv:
        # read the csv-data from a file
        if zipfile is not None:
            # Close the archive member after reading.
            # NOTE(review): this assumes gld.read_gld_csv reads the stream
            # completely before it returns.
            with zipfile.open(to_file) as stream:
                data = gld.read_gld_csv(
                    stream,
                    bro_id,
                    observatietype=observatietype,
                    **gld_kwargs,
                )
        else:
            data = gld.read_gld_csv(
                to_file,
                bro_id,
                observatietype=observatietype,
                **gld_kwargs,
            )
    else:
        # read the xml-data from a file
        data = meas_cl(to_file, zipfile=zipfile, **gld_kwargs)
    return data

428 

429 

def _get_gld_kwargs(
    kind, tmin, tmax, qualifier, status, observation_type, sort, drop_duplicates
):
    """
    Collect the keyword arguments that only apply to groundwater level dossiers.

    Returns
    -------
    dict
        Empty when `kind` is not "gld". Otherwise it contains each of tmin, tmax,
        qualifier, status and observation_type that is not None, followed by sort
        and drop_duplicates (which are always included).
    """
    if kind != "gld":
        return {}

    optional = {
        "tmin": tmin,
        "tmax": tmax,
        "qualifier": qualifier,
        "status": status,
        "observation_type": observation_type,
    }
    # filters that were not specified are left out entirely
    selected = {name: value for name, value in optional.items() if value is not None}
    selected["sort"] = sort
    selected["drop_duplicates"] = drop_duplicates
    return selected

448 

449 

def get_tube_observations(
    gwm_id, tube_number, kind="gld", sort=True, drop_duplicates=True, **kwargs
):
    """
    Get the observations of a single groundwater monitoring tube.

    Parameters
    ----------
    gwm_id : str
        The bro_id of the groundwater monitoring well.
    tube_number : int
        The tube number.
    kind : str, optional
        The type of observations to retrieve. Combining the observations is only
        implemented for 'gld' and 'gar'. Defaults to 'gld' (groundwater level
        dossier).
    sort : bool, optional
        If True, sort the observations. Only used if `kind` is 'gld'. Defaults to
        True.
    drop_duplicates : bool, optional
        If True, drop duplicate observations based on their timestamp. Only used if
        `kind` is 'gld'. Defaults to True.
    **kwargs : dict
        Kwargs are passed onto get_observations.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the observations. An empty observation-DataFrame is
        returned when get_observations returns nothing (None or an empty
        DataFrame).

    Raises
    ------
    NotImplementedError
        If `kind` is not 'gld' or 'gar' (raised after get_observations has run).
    """
    # sorting and dropping duplicates is done after combining the observations
    # to avoid doing this multiple times
    df = get_observations(
        gwm_id,
        tube_number=tube_number,
        kind=kind,
        sort=False,
        drop_duplicates=False,
        **kwargs,
    )
    # get_observations returns None when the relations of the well could not be
    # requested; treat that the same as a well without observations
    if df is None or df.empty:
        return _get_empty_observation_df(kind)

    data_column = _get_data_column(kind)
    return _combine_observations(
        df[data_column],
        kind=kind,
        bro_id=f"{gwm_id}_{tube_number}",
        sort=sort,
        drop_duplicates=drop_duplicates,
    )

500 

501 

def get_tube_gdf(gmws, index=None):
    """
    Create a GeoDataFrame of tube properties combined with well metadata.

    Every row of the result describes one monitoring tube: the properties of the
    well (without the monitoringTube table) followed by the properties of the
    tube. Wells whose monitoringTube entry is not a DataFrame are skipped.

    Parameters
    ----------
    gmws : list or dict of GroundwaterMonitoringWell, or pd.DataFrame
        Well and tube data in one of the following formats: a list of
        `GroundwaterMonitoringWell` objects, a dictionary of these objects, or a
        DataFrame with the bro-ids of the GroundwaterMonitoringWells as the index
        and the column monitoringTube containing tube properties.
    index : str or list of str, optional
        The column or columns to use for indexing the resulting GeoDataFrame.
        Defaults to ['groundwaterMonitoringWell', 'tubeNumber'] if not provided.

    Returns
    -------
    gdf : gpd.GeoDataFrame
        The combined well and tube properties, as built by `bro.objects_to_gdf`
        and sorted by its index.
    """
    if isinstance(gmws, dict):
        # only the well objects are needed, the keys of the dict are not used
        gmws = list(gmws.values())
    if isinstance(gmws, list):
        gmws = pd.DataFrame([well.to_dict() for well in gmws])
        if "broId" in gmws.columns:
            gmws = gmws.set_index("broId")

    rows = []
    for well_id in gmws.index:
        tube_table = gmws.loc[well_id, "monitoringTube"]
        if not isinstance(tube_table, pd.DataFrame):
            continue
        # the well properties are the same for all tubes of this well
        well_props = gmws.loc[well_id].drop("monitoringTube")
        for number in tube_table.index:
            # combine properties of well and tube
            row = pd.concat((well_props, tube_table.loc[number]))
            row["groundwaterMonitoringWell"] = well_id
            row["tubeNumber"] = number
            rows.append(row)

    if index is None:
        index = ["groundwaterMonitoringWell", "tubeNumber"]
    return bro.objects_to_gdf(rows, index=index).sort_index()

567 

568 

def get_data_in_extent(
    extent,
    kind="gld",
    tmin=None,
    tmax=None,
    combine=None,
    index=None,
    as_csv=False,
    qualifier=None,
    to_zip=None,
    to_path=None,
    redownload=False,
    silent=False,
    continue_on_error=False,
    sort=True,
    drop_duplicates=True,
    progress_callback=None,
):
    """
    Retrieve metadata and observations within a specified spatial extent.

    This function fetches monitoring well characteristics, groundwater observations,
    and tube properties within the given spatial extent. It can combine the data
    for specific observation types and return either individual dataframes or a
    combined dataframe.

    Parameters
    ----------
    extent : str or sequence
        The spatial extent ([xmin, xmax, ymin, ymax]) to filter the data. When
        extent is a string, it is used as the name of the zip-file (`to_zip`) and
        `redownload` is set to False.
    kind : str, optional
        The type of observations to retrieve. Valid values are {'gld', 'gar'} for
        groundwater level dossier or groundwater analysis report. When kind is None,
        no observations are downloaded. Defaults to 'gld'.
    tmin : str or datetime, optional
        The minimum time for filtering observations. Defaults to None.
    tmax : str or datetime, optional
        The maximum time for filtering observations. Defaults to None.
    combine : bool, optional
        If True, combines the metadata, tube properties, and observations into a
        single dataframe. Defaults to False, which will change to True in a future
        version. A warning is logged when combine is not specified.
    index : str, optional
        The column to use for indexing in the resulting dataframe. Defaults to None.
    as_csv : bool, optional
        If True, the measurement data is requested as CSV files instead of XML files
        (only supported for 'gld'). Defaults to False.
    qualifier : str or list of str, optional
        A string or list of strings used to filter the observations. Only valid if
        `kind` is 'gld'. Defaults to None.
    to_zip : str, optional
        If not None, save the downloaded files in a zip-file named to_zip. The
        default is None.
    to_path : str, optional
        If not None, save the downloaded files in the directory named to_path. The
        default is None.
    redownload : bool, optional
        When downloaded files exist in to_path or to_zip, read from these files when
        redownload is False. If redownload is True, download the data again from the
        BRO-server. The default is False.
    silent : bool, optional
        If True, suppresses progress logging. Defaults to False.
    continue_on_error : bool, optional
        If True, continue after an error occurs during downloading or processing of
        individual observation data. Defaults to False.
    sort : bool, optional
        If True, sort the observations. Only used if `kind` is 'gld'. Defaults to
        True.
    drop_duplicates : bool, optional
        If True, drop duplicate observations based on their timestamp. Only used if
        `kind` is 'gld'. Defaults to True.
    progress_callback : function, optional
        A callback function that takes two arguments (current, total) to report
        progress. If None, no progress reporting is done. Defaults to None.

    Returns
    -------
    gdf : pd.DataFrame
        A dataframe containing tube properties and metadata within the specified
        extent. When the data is combined, it also contains a column with the
        combined observations and a column with the bro-ids of the objects these
        observations were taken from.
    obs_df : pd.DataFrame, optional
        A dataframe containing the observations for the specified wells. Returned
        only if `kind` is not None and the data is not combined.

    Raises
    ------
    Exception
        If extent is a string and to_zip is specified as well, if `as_csv=True` and
        `kind` is not 'gld', or if other parameters are invalid.
    """
    if combine is None:
        logger.warning(
            "The default of `combine=False` will change to True in a future version of "
            "brodata. Pass combine=False to retain current behavior or combine=True to "
            "adopt the future default and silence this warning."
        )
        combine = False
    if isinstance(extent, str):
        if to_zip is not None:
            raise Exception("When extent is a string, do not supply to_zip")
        to_zip = extent
        extent = None
        redownload = False

    zipfile = None
    _files = None
    if to_zip is not None:
        if not redownload and os.path.isfile(to_zip):
            logger.info(f"Reading data from {to_zip}")
            zipfile = ZipFile(to_zip)
        else:
            if to_path is None:
                to_path = os.path.splitext(to_zip)[0]
            # remember whether the directory has to be removed after zipping
            remove_path_again = not os.path.isdir(to_path)
            _files = []

    # the archive is closed in the finally-clause, also when an error occurs
    try:
        if to_path is not None and not os.path.isdir(to_path):
            os.makedirs(to_path)

        # get gwm characteristics
        logger.info(f"Getting gmw-characteristics in extent: {extent}")

        to_file = util._get_to_file(
            "gmw_characteristics.xml", zipfile, to_path, _files
        )
        gmw = get_characteristics(
            extent=extent, to_file=to_file, redownload=redownload, zipfile=zipfile
        )

        if kind is None:
            obs_df = pd.DataFrame()
            combine = False
        else:
            # get observations
            logger.info(f"Downloading {kind}-observations")
            obs_df = get_observations(
                gmw,
                kind=kind,
                tmin=tmin,
                tmax=tmax,
                as_csv=as_csv,
                qualifier=qualifier,
                to_path=to_path,
                redownload=redownload,
                zipfile=zipfile,
                _files=_files,
                silent=silent,
                continue_on_error=continue_on_error,
                sort=sort,
                drop_duplicates=drop_duplicates,
                progress_callback=progress_callback,
            )
            if obs_df is None:
                # get_observations logs an error and returns None when the
                # relations of a well cannot be requested
                obs_df = pd.DataFrame()

            # only keep wells with observations
            if "groundwaterMonitoringWell" in obs_df.columns:
                gmw = gmw[gmw.index.isin(obs_df["groundwaterMonitoringWell"])]

        logger.info("Downloading tube-properties")

        # get the properties of the monitoringTubes
        gdf = get_tube_gdf_from_characteristics(
            gmw,
            index=index,
            to_path=to_path,
            redownload=redownload,
            zipfile=zipfile,
            _files=_files,
            silent=silent,
        )
    finally:
        if zipfile is not None:
            zipfile.close()

    if zipfile is None and to_zip is not None:
        util._save_data_to_zip(to_zip, _files, remove_path_again, to_path)

    if not obs_df.empty:
        obs_df = obs_df.set_index(
            ["groundwaterMonitoringWell", "tubeNumber"]
        ).sort_index()

    if combine and kind in ["gld", "gar"]:
        if kind == "gld":
            idcol = "groundwaterLevelDossier"
        else:
            idcol = "groundwaterAnalysisReport"
        datcol = _get_data_column(kind)

        logger.info("Combining well-properties, tube-properties and observations")

        data = {}
        ids = {}
        # tube_key is a (well, tube number) entry of the index of gdf; a separate
        # name is used so the `index` parameter is not overwritten
        for tube_key in gdf.index:
            if tube_key not in obs_df.index:
                continue

            data[tube_key] = _combine_observations(
                obs_df.loc[[tube_key], datcol],
                kind=kind,
                bro_id=f"{tube_key[0]}_{tube_key[1]}",
                # honour the settings of the caller, as get_tube_observations does
                sort=sort,
                drop_duplicates=drop_duplicates,
            )
            ids[tube_key] = list(obs_df.loc[[tube_key], "broId"])
        gdf[datcol] = data
        gdf[idcol] = ids
        return gdf

    if kind is None:
        return gdf
    return gdf, obs_df

771 

772 

def _get_data_column(kind):
    """
    Return the name of the column that holds the measurements of `kind`.

    Raises
    ------
    NotImplementedError
        If `kind` is neither "gld" nor "gar".
    """
    columns = (("gld", "observation"), ("gar", "laboratoryAnalysis"))
    for known_kind, column in columns:
        if kind == known_kind:
            return column
    raise NotImplementedError(f"Measurement-kind {kind} not supported yet")

780 

781 

def _get_empty_observation_df(kind):
    """
    Return the empty observation-DataFrame of the module that handles `kind`.

    Raises
    ------
    NotImplementedError
        If `kind` is neither "gld" nor "gar".
    """
    if kind not in ("gld", "gar"):
        raise NotImplementedError(f"Measurement-kind {kind} not supported yet")
    # each module defines what an empty set of observations looks like
    module = gld if kind == "gld" else gar
    return module._get_empty_observation_df()

789 

790 

def _combine_observations(
    observations, kind, bro_id=None, sort=True, drop_duplicates=True
):
    """
    Concatenate a collection of observation-DataFrames into one DataFrame.

    Entries that are not a DataFrame, or that are empty, are ignored. When nothing
    remains, the empty observation-DataFrame of `kind` is returned. Otherwise the
    entries are concatenated and sorted by index. For kind "gld" the result is
    additionally passed through `gld.sort_observations` (if `sort`) and
    `gld.drop_duplicate_observations` (if `drop_duplicates`, with `bro_id`).
    """
    frames = [
        candidate
        for candidate in observations
        if isinstance(candidate, pd.DataFrame) and not candidate.empty
    ]
    if len(frames) == 0:
        return _get_empty_observation_df(kind)

    combined = pd.concat(frames).sort_index()
    if kind != "gld":
        return combined
    if sort:
        combined = gld.sort_observations(combined)
    if drop_duplicates:
        combined = gld.drop_duplicate_observations(combined, bro_id=bro_id)
    return combined

809 

810 

def get_tube_gdf_from_characteristics(characteristics_gdf, **kwargs):
    """
    Generate a GeoDataFrame of tube properties based on well characteristics.

    The unique bro-ids in the index of `characteristics_gdf` are passed to
    `get_tube_gdf_from_bro_ids`, which retrieves the GroundwaterMonitoringWell
    objects and combines the information about their monitoring tubes in a new
    GeoDataFrame.

    Parameters
    ----------
    characteristics_gdf : gpd.GeoDataFrame
        GeoDataFrame of well characteristics with bro-ids of the
        GroundwaterMonitoringWells as the index, retrieved with
        `brodata.gmw.get_characteristics`.
    **kwargs : dict
        Passed on to `get_tube_gdf_from_bro_ids`, for example `index`.

    Returns
    -------
    gpd.GeoDataFrame
        GeoDataFrame of combined well and tube properties.
    """
    unique_ids = characteristics_gdf.index.unique()
    return get_tube_gdf_from_bro_ids(unique_ids, **kwargs)

836 

837 

def get_tube_gdf_from_bro_ids(
    bro_ids,
    index=None,
    **kwargs,
):
    """
    Generate a GeoDataFrame of tube properties based on an iterable of gmw bro-ids.

    The GroundwaterMonitoringWell objects of the bro-ids are retrieved with
    `bro._get_data_for_bro_ids`, after which `get_tube_gdf` combines the
    information about the monitoring tubes in a new GeoDataFrame.

    Parameters
    ----------
    bro_ids : iterable of str
        The bro-ids of the GroundwaterMonitoringWells.
    index : str or list of str, optional
        Column(s) to use as the index for the resulting GeoDataFrame. Defaults
        to ['groundwaterMonitoringWell', 'tubeNumber'] if not provided.
    **kwargs : dict
        Passed on to `bro._get_data_for_bro_ids`.

    Returns
    -------
    gpd.GeoDataFrame
        GeoDataFrame of combined well and tube properties.
    """
    wells = bro._get_data_for_bro_ids(
        GroundwaterMonitoringWell,
        bro_ids,
        desc="Downloading Groundwater Monitoring Wells",
        **kwargs,
    )
    return get_tube_gdf(wells, index=index)

871 

872 

cl = GroundwaterMonitoringWell


def _bind_to_gmw(func):
    """Return `func` with `cl` filled in as first argument, keeping its docstring."""
    bound = partial(func, cl)
    bound.__doc__ = func.__doc__
    return bound


# module-level versions of the generic bro-functions, specific to this class
get_bro_ids_of_bronhouder = _bind_to_gmw(bro._get_bro_ids_of_bronhouder)
get_data_for_bro_ids = _bind_to_gmw(bro._get_data_for_bro_ids)
get_characteristics = _bind_to_gmw(bro._get_characteristics)