Coverage for brodata / gmw.py: 74%

332 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-20 14:37 +0000

1import json 

2import logging 

3import os 

4from functools import partial 

5from zipfile import ZipFile 

6 

7import numpy as np 

8import pandas as pd 

9import requests 

10 

11from . import bro, gld, gar, frd, gmn, util 

12 

13logger = logging.getLogger(__name__) 

14 

15 

def get_well_code(bro_id):
    """
    Look up the well code that belongs to a BRO-ID.

    A GET request is sent to the ``well-code`` endpoint below
    ``GroundwaterMonitoringWell._rest_url``. A response with a status code above 200
    is treated as a failure: its reason is logged as an error and None is returned.

    Parameters
    ----------
    bro_id : str
        The BRO-ID for which the well code is requested.

    Returns
    -------
    well_code : str or None
        The body of the response as plain text, or None when the request failed.
    """
    url = f"{GroundwaterMonitoringWell._rest_url}/well-code/{bro_id}"
    response = requests.get(url)
    if response.status_code <= 200:
        return response.text
    # anything above 200 counts as a failed request
    logger.error(response.reason)
    return None

42 

43 

class GroundwaterMonitoringWell(bro.FileOrUrl):
    """
    Class to represent a Groundwater Monitoring Well (GMW) from the BRO.

    This class parses XML data related to a groundwater monitoring well (GMW).
    It extracts details such as location, monitoring tube data, and well history
    and stores these in attributes.

    Notes
    -----
    This class extends `bro.FileOrUrl`. After parsing, the attribute `monitoringTube`
    (when present) is a DataFrame indexed by the integer `tubeNumber`, and the
    attribute `intermediateEvent` (when present) is a DataFrame with one row per
    event.
    """

    _rest_url = "https://publiek.broservices.nl/gm/gmw/v1"
    _xmlns = "http://www.broservices.nl/xsd/dsgmw/1.1"
    _char = "GMW_C"

    def _read_contents(self, tree):
        """
        Read the main GMW object from an XML tree and store its data on self.

        Parameters
        ----------
        tree : xml element tree
            The parsed XML, handed to `self._get_main_object` to locate the main
            object (one of GMW_PO, GMW_PPO or BRO_DO).
        """
        ns = {
            "brocom": "http://www.broservices.nl/xsd/brocommon/3.0",
            "xmlns": self._xmlns,
        }

        object_names = ["GMW_PO", "GMW_PPO", "BRO_DO"]
        gmw = self._get_main_object(tree, object_names, ns)

        for key in gmw.attrib:
            # Strip the "{namespace}" prefix. Taking the last part (instead of
            # index 1) also works for attributes without a namespace, which would
            # otherwise raise an IndexError.
            setattr(self, key.split("}", 1)[-1], gmw.attrib[key])
        for child in gmw:
            key = self._get_tag(child)
            if len(child) == 0:
                # a leaf element: store its text as-is
                setattr(self, key, child.text)
            elif key == "standardizedLocation":
                self._read_standardized_location(child)
            elif key == "deliveredLocation":
                self._read_delivered_location(child)
            elif key == "wellHistory":
                self._read_well_history(child)
            elif key in ["deliveredVerticalPosition", "registrationHistory"]:
                to_float = ["offset", "groundLevelPosition"]
                self._read_children_of_children(child, to_float=to_float)
            elif key == "monitoringTube":
                if not hasattr(self, "monitoringTube"):
                    self.monitoringTube = []
                tube = {}
                to_float = [
                    "tubeTopDiameter",
                    "tubeTopPosition",
                    "screenLength",
                    "screenTopPosition",
                    "screenBottomPosition",
                    "plainTubePartLength",
                ]
                self._read_children_of_children(child, tube, to_float=to_float)
                self.monitoringTube.append(tube)
            else:
                self._warn_unknown_tag(key)
        if hasattr(self, "monitoringTube"):
            # turn the list of tube-dicts into a DataFrame indexed by tubeNumber
            self.monitoringTube = pd.DataFrame(self.monitoringTube)
            tubeNumber = self.monitoringTube["tubeNumber"].astype(int)
            self.monitoringTube["tubeNumber"] = tubeNumber
            self.monitoringTube = self.monitoringTube.set_index("tubeNumber")
        if hasattr(self, "intermediateEvent"):
            self.intermediateEvent = pd.DataFrame(self.intermediateEvent)

    def _read_well_history(self, node):
        """Store construction/removal dates and intermediate events of the well."""
        for grandchild in node:
            tag = self._get_tag(grandchild)
            if tag in ["wellConstructionDate", "wellRemovalDate"]:
                setattr(self, tag, self._read_date(grandchild))
            elif tag == "intermediateEvent":
                if not hasattr(self, "intermediateEvent"):
                    self.intermediateEvent = []
                event = self._read_intermediate_event(grandchild)
                self.intermediateEvent.append(event)
            else:
                self._warn_unknown_tag(tag)

    def _read_intermediate_event(self, node):
        """
        Read one intermediateEvent element.

        Returns
        -------
        d : dict
            Dictionary with the keys "eventName" (text) and "eventDate" (read with
            `self._read_date`), as far as these children are present. A warning is
            issued for any other child tag.
        """
        d = {}
        for child in node:
            key = self._get_tag(child)
            if key == "eventName":
                d[key] = child.text
            elif key == "eventDate":
                d[key] = self._read_date(child)
            else:
                self._warn_unknown_tag(key)
        return d

132 

133 

def get_observations(
    bro_ids,
    kind="gld",
    drop_references=True,
    silent=False,
    tmin=None,
    tmax=None,
    as_csv=False,
    tube_number=None,
    status=None,
    observation_type=None,
    qualifier=None,
    to_path=None,
    to_zip=None,
    redownload=False,
    zipfile=None,
    continue_on_error=False,
    sort=True,
    drop_duplicates=True,
    progress_callback=None,
    _files=None,
):
    """
    Retrieve groundwater observations for the specified monitoring wells (bro_ids).

    For every unique well the gmw-relations are requested (or read from a previously
    saved json-file). For every monitoring tube in these relations, the dossiers of
    the requested kind are downloaded or read from file.

    Parameters
    ----------
    bro_ids : str or list or pd.DataFrame
        The BRO IDs of the monitoring wells for which to retrieve the data. If a
        DataFrame is provided, its index is used as the list of BRO IDs. If a single
        string is provided, the progress bar is disabled.
    kind : str, optional
        The type of observations to retrieve. Can be one of {'gmn', 'gld', 'gar', 'frd'}.
        Defaults to 'gld' (groundwater level dossier).
    drop_references : bool or list of str, optional
        Specifies whether to drop reference fields in the returned data. Defaults to True,
        in which case 'gmnReferences', 'gldReferences', and 'garReferences' are removed.
        The references are only dropped when `as_csv` is True.
    silent : bool, optional
        If True, suppresses progress logging. Defaults to False.
    tmin : str or datetime, optional
        The minimum time filter for the observations. Defaults to None.
    tmax : str or datetime, optional
        The maximum time filter for the observations. Defaults to None.
    as_csv : bool, optional
        If True, requests the observations as CSV files instead of XML-files. Only valid
        if `kind` is 'gld'. Defaults to False.
    tube_number : int, optional
        Filters observations to a specific tube number. Defaults to None.
    status : str, optional
        A status string for additional filtering. Possible values are
        "volledigBeoordeeld", "voorlopig" and "onbekend". Only valid if `kind` is
        'gld'. Defaults to None.
    observation_type : str, optional
        An observation type string for additional filtering. Possible values are
        "reguliereMeting" and "controleMeting". Only valid if `kind` is 'gld'. Defaults
        to None.
    qualifier : str or list of str, optional
        A qualifier string for additional filtering. Only valid if `kind` is 'gld'.
        Defaults to None.
    to_path : str, optional
        If not None, save the downloaded files in the directory named to_path. The
        default is None.
    to_zip : str, optional
        If not None, save the downloaded files in a zip-file named to_zip. The default
        is None.
    redownload : bool, optional
        When downloaded files exist in to_path, read from these files when redownload
        is False. If redownload is True, download the data again from the
        BRO-servers. Reading from an existing to_zip is not implemented in this
        function. The default is False.
    zipfile : zipfile.ZipFile, optional
        A zipfile-object. When not None, zipfile is used to read previously downloaded
        data from. The default is None.
    continue_on_error : bool, optional
        If True, continue after an error occurs during downloading or processing of
        individual observation data. Defaults to False.
    sort : bool, optional
        If True, sort the observations. Only used if `kind` is 'gld'. Defaults to True.
    drop_duplicates : bool, optional
        If True, drop duplicate observations based on their timestamp. Only used if
        `kind` is 'gld'. Defaults to True.
    progress_callback : function, optional
        A callback function that takes two arguments (current, total) to report
        progress, where total is the number of unique wells. If None, no progress
        reporting is done. Defaults to None.
    _files : list, optional
        Internal argument that is handed to `util._get_to_file` and
        `util._save_data_to_zip`. Defaults to None.

    Returns
    -------
    pd.DataFrame or None
        A DataFrame with one row for each downloaded dossier. None is returned (after
        logging the error) when the request for the gmw-relations of a well fails.

    Raises
    ------
    NotImplementedError
        If `to_zip` already exists and `redownload` is False.
    ValueError
        If `kind` is not supported.
    Exception
        If `as_csv=True` and `kind` is not 'gld', or if `qualifier` is provided for
        a kind other than 'gld'.
    """
    tubes = []

    if isinstance(bro_ids, str):
        bro_ids = [bro_ids]
        silent = True

    if isinstance(bro_ids, pd.DataFrame):
        bro_ids = bro_ids.index

    if isinstance(drop_references, bool):
        if drop_references:
            drop_references = [
                "gmnReferences",
                "gldReferences",
                "garReferences",
                # "frdReferences",
            ]
        else:
            drop_references = []

    if to_zip is not None:
        if not redownload and os.path.isfile(to_zip):
            raise (NotImplementedError("Redownload=False is not suppported yet"))
        if to_path is None:
            to_path = os.path.splitext(to_zip)[0]
        # only remove the temporary directory afterwards when it is created here
        remove_path_again = not os.path.isdir(to_path)
        if _files is None:
            _files = []

    desc = f"Downloading {kind}-observations"
    if as_csv and kind != "gld":
        raise (Exception("as_csv=True is only supported for kind=='gld'"))
    if qualifier is not None and kind != "gld":
        raise (Exception("A qualifier is only supported for kind=='gld'"))
    if to_path is not None and not os.path.isdir(to_path):
        os.makedirs(to_path)

    if kind == "gld":
        meas_cl = gld.GroundwaterLevelDossier
    elif kind == "gar":
        meas_cl = gar.GroundwaterAnalysisReport
    elif kind == "frd":
        meas_cl = frd.FormationResistanceDossier
    elif kind == "gmn":
        meas_cl = gmn.GroundwaterMonitoringNetwork
    else:
        raise (ValueError(f"kind='{kind}' not supported"))

    gld_kwargs = _get_gld_kwargs(
        kind, tmin, tmax, qualifier, status, observation_type, sort, drop_duplicates
    )

    # The loop runs over the unique ids, so the total that is reported to
    # progress_callback has to be the number of unique ids as well.
    unique_ids = np.unique(bro_ids)
    n_wells = len(unique_ids)

    for igmw, bro_id in enumerate(util.tqdm(unique_ids, disable=silent, desc=desc)):
        to_rel_file = util._get_to_file(
            f"gmw_relations_{bro_id}.json", zipfile, to_path, _files
        )
        if zipfile is None and (
            redownload or to_rel_file is None or not os.path.isfile(to_rel_file)
        ):
            url = f"https://publiek.broservices.nl/gm/v1/gmw-relations/{bro_id}"
            req = requests.get(url)
            if req.status_code > 200:
                logger.error(req.json()["errors"][0]["message"])
                return
            if to_rel_file is not None:
                with open(to_rel_file, "w") as f:
                    f.write(req.text)
            data = req.json()
        elif zipfile is not None:
            with zipfile.open(to_rel_file) as f:
                data = json.load(f)
        else:
            with open(to_rel_file) as f:
                data = json.load(f)

        for tube_ref in data["monitoringTubeReferences"]:
            tube_ref["groundwaterMonitoringWell"] = data["gmwBroId"]
            if tube_number is not None and tube_ref["tubeNumber"] != tube_number:
                continue
            ref_key = f"{kind}References"
            for ref in tube_ref[ref_key]:
                obsdata = _download_observations_for_bro_id(
                    ref["broId"],
                    meas_cl,
                    as_csv,
                    zipfile,
                    to_path,
                    _files,
                    gld_kwargs,
                    redownload=redownload,
                    continue_on_error=continue_on_error,
                )
                if as_csv:
                    # Build a separate row for every dossier. Mutating and appending
                    # the shared tube_ref dict would make all rows of a tube with
                    # several dossiers refer to the same (last) observation.
                    row = dict(tube_ref)
                    row["observation"] = obsdata
                    for key in drop_references:
                        if key in row:
                            row.pop(key)
                        else:
                            logger.warning(
                                "{} not defined for {}, filter {}".format(
                                    key,
                                    tube_ref["groundwaterMonitoringWell"],
                                    tube_ref["tubeNumber"],
                                )
                            )

                    row["broId"] = ref["broId"]
                    tubes.append(row)
                elif obsdata is not None:
                    # obsdata can only be None when a failed download was skipped
                    tubes.append(obsdata.to_dict())

        if progress_callback is not None:
            progress_callback(igmw + 1, n_wells)
    if to_zip is not None:
        util._save_data_to_zip(to_zip, _files, remove_path_again, to_path)
    return pd.DataFrame(tubes)

353 

354 

def _download_observations_for_bro_id(
    bro_id,
    meas_cl,
    as_csv,
    zipfile,
    to_path,
    _files,
    gld_kwargs,
    redownload=False,
    continue_on_error=False,
):
    """
    Download the observations of one dossier, or read them from file.

    The data is downloaded when no zipfile is supplied and the target file is
    unknown, does not exist yet, or `redownload` is True. Otherwise the data is
    read from the file (inside `zipfile` when that is not None).

    Parameters
    ----------
    bro_id : str
        The bro-id of the dossier.
    meas_cl : class
        The class that is used to read the data when `as_csv` is False. It is called
        as `meas_cl.from_bro_id(...)` for a download and as `meas_cl(...)` for a
        file.
    as_csv : bool
        If True, the data is requested or read as csv, using the functions in `gld`.
    zipfile : zipfile.ZipFile or None
        Zipfile to read previously downloaded data from.
    to_path : str or None
        Handed to `util._get_to_file` to determine the name of the file.
    _files : list or None
        Handed to `util._get_to_file`.
    gld_kwargs : dict
        Extra keyword arguments for the reading functions. The keys "status" and
        "observation_type" also determine the observatietype of a csv-request.
    redownload : bool, optional
        If True, download the data even when the file already exists. The default is
        False.
    continue_on_error : bool, optional
        If True, log an error that occurs during downloading instead of raising it.
        The default is False.

    Returns
    -------
    data : object or None
        The observations (the return value of the `gld` csv-functions, or an
        instance of `meas_cl`). None when a download failed and `continue_on_error`
        is True.
    """
    if as_csv:
        fname = f"{bro_id}.csv"
        observatietype = None
        if gld_kwargs.get("status") == "voorlopig":
            observatietype = "regulier_voorlopig"
        elif gld_kwargs.get("status") == "volledigBeoordeeld":
            observatietype = "regulier_beoordeeld"
        elif gld_kwargs.get("status") == "onbekend":
            observatietype = "onbekend"
        elif gld_kwargs.get("observation_type") == "controleMeting":
            observatietype = "controle"
    else:
        fname = f"{bro_id}.xml"
    to_file = util._get_to_file(fname, zipfile, to_path, _files)

    # Make sure there is always something to return: without this default a
    # swallowed error (continue_on_error=True) would end in an UnboundLocalError.
    data = None
    if zipfile is None and (
        redownload or to_file is None or not os.path.isfile(to_file)
    ):  # download the data
        if as_csv:
            try:
                data = gld.get_objects_as_csv(
                    bro_id,
                    observatietype=observatietype,
                    to_file=to_file,
                    **gld_kwargs,
                )
            except Exception as e:
                if not continue_on_error:
                    raise
                logger.error(
                    "Error processing %s csv for broid %s: %s",
                    meas_cl.__name__,
                    bro_id,
                    e,
                )
        else:
            try:
                data = meas_cl.from_bro_id(bro_id, to_file=to_file, **gld_kwargs)
            except Exception as e:
                if not continue_on_error:
                    raise
                logger.error(
                    "Error processing %s xml for broid %s: %s",
                    meas_cl.__name__,
                    bro_id,
                    e,
                )
    elif as_csv:
        # read the data from a file
        if zipfile is not None:
            # close the member of the zipfile again after reading
            with zipfile.open(to_file) as f:
                data = gld.read_gld_csv(
                    f,
                    bro_id,
                    observatietype=observatietype,
                    **gld_kwargs,
                )
        else:
            data = gld.read_gld_csv(
                to_file,
                bro_id,
                observatietype=observatietype,
                **gld_kwargs,
            )
    else:
        data = meas_cl(to_file, zipfile=zipfile, **gld_kwargs)
    return data

429 

430 

431def _get_gld_kwargs( 

432 kind, tmin, tmax, qualifier, status, observation_type, sort, drop_duplicates 

433): 

434 gld_kwargs = {} 

435 if kind == "gld": 

436 if tmin is not None: 

437 gld_kwargs["tmin"] = tmin 

438 if tmax is not None: 

439 gld_kwargs["tmax"] = tmax 

440 if qualifier is not None: 

441 gld_kwargs["qualifier"] = qualifier 

442 if status is not None: 

443 gld_kwargs["status"] = status 

444 if observation_type is not None: 

445 gld_kwargs["observation_type"] = observation_type 

446 gld_kwargs["sort"] = sort 

447 gld_kwargs["drop_duplicates"] = drop_duplicates 

448 return gld_kwargs 

449 

450 

def get_tube_observations(
    gwm_id, tube_number, kind="gld", sort=True, drop_duplicates=True, **kwargs
):
    """
    Get the observations of a single groundwater monitoring tube.

    Parameters
    ----------
    gwm_id : str
        The bro_id of the groundwater monitoring well.
    tube_number : int
        The tube number.
    kind : str, optional
        The type of observations to retrieve. Combining the observations is only
        implemented for 'gld' and 'gar'. Defaults to 'gld' (groundwater level
        dossier).
    sort : bool, optional
        If True, sort the observations. Only used if `kind` is 'gld'. Defaults to True.
    drop_duplicates : bool, optional
        If True, drop duplicate observations based on their timestamp. Only used if
        `kind` is 'gld'. Defaults to True.
    **kwargs : dict
        Kwargs are passed onto get_observations.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the observations. An empty DataFrame is returned when
        no observations are found.

    """
    # sorting and dropping duplicates is done after combining the observations
    # to avoid doing this multiple times
    df = get_observations(
        gwm_id,
        tube_number=tube_number,
        kind=kind,
        sort=False,
        drop_duplicates=False,
        **kwargs,
    )
    # get_observations returns None (after logging the error) when the request for
    # the gmw-relations fails; treat that the same as "no observations"
    if df is None or df.empty:
        return _get_empty_observation_df(kind)
    data_column = _get_data_column(kind)
    return _combine_observations(
        df[data_column],
        kind=kind,
        bro_id=f"{gwm_id}_{tube_number}",
        sort=sort,
        drop_duplicates=drop_duplicates,
    )

501 

502 

def get_tube_gdf(gmws, index=None):
    """
    Create a GeoDataFrame of tube properties combined with well metadata.

    Every row of the result describes one monitoring tube: the properties of the
    well (without the column monitoringTube) followed by the properties of the tube,
    supplemented with the columns 'groundwaterMonitoringWell' and 'tubeNumber'.

    Parameters
    ----------
    gmws : list or dict of GroundwaterMonitoringWell, or pd.DataFrame
        Well and tube data in one of the following formats: a list of
        `GroundwaterMonitoringWell` objects, a dictionary of these objects, or a
        DataFrame with the bro-ids of the GroundwaterMonitoringWells as the index
        and the column monitoringTube containing tube properties. Wells for which
        monitoringTube is not a DataFrame are skipped.
    index : str or list of str, optional
        The column or columns to use for indexing the resulting GeoDataFrame. Defaults
        to ['groundwaterMonitoringWell', 'tubeNumber'] if not provided.

    Returns
    -------
    gdf : gpd.GeoDataFrame
        The combined well and tube properties, as generated by `bro.objects_to_gdf`
        and sorted by its index.
    """
    if isinstance(gmws, dict):
        # only the values of the dictionary are used
        gmws = [gmws[name] for name in gmws]
    if isinstance(gmws, list):
        gmws = pd.DataFrame([well.to_dict() for well in gmws])
        if "broId" in gmws.columns:
            gmws = gmws.set_index("broId")

    rows = []
    for bro_id in gmws.index:
        tube_table = gmws.loc[bro_id, "monitoringTube"]
        if isinstance(tube_table, pd.DataFrame):
            for tube_number in tube_table.index:
                # combine properties of well and tube
                well_props = gmws.loc[bro_id].drop("monitoringTube")
                tube_props = tube_table.loc[tube_number]
                combined = pd.concat((well_props, tube_props))
                combined["groundwaterMonitoringWell"] = bro_id
                combined["tubeNumber"] = tube_number
                rows.append(combined)

    if index is None:
        index = ["groundwaterMonitoringWell", "tubeNumber"]
    return bro.objects_to_gdf(rows, index=index).sort_index()

568 

569 

def get_data_in_extent(
    extent,
    kind="gld",
    tmin=None,
    tmax=None,
    combine=None,
    index=None,
    as_csv=False,
    qualifier=None,
    to_zip=None,
    to_path=None,
    redownload=False,
    silent=False,
    continue_on_error=False,
    sort=True,
    drop_duplicates=True,
    progress_callback=None,
):
    """
    Retrieve metadata and observations within a specified spatial extent.

    This function fetches monitoring well characteristics, groundwater observations,
    and tube properties within the given spatial extent. It can combine the data
    for specific observation types and return either individual dataframes or a
    combined dataframe.

    Parameters
    ----------
    extent : str or sequence
        The spatial extent ([xmin, xmax, ymin, ymax]) to filter the data. When extent
        is a string, it is used as the name of the zip-file (`to_zip`) instead, and
        `redownload` is set to False.
    kind : str, optional
        The type of observations to retrieve. Valid values are {'gld', 'gar'} for
        groundwater level dossier or groundwater analysis report. When kind is None, no
        observations are downloaded. Defaults to 'gld'.
    tmin : str or datetime, optional
        The minimum time for filtering observations. Defaults to None.
    tmax : str or datetime, optional
        The maximum time for filtering observations. Defaults to None.
    combine : bool, optional
        If True, combines the metadata, tube properties, and observations into a single
        dataframe. Defaults to None, which currently means False (with a warning), and
        which will change to True in a future version.
    index : str, optional
        The column to use for indexing in the resulting dataframe. Defaults to None.
    as_csv : bool, optional
        If True, the measurement data is requested as CSV files instead of XML files
        (only supported for 'gld'). Defaults to False.
    qualifier : str or list of str, optional
        A string or list of strings used to filter the observations. Only valid if
        `kind` is 'gld'. Defaults to None.
    to_path : str, optional
        If not None, save the downloaded files in the directory named to_path. The
        default is None.
    to_zip : str, optional
        If not None, save the downloaded files in a zip-file named to_zip. The default
        is None.
    redownload : bool, optional
        When downloaded files exist in to_path or to_zip, read from these files when
        redownload is False. If redownload is True, download the data again from the
        BRO-server. The default is False.
    silent : bool, optional
        If True, suppresses progress logging. Defaults to False.
    continue_on_error : bool, optional
        If True, continue after an error occurs during downloading or processing of
        individual observation data. Defaults to False.
    sort : bool, optional
        If True, sort the observations. Only used if `kind` is 'gld'. Defaults to True.
    drop_duplicates : bool, optional
        If True, drop duplicate observations based on their timestamp. Only used if
        `kind` is 'gld'. Defaults to True.
    progress_callback : function, optional
        A callback function that takes two arguments (current, total) to report
        progress. If None, no progress reporting is done. Defaults to None.

    Returns
    -------
    gdf : pd.DataFrame
        A dataframe containing tube properties and metadata within the specified extent.
        When `combine` is True (and kind is 'gld' or 'gar'), gdf also contains a
        column with the combined observations and a column with the bro-ids of the
        dossiers.

    obs_df : pd.DataFrame, optional
        A dataframe containing the observations for the specified wells. Returned only if
        `combine` is False and `kind` is not None.

    Raises
    ------
    Exception
        If `extent` is a string and `to_zip` is supplied as well, if `as_csv=True` and
        `kind` is not 'gld', or if other parameters are invalid.
    """
    if combine is None:
        logger.warning(
            "The default of `combine=False` will change to True in a future version of "
            "brodata. Pass combine=False to retain current behavior or combine=True to "
            "adopt the future default and silence this warning."
        )
        combine = False
    if isinstance(extent, str):
        if to_zip is not None:
            raise (Exception("When extent is a string, do not supply to_zip"))
        to_zip = extent
        extent = None
        redownload = False

    zipfile = None
    _files = None
    if to_zip is not None:
        if not redownload and os.path.isfile(to_zip):
            logger.info(f"Reading data from {to_zip}")
            zipfile = ZipFile(to_zip)
        else:
            if to_path is None:
                to_path = os.path.splitext(to_zip)[0]
            # only remove the temporary directory afterwards when it is created here
            remove_path_again = not os.path.isdir(to_path)
            _files = []

    try:
        if to_path is not None and not os.path.isdir(to_path):
            os.makedirs(to_path)

        # get gwm characteristics
        logger.info(f"Getting gmw-characteristics in extent: {extent}")

        to_file = util._get_to_file(
            "gmw_characteristics.xml", zipfile, to_path, _files
        )
        gmw = get_characteristics(
            extent=extent, to_file=to_file, redownload=redownload, zipfile=zipfile
        )

        if kind is None:
            obs_df = pd.DataFrame()
            combine = False
        else:
            # get observations
            logger.info(f"Downloading {kind}-observations")
            obs_df = get_observations(
                gmw,
                kind=kind,
                tmin=tmin,
                tmax=tmax,
                as_csv=as_csv,
                qualifier=qualifier,
                to_path=to_path,
                redownload=redownload,
                zipfile=zipfile,
                _files=_files,
                silent=silent,
                continue_on_error=continue_on_error,
                sort=sort,
                drop_duplicates=drop_duplicates,
                progress_callback=progress_callback,
            )
            if obs_df is None:
                # get_observations returns None (after logging the error) when a
                # request for gmw-relations fails; continue without observations
                obs_df = pd.DataFrame()

            # only keep wells with observations
            if "groundwaterMonitoringWell" in obs_df.columns:
                gmw = gmw[gmw.index.isin(obs_df["groundwaterMonitoringWell"])]

        logger.info("Downloading tube-properties")

        # get the properties of the monitoringTubes
        gdf = get_tube_gdf_from_characteristics(
            gmw,
            index=index,
            to_path=to_path,
            redownload=redownload,
            zipfile=zipfile,
            _files=_files,
            silent=silent,
        )
    finally:
        # also close the zipfile when reading the data fails
        if zipfile is not None:
            zipfile.close()

    if zipfile is None and to_zip is not None:
        util._save_data_to_zip(to_zip, _files, remove_path_again, to_path)

    if not obs_df.empty:
        obs_df = obs_df.set_index(
            ["groundwaterMonitoringWell", "tubeNumber"]
        ).sort_index()

    if combine and kind in ["gld", "gar"]:
        if kind == "gld":
            idcol = "groundwaterLevelDossier"
        elif kind == "gar":
            idcol = "groundwaterAnalysisReport"
        datcol = _get_data_column(kind)

        logger.info("Combining well-properties, tube-properties and observations")

        data = {}
        ids = {}
        # NOTE: the bro_id below assumes gdf has the default index of
        # (groundwaterMonitoringWell, tubeNumber)
        for tube_index in gdf.index:
            if tube_index not in obs_df.index:
                continue

            # honor the requested sort / drop_duplicates for the combined series
            data[tube_index] = _combine_observations(
                obs_df.loc[[tube_index], datcol],
                kind=kind,
                bro_id=f"{tube_index[0]}_{tube_index[1]}",
                sort=sort,
                drop_duplicates=drop_duplicates,
            )
            ids[tube_index] = list(obs_df.loc[[tube_index], "broId"])
        gdf[datcol] = data
        gdf[idcol] = ids
        return gdf
    if kind is None:
        return gdf
    return gdf, obs_df

772 

773 

774def _get_data_column(kind): 

775 if kind == "gld": 

776 return "observation" 

777 elif kind == "gar": 

778 return "laboratoryAnalysis" 

779 else: 

780 raise (NotImplementedError(f"Measurement-kind {kind} not supported yet")) 

781 

782 

783def _get_empty_observation_df(kind): 

784 if kind == "gld": 

785 return gld._get_empty_observation_df() 

786 elif kind == "gar": 

787 return gar._get_empty_observation_df() 

788 else: 

789 raise (NotImplementedError(f"Measurement-kind {kind} not supported yet")) 

790 

791 

792def _combine_observations( 

793 observations, kind, bro_id=None, sort=True, drop_duplicates=True 

794): 

795 obslist = [] 

796 for observation in observations: 

797 if not isinstance(observation, pd.DataFrame) or observation.empty: 

798 continue 

799 obslist.append(observation) 

800 if len(obslist) == 0: 

801 return _get_empty_observation_df(kind) 

802 else: 

803 df = pd.concat(obslist).sort_index() 

804 if kind == "gld": 

805 if sort: 

806 df = gld.sort_observations(df) 

807 if drop_duplicates: 

808 df = gld.drop_duplicate_observations(df, bro_id=bro_id) 

809 return df 

810 

811 

def get_tube_gdf_from_characteristics(characteristics_gdf, **kwargs):
    """
    Generate a GeoDataFrame of tube properties based on well characteristics.

    The unique values of the index of `characteristics_gdf` are used as the bro-ids
    of the wells, and are handed to `get_tube_gdf_from_bro_ids`.

    Parameters
    ----------
    characteristics_gdf : gpd.GeoDataFrame
        GeoDataFrame of well characteristics with bro-ids of the
        GroundwaterMonitoringWells as the index, retrieved with
        `brodata.gmw.get_characteristics`.
    **kwargs : dict
        Kwargs are passed onto `get_tube_gdf_from_bro_ids`, for example `index`: the
        column(s) to use as the index for the resulting GeoDataFrame.

    Returns
    -------
    gpd.GeoDataFrame
        GeoDataFrame of combined well and tube properties
    """
    return get_tube_gdf_from_bro_ids(characteristics_gdf.index.unique(), **kwargs)

837 

838 

def get_tube_gdf_from_bro_ids(
    bro_ids,
    index=None,
    **kwargs,
):
    """
    Generate a GeoDataFrame of tube properties based on an iterable of gmw bro-ids.

    The GroundwaterMonitoringWell-objects of the bro-ids are retrieved with
    `bro._get_data_for_bro_ids`, after which `get_tube_gdf` combines the properties
    of the wells and their monitoring tubes in a new GeoDataFrame.

    Parameters
    ----------
    bro_ids : iterable of str
        The bro-ids of the GroundwaterMonitoringWells.
    index : str or list of str, optional
        Column(s) to use as the index for the resulting GeoDataFrame. Defaults
        to ['groundwaterMonitoringWell', 'tubeNumber'] if not provided.
    **kwargs : dict
        Kwargs are passed onto `bro._get_data_for_bro_ids`.

    Returns
    -------
    gpd.GeoDataFrame
        GeoDataFrame of combined well and tube properties
    """
    wells = bro._get_data_for_bro_ids(
        GroundwaterMonitoringWell,
        bro_ids,
        desc="Downloading Groundwater Monitoring Wells",
        **kwargs,
    )
    return get_tube_gdf(wells, index=index)

872 

873 

cl = GroundwaterMonitoringWell

# module-level versions of the generic bro-functions, bound to the class of this module
get_bro_ids_of_bronhouder = partial(bro._get_bro_ids_of_bronhouder, cl)
get_data_for_bro_ids = partial(bro._get_data_for_bro_ids, cl)
get_characteristics = partial(bro._get_characteristics, cl)

# a partial-object does not take over the docstring of the function it wraps
get_bro_ids_of_bronhouder.__doc__ = bro._get_bro_ids_of_bronhouder.__doc__
get_data_for_bro_ids.__doc__ = bro._get_data_for_bro_ids.__doc__
get_characteristics.__doc__ = bro._get_characteristics.__doc__

881 

882get_characteristics = partial(bro._get_characteristics, cl) 

883get_characteristics.__doc__ = bro._get_characteristics.__doc__