Coverage for brodata/gmw.py: 74%

1import json

2import logging

3import os

4from functools import partial

5from zipfile import ZipFile

7import numpy as np

8import pandas as pd

9import requests

11from . import bro, gld, gar, frd, gmn, util

# Module-level logger, named after this module so log records can be filtered per module.
logger = logging.getLogger(__name__)

def get_well_code(bro_id):
    """
    Look up the well code that belongs to a BRO-ID and return it as plain text.

    A GET request is sent to the ``well-code`` endpoint below the REST url of
    `GroundwaterMonitoringWell`. When the response has a status code above 200, the
    reason of the response is logged as an error and `None` is returned.

    Parameters
    ----------
    bro_id : str
        The BRO-ID for which to retrieve the associated well code.

    Returns
    -------
    well_code : str or None
        The body of the response as plain text if the request is successful. `None`
        if the request fails.
    """
    response = requests.get(
        f"{GroundwaterMonitoringWell._rest_url}/well-code/{bro_id}"
    )
    if response.status_code <= 200:
        return response.text
    logger.error(response.reason)
    return None

class GroundwaterMonitoringWell(bro.FileOrUrl):
    """
    Class to represent a Groundwater Monitoring Well (GMW) from the BRO.

    This class parses XML data related to a groundwater monitoring well (GMW).
    It extracts details such as location, monitoring tube data, and well history
    and stores these in attributes.

    Notes
    -----
    This class extends `bro.FileOrUrl` and is designed to work with GMW XML data,
    either from a file or URL. The helper methods used while parsing (`_get_tag`,
    `_read_date`, `_read_children_of_children`, ...) are not defined here; they are
    presumably inherited from the base class.
    """

    _rest_url = "https://publiek.broservices.nl/gm/gmw/v1"
    _xmlns = "http://www.broservices.nl/xsd/dsgmw/1.1"
    _char = "GMW_C"

    def _read_contents(self, tree):
        """
        Copy the contents of the main GMW element of `tree` onto this instance.

        Simple elements become attributes holding their text. Monitoring tubes are
        collected in the DataFrame `monitoringTube` (indexed by the integer
        `tubeNumber`) and intermediate events of the well history in the DataFrame
        `intermediateEvent`. Unknown tags are reported via `_warn_unknown_tag`.
        """
        ns = {
            "brocom": "http://www.broservices.nl/xsd/brocommon/3.0",
            "xmlns": self._xmlns,
        }

        object_names = ["GMW_PO", "GMW_PPO", "BRO_DO"]
        gmw = self._get_main_object(tree, object_names, ns)

        for key, value in gmw.attrib.items():
            # Strip a leading "{namespace}" from the attribute name. Taking the last
            # part also works for attributes without a namespace, which used to
            # raise an IndexError.
            setattr(self, key.split("}", 1)[-1], value)
        for child in gmw:
            key = self._get_tag(child)
            if len(child) == 0:
                # element without children: store its text
                setattr(self, key, child.text)
            elif key == "standardizedLocation":
                self._read_standardized_location(child)
            elif key == "deliveredLocation":
                self._read_delivered_location(child)
            elif key == "wellHistory":
                for grandchild in child:
                    key = self._get_tag(grandchild)
                    if key in ["wellConstructionDate", "wellRemovalDate"]:
                        setattr(self, key, self._read_date(grandchild))
                    elif key == "intermediateEvent":
                        if not hasattr(self, key):
                            self.intermediateEvent = []
                        event = self._read_intermediate_event(grandchild)
                        self.intermediateEvent.append(event)
                    else:
                        self._warn_unknown_tag(key)

            elif key in ["deliveredVerticalPosition", "registrationHistory"]:
                to_float = ["offset", "groundLevelPosition"]
                self._read_children_of_children(child, to_float=to_float)
            elif key in ["monitoringTube"]:
                if not hasattr(self, key):
                    self.monitoringTube = []
                tube = {}
                to_float = [
                    "tubeTopDiameter",
                    "tubeTopPosition",
                    "screenLength",
                    "screenTopPosition",
                    "screenBottomPosition",
                    "plainTubePartLength",
                ]
                self._read_children_of_children(child, tube, to_float=to_float)
                self.monitoringTube.append(tube)
            else:
                self._warn_unknown_tag(key)
        # The lists that were collected above are converted to DataFrames only after
        # all children have been read.
        if hasattr(self, "monitoringTube"):
            self.monitoringTube = pd.DataFrame(self.monitoringTube)
            tubeNumber = self.monitoringTube["tubeNumber"].astype(int)
            self.monitoringTube["tubeNumber"] = tubeNumber
            self.monitoringTube = self.monitoringTube.set_index("tubeNumber")
        if hasattr(self, "intermediateEvent"):
            self.intermediateEvent = pd.DataFrame(self.intermediateEvent)

    def _read_intermediate_event(self, node):
        """
        Read one intermediateEvent element into a dictionary.

        Only the children `eventName` (stored as text) and `eventDate` (parsed with
        `_read_date`) are read; other tags are reported via `_warn_unknown_tag`.
        """
        d = {}
        for child in node:
            key = self._get_tag(child)
            if key == "eventName":
                d[key] = child.text
            elif key == "eventDate":
                d[key] = self._read_date(child)
            else:
                self._warn_unknown_tag(key)
        return d

132

133

def get_observations(
    bro_ids,
    kind="gld",
    drop_references=True,
    silent=False,
    tmin=None,
    tmax=None,
    as_csv=False,
    tube_number=None,
    status=None,
    observation_type=None,
    qualifier=None,
    to_path=None,
    to_zip=None,
    redownload=False,
    zipfile=None,
    continue_on_error=False,
    sort=True,
    drop_duplicates=True,
    progress_callback=None,
    _files=None,
):
    """
    Retrieve groundwater observations for the specified monitoring wells (bro_ids).

    For every unique well the gmw-relations are requested (or read from a previously
    saved json-file). For every referenced dossier of the requested kind the
    observations are then downloaded or read from file.

    Parameters
    ----------
    bro_ids : str or list or pd.DataFrame
        The BRO IDs of the monitoring wells for which to retrieve the data. If a
        DataFrame is provided, its index is used as the list of BRO IDs. If a single
        string is provided, the progress bar is disabled.
    kind : str, optional
        The type of observations to retrieve. Can be one of {'gmn', 'gld', 'gar', 'frd'}.
        Defaults to 'gld' (groundwater level dossier).
    drop_references : bool or list of str, optional
        Specifies which reference fields to drop from the returned data when
        `as_csv` is True. Defaults to True, in which case 'gmnReferences',
        'gldReferences', and 'garReferences' are removed.
    silent : bool, optional
        If True, suppresses the progress bar. Defaults to False.
    tmin : str or datetime, optional
        The minimum time filter for the observations. Defaults to None.
    tmax : str or datetime, optional
        The maximum time filter for the observations. Defaults to None.
    as_csv : bool, optional
        If True, requests the observations as CSV files instead of XML-files. Only valid
        if `kind` is 'gld'. Defaults to False.
    tube_number : int, optional
        Filters observations to a specific tube number. Defaults to None.
    status : str, optional
        A status string for additional filtering. Possible values are
        "volledigBeoordeeld", "voorlopig" and "onbekend". Only used if `kind` is
        'gld'. Defaults to None.
    observation_type : str, optional
        An observation type string for additional filtering. Possible values are
        "reguliereMeting" and "controleMeting". Only used if `kind` is 'gld'. Defaults
        to None.
    qualifier : str or list of str, optional
        A qualifier string for additional filtering. Only valid if `kind` is 'gld'.
        Defaults to None.
    to_path : str, optional
        If not None, save the downloaded files in the directory named to_path. The
        default is None.
    to_zip : str, optional
        If not None, save the downloaded files in a zip-file named to_zip. The default
        is None.
    redownload : bool, optional
        When downloaded files exist in to_path, read from these files when redownload
        is False. If redownload is True, download the data again from the
        BRO-servers. The default is False.
    zipfile : zipfile.ZipFile, optional
        A zipfile-object. When not None, zipfile is used to read previously downloaded
        data from. The default is None.
    continue_on_error : bool, optional
        If True, continue after an error occurs during downloading or processing of
        individual observation data. Defaults to False.
    sort : bool, optional
        If True, sort the observations. Only used if `kind` is 'gld'. Defaults to True.
    drop_duplicates : bool, optional
        If True, drop duplicate observations based on their timestamp. Only used if
        `kind` is 'gld'. Defaults to True.
    progress_callback : function, optional
        A callback function that takes two arguments (current, total) to report
        progress, where total is the number of unique bro_ids. If None, no progress
        reporting is done. Defaults to None.
    _files : list, optional
        Internal list in which the names of saved files are collected.

    Returns
    -------
    pd.DataFrame or None
        A DataFrame with one row for each dossier of the requested kind. `None` is
        returned when the request for the gmw-relations of a well fails (the error is
        logged).

    Raises
    ------
    NotImplementedError
        If `to_zip` already exists and `redownload` is False.
    ValueError
        If `kind` is not supported.
    Exception
        If `as_csv=True` and `kind` is not 'gld', or if `qualifier` is provided for
        a kind other than 'gld'.
    """
    tubes = []

    if isinstance(bro_ids, str):
        bro_ids = [bro_ids]
        silent = True

    if isinstance(bro_ids, pd.DataFrame):
        bro_ids = bro_ids.index

    if isinstance(drop_references, bool):
        if drop_references:
            drop_references = [
                "gmnReferences",
                "gldReferences",
                "garReferences",
                # "frdReferences",
            ]
        else:
            drop_references = []

    if to_zip is not None:
        if not redownload and os.path.isfile(to_zip):
            raise (NotImplementedError("Redownload=False is not suppported yet"))
        if to_path is None:
            to_path = os.path.splitext(to_zip)[0]
        # remember whether the directory has to be removed after zipping
        remove_path_again = not os.path.isdir(to_path)
        if _files is None:
            _files = []

    desc = f"Downloading {kind}-observations"
    if as_csv and kind != "gld":
        raise (Exception("as_csv=True is only supported for kind=='gld'"))
    if qualifier is not None and kind != "gld":
        raise (Exception("A qualifier is only supported for kind=='gld'"))
    if to_path is not None and not os.path.isdir(to_path):
        os.makedirs(to_path)

    if kind == "gld":
        meas_cl = gld.GroundwaterLevelDossier
    elif kind == "gar":
        meas_cl = gar.GroundwaterAnalysisReport
    elif kind == "frd":
        meas_cl = frd.FormationResistanceDossier
    elif kind == "gmn":
        meas_cl = gmn.GroundwaterMonitoringNetwork
    else:
        raise (ValueError(f"kind='{kind}' not supported"))

    gld_kwargs = _get_gld_kwargs(
        kind, tmin, tmax, qualifier, status, observation_type, sort, drop_duplicates
    )

    # Deduplicate once, so that the total that is reported to progress_callback
    # matches the number of iterations of the loop below.
    unique_bro_ids = np.unique(bro_ids)
    n_bro_ids = len(unique_bro_ids)
    ref_key = f"{kind}References"

    for igmw, bro_id in enumerate(
        util.tqdm(unique_bro_ids, disable=silent, desc=desc)
    ):
        to_rel_file = util._get_to_file(
            f"gmw_relations_{bro_id}.json", zipfile, to_path, _files
        )
        if zipfile is None and (
            redownload or to_rel_file is None or not os.path.isfile(to_rel_file)
        ):
            url = f"https://publiek.broservices.nl/gm/v1/gmw-relations/{bro_id}"
            req = requests.get(url)
            if req.status_code > 200:
                logger.error(req.json()["errors"][0]["message"])
                return
            if to_rel_file is not None:
                with open(to_rel_file, "w") as f:
                    f.write(req.text)
            data = req.json()
        else:
            if zipfile is not None:
                with zipfile.open(to_rel_file) as f:
                    data = json.load(f)
            else:
                with open(to_rel_file) as f:
                    data = json.load(f)
        for tube_ref in data["monitoringTubeReferences"]:
            tube_ref["groundwaterMonitoringWell"] = data["gmwBroId"]
            if tube_number is not None:
                if tube_ref["tubeNumber"] != tube_number:
                    continue
            for ref in tube_ref[ref_key]:
                obsdata = _download_observations_for_bro_id(
                    ref["broId"],
                    meas_cl,
                    as_csv,
                    zipfile,
                    to_path,
                    _files,
                    gld_kwargs,
                    redownload=redownload,
                    continue_on_error=continue_on_error,
                )
                if as_csv:
                    # Build a separate row for every dossier. Mutating and appending
                    # tube_ref itself made all rows of a tube share one dictionary,
                    # so every row ended up with the data of the last dossier.
                    row = dict(tube_ref)
                    row["observation"] = obsdata
                    for key in drop_references:
                        if key in row:
                            row.pop(key)
                        else:
                            logger.warning(
                                "{} not defined for {}, filter {}".format(
                                    key,
                                    row["groundwaterMonitoringWell"],
                                    row["tubeNumber"],
                                )
                            )

                    row["broId"] = ref["broId"]
                    tubes.append(row)
                elif obsdata is not None:
                    # skip dossiers for which no object is available
                    tubes.append(obsdata.to_dict())

        if progress_callback is not None:
            progress_callback(igmw + 1, n_bro_ids)
    if to_zip is not None:
        util._save_data_to_zip(to_zip, _files, remove_path_again, to_path)
    return pd.DataFrame(tubes)

353

354

def _download_observations_for_bro_id(
    bro_id,
    meas_cl,
    as_csv,
    zipfile,
    to_path,
    _files,
    gld_kwargs,
    redownload=False,
    continue_on_error=False,
):
    """
    Download the observations of one dossier, or read them from a saved file.

    Parameters
    ----------
    bro_id : str
        The bro-id of the dossier.
    meas_cl : class
        The class used to read xml-data. It is called as
        ``meas_cl.from_bro_id(bro_id, ...)`` or ``meas_cl(to_file, zipfile=...)``.
    as_csv : bool
        If True, csv-data is requested or read via the gld-module instead of
        xml-data.
    zipfile : zipfile.ZipFile or None
        When not None, data is read from this zipfile and nothing is downloaded.
    to_path : str or None
        Passed to `util._get_to_file` to determine the file name.
    _files : list or None
        Passed to `util._get_to_file`.
    gld_kwargs : dict
        Extra keyword arguments for reading the data. The keys "status" and
        "observation_type" also determine the observatietype of csv-requests.
    redownload : bool, optional
        If True, download the data even if the file already exists. Defaults to
        False.
    continue_on_error : bool, optional
        If True, an error during downloading is logged instead of raised. Defaults
        to False.

    Returns
    -------
    data
        The result of the reader, or `None` when downloading failed and
        `continue_on_error` is True.
    """
    if as_csv:
        fname = f"{bro_id}.csv"
        # translate the status / observation_type filter to an observatietype
        observatietype = None
        status = gld_kwargs.get("status")
        if status == "voorlopig":
            observatietype = "regulier_voorlopig"
        elif status == "volledigBeoordeeld":
            observatietype = "regulier_beoordeeld"
        elif status == "onbekend":
            observatietype = "onbekend"
        elif gld_kwargs.get("observation_type") == "controleMeting":
            observatietype = "controle"
    else:
        fname = f"{bro_id}.xml"
    to_file = util._get_to_file(fname, zipfile, to_path, _files)

    # Without this default a failed download with continue_on_error=True would end
    # in an UnboundLocalError at the return statement.
    data = None
    if zipfile is None and (
        redownload or to_file is None or not os.path.isfile(to_file)
    ):  # download the data
        if as_csv:
            try:
                data = gld.get_objects_as_csv(
                    bro_id,
                    observatietype=observatietype,
                    to_file=to_file,
                    **gld_kwargs,
                )
            except Exception as e:
                if not continue_on_error:
                    raise
                logger.error(
                    "Error processing %s csv for broid %s: %s",
                    meas_cl.__name__,
                    bro_id,
                    e,
                )
        else:
            try:
                data = meas_cl.from_bro_id(bro_id, to_file=to_file, **gld_kwargs)
            except Exception as e:
                if not continue_on_error:
                    raise
                logger.error(
                    "Error processing %s xml for broid %s: %s",
                    meas_cl.__name__,
                    bro_id,
                    e,
                )
    else:
        # read the data from a file
        if as_csv:
            if zipfile is not None:
                # close the member of the zipfile again after reading
                with zipfile.open(to_file) as f:
                    data = gld.read_gld_csv(
                        f,
                        bro_id,
                        observatietype=observatietype,
                        **gld_kwargs,
                    )
            else:
                data = gld.read_gld_csv(
                    to_file,
                    bro_id,
                    observatietype=observatietype,
                    **gld_kwargs,
                )
        else:
            data = meas_cl(to_file, zipfile=zipfile, **gld_kwargs)
    return data

429

430

431def _get_gld_kwargs(

432 kind, tmin, tmax, qualifier, status, observation_type, sort, drop_duplicates

433):

434 gld_kwargs = {}

435 if kind == "gld":

436 if tmin is not None:

437 gld_kwargs["tmin"] = tmin

438 if tmax is not None:

439 gld_kwargs["tmax"] = tmax

440 if qualifier is not None:

441 gld_kwargs["qualifier"] = qualifier

442 if status is not None:

443 gld_kwargs["status"] = status

444 if observation_type is not None:

445 gld_kwargs["observation_type"] = observation_type

446 gld_kwargs["sort"] = sort

447 gld_kwargs["drop_duplicates"] = drop_duplicates

448 return gld_kwargs

449

450

def get_tube_observations(
    gwm_id, tube_number, kind="gld", sort=True, drop_duplicates=True, **kwargs
):
    """
    Get the combined observations of a single groundwater monitoring tube.

    Parameters
    ----------
    gwm_id : str
        The bro_id of the groundwater monitoring well.
    tube_number : int
        The tube number.
    kind : str, optional
        The type of observations to retrieve, passed to `get_observations`. Combining
        the observations is only implemented for 'gld' and 'gar'. Defaults to 'gld'
        (groundwater level dossier).
    sort : bool, optional
        If True, sort the observations. Only used if `kind` is 'gld'. Defaults to True.
    drop_duplicates : bool, optional
        If True, drop duplicate observations based on their timestamp. Only used if
        `kind` is 'gld'. Defaults to True.
    **kwargs : dict
        Kwargs are passed onto get_observations.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the observations. An empty observation-DataFrame is
        returned when no dossiers are found.

    """
    # The individual dossiers are requested unsorted and with duplicates, as sorting
    # and dropping duplicates only needs to happen once, on the combined result.
    dossiers = get_observations(
        gwm_id,
        tube_number=tube_number,
        kind=kind,
        sort=False,
        drop_duplicates=False,
        **kwargs,
    )
    if not dossiers.empty:
        return _combine_observations(
            dossiers[_get_data_column(kind)],
            kind=kind,
            bro_id=f"{gwm_id}_{tube_number}",
            sort=sort,
            drop_duplicates=drop_duplicates,
        )
    return _get_empty_observation_df(kind)

501

502

def get_tube_gdf(gmws, index=None):
    """
    Create a GeoDataFrame of tube properties combined with well metadata.

    Every row of the monitoringTube-table of every well is combined with the other
    properties of that well. The resulting records are passed to
    `bro.objects_to_gdf` and the result is sorted by its index.

    Parameters
    ----------
    gmws : list or dict of GroundwaterMonitoringWell, or pd.DataFrame
        Well and tube data in one of the following formats: a list of
        `GroundwaterMonitoringWell` objects, a dictionary of these objects, or a
        DataFrame with the bro-ids of the GroundwaterMonitoringWells as the index and
        the column monitoringTube containing tube properties. Wells of which
        monitoringTube is not a DataFrame are skipped.
    index : str or list of str, optional
        The column or columns to use for indexing the resulting GeoDataFrame. Defaults
        to ['groundwaterMonitoringWell', 'tubeNumber'] if not provided.

    Returns
    -------
    gdf : gpd.GeoDataFrame
        The combined well and tube properties, as returned by `bro.objects_to_gdf`
        and sorted by index.
    """
    # a dictionary is handled like a list of its values
    if isinstance(gmws, dict):
        gmws = list(gmws.values())
    if isinstance(gmws, list):
        gmws = pd.DataFrame([well.to_dict() for well in gmws])
        if "broId" in gmws.columns:
            gmws = gmws.set_index("broId")

    records = []
    for bro_id in gmws.index:
        tube_table = gmws.loc[bro_id, "monitoringTube"]
        if not isinstance(tube_table, pd.DataFrame):
            continue
        well_properties = gmws.loc[bro_id].drop("monitoringTube")
        for tube_number in tube_table.index:
            # combine properties of well and tube
            record = pd.concat((well_properties, tube_table.loc[tube_number]))
            record["groundwaterMonitoringWell"] = bro_id
            record["tubeNumber"] = tube_number
            records.append(record)

    if index is None:
        index = ["groundwaterMonitoringWell", "tubeNumber"]
    return bro.objects_to_gdf(records, index=index).sort_index()

568

569

def get_data_in_extent(
    extent,
    kind="gld",
    tmin=None,
    tmax=None,
    combine=None,
    index=None,
    as_csv=False,
    qualifier=None,
    to_zip=None,
    to_path=None,
    redownload=False,
    silent=False,
    continue_on_error=False,
    sort=True,
    drop_duplicates=True,
    progress_callback=None,
):
    """
    Retrieve metadata and observations within a specified spatial extent.

    This function fetches monitoring well characteristics, groundwater observations,
    and tube properties within the given spatial extent. It can combine the data
    for specific observation types and return either individual dataframes or a
    combined dataframe.

    Parameters
    ----------
    extent : str or sequence
        The spatial extent ([xmin, xmax, ymin, ymax]) to filter the data. When extent
        is a string, it is interpreted as the name of a zip-file (see `to_zip`) and
        `redownload` is set to False.
    kind : str, optional
        The type of observations to retrieve. Valid values are {'gld', 'gar'} for
        groundwater level dossier or groundwater analysis report. When kind is None, no
        observations are downloaded. Defaults to 'gld'.
    tmin : str or datetime, optional
        The minimum time for filtering observations. Defaults to None.
    tmax : str or datetime, optional
        The maximum time for filtering observations. Defaults to None.
    combine : bool, optional
        If True, combines the metadata, tube properties, and observations into a single
        dataframe. Defaults to None, which logs a warning and is treated as False.
    index : str, optional
        The column to use for indexing in the resulting dataframe. Defaults to None.
    as_csv : bool, optional
        If True, the measurement data is requested as CSV files instead of XML files
        (only supported for 'gld'). Defaults to False.
    qualifier : str or list of str, optional
        A string or list of strings used to filter the observations. Only valid if
        `kind` is 'gld'. Defaults to None.
    to_zip : str, optional
        If not None, save the downloaded files in a zip-file named to_zip. When this
        file already exists and redownload is False, the data is read from it. The
        default is None.
    to_path : str, optional
        If not None, save the downloaded files in the directory named to_path. The
        default is None.
    redownload : bool, optional
        When downloaded files exist in to_path or to_zip, read from these files when
        redownload is False. If redownload is True, download the data again from the
        BRO-server. The default is False.
    silent : bool, optional
        If True, suppresses progress logging. Defaults to False.
    continue_on_error : bool, optional
        If True, continue after an error occurs during downloading or processing of
        individual observation data. Defaults to False.
    sort : bool, optional
        If True, sort the observations, also when they are combined. Only used if
        `kind` is 'gld'. Defaults to True.
    drop_duplicates : bool, optional
        If True, drop duplicate observations based on their timestamp, also when they
        are combined. Only used if `kind` is 'gld'. Defaults to True.
    progress_callback : function, optional
        A callback function that takes two arguments (current, total) to report
        progress. If None, no progress reporting is done. Defaults to None.

    Returns
    -------
    gdf : pd.DataFrame
        A dataframe containing tube properties and metadata within the specified extent.
        When `combine` is True and `kind` is 'gld' or 'gar', it also contains a column
        with the combined observations and a column with the ids of the dossiers.

    obs_df : pd.DataFrame, optional
        A dataframe containing the observations for the specified wells. Not returned
        when the observations are combined into `gdf` or when `kind` is None.

    Raises
    ------
    Exception
        If extent is a string and `to_zip` is supplied as well, if `as_csv=True` and
        `kind` is not 'gld', or if other parameters are invalid.
    """
    if combine is None:
        logger.warning(
            "The default of `combine=False` will change to True in a future version of "
            "brodata. Pass combine=False to retain current behavior or combine=True to "
            "adopt the future default and silence this warning."
        )
        combine = False
    if isinstance(extent, str):
        if to_zip is not None:
            raise (Exception("When extent is a string, do not supply to_zip"))
        to_zip = extent
        extent = None
        redownload = False

    zipfile = None
    _files = None
    if to_zip is not None:
        if not redownload and os.path.isfile(to_zip):
            logger.info(f"Reading data from {to_zip}")
            zipfile = ZipFile(to_zip)
        else:
            if to_path is None:
                to_path = os.path.splitext(to_zip)[0]
            # remember whether the directory has to be removed after zipping
            remove_path_again = not os.path.isdir(to_path)
            _files = []

    # make sure an opened zipfile is closed again, also when an error occurs
    try:
        if to_path is not None and not os.path.isdir(to_path):
            os.makedirs(to_path)

        # get gwm characteristics
        logger.info(f"Getting gmw-characteristics in extent: {extent}")

        to_file = util._get_to_file(
            "gmw_characteristics.xml", zipfile, to_path, _files
        )
        gmw = get_characteristics(
            extent=extent, to_file=to_file, redownload=redownload, zipfile=zipfile
        )

        if kind is None:
            obs_df = pd.DataFrame()
            combine = False
        else:
            # get observations
            logger.info(f"Downloading {kind}-observations")
            obs_df = get_observations(
                gmw,
                kind=kind,
                tmin=tmin,
                tmax=tmax,
                as_csv=as_csv,
                qualifier=qualifier,
                to_path=to_path,
                redownload=redownload,
                zipfile=zipfile,
                _files=_files,
                silent=silent,
                continue_on_error=continue_on_error,
                sort=sort,
                drop_duplicates=drop_duplicates,
                progress_callback=progress_callback,
            )

            # only keep wells with observations
            if "groundwaterMonitoringWell" in obs_df.columns:
                gmw = gmw[gmw.index.isin(obs_df["groundwaterMonitoringWell"])]

        logger.info("Downloading tube-properties")

        # get the properties of the monitoringTubes
        gdf = get_tube_gdf_from_characteristics(
            gmw,
            index=index,
            to_path=to_path,
            redownload=redownload,
            zipfile=zipfile,
            _files=_files,
            silent=silent,
        )
    finally:
        if zipfile is not None:
            zipfile.close()

    if zipfile is None and to_zip is not None:
        util._save_data_to_zip(to_zip, _files, remove_path_again, to_path)

    if not obs_df.empty:
        obs_df = obs_df.set_index(
            ["groundwaterMonitoringWell", "tubeNumber"]
        ).sort_index()

    if combine and kind in ["gld", "gar"]:
        if kind == "gld":
            idcol = "groundwaterLevelDossier"
        elif kind == "gar":
            idcol = "groundwaterAnalysisReport"
        datcol = _get_data_column(kind)

        logger.info("Combining well-properties, tube-properties and observations")

        data = {}
        ids = {}
        # use a separate name for the loop variable, so the parameter `index` is not
        # overwritten
        for tube_index in gdf.index:
            if tube_index not in obs_df.index:
                continue

            data[tube_index] = _combine_observations(
                obs_df.loc[[tube_index], datcol],
                kind=kind,
                bro_id=f"{tube_index[0]}_{tube_index[1]}",
                sort=sort,
                drop_duplicates=drop_duplicates,
            )
            ids[tube_index] = list(obs_df.loc[[tube_index], "broId"])
        gdf[datcol] = data
        gdf[idcol] = ids
        return gdf
    else:
        if kind is None:
            return gdf
        else:
            return gdf, obs_df

772

773

774def _get_data_column(kind):

775 if kind == "gld":

776 return "observation"

777 elif kind == "gar":

778 return "laboratoryAnalysis"

779 else:

780 raise (NotImplementedError(f"Measurement-kind {kind} not supported yet"))

781

782

783def _get_empty_observation_df(kind):

784 if kind == "gld":

785 return gld._get_empty_observation_df()

786 elif kind == "gar":

787 return gar._get_empty_observation_df()

788 else:

789 raise (NotImplementedError(f"Measurement-kind {kind} not supported yet"))

790

791

792def _combine_observations(

793 observations, kind, bro_id=None, sort=True, drop_duplicates=True

794):

795 obslist = []

796 for observation in observations:

797 if not isinstance(observation, pd.DataFrame) or observation.empty:

798 continue

799 obslist.append(observation)

800 if len(obslist) == 0:

801 return _get_empty_observation_df(kind)

802 else:

803 df = pd.concat(obslist).sort_index()

804 if kind == "gld":

805 if sort:

806 df = gld.sort_observations(df)

807 if drop_duplicates:

808 df = gld.drop_duplicate_observations(df, bro_id=bro_id)

809 return df

810

811

def get_tube_gdf_from_characteristics(characteristics_gdf, **kwargs):
    """
    Generate a GeoDataFrame of tube properties based on well characteristics.

    The unique values of the index of `characteristics_gdf` are used as bro-ids and
    passed to `get_tube_gdf_from_bro_ids`, which retrieves the
    GroundwaterMonitoringWell-objects and combines the information about the
    groundwater monitoring tubes in a new GeoDataFrame.

    Parameters
    ----------
    characteristics_gdf : gpd.GeoDataFrame
        GeoDataFrame of well characteristics with bro-ids of the
        GroundwaterMonitoringWells as the index, retrieved with
        `brodata.gmw.get_characteristics`.
    **kwargs : dict
        Passed on to `get_tube_gdf_from_bro_ids`, for example `index`: the column(s)
        to use as the index of the resulting GeoDataFrame.

    Returns
    -------
    gpd.GeoDataFrame
        GeoDataFrame of combined well and tube properties
    """
    return get_tube_gdf_from_bro_ids(characteristics_gdf.index.unique(), **kwargs)

837

838

def get_tube_gdf_from_bro_ids(
    bro_ids,
    index=None,
    **kwargs,
):
    """
    Generate a GeoDataFrame of tube properties based on an iterable of gmw bro-ids.

    The GroundwaterMonitoringWell-objects for the bro-ids are retrieved with
    `bro._get_data_for_bro_ids`, after which `get_tube_gdf` combines the information
    about the groundwater monitoring tubes in a new GeoDataFrame.

    Parameters
    ----------
    bro_ids : iterable of str
        The bro-ids of the GroundwaterMonitoringWells.
    index : str or list of str, optional
        Column(s) to use as the index for the resulting GeoDataFrame. Defaults
        to ['groundwaterMonitoringWell', 'tubeNumber'] if not provided.
    **kwargs : dict
        Passed on to `bro._get_data_for_bro_ids`.

    Returns
    -------
    gpd.GeoDataFrame
        GeoDataFrame of combined well and tube properties
    """
    wells = bro._get_data_for_bro_ids(
        GroundwaterMonitoringWell,
        bro_ids,
        desc="Downloading Groundwater Monitoring Wells",
        **kwargs,
    )
    return get_tube_gdf(wells, index=index)

872

873

# Short alias for the class of this module, used to build the module-level functions
# below.
cl = GroundwaterMonitoringWell

# Module-level variants of the generic functions in `bro`, with the class of this
# module filled in as the first argument. functools.partial objects do not take over
# the docstring of the wrapped function, so it is copied explicitly.
get_bro_ids_of_bronhouder = partial(bro._get_bro_ids_of_bronhouder, cl)
get_bro_ids_of_bronhouder.__doc__ = bro._get_bro_ids_of_bronhouder.__doc__

get_data_for_bro_ids = partial(bro._get_data_for_bro_ids, cl)
get_data_for_bro_ids.__doc__ = bro._get_data_for_bro_ids.__doc__

get_characteristics = partial(bro._get_characteristics, cl)
get_characteristics.__doc__ = bro._get_characteristics.__doc__

Coverage for brodata / gmw.py: 74%

332 statements