Coverage for brodata / dino.py: 75%

586 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-20 14:37 +0000

1import logging 

2import os 

3from io import BytesIO, StringIO, TextIOWrapper 

4from pathlib import Path 

5from zipfile import ZipFile 

6 

7import numpy as np 

8import pandas as pd 

9import geopandas as gpd 

10import requests 

11import json 

12from shapely.geometry import LineString 

13import matplotlib.pyplot as plt 

14 

15from . import util 

16from .webservices import get_configuration, get_gdf 

17 

# module-level logger, named after this module per the logging convention
logger = logging.getLogger(__name__)

19 

20 

def objects_to_gdf(
    objects,
    x="X-coordinaat",
    y="Y-coordinaat",
    geometry=None,
    index=None,
    to_gdf=True,
):
    """
    Convert a dictionary of dino-objects to a geopandas GeoDataFrame.

    Parameters
    ----------
    objects: dictionary of bro or dinoloket objects
        dictionary of objects to convert to (geo)dataframe
    x: str
        name of column of x-coordinate
    y: str
        name of column of y-coordinate
    geometry: str
        name of column of geometry
    index: str or list of str
        name of column(s) to use as index
    to_gdf: bool
        convert to geodataframe

    Returns
    -------
    gdf: GeoDataFrame or dict
        Returns a GeoDataFrame if to_gdf is True, otherwise returns `objects`
        unaltered
    """
    if not to_gdf:
        # return the raw dictionary of objects without conversion
        return objects

    # convert the dino-objects to a dataframe, one row per object
    df = pd.DataFrame([objects[key].to_dict() for key in objects])
    if geometry is not None:
        # use an existing geometry column when it is present
        if geometry in df.columns:
            geometry = df[geometry]
        else:
            geometry = None
    else:
        # build point-geometries from the x- and y-coordinate columns
        if df.empty:
            logger.warning("no data found")
        elif x not in df:
            logger.warning(f"{x} not found in data. No geometry column created.")
        elif y not in df:
            logger.warning(f"{y} not found in data. No geometry column created.")
        else:
            geometry = gpd.points_from_xy(df[x], df[y])
    gdf = gpd.GeoDataFrame(df, geometry=geometry)
    if index is not None and not gdf.empty:
        if isinstance(index, str):
            if index in gdf.columns:
                gdf = gdf.set_index(index)
        elif all(col in gdf.columns for col in index):
            # index is an iterable (list) of columns, forming a MultiIndex
            gdf = gdf.set_index(index)
    return gdf

82 

83 

def _get_data_within_extent(
    dino_cl,
    kind,
    extent,
    config=None,
    timeout=5,
    silent=False,
    to_path=None,
    to_zip=None,
    redownload=False,
    x="X-coordinaat",
    y="Y-coordinaat",
    geometry=None,
    index="NITG-nr",
    to_gdf=True,
    max_retries=2,
    continue_on_error=False,
    progress_callback=None,
):
    """Retrieve DINO data within a specified geographical extent or from local files.

    This is a core function used by various data retrieval methods in the DINO system.
    It can either load data from local files/archives or fetch it from the DINO server
    based on geographical extent.

    Parameters
    ----------
    dino_cl : class
        The DINO data class to instantiate for each location (e.g., Grondwaterstand).
    kind : str
        The type of DINO data to retrieve (e.g., "Grondwaterstand", "Boorgatmeting").
    extent : str, Path, or sequence
        Either a path to local data, or a sequence of [xmin, xmax, ymin, ymax]
        coordinates.
    config : dict, optional
        Configuration mapping for DINO data kinds. Uses default if None.
    timeout : int or float, optional
        Timeout in seconds for network requests when downloading data. The default is 5.
    silent : bool, default=False
        If True, suppress progress output.
    to_path : str, optional
        Directory to save downloaded files. Created if it doesn't exist.
    to_zip : str, optional
        Path to save downloaded files in a zip archive.
    redownload : bool, optional
        If True, redownload data even if local files exist. The default is False.
    x : str, optional
        Name of the x-coordinate column. The default is "X-coordinaat".
    y : str, optional
        Name of the y-coordinate column. The default is "Y-coordinaat".
    geometry : str, optional
        Name of the geometry column if different from creating from x,y coordinates.
    index : str, optional
        Column(s) to use as index in the output GeoDataFrame. The default is "NITG-nr".
    to_gdf : bool, optional
        If True, return a GeoDataFrame; if False, return raw dictionary of objects. The
        default is True
    max_retries : int, optional
        Maximum number of retries for failed network requests. The default is 2.
    continue_on_error : bool, optional
        If True, continue after an error occurs during downloading or processing of
        individual observation data. Defaults to False.
    progress_callback : function, optional
        A callback function that takes two arguments (current, total) to report
        progress. If None, no progress reporting is done. Defaults to None.

    Returns
    -------
    geopandas.GeoDataFrame or dict
        If to_gdf is True, returns a GeoDataFrame with the requested data.
        If to_gdf is False, returns a dictionary of DINO objects.
    """
    # when extent is a path, read previously downloaded data from disk
    if isinstance(extent, (str, Path)):
        data = _get_data_from_path(extent, dino_cl, silent=silent, progress_callback=progress_callback)
        return objects_to_gdf(data, x, y, geometry, index, to_gdf)

    if to_zip is not None:
        if not redownload and os.path.isfile(to_zip):
            # a zip-file with downloaded data already exists: read from it
            data = _get_data_from_zip(to_zip, dino_cl, silent=silent, extent=extent, progress_callback=progress_callback)
            return objects_to_gdf(data, x, y, geometry, index, to_gdf)
        # download to a directory first; the files are zipped afterwards
        if to_path is None:
            to_path = os.path.splitext(to_zip)[0]
        # remember whether we created to_path, so it can be removed again
        remove_path_again = not os.path.isdir(to_path)
        files = []

    if config is None:
        config = get_configuration()

    if to_path is not None and not os.path.isdir(to_path):
        os.makedirs(to_path)

    to_file = None
    gdf = None
    if to_path is not None:
        # the locations of the objects are cached in a geojson-file
        to_file = os.path.join(to_path, f"{dino_cl.__name__}.geojson")
        if to_zip is not None:
            files.append(to_file)
        if not redownload and os.path.isfile(to_file):
            gdf = gpd.read_file(to_file)
            if not gdf.empty and "DINO_NR" in gdf.columns:
                gdf = gdf.set_index("DINO_NR")
    if gdf is None:
        # request the locations within extent from the dino-server
        gdf = get_gdf(
            kind,
            config=config,
            extent=extent,
            timeout=timeout,
        )
        if to_file is not None:
            # cache the locations for a later call with redownload=False
            gdf.to_file(to_file)

    to_file = None

    # download (or read from cache) the data of each location separately
    data = {}
    for i, dino_nr in util.tqdm(enumerate(gdf.index), disable=silent):
        if progress_callback is not None:
            progress_callback(i, len(gdf))
        if to_path is not None:
            to_file = os.path.join(to_path, f"{dino_nr}.csv")
            if to_zip is not None:
                files.append(to_file)
            if not redownload and os.path.isfile(to_file):
                # the file was downloaded before: read it from disk
                data[dino_nr] = dino_cl(to_file)
                continue
        try:
            data[dino_nr] = dino_cl.from_dino_nr(
                dino_nr, timeout=timeout, to_file=to_file, max_retries=max_retries
            )
        except Exception as e:
            if not continue_on_error:
                raise e
            # best-effort mode: log the failure and move on to the next location
            logger.error("Error retrieving %s %s: %s", kind, dino_nr, e)
            continue
    if to_zip is not None:
        # move the downloaded files into the zip-archive
        util._save_data_to_zip(to_zip, files, remove_path_again, to_path)

    return objects_to_gdf(data, x, y, geometry, index, to_gdf)

221 

222 

def _get_data_from_path(from_path, dino_class, silent=False, ext=".csv", progress_callback=None):
    """Read dino-objects of type `dino_class` from files in a local directory.

    When `from_path` points to a zip-file, the archive is read instead. Returns
    a dictionary keyed by file name without its extension.
    """
    if str(from_path).endswith(".zip"):
        return _get_data_from_zip(
            from_path, dino_class, silent=silent, progress_callback=progress_callback
        )
    fnames = [fname for fname in os.listdir(from_path) if fname.endswith(ext)]
    n_files = len(fnames)
    data = {}
    for i, fname in util.tqdm(enumerate(fnames), total=n_files, disable=silent):
        if progress_callback is not None:
            progress_callback(i, n_files)
        key = os.path.splitext(fname)[0]
        data[key] = dino_class(os.path.join(from_path, fname))
    return data

236 

237 

def _get_data_from_zip(to_zip, dino_class, silent=False, extent=None, progress_callback=None):
    # read data from zipfile created by an earlier download (see
    # _get_data_within_extent); returns a dict of dino_class instances
    data = {}
    with ZipFile(to_zip) as zf:
        names = zf.namelist()
        # the archive may contain a geojson-file with the object locations
        name = f"{dino_class.__name__}.geojson"
        has_location_file = name in names
        if has_location_file:
            names.remove(name)
        if has_location_file and extent is not None:
            # use the location-file to only read the objects within extent;
            # extent is [xmin, xmax, ymin, ymax]
            gdf = gpd.read_file(zf.open(name))
            gdf = gdf.set_index("DINO_NR")
            gdf = gdf.cx[extent[0] : extent[1], extent[2] : extent[3]]
            names = [f"{name}.csv" for name in gdf.index]
        total = len(names)
        for i, name in util.tqdm(enumerate(names), total=total, disable=silent):
            if progress_callback is not None:
                progress_callback(i, total)
            # NOTE(review): keys here keep the ".csv" extension, whereas
            # _get_data_from_path strips the extension — confirm this
            # difference is intended by callers of both functions
            data[name] = dino_class(name, zipfile=zf)
    return data

258 

259 

def get_verticaal_elektrisch_sondeeronderzoek(extent, **kwargs):
    """Get vertical electrical sounding (VES) data within extent or from a path."""
    return _get_data_within_extent(
        VerticaalElektrischSondeeronderzoek,
        "Verticaal elektrisch sondeeronderzoek",
        extent,
        geometry="geometry",
        **kwargs,
    )

266 

267 

def get_grondwaterstand(
    extent,
    config=None,
    timeout=5,
    silent=False,
    to_path=None,
    to_zip=None,
    redownload=False,
    to_gdf=True,
    skip=None,
    continue_on_error=False,
    progress_callback=None,
):
    """
    Get groundwater level (Grondwaterstand) data as a GeoDataFrame or raw objects.

    Fetch Grondwaterstand data for a given geographical extent or load it from local
    files. Data are retrieved per monitoring location and per piezometer. Results can
    be returned as a GeoDataFrame or as a dictionary of Grondwaterstand objects.

    Parameters
    ----------
    extent : str or sequence
        The spatial extent ([xmin, xmax, ymin, ymax]) to filter the data.
    config : dict, optional
        Configuration mapping for available DINO data kinds. If None, a default
        configuration is used.
    timeout : int or float, optional
        Timeout in seconds for network requests when downloading data. The default is 5.
    silent : bool, optional
        If True, suppress progress output.
    to_path : str, optional
        If not None, save the downloaded files in the directory named to_path. The
        default is None.
    to_zip : str, optional
        If not None, save the downloaded files in a zip-file named to_zip. The default
        is None.
    redownload : bool, optional
        When downloaded files exist in to_path or to_zip, read from these files when
        redownload is False. If redownload is True, download the data again from the
        DINO-server. The default is False.
    to_gdf : bool, optional
        If True (default), convert the loaded Grondwaterstand objects into a
        geopandas.GeoDataFrame. If False, return the raw mapping of objects.
    skip : str or iterable, optional
        Name or iterable of location names to skip during download or processing.
    continue_on_error : bool, optional
        If True, continue after an error occurs during downloading or processing of
        individual observation data. Defaults to False.
    progress_callback : function, optional
        A callback function that takes two arguments (current, total) to report
        progress. If None, no progress reporting is done. Defaults to None.

    Returns
    -------
    geopandas.GeoDataFrame or dict
        If `to_gdf` is True, returns a GeoDataFrame indexed by ['Locatie',
        'Filternummer']. If False, returns a dictionary with Grondwaterstand objects.

    Notes
    -----
    - When `extent` is a path string, this function loads local data.
    - When `to_zip` is provided, the function will create a temporary directory and
      archive files into the supplied ZIP.
    """
    dino_class = Grondwaterstand
    # the result is indexed by location and piezometer-number
    index = ["Locatie", "Filternummer"]
    if skip is not None and isinstance(skip, str):
        # allow a single location name as shorthand for a one-element list
        skip = [skip]

    # when extent is a path, read previously downloaded data from disk
    if isinstance(extent, str):
        data = _get_data_from_path(extent, dino_class, silent=silent, progress_callback=progress_callback)
        return objects_to_gdf(data, index=index, to_gdf=to_gdf)

    if to_zip is not None:
        if not redownload and os.path.isfile(to_zip):
            # a zip-file with downloaded data already exists: read from it
            data = _get_data_from_zip(to_zip, dino_class, silent=silent, progress_callback=progress_callback)
            return objects_to_gdf(data, index=index, to_gdf=to_gdf)
        # download to a directory first; the files are zipped afterwards
        if to_path is None:
            to_path = os.path.splitext(to_zip)[0]
        # remember whether we created to_path, so it can be removed again
        remove_path_again = not os.path.isdir(to_path)
        files = []

    kind = "Grondwaterstand"
    if config is None:
        config = get_configuration()
    # request the monitoring locations within extent from the dino-server
    gdf = get_gdf(
        kind,
        config=config,
        extent=extent,
        timeout=timeout,
    )
    download_url = config[kind]["download"]

    to_file = None
    if to_path is not None and not os.path.isdir(to_path):
        os.makedirs(to_path)
    data = {}
    for i, name in util.tqdm(enumerate(gdf.index), disable=silent):
        if progress_callback is not None:
            progress_callback(i, len(gdf))
        if skip is not None and name in skip:
            continue
        # download each of the piezometers of this location; ST_CNT holds the
        # piezometer count and piezometer numbers are 1-based, zero-padded
        for i_st in range(1, gdf.at[name, "ST_CNT"] + 1):
            piezometer_nr = f"{i_st:03d}"
            url = f"{download_url}/{name}/{piezometer_nr}"
            if to_path is not None:
                to_file = os.path.join(to_path, f"{name}_{piezometer_nr}.csv")
                if to_zip is not None:
                    files.append(to_file)
                if not redownload and os.path.isfile(to_file):
                    # the file was downloaded before: read it from disk
                    data[f"{name}_{piezometer_nr}"] = dino_class(to_file)
                    continue
            try:
                data[f"{name}_{piezometer_nr}"] = dino_class(
                    url, timeout=timeout, to_file=to_file
                )
            except Exception as e:
                if not continue_on_error:
                    raise e
                # best-effort mode: log the failure and move on
                logger.error(
                    "Error retrieving %s %s piezometer %s: %s",
                    kind,
                    name,
                    piezometer_nr,
                    e,
                )
                continue
    if to_zip is not None:
        # move the downloaded files into the zip-archive
        util._save_data_to_zip(to_zip, files, remove_path_again, to_path)
    return objects_to_gdf(
        data, index=index, to_gdf=to_gdf, x="X-coordinaat", y="Y-coordinaat"
    )

401 

402 

def get_grondwatersamenstelling(extent, **kwargs):
    """Get groundwater composition data within extent or from a path."""
    return _get_data_within_extent(
        Grondwatersamenstelling, "Grondwatersamenstelling", extent, **kwargs
    )

407 

408 

def get_geologisch_booronderzoek(extent, **kwargs):
    """Deprecated alias of `get_boormonsterprofiel`."""
    logger.warning(
        "`get_geologisch_booronderzoek` is deprecated. Use `get_boormonsterprofiel` instead"
    )
    return _get_data_within_extent(
        GeologischBooronderzoek, "Geologisch booronderzoek", extent, **kwargs
    )

416 

417 

def get_boormonsterprofiel(extent, **kwargs):
    """Get core sample descriptions (Boormonsterprofiel) within extent or from a path."""
    return _get_data_within_extent(
        Boormonsterprofiel, "Boormonsterprofiel", extent, **kwargs
    )

422 

423 

def get_boorgatmeting(extent, **kwargs):
    """Get borehole logging (Boorgatmeting) data within extent or from a path."""
    return _get_data_within_extent(Boorgatmeting, "Boorgatmeting", extent, **kwargs)

428 

429 

def get_chemische_analyse(extent, **kwargs):
    """Get chemical analysis data within extent or from a path."""
    return _get_data_within_extent(
        ChemischeAnalyse, "Chemische analyse", extent, **kwargs
    )

434 

435 

def get_korrelgrootte_analyse(extent, **kwargs):
    """Get grain-size analysis data within extent or from a path."""
    return _get_data_within_extent(
        KorrelgrootteAnalyse, "Korrelgrootte analyse", extent, **kwargs
    )

440 

441 

def get_oppervlaktewaterstand(extent, **kwargs):
    """Get surface water level data within extent or from a path."""
    # NOTE(review): the kind-string is "Oppervlaktewateronderzoek", not
    # "Oppervlaktewaterstand" — presumably the server's name for this dataset;
    # confirm against the configuration from get_configuration()
    return _get_data_within_extent(
        Oppervlaktewaterstand, "Oppervlaktewateronderzoek", extent, **kwargs
    )

446 

447 

class CsvFileOrUrl:
    """Base class for DINO data delivered as a csv-file, zip-member or url.

    Subclasses implement ``_read_contents(f)`` to parse the text stream.
    """

    def __init__(
        self,
        url_or_file,
        zipfile=None,
        timeout=5,
        to_file=None,
        redownload=True,
        max_retries=2,
    ):
        """Read DINO data from a local file, a zipfile member or a url.

        Parameters
        ----------
        url_or_file : str
            A url (starting with "http"), a path to a local file, or the name
            of a member of `zipfile`.
        zipfile : zipfile.ZipFile, optional
            When not None, read `url_or_file` from this opened zip archive.
        timeout : int or float, optional
            Timeout in seconds for network requests. The default is 5.
        to_file : str, optional
            When downloading, also save the downloaded contents to this path.
            The default is None.
        redownload : bool, optional
            When False and `to_file` exists, read from `to_file` instead of
            downloading. The default is True.
        max_retries : int, optional
            Maximum number of retries for failed network requests. The default
            is 2.
        """
        if zipfile is not None:
            # read the named member from an already opened zip archive
            with zipfile.open(url_or_file) as f:
                self._read_contents(TextIOWrapper(f))
        elif url_or_file.startswith("http"):
            if redownload or to_file is None or not os.path.isfile(to_file):
                if max_retries > 1:
                    # mount an adapter so failed https-requests are retried
                    adapter = requests.adapters.HTTPAdapter(max_retries=max_retries)
                    session = requests.Session()
                    session.mount("https://", adapter)
                    req = session.get(url_or_file, timeout=timeout)
                else:
                    req = requests.get(url_or_file, timeout=timeout)
                if not req.ok:
                    raise Exception(f"Retrieving data from {url_or_file} failed")
                is_zipfile = False
                if "content-disposition" in req.headers:
                    if req.headers["content-disposition"].endswith(".zip"):
                        is_zipfile = True
                if is_zipfile:
                    # BoorgatMetingen are las files that are delivered in a zip-file
                    with ZipFile(BytesIO(req.content)) as myzip:
                        files = myzip.namelist()
                        files = [f for f in files if f.endswith(".las")]
                        assert len(files) == 1, "Only one file in the zipfile supported"
                        # read the member once, so it can both be saved and
                        # parsed (previously the stream was exhausted by
                        # read() before parsing, yielding empty contents)
                        contents = myzip.read(files[0])
                        if to_file is not None:
                            with open(to_file, "wb") as f:
                                f.write(contents)
                        self._read_contents(TextIOWrapper(BytesIO(contents)))
                else:
                    if to_file is not None:
                        with open(to_file, "w") as f:
                            f.write(req.text)
                    self._read_contents(StringIO(req.text))
            else:
                # a previously downloaded copy exists: read it from disk
                with open(to_file, "r") as f:
                    self._read_contents(f)
        else:
            # url_or_file is a path to a local file
            with open(url_or_file, "r") as f:
                self._read_contents(f)

    def __repr__(self):
        # show identifying properties (if parsed) in the representation
        propdict = {"NITG-nr": "NITG-nr", "X-coordinaat": "x", "Y-coordinaat": "y"}
        props = {}
        for key in propdict:
            if hasattr(self, key):
                props[propdict[key]] = getattr(self, key)
        name = util._format_repr(self, props)
        return name

    @classmethod
    def from_dino_nr(cls, dino_nr, **kwargs):
        """Alternate constructor: download the data for a given dino-number."""
        if not hasattr(cls, "_download_url"):
            raise NotImplementedError(f"No download-url defined for {cls.__name__}")
        return cls(f"{cls._download_url}/{dino_nr}", **kwargs)

    def _read_properties_csv_rows(self, f, merge_columns=False, **kwargs):
        """Read a csv-section with one property per row, returning a dict.

        This is the new format of properties from dinoloket. When
        `merge_columns` is True, values spread over multiple columns are joined
        with spaces into a single value.
        """
        df, line = self._read_csv_part(f, header=None, index_col=0, **kwargs)
        # remove empty columns
        df = df.loc[:, ~df.isna().all(axis=0)]
        if merge_columns:
            for index in df.index:
                df.at[index, 1] = " ".join(df.loc[index, ~df.loc[index].isna()].values)
            df = df.loc[:, :1]
        else:
            assert df.shape[1] == 1
        d = df.squeeze().to_dict()
        return d, line

    def _read_properties_csv_columns(self, f, **kwargs):
        """Read a csv-section with a header-row and one value-row, as a dict."""
        df, line = self._read_csv_part(f, **kwargs)
        assert df.shape[0] == 1
        d = df.squeeze().to_dict()
        return d, line

    def _read_csv_part(self, f, sep=",", header=0, index_col=False, **kwargs):
        """Read one csv-section, up to the next empty line.

        Returns the section as a DataFrame, together with the first line after
        the section (so the caller can decide how to parse the next section).
        The cursor is left at the start of the next section.
        """
        strt = f.tell()
        # count the number of data-rows in this section first
        if header is None:
            nrows = 0
        else:
            nrows = -1  # the header does not count
        line = f.readline()
        while line.replace(",", "") not in ["\n", ""]:
            nrows += 1
            line = f.readline()
        eind = f.tell()
        # go back to where we were before
        f.seek(strt)
        df = pd.read_csv(
            f, sep=sep, index_col=index_col, nrows=nrows, header=header, **kwargs
        )
        if header is not None:
            # drop columns generated by trailing separators
            df = df.loc[:, ~df.columns.str.startswith("Unnamed: ")]
        f.seek(eind)

        if line != "":
            # skip empty lines between this section and the next
            while line.replace(",", "") == "\n":
                new_start = f.tell()
                line = f.readline()
            f.seek(new_start)

        return df, line

563 

564 

class Oppervlaktewaterstand(CsvFileOrUrl):
    """Surface water level (Oppervlaktewaterstand) data from DINO."""

    _download_url = "https://www.dinoloket.nl/uitgifteloket/api/wo/owo/full"

    def __repr__(self):
        # show the location and coordinates from the last metadata-row, if any
        props = {}
        if hasattr(self, "meta") and not self.meta.empty:
            last = self.meta.iloc[-1]
            for key, label in [
                ("Locatie", "Locatie"),
                ("X-coordinaat", "x"),
                ("Y-coordinaat", "y"),
            ]:
                if key in last:
                    props[label] = last[key]
        return util._format_repr(self, props)

    def _read_contents(self, f):
        # the file starts with a block of general properties
        self.props, line = self._read_properties_csv_rows(f, merge_columns=True)
        if line.startswith(
            '"Van deze put zijn geen standen opgenomen in de DINO-database"'
        ):
            # no measurements available for this location
            return
        self.meta, line = self._read_csv_part(f)
        self.data, line = self._read_csv_part(f)
        for column in ("Peildatum",):
            if column in self.data.columns:
                self.data[column] = pd.to_datetime(self.data[column], dayfirst=True)

    def to_dict(self):
        d = dict(self.props)
        if hasattr(self, "meta"):
            d["meta"] = self.meta
            # also add the last value of every metadata-column as a scalar
            for column in d["meta"]:
                d[column] = d["meta"][column].iloc[-1]
        if hasattr(self, "data"):
            d["data"] = self.data
        return d

602 

603 

class Grondwaterstand(CsvFileOrUrl):
    # groundwater level (Grondwaterstand) data from DINO, read per piezometer
    _download_url = "https://www.dinoloket.nl/uitgifteloket/api/wo/gwo/full"

    @classmethod
    def from_dino_nr(cls, dino_nr, filter_nr, **kwargs):
        # levels are downloaded per piezometer; filter_nr is zero-padded to
        # three digits in the download-url
        return cls(f"{cls._download_url}/{dino_nr}/{filter_nr:03d}", **kwargs)

    def __repr__(self):
        # retrieve properties if they exist

        props = {}
        if hasattr(self, "meta") and not self.meta.empty:
            # use the last (most recent) metadata-row
            s = self.meta.iloc[-1]
            propdict = {
                "Locatie": "Locatie",
                "Filternummer": "filter",
                "X-coordinaat": "x",
                "Y-coordinaat": "y",
            }
            for key in propdict:
                if key in s:
                    props[propdict[key]] = s[key]
        name = util._format_repr(self, props)
        return name

    def _read_contents(self, f):
        # the file starts with two blocks of properties
        self.props, line = self._read_properties_csv_rows(f, merge_columns=True)
        self.props2, line = self._read_properties_csv_rows(f)
        if line.startswith(
            '"Van deze put zijn geen standen opgenomen in de DINO-database"'
        ):
            # no measurements available for this piezometer
            return
        if "Peildatum" not in line:
            # a metadata-table precedes the measurement-table
            self.meta, line = self._read_csv_part(f)
        self.data, line = self._read_csv_part(f)
        for column in ["Peildatum"]:
            if column in self.data.columns:
                self.data[column] = pd.to_datetime(self.data[column], dayfirst=True)

    def to_dict(self):
        d = {**self.props, **self.props2}
        if hasattr(self, "meta"):
            d["meta"] = self.meta
            # also add the last value of every metadata-column as a scalar
            for column in d["meta"]:
                d[column] = d["meta"][column].iloc[-1]
        if hasattr(self, "data"):
            d["data"] = self.data
        return d

652 

653 

class Grondwatersamenstelling(CsvFileOrUrl):
    """Groundwater composition (Grondwatersamenstelling) data from DINO."""

    _download_url = "https://www.dinoloket.nl/uitgifteloket/api/wo/gwo/qua/report"

    def _read_contents(self, f):
        # peek at the first line, then put the cursor back at the start
        start = f.tell()
        line = f.readline().rstrip("\n")
        f.seek(start)

        # section: LOCATIE gegevens
        if line.startswith('"LOCATIE gegevens"'):
            line = f.readline()
            self.locatie_gegevens, line = self._read_properties_csv_columns(f)
            # also make each location-property available as an attribute
            for key, value in self.locatie_gegevens.items():
                setattr(self, key, value)

        # section: KWALITEIT gegevens VLOEIBAAR
        if line.startswith('"KWALITEIT gegevens VLOEIBAAR"'):
            line = f.readline()
            self.kwaliteit_gegevens_vloeibaar, line = self._read_csv_part(f)
            for column in ("Monster datum", "Analyse datum"):
                if column in self.kwaliteit_gegevens_vloeibaar.columns:
                    self.kwaliteit_gegevens_vloeibaar[column] = pd.to_datetime(
                        self.kwaliteit_gegevens_vloeibaar[column], dayfirst=True
                    )

    def to_dict(self):
        d = dict(self.locatie_gegevens)
        if hasattr(self, "kwaliteit_gegevens_vloeibaar"):
            d["kwaliteit_gegevens_vloeibaar"] = self.kwaliteit_gegevens_vloeibaar
        return d

685 

686 

class Boormonsterprofiel(CsvFileOrUrl):
    """Core sample description (Boormonsterprofiel) data from DINO."""

    _download_url = (
        "https://www.dinoloket.nl/uitgifteloket/api/brh/sampledescription/csv"
    )

    def _read_contents(self, f):
        """Parse the csv-sections of a Boormonsterprofiel-file."""
        # read first line and place cursor at start of document again
        start = f.tell()
        line = f.readline().rstrip("\n")
        f.seek(start)
        if line.startswith('"ALGEMENE GEGEVENS BORING"'):
            line = f.readline()
            self.algemene_gegevens_boring, line = self._read_properties_csv_columns(f)
            # also make each general property available as an attribute
            for key in self.algemene_gegevens_boring:
                setattr(self, key, self.algemene_gegevens_boring[key])
        if line.startswith('"ALGEMENE GEGEVENS LITHOLOGIE"'):
            line = f.readline()
            self.algemene_gegevens_lithologie, line = self._read_properties_csv_columns(
                f
            )
        if line.startswith('"LITHOLOGIE LAGEN"'):
            line = f.readline()
            self.lithologie_lagen, line = self._read_csv_part(f)
        if line.startswith('"LITHOLOGIE SUBLAGEN"'):
            line = f.readline()
            self.lithologie_sublagen, line = self._read_csv_part(f)

    def to_dict(self):
        """Combine the parsed sections into a single dictionary.

        Returns
        -------
        dict
            The general boring-properties, supplemented with the
            lithologie-properties and (when present) DataFrames with the
            (sub)layers.
        """
        d = {**self.algemene_gegevens_boring}
        if hasattr(self, "algemene_gegevens_lithologie"):
            # 'Datum boring' can be specified both in algemene_gegevens_boring
            # and in algemene_gegevens_lithologie; ignore empty values from the
            # lithologie-properties so they do not overwrite the
            # boring-properties. Build a filtered copy instead of popping keys,
            # so calling to_dict does not modify the object itself.
            lithologie = {
                key: value
                for key, value in self.algemene_gegevens_lithologie.items()
                if not (key in self.algemene_gegevens_boring and pd.isna(value))
            }
            d = {**d, **lithologie}
        if hasattr(self, "lithologie_lagen"):
            d["lithologie_lagen"] = self.lithologie_lagen
        if hasattr(self, "lithologie_sublagen"):
            d["lithologie_sublagen"] = self.lithologie_sublagen
        return d

728 

729 

def get_drilling_from_dinoloket(
    name,
    column_type=None,
    depthReference="NAP",
    language="nl",
    return_response=False,
    ignore_exceptions=False,
    timeout=45,
):
    """
    Get a drilling from dinoloket.

    This method uses the information from the webservice used by dinoloket for
    displaying the drilling. In this way, also lithostratigraphy-data can be returned,
    which is not present in the data downloaded as a csv-file by `Boormonsterprofiel`.

    Parameters
    ----------
    name : str
        The name of the drilling.
    column_type : str, optional
        The type of data that is returned. Possible options are "LITHOLOGY" and
        "LITHOSTRATIGRAPHY" and None. If column_type is None, return a dictionary with
        all data. The default is None.
    depthReference : str, optional
        Possible values are "NAP" and "MV". The default is "NAP".
    language : str of length 2, optional
        Possible values are "nl" for Dutch and "en" for English. When language is not
        'nl' or 'en', English is returned. The default is "nl".
    return_response : bool, optional
        Return the json-response of the web-service without any interpretation. The
        default is False.
    ignore_exceptions : bool, optional
        When True, ignore exceptions when things go wrong. This is useful when
        requesting multiple drillings. The default is False.
    timeout : int or float, optional
        Timeout in seconds for the network request. The default is 45.

    Returns
    -------
    df or dict
        A dictionary or a DataFrame (when column_type is set) containing the drilling
        data.
    """
    # columnType is 'LITHOSTRATIGRAPHY' or 'LITHOLOGY'
    url = "https://www.dinoloket.nl/javascriptmapviewer-web/rest/brh/profile"
    payload = {"dinoId": name, "depthReference": depthReference, "language": language}
    # a timeout is set so a stalled server cannot hang the call indefinitely
    req = requests.post(
        url,
        data=json.dumps(payload),
        headers={"content-type": "application/json"},
        timeout=timeout,
    )
    if not req.ok:
        msg = f"Retrieving data from {url} failed"
        if ignore_exceptions:
            logger.error(msg)
            return None
        else:
            raise Exception(msg)
    data = json.loads(req.content)
    if return_response:
        return data
    if data.get("status") == 500:
        # the server reports an error for this drilling in the json-body
        msg = "Drilling {} could not be downloaded ".format(name)
        if ignore_exceptions:
            logger.error(msg)
            return None
        else:
            raise Exception(msg)

    for column in data["columns"]:
        if column_type is None or column["columnType"] == column_type:
            # collect one dict of code/value pairs per layer
            ls = []
            for meta in column["profileMetadata"]:
                di = {}
                for layerInfo in meta["layerInfos"]:
                    di[layerInfo["code"]] = layerInfo["value"]
                ls.append(di)
            df = pd.DataFrame(ls)
            # "DEPTH" is formatted like "0.0 - 1.5 m"; split it into numeric
            # top and botm columns
            top = []
            botm = []
            for depth in df["DEPTH"]:
                depths = depth.replace("m", "").split(" - ")
                top.append(float(depths[0]))
                botm.append(float(depths[1]))
            df.insert(loc=0, column="top", value=top)
            df.insert(loc=1, column="botm", value=botm)
            df = df.drop("DEPTH", axis=1)
            if column_type is None:
                data[column["columnType"]] = df
            else:
                return df
    if column_type is None:
        # replace the raw column-list by the parsed DataFrames
        data.pop("columns")
        return data
    else:
        # the requested column_type was not present in the response
        msg = "Column {} not present -> {}".format(column_type, name)
        if ignore_exceptions:
            logger.error(msg)
            return None
        else:
            raise Exception(msg)

828 

829 

class GeologischBooronderzoek(Boormonsterprofiel):
    # In brodata, Boormonsterprofiel used to be called GeologischBooronderzoek.
    # Therefore, this subclass is kept as an alias of Boormonsterprofiel, for
    # backwards compatibility.
    pass

834 

835 

class Boorgatmeting(CsvFileOrUrl):
    """Borehole logging (Boorgatmeting) data from DINO, delivered as a las-file."""

    _download_url = "https://www.dinoloket.nl/uitgifteloket/api/brh/log/las"

    def __repr__(self):
        # show the well-header properties of the las-file if they exist
        props = {}
        if hasattr(self, "las") and "Well" in self.las.header:
            items = self.las.header["Well"]
            for item in items:
                props[item.descr] = item.value
        name = util._format_repr(self, props)
        return name

    def _read_contents(self, f):
        # lasio is an optional dependency, so only import it when needed
        import lasio

        self.las = lasio.read(f)

    def to_dict(self):
        import lasio

        return lasio.las.JSONEncoder().default(self.las)

    def plot(self, ax=None, columns=None, z=0.0, **kwargs):
        """Plot the logs against depth.

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            The axes to plot on. Defaults to the current axes.
        columns : str or list of str, optional
            The log(s) to plot. The default is None, which plots all logs.
        z : float, optional
            Level to subtract the depth-index from. The default is 0.0.

        Returns
        -------
        matplotlib.axes.Axes
            The axes that was plotted on.
        """
        if ax is None:
            import matplotlib.pyplot as plt

            ax = plt.gca()
        df = self.las.df()
        if columns is None:
            columns = df.columns
        elif isinstance(columns, str):
            columns = [columns]

        # plot only the requested columns (previously the computed selection
        # was ignored and every column was plotted)
        for column in columns:
            ax.plot(df[column], z - df.index, label=column, **kwargs)
        return ax

875 

876 

class ChemischeAnalyse(CsvFileOrUrl):
    """Chemical analysis (Chemische analyse) data from DINO."""

    _download_url = (
        "https://www.dinoloket.nl/uitgifteloket/api/brh/chemicalanalysis/csv"
    )

    def _read_contents(self, f):
        # peek at the first line, then put the cursor back at the start
        start = f.tell()
        line = f.readline().rstrip("\n")
        f.seek(start)

        # section: LOCATIE gegevens
        if line.startswith('"LOCATIE gegevens"'):
            line = f.readline()
            self.locatie_gegevens, line = self._read_properties_csv_columns(f)
            # also make each location-property available as an attribute
            for key, value in self.locatie_gegevens.items():
                setattr(self, key, value)

        # section: KWALITEIT gegevens VAST
        if line.startswith('"KWALITEIT gegevens VAST"'):
            line = f.readline()
            self.kwaliteit_gegevens_vast, line = self._read_csv_part(f)
            for column in ("Monster datum", "Analyse datum"):
                if column in self.kwaliteit_gegevens_vast.columns:
                    self.kwaliteit_gegevens_vast[column] = pd.to_datetime(
                        self.kwaliteit_gegevens_vast[column], dayfirst=True
                    )

    def to_dict(self):
        d = dict(self.locatie_gegevens)
        if hasattr(self, "kwaliteit_gegevens_vast"):
            d["kwaliteit_gegevens_vast"] = self.kwaliteit_gegevens_vast
        return d

910 

911 

class KorrelgrootteAnalyse(ChemischeAnalyse):
    """Grain-size analysis, read from a DINOloket csv file.

    All parsing is inherited from ChemischeAnalyse; only the download
    endpoint differs.
    """

    _download_url = "https://www.dinoloket.nl/uitgifteloket/api/brh/grainsizeanalysis/csv"

916 

917 

class VerticaalElektrischSondeeronderzoek(CsvFileOrUrl):
    """Vertical electrical sounding (VES) survey, read from a DINOloket csv file.

    The csv file consists of named sections ('"VES Overzicht"', '"Kop"',
    '"Data"' and zero or more interpretation sections), which are parsed into
    attributes of this object.
    """

    # endpoint from which DINOloket serves VES csv files
    _download_url = "https://www.dinoloket.nl/uitgifteloket/api/ves/csv"

    # Read a VES-file
    def _read_contents(self, f):
        """Parse the sections of the VES csv file from the open handle `f`.

        Each helper call consumes its section and returns the first line of
        the next section, which is used to decide what to parse next.
        """
        # read first line and place cursor at start of document again
        start = f.tell()
        line = f.readline().rstrip("\n")
        f.seek(start)

        # "VES Overzicht" section: overview properties, also set as attributes
        if line.startswith('"VES Overzicht"'):
            line = f.readline()
            self.ves_overzicht, line = self._read_properties_csv_columns(f)
            for key in self.ves_overzicht:
                setattr(self, key, self.ves_overzicht[key])

        # "Kop" (header) section
        if line.startswith('"Kop"'):
            line = f.readline()
            self.kop, line = self._read_properties_csv_columns(f)

        # "Data" section: the measured values, as a DataFrame
        if line.startswith('"Data"'):
            line = f.readline()
            self.data, line = self._read_csv_part(f)

        self.interpretatie_door_tno_nitg = []
        self.interpretaties = []

        # the file may contain multiple interpretations; each consists of a
        # properties part, optionally followed by a table of interpreted layers
        while line.startswith('"Interpretatie door: TNO-NITG"'):
            # Interpretatie door: TNO-NITG
            line = f.readline()
            df, line = self._read_properties_csv_columns(f)
            self.interpretatie_door_tno_nitg.append(df)

            # Interpretaties
            if line.startswith('"Interpretaties"'):
                line = f.readline()
                df, line = self._read_csv_part(f)
                self.interpretaties.append(df)

    def to_dict(self):
        """Combine overview and header properties into a single dictionary.

        Adds the data table, the first interpretation (if any) and a
        LineString geometry derived from the survey direction and maximum
        electrode distance.
        """
        d = {**self.ves_overzicht, **self.kop}
        if hasattr(self, "data"):
            d["data"] = self.data
        d["Aantal interpretaties"] = len(self.interpretaties)
        if len(self.interpretatie_door_tno_nitg) > 0:
            # only take the first interpretatie_door_tno_nitg, as the data will not fit in a DataFrame
            d["interpretatie_door_tno_nitg"] = self.interpretatie_door_tno_nitg[0]
        if len(self.interpretaties) > 0:
            # only take the first interpretation, as the data will not fit in a DataFrame
            d["interpretaties"] = self.interpretaties[0]
        if (
            "Richting" in d
            and "Maximale elektrode afstand L2" in d
            and "X-coordinaat" in d
            and "Y-coordinaat" in d
        ):
            # build a line centered on (x, y) with half-length L2 along the
            # survey direction; assumes "Richting" is in degrees — TODO confirm
            # the angle convention against the DINOloket documentation
            angle = (d["Richting"] - 90) * np.pi / 180
            x = d["X-coordinaat"]
            y = d["Y-coordinaat"]
            dx = -np.cos(angle) * d["Maximale elektrode afstand L2"]
            dy = np.sin(angle) * d["Maximale elektrode afstand L2"]
            d["geometry"] = LineString([(x + dx, y + dy), (x - dx, y - dy)])
        return d

    def plot_interpretaties(
        self, nr=None, ax=None, top=0, bot=None, negative_depth=True, **kwargs
    ):
        """
        Plot interpreted resistance profiles from VES data.

        This method visualizes one or more interpretation profiles by plotting the
        'Werkelijke weerstand' (actual resistance) against depth as a line (stairs).

        Parameters
        ----------
        nr : int or None, optional
            Index of a specific interpretation to plot. If None (default), all
            interpretations in `self.interpretaties` are plotted.
        ax : matplotlib.axes.Axes, optional
            The matplotlib Axes object to draw the plot on. If None, the current Axes
            (`plt.gca()`) is used. The default is None.
        top : float, optional
            Top depth of the plot in meters. The default is 0.
        bot : float or None, optional
            Bottom depth of the plot in meters. If None (default), it is inferred from
            the data, by setting the length of the last section equal to the length of
            the next to last section.
        negative_depth : bool, optional
            If True (default), depth is plotted as negative (i.e., increasing downwards,
            following geotechnical convention).
        **kwargs : dict, optional
            Additional keyword arguments passed to `matplotlib.axes.Axes.plot` (e.g.,
            color, linestyle, label).

        Returns
        -------
        ax : matplotlib.axes.Axes
            The Axes object containing the plot.
        """
        if nr is None:
            dfs = self.interpretaties
            if len(dfs) == 0:
                # attribute name contains a hyphen, so it was set via setattr
                # and must be read back via getattr
                nitg_nr = getattr(self, "NITG-nr")
                logger.warning(f"No interpretations in {nitg_nr}")
                return
        else:
            dfs = [self.interpretaties[nr]]

        if ax is None:
            ax = plt.gca()

        for df in dfs:
            values = df["Werkelijke weerstand"].values

            # duplicate the interior layer boundaries so value/edge pairs
            # form a stair-step profile
            edges = df["Bovenkant laag (m)"].values[1:]
            edges = np.vstack((edges, edges)).transpose().ravel()
            edge_top = df["Bovenkant laag (m)"].iloc[0]
            if np.isnan(edge_top):
                edge_top = top
            edge_bot = df["Onderkant laag (m)"].iloc[-1]
            if np.isnan(edge_bot):
                if bot is None or np.isnan(bot):
                    # extrapolate: give the last layer the same thickness as
                    # the next-to-last layer
                    edge_bot = df["Bovenkant laag (m)"].iloc[-1] + (
                        df["Bovenkant laag (m)"].iloc[-1]
                        - df["Bovenkant laag (m)"].iloc[-2]
                    )
                else:
                    edge_bot = bot
            edges = np.hstack((edge_top, edges, edge_bot))

            values = np.vstack((values, values)).transpose().ravel()

            if negative_depth:
                edges = -edges

            ax.plot(values, edges, **kwargs)

        return ax