Coverage for brodata / dino.py: 75%

586 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-20 14:37 +0000

1import logging 

2import os 

3from io import BytesIO, StringIO, TextIOWrapper 

4from pathlib import Path 

5from zipfile import ZipFile 

6 

7import numpy as np 

8import pandas as pd 

9import geopandas as gpd 

10import requests 

11import json 

12from shapely.geometry import LineString 

13import matplotlib.pyplot as plt 

14 

15from . import util 

16from .webservices import get_configuration, get_gdf 

17 

# module-level logger, named after this module per the logging convention
logger = logging.getLogger(__name__)

19 

20 

def objects_to_gdf(
    objects,
    x="X-coordinaat",
    y="Y-coordinaat",
    geometry=None,
    index=None,
    to_gdf=True,
):
    """
    Convert a dictionary of dino-objects to a geopandas GeoDataFrame.

    Parameters
    ----------
    objects: dictionary of bro or dinoloket objects
        dictionary of objects to convert to (geo)dataframe
    x: str
        name of column of x-coordinate
    y: str
        name of column of y-coordinate
    geometry: str
        name of column of geometry
    index: str or list of str
        name of column(s) to use as index
    to_gdf: bool
        convert to geodataframe

    Returns
    -------
    gdf: GeoDataFrame or dict
        Returns a GeoDataFrame if to_gdf is True, otherwise returns `objects`
        unaltered
    """
    if not to_gdf:
        # return the raw dictionary of objects without conversion
        return objects

    # convert the dino-objects to a dataframe, one row per object
    df = pd.DataFrame([objects[key].to_dict() for key in objects])
    if geometry is not None:
        # use an existing geometry column when it is present
        if geometry in df.columns:
            geometry = df[geometry]
        else:
            geometry = None
    else:
        # build point-geometries from the x- and y-coordinate columns
        if df.empty:
            logger.warning("no data found")
        elif x not in df:
            logger.warning(f"{x} not found in data. No geometry column created.")
        elif y not in df:
            logger.warning(f"{y} not found in data. No geometry column created.")
        else:
            geometry = gpd.points_from_xy(df[x], df[y])
    gdf = gpd.GeoDataFrame(df, geometry=geometry)
    if index is not None and not gdf.empty:
        if isinstance(index, str):
            if index in gdf.columns:
                gdf = gdf.set_index(index)
        elif all(col in gdf.columns for col in index):
            # index is an iterable (list) of columns, forming a MultiIndex
            gdf = gdf.set_index(index)
    return gdf

82 

83 

def _get_data_within_extent(
    dino_cl,
    kind,
    extent,
    config=None,
    timeout=5,
    silent=False,
    to_path=None,
    to_zip=None,
    redownload=False,
    x="X-coordinaat",
    y="Y-coordinaat",
    geometry=None,
    index="NITG-nr",
    to_gdf=True,
    max_retries=2,
    continue_on_error=False,
    progress_callback=None,
):
    """Retrieve DINO data within a specified geographical extent or from local files.

    This is a core function used by various data retrieval methods in the DINO system.
    It can either load data from local files/archives or fetch it from the DINO server
    based on geographical extent.

    Parameters
    ----------
    dino_cl : class
        The DINO data class to instantiate for each location (e.g., Grondwaterstand).
    kind : str
        The type of DINO data to retrieve (e.g., "Grondwaterstand", "Boorgatmeting").
    extent : str, Path, or sequence
        Either a path to local data, or a sequence of [xmin, xmax, ymin, ymax]
        coordinates.
    config : dict, optional
        Configuration mapping for DINO data kinds. Uses default if None.
    timeout : int or float, optional
        Timeout in seconds for network requests when downloading data. The default is 5.
    silent : bool, default=False
        If True, suppress progress output.
    to_path : str, optional
        Directory to save downloaded files. Created if it doesn't exist.
    to_zip : str, optional
        Path to save downloaded files in a zip archive.
    redownload : bool, optional
        If True, redownload data even if local files exist. The default is False.
    x : str, optional
        Name of the x-coordinate column. The default is "X-coordinaat".
    y : str, optional
        Name of the y-coordinate column. The default is "Y-coordinaat".
    geometry : str, optional
        Name of the geometry column if different from creating from x,y coordinates.
    index : str, optional
        Column(s) to use as index in the output GeoDataFrame. The default is "NITG-nr".
    to_gdf : bool, optional
        If True, return a GeoDataFrame; if False, return raw dictionary of objects. The
        default is True
    max_retries : int, optional
        Maximum number of retries for failed network requests. The default is 2.
    continue_on_error : bool, optional
        If True, continue after an error occurs during downloading or processing of
        individual observation data. Defaults to False.
    progress_callback : function, optional
        A callback function that takes two arguments (current, total) to report
        progress. If None, no progress reporting is done. Defaults to None.

    Returns
    -------
    geopandas.GeoDataFrame or dict
        If to_gdf is True, returns a GeoDataFrame with the requested data.
        If to_gdf is False, returns a dictionary of DINO objects.
    """
    # when extent is a path, read previously downloaded data from disk
    if isinstance(extent, (str, Path)):
        data = _get_data_from_path(extent, dino_cl, silent=silent, progress_callback=progress_callback)
        return objects_to_gdf(data, x, y, geometry, index, to_gdf)

    if to_zip is not None:
        if not redownload and os.path.isfile(to_zip):
            # a zip-file with downloaded data already exists: read from it
            data = _get_data_from_zip(to_zip, dino_cl, silent=silent, extent=extent, progress_callback=progress_callback)
            return objects_to_gdf(data, x, y, geometry, index, to_gdf)
        # download to a directory first; the files are zipped afterwards
        if to_path is None:
            to_path = os.path.splitext(to_zip)[0]
        # remember whether we created to_path, so it can be removed again
        remove_path_again = not os.path.isdir(to_path)
        files = []

    if config is None:
        config = get_configuration()

    if to_path is not None and not os.path.isdir(to_path):
        os.makedirs(to_path)

    to_file = None
    gdf = None
    if to_path is not None:
        # the locations of the objects are cached in a geojson-file
        to_file = os.path.join(to_path, f"{dino_cl.__name__}.geojson")
        if to_zip is not None:
            files.append(to_file)
        if not redownload and os.path.isfile(to_file):
            gdf = gpd.read_file(to_file)
            if not gdf.empty and "DINO_NR" in gdf.columns:
                gdf = gdf.set_index("DINO_NR")
    if gdf is None:
        # request the locations within extent from the dino-server
        gdf = get_gdf(
            kind,
            config=config,
            extent=extent,
            timeout=timeout,
        )
        if to_file is not None:
            # cache the locations for a later call with redownload=False
            gdf.to_file(to_file)

    to_file = None

    # download (or read from cache) the data of each location separately
    data = {}
    for i, dino_nr in util.tqdm(enumerate(gdf.index), disable=silent):
        if progress_callback is not None:
            progress_callback(i, len(gdf))
        if to_path is not None:
            to_file = os.path.join(to_path, f"{dino_nr}.csv")
            if to_zip is not None:
                files.append(to_file)
            if not redownload and os.path.isfile(to_file):
                # the file was downloaded before: read it from disk
                data[dino_nr] = dino_cl(to_file)
                continue
        try:
            data[dino_nr] = dino_cl.from_dino_nr(
                dino_nr, timeout=timeout, to_file=to_file, max_retries=max_retries
            )
        except Exception as e:
            if not continue_on_error:
                raise e
            # best-effort mode: log the failure and move on to the next location
            logger.error("Error retrieving %s %s: %s", kind, dino_nr, e)
            continue
    if to_zip is not None:
        # move the downloaded files into the zip-archive
        util._save_data_to_zip(to_zip, files, remove_path_again, to_path)

    return objects_to_gdf(data, x, y, geometry, index, to_gdf)

221 

222 

def _get_data_from_path(from_path, dino_class, silent=False, ext=".csv", progress_callback=None):
    """Read dino-objects of type `dino_class` from files in a local directory.

    When `from_path` points to a zip-file, the archive is read instead. Returns
    a dictionary keyed by file name without its extension.
    """
    if str(from_path).endswith(".zip"):
        return _get_data_from_zip(
            from_path, dino_class, silent=silent, progress_callback=progress_callback
        )
    fnames = [fname for fname in os.listdir(from_path) if fname.endswith(ext)]
    n_files = len(fnames)
    data = {}
    for i, fname in util.tqdm(enumerate(fnames), total=n_files, disable=silent):
        if progress_callback is not None:
            progress_callback(i, n_files)
        key = os.path.splitext(fname)[0]
        data[key] = dino_class(os.path.join(from_path, fname))
    return data

236 

237 

def _get_data_from_zip(to_zip, dino_class, silent=False, extent=None, progress_callback=None):
    # read data from zipfile created by an earlier download (see
    # _get_data_within_extent); returns a dict of dino_class instances
    data = {}
    with ZipFile(to_zip) as zf:
        names = zf.namelist()
        # the archive may contain a geojson-file with the object locations
        name = f"{dino_class.__name__}.geojson"
        has_location_file = name in names
        if has_location_file:
            names.remove(name)
        if has_location_file and extent is not None:
            # use the location-file to only read the objects within extent;
            # extent is [xmin, xmax, ymin, ymax]
            gdf = gpd.read_file(zf.open(name))
            gdf = gdf.set_index("DINO_NR")
            gdf = gdf.cx[extent[0] : extent[1], extent[2] : extent[3]]
            names = [f"{name}.csv" for name in gdf.index]
        total = len(names)
        for i, name in util.tqdm(enumerate(names), total=total, disable=silent):
            if progress_callback is not None:
                progress_callback(i, total)
            # NOTE(review): keys here keep the ".csv" extension, whereas
            # _get_data_from_path strips the extension — confirm this
            # difference is intended by callers of both functions
            data[name] = dino_class(name, zipfile=zf)
    return data

258 

259 

def get_verticaal_elektrisch_sondeeronderzoek(extent, **kwargs):
    """Get vertical electrical sounding (VES) data within extent or from a path."""
    return _get_data_within_extent(
        VerticaalElektrischSondeeronderzoek,
        "Verticaal elektrisch sondeeronderzoek",
        extent,
        geometry="geometry",
        **kwargs,
    )

266 

267 

def get_grondwaterstand(
    extent,
    config=None,
    timeout=5,
    silent=False,
    to_path=None,
    to_zip=None,
    redownload=False,
    to_gdf=True,
    skip=None,
    continue_on_error=False,
    progress_callback=None,
):
    """
    Get groundwater level (Grondwaterstand) data as a GeoDataFrame or raw objects.

    Fetch Grondwaterstand data for a given geographical extent or load it from local
    files. Data are retrieved per monitoring location and per piezometer. Results can
    be returned as a GeoDataFrame or as a dictionary of Grondwaterstand objects.

    Parameters
    ----------
    extent : str or sequence
        The spatial extent ([xmin, xmax, ymin, ymax]) to filter the data.
    config : dict, optional
        Configuration mapping for available DINO data kinds. If None, a default
        configuration is used.
    timeout : int or float, optional
        Timeout in seconds for network requests when downloading data. The default is 5.
    silent : bool, optional
        If True, suppress progress output.
    to_path : str, optional
        If not None, save the downloaded files in the directory named to_path. The
        default is None.
    to_zip : str, optional
        If not None, save the downloaded files in a zip-file named to_zip. The default
        is None.
    redownload : bool, optional
        When downloaded files exist in to_path or to_zip, read from these files when
        redownload is False. If redownload is True, download the data again from the
        DINO-server. The default is False.
    to_gdf : bool, optional
        If True (default), convert the loaded Grondwaterstand objects into a
        geopandas.GeoDataFrame. If False, return the raw mapping of objects.
    skip : str or iterable, optional
        Name or iterable of location names to skip during download or processing.
    continue_on_error : bool, optional
        If True, continue after an error occurs during downloading or processing of
        individual observation data. Defaults to False.
    progress_callback : function, optional
        A callback function that takes two arguments (current, total) to report
        progress. If None, no progress reporting is done. Defaults to None.

    Returns
    -------
    geopandas.GeoDataFrame or dict
        If `to_gdf` is True, returns a GeoDataFrame indexed by ['Locatie',
        'Filternummer']. If False, returns a dictionary with Grondwaterstand objects.

    Notes
    -----
    - When `extent` is a path string, this function loads local data.
    - When `to_zip` is provided, the function will create a temporary directory and
      archive files into the supplied ZIP.
    """
    dino_class = Grondwaterstand
    # the result is indexed by location and piezometer-number
    index = ["Locatie", "Filternummer"]
    if skip is not None and isinstance(skip, str):
        # allow a single location name as shorthand for a one-element list
        skip = [skip]

    # when extent is a path, read previously downloaded data from disk
    if isinstance(extent, str):
        data = _get_data_from_path(extent, dino_class, silent=silent, progress_callback=progress_callback)
        return objects_to_gdf(data, index=index, to_gdf=to_gdf)

    if to_zip is not None:
        if not redownload and os.path.isfile(to_zip):
            # a zip-file with downloaded data already exists: read from it
            data = _get_data_from_zip(to_zip, dino_class, silent=silent, progress_callback=progress_callback)
            return objects_to_gdf(data, index=index, to_gdf=to_gdf)
        # download to a directory first; the files are zipped afterwards
        if to_path is None:
            to_path = os.path.splitext(to_zip)[0]
        # remember whether we created to_path, so it can be removed again
        remove_path_again = not os.path.isdir(to_path)
        files = []

    kind = "Grondwaterstand"
    if config is None:
        config = get_configuration()
    # request the monitoring locations within extent from the dino-server
    gdf = get_gdf(
        kind,
        config=config,
        extent=extent,
        timeout=timeout,
    )
    download_url = config[kind]["download"]

    to_file = None
    if to_path is not None and not os.path.isdir(to_path):
        os.makedirs(to_path)
    data = {}
    for i, name in util.tqdm(enumerate(gdf.index), disable=silent):
        if progress_callback is not None:
            progress_callback(i, len(gdf))
        if skip is not None and name in skip:
            continue
        # download each of the piezometers of this location; ST_CNT holds the
        # piezometer count and piezometer numbers are 1-based, zero-padded
        for i_st in range(1, gdf.at[name, "ST_CNT"] + 1):
            piezometer_nr = f"{i_st:03d}"
            url = f"{download_url}/{name}/{piezometer_nr}"
            if to_path is not None:
                to_file = os.path.join(to_path, f"{name}_{piezometer_nr}.csv")
                if to_zip is not None:
                    files.append(to_file)
                if not redownload and os.path.isfile(to_file):
                    # the file was downloaded before: read it from disk
                    data[f"{name}_{piezometer_nr}"] = dino_class(to_file)
                    continue
            try:
                data[f"{name}_{piezometer_nr}"] = dino_class(
                    url, timeout=timeout, to_file=to_file
                )
            except Exception as e:
                if not continue_on_error:
                    raise e
                # best-effort mode: log the failure and move on
                logger.error(
                    "Error retrieving %s %s piezometer %s: %s",
                    kind,
                    name,
                    piezometer_nr,
                    e,
                )
                continue
    if to_zip is not None:
        # move the downloaded files into the zip-archive
        util._save_data_to_zip(to_zip, files, remove_path_again, to_path)
    return objects_to_gdf(
        data, index=index, to_gdf=to_gdf, x="X-coordinaat", y="Y-coordinaat"
    )

401 

402 

def get_grondwatersamenstelling(extent, **kwargs):
    """Get groundwater composition data within extent or from a path."""
    return _get_data_within_extent(
        Grondwatersamenstelling, "Grondwatersamenstelling", extent, **kwargs
    )

407 

408 

def get_geologisch_booronderzoek(extent, **kwargs):
    """Deprecated alias of `get_boormonsterprofiel`."""
    logger.warning(
        "`get_geologisch_booronderzoek` is deprecated. Use `get_boormonsterprofiel` instead"
    )
    return _get_data_within_extent(
        GeologischBooronderzoek, "Geologisch booronderzoek", extent, **kwargs
    )

416 

417 

def get_boormonsterprofiel(extent, **kwargs):
    """Get core sample descriptions (Boormonsterprofiel) within extent or from a path."""
    return _get_data_within_extent(
        Boormonsterprofiel, "Boormonsterprofiel", extent, **kwargs
    )

422 

423 

def get_boorgatmeting(extent, **kwargs):
    """Get borehole logging (Boorgatmeting) data within extent or from a path."""
    return _get_data_within_extent(Boorgatmeting, "Boorgatmeting", extent, **kwargs)

428 

429 

def get_chemische_analyse(extent, **kwargs):
    """Get chemical analysis data within extent or from a path."""
    return _get_data_within_extent(
        ChemischeAnalyse, "Chemische analyse", extent, **kwargs
    )

434 

435 

def get_korrelgrootte_analyse(extent, **kwargs):
    """Get grain-size analysis data within extent or from a path."""
    return _get_data_within_extent(
        KorrelgrootteAnalyse, "Korrelgrootte analyse", extent, **kwargs
    )

440 

441 

def get_oppervlaktewaterstand(extent, **kwargs):
    """Get surface water level data within extent or from a path."""
    # NOTE(review): the kind-string is "Oppervlaktewateronderzoek", not
    # "Oppervlaktewaterstand" — presumably the server's name for this dataset;
    # confirm against the configuration from get_configuration()
    return _get_data_within_extent(
        Oppervlaktewaterstand, "Oppervlaktewateronderzoek", extent, **kwargs
    )

446 

447 

class CsvFileOrUrl:
    """Base class for DINO data delivered as a csv-file, zip-member or url.

    Subclasses implement ``_read_contents(f)`` to parse the text stream.
    """

    def __init__(
        self,
        url_or_file,
        zipfile=None,
        timeout=5,
        to_file=None,
        redownload=True,
        max_retries=2,
    ):
        """Read DINO data from a local file, a zipfile member or a url.

        Parameters
        ----------
        url_or_file : str
            A url (starting with "http"), a path to a local file, or the name
            of a member of `zipfile`.
        zipfile : zipfile.ZipFile, optional
            When not None, read `url_or_file` from this opened zip archive.
        timeout : int or float, optional
            Timeout in seconds for network requests. The default is 5.
        to_file : str, optional
            When downloading, also save the downloaded contents to this path.
            The default is None.
        redownload : bool, optional
            When False and `to_file` exists, read from `to_file` instead of
            downloading. The default is True.
        max_retries : int, optional
            Maximum number of retries for failed network requests. The default
            is 2.
        """
        if zipfile is not None:
            # read the named member from an already opened zip archive
            with zipfile.open(url_or_file) as f:
                self._read_contents(TextIOWrapper(f))
        elif url_or_file.startswith("http"):
            if redownload or to_file is None or not os.path.isfile(to_file):
                if max_retries > 1:
                    # mount an adapter so failed https-requests are retried
                    adapter = requests.adapters.HTTPAdapter(max_retries=max_retries)
                    session = requests.Session()
                    session.mount("https://", adapter)
                    req = session.get(url_or_file, timeout=timeout)
                else:
                    req = requests.get(url_or_file, timeout=timeout)
                if not req.ok:
                    raise Exception(f"Retrieving data from {url_or_file} failed")
                is_zipfile = False
                if "content-disposition" in req.headers:
                    if req.headers["content-disposition"].endswith(".zip"):
                        is_zipfile = True
                if is_zipfile:
                    # BoorgatMetingen are las files that are delivered in a zip-file
                    with ZipFile(BytesIO(req.content)) as myzip:
                        files = myzip.namelist()
                        files = [f for f in files if f.endswith(".las")]
                        assert len(files) == 1, "Only one file in the zipfile supported"
                        # read the member once, so it can both be saved and
                        # parsed (previously the stream was exhausted by
                        # read() before parsing, yielding empty contents)
                        contents = myzip.read(files[0])
                        if to_file is not None:
                            with open(to_file, "wb") as f:
                                f.write(contents)
                        self._read_contents(TextIOWrapper(BytesIO(contents)))
                else:
                    if to_file is not None:
                        with open(to_file, "w") as f:
                            f.write(req.text)
                    self._read_contents(StringIO(req.text))
            else:
                # a previously downloaded copy exists: read it from disk
                with open(to_file, "r") as f:
                    self._read_contents(f)
        else:
            # url_or_file is a path to a local file
            with open(url_or_file, "r") as f:
                self._read_contents(f)

    def __repr__(self):
        # show identifying properties (if parsed) in the representation
        propdict = {"NITG-nr": "NITG-nr", "X-coordinaat": "x", "Y-coordinaat": "y"}
        props = {}
        for key in propdict:
            if hasattr(self, key):
                props[propdict[key]] = getattr(self, key)
        name = util._format_repr(self, props)
        return name

    @classmethod
    def from_dino_nr(cls, dino_nr, **kwargs):
        """Alternate constructor: download the data for a given dino-number."""
        if not hasattr(cls, "_download_url"):
            raise NotImplementedError(f"No download-url defined for {cls.__name__}")
        return cls(f"{cls._download_url}/{dino_nr}", **kwargs)

    def _read_properties_csv_rows(self, f, merge_columns=False, **kwargs):
        """Read a csv-section with one property per row, returning a dict.

        This is the new format of properties from dinoloket. When
        `merge_columns` is True, values spread over multiple columns are joined
        with spaces into a single value.
        """
        df, line = self._read_csv_part(f, header=None, index_col=0, **kwargs)
        # remove empty columns
        df = df.loc[:, ~df.isna().all(axis=0)]
        if merge_columns:
            for index in df.index:
                df.at[index, 1] = " ".join(df.loc[index, ~df.loc[index].isna()].values)
            df = df.loc[:, :1]
        else:
            assert df.shape[1] == 1
        d = df.squeeze().to_dict()
        return d, line

    def _read_properties_csv_columns(self, f, **kwargs):
        """Read a csv-section with a header-row and one value-row, as a dict."""
        df, line = self._read_csv_part(f, **kwargs)
        assert df.shape[0] == 1
        d = df.squeeze().to_dict()
        return d, line

    def _read_csv_part(self, f, sep=",", header=0, index_col=False, **kwargs):
        """Read one csv-section, up to the next empty line.

        Returns the section as a DataFrame, together with the first line after
        the section (so the caller can decide how to parse the next section).
        The cursor is left at the start of the next section.
        """
        strt = f.tell()
        # count the number of data-rows in this section first
        if header is None:
            nrows = 0
        else:
            nrows = -1  # the header does not count
        line = f.readline()
        while line.replace(",", "") not in ["\n", ""]:
            nrows += 1
            line = f.readline()
        eind = f.tell()
        # go back to where we were before
        f.seek(strt)
        df = pd.read_csv(
            f, sep=sep, index_col=index_col, nrows=nrows, header=header, **kwargs
        )
        if header is not None:
            # drop columns generated by trailing separators
            df = df.loc[:, ~df.columns.str.startswith("Unnamed: ")]
        f.seek(eind)

        if line != "":
            # skip empty lines between this section and the next
            while line.replace(",", "") == "\n":
                new_start = f.tell()
                line = f.readline()
            f.seek(new_start)

        return df, line

563 

564 

class Oppervlaktewaterstand(CsvFileOrUrl):
    """Surface water level (Oppervlaktewaterstand) data from DINO."""

    _download_url = "https://www.dinoloket.nl/uitgifteloket/api/wo/owo/full"

    def __repr__(self):
        # show the location and coordinates from the last metadata-row, if any
        props = {}
        if hasattr(self, "meta") and not self.meta.empty:
            last = self.meta.iloc[-1]
            for key, label in [
                ("Locatie", "Locatie"),
                ("X-coordinaat", "x"),
                ("Y-coordinaat", "y"),
            ]:
                if key in last:
                    props[label] = last[key]
        return util._format_repr(self, props)

    def _read_contents(self, f):
        # the file starts with a block of general properties
        self.props, line = self._read_properties_csv_rows(f, merge_columns=True)
        if line.startswith(
            '"Van deze put zijn geen standen opgenomen in de DINO-database"'
        ):
            # no measurements available for this location
            return
        self.meta, line = self._read_csv_part(f)
        self.data, line = self._read_csv_part(f)
        for column in ("Peildatum",):
            if column in self.data.columns:
                self.data[column] = pd.to_datetime(self.data[column], dayfirst=True)

    def to_dict(self):
        d = dict(self.props)
        if hasattr(self, "meta"):
            d["meta"] = self.meta
            # also add the last value of every metadata-column as a scalar
            for column in d["meta"]:
                d[column] = d["meta"][column].iloc[-1]
        if hasattr(self, "data"):
            d["data"] = self.data
        return d

602 

603 

class Grondwaterstand(CsvFileOrUrl):
    # groundwater level (Grondwaterstand) data from DINO, read per piezometer
    _download_url = "https://www.dinoloket.nl/uitgifteloket/api/wo/gwo/full"

    @classmethod
    def from_dino_nr(cls, dino_nr, filter_nr, **kwargs):
        # levels are downloaded per piezometer; filter_nr is zero-padded to
        # three digits in the download-url
        return cls(f"{cls._download_url}/{dino_nr}/{filter_nr:03d}", **kwargs)

    def __repr__(self):
        # retrieve properties if they exist

        props = {}
        if hasattr(self, "meta") and not self.meta.empty:
            # use the last (most recent) metadata-row
            s = self.meta.iloc[-1]
            propdict = {
                "Locatie": "Locatie",
                "Filternummer": "filter",
                "X-coordinaat": "x",
                "Y-coordinaat": "y",
            }
            for key in propdict:
                if key in s:
                    props[propdict[key]] = s[key]
        name = util._format_repr(self, props)
        return name

    def _read_contents(self, f):
        # the file starts with two blocks of properties
        self.props, line = self._read_properties_csv_rows(f, merge_columns=True)
        self.props2, line = self._read_properties_csv_rows(f)
        if line.startswith(
            '"Van deze put zijn geen standen opgenomen in de DINO-database"'
        ):
            # no measurements available for this piezometer
            return
        if "Peildatum" not in line:
            # a metadata-table precedes the measurement-table
            self.meta, line = self._read_csv_part(f)
        self.data, line = self._read_csv_part(f)
        for column in ["Peildatum"]:
            if column in self.data.columns:
                self.data[column] = pd.to_datetime(self.data[column], dayfirst=True)

    def to_dict(self):
        d = {**self.props, **self.props2}
        if hasattr(self, "meta"):
            d["meta"] = self.meta
            # also add the last value of every metadata-column as a scalar
            for column in d["meta"]:
                d[column] = d["meta"][column].iloc[-1]
        if hasattr(self, "data"):
            d["data"] = self.data
        return d

652 

653 

class Grondwatersamenstelling(CsvFileOrUrl):
    """Groundwater composition (Grondwatersamenstelling) data from DINO."""

    _download_url = "https://www.dinoloket.nl/uitgifteloket/api/wo/gwo/qua/report"

    def _read_contents(self, f):
        # peek at the first line, then put the cursor back at the start
        start = f.tell()
        line = f.readline().rstrip("\n")
        f.seek(start)

        # section: LOCATIE gegevens
        if line.startswith('"LOCATIE gegevens"'):
            line = f.readline()
            self.locatie_gegevens, line = self._read_properties_csv_columns(f)
            # also make each location-property available as an attribute
            for key, value in self.locatie_gegevens.items():
                setattr(self, key, value)

        # section: KWALITEIT gegevens VLOEIBAAR
        if line.startswith('"KWALITEIT gegevens VLOEIBAAR"'):
            line = f.readline()
            self.kwaliteit_gegevens_vloeibaar, line = self._read_csv_part(f)
            for column in ("Monster datum", "Analyse datum"):
                if column in self.kwaliteit_gegevens_vloeibaar.columns:
                    self.kwaliteit_gegevens_vloeibaar[column] = pd.to_datetime(
                        self.kwaliteit_gegevens_vloeibaar[column], dayfirst=True
                    )

    def to_dict(self):
        d = dict(self.locatie_gegevens)
        if hasattr(self, "kwaliteit_gegevens_vloeibaar"):
            d["kwaliteit_gegevens_vloeibaar"] = self.kwaliteit_gegevens_vloeibaar
        return d

685 

686 

class Boormonsterprofiel(CsvFileOrUrl):
    """Core sample description (Boormonsterprofiel) data from DINO."""

    _download_url = (
        "https://www.dinoloket.nl/uitgifteloket/api/brh/sampledescription/csv"
    )

    def _read_contents(self, f):
        """Parse the csv-sections of a Boormonsterprofiel-file."""
        # read first line and place cursor at start of document again
        start = f.tell()
        line = f.readline().rstrip("\n")
        f.seek(start)
        if line.startswith('"ALGEMENE GEGEVENS BORING"'):
            line = f.readline()
            self.algemene_gegevens_boring, line = self._read_properties_csv_columns(f)
            # also make each general property available as an attribute
            for key in self.algemene_gegevens_boring:
                setattr(self, key, self.algemene_gegevens_boring[key])
        if line.startswith('"ALGEMENE GEGEVENS LITHOLOGIE"'):
            line = f.readline()
            self.algemene_gegevens_lithologie, line = self._read_properties_csv_columns(
                f
            )
        if line.startswith('"LITHOLOGIE LAGEN"'):
            line = f.readline()
            self.lithologie_lagen, line = self._read_csv_part(f)
        if line.startswith('"LITHOLOGIE SUBLAGEN"'):
            line = f.readline()
            self.lithologie_sublagen, line = self._read_csv_part(f)

    def to_dict(self):
        """Combine the parsed sections into a single dictionary.

        Returns
        -------
        dict
            The general boring-properties, supplemented with the
            lithologie-properties and (when present) DataFrames with the
            (sub)layers.
        """
        d = {**self.algemene_gegevens_boring}
        if hasattr(self, "algemene_gegevens_lithologie"):
            # 'Datum boring' can be specified both in algemene_gegevens_boring
            # and in algemene_gegevens_lithologie; ignore empty values from the
            # lithologie-properties so they do not overwrite the
            # boring-properties. Build a filtered copy instead of popping keys,
            # so calling to_dict does not modify the object itself.
            lithologie = {
                key: value
                for key, value in self.algemene_gegevens_lithologie.items()
                if not (key in self.algemene_gegevens_boring and pd.isna(value))
            }
            d = {**d, **lithologie}
        if hasattr(self, "lithologie_lagen"):
            d["lithologie_lagen"] = self.lithologie_lagen
        if hasattr(self, "lithologie_sublagen"):
            d["lithologie_sublagen"] = self.lithologie_sublagen
        return d

728 

729 

def get_drilling_from_dinoloket(
    name,
    column_type=None,
    depthReference="NAP",
    language="nl",
    return_response=False,
    ignore_exceptions=False,
    timeout=45,
):
    """
    Get a drilling from dinoloket.

    This method uses the information from the webservice used by dinoloket for
    displaying the drilling. In this way, also lithostratigraphy-data can be returned,
    which is not present in the data downloaded as a csv-file by `Boormonsterprofiel`.

    Parameters
    ----------
    name : str
        The name of the drilling.
    column_type : str, optional
        The type of data that is returned. Possible options are "LITHOLOGY" and
        "LITHOSTRATIGRAPHY" and None. If column_type is None, return a dictionary with
        all data. The default is None.
    depthReference : str, optional
        Possible values are "NAP" and "MV". The default is "NAP".
    language : str of length 2, optional
        Possible values are "nl" for Dutch and "en" for English. When language is not
        'nl' or 'en', English is returned. The default is "nl".
    return_response : bool, optional
        Return the json-response of the web-service without any interpretation. The
        default is False.
    ignore_exceptions : bool, optional
        When True, ignore exceptions when things go wrong. This is useful when
        requesting multiple drillings. The default is False.
    timeout : int or float, optional
        Timeout in seconds for the network request. The default is 45.

    Returns
    -------
    df or dict
        A dictionary or a DataFrame (when column_type is set) containing the drilling
        data.
    """
    # columnType is 'LITHOSTRATIGRAPHY' or 'LITHOLOGY'
    url = "https://www.dinoloket.nl/javascriptmapviewer-web/rest/brh/profile"
    payload = {"dinoId": name, "depthReference": depthReference, "language": language}
    # a timeout is set so a stalled server cannot hang the call indefinitely
    req = requests.post(
        url,
        data=json.dumps(payload),
        headers={"content-type": "application/json"},
        timeout=timeout,
    )
    if not req.ok:
        msg = f"Retrieving data from {url} failed"
        if ignore_exceptions:
            logger.error(msg)
            return None
        else:
            raise Exception(msg)
    data = json.loads(req.content)
    if return_response:
        return data
    if data.get("status") == 500:
        # the server reports an error for this drilling in the json-body
        msg = "Drilling {} could not be downloaded ".format(name)
        if ignore_exceptions:
            logger.error(msg)
            return None
        else:
            raise Exception(msg)

    for column in data["columns"]:
        if column_type is None or column["columnType"] == column_type:
            # collect one dict of code/value pairs per layer
            ls = []
            for meta in column["profileMetadata"]:
                di = {}
                for layerInfo in meta["layerInfos"]:
                    di[layerInfo["code"]] = layerInfo["value"]
                ls.append(di)
            df = pd.DataFrame(ls)
            # "DEPTH" is formatted like "0.0 - 1.5 m"; split it into numeric
            # top and botm columns
            top = []
            botm = []
            for depth in df["DEPTH"]:
                depths = depth.replace("m", "").split(" - ")
                top.append(float(depths[0]))
                botm.append(float(depths[1]))
            df.insert(loc=0, column="top", value=top)
            df.insert(loc=1, column="botm", value=botm)
            df = df.drop("DEPTH", axis=1)
            if column_type is None:
                data[column["columnType"]] = df
            else:
                return df
    if column_type is None:
        # replace the raw column-list by the parsed DataFrames
        data.pop("columns")
        return data
    else:
        # the requested column_type was not present in the response
        msg = "Column {} not present -> {}".format(column_type, name)
        if ignore_exceptions:
            logger.error(msg)
            return None
        else:
            raise Exception(msg)

828 

829 

class GeologischBooronderzoek(Boormonsterprofiel):
    # In brodata, Boormonsterprofiel used to be called GeologischBooronderzoek.
    # Therefore, this subclass is kept as an alias of Boormonsterprofiel, for
    # backwards compatibility.
    pass

834 

835 

class Boorgatmeting(CsvFileOrUrl):
    """Borehole logging (Boorgatmeting) data from DINO, delivered as a las-file."""

    _download_url = "https://www.dinoloket.nl/uitgifteloket/api/brh/log/las"

    def __repr__(self):
        # show the well-header properties of the las-file if they exist
        props = {}
        if hasattr(self, "las") and "Well" in self.las.header:
            items = self.las.header["Well"]
            for item in items:
                props[item.descr] = item.value
        name = util._format_repr(self, props)
        return name

    def _read_contents(self, f):
        # lasio is an optional dependency, so only import it when needed
        import lasio

        self.las = lasio.read(f)

    def to_dict(self):
        import lasio

        return lasio.las.JSONEncoder().default(self.las)

    def plot(self, ax=None, columns=None, z=0.0, **kwargs):
        """Plot the logs against depth.

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            The axes to plot on. Defaults to the current axes.
        columns : str or list of str, optional
            The log(s) to plot. The default is None, which plots all logs.
        z : float, optional
            Level to subtract the depth-index from. The default is 0.0.

        Returns
        -------
        matplotlib.axes.Axes
            The axes that was plotted on.
        """
        if ax is None:
            import matplotlib.pyplot as plt

            ax = plt.gca()
        df = self.las.df()
        if columns is None:
            columns = df.columns
        elif isinstance(columns, str):
            columns = [columns]

        # plot only the requested columns (previously the computed selection
        # was ignored and every column was plotted)
        for column in columns:
            ax.plot(df[column], z - df.index, label=column, **kwargs)
        return ax

875 

876 

class ChemischeAnalyse(CsvFileOrUrl):
    """Chemical analysis (Chemische analyse) data from DINO."""

    _download_url = (
        "https://www.dinoloket.nl/uitgifteloket/api/brh/chemicalanalysis/csv"
    )

    def _read_contents(self, f):
        # peek at the first line, then put the cursor back at the start
        start = f.tell()
        line = f.readline().rstrip("\n")
        f.seek(start)

        # section: LOCATIE gegevens
        if line.startswith('"LOCATIE gegevens"'):
            line = f.readline()
            self.locatie_gegevens, line = self._read_properties_csv_columns(f)
            # also make each location-property available as an attribute
            for key, value in self.locatie_gegevens.items():
                setattr(self, key, value)

        # section: KWALITEIT gegevens VAST
        if line.startswith('"KWALITEIT gegevens VAST"'):
            line = f.readline()
            self.kwaliteit_gegevens_vast, line = self._read_csv_part(f)
            for column in ("Monster datum", "Analyse datum"):
                if column in self.kwaliteit_gegevens_vast.columns:
                    self.kwaliteit_gegevens_vast[column] = pd.to_datetime(
                        self.kwaliteit_gegevens_vast[column], dayfirst=True
                    )

    def to_dict(self):
        d = dict(self.locatie_gegevens)
        if hasattr(self, "kwaliteit_gegevens_vast"):
            d["kwaliteit_gegevens_vast"] = self.kwaliteit_gegevens_vast
        return d

910 

911 

class KorrelgrootteAnalyse(ChemischeAnalyse):
    """Grain-size analysis, read from a DINOloket csv file.

    All parsing is inherited from ChemischeAnalyse; only the download
    endpoint differs.
    """

    _download_url = "https://www.dinoloket.nl/uitgifteloket/api/brh/grainsizeanalysis/csv"

916 

917 

class VerticaalElektrischSondeeronderzoek(CsvFileOrUrl):
    """Vertical electrical sounding (VES) survey, read from a DINOloket csv file.

    The csv file consists of named sections ('"VES Overzicht"', '"Kop"',
    '"Data"' and zero or more interpretation sections), which are parsed into
    attributes of this object.
    """

    # endpoint from which DINOloket serves VES csv files
    _download_url = "https://www.dinoloket.nl/uitgifteloket/api/ves/csv"

    # Read a VES-file
    def _read_contents(self, f):
        """Parse the sections of the VES csv file from the open handle `f`.

        Each helper call consumes its section and returns the first line of
        the next section, which is used to decide what to parse next.
        """
        # read first line and place cursor at start of document again
        start = f.tell()
        line = f.readline().rstrip("\n")
        f.seek(start)

        # "VES Overzicht" section: overview properties, also set as attributes
        if line.startswith('"VES Overzicht"'):
            line = f.readline()
            self.ves_overzicht, line = self._read_properties_csv_columns(f)
            for key in self.ves_overzicht:
                setattr(self, key, self.ves_overzicht[key])

        # "Kop" (header) section
        if line.startswith('"Kop"'):
            line = f.readline()
            self.kop, line = self._read_properties_csv_columns(f)

        # "Data" section: the measured values, as a DataFrame
        if line.startswith('"Data"'):
            line = f.readline()
            self.data, line = self._read_csv_part(f)

        self.interpretatie_door_tno_nitg = []
        self.interpretaties = []

        # the file may contain multiple interpretations; each consists of a
        # properties part, optionally followed by a table of interpreted layers
        while line.startswith('"Interpretatie door: TNO-NITG"'):
            # Interpretatie door: TNO-NITG
            line = f.readline()
            df, line = self._read_properties_csv_columns(f)
            self.interpretatie_door_tno_nitg.append(df)

            # Interpretaties
            if line.startswith('"Interpretaties"'):
                line = f.readline()
                df, line = self._read_csv_part(f)
                self.interpretaties.append(df)

    def to_dict(self):
        """Combine overview and header properties into a single dictionary.

        Adds the data table, the first interpretation (if any) and a
        LineString geometry derived from the survey direction and maximum
        electrode distance.
        """
        d = {**self.ves_overzicht, **self.kop}
        if hasattr(self, "data"):
            d["data"] = self.data
        d["Aantal interpretaties"] = len(self.interpretaties)
        if len(self.interpretatie_door_tno_nitg) > 0:
            # only take the first interpretatie_door_tno_nitg, as the data will not fit in a DataFrame
            d["interpretatie_door_tno_nitg"] = self.interpretatie_door_tno_nitg[0]
        if len(self.interpretaties) > 0:
            # only take the first interpretation, as the data will not fit in a DataFrame
            d["interpretaties"] = self.interpretaties[0]
        if (
            "Richting" in d
            and "Maximale elektrode afstand L2" in d
            and "X-coordinaat" in d
            and "Y-coordinaat" in d
        ):
            # build a line centered on (x, y) with half-length L2 along the
            # survey direction; assumes "Richting" is in degrees — TODO confirm
            # the angle convention against the DINOloket documentation
            angle = (d["Richting"] - 90) * np.pi / 180
            x = d["X-coordinaat"]
            y = d["Y-coordinaat"]
            dx = -np.cos(angle) * d["Maximale elektrode afstand L2"]
            dy = np.sin(angle) * d["Maximale elektrode afstand L2"]
            d["geometry"] = LineString([(x + dx, y + dy), (x - dx, y - dy)])
        return d

    def plot_interpretaties(
        self, nr=None, ax=None, top=0, bot=None, negative_depth=True, **kwargs
    ):
        """
        Plot interpreted resistance profiles from VES data.

        This method visualizes one or more interpretation profiles by plotting the
        'Werkelijke weerstand' (actual resistance) against depth as a line (stairs).

        Parameters
        ----------
        nr : int or None, optional
            Index of a specific interpretation to plot. If None (default), all
            interpretations in `self.interpretaties` are plotted.
        ax : matplotlib.axes.Axes, optional
            The matplotlib Axes object to draw the plot on. If None, the current Axes
            (`plt.gca()`) is used. The default is None.
        top : float, optional
            Top depth of the plot in meters. The default is 0.
        bot : float or None, optional
            Bottom depth of the plot in meters. If None (default), it is inferred from
            the data, by setting the length of the last section equal to the length of
            the next to last section.
        negative_depth : bool, optional
            If True (default), depth is plotted as negative (i.e., increasing downwards,
            following geotechnical convention).
        **kwargs : dict, optional
            Additional keyword arguments passed to `matplotlib.axes.Axes.plot` (e.g.,
            color, linestyle, label).

        Returns
        -------
        ax : matplotlib.axes.Axes
            The Axes object containing the plot.
        """
        if nr is None:
            dfs = self.interpretaties
            if len(dfs) == 0:
                # attribute name contains a hyphen, so it was set via setattr
                # and must be read back via getattr
                nitg_nr = getattr(self, "NITG-nr")
                logger.warning(f"No interpretations in {nitg_nr}")
                return
        else:
            dfs = [self.interpretaties[nr]]

        if ax is None:
            ax = plt.gca()

        for df in dfs:
            values = df["Werkelijke weerstand"].values

            # duplicate the interior layer boundaries so value/edge pairs
            # form a stair-step profile
            edges = df["Bovenkant laag (m)"].values[1:]
            edges = np.vstack((edges, edges)).transpose().ravel()
            edge_top = df["Bovenkant laag (m)"].iloc[0]
            if np.isnan(edge_top):
                edge_top = top
            edge_bot = df["Onderkant laag (m)"].iloc[-1]
            if np.isnan(edge_bot):
                if bot is None or np.isnan(bot):
                    # extrapolate: give the last layer the same thickness as
                    # the next-to-last layer
                    edge_bot = df["Bovenkant laag (m)"].iloc[-1] + (
                        df["Bovenkant laag (m)"].iloc[-1]
                        - df["Bovenkant laag (m)"].iloc[-2]
                    )
                else:
                    edge_bot = bot
            edges = np.hstack((edge_top, edges, edge_bot))

            values = np.vstack((values, values)).transpose().ravel()

            if negative_depth:
                edges = -edges

            ax.plot(values, edges, **kwargs)

        return ax