Coverage for brodata / dino.py: 75%
586 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-20 14:37 +0000
1import logging
2import os
3from io import BytesIO, StringIO, TextIOWrapper
4from pathlib import Path
5from zipfile import ZipFile
7import numpy as np
8import pandas as pd
9import geopandas as gpd
10import requests
11import json
12from shapely.geometry import LineString
13import matplotlib.pyplot as plt
15from . import util
16from .webservices import get_configuration, get_gdf
18logger = logging.getLogger(__name__)
def objects_to_gdf(
    objects,
    x="X-coordinaat",
    y="Y-coordinaat",
    geometry=None,
    index=None,
    to_gdf=True,
):
    """
    Convert a dictionary of dino-objects to a geopandas GeoDataFrame.

    Parameters
    ----------
    objects: dictionary of bro or dinoloket objects
        dictionary of objects to convert to (geo)dataframe
    geometry: str
        name of column of geometry
    x: str
        name of column of x-coordinate
    y: str
        name of column of y-coordinate
    index: str or list of str
        name of column to use as index
    to_gdf: bool
        convert to geodataframe

    Returns
    -------
    gdf: GeoDataFrame or DataFrame
        Returns a GeoDataFrame if to_gdf is True, otherwise a DataFrame
    """
    if not to_gdf:
        # return the raw mapping of objects untouched
        return objects

    # convert a list of dino-objects to a geodataframe
    df = pd.DataFrame([objects[key].to_dict() for key in objects])
    if geometry is not None:
        # use an existing geometry column when present, otherwise fall back to None
        geometry = df[geometry] if geometry in df.columns else None
    elif df.empty:
        logger.warning("no data found")
    elif x not in df:
        logger.warning(f"{x} not found in data. No geometry column created.")
    elif y not in df:
        logger.warning(f"{y} not found in data. No geometry column created.")
    else:
        # build point-geometries from the x- and y-coordinate columns
        geometry = gpd.points_from_xy(df[x], df[y])
    gdf = gpd.GeoDataFrame(df, geometry=geometry)
    if index is not None and not gdf.empty:
        if isinstance(index, str):
            if index in gdf.columns:
                gdf = gdf.set_index(index)
        # use `col`, not `x`, so the x-parameter is not shadowed (latent bug)
        elif np.all([col in gdf.columns for col in index]):
            # we assume index is an iterable (list), to form a MultiIndex
            gdf = gdf.set_index(index)
    return gdf
def _get_data_within_extent(
    dino_cl,
    kind,
    extent,
    config=None,
    timeout=5,
    silent=False,
    to_path=None,
    to_zip=None,
    redownload=False,
    x="X-coordinaat",
    y="Y-coordinaat",
    geometry=None,
    index="NITG-nr",
    to_gdf=True,
    max_retries=2,
    continue_on_error=False,
    progress_callback=None,
):
    """Retrieve DINO data within a specified geographical extent or from local files.

    This is a core function used by various data retrieval methods in the DINO system.
    It can either load data from local files/archives or fetch it from the DINO server
    based on geographical extent.

    Parameters
    ----------
    dino_cl : class
        The DINO data class to instantiate for each location (e.g., Grondwaterstand).
    kind : str
        The type of DINO data to retrieve (e.g., "Grondwaterstand", "Boorgatmeting").
    extent : str, Path, or sequence
        Either a path to local data, or a sequence of [xmin, xmax, ymin, ymax]
        coordinates.
    config : dict, optional
        Configuration mapping for DINO data kinds. Uses default if None.
    timeout : int or float, optional.
        Timeout in seconds for network requests when downloading data. The default is 5.
    silent : bool, default=False
        If True, suppress progress output.
    to_path : str, optional
        Directory to save downloaded files. Created if it doesn't exist.
    to_zip : str, optional
        Path to save downloaded files in a zip archive.
    redownload : bool, optional
        If True, redownload data even if local files exist. The default is False.
    x : str, optional
        Name of the x-coordinate column. The default is "X-coordinaat".
    y : str, optional
        Name of the y-coordinate column. The default is "Y-coordinaat".
    geometry : str, optional
        Name of the geometry column if different from creating from x,y coordinates.
    index : str, optional
        Column(s) to use as index in the output GeoDataFrame. The default is "NITG-nr".
    to_gdf : bool, optional
        If True, return a GeoDataFrame; if False, return raw dictionary of objects. The
        default is True
    max_retries : int, optional
        Maximum number of retries for failed network requests. The default is 2.
    continue_on_error : bool, optional
        If True, continue after an error occurs during downloading or processing of
        individual observation data. Defaults to False.
    progress_callback : function, optional
        A callback function that takes two arguments (current, total) to report
        progress. If None, no progress reporting is done. Defaults to None.

    Returns
    -------
    geopandas.GeoDataFrame or dict
        If to_gdf is True, returns a GeoDataFrame with the requested data.
        If to_gdf is False, returns a dictionary of DINO objects.
    """
    # a string or Path means: read previously downloaded data from disk
    if isinstance(extent, (str, Path)):
        data = _get_data_from_path(extent, dino_cl, silent=silent, progress_callback=progress_callback)
        return objects_to_gdf(data, x, y, geometry, index, to_gdf)

    if to_zip is not None:
        if not redownload and os.path.isfile(to_zip):
            # an archive already exists: read from it instead of downloading
            data = _get_data_from_zip(to_zip, dino_cl, silent=silent, extent=extent, progress_callback=progress_callback)
            return objects_to_gdf(data, x, y, geometry, index, to_gdf)
        if to_path is None:
            # stage the downloads in a directory named after the zip-file
            to_path = os.path.splitext(to_zip)[0]
        # remember whether the staging directory must be removed afterwards
        remove_path_again = not os.path.isdir(to_path)
        files = []

    if config is None:
        config = get_configuration()

    if to_path is not None and not os.path.isdir(to_path):
        os.makedirs(to_path)

    to_file = None
    gdf = None
    if to_path is not None:
        # locations are cached in a geojson-file next to the csv-files
        to_file = os.path.join(to_path, f"{dino_cl.__name__}.geojson")
        if to_zip is not None:
            files.append(to_file)
        if not redownload and os.path.isfile(to_file):
            gdf = gpd.read_file(to_file)
            if not gdf.empty and "DINO_NR" in gdf.columns:
                gdf = gdf.set_index("DINO_NR")
    if gdf is None:
        # no cached locations: download the locations within extent
        gdf = get_gdf(
            kind,
            config=config,
            extent=extent,
            timeout=timeout,
        )
        if to_file is not None:
            gdf.to_file(to_file)

    # reset; to_file is set per location inside the loop when to_path is given
    to_file = None

    data = {}
    for i, dino_nr in util.tqdm(enumerate(gdf.index), disable=silent):
        if progress_callback is not None:
            progress_callback(i, len(gdf))
        if to_path is not None:
            to_file = os.path.join(to_path, f"{dino_nr}.csv")
            if to_zip is not None:
                files.append(to_file)
            if not redownload and os.path.isfile(to_file):
                # the csv-file was downloaded before: parse it, skip the download
                data[dino_nr] = dino_cl(to_file)
                continue
        try:
            data[dino_nr] = dino_cl.from_dino_nr(
                dino_nr, timeout=timeout, to_file=to_file, max_retries=max_retries
            )
        except Exception as e:
            if not continue_on_error:
                raise e
            # best-effort mode: log the failure and continue with the next location
            logger.error("Error retrieving %s %s: %s", kind, dino_nr, e)
            continue
    if to_zip is not None:
        util._save_data_to_zip(to_zip, files, remove_path_again, to_path)
    return objects_to_gdf(data, x, y, geometry, index, to_gdf)
def _get_data_from_path(from_path, dino_class, silent=False, ext=".csv", progress_callback=None):
    """Read dino-objects of type ``dino_class`` from all ``ext``-files in a directory.

    When ``from_path`` points to a zip-archive the files are read from that
    archive instead. Returns a dictionary keyed by file name without extension.
    """
    # delegate zip-archives to the dedicated reader
    if str(from_path).endswith(".zip"):
        return _get_data_from_zip(
            from_path, dino_class, silent=silent, progress_callback=progress_callback
        )
    matching = [entry for entry in os.listdir(from_path) if entry.endswith(ext)]
    n_files = len(matching)
    data = {}
    for count, entry in util.tqdm(enumerate(matching), total=n_files, disable=silent):
        if progress_callback is not None:
            progress_callback(count, n_files)
        full_name = os.path.join(from_path, entry)
        key = os.path.splitext(entry)[0]
        data[key] = dino_class(full_name)
    return data
def _get_data_from_zip(to_zip, dino_class, silent=False, extent=None, progress_callback=None):
    """Read dino-objects of type ``dino_class`` from a zip-archive.

    When the archive contains a ``<dino_class>.geojson`` location-file and an
    extent is given, only the csv-files of locations within that extent are
    read. Returns a dictionary of objects keyed by member name.
    """
    # read data from zipfile
    data = {}
    with ZipFile(to_zip) as zf:
        names = zf.namelist()
        name = f"{dino_class.__name__}.geojson"
        has_location_file = name in names
        if has_location_file:
            # the location-file itself is not a measurement-file: drop it
            names.remove(name)
        if has_location_file and extent is not None:
            # spatially filter the locations, then rebuild the member-list
            gdf = gpd.read_file(zf.open(name))
            gdf = gdf.set_index("DINO_NR")
            gdf = gdf.cx[extent[0] : extent[1], extent[2] : extent[3]]
            names = [f"{name}.csv" for name in gdf.index]
        total = len(names)
        for i, name in util.tqdm(enumerate(names), total=total, disable=silent):
            if progress_callback is not None:
                progress_callback(i, total)
            # NOTE(review): keys here keep the ".csv"-suffix, whereas
            # _get_data_from_path strips the extension — confirm whether
            # downstream code relies on either form.
            data[name] = dino_class(name, zipfile=zf)
    return data
def get_verticaal_elektrisch_sondeeronderzoek(extent, **kwargs):
    """Get vertical-electrical-sounding (VES) data within extent; see _get_data_within_extent."""
    return _get_data_within_extent(
        VerticaalElektrischSondeeronderzoek,
        "Verticaal elektrisch sondeeronderzoek",
        extent,
        geometry="geometry",
        **kwargs,
    )
def get_grondwaterstand(
    extent,
    config=None,
    timeout=5,
    silent=False,
    to_path=None,
    to_zip=None,
    redownload=False,
    to_gdf=True,
    skip=None,
    continue_on_error=False,
    progress_callback=None,
):
    """
    Get groundwater level (Grondwaterstand) data as a GeoDataFrame or raw objects.

    Fetch Grondwaterstand data for a given geographical extent or load it from local
    files. Data are retrieved per monitoring location and per piezometer. Results can
    be returned as a GeoDataFrame or as a dictionary of Grondwaterstand objects.

    Parameters
    ----------
    extent : str, Path or sequence
        The spatial extent ([xmin, xmax, ymin, ymax]) to filter the data, or a
        path to previously downloaded data.
    config : dict, optional
        Configuration mapping for available DINO data kinds. If None, a default
        configuration is used.
    timeout : int or float, optional
        Timeout in seconds for network requests when downloading data. The default is 5.
    silent : bool, optional
        If True, suppress progress output.
    to_path : str, optional
        If not None, save the downloaded files in the directory named to_path. The
        default is None.
    to_zip : str, optional
        If not None, save the downloaded files in a zip-file named to_zip. The default
        is None.
    redownload : bool, optional
        When downloaded files exist in to_path or to_zip, read from these files when
        redownload is False. If redownload is True, download the data again from the
        DINO-server. The default is False.
    to_gdf : bool, optional
        If True (default), convert the loaded Grondwaterstand objects into a
        geopandas.GeoDataFrame. If False, return the raw mapping of objects.
    skip : str or iterable, optional
        Name or iterable of location names to skip during download or processing.
    continue_on_error : bool, optional
        If True, continue after an error occurs during downloading or processing of
        individual observation data. Defaults to False.
    progress_callback : function, optional
        A callback function that takes two arguments (current, total) to report
        progress. If None, no progress reporting is done. Defaults to None.

    Returns
    -------
    geopandas.GeoDataFrame or dict
        If `to_gdf` is True, returns a GeoDataFrame indexed by ['Locatie',
        'Filternummer']. If False, returns a dictionary with Grondwaterstand objects.

    Notes
    -----
    - When `extent` is a path (str or Path), this function loads local data.
    - When `to_zip` is provided, the function will create a temporary directory and
      archive files into the supplied ZIP.
    """
    dino_class = Grondwaterstand
    index = ["Locatie", "Filternummer"]
    if skip is not None and isinstance(skip, str):
        # allow a single location-name as well as an iterable of names
        skip = [skip]

    # CONSISTENCY FIX: also accept a pathlib.Path, like _get_data_within_extent does
    if isinstance(extent, (str, Path)):
        data = _get_data_from_path(extent, dino_class, silent=silent, progress_callback=progress_callback)
        return objects_to_gdf(data, index=index, to_gdf=to_gdf)

    if to_zip is not None:
        if not redownload and os.path.isfile(to_zip):
            # an archive already exists: read from it instead of downloading
            data = _get_data_from_zip(to_zip, dino_class, silent=silent, progress_callback=progress_callback)
            return objects_to_gdf(data, index=index, to_gdf=to_gdf)
        if to_path is None:
            # stage the downloads in a directory named after the zip-file
            to_path = os.path.splitext(to_zip)[0]
        remove_path_again = not os.path.isdir(to_path)
        files = []

    kind = "Grondwaterstand"
    if config is None:
        config = get_configuration()
    # download the locations within extent
    gdf = get_gdf(
        kind,
        config=config,
        extent=extent,
        timeout=timeout,
    )
    download_url = config[kind]["download"]

    to_file = None
    if to_path is not None and not os.path.isdir(to_path):
        os.makedirs(to_path)
    data = {}
    for i, name in util.tqdm(enumerate(gdf.index), disable=silent):
        if progress_callback is not None:
            progress_callback(i, len(gdf))
        if skip is not None and name in skip:
            continue
        # every location can have multiple piezometers (column ST_CNT)
        for i_st in range(1, gdf.at[name, "ST_CNT"] + 1):
            piezometer_nr = f"{i_st:03d}"
            url = f"{download_url}/{name}/{piezometer_nr}"
            if to_path is not None:
                to_file = os.path.join(to_path, f"{name}_{piezometer_nr}.csv")
                if to_zip is not None:
                    files.append(to_file)
                if not redownload and os.path.isfile(to_file):
                    # csv-file was downloaded before: parse it, skip the download
                    data[f"{name}_{piezometer_nr}"] = dino_class(to_file)
                    continue
            try:
                data[f"{name}_{piezometer_nr}"] = dino_class(
                    url, timeout=timeout, to_file=to_file
                )
            except Exception as e:
                if not continue_on_error:
                    raise e
                # best-effort mode: log the failure and continue
                logger.error(
                    "Error retrieving %s %s piezometer %s: %s",
                    kind,
                    name,
                    piezometer_nr,
                    e,
                )
                continue
    if to_zip is not None:
        util._save_data_to_zip(to_zip, files, remove_path_again, to_path)
    return objects_to_gdf(
        data, index=index, to_gdf=to_gdf, x="X-coordinaat", y="Y-coordinaat"
    )
def get_grondwatersamenstelling(extent, **kwargs):
    """Get groundwater-composition data within extent; see _get_data_within_extent."""
    return _get_data_within_extent(
        Grondwatersamenstelling, "Grondwatersamenstelling", extent, **kwargs
    )
def get_geologisch_booronderzoek(extent, **kwargs):
    """Deprecated alias of get_boormonsterprofiel; kept for backwards compatibility."""
    logger.warning(
        "`get_geologisch_booronderzoek` is deprecated. Use `get_boormonsterprofiel` instead"
    )
    return _get_data_within_extent(
        GeologischBooronderzoek, "Geologisch booronderzoek", extent, **kwargs
    )
def get_boormonsterprofiel(extent, **kwargs):
    """Get drilling sample-descriptions within extent; see _get_data_within_extent."""
    return _get_data_within_extent(
        Boormonsterprofiel, "Boormonsterprofiel", extent, **kwargs
    )
def get_boorgatmeting(extent, **kwargs):
    """Get borehole-log (las-file) data within extent; see _get_data_within_extent."""
    return _get_data_within_extent(Boorgatmeting, "Boorgatmeting", extent, **kwargs)
def get_chemische_analyse(extent, **kwargs):
    """Get chemical-analysis data within extent; see _get_data_within_extent."""
    return _get_data_within_extent(ChemischeAnalyse, "Chemische analyse", extent, **kwargs)
def get_korrelgrootte_analyse(extent, **kwargs):
    """Get grain-size-analysis data within extent; see _get_data_within_extent."""
    return _get_data_within_extent(
        KorrelgrootteAnalyse, "Korrelgrootte analyse", extent, **kwargs
    )
def get_oppervlaktewaterstand(extent, **kwargs):
    """Get surface-water-level data within extent; see _get_data_within_extent."""
    return _get_data_within_extent(
        Oppervlaktewaterstand, "Oppervlaktewateronderzoek", extent, **kwargs
    )
class CsvFileOrUrl:
    """Base class for dinoloket-data that is delivered as a csv- or las-file.

    The constructor accepts a local file, a download-url, or a member of an
    open ZipFile, and dispatches the (possibly downloaded) contents to the
    subclass' ``_read_contents`` method.
    """

    def __init__(
        self,
        url_or_file,
        zipfile=None,
        timeout=5,
        to_file=None,
        redownload=True,
        max_retries=2,
    ):
        if zipfile is not None:
            # read a member of an already opened ZipFile
            with zipfile.open(url_or_file) as f:
                self._read_contents(TextIOWrapper(f))
        elif url_or_file.startswith("http"):
            if redownload or to_file is None or not os.path.isfile(to_file):
                if max_retries > 1:
                    # mount an adapter so failed https-requests are retried
                    adapter = requests.adapters.HTTPAdapter(max_retries=max_retries)
                    session = requests.Session()
                    session.mount("https://", adapter)
                    req = session.get(url_or_file, timeout=timeout)
                else:
                    req = requests.get(url_or_file, timeout=timeout)
                if not req.ok:
                    # typo fixed in message ("Retieving" -> "Retrieving")
                    raise Exception(f"Retrieving data from {url_or_file} failed")
                is_zipfile = False
                if "content-disposition" in req.headers:
                    if req.headers["content-disposition"].endswith(".zip"):
                        is_zipfile = True
                if is_zipfile:
                    # BoorgatMetingen are las files that are delivered in a zip-file
                    with ZipFile(BytesIO(req.content)) as myzip:
                        files = myzip.namelist()
                        files = [f for f in files if f.endswith(".las")]
                        assert len(files) == 1, "Only one file in the zipfile supported"
                        # BUG FIX: read the member once; writing myfile.read() to
                        # disk first exhausted the stream, so _read_contents then
                        # received no data when to_file was set
                        contents = myzip.read(files[0])
                        if to_file is not None:
                            with open(to_file, "wb") as f:
                                f.write(contents)
                        self._read_contents(TextIOWrapper(BytesIO(contents)))
                else:
                    if to_file is not None:
                        with open(to_file, "w") as f:
                            f.write(req.text)
                    self._read_contents(StringIO(req.text))
            else:
                # a previously downloaded copy exists: read it from disk
                with open(to_file, "r") as f:
                    self._read_contents(f)
        else:
            # a plain local file
            with open(url_or_file, "r") as f:
                self._read_contents(f)

    def __repr__(self):
        # retrieve properties if they exist
        propdict = {"NITG-nr": "NITG-nr", "X-coordinaat": "x", "Y-coordinaat": "y"}
        props = {}
        for key in propdict:
            if hasattr(self, key):
                props[propdict[key]] = getattr(self, key)
        name = util._format_repr(self, props)
        return name

    @classmethod
    def from_dino_nr(cls, dino_nr, **kwargs):
        """Download and parse a single object, addressed by its dino-number."""
        if not hasattr(cls, "_download_url"):
            raise NotImplementedError(f"No download-url defined for {cls.__name__}")
        return cls(f"{cls._download_url}/{dino_nr}", **kwargs)

    def _read_properties_csv_rows(self, f, merge_columns=False, **kwargs):
        """Read a key,value-per-row properties block; return (dict, next line)."""
        # this is the new format of properties from dinoloket
        df, line = self._read_csv_part(f, header=None, index_col=0, **kwargs)
        # remove empty columns
        df = df.loc[:, ~df.isna().all(axis=0)]
        if merge_columns:
            # some values are spread over multiple columns: join them with spaces
            for index in df.index:
                df.at[index, 1] = " ".join(df.loc[index, ~df.loc[index].isna()].values)
            df = df.loc[:, :1]
        else:
            assert df.shape[1] == 1
        d = df.squeeze().to_dict()
        return d, line

    def _read_properties_csv_columns(self, f, **kwargs):
        """Read a single-row properties table (keys in header); return (dict, next line)."""
        df, line = self._read_csv_part(f, **kwargs)
        assert df.shape[0] == 1
        d = df.squeeze().to_dict()
        return d, line

    def _read_csv_part(self, f, sep=",", header=0, index_col=False, **kwargs):
        """Read one csv-block up to the next blank line.

        Returns the block as a DataFrame together with the first non-blank line
        after it; the file-cursor is left at the start of that line.
        """
        strt = f.tell()
        if header is None:
            nrows = 0
        else:
            nrows = -1  # the header does not count
        # count the number of rows until a blank (only commas) line or EOF
        line = f.readline()
        while line.replace(",", "") not in ["\n", ""]:
            nrows += 1
            line = f.readline()
        eind = f.tell()
        # go back to where we were before
        f.seek(strt)
        df = pd.read_csv(
            f, sep=sep, index_col=index_col, nrows=nrows, header=header, **kwargs
        )
        if header is not None:
            # drop trailing columns that only exist because of trailing commas
            df = df.loc[:, ~df.columns.str.startswith("Unnamed: ")]
        f.seek(eind)

        if line != "":
            # skip any further blank lines; leave the cursor at the next content line
            while line.replace(",", "") == "\n":
                new_start = f.tell()
                line = f.readline()
            f.seek(new_start)

        return df, line
class Oppervlaktewaterstand(CsvFileOrUrl):
    """Surface-water level data of a single location, parsed from a dinoloket csv-file."""

    # endpoint used by from_dino_nr to download a single location
    _download_url = "https://www.dinoloket.nl/uitgifteloket/api/wo/owo/full"

    def __repr__(self):
        # retrieve properties if they exist
        props = {}
        if hasattr(self, "meta") and not self.meta.empty:
            # the last meta-row holds the most recent administration of the location
            s = self.meta.iloc[-1]
            propdict = {"Locatie": "Locatie", "X-coordinaat": "x", "Y-coordinaat": "y"}
            for key in propdict:
                if key in s:
                    props[propdict[key]] = s[key]
        name = util._format_repr(self, props)
        return name

    def _read_contents(self, f):
        """Parse the csv-contents: properties, then the meta- and measurement-tables."""
        self.props, line = self._read_properties_csv_rows(f, merge_columns=True)
        if line.startswith(
            '"Van deze put zijn geen standen opgenomen in de DINO-database"'
        ):
            # no measurements for this location: only the properties are kept
            return
        self.meta, line = self._read_csv_part(f)
        self.data, line = self._read_csv_part(f)
        for column in ["Peildatum"]:
            if column in self.data.columns:
                # dates in dinoloket csv-files are formatted day-first
                self.data[column] = pd.to_datetime(self.data[column], dayfirst=True)

    def to_dict(self):
        """Return a flat dictionary of properties, plus the meta- and data-tables."""
        d = {**self.props}
        if hasattr(self, "meta"):
            d["meta"] = self.meta
            # also expose the last (most recent) value of every meta-column
            for column in d["meta"]:
                d[column] = d["meta"][column].iloc[-1]
        if hasattr(self, "data"):
            d["data"] = self.data
        return d
class Grondwaterstand(CsvFileOrUrl):
    """Groundwater-level data of a single piezometer, parsed from a dinoloket csv-file."""

    # a piezometer is addressed as <_download_url>/<dino_nr>/<filter_nr>
    _download_url = "https://www.dinoloket.nl/uitgifteloket/api/wo/gwo/full"

    @classmethod
    def from_dino_nr(cls, dino_nr, filter_nr, **kwargs):
        """Download a single piezometer; filter_nr is zero-padded to three digits."""
        return cls(f"{cls._download_url}/{dino_nr}/{filter_nr:03d}", **kwargs)

    def __repr__(self):
        # retrieve properties if they exist
        props = {}
        if hasattr(self, "meta") and not self.meta.empty:
            # the last meta-row holds the most recent administration of the piezometer
            s = self.meta.iloc[-1]
            propdict = {
                "Locatie": "Locatie",
                "Filternummer": "filter",
                "X-coordinaat": "x",
                "Y-coordinaat": "y",
            }
            for key in propdict:
                if key in s:
                    props[propdict[key]] = s[key]
        name = util._format_repr(self, props)
        return name

    def _read_contents(self, f):
        """Parse the csv-contents: two property-blocks, an optional meta-table, then data."""
        self.props, line = self._read_properties_csv_rows(f, merge_columns=True)
        self.props2, line = self._read_properties_csv_rows(f)
        if line.startswith(
            '"Van deze put zijn geen standen opgenomen in de DINO-database"'
        ):
            # no measurements for this piezometer: only the properties are kept
            return
        if "Peildatum" not in line:
            # a meta-table precedes the measurements, unless the header of the
            # measurement-table (containing "Peildatum") follows directly
            self.meta, line = self._read_csv_part(f)
        self.data, line = self._read_csv_part(f)
        for column in ["Peildatum"]:
            if column in self.data.columns:
                # dates in dinoloket csv-files are formatted day-first
                self.data[column] = pd.to_datetime(self.data[column], dayfirst=True)

    def to_dict(self):
        """Return a flat dictionary of properties, plus the meta- and data-tables."""
        d = {**self.props, **self.props2}
        if hasattr(self, "meta"):
            d["meta"] = self.meta
            # also expose the last (most recent) value of every meta-column
            for column in d["meta"]:
                d[column] = d["meta"][column].iloc[-1]
        if hasattr(self, "data"):
            d["data"] = self.data
        return d
class Grondwatersamenstelling(CsvFileOrUrl):
    """Groundwater-composition (quality) data, parsed from a dinoloket csv-report."""

    _download_url = "https://www.dinoloket.nl/uitgifteloket/api/wo/gwo/qua/report"

    def _read_contents(self, f):
        """Parse the csv-contents: location-properties and the liquid-quality table."""
        # read first line and place cursor at start of document again
        start = f.tell()
        line = f.readline().rstrip("\n")
        f.seek(start)

        # LOCATIE gegevens
        if line.startswith('"LOCATIE gegevens"'):
            line = f.readline()
            self.locatie_gegevens, line = self._read_properties_csv_columns(f)
            for key in self.locatie_gegevens:
                # also expose every location-property as an attribute
                setattr(self, key, self.locatie_gegevens[key])

        # KWALITEIT gegevens VLOEIBAAR
        if line.startswith('"KWALITEIT gegevens VLOEIBAAR"'):
            line = f.readline()
            self.kwaliteit_gegevens_vloeibaar, line = self._read_csv_part(f)
            for column in ["Monster datum", "Analyse datum"]:
                if column in self.kwaliteit_gegevens_vloeibaar.columns:
                    # dates in dinoloket csv-files are formatted day-first
                    self.kwaliteit_gegevens_vloeibaar[column] = pd.to_datetime(
                        self.kwaliteit_gegevens_vloeibaar[column], dayfirst=True
                    )

    def to_dict(self):
        """Return the location-properties plus the quality-table (when present)."""
        d = {**self.locatie_gegevens}
        if hasattr(self, "kwaliteit_gegevens_vloeibaar"):
            d["kwaliteit_gegevens_vloeibaar"] = self.kwaliteit_gegevens_vloeibaar
        return d
class Boormonsterprofiel(CsvFileOrUrl):
    """Drilling sample-description (lithology) data, parsed from a dinoloket csv-file."""

    _download_url = (
        "https://www.dinoloket.nl/uitgifteloket/api/brh/sampledescription/csv"
    )

    def _read_contents(self, f):
        """Parse the csv-contents: general properties and the lithology-tables."""
        # read first line and place cursor at start of document again
        start = f.tell()
        line = f.readline().rstrip("\n")
        f.seek(start)
        if line.startswith('"ALGEMENE GEGEVENS BORING"'):
            line = f.readline()
            self.algemene_gegevens_boring, line = self._read_properties_csv_columns(f)
            for key in self.algemene_gegevens_boring:
                # also expose every general property as an attribute
                setattr(self, key, self.algemene_gegevens_boring[key])
        if line.startswith('"ALGEMENE GEGEVENS LITHOLOGIE"'):
            line = f.readline()
            self.algemene_gegevens_lithologie, line = self._read_properties_csv_columns(
                f
            )
        if line.startswith('"LITHOLOGIE LAGEN"'):
            line = f.readline()
            self.lithologie_lagen, line = self._read_csv_part(f)
        if line.startswith('"LITHOLOGIE SUBLAGEN"'):
            line = f.readline()
            self.lithologie_sublagen, line = self._read_csv_part(f)

    def to_dict(self):
        """Merge the property-blocks and lithology-tables into a single dictionary.

        NOTE(review): duplicate NaN-keys are popped from
        ``self.algemene_gegevens_lithologie`` in place, so calling this method
        mutates the instance — confirm this side effect is intended.
        """
        d = {**self.algemene_gegevens_boring}
        if hasattr(self, "algemene_gegevens_lithologie"):
            for key in self.algemene_gegevens_boring:
                if key in self.algemene_gegevens_lithologie:
                    # 'Datum boring' can be specified in algemene_gegevens_boring and algemene_gegevens_lithologie
                    if pd.isna(self.algemene_gegevens_lithologie[key]):
                        self.algemene_gegevens_lithologie.pop(key)
            d = {**d, **self.algemene_gegevens_lithologie}
        if hasattr(self, "lithologie_lagen"):
            d["lithologie_lagen"] = self.lithologie_lagen
        if hasattr(self, "lithologie_sublagen"):
            d["lithologie_sublagen"] = self.lithologie_sublagen
        return d
def get_drilling_from_dinoloket(
    name,
    column_type=None,
    depthReference="NAP",
    language="nl",
    return_response=False,
    ignore_exceptions=False,
    timeout=None,
):
    """
    Get a drilling from dinoloket.

    This method uses the information from the webservice used by dinoloket for
    displaying the drilling. In this way, also lithostratigraphy-data can be returned,
    which is not present in the data downloaded as a csv-file by `Boormonsterprofiel`.

    Parameters
    ----------
    name : str
        The name of the drilling.
    column_type : str, optional
        The type of data that is returned. Possible options are "LITHOLOGY" and
        "LITHOSTRATIGRAPHY" and None. If column_type is None, return a dictionary with
        all data. The default is None.
    depthReference : str, optional
        Possible values are "NAP" and "MV". The default is "NAP".
    language : str of length 2, optional
        Possible values are "nl" for Dutch and "en" for English. When language is not
        'nl' or 'en', english is returned. The default is "nl".
    return_response : bool, optional
        Return the json-response of the web-service without any interpretation. The
        default is False.
    ignore_exceptions : bool, optional
        When True, ignore exceptions when things go wrong. This is useful when
        requesting multiple drillings. The default is False.
    timeout : int or float, optional
        Timeout in seconds for the network request. The default is None (no timeout,
        which matches the previous behavior).

    Returns
    -------
    df or dict
        A dictionary or a DataFrame (when column_type is set) containing the drilling
        data.
    """
    # columnType is 'LITHOSTRATIGRAPHY' or 'LITHOLOGY'
    url = "https://www.dinoloket.nl/javascriptmapviewer-web/rest/brh/profile"
    payload = {"dinoId": name, "depthReference": depthReference, "language": language}
    req = requests.post(
        url,
        data=json.dumps(payload),
        headers={"content-type": "application/json"},
        timeout=timeout,
    )
    if not req.ok:
        # typo fixed in message ("Retieving" -> "Retrieving")
        msg = f"Retrieving data from {url} failed"
        if ignore_exceptions:
            logger.error(msg)
            return None
        else:
            raise Exception(msg)
    data = json.loads(req.content)
    if return_response:
        return data
    if "status" in data.keys():
        if data["status"] == 500:
            msg = "Drilling {} could not be downloaded ".format(name)
            if ignore_exceptions:
                logger.error(msg)
                return None
            else:
                raise Exception(msg)
    for column in data["columns"]:
        if column_type is None or column["columnType"] == column_type:
            # flatten the layer-information into one record per layer
            ls = []
            for meta in column["profileMetadata"]:
                di = {}
                for layerInfo in meta["layerInfos"]:
                    di[layerInfo["code"]] = layerInfo["value"]
                ls.append(di)
            df = pd.DataFrame(ls)
            # split the DEPTH-string ("<top> - <bot>m") into numeric top/botm columns
            top = []
            botm = []
            for depth in df["DEPTH"]:
                depths = depth.replace("m", "").split(" - ")
                top.append(float(depths[0]))
                botm.append(float(depths[1]))
            df.insert(loc=0, column="top", value=top)
            df.insert(loc=1, column="botm", value=botm)
            df = df.drop("DEPTH", axis=1)
            if column_type is None:
                data[column["columnType"]] = df
            else:
                return df
    if column_type is None:
        # the raw columns are replaced by the parsed DataFrames above
        data.pop("columns")
        return data
    else:
        msg = "Column {} not present -> {}".format(column_type, name)
        if ignore_exceptions:
            logger.error(msg)
            return None
        else:
            raise Exception(msg)
class GeologischBooronderzoek(Boormonsterprofiel):
    """Backwards-compatible alias of Boormonsterprofiel.

    In brodata, Boormonsterprofiel used to be called GeologischBooronderzoek;
    this subclass is kept so existing code keeps working.
    """
class Boorgatmeting(CsvFileOrUrl):
    """Borehole-log data from dinoloket, delivered as a las-file."""

    _download_url = "https://www.dinoloket.nl/uitgifteloket/api/brh/log/las"

    def __repr__(self):
        # retrieve properties if they exist
        props = {}
        if hasattr(self, "las") and "Well" in self.las.header:
            items = self.las.header["Well"]
            for item in items:
                props[item.descr] = item.value
        name = util._format_repr(self, props)
        return name

    def _read_contents(self, f):
        """Parse the file-contents as a las-file using lasio."""
        import lasio

        self.las = lasio.read(f)

    def to_dict(self):
        """Return a json-serializable representation of the las-file."""
        import lasio

        return lasio.las.JSONEncoder().default(self.las)

    def plot(self, ax=None, columns=None, z=0.0, **kwargs):
        """Plot the requested log-columns against depth.

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            The axes to plot on. The default is the current axes.
        columns : str or list of str, optional
            The column(s) to plot. The default (None) plots all columns.
        z : float, optional
            Elevation from which the depth is measured. The default is 0.0.
        **kwargs : dict
            Extra keyword arguments, passed on to ``ax.plot``.

        Returns
        -------
        ax : matplotlib.axes.Axes
            The axes containing the plot.
        """
        if ax is None:
            import matplotlib.pyplot as plt

            ax = plt.gca()
        df = self.las.df()
        if columns is None:
            columns = df.columns
        elif isinstance(columns, str):
            columns = [columns]

        # BUG FIX: iterate over the requested columns; the original looped over
        # df.columns, which silently ignored the columns-argument
        for column in columns:
            ax.plot(df[column], z - df.index, label=column, **kwargs)
        return ax
class ChemischeAnalyse(CsvFileOrUrl):
    """Chemical-analysis data of drilling samples, parsed from a dinoloket csv-file."""

    _download_url = (
        "https://www.dinoloket.nl/uitgifteloket/api/brh/chemicalanalysis/csv"
    )

    def _read_contents(self, f):
        """Parse the csv-contents: location-properties and the solid-quality table."""
        # read first line and place cursor at start of document again
        start = f.tell()
        line = f.readline().rstrip("\n")
        f.seek(start)

        # LOCATIE gegevens
        if line.startswith('"LOCATIE gegevens"'):
            line = f.readline()
            self.locatie_gegevens, line = self._read_properties_csv_columns(f)
            for key in self.locatie_gegevens:
                # also expose every location-property as an attribute
                setattr(self, key, self.locatie_gegevens[key])

        # KWALITEIT gegevens VAST
        if line.startswith('"KWALITEIT gegevens VAST"'):
            line = f.readline()
            self.kwaliteit_gegevens_vast, line = self._read_csv_part(f)
            for column in ["Monster datum", "Analyse datum"]:
                if column in self.kwaliteit_gegevens_vast.columns:
                    # dates in dinoloket csv-files are formatted day-first
                    self.kwaliteit_gegevens_vast[column] = pd.to_datetime(
                        self.kwaliteit_gegevens_vast[column], dayfirst=True
                    )

    def to_dict(self):
        """Return the location-properties plus the quality-table (when present)."""
        d = {**self.locatie_gegevens}
        if hasattr(self, "kwaliteit_gegevens_vast"):
            d["kwaliteit_gegevens_vast"] = self.kwaliteit_gegevens_vast
        return d
class KorrelgrootteAnalyse(ChemischeAnalyse):
    """Grain-size-analysis data; the csv-layout is identical to ChemischeAnalyse."""

    _download_url = "https://www.dinoloket.nl/uitgifteloket/api/brh/grainsizeanalysis/csv"
class VerticaalElektrischSondeeronderzoek(CsvFileOrUrl):
    """Vertical electrical sounding (VES) data, parsed from a dinoloket csv-file."""

    _download_url = "https://www.dinoloket.nl/uitgifteloket/api/ves/csv"

    # Read a VES-file
    def _read_contents(self, f):
        """Parse the csv-contents: overview, head, data and interpretation-blocks."""
        # read first line and place cursor at start of document again
        start = f.tell()
        line = f.readline().rstrip("\n")
        f.seek(start)

        # VES Overzicht
        if line.startswith('"VES Overzicht"'):
            line = f.readline()
            self.ves_overzicht, line = self._read_properties_csv_columns(f)
            for key in self.ves_overzicht:
                # also expose every overview-property as an attribute
                setattr(self, key, self.ves_overzicht[key])

        # Kop
        if line.startswith('"Kop"'):
            line = f.readline()
            self.kop, line = self._read_properties_csv_columns(f)

        if line.startswith('"Data"'):
            line = f.readline()
            self.data, line = self._read_csv_part(f)

        # a VES-file can contain multiple interpretation-blocks
        self.interpretatie_door_tno_nitg = []
        self.interpretaties = []

        while line.startswith('"Interpretatie door: TNO-NITG"'):
            # Interpretatie door: TNO-NITG
            line = f.readline()
            df, line = self._read_properties_csv_columns(f)
            self.interpretatie_door_tno_nitg.append(df)

            # Interpretaties
            if line.startswith('"Interpretaties"'):
                line = f.readline()
                df, line = self._read_csv_part(f)
                self.interpretaties.append(df)

    def to_dict(self):
        """Return overview- and head-properties, data and a line-geometry."""
        d = {**self.ves_overzicht, **self.kop}
        if hasattr(self, "data"):
            d["data"] = self.data
        d["Aantal interpretaties"] = len(self.interpretaties)
        if len(self.interpretatie_door_tno_nitg) > 0:
            # only take the first interpretatie_door_tno_nitg, as the data will not fit in a DataFrame
            d["interpretatie_door_tno_nitg"] = self.interpretatie_door_tno_nitg[0]
        if len(self.interpretaties) > 0:
            # only take the first interpretation, as the data will not fit in a DataFrame
            d["interpretaties"] = self.interpretaties[0]
        if (
            "Richting" in d
            and "Maximale elektrode afstand L2" in d
            and "X-coordinaat" in d
            and "Y-coordinaat" in d
        ):
            # construct the measurement-line from the centre coordinates, the
            # direction (degrees) and the maximum electrode distance
            angle = (d["Richting"] - 90) * np.pi / 180
            x = d["X-coordinaat"]
            y = d["Y-coordinaat"]
            dx = -np.cos(angle) * d["Maximale elektrode afstand L2"]
            dy = np.sin(angle) * d["Maximale elektrode afstand L2"]
            d["geometry"] = LineString([(x + dx, y + dy), (x - dx, y - dy)])
        return d

    def plot_interpretaties(
        self, nr=None, ax=None, top=0, bot=None, negative_depth=True, **kwargs
    ):
        """
        Plot interpreted resistance profiles from VES data.

        This method visualizes one or more interpretation profiles by plotting the
        'Werkelijke weerstand' (actual resistance) against depth as a line (stairs).

        Parameters
        ----------
        nr : int or None, optional
            Index of a specific interpretation to plot. If None (default), all
            interpretations in `self.interpretaties` are plotted.
        ax : matplotlib.axes.Axes, optional
            The matplotlib Axes object to draw the plot on. If None, the current Axes
            (`plt.gca()`) is used. The default is None.
        top : float, optional
            Top depth of the plot in meters. The default is 0.
        bot : float or None, optional
            Bottom depth of the plot in meters. If None (default), it is inferred from
            the data, by setting the length of the last section equal to the length of
            the next to last section.
        negative_depth : bool, optional
            If True (default), depth is plotted as negative (i.e., increasing downwards,
            following geotechnical convention).
        **kwargs : dict, optional
            Additional keyword arguments passed to `matplotlib.axes.Axes.plot` (e.g.,
            color, linestyle, label).

        Returns
        -------
        ax : matplotlib.axes.Axes
            The Axes object containing the plot.
        """
        if nr is None:
            dfs = self.interpretaties
            if len(dfs) == 0:
                nitg_nr = getattr(self, "NITG-nr")
                logger.warning(f"No interpretations in {nitg_nr}")
                return
        else:
            dfs = [self.interpretaties[nr]]

        if ax is None:
            ax = plt.gca()

        for df in dfs:
            values = df["Werkelijke weerstand"].values

            # duplicate the inner layer-boundaries so the profile plots as stairs
            edges = df["Bovenkant laag (m)"].values[1:]
            edges = np.vstack((edges, edges)).transpose().ravel()
            edge_top = df["Bovenkant laag (m)"].iloc[0]
            if np.isnan(edge_top):
                edge_top = top
            edge_bot = df["Onderkant laag (m)"].iloc[-1]
            if np.isnan(edge_bot):
                if bot is None or np.isnan(bot):
                    # extrapolate: give the last layer the thickness of the one above
                    edge_bot = df["Bovenkant laag (m)"].iloc[-1] + (
                        df["Bovenkant laag (m)"].iloc[-1]
                        - df["Bovenkant laag (m)"].iloc[-2]
                    )
                else:
                    edge_bot = bot
            edges = np.hstack((edge_top, edges, edge_bot))

            values = np.vstack((values, values)).transpose().ravel()

            if negative_depth:
                edges = -edges

            ax.plot(values, edges, **kwargs)

        return ax