Coverage for brodata / gar.py: 90%
148 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-20 14:37 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-20 14:37 +0000
1import logging
2from functools import partial
4import pandas as pd
5import requests
7from . import bro
9logger = logging.getLogger(__name__)
class GroundwaterAnalysisReport(bro.FileOrUrl):
    """Class to represent a Groundwater Analysis Report (GAR) from the BRO.

    Attributes
    ----------
    laboratoryAnalysis : pd.DataFrame
        DataFrame containing groundwater quality observations.
    fieldResearch : pd.DataFrame
        DataFrame containing field measurements. Only set when the data that is
        read contains field research.
    """

    _rest_url = "https://publiek.broservices.nl/gm/gar/v1"
    _xmlns = "http://www.broservices.nl/xsd/dsgar/1.0"

    def _read_csv(self, csvfile, **kwargs):
        """Read a GAR csv-file and store its contents as attributes.

        Rows in which every value is missing are treated as separators. The
        rows before the first separator are squeezed into a dict of attributes.
        Each block between two consecutive separators becomes a DataFrame, with
        the row directly after the opening separator used as column names. A
        block with a column "analysedatum" is stored as ``laboratoryAnalysis``,
        any other block as ``fieldResearch``.

        Parameters
        ----------
        csvfile : str or file-like
            Passed on to ``pd.read_csv``.
        **kwargs
            Extra keyword arguments for ``pd.read_csv``.
        """
        table = pd.read_csv(csvfile, **kwargs)
        # NOTE(review): index labels are used as positions in iloc below, which
        # assumes the default RangeIndex of read_csv - confirm for custom kwargs
        na_rows = table.index[table.isna().all(axis=1)]
        header = table.iloc[: na_rows[0]].dropna(how="all", axis=1)
        contents = header.squeeze().to_dict()
        for start, stop in zip(na_rows[:-1], na_rows[1:]):
            # skip the separator row (start) and the column-name row (start + 1)
            section = table.iloc[start + 2 : stop]
            section.columns = table.iloc[start + 1]
            section.columns.name = None
            section = section.dropna(how="all", axis=1)
            is_laboratory = "analysedatum" in section.columns
            contents["laboratoryAnalysis" if is_laboratory else "fieldResearch"] = (
                section
            )
        for name, value in contents.items():
            setattr(self, name, value)

    def _read_contents(self, tree):
        """Read the xml-tree of a GAR and store its contents as attributes.

        Elements without children are stored as text. The parts fieldResearch
        and laboratoryAnalysis are collected per element and combined into one
        DataFrame each after all elements have been processed.

        Parameters
        ----------
        tree : xml element tree
            Parsed xml-document, handed to ``self._get_main_object`` to find
            the "GAR_O" element.
        """
        ns = {
            "brocom": "http://www.broservices.nl/xsd/brocommon/3.0",
            "gml": "http://www.opengis.net/gml/3.2",
            "garcommon": "http://www.broservices.nl/xsd/garcommon/1.0",
            "xmlns": self._xmlns,
        }
        root = self._get_main_object(tree, "GAR_O", ns)
        for qualified_name, value in root.attrib.items():
            # drop the "{namespace}" prefix of the attribute name
            setattr(self, qualified_name.split("}", 1)[1], value)
        for element in root:
            key = self._get_tag(element)
            if len(element) == 0:
                setattr(self, key, element.text)
            elif key == "registrationHistory":
                self._read_children_of_children(element)
            elif key == "groundwaterMonitoringNet":
                for member in element:
                    key2 = member.tag.split("}", 1)[1]
                    if key2 == "GroundwaterMonitoringNet":
                        # only the text of the first child is kept
                        setattr(self, key, member[0].text)
                    else:
                        logger.warning(f"Unknown key: {key2}")
            elif key == "monitoringPoint":
                tube = element.find("garcommon:GroundwaterMonitoringTube", ns)
                self.groundwaterMonitoringWell = tube.find("garcommon:broId", ns).text
                self.tubeNumber = int(tube.find("garcommon:tubeNumber", ns).text)
            elif key == "fieldResearch":
                if not hasattr(self, key):
                    self.fieldResearch = []
                self.fieldResearch.append(self._read_field_research(element))
            elif key == "laboratoryAnalysis":
                if not hasattr(self, key):
                    self.laboratoryAnalysis = []
                self.laboratoryAnalysis.append(
                    self._read_laboratory_analysis(element)
                )
            else:
                self._warn_unknown_tag(key)
        # the lists of DataFrames gathered above are merged into one each
        if hasattr(self, "fieldResearch"):
            self.fieldResearch = pd.concat(self.fieldResearch)
        if hasattr(self, "laboratoryAnalysis"):
            self.laboratoryAnalysis = pd.concat(self.laboratoryAnalysis)

    def _read_field_research(self, node):
        """Read a fieldResearch element into a DataFrame of field measurements.

        Sampling information that has been read before a fieldMeasurement is
        copied into the record of that measurement. A fieldObservation is not
        part of the result, but is stored as a dict on the object itself.

        Parameters
        ----------
        node : xml element
            The fieldResearch element.

        Returns
        -------
        pd.DataFrame
            One row per fieldMeasurement, indexed by "samplingDateTime" when
            that column is present.
        """
        records = []
        # values shared by all measurements that follow in this element
        sampling = {}
        for child in node:
            key = self._get_tag(child)
            if key == "samplingDateTime":
                sampling[key] = pd.to_datetime(child.text)
            elif key in ("samplingStandard", "valuationMethod"):
                sampling[key] = child.text
            elif key == "samplingDevice":
                sampling[key] = f"{child[0].tag.split('}', 1)[1]}: {child[0].text}"
            elif key == "fieldObservation":
                observation = {}
                self._read_children_of_children(child, observation)
                setattr(self, key, observation)
            elif key == "fieldMeasurement":
                record = sampling.copy()
                for item in child:
                    key2 = item.tag.split("}", 1)[1]
                    if key2 in ("parameter", "qualityControlStatus"):
                        record[key2] = item.text
                    elif key2 == "fieldMeasurementValue":
                        record[key2] = float(item.text)
                        record["uom"] = item.attrib["uom"]
                    else:
                        # NOTE(review): the whole fieldResearch node is passed
                        # here, not the current item - confirm this is intended
                        self._read_children_of_children(node, record)
                records.append(record)
        df = pd.DataFrame(records)
        if "samplingDateTime" in df.columns:
            df = df.set_index("samplingDateTime")
        return df

    def _read_laboratory_analysis(self, node):
        """Read a laboratoryAnalysis element into a DataFrame of analyses.

        Within each child of ``node``, the values read before an analysis
        element (date, technique, valuation method) are copied into the record
        of that analysis.

        Parameters
        ----------
        node : xml element
            The laboratoryAnalysis element.

        Returns
        -------
        pd.DataFrame
            One row per analysis, indexed by "analysisDate" when that column is
            present.
        """
        records = []
        for process in node:
            # values shared by all analyses that follow within this child
            shared = {}
            for part in process:
                key = self._get_tag(part)
                if key == "analysisDate":
                    shared[key] = self._read_date(part)
                elif key in ("analyticalTechnique", "valuationMethod"):
                    shared[key] = part.text
                elif key == "analysis":
                    record = shared.copy()
                    for item in part:
                        key2 = item.tag.split("}", 1)[1]
                        if key2 in ("parameter", "qualityControlStatus", "limitSymbol"):
                            record[key2] = item.text
                        elif key2 in ("analysisMeasurementValue", "reportingLimit"):
                            record[key2] = float(item.text)
                            # both value kinds write to the same "uom" entry
                            record["uom"] = item.attrib["uom"]
                        else:
                            logger.warning(f"Unknown key: {key2}")
                    records.append(record)
        df = pd.DataFrame(records)
        if "analysisDate" in df.columns:
            df = df.set_index("analysisDate")
        return df
def get_parameter_list(url=None, timeout=5, to_file=None, **kwargs):
    """Download a DataFrame with gar-parameters from the BRO.

    Parameters
    ----------
    url : str, optional
        Address to request the parameter list from. When None (the default),
        the latest version of the domain "urn:bro:gar:ParameterList" of the
        BRO reference-code service is requested.
    timeout : int or float, optional
        Timeout passed to ``requests.get``. The default is 5.
    to_file : str, optional
        When not None, the text of the response is also written to this file.
        The default is None.
    **kwargs
        Extra keyword arguments for ``requests.get``.

    Returns
    -------
    pd.DataFrame
        The reference codes of the first domain version in the response, with
        the column "code" as index. Every entry of "refAttributeValues" is
        turned into a column of its own.

    Raises
    ------
    Exception
        When the response indicates that the request failed.
    """
    if url is None:
        url = (
            "https://publiek.broservices.nl/bro/refcodes/v1/attribute_values"
            "?domain=urn:bro:gar:ParameterList&version=latest"
        )
    r = requests.get(url, timeout=timeout, **kwargs)
    if not r.ok:
        raise Exception(f"Retrieving data from {url} failed")
    if to_file is not None:
        # write as utf-8 explicitly: the platform default encoding may not be
        # able to represent every character in the response
        with open(to_file, "w", encoding="utf-8") as f:
            f.write(r.text)
    data = r.json()["refDomainVersions"][0]["refCodes"]
    for d in data:
        # promote the name/value pairs to regular keys of the record
        for prop in d["refAttributeValues"]:
            d[prop["name"]] = prop["value"]
        d.pop("refAttributeValues")

    return pd.json_normalize(data).set_index("code")
def get_parameter_code(description, parameter_list=None):
    """Get a parameter code from a parameter description.

    Parameters
    ----------
    description : str
        Description to look up in the column "description".
    parameter_list : pd.DataFrame, optional
        Table with a column "description" and the parameter codes as index.
        When None (the default), the table is obtained with
        ``get_parameter_list()``.

    Returns
    -------
    The index value of the single row whose description matches.

    Raises
    ------
    ValueError
        When the description is not found, or is found more than once.
    """
    if parameter_list is None:
        parameter_list = get_parameter_list()
    is_match = parameter_list["description"] == description
    matches = parameter_list.index[is_match]
    n_matches = len(matches)
    if n_matches == 0:
        raise ValueError(f"Description {description} not found in Parameter List")
    if n_matches > 1:
        raise ValueError(
            f"Description {description} found more than once in Parameter List"
        )
    return matches[0]
183def _get_empty_observation_df():
184 columns = [
185 "analysisDate",
186 "analyticalTechnique",
187 "valuationMethod",
188 "parameter",
189 "analysisMeasurementValue",
190 "uom",
191 "qualityControlStatus",
192 "limitSymbol",
193 ]
194 return pd.DataFrame(columns=columns).set_index("analysisDate")
# The class that the module-level helper functions below are bound to.
cl = GroundwaterAnalysisReport

# Versions of the generic helpers in bro with the class of this module filled in
# (as keyword argument for the first, as first positional argument for the
# second).
get_bro_ids_of_bronhouder = partial(bro._get_bro_ids_of_bronhouder, cl=cl)
get_data_for_bro_ids = partial(bro._get_data_for_bro_ids, cl)

# partial objects do not take over the docstring of the wrapped function, so
# copy it by hand.
get_bro_ids_of_bronhouder.__doc__ = bro._get_bro_ids_of_bronhouder.__doc__
get_data_for_bro_ids.__doc__ = bro._get_data_for_bro_ids.__doc__