Coverage for brodata / gar.py: 90%
147 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-13 12:57 +0000
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-13 12:57 +0000
1import logging
2from functools import partial
4import pandas as pd
6from . import bro
8logger = logging.getLogger(__name__)
class GroundwaterAnalysisReport(bro.FileOrUrl):
    """Class to represent a Groundwater Analysis Report (GAR) from the BRO.

    Attributes
    ----------
    laboratoryAnalysis : pd.DataFrame
        DataFrame containing groundwater quality observations.
    fieldResearch : pd.DataFrame
        DataFrame containing the field measurements.

    Both attributes are only set when the source that was read contains the
    corresponding data.
    """

    # Class-level constants. `_xmlns` is used as the "xmlns" namespace in
    # `_read_contents`; `_rest_url` is not used in this class and is presumably
    # consumed by bro.FileOrUrl (not visible here).
    _rest_url = "https://publiek.broservices.nl/gm/gar/v1"
    _xmlns = "http://www.broservices.nl/xsd/dsgar/1.0"

    def _read_csv(self, csvfile, **kwargs):
        """Read a csv-file and store its contents as attributes of this object.

        The file is read with `pd.read_csv`. Rows in which every value is NaN
        are treated as separators: the rows before the first separator hold
        single values, and each block between two consecutive separators is a
        table whose header is the first row after the separator.

        Parameters
        ----------
        csvfile
            Anything accepted by `pd.read_csv`.
        **kwargs
            Passed on to `pd.read_csv`.

        Raises
        ------
        IndexError
            If the file contains no row that consists entirely of NaN.
        """
        df = pd.read_csv(csvfile, **kwargs)
        # index labels of the rows in which all values are NaN
        na_rows = df.index[df.isna().all(axis=1)]
        # Everything before the first separator: drop the empty columns and
        # squeeze what remains into a dict.
        # NOTE(review): na_rows holds index labels but is used positionally in
        # iloc; this assumes the default RangeIndex of read_csv - confirm that
        # callers never pass e.g. index_col.
        idata = df.iloc[: na_rows[0]].dropna(how="all", axis=1).squeeze().to_dict()
        # Only blocks enclosed by two separators are read; rows after the last
        # separator are not.
        for i in range(len(na_rows) - 1):
            # data rows of the block (the row directly after the separator is
            # the header and is skipped here)
            idf = df.iloc[na_rows[i] + 2 : na_rows[i + 1]]
            idf.columns = df.iloc[na_rows[i] + 1]
            idf.columns.name = None
            idf = idf.dropna(how="all", axis=1)
            # a column named "analysedatum" marks the laboratory analysis table
            if "analysedatum" in idf.columns:
                key = "laboratoryAnalysis"
            else:
                key = "fieldResearch"
            # a later table with the same key replaces an earlier one
            idata[key] = idf
        for k, v in idata.items():
            setattr(self, k, v)

    def _read_contents(self, tree):
        """Read the GAR_O object from an xml-tree and store it as attributes.

        Parameters
        ----------
        tree
            Parsed xml, passed on to `self._get_main_object` (defined outside
            this class; presumably an ElementTree element - TODO confirm).
        """
        ns = {
            "brocom": "http://www.broservices.nl/xsd/brocommon/3.0",
            "gml": "http://www.opengis.net/gml/3.2",
            "garcommon": "http://www.broservices.nl/xsd/garcommon/1.0",
            "xmlns": self._xmlns,
        }
        gar = self._get_main_object(tree, "GAR_O", ns)
        for key in gar.attrib:
            # keep only the part of the attribute name after the "{namespace}"
            setattr(self, key.split("}", 1)[1], gar.attrib[key])
        for child in gar:
            # presumably the tag of the child without its namespace; the helper
            # is defined outside this class
            key = self._get_tag(child)
            if len(child) == 0:
                # element without sub-elements: store its text as is
                setattr(self, key, child.text)
            elif key == "registrationHistory":
                self._read_children_of_children(child)
            elif key == "groundwaterMonitoringNet":
                for grandchild in child:
                    key2 = grandchild.tag.split("}", 1)[1]
                    if key2 == "GroundwaterMonitoringNet":
                        # store the text of the first sub-element; a later
                        # GroundwaterMonitoringNet overwrites an earlier one
                        setattr(self, key, grandchild[0].text)
                    else:
                        logger.warning(f"Unknown key: {key2}")
            elif key == "monitoringPoint":
                # the monitoring point is stored as the broId of the well and
                # the (integer) number of the tube
                well = child.find("garcommon:GroundwaterMonitoringTube", ns)
                gmw_id = well.find("garcommon:broId", ns).text
                setattr(self, "groundwaterMonitoringWell", gmw_id)
                tube_nr = int(well.find("garcommon:tubeNumber", ns).text)
                setattr(self, "tubeNumber", tube_nr)
            elif key == "fieldResearch":
                # collect one DataFrame per element; they are combined below
                if not hasattr(self, key):
                    self.fieldResearch = []
                self.fieldResearch.append(self._read_field_research(child))
            elif key == "laboratoryAnalysis":
                # collect one DataFrame per element; they are combined below
                if not hasattr(self, key):
                    self.laboratoryAnalysis = []
                self.laboratoryAnalysis.append(self._read_laboratory_analysis(child))
            else:
                self._warn_unknown_tag(key)
        # replace the lists of DataFrames by a single DataFrame each
        if hasattr(self, "fieldResearch"):
            self.fieldResearch = pd.concat(self.fieldResearch)
        if hasattr(self, "laboratoryAnalysis"):
            self.laboratoryAnalysis = pd.concat(self.laboratoryAnalysis)

    def _read_field_research(self, node):
        """Read a fieldResearch element into a DataFrame of field measurements.

        Parameters
        ----------
        node
            The fieldResearch xml-element.

        Returns
        -------
        pd.DataFrame
            One row per fieldMeasurement element, with "samplingDateTime" as
            the index when that column is present.

        Notes
        -----
        A fieldObservation element is not added to the rows, but stored as a
        dict in the attribute `fieldObservation` of this object.
        """
        field_research = []

        # values that apply to the sampling as a whole; each measurement row
        # starts as a copy of what has been collected here up to that point
        d = {}
        for child in node:
            key = self._get_tag(child)
            if key == "samplingDateTime":
                d[key] = pd.to_datetime(child.text)
            elif key in ["samplingStandard", "valuationMethod"]:
                d[key] = child.text
            elif key in ["samplingDevice"]:
                # stored as "<tag of first sub-element>: <its text>"
                d[key] = f"{child[0].tag.split('}', 1)[1]}: {child[0].text}"
            elif key in ["fieldObservation"]:
                d2 = {}
                self._read_children_of_children(child, d2)
                setattr(self, key, d2)
            elif key in ["fieldMeasurement"]:
                # only values that were read before this element are copied
                d2 = d.copy()
                for greatgrandchild in child:
                    key2 = greatgrandchild.tag.split("}", 1)[1]
                    if key2 in ["parameter", "qualityControlStatus"]:
                        d2[key2] = greatgrandchild.text
                    elif key2 in ["fieldMeasurementValue"]:
                        d2[key2] = float(greatgrandchild.text)
                        d2["uom"] = greatgrandchild.attrib["uom"]
                    else:
                        # NOTE(review): the nesting of this branch was
                        # reconstructed by analogy with
                        # _read_laboratory_analysis; confirm that it belongs
                        # to the key2-chain, and that `node` (rather than
                        # `greatgrandchild`) is the intended argument.
                        self._read_children_of_children(node, d2)
                field_research.append(d2)
        # field_research.append(d)
        df = pd.DataFrame(field_research)
        if "samplingDateTime" in df.columns:
            df = df.set_index("samplingDateTime")
        return df

    def _read_laboratory_analysis(self, node):
        """Read a laboratoryAnalysis element into a DataFrame of analyses.

        Parameters
        ----------
        node
            The laboratoryAnalysis xml-element.

        Returns
        -------
        pd.DataFrame
            One row per analysis element, with "analysisDate" as the index
            when that column is present.
        """
        laboratory_analysis = []
        for child in node:
            # values shared by the analyses within this child; each analysis
            # row starts as a copy of what has been collected up to that point
            d = {}
            for grandchild in child:
                key = self._get_tag(grandchild)
                if key == "analysisDate":
                    d[key] = self._read_date(grandchild)
                elif key in ["analyticalTechnique", "valuationMethod"]:
                    d[key] = grandchild.text
                elif key == "analysis":
                    d2 = d.copy()
                    for greatgrandchild in grandchild:
                        key2 = greatgrandchild.tag.split("}", 1)[1]
                        if key2 in ["parameter", "qualityControlStatus", "limitSymbol"]:
                            d2[key2] = greatgrandchild.text
                        elif key2 in ["analysisMeasurementValue", "reportingLimit"]:
                            d2[key2] = float(greatgrandchild.text)
                            # the same "uom" key is written for both the value
                            # and the reporting limit; the last one read wins
                            d2["uom"] = greatgrandchild.attrib["uom"]
                        else:
                            logger.warning(f"Unknown key: {key2}")
                    laboratory_analysis.append(d2)
            # laboratory_analysis.append(d)
        df = pd.DataFrame(laboratory_analysis)
        if "analysisDate" in df.columns:
            df = df.set_index("analysisDate")
        return df
def get_parameter_list(url=None, timeout=5, to_file=None, **kwargs):
    """Download a DataFrame with gar-parameters from the BRO.

    Parameters
    ----------
    url : str, optional
        The url to request. The default is None, in which case the latest
        version of the domain "urn:bro:gar:ParameterList" is requested from
        the reference-code service of the BRO.
    timeout : int or float, optional
        Passed on to `bro.util.get_with_rate_limit`. The default is 5.
    to_file : str, optional
        When not None, the text of the response is written to this file
        (utf-8 encoded). The default is None.
    **kwargs
        Passed on to `bro.util.get_with_rate_limit`.

    Returns
    -------
    pd.DataFrame
        One row per parameter, indexed by "code". Every entry of
        "refAttributeValues" in the response is turned into a column.

    Raises
    ------
    Exception
        If the response indicates that the request failed.
    """
    if url is None:
        url = (
            "https://publiek.broservices.nl/bro/refcodes/v1/attribute_values"
            "?domain=urn:bro:gar:ParameterList&version=latest"
        )
    r = bro.util.get_with_rate_limit(url, timeout=timeout, **kwargs)
    if not r.ok:
        raise Exception(f"Retrieving data from {url} failed")
    if to_file is not None:
        # An explicit encoding keeps the file independent of the platform
        # default, which may not be able to represent every character.
        with open(to_file, "w", encoding="utf-8") as f:
            f.write(r.text)
    data = r.json()["refDomainVersions"][0]["refCodes"]
    for d in data:
        # flatten the list of name/value pairs into keys of the record itself
        for prop in d["refAttributeValues"]:
            d[prop["name"]] = prop["value"]
        d.pop("refAttributeValues")

    df = pd.json_normalize(data).set_index("code")
    return df
def get_parameter_code(description, parameter_list=None):
    """Get a parameter code from a parameter description.

    Parameters
    ----------
    description : str
        The description to look up; it has to match exactly.
    parameter_list : pd.DataFrame, optional
        Table indexed by code, with a column "description". The default is
        None, in which case the table is obtained from `get_parameter_list`.

    Returns
    -------
    The index value (code) of the single row with this description.

    Raises
    ------
    ValueError
        If the description is not present, or is present more than once.
    """
    if parameter_list is None:
        parameter_list = get_parameter_list()

    is_match = parameter_list["description"] == description
    matches = parameter_list.index[is_match]
    n_matches = len(matches)

    if n_matches == 1:
        return matches[0]
    if n_matches == 0:
        raise ValueError(f"Description {description} not found in Parameter List")
    raise ValueError(
        f"Description {description} found more than once in Parameter List"
    )
def _get_empty_observation_df():
    """Return an empty DataFrame with the columns of a laboratory analysis.

    Returns
    -------
    pd.DataFrame
        A DataFrame without rows, indexed by "analysisDate".
    """
    index_name = "analysisDate"
    value_columns = [
        "analyticalTechnique",
        "valuationMethod",
        "parameter",
        "analysisMeasurementValue",
        "uom",
        "qualityControlStatus",
        "limitSymbol",
    ]
    empty = pd.DataFrame(columns=[index_name, *value_columns])
    return empty.set_index(index_name)
# The class of this module, filled in as an argument of the helpers below.
cl = GroundwaterAnalysisReport

# Module-level versions of the helpers in `bro` with the class already bound
# (by keyword for the first helper, positionally for the second).
get_bro_ids_of_bronhouder = partial(bro._get_bro_ids_of_bronhouder, cl=cl)
get_data_for_bro_ids = partial(bro._get_data_for_bro_ids, cl)

# A partial object does not take over the docstring of the function it wraps,
# so it is copied explicitly.
get_bro_ids_of_bronhouder.__doc__ = bro._get_bro_ids_of_bronhouder.__doc__
get_data_for_bro_ids.__doc__ = bro._get_data_for_bro_ids.__doc__