Coverage for brodata / gar.py: 90%

147 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-13 12:57 +0000

1import logging 

2from functools import partial 

3 

4import pandas as pd 

5 

6from . import bro 

7 

8logger = logging.getLogger(__name__) 

9 

10 

class GroundwaterAnalysisReport(bro.FileOrUrl):
    """Class to represent a Groundwater Analysis Report (GAR) from the BRO.

    Attributes
    ----------
    laboratoryAnalysis : pd.DataFrame
        DataFrame containing groundwater quality observations.
    fieldResearch : pd.DataFrame
        DataFrame containing field measurements. Only set when the source
        contains field research.
    """

    _rest_url = "https://publiek.broservices.nl/gm/gar/v1"
    _xmlns = "http://www.broservices.nl/xsd/dsgar/1.0"

    def _read_csv(self, csvfile, **kwargs):
        """Read a GAR csv-file and store its contents as attributes.

        The file is split into sections at the rows that are completely empty.
        The rows before the first empty row hold the general properties, which
        are set as attributes. Every section between two empty rows starts
        with its own header row and is stored as a DataFrame: in
        ``laboratoryAnalysis`` when it has a column "analysedatum", otherwise
        in ``fieldResearch``.

        Parameters
        ----------
        csvfile
            Passed to `pandas.read_csv`.
        **kwargs
            Passed to `pandas.read_csv`.

        Raises
        ------
        IndexError
            When the file does not contain a completely empty row.
        """
        df = pd.read_csv(csvfile, **kwargs)
        na_rows = df.index[df.isna().all(axis=1)]
        # NOTE(review): the index labels of the empty rows are used as
        # positions below, which assumes the default RangeIndex - confirm that
        # no caller passes e.g. index_col through kwargs.
        idata = df.iloc[: na_rows[0]].dropna(how="all", axis=1).squeeze().to_dict()
        # A section runs from one empty row to the next one, so rows after the
        # last empty row are not read.
        for start, stop in zip(na_rows[:-1], na_rows[1:]):
            # The row directly after the empty row holds the column names.
            idf = df.iloc[start + 2 : stop]
            idf.columns = df.iloc[start + 1]
            idf.columns.name = None
            idf = idf.dropna(how="all", axis=1)
            if "analysedatum" in idf.columns:
                key = "laboratoryAnalysis"
            else:
                key = "fieldResearch"
            idata[key] = idf
        for k, v in idata.items():
            setattr(self, k, v)

    def _read_contents(self, tree):
        """Read the GAR_O element of an xml-tree and store it as attributes.

        Simple child elements are stored as text. The field research and
        laboratory analyses are collected and concatenated into the DataFrames
        ``fieldResearch`` and ``laboratoryAnalysis``; these attributes are only
        set when the corresponding elements are present.

        Parameters
        ----------
        tree
            The parsed xml-tree, passed to ``self._get_main_object``.
        """
        # The helper methods used here (_get_main_object, _get_tag,
        # _read_children_of_children, _warn_unknown_tag) are not defined in
        # this class; presumably they are inherited from bro.FileOrUrl.
        ns = {
            "brocom": "http://www.broservices.nl/xsd/brocommon/3.0",
            "gml": "http://www.opengis.net/gml/3.2",
            "garcommon": "http://www.broservices.nl/xsd/garcommon/1.0",
            "xmlns": self._xmlns,
        }
        gar = self._get_main_object(tree, "GAR_O", ns)
        for key, value in gar.attrib.items():
            # Attribute names look like "{namespace}name", but attributes
            # without a prefix have no "{namespace}" part. Taking the last
            # piece of the split handles both, where index 1 would raise an
            # IndexError for un-prefixed attributes.
            setattr(self, key.split("}", 1)[-1], value)
        for child in gar:
            key = self._get_tag(child)
            if len(child) == 0:
                # An element without children: store its text.
                setattr(self, key, child.text)
            elif key == "registrationHistory":
                self._read_children_of_children(child)
            elif key == "groundwaterMonitoringNet":
                for grandchild in child:
                    key2 = grandchild.tag.split("}", 1)[1]
                    if key2 == "GroundwaterMonitoringNet":
                        # Store the text of the first element inside it.
                        setattr(self, key, grandchild[0].text)
                    else:
                        logger.warning(f"Unknown key: {key2}")
            elif key == "monitoringPoint":
                well = child.find("garcommon:GroundwaterMonitoringTube", ns)
                gmw_id = well.find("garcommon:broId", ns).text
                self.groundwaterMonitoringWell = gmw_id
                tube_nr = int(well.find("garcommon:tubeNumber", ns).text)
                self.tubeNumber = tube_nr
            elif key == "fieldResearch":
                # Collect one DataFrame per element; concatenated below.
                if not hasattr(self, key):
                    self.fieldResearch = []
                self.fieldResearch.append(self._read_field_research(child))
            elif key == "laboratoryAnalysis":
                if not hasattr(self, key):
                    self.laboratoryAnalysis = []
                self.laboratoryAnalysis.append(self._read_laboratory_analysis(child))
            else:
                self._warn_unknown_tag(key)
        if hasattr(self, "fieldResearch"):
            self.fieldResearch = pd.concat(self.fieldResearch)
        if hasattr(self, "laboratoryAnalysis"):
            self.laboratoryAnalysis = pd.concat(self.laboratoryAnalysis)

    def _read_field_research(self, node):
        """Read a fieldResearch element into a DataFrame.

        Every fieldMeasurement becomes one row. The general properties
        (samplingDateTime, samplingStandard, valuationMethod and
        samplingDevice) that were read before a measurement are copied into
        its row. A fieldObservation is not added to the DataFrame, but stored
        as a dictionary in the attribute ``fieldObservation``.

        Parameters
        ----------
        node
            The fieldResearch xml-element.

        Returns
        -------
        pd.DataFrame
            The field measurements, with "samplingDateTime" as the index when
            that column is present.
        """
        field_research = []

        d = {}
        for child in node:
            key = self._get_tag(child)
            if key == "samplingDateTime":
                d[key] = pd.to_datetime(child.text)
            elif key in ["samplingStandard", "valuationMethod"]:
                d[key] = child.text
            elif key in ["samplingDevice"]:
                # Stored as "<tag of the first child>: <text of that child>".
                d[key] = f"{child[0].tag.split('}', 1)[1]}: {child[0].text}"
            elif key in ["fieldObservation"]:
                d2 = {}
                self._read_children_of_children(child, d2)
                setattr(self, key, d2)
            elif key in ["fieldMeasurement"]:
                d2 = d.copy()
                for greatgrandchild in child:
                    key2 = greatgrandchild.tag.split("}", 1)[1]
                    if key2 in ["parameter", "qualityControlStatus"]:
                        d2[key2] = greatgrandchild.text
                    elif key2 in ["fieldMeasurementValue"]:
                        d2[key2] = float(greatgrandchild.text)
                        d2["uom"] = greatgrandchild.attrib["uom"]
                    else:
                        # NOTE(review): this passes the whole fieldResearch
                        # node instead of the unknown element - confirm that
                        # this is intended.
                        self._read_children_of_children(node, d2)
                field_research.append(d2)
        df = pd.DataFrame(field_research)
        if "samplingDateTime" in df.columns:
            df = df.set_index("samplingDateTime")
        return df

    def _read_laboratory_analysis(self, node):
        """Read a laboratoryAnalysis element into a DataFrame.

        Every analysis element becomes one row. The general properties
        (analysisDate, analyticalTechnique and valuationMethod) that were read
        before an analysis within the same child of ``node`` are copied into
        its row.

        Parameters
        ----------
        node
            The laboratoryAnalysis xml-element.

        Returns
        -------
        pd.DataFrame
            The analyses, with "analysisDate" as the index when that column is
            present.
        """
        laboratory_analysis = []
        for child in node:
            # The general properties are reset for every child of node.
            d = {}
            for grandchild in child:
                key = self._get_tag(grandchild)
                if key == "analysisDate":
                    d[key] = self._read_date(grandchild)
                elif key in ["analyticalTechnique", "valuationMethod"]:
                    d[key] = grandchild.text
                elif key == "analysis":
                    d2 = d.copy()
                    for greatgrandchild in grandchild:
                        key2 = greatgrandchild.tag.split("}", 1)[1]
                        if key2 in ["parameter", "qualityControlStatus", "limitSymbol"]:
                            d2[key2] = greatgrandchild.text
                        elif key2 in ["analysisMeasurementValue", "reportingLimit"]:
                            d2[key2] = float(greatgrandchild.text)
                            # Both elements write to the same "uom" key, so
                            # the last one read determines the stored unit.
                            d2["uom"] = greatgrandchild.attrib["uom"]
                        else:
                            logger.warning(f"Unknown key: {key2}")
                    laboratory_analysis.append(d2)
        df = pd.DataFrame(laboratory_analysis)
        if "analysisDate" in df.columns:
            df = df.set_index("analysisDate")
        return df

145 

146 

def get_parameter_list(url=None, timeout=5, to_file=None, **kwargs):
    """Download a DataFrame with gar-parameters from the BRO.

    Parameters
    ----------
    url : str, optional
        The url to download the parameter list from. When None (the default),
        the latest version of the gar ParameterList of the reference codes
        service of the BRO is used.
    timeout : int or float, optional
        Passed to `bro.util.get_with_rate_limit`. The default is 5.
    to_file : str, optional
        When not None, the text of the response is written to this file. The
        default is None.
    **kwargs
        Passed to `bro.util.get_with_rate_limit`.

    Returns
    -------
    df : pd.DataFrame
        One row per reference code, with "code" as the index. Every entry of
        "refAttributeValues" of a reference code is added as a column, named
        after the "name" of the entry.

    Raises
    ------
    Exception
        When the request was not successful.
    """
    if url is None:
        url = "https://publiek.broservices.nl/bro/refcodes/v1/attribute_values?domain=urn:bro:gar:ParameterList&version=latest"
    r = bro.util.get_with_rate_limit(url, timeout=timeout, **kwargs)
    if not r.ok:
        raise Exception(f"Retrieving data from {url} failed")
    if to_file is not None:
        # Set the encoding explicitly, so the written file does not depend on
        # the locale of the machine.
        with open(to_file, "w", encoding="utf-8") as f:
            f.write(r.text)
    data = r.json()["refDomainVersions"][0]["refCodes"]
    for d in data:
        # Flatten the list of name/value-pairs into the reference code itself.
        for prop in d["refAttributeValues"]:
            d[prop["name"]] = prop["value"]
        d.pop("refAttributeValues")

    df = pd.json_normalize(data).set_index("code")
    return df

165 

166 

def get_parameter_code(description, parameter_list=None):
    """Get a parameter code from a parameter description.

    Parameters
    ----------
    description : str
        The description to look up in the column "description".
    parameter_list : pd.DataFrame, optional
        The parameter list, with the parameter codes as the index. When None
        (the default), it is downloaded with `get_parameter_list`.

    Returns
    -------
    The index value (code) of the single row that matches the description.

    Raises
    ------
    ValueError
        When the description is not found, or found more than once.
    """
    if parameter_list is None:
        parameter_list = get_parameter_list()
    is_match = parameter_list["description"] == description
    matches = parameter_list.index[is_match]
    n_matches = len(matches)
    if n_matches == 1:
        return matches[0]
    if n_matches == 0:
        raise ValueError(f"Description {description} not found in Parameter List")
    raise ValueError(
        f"Description {description} found more than once in Parameter List"
    )

180 

181 

def _get_empty_observation_df():
    """Return an empty DataFrame with the columns of laboratory observations.

    Returns
    -------
    pd.DataFrame
        A DataFrame without rows, with "analysisDate" as the index.
    """
    index_name = "analysisDate"
    value_columns = [
        "analyticalTechnique",
        "valuationMethod",
        "parameter",
        "analysisMeasurementValue",
        "uom",
        "qualityControlStatus",
        "limitSymbol",
    ]
    empty = pd.DataFrame(columns=[index_name, *value_columns])
    return empty.set_index(index_name)

194 

195 

# The class that the generic functions of bro are bound to in this module.
cl = GroundwaterAnalysisReport

# Versions of the generic functions of bro with the class filled in.
get_bro_ids_of_bronhouder = partial(bro._get_bro_ids_of_bronhouder, cl=cl)
get_data_for_bro_ids = partial(bro._get_data_for_bro_ids, cl)

# Copy the docstrings of the wrapped functions onto the partial objects.
get_bro_ids_of_bronhouder.__doc__ = bro._get_bro_ids_of_bronhouder.__doc__
get_data_for_bro_ids.__doc__ = bro._get_data_for_bro_ids.__doc__