Coverage for brodata / gar.py: 90%

148 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-20 14:37 +0000

1import logging 

2from functools import partial 

3 

4import pandas as pd 

5import requests 

6 

7from . import bro 

8 

9logger = logging.getLogger(__name__) 

10 

11 

class GroundwaterAnalysisReport(bro.FileOrUrl):
    """Groundwater Analysis Report (GAR) as delivered by the BRO.

    The report is filled either from a csv-file (``_read_csv``) or from an
    xml-tree (``_read_contents``). Simple properties of the report are stored
    as attributes named after the column or xml-tag they were read from.

    Attributes
    ----------
    laboratoryAnalysis : pd.DataFrame
        DataFrame containing groundwater quality observations. Only set when
        the data contains laboratory analyses.
    fieldResearch : pd.DataFrame
        DataFrame containing the measurements of the field research. Only set
        when the data contains field research.
    """

    _rest_url = "https://publiek.broservices.nl/gm/gar/v1"
    _xmlns = "http://www.broservices.nl/xsd/dsgar/1.0"

    def _read_csv(self, csvfile, **kwargs):
        """Read a GAR csv-file and store its contents as attributes.

        Completely empty rows split the file into sections. The part above the
        first empty row holds the properties of the report. Every following
        section that is closed by another empty row is a table, whose first row
        contains the column names.

        Parameters
        ----------
        csvfile : str or file-like
            The csv-file, passed on to ``pd.read_csv``.
        **kwargs
            Passed on to ``pd.read_csv``.
        """
        table = pd.read_csv(csvfile, **kwargs)
        blank_rows = table.index[table.isna().all(axis=1)]

        # Everything above the first blank row describes the report itself.
        header = table.iloc[: blank_rows[0]]
        contents = header.dropna(how="all", axis=1).squeeze().to_dict()

        # Each pair of consecutive blank rows encloses one table: the row
        # directly after the opening blank row contains the column names.
        for start, stop in zip(blank_rows[:-1], blank_rows[1:]):
            section = table.iloc[start + 2 : stop]
            section.columns = table.iloc[start + 1]
            section.columns.name = None
            section = section.dropna(how="all", axis=1)
            # Only the table with laboratory results has a column "analysedatum".
            is_laboratory = "analysedatum" in section.columns
            contents["laboratoryAnalysis" if is_laboratory else "fieldResearch"] = (
                section
            )

        for name, value in contents.items():
            setattr(self, name, value)

    def _read_contents(self, tree):
        """Read the GAR_O object from an xml-tree and store it as attributes.

        Parameters
        ----------
        tree : xml element tree
            The parsed xml-document, passed on to ``self._get_main_object``.
        """
        namespaces = {
            "brocom": "http://www.broservices.nl/xsd/brocommon/3.0",
            "gml": "http://www.opengis.net/gml/3.2",
            "garcommon": "http://www.broservices.nl/xsd/garcommon/1.0",
            "xmlns": self._xmlns,
        }
        report = self._get_main_object(tree, "GAR_O", namespaces)

        # The xml-attributes carry a "{namespace}" prefix, which is stripped.
        for qualified_name, value in report.attrib.items():
            setattr(self, qualified_name.split("}", 1)[1], value)

        # Tags that can occur more than once: the tables of all occurrences are
        # collected in a list first, and combined after the loop.
        table_readers = {
            "fieldResearch": self._read_field_research,
            "laboratoryAnalysis": self._read_laboratory_analysis,
        }

        for element in report:
            tag = self._get_tag(element)
            if len(element) == 0:
                # An element without children is stored as plain text.
                setattr(self, tag, element.text)
                continue
            if tag == "registrationHistory":
                self._read_children_of_children(element)
                continue
            if tag == "groundwaterMonitoringNet":
                for net in element:
                    net_tag = net.tag.split("}", 1)[1]
                    if net_tag != "GroundwaterMonitoringNet":
                        logger.warning(f"Unknown key: {net_tag}")
                        continue
                    # Only the text of the first child of the net is kept.
                    self.groundwaterMonitoringNet = net[0].text
                continue
            if tag == "monitoringPoint":
                tube = element.find("garcommon:GroundwaterMonitoringTube", namespaces)
                self.groundwaterMonitoringWell = tube.find(
                    "garcommon:broId", namespaces
                ).text
                self.tubeNumber = int(tube.find("garcommon:tubeNumber", namespaces).text)
                continue
            if tag in table_readers:
                if not hasattr(self, tag):
                    setattr(self, tag, [])
                getattr(self, tag).append(table_readers[tag](element))
                continue
            self._warn_unknown_tag(tag)

        # Combine the collected tables into one DataFrame per kind.
        for name in ("fieldResearch", "laboratoryAnalysis"):
            if hasattr(self, name):
                setattr(self, name, pd.concat(getattr(self, name)))

    def _read_field_research(self, node):
        """Read one fieldResearch element into a DataFrame.

        Every fieldMeasurement results in one row. The sampling properties that
        were read before the measurement are copied into that row. A
        fieldObservation is not added to the table, but stored as a dictionary
        in the attribute ``fieldObservation``.

        Parameters
        ----------
        node : xml element
            The fieldResearch element.

        Returns
        -------
        pd.DataFrame
            The field measurements, indexed by "samplingDateTime" when that
            column is present.
        """
        records = []
        shared = {}
        for element in node:
            tag = self._get_tag(element)
            if tag == "samplingDateTime":
                shared[tag] = pd.to_datetime(element.text)
            elif tag in ("samplingStandard", "valuationMethod"):
                shared[tag] = element.text
            elif tag == "samplingDevice":
                # Described by the tag and the text of its first child.
                device = element[0]
                device_tag = device.tag.split("}", 1)[1]
                shared[tag] = f"{device_tag}: {device.text}"
            elif tag == "fieldObservation":
                observation = {}
                self._read_children_of_children(element, observation)
                self.fieldObservation = observation
            elif tag == "fieldMeasurement":
                record = dict(shared)
                for item in element:
                    item_tag = item.tag.split("}", 1)[1]
                    if item_tag in ("parameter", "qualityControlStatus"):
                        record[item_tag] = item.text
                    elif item_tag == "fieldMeasurementValue":
                        record[item_tag] = float(item.text)
                        record["uom"] = item.attrib["uom"]
                    else:
                        # NOTE(review): the complete fieldResearch node is passed
                        # on here, not the unrecognised element - confirm that
                        # this is intended.
                        self._read_children_of_children(node, record)
                records.append(record)

        frame = pd.DataFrame(records)
        if "samplingDateTime" in frame.columns:
            frame = frame.set_index("samplingDateTime")
        return frame

    def _read_laboratory_analysis(self, node):
        """Read one laboratoryAnalysis element into a DataFrame.

        Every analysis results in one row. The properties of the enclosing
        element that were read before the analysis are copied into that row.

        Parameters
        ----------
        node : xml element
            The laboratoryAnalysis element.

        Returns
        -------
        pd.DataFrame
            The analyses, indexed by "analysisDate" when that column is present.
        """
        records = []
        for process in node:
            shared = {}
            for element in process:
                tag = self._get_tag(element)
                if tag == "analysisDate":
                    shared[tag] = self._read_date(element)
                elif tag in ("analyticalTechnique", "valuationMethod"):
                    shared[tag] = element.text
                elif tag == "analysis":
                    record = dict(shared)
                    for item in element:
                        item_tag = item.tag.split("}", 1)[1]
                        if item_tag in (
                            "parameter",
                            "qualityControlStatus",
                            "limitSymbol",
                        ):
                            record[item_tag] = item.text
                        elif item_tag in ("analysisMeasurementValue", "reportingLimit"):
                            record[item_tag] = float(item.text)
                            # Both values write to "uom": the unit of the value
                            # that is read last is the one that is kept.
                            record["uom"] = item.attrib["uom"]
                        else:
                            logger.warning(f"Unknown key: {item_tag}")
                    records.append(record)

        frame = pd.DataFrame(records)
        if "analysisDate" in frame.columns:
            frame = frame.set_index("analysisDate")
        return frame

146 

147 

def get_parameter_list(url=None, timeout=5, to_file=None, **kwargs):
    """Download a DataFrame with gar-parameters from the BRO.

    Parameters
    ----------
    url : str, optional
        The address to download the parameter list from. When url is None, the
        latest version of the domain "urn:bro:gar:ParameterList" is requested
        from the reference-code service of the BRO. The default is None.
    timeout : int or float, optional
        The timeout in seconds, passed on to ``requests.get``. The default is 5.
    to_file : str, optional
        When not None, the text of the response is also written to this file
        (utf-8 encoded). The default is None.
    **kwargs
        Passed on to ``requests.get``.

    Returns
    -------
    pd.DataFrame
        A DataFrame with one row for each reference code, with "code" as the
        index. Each entry of "refAttributeValues" is added as a column of its
        own, named after the "name" of the entry.

    Raises
    ------
    Exception
        When the response of the server is not ok.
    """
    if url is None:
        url = "https://publiek.broservices.nl/bro/refcodes/v1/attribute_values?domain=urn:bro:gar:ParameterList&version=latest"
    r = requests.get(url, timeout=timeout, **kwargs)
    if not r.ok:
        raise Exception(f"Retrieving data from {url} failed")
    if to_file is not None:
        # An explicit encoding makes the result independent of the platform
        # default, which cannot represent every character on all systems.
        with open(to_file, "w", encoding="utf-8") as f:
            f.write(r.text)
    data = r.json()["refDomainVersions"][0]["refCodes"]
    for d in data:
        # Flatten the list of name/value pairs into keys of the record itself,
        # so that each attribute becomes a column of the DataFrame.
        for prop in d["refAttributeValues"]:
            d[prop["name"]] = prop["value"]
        d.pop("refAttributeValues")

    df = pd.json_normalize(data).set_index("code")
    return df

166 

167 

def get_parameter_code(description, parameter_list=None):
    """Get a parameter code from a parameter description.

    Parameters
    ----------
    description : str
        The description to look up in the column "description".
    parameter_list : pd.DataFrame, optional
        The table to search, with the parameter codes as the index. When None,
        the table is retrieved with ``get_parameter_list()``. The default is
        None.

    Returns
    -------
    The index value (the code) of the only row with a matching description.

    Raises
    ------
    ValueError
        When the description is not found, or is found more than once.
    """
    if parameter_list is None:
        parameter_list = get_parameter_list()

    is_match = parameter_list["description"] == description
    matches = parameter_list.index[is_match]
    n_matches = len(matches)

    if n_matches == 1:
        return matches[0]
    if n_matches == 0:
        raise ValueError(f"Description {description} not found in Parameter List")
    raise ValueError(
        f"Description {description} found more than once in Parameter List"
    )

181 

182 

def _get_empty_observation_df():
    """Return a DataFrame without rows, with the columns of an observation table.

    Returns
    -------
    pd.DataFrame
        An empty DataFrame that is indexed by "analysisDate".
    """
    index_name = "analysisDate"
    value_columns = [
        "analyticalTechnique",
        "valuationMethod",
        "parameter",
        "analysisMeasurementValue",
        "uom",
        "qualityControlStatus",
        "limitSymbol",
    ]
    empty = pd.DataFrame(columns=[index_name, *value_columns])
    return empty.set_index(index_name)

195 

196 

# The class that the module-level functions below operate on.
cl = GroundwaterAnalysisReport

# Versions of the generic functions in bro with the class already filled in.
get_bro_ids_of_bronhouder = partial(
    bro._get_bro_ids_of_bronhouder, cl=GroundwaterAnalysisReport
)
get_data_for_bro_ids = partial(bro._get_data_for_bro_ids, GroundwaterAnalysisReport)

# A partial object does not take over the docstring of the function it wraps,
# so the docstrings are copied explicitly.
get_bro_ids_of_bronhouder.__doc__ = bro._get_bro_ids_of_bronhouder.__doc__
get_data_for_bro_ids.__doc__ = bro._get_data_for_bro_ids.__doc__