@@ -107,132 +107,119 @@ def make_analysis_in_a_day(params: MakeAnalysisParams) -> dict:
107107
108108 tracer = opentelemetry .trace .get_tracer (__name__ )
109109
110- with opentelemetry .trace .get_current_span ():
111- fingerprintdb = FingerprintDB (datadir = data_dir , download = False )
112- body_db = BodyDB (db = ClickhouseConnection (clickhouse ))
113- db_writer = ClickhouseConnection (clickhouse , row_buffer_size = 10_000 )
114- db_lookup = ClickhouseConnection (clickhouse )
115-
116- column_names_wa = [f .name for f in dataclasses .fields (WebAnalysis )]
117- column_names_er = [
118- f .name for f in dataclasses .fields (MeasurementExperimentResult )
119- ]
120-
121- # TODO(art): this previous range search and deletion makes the idempotence
122- # of the activity not 100% accurate.
123- # We should look into fixing it.
124- prev_range_list = [
125- get_prev_range (
126- db = db_lookup ,
127- table_name = WebAnalysis .__table_name__ ,
128- timestamp = datetime .combine (day , datetime .min .time ()),
129- test_name = [],
130- probe_cc = probe_cc ,
131- timestamp_column = "measurement_start_time" ,
132- ),
133- get_prev_range (
134- db = db_lookup ,
135- table_name = MeasurementExperimentResult .__table_name__ ,
136- timestamp = datetime .combine (day , datetime .min .time ()),
137- test_name = [],
138- probe_cc = probe_cc ,
139- timestamp_column = "timeofday" ,
140- probe_cc_column = "location_network_cc" ,
141- ),
142- ]
143-
144- log .info (f"loading ground truth DB for { day } " )
145- with tracer .start_as_current_span (
146- "MakeObservations:load_ground_truths"
147- ) as span :
148- ground_truth_db_path = (
149- data_dir / "ground_truths" / f"web-{ day .strftime ('%Y-%m-%d' )} .sqlite3"
150- )
151- web_ground_truth_db = WebGroundTruthDB ()
152- web_ground_truth_db .build_from_existing (
153- str (ground_truth_db_path .absolute ())
154- )
155- log .info (f"loaded ground truth DB for { day } " )
156- span .add_event (f"loaded ground truth DB for { day } " )
157- span .set_attribute ("day" , day .strftime ("%Y-%m-%d" ))
158- span .set_attribute (
159- "ground_truth_row_count" , web_ground_truth_db .count_rows ()
160- )
110+ fingerprintdb = FingerprintDB (datadir = data_dir , download = False )
111+ body_db = BodyDB (db = ClickhouseConnection (clickhouse ))
112+ db_writer = ClickhouseConnection (clickhouse , row_buffer_size = 10_000 )
113+ db_lookup = ClickhouseConnection (clickhouse )
114+
115+ column_names_wa = [f .name for f in dataclasses .fields (WebAnalysis )]
116+ column_names_er = [f .name for f in dataclasses .fields (MeasurementExperimentResult )]
117+
118+ # TODO(art): this previous range search and deletion makes the idempotence
119+ # of the activity not 100% accurate.
120+ # We should look into fixing it.
121+ prev_range_list = [
122+ get_prev_range (
123+ db = db_lookup ,
124+ table_name = WebAnalysis .__table_name__ ,
125+ timestamp = datetime .combine (day , datetime .min .time ()),
126+ test_name = [],
127+ probe_cc = probe_cc ,
128+ timestamp_column = "measurement_start_time" ,
129+ ),
130+ get_prev_range (
131+ db = db_lookup ,
132+ table_name = MeasurementExperimentResult .__table_name__ ,
133+ timestamp = datetime .combine (day , datetime .min .time ()),
134+ test_name = [],
135+ probe_cc = probe_cc ,
136+ timestamp_column = "timeofday" ,
137+ probe_cc_column = "location_network_cc" ,
138+ ),
139+ ]
140+
141+ log .info (f"loading ground truth DB for { day } " )
142+ with tracer .start_span ("MakeObservations:load_ground_truths" ) as span :
143+ ground_truth_db_path = (
144+ data_dir / "ground_truths" / f"web-{ day .strftime ('%Y-%m-%d' )} .sqlite3"
145+ )
146+ web_ground_truth_db = WebGroundTruthDB ()
147+ web_ground_truth_db .build_from_existing (str (ground_truth_db_path .absolute ()))
148+ log .info (f"loaded ground truth DB for { day } " )
149+ span .add_event (f"loaded ground truth DB for { day } " )
150+ span .set_attribute ("day" , day .strftime ("%Y-%m-%d" ))
151+ span .set_attribute ("ground_truth_row_count" , web_ground_truth_db .count_rows ())
152+
153+ failures = 0
154+ no_exp_results = 0
155+ observation_count = 0
156+ with tracer .start_span ("MakeObservations:iter_web_observations" ) as span :
157+ for web_obs in iter_web_observations (
158+ db_lookup ,
159+ measurement_day = day ,
160+ probe_cc = probe_cc ,
161+ test_name = "web_connectivity" ,
162+ ):
163+ try :
164+ relevant_gts = web_ground_truth_db .lookup_by_web_obs (web_obs = web_obs )
165+ except :
166+ log .error (
167+ f"failed to lookup relevant_gts for { web_obs [0 ].measurement_uid } " ,
168+ exc_info = True ,
169+ )
170+ failures += 1
171+ continue
161172
162- failures = 0
163- no_exp_results = 0
164- observation_count = 0
165- with tracer .start_as_current_span (
166- "MakeObservations:iter_web_observations"
167- ) as span :
168- for web_obs in iter_web_observations (
169- db_lookup ,
170- measurement_day = day ,
171- probe_cc = probe_cc ,
172- test_name = "web_connectivity" ,
173- ):
174- try :
175- relevant_gts = web_ground_truth_db .lookup_by_web_obs (
176- web_obs = web_obs
177- )
178- except :
179- log .error (
180- f"failed to lookup relevant_gts for { web_obs [0 ].measurement_uid } " ,
181- exc_info = True ,
173+ try :
174+ website_analysis = list (
175+ make_web_analysis (
176+ web_observations = web_obs ,
177+ body_db = body_db ,
178+ web_ground_truths = relevant_gts ,
179+ fingerprintdb = fingerprintdb ,
182180 )
183- failures += 1
181+ )
182+ if len (website_analysis ) == 0 :
183+ log .info (f"no website analysis for { probe_cc } , { test_name } " )
184+ no_exp_results += 1
184185 continue
185186
186- try :
187- website_analysis = list (
188- make_web_analysis (
189- web_observations = web_obs ,
190- body_db = body_db ,
191- web_ground_truths = relevant_gts ,
192- fingerprintdb = fingerprintdb ,
193- )
194- )
195- if len (website_analysis ) == 0 :
196- log .info (f"no website analysis for { probe_cc } , { test_name } " )
197- no_exp_results += 1
198- continue
199-
200- observation_count += 1
201- table_name , rows = make_db_rows (
202- dc_list = website_analysis , column_names = column_names_wa
203- )
204-
205- db_writer .write_rows (
206- table_name = table_name ,
207- rows = rows ,
208- column_names = column_names_wa ,
209- )
210-
211- website_er = list (make_website_experiment_results (website_analysis ))
212- table_name , rows = make_db_rows (
213- dc_list = website_er ,
214- column_names = column_names_er ,
215- custom_remap = {"loni_list" : orjson .dumps },
216- )
217-
218- db_writer .write_rows (
219- table_name = table_name ,
220- rows = rows ,
221- column_names = column_names_er ,
222- )
223-
224- except :
225- web_obs_ids = "," .join (map (lambda wo : wo .observation_id , web_obs ))
226- log .error (
227- f"failed to generate analysis for { web_obs_ids } " , exc_info = True
228- )
229- failures += 1
230-
231- span .set_attribute ("total_failure_count" , failures )
232- span .set_attribute ("total_observation_count" , observation_count )
233- span .set_attribute ("no_experiment_results_count" , no_exp_results )
234- span .set_attribute ("day" , day .strftime ("%Y-%m-%d" ))
235- span .set_attribute ("probe_cc" , probe_cc )
187+ observation_count += 1
188+ table_name , rows = make_db_rows (
189+ dc_list = website_analysis , column_names = column_names_wa
190+ )
191+
192+ db_writer .write_rows (
193+ table_name = table_name ,
194+ rows = rows ,
195+ column_names = column_names_wa ,
196+ )
197+
198+ website_er = list (make_website_experiment_results (website_analysis ))
199+ table_name , rows = make_db_rows (
200+ dc_list = website_er ,
201+ column_names = column_names_er ,
202+ custom_remap = {"loni_list" : orjson .dumps },
203+ )
204+
205+ db_writer .write_rows (
206+ table_name = table_name ,
207+ rows = rows ,
208+ column_names = column_names_er ,
209+ )
210+
211+ except :
212+ web_obs_ids = "," .join (map (lambda wo : wo .observation_id , web_obs ))
213+ log .error (
214+ f"failed to generate analysis for { web_obs_ids } " , exc_info = True
215+ )
216+ failures += 1
217+
218+ span .set_attribute ("total_failure_count" , failures )
219+ span .set_attribute ("total_observation_count" , observation_count )
220+ span .set_attribute ("no_experiment_results_count" , no_exp_results )
221+ span .set_attribute ("day" , day .strftime ("%Y-%m-%d" ))
222+ span .set_attribute ("probe_cc" , probe_cc )
236223
237224 for prev_range in prev_range_list :
238225 maybe_delete_prev_range (db = db_lookup , prev_range = prev_range )
0 commit comments