Skip to content

Commit b3895a4

Browse files
authored
981 Fix failing RKI urls (#1007)
+ delete getCaseDataWithEstimations.py
1 parent d1c751b commit b3895a4

10 files changed

Lines changed: 21 additions & 1061 deletions

File tree

.github/actions/test-py/action.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,6 @@ runs:
8181
getdividata --help
8282
getsimdata --help
8383
cleandata --help
84-
getcasesestimation --help
8584
getcommutermobility --help
8685
getvaccinationdata --help
8786
gethospitalizationdata --help

pycode/memilio-epidata/README.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,7 @@ After installation the scripts can be run via the following entry points.
6666
- getjhdata (get case data from Johns Hopkins University, see Results: JH)
6767
- getdividata (get ICU data from DIVI, see Results: DIVI)
6868
- getsimdata (get simulation data including case and vaccination data from rki, population data and ICU data, see Results: RKI-C, RKI-V, P, DIVI)
69-
- cleandata (deletes written files)
70-
- getcasesestimation (get case data with estimations from rki, see Results: RKI-Estimation)
69+
- cleandata (deletes written files)
7170
- getcommutermobility (get data about commuter mobility, see Results: BAA)
7271
- gettestingdata (get data about number of tests, see Results: RKI-T)
7372
- gethospitalizationdata (get hospitalization data from RKI, see Results: RKI-H)

pycode/memilio-epidata/memilio/epidata/README.rst

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ Sources
3131

3232
- Testing Data (RKI-T)
3333

34-
https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Testzahlen-gesamt.xlsx
34+
https://github.com/robert-koch-institut/SARS-CoV-2-PCR-Testungen_in_Deutschland/raw/main/SARS-CoV-2-PCR-Testungen_in_Deutschland.csv
3535

3636
- Hospitalization data (RKI-H)
3737

@@ -191,16 +191,6 @@ RKI-H Germany hospit_germany_age hospitalizations
191191
RKI-H Germany hospit_state_age hospitalizations per day for different states
192192
RKI-H Germany hospit_germany hospitalizations per day in germany
193193

194-
RKI-Estimation Germany cases_all_germany_estimated infected, deaths, recovered, recovered_estimated, deaths_estimated over time for whole Germany
195-
RKI-Estimation Germany cases_all_state_estimated infected, deaths, recovered, recovered_estimated, deaths_estimated over time for different states (Bundesländer)
196-
RKI-Estimation Germany cases_all_county_estimated infected, deaths, recovered, recovered_estimated, deaths_estimated over time for different counties (Landkreise)
197-
RKI-Estimation Germany cases_all_gender_estimated infected, deaths, recovered, recovered_estimated, deaths_estimated over time for different gender
198-
RKI-Estimation Germany cases_all_age_estimated infected, deaths, recovered, recovered_estimated, deaths_estimated over time for different age ranges
199-
RKI-Estimation Germany cases_all_state_age_estimated infected, deaths, recovered, recovered_estimated, deaths_estimated over time for different age ranges and states
200-
RKI-Estimation Germany cases_all_state_gender_estimated infected, deaths, recovered, recovered_estimated, deaths_estimated over time for different genders and states
201-
RKI-Estimation Germany cases_all_county_age_estimated infected, deaths, recovered, recovered_estimated, deaths_estimated over time for different age ranges and counties
202-
RKI-Estimation Germany cases_all_county_gender_estimated infected, deaths, recovered, recovered_estimated, deaths_estimated over time for different genders and counties
203-
204194
P Germany county_current_population[_dim401] population for different age groups from the 2011 census, extrapolated to the current level [with Wartburgkreis and Eisenach separated]
205195
P Germany county_population[_dim401] population for different age groups from the 2011 census [with Wartburgkreis and Eisenach separated]
206196
P Germany county_table raw information on the German counties and its population sizes

pycode/memilio-epidata/memilio/epidata/defaultDict.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@
9898
'vaccNotComplete': "Vacc_not_completed",
9999
# test data
100100
'positiveRate': 'Positive_rate',
101+
'testPositiveRatio': 'Positive_rate',
101102
# NPI data
102103
'npiCode': 'NPI_code',
103104
# mobility data refs

pycode/memilio-epidata/memilio/epidata/getCaseDatawithEstimations.py

Lines changed: 0 additions & 409 deletions
This file was deleted.

pycode/memilio-epidata/memilio/epidata/getDataIntoPandasDataFrame.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,6 @@ def cli(what):
366366

367367
cli_dict = {"divi": ['Downloads data from DIVI', 'start_date', 'end_date', 'impute_dates', 'moving_average'],
368368
"cases": ['Download case data from RKI', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'split_berlin', 'rep_date', 'files'],
369-
"cases_est": ['Download case data from RKI and JHU and estimate recovered and deaths', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'split_berlin', 'rep_date'],
370369
"population": ['Download population data from official sources', 'username'],
371370
"commuter_official": ['Download commuter data from official sources'],
372371
"vaccination": ['Download vaccination data', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'sanitize_data'],

pycode/memilio-epidata/memilio/epidata/getTestingData.py

Lines changed: 16 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -48,21 +48,10 @@ def download_testing_data():
4848

4949
# get country-wide testing data without resolution per federal state
5050
# but from many more laboratories
51-
url = 'https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Testzahlen-gesamt.xlsx?__blob=publicationFile'
52-
header = {'User-Agent': 'Mozilla/5.0'}
53-
r = requests.get(url, headers=header)
54-
if r.status_code != 200: # e.g. 404
55-
raise requests.exceptions.HTTPError("HTTPError: "+str(r.status_code))
56-
with io.BytesIO(r.content) as fh:
57-
df = pd.io.excel.ExcelFile(fh, engine=gd.Conf.excel_engine)
58-
sheet_names = df.sheet_names
59-
df_test[0] = pd.read_excel(
60-
df, sheet_name=sheet_names[1],
61-
dtype={'Positivenanteil (%)': float})
62-
# start on calender week 12/2020 as in federal states sheet,
63-
# below and remove sum at bottom
64-
df_test[0] = df_test[0][2:-1].reset_index()
65-
df_test[0] = df_test[0].drop(columns='index')
51+
url = "https://github.com/robert-koch-institut/SARS-CoV-2-PCR-Testungen_in_Deutschland/raw/main/SARS-CoV-2-PCR-Testungen_in_Deutschland.csv"
52+
df_test[0] = gd.get_file(url=url, read_data=False)
53+
# start on calendar week 12/2020 as in federal states sheet, below
54+
df_test[0] = df_test[0].iloc[2:, :].reset_index(drop=True)
6655

6756
# get testing data on federal state level (from only a subset of
6857
# laboratories)
@@ -97,7 +86,7 @@ def transform_weeks_to_dates(df_test):
9786
# use %G instead of %Y (for year) and %V instead of %W (for week)
9887
# to get ISO week definition
9988
df_test[0].loc[i, dd.EngEng['date']] = datetime.strftime(datetime.strptime(
100-
df_test[0].loc[i, dd.EngEng['date']] + '-4', "%V/%G-%w"), "%Y-%m-%d")
89+
str(df_test[0].loc[i, dd.EngEng['date']]).replace('W', '') + '-4', "%G-%V-%u"), "%Y-%m-%d")
10190

10291
# federal state-based data
10392
df_test[1].rename(columns={df_test[1].columns[1]: dd.EngEng['date']}, inplace=True)
@@ -237,18 +226,18 @@ def get_testing_data(read_data=dd.defaultDict['read_data'],
237226

238227
# drop columns
239228
df_test[0].drop(
240-
columns=['Anzahl Testungen', 'Positiv getestet',
241-
'Anzahl übermittelnder Labore'], inplace=True)
229+
columns=['tests_total', 'tests_total_accumulated', 'tests_positive',
230+
'tests_positive_accumulated',
231+
'laboratories_tests', 'capacities_daily',
232+
'capacities_weekly_theoretically', 'capacities_weeklyweek_actually',
233+
'laboratories_capacities', 'laboratories_samplebacklog',
234+
'samplebacklog'], inplace=True)
242235
df_test[1].drop(columns='Anzahl Gesamt', inplace=True)
243236

244237
# remove unknown locations
245238
df_test[1] = df_test[1][df_test[1].State != 'unbekannt']
246239
df_test[1].reset_index(drop=True, inplace=True)
247240

248-
# correct positive rate to percentage
249-
df_test[0][dd.EngEng['positiveRate']
250-
] = df_test[0][dd.EngEng['positiveRate']]/100
251-
252241
# replace state names with IDs
253242
df_test[1].rename(
254243
columns={dd.EngEng['state']: dd.EngEng['idState']}, inplace=True)
@@ -260,7 +249,7 @@ def get_testing_data(read_data=dd.defaultDict['read_data'],
260249
df_test[0] = mdfs.impute_and_reduce_df(
261250
df_test[0],
262251
{},
263-
[dd.EngEng['positiveRate']],
252+
[dd.EngEng['testPositiveRatio']],
264253
impute='forward', moving_average=moving_average,
265254
min_date=start_date, max_date=end_date)
266255

@@ -274,7 +263,7 @@ def get_testing_data(read_data=dd.defaultDict['read_data'],
274263
# make plot
275264
customPlot.plot_multiple_series(
276265
df_test[0][dd.EngEng['date']],
277-
[df_test[0][dd.EngEng['positiveRate']]],
266+
[df_test[0][dd.EngEng['testPositiveRatio']]],
278267
["Germany"],
279268
title='Positive rate for Sars-CoV-2 testing', xlabel='Date', ylabel='Positive rate',
280269
fig_name="Germany_Testing_positive_rate")
@@ -283,7 +272,7 @@ def get_testing_data(read_data=dd.defaultDict['read_data'],
283272
df_test[1] = mdfs.impute_and_reduce_df(
284273
df_test[1],
285274
{dd.EngEng["idState"]: [k for k in geoger.get_state_ids()]},
286-
[dd.EngEng['positiveRate']],
275+
[dd.EngEng['testPositiveRatio']],
287276
impute='forward', moving_average=moving_average,
288277
min_date=start_date, max_date=end_date)
289278
# store positive rates for all federal states
@@ -298,7 +287,7 @@ def get_testing_data(read_data=dd.defaultDict['read_data'],
298287
df_test[0][dd.EngEng['date']],
299288
[df_test[1].loc
300289
[df_test[1][dd.EngEng['idState']] == stateID,
301-
[dd.EngEng['positiveRate']]] for stateID in geoger.get_state_ids()],
290+
[dd.EngEng['testPositiveRatio']]] for stateID in geoger.get_state_ids()],
302291
[stateName for stateName in geoger.get_state_names()],
303292
title='Positive rate for Sars-CoV-2 testing', xlabel='Date', ylabel='Positive rate',
304293
fig_name='FederalStates_Testing_positive_rate')
@@ -321,7 +310,7 @@ def get_testing_data(read_data=dd.defaultDict['read_data'],
321310
columns=({dd.EngEng['idState']: dd.EngEng['idCounty']}),
322311
inplace=True)
323312
df_local[dd.EngEng['idCounty']] = county
324-
df_test_counties.append(df_test_counties, df_local)
313+
df_test_counties.append(df_local)
325314

326315
df_test_counties = pd.concat(df_test_counties)
327316

0 commit comments

Comments
 (0)