Skip to content

Commit 7607fbe

Browse files
kilianvolmer and HenrZu authored
1483 make memilio.simulation compatible with pandas 3 (#1484)
In order to make memilio-simulation compatible with pandas 3, the following changes were done: - Use iloc for indexing with pandas - Updated some epidata functions - Removed getNPI data functionality from epidata - Use parse from packaging for version check Co-authored-by: HenrZu <[email protected]>
1 parent b7f220f commit 7607fbe

12 files changed

Lines changed: 25 additions & 1902 deletions

File tree

docs/source/python/m-epidata.rst

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ After installation the following functions are available:
3131
* ``get_divi_data``: Downloads ICU data from German DIVI Intensivregister (DIVI).
3232
* ``get_hospitalization_data``: Downloads data about COVID-19 hospitalizations data from Robert Koch-Institut (RKI-H).
3333
* ``get_jh_data``: Downloads COVID-19 case data from Johns Hopkins University (JH).
34-
* ``get_npi_data``: Loads a certain resolution of recorded NPI data from the Corona Datenplattform and extracts the counties asked for and activates the NPIs if they are incidence dependent.
3534
* ``get_population_data``: Downloads population data for German federal states and counties from various public sources (P).
3635
* ``get_simulation_data``: Downloads all data required for a simulation with the graph-metapopulation model which are SARS-CoV-2 case data(RKI-C), population data (P), ICU data (DIVI) and COVID-19 vaccination data from Robert Koch-Institut (RKI-V).
3736
* ``get_testing_data``: Downloads data about SARS-CoV-2 PCR tests from Robert Koch-Institut (RKI-T).
@@ -41,14 +40,14 @@ After installation the following functions are available:
4140
* ``transformWeatherData``: Transforms weather data.
4241

4342
For a detailed description of the run options and the resulting data files written
44-
see the `epidata subfolder <memilio/epidata/README.rst>`_.
43+
see the `epidata subfolder <https://github.com/SciCompMod/memilio/blob/main/pycode/memilio-epidata/README.rst>`_.
4544

4645
The downloaded data is written either to HDF5 or json files.
4746

4847
Additional Tools
4948
----------------
5049

51-
Some additional tools for processing or analysing data can be found in `tools directory <tools/README.md>`_.
50+
Some additional tools for processing or analysing data can be found in `tools directory <https://github.com/SciCompMod/memilio/tree/main/tools>`_.
5251

5352
Notes for developers
5453
--------------------

pycode/memilio-epidata/memilio/epidata/README.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ BAA Germany mobility_bfa_2020_dim400 number of commuter
214214
============== ========== =================================== =================
215215

216216
More detailed information can be found in the
217-
`documentation <https://scicompmod.github.io/memilio/documentation/index.html>`_ of the different functions.
217+
`documentation <https://memilio.readthedocs.io/en/latest/python/m-epidata.html>`_ of the different functions.
218218

219219
Notes for developers
220220
--------------------

pycode/memilio-epidata/memilio/epidata/geoModificationGermany.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ def get_nuts3_county_id_map():
373373
county_table = get_official_county_table()
374374
# delete rows with nuts3 = NaN
375375
# take just columns with name dd.EngEng['idCounty'] and dd.EngEng['nuts3']
376-
key_nuts3 = county_table.dropna(subset=[dd.EngEng['nuts3']])[
376+
key_nuts3 = county_table.dropna(subset=[dd.EngEng['nuts3'], dd.EngEng['idCounty']])[
377377
[dd.EngEng['idCounty'], dd.EngEng['nuts3']]]
378378
# convert ID data types
379379
key_nuts3 = key_nuts3.astype({dd.EngEng['idCounty']: int})

pycode/memilio-epidata/memilio/epidata/getDataIntoPandasDataFrame.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
from io import BytesIO
4343
from zipfile import ZipFile
4444
from enum import Enum
45-
from pkg_resources import parse_version
45+
from packaging.version import parse
4646

4747
import pandas as pd
4848

@@ -66,7 +66,7 @@ class Conf:
6666

6767
v_level = 'Info'
6868
show_progr = False
69-
if parse_version(pd.__version__) < parse_version('2.2'):
69+
if parse(pd.__version__) < parse('2.2'):
7070
excel_engine = 'openpyxl'
7171
else:
7272
# calamine is faster, but cannot be used for pandas < 2.2
@@ -84,8 +84,6 @@ def __init__(self, out_folder, **kwargs):
8484
path = os.path.join(os.path.dirname(
8585
os.path.abspath(__file__)), 'download_config.conf')
8686

87-
# activate CoW for more predictable behaviour of pandas DataFrames
88-
pd.options.mode.copy_on_write = True
8987
# read in config file
9088
# if no config file is given, use default values
9189
if os.path.exists(path):
@@ -196,9 +194,12 @@ def download_file(
196194
197195
"""
198196
if verify not in [True, False, "interactive"]:
199-
warnings.warn('Invalid input for argument verify. Expected True, False, or'
200-
' "interactive", got ' + str(verify) + '.'
201-
' Proceeding with "verify=True".', category=RuntimeWarning)
197+
warnings.warn(
198+
'Invalid input for argument verify. Expected True, False, or'
199+
' "interactive", got ' + str(verify) +
200+
'.'
201+
' Proceeding with "verify=True".',
202+
category=RuntimeWarning)
202203
verify = True
203204
# send GET request as stream so the content is not downloaded at once
204205
try:
@@ -438,8 +439,7 @@ def cli(what):
438439
parser.add_argument(
439440
'-s', '--start-date', default=start_date_default,
440441
help='Defines start date for data download. Should have form: YYYY-mm-dd.'
441-
'Default is ' +
442-
str(dd.defaultDict['start_date']) +
442+
'Default is ' + str(dd.defaultDict['start_date']) +
443443
' (2020-04-24 for divi and 2020-01-22 for jh)',
444444
type=lambda s: datetime.datetime.strptime(s, '%Y-%m-%d').date())
445445
if 'end_date' in what_list:
@@ -455,8 +455,10 @@ def cli(what):
455455
' omitting dates where no data was reported', action='store_true')
456456
if 'moving_average' in what_list:
457457
parser.add_argument(
458-
'-m', '--moving-average', type=int, default=dd.defaultDict['moving_average'],
459-
help='Compute a moving average of N days over the time series. Default is ' + str(dd.defaultDict['moving_average']))
458+
'-m', '--moving-average', type=int, default=dd.defaultDict
459+
['moving_average'],
460+
help='Compute a moving average of N days over the time series. Default is '
461+
+ str(dd.defaultDict['moving_average']))
460462
if 'split_berlin' in what_list:
461463
parser.add_argument(
462464
'-b', '--split-berlin', default=dd.defaultDict['split_berlin'],
@@ -504,7 +506,8 @@ def cli(what):
504506
if '--interactive' in sys.argv:
505507
parser.add_argument(
506508
'--interactive',
507-
help='Interactive download (Handle warnings, passwords etc.).', action='store_true')
509+
help='Interactive download (Handle warnings, passwords etc.).',
510+
action='store_true')
508511

509512
if not {'--verbose', '-v', '-vv', '-vvv', '-vvvv', '-vvvvv', '-vvvvvv'}.isdisjoint(sys.argv):
510513
parser.add_argument(

0 commit comments

Comments (0)