1717# See the License for the specific language governing permissions and
1818# limitations under the License.
1919#############################################################################
20+
2021import copy
2122import os
2223import pickle
3334from memilio .simulation .osecir import (Index_InfectionState ,
3435 InfectionState , Model ,
3536 interpolate_simulation_result , simulate )
37+ import memilio .surrogatemodel .utils .dampings as dampings
38+ from memilio .surrogatemodel .utils .helper_functions import (
39+ interpolate_age_groups , remove_confirmed_compartments , normalize_simulation_data )
40+ import memilio .simulation as mio
41+ import memilio .simulation .osecir as osecir
3642
3743
def interpolate_age_groups(data_entry):
    """Interpolates the age groups from the population data into the age groups used in the simulation.
    We assume that the people in the age groups are uniformly distributed.

    :param data_entry: Data entry containing the population data.
    :returns: List containing the population in each age group used in the simulation.

    """
    # Bind the census brackets that are split across simulation age groups.
    three_to_five = data_entry['3-5 years']
    thirties = data_entry['30-39 years']
    fifty_to_sixtyfour = data_entry['50-64 years']
    over_seventyfour = data_entry['>74 years']

    # Each entry distributes the census brackets uniformly onto the
    # simulation age groups A00-A04, A05-A14, A15-A34, A35-A59, A60-A79, A80+.
    return [
        data_entry['<3 years'] + three_to_five * 2 / 3,
        three_to_five * 1 / 3 + data_entry['6-14 years'],
        data_entry['15-17 years'] + data_entry['18-24 years']
        + data_entry['25-29 years'] + thirties * 1 / 2,
        thirties * 1 / 2 + data_entry['40-49 years']
        + fifty_to_sixtyfour * 2 / 3,
        fifty_to_sixtyfour * 1 / 3 + data_entry['65-74 years']
        + over_seventyfour * 1 / 5,
        over_seventyfour * 4 / 5,
    ]
55-
56-
def remove_confirmed_compartments(result_array):
    """Removes the confirmed compartments which are not used in the data generation.

    :param result_array: Array containing the simulation results.
    :returns: Array containing the simulation results without the confirmed compartments.

    """
    # Each age group occupies 10 consecutive columns; the two confirmed
    # compartments sit at offsets 3 and 5 within every group of 10.
    num_groups = result_array.shape[1] // 10
    delete_indices = []
    for group in range(num_groups):
        delete_indices.append(10 * group + 3)
        delete_indices.append(10 * group + 5)
    return np.delete(result_array, delete_indices, axis=1)
68-
69-
def transform_data(data, transformer, num_runs):
    """Transforms the data by a logarithmic normalization.
    Reshaping is necessary, because the transformer needs an array with dimension <= 2.

    :param data: Data to be transformed.
    :param transformer: Transformer used for the transformation.
    :param num_runs: Number of simulation runs contained in data.
    :returns: Transformed data.

    """
    # Move the feature axis to the front and flatten everything else so the
    # transformer receives a 2D array with 48 rows (6 age groups x 8 compartments).
    flattened = np.asarray(data).transpose(2, 0, 1).reshape(48, -1)
    scaled = transformer.transform(flattened)
    # Undo the flattening and restore the per-run layout expected downstream.
    per_run = scaled.transpose().reshape(num_runs, -1, 48)
    return tf.convert_to_tensor(per_run)
83-
84-
85- def run_secir_groups_simulation (days , damping_day , populations ):
44+ def run_secir_groups_simulation (days , damping_days , damping_factors , populations ):
8645 """ Uses an ODE SECIR model allowing for asymptomatic infection with 6 different age groups. The model is not stratified by region.
8746 Virus-specific parameters are fixed and initial number of persons in the particular infection states are chosen randomly from defined ranges.
8847
8948 :param days: Describes how many days we simulate within a single run.
90- :param damping_day: The day when damping is applied.
49+ :param damping_days: The days when damping is applied.
50+ :param damping_factors: damping factors associated to the damping days.
9151 :param populations: List containing the population in each age group.
92- :returns: List containing the populations in each compartment used to initialize the run.
52+ :returns: Tuple of lists (list_of_simulation_results, list_of_damped_matrices), the first containing the simulation results, the second list containing the
53+ damped contact matrices.
9354
9455 """
56+ # Collect indices of confirmed compartments
57+ del_indices = []
58+
59+ if len (damping_days ) != len (damping_factors ):
60+ raise ValueError ("Length of damping_days and damping_factors differ!" )
61+
9562 set_log_level (LogLevel .Off )
9663
9764 start_day = 1
9865 start_month = 1
9966 start_year = 2019
10067 dt = 0.1
10168
102- # Define age Groups
103- groups = [ '0-4' , '5-14' , '15-34' , '35-59' , '60-79' , '80+' ]
104- num_groups = len (groups )
69+ age_groups = [ '0-4' , '5-14' , '15-34' , '35-59' , '60-79' , '80+' ]
70+ # Get number of age groups
71+ num_groups = len (age_groups )
10572
10673 # Initialize Parameters
10774 model = Model (num_groups )
@@ -118,26 +85,26 @@ def run_secir_groups_simulation(days, damping_day, populations):
11885 # Initial number of people in each compartment with random numbers
11986 model .populations [AgeGroup (i ), Index_InfectionState (
12087 InfectionState .Exposed )] = random .uniform (
121- 0.00025 , 0.0005 ) * populations [i ]
88+ 0.00025 , 0.005 ) * populations [i ]
12289 model .populations [AgeGroup (i ), Index_InfectionState (
12390 InfectionState .InfectedNoSymptoms )] = random .uniform (
124- 0.0001 , 0.00035 ) * populations [i ]
91+ 0.0001 , 0.0035 ) * populations [i ]
12592 model .populations [AgeGroup (i ), Index_InfectionState (
12693 InfectionState .InfectedNoSymptomsConfirmed )] = 0
12794 model .populations [AgeGroup (i ), Index_InfectionState (
12895 InfectionState .InfectedSymptoms )] = random .uniform (
129- 0.00007 , 0.0001 ) * populations [i ]
96+ 0.00007 , 0.001 ) * populations [i ]
13097 model .populations [AgeGroup (i ), Index_InfectionState (
13198 InfectionState .InfectedSymptomsConfirmed )] = 0
13299 model .populations [AgeGroup (i ), Index_InfectionState (
133100 InfectionState .InfectedSevere )] = random .uniform (
134- 0.00003 , 0.00006 ) * populations [i ]
101+ 0.00003 , 0.0006 ) * populations [i ]
135102 model .populations [AgeGroup (i ), Index_InfectionState (
136103 InfectionState .InfectedCritical )] = random .uniform (
137- 0.00001 , 0.00002 ) * populations [i ]
104+ 0.00001 , 0.0002 ) * populations [i ]
138105 model .populations [AgeGroup (i ), Index_InfectionState (
139106 InfectionState .Recovered )] = random .uniform (
140- 0.002 , 0.008 ) * populations [i ]
107+ 0.002 , 0.08 ) * populations [i ]
141108 model .populations [AgeGroup (i ),
142109 Index_InfectionState (InfectionState .Dead )] = 0
143110 model .populations .set_difference_from_group_total_AgeGroup (
@@ -154,6 +121,14 @@ def run_secir_groups_simulation(days, damping_day, populations):
154121 # twice the value of RiskOfInfectionFromSymptomatic
155122 model .parameters .MaxRiskOfInfectionFromSymptomatic [AgeGroup (i )] = 0.5
156123
124+ # Collecting deletable indices
125+ index_no_sym_conf = model .populations .get_flat_index (
126+ osecir .MultiIndex_PopulationsArray (mio .AgeGroup (i ), osecir .InfectionState .InfectedNoSymptomsConfirmed ))
127+ index_sym_conf = model .populations .get_flat_index (
128+ osecir .MultiIndex_PopulationsArray (mio .AgeGroup (i ), osecir .InfectionState .InfectedSymptomsConfirmed ))
129+ del_indices .append (index_no_sym_conf )
130+ del_indices .append (index_sym_conf )
131+
157132 # StartDay is the n-th day of the year
158133 model .parameters .StartDay = (
159134 date (start_year , start_month , start_day ) - date (start_year , 1 , 1 )).days
@@ -166,14 +141,16 @@ def run_secir_groups_simulation(days, damping_day, populations):
166141 model .parameters .ContactPatterns .cont_freq_mat [0 ].minimum = minimum
167142
168143 # Generate a damping matrix and assign it to the model
169- damping = np .ones ((num_groups , num_groups )
170- ) * np .float16 (random .uniform (0 , 0.5 ))
171-
172- model .parameters .ContactPatterns .cont_freq_mat .add_damping (Damping (
173- coeffs = (damping ), t = damping_day , level = 0 , type = 0 ))
144+ damped_matrices = []
174145
175- damped_contact_matrix = model .parameters .ContactPatterns .cont_freq_mat .get_matrix_at (
176- damping_day + 1 )
146+ for i in np .arange (len (damping_days )):
147+ damping = np .ones ((num_groups , num_groups )
148+ ) * damping_factors [i ]
149+ day = damping_days [i ]
150+ model .parameters .ContactPatterns .cont_freq_mat .add_damping (Damping (
151+ coeffs = (damping ), t = day , level = 0 , type = 0 ))
152+ damped_matrices .append (model .parameters .ContactPatterns .cont_freq_mat .get_matrix_at (
153+ day + 1 ))
177154
178155 # Apply mathematical constraints to parameters
179156 model .apply_constraints ()
@@ -184,21 +161,22 @@ def run_secir_groups_simulation(days, damping_day, populations):
184161 # Interpolate simulation result on days time scale
185162 result = interpolate_simulation_result (result )
186163
164+ # Omit first column, as the time points are not of interest here.
187165 result_array = remove_confirmed_compartments (
188- np .transpose (result .as_ndarray ()[1 :, :]))
166+ np .transpose (result .as_ndarray ()[1 :, :]), del_indices )
189167
190- # Omit first column, as the time points are not of interest here.
191168 dataset_entries = copy .deepcopy (result_array )
169+ dataset_entries = np .nan_to_num (dataset_entries )
192170
193- return dataset_entries .tolist (), damped_contact_matrix
171+ return dataset_entries .tolist (), damped_matrices
194172
195173
196174def generate_data (
197175 num_runs , path_out , path_population , input_width , label_width ,
198- normalize = True , save_data = True ):
199- """ Generate data sets of num_runs many equation-based model simulations and transforms the computed results by a log(1+x) transformation.
176+ normalize = True , save_data = True , damping_method = "random" , number_dampings = 5 ):
177+ """ Generate data sets of num_runs many equation-based model simulations and possibly transforms the computed results by a log(1+x) transformation.
200178 Divides the results in input and label data sets and returns them as a dictionary of two TensorFlow Stacks.
201- In general, we have 8 different compartments and 6 age groups. If we choose,
179+ In general, we have 8 different compartments and 6 age groups. If we choose
202180 input_width = 5 and label_width = 20, the dataset has
203181 - input with dimension 5 x 8 x 6
204182 - labels with dimension 20 x 8 x 6
@@ -210,14 +188,17 @@ def generate_data(
210188 :param label_width: Int value that defines the size of the labels.
211189 :param normalize: Default: true Option to transform dataset by logarithmic normalization.
212190 :param save_data: Default: true Option to save the dataset.
191+ :param damping_method: String specifying the damping method, that should be used. Possible values "classic", "active", "random".
192+ :param number_dampings: Maximal number of possible dampings.
213193 :returns: Data dictionary of input and label data sets.
214194
215195 """
216196 data = {
217197 "inputs" : [],
218198 "labels" : [],
219- "contact_matrix" : [],
220- "damping_day" : []
199+ "contact_matrices" : [],
200+ "damping_factors" : [],
201+ "damping_days" : []
221202 }
222203
223204 # The number of days is the same as the sum of input and label width.
@@ -232,16 +213,18 @@ def generate_data(
232213 bar = Bar ('Number of Runs done' , max = num_runs )
233214 for _ in range (0 , num_runs ):
234215
235- # Generate a random damping day
236- damping_day = random .randrange (
237- input_width , input_width + label_width )
216+ # Generate random damping days
217+ damping_days , damping_factors = dampings .generate_dampings (
218+ days , number_dampings , method = damping_method , min_distance = 2 ,
219+ min_damping_day = 2 )
238220
239- data_run , damped_contact_matrix = run_secir_groups_simulation (
240- days , damping_day , population [random .randint (0 , len (population ) - 1 )])
221+ data_run , damped_matrices = run_secir_groups_simulation (
222+ days , damping_days , damping_factors , population [random .randint (0 , len (population ) - 1 )])
241223 data ['inputs' ].append (data_run [:input_width ])
242224 data ['labels' ].append (data_run [input_width :])
243- data ['contact_matrix' ].append (np .array (damped_contact_matrix ))
244- data ['damping_day' ].append ([damping_day ])
225+ data ['contact_matrices' ].append (damped_matrices )
226+ data ['damping_factors' ].append (damping_factors )
227+ data ['damping_days' ].append (damping_days )
245228 bar .next ()
246229 bar .finish ()
247230
@@ -250,8 +233,10 @@ def generate_data(
250233 transformer = FunctionTransformer (np .log1p , validate = True )
251234
252235 # transform inputs and labels
253- data ['inputs' ] = transform_data (data ['inputs' ], transformer , num_runs )
254- data ['labels' ] = transform_data (data ['labels' ], transformer , num_runs )
236+ data ['inputs' ] = normalize_simulation_data (
237+ data ['inputs' ], transformer , num_runs )
238+ data ['labels' ] = normalize_simulation_data (
239+ data ['labels' ], transformer , num_runs )
255240 else :
256241 data ['inputs' ] = tf .convert_to_tensor (data ['inputs' ])
257242 data ['labels' ] = tf .convert_to_tensor (data ['labels' ])
@@ -261,8 +246,15 @@ def generate_data(
261246 if not os .path .isdir (path_out ):
262247 os .mkdir (path_out )
263248
264- # save dict to json file
265- with open (os .path .join (path_out , 'data_secir_groups.pickle' ), 'wb' ) as f :
249+ # save dict to pickle file
250+ if num_runs < 1000 :
251+ filename = 'data_secir_groups_%ddays_%d_' % (
252+ label_width , num_runs ) + damping_method + '.pickle'
253+ else :
254+ filename = 'data_secir_groups_%ddays_%dk_' % (
255+ label_width , num_runs // 1000 ) + damping_method + '.pickle'
256+
257+ with open (os .path .join (path_out , filename ), 'wb' ) as f :
266258 pickle .dump (data , f )
267259 return data
268260
@@ -291,13 +283,13 @@ def getMinimumMatrix():
291283 """ loads the minimum matrix"""
292284
293285 minimum_contact_matrix0 = os .path .join (
294- "./data/contacts/minimum_home.txt" )
286+ "./data/Germany/ contacts/minimum_home.txt" )
295287 minimum_contact_matrix1 = os .path .join (
296- "./data/contacts/minimum_school_pf_eig.txt" )
288+ "./data/Germany/ contacts/minimum_school_pf_eig.txt" )
297289 minimum_contact_matrix2 = os .path .join (
298- "./data/contacts/minimum_work.txt" )
290+ "./data/Germany/ contacts/minimum_work.txt" )
299291 minimum_contact_matrix3 = os .path .join (
300- "./data/contacts/minimum_other.txt" )
292+ "./data/Germany/ contacts/minimum_other.txt" )
301293
302294 minimum = np .loadtxt (minimum_contact_matrix0 ) \
303295 + np .loadtxt (minimum_contact_matrix1 ) + \
@@ -310,7 +302,8 @@ def getMinimumMatrix():
310302def get_population (path ):
311303 """ read population data in list from dataset
312304
313- :param path: Path to the dataset containing the population data
305+ :param path: Path to the dataset containing the population
306+ :returns: List of interpolated age grouped population data
314307
315308 """
316309
@@ -332,7 +325,7 @@ def get_population(path):
332325 r"data//Germany//pydata//county_current_population.json" )
333326
334327 input_width = 5
335- label_width = 30
336- num_runs = 10000
328+ label_width = 90
329+ num_runs = 100
337330 data = generate_data (num_runs , path_output , path_population , input_width ,
338- label_width )
331+ label_width , damping_method = "active" )
0 commit comments