Suppress duplicated stdout output from normalize.py

felker · felker · commit df4197022218 · 2019-11-05T17:01:22.000-06:00
diff --git a/examples/mpi_learn.py b/examples/mpi_learn.py
@@ -80,12 +80,26 @@
 (shot_list_train, shot_list_validate,
  shot_list_test) = guarantee_preprocessed(conf, verbose=True)
 
+# TODO(KGF): shouldn't normalizer.train() be called like
+# guarantee_preprocessed() above? I.e., if Normalizer.previously_saved_stats()
+# does not load a computed normalizer for all machines ("loaded normalization
+# data from {d3d: 3449, jet: 2918} shots ( {d3d: 810, jet: 74} disruptive )"),
+# then only the master MPI rank should call normalizer.train()?
 g.print_unique("begin normalization...")
 normalizer = Normalizer(conf)
 normalizer.train()
 loader = Loader(conf, normalizer)
 g.print_unique("...done")
 
+# TODO(KGF): note, "python examples/guarantee_preprocessed.py" does NOT train
+# the normalizer. Try deleting the previously-computed file, e.g.
+# normalization/normalization_signal_group_250640798211266795112500621861190558178.npz  # noqa
+# or set conf['data']['recompute_normalization'] = True to see example stdout
+
+# TODO(KGF): both preprocess.py and normalize.py are littered with print()
+# calls that should probably be replaced with print_unique() when they are not
+# purely loading previously-computed quantities from file
+
 #####################################################
 #                    TRAINING                       #
 #####################################################
diff --git a/plasma/models/mpi_runner.py b/plasma/models/mpi_runner.py
@@ -782,12 +782,12 @@ def mpi_train(conf, shot_list_train, shot_list_validate, loader,
         print("Optimizer not implemented yet")
         exit(1)
 
-    g.write_unique('{} epochs left to go'.format(num_epochs - 1 - e))
+    g.print_unique('{} epochs left to go'.format(num_epochs - 1 - e))
 
     batch_generator = partial(loader.training_batch_generator_partial_reset,
                               shot_list=shot_list_train)
 
-    g.write_unique("warmup steps = {}".format(warmup_steps))
+    g.print_unique("warmup steps = {}".format(warmup_steps))
     mpi_model = MPIModel(train_model, optimizer, g.comm, batch_generator,
                          batch_size, lr=lr, warmup_steps=warmup_steps,
                          num_batches_minimum=num_batches_minimum, conf=conf)
diff --git a/plasma/preprocessor/normalize.py b/plasma/preprocessor/normalize.py
@@ -9,6 +9,7 @@
 '''
 
 from __future__ import print_function
+import plasma.global_vars as g
 import os
 import time
 import sys
@@ -74,7 +75,7 @@ def load_stats(self):
         pass
 
     def print_summary(self, action='loaded'):
-        print('{} normalization data from {} shots ( {} disruptive )'.format(
+        g.print_unique('{} normalization data from {} shots ( {} disruptive )'.format(
             action, self.num_processed, self.num_disruptive))
 
     def set_inference_mode(self, val):
@@ -149,7 +150,7 @@ def train_on_files(self, shot_files, use_shots, all_machines):
             self.save_stats()
         else:
             self.load_stats()
-        print(self)
+        g.print_unique(self)
 
     def cut_end_of_shot(self, shot):
         cut_shot_ends = self.conf['data']['cut_shot_ends']
@@ -222,7 +223,7 @@ def __str__(self):
         for machine in self.means:
             means = np.median(self.means[machine], axis=0)
             stds = np.median(self.stds[machine], axis=0)
-            s += 'Machine: {}:\nMean Var Normalizer.\n'.format(machine)
+            s += 'Machine = {}:\nMean Var Normalizer.\n'.format(machine)
             s += 'means: {}\nstds: {}'.format(means, stds)
         return s
 
@@ -304,8 +305,8 @@ def load_stats(self):
         self.num_disruptive = dat['num_disruptive'][()]
         self.machines = dat['machines'][()]
         for machine in self.means:
-            print('Machine {}:'.format(machine))
-            self.print_summary()
+            g.print_unique('Machine = {}:'.format(machine))
+        self.print_summary()
 
 
 class VarNormalizer(MeanVarNormalizer):
@@ -452,7 +453,7 @@ def load_stats(self):
         self.num_disruptive = dat['num_disruptive'][()]
         self.machines = dat['machines'][()]
         for machine in self.means:
-            print('Machine {}:'.format(machine))
+            g.print_unique('Machine {}:'.format(machine))
             self.print_summary()