From f535ce41ead2889ff14978ac23a65c53ef20397e Mon Sep 17 00:00:00 2001
From: Eugene Fedorenko <efedorenko@Eugenes-MBP.guest.corp.microsoft.com>
Date: Thu, 9 Jan 2020 14:22:44 -0800
Subject: [PATCH 01/15] Data from managed datastore

---
 .env.example                                 |  3 +++
 .pipelines/azdo-variables.yml                |  4 +++-
 code/register/register_model.py              |  1 +
 code/training/train.py                       | 20 ++++++++++++++++++--
 docs/getting_started.md                      |  3 +++
 ml_service/pipelines/build_train_pipeline.py | 17 +++++++++++++++--
 ml_service/util/env_variables.py             | 15 +++++++++++++++
 7 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/.env.example b/.env.example
index f6b2fe58..a3662d20 100644
--- a/.env.example
+++ b/.env.example
@@ -31,6 +31,9 @@ MODEL_PATH = ''
 EVALUATE_SCRIPT_PATH = 'evaluate/evaluate_model.py'
 REGISTER_SCRIPT_PATH = 'register/register_model.py'
 SOURCES_DIR_TRAIN = 'code'
+DATASET_NAME = 'diabetes_ds'
+DATASTORE_NAME = 'datablobstore'
+DATAFILE_NAME = 'diabetes.csv'
 
 # Optional. Used by a training pipeline with R on Databricks
 DB_CLUSTER_ID = ''
diff --git a/.pipelines/azdo-variables.yml b/.pipelines/azdo-variables.yml
index fcf67c2b..0691e673 100644
--- a/.pipelines/azdo-variables.yml
+++ b/.pipelines/azdo-variables.yml
@@ -39,4 +39,6 @@ variables:
 - name: DB_CLUSTER_ID
   value: ''
 - name: SCORE_SCRIPT
-  value: score.py
\ No newline at end of file
+  value: score.py
+- name: DATASET_NAME
+  value: diabetes_ds
diff --git a/code/register/register_model.py b/code/register/register_model.py
index 73b4792e..a2485bfa 100644
--- a/code/register/register_model.py
+++ b/code/register/register_model.py
@@ -89,6 +89,7 @@ def main():
     else:
         run.tag("BuildId", value=build_id)
         builduri_base = os.environ.get("BUILDURI_BASE")
+        build_uri = None
         if (builduri_base is not None):
             build_uri = builduri_base + build_id
             run.tag("BuildUri", value=build_uri)
diff --git a/code/training/train.py b/code/training/train.py
index a04972dd..8f46acbe 100644
--- a/code/training/train.py
+++ b/code/training/train.py
@@ -24,6 +24,7 @@
 POSSIBILITY OF SUCH DAMAGE.
 """
 from azureml.core.run import Run
+from azureml.core import Dataset
 import os
 import argparse
 from sklearn.datasets import load_diabetes
@@ -68,20 +69,35 @@ def main():
         help=("Ridge regression regularization strength hyperparameter; "
               "must be a positive float.")
     )
-
+    
+    parser.add_argument(
+        "--dataset_name",
+        type=str,
+        help=("Dataset with the training data")
+    )
     args = parser.parse_args()
 
     print("Argument [build_id]: %s" % args.build_id)
     print("Argument [model_name]: %s" % args.model_name)
     print("Argument [alpha]: %s" % args.alpha)
+    print("Argument [dataset_name]: %s" % args.dataset_name)
 
     model_name = args.model_name
     build_id = args.build_id
     alpha = args.alpha
+    dataset_name = args.dataset_name
 
     run = Run.get_context()
+    ws = run.experiment.workspace
+    
+    if (dataset_name is not None):
+        dataset = Dataset.get_by_name(workspace=ws, name=dataset_name)
+        df = dataset.to_pandas_dataframe()
+        X = df.values
+        y = df.Y
+    else:
+        X, y = load_diabetes(return_X_y=True)
 
-    X, y = load_diabetes(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(
         X, y, test_size=0.2, random_state=0)
     data = {"train": {"X": X_train, "y": y_train},
diff --git a/docs/getting_started.md b/docs/getting_started.md
index 6d03e5b8..e2caeed6 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -122,6 +122,9 @@ Check out the newly created resources in the [Azure Portal](portal.azure.com):
 
 (Optional) To remove the resources created for this project you can use the [/environment_setup/iac-remove-environment.yml](../environment_setup/iac-remove-environment.yml) definition or you can just delete the resource group in the [Azure Portal](portal.azure.com).
 
+**Note:** The training ML pipelne uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and resgister a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). Having done that, you'll need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. 
+
+
 ## Create an Azure DevOps Azure ML Workspace Service Connection
 Install the **Azure Machine Learning** extension to your organization from the
 [marketplace](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.vss-services-azureml),
diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py
index 2b41f12c..ff57b7e3 100644
--- a/ml_service/pipelines/build_train_pipeline.py
+++ b/ml_service/pipelines/build_train_pipeline.py
@@ -3,6 +3,7 @@
 from azureml.pipeline.core import Pipeline
 from azureml.core import Workspace
 from azureml.core.runconfig import RunConfiguration, CondaDependencies
+from azureml.core import Dataset, Datastore
 import os
 import sys
 sys.path.append(os.path.abspath("./ml_service/util"))  # NOQA: E402
@@ -35,10 +36,10 @@ def main():
                         'scikit-learn', 'tensorflow', 'keras'],
         pip_packages=['azure', 'azureml-core',
                       'azure-storage',
-                      'azure-storage-blob'])
+                      'azure-storage-blob',
+                      'azureml-dataprep'])
     )
     run_config.environment.docker.enabled = True
-
     config_envvar = {}
     if (e.collection_uri is not None and e.teamproject_name is not None):
         builduri_base = e.collection_uri + e.teamproject_name
@@ -53,6 +54,17 @@ def main():
     hyperparameter_alpha_param = PipelineParameter(
         name="hyperparameter_alpha", default_value=0.5)
 
+    
+    if (e.datastore_name is not None and e.datafile_name is not None):
+        dataset_name = e.dataset_name
+        datastore = Datastore.get(aml_workspace, e.datastore_name)    
+        dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, e.datafile_name)])    
+        dataset.register(workspace=aml_workspace,
+                        name=e.dataset_name,
+                        description="dataset with training data")
+    else:
+        dataset_name = None                    
+
     train_step = PythonScriptStep(
         name="Train Model",
         script_name=e.train_script_path,
@@ -62,6 +74,7 @@ def main():
             "--build_id", build_id_param,
             "--model_name", model_name_param,
             "--alpha", hyperparameter_alpha_param,
+            "--dataset_name", dataset_name,
         ],
         runconfig=run_config,
         allow_reuse=False,
diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py
index ec13ac95..99d1686a 100644
--- a/ml_service/util/env_variables.py
+++ b/ml_service/util/env_variables.py
@@ -41,6 +41,9 @@ def __init__(self):
         self._score_script = os.environ.get("SCORE_SCRIPT")
         self._collection_uri = os.environ.get("SYSTEM_COLLECTIONURI")
         self._teamproject_name = os.environ.get("SYSTEM_TEAMPROJECT")
+        self._datastore_name = os.environ.get("DATASTORE_NAME")
+        self._datafile_name = os.environ.get("DATAFILE_NAME")        
+        self._dataset_name = os.environ.get("DATASET_NAME")        
 
     @property
     def workspace_name(self):
@@ -145,3 +148,15 @@ def collection_uri(self):
     @property
     def teamproject_name(self):
         return self._teamproject_name
+
+    @property
+    def datastore_name(self):
+        return self._datastore_name
+
+    @property
+    def datafile_name(self):
+        return self._datafile_name
+
+    @property
+    def dataset_name(self):
+        return self._dataset_name

From 4efa969a51abab0e2bca10e30080679638e6a0b6 Mon Sep 17 00:00:00 2001
From: Eugene Fedorenko <efedorenko@Eugenes-MBP.guest.corp.microsoft.com>
Date: Thu, 9 Jan 2020 14:33:50 -0800
Subject: [PATCH 02/15] merge with unassigned variable fix

---
 code/register/register_model.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/code/register/register_model.py b/code/register/register_model.py
index ec50b80e..b2d454bb 100644
--- a/code/register/register_model.py
+++ b/code/register/register_model.py
@@ -89,7 +89,6 @@ def main():
     else:
         run.tag("BuildId", value=build_id)
         builduri_base = os.environ.get("BUILDURI_BASE")
-        build_uri = None
         if (builduri_base is not None):
             build_uri = builduri_base + build_id
             run.tag("BuildUri", value=build_uri)

From 8000e9e5aeef2dd7f487452cbabaadfaaaa09c89 Mon Sep 17 00:00:00 2001
From: Eugene Fedorenko <efedorenko@Eugenes-MBP.guest.corp.microsoft.com>
Date: Thu, 9 Jan 2020 14:59:31 -0800
Subject: [PATCH 03/15] bugfix: default dataset_name to "" instead of None

---
 code/training/train.py                       | 2 +-
 ml_service/pipelines/build_train_pipeline.py | 6 ++----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/code/training/train.py b/code/training/train.py
index 8f46acbe..4a3c6148 100644
--- a/code/training/train.py
+++ b/code/training/train.py
@@ -90,7 +90,7 @@ def main():
     run = Run.get_context()
     ws = run.experiment.workspace
     
-    if (dataset_name is not None):
+    if (dataset_name):
         dataset = Dataset.get_by_name(workspace=ws, name=dataset_name)
         df = dataset.to_pandas_dataframe()
         X = df.values
diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py
index ff57b7e3..3297c389 100644
--- a/ml_service/pipelines/build_train_pipeline.py
+++ b/ml_service/pipelines/build_train_pipeline.py
@@ -53,8 +53,8 @@ def main():
         name="build_id", default_value=e.build_id)
     hyperparameter_alpha_param = PipelineParameter(
         name="hyperparameter_alpha", default_value=0.5)
-
     
+    dataset_name = ""
     if (e.datastore_name is not None and e.datafile_name is not None):
         dataset_name = e.dataset_name
         datastore = Datastore.get(aml_workspace, e.datastore_name)    
@@ -62,9 +62,7 @@ def main():
         dataset.register(workspace=aml_workspace,
                         name=e.dataset_name,
                         description="dataset with training data")
-    else:
-        dataset_name = None                    
-
+                            
     train_step = PythonScriptStep(
         name="Train Model",
         script_name=e.train_script_path,

From 656add0ae09aa62a43af07d6e2417236db786542 Mon Sep 17 00:00:00 2001
From: Eugene Fedorenko <efedorenko@Eugenes-MBP.guest.corp.microsoft.com>
Date: Thu, 9 Jan 2020 15:19:45 -0800
Subject: [PATCH 04/15] typo

---
 docs/getting_started.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/getting_started.md b/docs/getting_started.md
index e2caeed6..f082262b 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -122,7 +122,7 @@ Check out the newly created resources in the [Azure Portal](portal.azure.com):
 
 (Optional) To remove the resources created for this project you can use the [/environment_setup/iac-remove-environment.yml](../environment_setup/iac-remove-environment.yml) definition or you can just delete the resource group in the [Azure Portal](portal.azure.com).
 
-**Note:** The training ML pipelne uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and resgister a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). Having done that, you'll need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. 
+**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and resgister a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). Having done that, you'll need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. 
 
 
 ## Create an Azure DevOps Azure ML Workspace Service Connection

From 8ac54d2d8a7289f427bb038430fd51a6be28cdef Mon Sep 17 00:00:00 2001
From: Eugene Fedorenko <efedorenko@Eugenes-MBP.guest.corp.microsoft.com>
Date: Thu, 9 Jan 2020 15:28:35 -0800
Subject: [PATCH 05/15] linting

---
 code/training/train.py                       |  2 +-
 ml_service/pipelines/build_train_pipeline.py | 13 +++++++------
 ml_service/util/env_variables.py             |  4 ++--
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/code/training/train.py b/code/training/train.py
index 4a3c6148..ebcfee96 100644
--- a/code/training/train.py
+++ b/code/training/train.py
@@ -89,7 +89,7 @@ def main():
 
     run = Run.get_context()
     ws = run.experiment.workspace
-    
+
     if (dataset_name):
         dataset = Dataset.get_by_name(workspace=ws, name=dataset_name)
         df = dataset.to_pandas_dataframe()
diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py
index 3297c389..ebdc55de 100644
--- a/ml_service/pipelines/build_train_pipeline.py
+++ b/ml_service/pipelines/build_train_pipeline.py
@@ -53,16 +53,17 @@ def main():
         name="build_id", default_value=e.build_id)
     hyperparameter_alpha_param = PipelineParameter(
         name="hyperparameter_alpha", default_value=0.5)
-    
+
     dataset_name = ""
     if (e.datastore_name is not None and e.datafile_name is not None):
         dataset_name = e.dataset_name
-        datastore = Datastore.get(aml_workspace, e.datastore_name)    
-        dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, e.datafile_name)])    
+        datastore = Datastore.get(aml_workspace, e.datastore_name)
+        dataset = Dataset.Tabular.from_delimited_files(path=[(datastore,
+                                                              e.datafile_name)])
         dataset.register(workspace=aml_workspace,
-                        name=e.dataset_name,
-                        description="dataset with training data")
-                            
+                         name=e.dataset_name,
+                         description="dataset with training data")
+
     train_step = PythonScriptStep(
         name="Train Model",
         script_name=e.train_script_path,
diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py
index 99d1686a..ed3be221 100644
--- a/ml_service/util/env_variables.py
+++ b/ml_service/util/env_variables.py
@@ -42,8 +42,8 @@ def __init__(self):
         self._collection_uri = os.environ.get("SYSTEM_COLLECTIONURI")
         self._teamproject_name = os.environ.get("SYSTEM_TEAMPROJECT")
         self._datastore_name = os.environ.get("DATASTORE_NAME")
-        self._datafile_name = os.environ.get("DATAFILE_NAME")        
-        self._dataset_name = os.environ.get("DATASET_NAME")        
+        self._datafile_name = os.environ.get("DATAFILE_NAME")
+        self._dataset_name = os.environ.get("DATASET_NAME")
 
     @property
     def workspace_name(self):

From ba9bccd0a8b5593d719e49178a57fa54762a819b Mon Sep 17 00:00:00 2001
From: Eugene Fedorenko <efedorenko@Eugenes-MBP.guest.corp.microsoft.com>
Date: Thu, 9 Jan 2020 15:34:03 -0800
Subject: [PATCH 06/15] linting

---
 code/training/train.py                       | 2 +-
 ml_service/pipelines/build_train_pipeline.py | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/code/training/train.py b/code/training/train.py
index ebcfee96..f56daa99 100644
--- a/code/training/train.py
+++ b/code/training/train.py
@@ -69,7 +69,7 @@ def main():
         help=("Ridge regression regularization strength hyperparameter; "
               "must be a positive float.")
     )
-    
+
     parser.add_argument(
         "--dataset_name",
         type=str,
diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py
index ebdc55de..314c7311 100644
--- a/ml_service/pipelines/build_train_pipeline.py
+++ b/ml_service/pipelines/build_train_pipeline.py
@@ -58,8 +58,9 @@ def main():
     if (e.datastore_name is not None and e.datafile_name is not None):
         dataset_name = e.dataset_name
         datastore = Datastore.get(aml_workspace, e.datastore_name)
-        dataset = Dataset.Tabular.from_delimited_files(path=[(datastore,
-                                                              e.datafile_name)])
+        dataset = Dataset.Tabular.
+            from_delimited_files(path=[(datastore,
+                                        e.datafile_name)])
         dataset.register(workspace=aml_workspace,
                          name=e.dataset_name,
                          description="dataset with training data")

From 1b262887c87e248eae1203f58c5bbae4ebf59f76 Mon Sep 17 00:00:00 2001
From: Eugene Fedorenko <efedorenko@Eugenes-MBP.guest.corp.microsoft.com>
Date: Thu, 9 Jan 2020 15:39:13 -0800
Subject: [PATCH 07/15] linting

---
 ml_service/pipelines/build_train_pipeline.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py
index 314c7311..6df29800 100644
--- a/ml_service/pipelines/build_train_pipeline.py
+++ b/ml_service/pipelines/build_train_pipeline.py
@@ -58,9 +58,8 @@ def main():
     if (e.datastore_name is not None and e.datafile_name is not None):
         dataset_name = e.dataset_name
         datastore = Datastore.get(aml_workspace, e.datastore_name)
-        dataset = Dataset.Tabular.
-            from_delimited_files(path=[(datastore,
-                                        e.datafile_name)])
+        data_path = [(datastore, e.datafile_name)]
+        dataset = Dataset.Tabular.from_delimited_files(path=data_path)
         dataset.register(workspace=aml_workspace,
                          name=e.dataset_name,
                          description="dataset with training data")

From a92fc84b6cd60f4f322f3c94740018b21100c547 Mon Sep 17 00:00:00 2001
From: Eugene Fedorenko <efedorenko@Eugenes-MBP.guest.corp.microsoft.com>
Date: Fri, 10 Jan 2020 09:29:39 -0800
Subject: [PATCH 08/15] added a link to az cli

---
 docs/getting_started.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/getting_started.md b/docs/getting_started.md
index f082262b..4ccc98aa 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -122,7 +122,7 @@ Check out the newly created resources in the [Azure Portal](portal.azure.com):
 
 (Optional) To remove the resources created for this project you can use the [/environment_setup/iac-remove-environment.yml](../environment_setup/iac-remove-environment.yml) definition or you can just delete the resource group in the [Azure Portal](portal.azure.com).
 
-**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and resgister a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). Having done that, you'll need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. 
+**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and resgister a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). Having done that, you'll need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. 
 
 
 ## Create an Azure DevOps Azure ML Workspace Service Connection

From 3d345aac55c81fbeb2d2895ab6bfa76f6c621997 Mon Sep 17 00:00:00 2001
From: Eugene Fedorenko <efedorenko@Eugenes-MBP.guest.corp.microsoft.com>
Date: Fri, 10 Jan 2020 10:00:45 -0800
Subject: [PATCH 09/15] doc update

---
 docs/getting_started.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/getting_started.md b/docs/getting_started.md
index 4ccc98aa..0ae38469 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -122,7 +122,8 @@ Check out the newly created resources in the [Azure Portal](portal.azure.com):
 
 (Optional) To remove the resources created for this project you can use the [/environment_setup/iac-remove-environment.yml](../environment_setup/iac-remove-environment.yml) definition or you can just delete the resource group in the [Azure Portal](portal.azure.com).
 
-**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and resgister a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). Having done that, you'll need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. 
+**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and resgister a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). 
+Having done that, you'll need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. 
 
 
 ## Create an Azure DevOps Azure ML Workspace Service Connection

From b207921fccb1c5d18cf72ec106c0da0d4b8d4e2a Mon Sep 17 00:00:00 2001
From: Eugene Fedorenko <efedorenko@Eugenes-MBP.guest.corp.microsoft.com>
Date: Fri, 10 Jan 2020 11:46:28 -0800
Subject: [PATCH 10/15] reregistering a dataset

---
 ml_service/pipelines/build_train_pipeline.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py
index 6df29800..0b86eb50 100644
--- a/ml_service/pipelines/build_train_pipeline.py
+++ b/ml_service/pipelines/build_train_pipeline.py
@@ -62,7 +62,8 @@ def main():
         dataset = Dataset.Tabular.from_delimited_files(path=data_path)
         dataset.register(workspace=aml_workspace,
                          name=e.dataset_name,
-                         description="dataset with training data")
+                         description="dataset with training data",
+                         create_new_version=True)
 
     train_step = PythonScriptStep(
         name="Train Model",

From d1c4d3f4cbdaeb64425c883c21119d6b16c549a2 Mon Sep 17 00:00:00 2001
From: Eugene Fedorenko <efedorenko@Eugenes-MBP.guest.corp.microsoft.com>
Date: Fri, 10 Jan 2020 15:19:10 -0800
Subject: [PATCH 11/15] typo

---
 docs/getting_started.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/getting_started.md b/docs/getting_started.md
index 0ae38469..71760ee5 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -122,7 +122,7 @@ Check out the newly created resources in the [Azure Portal](portal.azure.com):
 
 (Optional) To remove the resources created for this project you can use the [/environment_setup/iac-remove-environment.yml](../environment_setup/iac-remove-environment.yml) definition or you can just delete the resource group in the [Azure Portal](portal.azure.com).
 
-**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and resgister a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). 
+**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and register a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). 
 Having done that, you'll need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. 
 
 

From 32a80034abd503628929cd99c23bad5292abdb86 Mon Sep 17 00:00:00 2001
From: Eugene Fedorenko <efedorenko@Eugenes-MBP.guest.corp.microsoft.com>
Date: Fri, 10 Jan 2020 15:20:34 -0800
Subject: [PATCH 12/15] rephrasing

---
 docs/getting_started.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/getting_started.md b/docs/getting_started.md
index 71760ee5..f836340e 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -123,7 +123,7 @@ Check out the newly created resources in the [Azure Portal](portal.azure.com):
 (Optional) To remove the resources created for this project you can use the [/environment_setup/iac-remove-environment.yml](../environment_setup/iac-remove-environment.yml) definition or you can just delete the resource group in the [Azure Portal](portal.azure.com).
 
 **Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and register a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). 
-Having done that, you'll need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. 
+You'll also need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. 
 
 
 ## Create an Azure DevOps Azure ML Workspace Service Connection

From 39001ca7209d71f29817ae338b83e7760d6a9658 Mon Sep 17 00:00:00 2001
From: Eugene Fedorenko <efedorenko@Eugenes-MBP.guest.corp.microsoft.com>
Date: Fri, 10 Jan 2020 15:23:34 -0800
Subject: [PATCH 13/15] rephrasing

---
 docs/getting_started.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/getting_started.md b/docs/getting_started.md
index f836340e..a46d5304 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -122,7 +122,7 @@ Check out the newly created resources in the [Azure Portal](portal.azure.com):
 
 (Optional) To remove the resources created for this project you can use the [/environment_setup/iac-remove-environment.yml](../environment_setup/iac-remove-environment.yml) definition or you can just delete the resource group in the [Azure Portal](portal.azure.com).
 
-**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and register a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). 
+**Note:** By default, the training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. To use your own dataset, [create and register a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and upload the data file (e.g. [diabetes.csv](./data/diabetes.csv)) to the corresponding blob container. Alternatively, you can define the datastore in the ML workspace with the [Azure CLI](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob).
 You'll also need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. 
 
 

From f623860c1ed562e39bfb1c9090e4f498ca817afc Mon Sep 17 00:00:00 2001
From: Eugene Fedorenko <efedorenko@Eugenes-MBP.guest.corp.microsoft.com>
Date: Fri, 10 Jan 2020 15:51:51 -0800
Subject: [PATCH 14/15] auth enabled

---
 code/scoring/deployment_config_aci.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/code/scoring/deployment_config_aci.yml b/code/scoring/deployment_config_aci.yml
index 939483b5..f6cbe341 100644
--- a/code/scoring/deployment_config_aci.yml
+++ b/code/scoring/deployment_config_aci.yml
@@ -2,4 +2,5 @@
 containerResourceRequirements:
   cpu: 1
   memoryInGB: 4
-computeType: ACI
\ No newline at end of file
+computeType: ACI
+authEnabled: True
\ No newline at end of file

From 82e709d5ab10e10614cb45aed5ce6c9017f9efc2 Mon Sep 17 00:00:00 2001
From: Eugene Fedorenko <efedorenko@Eugenes-MBP.guest.corp.microsoft.com>
Date: Fri, 10 Jan 2020 16:07:48 -0800
Subject: [PATCH 15/15] revert auth enabled

---
 code/scoring/deployment_config_aci.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/code/scoring/deployment_config_aci.yml b/code/scoring/deployment_config_aci.yml
index f6cbe341..939483b5 100644
--- a/code/scoring/deployment_config_aci.yml
+++ b/code/scoring/deployment_config_aci.yml
@@ -2,5 +2,4 @@
 containerResourceRequirements:
   cpu: 1
   memoryInGB: 4
-computeType: ACI
-authEnabled: True
\ No newline at end of file
+computeType: ACI
\ No newline at end of file