From f535ce41ead2889ff14978ac23a65c53ef20397e Mon Sep 17 00:00:00 2001 From: Eugene Fedorenko Date: Thu, 9 Jan 2020 14:22:44 -0800 Subject: [PATCH 01/15] Data from managed datastore --- .env.example | 3 +++ .pipelines/azdo-variables.yml | 4 +++- code/register/register_model.py | 1 + code/training/train.py | 20 ++++++++++++++++++-- docs/getting_started.md | 3 +++ ml_service/pipelines/build_train_pipeline.py | 17 +++++++++++++++-- ml_service/util/env_variables.py | 15 +++++++++++++++ 7 files changed, 58 insertions(+), 5 deletions(-) diff --git a/.env.example b/.env.example index f6b2fe58..a3662d20 100644 --- a/.env.example +++ b/.env.example @@ -31,6 +31,9 @@ MODEL_PATH = '' EVALUATE_SCRIPT_PATH = 'evaluate/evaluate_model.py' REGISTER_SCRIPT_PATH = 'register/register_model.py' SOURCES_DIR_TRAIN = 'code' +DATASET_NAME = 'diabetes_ds' +DATASTORE_NAME = 'datablobstore' +DATAFILE_NAME = 'diabetes.csv' # Optional. Used by a training pipeline with R on Databricks DB_CLUSTER_ID = '' diff --git a/.pipelines/azdo-variables.yml b/.pipelines/azdo-variables.yml index fcf67c2b..0691e673 100644 --- a/.pipelines/azdo-variables.yml +++ b/.pipelines/azdo-variables.yml @@ -39,4 +39,6 @@ variables: - name: DB_CLUSTER_ID value: '' - name: SCORE_SCRIPT - value: score.py \ No newline at end of file + value: score.py +- name: DATASET_NAME + value: diabetes_ds diff --git a/code/register/register_model.py b/code/register/register_model.py index 73b4792e..a2485bfa 100644 --- a/code/register/register_model.py +++ b/code/register/register_model.py @@ -89,6 +89,7 @@ def main(): else: run.tag("BuildId", value=build_id) builduri_base = os.environ.get("BUILDURI_BASE") + build_uri = None if (builduri_base is not None): build_uri = builduri_base + build_id run.tag("BuildUri", value=build_uri) diff --git a/code/training/train.py b/code/training/train.py index a04972dd..8f46acbe 100644 --- a/code/training/train.py +++ b/code/training/train.py @@ -24,6 +24,7 @@ POSSIBILITY OF SUCH DAMAGE. """ from azureml.core.run import Run +from azureml.core import Dataset import os import argparse from sklearn.datasets import load_diabetes @@ -68,20 +69,35 @@ def main(): help=("Ridge regression regularization strength hyperparameter; " "must be a positive float.") ) - + + parser.add_argument( + "--dataset_name", + type=str, + help=("Dataset with the training data") + ) args = parser.parse_args() print("Argument [build_id]: %s" % args.build_id) print("Argument [model_name]: %s" % args.model_name) print("Argument [alpha]: %s" % args.alpha) + print("Argument [dataset_name]: %s" % args.dataset_name) model_name = args.model_name build_id = args.build_id alpha = args.alpha + dataset_name = args.dataset_name run = Run.get_context() + ws = run.experiment.workspace + + if (dataset_name is not None): + dataset = Dataset.get_by_name(workspace=ws, name=dataset_name) + df = dataset.to_pandas_dataframe() + X = df.values + y = df.Y + else: + X, y = load_diabetes(return_X_y=True) - X, y = load_diabetes(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=0) data = {"train": {"X": X_train, "y": y_train}, diff --git a/docs/getting_started.md b/docs/getting_started.md index 6d03e5b8..e2caeed6 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -122,6 +122,9 @@ Check out the newly created resources in the [Azure Portal](portal.azure.com): (Optional) To remove the resources created for this project you can use the [/environment_setup/iac-remove-environment.yml](../environment_setup/iac-remove-environment.yml) definition or you can just delete the resource group in the [Azure Portal](portal.azure.com). +**Note:** The training ML pipelne uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and resgister a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). Having done that, you'll need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. + + ## Create an Azure DevOps Azure ML Workspace Service Connection Install the **Azure Machine Learning** extension to your organization from the [marketplace](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.vss-services-azureml), diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py index 2b41f12c..ff57b7e3 100644 --- a/ml_service/pipelines/build_train_pipeline.py +++ b/ml_service/pipelines/build_train_pipeline.py @@ -3,6 +3,7 @@ from azureml.pipeline.core import Pipeline from azureml.core import Workspace from azureml.core.runconfig import RunConfiguration, CondaDependencies +from azureml.core import Dataset, Datastore import os import sys sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402 @@ -35,10 +36,10 @@ def main(): 'scikit-learn', 'tensorflow', 'keras'], pip_packages=['azure', 'azureml-core', 'azure-storage', - 'azure-storage-blob']) + 'azure-storage-blob', + 'azureml-dataprep']) ) run_config.environment.docker.enabled = True - config_envvar = {} if (e.collection_uri is not None and e.teamproject_name is not None): builduri_base = e.collection_uri + e.teamproject_name @@ -53,6 +54,17 @@ def main(): hyperparameter_alpha_param = PipelineParameter( name="hyperparameter_alpha", default_value=0.5) + + if (e.datastore_name is not None and e.datafile_name is not None): + dataset_name = e.dataset_name + datastore = Datastore.get(aml_workspace, e.datastore_name) + dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, e.datafile_name)]) + dataset.register(workspace=aml_workspace, + name=e.dataset_name, + description="dataset with training data") + else: + dataset_name = None + train_step = PythonScriptStep( name="Train Model", script_name=e.train_script_path, @@ -62,6 +74,7 @@ def main(): "--build_id", build_id_param, "--model_name", model_name_param, "--alpha", hyperparameter_alpha_param, + "--dataset_name", dataset_name, ], runconfig=run_config, allow_reuse=False, diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py index ec13ac95..99d1686a 100644 --- a/ml_service/util/env_variables.py +++ b/ml_service/util/env_variables.py @@ -41,6 +41,9 @@ def __init__(self): self._score_script = os.environ.get("SCORE_SCRIPT") self._collection_uri = os.environ.get("SYSTEM_COLLECTIONURI") self._teamproject_name = os.environ.get("SYSTEM_TEAMPROJECT") + self._datastore_name = os.environ.get("DATASTORE_NAME") + self._datafile_name = os.environ.get("DATAFILE_NAME") + self._dataset_name = os.environ.get("DATASET_NAME") @property def workspace_name(self): @@ -145,3 +148,15 @@ def collection_uri(self): @property def teamproject_name(self): return self._teamproject_name + + @property + def datastore_name(self): + return self._datastore_name + + @property + def datafile_name(self): + return self._datafile_name + + @property + def dataset_name(self): + return self._dataset_name From 4efa969a51abab0e2bca10e30080679638e6a0b6 Mon Sep 17 00:00:00 2001 From: Eugene Fedorenko Date: Thu, 9 Jan 2020 14:33:50 -0800 Subject: [PATCH 02/15] merge with unassigned variable fix --- code/register/register_model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/code/register/register_model.py b/code/register/register_model.py index ec50b80e..b2d454bb 100644 --- a/code/register/register_model.py +++ b/code/register/register_model.py @@ -89,7 +89,6 @@ def main(): else: run.tag("BuildId", value=build_id) builduri_base = os.environ.get("BUILDURI_BASE") - build_uri = None if (builduri_base is not None): build_uri = builduri_base + build_id run.tag("BuildUri", value=build_uri) From 8000e9e5aeef2dd7f487452cbabaadfaaaa09c89 Mon Sep 17 00:00:00 2001 From: Eugene Fedorenko Date: Thu, 9 Jan 2020 14:59:31 -0800 Subject: [PATCH 03/15] bugfix --- code/training/train.py | 2 +- ml_service/pipelines/build_train_pipeline.py | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/code/training/train.py b/code/training/train.py index 8f46acbe..4a3c6148 100644 --- a/code/training/train.py +++ b/code/training/train.py @@ -90,7 +90,7 @@ def main(): run = Run.get_context() ws = run.experiment.workspace - if (dataset_name is not None): + if (dataset_name): dataset = Dataset.get_by_name(workspace=ws, name=dataset_name) df = dataset.to_pandas_dataframe() X = df.values diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py index ff57b7e3..3297c389 100644 --- a/ml_service/pipelines/build_train_pipeline.py +++ b/ml_service/pipelines/build_train_pipeline.py @@ -53,8 +53,8 @@ def main(): name="build_id", default_value=e.build_id) hyperparameter_alpha_param = PipelineParameter( name="hyperparameter_alpha", default_value=0.5) - + dataset_name = "" if (e.datastore_name is not None and e.datafile_name is not None): dataset_name = e.dataset_name datastore = Datastore.get(aml_workspace, e.datastore_name) @@ -62,9 +62,7 @@ def main(): dataset.register(workspace=aml_workspace, name=e.dataset_name, description="dataset with training data") - else: - dataset_name = None - + train_step = PythonScriptStep( name="Train Model", script_name=e.train_script_path, From 656add0ae09aa62a43af07d6e2417236db786542 Mon Sep 17 00:00:00 2001 From: Eugene Fedorenko Date: Thu, 9 Jan 2020 15:19:45 -0800 Subject: [PATCH 04/15] typo --- docs/getting_started.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index e2caeed6..f082262b 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -122,7 +122,7 @@ Check out the newly created resources in the [Azure Portal](portal.azure.com): (Optional) To remove the resources created for this project you can use the [/environment_setup/iac-remove-environment.yml](../environment_setup/iac-remove-environment.yml) definition or you can just delete the resource group in the [Azure Portal](portal.azure.com). -**Note:** The training ML pipelne uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and resgister a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). Having done that, you'll need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. +**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and resgister a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). Having done that, you'll need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. ## Create an Azure DevOps Azure ML Workspace Service Connection From 8ac54d2d8a7289f427bb038430fd51a6be28cdef Mon Sep 17 00:00:00 2001 From: Eugene Fedorenko Date: Thu, 9 Jan 2020 15:28:35 -0800 Subject: [PATCH 05/15] linting --- code/training/train.py | 2 +- ml_service/pipelines/build_train_pipeline.py | 13 +++++++------ ml_service/util/env_variables.py | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/code/training/train.py b/code/training/train.py index 4a3c6148..ebcfee96 100644 --- a/code/training/train.py +++ b/code/training/train.py @@ -89,7 +89,7 @@ def main(): run = Run.get_context() ws = run.experiment.workspace - + if (dataset_name): dataset = Dataset.get_by_name(workspace=ws, name=dataset_name) df = dataset.to_pandas_dataframe() diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py index 3297c389..ebdc55de 100644 --- a/ml_service/pipelines/build_train_pipeline.py +++ b/ml_service/pipelines/build_train_pipeline.py @@ -53,16 +53,17 @@ def main(): name="build_id", default_value=e.build_id) hyperparameter_alpha_param = PipelineParameter( name="hyperparameter_alpha", default_value=0.5) - + dataset_name = "" if (e.datastore_name is not None and e.datafile_name is not None): dataset_name = e.dataset_name - datastore = Datastore.get(aml_workspace, e.datastore_name) - dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, e.datafile_name)]) + datastore = Datastore.get(aml_workspace, e.datastore_name) + dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, + e.datafile_name)]) dataset.register(workspace=aml_workspace, - name=e.dataset_name, - description="dataset with training data") - + name=e.dataset_name, + description="dataset with training data") + train_step = PythonScriptStep( name="Train Model", script_name=e.train_script_path, diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py index 99d1686a..ed3be221 100644 --- a/ml_service/util/env_variables.py +++ b/ml_service/util/env_variables.py @@ -42,8 +42,8 @@ def __init__(self): self._collection_uri = os.environ.get("SYSTEM_COLLECTIONURI") self._teamproject_name = os.environ.get("SYSTEM_TEAMPROJECT") self._datastore_name = os.environ.get("DATASTORE_NAME") - self._datafile_name = os.environ.get("DATAFILE_NAME") - self._dataset_name = os.environ.get("DATASET_NAME") + self._datafile_name = os.environ.get("DATAFILE_NAME") + self._dataset_name = os.environ.get("DATASET_NAME") @property def workspace_name(self): From ba9bccd0a8b5593d719e49178a57fa54762a819b Mon Sep 17 00:00:00 2001 From: Eugene Fedorenko Date: Thu, 9 Jan 2020 15:34:03 -0800 Subject: [PATCH 06/15] linting --- code/training/train.py | 2 +- ml_service/pipelines/build_train_pipeline.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/code/training/train.py b/code/training/train.py index ebcfee96..f56daa99 100644 --- a/code/training/train.py +++ b/code/training/train.py @@ -69,7 +69,7 @@ def main(): help=("Ridge regression regularization strength hyperparameter; " "must be a positive float.") ) - + parser.add_argument( "--dataset_name", type=str, diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py index ebdc55de..314c7311 100644 --- a/ml_service/pipelines/build_train_pipeline.py +++ b/ml_service/pipelines/build_train_pipeline.py @@ -58,8 +58,9 @@ def main(): if (e.datastore_name is not None and e.datafile_name is not None): dataset_name = e.dataset_name datastore = Datastore.get(aml_workspace, e.datastore_name) - dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, - e.datafile_name)]) + dataset = Dataset.Tabular. + from_delimited_files(path=[(datastore, + e.datafile_name)]) dataset.register(workspace=aml_workspace, name=e.dataset_name, description="dataset with training data") From 1b262887c87e248eae1203f58c5bbae4ebf59f76 Mon Sep 17 00:00:00 2001 From: Eugene Fedorenko Date: Thu, 9 Jan 2020 15:39:13 -0800 Subject: [PATCH 07/15] linting --- ml_service/pipelines/build_train_pipeline.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py index 314c7311..6df29800 100644 --- a/ml_service/pipelines/build_train_pipeline.py +++ b/ml_service/pipelines/build_train_pipeline.py @@ -58,9 +58,8 @@ def main(): if (e.datastore_name is not None and e.datafile_name is not None): dataset_name = e.dataset_name datastore = Datastore.get(aml_workspace, e.datastore_name) - dataset = Dataset.Tabular. - from_delimited_files(path=[(datastore, - e.datafile_name)]) + data_path = [(datastore, e.datafile_name)] + dataset = Dataset.Tabular.from_delimited_files(path=data_path) dataset.register(workspace=aml_workspace, name=e.dataset_name, description="dataset with training data") From a92fc84b6cd60f4f322f3c94740018b21100c547 Mon Sep 17 00:00:00 2001 From: Eugene Fedorenko Date: Fri, 10 Jan 2020 09:29:39 -0800 Subject: [PATCH 08/15] added a link to az cli --- docs/getting_started.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index f082262b..4ccc98aa 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -122,7 +122,7 @@ Check out the newly created resources in the [Azure Portal](portal.azure.com): (Optional) To remove the resources created for this project you can use the [/environment_setup/iac-remove-environment.yml](../environment_setup/iac-remove-environment.yml) definition or you can just delete the resource group in the [Azure Portal](portal.azure.com). -**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and resgister a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). Having done that, you'll need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. +**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and resgister a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). Having done that, you'll need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. ## Create an Azure DevOps Azure ML Workspace Service Connection From 3d345aac55c81fbeb2d2895ab6bfa76f6c621997 Mon Sep 17 00:00:00 2001 From: Eugene Fedorenko Date: Fri, 10 Jan 2020 10:00:45 -0800 Subject: [PATCH 09/15] doc update --- docs/getting_started.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index 4ccc98aa..0ae38469 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -122,7 +122,8 @@ Check out the newly created resources in the [Azure Portal](portal.azure.com): (Optional) To remove the resources created for this project you can use the [/environment_setup/iac-remove-environment.yml](../environment_setup/iac-remove-environment.yml) definition or you can just delete the resource group in the [Azure Portal](portal.azure.com). -**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and resgister a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). Having done that, you'll need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. +**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and resgister a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). +Having done that, you'll need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. ## Create an Azure DevOps Azure ML Workspace Service Connection From b207921fccb1c5d18cf72ec106c0da0d4b8d4e2a Mon Sep 17 00:00:00 2001 From: Eugene Fedorenko Date: Fri, 10 Jan 2020 11:46:28 -0800 Subject: [PATCH 10/15] reregistering a dataset --- ml_service/pipelines/build_train_pipeline.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py index 6df29800..0b86eb50 100644 --- a/ml_service/pipelines/build_train_pipeline.py +++ b/ml_service/pipelines/build_train_pipeline.py @@ -62,7 +62,8 @@ def main(): dataset = Dataset.Tabular.from_delimited_files(path=data_path) dataset.register(workspace=aml_workspace, name=e.dataset_name, - description="dataset with training data") + description="dataset with training data", + create_new_version=True) train_step = PythonScriptStep( name="Train Model", From d1c4d3f4cbdaeb64425c883c21119d6b16c549a2 Mon Sep 17 00:00:00 2001 From: Eugene Fedorenko Date: Fri, 10 Jan 2020 15:19:10 -0800 Subject: [PATCH 11/15] typo --- docs/getting_started.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index 0ae38469..71760ee5 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -122,7 +122,7 @@ Check out the newly created resources in the [Azure Portal](portal.azure.com): (Optional) To remove the resources created for this project you can use the [/environment_setup/iac-remove-environment.yml](../environment_setup/iac-remove-environment.yml) definition or you can just delete the resource group in the [Azure Portal](portal.azure.com). -**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and resgister a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). +**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and register a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). Having done that, you'll need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. From 32a80034abd503628929cd99c23bad5292abdb86 Mon Sep 17 00:00:00 2001 From: Eugene Fedorenko Date: Fri, 10 Jan 2020 15:20:34 -0800 Subject: [PATCH 12/15] rephrasing --- docs/getting_started.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index 71760ee5..f836340e 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -123,7 +123,7 @@ Check out the newly created resources in the [Azure Portal](portal.azure.com): (Optional) To remove the resources created for this project you can use the [/environment_setup/iac-remove-environment.yml](../environment_setup/iac-remove-environment.yml) definition or you can just delete the resource group in the [Azure Portal](portal.azure.com). **Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and register a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). -Having done that, you'll need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. +You'll also need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. ## Create an Azure DevOps Azure ML Workspace Service Connection From 39001ca7209d71f29817ae338b83e7760d6a9658 Mon Sep 17 00:00:00 2001 From: Eugene Fedorenko Date: Fri, 10 Jan 2020 15:23:34 -0800 Subject: [PATCH 13/15] rephrasing --- docs/getting_started.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index f836340e..a46d5304 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -122,7 +122,7 @@ Check out the newly created resources in the [Azure Portal](portal.azure.com): (Optional) To remove the resources created for this project you can use the [/environment_setup/iac-remove-environment.yml](../environment_setup/iac-remove-environment.yml) definition or you can just delete the resource group in the [Azure Portal](portal.azure.com). -**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and register a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and put the datafile in the corresponding blob container (e.g. [diabetes.csv](./data/diabetes.csv)). You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). +**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and register a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and upload the datafile (e.g. [diabetes.csv](./data/diabetes.csv)) to the corresponding blob container. You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). You'll also need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. From f623860c1ed562e39bfb1c9090e4f498ca817afc Mon Sep 17 00:00:00 2001 From: Eugene Fedorenko Date: Fri, 10 Jan 2020 15:51:51 -0800 Subject: [PATCH 14/15] auth enabled --- code/scoring/deployment_config_aci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/code/scoring/deployment_config_aci.yml b/code/scoring/deployment_config_aci.yml index 939483b5..f6cbe341 100644 --- a/code/scoring/deployment_config_aci.yml +++ b/code/scoring/deployment_config_aci.yml @@ -2,4 +2,5 @@ containerResourceRequirements: cpu: 1 memoryInGB: 4 -computeType: ACI \ No newline at end of file +computeType: ACI +authEnabled: True \ No newline at end of file From 82e709d5ab10e10614cb45aed5ce6c9017f9efc2 Mon Sep 17 00:00:00 2001 From: Eugene Fedorenko Date: Fri, 10 Jan 2020 16:07:48 -0800 Subject: [PATCH 15/15] revert auth enabled --- code/scoring/deployment_config_aci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/code/scoring/deployment_config_aci.yml b/code/scoring/deployment_config_aci.yml index f6cbe341..939483b5 100644 --- a/code/scoring/deployment_config_aci.yml +++ b/code/scoring/deployment_config_aci.yml @@ -2,5 +2,4 @@ containerResourceRequirements: cpu: 1 memoryInGB: 4 -computeType: ACI -authEnabled: True \ No newline at end of file +computeType: ACI \ No newline at end of file