From dcf3b263f8761c7a83be81d6b67183127797dd0a Mon Sep 17 00:00:00 2001 From: Salem Boyland Date: Mon, 29 Jan 2024 14:07:21 -0600 Subject: [PATCH 1/3] create_single_timeseries_forecasting_model_test.py code sample --- ...ingle_timeseries_forecasting_model_test.py | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 samples/snippets/create_single_timeseries_forecasting_model_test.py diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py new file mode 100644 index 0000000000..a6df5f0297 --- /dev/null +++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py @@ -0,0 +1,66 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def test_create_single_timeseries(random_model_id): + your_model_id = random_model_id + + # [START bigquery_dataframes_single_timeseries_forecasting_model_tutorial] + import bigframes.pandas as bpd + + # Start by selecting the data you'll use for training. `read_gbq` accepts + # either a SQL query or a table ID. Since this example selects from multiple + # tables via a wildcard, use SQL to define this data. Watch issue + # https://github.com/googleapis/python-bigquery-dataframes/issues/169 + # for updates to `read_gbq` to support wildcard tables. + + # Read and visualize the time series you want to forecast. 
+ df = bpd.read_gbq(''' + SELECT PARSE_TIMESTAMP("%Y%m%d", date) AS parsed_date, + SUM(totals.visits) AS total_visits + FROM + `bigquery-public-data.google_analytics_sample.ga_sessions_*` + GROUP BY date + ''') + X = df[["parsed_date"]] + y = df[["total_visits"]] + + # Create an Arima-based time series model using the Google Analytics 360 data. + from bigframes.ml.forecasting import ARIMAPlus + + ga_arima_model = ARIMAPlus() + + # Fit the model to your dataframe. + ga_arima_model.fit(X,y) + + # The model.fit() call above created a temporary model. + # Use the to_gbq() method to write to a permanent location. + ga_arima_model.to_gbq( + your_model_id, # For example: "bqml_tutorial.sample_model", + replace=True, + ) + + # Inspect the evaluation metrics of all evaluated models. + # when ruuning this function use same model, dataset, model name (str) + evaluation = ga_arima_model.summary( + f''' + SELECT * + FROM ML.ARIMA_EVALUATE(MODEL `{your_model_id}`) + ''' + ) + + print(evaluation) + # Inspect the coefficients of your model + + \ No newline at end of file From adc22ef8a872227ee49ee0907b35eb8e5775bab9 Mon Sep 17 00:00:00 2001 From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com> Date: Tue, 20 Feb 2024 09:46:43 -0600 Subject: [PATCH 2/3] fix: forecast method to forecast time series --- ...ingle_timeseries_forecasting_model_test.py | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py index a6df5f0297..6429f613a4 100644 --- a/samples/snippets/create_single_timeseries_forecasting_model_test.py +++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py @@ -52,15 +52,30 @@ def test_create_single_timeseries(random_model_id): ) # Inspect the evaluation metrics of all evaluated models. 
- # when ruuning this function use same model, dataset, model name (str) + # when running this function use same model, dataset, model name (str) evaluation = ga_arima_model.summary( - f''' - SELECT * - FROM ML.ARIMA_EVALUATE(MODEL `{your_model_id}`) - ''' + show_all_candidate_models = False, ) print(evaluation) + # Inspect the coefficients of your model - - \ No newline at end of file + f''' + SELECT * + FROM ML.ARIMA_COEFFICIENTS(MODEL `{your_model_id}`) + ''' + evaluation.ML.ARIMA_COEFFICIENTS() + + # Use your model to forecast the time series + #standardSQL + your_model_id.forecast() + + # Explain and visualize the forecasting results + f''' + SELECT * + FROM ML.EXPLAIN_FORECAST( + MODEL `{your_model_id}`, + STRUCT( + [horizon AS horizon] + [, confidence_level AS confidence_level])) + ''' \ No newline at end of file From d3ea7c79affca6d1edcf38da84c9525ed61df765 Mon Sep 17 00:00:00 2001 From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com> Date: Wed, 13 Mar 2024 12:10:10 -0500 Subject: [PATCH 3/3] pair programming PR draft creation --- ...ingle_timeseries_forecasting_model_test.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py index 6429f613a4..a91f6d07b7 100644 --- a/samples/snippets/create_single_timeseries_forecasting_model_test.py +++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py @@ -26,21 +26,21 @@ def test_create_single_timeseries(random_model_id): # for updates to `read_gbq` to support wildcard tables. # Read and visualize the time series you want to forecast. 
- df = bpd.read_gbq(''' - SELECT PARSE_TIMESTAMP("%Y%m%d", date) AS parsed_date, - SUM(totals.visits) AS total_visits - FROM - `bigquery-public-data.google_analytics_sample.ga_sessions_*` - GROUP BY date - ''') - X = df[["parsed_date"]] - y = df[["total_visits"]] + df = bpd.read_gbq( + 'bigquery-public-data.google_analytics_sample.ga_sessions_*' + ) + parsed_date = bpd.to_datetime(df.date, format= "%Y%m%d", utc = True) + total_visits = df.groupby(["date"])["parsed_date"].sum() + visits = df["totals"].struct.field("visits") - # Create an Arima-based time series model using the Google Analytics 360 data. + # Create an Arima-based time series model using the Google Analytics 360 data. from bigframes.ml.forecasting import ARIMAPlus ga_arima_model = ARIMAPlus() + X = df[["parsed_date"]] + y = df[["total_visits"]] + # Fit the model to your dataframe. ga_arima_model.fit(X,y)