A code sample to create a random forest machine learning model in BigQuery and make predictions all using SQL.

-- Build a random forest model to predict the type of flower
-- based on the iris dataset.
-- Data source: https://archive.ics.uci.edu/dataset/53/iris
-- Layout:
/*
| sepal_length | sepal_width | petal_length | petal_width | species |
| ------------ | ----------- | ------------ | ----------- | ----------- |
| 5.1 | 3.5 | 1.4 | 0.2 | Iris-setosa |
| 4.9 | 3.0 | 1.4 | 0.2 | Iris-setosa |
| 4.7 | 3.2 | 1.3 | 0.2 | Iris-setosa |
| 4.6 | 3.1 | 1.5 | 0.2 | Iris-setosa |
| 5.0 | 3.6 | 1.4 | 0.2 | Iris-setosa |
*/
-- Docs: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-create-random-forest#example
CREATE MODEL `marquin-personal-tools.marquinsmith_dot_com.iris_forest_01`
OPTIONS(MODEL_TYPE='RANDOM_FOREST_CLASSIFIER',
NUM_PARALLEL_TREE = 50,
TREE_METHOD = 'HIST',
EARLY_STOP = FALSE,
SUBSAMPLE = 0.85,
INPUT_LABEL_COLS = ['class'])
AS SELECT * FROM `marquin-personal-tools.marquinsmith_dot_com.iris_test`;
-- With the created random forest model create predictions
-- Google docs: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-predict#examples
SELECT
CASE WHEN predicted_class = class THEN "correct" ELSE "wrong" END AS correct_flag,
*
FROM ML.PREDICT(
MODEL `marquin-personal-tools.marquinsmith_dot_com.iris_forest_01`,
(
SELECT
class,
sepal_length,
sepal_width,
petal_length,
petal_width
FROM `marquin-personal-tools.marquinsmith_dot_com.iris_test`
)
);