Create a Random Forest ML model in Bigquery and use it to make predictions

A code sample to create a random forest machine learning model in BigQuery and make predictions all using SQL.

-- Build a random forest model to predict the type of flower
-- based on the iris dataset.
-- Data source: https://archive.ics.uci.edu/dataset/53/iris
-- Layout:
/*
| sepal_length | sepal_width | petal_length | petal_width | species     |
| ------------ | ----------- | ------------ | ----------- | ----------- |
|          5.1 |         3.5 |          1.4 |         0.2 | Iris-setosa |
|          4.9 |         3.0 |          1.4 |         0.2 | Iris-setosa |
|          4.7 |         3.2 |          1.3 |         0.2 | Iris-setosa |
|          4.6 |         3.1 |          1.5 |         0.2 | Iris-setosa |
|          5.0 |         3.6 |          1.4 |         0.2 | Iris-setosa |
*/
-- Docs: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-create-random-forest#example

CREATE MODEL `marquin-personal-tools.marquinsmith_dot_com.iris_forest_01`
OPTIONS(MODEL_TYPE='RANDOM_FOREST_CLASSIFIER',
        NUM_PARALLEL_TREE = 50,
        TREE_METHOD = 'HIST',
        EARLY_STOP = FALSE,
        SUBSAMPLE = 0.85,
        INPUT_LABEL_COLS = ['class'])
AS SELECT * FROM `marquin-personal-tools.marquinsmith_dot_com.iris_test`;


-- With the created random forest model create predictions
-- Google docs: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-predict#examples

SELECT 
    CASE WHEN predicted_class = class THEN "correct" ELSE "wrong" END AS correct_flag,
    *
FROM ML.PREDICT(
    MODEL `marquin-personal-tools.marquinsmith_dot_com.iris_forest_01`, 
    (
        SELECT
            class,
            sepal_length,
            sepal_width,
            petal_length,
            petal_width
        FROM `marquin-personal-tools.marquinsmith_dot_com.iris_test`
    )
);

Send a Comment

Your email address will not be published.