From e9fec0ea8d8304e8f6f344e820c5b40e55111690 Mon Sep 17 00:00:00 2001 From: Kamesh Sampath Date: Wed, 23 Oct 2024 16:25:14 +0530 Subject: [PATCH] (fix!): Refactor Enable GH Action - Ability to execute Py and SQL scripts - Template Streamlit app - Use `snow object` for creating objects - Clean environment variables --- .github/workflows/cleanup.yml | 50 +++++++++++++++ .github/workflows/snow.yml | 86 +++++++++++++++++++++++++ .gitignore | 1 + .tool-versions | 1 + README.md | 116 ++++++++++++++++++++++------------ app/.gitignore | 1 + app/snowflake.yml | 13 +++- cleanup.sql | 4 +- config/config.toml | 5 ++ todos.csv => data/todos.csv | 0 de/0_warehouses.py | 17 +++++ de/1_databases.py | 18 ++++++ de/2_schemas.py | 18 ++++++ de/3_tables.py | 39 ++++++++++++ de/4_stages.py | 23 +++++++ de/5_load.sql | 25 ++++++++ requirements.txt | 4 ++ todos.sql | 43 ------------- 18 files changed, 377 insertions(+), 87 deletions(-) create mode 100644 .github/workflows/cleanup.yml create mode 100644 .github/workflows/snow.yml create mode 100644 .gitignore create mode 100644 .tool-versions create mode 100644 config/config.toml rename todos.csv => data/todos.csv (100%) create mode 100644 de/0_warehouses.py create mode 100644 de/1_databases.py create mode 100644 de/2_schemas.py create mode 100644 de/3_tables.py create mode 100644 de/4_stages.py create mode 100644 de/5_load.sql create mode 100644 requirements.txt delete mode 100644 todos.sql diff --git a/.github/workflows/cleanup.yml b/.github/workflows/cleanup.yml new file mode 100644 index 0000000..145142f --- /dev/null +++ b/.github/workflows/cleanup.yml @@ -0,0 +1,50 @@ +name: Clean up +on: + workflow_dispatch: +env: + SNOWFLAKE_DEFAULT_CONNECTION_NAME: "workflow" + SNOWFLAKE_CONNECTIONS_WORKFLOW_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }} + SNOWFLAKE_CONNECTIONS_WORKFLOW_USER: ${{ secrets.SNOWFLAKE_USER }} + SNOWFLAKE_CONNECTIONS_WORKFLOW_PRIVATE_KEY_PASSPHRASE: + ${{ secrets.PRIVATE_KEY_PASSPHRASE }} + 
SNOWFLAKE_CONNECTIONS_WORKFLOW_PRIVATE_KEY_RAW: + ${{ secrets.PRIVATE_KEY_RAW }} + GIT_REPO_DB: MY_GIT_REPOS + GIT_REPO_SCHEMA: GITHUB + GIT_REPO_NAME: snow_cli_demo + TODO_APP_NAME: todos_app + TODO_APP_WH: TODO_APP_WH + TODO_APP_DB: TODO_APP_DB + TODO_APP_SCHEMA: APPS + TODOS_DATA_SCHEMA: DATA +jobs: + Deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + # check out the ref that triggered this run (the env context has no GITHUB_REF_NAME) + ref: ${{ github.ref_name }} + - uses: Snowflake-Labs/snowflake-cli-action@v1.5 + with: + cli-version: "latest" + default-config-file-path: + ${{ github.workspace }}/config/config.toml + - name: Check Version and Verify Connection + env: + PRIVATE_KEY_PASSPHRASE: ${{ secrets.PRIVATE_KEY_PASSPHRASE }} + run: | + snow --version + snow connection test + echo "Using branch $GITHUB_REF_NAME" + + - name: Clean up Todos App Database Objects + env: + PRIVATE_KEY_PASSPHRASE: ${{ secrets.PRIVATE_KEY_PASSPHRASE }} + GIT_REPO_FQN: + ${{ env.GIT_REPO_DB }}.${{ env.GIT_REPO_SCHEMA }}.${{ + env.GIT_REPO_NAME }} + run: | + snow git execute "@$GIT_REPO_FQN/branches/$GITHUB_REF_NAME/cleanup.sql" \ + --variable "db_name='$TODO_APP_DB'" \ + --variable "git_repo_name='$GIT_REPO_FQN'" diff --git a/.github/workflows/snow.yml b/.github/workflows/snow.yml new file mode 100644 index 0000000..8965991 --- /dev/null +++ b/.github/workflows/snow.yml @@ -0,0 +1,86 @@ +name: Deploy +on: + workflow_dispatch: + release: + types: [published] + push: + paths: + - "de/**" + - "data/*.csv" + - "app/**" +env: + SNOWFLAKE_DEFAULT_CONNECTION_NAME: "workflow" + SNOWFLAKE_CONNECTIONS_WORKFLOW_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }} + SNOWFLAKE_CONNECTIONS_WORKFLOW_USER: ${{ secrets.SNOWFLAKE_USER }} + SNOWFLAKE_CONNECTIONS_WORKFLOW_PRIVATE_KEY_PASSPHRASE: + ${{ secrets.PRIVATE_KEY_PASSPHRASE }} + SNOWFLAKE_CONNECTIONS_WORKFLOW_PRIVATE_KEY_RAW: + ${{ secrets.PRIVATE_KEY_RAW }} + GIT_REPO_DB: MY_GIT_REPOS + GIT_REPO_SCHEMA: GITHUB + GIT_REPO_NAME: snow_cli_demo + TODO_APP_NAME: 
todos_app + TODO_APP_WH: TODO_APP_WH + TODO_APP_DB: TODO_APP_DB + TODO_APP_SCHEMA: APPS + TODOS_DATA_SCHEMA: DATA +jobs: + Deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + # check out the ref that triggered this run (the env context has no GITHUB_REF_NAME) + ref: ${{ github.ref_name }} + - uses: Snowflake-Labs/snowflake-cli-action@v1.5 + with: + cli-version: "latest" + default-config-file-path: + ${{ github.workspace }}/config/config.toml + - name: Check Version and Verify Connection + env: + PRIVATE_KEY_PASSPHRASE: ${{ secrets.PRIVATE_KEY_PASSPHRASE }} + run: | + snow --version + snow connection test + echo "Using branch $GITHUB_REF_NAME" + + - name: Refresh Repo + env: + PRIVATE_KEY_PASSPHRASE: ${{ secrets.PRIVATE_KEY_PASSPHRASE }} + GIT_REPO_FQN: + ${{ env.GIT_REPO_DB }}.${{ env.GIT_REPO_SCHEMA }}.${{ + env.GIT_REPO_NAME }} + run: | + snow git fetch "$GIT_REPO_FQN" + + - name: Setup Todos App Database Objects + env: + PRIVATE_KEY_PASSPHRASE: ${{ secrets.PRIVATE_KEY_PASSPHRASE }} + GIT_REPO_FQN: + ${{ env.GIT_REPO_DB }}.${{ env.GIT_REPO_SCHEMA }}.${{ + env.GIT_REPO_NAME }} + run: | + snow git execute @"${GIT_REPO_FQN}/branches/${GITHUB_REF_NAME}/de/" \ + --variable "db_name='$TODO_APP_DB'" \ + --variable "schema_name='$TODOS_DATA_SCHEMA'" \ + --variable "wh_name='$TODO_APP_WH'" \ + --variable "git_repo_name='$GIT_REPO_FQN'" \ + --variable "git_branch='$GITHUB_REF_NAME'" \ + --database $GIT_REPO_DB --schema $GIT_REPO_SCHEMA + + - name: Deploy TODOs Streamlit App + env: + PRIVATE_KEY_PASSPHRASE: ${{ secrets.PRIVATE_KEY_PASSPHRASE }} + run: | + snow streamlit deploy --replace \ + --database $TODO_APP_DB --schema $TODO_APP_SCHEMA + working-directory: app + + - name: Get App URL + env: + PRIVATE_KEY_PASSPHRASE: ${{ secrets.PRIVATE_KEY_PASSPHRASE }} + run: | + snow streamlit get-url $TODO_APP_NAME \ + --database $TODO_APP_DB --schema $TODO_APP_SCHEMA + working-directory: app diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a340c10 --- /dev/null +++ 
b/.gitignore @@ -0,0 +1 @@ +work \ No newline at end of file diff --git a/.tool-versions b/.tool-versions new file mode 100644 index 0000000..8aa451a --- /dev/null +++ b/.tool-versions @@ -0,0 +1 @@ +python 3.11.9 diff --git a/README.md b/README.md index 914fafb..b059dd8 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,18 @@ -# Snowflake Git Integration: 60-Second Guide for Devs ❄️🐙⏱️ +# Using Snowflake CLI ❄️❄️❄️❄️❄️🐙⏱️❄️❄️❄️❄️ -A quick demo of [Snowflake's Git integration](https://docs.snowflake.com/en/developer-guide/git/git-setting-up) , version control for your data workflows in under a minute? You bet! ⚡❄️. +A quick demo of using a few killer features of [SNOW CLI](https://docs.snowflake.com/en/developer-guide/snowflake-cli/index)(`snow`) that will make your everyday Snowflake development tasks a breeze. -This demo uses[SNOW CLI](https://docs.snowflake.com/en/developer-guide/snowflake-cli/index) to perform all tasks from the CLI. +As part of this quick demo we will see how to use `snow` for, + +- [x] [Snowflake's Git integration](https://docs.snowflake.com/en/developer-guide/git/git-setting-up) +- [x] Deploying [Streamlit in Snowflake](https://docs.snowflake.com/en/developer-guide/streamlit/about-streamlit) +- [ ] Setup CI/CD using [Snowflake CLI](https://github.com/Snowflake-Labs/snowflake-cli-action) GitHub Action + +> [!WARNING] +> Currently the demo works only with SNOW CLI 3.1+. 
This demo was last tested with 3.1RC4 +> ```shell +> pip install -U git+https://github.com/snowflakedb/snowflake-cli.git@v3.1.0-rc4 +> ``` ## Pre-requisites @@ -10,24 +20,42 @@ This demo uses[SNOW CLI](https://docs.snowflake.com/en/developer-guide/snowflake - [GitHub](https://github.com) Account - [SNOW CLI](https://docs.snowflake.com/en/developer-guide/snowflake-cli/index) +If you want to use the GitHub Action, add the following GitHub Actions repository secrets to your fork: + +- `SNOWFLAKE_ACCOUNT` - The Snowflake Account ID +- `SNOWFLAKE_USER` - The user to perform all operations, for simple checks try with `ACCOUNTADMIN` before trying granular roles +- `PRIVATE_KEY_PASSPHRASE` - The Passphrase to be used to decrypt the Private Key +- `PRIVATE_KEY_RAW` - The **ENCRYPTED** private key to be used when connecting via the GH Action. + ### Snowflake Environment Create a database, schema and a warehouse to use to hold all the git repos and related objects +We will set them as our default database, schema and warehouse for the rest of the demo, + ```shell -snow sql --stdin < [!NOTE] +> **ONLY IF GH Action is Enabled** +> Update anything under the app, de or data folders. Commit and push to see the GH Action trigger and update your Snowflake data and app ## Deploy Streamlit Application @@ -113,16 +151,11 @@ There is simple Streamlit application that is available under [app](./app) direc cd app ``` -Let us create a schema to deploy the Todo APP, - -```shell -snow sql -q 'create schema if not exists apps' --dbname="$TODO_DB_NAME" -``` - Deploy Streamlit app, ```shell -snow streamlit deploy --dbname="$TODO_DB_NAME" --schema="apps" +snow streamlit deploy --replace \ + --database $TODO_APP_DB --schema $TODO_APP_SCHEMA ``` You can use the URL from the output of the successful deployment to access the application. 
@@ -130,18 +163,21 @@ You can use the URL from the output of the successful deployment to access the a >[!TIP] > You can also get the URL of the application anytime using the command >```shell -> snow streamlit get-url todo_app --dbname=--dbname="$TODO_DB_NAME" --schema="apps" +> snow streamlit get-url todos_app > ``` > You can find the app name in [snowflake.yml](./app/snowflake.yml) ## Cleanup ```shell -snow git execute "@$GIT_REPO_NAME/branches/$GIT_BRANCH/cleanup.sql" \ - --variable "db_name='$TODO_DB_NAME'" +snow git execute "@$GIT_REPO_FQN/branches/$GIT_BRANCH/cleanup.sql" \ + --variable "db_name='$TODO_APP_DB'" \ + --variable "git_repo_name='$GIT_REPO_FQN'" ``` -Verify clean up and the `$TODO_DB_NAME` should be listed, +It should delete the `$TODO_APP_DB` and the `MY_GIT_REPOS.GITHUB.SNOW_CLI_DEMO` Git repository. + +Verify clean up and the `$TODO_APP_DB` should not be listed, ```shell snow sql -q "SHOW DATABASES" diff --git a/app/.gitignore b/app/.gitignore index 55f74f6..a266984 100644 --- a/app/.gitignore +++ b/app/.gitignore @@ -2,3 +2,4 @@ .venv/ app.zip __pycache__ +TODO.md \ No newline at end of file diff --git a/app/snowflake.yml b/app/snowflake.yml index a89c80e..b2a9f57 100644 --- a/app/snowflake.yml +++ b/app/snowflake.yml @@ -1,13 +1,20 @@ definition_version: "2" +env: + TODO_APP_NAME: todos_app + TODO_APP_DB: TODO_APP_DB + TODO_APP_SCHEMA: APPS + TODO_APP_WH: TODO_APP_WH entities: todos_app: type: streamlit identifier: - name: todos_app + name: <% ctx.env.TODO_APP_NAME %> + database: <% ctx.env.TODO_APP_DB %> + schema: <% ctx.env.TODO_APP_SCHEMA %> main_file: streamlit_app.py pages_dir: pages - query_warehouse: todo_app_wh - stage: todos_app + query_warehouse: <% ctx.env.TODO_APP_WH %> + stage: <% ctx.env.TODO_APP_NAME %> artifacts: - streamlit_app.py - environment.yml diff --git a/cleanup.sql b/cleanup.sql index 1298766..6cb5cbc 100644 --- a/cleanup.sql +++ b/cleanup.sql @@ -1,4 +1,6 @@ --!jinja USE ROLE ACCOUNTADMIN; -DROP DATABASE IF EXISTS 
{{db_name}}; \ No newline at end of file +DROP DATABASE IF EXISTS {{db_name}}; + +DROP GIT REPOSITORY IF EXISTS {{git_repo_name}}; \ No newline at end of file diff --git a/config/config.toml b/config/config.toml new file mode 100644 index 0000000..6adfa71 --- /dev/null +++ b/config/config.toml @@ -0,0 +1,5 @@ +default_connection_name = "workflow" + +[connections] +[connections.workflow] +authenticator="SNOWFLAKE_JWT" \ No newline at end of file diff --git a/todos.csv b/data/todos.csv similarity index 100% rename from todos.csv rename to data/todos.csv diff --git a/de/0_warehouses.py b/de/0_warehouses.py new file mode 100644 index 0000000..2e7edef --- /dev/null +++ b/de/0_warehouses.py @@ -0,0 +1,17 @@ +import os +from snowflake.core import Root +from snowflake.core.warehouse import Warehouse +from snowflake.snowpark.session import Session + + +session = Session.builder.getOrCreate() +root = Root(session) + +name = os.getenv("TODO_APP_WH", "TODO_APP_WH") + +todos_wh = Warehouse(name) +todos_wh.warehouse_size = "SMALL" +todos_wh.auto_suspend = 120 +todos_wh.initially_suspended = "true" + +root.warehouses[todos_wh].create_or_alter(todos_wh) diff --git a/de/1_databases.py b/de/1_databases.py new file mode 100644 index 0000000..3d7cfa4 --- /dev/null +++ b/de/1_databases.py @@ -0,0 +1,18 @@ +import os + +from snowflake.core import Root +from snowflake.core.database import Database +from snowflake.snowpark.session import Session + + +session = Session.builder.getOrCreate() +root = Root(session) + +# Create Database to hold todo database +db_name = os.getenv("TODO_APP_DB", "TODO_APP_DB") +db = Database( + name=db_name, + comment="Database to hold the TODOs and its related objects", +) + +root.databases[db_name].create_or_alter(db) diff --git a/de/2_schemas.py b/de/2_schemas.py new file mode 100644 index 0000000..32d4fab --- /dev/null +++ b/de/2_schemas.py @@ -0,0 +1,18 @@ +import os +from snowflake.core import Root +from snowflake.snowpark.session import Session +from 
snowflake.core.schema import Schema + + +session = Session.builder.getOrCreate() +root = Root(session) + +db_name = os.getenv("TODO_APP_DB", "TODO_APP_DB") + + +app_schema = os.getenv("TODO_APP_SCHEMA", "APPS") +data_schema = os.getenv("TODOS_DATA_SCHEMA", "DATA") + +for name in [app_schema, data_schema]: + new_schema = Schema(name) + root.databases[db_name].schemas[name].create_or_alter(new_schema) diff --git a/de/3_tables.py b/de/3_tables.py new file mode 100644 index 0000000..740400a --- /dev/null +++ b/de/3_tables.py @@ -0,0 +1,39 @@ +import os +from snowflake.core import Root +from snowflake.snowpark.session import Session +from snowflake.core.table import Table, TableColumn + + +session = Session.builder.getOrCreate() +root = Root(session) + +db_name = os.getenv("TODO_APP_DB", "TODO_APP_DB") +data_schema = os.getenv("TODOS_DATA_SCHEMA", "DATA") + +todo_table = Table( + name="todos", + columns=[ + TableColumn( + name="title", + datatype="string", + nullable=False, + ), + TableColumn( + name="description", + datatype="string", + nullable=True, + ), + TableColumn( + name="category", + datatype="string", + nullable=True, + ), + TableColumn( + name="status", + datatype="boolean", + default="FALSE", + ), + ], + comment="All TODOS data are stored in this", +) +root.databases[db_name].schemas[data_schema].tables["todos"].create_or_alter(todo_table) diff --git a/de/4_stages.py b/de/4_stages.py new file mode 100644 index 0000000..5e265f7 --- /dev/null +++ b/de/4_stages.py @@ -0,0 +1,23 @@ +import os + +from snowflake.core import Root +from snowflake.snowpark.session import Session +from snowflake.core.stage import Stage, StageEncryption, StageDirectoryTable +from snowflake.core import CreateMode + +session = Session.builder.getOrCreate() +root = Root(session) + +db_name = os.getenv("TODO_APP_DB", "TODO_APP_DB") +data_schema = os.getenv("TODOS_DATA_SCHEMA", "DATA") + +git_data = Stage( + name="git_data", + directory_table=StageDirectoryTable(enable=True), + 
encryption=StageEncryption( + type="SNOWFLAKE_SSE", + ), +) + +stages = root.databases[db_name].schemas[data_schema].stages +stages.create(git_data, mode=CreateMode.if_not_exists) diff --git a/de/5_load.sql b/de/5_load.sql new file mode 100644 index 0000000..243dce1 --- /dev/null +++ b/de/5_load.sql @@ -0,0 +1,25 @@ +--!jinja + +USE WAREHOUSE {{wh_name}}; + +USE DATABASE {{db_name}}; + +USE SCHEMA {{schema_name}}; + +CREATE FILE FORMAT IF NOT EXISTS csv_ff + SKIP_HEADER=1; + +-- refresh repository content +ALTER GIT REPOSITORY {{git_repo_name}} FETCH; + +-- Copy files from git into local stage +COPY FILES + INTO @git_data + FROM @{{git_repo_name}}/branches/{{git_branch}}/data/todos.csv; + +-- Load the CSV into the table +COPY INTO TODOS FROM @git_data/todos.csv + FILE_FORMAT = csv_ff; + +-- check the data +SELECT * FROM TODOS; diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..9e43337 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +snowflake +snowflake.core +streamlit==1.35.0 +black \ No newline at end of file diff --git a/todos.sql b/todos.sql deleted file mode 100644 index 05e9b07..0000000 --- a/todos.sql +++ /dev/null @@ -1,43 +0,0 @@ ---!jinja -USE ROLE ACCOUNTADMIN; - -CREATE DATABASE IF NOT EXISTS {{db_name}}; - -CREATE WAREHOUSE IF NOT EXISTS {{wh_name}}; - -USE WAREHOUSE {{wh_name}}; - -USE DATABASE {{db_name}}; - -CREATE SCHEMA IF NOT EXISTS {{schema_name}}; - -USE SCHEMA {{schema_name}}; - -CREATE FILE FORMAT IF NOT EXISTS csv_ff - SKIP_HEADER=1; - -CREATE OR REPLACE TABLE TODOS ( - TITLE STRING, - DESCRIPTION STRING, - CATEGORY STRING, - STATUS BOOLEAN -); - --- List files -LS @{{git_repo_name}}/branches/{{git_branch}}/; - --- Create the stage to copy all files from git stage to current data stage -CREATE STAGE IF NOT EXISTS git_data - ENCRYPTION = (TYPE = 'SNOWFLAKE_SSE'); - --- Copy fies from git into local stage -COPY FILES - INTO @git_data - FROM @{{git_repo_name}}/branches/{{git_branch}}/todos.csv; - --- Load 
the CSV into the table -COPY INTO TODOS FROM @git_data/todos.csv - FILE_FORMAT = csv_ff; - --- check the data -SELECT * FROM TODOS;