From 6eff0f32f4bb02f2e6ed082672671b50b314d290 Mon Sep 17 00:00:00 2001 From: VaitaR Date: Sun, 8 Feb 2026 19:28:53 +0300 Subject: [PATCH 1/2] Enhance Dune push workflow to detect changed SQL files and update only those in Dune; add error handling for query ID extraction --- .github/workflows/push_to_dune.yml | 44 ++++++++++++- .vscode/settings.json | 5 ++ README.md | 2 +- scripts/push_to_dune.py | 101 +++++++++++++++++++++++++---- 4 files changed, 137 insertions(+), 15 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.github/workflows/push_to_dune.yml b/.github/workflows/push_to_dune.yml index 5d438d7..98f99cb 100644 --- a/.github/workflows/push_to_dune.yml +++ b/.github/workflows/push_to_dune.yml @@ -13,6 +13,8 @@ jobs: steps: - uses: actions/checkout@v2 + with: + fetch-depth: 0 - uses: actions/setup-python@v4 with: @@ -26,7 +28,47 @@ jobs: - name: pip requirements run: pip install -r requirements.txt - - name: Update all queries from Dune, by overwriting queries with repo query text + - name: Detect changed SQL files + id: detect_queries + shell: bash + run: | + set -euo pipefail + ZERO_SHA="0000000000000000000000000000000000000000" + BEFORE="${{ github.event.before }}" + AFTER="${{ github.sha }}" + + echo "CHANGED_QUERY_FILES=" >> "$GITHUB_ENV" + echo "FULL_SYNC=false" >> "$GITHUB_ENV" + + if [[ -z "${BEFORE}" || "${BEFORE}" == "${ZERO_SHA}" ]]; then + echo "Previous commit SHA is unavailable. Falling back to FULL_SYNC." + echo "FULL_SYNC=true" >> "$GITHUB_ENV" + echo "run_push=true" >> "$GITHUB_OUTPUT" + exit 0 + fi + + if ! git cat-file -e "${BEFORE}^{commit}" 2>/dev/null; then + echo "Previous commit ${BEFORE} is not available locally. Falling back to FULL_SYNC." + echo "FULL_SYNC=true" >> "$GITHUB_ENV" + echo "run_push=true" >> "$GITHUB_OUTPUT" + exit 0 + fi + + mapfile -t changed_sql_files < <(git diff --name-only "${BEFORE}" "${AFTER}" -- queries | grep -E '^queries/.*\.sql$' || true) + + if [[ ${#changed_sql_files[@]} -eq 0 ]]; then + echo "No changed SQL files detected. Skipping push_to_dune.py." + echo "run_push=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + changed_csv=$(printf '%s\n' "${changed_sql_files[@]}" | paste -sd, -) + echo "Detected changed SQL files: ${changed_csv}" + echo "CHANGED_QUERY_FILES=${changed_csv}" >> "$GITHUB_ENV" + echo "run_push=true" >> "$GITHUB_OUTPUT" + + - name: Push changed queries to Dune + if: steps.detect_queries.outputs.run_push == 'true' env: DUNE_API_KEY: ${{ secrets.DUNE_API_KEY }} run: python -u scripts/push_to_dune.py diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..a8c2003 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "python-envs.defaultEnvManager": "ms-python.python:conda", + "python-envs.defaultPackageManager": "ms-python.python:conda", + "python-envs.pythonProjects": [] +} \ No newline at end of file diff --git a/README.md b/README.md index 4d6f99d..aee3b1c 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ A template for creating repos to [manage your Dune queries](https://dune.mintlif ### Updating Queries or CSV Tables -1. Make any changes you need to directly in the repo. Any time you push a commit to MAIN branch, `push_to_dune.py` will save your changes into Dune directly. You can run this manually too if you want. +1. Make any changes you need to directly in the repo. Any time you push a commit to MAIN branch, `push_to_dune.py` updates only the changed `.sql` queries in Dune. You can run this manually too if you want (set `FULL_SYNC=true` to force updating all queries). 2. For CSVs, update the files in the `/uploads` folder. `upload_to_dune.py` will run on commit, or can be run manually. The table name in Dune will be `dune.team_name.dataset_`. diff --git a/scripts/push_to_dune.py b/scripts/push_to_dune.py index 69de9b8..196e3a7 100644 --- a/scripts/push_to_dune.py +++ b/scripts/push_to_dune.py @@ -1,13 +1,50 @@ +import codecs import os +import sys + import yaml -from dune_client.client import DuneClient from dotenv import load_dotenv -import sys -import codecs +from dune_client.client import DuneClient # Set the default encoding to UTF-8 sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach()) + +def is_truthy(value): + return str(value).strip().lower() in {"1", "true", "yes", "y", "on"} + + +def extract_query_id_from_filename(file_name): + if not file_name.endswith(".sql") or "___" not in file_name: + return None + + query_id_text = file_name.rsplit("___", 1)[-1].rsplit(".", 1)[0] + if not query_id_text.isdigit(): + return None + + return int(query_id_text) + + +def parse_changed_query_ids(changed_query_files_raw): + if not changed_query_files_raw: + return set() + + query_ids = set() + for rel_path in changed_query_files_raw.split(","): + rel_path = rel_path.strip() + if not rel_path: + continue + + query_id = extract_query_id_from_filename(os.path.basename(rel_path)) + if query_id is None: + print(f'WARNING: could not parse query id from changed file "{rel_path}"') + continue + + query_ids.add(query_id) + + return query_ids + + dotenv_path = os.path.join(os.path.dirname(__file__), '..', '.env') load_dotenv(dotenv_path) @@ -16,22 +53,60 @@ # Read the queries.yml file queries_yml = os.path.join(os.path.dirname(__file__), '..', 'queries.yml') with open(queries_yml, 'r', encoding='utf-8') as file: - data = yaml.safe_load(file) + data = yaml.safe_load(file) or {} # Extract the query_ids from the data -query_ids = [id for id in data['query_ids']] +query_ids = [] +for query_id in data.get('query_ids', []): + try: + query_ids.append(int(query_id)) + except (TypeError, ValueError): + print(f'WARNING: skipping non-numeric query id in queries.yml: "{query_id}"') + +if len(query_ids) == 0: + print('INFO: no query_ids configured in queries.yml') + sys.exit(0) + +full_sync_requested = is_truthy(os.getenv('FULL_SYNC', 'false')) +changed_query_files_raw = os.getenv('CHANGED_QUERY_FILES', '').strip() +changed_query_ids = parse_changed_query_ids(changed_query_files_raw) + +if full_sync_requested: + target_query_ids = query_ids + print('SYNC MODE: full (FULL_SYNC=true)') +elif len(changed_query_ids) != 0: + tracked_query_ids = set(query_ids) + untracked_changed_ids = sorted(changed_query_ids - tracked_query_ids) + if len(untracked_changed_ids) != 0: + print(f'WARNING: changed files include query ids not present in queries.yml: {untracked_changed_ids}') + + target_query_ids = [query_id for query_id in query_ids if query_id in changed_query_ids] + if len(target_query_ids) == 0: + print('INFO: changed SQL files do not match any query id in queries.yml. Nothing to update.') + sys.exit(0) + print(f'SYNC MODE: changed-only ({len(target_query_ids)} of {len(query_ids)} query ids from queries.yml)') +else: + if changed_query_files_raw: + print('WARNING: CHANGED_QUERY_FILES was provided but no query ids were parsed; falling back to full sync.') + target_query_ids = query_ids + print('SYNC MODE: full (default)') + +queries_path = os.path.join(os.path.dirname(__file__), '..', 'queries') +query_file_by_id = {} +for file_name in os.listdir(queries_path): + query_id = extract_query_id_from_filename(file_name) + if query_id is not None: + query_file_by_id[query_id] = file_name -for id in query_ids: - query = dune.get_query(id) +for query_id in target_query_ids: + query = dune.get_query(query_id) print('PROCESSING: query {}, {}'.format(query.base.query_id, query.base.name)) # Check if query file exists in /queries folder - queries_path = os.path.join(os.path.dirname(__file__), '..', 'queries') - files = os.listdir(queries_path) - found_files = [file for file in files if str(id) == file.split('___')[-1].split('.')[0]] + query_file_name = query_file_by_id.get(query_id) - if len(found_files) != 0: - file_path = os.path.join(os.path.dirname(__file__), '..', 'queries', found_files[0]) + if query_file_name is not None: + file_path = os.path.join(queries_path, query_file_name) # Read the content of the file with open(file_path, 'r', encoding='utf-8') as file: text = file.read() @@ -43,4 +118,4 @@ ) print('SUCCESS: updated query {} to dune'.format(query.base.query_id)) else: - print('ERROR: file not found, query id {}'.format(query.base.query_id)) \ No newline at end of file + print('ERROR: file not found, query id {}'.format(query.base.query_id)) From c859395487241eb13554e7af04539d66a607757c Mon Sep 17 00:00:00 2001 From: VaitaR Date: Mon, 9 Feb 2026 10:04:54 +0300 Subject: [PATCH 2/2] Delete .vscode/settings.json --- .vscode/settings.json | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index a8c2003..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "python-envs.defaultEnvManager": "ms-python.python:conda", - "python-envs.defaultPackageManager": "ms-python.python:conda", - "python-envs.pythonProjects": [] -} \ No newline at end of file