Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 43 additions & 1 deletion .github/workflows/push_to_dune.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ jobs:

steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0

- uses: actions/setup-python@v4
with:
Expand All @@ -26,7 +28,47 @@ jobs:
- name: pip requirements
run: pip install -r requirements.txt

- name: Update all queries from Dune, by overwriting queries with repo query text
- name: Detect changed SQL files
id: detect_queries
shell: bash
run: |
# Detect which queries/*.sql files changed between the previous and current
# commit, and publish the result for later steps:
#   $GITHUB_ENV:    CHANGED_QUERY_FILES (comma-separated paths), FULL_SYNC (true/false)
#   $GITHUB_OUTPUT: run_push ("true"/"false") — gates the "Push changed queries" step
set -euo pipefail
# The all-zero SHA is what GitHub reports as event.before on the first push
# of a new branch (there is no previous commit to diff against).
ZERO_SHA="0000000000000000000000000000000000000000"
BEFORE="${{ github.event.before }}"
AFTER="${{ github.sha }}"

# Defaults: no changed files recorded, incremental (not full) sync.
echo "CHANGED_QUERY_FILES=" >> "$GITHUB_ENV"
echo "FULL_SYNC=false" >> "$GITHUB_ENV"

# No usable "before" commit (first push to the branch): fall back to syncing everything.
if [[ -z "${BEFORE}" || "${BEFORE}" == "${ZERO_SHA}" ]]; then
echo "Previous commit SHA is unavailable. Falling back to FULL_SYNC."
echo "FULL_SYNC=true" >> "$GITHUB_ENV"
echo "run_push=true" >> "$GITHUB_OUTPUT"
exit 0
fi

# "before" commit not present locally (e.g. force push rewrote history, or the
# checkout was shallow) — a diff is impossible, so fall back to syncing everything.
if ! git cat-file -e "${BEFORE}^{commit}" 2>/dev/null; then
echo "Previous commit ${BEFORE} is not available locally. Falling back to FULL_SYNC."
echo "FULL_SYNC=true" >> "$GITHUB_ENV"
echo "run_push=true" >> "$GITHUB_OUTPUT"
exit 0
fi

# Collect changed .sql paths under queries/. The trailing `|| true` keeps
# `pipefail` from aborting the step when grep matches nothing (exit code 1).
mapfile -t changed_sql_files < <(git diff --name-only "${BEFORE}" "${AFTER}" -- queries | grep -E '^queries/.*\.sql$' || true)

# Nothing relevant changed: tell the workflow to skip the push step entirely.
if [[ ${#changed_sql_files[@]} -eq 0 ]]; then
echo "No changed SQL files detected. Skipping push_to_dune.py."
echo "run_push=false" >> "$GITHUB_OUTPUT"
exit 0
fi

# Join the newline-separated list into one comma-separated value so it fits
# a single environment variable line.
changed_csv=$(printf '%s\n' "${changed_sql_files[@]}" | paste -sd, -)
echo "Detected changed SQL files: ${changed_csv}"
echo "CHANGED_QUERY_FILES=${changed_csv}" >> "$GITHUB_ENV"
echo "run_push=true" >> "$GITHUB_OUTPUT"

- name: Push changed queries to Dune
if: steps.detect_queries.outputs.run_push == 'true'
env:
DUNE_API_KEY: ${{ secrets.DUNE_API_KEY }}
run: python -u scripts/push_to_dune.py
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ A template for creating repos to [manage your Dune queries](https://dune.mintlif

### Updating Queries or CSV Tables

1. Make any changes you need to directly in the repo. Any time you push a commit to MAIN branch, `push_to_dune.py` will save your changes into Dune directly. You can run this manually too if you want.
1. Make any changes you need to directly in the repo. Any time you push a commit to MAIN branch, `push_to_dune.py` updates only the changed `.sql` queries in Dune. You can run this manually too if you want (set `FULL_SYNC=true` to force updating all queries).

2. For CSVs, update the files in the `/uploads` folder. `upload_to_dune.py` will run on commit, or can be run manually. The table name in Dune will be `dune.team_name.dataset_<filename>`.

Expand Down
101 changes: 88 additions & 13 deletions scripts/push_to_dune.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,50 @@
import codecs
import os
import sys

import yaml
from dune_client.client import DuneClient
from dotenv import load_dotenv
import sys
import codecs
from dune_client.client import DuneClient

# Set the default encoding to UTF-8
sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())


def is_truthy(value):
    """Interpret *value* as a boolean flag.

    Returns True when the value's string form — lowercased and with
    surrounding whitespace removed — is one of the common "on" spellings.
    Any other value (including None and empty strings) is falsy.
    """
    normalized = str(value).strip().lower()
    truthy_spellings = ("1", "true", "yes", "y", "on")
    return normalized in truthy_spellings


def extract_query_id_from_filename(file_name):
    """Pull the numeric Dune query id out of a query file name.

    File names follow the ``<description>___<query_id>.sql`` convention.
    Returns the id as an int, or None when the name does not match
    (wrong extension, no ``___`` separator, or a non-numeric id part).
    """
    if "___" in file_name and file_name.endswith(".sql"):
        # Everything after the *last* triple underscore, minus the extension.
        _, _, tail = file_name.rpartition("___")
        candidate = tail.rsplit(".", 1)[0]
        if candidate.isdigit():
            return int(candidate)
    return None


def parse_changed_query_ids(changed_query_files_raw):
    """Convert a comma-separated list of changed file paths into query ids.

    Blank entries are ignored. Entries whose base file name does not encode
    a numeric query id (see extract_query_id_from_filename) print a warning
    and are skipped. Returns a set of ints; empty when the input is
    empty/None.
    """
    parsed_ids = set()
    entries = changed_query_files_raw.split(",") if changed_query_files_raw else []
    for entry in entries:
        path = entry.strip()
        if not path:
            continue

        file_name = os.path.basename(path)
        parsed = extract_query_id_from_filename(file_name)
        if parsed is not None:
            parsed_ids.add(parsed)
        else:
            print(f'WARNING: could not parse query id from changed file "{path}"')

    return parsed_ids


dotenv_path = os.path.join(os.path.dirname(__file__), '..', '.env')
load_dotenv(dotenv_path)

Expand All @@ -16,22 +53,60 @@
# Read the queries.yml file
queries_yml = os.path.join(os.path.dirname(__file__), '..', 'queries.yml')
with open(queries_yml, 'r', encoding='utf-8') as file:
data = yaml.safe_load(file)
data = yaml.safe_load(file) or {}

# Extract the query_ids from the data
query_ids = [id for id in data['query_ids']]
query_ids = []
for query_id in data.get('query_ids', []):
try:
query_ids.append(int(query_id))
except (TypeError, ValueError):
print(f'WARNING: skipping non-numeric query id in queries.yml: "{query_id}"')

if len(query_ids) == 0:
print('INFO: no query_ids configured in queries.yml')
sys.exit(0)

full_sync_requested = is_truthy(os.getenv('FULL_SYNC', 'false'))
changed_query_files_raw = os.getenv('CHANGED_QUERY_FILES', '').strip()
changed_query_ids = parse_changed_query_ids(changed_query_files_raw)

if full_sync_requested:
target_query_ids = query_ids
print('SYNC MODE: full (FULL_SYNC=true)')
elif len(changed_query_ids) != 0:
tracked_query_ids = set(query_ids)
untracked_changed_ids = sorted(changed_query_ids - tracked_query_ids)
if len(untracked_changed_ids) != 0:
print(f'WARNING: changed files include query ids not present in queries.yml: {untracked_changed_ids}')

target_query_ids = [query_id for query_id in query_ids if query_id in changed_query_ids]
if len(target_query_ids) == 0:
print('INFO: changed SQL files do not match any query id in queries.yml. Nothing to update.')
sys.exit(0)
print(f'SYNC MODE: changed-only ({len(target_query_ids)} of {len(query_ids)} query ids from queries.yml)')
else:
if changed_query_files_raw:
print('WARNING: CHANGED_QUERY_FILES was provided but no query ids were parsed; falling back to full sync.')
target_query_ids = query_ids
print('SYNC MODE: full (default)')

queries_path = os.path.join(os.path.dirname(__file__), '..', 'queries')
query_file_by_id = {}
for file_name in os.listdir(queries_path):
query_id = extract_query_id_from_filename(file_name)
if query_id is not None:
query_file_by_id[query_id] = file_name

for id in query_ids:
query = dune.get_query(id)
for query_id in target_query_ids:
query = dune.get_query(query_id)
print('PROCESSING: query {}, {}'.format(query.base.query_id, query.base.name))

# Check if query file exists in /queries folder
queries_path = os.path.join(os.path.dirname(__file__), '..', 'queries')
files = os.listdir(queries_path)
found_files = [file for file in files if str(id) == file.split('___')[-1].split('.')[0]]
query_file_name = query_file_by_id.get(query_id)

if len(found_files) != 0:
file_path = os.path.join(os.path.dirname(__file__), '..', 'queries', found_files[0])
if query_file_name is not None:
file_path = os.path.join(queries_path, query_file_name)
# Read the content of the file
with open(file_path, 'r', encoding='utf-8') as file:
text = file.read()
Expand All @@ -43,4 +118,4 @@
)
print('SUCCESS: updated query {} to dune'.format(query.base.query_id))
else:
print('ERROR: file not found, query id {}'.format(query.base.query_id))
print('ERROR: file not found, query id {}'.format(query.base.query_id))