Skip to content

Commit 6eff0f3

Browse files
committed
Enhance Dune push workflow to detect changed SQL files and update only those in Dune; add error handling for query ID extraction
1 parent e809639 commit 6eff0f3

File tree

4 files changed

+137
-15
lines changed

4 files changed

+137
-15
lines changed

.github/workflows/push_to_dune.yml

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ jobs:
1313

1414
steps:
1515
- uses: actions/checkout@v2
16+
with:
17+
fetch-depth: 0
1618

1719
- uses: actions/setup-python@v4
1820
with:
@@ -26,7 +28,47 @@ jobs:
2628
- name: pip requirements
2729
run: pip install -r requirements.txt
2830

29-
- name: Update all queries from Dune, by overwriting queries with repo query text
31+
- name: Detect changed SQL files
32+
id: detect_queries
33+
shell: bash
34+
run: |
35+
set -euo pipefail
36+
ZERO_SHA="0000000000000000000000000000000000000000"
37+
BEFORE="${{ github.event.before }}"
38+
AFTER="${{ github.sha }}"
39+
40+
echo "CHANGED_QUERY_FILES=" >> "$GITHUB_ENV"
41+
echo "FULL_SYNC=false" >> "$GITHUB_ENV"
42+
43+
if [[ -z "${BEFORE}" || "${BEFORE}" == "${ZERO_SHA}" ]]; then
44+
echo "Previous commit SHA is unavailable. Falling back to FULL_SYNC."
45+
echo "FULL_SYNC=true" >> "$GITHUB_ENV"
46+
echo "run_push=true" >> "$GITHUB_OUTPUT"
47+
exit 0
48+
fi
49+
50+
if ! git cat-file -e "${BEFORE}^{commit}" 2>/dev/null; then
51+
echo "Previous commit ${BEFORE} is not available locally. Falling back to FULL_SYNC."
52+
echo "FULL_SYNC=true" >> "$GITHUB_ENV"
53+
echo "run_push=true" >> "$GITHUB_OUTPUT"
54+
exit 0
55+
fi
56+
57+
mapfile -t changed_sql_files < <(git diff --name-only "${BEFORE}" "${AFTER}" -- queries | grep -E '^queries/.*\.sql$' || true)
58+
59+
if [[ ${#changed_sql_files[@]} -eq 0 ]]; then
60+
echo "No changed SQL files detected. Skipping push_to_dune.py."
61+
echo "run_push=false" >> "$GITHUB_OUTPUT"
62+
exit 0
63+
fi
64+
65+
changed_csv=$(printf '%s\n' "${changed_sql_files[@]}" | paste -sd, -)
66+
echo "Detected changed SQL files: ${changed_csv}"
67+
echo "CHANGED_QUERY_FILES=${changed_csv}" >> "$GITHUB_ENV"
68+
echo "run_push=true" >> "$GITHUB_OUTPUT"
69+
70+
- name: Push changed queries to Dune
71+
if: steps.detect_queries.outputs.run_push == 'true'
3072
env:
3173
DUNE_API_KEY: ${{ secrets.DUNE_API_KEY }}
3274
run: python -u scripts/push_to_dune.py

.vscode/settings.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"python-envs.defaultEnvManager": "ms-python.python:conda",
3+
"python-envs.defaultPackageManager": "ms-python.python:conda",
4+
"python-envs.pythonProjects": []
5+
}

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ A template for creating repos to [manage your Dune queries](https://dune.mintlif
1212

1313
### Updating Queries or CSV Tables
1414

15-
1. Make any changes you need to directly in the repo. Any time you push a commit to MAIN branch, `push_to_dune.py` will save your changes into Dune directly. You can run this manually too if you want.
15+
1. Make any changes you need to directly in the repo. Any time you push a commit to MAIN branch, `push_to_dune.py` updates only the changed `.sql` queries in Dune. You can run this manually too if you want (set `FULL_SYNC=true` to force updating all queries).
1616

1717
2. For CSVs, update the files in the `/uploads` folder. `upload_to_dune.py` will run on commit, or can be run manually. The table name in Dune will be `dune.team_name.dataset_<filename>`.
1818

scripts/push_to_dune.py

Lines changed: 88 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,50 @@
1+
import codecs
12
import os
3+
import sys
4+
25
import yaml
3-
from dune_client.client import DuneClient
46
from dotenv import load_dotenv
5-
import sys
6-
import codecs
7+
from dune_client.client import DuneClient
78

89
# Set the default encoding to UTF-8
910
sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
1011

12+
13+
def is_truthy(value):
14+
return str(value).strip().lower() in {"1", "true", "yes", "y", "on"}
15+
16+
17+
def extract_query_id_from_filename(file_name):
18+
if not file_name.endswith(".sql") or "___" not in file_name:
19+
return None
20+
21+
query_id_text = file_name.rsplit("___", 1)[-1].rsplit(".", 1)[0]
22+
if not query_id_text.isdigit():
23+
return None
24+
25+
return int(query_id_text)
26+
27+
28+
def parse_changed_query_ids(changed_query_files_raw):
29+
if not changed_query_files_raw:
30+
return set()
31+
32+
query_ids = set()
33+
for rel_path in changed_query_files_raw.split(","):
34+
rel_path = rel_path.strip()
35+
if not rel_path:
36+
continue
37+
38+
query_id = extract_query_id_from_filename(os.path.basename(rel_path))
39+
if query_id is None:
40+
print(f'WARNING: could not parse query id from changed file "{rel_path}"')
41+
continue
42+
43+
query_ids.add(query_id)
44+
45+
return query_ids
46+
47+
1148
dotenv_path = os.path.join(os.path.dirname(__file__), '..', '.env')
1249
load_dotenv(dotenv_path)
1350

@@ -16,22 +53,60 @@
1653
# Read the queries.yml file
1754
queries_yml = os.path.join(os.path.dirname(__file__), '..', 'queries.yml')
1855
with open(queries_yml, 'r', encoding='utf-8') as file:
19-
data = yaml.safe_load(file)
56+
data = yaml.safe_load(file) or {}
2057

2158
# Extract the query_ids from the data
22-
query_ids = [id for id in data['query_ids']]
59+
query_ids = []
60+
for query_id in data.get('query_ids', []):
61+
try:
62+
query_ids.append(int(query_id))
63+
except (TypeError, ValueError):
64+
print(f'WARNING: skipping non-numeric query id in queries.yml: "{query_id}"')
65+
66+
if len(query_ids) == 0:
67+
print('INFO: no query_ids configured in queries.yml')
68+
sys.exit(0)
69+
70+
full_sync_requested = is_truthy(os.getenv('FULL_SYNC', 'false'))
71+
changed_query_files_raw = os.getenv('CHANGED_QUERY_FILES', '').strip()
72+
changed_query_ids = parse_changed_query_ids(changed_query_files_raw)
73+
74+
if full_sync_requested:
75+
target_query_ids = query_ids
76+
print('SYNC MODE: full (FULL_SYNC=true)')
77+
elif len(changed_query_ids) != 0:
78+
tracked_query_ids = set(query_ids)
79+
untracked_changed_ids = sorted(changed_query_ids - tracked_query_ids)
80+
if len(untracked_changed_ids) != 0:
81+
print(f'WARNING: changed files include query ids not present in queries.yml: {untracked_changed_ids}')
82+
83+
target_query_ids = [query_id for query_id in query_ids if query_id in changed_query_ids]
84+
if len(target_query_ids) == 0:
85+
print('INFO: changed SQL files do not match any query id in queries.yml. Nothing to update.')
86+
sys.exit(0)
87+
print(f'SYNC MODE: changed-only ({len(target_query_ids)} of {len(query_ids)} query ids from queries.yml)')
88+
else:
89+
if changed_query_files_raw:
90+
print('WARNING: CHANGED_QUERY_FILES was provided but no query ids were parsed; falling back to full sync.')
91+
target_query_ids = query_ids
92+
print('SYNC MODE: full (default)')
93+
94+
queries_path = os.path.join(os.path.dirname(__file__), '..', 'queries')
95+
query_file_by_id = {}
96+
for file_name in os.listdir(queries_path):
97+
query_id = extract_query_id_from_filename(file_name)
98+
if query_id is not None:
99+
query_file_by_id[query_id] = file_name
23100

24-
for id in query_ids:
25-
query = dune.get_query(id)
101+
for query_id in target_query_ids:
102+
query = dune.get_query(query_id)
26103
print('PROCESSING: query {}, {}'.format(query.base.query_id, query.base.name))
27104

28105
# Check if query file exists in /queries folder
29-
queries_path = os.path.join(os.path.dirname(__file__), '..', 'queries')
30-
files = os.listdir(queries_path)
31-
found_files = [file for file in files if str(id) == file.split('___')[-1].split('.')[0]]
106+
query_file_name = query_file_by_id.get(query_id)
32107

33-
if len(found_files) != 0:
34-
file_path = os.path.join(os.path.dirname(__file__), '..', 'queries', found_files[0])
108+
if query_file_name is not None:
109+
file_path = os.path.join(queries_path, query_file_name)
35110
# Read the content of the file
36111
with open(file_path, 'r', encoding='utf-8') as file:
37112
text = file.read()
@@ -43,4 +118,4 @@
43118
)
44119
print('SUCCESS: updated query {} to dune'.format(query.base.query_id))
45120
else:
46-
print('ERROR: file not found, query id {}'.format(query.base.query_id))
121+
print('ERROR: file not found, query id {}'.format(query.base.query_id))

0 commit comments

Comments
 (0)