Skip to content

Commit 8e67373

Browse files
authored
feat: Added logic to support bucket partitioning (#97)
1 parent a28a952 commit 8e67373

5 files changed

Lines changed: 51 additions & 21 deletions

File tree

ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,15 @@ INPUT_PATH_ONTIME="${SCENARIO_DIR}/${INPUT_FILE_ONTIME}"
1717
echo "OK Inserted data with partitioning into table ${TABLE_NAME_IRIS}"
1818

1919
# List partitions and validate output
20-
LIST_PARTITIONS_OUT_IRIS=$(mktemp)
21-
trap "rm -f '${LIST_PARTITIONS_OUT_IRIS}'" EXIT
22-
{{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions ${TABLE_NAME_IRIS} > "${LIST_PARTITIONS_OUT_IRIS}"
23-
if ! grep -q "partitions:" "${LIST_PARTITIONS_OUT_IRIS}"; then
20+
LIST_PARTITIONS_OUT_IRIS=$({{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions "${TABLE_NAME_IRIS}")
21+
if ! grep -q "partitions:" <<<"${LIST_PARTITIONS_OUT_IRIS}"; then
2422
echo "FAIL: list-partitions output missing 'partitions:' section"
25-
cat "${LIST_PARTITIONS_OUT_IRIS}"
23+
printf '%s\n' "${LIST_PARTITIONS_OUT_IRIS}"
2624
exit 1
2725
fi
28-
if ! grep -qE -- "- *[^=]+=" "${LIST_PARTITIONS_OUT_IRIS}"; then
26+
if ! grep -qE -- "- *[^=]+=" <<<"${LIST_PARTITIONS_OUT_IRIS}"; then
2927
echo "FAIL: list-partitions output has no partition entries (expected at least one key=value)"
30-
cat "${LIST_PARTITIONS_OUT_IRIS}"
28+
printf '%s\n' "${LIST_PARTITIONS_OUT_IRIS}"
3129
exit 1
3230
fi
3331
echo "OK Listed and validated partitions for ${TABLE_NAME_IRIS}"
@@ -37,17 +35,15 @@ echo "OK Listed and validated partitions for ${TABLE_NAME_IRIS}"
3735
echo "OK Inserted data with partitioning into table ${TABLE_NAME_ONTIME}"
3836

3937
# List partitions and validate output
40-
LIST_PARTITIONS_OUT_ONTIME=$(mktemp)
41-
trap "rm -f '${LIST_PARTITIONS_OUT_ONTIME}'" EXIT
42-
{{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions ${TABLE_NAME_ONTIME} > "${LIST_PARTITIONS_OUT_ONTIME}"
43-
if ! grep -q "partitions:" "${LIST_PARTITIONS_OUT_ONTIME}"; then
38+
LIST_PARTITIONS_OUT_ONTIME=$({{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions "${TABLE_NAME_ONTIME}")
39+
if ! grep -q "partitions:" <<<"${LIST_PARTITIONS_OUT_ONTIME}"; then
4440
echo "FAIL: list-partitions output missing 'partitions:' section"
45-
cat "${LIST_PARTITIONS_OUT_ONTIME}"
41+
printf '%s\n' "${LIST_PARTITIONS_OUT_ONTIME}"
4642
exit 1
4743
fi
48-
if ! grep -qE -- "- *[^=]+=" "${LIST_PARTITIONS_OUT_ONTIME}"; then
44+
if ! grep -qE -- "- *[^=]+=" <<<"${LIST_PARTITIONS_OUT_ONTIME}"; then
4945
echo "FAIL: list-partitions output has no partition entries (expected at least one key=value)"
50-
cat "${LIST_PARTITIONS_OUT_ONTIME}"
46+
printf '%s\n' "${LIST_PARTITIONS_OUT_ONTIME}"
5147
exit 1
5248
fi
5349
echo "OK Listed and validated partitions for ${TABLE_NAME_ONTIME}"
@@ -62,11 +58,40 @@ if [[ "${FILES_OUT_ONTIME}" != *${EXPECTED_DATA_PATH_ONTIME}* ]]; then
6258
fi
6359
echo "OK Validated correct partitioned data file path for ${TABLE_NAME_ONTIME}"
6460

61+
# Create table with bucket partition and insert data
62+
# Quote every expansion: INPUT_PATH_IRIS is derived from SCENARIO_DIR, which may contain spaces or glob characters.
{{ICE_CLI}} --config {{CLI_CONFIG}} insert --create-table "${TABLE_NAME_BUCKET}" "${INPUT_PATH_IRIS}" --partition="${PARTITION_SPEC_BUCKET}"
63+
echo "OK Inserted data with bucket partitioning into table ${TABLE_NAME_BUCKET}"
64+
65+
# List partitions and validate output
66+
LIST_PARTITIONS_OUT_BUCKET=$({{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions "${TABLE_NAME_BUCKET}")
67+
if ! grep -q "partitions:" <<<"${LIST_PARTITIONS_OUT_BUCKET}"; then
68+
echo "FAIL: list-partitions output missing 'partitions:' section"
69+
printf '%s\n' "${LIST_PARTITIONS_OUT_BUCKET}"
70+
exit 1
71+
fi
72+
if ! grep -q "variety_bucket=" <<<"${LIST_PARTITIONS_OUT_BUCKET}"; then
73+
echo "FAIL: list-partitions output missing bucket partition entries (expected 'variety_bucket=')"
74+
printf '%s\n' "${LIST_PARTITIONS_OUT_BUCKET}"
75+
exit 1
76+
fi
77+
echo "OK Listed and validated bucket partitions for ${TABLE_NAME_BUCKET}"
78+
79+
# Validate that the data file was written to the correct bucket-partitioned path
80+
# Capture the `files` listing for the bucket table; the table name is quoted to match the list-partitions call.
FILES_OUT_BUCKET=$({{ICE_CLI}} --config {{CLI_CONFIG}} files "${TABLE_NAME_BUCKET}")
81+
82+
if [[ "${FILES_OUT_BUCKET}" != *${EXPECTED_DATA_PATH_BUCKET}* ]]; then
83+
echo "FAIL: expected substring '${EXPECTED_DATA_PATH_BUCKET}' not found in files command output: ${FILES_OUT_BUCKET}"
84+
exit 1
85+
fi
86+
echo "OK Validated correct bucket-partitioned data file path for ${TABLE_NAME_BUCKET}"
87+
6588
# Cleanup
6689
{{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_NAME_IRIS}
6790
echo "OK Deleted table: ${TABLE_NAME_IRIS}"
6891
{{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_NAME_ONTIME}
6992
echo "OK Deleted table: ${TABLE_NAME_ONTIME}"
93+
{{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_NAME_BUCKET}
94+
echo "OK Deleted table: ${TABLE_NAME_BUCKET}"
7095

7196
{{ICE_CLI}} --config {{CLI_CONFIG}} delete-namespace ${NAMESPACE_NAME}
7297
echo "OK Deleted namespace: ${NAMESPACE_NAME}"

ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/scenario.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,8 @@ env:
1313
PARTITION_SPEC_IRIS: '[{"column":"variety","transform":"identity"}]'
1414
PARTITION_SPEC_ONTIME: '[{"column":"Year"}]'
1515
EXPECTED_DATA_PATH_ONTIME: "s3://test-bucket/warehouse/test_insert_partitioned/ontime_partitioned/data/Year=2010/*.parquet"
16+
TABLE_NAME_BUCKET: "test_insert_partitioned.iris_bucket_partitioned"
17+
PARTITION_SPEC_BUCKET: '[{"column":"variety","transform":"bucket[3]"}]'
18+
# Scheme-agnostic substring (no s3:// or s3a:// prefix): the files output may use either scheme depending on the Hadoop FS
19+
EXPECTED_DATA_PATH_BUCKET: "iris_bucket_partitioned/data/variety_bucket="
1620

ice-rest-catalog/src/test/resources/scenarios/partition-custom-name/run.sh.tmpl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@ echo "OK Created namespace"
1313
--partition="${PARTITION_SPEC}"
1414
echo "OK Inserted data with custom partition name"
1515

16-
{{ICE_CLI}} --config {{CLI_CONFIG}} describe -s ${TABLE_NAME} > /tmp/custom_part_describe.txt
16+
DESCRIBE_OUT=$({{ICE_CLI}} --config {{CLI_CONFIG}} describe -s "${TABLE_NAME}")
1717

18-
if ! grep -q "var_trunc" /tmp/custom_part_describe.txt; then
19-
echo "FAIL describe -s output missing custom partition name 'var_trunc'"
20-
cat /tmp/custom_part_describe.txt
18+
if ! grep -q "var_bucket" <<<"${DESCRIBE_OUT}"; then
19+
echo "FAIL describe -s output missing custom partition name 'var_bucket'"
20+
printf '%s\n' "${DESCRIBE_OUT}"
2121
exit 1
2222
fi
23-
echo "OK Custom partition name 'var_trunc' found in describe output"
23+
echo "OK Custom partition name 'var_bucket' found in describe output"
2424

2525
{{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_NAME}
2626
{{ICE_CLI}} --config {{CLI_CONFIG}} delete-namespace ${NAMESPACE_NAME}

ice-rest-catalog/src/test/resources/scenarios/partition-custom-name/scenario.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ env:
88
NAMESPACE_NAME: "test_custom_part"
99
TABLE_NAME: "test_custom_part.iris_custom"
1010
INPUT_FILE: "input.parquet"
11-
PARTITION_SPEC: '[{"column":"variety","transform":"truncate[3]","name":"var_trunc"}]'
11+
PARTITION_SPEC: '[{"column":"variety","transform":"bucket[3]","name":"var_bucket"}]'

ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,7 @@ public static Map<PartitionKey, List<org.apache.iceberg.data.Record>> partition(
343343
continue;
344344
}
345345
String transformName = transform.toString();
346+
346347
switch (transformName) {
347348
case "hour", "day", "month", "year":
348349
if (fieldSpec.type().typeId() != Type.TypeID.DATE) {
@@ -352,7 +353,7 @@ public static Map<PartitionKey, List<org.apache.iceberg.data.Record>> partition(
352353
sourceFieldName, toGenericRecordFieldValue(value, fieldSpec.type()));
353354
break;
354355
default:
355-
if (transformName.startsWith("truncate[") || transformName.startsWith("bucket[")) {
356+
if (transformName.startsWith("bucket[")) {
356357
partitionRecord.setField(
357358
sourceFieldName, toGenericRecordFieldValue(value, fieldSpec.type()));
358359
} else {

0 commit comments

Comments
 (0)