Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -378,3 +378,6 @@
[submodule "contrib/libcotp"]
path = contrib/libcotp
url = https://github.com/paolostivanin/libcotp.git
[submodule "contrib/TurboPFor-CPP"]
path = contrib/TurboPFor-CPP
url = https://github.com/amosbird/TurboPFor-CPP
1 change: 1 addition & 0 deletions ci/jobs/fast_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def clone_submodules():
"contrib/corrosion",
"contrib/StringZilla",
"contrib/rust_vendor",
"contrib/TurboPFor-CPP",
]

res = Shell.check("git submodule sync", verbose=True, strict=True)
Expand Down
2 changes: 2 additions & 0 deletions contrib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,8 @@ add_contrib(simdcomp-cmake simdcomp)

add_contrib (numactl-cmake numactl)

add_contrib (TurboPFor-CPP-cmake TurboPFor-CPP)

# Google Cloud Cpp
if(ENABLE_LIBRARIES AND (OS_LINUX OR OS_DARWIN OR OS_FREEBSD) AND (ARCH_AMD64 OR (ARCH_AARCH64 AND NOT NO_ARMV81_OR_HIGHER)))
set(ENABLE_GOOGLE_CLOUD_CPP_DEFAULT ON)
Expand Down
1 change: 1 addition & 0 deletions contrib/TurboPFor-CPP
Submodule TurboPFor-CPP added at 97d8c4
80 changes: 80 additions & 0 deletions contrib/TurboPFor-CPP-cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/TurboPFor-CPP")

# Core scalar sources - always needed (internal utilities used by both scalar and SIMD paths)
set(SRCS
"${LIBRARY_DIR}/src/dispatch.cpp"
"${LIBRARY_DIR}/src/scalar/p4enc32.cpp"
"${LIBRARY_DIR}/src/scalar/p4d1dec32.cpp"
"${LIBRARY_DIR}/src/scalar/p4_scalar_bitpack32.cpp"
"${LIBRARY_DIR}/src/scalar/p4_scalar_bitunpack32.cpp"
"${LIBRARY_DIR}/src/scalar/p4_scalar_bitunpackd1_32.cpp"
"${LIBRARY_DIR}/src/scalar/p4_scalar_internal.cpp"
)

# Determine which SIMD features to use based on ClickHouse's CPU feature options
# TurboPFor-CPP requires: SSE4.2 + POPCNT for 128v32, AVX2 for 256v32
set(TURBOPFOR_USE_SSE42 OFF)
set(TURBOPFOR_USE_AVX2 OFF)

if (ARCH_AMD64)
if (ENABLE_SSE42 AND ENABLE_POPCNT)
set(TURBOPFOR_USE_SSE42 ON)
endif()
if (ENABLE_AVX2 AND ENABLE_SSE42 AND ENABLE_POPCNT)
set(TURBOPFOR_USE_AVX2 ON)
endif()
endif()

# 128v32 functions: use SIMD (SSE4.2) or scalar fallback
if (TURBOPFOR_USE_SSE42)
set(SSE42_SRCS
"${LIBRARY_DIR}/src/simd/p4enc128v32.cpp"
"${LIBRARY_DIR}/src/simd/p4d1dec128v32.cpp"
"${LIBRARY_DIR}/src/simd/p4_simd_internal.cpp"
"${LIBRARY_DIR}/src/simd/bitpack128v32_simd.cpp"
"${LIBRARY_DIR}/src/simd/bitunpack128v32_simd.cpp"
)
list(APPEND SRCS ${SSE42_SRCS})
else()
# Scalar fallback for 128v32
list(APPEND SRCS
"${LIBRARY_DIR}/src/scalar/bitpack128v32_scalar.cpp"
"${LIBRARY_DIR}/src/scalar/p4enc128v32_scalar.cpp"
"${LIBRARY_DIR}/src/scalar/p4d1dec128v32_scalar.cpp"
)
endif()

# 256v32 functions: use SIMD (AVX2) or scalar fallback
if (TURBOPFOR_USE_AVX2)
set(AVX2_SRCS
"${LIBRARY_DIR}/src/simd/p4enc256v32.cpp"
"${LIBRARY_DIR}/src/simd/p4d1dec256v32.cpp"
"${LIBRARY_DIR}/src/simd/p4_simd_internal_256v.cpp"
"${LIBRARY_DIR}/src/simd/bitpack256v32_simd.cpp"
"${LIBRARY_DIR}/src/simd/bitunpack256v32_simd.cpp"
)
list(APPEND SRCS ${AVX2_SRCS})
else()
# Scalar fallback for 256v32
list(APPEND SRCS
"${LIBRARY_DIR}/src/scalar/bitpack256v32_scalar.cpp"
"${LIBRARY_DIR}/src/scalar/p4enc256v32_scalar.cpp"
"${LIBRARY_DIR}/src/scalar/p4d1dec256v32_scalar.cpp"
)
endif()

add_library(_turbopfor_cpp ${SRCS})

target_include_directories(_turbopfor_cpp SYSTEM PUBLIC "${LIBRARY_DIR}/include")
target_include_directories(_turbopfor_cpp PRIVATE "${LIBRARY_DIR}/src")

# Set preprocessor definitions and per-file compile flags for SIMD sources
if (TURBOPFOR_USE_SSE42)
target_compile_definitions(_turbopfor_cpp PRIVATE ENABLE_SSE42)
endif()

if (TURBOPFOR_USE_AVX2)
target_compile_definitions(_turbopfor_cpp PRIVATE ENABLE_AVX2)
endif()

add_library(ch_contrib::turbopfor_cpp ALIAS _turbopfor_cpp)
478 changes: 478 additions & 0 deletions posting-list-layout-v2.md

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,12 @@ class FunctionToSubcolumnsVisitorFirstPass : public InDepthQueryTreeVisitorWithC
const auto & index_columns = index.expression->getRequiredColumns();
add_key_columns(index_columns);
}

for (const auto & projection : metadata_snapshot->getProjections())
{
if (projection.index && projection.index->getIndexDescription())
add_key_columns(projection.getRequiredColumns());
}
}

void enterImpl(const ColumnNode & column_node)
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,7 @@ target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::abseil_swiss_tabl
# Make dbms depend on roaring instead of clickhouse_common_io so that roaring itself can depend on clickhouse_common_io
# That way we we can redirect malloc/free functions avoiding circular dependencies
target_link_libraries(dbms PUBLIC ch_contrib::roaring)
target_link_libraries(dbms PUBLIC ch_contrib::turbopfor_cpp)
if (TARGET ch_contrib::fastpfor)
target_link_libraries(dbms PUBLIC ch_contrib::fastpfor)
endif()
Expand Down
6 changes: 6 additions & 0 deletions src/Common/ProfileEvents.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@
M(TextIndexUsedEmbeddedPostings, "Number of times a posting list embedded in the dictionary has been used.", ValueType::Number) \
M(TextIndexUseHint, "Number of index granules where a direct reading from the text index was added as hint and was used.", ValueType::Number) \
M(TextIndexDiscardHint, "Number of index granules where a direct reading from the text index was added as hint and was discarded due to low selectivity.", ValueType::Number) \
M(TextIndexLazyPackedBlocksDecoded, "Number of packed blocks decoded via TurboPFor in the lazy posting list cursor path.", ValueType::Number) \
M(TextIndexLazyPackedBlocksSkipped, "Number of packed blocks skipped (arithmetic sequence) in the lazy posting list cursor path.", ValueType::Number) \
M(TextIndexLazySeekCount, "Number of seek calls on lazy posting list cursors.", ValueType::Number) \
M(TextIndexLazyLargeBlocksPrepared, "Number of large block Index Sections loaded by lazy posting list cursors.", ValueType::Number) \
M(TextIndexLazyBruteForceIntersections, "Number of times the brute-force intersection algorithm was chosen for lazy posting list cursors.", ValueType::Number) \
M(TextIndexLazyLeapfrogIntersections, "Number of times the leapfrog intersection algorithm was chosen for lazy posting list cursors.", ValueType::Number) \
M(QueryConditionCacheHits, "Number of times an entry has been found in the query condition cache (and reading of marks can be skipped). Only updated for SELECT queries with SETTING use_query_condition_cache = 1.", ValueType::Number) \
M(QueryConditionCacheMisses, "Number of times an entry has not been found in the query condition cache (and reading of mark cannot be skipped). Only updated for SELECT queries with SETTING use_query_condition_cache = 1.", ValueType::Number) \
M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided). Only updated for SELECT queries with SETTING use_query_cache = 1.", ValueType::Number) \
Expand Down
11 changes: 11 additions & 0 deletions src/Core/Settings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7416,6 +7416,17 @@ Using the text index header cache can significantly reduce latency and increase
DECLARE(Bool, use_text_index_postings_cache, false, R"(
Whether to use a cache of deserialized text index posting lists.
Using the text index postings cache can significantly reduce latency and increase throughput when working with a large number of text index queries.
)", 0) \
DECLARE(String, text_index_posting_list_apply_mode, "materialize", R"(
Controls how posting lists are applied during projection text index queries.
'materialize' mode (default) eagerly materializes posting lists into Roaring bitmaps before performing set operations.
'lazy' mode uses lazy-decoding cursors with adaptive intersection/union algorithms (skip-list leapfrog for sparse, brute-force bitmap for dense), which can significantly reduce memory usage and improve performance for selective queries.
Note: 'lazy' mode requires V2 posting list format (with block index). V1 data always uses 'materialize' regardless of this setting.
)", 0) \
DECLARE(Float, text_index_density_threshold, 0.5, R"(
Density threshold for adaptive algorithm selection in lazy posting list mode.
When the minimum posting list density (cardinality / range_span) across all search terms meets or exceeds this threshold, the brute-force bitmap intersection algorithm is used. Below this threshold, the skip-list leapfrog algorithm is used instead.
Higher values favor leapfrog (better for sparse queries), lower values favor brute-force (better for dense queries).
)", 0) \
DECLARE(Bool, allow_experimental_window_view, false, R"(
Enable WINDOW VIEW. Not mature enough.
Expand Down
3 changes: 3 additions & 0 deletions src/Core/SettingsChangesHistory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
{"query_plan_read_in_order_through_join", false, true, "New setting"},
{"query_plan_max_limit_for_lazy_materialization", 10, 10000, "Increase the limit after performance improvement"},
{"text_index_hint_max_selectivity", 0.2, 0.2, "New setting"},
{"text_index_posting_list_apply_mode", "materialize", "materialize", "New setting to control how posting lists are applied during projection text index queries"},
{"text_index_density_threshold", 0.5, 0.5, "New setting for adaptive algorithm selection threshold in lazy posting list mode"},
{"allow_experimental_time_time64_type", false, true, "Enable Time and Time64 type by default"},
{"enable_time_time64_type", false, true, "Enable Time and Time64 type by default"},
{"use_skip_indexes_for_top_k", false, false, "New setting."},
Expand Down Expand Up @@ -1043,6 +1045,7 @@ const VersionToSettingsChangesMap & getMergeTreeSettingsChangesHistory()
{"distributed_index_analysis_min_parts_to_activate", 10, 10, "New setting"},
{"distributed_index_analysis_min_indexes_size_to_activate", 1_GiB, 1_GiB, "New setting"},
{"refresh_statistics_interval", 0, 300, "Enable statistics cache"},
{"compact_parts_flush_per_column", true, true, "New setting"},
});
addSettingsChanges(merge_tree_settings_changes_history, "26.1",
{
Expand Down
4 changes: 4 additions & 0 deletions src/Core/SortCursor.h
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,11 @@ class SortingQueueImpl
{
size_t size = queue.size();
if (size < 2)
{
if constexpr (strategy == SortingQueueStrategy::Batch)
updateBatchSize();
return;
}

auto begin = queue.begin();

Expand Down
4 changes: 4 additions & 0 deletions src/DataTypes/DataTypeFactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,8 @@ const DataTypeFactory::Value * DataTypeFactory::findCreatorByName(const String &
throw Exception(ErrorCodes::UNKNOWN_TYPE, "Unknown data type family: {}", family_name);
}

void registerDataTypePostingList(DataTypeFactory & factory);

DataTypeFactory::DataTypeFactory()
{
registerDataTypeNumbers(*this);
Expand Down Expand Up @@ -363,6 +365,8 @@ DataTypeFactory::DataTypeFactory()
registerDataTypeVariant(*this);
registerDataTypeDynamic(*this);
registerDataTypeJSON(*this);

registerDataTypePostingList(*this);
}

DataTypeFactory & DataTypeFactory::instance()
Expand Down
11 changes: 11 additions & 0 deletions src/DataTypes/Serializations/ISerialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ struct NameAndTypePair;

struct MergeTreeSettings;

struct ProjectionIndexSerializationContext;
struct ProjectionIndexDeserializationContext;

/** Represents serialization of data type.
* Has methods to serialize/deserialize column in binary and several text formats.
* Every data type has default serialization, but can be serialized in different representations.
Expand Down Expand Up @@ -395,6 +398,10 @@ class ISerialization : private boost::noncopyable, public std::enable_shared_fro
/// Type of MergeTree data part we serialize data from if any.
/// Some serializations may differ from type part for more optimal deserialization.
MergeTreeDataPartType data_part_type = MergeTreeDataPartType::Unknown;

/// Optional context for projection index–driven serialization. Provides query- and index-specific information
/// required during serialization, such as additional streams for large postings and part–level metadata.
const ProjectionIndexSerializationContext * projection_index_context = nullptr;
};

struct DeserializeBinaryBulkSettings
Expand Down Expand Up @@ -457,6 +464,10 @@ class ISerialization : private boost::noncopyable, public std::enable_shared_fro
/// If true, call release_stream on all streams used in the prefixes deserialization
/// even for streams that will be used later for data deserialization.
bool release_all_prefixes_streams = false;

/// Optional context for projection index driven deserialization. Contains query- and index-specific information
/// (e.g. row id remapping, row range limits) that affects how data is deserialized and filtered.
const ProjectionIndexDeserializationContext * projection_index_context = nullptr;
};

/// Call before serializeBinaryBulkWithMultipleStreams chain to write something before first mark.
Expand Down
13 changes: 12 additions & 1 deletion src/Interpreters/InterpreterCreateQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -784,11 +784,22 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
}

if (create.columns_list->projections)
{
for (const auto & projection_ast : create.columns_list->projections->children)
{
auto projection = ProjectionDescription::getProjectionFromAST(projection_ast, properties.columns, getContext());
auto projection = ProjectionDescription::getProjectionFromAST(projection_ast, properties.columns, getContext(), mode);

const auto & settings = getContext()->getSettingsRef();
if (projection.index && projection.index->getName() == TEXT_INDEX_NAME && !settings[Setting::enable_full_text_index])
{
throw Exception(
ErrorCodes::SUPPORT_IS_DISABLED,
"The text index feature is disabled. Enable the setting 'enable_full_text_index' to use it");
}

properties.projections.add(std::move(projection));
}
}

properties.constraints = getConstraintsDescription(create.columns_list->constraints, properties.columns, getContext());
}
Expand Down
4 changes: 2 additions & 2 deletions src/Parsers/IAST.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ class IAST : public TypePromotion<IAST>
"Derived RESERVED_BITS must be greater than parent's");
}

return *reinterpret_cast<BitfieldStruct *>(&flags_storage);
return *std::launder(reinterpret_cast<BitfieldStruct *>(&flags_storage));
}

template <typename BitfieldStruct>
Expand All @@ -94,7 +94,7 @@ class IAST : public TypePromotion<IAST>
"Derived RESERVED_BITS must be greater than parent's");
}

return *reinterpret_cast<const BitfieldStruct *>(&flags_storage);
return *std::launder(reinterpret_cast<const BitfieldStruct *>(&flags_storage));
}

UInt32 use_count() const noexcept { return ref_counter.load(std::memory_order_relaxed); }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -359,8 +359,10 @@ AggregateProjectionCandidates getAggregateProjectionCandidates(
std::vector<const ProjectionDescription *> agg_projections;

for (const auto & projection : projections)
if (projection.type == ProjectionDescription::Type::Aggregate)
{
if (projection.type == ProjectionDescription::Type::Aggregate && !projection.index)
agg_projections.push_back(&projection);
}

bool can_use_minmax_projection = allow_implicit_projections && metadata->minmax_count_projection
&& !reading.getMergeTreeData().has_lightweight_delete_parts.load();
Expand Down Expand Up @@ -457,8 +459,10 @@ AggregateProjectionCandidates getAggregateProjectionCandidates(QueryPlan::Node &
std::vector<const ProjectionDescription *> agg_projections;

for (const auto & projection : projections)
if (projection.type == ProjectionDescription::Type::Aggregate)
{
if (projection.type == ProjectionDescription::Type::Aggregate && !projection.index)
agg_projections.push_back(&projection);
}

AggregateProjectionCandidates candidates;

Expand Down
21 changes: 18 additions & 3 deletions src/Processors/QueryPlan/ReadFromMergeTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1889,9 +1889,6 @@ void ReadFromMergeTree::buildIndexes(

const auto & all_indexes = metadata_snapshot->getSecondaryIndices();

if (all_indexes.empty())
return;

std::unordered_set<std::string> ignored_index_names;

if (settings[Setting::ignore_data_skipping_indices].changed)
Expand Down Expand Up @@ -1961,6 +1958,24 @@ void ReadFromMergeTree::buildIndexes(
}
}

if (filter_dag.predicate)
{
const auto & all_projections = metadata_snapshot->getProjections();
for (const auto & projection : all_projections)
{
if (projection.index)
{
auto index_helper = projection.index->getIndex();
if (index_helper)
{
auto condition = index_helper->createIndexCondition(filter_dag.predicate, query_context);
if (!condition->alwaysUnknownOrTrue())
skip_indexes.useful_indices.emplace_back(index_helper, condition);
}
}
}
}

indexes->use_skip_indexes_for_disjunctions = settings[Setting::use_skip_indexes_for_disjunctions]
&& skip_indexes.useful_indices.size() > 1
&& !indexes->key_condition_rpn_template->hasOnlyConjunctions();
Expand Down
18 changes: 15 additions & 3 deletions src/Storages/AlterCommands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -844,7 +844,8 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
}
else if (type == ADD_PROJECTION)
{
auto projection = ProjectionDescription::getProjectionFromAST(projection_decl, metadata.columns, context);
auto projection
= ProjectionDescription::getProjectionFromAST(projection_decl, metadata.columns, context, LoadingStrictnessLevel::CREATE);
metadata.projections.add(std::move(projection), after_projection_name, first, if_not_exists);
}
else if (type == DROP_PROJECTION)
Expand Down Expand Up @@ -1219,6 +1220,12 @@ bool AlterCommands::hasTextIndex(const StorageInMemoryMetadata & metadata)
if (index.type == TEXT_INDEX_NAME)
return true;
}

for (const auto & projection : metadata.projections)
{
if (projection.index && projection.index->getName() == TEXT_INDEX_NAME)
return true;
}
return false;
}

Expand Down Expand Up @@ -1296,13 +1303,18 @@ void AlterCommands::apply(StorageInMemoryMetadata & metadata, ContextPtr context
try
{
/// Check if we can still build projection from new metadata.
auto new_projection = ProjectionDescription::getProjectionFromAST(projection.definition_ast, metadata_copy.columns, context);
auto new_projection = ProjectionDescription::getProjectionFromAST(
projection.definition_ast, metadata_copy.columns, context, LoadingStrictnessLevel::CREATE);
/// Check if new metadata has the same keys as the old one.
if (!blocksHaveEqualStructure(projection.sample_block_for_keys, new_projection.sample_block_for_keys))
throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, "Cannot ALTER column");
/// Check if new metadata is convertible from old metadata for projection.
Block old_projection_block = projection.sample_block;
performRequiredConversions(old_projection_block, new_projection.sample_block.getNamesAndTypesList(), context, metadata_copy.getColumns().getDefaults());
performRequiredConversions(
old_projection_block,
new_projection.sample_block.getNamesAndTypesList(),
context,
metadata_copy.getColumns().getDefaults());
new_projections.add(std::move(new_projection));
}
catch (const Exception & exception)
Expand Down
Loading