From 4650d36d6707aa9a6ad379d16dfe033b85adba7e Mon Sep 17 00:00:00 2001
From: Jiacai Liu
Date: Fri, 13 Dec 2024 15:16:58 +0800
Subject: [PATCH] chore: move old engine into analytic-engine branch (#1607)

## Rationale
To keep the codebase stable and clear, the legacy engine is moved to a separate branch for dedicated maintenance. This avoids the complications of mixing the old and new engine implementations in one branch.

## Detailed Changes
- Remove the legacy engine and its supporting code from the main branch: the `analytic_engine`, `catalog`, `cluster`, `wal`, `proxy`, and related crates, plus `horaemeta`, `horaectl`, the integration tests, and the old CI workflows (see the file summary below).
- Move the new engine crates (`metric_engine`, `pb_types`, `server`) from `horaedb/` to `src/`.

## Test Plan
CI

---
 .asf.yaml | 7 +- .github/actions-rs/gcrov.yml | 20 - .github/codecov.yml | 24 - .github/workflows/ci.yml | 211 +- .github/workflows/coverage.yml | 68 - .github/workflows/meta-ci.yml | 71 - .github/workflows/metric-engine-ci.yml | 99 - .github/workflows/publish-image.yml | 72 - .github/workflows/publish-nightly-image.yml | 57 - .github/workflows/test-build-image.yml | 67 - .github/workflows/tsbs.yml | 65 - .gitmodules | 3 - Cargo.lock | 9398 +++-------------- Cargo.toml | 196 +- Dockerfile | 64 - Makefile | 106 +- README-CN.md | 89 - README.md | 79 +- docker/basic.sh | 56 - docker/datasource.yml | 31 - docker/docker-compose.yaml | 63 - docker/entrypoint.sh | 39 - docs/example-cluster-0.toml | 59 - docs/example-cluster-1.toml | 60 - docs/example-standalone-static-routing.toml | 93 - docs/minimal.toml | 39 - horaectl/Cargo.toml | 39 - horaectl/src/cmd/cluster.rs | 67 - horaectl/src/cmd/mod.rs | 140 - horaectl/src/main.rs | 54 - horaectl/src/operation/cluster.rs | 147 - horaectl/src/operation/mod.rs | 80 - horaectl/src/util/mod.rs | 60 - horaedb/Cargo.lock | 3019 ------ horaedb/Cargo.toml | 68 - horaedb/Makefile | 35 - horaedb/rust-toolchain.toml | 20 - horaedb/server/Cargo.toml | 38 - horaemeta/.golangci.yml | 62 - horaemeta/CONTRIBUTING.md | 77 - horaemeta/DEPENDENCIES.csv | 77 - horaemeta/Dockerfile | 53 - horaemeta/Makefile | 48 - horaemeta/README.md | 51 - horaemeta/cmd/horaemeta-server/main.go | 132 - horaemeta/config/example-cluster0.toml | 34 - horaemeta/config/example-cluster1.toml | 34 - horaemeta/config/example-cluster2.toml | 34 - horaemeta/config/example-standalone.toml | 32 - horaemeta/docker/entrypoint.sh | 34 - horaemeta/docs/style_guide.md | 111 - horaemeta/go.mod | 100 - horaemeta/go.sum | 308 - horaemeta/pkg/assert/assert.go | 35 - horaemeta/pkg/coderr/code.go | 52 - horaemeta/pkg/coderr/error.go | 106 - horaemeta/pkg/log/config.go | 61 - horaemeta/pkg/log/global.go | 66 - horaemeta/pkg/log/log.go | 76 - horaemeta/server/cluster/cluster.go | 127 - horaemeta/server/cluster/manager.go | 528 - horaemeta/server/cluster/manager_test.go | 221 - .../cluster/metadata/cluster_metadata.go | 815 -- .../cluster/metadata/cluster_metadata_test.go | 235 - .../metadata/compare_benchmark_test.go | 95 - horaemeta/server/cluster/metadata/error.go | 39 - .../server/cluster/metadata/table_manager.go | 375 - .../cluster/metadata/table_manager_test.go | 94 - .../cluster/metadata/topology_manager.go | 691 -- .../cluster/metadata/topology_manager_test.go | 151 - horaemeta/server/cluster/metadata/types.go | 215 - horaemeta/server/config/config.go | 402 - horaemeta/server/config/error.go | 31 - horaemeta/server/config/util.go | 41 - horaemeta/server/coordinator/error.go | 27 - .../coordinator/eventdispatch/dispatch.go | 71 - .../eventdispatch/dispatch_impl.go | 199 - horaemeta/server/coordinator/factory.go | 302 - horaemeta/server/coordinator/factory_test.go | 171 - .../coordinator/inspector/node_inspector.go | 147 - .../inspector/node_inspector_test.go | 145 - .../server/coordinator/lock/entry_lock.go | 71 - 
.../coordinator/lock/entry_lock_test.go | 59 - .../coordinator/persist_shard_picker.go | 80 - .../coordinator/persist_shard_picker_test.go | 87 - .../coordinator/procedure/ddl/common_util.go | 173 - .../create_partition_table.go | 387 - .../create_partition_table_test.go | 94 - .../procedure/ddl/createtable/create_table.go | 329 - .../ddl/createtable/create_table_test.go | 69 - .../create_drop_partition_table_test.go | 193 - .../drop_partition_table.go | 424 - .../ddl/droptable/create_drop_table_test.go | 154 - .../procedure/ddl/droptable/drop_table.go | 297 - .../coordinator/procedure/delay_queue.go | 146 - .../coordinator/procedure/delay_queue_test.go | 110 - .../server/coordinator/procedure/error.go | 46 - .../server/coordinator/procedure/manager.go | 37 - .../coordinator/procedure/manager_impl.go | 259 - .../coordinator/procedure/manager_test.go | 177 - .../procedure/operation/split/split.go | 370 - .../procedure/operation/split/split_test.go | 100 - .../transferleader/batch_transfer_leader.go | 154 - .../batch_transfer_leader_test.go | 123 - .../transferleader/transfer_leader.go | 295 - .../transferleader/trasnfer_leader_test.go | 61 - .../server/coordinator/procedure/procedure.go | 104 - .../server/coordinator/procedure/storage.go | 43 - .../coordinator/procedure/storage_impl.go | 198 - .../coordinator/procedure/storage_test.go | 124 - .../coordinator/procedure/test/common.go | 283 - .../server/coordinator/procedure/util.go | 51 - .../coordinator/scheduler/manager/error.go | 24 - .../scheduler/manager/scheduler_manager.go | 333 - .../manager/scheduler_manager_test.go | 74 - .../coordinator/scheduler/nodepicker/error.go | 24 - .../nodepicker/hash/consistent_uniform.go | 380 - .../hash/consistent_uniform_test.go | 311 - .../scheduler/nodepicker/node_picker.go | 131 - .../scheduler/nodepicker/node_picker_test.go | 215 - .../scheduler/rebalanced/scheduler.go | 230 - .../scheduler/rebalanced/scheduler_test.go | 59 - .../coordinator/scheduler/reopen/scheduler.go | 127 - .../scheduler/reopen/scheduler_test.go | 72 - .../server/coordinator/scheduler/scheduler.go | 55 - .../coordinator/scheduler/static/error.go | 24 - .../coordinator/scheduler/static/scheduler.go | 188 - .../scheduler/static/scheduler_test.go | 61 - horaemeta/server/coordinator/shard_picker.go | 82 - .../server/coordinator/shard_picker_test.go | 125 - horaemeta/server/coordinator/watch/watch.go | 224 - .../server/coordinator/watch/watch_test.go | 93 - horaemeta/server/error.go | 30 - horaemeta/server/etcdutil/config.go | 85 - horaemeta/server/etcdutil/error.go | 28 - horaemeta/server/etcdutil/get_leader.go | 36 - horaemeta/server/etcdutil/util.go | 155 - horaemeta/server/etcdutil/util_test.go | 146 - horaemeta/server/id/error.go | 29 - horaemeta/server/id/id.go | 31 - horaemeta/server/id/id_impl.go | 179 - horaemeta/server/id/id_test.go | 61 - horaemeta/server/id/reusable_id_impl.go | 111 - horaemeta/server/id/reusable_id_test.go | 90 - horaemeta/server/limiter/limiter.go | 78 - horaemeta/server/limiter/limiter_test.go | 74 - horaemeta/server/member/error.go | 34 - horaemeta/server/member/lease.go | 233 - horaemeta/server/member/member.go | 286 - horaemeta/server/member/watch_leader.go | 199 - horaemeta/server/member/watch_leader_test.go | 92 - horaemeta/server/server.go | 416 - horaemeta/server/service/grpc/error.go | 32 - horaemeta/server/service/grpc/forward.go | 81 - horaemeta/server/service/grpc/service.go | 458 - horaemeta/server/service/http/api.go | 777 -- horaemeta/server/service/http/error.go | 49 - 
horaemeta/server/service/http/etcd_api.go | 204 - horaemeta/server/service/http/forward.go | 133 - horaemeta/server/service/http/route.go | 131 - horaemeta/server/service/http/service.go | 63 - horaemeta/server/service/http/types.go | 170 - horaemeta/server/service/util.go | 55 - horaemeta/server/status/status.go | 52 - horaemeta/server/storage/error.go | 37 - horaemeta/server/storage/key_path.go | 147 - horaemeta/server/storage/meta.go | 83 - horaemeta/server/storage/storage_impl.go | 661 -- horaemeta/server/storage/storage_test.go | 359 - horaemeta/server/storage/types.go | 605 -- integration_tests/.gitignore | 1 - integration_tests/Cargo.toml | 39 - integration_tests/Makefile | 129 - integration_tests/README.md | 41 - integration_tests/build_meta.sh | 36 - integration_tests/cases/common/basic.result | 105 - integration_tests/cases/common/basic.sql | 71 - .../cases/common/dml/case_sensitive.result | 121 - .../cases/common/dml/case_sensitive.sql | 72 - .../cases/common/dml/insert_mode.result | 235 - .../cases/common/dml/insert_mode.sql | 161 - .../cases/common/dml/issue-1087.result | 122 - .../cases/common/dml/issue-1087.sql | 31 - .../cases/common/dml/issue-302.result | 40 - .../cases/common/dml/issue-302.sql | 28 - .../cases/common/dml/issue-341.result | 161 - .../cases/common/dml/issue-341.sql | 111 - .../cases/common/dml/issue-59.result | 52 - .../cases/common/dml/issue-59.sql | 39 - .../cases/common/dml/issue-637.result | 92 - .../cases/common/dml/issue-637.sql | 71 - .../cases/common/dml/select_filter.result | 81 - .../cases/common/dml/select_filter.sql | 62 - .../cases/common/dml/select_having.result | 80 - .../cases/common/dml/select_having.sql | 65 - .../cases/common/dml/select_order.result | 81 - .../cases/common/dml/select_order.sql | 58 - .../cases/common/dummy/select_1.result | 81 - .../cases/common/dummy/select_1.sql | 41 - .../cases/common/explain/explain.result | 37 - .../cases/common/explain/explain.sql | 26 - .../cases/common/function/aggregate.result | 172 - .../cases/common/function/aggregate.sql | 107 - .../cases/common/function/date_bin.result | 81 - .../cases/common/function/date_bin.sql | 44 - .../function/thetasketch_distinct.result | 467 - .../common/function/thetasketch_distinct.sql | 450 - .../cases/common/function/time_bucket.result | 161 - .../cases/common/function/time_bucket.sql | 56 - .../cases/common/optimizer/optimizer.result | 37 - .../cases/common/optimizer/optimizer.sql | 26 - .../common/show/show_create_table.result | 72 - .../cases/common/show/show_create_table.sql | 35 - integration_tests/cases/env/cluster/common | 1 - .../cases/env/cluster/ddl/alter_table.result | 129 - .../cases/env/cluster/ddl/alter_table.sql | 53 - .../env/cluster/ddl/create_tables.result | 338 - .../cases/env/cluster/ddl/create_tables.sql | 124 - .../env/cluster/ddl/partition_table.result | 252 - .../cases/env/cluster/ddl/partition_table.sql | 143 - .../compaction_offload/compact/compact.result | 110 - .../compaction_offload/compact/compact.sql | 76 - integration_tests/cases/env/local/common | 1 - .../cases/env/local/ddl/alter_table.result | 133 - .../cases/env/local/ddl/alter_table.sql | 54 - .../cases/env/local/ddl/create_tables.result | 362 - .../cases/env/local/ddl/create_tables.sql | 133 - .../cases/env/local/ddl/query-plan.result | 199 - .../cases/env/local/ddl/query-plan.sql | 136 - .../env/local/ddl/sampling-primary-key.result | 82 - .../env/local/ddl/sampling-primary-key.sql | 53 - .../env/local/dml/insert_into_select.result | 83 - 
.../env/local/dml/insert_into_select.sql | 59 - .../cases/env/local/influxql/basic.result | 95 - .../cases/env/local/influxql/basic.sql | 74 - .../cases/env/local/opentsdb/basic.result | 106 - .../cases/env/local/opentsdb/basic.sql | 93 - .../env/local/system/system_tables.result | 51 - .../cases/env/local/system/system_tables.sql | 47 - .../config/compaction-offload.toml | 44 - .../config/horaedb-cluster-0.toml | 59 - .../config/horaedb-cluster-1.toml | 60 - integration_tests/config/horaemeta.toml | 32 - .../config/shard-based-recovery.toml | 38 - integration_tests/dist_query/diff.py | 64 - integration_tests/dist_query/run.sh | 98 - integration_tests/mysql/basic.sh | 30 - integration_tests/opentsdb/run-tests.sh | 20 - integration_tests/opentsdb/test-put.py | 367 - integration_tests/postgresql/basic.sh | 32 - integration_tests/prom/prometheus.yml | 20 - integration_tests/prom/remote-query.py | 114 - integration_tests/prom/run-tests.sh | 27 - integration_tests/recovery/check.py | 101 - integration_tests/recovery/run.sh | 52 - integration_tests/sdk/go/alteraddcolumn.go | 212 - integration_tests/sdk/go/autocreatetable.go | 41 - integration_tests/sdk/go/go.mod | 43 - integration_tests/sdk/go/go.sum | 230 - integration_tests/sdk/go/issue-779.go | 47 - integration_tests/sdk/go/main.go | 67 - integration_tests/sdk/go/util.go | 171 - integration_tests/sdk/java/pom.xml | 92 - .../java/src/main/java/io/ceresdb/App.java | 122 - .../sdk/java/src/main/resources/log4j2.xml | 33 - integration_tests/sdk/rust/Cargo.toml | 32 - integration_tests/sdk/rust/src/main.rs | 353 - integration_tests/src/database.rs | 489 - integration_tests/src/main.rs | 119 - rust-toolchain.toml | 2 +- rustfmt.toml | 4 - scripts/run-tsbs.sh | 128 - src/analytic_engine/Cargo.toml | 107 - .../src/compaction/compactor.rs | 268 - src/analytic_engine/src/compaction/metrics.rs | 30 - src/analytic_engine/src/compaction/mod.rs | 682 -- src/analytic_engine/src/compaction/picker.rs | 1087 -- .../src/compaction/runner/local_runner.rs | 318 - .../src/compaction/runner/mod.rs | 377 - .../src/compaction/runner/node_picker.rs | 88 - .../src/compaction/runner/remote_client.rs | 148 - .../src/compaction/runner/remote_runner.rs | 116 - .../src/compaction/scheduler.rs | 822 -- src/analytic_engine/src/context.rs | 44 - src/analytic_engine/src/engine.rs | 381 - src/analytic_engine/src/error.rs | 23 - src/analytic_engine/src/instance/alter.rs | 308 - src/analytic_engine/src/instance/close.rs | 107 - src/analytic_engine/src/instance/create.rs | 133 - src/analytic_engine/src/instance/drop.rs | 110 - src/analytic_engine/src/instance/engine.rs | 474 - .../src/instance/flush_compaction.rs | 993 -- .../src/instance/mem_collector.rs | 138 - src/analytic_engine/src/instance/mod.rs | 401 - src/analytic_engine/src/instance/open.rs | 558 - src/analytic_engine/src/instance/read.rs | 453 - .../src/instance/reorder_memtable.rs | 297 - .../src/instance/serial_executor.rs | 261 - .../src/instance/wal_replayer.rs | 716 -- src/analytic_engine/src/instance/write.rs | 831 -- src/analytic_engine/src/lib.rs | 238 - src/analytic_engine/src/manifest/details.rs | 1544 --- src/analytic_engine/src/manifest/error.rs | 47 - src/analytic_engine/src/manifest/meta_edit.rs | 495 - .../src/manifest/meta_snapshot.rs | 109 - src/analytic_engine/src/manifest/mod.rs | 59 - .../src/memtable/columnar/factory.rs | 53 - .../src/memtable/columnar/iter.rs | 399 - .../src/memtable/columnar/mod.rs | 233 - src/analytic_engine/src/memtable/error.rs | 51 - src/analytic_engine/src/memtable/factory.rs 
| 47 - src/analytic_engine/src/memtable/key.rs | 232 - .../src/memtable/layered/factory.rs | 54 - .../src/memtable/layered/iter.rs | 120 - .../src/memtable/layered/mod.rs | 724 -- src/analytic_engine/src/memtable/mod.rs | 277 - .../src/memtable/reversed_iter.rs | 82 - .../src/memtable/skiplist/factory.rs | 47 - .../src/memtable/skiplist/iter.rs | 320 - .../src/memtable/skiplist/mod.rs | 526 - src/analytic_engine/src/memtable/test_util.rs | 45 - src/analytic_engine/src/payload.rs | 338 - .../src/prefetchable_stream.rs | 187 - src/analytic_engine/src/row_iter/chain.rs | 499 - src/analytic_engine/src/row_iter/dedup.rs | 264 - src/analytic_engine/src/row_iter/merge.rs | 1036 -- src/analytic_engine/src/row_iter/mod.rs | 61 - .../src/row_iter/record_batch_stream.rs | 400 - src/analytic_engine/src/row_iter/tests.rs | 121 - src/analytic_engine/src/sampler.rs | 618 -- src/analytic_engine/src/setup.rs | 284 - src/analytic_engine/src/space.rs | 271 - src/analytic_engine/src/sst/factory.rs | 303 - src/analytic_engine/src/sst/file.rs | 740 -- src/analytic_engine/src/sst/header.rs | 81 - src/analytic_engine/src/sst/manager.rs | 175 - .../src/sst/meta_data/cache.rs | 351 - .../src/sst/meta_data/metadata_reader.rs | 148 - src/analytic_engine/src/sst/meta_data/mod.rs | 222 - src/analytic_engine/src/sst/metrics.rs | 101 - src/analytic_engine/src/sst/mod.rs | 28 - .../src/sst/parquet/async_reader.rs | 922 -- .../src/sst/parquet/encoding.rs | 431 - .../src/sst/parquet/meta_data/filter.rs | 372 - .../src/sst/parquet/meta_data/mod.rs | 247 - src/analytic_engine/src/sst/parquet/mod.rs | 26 - .../src/sst/parquet/row_group_pruner.rs | 415 - src/analytic_engine/src/sst/parquet/writer.rs | 1033 -- src/analytic_engine/src/sst/reader.rs | 136 - src/analytic_engine/src/sst/writer.rs | 272 - src/analytic_engine/src/table/data.rs | 1184 --- src/analytic_engine/src/table/metrics.rs | 481 - src/analytic_engine/src/table/mod.rs | 758 -- src/analytic_engine/src/table/sst_util.rs | 47 - src/analytic_engine/src/table/version.rs | 1237 --- src/analytic_engine/src/table/version_edit.rs | 208 - .../src/table_meta_set_impl.rs | 383 - src/analytic_engine/src/table_options.rs | 921 -- src/analytic_engine/src/tests/alter_test.rs | 514 - .../src/tests/compaction_test.rs | 118 - src/analytic_engine/src/tests/drop_test.rs | 356 - src/analytic_engine/src/tests/mod.rs | 32 - src/analytic_engine/src/tests/open_test.rs | 46 - .../src/tests/read_write_test.rs | 697 -- src/analytic_engine/src/tests/row_util.rs | 108 - src/analytic_engine/src/tests/table.rs | 362 - src/analytic_engine/src/tests/util.rs | 745 -- src/benchmarks/Cargo.toml | 74 - src/benchmarks/README.md | 25 - src/benchmarks/bench.toml | 70 - src/benchmarks/benches/bench.rs | 242 - src/benchmarks/config/bench.toml | 78 - src/benchmarks/config/sst.toml | 45 - src/benchmarks/src/bin/parquet-reader.rs | 44 - src/benchmarks/src/bin/sst-tools.rs | 84 - src/benchmarks/src/config.rs | 157 - src/benchmarks/src/lib.rs | 34 - src/benchmarks/src/merge_memtable_bench.rs | 232 - src/benchmarks/src/merge_sst_bench.rs | 256 - src/benchmarks/src/parquet_bench.rs | 169 - src/benchmarks/src/replay_bench.rs | 97 - src/benchmarks/src/scan_memtable_bench.rs | 130 - src/benchmarks/src/sst_bench.rs | 148 - src/benchmarks/src/sst_tools.rs | 324 - src/benchmarks/src/table.rs | 246 - src/benchmarks/src/util.rs | 670 -- src/benchmarks/src/wal_write_bench.rs | 104 - src/catalog/Cargo.toml | 45 - src/catalog/src/consts.rs | 32 - src/catalog/src/lib.rs | 100 - src/catalog/src/manager.rs | 58 - 
src/catalog/src/schema.rs | 413 - src/catalog/src/table_operator.rs | 291 - src/catalog/src/test_util.rs | 172 - src/catalog_impls/Cargo.toml | 48 - src/catalog_impls/src/cluster_based.rs | 118 - src/catalog_impls/src/lib.rs | 73 - src/catalog_impls/src/system_tables.rs | 155 - src/catalog_impls/src/table_based.rs | 1175 --- src/catalog_impls/src/volatile.rs | 430 - src/cluster/Cargo.toml | 53 - src/cluster/src/cluster_impl.rs | 470 - src/cluster/src/config.rs | 141 - src/cluster/src/lib.rs | 223 - src/cluster/src/shard_lock_manager.rs | 908 -- src/cluster/src/shard_operation.rs | 53 - src/cluster/src/shard_operator.rs | 453 - src/cluster/src/shard_set.rs | 367 - src/cluster/src/topology.rs | 210 - src/common_types/Cargo.toml | 54 - src/common_types/src/bitset.rs | 612 -- src/common_types/src/cluster.rs | 26 - src/common_types/src/column.rs | 396 - src/common_types/src/column_block.rs | 1370 --- src/common_types/src/column_schema.rs | 690 -- src/common_types/src/datum.rs | 1834 ---- src/common_types/src/hex.rs | 76 - src/common_types/src/lib.rs | 61 - src/common_types/src/projected_schema.rs | 475 - src/common_types/src/record_batch.rs | 829 -- src/common_types/src/request_id.rs | 73 - src/common_types/src/row/bitset.rs | 270 - src/common_types/src/row/contiguous.rs | 832 -- src/common_types/src/row/mod.rs | 650 -- src/common_types/src/schema.rs | 1852 ---- src/common_types/src/string.rs | 121 - src/common_types/src/table.rs | 23 - src/common_types/src/tests.rs | 448 - src/common_types/src/time.rs | 411 - src/components/alloc_tracker/Cargo.toml | 31 - src/components/alloc_tracker/src/lib.rs | 174 - src/components/arena/Cargo.toml | 33 - src/components/arena/src/arena_trait.rs | 88 - src/components/arena/src/lib.rs | 24 - src/components/arena/src/mono_inc.rs | 367 - src/components/arrow_ext/Cargo.toml | 37 - src/components/arrow_ext/src/ipc.rs | 438 - src/components/arrow_ext/src/lib.rs | 20 - src/components/arrow_ext/src/operation.rs | 96 - src/components/bytes_ext/Cargo.toml | 35 - src/components/bytes_ext/src/lib.rs | 311 - src/components/codec/Cargo.toml | 39 - src/components/codec/src/columnar/bool.rs | 248 - src/components/codec/src/columnar/bytes.rs | 265 - src/components/codec/src/columnar/mod.rs | 840 -- src/components/codec/src/columnar/number.rs | 159 - .../codec/src/columnar/timestamp.rs | 109 - src/components/codec/src/compact/bytes.rs | 143 - src/components/codec/src/compact/datum.rs | 294 - src/components/codec/src/compact/float.rs | 116 - src/components/codec/src/compact/mod.rs | 111 - src/components/codec/src/compact/number.rs | 175 - src/components/codec/src/consts.rs | 36 - src/components/codec/src/lib.rs | 58 - .../codec/src/memcomparable/bytes.rs | 294 - .../codec/src/memcomparable/datum.rs | 332 - src/components/codec/src/memcomparable/mod.rs | 115 - .../codec/src/memcomparable/number.rs | 348 - src/components/codec/src/row/mod.rs | 247 - src/components/codec/src/varint.rs | 226 - src/components/future_ext/Cargo.toml | 39 - src/components/future_ext/src/cancel.rs | 190 - src/components/future_ext/src/lib.rs | 24 - src/components/future_ext/src/retry.rs | 223 - src/components/generic_error/Cargo.toml | 31 - src/components/generic_error/src/lib.rs | 34 - src/components/hash_ext/Cargo.toml | 37 - src/components/hash_ext/src/lib.rs | 100 - src/components/id_allocator/Cargo.toml | 36 - src/components/id_allocator/src/lib.rs | 121 - src/components/logger/Cargo.toml | 45 - src/components/logger/src/lib.rs | 614 -- src/components/macros/Cargo.toml | 31 - 
src/components/macros/src/lib.rs | 84 - src/components/message_queue/Cargo.toml | 48 - .../message_queue/src/kafka/config.rs | 119 - .../message_queue/src/kafka/kafka_impl.rs | 467 - src/components/message_queue/src/kafka/mod.rs | 21 - src/components/message_queue/src/lib.rs | 121 - .../message_queue/src/tests/cases.rs | 258 - src/components/message_queue/src/tests/mod.rs | 22 - .../message_queue/src/tests/util.rs | 51 - src/components/metric_ext/Cargo.toml | 38 - src/components/metric_ext/src/lib.rs | 280 - src/components/notifier/Cargo.toml | 34 - src/components/notifier/src/lib.rs | 18 - src/components/notifier/src/notifier.rs | 126 - src/components/object_store/Cargo.toml | 73 - src/components/object_store/src/aliyun.rs | 81 - src/components/object_store/src/config.rs | 157 - src/components/object_store/src/disk_cache.rs | 1558 --- src/components/object_store/src/lib.rs | 40 - src/components/object_store/src/local_file.rs | 44 - src/components/object_store/src/mem_cache.rs | 415 - src/components/object_store/src/metrics.rs | 429 - src/components/object_store/src/multi_part.rs | 224 - src/components/object_store/src/prefix.rs | 444 - src/components/object_store/src/s3.rs | 54 - src/components/object_store/src/test_util.rs | 207 - src/components/panic_ext/Cargo.toml | 46 - src/components/panic_ext/src/lib.rs | 175 - src/components/parquet-testing | 1 - src/components/parquet_ext/Cargo.toml | 44 - src/components/parquet_ext/src/lib.rs | 27 - src/components/parquet_ext/src/meta_data.rs | 104 - src/components/parquet_ext/src/prune/equal.rs | 503 - .../parquet_ext/src/prune/min_max.rs | 337 - src/components/parquet_ext/src/prune/mod.rs | 19 - src/components/parquet_ext/src/reader.rs | 138 - src/components/parquet_ext/src/tests.rs | 131 - src/components/partitioned_lock/Cargo.toml | 37 - src/components/partitioned_lock/src/lib.rs | 426 - src/components/profile/Cargo.toml | 41 - src/components/profile/src/lib.rs | 186 - src/components/runtime/Cargo.toml | 42 - src/components/runtime/src/lib.rs | 327 - src/components/runtime/src/metrics.rs | 72 - .../runtime/src/priority_runtime.rs | 101 - src/components/sampling_cache/Cargo.toml | 34 - src/components/sampling_cache/src/lib.rs | 126 - src/components/size_ext/Cargo.toml | 37 - src/components/size_ext/src/lib.rs | 297 - src/components/skiplist/Cargo.toml | 25 - src/components/skiplist/benches/bench.rs | 179 - src/components/skiplist/src/key.rs | 68 - src/components/skiplist/src/lib.rs | 19 - src/components/skiplist/src/list.rs | 702 -- src/components/skiplist/src/slice.rs | 89 - src/components/skiplist/tests/tests.rs | 259 - src/components/system_stats/Cargo.toml | 33 - src/components/system_stats/src/lib.rs | 157 - src/components/table_kv/Cargo.toml | 44 - src/components/table_kv/src/config.rs | 238 - src/components/table_kv/src/lib.rs | 221 - src/components/table_kv/src/memory.rs | 355 - src/components/table_kv/src/metrics.rs | 30 - src/components/table_kv/src/obkv.rs | 853 -- src/components/table_kv/src/obkv/tests.rs | 553 - src/components/table_kv/src/tests.rs | 564 - src/components/test_util/Cargo.toml | 38 - src/components/test_util/src/lib.rs | 61 - src/components/time_ext/Cargo.toml | 44 - src/components/time_ext/src/lib.rs | 550 - src/components/timed_task/Cargo.toml | 36 - src/components/timed_task/src/lib.rs | 164 - src/components/toml_ext/Cargo.toml | 40 - src/components/toml_ext/src/lib.rs | 120 - src/components/trace_metric/Cargo.toml | 34 - src/components/trace_metric/src/collector.rs | 197 - src/components/trace_metric/src/lib.rs 
| 23 - src/components/trace_metric/src/metric.rs | 114 - src/components/trace_metric_derive/Cargo.toml | 39 - .../trace_metric_derive/src/builder.rs | 252 - src/components/trace_metric_derive/src/lib.rs | 29 - .../trace_metric_derive_tests/Cargo.toml | 34 - .../trace_metric_derive_tests/src/lib.rs | 74 - src/components/tracing_util/Cargo.toml | 21 - src/components/tracing_util/src/lib.rs | 7 - src/components/tracing_util/src/logging.rs | 178 - src/df_engine_extensions/Cargo.toml | 54 - src/df_engine_extensions/src/codec.rs | 113 - .../src/dist_sql_query/codec.rs | 109 - .../src/dist_sql_query/mod.rs | 185 - .../src/dist_sql_query/physical_plan.rs | 769 -- .../src/dist_sql_query/resolver.rs | 362 - ...query__resolver__test__aggr_push_down.snap | 15 - ...esolver__test__basic_partitioned_scan.snap | 16 - ...query__resolver__test__basic_sub_scan.snap | 9 - ...lver__test__compounded_aggr_push_down.snap | 21 - ...th_multiple_partitioned_scan_children.snap | 27 - .../src/dist_sql_query/test_util.rs | 619 -- src/df_engine_extensions/src/lib.rs | 20 - src/df_engine_extensions/src/metrics.rs | 28 - src/df_operator/Cargo.toml | 44 - src/df_operator/src/aggregate.rs | 189 - src/df_operator/src/functions.rs | 334 - src/df_operator/src/lib.rs | 26 - src/df_operator/src/registry.rs | 163 - src/df_operator/src/scalar.rs | 55 - src/df_operator/src/udaf.rs | 61 - src/df_operator/src/udfs/mod.rs | 31 - .../src/udfs/thetasketch_distinct.rs | 202 - src/df_operator/src/udfs/time_bucket.rs | 342 - src/df_operator/src/visitor.rs | 28 - src/horaedb/Cargo.toml | 80 - src/horaedb/bin/horaedb-server.rs | 131 - src/horaedb/build.rs | 31 - src/horaedb/src/config.rs | 148 - src/horaedb/src/lib.rs | 20 - src/horaedb/src/setup.rs | 488 - src/horaedb/src/signal_handler.rs | 44 - src/interpreters/Cargo.toml | 64 - src/interpreters/src/alter_table.rs | 147 - src/interpreters/src/context.rs | 135 - src/interpreters/src/create.rs | 81 - src/interpreters/src/describe.rs | 108 - src/interpreters/src/drop.rs | 81 - src/interpreters/src/exists.rs | 78 - src/interpreters/src/factory.rs | 102 - src/interpreters/src/insert.rs | 614 -- src/interpreters/src/interpreter.rs | 98 - src/interpreters/src/lib.rs | 46 - src/interpreters/src/metrics.rs | 28 - src/interpreters/src/select.rs | 176 - src/interpreters/src/show.rs | 284 - src/interpreters/src/show_create.rs | 255 - .../src/table_manipulator/catalog_based.rs | 126 - .../src/table_manipulator/meta_based.rs | 161 - src/interpreters/src/table_manipulator/mod.rs | 103 - src/interpreters/src/tests.rs | 422 - src/interpreters/src/validator.rs | 111 - src/meta_client/Cargo.toml | 50 - src/meta_client/src/lib.rs | 127 - src/meta_client/src/load_balance.rs | 80 - src/meta_client/src/meta_impl.rs | 295 - src/meta_client/src/types.rs | 600 -- {horaedb => src}/metric_engine/Cargo.toml | 1 - .../metric_engine/src/compaction/mod.rs | 0 .../metric_engine/src/compaction/picker.rs | 0 .../metric_engine/src/compaction/scheduler.rs | 0 {horaedb => src}/metric_engine/src/error.rs | 0 {horaedb => src}/metric_engine/src/lib.rs | 0 {horaedb => src}/metric_engine/src/macros.rs | 22 + .../metric_engine/src/manifest.rs | 2 +- .../metric_engine/src/operator.rs | 3 +- {horaedb => src}/metric_engine/src/read.rs | 0 {horaedb => src}/metric_engine/src/sst.rs | 3 +- {horaedb => src}/metric_engine/src/storage.rs | 2 +- .../metric_engine/src/test_util.rs | 0 {horaedb => src}/metric_engine/src/types.rs | 0 src/partition_table_engine/Cargo.toml | 48 - src/partition_table_engine/src/error.rs | 31 - 
src/partition_table_engine/src/lib.rs | 155 - src/partition_table_engine/src/metrics.rs | 39 - src/partition_table_engine/src/partition.rs | 451 - .../src/scan_builder.rs | 317 - src/partition_table_engine/src/test_util.rs | 158 - {horaedb => src}/pb_types/Cargo.toml | 0 {horaedb => src}/pb_types/build.rs | 0 {horaedb => src}/pb_types/protos/sst.proto | 0 {horaedb => src}/pb_types/src/lib.rs | 0 src/proxy/Cargo.toml | 84 - src/proxy/src/auth/mod.rs | 37 - src/proxy/src/auth/with_file.rs | 136 - src/proxy/src/context.rs | 110 - src/proxy/src/error.rs | 109 - src/proxy/src/error_util.rs | 53 - src/proxy/src/forward.rs | 549 - src/proxy/src/grpc/mod.rs | 21 - src/proxy/src/grpc/prom_query.rs | 517 - src/proxy/src/grpc/route.rs | 45 - src/proxy/src/grpc/sql_query.rs | 408 - src/proxy/src/grpc/write.rs | 61 - src/proxy/src/handlers/admin.rs | 79 - src/proxy/src/handlers/error.rs | 93 - src/proxy/src/handlers/mod.rs | 27 - src/proxy/src/hotspot.rs | 462 - src/proxy/src/hotspot_lru.rs | 122 - src/proxy/src/http/mod.rs | 20 - src/proxy/src/http/prom.rs | 688 -- src/proxy/src/http/route.rs | 69 - src/proxy/src/http/sql.rs | 279 - src/proxy/src/influxdb/mod.rs | 204 - src/proxy/src/influxdb/types.rs | 903 -- src/proxy/src/instance.rs | 65 - src/proxy/src/lib.rs | 567 - src/proxy/src/limiter.rs | 437 - src/proxy/src/metrics.rs | 80 - src/proxy/src/opentsdb/mod.rs | 183 - src/proxy/src/opentsdb/types.rs | 573 - src/proxy/src/read.rs | 349 - .../schema_config_provider/cluster_based.rs | 45 - .../schema_config_provider/config_based.rs | 52 - src/proxy/src/schema_config_provider/mod.rs | 39 - src/proxy/src/util.rs | 97 - src/proxy/src/write.rs | 1244 --- src/query_engine/Cargo.toml | 58 - src/query_engine/src/config.rs | 38 - src/query_engine/src/context.rs | 35 - .../src/datafusion_impl/executor.rs | 102 - .../logical_optimizer/tests.rs | 170 - src/query_engine/src/datafusion_impl/mod.rs | 143 - .../physical_optimizer/coalesce_batches.rs | 81 - .../datafusion_impl/physical_optimizer/mod.rs | 43 - .../physical_optimizer/repartition.rs | 72 - .../src/datafusion_impl/physical_plan.rs | 153 - .../physical_plan_extension/mod.rs | 19 - .../physical_plan_extension/prom_align.rs | 998 -- .../src/datafusion_impl/physical_planner.rs | 107 - .../physical_planner_extension/mod.rs | 52 - .../physical_planner_extension/prom_align.rs | 64 - .../src/datafusion_impl/task_context.rs | 312 - src/query_engine/src/error.rs | 104 - src/query_engine/src/executor.rs | 43 - src/query_engine/src/lib.rs | 132 - src/query_engine/src/physical_planner.rs | 68 - src/query_frontend/Cargo.toml | 73 - src/query_frontend/src/ast.rs | 188 - src/query_frontend/src/config.rs | 31 - src/query_frontend/src/container.rs | 219 - src/query_frontend/src/datafusion_util.rs | 38 - src/query_frontend/src/frontend.rs | 337 - src/query_frontend/src/influxql/mod.rs | 56 - src/query_frontend/src/influxql/planner.rs | 286 - src/query_frontend/src/lib.rs | 40 - .../src/logical_optimizer/mod.rs | 53 - .../src/logical_optimizer/type_conversion.rs | 611 -- src/query_frontend/src/opentsdb/mod.rs | 417 - src/query_frontend/src/opentsdb/types.rs | 66 - src/query_frontend/src/parser.rs | 1628 --- src/query_frontend/src/partition.rs | 107 - src/query_frontend/src/plan.rs | 468 - src/query_frontend/src/planner.rs | 2728 ----- src/query_frontend/src/promql.rs | 29 - src/query_frontend/src/promql/convert.rs | 655 -- .../src/promql/datafusion_util.rs | 145 - src/query_frontend/src/promql/error.rs | 69 - src/query_frontend/src/promql/pushdown.rs | 66 - 
src/query_frontend/src/promql/remote.rs | 285 - src/query_frontend/src/promql/udf.rs | 307 - src/query_frontend/src/provider.rs | 536 - src/query_frontend/src/tests.rs | 138 - src/remote_engine_client/Cargo.toml | 49 - src/remote_engine_client/src/cached_router.rs | 146 - src/remote_engine_client/src/channel.rs | 101 - src/remote_engine_client/src/client.rs | 645 -- src/remote_engine_client/src/config.rs | 56 - src/remote_engine_client/src/lib.rs | 242 - src/remote_engine_client/src/status_code.rs | 44 - src/router/Cargo.toml | 48 - src/router/src/cluster_based.rs | 349 - src/router/src/endpoint.rs | 83 - src/router/src/hash.rs | 44 - src/router/src/lib.rs | 120 - src/router/src/rule_based.rs | 194 - src/server/Cargo.toml | 63 +- src/server/src/config.rs | 243 - src/server/src/consts.rs | 29 - src/server/src/error_util.rs | 53 - src/server/src/federated.rs | 400 - .../src/grpc/compaction_service/error.rs | 96 - src/server/src/grpc/compaction_service/mod.rs | 113 - .../src/grpc/meta_event_service/error.rs | 98 - src/server/src/grpc/meta_event_service/mod.rs | 703 -- src/server/src/grpc/metrics.rs | 145 - src/server/src/grpc/mod.rs | 402 - .../src/grpc/remote_engine_service/error.rs | 97 - .../src/grpc/remote_engine_service/metrics.rs | 28 - .../src/grpc/remote_engine_service/mod.rs | 1421 --- src/server/src/grpc/storage_service/error.rs | 24 - src/server/src/grpc/storage_service/header.rs | 59 - src/server/src/grpc/storage_service/mod.rs | 458 - src/server/src/http.rs | 1018 -- src/server/src/lib.rs | 34 - src/server/src/local_tables.rs | 105 - {horaedb => src}/server/src/main.rs | 0 src/server/src/metrics.rs | 46 - src/server/src/mysql/builder.rs | 74 - src/server/src/mysql/error.rs | 63 - src/server/src/mysql/mod.rs | 25 - src/server/src/mysql/service.rs | 117 - src/server/src/mysql/worker.rs | 158 - src/server/src/mysql/writer.rs | 243 - src/server/src/postgresql/builder.rs | 78 - src/server/src/postgresql/error.rs | 54 - src/server/src/postgresql/handler.rs | 184 - src/server/src/postgresql/mod.rs | 24 - src/server/src/postgresql/service.rs | 128 - src/server/src/server.rs | 558 - src/server/src/session.rs | 206 - src/system_catalog/Cargo.toml | 50 - src/system_catalog/src/lib.rs | 209 - src/system_catalog/src/sys_catalog_table.rs | 1117 -- src/system_catalog/src/tables.rs | 202 - src/table_engine/Cargo.toml | 63 - src/table_engine/src/engine.rs | 408 - src/table_engine/src/lib.rs | 32 - src/table_engine/src/memory.rs | 391 - src/table_engine/src/partition/mod.rs | 348 - .../partition/rule/df_adapter/extractor.rs | 205 - .../src/partition/rule/df_adapter/mod.rs | 309 - .../src/partition/rule/factory.rs | 74 - src/table_engine/src/partition/rule/filter.rs | 52 - src/table_engine/src/partition/rule/key.rs | 604 -- src/table_engine/src/partition/rule/mod.rs | 79 - src/table_engine/src/partition/rule/random.rs | 60 - src/table_engine/src/predicate.rs | 589 -- src/table_engine/src/provider.rs | 564 - src/table_engine/src/proxy.rs | 118 - src/table_engine/src/remote/mod.rs | 86 - src/table_engine/src/remote/model.rs | 576 - src/table_engine/src/stream.rs | 178 - src/table_engine/src/table.rs | 718 -- src/tools/Cargo.toml | 49 - src/tools/src/bin/sst-convert.rs | 156 - src/tools/src/bin/sst-metadata.rs | 306 - src/tools/src/bin/wal-reader.rs | 163 - src/tools/src/lib.rs | 18 - src/tools/src/sst_util.rs | 37 - src/wal/Cargo.toml | 84 - src/wal/src/config.rs | 73 - src/wal/src/dummy.rs | 81 - src/wal/src/kv_encoder.rs | 812 -- src/wal/src/lib.rs | 35 - src/wal/src/local_storage_impl/config.rs | 
37 - src/wal/src/local_storage_impl/mod.rs | 21 - .../src/local_storage_impl/record_encoding.rs | 254 - src/wal/src/local_storage_impl/segment.rs | 1630 --- src/wal/src/local_storage_impl/wal_manager.rs | 217 - src/wal/src/log_batch.rs | 144 - src/wal/src/manager.rs | 712 -- src/wal/src/message_queue_impl/config.rs | 47 - src/wal/src/message_queue_impl/encoding.rs | 423 - src/wal/src/message_queue_impl/log_cleaner.rs | 124 - src/wal/src/message_queue_impl/mod.rs | 45 - src/wal/src/message_queue_impl/namespace.rs | 727 -- src/wal/src/message_queue_impl/region.rs | 1245 --- .../src/message_queue_impl/region_context.rs | 714 -- .../snapshot_synchronizer.rs | 158 - src/wal/src/message_queue_impl/test_util.rs | 103 - src/wal/src/message_queue_impl/wal.rs | 191 - src/wal/src/metrics.rs | 28 - src/wal/src/rocksdb_impl/config.rs | 80 - src/wal/src/rocksdb_impl/manager.rs | 1039 -- src/wal/src/rocksdb_impl/mod.rs | 21 - src/wal/src/table_kv_impl/config.rs | 135 - src/wal/src/table_kv_impl/encoding.rs | 226 - src/wal/src/table_kv_impl/mod.rs | 34 - src/wal/src/table_kv_impl/model.rs | 688 -- src/wal/src/table_kv_impl/namespace.rs | 2042 ---- src/wal/src/table_kv_impl/table_unit.rs | 1033 -- src/wal/src/table_kv_impl/wal.rs | 306 - src/wal/tests/read_write.rs | 1133 -- 833 files changed, 1795 insertions(+), 185271 deletions(-) delete mode 100644 .github/actions-rs/gcrov.yml delete mode 100644 .github/codecov.yml delete mode 100644 .github/workflows/coverage.yml delete mode 100644 .github/workflows/meta-ci.yml delete mode 100644 .github/workflows/metric-engine-ci.yml delete mode 100644 .github/workflows/publish-image.yml delete mode 100644 .github/workflows/publish-nightly-image.yml delete mode 100644 .github/workflows/test-build-image.yml delete mode 100644 .github/workflows/tsbs.yml delete mode 100644 .gitmodules delete mode 100644 Dockerfile delete mode 100644 README-CN.md delete mode 100755 docker/basic.sh delete mode 100644 docker/datasource.yml delete mode 100644 docker/docker-compose.yaml delete mode 100755 docker/entrypoint.sh delete mode 100644 docs/example-cluster-0.toml delete mode 100644 docs/example-cluster-1.toml delete mode 100644 docs/example-standalone-static-routing.toml delete mode 100644 docs/minimal.toml delete mode 100644 horaectl/Cargo.toml delete mode 100644 horaectl/src/cmd/cluster.rs delete mode 100644 horaectl/src/cmd/mod.rs delete mode 100644 horaectl/src/main.rs delete mode 100644 horaectl/src/operation/cluster.rs delete mode 100644 horaectl/src/operation/mod.rs delete mode 100644 horaectl/src/util/mod.rs delete mode 100644 horaedb/Cargo.lock delete mode 100644 horaedb/Cargo.toml delete mode 100644 horaedb/Makefile delete mode 100644 horaedb/rust-toolchain.toml delete mode 100644 horaedb/server/Cargo.toml delete mode 100644 horaemeta/.golangci.yml delete mode 100644 horaemeta/CONTRIBUTING.md delete mode 100644 horaemeta/DEPENDENCIES.csv delete mode 100644 horaemeta/Dockerfile delete mode 100644 horaemeta/Makefile delete mode 100644 horaemeta/README.md delete mode 100644 horaemeta/cmd/horaemeta-server/main.go delete mode 100644 horaemeta/config/example-cluster0.toml delete mode 100644 horaemeta/config/example-cluster1.toml delete mode 100644 horaemeta/config/example-cluster2.toml delete mode 100644 horaemeta/config/example-standalone.toml delete mode 100755 horaemeta/docker/entrypoint.sh delete mode 100644 horaemeta/docs/style_guide.md delete mode 100644 horaemeta/go.mod delete mode 100644 horaemeta/go.sum delete mode 100644 horaemeta/pkg/assert/assert.go delete mode 100644 
horaemeta/pkg/coderr/code.go delete mode 100644 horaemeta/pkg/coderr/error.go delete mode 100644 horaemeta/pkg/log/config.go delete mode 100644 horaemeta/pkg/log/global.go delete mode 100644 horaemeta/pkg/log/log.go delete mode 100644 horaemeta/server/cluster/cluster.go delete mode 100644 horaemeta/server/cluster/manager.go delete mode 100644 horaemeta/server/cluster/manager_test.go delete mode 100644 horaemeta/server/cluster/metadata/cluster_metadata.go delete mode 100644 horaemeta/server/cluster/metadata/cluster_metadata_test.go delete mode 100644 horaemeta/server/cluster/metadata/compare_benchmark_test.go delete mode 100644 horaemeta/server/cluster/metadata/error.go delete mode 100644 horaemeta/server/cluster/metadata/table_manager.go delete mode 100644 horaemeta/server/cluster/metadata/table_manager_test.go delete mode 100644 horaemeta/server/cluster/metadata/topology_manager.go delete mode 100644 horaemeta/server/cluster/metadata/topology_manager_test.go delete mode 100644 horaemeta/server/cluster/metadata/types.go delete mode 100644 horaemeta/server/config/config.go delete mode 100644 horaemeta/server/config/error.go delete mode 100644 horaemeta/server/config/util.go delete mode 100644 horaemeta/server/coordinator/error.go delete mode 100644 horaemeta/server/coordinator/eventdispatch/dispatch.go delete mode 100644 horaemeta/server/coordinator/eventdispatch/dispatch_impl.go delete mode 100644 horaemeta/server/coordinator/factory.go delete mode 100644 horaemeta/server/coordinator/factory_test.go delete mode 100644 horaemeta/server/coordinator/inspector/node_inspector.go delete mode 100644 horaemeta/server/coordinator/inspector/node_inspector_test.go delete mode 100644 horaemeta/server/coordinator/lock/entry_lock.go delete mode 100644 horaemeta/server/coordinator/lock/entry_lock_test.go delete mode 100644 horaemeta/server/coordinator/persist_shard_picker.go delete mode 100644 horaemeta/server/coordinator/persist_shard_picker_test.go delete mode 100644 horaemeta/server/coordinator/procedure/ddl/common_util.go delete mode 100644 horaemeta/server/coordinator/procedure/ddl/createpartitiontable/create_partition_table.go delete mode 100644 horaemeta/server/coordinator/procedure/ddl/createpartitiontable/create_partition_table_test.go delete mode 100644 horaemeta/server/coordinator/procedure/ddl/createtable/create_table.go delete mode 100644 horaemeta/server/coordinator/procedure/ddl/createtable/create_table_test.go delete mode 100644 horaemeta/server/coordinator/procedure/ddl/droppartitiontable/create_drop_partition_table_test.go delete mode 100644 horaemeta/server/coordinator/procedure/ddl/droppartitiontable/drop_partition_table.go delete mode 100644 horaemeta/server/coordinator/procedure/ddl/droptable/create_drop_table_test.go delete mode 100644 horaemeta/server/coordinator/procedure/ddl/droptable/drop_table.go delete mode 100644 horaemeta/server/coordinator/procedure/delay_queue.go delete mode 100644 horaemeta/server/coordinator/procedure/delay_queue_test.go delete mode 100644 horaemeta/server/coordinator/procedure/error.go delete mode 100644 horaemeta/server/coordinator/procedure/manager.go delete mode 100644 horaemeta/server/coordinator/procedure/manager_impl.go delete mode 100644 horaemeta/server/coordinator/procedure/manager_test.go delete mode 100644 horaemeta/server/coordinator/procedure/operation/split/split.go delete mode 100644 horaemeta/server/coordinator/procedure/operation/split/split_test.go delete mode 100644 
horaemeta/server/coordinator/procedure/operation/transferleader/batch_transfer_leader.go delete mode 100644 horaemeta/server/coordinator/procedure/operation/transferleader/batch_transfer_leader_test.go delete mode 100644 horaemeta/server/coordinator/procedure/operation/transferleader/transfer_leader.go delete mode 100644 horaemeta/server/coordinator/procedure/operation/transferleader/trasnfer_leader_test.go delete mode 100644 horaemeta/server/coordinator/procedure/procedure.go delete mode 100644 horaemeta/server/coordinator/procedure/storage.go delete mode 100644 horaemeta/server/coordinator/procedure/storage_impl.go delete mode 100644 horaemeta/server/coordinator/procedure/storage_test.go delete mode 100644 horaemeta/server/coordinator/procedure/test/common.go delete mode 100644 horaemeta/server/coordinator/procedure/util.go delete mode 100644 horaemeta/server/coordinator/scheduler/manager/error.go delete mode 100644 horaemeta/server/coordinator/scheduler/manager/scheduler_manager.go delete mode 100644 horaemeta/server/coordinator/scheduler/manager/scheduler_manager_test.go delete mode 100644 horaemeta/server/coordinator/scheduler/nodepicker/error.go delete mode 100644 horaemeta/server/coordinator/scheduler/nodepicker/hash/consistent_uniform.go delete mode 100644 horaemeta/server/coordinator/scheduler/nodepicker/hash/consistent_uniform_test.go delete mode 100644 horaemeta/server/coordinator/scheduler/nodepicker/node_picker.go delete mode 100644 horaemeta/server/coordinator/scheduler/nodepicker/node_picker_test.go delete mode 100644 horaemeta/server/coordinator/scheduler/rebalanced/scheduler.go delete mode 100644 horaemeta/server/coordinator/scheduler/rebalanced/scheduler_test.go delete mode 100644 horaemeta/server/coordinator/scheduler/reopen/scheduler.go delete mode 100644 horaemeta/server/coordinator/scheduler/reopen/scheduler_test.go delete mode 100644 horaemeta/server/coordinator/scheduler/scheduler.go delete mode 100644 horaemeta/server/coordinator/scheduler/static/error.go delete mode 100644 horaemeta/server/coordinator/scheduler/static/scheduler.go delete mode 100644 horaemeta/server/coordinator/scheduler/static/scheduler_test.go delete mode 100644 horaemeta/server/coordinator/shard_picker.go delete mode 100644 horaemeta/server/coordinator/shard_picker_test.go delete mode 100644 horaemeta/server/coordinator/watch/watch.go delete mode 100644 horaemeta/server/coordinator/watch/watch_test.go delete mode 100644 horaemeta/server/error.go delete mode 100644 horaemeta/server/etcdutil/config.go delete mode 100644 horaemeta/server/etcdutil/error.go delete mode 100644 horaemeta/server/etcdutil/get_leader.go delete mode 100644 horaemeta/server/etcdutil/util.go delete mode 100644 horaemeta/server/etcdutil/util_test.go delete mode 100644 horaemeta/server/id/error.go delete mode 100644 horaemeta/server/id/id.go delete mode 100644 horaemeta/server/id/id_impl.go delete mode 100644 horaemeta/server/id/id_test.go delete mode 100644 horaemeta/server/id/reusable_id_impl.go delete mode 100644 horaemeta/server/id/reusable_id_test.go delete mode 100644 horaemeta/server/limiter/limiter.go delete mode 100644 horaemeta/server/limiter/limiter_test.go delete mode 100644 horaemeta/server/member/error.go delete mode 100644 horaemeta/server/member/lease.go delete mode 100644 horaemeta/server/member/member.go delete mode 100644 horaemeta/server/member/watch_leader.go delete mode 100644 horaemeta/server/member/watch_leader_test.go delete mode 100644 horaemeta/server/server.go delete mode 100644 
horaemeta/server/service/grpc/error.go delete mode 100644 horaemeta/server/service/grpc/forward.go delete mode 100644 horaemeta/server/service/grpc/service.go delete mode 100644 horaemeta/server/service/http/api.go delete mode 100644 horaemeta/server/service/http/error.go delete mode 100644 horaemeta/server/service/http/etcd_api.go delete mode 100644 horaemeta/server/service/http/forward.go delete mode 100644 horaemeta/server/service/http/route.go delete mode 100644 horaemeta/server/service/http/service.go delete mode 100644 horaemeta/server/service/http/types.go delete mode 100644 horaemeta/server/service/util.go delete mode 100644 horaemeta/server/status/status.go delete mode 100644 horaemeta/server/storage/error.go delete mode 100644 horaemeta/server/storage/key_path.go delete mode 100644 horaemeta/server/storage/meta.go delete mode 100644 horaemeta/server/storage/storage_impl.go delete mode 100644 horaemeta/server/storage/storage_test.go delete mode 100644 horaemeta/server/storage/types.go delete mode 100644 integration_tests/.gitignore delete mode 100644 integration_tests/Cargo.toml delete mode 100644 integration_tests/Makefile delete mode 100644 integration_tests/README.md delete mode 100755 integration_tests/build_meta.sh delete mode 100644 integration_tests/cases/common/basic.result delete mode 100644 integration_tests/cases/common/basic.sql delete mode 100644 integration_tests/cases/common/dml/case_sensitive.result delete mode 100644 integration_tests/cases/common/dml/case_sensitive.sql delete mode 100644 integration_tests/cases/common/dml/insert_mode.result delete mode 100644 integration_tests/cases/common/dml/insert_mode.sql delete mode 100644 integration_tests/cases/common/dml/issue-1087.result delete mode 100644 integration_tests/cases/common/dml/issue-1087.sql delete mode 100644 integration_tests/cases/common/dml/issue-302.result delete mode 100644 integration_tests/cases/common/dml/issue-302.sql delete mode 100644 integration_tests/cases/common/dml/issue-341.result delete mode 100644 integration_tests/cases/common/dml/issue-341.sql delete mode 100644 integration_tests/cases/common/dml/issue-59.result delete mode 100644 integration_tests/cases/common/dml/issue-59.sql delete mode 100644 integration_tests/cases/common/dml/issue-637.result delete mode 100644 integration_tests/cases/common/dml/issue-637.sql delete mode 100644 integration_tests/cases/common/dml/select_filter.result delete mode 100644 integration_tests/cases/common/dml/select_filter.sql delete mode 100644 integration_tests/cases/common/dml/select_having.result delete mode 100644 integration_tests/cases/common/dml/select_having.sql delete mode 100644 integration_tests/cases/common/dml/select_order.result delete mode 100644 integration_tests/cases/common/dml/select_order.sql delete mode 100644 integration_tests/cases/common/dummy/select_1.result delete mode 100644 integration_tests/cases/common/dummy/select_1.sql delete mode 100644 integration_tests/cases/common/explain/explain.result delete mode 100644 integration_tests/cases/common/explain/explain.sql delete mode 100644 integration_tests/cases/common/function/aggregate.result delete mode 100644 integration_tests/cases/common/function/aggregate.sql delete mode 100644 integration_tests/cases/common/function/date_bin.result delete mode 100644 integration_tests/cases/common/function/date_bin.sql delete mode 100644 integration_tests/cases/common/function/thetasketch_distinct.result delete mode 100644 integration_tests/cases/common/function/thetasketch_distinct.sql 
delete mode 100644 integration_tests/cases/common/function/time_bucket.result delete mode 100644 integration_tests/cases/common/function/time_bucket.sql delete mode 100644 integration_tests/cases/common/optimizer/optimizer.result delete mode 100644 integration_tests/cases/common/optimizer/optimizer.sql delete mode 100644 integration_tests/cases/common/show/show_create_table.result delete mode 100644 integration_tests/cases/common/show/show_create_table.sql delete mode 120000 integration_tests/cases/env/cluster/common delete mode 100644 integration_tests/cases/env/cluster/ddl/alter_table.result delete mode 100644 integration_tests/cases/env/cluster/ddl/alter_table.sql delete mode 100644 integration_tests/cases/env/cluster/ddl/create_tables.result delete mode 100644 integration_tests/cases/env/cluster/ddl/create_tables.sql delete mode 100644 integration_tests/cases/env/cluster/ddl/partition_table.result delete mode 100644 integration_tests/cases/env/cluster/ddl/partition_table.sql delete mode 100644 integration_tests/cases/env/compaction_offload/compact/compact.result delete mode 100644 integration_tests/cases/env/compaction_offload/compact/compact.sql delete mode 120000 integration_tests/cases/env/local/common delete mode 100644 integration_tests/cases/env/local/ddl/alter_table.result delete mode 100644 integration_tests/cases/env/local/ddl/alter_table.sql delete mode 100644 integration_tests/cases/env/local/ddl/create_tables.result delete mode 100644 integration_tests/cases/env/local/ddl/create_tables.sql delete mode 100644 integration_tests/cases/env/local/ddl/query-plan.result delete mode 100644 integration_tests/cases/env/local/ddl/query-plan.sql delete mode 100644 integration_tests/cases/env/local/ddl/sampling-primary-key.result delete mode 100644 integration_tests/cases/env/local/ddl/sampling-primary-key.sql delete mode 100644 integration_tests/cases/env/local/dml/insert_into_select.result delete mode 100644 integration_tests/cases/env/local/dml/insert_into_select.sql delete mode 100644 integration_tests/cases/env/local/influxql/basic.result delete mode 100644 integration_tests/cases/env/local/influxql/basic.sql delete mode 100644 integration_tests/cases/env/local/opentsdb/basic.result delete mode 100644 integration_tests/cases/env/local/opentsdb/basic.sql delete mode 100644 integration_tests/cases/env/local/system/system_tables.result delete mode 100644 integration_tests/cases/env/local/system/system_tables.sql delete mode 100644 integration_tests/config/compaction-offload.toml delete mode 100644 integration_tests/config/horaedb-cluster-0.toml delete mode 100644 integration_tests/config/horaedb-cluster-1.toml delete mode 100644 integration_tests/config/horaemeta.toml delete mode 100644 integration_tests/config/shard-based-recovery.toml delete mode 100644 integration_tests/dist_query/diff.py delete mode 100755 integration_tests/dist_query/run.sh delete mode 100755 integration_tests/mysql/basic.sh delete mode 100755 integration_tests/opentsdb/run-tests.sh delete mode 100755 integration_tests/opentsdb/test-put.py delete mode 100755 integration_tests/postgresql/basic.sh delete mode 100644 integration_tests/prom/prometheus.yml delete mode 100755 integration_tests/prom/remote-query.py delete mode 100755 integration_tests/prom/run-tests.sh delete mode 100644 integration_tests/recovery/check.py delete mode 100755 integration_tests/recovery/run.sh delete mode 100644 integration_tests/sdk/go/alteraddcolumn.go delete mode 100644 integration_tests/sdk/go/autocreatetable.go delete mode 100644 
integration_tests/sdk/go/go.mod delete mode 100644 integration_tests/sdk/go/go.sum delete mode 100644 integration_tests/sdk/go/issue-779.go delete mode 100644 integration_tests/sdk/go/main.go delete mode 100644 integration_tests/sdk/go/util.go delete mode 100644 integration_tests/sdk/java/pom.xml delete mode 100644 integration_tests/sdk/java/src/main/java/io/ceresdb/App.java delete mode 100644 integration_tests/sdk/java/src/main/resources/log4j2.xml delete mode 100644 integration_tests/sdk/rust/Cargo.toml delete mode 100644 integration_tests/sdk/rust/src/main.rs delete mode 100644 integration_tests/src/database.rs delete mode 100644 integration_tests/src/main.rs delete mode 100755 scripts/run-tsbs.sh delete mode 100644 src/analytic_engine/Cargo.toml delete mode 100644 src/analytic_engine/src/compaction/compactor.rs delete mode 100644 src/analytic_engine/src/compaction/metrics.rs delete mode 100644 src/analytic_engine/src/compaction/mod.rs delete mode 100644 src/analytic_engine/src/compaction/picker.rs delete mode 100644 src/analytic_engine/src/compaction/runner/local_runner.rs delete mode 100644 src/analytic_engine/src/compaction/runner/mod.rs delete mode 100644 src/analytic_engine/src/compaction/runner/node_picker.rs delete mode 100644 src/analytic_engine/src/compaction/runner/remote_client.rs delete mode 100644 src/analytic_engine/src/compaction/runner/remote_runner.rs delete mode 100644 src/analytic_engine/src/compaction/scheduler.rs delete mode 100644 src/analytic_engine/src/context.rs delete mode 100644 src/analytic_engine/src/engine.rs delete mode 100644 src/analytic_engine/src/error.rs delete mode 100644 src/analytic_engine/src/instance/alter.rs delete mode 100644 src/analytic_engine/src/instance/close.rs delete mode 100644 src/analytic_engine/src/instance/create.rs delete mode 100644 src/analytic_engine/src/instance/drop.rs delete mode 100644 src/analytic_engine/src/instance/engine.rs delete mode 100644 src/analytic_engine/src/instance/flush_compaction.rs delete mode 100644 src/analytic_engine/src/instance/mem_collector.rs delete mode 100644 src/analytic_engine/src/instance/mod.rs delete mode 100644 src/analytic_engine/src/instance/open.rs delete mode 100644 src/analytic_engine/src/instance/read.rs delete mode 100644 src/analytic_engine/src/instance/reorder_memtable.rs delete mode 100644 src/analytic_engine/src/instance/serial_executor.rs delete mode 100644 src/analytic_engine/src/instance/wal_replayer.rs delete mode 100644 src/analytic_engine/src/instance/write.rs delete mode 100644 src/analytic_engine/src/lib.rs delete mode 100644 src/analytic_engine/src/manifest/details.rs delete mode 100644 src/analytic_engine/src/manifest/error.rs delete mode 100644 src/analytic_engine/src/manifest/meta_edit.rs delete mode 100644 src/analytic_engine/src/manifest/meta_snapshot.rs delete mode 100644 src/analytic_engine/src/manifest/mod.rs delete mode 100644 src/analytic_engine/src/memtable/columnar/factory.rs delete mode 100644 src/analytic_engine/src/memtable/columnar/iter.rs delete mode 100644 src/analytic_engine/src/memtable/columnar/mod.rs delete mode 100644 src/analytic_engine/src/memtable/error.rs delete mode 100644 src/analytic_engine/src/memtable/factory.rs delete mode 100644 src/analytic_engine/src/memtable/key.rs delete mode 100644 src/analytic_engine/src/memtable/layered/factory.rs delete mode 100644 src/analytic_engine/src/memtable/layered/iter.rs delete mode 100644 src/analytic_engine/src/memtable/layered/mod.rs delete mode 100644 src/analytic_engine/src/memtable/mod.rs delete mode 
100644 src/analytic_engine/src/memtable/reversed_iter.rs delete mode 100644 src/analytic_engine/src/memtable/skiplist/factory.rs delete mode 100644 src/analytic_engine/src/memtable/skiplist/iter.rs delete mode 100644 src/analytic_engine/src/memtable/skiplist/mod.rs delete mode 100644 src/analytic_engine/src/memtable/test_util.rs delete mode 100644 src/analytic_engine/src/payload.rs delete mode 100644 src/analytic_engine/src/prefetchable_stream.rs delete mode 100644 src/analytic_engine/src/row_iter/chain.rs delete mode 100644 src/analytic_engine/src/row_iter/dedup.rs delete mode 100644 src/analytic_engine/src/row_iter/merge.rs delete mode 100644 src/analytic_engine/src/row_iter/mod.rs delete mode 100644 src/analytic_engine/src/row_iter/record_batch_stream.rs delete mode 100644 src/analytic_engine/src/row_iter/tests.rs delete mode 100644 src/analytic_engine/src/sampler.rs delete mode 100644 src/analytic_engine/src/setup.rs delete mode 100644 src/analytic_engine/src/space.rs delete mode 100644 src/analytic_engine/src/sst/factory.rs delete mode 100644 src/analytic_engine/src/sst/file.rs delete mode 100644 src/analytic_engine/src/sst/header.rs delete mode 100644 src/analytic_engine/src/sst/manager.rs delete mode 100644 src/analytic_engine/src/sst/meta_data/cache.rs delete mode 100644 src/analytic_engine/src/sst/meta_data/metadata_reader.rs delete mode 100644 src/analytic_engine/src/sst/meta_data/mod.rs delete mode 100644 src/analytic_engine/src/sst/metrics.rs delete mode 100644 src/analytic_engine/src/sst/mod.rs delete mode 100644 src/analytic_engine/src/sst/parquet/async_reader.rs delete mode 100644 src/analytic_engine/src/sst/parquet/encoding.rs delete mode 100644 src/analytic_engine/src/sst/parquet/meta_data/filter.rs delete mode 100644 src/analytic_engine/src/sst/parquet/meta_data/mod.rs delete mode 100644 src/analytic_engine/src/sst/parquet/mod.rs delete mode 100644 src/analytic_engine/src/sst/parquet/row_group_pruner.rs delete mode 100644 src/analytic_engine/src/sst/parquet/writer.rs delete mode 100644 src/analytic_engine/src/sst/reader.rs delete mode 100644 src/analytic_engine/src/sst/writer.rs delete mode 100644 src/analytic_engine/src/table/data.rs delete mode 100644 src/analytic_engine/src/table/metrics.rs delete mode 100644 src/analytic_engine/src/table/mod.rs delete mode 100644 src/analytic_engine/src/table/sst_util.rs delete mode 100644 src/analytic_engine/src/table/version.rs delete mode 100644 src/analytic_engine/src/table/version_edit.rs delete mode 100644 src/analytic_engine/src/table_meta_set_impl.rs delete mode 100644 src/analytic_engine/src/table_options.rs delete mode 100644 src/analytic_engine/src/tests/alter_test.rs delete mode 100644 src/analytic_engine/src/tests/compaction_test.rs delete mode 100644 src/analytic_engine/src/tests/drop_test.rs delete mode 100644 src/analytic_engine/src/tests/mod.rs delete mode 100644 src/analytic_engine/src/tests/open_test.rs delete mode 100644 src/analytic_engine/src/tests/read_write_test.rs delete mode 100644 src/analytic_engine/src/tests/row_util.rs delete mode 100644 src/analytic_engine/src/tests/table.rs delete mode 100644 src/analytic_engine/src/tests/util.rs delete mode 100644 src/benchmarks/Cargo.toml delete mode 100644 src/benchmarks/README.md delete mode 100644 src/benchmarks/bench.toml delete mode 100644 src/benchmarks/benches/bench.rs delete mode 100644 src/benchmarks/config/bench.toml delete mode 100644 src/benchmarks/config/sst.toml delete mode 100644 src/benchmarks/src/bin/parquet-reader.rs delete mode 100644 
src/benchmarks/src/bin/sst-tools.rs delete mode 100644 src/benchmarks/src/config.rs delete mode 100644 src/benchmarks/src/lib.rs delete mode 100644 src/benchmarks/src/merge_memtable_bench.rs delete mode 100644 src/benchmarks/src/merge_sst_bench.rs delete mode 100644 src/benchmarks/src/parquet_bench.rs delete mode 100644 src/benchmarks/src/replay_bench.rs delete mode 100644 src/benchmarks/src/scan_memtable_bench.rs delete mode 100644 src/benchmarks/src/sst_bench.rs delete mode 100644 src/benchmarks/src/sst_tools.rs delete mode 100644 src/benchmarks/src/table.rs delete mode 100644 src/benchmarks/src/util.rs delete mode 100644 src/benchmarks/src/wal_write_bench.rs delete mode 100644 src/catalog/Cargo.toml delete mode 100644 src/catalog/src/consts.rs delete mode 100644 src/catalog/src/lib.rs delete mode 100644 src/catalog/src/manager.rs delete mode 100644 src/catalog/src/schema.rs delete mode 100644 src/catalog/src/table_operator.rs delete mode 100644 src/catalog/src/test_util.rs delete mode 100644 src/catalog_impls/Cargo.toml delete mode 100644 src/catalog_impls/src/cluster_based.rs delete mode 100644 src/catalog_impls/src/lib.rs delete mode 100644 src/catalog_impls/src/system_tables.rs delete mode 100644 src/catalog_impls/src/table_based.rs delete mode 100644 src/catalog_impls/src/volatile.rs delete mode 100644 src/cluster/Cargo.toml delete mode 100644 src/cluster/src/cluster_impl.rs delete mode 100644 src/cluster/src/config.rs delete mode 100644 src/cluster/src/lib.rs delete mode 100644 src/cluster/src/shard_lock_manager.rs delete mode 100644 src/cluster/src/shard_operation.rs delete mode 100644 src/cluster/src/shard_operator.rs delete mode 100644 src/cluster/src/shard_set.rs delete mode 100644 src/cluster/src/topology.rs delete mode 100644 src/common_types/Cargo.toml delete mode 100644 src/common_types/src/bitset.rs delete mode 100644 src/common_types/src/cluster.rs delete mode 100644 src/common_types/src/column.rs delete mode 100644 src/common_types/src/column_block.rs delete mode 100644 src/common_types/src/column_schema.rs delete mode 100644 src/common_types/src/datum.rs delete mode 100644 src/common_types/src/hex.rs delete mode 100644 src/common_types/src/lib.rs delete mode 100644 src/common_types/src/projected_schema.rs delete mode 100644 src/common_types/src/record_batch.rs delete mode 100644 src/common_types/src/request_id.rs delete mode 100644 src/common_types/src/row/bitset.rs delete mode 100644 src/common_types/src/row/contiguous.rs delete mode 100644 src/common_types/src/row/mod.rs delete mode 100644 src/common_types/src/schema.rs delete mode 100644 src/common_types/src/string.rs delete mode 100644 src/common_types/src/table.rs delete mode 100644 src/common_types/src/tests.rs delete mode 100644 src/common_types/src/time.rs delete mode 100644 src/components/alloc_tracker/Cargo.toml delete mode 100644 src/components/alloc_tracker/src/lib.rs delete mode 100644 src/components/arena/Cargo.toml delete mode 100644 src/components/arena/src/arena_trait.rs delete mode 100644 src/components/arena/src/lib.rs delete mode 100644 src/components/arena/src/mono_inc.rs delete mode 100644 src/components/arrow_ext/Cargo.toml delete mode 100644 src/components/arrow_ext/src/ipc.rs delete mode 100644 src/components/arrow_ext/src/lib.rs delete mode 100644 src/components/arrow_ext/src/operation.rs delete mode 100644 src/components/bytes_ext/Cargo.toml delete mode 100644 src/components/bytes_ext/src/lib.rs delete mode 100644 src/components/codec/Cargo.toml delete mode 100644 
src/components/codec/src/columnar/bool.rs delete mode 100644 src/components/codec/src/columnar/bytes.rs delete mode 100644 src/components/codec/src/columnar/mod.rs delete mode 100644 src/components/codec/src/columnar/number.rs delete mode 100644 src/components/codec/src/columnar/timestamp.rs delete mode 100644 src/components/codec/src/compact/bytes.rs delete mode 100644 src/components/codec/src/compact/datum.rs delete mode 100644 src/components/codec/src/compact/float.rs delete mode 100644 src/components/codec/src/compact/mod.rs delete mode 100644 src/components/codec/src/compact/number.rs delete mode 100644 src/components/codec/src/consts.rs delete mode 100644 src/components/codec/src/lib.rs delete mode 100644 src/components/codec/src/memcomparable/bytes.rs delete mode 100644 src/components/codec/src/memcomparable/datum.rs delete mode 100644 src/components/codec/src/memcomparable/mod.rs delete mode 100644 src/components/codec/src/memcomparable/number.rs delete mode 100644 src/components/codec/src/row/mod.rs delete mode 100644 src/components/codec/src/varint.rs delete mode 100644 src/components/future_ext/Cargo.toml delete mode 100644 src/components/future_ext/src/cancel.rs delete mode 100644 src/components/future_ext/src/lib.rs delete mode 100644 src/components/future_ext/src/retry.rs delete mode 100644 src/components/generic_error/Cargo.toml delete mode 100644 src/components/generic_error/src/lib.rs delete mode 100644 src/components/hash_ext/Cargo.toml delete mode 100644 src/components/hash_ext/src/lib.rs delete mode 100644 src/components/id_allocator/Cargo.toml delete mode 100644 src/components/id_allocator/src/lib.rs delete mode 100644 src/components/logger/Cargo.toml delete mode 100644 src/components/logger/src/lib.rs delete mode 100644 src/components/macros/Cargo.toml delete mode 100644 src/components/macros/src/lib.rs delete mode 100644 src/components/message_queue/Cargo.toml delete mode 100644 src/components/message_queue/src/kafka/config.rs delete mode 100644 src/components/message_queue/src/kafka/kafka_impl.rs delete mode 100644 src/components/message_queue/src/kafka/mod.rs delete mode 100644 src/components/message_queue/src/lib.rs delete mode 100644 src/components/message_queue/src/tests/cases.rs delete mode 100644 src/components/message_queue/src/tests/mod.rs delete mode 100644 src/components/message_queue/src/tests/util.rs delete mode 100644 src/components/metric_ext/Cargo.toml delete mode 100644 src/components/metric_ext/src/lib.rs delete mode 100644 src/components/notifier/Cargo.toml delete mode 100644 src/components/notifier/src/lib.rs delete mode 100644 src/components/notifier/src/notifier.rs delete mode 100644 src/components/object_store/Cargo.toml delete mode 100644 src/components/object_store/src/aliyun.rs delete mode 100644 src/components/object_store/src/config.rs delete mode 100644 src/components/object_store/src/disk_cache.rs delete mode 100644 src/components/object_store/src/lib.rs delete mode 100644 src/components/object_store/src/local_file.rs delete mode 100644 src/components/object_store/src/mem_cache.rs delete mode 100644 src/components/object_store/src/metrics.rs delete mode 100644 src/components/object_store/src/multi_part.rs delete mode 100644 src/components/object_store/src/prefix.rs delete mode 100644 src/components/object_store/src/s3.rs delete mode 100644 src/components/object_store/src/test_util.rs delete mode 100644 src/components/panic_ext/Cargo.toml delete mode 100644 src/components/panic_ext/src/lib.rs delete mode 160000 
src/components/parquet-testing delete mode 100644 src/components/parquet_ext/Cargo.toml delete mode 100644 src/components/parquet_ext/src/lib.rs delete mode 100644 src/components/parquet_ext/src/meta_data.rs delete mode 100644 src/components/parquet_ext/src/prune/equal.rs delete mode 100644 src/components/parquet_ext/src/prune/min_max.rs delete mode 100644 src/components/parquet_ext/src/prune/mod.rs delete mode 100644 src/components/parquet_ext/src/reader.rs delete mode 100644 src/components/parquet_ext/src/tests.rs delete mode 100644 src/components/partitioned_lock/Cargo.toml delete mode 100644 src/components/partitioned_lock/src/lib.rs delete mode 100644 src/components/profile/Cargo.toml delete mode 100644 src/components/profile/src/lib.rs delete mode 100644 src/components/runtime/Cargo.toml delete mode 100644 src/components/runtime/src/lib.rs delete mode 100644 src/components/runtime/src/metrics.rs delete mode 100644 src/components/runtime/src/priority_runtime.rs delete mode 100644 src/components/sampling_cache/Cargo.toml delete mode 100644 src/components/sampling_cache/src/lib.rs delete mode 100644 src/components/size_ext/Cargo.toml delete mode 100644 src/components/size_ext/src/lib.rs delete mode 100644 src/components/skiplist/Cargo.toml delete mode 100644 src/components/skiplist/benches/bench.rs delete mode 100644 src/components/skiplist/src/key.rs delete mode 100644 src/components/skiplist/src/lib.rs delete mode 100644 src/components/skiplist/src/list.rs delete mode 100644 src/components/skiplist/src/slice.rs delete mode 100644 src/components/skiplist/tests/tests.rs delete mode 100644 src/components/system_stats/Cargo.toml delete mode 100644 src/components/system_stats/src/lib.rs delete mode 100644 src/components/table_kv/Cargo.toml delete mode 100644 src/components/table_kv/src/config.rs delete mode 100644 src/components/table_kv/src/lib.rs delete mode 100644 src/components/table_kv/src/memory.rs delete mode 100644 src/components/table_kv/src/metrics.rs delete mode 100644 src/components/table_kv/src/obkv.rs delete mode 100644 src/components/table_kv/src/obkv/tests.rs delete mode 100644 src/components/table_kv/src/tests.rs delete mode 100644 src/components/test_util/Cargo.toml delete mode 100644 src/components/test_util/src/lib.rs delete mode 100644 src/components/time_ext/Cargo.toml delete mode 100644 src/components/time_ext/src/lib.rs delete mode 100644 src/components/timed_task/Cargo.toml delete mode 100644 src/components/timed_task/src/lib.rs delete mode 100644 src/components/toml_ext/Cargo.toml delete mode 100644 src/components/toml_ext/src/lib.rs delete mode 100644 src/components/trace_metric/Cargo.toml delete mode 100644 src/components/trace_metric/src/collector.rs delete mode 100644 src/components/trace_metric/src/lib.rs delete mode 100644 src/components/trace_metric/src/metric.rs delete mode 100644 src/components/trace_metric_derive/Cargo.toml delete mode 100644 src/components/trace_metric_derive/src/builder.rs delete mode 100644 src/components/trace_metric_derive/src/lib.rs delete mode 100644 src/components/trace_metric_derive_tests/Cargo.toml delete mode 100644 src/components/trace_metric_derive_tests/src/lib.rs delete mode 100644 src/components/tracing_util/Cargo.toml delete mode 100644 src/components/tracing_util/src/lib.rs delete mode 100644 src/components/tracing_util/src/logging.rs delete mode 100644 src/df_engine_extensions/Cargo.toml delete mode 100644 src/df_engine_extensions/src/codec.rs delete mode 100644 src/df_engine_extensions/src/dist_sql_query/codec.rs 
delete mode 100644 src/df_engine_extensions/src/dist_sql_query/mod.rs delete mode 100644 src/df_engine_extensions/src/dist_sql_query/physical_plan.rs delete mode 100644 src/df_engine_extensions/src/dist_sql_query/resolver.rs delete mode 100644 src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__aggr_push_down.snap delete mode 100644 src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__basic_partitioned_scan.snap delete mode 100644 src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__basic_sub_scan.snap delete mode 100644 src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__compounded_aggr_push_down.snap delete mode 100644 src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__node_with_multiple_partitioned_scan_children.snap delete mode 100644 src/df_engine_extensions/src/dist_sql_query/test_util.rs delete mode 100644 src/df_engine_extensions/src/lib.rs delete mode 100644 src/df_engine_extensions/src/metrics.rs delete mode 100644 src/df_operator/Cargo.toml delete mode 100644 src/df_operator/src/aggregate.rs delete mode 100644 src/df_operator/src/functions.rs delete mode 100644 src/df_operator/src/lib.rs delete mode 100644 src/df_operator/src/registry.rs delete mode 100644 src/df_operator/src/scalar.rs delete mode 100644 src/df_operator/src/udaf.rs delete mode 100644 src/df_operator/src/udfs/mod.rs delete mode 100644 src/df_operator/src/udfs/thetasketch_distinct.rs delete mode 100644 src/df_operator/src/udfs/time_bucket.rs delete mode 100644 src/df_operator/src/visitor.rs delete mode 100644 src/horaedb/Cargo.toml delete mode 100644 src/horaedb/bin/horaedb-server.rs delete mode 100644 src/horaedb/build.rs delete mode 100644 src/horaedb/src/config.rs delete mode 100644 src/horaedb/src/lib.rs delete mode 100644 src/horaedb/src/setup.rs delete mode 100644 src/horaedb/src/signal_handler.rs delete mode 100644 src/interpreters/Cargo.toml delete mode 100644 src/interpreters/src/alter_table.rs delete mode 100644 src/interpreters/src/context.rs delete mode 100644 src/interpreters/src/create.rs delete mode 100644 src/interpreters/src/describe.rs delete mode 100644 src/interpreters/src/drop.rs delete mode 100644 src/interpreters/src/exists.rs delete mode 100644 src/interpreters/src/factory.rs delete mode 100644 src/interpreters/src/insert.rs delete mode 100644 src/interpreters/src/interpreter.rs delete mode 100644 src/interpreters/src/lib.rs delete mode 100644 src/interpreters/src/metrics.rs delete mode 100644 src/interpreters/src/select.rs delete mode 100644 src/interpreters/src/show.rs delete mode 100644 src/interpreters/src/show_create.rs delete mode 100644 src/interpreters/src/table_manipulator/catalog_based.rs delete mode 100644 src/interpreters/src/table_manipulator/meta_based.rs delete mode 100644 src/interpreters/src/table_manipulator/mod.rs delete mode 100644 src/interpreters/src/tests.rs delete mode 100644 src/interpreters/src/validator.rs delete mode 100644 src/meta_client/Cargo.toml delete mode 100644 src/meta_client/src/lib.rs delete mode 100644 src/meta_client/src/load_balance.rs delete mode 100644 src/meta_client/src/meta_impl.rs delete mode 100644 src/meta_client/src/types.rs rename {horaedb => src}/metric_engine/Cargo.toml (98%) rename {horaedb => src}/metric_engine/src/compaction/mod.rs (100%) rename {horaedb => 
src}/metric_engine/src/compaction/picker.rs (100%) rename {horaedb => src}/metric_engine/src/compaction/scheduler.rs (100%) rename {horaedb => src}/metric_engine/src/error.rs (100%) rename {horaedb => src}/metric_engine/src/lib.rs (100%) rename {horaedb => src}/metric_engine/src/macros.rs (67%) rename {horaedb => src}/metric_engine/src/manifest.rs (99%) rename {horaedb => src}/metric_engine/src/operator.rs (99%) rename {horaedb => src}/metric_engine/src/read.rs (100%) rename {horaedb => src}/metric_engine/src/sst.rs (99%) rename {horaedb => src}/metric_engine/src/storage.rs (99%) rename {horaedb => src}/metric_engine/src/test_util.rs (100%) rename {horaedb => src}/metric_engine/src/types.rs (100%) delete mode 100644 src/partition_table_engine/Cargo.toml delete mode 100644 src/partition_table_engine/src/error.rs delete mode 100644 src/partition_table_engine/src/lib.rs delete mode 100644 src/partition_table_engine/src/metrics.rs delete mode 100644 src/partition_table_engine/src/partition.rs delete mode 100644 src/partition_table_engine/src/scan_builder.rs delete mode 100644 src/partition_table_engine/src/test_util.rs rename {horaedb => src}/pb_types/Cargo.toml (100%) rename {horaedb => src}/pb_types/build.rs (100%) rename {horaedb => src}/pb_types/protos/sst.proto (100%) rename {horaedb => src}/pb_types/src/lib.rs (100%) delete mode 100644 src/proxy/Cargo.toml delete mode 100644 src/proxy/src/auth/mod.rs delete mode 100644 src/proxy/src/auth/with_file.rs delete mode 100644 src/proxy/src/context.rs delete mode 100644 src/proxy/src/error.rs delete mode 100644 src/proxy/src/error_util.rs delete mode 100644 src/proxy/src/forward.rs delete mode 100644 src/proxy/src/grpc/mod.rs delete mode 100644 src/proxy/src/grpc/prom_query.rs delete mode 100644 src/proxy/src/grpc/route.rs delete mode 100644 src/proxy/src/grpc/sql_query.rs delete mode 100644 src/proxy/src/grpc/write.rs delete mode 100644 src/proxy/src/handlers/admin.rs delete mode 100644 src/proxy/src/handlers/error.rs delete mode 100644 src/proxy/src/handlers/mod.rs delete mode 100644 src/proxy/src/hotspot.rs delete mode 100644 src/proxy/src/hotspot_lru.rs delete mode 100644 src/proxy/src/http/mod.rs delete mode 100644 src/proxy/src/http/prom.rs delete mode 100644 src/proxy/src/http/route.rs delete mode 100644 src/proxy/src/http/sql.rs delete mode 100644 src/proxy/src/influxdb/mod.rs delete mode 100644 src/proxy/src/influxdb/types.rs delete mode 100644 src/proxy/src/instance.rs delete mode 100644 src/proxy/src/lib.rs delete mode 100644 src/proxy/src/limiter.rs delete mode 100644 src/proxy/src/metrics.rs delete mode 100644 src/proxy/src/opentsdb/mod.rs delete mode 100644 src/proxy/src/opentsdb/types.rs delete mode 100644 src/proxy/src/read.rs delete mode 100644 src/proxy/src/schema_config_provider/cluster_based.rs delete mode 100644 src/proxy/src/schema_config_provider/config_based.rs delete mode 100644 src/proxy/src/schema_config_provider/mod.rs delete mode 100644 src/proxy/src/util.rs delete mode 100644 src/proxy/src/write.rs delete mode 100644 src/query_engine/Cargo.toml delete mode 100644 src/query_engine/src/config.rs delete mode 100644 src/query_engine/src/context.rs delete mode 100644 src/query_engine/src/datafusion_impl/executor.rs delete mode 100644 src/query_engine/src/datafusion_impl/logical_optimizer/tests.rs delete mode 100644 src/query_engine/src/datafusion_impl/mod.rs delete mode 100644 src/query_engine/src/datafusion_impl/physical_optimizer/coalesce_batches.rs delete mode 100644 
src/query_engine/src/datafusion_impl/physical_optimizer/mod.rs delete mode 100644 src/query_engine/src/datafusion_impl/physical_optimizer/repartition.rs delete mode 100644 src/query_engine/src/datafusion_impl/physical_plan.rs delete mode 100644 src/query_engine/src/datafusion_impl/physical_plan_extension/mod.rs delete mode 100644 src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs delete mode 100644 src/query_engine/src/datafusion_impl/physical_planner.rs delete mode 100644 src/query_engine/src/datafusion_impl/physical_planner_extension/mod.rs delete mode 100644 src/query_engine/src/datafusion_impl/physical_planner_extension/prom_align.rs delete mode 100644 src/query_engine/src/datafusion_impl/task_context.rs delete mode 100644 src/query_engine/src/error.rs delete mode 100644 src/query_engine/src/executor.rs delete mode 100644 src/query_engine/src/lib.rs delete mode 100644 src/query_engine/src/physical_planner.rs delete mode 100644 src/query_frontend/Cargo.toml delete mode 100644 src/query_frontend/src/ast.rs delete mode 100644 src/query_frontend/src/config.rs delete mode 100644 src/query_frontend/src/container.rs delete mode 100644 src/query_frontend/src/datafusion_util.rs delete mode 100644 src/query_frontend/src/frontend.rs delete mode 100644 src/query_frontend/src/influxql/mod.rs delete mode 100644 src/query_frontend/src/influxql/planner.rs delete mode 100644 src/query_frontend/src/lib.rs delete mode 100644 src/query_frontend/src/logical_optimizer/mod.rs delete mode 100644 src/query_frontend/src/logical_optimizer/type_conversion.rs delete mode 100644 src/query_frontend/src/opentsdb/mod.rs delete mode 100644 src/query_frontend/src/opentsdb/types.rs delete mode 100644 src/query_frontend/src/parser.rs delete mode 100644 src/query_frontend/src/partition.rs delete mode 100644 src/query_frontend/src/plan.rs delete mode 100644 src/query_frontend/src/planner.rs delete mode 100644 src/query_frontend/src/promql.rs delete mode 100644 src/query_frontend/src/promql/convert.rs delete mode 100644 src/query_frontend/src/promql/datafusion_util.rs delete mode 100644 src/query_frontend/src/promql/error.rs delete mode 100644 src/query_frontend/src/promql/pushdown.rs delete mode 100644 src/query_frontend/src/promql/remote.rs delete mode 100644 src/query_frontend/src/promql/udf.rs delete mode 100644 src/query_frontend/src/provider.rs delete mode 100644 src/query_frontend/src/tests.rs delete mode 100644 src/remote_engine_client/Cargo.toml delete mode 100644 src/remote_engine_client/src/cached_router.rs delete mode 100644 src/remote_engine_client/src/channel.rs delete mode 100644 src/remote_engine_client/src/client.rs delete mode 100644 src/remote_engine_client/src/config.rs delete mode 100644 src/remote_engine_client/src/lib.rs delete mode 100644 src/remote_engine_client/src/status_code.rs delete mode 100644 src/router/Cargo.toml delete mode 100644 src/router/src/cluster_based.rs delete mode 100644 src/router/src/endpoint.rs delete mode 100644 src/router/src/hash.rs delete mode 100644 src/router/src/lib.rs delete mode 100644 src/router/src/rule_based.rs delete mode 100644 src/server/src/config.rs delete mode 100644 src/server/src/consts.rs delete mode 100644 src/server/src/error_util.rs delete mode 100644 src/server/src/federated.rs delete mode 100644 src/server/src/grpc/compaction_service/error.rs delete mode 100644 src/server/src/grpc/compaction_service/mod.rs delete mode 100644 src/server/src/grpc/meta_event_service/error.rs delete mode 100644 
src/server/src/grpc/meta_event_service/mod.rs delete mode 100644 src/server/src/grpc/metrics.rs delete mode 100644 src/server/src/grpc/mod.rs delete mode 100644 src/server/src/grpc/remote_engine_service/error.rs delete mode 100644 src/server/src/grpc/remote_engine_service/metrics.rs delete mode 100644 src/server/src/grpc/remote_engine_service/mod.rs delete mode 100644 src/server/src/grpc/storage_service/error.rs delete mode 100644 src/server/src/grpc/storage_service/header.rs delete mode 100644 src/server/src/grpc/storage_service/mod.rs delete mode 100644 src/server/src/http.rs delete mode 100644 src/server/src/lib.rs delete mode 100644 src/server/src/local_tables.rs rename {horaedb => src}/server/src/main.rs (100%) delete mode 100644 src/server/src/metrics.rs delete mode 100644 src/server/src/mysql/builder.rs delete mode 100644 src/server/src/mysql/error.rs delete mode 100644 src/server/src/mysql/mod.rs delete mode 100644 src/server/src/mysql/service.rs delete mode 100644 src/server/src/mysql/worker.rs delete mode 100644 src/server/src/mysql/writer.rs delete mode 100644 src/server/src/postgresql/builder.rs delete mode 100644 src/server/src/postgresql/error.rs delete mode 100644 src/server/src/postgresql/handler.rs delete mode 100644 src/server/src/postgresql/mod.rs delete mode 100644 src/server/src/postgresql/service.rs delete mode 100644 src/server/src/server.rs delete mode 100644 src/server/src/session.rs delete mode 100644 src/system_catalog/Cargo.toml delete mode 100644 src/system_catalog/src/lib.rs delete mode 100644 src/system_catalog/src/sys_catalog_table.rs delete mode 100644 src/system_catalog/src/tables.rs delete mode 100644 src/table_engine/Cargo.toml delete mode 100644 src/table_engine/src/engine.rs delete mode 100644 src/table_engine/src/lib.rs delete mode 100644 src/table_engine/src/memory.rs delete mode 100644 src/table_engine/src/partition/mod.rs delete mode 100644 src/table_engine/src/partition/rule/df_adapter/extractor.rs delete mode 100644 src/table_engine/src/partition/rule/df_adapter/mod.rs delete mode 100644 src/table_engine/src/partition/rule/factory.rs delete mode 100644 src/table_engine/src/partition/rule/filter.rs delete mode 100644 src/table_engine/src/partition/rule/key.rs delete mode 100644 src/table_engine/src/partition/rule/mod.rs delete mode 100644 src/table_engine/src/partition/rule/random.rs delete mode 100644 src/table_engine/src/predicate.rs delete mode 100644 src/table_engine/src/provider.rs delete mode 100644 src/table_engine/src/proxy.rs delete mode 100644 src/table_engine/src/remote/mod.rs delete mode 100644 src/table_engine/src/remote/model.rs delete mode 100644 src/table_engine/src/stream.rs delete mode 100644 src/table_engine/src/table.rs delete mode 100644 src/tools/Cargo.toml delete mode 100644 src/tools/src/bin/sst-convert.rs delete mode 100644 src/tools/src/bin/sst-metadata.rs delete mode 100644 src/tools/src/bin/wal-reader.rs delete mode 100644 src/tools/src/lib.rs delete mode 100644 src/tools/src/sst_util.rs delete mode 100644 src/wal/Cargo.toml delete mode 100644 src/wal/src/config.rs delete mode 100644 src/wal/src/dummy.rs delete mode 100644 src/wal/src/kv_encoder.rs delete mode 100644 src/wal/src/lib.rs delete mode 100644 src/wal/src/local_storage_impl/config.rs delete mode 100644 src/wal/src/local_storage_impl/mod.rs delete mode 100644 src/wal/src/local_storage_impl/record_encoding.rs delete mode 100644 src/wal/src/local_storage_impl/segment.rs delete mode 100644 src/wal/src/local_storage_impl/wal_manager.rs delete mode 100644 
src/wal/src/log_batch.rs delete mode 100644 src/wal/src/manager.rs delete mode 100644 src/wal/src/message_queue_impl/config.rs delete mode 100644 src/wal/src/message_queue_impl/encoding.rs delete mode 100644 src/wal/src/message_queue_impl/log_cleaner.rs delete mode 100644 src/wal/src/message_queue_impl/mod.rs delete mode 100644 src/wal/src/message_queue_impl/namespace.rs delete mode 100644 src/wal/src/message_queue_impl/region.rs delete mode 100644 src/wal/src/message_queue_impl/region_context.rs delete mode 100644 src/wal/src/message_queue_impl/snapshot_synchronizer.rs delete mode 100644 src/wal/src/message_queue_impl/test_util.rs delete mode 100644 src/wal/src/message_queue_impl/wal.rs delete mode 100644 src/wal/src/metrics.rs delete mode 100644 src/wal/src/rocksdb_impl/config.rs delete mode 100644 src/wal/src/rocksdb_impl/manager.rs delete mode 100644 src/wal/src/rocksdb_impl/mod.rs delete mode 100644 src/wal/src/table_kv_impl/config.rs delete mode 100644 src/wal/src/table_kv_impl/encoding.rs delete mode 100644 src/wal/src/table_kv_impl/mod.rs delete mode 100644 src/wal/src/table_kv_impl/model.rs delete mode 100644 src/wal/src/table_kv_impl/namespace.rs delete mode 100644 src/wal/src/table_kv_impl/table_unit.rs delete mode 100644 src/wal/src/table_kv_impl/wal.rs delete mode 100644 src/wal/tests/read_write.rs diff --git a/.asf.yaml b/.asf.yaml index 0040c45289..e9467e642d 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -23,10 +23,9 @@ github: homepage: https://horaedb.apache.org labels: - rust - - sql - - database - distributed-database - cloud-native + - prometheus-remote-storage - tsdb - timeseries-database - timeseries-analysis @@ -41,6 +40,10 @@ github: required_pull_request_reviews: dismiss_stale_reviews: false required_approving_review_count: 1 + analytic-engine: + required_pull_request_reviews: + dismiss_stale_reviews: false + required_approving_review_count: 1 protected_tags: [] notifications: diff --git a/.github/actions-rs/gcrov.yml b/.github/actions-rs/gcrov.yml deleted file mode 100644 index ee99ff6f2d..0000000000 --- a/.github/actions-rs/gcrov.yml +++ /dev/null @@ -1,20 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ignore-not-existing: true -output-type: lcov -output-path: ./lcov.info diff --git a/.github/codecov.yml b/.github/codecov.yml deleted file mode 100644 index 7697d5bc8f..0000000000 --- a/.github/codecov.yml +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ignore: - - "benchmarks" - - "integration_test" - - "meta_client" - - "remote_engine_client" - - "src/wal/src/message_queue_impl" - - "src/wal/src/table_kv_impl" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5e4b6cc4a9..c88221af7a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,33 +23,30 @@ on: push: branches: - main + - analytic-engine - dev paths: - - 'src/**' - - 'integration_tests/**' + - '**.rs' - 'Cargo.toml' - 'Cargo.lock' - '.github/workflows/ci.yml' - 'licenserc.toml' pull_request: paths: - - 'src/**' - - 'integration_tests/**' + - '**.rs' - 'Cargo.toml' - 'Cargo.lock' - '.github/workflows/ci.yml' -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - # Common environment variables env: RUSTFLAGS: "-C debuginfo=1" CARGO_TERM_COLOR: always RUST_BACKTRACE: "1" - LOCK_FILE: Cargo.lock - RUST_VERSION: nightly-2024-01-28 + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true jobs: style-check: @@ -57,12 +54,9 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 60 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - - run: | - rustup set auto-self-update disable - rustup toolchain install ${RUST_VERSION} --profile minimal - name: Release Disk Quota run: | sudo make ensure-disk-quota @@ -72,28 +66,22 @@ jobs: sudo apt install --yes protobuf-compiler - name: Install check binaries run: | - rustup component add clippy - rustup component add rustfmt cargo install --git https://github.com/DevinR528/cargo-sort --rev 55ec890 --locked - curl --proto '=https' --tlsv1.2 -LsSf https://github.com/korandoru/hawkeye/releases/download/v5.8.1/hawkeye-installer.sh | sh - name: Run Style Check run: | - make fmt - make check-cargo-toml - make check-asf-header - make clippy + make fmt sort clippy + - name: Check lock + run: | + git diff --exit-code unit-test: name: unit-test runs-on: ubuntu-latest timeout-minutes: 60 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - - run: | - rustup set auto-self-update disable - rustup toolchain install ${RUST_VERSION} --profile minimal - name: Release Disk Quota run: | sudo make ensure-disk-quota @@ -101,176 +89,9 @@ jobs: run: | sudo apt update sudo apt install --yes protobuf-compiler - - name: Backup Lock File - run: | - cp ${LOCK_FILE} ${LOCK_FILE}.bak - name: Run Unit Tests run: | - make test-ut - echo "Checking if ${LOCK_FILE} has changed..." 
- - name: Check Lock File - run: | - diff ${LOCK_FILE} ${LOCK_FILE}.bak - - integration-test: - name: integration-test - runs-on: ubuntu-latest - timeout-minutes: 60 - steps: - - uses: actions/checkout@v3 - with: - submodules: true - - uses: actions/setup-go@v3 - with: - go-version: 1.21 - - run: | - rustup set auto-self-update disable - rustup toolchain install ${RUST_VERSION} --profile minimal - - name: Release Disk Quota - run: | - sudo make ensure-disk-quota - - name: Setup Build Environment - run: | - sudo apt update - sudo apt install --yes protobuf-compiler - - name: Run integration tests - run: | - make integration-test - env: - RUST_BACKTRACE: "1" - - name: Upload Logs - if: always() - uses: actions/upload-artifact@v3 - with: - name: CI-${{ github.sha }} - path: | - /tmp/horaedb-stdout.log - /tmp/horaemeta-stdout.log - /tmp/horaedb-stdout-0.log - /tmp/horaedb-stdout-1.log - - sdk-test: - name: sdk-test - runs-on: ubuntu-latest - timeout-minutes: 60 - steps: - - uses: actions/checkout@v3 - with: - submodules: true - - uses: actions/setup-go@v3 - with: - go-version: 1.21 - - run: | - rustup set auto-self-update disable - rustup toolchain install ${RUST_VERSION} --profile minimal - - name: Release Disk Quota - run: | - sudo make ensure-disk-quota - - name: Setup Build Environment - run: | - sudo apt update - sudo apt install --yes protobuf-compiler - - name: Build and Run HoraeDB Cluster - working-directory: integration_tests - run: | - make prepare - make run-horaemeta - make run-horaedb-cluster - - name: Run Go SDK tests - working-directory: integration_tests - run: | - make run-go - - name: Run Java SDK tests - working-directory: integration_tests - run: | - make run-java - - name: Run Rust SDK tests - working-directory: integration_tests - run: | - make run-rust - - name: Run MySQL client tests - working-directory: integration_tests - run: | - make run-mysql - - name: Run PostgreSQL client tests - working-directory: integration_tests - run: | - make run-postgresql - - name: Run Prometheus query tests - working-directory: integration_tests - run: | - make run-prom - - name: Run OpenTSDB tests - working-directory: integration_tests - run: | - make run-opentsdb - - name: Upload Logs - if: always() - uses: actions/upload-artifact@v3 - with: - name: sdk-test-${{ github.sha }} - path: | - /tmp/horaedb-stdout.log - - recovery-test: - name: recovery-test - runs-on: ubuntu-latest - timeout-minutes: 60 - steps: - - uses: actions/checkout@v3 - with: - submodules: true - - run: | - rustup set auto-self-update disable - rustup toolchain install ${RUST_VERSION} --profile minimal - - name: Release Disk Quota - run: | - sudo make ensure-disk-quota - - name: Setup Build Environment - run: | - sudo apt update - sudo apt install --yes protobuf-compiler - - name: Run recovery tests - working-directory: integration_tests - run: | - make run-recovery - - name: Upload Logs - if: always() - uses: actions/upload-artifact@v3 - with: - name: recovery-test-${{ github.sha }} - path: | - /tmp/horaedb-stdout.log - - dist-query-test: - name: dist-query-test - runs-on: ubuntu-latest - timeout-minutes: 60 - steps: - - uses: actions/checkout@v3 - with: - submodules: true - - uses: actions/setup-go@v3 - with: - go-version: 1.21 - - run: | - rustup set auto-self-update disable - rustup toolchain install ${RUST_VERSION} --profile minimal - - name: Release Disk Quota - run: | - sudo make ensure-disk-quota - - name: Setup Build Environment - run: | - sudo apt update - sudo apt install --yes protobuf-compiler - - name: 
Run dist query tests - working-directory: integration_tests + make test + - name: Check lock run: | - make run-dist-query - - name: Upload Logs - if: always() - uses: actions/upload-artifact@v3 - with: - name: dist-query-test-${{ github.sha }} - path: | - /tmp/horaedb-stdout.log + git diff --exit-code diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml deleted file mode 100644 index 1f91451aef..0000000000 --- a/.github/workflows/coverage.yml +++ /dev/null @@ -1,68 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: coverage - -on: - workflow_dispatch: - pull_request: - paths-ignore: - - 'docs/**' - - 'etc/**' - - '**.md' - - '.dockerignore' - - 'docker/**' - schedule: - - cron: '14 1/3 * * *' - -# Common environment variables -env: - RUSTFLAGS: "-C debuginfo=1" - CARGO_TERM_COLOR: always - RUST_BACKTRACE: "1" - RUST_VERSION: nightly-2024-01-28 - -jobs: - coverage: - name: coverage - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - submodules: true - - name: Setup Rust toolchain - run: | - rustup toolchain install ${{ env.RUST_VERSION }} - - name: Release Disk Quota - run: | - sudo rm -rf /usr/local/lib/android # release about 10 GB - sudo rm -rf /usr/share/dotnet # release about 20GB - - name: Ensure Disk Quota - run: | - make ensure-disk-quota - - name: Setup Build Environment - run: | - sudo apt update - sudo apt install --yes protobuf-compiler - - name: Install cargo-llvm-cov - run: cargo install cargo-llvm-cov --version=0.5.9 - - name: Generate code coverage - run: cargo llvm-cov --all-features --workspace --lcov --output-path lcov.info - - name: Report coverage - continue-on-error: true - run: bash <(curl -s https://codecov.io/bash) - diff --git a/.github/workflows/meta-ci.yml b/.github/workflows/meta-ci.yml deleted file mode 100644 index dcbfcc7922..0000000000 --- a/.github/workflows/meta-ci.yml +++ /dev/null @@ -1,71 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -name: HoraeMeta CI - -on: - push: - branches: - - main - paths: - - 'horaemeta/**' - - '.github/workflows/meta-ci.yml' - pull_request: - paths: - - 'horaemeta/**' - - '.github/workflows/meta-ci.yml' - workflow_dispatch: - -jobs: - style-check: - runs-on: ubuntu-latest - timeout-minutes: 5 - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v3 - with: - go-version: 1.21.3 - - working-directory: ./horaemeta - run: | - make install-tools - make check - - unit-test: - runs-on: ubuntu-latest - timeout-minutes: 10 - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-go@v3 - with: - go-version: 1.21.3 - - working-directory: ./horaemeta - run: | - make install-tools - make test - - integration-test: - runs-on: ubuntu-latest - timeout-minutes: 60 - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-go@v3 - with: - go-version: 1.21.3 - - working-directory: ./integration_tests - run: | - sudo apt install -y protobuf-compiler - make run diff --git a/.github/workflows/metric-engine-ci.yml b/.github/workflows/metric-engine-ci.yml deleted file mode 100644 index cad7d3ab8d..0000000000 --- a/.github/workflows/metric-engine-ci.yml +++ /dev/null @@ -1,99 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -name: Metric Engine CI - -on: - merge_group: - workflow_dispatch: - push: - branches: - - main - - dev - paths: - - 'horaedb/**' - pull_request: - paths: - - 'horaedb/**' - -# Common environment variables -env: - RUSTFLAGS: "-C debuginfo=1" - CARGO_TERM_COLOR: always - RUST_BACKTRACE: "1" - LOCK_FILE: Cargo.lock - RUST_VERSION: nightly-2024-01-28 - -jobs: - style-check: - name: style-check - runs-on: ubuntu-latest - timeout-minutes: 60 - steps: - - uses: actions/checkout@v4 - with: - submodules: true - - run: | - rustup set auto-self-update disable - rustup toolchain install ${RUST_VERSION} --profile minimal - - name: Release Disk Quota - run: | - sudo make ensure-disk-quota - - name: Setup Build Environment - run: | - sudo apt update - sudo apt install --yes protobuf-compiler - - name: Install check binaries - run: | - rustup component add clippy - rustup component add rustfmt - cargo install --git https://github.com/DevinR528/cargo-sort --rev 55ec890 --locked - - name: Run Style Check - working-directory: horaedb - run: | - make fmt sort clippy - - name: Check lock - working-directory: horaedb - run: | - git diff --exit-code - - unit-test: - name: unit-test - runs-on: ubuntu-latest - timeout-minutes: 60 - steps: - - uses: actions/checkout@v4 - with: - submodules: true - - run: | - rustup set auto-self-update disable - rustup toolchain install ${RUST_VERSION} --profile minimal - - name: Release Disk Quota - run: | - sudo make ensure-disk-quota - - name: Setup Build Environment - run: | - sudo apt update - sudo apt install --yes protobuf-compiler - - name: Run Unit Tests - working-directory: horaedb - run: | - make test - - name: Check lock - working-directory: horaedb - run: | - git diff --exit-code diff --git a/.github/workflows/publish-image.yml b/.github/workflows/publish-image.yml deleted file mode 100644 index be97587c5e..0000000000 --- a/.github/workflows/publish-image.yml +++ /dev/null @@ -1,72 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -name: Publish Docker image - -on: - workflow_dispatch: - inputs: - version: - description: Version to release - required: true - -jobs: - horaemeta: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - ref: refs/tags/${{ inputs.version }} - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - name: Login to DockerHub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKERHUB_USER }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Build and Push HoraeMeta Server Docker Image - uses: docker/build-push-action@v3 - with: - context: horaemeta - push: true - tags: | - apache/horaemeta-server:${{ inputs.version }} - ${{ contains(inputs.version.toLowerCase(), 'rc') == false && 'apache/horaemeta-server:latest' || '' }} - - horaedb: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - ref: refs/tags/${{ inputs.version }} - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - name: Login to DockerHub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKERHUB_USER }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Build and Push HoraeDB Server Docker Image - uses: docker/build-push-action@v3 - with: - context: . - push: true - tags: | - apache/horaedb-server:${{ inputs.version }} - ${{ contains(inputs.version.toLowerCase(), 'rc') == false && 'apache/horaedb-server:latest' || '' }} diff --git a/.github/workflows/publish-nightly-image.yml b/.github/workflows/publish-nightly-image.yml deleted file mode 100644 index 13e1c69d06..0000000000 --- a/.github/workflows/publish-nightly-image.yml +++ /dev/null @@ -1,57 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Publish Nightly Docker image - -on: - workflow_dispatch: - schedule: - - cron: '10 20 * * *' - -env: - REGISTRY: ghcr.io - IMAGE_NAME: apache/horaedb-server - -jobs: - docker: - if: github.repository_owner == 'apache' - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - steps: - - name: Checkout - uses: actions/checkout@v3 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - name: Login to Container Registry - uses: docker/login-action@v2 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Set Environment Variables - run: | - echo "BUILD_DATE=$(TZ=':Asia/Shanghai' date '+%Y%m%d')" >> $GITHUB_ENV - echo "SHORT_SHA=`echo ${GITHUB_SHA} | cut -c1-8`" >> $GITHUB_ENV - - name: Build and Push Docker Image - uses: docker/build-push-action@v3 - with: - context: . 
- push: true - tags: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:nightly-${{ env.BUILD_DATE }}-${{ env.SHORT_SHA }} diff --git a/.github/workflows/test-build-image.yml b/.github/workflows/test-build-image.yml deleted file mode 100644 index 6efb49b8da..0000000000 --- a/.github/workflows/test-build-image.yml +++ /dev/null @@ -1,67 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Test HoraeDB Docker image build - -on: - workflow_dispatch: - pull_request: - paths: - - '.github/workflows/**' - - 'Dockerfile' - - 'docker/**' - - 'docs/minimal.toml' - push: - branches: - - main - paths: - - '.github/workflows/**' - - 'Dockerfile' - - 'docker/**' - - 'docs/minimal.toml' - -env: - HORAEDB_ADDR: 127.0.0.1 - HORAEDB_PORT: 5440 - IMAGE_NAME: horaedb-server:latest - SERVER_NAME: standalone-server - -jobs: - docker: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v3 - - name: Release Disk Quota - run: | - sudo rm -rf /usr/local/lib/android # release about 10 GB - sudo rm -rf /usr/share/dotnet # release about 20GB - - name: Build Docker image - run: docker build -t ${IMAGE_NAME} . - - name: Test the Built Image - run: | - docker run --name ${SERVER_NAME} -p ${HORAEDB_ADDR}:${HORAEDB_PORT}:${HORAEDB_PORT} -d ${IMAGE_NAME} - sleep 10 - bash ./docker/basic.sh - docker rm -f ${SERVER_NAME} - - name: Test the Built Image With Config - run: | - docker run --name ${SERVER_NAME} -p ${HORAEDB_ADDR}:${HORAEDB_PORT}:${HORAEDB_PORT} \ - -v `pwd`/docs/minimal.toml:/etc/horaedb/horaedb.toml -d ${IMAGE_NAME} - sleep 10 - bash ./docker/basic.sh - docker rm -f ${SERVER_NAME} diff --git a/.github/workflows/tsbs.yml b/.github/workflows/tsbs.yml deleted file mode 100644 index 7216edbc44..0000000000 --- a/.github/workflows/tsbs.yml +++ /dev/null @@ -1,65 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -name: TSBS Benchmark - -on: - workflow_dispatch: - pull_request: - paths: - - '.github/workflows/**' - - 'scrits/run-tsbs.sh' - paths-ignore: - schedule: - - cron: '2 0 * * *' - -jobs: - run-tsbs: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - submodules: true - - name: Release Disk Quota - run: | - sudo rm -rf /usr/local/lib/android # release about 10 GB - sudo rm -rf /usr/share/dotnet # release about 20GB - - name: Cache Rust Dependencies - uses: Swatinem/rust-cache@v2 - - name: Ensure Disk Quota - run: | - make ensure-disk-quota - - name: Setup Build Environment - run: | - sudo apt update - sudo apt install --yes protobuf-compiler liblzma-dev - - name: Build server - run: | - make build - - name: Run TSBS - run: | - ./scripts/run-tsbs.sh - echo "NOW=$(TZ=':Asia/Shanghai' date +'%Y-%m-%dT%H_%M_%S')" >> $GITHUB_ENV - - name: Update Summary - run: | - cat tsbs/result.md >> $GITHUB_STEP_SUMMARY - - uses: actions/upload-artifact@v3 - with: - name: bench-${{ env.NOW }} - path: | - logs/** - tsbs/result.md diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index c413bc677a..0000000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "parquet-testing"] - path = src/components/parquet-testing - url = https://github.com/apache/parquet-testing.git diff --git a/Cargo.lock b/Cargo.lock index 273a89683d..d484dea6dc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,51 +1,41 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" -version = "0.19.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a76fd60b23679b7d19bd066031410fb7e458ccc5e958eb5c325888ce4baedc97" +checksum = "f5fb1d8e4442bd405fdfd1dacb42792696b0cf9cb15882e5d097b742a676d375" dependencies = [ "gimli", ] [[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - -[[package]] -name = "ahash" -version = "0.7.6" +name = "adler2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" -dependencies = [ - "getrandom", - "once_cell", - "version_check", -] +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "ahash" -version = "0.8.3" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "const-random", "getrandom", "once_cell", "version_check", + "zerocopy", ] [[package]] name = "aho-corasick" -version = "1.0.1" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] @@ -65,82 +55,11 @@ dependencies = [ "alloc-no-stdlib", ] -[[package]] -name = "alloc_tracker" -version = "2.2.0-dev" - [[package]] name = "allocator-api2" -version = "0.2.16" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" - -[[package]] -name = 
"analytic_engine" -version = "2.2.0-dev" -dependencies = [ - "anyhow", - "arc-swap 1.6.0", - "arena", - "arrow 49.0.0", - "async-scoped", - "async-stream", - "async-trait", - "atomic_enum", - "base64 0.13.1", - "bytes_ext", - "cluster", - "codec", - "common_types", - "datafusion", - "env_logger", - "future_ext", - "futures 0.3.28", - "generic_error", - "hash_ext", - "hex", - "horaedbproto 2.0.0", - "hyperloglog", - "id_allocator", - "itertools 0.10.5", - "lazy_static", - "logger", - "lru 0.7.8", - "macros", - "message_queue", - "meta_client", - "metric_ext", - "object_store 2.2.0-dev", - "parquet", - "parquet_ext", - "pin-project-lite", - "prometheus 0.12.0", - "prost 0.11.8", - "rand 0.8.5", - "remote_engine_client", - "reqwest 0.12.4", - "router", - "runtime", - "sampling_cache", - "serde", - "serde_json", - "size_ext", - "skiplist", - "smallvec", - "snafu 0.6.10", - "table_engine", - "table_kv", - "tempfile", - "test_util", - "thiserror", - "time_ext", - "tokio", - "tonic 0.8.3", - "trace_metric", - "url", - "wal", - "xorfilter-rs", -] +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" [[package]] name = "android-tzdata" @@ -157,305 +76,169 @@ dependencies = [ "libc", ] -[[package]] -name = "anes" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" - -[[package]] -name = "ansi_term" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" -dependencies = [ - "winapi", -] - [[package]] name = "anstream" -version = "0.6.13" +version = "0.6.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" dependencies = [ "anstyle", "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", + "is_terminal_polyfill", "utf8parse", ] [[package]] name = "anstyle" -version = "1.0.1" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "anstyle-parse" -version = "0.2.3" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.0.2" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.2" +version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" dependencies = [ "anstyle", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "anyhow" -version = "1.0.70" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "7de8ce5e0f9f8d88245311066a578d72b7af3e7088f32783804676302df237e4" -dependencies = [ - "backtrace", -] - -[[package]] -name = "arc-swap" -version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dabe5a181f83789739c194cbe5a897dde195078fac08568d09221fd6137a7ba8" - -[[package]] -name = "arc-swap" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" - -[[package]] -name = "arena" -version = "2.2.0-dev" - -[[package]] -name = "array-init" -version = "2.1.0" +version = "1.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d62b7694a562cdf5a74227903507c56ab2cc8bdd1f781ed5cb4cf9c9f810bfc" +checksum = "10f00e1f6e58a40e807377c75c6a7f97bf9044fab57816f2414e6f5f4499d7b8" [[package]] name = "arrayref" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" +checksum = "9d151e35f61089500b617991b791fc8bfd237ae50cd5950803758a179b41e67a" [[package]] name = "arrayvec" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" - -[[package]] -name = "arrow" -version = "38.0.0" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c107a57b5913d852da9d5a40e280e4695f2258b5b87733c13b770c63a7117287" -dependencies = [ - "ahash 0.8.3", - "arrow-arith 38.0.0", - "arrow-array 38.0.0", - "arrow-buffer 38.0.0", - "arrow-cast 38.0.0", - "arrow-csv 38.0.0", - "arrow-data 38.0.0", - "arrow-ipc 38.0.0", - "arrow-json 38.0.0", - "arrow-ord 38.0.0", - "arrow-row 38.0.0", - "arrow-schema 38.0.0", - "arrow-select 38.0.0", - "arrow-string 38.0.0", -] +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "49.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bc25126d18a012146a888a0298f2c22e1150327bd2765fc76d710a556b2d614" -dependencies = [ - "ahash 0.8.3", - "arrow-arith 49.0.0", - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-cast 49.0.0", - "arrow-csv 49.0.0", - "arrow-data 49.0.0", - "arrow-ipc 49.0.0", - "arrow-json 49.0.0", - "arrow-ord 49.0.0", - "arrow-row 49.0.0", - "arrow-schema 49.0.0", - "arrow-select 49.0.0", - "arrow-string 49.0.0", -] - -[[package]] -name = "arrow-arith" -version = "38.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace6aa3d5617c5d03041a05e01c6819428a8ddf49dd0b055df9b40fef9d96094" +checksum = "4caf25cdc4a985f91df42ed9e9308e1adbcd341a31a72605c697033fcef163e3" dependencies = [ - "arrow-array 38.0.0", - "arrow-buffer 38.0.0", - "arrow-data 38.0.0", - "arrow-schema 38.0.0", - "chrono", - "half 2.2.1", - "num", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", ] [[package]] name = "arrow-arith" -version = "49.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34ccd45e217ffa6e53bbb0080990e77113bdd4e91ddb84e97b77649810bcf1a7" -dependencies = [ - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-data 49.0.0", - "arrow-schema 49.0.0", - "chrono", - "half 2.2.1", - "num", -] - -[[package]] 
-name = "arrow-array" -version = "38.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "104a04520692cc674e6afd7682f213ca41f9b13ff1873f63a5a2857a590b87b3" +checksum = "91f2dfd1a7ec0aca967dfaa616096aec49779adc8eccec005e2f5e4111b1192a" dependencies = [ - "ahash 0.8.3", - "arrow-buffer 38.0.0", - "arrow-data 38.0.0", - "arrow-schema 38.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "chrono", - "half 2.2.1", - "hashbrown 0.13.2", + "half", "num", ] [[package]] name = "arrow-array" -version = "49.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bda9acea48b25123c08340f3a8ac361aa0f74469bb36f5ee9acf923fce23e9d" +checksum = "d39387ca628be747394890a6e47f138ceac1aa912eab64f02519fed24b637af8" dependencies = [ - "ahash 0.8.3", - "arrow-buffer 49.0.0", - "arrow-data 49.0.0", - "arrow-schema 49.0.0", + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", "chrono", "chrono-tz", - "half 2.2.1", - "hashbrown 0.14.0", - "num", -] - -[[package]] -name = "arrow-buffer" -version = "38.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72c875bcb9530ec403998fb0b2dc6d180a7c64563ca4bc22b90eafb84b113143" -dependencies = [ - "half 2.2.1", + "half", + "hashbrown", "num", ] [[package]] name = "arrow-buffer" -version = "49.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01a0fc21915b00fc6c2667b069c1b64bdd920982f426079bc4a7cab86822886c" +checksum = "9e51e05228852ffe3eb391ce7178a0f97d2cf80cc6ef91d3c4a6b3cb688049ec" dependencies = [ "bytes", - "half 2.2.1", - "num", -] - -[[package]] -name = "arrow-cast" -version = "38.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6d6e18281636c8fc0b93be59834da6bf9a72bb70fd0c98ddfdaf124da466c28" -dependencies = [ - "arrow-array 38.0.0", - "arrow-buffer 38.0.0", - "arrow-data 38.0.0", - "arrow-schema 38.0.0", - "arrow-select 38.0.0", - "chrono", - "lexical-core", + "half", "num", ] [[package]] name = "arrow-cast" -version = "49.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dc0368ed618d509636c1e3cc20db1281148190a78f43519487b2daf07b63b4a" +checksum = "d09aea56ec9fa267f3f3f6cdab67d8a9974cbba90b3aa38c8fe9d0bb071bd8c1" dependencies = [ - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-data 49.0.0", - "arrow-schema 49.0.0", - "arrow-select 49.0.0", - "base64 0.21.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64", "chrono", - "comfy-table 7.0.1", - "half 2.2.1", + "comfy-table", + "half", "lexical-core", "num", + "ryu", ] [[package]] name = "arrow-csv" -version = "38.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3197dab0963a236ff8e7c82e2272535745955ac1321eb740c29f2f88b353f54e" -dependencies = [ - "arrow-array 38.0.0", - "arrow-buffer 38.0.0", - "arrow-cast 38.0.0", - "arrow-data 38.0.0", - "arrow-schema 38.0.0", - "chrono", - "csv", - "csv-core", - "lazy_static", - "lexical-core", - "regex", -] - -[[package]] -name = "arrow-csv" -version = "49.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e09aa6246a1d6459b3f14baeaa49606cfdbca34435c46320e14054d244987ca" +checksum = "c07b5232be87d115fde73e32f2ca7f1b353bff1b44ac422d3c6fc6ae38f11f0d" dependencies = [ - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-cast 
49.0.0", - "arrow-data 49.0.0", - "arrow-schema 49.0.0", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", "chrono", "csv", "csv-core", @@ -466,90 +249,45 @@ dependencies = [ [[package]] name = "arrow-data" -version = "38.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb68113d6ecdbe8bba48b2c4042c151bf9e1c61244e45072a50250a6fc59bafe" -dependencies = [ - "arrow-buffer 38.0.0", - "arrow-schema 38.0.0", - "half 2.2.1", - "num", -] - -[[package]] -name = "arrow-data" -version = "49.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "907fafe280a3874474678c1858b9ca4cb7fd83fb8034ff5b6d6376205a08c634" +checksum = "b98ae0af50890b494cebd7d6b04b35e896205c1d1df7b29a6272c5d0d0249ef5" dependencies = [ - "arrow-buffer 49.0.0", - "arrow-schema 49.0.0", - "half 2.2.1", + "arrow-buffer", + "arrow-schema", + "half", "num", ] [[package]] name = "arrow-ipc" -version = "38.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eab4bbf2dd3078facb5ce0a9641316a64f42bfd8cf357e6775c8a5e6708e3a8d" -dependencies = [ - "arrow-array 38.0.0", - "arrow-buffer 38.0.0", - "arrow-cast 38.0.0", - "arrow-data 38.0.0", - "arrow-schema 38.0.0", - "flatbuffers", -] - -[[package]] -name = "arrow-ipc" -version = "49.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79a43d6808411886b8c7d4f6f7dd477029c1e77ffffffb7923555cc6579639cd" +checksum = "0ed91bdeaff5a1c00d28d8f73466bcb64d32bbd7093b5a30156b4b9f4dba3eee" dependencies = [ - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-cast 49.0.0", - "arrow-data 49.0.0", - "arrow-schema 49.0.0", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", "flatbuffers", + "lz4_flex", ] [[package]] name = "arrow-json" -version = "38.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c5b650d23746a494665d914a7fa3d21d939153cff9d53bdebe39bffa88f263" -dependencies = [ - "arrow-array 38.0.0", - "arrow-buffer 38.0.0", - "arrow-cast 38.0.0", - "arrow-data 38.0.0", - "arrow-schema 38.0.0", - "chrono", - "half 2.2.1", - "indexmap 1.9.3", - "lexical-core", - "num", - "serde", - "serde_json", -] - -[[package]] -name = "arrow-json" -version = "49.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82565c91fd627922ebfe2810ee4e8346841b6f9361b87505a9acea38b614fee" +checksum = "0471f51260a5309307e5d409c9dc70aede1cd9cf1d4ff0f0a1e8e1a2dd0e0d3c" dependencies = [ - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-cast 49.0.0", - "arrow-data 49.0.0", - "arrow-schema 49.0.0", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", "chrono", - "half 2.2.1", - "indexmap 2.0.0", + "half", + "indexmap", "lexical-core", "num", "serde", @@ -558,166 +296,75 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "38.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68c6fce28e5011e30acc7466b5efcb8ed0197c396240bd2b10e167f275a3c208" -dependencies = [ - "arrow-array 38.0.0", - "arrow-buffer 38.0.0", - "arrow-data 38.0.0", - "arrow-schema 38.0.0", - "arrow-select 38.0.0", - "half 2.2.1", - "num", -] - -[[package]] -name = "arrow-ord" -version = "49.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b23b0e53c0db57c6749997fd343d4c0354c994be7eca67152dd2bdb9a3e1bb4" +checksum = 
"2883d7035e0b600fb4c30ce1e50e66e53d8656aa729f2bfa4b51d359cf3ded52" dependencies = [ - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-data 49.0.0", - "arrow-schema 49.0.0", - "arrow-select 49.0.0", - "half 2.2.1", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half", "num", ] [[package]] name = "arrow-row" -version = "38.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f20a421f19799d8b93eb8edde5217e910fa1e2d6ceb3c529f000e57b6db144c0" -dependencies = [ - "ahash 0.8.3", - "arrow-array 38.0.0", - "arrow-buffer 38.0.0", - "arrow-data 38.0.0", - "arrow-schema 38.0.0", - "half 2.2.1", - "hashbrown 0.13.2", -] - -[[package]] -name = "arrow-row" -version = "49.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "361249898d2d6d4a6eeb7484be6ac74977e48da12a4dd81a708d620cc558117a" +checksum = "552907e8e587a6fde4f8843fd7a27a576a260f65dab6c065741ea79f633fc5be" dependencies = [ - "ahash 0.8.3", - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-data 49.0.0", - "arrow-schema 49.0.0", - "half 2.2.1", - "hashbrown 0.14.0", + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", ] [[package]] name = "arrow-schema" -version = "38.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc85923d8d6662cc66ac6602c7d1876872e671002d60993dfdf492a6badeae92" - -[[package]] -name = "arrow-schema" -version = "49.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09e28a5e781bf1b0f981333684ad13f5901f4cd2f20589eab7cf1797da8fc167" - -[[package]] -name = "arrow-select" -version = "38.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ab6613ce65b61d85a3410241744e84e48fbab0fe06e1251b4429d21b3470fd" -dependencies = [ - "arrow-array 38.0.0", - "arrow-buffer 38.0.0", - "arrow-data 38.0.0", - "arrow-schema 38.0.0", - "num", -] +checksum = "539ada65246b949bd99ffa0881a9a15a4a529448af1a07a9838dd78617dafab1" [[package]] name = "arrow-select" -version = "49.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f6208466590960efc1d2a7172bc4ff18a67d6e25c529381d7f96ddaf0dc4036" +checksum = "6259e566b752da6dceab91766ed8b2e67bf6270eb9ad8a6e07a33c1bede2b125" dependencies = [ - "ahash 0.8.3", - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-data 49.0.0", - "arrow-schema 49.0.0", + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "num", ] [[package]] name = "arrow-string" -version = "38.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3008641239e884aefba66d8b8532da6af40d14296349fcc85935de4ba67b89e" -dependencies = [ - "arrow-array 38.0.0", - "arrow-buffer 38.0.0", - "arrow-data 38.0.0", - "arrow-schema 38.0.0", - "arrow-select 38.0.0", - "regex", - "regex-syntax 0.6.29", -] - -[[package]] -name = "arrow-string" -version = "49.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a48149c63c11c9ff571e50ab8f017d2a7cb71037a882b42f6354ed2da9acc7" +checksum = "f3179ccbd18ebf04277a095ba7321b93fd1f774f18816bd5f6b3ce2f594edb6c" dependencies = [ - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-data 49.0.0", - "arrow-schema 49.0.0", - "arrow-select 49.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", "num", "regex", - "regex-syntax 
0.8.2", -] - -[[package]] -name = "arrow_ext" -version = "2.2.0-dev" -dependencies = [ - "arrow 49.0.0", - "serde", - "snafu 0.6.10", - "zstd 0.12.3+zstd.1.5.2", -] - -[[package]] -name = "arrow_util" -version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=05a8a9f#05a8a9f79c5b8e3c6d324b214e7ccf910c2f6b73" -dependencies = [ - "ahash 0.8.3", - "arrow 49.0.0", - "chrono", - "comfy-table 6.1.4", - "hashbrown 0.13.2", - "num-traits", - "once_cell", - "regex", - "snafu 0.7.4", - "uuid", + "regex-syntax 0.8.4", ] [[package]] name = "async-compression" -version = "0.4.1" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b74f44609f0f91493e3082d3734d98497e094777144380ea4db9f9905dd5b6" +checksum = "fec134f64e2bc57411226dfc4e52dec859ddfc7e711fc5e07b612584f000e4aa" dependencies = [ "bzip2", "flate2", @@ -727,376 +374,100 @@ dependencies = [ "pin-project-lite", "tokio", "xz2", - "zstd 0.12.3+zstd.1.5.2", - "zstd-safe 6.0.4+zstd.1.5.4", + "zstd", + "zstd-safe", ] [[package]] -name = "async-io" -version = "1.13.0" +name = "async-scoped" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fc5b45d93ef0529756f812ca52e44c221b35341892d3dcc34132ac02f3dd2af" +checksum = "4042078ea593edffc452eef14e99fdb2b120caa4ad9618bcdeabc4a023b98740" dependencies = [ - "async-lock", - "autocfg", - "cfg-if 1.0.0", - "concurrent-queue", - "futures-lite", - "log", - "parking", - "polling", - "rustix", - "slab", - "socket2 0.4.9", - "waker-fn", + "futures", + "pin-project", + "tokio", ] [[package]] -name = "async-lock" -version = "2.7.0" +name = "async-trait" +version = "0.1.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa24f727524730b077666307f2734b4a1a1c57acb79193127dcc8914d5242dd7" +checksum = "a27b8a3a6e1a44fa4c8baf1f653e4172e81486d4941f2237e20dc2d0cf4ddff1" dependencies = [ - "event-listener", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "async-recursion" -version = "1.0.4" +name = "atoi" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.48", + "num-traits", ] [[package]] -name = "async-scoped" -version = "0.9.0" +name = "autocfg" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4042078ea593edffc452eef14e99fdb2b120caa4ad9618bcdeabc4a023b98740" -dependencies = [ - "futures 0.3.28", - "pin-project", - "tokio", -] +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] -name = "async-stream" -version = "0.3.4" +name = "backtrace" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad445822218ce64be7a341abfb0b1ea43b5c23aa83902542a4542e78309d8e5e" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" dependencies = [ - "async-stream-impl", - "futures-core", - "pin-project-lite", + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", ] [[package]] -name = "async-stream-impl" -version = "0.3.4" +name = "base64" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4655ae1a7b0cdf149156f780c5bf3f1352bc53cbd9e0a361a7ef7b22947e965" -dependencies = [ - "proc-macro2", - "quote", - 
"syn 1.0.109", -] +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] -name = "async-trait" -version = "0.1.77" +name = "bitflags" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.48", -] +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] -name = "atomic-waker" -version = "1.1.2" +name = "bitflags" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] -name = "atomic_enum" -version = "0.2.0" +name = "blake2" +version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6227a8d6fdb862bcb100c4314d0d9579e5cd73fa6df31a2e6f6e1acd3c5f1207" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", -] - -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "axum" -version = "0.6.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349f8ccfd9221ee7d1f3d4b33e1f8319b3a81ed8f61f2ea40b37b859794b4491" -dependencies = [ - "async-trait", - "axum-core", - "bitflags 1.3.2", - "bytes", - "futures-util", - "http 0.2.9", - "http-body 0.4.5", - "hyper 0.14.25", - "itoa", - "matchit", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "rustversion", - "serde", - "sync_wrapper", - "tower", - "tower-layer", - "tower-service", -] - -[[package]] -name = "axum-core" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2f958c80c248b34b9a877a643811be8dbca03ca5ba827f2b63baf3a81e5fc4e" -dependencies = [ - "async-trait", - "bytes", - "futures-util", - "http 0.2.9", - "http-body 0.4.5", - "mime", - "rustversion", - "tower-layer", - "tower-service", -] - -[[package]] -name = "backon" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d67782c3f868daa71d3533538e98a8e13713231969def7536e8039606fc46bf0" -dependencies = [ - "fastrand 2.1.0", - "futures-core", - "pin-project", - "tokio", -] - -[[package]] -name = "backtrace" -version = "0.3.67" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "233d376d6d185f2a3093e58f283f60f880315b6c60075b01f36b3b85154564ca" -dependencies = [ - "addr2line", - "cc", - "cfg-if 1.0.0", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", -] - -[[package]] -name = "base64" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" - -[[package]] -name = "base64" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" - -[[package]] -name = 
"base64" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" - -[[package]] -name = "base64ct" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" - -[[package]] -name = "bcder" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf16bec990f8ea25cab661199904ef452fcf11f565c404ce6cffbdf3f8cbbc47" -dependencies = [ - "bytes", - "smallvec", -] - -[[package]] -name = "benchmarks" -version = "2.2.0-dev" -dependencies = [ - "analytic_engine", - "arena", - "arrow 49.0.0", - "base64 0.13.1", - "bytes_ext", - "clap", - "common_types", - "criterion", - "env_logger", - "futures 0.3.28", - "generic_error", - "logger", - "macros", - "object_store 2.2.0-dev", - "parquet", - "parquet_ext", - "pprof", - "rand 0.8.5", - "runtime", - "serde", - "size_ext", - "snafu 0.6.10", - "table_engine", - "table_kv", - "tempfile", - "time_ext", - "tokio", - "toml_ext", - "trace_metric", - "wal", - "zstd 0.12.3+zstd.1.5.2", -] - -[[package]] -name = "bigdecimal" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6aaf33151a6429fe9211d1b276eafdf70cdff28b071e76c0b0e1503221ea3744" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - -[[package]] -name = "bincode" -version = "1.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" -dependencies = [ - "serde", -] - -[[package]] -name = "bindgen" -version = "0.59.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8" -dependencies = [ - "bitflags 1.3.2", - "cexpr", - "clang-sys", - "lazy_static", - "lazycell", - "peeking_take_while", - "proc-macro2", - "quote", - "regex", - "rustc-hash", - "shlex", -] - -[[package]] -name = "bindgen" -version = "0.65.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" -dependencies = [ - "bitflags 1.3.2", - "cexpr", - "clang-sys", - "lazy_static", - "lazycell", - "peeking_take_while", - "prettyplease 0.2.12", - "proc-macro2", - "quote", - "regex", - "rustc-hash", - "shlex", - "syn 2.0.48", -] - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" - -[[package]] -name = "bitvec" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" -dependencies = [ - "funty", - "radium", - "tap", - "wyz", -] - -[[package]] -name = "blake2" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" -dependencies = [ - "digest", + "digest", ] [[package]] name = "blake3" -version = "1.3.3" +version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"42ae2468a89544a466886840aa467a25b766499f4f04bf7d9fcd10ecee9fccef" +checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" dependencies = [ "arrayref", "arrayvec", "cc", - "cfg-if 1.0.0", + "cfg-if", "constant_time_eq", - "digest", ] [[package]] @@ -1108,56 +479,11 @@ dependencies = [ "generic-array", ] -[[package]] -name = "borsh" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "115e54d64eb62cdebad391c19efc9dce4981c690c85a33a12199d99bb9546fee" -dependencies = [ - "borsh-derive", - "hashbrown 0.13.2", -] - -[[package]] -name = "borsh-derive" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0754613691538d51f329cce9af41d7b7ca150bc973056f1156611489475f54f7" -dependencies = [ - "borsh-derive-internal", - "borsh-schema-derive-internal", - "proc-macro-crate 0.1.5", - "proc-macro2", - "syn 1.0.109", -] - -[[package]] -name = "borsh-derive-internal" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afb438156919598d2c7bad7e1c0adf3d26ed3840dbc010db1a882a65583ca2fb" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "borsh-schema-derive-internal" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634205cc43f74a1b9046ef87c4540ebda95696ec0f315024860cad7c5b0f5ccd" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "brotli" -version = "3.3.4" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -1166,7721 +492,2468 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "2.3.4" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b6561fd3f895a11e8f72af2cb7d22e08366bebc2b6b57f7744c4bda27034744" +checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", ] [[package]] -name = "buf_redux" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b953a6887648bb07a535631f2bc00fbdb2a2216f135552cb3f534ed136b9c07f" -dependencies = [ - "memchr", - "safemem", -] - -[[package]] -name = "bufstream" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40e38929add23cdf8a366df9b0e088953150724bcbe5fc330b0d8eb3b328eec8" - -[[package]] -name = "bumpalo" -version = "3.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" - -[[package]] -name = "bytecheck" -version = "0.6.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13fe11640a23eb24562225322cd3e452b93a3d4091d62fab69c70542fcd17d1f" -dependencies = [ - "bytecheck_derive", - "ptr_meta", - "simdutf8", -] - -[[package]] -name = "bytecheck_derive" -version = "0.6.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e31225543cb46f81a7e224762764f4a6a0f097b1db0b175f69e8065efaa42de5" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "bytecount" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" - -[[package]] -name = "bytemuck" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea" - -[[package]] -name = "byteorder" -version = "1.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" - -[[package]] -name = "bytes" -version = "1.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" - -[[package]] -name = "bytes_ext" -version = "2.2.0-dev" -dependencies = [ - "bytes", - "snafu 0.6.10", -] - -[[package]] -name = "bzip2" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" -dependencies = [ - "bzip2-sys", - "libc", -] - -[[package]] -name = "bzip2-sys" -version = "0.1.11+1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "camino" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c530edf18f37068ac2d977409ed5cd50d53d73bc653c7647b48eb78976ac9ae2" -dependencies = [ - "serde", -] - -[[package]] -name = "cargo-platform" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbdb825da8a5df079a43676dbe042702f1707b1109f713a01420fbb4cc71fa27" -dependencies = [ - "serde", -] - -[[package]] -name = "cargo_metadata" -version = "0.14.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4acbb09d9ee8e23699b9634375c72795d095bf268439da88562cf9b501f181fa" -dependencies = [ - "camino", - "cargo-platform", - "semver", - "serde", - "serde_json", -] - -[[package]] -name = "cast" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" - -[[package]] -name = "catalog" -version = "2.2.0-dev" -dependencies = [ - "async-trait", - "common_types", - "generic_error", - "lazy_static", - "logger", - "macros", - "snafu 0.6.10", - "table_engine", - "time_ext", -] - -[[package]] -name = "catalog_impls" -version = "2.2.0-dev" -dependencies = [ - "analytic_engine", - "async-trait", - "catalog", - "cluster", - "common_types", - "generic_error", - "logger", - "macros", - "meta_client", - "snafu 0.6.10", - "system_catalog", - "table_engine", - "tokio", -] - -[[package]] -name = "cc" -version = "1.0.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" -dependencies = [ - "jobserver", - "libc", -] - -[[package]] -name = "cexpr" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" -dependencies = [ - "nom", -] - -[[package]] -name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "chrono" -version = "0.4.38" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" -dependencies = [ - "android-tzdata", - "iana-time-zone", - "js-sys", - "num-traits", - "serde", - "wasm-bindgen", - "windows-targets 0.52.0", -] - -[[package]] -name = "chrono-tz" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa48fa079165080f11d7753fd0bc175b7d391f276b965fe4b55bfad67856e463" -dependencies = [ - "chrono", - "chrono-tz-build", - "phf", -] - -[[package]] -name = "chrono-tz-build" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9998fb9f7e9b2111641485bf8beb32f92945f97f92a3d061f744cfef335f751" -dependencies = [ - "parse-zoneinfo", - "phf", - "phf_codegen", -] - -[[package]] -name = "ciborium" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" -dependencies = [ - "ciborium-io", - "ciborium-ll", - "serde", -] - -[[package]] -name = "ciborium-io" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" - -[[package]] -name = "ciborium-ll" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" -dependencies = [ - "ciborium-io", - "half 1.8.2", -] - -[[package]] -name = "clang-sys" -version = "1.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f" -dependencies = [ - "glob", - "libc", - "libloading", -] - -[[package]] -name = "clap" -version = "4.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b230ab84b0ffdf890d5a10abdbc8b83ae1c4918275daea1ab8801f71536b2651" -dependencies = [ - "clap_builder", - "clap_derive", -] - -[[package]] -name = "clap_builder" -version = "4.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" -dependencies = [ - "anstream", - "anstyle", - "clap_lex", - "strsim 0.11.0", -] - -[[package]] -name = "clap_derive" -version = "4.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn 2.0.48", -] - -[[package]] -name = "clap_lex" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" - -[[package]] -name = "clru" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8191fa7302e03607ff0e237d4246cc043ff5b3cb9409d995172ba3bea16b807" - -[[package]] -name = "cluster" -version = "2.2.0-dev" -dependencies = [ - "async-trait", - "bytes_ext", - "catalog", - "common_types", - "etcd-client", - "future_ext", - "generic_error", - "horaedbproto 2.0.0", - "logger", - "macros", - "meta_client", - "prost 0.11.8", - "runtime", - "serde", - "serde_json", - "snafu 0.6.10", - "table_engine", - "time_ext", - "tokio", - "wal", -] - -[[package]] -name = "cmake" -version = 
"0.1.50" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" -dependencies = [ - "cc", -] - -[[package]] -name = "codec" -version = "2.2.0-dev" -dependencies = [ - "bytes_ext", - "common_types", - "lz4_flex", - "macros", - "snafu 0.6.10", -] - -[[package]] -name = "codespan-reporting" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" -dependencies = [ - "termcolor", - "unicode-width", -] - -[[package]] -name = "colorchoice" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" - -[[package]] -name = "comfy-table" -version = "6.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e7b787b0dc42e8111badfdbe4c3059158ccb2db8780352fa1b01e8ccf45cc4d" -dependencies = [ - "strum 0.24.1", - "strum_macros 0.24.3", - "unicode-width", -] - -[[package]] -name = "comfy-table" -version = "7.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab77dbd8adecaf3f0db40581631b995f312a8a5ae3aa9993188bb8f23d83a5b" -dependencies = [ - "strum 0.24.1", - "strum_macros 0.24.3", - "unicode-width", -] - -[[package]] -name = "common_types" -version = "2.2.0-dev" -dependencies = [ - "arrow 49.0.0", - "arrow_ext", - "bytes_ext", - "chrono", - "datafusion", - "hash_ext", - "horaedbproto 2.0.0", - "macros", - "paste 1.0.12", - "prost 0.11.8", - "rand 0.8.5", - "seahash", - "serde", - "serde_json", - "snafu 0.6.10", - "sqlparser", - "uuid", -] - -[[package]] -name = "concurrent-queue" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c278839b831783b70278b14df4d45e1beb1aad306c07bb796637de9a0e323e8e" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "console" -version = "0.15.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" -dependencies = [ - "encode_unicode 0.3.6", - "lazy_static", - "libc", - "windows-sys 0.45.0", -] - -[[package]] -name = "console-api" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2895653b4d9f1538a83970077cb01dfc77a4810524e51a110944688e916b18e" -dependencies = [ - "prost 0.11.8", - "prost-types", - "tonic 0.9.2", - "tracing-core", -] - -[[package]] -name = "console-subscriber" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57ab2224a0311582eb03adba4caaf18644f7b1f10a760803a803b9b605187fc7" -dependencies = [ - "console-api", - "crossbeam-channel", - "crossbeam-utils", - "futures 0.3.28", - "hdrhistogram", - "humantime 2.1.0", - "prost-types", - "serde", - "serde_json", - "thread_local", - "tokio", - "tokio-stream", - "tonic 0.9.2", - "tracing", - "tracing-core", - "tracing-subscriber", -] - -[[package]] -name = "const-oid" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "795bc6e66a8e340f075fcf6227e417a2dc976b92b91f3cdc778bb858778b6747" - -[[package]] -name = "const-random" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368a7a772ead6ce7e1de82bfb04c485f3db8ec744f72925af5735e29a22cc18e" -dependencies = [ - "const-random-macro", - "proc-macro-hack", -] - -[[package]] -name = 
"const-random-macro" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d7d6ab3c3a2282db210df5f02c4dab6e0a7057af0fb7ebd4070f30fe05c0ddb" -dependencies = [ - "getrandom", - "once_cell", - "proc-macro-hack", - "tiny-keccak", -] - -[[package]] -name = "constant_time_eq" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13418e745008f7349ec7e449155f419a61b92b58a99cc3616942b926825ec76b" - -[[package]] -name = "core-foundation" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" -dependencies = [ - "core-foundation-sys", - "libc", -] - -[[package]] -name = "core-foundation-sys" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" - -[[package]] -name = "cpp_demangle" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee34052ee3d93d6d8f3e6f81d85c47921f6653a19a7b70e939e3e602d893a674" -dependencies = [ - "cfg-if 1.0.0", -] - -[[package]] -name = "cpufeatures" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" -dependencies = [ - "libc", -] - -[[package]] -name = "crc" -version = "3.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86ec7a15cbe22e59248fc7eadb1907dab5ba09372595da4d73dd805ed4417dfe" -dependencies = [ - "crc-catalog", -] - -[[package]] -name = "crc-catalog" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cace84e55f07e7301bae1c519df89cdad8cc3cd868413d3fdbdeca9ff3db484" - -[[package]] -name = "crc32c" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" -dependencies = [ - "rustc_version", -] - -[[package]] -name = "crc32fast" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" -dependencies = [ - "cfg-if 1.0.0", -] - -[[package]] -name = "criterion" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" -dependencies = [ - "anes", - "cast", - "ciborium", - "clap", - "criterion-plot", - "is-terminal", - "itertools 0.10.5", - "num-traits", - "once_cell", - "oorandom", - "plotters", - "rayon", - "regex", - "serde", - "serde_derive", - "serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion-plot" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" -dependencies = [ - "cast", - "itertools 0.10.5", -] - -[[package]] -name = "crossbeam" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c" -dependencies = [ - "cfg-if 1.0.0", - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-epoch", - "crossbeam-queue", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-channel" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"cf2b3e8478797446514c91ef04bafcb59faba183e621ad488df88983cc14128c" -dependencies = [ - "cfg-if 1.0.0", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" -dependencies = [ - "cfg-if 1.0.0", - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695" -dependencies = [ - "autocfg", - "cfg-if 1.0.0", - "crossbeam-utils", - "memoffset 0.8.0", - "scopeguard", -] - -[[package]] -name = "crossbeam-queue" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" -dependencies = [ - "cfg-if 1.0.0", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-skiplist" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "883a5821d7d079fcf34ac55f27a833ee61678110f6b97637cc74513c0d0b42fc" -dependencies = [ - "cfg-if 1.0.0", - "crossbeam-epoch", - "crossbeam-utils", - "scopeguard", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" -dependencies = [ - "cfg-if 1.0.0", -] - -[[package]] -name = "crunchy" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" - -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "csv" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b015497079b9a9d69c02ad25de6c0a6edef051ea6360a327d0bd05802ef64ad" -dependencies = [ - "csv-core", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "csv-core" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" -dependencies = [ - "memchr", -] - -[[package]] -name = "cxx" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f61f1b6389c3fe1c316bf8a4dccc90a38208354b330925bce1f74a6c4756eb93" -dependencies = [ - "cc", - "cxxbridge-flags", - "cxxbridge-macro", - "link-cplusplus", -] - -[[package]] -name = "cxx-build" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12cee708e8962df2aeb38f594aae5d827c022b6460ac71a7a3e2c3c2aae5a07b" -dependencies = [ - "cc", - "codespan-reporting", - "once_cell", - "proc-macro2", - "quote", - "scratch", - "syn 2.0.48", -] - -[[package]] -name = "cxxbridge-flags" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7944172ae7e4068c533afbb984114a56c46e9ccddda550499caa222902c7f7bb" - -[[package]] -name = "cxxbridge-macro" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" -dependencies = [ - "proc-macro2", - "quote", 
- "syn 2.0.48", -] - -[[package]] -name = "darling" -version = "0.14.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" -dependencies = [ - "darling_core 0.14.4", - "darling_macro 0.14.4", -] - -[[package]] -name = "darling" -version = "0.20.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e" -dependencies = [ - "darling_core 0.20.3", - "darling_macro 0.20.3", -] - -[[package]] -name = "darling_core" -version = "0.14.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim 0.10.0", - "syn 1.0.109", -] - -[[package]] -name = "darling_core" -version = "0.20.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim 0.10.0", - "syn 2.0.48", -] - -[[package]] -name = "darling_macro" -version = "0.14.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" -dependencies = [ - "darling_core 0.14.4", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "darling_macro" -version = "0.20.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" -dependencies = [ - "darling_core 0.20.3", - "quote", - "syn 2.0.48", -] - -[[package]] -name = "dashmap" -version = "5.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" -dependencies = [ - "cfg-if 1.0.0", - "hashbrown 0.14.0", - "lock_api", - "once_cell", - "parking_lot_core 0.9.9", -] - -[[package]] -name = "datafusion" -version = "33.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1" -dependencies = [ - "ahash 0.8.3", - "arrow 49.0.0", - "arrow-array 49.0.0", - "arrow-schema 49.0.0", - "async-compression", - "async-trait", - "bytes", - "bzip2", - "chrono", - "dashmap", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-sql", - "flate2", - "futures 0.3.28", - "glob", - "half 2.2.1", - "hashbrown 0.14.0", - "indexmap 2.0.0", - "itertools 0.12.0", - "log", - "num_cpus", - "object_store 0.8.0", - "parking_lot 0.12.1", - "parquet", - "pin-project-lite", - "rand 0.8.5", - "sqlparser", - "tempfile", - "tokio", - "tokio-util", - "url", - "uuid", - "xz2", - "zstd 0.13.0", -] - -[[package]] -name = "datafusion-common" -version = "33.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1" -dependencies = [ - "ahash 0.8.3", - "arrow 49.0.0", - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-schema 49.0.0", - "chrono", - "half 2.2.1", - "libc", - "num_cpus", - "object_store 0.8.0", - "parquet", - "sqlparser", -] - -[[package]] -name = "datafusion-execution" -version = "33.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1" 
-dependencies = [ - "arrow 49.0.0", - "chrono", - "dashmap", - "datafusion-common", - "datafusion-expr", - "futures 0.3.28", - "hashbrown 0.14.0", - "log", - "object_store 0.8.0", - "parking_lot 0.12.1", - "rand 0.8.5", - "tempfile", - "url", -] - -[[package]] -name = "datafusion-expr" -version = "33.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1" -dependencies = [ - "ahash 0.8.3", - "arrow 49.0.0", - "arrow-array 49.0.0", - "datafusion-common", - "paste 1.0.12", - "sqlparser", - "strum 0.25.0", - "strum_macros 0.25.1", -] - -[[package]] -name = "datafusion-optimizer" -version = "33.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1" -dependencies = [ - "arrow 49.0.0", - "async-trait", - "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr", - "hashbrown 0.14.0", - "itertools 0.12.0", - "log", - "regex-syntax 0.8.2", -] - -[[package]] -name = "datafusion-physical-expr" -version = "33.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1" -dependencies = [ - "ahash 0.8.3", - "arrow 49.0.0", - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-ord 49.0.0", - "arrow-schema 49.0.0", - "base64 0.21.0", - "blake2", - "blake3", - "chrono", - "datafusion-common", - "datafusion-expr", - "half 2.2.1", - "hashbrown 0.14.0", - "hex", - "indexmap 2.0.0", - "itertools 0.12.0", - "log", - "md-5", - "paste 1.0.12", - "petgraph", - "rand 0.8.5", - "regex", - "sha2", - "unicode-segmentation", - "uuid", -] - -[[package]] -name = "datafusion-physical-plan" -version = "33.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1" -dependencies = [ - "ahash 0.8.3", - "arrow 49.0.0", - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-schema 49.0.0", - "async-trait", - "chrono", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "futures 0.3.28", - "half 2.2.1", - "hashbrown 0.14.0", - "indexmap 2.0.0", - "itertools 0.12.0", - "log", - "once_cell", - "parking_lot 0.12.1", - "pin-project-lite", - "rand 0.8.5", - "tokio", - "uuid", -] - -[[package]] -name = "datafusion-proto" -version = "33.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1" -dependencies = [ - "arrow 49.0.0", - "chrono", - "datafusion", - "datafusion-common", - "datafusion-expr", - "object_store 0.8.0", - "prost 0.12.3", -] - -[[package]] -name = "datafusion-sql" -version = "33.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1" -dependencies = [ - "arrow 49.0.0", - "arrow-schema 49.0.0", - "datafusion-common", - "datafusion-expr", - "log", - "sqlparser", -] - -[[package]] -name = "datafusion_util" -version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=05a8a9f#05a8a9f79c5b8e3c6d324b214e7ccf910c2f6b73" -dependencies = [ - "async-trait", - "datafusion", - "futures 0.3.28", - "observability_deps", - "pin-project", - "tokio", - "tokio-stream", -] - -[[package]] -name = "debugid" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" -dependencies = [ - "uuid", -] - -[[package]] -name = "der" -version = "0.7.8" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fffa369a668c8af7dbf8b5e56c9f744fbd399949ed171606040001947de40b1c" -dependencies = [ - "const-oid", - "zeroize", -] - -[[package]] -name = "derive-new" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d150dea618e920167e5973d70ae6ece4385b7164e0d799fe7c122dd0a5d912ad" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.48", -] - -[[package]] -name = "derive_builder" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d07adf7be193b71cc36b193d0f5fe60b918a3a9db4dad0449f57bcfd519704a3" -dependencies = [ - "derive_builder_macro 0.11.2", -] - -[[package]] -name = "derive_builder" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d67778784b508018359cbc8696edb3db78160bab2c2a28ba7f56ef6932997f8" -dependencies = [ - "derive_builder_macro 0.12.0", -] - -[[package]] -name = "derive_builder_core" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f91d4cfa921f1c05904dc3c57b4a32c38aed3340cce209f3a6fd1478babafc4" -dependencies = [ - "darling 0.14.4", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "derive_builder_core" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c11bdc11a0c47bc7d37d582b5285da6849c96681023680b906673c5707af7b0f" -dependencies = [ - "darling 0.14.4", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "derive_builder_macro" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f0314b72bed045f3a68671b3c86328386762c93f82d98c65c3cb5e5f573dd68" -dependencies = [ - "derive_builder_core 0.11.2", - "syn 1.0.109", -] - -[[package]] -name = "derive_builder_macro" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebcda35c7a396850a55ffeac740804b40ffec779b98fffbb1738f4033f0ee79e" -dependencies = [ - "derive_builder_core 0.12.0", - "syn 1.0.109", -] - -[[package]] -name = "derive_utils" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9abcad25e9720609ccb3dcdb795d845e37d8ce34183330a9f48b03a1a71c8e21" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.48", -] - -[[package]] -name = "df_engine_extensions" -version = "2.2.0-dev" -dependencies = [ - "arrow 49.0.0", - "async-recursion", - "async-trait", - "catalog", - "common_types", - "datafusion", - "datafusion-proto", - "futures 0.3.28", - "generic_error", - "horaedbproto 2.0.0", - "insta", - "lazy_static", - "prometheus 0.12.0", - "prost 0.11.8", - "runtime", - "snafu 0.6.10", - "table_engine", - "tokio", - "trace_metric", -] - -[[package]] -name = "df_operator" -version = "2.2.0-dev" -dependencies = [ - "arrow 49.0.0", - "base64 0.13.1", - "bincode", - "chrono", - "common_types", - "datafusion", - "generic_error", - "hyperloglog", - "macros", - "smallvec", - "snafu 0.6.10", -] - -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "const-oid", - "crypto-common", - "subtle", -] - -[[package]] -name = "dirs-next" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" -dependencies = [ - "cfg-if 1.0.0", - "dirs-sys-next", -] - -[[package]] -name = "dirs-sys-next" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" -dependencies = [ - "libc", - "redox_users", - "winapi", -] - -[[package]] -name = "dlv-list" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f" -dependencies = [ - "const-random", -] - -[[package]] -name = "doc-comment" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" - -[[package]] -name = "dotenvy" -version = "0.15.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" - -[[package]] -name = "dtoa" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65d09067bfacaa79114679b279d7f5885b53295b1e2cfb4e79c8e4bd3d633169" - -[[package]] -name = "either" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" - -[[package]] -name = "encode_unicode" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" - -[[package]] -name = "encode_unicode" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" - -[[package]] -name = "encoding_rs" -version = "0.8.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394" -dependencies = [ - "cfg-if 1.0.0", -] - -[[package]] -name = "env_logger" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aafcde04e90a5226a6443b7aabdb016ba2f8307c847d524724bd9b346dd1a2d3" -dependencies = [ - "atty", - "humantime 1.3.0", - "log", - "regex", - "termcolor", -] - -[[package]] -name = "equivalent" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" - -[[package]] -name = "errno" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d6a0976c999d473fe89ad888d5a284e55366d9dc9038b1ba2aa15128c4afa0" -dependencies = [ - "errno-dragonfly", - "libc", - "windows-sys 0.45.0", -] - -[[package]] -name = "errno-dragonfly" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" -dependencies = [ - "cc", - "libc", -] - -[[package]] -name = "error-chain" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc" -dependencies = [ - "version_check", -] - -[[package]] -name = "etcd-client" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4319dc0fb739a6e84cb8678b8cf50c9bcfa4712ae826b33ecf00cc0850550a58" -dependencies = [ - "http 0.2.9", - "prost 
0.11.8", - "tokio", - "tokio-stream", - "tonic 0.8.3", - "tonic-build", - "tower", - "tower-service", -] - -[[package]] -name = "event-listener" -version = "2.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" - -[[package]] -name = "fail" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe5e43d0f78a42ad591453aedb1d7ae631ce7ee445c7643691055a9ed8d3b01c" -dependencies = [ - "log", - "once_cell", - "rand 0.8.5", -] - -[[package]] -name = "fallible-iterator" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" - -[[package]] -name = "fastrand" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" -dependencies = [ - "instant", -] - -[[package]] -name = "fastrand" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" - -[[package]] -name = "filedescriptor" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7199d965852c3bac31f779ef99cbb4537f80e952e2d6aa0ffeb30cce00f4f46e" -dependencies = [ - "libc", - "thiserror", - "winapi", -] - -[[package]] -name = "findshlibs" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64" -dependencies = [ - "cc", - "lazy_static", - "libc", - "winapi", -] - -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - -[[package]] -name = "flagset" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3ea1ec5f8307826a5b71094dd91fc04d4ae75d5709b20ad351c7fb4815c86ec" - -[[package]] -name = "flatbuffers" -version = "23.1.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77f5399c2c9c50ae9418e522842ad362f61ee48b346ac106807bd355a8a7c619" -dependencies = [ - "bitflags 1.3.2", - "rustc_version", -] - -[[package]] -name = "flate2" -version = "1.0.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841" -dependencies = [ - "crc32fast", - "libz-sys", - "miniz_oxide", -] - -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - -[[package]] -name = "form_urlencoded" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" -dependencies = [ - "percent-encoding", -] - -[[package]] -name = "frunk" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a89c703bf50009f383a0873845357cc400a95fc535f836feddfe015d7df6e1e0" -dependencies = [ - "frunk_core", - "frunk_derives", - "frunk_proc_macros", -] - -[[package]] -name = "frunk_core" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2a446d01a558301dca28ef43222864a9fa2bd9a2e71370f769d5d5d5ec9f3537" - -[[package]] -name = "frunk_derives" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b83164912bb4c97cfe0772913c7af7387ee2e00cb6d4636fb65a35b3d0c8f173" -dependencies = [ - "frunk_proc_macro_helpers", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "frunk_proc_macro_helpers" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "015425591bbeb0f5b8a75593340f1789af428e9f887a4f1e36c0c471f067ef50" -dependencies = [ - "frunk_core", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "frunk_proc_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea01524f285deab48affffb342b97f186e657b119c3f1821ac531780e0fbfae0" -dependencies = [ - "frunk_core", - "frunk_proc_macros_impl", - "proc-macro-hack", -] - -[[package]] -name = "frunk_proc_macros_impl" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a802d974cc18ee7fe1a7868fc9ce31086294fd96ba62f8da64ecb44e92a2653" -dependencies = [ - "frunk_core", - "frunk_proc_macro_helpers", - "proc-macro-hack", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "fs_extra" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" - -[[package]] -name = "fuchsia-cprng" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" - -[[package]] -name = "funty" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" - -[[package]] -name = "future_ext" -version = "2.2.0-dev" -dependencies = [ - "futures 0.3.28", - "lazy_static", - "prometheus 0.12.0", - "rand 0.8.5", - "runtime", - "tokio", -] - -[[package]] -name = "futures" -version = "0.1.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a471a38ef8ed83cd6e40aa59c1ffe17db6855c18e3604d9c4ed8c08ebc28678" - -[[package]] -name = "futures" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-channel" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = "futures-core" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" - -[[package]] -name = "futures-cpupool" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab90cde24b3319636588d0c35fe03b1333857621051837ed769faefb4c2162e4" -dependencies = [ - "futures 0.1.31", - "num_cpus", -] - -[[package]] -name = "futures-executor" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" -dependencies = [ - 
"futures-core", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-io" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" - -[[package]] -name = "futures-lite" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7694489acd39452c77daa48516b894c153f192c3578d5a839b62c58099fcbf48" -dependencies = [ - "fastrand 1.9.0", - "futures-core", - "futures-io", - "memchr", - "parking", - "pin-project-lite", - "waker-fn", -] - -[[package]] -name = "futures-macro" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.48", -] - -[[package]] -name = "futures-sink" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" - -[[package]] -name = "futures-task" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" - -[[package]] -name = "futures-util" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-macro", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - -[[package]] -name = "gag" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a713bee13966e9fbffdf7193af71d54a6b35a0bb34997cd6c9519ebeb5005972" -dependencies = [ - "filedescriptor", - "tempfile", -] - -[[package]] -name = "gcc" -version = "0.3.55" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f5f3913fa0bfe7ee1fd8248b6b9f42a5af4b9d65ec2dd2c3c26132b950ecfc2" - -[[package]] -name = "generated_types" -version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=05a8a9f#05a8a9f79c5b8e3c6d324b214e7ccf910c2f6b73" -dependencies = [ - "pbjson", - "pbjson-build", - "pbjson-types", - "prost 0.11.8", - "prost-build", - "serde", - "tonic-build", -] - -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "generic_error" -version = "2.2.0-dev" - -[[package]] -name = "getrandom" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" -dependencies = [ - "cfg-if 1.0.0", - "js-sys", - "libc", - "wasi 0.11.0+wasi-snapshot-preview1", - "wasm-bindgen", -] - -[[package]] -name = "gimli" -version = "0.27.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4" - -[[package]] -name = "glob" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" - -[[package]] -name = "h2" -version = "0.3.26" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" -dependencies = [ - "bytes", - "fnv", - "futures-core", - "futures-sink", - "futures-util", - "http 0.2.9", - "indexmap 2.0.0", - "slab", - "tokio", - "tokio-util", - "tracing", -] - -[[package]] -name = "h2" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab" -dependencies = [ - "atomic-waker", - "bytes", - "fnv", - "futures-core", - "futures-sink", - "http 1.1.0", - "indexmap 2.0.0", - "slab", - "tokio", - "tokio-util", - "tracing", -] - -[[package]] -name = "half" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" - -[[package]] -name = "half" -version = "2.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b4af3693f1b705df946e9fe5631932443781d0aabb423b62fcd4d73f6d2fd0" -dependencies = [ - "crunchy", - "num-traits", -] - -[[package]] -name = "hash_ext" -version = "2.2.0-dev" -dependencies = [ - "ahash 0.8.3", - "byteorder", - "murmur3", - "seahash", -] - -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -dependencies = [ - "ahash 0.7.6", -] - -[[package]] -name = "hashbrown" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" -dependencies = [ - "ahash 0.8.3", -] - -[[package]] -name = "hashbrown" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" -dependencies = [ - "ahash 0.8.3", - "allocator-api2", -] - -[[package]] -name = "hdrhistogram" -version = "7.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f19b9f54f7c7f55e31401bb647626ce0cf0f67b0004982ce815b3ee72a02aa8" -dependencies = [ - "base64 0.13.1", - "byteorder", - "flate2", - "nom", - "num-traits", -] - -[[package]] -name = "headers" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3e372db8e5c0d213e0cd0b9be18be2aca3d44cf2fe30a9d46a65581cd454584" -dependencies = [ - "base64 0.13.1", - "bitflags 1.3.2", - "bytes", - "headers-core", - "http 0.2.9", - "httpdate", - "mime", - "sha1", -] - -[[package]] -name = "headers-core" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7f66481bfee273957b1f20485a4ff3362987f85b2c236580d81b4eb7a326429" -dependencies = [ - "http 0.2.9", -] - -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "hermit-abi" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" -dependencies = [ - "libc", -] - -[[package]] -name = "hermit-abi" -version = "0.3.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" - -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - -[[package]] -name = "hmac" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" -dependencies = [ - "digest", -] - -[[package]] -name = "home" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" -dependencies = [ - "windows-sys 0.52.0", -] - -[[package]] -name = "horaectl" -version = "2.2.0-dev" -dependencies = [ - "anyhow", - "chrono", - "clap", - "lazy_static", - "prettytable", - "reqwest 0.12.4", - "serde", - "shell-words", - "tokio", -] - -[[package]] -name = "horaedb" -version = "2.2.0-dev" -dependencies = [ - "analytic_engine", - "catalog", - "catalog_impls", - "clap", - "cluster", - "common_types", - "datafusion", - "df_operator", - "etcd-client", - "interpreters", - "logger", - "meta_client", - "moka", - "panic_ext", - "proxy", - "query_engine", - "router", - "runtime", - "serde", - "server", - "signal-hook", - "size_ext", - "table_engine", - "toml 0.7.3", - "toml_ext", - "tracing_util", - "vergen", - "wal", -] - -[[package]] -name = "horaedb-client" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bb850f1e1fc5d95b31278a842836652f0159e7f00d5e846e78c6076ae46b3b3" -dependencies = [ - "arrow 38.0.0", - "async-trait", - "dashmap", - "futures 0.3.28", - "horaedbproto 1.0.24", - "paste 1.0.12", - "thiserror", - "tokio", - "tonic 0.8.3", - "zstd 0.12.3+zstd.1.5.2", -] - -[[package]] -name = "horaedb-test" -version = "2.2.0-dev" -dependencies = [ - "anyhow", - "async-trait", - "horaedb-client", - "local-ip-address", - "reqwest 0.12.4", - "serde", - "sqlness", - "tokio", - "uuid", -] - -[[package]] -name = "horaedbproto" -version = "1.0.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5907c770ee20818978cf2050341ca2c4c7fb7888423ccb090cbb2fda250dfad7" -dependencies = [ - "prost 0.11.8", - "protoc-bin-vendored", - "tonic 0.8.3", - "tonic-build", - "walkdir", -] - -[[package]] -name = "horaedbproto" -version = "2.0.0" -source = "git+https://github.com/apache/incubator-horaedb-proto.git?rev=fac8564e6e3d50e51daa2af6eb905e747f3191b0#fac8564e6e3d50e51daa2af6eb905e747f3191b0" -dependencies = [ - "prost 0.11.8", - "protoc-bin-vendored", - "tonic 0.8.3", - "tonic-build", - "walkdir", -] - -[[package]] -name = "http" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - -[[package]] -name = "http" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - -[[package]] -name = "http-body" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" -dependencies = [ - "bytes", - "http 0.2.9", - "pin-project-lite", -] - -[[package]] -name = "http-body" -version 
= "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" -dependencies = [ - "bytes", - "http 1.1.0", -] - -[[package]] -name = "http-body-util" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" -dependencies = [ - "bytes", - "futures-util", - "http 1.1.0", - "http-body 1.0.0", - "pin-project-lite", -] - -[[package]] -name = "httparse" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" - -[[package]] -name = "httpdate" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" - -[[package]] -name = "humantime" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" -dependencies = [ - "quick-error", -] - -[[package]] -name = "humantime" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" - -[[package]] -name = "hyper" -version = "0.14.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc5e554ff619822309ffd57d8734d77cd5ce6238bc956f037ea06c58238c9899" -dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "h2 0.3.26", - "http 0.2.9", - "http-body 0.4.5", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "socket2 0.4.9", - "tokio", - "tower-service", - "tracing", - "want", -] - -[[package]] -name = "hyper" -version = "1.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe575dd17d0862a9a33781c8c4696a55c320909004a67a00fb286ba8b1bc496d" -dependencies = [ - "bytes", - "futures-channel", - "futures-util", - "h2 0.4.5", - "http 1.1.0", - "http-body 1.0.0", - "httparse", - "itoa", - "pin-project-lite", - "smallvec", - "tokio", - "want", -] - -[[package]] -name = "hyper-rustls" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" -dependencies = [ - "futures-util", - "http 0.2.9", - "hyper 0.14.25", - "rustls 0.21.6", - "tokio", - "tokio-rustls 0.24.1", -] - -[[package]] -name = "hyper-rustls" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c" -dependencies = [ - "futures-util", - "http 1.1.0", - "hyper 1.3.1", - "hyper-util", - "rustls 0.22.2", - "rustls-pki-types", - "tokio", - "tokio-rustls 0.25.0", - "tower-service", -] - -[[package]] -name = "hyper-timeout" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" -dependencies = [ - "hyper 0.14.25", - "pin-project-lite", - "tokio", - "tokio-io-timeout", -] - -[[package]] -name = "hyper-util" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b875924a60b96e5d7b9ae7b066540b1dd1cbd90d1828f54c92e02a283351c56" -dependencies = [ - "bytes", - "futures-channel", - "futures-util", - "http 1.1.0", - "http-body 1.0.0", - "hyper 1.3.1", 
- "pin-project-lite", - "socket2 0.5.3", - "tokio", - "tower", - "tower-service", - "tracing", -] - -[[package]] -name = "hyperloglog" -version = "1.0.2" -source = "git+https://github.com/jedisct1/rust-hyperloglog.git?rev=425487ce910f26636fbde8c4d640b538431aad50#425487ce910f26636fbde8c4d640b538431aad50" -dependencies = [ - "bytecount", - "rand 0.8.5", - "serde", - "siphasher", -] - -[[package]] -name = "iana-time-zone" -version = "0.1.55" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "716f12fbcfac6ffab0a5e9ec51d0a0ff70503742bb2dc7b99396394c9dc323f0" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "windows 0.47.0", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" -dependencies = [ - "cxx", - "cxx-build", -] - -[[package]] -name = "id_allocator" -version = "2.2.0-dev" -dependencies = [ - "generic_error", - "tokio", -] - -[[package]] -name = "ident_case" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" - -[[package]] -name = "idna" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" -dependencies = [ - "unicode-bidi", - "unicode-normalization", -] - -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - -[[package]] -name = "indexmap" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" -dependencies = [ - "equivalent", - "hashbrown 0.14.0", -] - -[[package]] -name = "inferno" -version = "0.11.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fb7c1b80a1dfa604bb4a649a5c5aeef3d913f7c520cb42b40e534e8a61bcdfc" -dependencies = [ - "ahash 0.8.3", - "indexmap 1.9.3", - "is-terminal", - "itoa", - "log", - "num-format", - "once_cell", - "quick-xml 0.26.0", - "rgb", - "str_stack", -] - -[[package]] -name = "influxdb-line-protocol" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1192416847e724d001c1f410cf348f27978a50b30f3473afbf73062cada2697a" -dependencies = [ - "bytes", - "log", - "nom", - "smallvec", - "snafu 0.7.4", -] - -[[package]] -name = "influxdb_influxql_parser" -version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=05a8a9f#05a8a9f79c5b8e3c6d324b214e7ccf910c2f6b73" -dependencies = [ - "chrono", - "chrono-tz", - "nom", - "num-traits", - "once_cell", -] - -[[package]] -name = "insta" -version = "1.31.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0770b0a3d4c70567f0d58331f3088b0e4c4f56c9b8d764efe654b4a5d46de3a" -dependencies = [ - "console", - "lazy_static", - "linked-hash-map", - "similar", - "yaml-rust", -] - -[[package]] -name = "instant" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" -dependencies = [ - "cfg-if 1.0.0", -] - 
-[[package]] -name = "integer-encoding" -version = "3.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" - -[[package]] -name = "interpreters" -version = "2.2.0-dev" -dependencies = [ - "analytic_engine", - "arrow 49.0.0", - "async-trait", - "catalog", - "catalog_impls", - "codec", - "common_types", - "datafusion", - "datafusion-proto", - "df_operator", - "futures 0.3.28", - "generic_error", - "hash_ext", - "lazy_static", - "logger", - "macros", - "meta_client", - "prometheus 0.12.0", - "query_engine", - "query_frontend", - "regex", - "runtime", - "snafu 0.6.10", - "table_engine", - "test_util", - "tokio", -] - -[[package]] -name = "io-enum" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5305557fa27b460072ae15ce07617e999f5879f14d376c8449f0bfb9f9d8e91e" -dependencies = [ - "derive_utils", - "syn 2.0.48", -] - -[[package]] -name = "io-lifetimes" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09270fd4fa1111bc614ed2246c7ef56239a3063d5be0d1ec3b589c505d400aeb" -dependencies = [ - "hermit-abi 0.3.1", - "libc", - "windows-sys 0.45.0", -] - -[[package]] -name = "iox_query" -version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=05a8a9f#05a8a9f79c5b8e3c6d324b214e7ccf910c2f6b73" -dependencies = [ - "arrow 49.0.0", - "arrow_util", - "async-trait", - "chrono", - "datafusion", - "datafusion_util", - "futures 0.3.28", - "hashbrown 0.13.2", - "observability_deps", - "once_cell", - "parking_lot 0.12.1", - "query_functions", - "schema", - "snafu 0.7.4", - "test_helpers", - "tokio", - "tokio-stream", -] - -[[package]] -name = "iox_query_influxql" -version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=05a8a9f#05a8a9f79c5b8e3c6d324b214e7ccf910c2f6b73" -dependencies = [ - "arrow 49.0.0", - "chrono", - "chrono-tz", - "datafusion", - "datafusion_util", - "generated_types", - "influxdb_influxql_parser", - "iox_query", - "itertools 0.10.5", - "observability_deps", - "once_cell", - "query_functions", - "regex", - "schema", - "serde_json", -] - -[[package]] -name = "ipnet" -version = "2.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12b6ee2129af8d4fb011108c73d99a1b83a85977f23b82460c0ae2e25bb4b57f" - -[[package]] -name = "is-terminal" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "256017f749ab3117e93acb91063009e1f1bb56d03965b14c2c8df4eb02c524d8" -dependencies = [ - "hermit-abi 0.3.1", - "io-lifetimes", - "rustix", - "windows-sys 0.45.0", -] - -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" - -[[package]] -name = "jemalloc-ctl" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c502a5ff9dd2924f1ed32ba96e3b65735d837b4bfd978d3161b1702e66aca4b7" -dependencies = [ - "jemalloc-sys", - "libc", - "paste 0.1.18", -] - -[[package]] -name = "jemalloc-sys" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d3b9f3f5c9b31aa0f5ed3260385ac205db665baa41d49bb8338008ae94ede45" -dependencies = [ - "cc", - "fs_extra", - "libc", -] - -[[package]] -name = "jemallocator" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43ae63fcfc45e99ab3d1b29a46782ad679e98436c3169d15a167a1108a724b69" -dependencies = [ - "jemalloc-sys", - "libc", -] - -[[package]] -name = "jobserver" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" -dependencies = [ - "libc", -] - -[[package]] -name = "js-sys" -version = "0.3.67" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a1d36f1235bc969acba30b7f5990b864423a6068a10f7c90ae8f0112e3a59d1" -dependencies = [ - "wasm-bindgen", -] - -[[package]] -name = "json_pretty" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c9fe3f290d2cb8660e3e051352ea55a404788d213a106a33ec8802447c4a762" - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - -[[package]] -name = "lexical" -version = "6.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7aefb36fd43fef7003334742cbf77b243fcd36418a1d1bdd480d613a67968f6" -dependencies = [ - "lexical-core", -] - -[[package]] -name = "lexical-core" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" -dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", -] - -[[package]] -name = "lexical-parse-float" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" -dependencies = [ - "lexical-parse-integer", - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-parse-integer" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-util" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" -dependencies = [ - "static_assertions", -] - -[[package]] -name = "lexical-write-float" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" -dependencies = [ - "lexical-util", - 
"lexical-write-integer", - "static_assertions", -] - -[[package]] -name = "lexical-write-integer" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "libc" -version = "0.2.152" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" - -[[package]] -name = "libloading" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" -dependencies = [ - "cfg-if 1.0.0", - "winapi", -] - -[[package]] -name = "libm" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" - -[[package]] -name = "librocksdb_sys" -version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git?rev=85e79e52c6ad80b8c547fcb90b3cade64f141fac#85e79e52c6ad80b8c547fcb90b3cade64f141fac" -dependencies = [ - "bindgen 0.65.1", - "bzip2-sys", - "cc", - "cmake", - "libc", - "libtitan_sys", - "libz-sys", - "lz4-sys", - "snappy-sys", - "zstd-sys", -] - -[[package]] -name = "libtitan_sys" -version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git?rev=85e79e52c6ad80b8c547fcb90b3cade64f141fac#85e79e52c6ad80b8c547fcb90b3cade64f141fac" -dependencies = [ - "bzip2-sys", - "cc", - "cmake", - "libc", - "libz-sys", - "lz4-sys", - "snappy-sys", - "zstd-sys", -] - -[[package]] -name = "libz-sys" -version = "1.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9702761c3935f8cc2f101793272e202c72b99da8f4224a19ddcf1279a6450bbf" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - -[[package]] -name = "link-cplusplus" -version = "1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" -dependencies = [ - "cc", -] - -[[package]] -name = "linked-hash-map" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" - -[[package]] -name = "linux-raw-sys" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" - -[[package]] -name = "local-ip-address" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "612ed4ea9ce5acfb5d26339302528a5e1e59dfed95e9e11af3c083236ff1d15d" -dependencies = [ - "libc", - "neli", - "thiserror", - "windows-sys 0.48.0", -] - -[[package]] -name = "lock_api" -version = "0.4.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if 1.0.0", -] - -[[package]] -name = "logger" -version = "2.2.0-dev" -dependencies = [ - "chrono", - "log", - "runtime", - "serde", - "slog", - "slog-async", - "slog-global", - "slog-term", -] - -[[package]] -name = "lru" -version = "0.7.8" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999beba7b6e8345721bd280141ed958096a2e4abdf74f67ff4ce49b4b54e47a" -dependencies = [ - "hashbrown 0.12.3", -] - -[[package]] -name = "lru" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "718e8fae447df0c7e1ba7f5189829e63fd536945c8988d61444c19039f16b670" -dependencies = [ - "hashbrown 0.13.2", -] - -[[package]] -name = "lz4" -version = "1.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1" -dependencies = [ - "libc", - "lz4-sys", -] - -[[package]] -name = "lz4-sys" -version = "1.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" -dependencies = [ - "cc", - "libc", -] - -[[package]] -name = "lz4_flex" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ea9b256699eda7b0387ffbc776dd625e28bde3918446381781245b7a50349d8" -dependencies = [ - "twox-hash", -] - -[[package]] -name = "lzma-sys" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "mach" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa" -dependencies = [ - "libc", -] - -[[package]] -name = "macros" -version = "2.2.0-dev" - -[[package]] -name = "matchers" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" -dependencies = [ - "regex-automata", -] - -[[package]] -name = "matchit" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b87248edafb776e59e6ee64a79086f65890d3510f2c656c000bf2a7e8a0aea40" - -[[package]] -name = "md-5" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" -dependencies = [ - "cfg-if 1.0.0", - "digest", -] - -[[package]] -name = "md5" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" - -[[package]] -name = "memchr" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" - -[[package]] -name = "memmap2" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" -dependencies = [ - "libc", -] - -[[package]] -name = "memmap2" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" -dependencies = [ - "libc", -] - -[[package]] -name = "memoffset" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" -dependencies = [ - "autocfg", -] - -[[package]] -name = "memoffset" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" -dependencies = [ - "autocfg", -] - -[[package]] -name = "message_queue" -version = "2.2.0-dev" -dependencies = [ - "async-trait", - "chrono", - "futures 0.3.28", - "logger", - "macros", - "rskafka", - "serde", - "snafu 0.6.10", - "time_ext", - "tokio", - "uuid", -] - -[[package]] -name = "meta_client" -version = "2.2.0-dev" -dependencies = [ - "async-trait", - "common_types", - "futures 0.3.28", - "generic_error", - "horaedbproto 2.0.0", - "logger", - "macros", - "prost 0.11.8", - "reqwest 0.12.4", - "serde", - "serde_json", - "snafu 0.6.10", - "table_engine", - "time_ext", - "tokio", - "tonic 0.8.3", - "url", -] - -[[package]] -name = "metric_ext" -version = "2.2.0-dev" -dependencies = [ - "crossbeam-utils", - "serde", - "serde_json", - "time_ext", -] - -[[package]] -name = "mime" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" - -[[package]] -name = "mime_guess" -version = "2.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef" -dependencies = [ - "mime", - "unicase", -] - -[[package]] -name = "minijinja" -version = "1.0.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb3bf58a1ec4f3f228bec851a2066c7717ad308817cd8a08f67c10660c6ff7b" -dependencies = [ - "serde", -] - -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - -[[package]] -name = "miniz_oxide" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" -dependencies = [ - "adler", -] - -[[package]] -name = "mio" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" -dependencies = [ - "libc", - "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.48.0", -] - -[[package]] -name = "moka" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ccedbe530334b20d6f57b2ca9f7c54c7bd1072a5a0b0035970aafd53982bd8d" -dependencies = [ - "async-io", - "async-lock", - "crossbeam-channel", - "crossbeam-epoch", - "crossbeam-utils", - "futures-util", - "num_cpus", - "once_cell", - "parking_lot 0.12.1", - "quanta", - "rustc_version", - "scheduled-thread-pool", - "skeptic", - "smallvec", - "tagptr", - "thiserror", - "triomphe", - "uuid", -] - -[[package]] -name = "multimap" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" - -[[package]] -name = "multipart" -version = "0.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00dec633863867f29cb39df64a397cdf4a6354708ddd7759f70c7fb51c5f9182" -dependencies = [ - "buf_redux", - "httparse", - "log", - "mime", - "mime_guess", - "quick-error", - "rand 0.8.5", - "safemem", - "tempfile", - "twoway", -] - -[[package]] -name = "murmur2" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb585ade2549a017db2e35978b77c319214fa4b37cede841e27954dd6e8f3ca8" - -[[package]] -name = "murmur3" -version = 
"0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a198f9589efc03f544388dfc4a19fe8af4323662b62f598b8dcfdac62c14771c" -dependencies = [ - "byteorder", -] - -[[package]] -name = "mysql" -version = "24.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfe2babc5f5b354eab9c0a0e40da3e69c4d77421c8b9b6ee03f97acc75bd7955" -dependencies = [ - "bufstream", - "bytes", - "crossbeam", - "flate2", - "io-enum", - "libc", - "lru 0.10.1", - "mysql_common 0.30.6", - "named_pipe", - "once_cell", - "pem 2.0.1", - "percent-encoding", - "rustls 0.21.6", - "rustls-pemfile 1.0.2", - "serde", - "serde_json", - "socket2 0.5.3", - "twox-hash", - "url", - "webpki", - "webpki-roots 0.23.1", -] - -[[package]] -name = "mysql-common-derive" -version = "0.30.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56b0d8a0db9bf6d2213e11f2c701cb91387b0614361625ab7b9743b41aa4938f" -dependencies = [ - "darling 0.20.3", - "heck", - "num-bigint", - "proc-macro-crate 1.3.1", - "proc-macro-error", - "proc-macro2", - "quote", - "syn 2.0.48", - "termcolor", - "thiserror", -] - -[[package]] -name = "mysql_common" -version = "0.29.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9006c95034ccf7b903d955f210469119f6c3477fc9c9e7a7845ce38a3e665c2a" -dependencies = [ - "base64 0.13.1", - "bigdecimal", - "bindgen 0.59.2", - "bitflags 1.3.2", - "bitvec", - "byteorder", - "bytes", - "cc", - "chrono", - "cmake", - "crc32fast", - "flate2", - "frunk", - "lazy_static", - "lexical", - "num-bigint", - "num-traits", - "rand 0.8.5", - "regex", - "rust_decimal", - "saturating", - "serde", - "serde_json", - "sha1", - "sha2", - "smallvec", - "subprocess", - "thiserror", - "time 0.3.20", - "uuid", -] - -[[package]] -name = "mysql_common" -version = "0.30.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57349d5a326b437989b6ee4dc8f2f34b0cc131202748414712a8e7d98952fc8c" -dependencies = [ - "base64 0.21.0", - "bigdecimal", - "bindgen 0.65.1", - "bitflags 2.3.3", - "bitvec", - "byteorder", - "bytes", - "cc", - "cmake", - "crc32fast", - "flate2", - "frunk", - "lazy_static", - "lexical", - "mysql-common-derive", - "num-bigint", - "num-traits", - "rand 0.8.5", - "regex", - "rust_decimal", - "saturating", - "serde", - "serde_json", - "sha1", - "sha2", - "smallvec", - "subprocess", - "thiserror", - "time 0.3.20", - "uuid", -] - -[[package]] -name = "named_pipe" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad9c443cce91fc3e12f017290db75dde490d685cdaaf508d7159d7cf41f0eb2b" -dependencies = [ - "winapi", -] - -[[package]] -name = "neli" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1100229e06604150b3becd61a4965d5c70f3be1759544ea7274166f4be41ef43" -dependencies = [ - "byteorder", - "libc", - "log", - "neli-proc-macros", -] - -[[package]] -name = "neli-proc-macros" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c168194d373b1e134786274020dae7fc5513d565ea2ebb9bc9ff17ffb69106d4" -dependencies = [ - "either", - "proc-macro2", - "quote", - "serde", - "syn 1.0.109", -] - -[[package]] -name = "net2" -version = "0.2.38" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74d0df99cfcd2530b2e694f6e17e7f37b8e26bb23983ac530c0c97408837c631" -dependencies = [ - "cfg-if 0.1.10", - "libc", - "winapi", -] - -[[package]] -name = "nix" -version = 
"0.22.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4916f159ed8e5de0082076562152a76b7a1f64a01fd9d1e0fea002c37624faf" -dependencies = [ - "bitflags 1.3.2", - "cc", - "cfg-if 1.0.0", - "libc", - "memoffset 0.6.5", -] - -[[package]] -name = "nix" -version = "0.26.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" -dependencies = [ - "bitflags 1.3.2", - "cfg-if 1.0.0", - "libc", - "static_assertions", -] - -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - -[[package]] -name = "notifier" -version = "2.2.0-dev" -dependencies = [ - "tokio", -] - -[[package]] -name = "ntapi" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" -dependencies = [ - "winapi", -] - -[[package]] -name = "nu-ansi-term" -version = "0.46.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" -dependencies = [ - "overload", - "winapi", -] - -[[package]] -name = "num" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - -[[package]] -name = "num-bigint" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-complex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-format" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" -dependencies = [ - "arrayvec", - "itoa", -] - -[[package]] -name = "num-integer" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" -dependencies = [ - "autocfg", - "num-traits", -] - -[[package]] -name = "num-iter" -version = "0.1.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" -dependencies = [ - "autocfg", - "num-bigint", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" -dependencies = [ - "autocfg", - "libm", -] - -[[package]] -name = "num_cpus" -version = "1.15.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" -dependencies = [ - "hermit-abi 0.2.6", - "libc", -] - -[[package]] -name = "num_threads" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" -dependencies = [ - "libc", -] - -[[package]] -name = "object" -version = "0.30.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea86265d3d3dcb6a27fc51bd29a4bf387fae9d2986b823079d4986af253eb439" -dependencies = [ - "memchr", -] - -[[package]] -name = "object_store" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2524735495ea1268be33d200e1ee97455096a0846295a21548cd2f3541de7050" -dependencies = [ - "async-trait", - "bytes", - "chrono", - "futures 0.3.28", - "humantime 2.1.0", - "itertools 0.11.0", - "parking_lot 0.12.1", - "percent-encoding", - "snafu 0.7.4", - "tokio", - "tracing", - "url", - "walkdir", -] - -[[package]] -name = "object_store" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbebfd32c213ba1907fa7a9c9138015a8de2b43e30c5aa45b18f7deb46786ad6" -dependencies = [ - "async-trait", - "bytes", - "chrono", - "futures 0.3.28", - "humantime 2.1.0", - "itertools 0.12.0", - "parking_lot 0.12.1", - "percent-encoding", - "snafu 0.7.4", - "tokio", - "tracing", - "url", - "walkdir", -] - -[[package]] -name = "object_store" -version = "2.2.0-dev" -dependencies = [ - "async-trait", - "bytes", - "chrono", - "clru", - "crc", - "futures 0.3.28", - "generic_error", - "hash_ext", - "horaedbproto 2.0.0", - "lazy_static", - "logger", - "lru 0.7.8", - "macros", - "notifier", - "object_store 0.10.1", - "object_store_opendal", - "opendal", - "partitioned_lock", - "prometheus 0.12.0", - "prometheus-static-metric", - "prost 0.11.8", - "rand 0.8.5", - "reqwest 0.12.4", - "runtime", - "serde", - "serde_json", - "size_ext", - "snafu 0.6.10", - "table_kv", - "tempfile", - "time_ext", - "tokio", - "twox-hash", - "uuid", -] - -[[package]] -name = "object_store_opendal" -version = "0.46.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7e5902fc99e9fb9e32c93f6a67dc5cc0772dc0fb348e2ef4ce258b03666d034" -dependencies = [ - "async-trait", - "bytes", - "flagset", - "futures 0.3.28", - "futures-util", - "object_store 0.10.1", - "opendal", - "pin-project", - "tokio", -] - -[[package]] -name = "obkv-table-client-rs" -version = "0.1.0" -source = "git+https://github.com/oceanbase/obkv-table-client-rs.git?rev=81cee5d55a2423686dee07163f1ec60f9e28272c#81cee5d55a2423686dee07163f1ec60f9e28272c" -dependencies = [ - "anyhow", - "byteorder", - "bytes", - "chrono", - "crossbeam", - "futures 0.1.31", - "futures-cpupool", - "lazy_static", - "log", - "murmur2", - "mysql", - "net2", - "pin-project-lite", - "prometheus-client", - "quick-error", - "r2d2", - "rand 0.8.5", - "reqwest 0.11.24", - "rust-crypto", - "scheduled-thread-pool", - "serde", - "serde_bytes", - "serde_derive", - "serde_json", - "spin 0.9.8", - "tokio", - "tokio-util", - "uuid", - "zstd 0.12.3+zstd.1.5.2", -] - -[[package]] -name = "observability_deps" -version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=05a8a9f#05a8a9f79c5b8e3c6d324b214e7ccf910c2f6b73" -dependencies = [ - "tracing", -] - -[[package]] -name = "once_cell" -version = "1.19.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" -dependencies = [ - "parking_lot_core 0.9.9", -] - -[[package]] -name = "oorandom" -version = "11.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" - -[[package]] -name = "opendal" -version = "0.49.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39d516adf7db912c38af382c3e92c27cd62fbbc240e630920555d784c2ab1494" -dependencies = [ - "anyhow", - "async-trait", - "backon", - "base64 0.22.1", - "bytes", - "chrono", - "crc32c", - "flagset", - "futures 0.3.28", - "getrandom", - "http 1.1.0", - "log", - "md-5", - "once_cell", - "percent-encoding", - "quick-xml 0.36.1", - "reqsign", - "reqwest 0.12.4", - "serde", - "serde_json", - "tokio", - "uuid", -] - -[[package]] -name = "opensrv-mysql" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bcb5fc2fda7e5e5f8478cd637285bbdd6196a9601e32293d0897e469a7dd020" -dependencies = [ - "async-trait", - "byteorder", - "chrono", - "mysql_common 0.29.2", - "nom", - "tokio", -] - -[[package]] -name = "ordered-float" -version = "2.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87" -dependencies = [ - "num-traits", -] - -[[package]] -name = "ordered-multimap" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79" -dependencies = [ - "dlv-list", - "hashbrown 0.14.0", -] - -[[package]] -name = "overload" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" - -[[package]] -name = "panic_ext" -version = "2.2.0-dev" -dependencies = [ - "backtrace", - "gag", - "libc", - "logger", - "nix 0.22.3", - "slog", - "slog-global", -] - -[[package]] -name = "papergrid" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7419ad52a7de9b60d33e11085a0fe3df1fbd5926aa3f93d3dd53afbc9e86725" -dependencies = [ - "bytecount", - "fnv", - "unicode-width", -] - -[[package]] -name = "parking" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72" - -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core 0.8.6", -] - -[[package]] -name = "parking_lot" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" -dependencies = [ - "lock_api", - "parking_lot_core 0.9.9", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" -dependencies = [ - "cfg-if 1.0.0", - "instant", - "libc", - "redox_syscall 0.2.16", - "smallvec", - "winapi", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.9" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" -dependencies = [ - "cfg-if 1.0.0", - "libc", - "redox_syscall 0.4.1", - "smallvec", - "windows-targets 0.48.1", -] - -[[package]] -name = "parquet" -version = "49.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af88740a842787da39b3d69ce5fbf6fce97d20211d3b299fee0a0da6430c74d4" -dependencies = [ - "ahash 0.8.3", - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-cast 49.0.0", - "arrow-data 49.0.0", - "arrow-ipc 49.0.0", - "arrow-schema 49.0.0", - "arrow-select 49.0.0", - "base64 0.21.0", - "brotli", - "bytes", - "chrono", - "flate2", - "futures 0.3.28", - "hashbrown 0.14.0", - "lz4_flex", - "num", - "num-bigint", - "object_store 0.8.0", - "paste 1.0.12", - "seq-macro", - "snap", - "thrift", - "tokio", - "twox-hash", - "zstd 0.13.0", -] - -[[package]] -name = "parquet_ext" -version = "2.2.0-dev" -dependencies = [ - "arrow 49.0.0", - "arrow_ext", - "async-trait", - "bytes", - "datafusion", - "futures 0.3.28", - "generic_error", - "logger", - "object_store 2.2.0-dev", - "parquet", - "tokio", -] - -[[package]] -name = "parse-zoneinfo" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41" -dependencies = [ - "regex", -] - -[[package]] -name = "partition_table_engine" -version = "2.2.0-dev" -dependencies = [ - "analytic_engine", - "arrow 49.0.0", - "async-trait", - "common_types", - "datafusion", - "df_engine_extensions", - "df_operator", - "futures 0.3.28", - "generic_error", - "lazy_static", - "logger", - "macros", - "prometheus 0.12.0", - "snafu 0.6.10", - "table_engine", -] - -[[package]] -name = "partitioned_lock" -version = "2.2.0-dev" -dependencies = [ - "hash_ext", - "tokio", -] - -[[package]] -name = "paste" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45ca20c77d80be666aef2b45486da86238fabe33e38306bd3118fe4af33fa880" -dependencies = [ - "paste-impl", - "proc-macro-hack", -] - -[[package]] -name = "paste" -version = "1.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79" - -[[package]] -name = "paste-impl" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d95a7db200b97ef370c8e6de0088252f7e0dfff7d047a28528e47456c0fc98b6" -dependencies = [ - "proc-macro-hack", -] - -[[package]] -name = "pbjson" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "048f9ac93c1eab514f9470c4bc8d97ca2a0a236b84f45cc19d69a59fc11467f6" -dependencies = [ - "base64 0.13.1", - "serde", -] - -[[package]] -name = "pbjson-build" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdbb7b706f2afc610f3853550cdbbf6372fd324824a087806bd4480ea4996e24" -dependencies = [ - "heck", - "itertools 0.10.5", - "prost 0.11.8", - "prost-types", -] - -[[package]] -name = "pbjson-types" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a88c8d87f99a4ac14325e7a4c24af190fca261956e3b82dd7ed67e77e6c7043" -dependencies = [ - "bytes", - "chrono", - "pbjson", - "pbjson-build", - "prost 0.11.8", - "prost-build", - "serde", -] - -[[package]] -name = "peeking_take_while" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" - -[[package]] -name = "pem" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b13fe415cdf3c8e44518e18a7c95a13431d9bdf6d15367d82b23c377fdd441a" -dependencies = [ - "base64 0.21.0", - "serde", -] - -[[package]] -name = "pem" -version = "3.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8fcc794035347fb64beda2d3b462595dd2753e3f268d89c5aae77e8cf2c310" -dependencies = [ - "base64 0.21.0", - "serde", -] - -[[package]] -name = "percent-encoding" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" - -[[package]] -name = "petgraph" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4" -dependencies = [ - "fixedbitset", - "indexmap 1.9.3", -] - -[[package]] -name = "pgwire" -version = "0.19.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17780c93587822c191c3f4d43fa5f6bc6df1e51b9f58a0be0cd1b7fd6e80d9e6" -dependencies = [ - "async-trait", - "base64 0.21.0", - "bytes", - "chrono", - "derive-new", - "futures 0.3.28", - "hex", - "log", - "md5", - "postgres-types", - "rand 0.8.5", - "ring 0.17.7", - "stringprep", - "thiserror", - "time 0.3.20", - "tokio", - "tokio-rustls 0.25.0", - "tokio-util", - "x509-certificate", -] - -[[package]] -name = "phf" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_codegen" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56ac890c5e3ca598bbdeaa99964edb5b0258a583a9eb6ef4e89fc85d9224770" -dependencies = [ - "phf_generator", - "phf_shared", -] - -[[package]] -name = "phf_generator" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf" -dependencies = [ - "phf_shared", - "rand 0.8.5", -] - -[[package]] -name = "phf_shared" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676" -dependencies = [ - "siphasher", -] - -[[package]] -name = "pin-project" -version = "1.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.48", -] - -[[package]] -name = "pin-project-lite" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "pkg-config" -version = "0.3.26" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" - -[[package]] -name = "plotters" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2538b639e642295546c50fcd545198c9d64ee2a38620a628724a3b266d5fbf97" -dependencies = [ - "num-traits", - "plotters-backend", - "plotters-svg", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "plotters-backend" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "193228616381fecdc1224c62e96946dfbc73ff4384fba576e052ff8c1bea8142" - -[[package]] -name = "plotters-svg" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a81d2759aae1dae668f783c308bc5c8ebd191ff4184aaa1b37f65a6ae5a56f" -dependencies = [ - "plotters-backend", -] - -[[package]] -name = "polling" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e1f879b2998099c2d69ab9605d145d5b661195627eccc680002c4918a7fb6fa" -dependencies = [ - "autocfg", - "bitflags 1.3.2", - "cfg-if 1.0.0", - "concurrent-queue", - "libc", - "log", - "pin-project-lite", - "windows-sys 0.45.0", -] - -[[package]] -name = "postgres-protocol" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b7fa9f396f51dffd61546fd8573ee20592287996568e6175ceb0f8699ad75d" -dependencies = [ - "base64 0.21.0", - "byteorder", - "bytes", - "fallible-iterator", - "hmac", - "md-5", - "memchr", - "rand 0.8.5", - "sha2", - "stringprep", -] - -[[package]] -name = "postgres-types" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f028f05971fe20f512bcc679e2c10227e57809a3af86a7606304435bc8896cd6" -dependencies = [ - "array-init", - "bytes", - "chrono", - "fallible-iterator", - "postgres-protocol", -] - -[[package]] -name = "pprof" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978385d59daf9269189d052ca8a84c1acfd0715c0599a5d5188d4acc078ca46a" -dependencies = [ - "backtrace", - "cfg-if 1.0.0", - "criterion", - "findshlibs", - "inferno", - "libc", - "log", - "nix 0.26.2", - "once_cell", - "parking_lot 0.12.1", - "smallvec", - "symbolic-demangle", - "tempfile", - "thiserror", -] - -[[package]] -name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - -[[package]] -name = "prettydiff" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d593ade80c7e334ad6bffbe003afac07948b88a0ae41aa321a5cd87abf260928" -dependencies = [ - "ansi_term", -] - -[[package]] -name = "prettyplease" -version = "0.1.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8646e95016a7a6c4adea95bafa8a16baab64b583356217f2c85db4a39d9a86" -dependencies = [ - "proc-macro2", - "syn 1.0.109", -] - -[[package]] -name = "prettyplease" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c64d9ba0963cdcea2e1b2230fbae2bab30eb25a174be395c41e764bfb65dd62" -dependencies = [ - "proc-macro2", - "syn 2.0.48", -] - -[[package]] -name = "prettytable" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46480520d1b77c9a3482d39939fcf96831537a250ec62d4fd8fbdf8e0302e781" -dependencies = [ - "csv", - 
"encode_unicode 1.0.0", - "is-terminal", - "lazy_static", - "term", - "unicode-width", -] - -[[package]] -name = "proc-macro-crate" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d6ea3c4595b96363c13943497db34af4460fb474a95c43f4446ad341b8c9785" -dependencies = [ - "toml 0.5.11", -] - -[[package]] -name = "proc-macro-crate" -version = "1.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" -dependencies = [ - "once_cell", - "toml_edit", -] - -[[package]] -name = "proc-macro-error" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn 1.0.109", - "version_check", -] - -[[package]] -name = "proc-macro-error-attr" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" -dependencies = [ - "proc-macro2", - "quote", - "version_check", -] - -[[package]] -name = "proc-macro-hack" -version = "0.5.20+deprecated" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" - -[[package]] -name = "proc-macro2" -version = "1.0.76" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "profile" -version = "2.2.0-dev" -dependencies = [ - "jemalloc-ctl", - "jemalloc-sys", - "jemallocator", - "logger", - "pprof", -] - -[[package]] -name = "prom-remote-api" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15f3c521e136d06204033bd68d20f7ef7c8503417e233d0f9fc41787a6883387" -dependencies = [ - "async-trait", - "bytes", - "futures 0.3.28", - "prost 0.11.8", - "prost-build", - "snap", - "warp", -] - -[[package]] -name = "prometheus" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5986aa8d62380092d2f50f8b1cdba9cb9b6731ffd4b25b51fd126b6c3e05b99c" -dependencies = [ - "cfg-if 1.0.0", - "fnv", - "lazy_static", - "memchr", - "parking_lot 0.11.2", - "protobuf", - "thiserror", -] - -[[package]] -name = "prometheus" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "449811d15fbdf5ceb5c1144416066429cf82316e2ec8ce0c1f6f8a02e7bbcf8c" -dependencies = [ - "cfg-if 1.0.0", - "fnv", - "lazy_static", - "memchr", - "parking_lot 0.12.1", - "thiserror", -] - -[[package]] -name = "prometheus-client" -version = "0.21.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78c2f43e8969d51935d2a7284878ae053ba30034cd563f673cde37ba5205685e" -dependencies = [ - "dtoa", - "itoa", - "parking_lot 0.12.1", - "prometheus-client-derive-encode", -] - -[[package]] -name = "prometheus-client-derive-encode" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b6a5217beb0ad503ee7fa752d451c905113d70721b937126158f3106a48cc1" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "prometheus-static-metric" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f8f30cdb09c39930b8fa5e0f23cbb895ab3f766b187403a0ba0956fc1ef4f0e5" -dependencies = [ - "lazy_static", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "prost" -version = "0.11.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e48e50df39172a3e7eb17e14642445da64996989bc212b583015435d39a58537" -dependencies = [ - "bytes", - "prost-derive 0.11.8", -] - -[[package]] -name = "prost" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c289cda302b98a28d40c8b3b90498d6e526dd24ac2ecea73e4e491685b94a" -dependencies = [ - "bytes", - "prost-derive 0.12.3", -] - -[[package]] -name = "prost-build" -version = "0.11.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a24be1d23b4552a012093e1b93697b73d644ae9590e3253d878d0e77d411b614" -dependencies = [ - "bytes", - "heck", - "itertools 0.10.5", - "lazy_static", - "log", - "multimap", - "petgraph", - "prettyplease 0.1.25", - "prost 0.11.8", - "prost-types", - "regex", - "syn 1.0.109", - "tempfile", - "which", -] - -[[package]] -name = "prost-derive" -version = "0.11.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ea9b0f8cbe5e15a8a042d030bd96668db28ecb567ec37d691971ff5731d2b1b" -dependencies = [ - "anyhow", - "itertools 0.10.5", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "prost-derive" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e" -dependencies = [ - "anyhow", - "itertools 0.11.0", - "proc-macro2", - "quote", - "syn 2.0.48", -] - -[[package]] -name = "prost-types" -version = "0.11.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "379119666929a1afd7a043aa6cf96fa67a6dce9af60c88095a4686dbce4c9c88" -dependencies = [ - "prost 0.11.8", -] - -[[package]] -name = "protobuf" -version = "2.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" - -[[package]] -name = "protoc-bin-vendored" -version = "3.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "005ca8623e5633e298ad1f917d8be0a44bcf406bf3cde3b80e63003e49a3f27d" -dependencies = [ - "protoc-bin-vendored-linux-aarch_64", - "protoc-bin-vendored-linux-ppcle_64", - "protoc-bin-vendored-linux-x86_32", - "protoc-bin-vendored-linux-x86_64", - "protoc-bin-vendored-macos-x86_64", - "protoc-bin-vendored-win32", -] - -[[package]] -name = "protoc-bin-vendored-linux-aarch_64" -version = "3.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fb9fc9cce84c8694b6ea01cc6296617b288b703719b725b8c9c65f7c5874435" - -[[package]] -name = "protoc-bin-vendored-linux-ppcle_64" -version = "3.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02d2a07dcf7173a04d49974930ccbfb7fd4d74df30ecfc8762cf2f895a094516" - -[[package]] -name = "protoc-bin-vendored-linux-x86_32" -version = "3.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54fef0b04fcacba64d1d80eed74a20356d96847da8497a59b0a0a436c9165b0" - -[[package]] -name = "protoc-bin-vendored-linux-x86_64" -version = "3.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8782f2ce7d43a9a5c74ea4936f001e9e8442205c244f7a3d4286bd4c37bc924" - -[[package]] -name = "protoc-bin-vendored-macos-x86_64" -version = 
"3.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5de656c7ee83f08e0ae5b81792ccfdc1d04e7876b1d9a38e6876a9e09e02537" - -[[package]] -name = "protoc-bin-vendored-win32" -version = "3.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9653c3ed92974e34c5a6e0a510864dab979760481714c172e0a34e437cb98804" - -[[package]] -name = "proxy" -version = "2.2.0-dev" -dependencies = [ - "arrow 49.0.0", - "arrow_ext", - "async-trait", - "base64 0.13.1", - "bytes", - "catalog", - "clru", - "cluster", - "common_types", - "datafusion", - "df_operator", - "futures 0.3.28", - "generic_error", - "horaedbproto 2.0.0", - "http 0.2.9", - "influxdb-line-protocol", - "interpreters", - "iox_query", - "itertools 0.10.5", - "json_pretty", - "lazy_static", - "logger", - "macros", - "meta_client", - "notifier", - "paste 1.0.12", - "prom-remote-api", - "prometheus 0.12.0", - "prometheus-static-metric", - "prost 0.11.8", - "query_engine", - "query_frontend", - "router", - "runtime", - "serde", - "serde_json", - "snafu 0.6.10", - "spin 0.9.8", - "sqlparser", - "system_catalog", - "table_engine", - "time_ext", - "timed_task", - "tokio", - "tokio-stream", - "tonic 0.8.3", - "warp", - "zstd 0.12.3+zstd.1.5.2", -] - -[[package]] -name = "ptr_meta" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" -dependencies = [ - "ptr_meta_derive", -] - -[[package]] -name = "ptr_meta_derive" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "pulldown-cmark" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d9cc634bc78768157b5cbfe988ffcd1dcba95cd2b2f03a88316c08c6d00ed63" -dependencies = [ - "bitflags 1.3.2", - "memchr", - "unicase", -] - -[[package]] -name = "quanta" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e31331286705f455e56cca62e0e717158474ff02b7936c1fa596d983f4ae27" -dependencies = [ - "crossbeam-utils", - "libc", - "mach", - "once_cell", - "raw-cpuid", - "wasi 0.10.2+wasi-snapshot-preview1", - "web-sys", - "winapi", -] - -[[package]] -name = "query_engine" -version = "2.2.0-dev" -dependencies = [ - "arrow 49.0.0", - "async-trait", - "bytes_ext", - "catalog", - "chrono", - "common_types", - "datafusion", - "datafusion-proto", - "df_engine_extensions", - "df_operator", - "futures 0.3.28", - "generic_error", - "iox_query", - "logger", - "macros", - "prost 0.11.8", - "query_frontend", - "runtime", - "serde", - "snafu 0.6.10", - "table_engine", - "time_ext", - "tokio", - "trace_metric", -] - -[[package]] -name = "query_frontend" -version = "2.2.0-dev" -dependencies = [ - "arrow 49.0.0", - "async-trait", - "catalog", - "chrono", - "cluster", - "codec", - "common_types", - "datafusion", - "datafusion-proto", - "df_operator", - "generic_error", - "hash_ext", - "horaedbproto 2.0.0", - "influxdb_influxql_parser", - "iox_query", - "iox_query_influxql", - "itertools 0.10.5", - "lazy_static", - "logger", - "macros", - "partition_table_engine", - "paste 1.0.12", - "prom-remote-api", - "regex", - "regex-syntax 0.6.29", - "runtime", - "schema", - "serde", - "serde_json", - "snafu 0.6.10", - "sqlparser", - "table_engine", - "tokio", -] - -[[package]] -name = 
"query_functions" -version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=05a8a9f#05a8a9f79c5b8e3c6d324b214e7ccf910c2f6b73" -dependencies = [ - "arrow 49.0.0", - "chrono", - "datafusion", - "itertools 0.10.5", - "observability_deps", - "once_cell", - "regex", - "regex-syntax 0.6.29", - "schema", - "snafu 0.7.4", -] - -[[package]] -name = "quick-error" -version = "1.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" - -[[package]] -name = "quick-xml" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd" -dependencies = [ - "memchr", -] - -[[package]] -name = "quick-xml" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86e446ed58cef1bbfe847bc2fda0e2e4ea9f0e57b90c507d4781292590d72a4e" -dependencies = [ - "memchr", - "serde", -] - -[[package]] -name = "quick-xml" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96a05e2e8efddfa51a84ca47cec303fac86c8541b686d37cac5efc0e094417bc" -dependencies = [ - "memchr", - "serde", -] - -[[package]] -name = "quote" -version = "1.0.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "r2d2" -version = "0.8.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51de85fb3fb6524929c8a2eb85e6b6d363de4e8c48f9e2c2eac4944abc181c93" -dependencies = [ - "log", - "parking_lot 0.12.1", - "scheduled-thread-pool", -] - -[[package]] -name = "radium" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" - -[[package]] -name = "rand" -version = "0.3.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64ac302d8f83c0c1974bf758f6b041c6c8ada916fbb44a609158ca8b064cc76c" -dependencies = [ - "libc", - "rand 0.4.6", -] - -[[package]] -name = "rand" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" -dependencies = [ - "fuchsia-cprng", - "libc", - "rand_core 0.3.1", - "rdrand", - "winapi", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_core" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" -dependencies = [ - "rand_core 0.4.2", -] - -[[package]] -name = "rand_core" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" - -[[package]] -name = "rand_core" -version = "0.6.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - -[[package]] -name = "raw-cpuid" -version = "10.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" -dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "rayon" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - -[[package]] -name = "rdrand" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" -dependencies = [ - "rand_core 0.3.1", -] - -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" -dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "redox_syscall" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" -dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "redox_syscall" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" -dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "redox_users" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" -dependencies = [ - "getrandom", - "redox_syscall 0.2.16", - "thiserror", -] - -[[package]] -name = "regex" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af83e617f331cc6ae2da5443c602dfa5af81e517212d9d611a5b3ba1777b5370" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax 0.7.1", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" -dependencies = [ - "regex-syntax 0.6.29", -] - -[[package]] -name = "regex-syntax" -version = "0.6.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - -[[package]] -name = "regex-syntax" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c" - -[[package]] -name = "regex-syntax" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" - -[[package]] -name = "remote_engine_client" -version = "2.2.0-dev" -dependencies = [ - "arrow_ext", - "async-trait", - "common_types", - "futures 0.3.28", - "generic_error", - "horaedbproto 2.0.0", - "logger", - "macros", - "router", - "runtime", - 
"serde", - "snafu 0.6.10", - "table_engine", - "time_ext", - "tokio", - "tonic 0.8.3", -] - -[[package]] -name = "rend" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "581008d2099240d37fb08d77ad713bcaec2c4d89d50b5b21a8bb1996bbab68ab" -dependencies = [ - "bytecheck", -] - -[[package]] -name = "reqsign" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03dd4ba7c3901dd43e6b8c7446a760d45bc1ea4301002e1a6fa48f97c3a796fa" -dependencies = [ - "anyhow", - "async-trait", - "base64 0.22.1", - "chrono", - "form_urlencoded", - "getrandom", - "hex", - "hmac", - "home", - "http 1.1.0", - "log", - "once_cell", - "percent-encoding", - "quick-xml 0.35.0", - "rand 0.8.5", - "reqwest 0.12.4", - "rust-ini", - "serde", - "serde_json", - "sha1", - "sha2", -] - -[[package]] -name = "reqwest" -version = "0.11.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6920094eb85afde5e4a138be3f2de8bbdf28000f0029e72c45025a56b042251" -dependencies = [ - "base64 0.21.0", - "bytes", - "encoding_rs", - "futures-core", - "futures-util", - "h2 0.3.26", - "http 0.2.9", - "http-body 0.4.5", - "hyper 0.14.25", - "hyper-rustls 0.24.2", - "ipnet", - "js-sys", - "log", - "mime", - "once_cell", - "percent-encoding", - "pin-project-lite", - "rustls 0.21.6", - "rustls-pemfile 1.0.2", - "serde", - "serde_json", - "serde_urlencoded", - "sync_wrapper", - "system-configuration", - "tokio", - "tokio-rustls 0.24.1", - "tower-service", - "url", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", - "webpki-roots 0.25.4", - "winreg 0.50.0", -] - -[[package]] -name = "reqwest" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "566cafdd92868e0939d3fb961bd0dc25fcfaaed179291093b3d43e6b3150ea10" -dependencies = [ - "base64 0.22.1", - "bytes", - "futures-core", - "futures-util", - "h2 0.4.5", - "http 1.1.0", - "http-body 1.0.0", - "http-body-util", - "hyper 1.3.1", - "hyper-rustls 0.26.0", - "hyper-util", - "ipnet", - "js-sys", - "log", - "mime", - "once_cell", - "percent-encoding", - "pin-project-lite", - "rustls 0.22.2", - "rustls-pemfile 2.1.2", - "rustls-pki-types", - "serde", - "serde_json", - "serde_urlencoded", - "sync_wrapper", - "tokio", - "tokio-rustls 0.25.0", - "tokio-util", - "tower-service", - "url", - "wasm-bindgen", - "wasm-bindgen-futures", - "wasm-streams", - "web-sys", - "webpki-roots 0.26.3", - "winreg 0.52.0", -] - -[[package]] -name = "rgb" -version = "0.8.36" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20ec2d3e3fc7a92ced357df9cebd5a10b6fb2aa1ee797bf7e9ce2f17dffc8f59" -dependencies = [ - "bytemuck", -] - -[[package]] -name = "ring" -version = "0.16.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" -dependencies = [ - "cc", - "libc", - "once_cell", - "spin 0.5.2", - "untrusted 0.7.1", - "web-sys", - "winapi", -] - -[[package]] -name = "ring" -version = "0.17.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74" -dependencies = [ - "cc", - "getrandom", - "libc", - "spin 0.9.8", - "untrusted 0.9.0", - "windows-sys 0.48.0", -] - -[[package]] -name = "rkyv" -version = "0.7.41" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21499ed91807f07ae081880aabb2ccc0235e9d88011867d984525e9a4c3cfa3e" 
-dependencies = [ - "bytecheck", - "hashbrown 0.12.3", - "ptr_meta", - "rend", - "rkyv_derive", - "seahash", -] - -[[package]] -name = "rkyv_derive" -version = "0.7.41" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac1c672430eb41556291981f45ca900a0239ad007242d1cb4b4167af842db666" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "rocksdb" -version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git?rev=85e79e52c6ad80b8c547fcb90b3cade64f141fac#85e79e52c6ad80b8c547fcb90b3cade64f141fac" -dependencies = [ - "libc", - "librocksdb_sys", -] - -[[package]] -name = "router" -version = "2.2.0-dev" -dependencies = [ - "async-trait", - "cluster", - "common_types", - "generic_error", - "horaedbproto 2.0.0", - "logger", - "macros", - "meta_client", - "moka", - "serde", - "snafu 0.6.10", - "table_engine", - "time_ext", - "tokio", - "twox-hash", -] - -[[package]] -name = "rskafka" -version = "0.4.0" -source = "git+https://github.com/Rachelint/rskafka.git?rev=f0fd8e278d8164cb0cfca5a80476361fc308ecc3#f0fd8e278d8164cb0cfca5a80476361fc308ecc3" -dependencies = [ - "async-trait", - "bytes", - "chrono", - "crc32c", - "flate2", - "futures 0.3.28", - "integer-encoding", - "lz4", - "parking_lot 0.12.1", - "pin-project-lite", - "rand 0.8.5", - "snap", - "thiserror", - "tokio", - "tracing", -] - -[[package]] -name = "runtime" -version = "2.2.0-dev" -dependencies = [ - "lazy_static", - "macros", - "pin-project-lite", - "prometheus 0.12.0", - "snafu 0.6.10", - "tokio", - "tokio-test", -] - -[[package]] -name = "rust-crypto" -version = "0.2.36" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f76d05d3993fd5f4af9434e8e436db163a12a9d40e1a58a726f27a01dfd12a2a" -dependencies = [ - "gcc", - "libc", - "rand 0.3.23", - "rustc-serialize", - "time 0.1.43", -] - -[[package]] -name = "rust-ini" -version = "0.21.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e310ef0e1b6eeb79169a1171daf9abcb87a2e17c03bee2c4bb100b55c75409f" -dependencies = [ - "cfg-if 1.0.0", - "ordered-multimap", - "trim-in-place", -] - -[[package]] -name = "rust-sdk-test" -version = "2.2.0-dev" -dependencies = [ - "horaedb-client", - "tokio", -] - -[[package]] -name = "rust_decimal" -version = "1.29.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26bd36b60561ee1fb5ec2817f198b6fd09fa571c897a5e86d1487cfc2b096dfc" -dependencies = [ - "arrayvec", - "borsh", - "bytecheck", - "byteorder", - "bytes", - "num-traits", - "rand 0.8.5", - "rkyv", - "serde", - "serde_json", -] - -[[package]] -name = "rustc-demangle" -version = "0.1.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4a36c42d1873f9a77c53bde094f9664d9891bc604a45b4798fd2c389ed12e5b" - -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - -[[package]] -name = "rustc-serialize" -version = "0.3.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe834bc780604f4674073badbad26d7219cadfb4a2275802db12cbae17498401" - -[[package]] -name = "rustc_version" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" -dependencies = [ - "semver", -] - -[[package]] -name = "rustix" -version = "0.37.5" +name = "bumpalo" +version = "3.16.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e78cc525325c06b4a7ff02db283472f3c042b7ff0c391f96c6d5ac6f4f91b75" -dependencies = [ - "bitflags 1.3.2", - "errno", - "io-lifetimes", - "libc", - "linux-raw-sys", - "windows-sys 0.45.0", -] +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] -name = "rustls" -version = "0.20.8" +name = "byteorder" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fff78fc74d175294f4e83b28343315ffcfb114b156f0185e9741cb5570f50e2f" -dependencies = [ - "log", - "ring 0.16.20", - "sct", - "webpki", -] +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] -name = "rustls" -version = "0.21.6" +name = "bytes" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1feddffcfcc0b33f5c6ce9a29e341e4cd59c3f78e7ee45f4a40c038b1d6cbb" -dependencies = [ - "log", - "ring 0.16.20", - "rustls-webpki 0.101.2", - "sct", -] +checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" [[package]] -name = "rustls" -version = "0.22.2" +name = "bytesize" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e87c9956bd9807afa1f77e0f7594af32566e830e088a5576d27c5b6f30f49d41" -dependencies = [ - "log", - "ring 0.17.7", - "rustls-pki-types", - "rustls-webpki 0.102.1", - "subtle", - "zeroize", -] +checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc" [[package]] -name = "rustls-pemfile" -version = "0.2.1" +name = "bzip2" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5eebeaeb360c87bfb72e84abdb3447159c0eaececf1bef2aecd65a8be949d1c9" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" dependencies = [ - "base64 0.13.1", + "bzip2-sys", + "libc", ] [[package]] -name = "rustls-pemfile" -version = "1.0.2" +name = "bzip2-sys" +version = "0.1.11+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" dependencies = [ - "base64 0.21.0", + "cc", + "libc", + "pkg-config", ] [[package]] -name = "rustls-pemfile" -version = "2.1.2" +name = "cc" +version = "1.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" +checksum = "b62ac837cdb5cb22e10a256099b4fc502b1dfe560cb282963a974d7abd80e476" dependencies = [ - "base64 0.22.1", - "rustls-pki-types", + "jobserver", + "libc", + "shlex", ] [[package]] -name = "rustls-pki-types" -version = "1.7.0" +name = "cfg-if" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] -name = "rustls-webpki" -version = "0.100.2" +name = "chrono" +version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e98ff011474fa39949b7e5c0428f9b4937eda7da7848bbb947786b7be0b27dab" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" dependencies = [ - "ring 0.16.20", - "untrusted 0.7.1", + "android-tzdata", + "iana-time-zone", + "num-traits", + "windows-targets", ] [[package]] -name = "rustls-webpki" -version = 
"0.101.2" +name = "chrono-tz" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "513722fd73ad80a71f72b61009ea1b584bcfa1483ca93949c8f290298837fa59" +checksum = "cd6dd8046d00723a59a2f8c5f295c515b9bb9a331ee4f8f3d4dd49e428acd3b6" dependencies = [ - "ring 0.16.20", - "untrusted 0.7.1", + "chrono", + "chrono-tz-build", + "phf", ] [[package]] -name = "rustls-webpki" -version = "0.102.1" +name = "chrono-tz-build" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef4ca26037c909dedb327b48c3327d0ba91d3dd3c4e05dad328f210ffb68e95b" +checksum = "e94fea34d77a245229e7746bd2beb786cd2a896f306ff491fb8cecb3074b10a7" dependencies = [ - "ring 0.17.7", - "rustls-pki-types", - "untrusted 0.9.0", + "parse-zoneinfo", + "phf_codegen", ] [[package]] -name = "rustversion" -version = "1.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" - -[[package]] -name = "ryu" -version = "1.0.13" +name = "colorchoice" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" [[package]] -name = "safemem" -version = "0.3.3" +name = "comfy-table" +version = "7.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" +checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +dependencies = [ + "strum", + "strum_macros", + "unicode-width", +] [[package]] -name = "same-file" -version = "1.0.6" +name = "const-random" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" dependencies = [ - "winapi-util", + "const-random-macro", ] [[package]] -name = "sampling_cache" -version = "2.2.0-dev" +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "chrono", + "getrandom", + "once_cell", + "tiny-keccak", ] [[package]] -name = "saturating" -version = "0.1.0" +name = "constant_time_eq" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ece8e78b2f38ec51c51f5d475df0a7187ba5111b2a28bdc761ee05b075d40a71" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" [[package]] -name = "scheduled-thread-pool" -version = "0.2.7" +name = "core-foundation-sys" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cbc66816425a074528352f5789333ecff06ca41b36b0b0efdfbb29edc391a19" -dependencies = [ - "parking_lot 0.12.1", -] +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] -name = "schema" -version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=05a8a9f#05a8a9f79c5b8e3c6d324b214e7ccf910c2f6b73" +name = "cpufeatures" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" dependencies = [ - "arrow 49.0.0", - "hashbrown 0.13.2", - "indexmap 1.9.3", - "itertools 
0.10.5", - "observability_deps", - "snafu 0.7.4", + "libc", ] [[package]] -name = "scoped-tls" -version = "1.0.1" +name = "crc32fast" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] [[package]] -name = "scopeguard" -version = "1.1.0" +name = "crossbeam-utils" +version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" [[package]] -name = "scratch" -version = "1.0.5" +name = "crunchy" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" [[package]] -name = "sct" -version = "0.7.0" +name = "crypto-common" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" dependencies = [ - "ring 0.16.20", - "untrusted 0.7.1", + "generic-array", + "typenum", ] [[package]] -name = "seahash" -version = "4.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" - -[[package]] -name = "semver" -version = "1.0.17" +name = "csv" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" dependencies = [ + "csv-core", + "itoa", + "ryu", "serde", ] [[package]] -name = "seq-macro" -version = "0.3.3" +name = "csv-core" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] [[package]] -name = "serde" -version = "1.0.196" +name = "dashmap" +version = "6.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" dependencies = [ - "serde_derive", + "cfg-if", + "crossbeam-utils", + "hashbrown", + "lock_api", + "once_cell", + "parking_lot_core", ] [[package]] -name = "serde_bytes" -version = "0.11.9" +name = "datafusion" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "416bda436f9aab92e02c8e10d49a15ddd339cea90b6e340fe51ed97abb548294" +checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" dependencies = [ - "serde", + "ahash", + "arrow", + "arrow-array", + "arrow-ipc", + "arrow-schema", + "async-compression", + "async-trait", + "bytes", + "bzip2", + "chrono", + "dashmap", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", + "datafusion-functions-window", + 
"datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-sql", + "flate2", + "futures", + "glob", + "half", + "hashbrown", + "indexmap", + "itertools 0.13.0", + "log", + "num_cpus", + "object_store", + "parking_lot", + "parquet", + "paste", + "pin-project-lite", + "rand", + "sqlparser", + "tempfile", + "tokio", + "tokio-util", + "url", + "uuid", + "xz2", + "zstd", ] [[package]] -name = "serde_derive" -version = "1.0.196" +name = "datafusion-catalog" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" +checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.48", + "arrow-schema", + "async-trait", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", ] [[package]] -name = "serde_json" -version = "1.0.95" +name = "datafusion-common" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744" +checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" dependencies = [ - "itoa", - "ryu", - "serde", + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "chrono", + "half", + "hashbrown", + "indexmap", + "instant", + "libc", + "num_cpus", + "object_store", + "parquet", + "paste", + "sqlparser", + "tokio", ] [[package]] -name = "serde_spanned" -version = "0.6.1" +name = "datafusion-common-runtime" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0efd8caf556a6cebd3b285caf480045fcc1ac04f6bd786b09a6f11af30c4fcf4" +checksum = "f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" dependencies = [ - "serde", + "log", + "tokio", ] [[package]] -name = "serde_urlencoded" -version = "0.7.1" +name = "datafusion-execution" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +checksum = "6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" dependencies = [ - "form_urlencoded", - "itoa", - "ryu", - "serde", + "arrow", + "chrono", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures", + "hashbrown", + "log", + "object_store", + "parking_lot", + "rand", + "tempfile", + "url", ] [[package]] -name = "server" -version = "2.2.0-dev" +name = "datafusion-expr" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" dependencies = [ - "analytic_engine", - "arc-swap 1.6.0", - "arrow 49.0.0", - "arrow_ext", - "async-trait", - "bytes_ext", - "catalog", - "clru", - "cluster", - "common_types", - "datafusion", - "derive_builder 0.12.0", - "df_operator", - "flate2", - "future_ext", - "futures 0.3.28", - "generic_error", - "horaedbproto 2.0.0", - "http 0.2.9", - "influxdb-line-protocol", - "interpreters", - "lazy_static", - "logger", - "macros", - "meta_client", - "notifier", - "once_cell", - "opensrv-mysql", - "partition_table_engine", - "paste 1.0.12", - "pgwire", - "profile", - "prom-remote-api", - "prometheus 0.12.0", - "prometheus-static-metric", - "prost 0.11.8", - "proxy", - "query_engine", - "query_frontend", - 
"regex", - "remote_engine_client", - "router", - "runtime", - "serde", + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "chrono", + "datafusion-common", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", + "indexmap", + "paste", "serde_json", - "size_ext", - "snafu 0.6.10", - "spin 0.9.8", "sqlparser", - "table_engine", - "time_ext", - "tokio", - "tokio-stream", - "tonic 0.8.3", - "wal", - "warp", - "zstd 0.12.3+zstd.1.5.2", + "strum", + "strum_macros", ] [[package]] -name = "sha-1" -version = "0.10.1" +name = "datafusion-expr-common" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5058ada175748e33390e40e872bd0fe59a19f265d0158daa551c5a88a76009c" +checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" dependencies = [ - "cfg-if 1.0.0", - "cpufeatures", - "digest", + "arrow", + "datafusion-common", + "itertools 0.13.0", + "paste", ] [[package]] -name = "sha1" -version = "0.10.5" +name = "datafusion-functions" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3" +checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" dependencies = [ - "cfg-if 1.0.0", - "cpufeatures", - "digest", + "arrow", + "arrow-buffer", + "base64", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "hashbrown", + "hex", + "itertools 0.13.0", + "log", + "md-5", + "rand", + "regex", + "sha2", + "unicode-segmentation", + "uuid", ] [[package]] -name = "sha2" -version = "0.10.6" +name = "datafusion-functions-aggregate" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" +checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" dependencies = [ - "cfg-if 1.0.0", - "cpufeatures", - "digest", + "ahash", + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "indexmap", + "log", + "paste", ] [[package]] -name = "sharded-slab" -version = "0.1.4" +name = "datafusion-functions-aggregate-common" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" +checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" dependencies = [ - "lazy_static", + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", + "rand", ] [[package]] -name = "shell-words" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" - -[[package]] -name = "shlex" -version = "1.3.0" +name = "datafusion-functions-nested" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +checksum = "fca25bbb87323716d05e54114666e942172ccca23c5a507e9c7851db6e965317" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + 
"datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-physical-expr-common", + "itertools 0.13.0", + "log", + "paste", + "rand", +] [[package]] -name = "signal-hook" -version = "0.3.15" +name = "datafusion-functions-window" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "732768f1176d21d09e076c23a93123d40bba92d50c4058da34d45c8de8e682b9" +checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" dependencies = [ - "libc", - "signal-hook-registry", + "datafusion-common", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "log", + "paste", ] [[package]] -name = "signal-hook-registry" -version = "1.4.1" +name = "datafusion-functions-window-common" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +checksum = "d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" dependencies = [ - "libc", + "datafusion-common", + "datafusion-physical-expr-common", ] [[package]] -name = "signature" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e1788eed21689f9cf370582dfc467ef36ed9c707f073528ddafa8d83e3b8500" - -[[package]] -name = "simdutf8" -version = "0.1.4" +name = "datafusion-optimizer" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" +checksum = "bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "hashbrown", + "indexmap", + "itertools 0.13.0", + "log", + "paste", + "regex-syntax 0.8.4", +] [[package]] -name = "similar" -version = "2.2.1" +name = "datafusion-physical-expr" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420acb44afdae038210c99e69aae24109f32f15500aa708e81d46c9f29d55fcf" +checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "arrow-string", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "half", + "hashbrown", + "indexmap", + "itertools 0.13.0", + "log", + "paste", + "petgraph", +] [[package]] -name = "siphasher" -version = "0.3.10" +name = "datafusion-physical-expr-common" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" +checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" dependencies = [ - "serde", + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "hashbrown", + "rand", ] [[package]] -name = "size_ext" -version = "2.2.0-dev" +name = "datafusion-physical-optimizer" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" dependencies = [ - "serde", - "toml 0.7.3", + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-plan", 
+ "itertools 0.13.0", ] [[package]] -name = "skeptic" -version = "0.13.7" +name = "datafusion-physical-plan" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16d23b015676c90a0f01c197bfdc786c20342c73a0afdda9025adb0bc42940a8" +checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" dependencies = [ - "bytecount", - "cargo_metadata", - "error-chain", - "glob", - "pulldown-cmark", - "tempfile", - "walkdir", + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + "hashbrown", + "indexmap", + "itertools 0.13.0", + "log", + "once_cell", + "parking_lot", + "pin-project-lite", + "rand", + "tokio", ] [[package]] -name = "skiplist" -version = "2.2.0-dev" +name = "datafusion-sql" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" dependencies = [ - "arena", - "bytes", - "criterion", - "rand 0.8.5", - "yatp", + "arrow", + "arrow-array", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "indexmap", + "log", + "regex", + "sqlparser", + "strum", ] [[package]] -name = "slab" -version = "0.4.8" +name = "digest" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ - "autocfg", + "block-buffer", + "crypto-common", + "subtle", ] [[package]] -name = "slog" -version = "2.7.0" +name = "either" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8347046d4ebd943127157b94d63abb990fcf729dc4e9978927fdf4ac3c998d06" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] -name = "slog-async" -version = "2.7.0" +name = "env_filter" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "766c59b252e62a34651412870ff55d8c4e6d04df19b43eecb2703e417b097ffe" +checksum = "4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab" dependencies = [ - "crossbeam-channel", - "slog", - "take_mut", - "thread_local", + "log", ] [[package]] -name = "slog-global" -version = "0.1.0" -source = "git+https://github.com/tikv/slog-global.git?rev=d592f88e4dbba5eb439998463054f1a44fbf17b9#d592f88e4dbba5eb439998463054f1a44fbf17b9" +name = "env_logger" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d" dependencies = [ - "arc-swap 0.4.8", - "lazy_static", + "anstream", + "anstyle", + "env_filter", "log", - "slog", ] [[package]] -name = "slog-term" -version = "2.9.0" +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87d29185c55b7b258b4f120eab00f48557d4d9bc814f41713f449d35b0f8977c" +checksum = 
"534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" dependencies = [ - "atty", - "slog", - "term", - "thread_local", - "time 0.3.20", + "libc", + "windows-sys 0.52.0", ] [[package]] -name = "smallvec" -version = "1.13.2" +name = "fastrand" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] -name = "snafu" -version = "0.6.10" +name = "fixedbitset" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eab12d3c261b2308b0d80c26fffb58d17eba81a4be97890101f416b478c79ca7" -dependencies = [ - "backtrace", - "doc-comment", - "snafu-derive 0.6.10", -] +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] -name = "snafu" -version = "0.7.4" +name = "flatbuffers" +version = "24.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb0656e7e3ffb70f6c39b3c2a86332bb74aa3c679da781642590f3c1118c5045" +checksum = "8add37afff2d4ffa83bc748a70b4b1370984f6980768554182424ef71447c35f" dependencies = [ - "doc-comment", - "snafu-derive 0.7.4", + "bitflags 1.3.2", + "rustc_version", ] [[package]] -name = "snafu-derive" -version = "0.6.10" +name = "flate2" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1508efa03c362e23817f96cde18abed596a25219a8b2c66e8db33c03543d315b" +checksum = "324a1be68054ef05ad64b861cc9eaf1d623d2d8cb25b4bf2cb9cdd902b4bf253" dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", + "crc32fast", + "miniz_oxide", ] [[package]] -name = "snafu-derive" -version = "0.7.4" +name = "form_urlencoded" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "475b3bbe5245c26f2d8a6f62d67c1f30eb9fffeccee721c45d162c3ebbdf81b2" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn 1.0.109", + "percent-encoding", ] [[package]] -name = "snap" -version = "1.1.0" +name = "futures" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" - -[[package]] -name = "snappy-sys" -version = "0.1.0" -source = "git+https://github.com/tikv/rust-snappy.git?branch=static-link#8c12738bad811397600455d6982aff754ea2ac44" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" dependencies = [ - "cmake", - "libc", - "pkg-config", + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", ] [[package]] -name = "socket2" -version = "0.4.9" +name = "futures-channel" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ - "libc", - "winapi", + "futures-core", + "futures-sink", ] [[package]] -name = "socket2" -version = "0.5.3" +name = "futures-core" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" + +[[package]] +name = "futures-executor" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "2538b18701741680e0322a2302176d3253a35388e2e62f172f64f4f16605f877" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" dependencies = [ - "libc", - "windows-sys 0.48.0", + "futures-core", + "futures-task", + "futures-util", ] [[package]] -name = "spin" -version = "0.5.2" +name = "futures-io" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" [[package]] -name = "spin" -version = "0.9.8" +name = "futures-macro" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ - "lock_api", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "spki" -version = "0.7.2" +name = "futures-sink" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" + +[[package]] +name = "futures-task" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d1e996ef02c474957d681f1b05213dfb0abab947b446a62d37770b23500184a" -dependencies = [ - "base64ct", - "der", -] +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" [[package]] -name = "sqlness" -version = "0.6.0" +name = "futures-util" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf503ec532a37b2444ef9de1f47b2e897b1322d8816d25e1824fc74760d32106" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ - "async-trait", - "derive_builder 0.11.2", - "minijinja", - "prettydiff", - "regex", - "serde_json", - "thiserror", - "toml 0.5.11", - "walkdir", + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", ] [[package]] -name = "sqlparser" -version = "0.39.0" +name = "generic-array" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "743b4dc2cbde11890ccb254a8fc9d537fa41b36da00de2a1c5e9848c9bc42bd7" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ - "log", - "serde", - "sqlparser_derive", + "typenum", + "version_check", ] [[package]] -name = "sqlparser_derive" -version = "0.1.1" +name = "getrandom" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55fe75cb4a364c7f7ae06c7dbbc8d84bddd85d6cdf9975963c3935bc1991761e" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", + "cfg-if", + "libc", + "wasi", ] [[package]] -name = "stable_deref_trait" -version = "1.2.0" +name = "gimli" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +checksum = "32085ea23f3234fc7846555e85283ba4de91e21016dc0455a16286d87a292d64" [[package]] -name = "static_assertions" -version = "1.1.0" +name = "glob" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" 
+checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] -name = "str_stack" -version = "0.1.0" +name = "half" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", +] [[package]] -name = "stringprep" -version = "0.1.3" +name = "hashbrown" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db3737bde7edce97102e0e2b15365bf7a20bfdb5f60f4f9e8d7004258a51a8da" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "ahash", + "allocator-api2", ] [[package]] -name = "strsim" -version = "0.10.0" +name = "heck" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] -name = "strsim" -version = "0.11.0" +name = "hermit-abi" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] -name = "strum" -version = "0.24.1" +name = "hex" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] -name = "strum" -version = "0.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +name = "horaedb-server" +version = "2.2.0-alpha" dependencies = [ - "strum_macros 0.25.1", + "futures", + "metric_engine", + "tokio", + "tracing", + "tracing-subscriber", ] [[package]] -name = "strum_macros" -version = "0.24.3" +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "iana-time-zone" +version = "0.1.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 1.0.109", + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", ] [[package]] -name = "strum_macros" -version = "0.25.1" +name = "iana-time-zone-haiku" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6069ca09d878a33f883cc06aaa9718ede171841d3832450354410b718b097232" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.48", + "cc", ] [[package]] -name = "subprocess" -version = "0.2.9" +name = "idna" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2e86926081dda636c546d8c5e641661049d7562a68f5488be4a1f7f66f6086" 
+checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" dependencies = [ - "libc", - "winapi", + "unicode-bidi", + "unicode-normalization", ] [[package]] -name = "subtle" +name = "indexmap" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" +dependencies = [ + "equivalent", + "hashbrown", +] [[package]] -name = "symbolic-common" -version = "12.3.0" +name = "instant" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "167a4ffd7c35c143fd1030aa3c2caf76ba42220bd5a6b5f4781896434723b8c3" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" dependencies = [ - "debugid", - "memmap2 0.5.10", - "stable_deref_trait", - "uuid", + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", ] [[package]] -name = "symbolic-demangle" -version = "12.3.0" +name = "integer-encoding" +version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e378c50e80686c1c5c205674e1f86a2858bec3d2a7dfdd690331a8a19330f293" -dependencies = [ - "cpp_demangle", - "rustc-demangle", - "symbolic-common", -] +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] -name = "syn" -version = "1.0.109" +name = "is_terminal_polyfill" +version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] -name = "syn" -version = "2.0.48" +name = "itertools" +version = "0.3.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b73f1c685cfd8ff8d75698ed87e6188cd09944b30c0863d45c2c3699d1da0c" + +[[package]] +name = "itertools" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", + "either", ] [[package]] -name = "sync_wrapper" -version = "0.1.2" +name = "itoa" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] -name = "sysinfo" -version = "0.30.5" +name = "jobserver" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fb4f3438c8f6389c864e61221cbc97e9bca98b4daf39a5beb7bea660f528bb2" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" dependencies = [ - "cfg-if 1.0.0", - "core-foundation-sys", "libc", - "ntapi", - "once_cell", - "windows 0.52.0", ] [[package]] -name = "system-configuration" -version = "0.5.1" +name = "js-sys" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" dependencies = [ - "bitflags 1.3.2", - "core-foundation", - "system-configuration-sys", + "wasm-bindgen", ] [[package]] -name = 
"system-configuration-sys" -version = "0.5.0" +name = "lazy_static" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" -dependencies = [ - "core-foundation-sys", - "libc", -] +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] -name = "system_catalog" -version = "2.2.0-dev" +name = "lexical-core" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" dependencies = [ - "arrow 49.0.0", - "async-trait", - "bytes_ext", - "catalog", - "codec", - "common_types", - "futures 0.3.28", - "generic_error", - "horaedbproto 2.0.0", - "logger", - "macros", - "prost 0.11.8", - "snafu 0.6.10", - "table_engine", - "tokio", - "trace_metric", + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", ] [[package]] -name = "system_stats" -version = "2.2.0-dev" +name = "lexical-parse-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" dependencies = [ - "sysinfo", - "tokio", + "lexical-parse-integer", + "lexical-util", + "static_assertions", ] [[package]] -name = "table_engine" -version = "2.2.0-dev" +name = "lexical-parse-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" dependencies = [ - "arrow 49.0.0", - "arrow_ext", - "async-trait", - "bytes_ext", - "common_types", - "datafusion", - "datafusion-proto", - "df_operator", - "futures 0.3.28", - "generic_error", - "hash_ext", - "horaedbproto 2.0.0", - "itertools 0.10.5", - "lazy_static", - "logger", - "macros", - "prost 0.11.8", - "rand 0.8.5", - "regex", - "runtime", - "serde", - "smallvec", - "snafu 0.6.10", - "time_ext", - "tokio", - "trace_metric", + "lexical-util", + "static_assertions", ] [[package]] -name = "table_kv" -version = "2.2.0-dev" +name = "lexical-util" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" dependencies = [ - "lazy_static", - "logger", - "macros", - "obkv-table-client-rs", - "prometheus 0.12.0", - "rand 0.8.5", - "serde", - "snafu 0.6.10", - "time_ext", + "static_assertions", ] [[package]] -name = "tabled" -version = "0.16.0" +name = "lexical-write-float" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77c9303ee60b9bedf722012ea29ae3711ba13a67c9b9ae28993838b63057cb1b" +checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" dependencies = [ - "papergrid", - "tabled_derive", + "lexical-util", + "lexical-write-integer", + "static_assertions", ] [[package]] -name = "tabled_derive" -version = "0.8.0" +name = "lexical-write-integer" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf0fb8bfdc709786c154e24a66777493fb63ae97e3036d914c8666774c477069" +checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" dependencies = [ - "heck", - "proc-macro-error", - "proc-macro2", - "quote", - "syn 1.0.109", + "lexical-util", + "static_assertions", ] [[package]] -name = "tagptr" -version = "0.2.0" +name = "libc" +version = "0.2.158" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" +checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" [[package]] -name = "take_mut" -version = "0.2.2" +name = "libm" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] -name = "tap" -version = "1.0.1" +name = "linux-raw-sys" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] -name = "tempfile" -version = "3.5.0" +name = "lock_api" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" dependencies = [ - "cfg-if 1.0.0", - "fastrand 1.9.0", - "redox_syscall 0.3.5", - "rustix", - "windows-sys 0.45.0", + "autocfg", + "scopeguard", ] [[package]] -name = "term" -version = "0.7.0" +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "lz4_flex" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" dependencies = [ - "dirs-next", - "rustversion", - "winapi", + "twox-hash", ] [[package]] -name = "termcolor" -version = "1.2.0" +name = "lzma-sys" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" dependencies = [ - "winapi-util", + "cc", + "libc", + "pkg-config", ] [[package]] -name = "test_helpers" +name = "matchers" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=05a8a9f#05a8a9f79c5b8e3c6d324b214e7ccf910c2f6b73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" dependencies = [ - "dotenvy", - "observability_deps", - "parking_lot 0.12.1", - "tempfile", - "tracing-log", - "tracing-subscriber", + "regex-automata 0.1.10", ] [[package]] -name = "test_util" -version = "2.2.0-dev" +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" dependencies = [ - "arrow 49.0.0", - "chrono", - "common_types", - "env_logger", + "cfg-if", + "digest", ] [[package]] -name = "thiserror" -version = "1.0.56" +name = "memchr" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "metric_engine" +version = "2.2.0-alpha" dependencies = [ - "thiserror-impl", + "anyhow", + "arrow", + "arrow-schema", + 
"async-scoped", + "async-trait", + "byteorder", + "bytes", + "bytesize", + "datafusion", + "futures", + "itertools 0.3.25", + "lazy_static", + "object_store", + "parquet", + "pb_types", + "prost", + "temp-dir", + "test-log", + "thiserror", + "tokio", + "tracing", ] [[package]] -name = "thiserror-impl" -version = "1.0.56" +name = "miniz_oxide" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.48", + "adler2", ] [[package]] -name = "thread_local" -version = "1.1.7" +name = "mio" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" dependencies = [ - "cfg-if 1.0.0", - "once_cell", + "hermit-abi", + "libc", + "wasi", + "windows-sys 0.52.0", ] [[package]] -name = "thrift" -version = "0.17.0" +name = "multimap" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" -dependencies = [ - "byteorder", - "integer-encoding", - "ordered-float", -] +checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" [[package]] -name = "time" -version = "0.1.43" +name = "nu-ansi-term" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" dependencies = [ - "libc", + "overload", "winapi", ] [[package]] -name = "time" -version = "0.3.20" +name = "num" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" dependencies = [ - "itoa", - "libc", - "num_threads", - "serde", - "time-core", - "time-macros", + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", ] [[package]] -name = "time-core" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" - -[[package]] -name = "time-macros" -version = "0.2.8" +name = "num-bigint" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ - "time-core", + "num-integer", + "num-traits", ] [[package]] -name = "time_ext" -version = "2.2.0-dev" +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" dependencies = [ - "chrono", - "common_types", - "horaedbproto 2.0.0", - "macros", - "serde", - "serde_json", - "snafu 0.6.10", - "toml 0.7.3", + "num-traits", ] [[package]] -name = "timed_task" -version = "2.2.0-dev" +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" dependencies = [ - "logger", - "runtime", - "tokio", + "num-traits", ] [[package]] -name = "tiny-keccak" -version = "2.0.2" +name = "num-iter" +version = "0.1.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" dependencies = [ - "crunchy", + "autocfg", + "num-integer", + "num-traits", ] [[package]] -name = "tinytemplate" -version = "1.2.1" +name = "num-rational" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" dependencies = [ - "serde", - "serde_json", + "num-bigint", + "num-integer", + "num-traits", ] [[package]] -name = "tinyvec" -version = "1.6.0" +name = "num-traits" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ - "tinyvec_macros", + "autocfg", + "libm", ] [[package]] -name = "tinyvec_macros" -version = "0.1.1" +name = "num_cpus" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] [[package]] -name = "tokio" -version = "1.29.1" +name = "object" +version = "0.36.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "532826ff75199d5833b9d2c5fe410f29235e25704ee5f0ef599fb51c21f4a4da" +checksum = "084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a" dependencies = [ - "autocfg", - "backtrace", - "bytes", - "libc", - "mio", - "num_cpus", - "parking_lot 0.12.1", - "pin-project-lite", - "signal-hook-registry", - "socket2 0.4.9", - "tokio-macros", - "tracing", - "windows-sys 0.48.0", + "memchr", ] [[package]] -name = "tokio-io-timeout" -version = "1.2.0" +name = "object_store" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b74022ada614a1b4834de765f9bb43877f910cc8ce4be40e89042c9223a8bf" +checksum = "25a0c4b3a0e31f8b66f71ad8064521efa773910196e2cde791436f13409f3b45" dependencies = [ - "pin-project-lite", + "async-trait", + "bytes", + "chrono", + "futures", + "humantime", + "itertools 0.13.0", + "parking_lot", + "percent-encoding", + "snafu", "tokio", + "tracing", + "url", + "walkdir", ] [[package]] -name = "tokio-macros" -version = "2.1.0" +name = "once_cell" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.48", -] +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] -name = "tokio-rustls" -version = "0.23.4" +name = "ordered-float" +version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" dependencies = [ - "rustls 0.20.8", - "tokio", - "webpki", 
+ "num-traits", ] [[package]] -name = "tokio-rustls" -version = "0.24.1" +name = "overload" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" -dependencies = [ - "rustls 0.21.6", - "tokio", -] +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] -name = "tokio-rustls" -version = "0.25.0" +name = "parking_lot" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" dependencies = [ - "rustls 0.22.2", - "rustls-pki-types", - "tokio", + "lock_api", + "parking_lot_core", ] [[package]] -name = "tokio-stream" -version = "0.1.12" +name = "parking_lot_core" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fb52b74f05dbf495a8fba459fdc331812b96aa086d9eb78101fa0d4569c3313" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", ] [[package]] -name = "tokio-test" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53474327ae5e166530d17f2d956afcb4f8a004de581b3cae10f12006bc8163e3" -dependencies = [ - "async-stream", +name = "parquet" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dea02606ba6f5e856561d8d507dba8bac060aefca2a6c0f1aa1d361fed91ff3e" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64", + "brotli", "bytes", - "futures-core", + "chrono", + "flate2", + "futures", + "half", + "hashbrown", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "snap", + "thrift", "tokio", - "tokio-stream", + "twox-hash", + "zstd", + "zstd-sys", ] [[package]] -name = "tokio-tungstenite" -version = "0.17.2" +name = "parse-zoneinfo" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f714dd15bead90401d77e04243611caec13726c2408afd5b31901dfcdcb3b181" +checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" dependencies = [ - "futures-util", - "log", - "tokio", - "tungstenite", + "regex", ] [[package]] -name = "tokio-util" -version = "0.7.7" +name = "paste" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5427d89453009325de0d8f342c9490009f76e999cb7672d77e46267448f7e6b2" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "pb_types" +version = "2.2.0-alpha" dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "pin-project-lite", - "tokio", - "tracing", + "prost", + "prost-build", ] [[package]] -name = "toml" -version = "0.5.11" +name = "percent-encoding" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" -dependencies = [ - "serde", -] +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] -name = "toml" -version = "0.7.3" +name = "petgraph" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "b403acf6f2bb0859c93c7f0d967cb4a75a7ac552100f9322faf64dc047669b21" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ - "serde", - "serde_spanned", - "toml_datetime", - "toml_edit", + "fixedbitset", + "indexmap", ] [[package]] -name = "toml_datetime" -version = "0.6.1" +name = "phf" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ab8ed2edee10b50132aed5f331333428b011c99402b5a534154ed15746f9622" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" dependencies = [ - "serde", + "phf_shared", ] [[package]] -name = "toml_edit" -version = "0.19.8" +name = "phf_codegen" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "239410c8609e8125456927e6707163a3b1fdb40561e4b803bc041f466ccfdc13" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" dependencies = [ - "indexmap 1.9.3", - "serde", - "serde_spanned", - "toml_datetime", - "winnow", + "phf_generator", + "phf_shared", ] [[package]] -name = "toml_ext" -version = "2.2.0-dev" +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" dependencies = [ - "macros", - "serde", - "snafu 0.6.10", - "tempfile", - "toml 0.7.3", + "phf_shared", + "rand", ] [[package]] -name = "tonic" -version = "0.8.3" +name = "phf_shared" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f219fad3b929bef19b1f86fbc0358d35daed8f2cac972037ac0dc10bbb8d5fb" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" dependencies = [ - "async-stream", - "async-trait", - "axum", - "base64 0.13.1", - "bytes", - "futures-core", - "futures-util", - "h2 0.3.26", - "http 0.2.9", - "http-body 0.4.5", - "hyper 0.14.25", - "hyper-timeout", - "percent-encoding", - "pin-project", - "prost 0.11.8", - "prost-derive 0.11.8", - "rustls-pemfile 1.0.2", - "tokio", - "tokio-rustls 0.23.4", - "tokio-stream", - "tokio-util", - "tower", - "tower-layer", - "tower-service", - "tracing", - "tracing-futures", + "siphasher", ] [[package]] -name = "tonic" -version = "0.9.2" +name = "pin-project" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3082666a3a6433f7f511c7192923fa1fe07c69332d3c6a2e6bb040b569199d5a" +checksum = "be57f64e946e500c8ee36ef6331845d40a93055567ec57e8fae13efd33759b95" dependencies = [ - "async-trait", - "axum", - "base64 0.21.0", - "bytes", - "futures-core", - "futures-util", - "h2 0.3.26", - "http 0.2.9", - "http-body 0.4.5", - "hyper 0.14.25", - "hyper-timeout", - "percent-encoding", - "pin-project", - "prost 0.11.8", - "tokio", - "tokio-stream", - "tower", - "tower-layer", - "tower-service", - "tracing", + "pin-project-internal", ] [[package]] -name = "tonic-build" -version = "0.8.4" +name = "pin-project-internal" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bf5e9b9c0f7e0a7c027dcfaba7b2c60816c7049171f679d99ee2ff65d0de8c4" +checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" dependencies = [ - "prettyplease 0.1.25", "proc-macro2", - "prost-build", "quote", - "syn 1.0.109", -] - -[[package]] -name = "tools" -version = "2.2.0-dev" -dependencies = [ - "analytic_engine", - "anyhow", - "clap", - "common_types", - "futures 0.3.28", - "generic_error", - "num_cpus", - 
"object_store 2.2.0-dev", - "parquet", - "parquet_ext", - "runtime", - "table_engine", - "tabled", - "time_ext", - "tokio", - "wal", + "syn", ] [[package]] -name = "tower" -version = "0.4.13" +name = "pin-project-lite" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" -dependencies = [ - "futures-core", - "futures-util", - "indexmap 1.9.3", - "pin-project", - "pin-project-lite", - "rand 0.8.5", - "slab", - "tokio", - "tokio-util", - "tower-layer", - "tower-service", - "tracing", -] +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" [[package]] -name = "tower-layer" -version = "0.3.2" +name = "pin-utils" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] -name = "tower-service" -version = "0.3.2" +name = "pkg-config" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] -name = "trace_metric" -version = "2.2.0-dev" +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" dependencies = [ - "trace_metric_derive", + "zerocopy", ] [[package]] -name = "trace_metric_derive" -version = "2.2.0-dev" +name = "prettyplease" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "910d41a655dac3b764f1ade94821093d3610248694320cd072303a8eedcf221d" dependencies = [ "proc-macro2", - "quote", - "syn 1.0.109", + "syn", ] [[package]] -name = "trace_metric_derive_tests" -version = "2.2.0-dev" +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ - "trace_metric", + "unicode-ident", ] [[package]] -name = "tracing" -version = "0.1.37" +name = "prost" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" dependencies = [ - "cfg-if 1.0.0", - "log", - "pin-project-lite", - "tracing-attributes", - "tracing-core", + "bytes", + "prost-derive", ] [[package]] -name = "tracing-appender" -version = "0.2.2" +name = "prost-build" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d48f71a791638519505cefafe162606f706c25592e4bde4d97600c0195312e" +checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" dependencies = [ - "crossbeam-channel", - "time 0.3.20", - "tracing-subscriber", + "bytes", + "heck", + "itertools 0.13.0", + "log", + "multimap", + "once_cell", + "petgraph", + "prettyplease", + "prost", + "prost-types", + "regex", + "syn", + "tempfile", ] [[package]] -name = "tracing-attributes" -version = "0.1.23" +name = "prost-derive" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a" +checksum = 
"e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" dependencies = [ + "anyhow", + "itertools 0.13.0", "proc-macro2", "quote", - "syn 1.0.109", + "syn", ] [[package]] -name = "tracing-core" -version = "0.1.30" +name = "prost-types" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a" +checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" dependencies = [ - "once_cell", - "valuable", + "prost", ] [[package]] -name = "tracing-futures" -version = "0.2.5" +name = "quote" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ - "pin-project", - "tracing", + "proc-macro2", ] [[package]] -name = "tracing-log" -version = "0.1.3" +name = "rand" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ddad33d2d10b1ed7eb9d1f518a5674713876e97e5bb9b7345a7984fbb4f922" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "lazy_static", - "log", - "tracing-core", + "libc", + "rand_chacha", + "rand_core", ] [[package]] -name = "tracing-subscriber" -version = "0.3.17" +name = "rand_chacha" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ - "matchers", - "nu-ansi-term", - "once_cell", - "regex", - "sharded-slab", - "smallvec", - "thread_local", - "tracing", - "tracing-core", - "tracing-log", + "ppv-lite86", + "rand_core", ] [[package]] -name = "tracing_util" -version = "2.2.0-dev" +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "console-subscriber", - "lazy_static", - "serde", - "tracing", - "tracing-appender", - "tracing-subscriber", + "getrandom", ] [[package]] -name = "trim-in-place" -version = "0.1.7" +name = "redox_syscall" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "343e926fc669bc8cde4fa3129ab681c63671bae288b1f1081ceee6d9d37904fc" +checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" +dependencies = [ + "bitflags 2.6.0", +] [[package]] -name = "triomphe" -version = "0.1.8" +name = "regex" +version = "1.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1ee9bd9239c339d714d657fac840c6d2a4f9c45f4f9ec7b0975113458be78db" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata 0.4.7", + "regex-syntax 0.8.4", +] [[package]] -name = "try-lock" -version = "0.2.4" +name = "regex-automata" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", +] [[package]] -name = "tungstenite" -version = "0.17.3" +name = "regex-automata" +version = "0.4.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e27992fd6a8c29ee7eef28fc78349aa244134e10ad447ce3b9f0ac0ed0fa4ce0" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" dependencies = [ - "base64 0.13.1", - "byteorder", - "bytes", - "http 0.2.9", - "httparse", - "log", - "rand 0.8.5", - "sha-1", - "thiserror", - "url", - "utf-8", + "aho-corasick", + "memchr", + "regex-syntax 0.8.4", ] [[package]] -name = "twoway" -version = "0.1.8" +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + +[[package]] +name = "rustc-demangle" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59b11b2b5241ba34be09c3cc85a36e56e48f9888862e19cedf23336d35316ed1" -dependencies = [ - "memchr", -] +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] -name = "twox-hash" -version = "1.6.3" +name = "rustc_version" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ - "cfg-if 1.0.0", - "rand 0.8.5", - "static_assertions", + "semver", ] [[package]] -name = "typenum" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" - -[[package]] -name = "unicase" -version = "2.6.0" +name = "rustix" +version = "0.38.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" +checksum = "3f55e80d50763938498dd5ebb18647174e0c76dc38c5505294bb224624f30f36" dependencies = [ - "version_check", + "bitflags 2.6.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", ] [[package]] -name = "unicode-bidi" -version = "0.3.13" +name = "rustversion" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" +checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" [[package]] -name = "unicode-ident" -version = "1.0.8" +name = "ryu" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] -name = "unicode-normalization" -version = "0.1.22" +name = "same-file" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" dependencies = [ - "tinyvec", + "winapi-util", ] [[package]] -name = "unicode-segmentation" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" - -[[package]] -name = "unicode-width" -version = "0.1.11" +name = "scopeguard" +version = "1.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] -name = "untrusted" -version = "0.7.1" +name = "semver" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] -name = "untrusted" -version = "0.9.0" +name = "seq-macro" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] -name = "url" -version = "2.3.1" +name = "serde" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", + "serde_derive", ] [[package]] -name = "utf-8" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" - -[[package]] -name = "utf8parse" -version = "0.2.1" +name = "serde_derive" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] -name = "uuid" -version = "1.6.1" +name = "serde_json" +version = "1.0.128" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" +checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" dependencies = [ - "getrandom", - "rand 0.8.5", + "itoa", + "memchr", + "ryu", "serde", - "uuid-macro-internal", ] [[package]] -name = "uuid-macro-internal" -version = "1.6.1" +name = "sha2" +version = "0.10.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f49e7f3f3db8040a100710a11932239fd30697115e2ba4107080d8252939845e" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.48", + "cfg-if", + "cpufeatures", + "digest", ] [[package]] -name = "valuable" -version = "0.1.0" +name = "sharded-slab" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] [[package]] -name = "vcpkg" -version = "0.2.15" +name = "shlex" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] -name = "vergen" -version = "8.2.1" +name = "signal-hook-registry" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b3c89c2c7e50f33e4d35527e5bf9c11d6d132226dbbd1753f0fbe9f19ef88c6" 
+checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" dependencies = [ - "anyhow", - "rustc_version", - "rustversion", - "time 0.3.20", + "libc", ] [[package]] -name = "version_check" -version = "0.9.4" +name = "siphasher" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" [[package]] -name = "waker-fn" -version = "1.1.0" +name = "slab" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d5b2c62b4012a3e1eca5a7e077d13b3bf498c4073e33ccd58626607748ceeca" - -[[package]] -name = "wal" -version = "2.2.0-dev" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" dependencies = [ - "anyhow", - "async-scoped", - "async-trait", - "bytes_ext", - "chrono", - "codec", - "common_types", - "crc32fast", - "futures 0.3.28", - "generic_error", - "horaedbproto 2.0.0", - "lazy_static", - "logger", - "macros", - "memmap2 0.9.4", - "message_queue", - "prometheus 0.12.0", - "prost 0.11.8", - "rand 0.8.5", - "rayon", - "rocksdb", - "runtime", - "serde", - "serde_json", - "size_ext", - "smallvec", - "snafu 0.6.10", - "table_kv", - "tempfile", - "time_ext", - "timed_task", - "tokio", - "uuid", + "autocfg", ] [[package]] -name = "walkdir" -version = "2.3.3" +name = "smallvec" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" -dependencies = [ - "same-file", - "winapi-util", -] +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] -name = "want" -version = "0.3.0" +name = "snafu" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0" +checksum = "2b835cb902660db3415a672d862905e791e54d306c6e8189168c7f3d9ae1c79d" dependencies = [ - "log", - "try-lock", + "snafu-derive", ] [[package]] -name = "warp" -version = "0.3.3" +name = "snafu-derive" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed7b8be92646fc3d18b06147664ebc5f48d222686cb11a8755e561a735aacc6d" +checksum = "38d1e02fca405f6280643174a50c942219f0bbf4dbf7d480f1dd864d6f211ae5" dependencies = [ - "bytes", - "futures-channel", - "futures-util", - "headers", - "http 0.2.9", - "hyper 0.14.25", - "log", - "mime", - "mime_guess", - "multipart", - "percent-encoding", - "pin-project", - "rustls-pemfile 0.2.1", - "scoped-tls", - "serde", - "serde_json", - "serde_urlencoded", - "tokio", - "tokio-stream", - "tokio-tungstenite", - "tokio-util", - "tower-service", - "tracing", + "heck", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" +name = "snap" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" +name = "socket2" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +dependencies = [ + 
"libc", + "windows-sys 0.52.0", +] [[package]] -name = "wasm-bindgen" -version = "0.2.90" +name = "sqlparser" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1223296a201415c7fad14792dbefaace9bd52b62d33453ade1c5b5f07555406" +checksum = "5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" dependencies = [ - "cfg-if 1.0.0", - "wasm-bindgen-macro", + "log", + "sqlparser_derive", ] [[package]] -name = "wasm-bindgen-backend" -version = "0.2.90" +name = "sqlparser_derive" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcdc935b63408d58a32f8cc9738a0bffd8f05cc7c002086c6ef20b7312ad9dcd" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ - "bumpalo", - "log", - "once_cell", "proc-macro2", "quote", - "syn 2.0.48", - "wasm-bindgen-shared", + "syn", ] [[package]] -name = "wasm-bindgen-futures" -version = "0.4.40" +name = "static_assertions" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bde2032aeb86bdfaecc8b261eef3cba735cc426c1f3a3416d1e0791be95fc461" -dependencies = [ - "cfg-if 1.0.0", - "js-sys", - "wasm-bindgen", - "web-sys", -] +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] -name = "wasm-bindgen-macro" -version = "0.2.90" +name = "strum" +version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e4c238561b2d428924c49815533a8b9121c664599558a5d9ec51f8a1740a999" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" dependencies = [ - "quote", - "wasm-bindgen-macro-support", + "strum_macros", ] [[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.90" +name = "strum_macros" +version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bae1abb6806dc1ad9e560ed242107c0f6c84335f1749dd4e8ddb012ebd5e25a7" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" dependencies = [ + "heck", "proc-macro2", "quote", - "syn 2.0.48", - "wasm-bindgen-backend", - "wasm-bindgen-shared", + "rustversion", + "syn", ] [[package]] -name = "wasm-bindgen-shared" -version = "0.2.90" +name = "subtle" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] -name = "wasm-streams" -version = "0.4.0" +name = "syn" +version = "2.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129" +checksum = "83540f837a8afc019423a8edb95b52a8effe46957ee402287f4292fae35be021" dependencies = [ - "futures-util", - "js-sys", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", + "proc-macro2", + "quote", + "unicode-ident", ] [[package]] -name = "web-sys" -version = "0.3.67" +name = "temp-dir" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc1ee6eef34f12f765cb94725905c6312b6610ab2b0940889cfe58dae7bc3c72" + +[[package]] +name = "tempfile" +version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58cd2333b6e0be7a39605f0e255892fd7418a682d8da8fe042fe25128794d2ed" +checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" dependencies = [ - "js-sys", - "wasm-bindgen", + 
"cfg-if", + "fastrand", + "once_cell", + "rustix", + "windows-sys 0.59.0", ] [[package]] -name = "webpki" -version = "0.22.2" +name = "test-log" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07ecc0cd7cac091bf682ec5efa18b1cff79d617b84181f38b3951dbe135f607f" +checksum = "3dffced63c2b5c7be278154d76b479f9f9920ed34e7574201407f0b14e2bbb93" dependencies = [ - "ring 0.16.20", - "untrusted 0.7.1", + "env_logger", + "test-log-macros", + "tracing-subscriber", ] [[package]] -name = "webpki-roots" -version = "0.23.1" +name = "test-log-macros" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b03058f88386e5ff5310d9111d53f48b17d732b401aeb83a8d5190f2ac459338" +checksum = "5999e24eaa32083191ba4e425deb75cdf25efefabe5aaccb7446dd0d4122a3f5" dependencies = [ - "rustls-webpki 0.100.2", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "webpki-roots" -version = "0.25.4" +name = "thiserror" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" +checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" +dependencies = [ + "thiserror-impl", +] [[package]] -name = "webpki-roots" -version = "0.26.3" +name = "thiserror-impl" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd7c23921eeb1713a4e851530e9b9756e4fb0e89978582942612524cf09f01cd" +checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ - "rustls-pki-types", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "which" -version = "4.4.0" +name = "thread_local" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" dependencies = [ - "either", - "libc", + "cfg-if", "once_cell", ] [[package]] -name = "winapi" -version = "0.3.9" +name = "thrift" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", + "byteorder", + "integer-encoding", + "ordered-float", ] [[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" +name = "tiny-keccak" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] [[package]] -name = "winapi-util" -version = "0.1.5" +name = "tinyvec" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" dependencies = [ - "winapi", + "tinyvec_macros", ] [[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" +name = "tinyvec_macros" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +checksum = 
"1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] -name = "windows" -version = "0.47.0" +name = "tokio" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2649ff315bee4c98757f15dac226efe3d81927adbb6e882084bb1ee3e0c330a7" +checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" dependencies = [ - "windows-targets 0.47.0", + "backtrace", + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.52.0", ] [[package]] -name = "windows" -version = "0.52.0" +name = "tokio-macros" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ - "windows-core", - "windows-targets 0.52.0", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "windows-core" -version = "0.52.0" +name = "tokio-util" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" dependencies = [ - "windows-targets 0.52.0", + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", ] [[package]] -name = "windows-sys" -version = "0.45.0" +name = "tracing" +version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "windows-targets 0.42.2", + "pin-project-lite", + "tracing-attributes", + "tracing-core", ] [[package]] -name = "windows-sys" -version = "0.48.0" +name = "tracing-attributes" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ - "windows-targets 0.48.1", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "windows-sys" -version = "0.52.0" +name = "tracing-core" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ - "windows-targets 0.52.0", + "once_cell", + "valuable", ] [[package]] -name = "windows-targets" -version = "0.42.2" +name = "tracing-log" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", + "log", + "once_cell", + "tracing-core", ] [[package]] -name = "windows-targets" -version = "0.47.0" +name = "tracing-subscriber" +version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f8996d3f43b4b2d44327cd71b7b0efd1284ab60e6e9d0e8b630e18555d87d3e" +checksum = 
"ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" dependencies = [ - "windows_aarch64_gnullvm 0.47.0", - "windows_aarch64_msvc 0.47.0", - "windows_i686_gnu 0.47.0", - "windows_i686_msvc 0.47.0", - "windows_x86_64_gnu 0.47.0", - "windows_x86_64_gnullvm 0.47.0", - "windows_x86_64_msvc 0.47.0", + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", ] [[package]] -name = "windows-targets" -version = "0.48.1" +name = "twox-hash" +version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "windows_aarch64_gnullvm 0.48.0", - "windows_aarch64_msvc 0.48.0", - "windows_i686_gnu 0.48.0", - "windows_i686_msvc 0.48.0", - "windows_x86_64_gnu 0.48.0", - "windows_x86_64_gnullvm 0.48.0", - "windows_x86_64_msvc 0.48.0", + "cfg-if", + "static_assertions", ] [[package]] -name = "windows-targets" -version = "0.52.0" +name = "typenum" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" -dependencies = [ - "windows_aarch64_gnullvm 0.52.0", - "windows_aarch64_msvc 0.52.0", - "windows_i686_gnu 0.52.0", - "windows_i686_msvc 0.52.0", - "windows_x86_64_gnu 0.52.0", - "windows_x86_64_gnullvm 0.52.0", - "windows_x86_64_msvc 0.52.0", -] +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.2" +name = "unicode-bidi" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] -name = "windows_aarch64_gnullvm" -version = "0.47.0" +name = "unicode-ident" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "831d567d53d4f3cb1db332b68e6e2b6260228eb4d99a777d8b2e8ed794027c90" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.0" +name = "unicode-normalization" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" +dependencies = [ + "tinyvec", +] [[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.0" +name = "unicode-segmentation" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" +checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" [[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" +name = "unicode-width" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" +checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" [[package]] -name = "windows_aarch64_msvc" -version = "0.47.0" +name = "url" +version = "2.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6a42d54a417c60ce4f0e31661eed628f0fa5aca73448c093ec4d45fab4c51cdf" +checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] [[package]] -name = "windows_aarch64_msvc" -version = "0.48.0" +name = "utf8parse" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] -name = "windows_aarch64_msvc" -version = "0.52.0" +name = "uuid" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" +checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" +dependencies = [ + "getrandom", +] [[package]] -name = "windows_i686_gnu" -version = "0.42.2" +name = "valuable" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" [[package]] -name = "windows_i686_gnu" -version = "0.47.0" +name = "version_check" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1925beafdbb22201a53a483db861a5644123157c1c3cee83323a2ed565d71e3" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] -name = "windows_i686_gnu" -version = "0.48.0" +name = "walkdir" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] [[package]] -name = "windows_i686_gnu" -version = "0.52.0" +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] -name = "windows_i686_msvc" -version = "0.42.2" +name = "wasm-bindgen" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" +checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] [[package]] -name = "windows_i686_msvc" -version = "0.47.0" +name = "wasm-bindgen-backend" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a8ef8f2f1711b223947d9b69b596cf5a4e452c930fb58b6fc3fdae7d0ec6b31" +checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] [[package]] -name = "windows_i686_msvc" -version = "0.48.0" +name = "wasm-bindgen-macro" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" +checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] [[package]] -name = "windows_i686_msvc" 
-version = "0.52.0" +name = "wasm-bindgen-macro-support" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" +checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] [[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" +name = "wasm-bindgen-shared" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" +checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" [[package]] -name = "windows_x86_64_gnu" -version = "0.47.0" +name = "web-sys" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7acaa0c2cf0d2ef99b61c308a0c3dbae430a51b7345dedec470bd8f53f5a3642" +checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" +dependencies = [ + "js-sys", + "wasm-bindgen", +] [[package]] -name = "windows_x86_64_gnu" -version = "0.48.0" +name = "winapi" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] [[package]] -name = "windows_x86_64_gnu" -version = "0.52.0" +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" +name = "winapi-util" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] [[package]] -name = "windows_x86_64_gnullvm" -version = "0.47.0" +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a0628f71be1d11e17ca4a0e9e15b3a5180f6fbf1c2d55e3ba3f850378052c1" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.0" +name = "windows-core" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets", +] [[package]] -name = "windows_x86_64_gnullvm" +name = "windows-sys" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] [[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" +name = "windows-sys" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" 
+checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] [[package]] -name = "windows_x86_64_msvc" -version = "0.47.0" +name = "windows-targets" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d6e62c256dc6d40b8c8707df17df8d774e60e39db723675241e7c15e910bce7" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] [[package]] -name = "windows_x86_64_msvc" -version = "0.48.0" +name = "windows_aarch64_gnullvm" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] -name = "windows_x86_64_msvc" -version = "0.52.0" +name = "windows_aarch64_msvc" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] -name = "winnow" -version = "0.4.1" +name = "windows_i686_gnu" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae8970b36c66498d8ff1d66685dc86b91b29db0c7739899012f63a63814b4b28" -dependencies = [ - "memchr", -] +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] -name = "winreg" -version = "0.50.0" +name = "windows_i686_gnullvm" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" -dependencies = [ - "cfg-if 1.0.0", - "windows-sys 0.48.0", -] +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] -name = "winreg" -version = "0.52.0" +name = "windows_i686_msvc" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" -dependencies = [ - "cfg-if 1.0.0", - "windows-sys 0.48.0", -] +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] -name = "wyz" -version = "0.5.1" +name = "windows_x86_64_gnu" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" -dependencies = [ - "tap", -] +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] -name = "x509-certificate" -version = "0.23.1" +name = "windows_x86_64_gnullvm" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66534846dec7a11d7c50a74b7cdb208b9a581cad890b7866430d438455847c85" -dependencies = [ - "bcder", - "bytes", - "chrono", - "der", - "hex", - "pem 3.0.3", - "ring 0.17.7", - "signature", - "spki", - "thiserror", - "zeroize", -] +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] -name = "xorfilter-rs" -version = "0.6.0" -source = "git+https://github.com/CeresDB/xorfilter?rev=ac8ef01#ac8ef01a800b84b568bb55ec7d84fdf410616929" +name = "windows_x86_64_msvc" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "xz2" @@ -8892,95 +2965,50 @@ dependencies = [ ] [[package]] -name = "yaml-rust" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" -dependencies = [ - "linked-hash-map", -] - -[[package]] -name = "yatp" -version = "0.0.1" -source = "git+https://github.com/tikv/yatp.git?rev=793be4d789d4bd15292fe4d06e38063b4ec9d48e#793be4d789d4bd15292fe4d06e38063b4ec9d48e" -dependencies = [ - "crossbeam-deque", - "crossbeam-skiplist", - "crossbeam-utils", - "dashmap", - "fail", - "lazy_static", - "num_cpus", - "parking_lot_core 0.9.9", - "prometheus 0.13.3", - "rand 0.8.5", -] - -[[package]] -name = "zeroize" -version = "1.6.0" +name = "zerocopy" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ - "zeroize_derive", + "byteorder", + "zerocopy-derive", ] [[package]] -name = "zeroize_derive" -version = "1.4.2" +name = "zerocopy-derive" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", -] - -[[package]] -name = "zstd" -version = "0.12.3+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806" -dependencies = [ - "zstd-safe 6.0.4+zstd.1.5.4", + "syn", ] [[package]] name = "zstd" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" -dependencies = [ - "zstd-safe 7.0.0", -] - -[[package]] -name = "zstd-safe" -version = "6.0.4+zstd.1.5.4" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7afb4b54b8910cf5447638cb54bf4e8a65cbedd783af98b98c62ffe91f185543" +checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" dependencies = [ - "libc", - "zstd-sys", + "zstd-safe", ] [[package]] name = "zstd-safe" -version = "7.0.0" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" dependencies = [ "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.7+zstd.1.5.4" +version = "2.0.12+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94509c3ba2fe55294d752b79842c530ccfab760192521df74a081a78d2b3c7f5" +checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" dependencies = [ "cc", - "libc", "pkg-config", ] diff --git a/Cargo.toml b/Cargo.toml index 987ed4b1aa..4be3a04925 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,190 +16,40 @@ # under the License. 
[workspace.package] -version = "2.2.0-dev" +version = "2.2.0-alpha" authors = ["HoraeDB Authors"] edition = "2021" license = "Apache-2.0" [workspace] resolver = "2" -# In alphabetical order -members = [ - "horaectl", - "integration_tests", - "integration_tests/sdk/rust", - "src/analytic_engine", - "src/benchmarks", - "src/catalog", - "src/catalog_impls", - "src/cluster", - "src/common_types", - "src/components/alloc_tracker", - "src/components/arena", - "src/components/arrow_ext", - "src/components/bytes_ext", - "src/components/codec", - "src/components/future_ext", - "src/components/hash_ext", - "src/components/id_allocator", - "src/components/logger", - "src/components/macros", - "src/components/message_queue", - "src/components/metric_ext", - "src/components/notifier", - "src/components/object_store", - "src/components/panic_ext", - "src/components/parquet_ext", - "src/components/partitioned_lock", - "src/components/profile", - "src/components/runtime", - "src/components/sampling_cache", - "src/components/size_ext", - "src/components/skiplist", - "src/components/system_stats", - "src/components/table_kv", - "src/components/test_util", - "src/components/time_ext", - "src/components/timed_task", - "src/components/toml_ext", - "src/components/trace_metric", - "src/components/trace_metric_derive", - "src/components/trace_metric_derive_tests", - "src/components/tracing_util", - "src/df_engine_extensions", - "src/df_operator", - "src/horaedb", - "src/interpreters", - "src/meta_client", - "src/partition_table_engine", - "src/proxy", - "src/query_engine", - "src/query_frontend", - "src/remote_engine_client", - "src/router", - "src/server", - "src/system_catalog", - "src/table_engine", - "src/tools", - "src/wal" -] - -default-members = ["src/horaedb"] +members = ["src/metric_engine", "src/pb_types", "src/server"] [workspace.dependencies] -alloc_tracker = { path = "src/components/alloc_tracker" } -arrow = { version = "49.0.0", features = ["prettyprint"] } -arrow_ipc = { version = "49.0.0" } -arrow_ext = { path = "src/components/arrow_ext" } -analytic_engine = { path = "src/analytic_engine" } anyhow = { version = "1.0" } -arena = { path = "src/components/arena" } -async-stream = "0.3.4" -async-trait = "0.1.72" -atomic_enum = "0.2.0" -base64 = "0.13" -bytes = "1" +metric_engine = { path = "src/metric_engine" } thiserror = "1" -bytes_ext = { path = "src/components/bytes_ext" } -catalog = { path = "src/catalog" } -catalog_impls = { path = "src/catalog_impls" } -horaedbproto = { git = "https://github.com/apache/incubator-horaedb-proto.git", rev = "fac8564e6e3d50e51daa2af6eb905e747f3191b0" } -codec = { path = "src/components/codec" } -chrono = "0.4" -clap = { version = "4.5.1", features = ["derive"] } -clru = "0.6.1" -cluster = { path = "src/cluster" } -criterion = "0.5" -horaedb-client = "1.0.2" -common_types = { path = "src/common_types" } -datafusion = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "e21b03154" } -datafusion-proto = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "e21b03154" } -derive_builder = "0.12" -df_operator = { path = "src/df_operator" } -df_engine_extensions = { path = "src/df_engine_extensions" } -future_ext = { path = "src/components/future_ext" } -etcd-client = { version = "0.10.3", features = ["tls"] } -env_logger = "0.6" +bytes = "1" +byteorder = "1" +datafusion = "43" +parquet = { version = "53" } +object_store = { version = "0.11" } +pb_types = { path = "src/pb_types" } +prost = { version = "0.13" } +arrow = { version = "53", features 
= ["prettyprint"] } +bytesize = "1" +arrow-schema = "53" +tokio = { version = "1", features = ["full"] } +async-trait = "0.1" +async-stream = "0.3" futures = "0.3" -generic_error = { path = "src/components/generic_error" } -hash_ext = { path = "src/components/hash_ext" } -hex = "0.4.3" -hyperloglog = { git = "https://github.com/jedisct1/rust-hyperloglog.git", rev = "425487ce910f26636fbde8c4d640b538431aad50" } -id_allocator = { path = "src/components/id_allocator" } -influxql-logical-planner = { git = "https://github.com/CeresDB/influxql.git", rev = "05a8a9f", package = "iox_query_influxql" } -influxql-parser = { git = "https://github.com/CeresDB/influxql.git", rev = "05a8a9f", package = "influxdb_influxql_parser" } -influxql-query = { git = "https://github.com/CeresDB/influxql.git", rev = "05a8a9f", package = "iox_query" } -influxql-schema = { git = "https://github.com/CeresDB/influxql.git", rev = "05a8a9f", package = "schema" } -interpreters = { path = "src/interpreters" } -itertools = "0.10.5" -lz4_flex = { version = "0.11", default-features = false, features = ["frame"] } -lazy_static = "1.4.0" -logger = { path = "src/components/logger" } -lru = "0.7.6" -macros = { path = "src/components/macros" } -message_queue = { path = "src/components/message_queue" } -meta_client = { path = "src/meta_client" } -metric_ext = { path = "src/components/metric_ext" } -notifier = { path = "src/components/notifier" } -object_store = { path = "src/components/object_store" } -once_cell = "1.18" -panic_ext = { path = "src/components/panic_ext" } -partitioned_lock = { path = "src/components/partitioned_lock" } -partition_table_engine = { path = "src/partition_table_engine" } -parquet_ext = { path = "src/components/parquet_ext" } -parquet = { version = "49.0.0" } -paste = "1.0" -pin-project-lite = "0.2.8" -pprof = "0.12.1" -profile = { path = "src/components/profile" } -prom-remote-api = { version = "0.2.2" } -prometheus = "0.12" -prometheus-static-metric = "0.5" -prost = "0.11" -proxy = { path = "src/proxy" } -query_engine = { path = "src/query_engine" } -query_frontend = { path = "src/query_frontend" } -rand = "0.8.5" -regex = "1" -remote_engine_client = { path = "src/remote_engine_client" } -reqwest = { version = "0.12.4", default-features = false, features = [ - "rustls-tls", - "json", - "http2", -] } -router = { path = "src/router" } -runtime = { path = "src/components/runtime" } -sampling_cache = { path = "src/components/sampling_cache" } -snafu = { version = "0.6.10", features = ["backtraces"] } -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0.60" -server = { path = "src/server" } -size_ext = { path = "src/components/size_ext" } -smallvec = "1.6" -slog = "2.7" -spin = "0.9.6" -system_statis = { path = "src/components/system_stats" } -sqlparser = { version = "0.39.0", features = ["serde"] } -system_catalog = { path = "src/system_catalog" } -table_engine = { path = "src/table_engine" } -table_kv = { path = "src/components/table_kv" } -tabled = "0.16.0" -tempfile = "3.1.0" -test_util = { path = "src/components/test_util" } -time_ext = { path = "src/components/time_ext" } -toml = "0.7" -toml_ext = { path = "src/components/toml_ext" } -timed_task = { path = "src/components/timed_task" } -tracing_util = { path = "src/components/tracing_util" } -trace_metric = { path = "src/components/trace_metric" } -trace_metric_derive = { path = "src/components/trace_metric_derive" } -trace_metric_derive_tests = { path = "src/components/trace_metric_derive_tests" } -tonic = "0.8.1" -tokio = { version = 
"1.29", features = ["full"] } -uuid = "1.6.1" -wal = { path = "src/wal" } -xorfilter-rs = { git = "https://github.com/CeresDB/xorfilter", rev = "ac8ef01" } -zstd = { version = "0.12", default-features = false } +temp-dir = "0.1" +itertools = "0.3" +lazy_static = "1" +tracing = "0.1" +tracing-subscriber = "0.3" +async-scoped = { version = "0.9.0", features = ["use-tokio"] } +test-log = "0.2" # This profile optimizes for good runtime performance. [profile.release] diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 1229e4a25d..0000000000 --- a/Dockerfile +++ /dev/null @@ -1,64 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -## Builder -ARG RUST_VERSION=1.59.0 -FROM rust:${RUST_VERSION}-slim-bullseye as build - -# cache mounts below may already exist and owned by root -USER root - -RUN apt update && apt install --yes git gcc g++ libssl-dev pkg-config cmake protobuf-compiler && rm -rf /var/lib/apt/lists/* - -COPY . /horaedb -WORKDIR /horaedb - -RUN make build - -## HoraeDB -FROM ubuntu:20.04 - -RUN useradd -m -s /bin/bash horae - -RUN apt update && \ - apt install --yes curl gdb iotop cron vim less net-tools mysql-client apt-transport-https software-properties-common wget && \ - apt clean - -# install grafana -RUN mkdir -p /etc/apt/keyrings/ && \ - wget -q -O - https://apt.grafana.com/gpg.key | gpg --dearmor | tee /etc/apt/keyrings/grafana.gpg > /dev/null && \ - echo "deb [signed-by=/etc/apt/keyrings/grafana.gpg] https://apt.grafana.com stable main" | tee -a /etc/apt/sources.list.d/grafana.list && \ - apt update && \ - apt install --yes grafana && \ - apt clean -COPY ./docker/datasource.yml /usr/share/grafana/conf/provisioning/datasources - -ENV RUST_BACKTRACE 1 - -COPY --from=build /horaedb/target/release/horaedb-server /usr/bin/horaedb-server -RUN chmod +x /usr/bin/horaedb-server - -COPY ./docker/entrypoint.sh /entrypoint.sh -COPY ./docs/minimal.toml /etc/horaedb/horaedb.toml - -ARG TINI_VERSION=v0.19.0 -ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini -RUN chmod +x /tini - -ARG USER=horae - -ENTRYPOINT ["/tini", "--", "/entrypoint.sh"] diff --git a/Makefile b/Makefile index c58cfb5e75..915073d59b 100644 --- a/Makefile +++ b/Makefile @@ -19,8 +19,6 @@ SHELL = /bin/bash DIR=$(shell pwd) -.DEFAULT_GOAL := integration-test - init: echo "init" echo "Git branch: $GITBRANCH" @@ -33,56 +31,13 @@ build: ls -alh cd $(DIR); cargo build --release $(CARGO_FEATURE_FLAGS) -build-wal-table-kv: - ls -alh - cd $(DIR)/src/horaedb; cargo build --release --no-default-features --features wal-table-kv - -build-wal-message-queue: - ls -alh - cd $(DIR)/src/horaedb; cargo build --release --no-default-features --features wal-message-queue - -build-slim: - ls -alh - cd $(DIR); cargo build --profile release-slim 
$(CARGO_FEATURE_FLAGS) - -build-asan: - ls -alh - export RUSTFLAGS=-Zsanitizer=address RUSTDOCFLAGS=-Zsanitizer=address - cd $(DIR); cargo build -Zbuild-std --target x86_64-unknown-linux-gnu --release $(CARGO_FEATURE_FLAGS) - -build-arm64: - ls -alh - cd $(DIR); cargo build --release --no-default-features $(CARGO_FEATURE_FLAGS) - -build-with-console: - ls -alh - cd $(DIR); RUSTFLAGS="--cfg tokio_unstable" cargo build --release $(CARGO_FEATURE_FLAGS) - test: cd $(DIR); cargo test --workspace -- --test-threads=4 -integration-test: - cd $(DIR)/integration_tests; make run - -# grcov needs build first, then run test -build-ut: - echo $(CARGO_INCREMENTAL) - echo $(RUSTFLAGS) - echo $(RUSTDOCFLAGS) - cd $(DIR); cargo build --workspace - -test-ut: - echo $(CARGO_INCREMENTAL) - echo $(RUSTFLAGS) - echo $(RUSTDOCFLAGS) - #cd $(DIR); cargo test --workspace -- -Z unstable-options --format json | tee results.json; \ - #cat results.json | cargo2junit > ${WORKSPACE}/testresult/TEST-all.xml - cargo test --workspace - fmt: cd $(DIR); cargo fmt -- --check -check-cargo-toml: +sort: cd $(DIR); cargo sort --workspace --check check-asf-header: @@ -92,64 +47,12 @@ udeps: cd $(DIR); cargo udeps --all-targets --all-features --workspace clippy: - cd $(DIR); cargo clippy --all-targets --all-features --workspace -- -D warnings -D clippy::dbg-macro - -# test with address sanitizer -asan-test: - export RUSTFLAGS=-Zsanitizer=address RUSTDOCFLAGS=-Zsanitizer=address - cd $(DIR); cargo test -Zbuild-std --target x86_64-unknown-linux-gnu --workspace - -# test with address sanitizer under release mode to workaround `attempt to create unaligned or null slice` -# error in parquet crate. -asan-test-release: - export RUSTFLAGS=-Zsanitizer=address RUSTDOCFLAGS=-Zsanitizer=address - cd $(DIR); cargo test -Zbuild-std --target x86_64-unknown-linux-gnu --release --workspace - -# test with memory sanitizer -mem-test: - export RUSTFLAGS=-Zsanitizer=memory RUSTDOCFLAGS=-Zsanitizer=memory - cd $(DIR); cargo test -Zbuild-std --target x86_64-unknown-linux-gnu --workspace - -# test with miri. -# only list packages will be tested. -miri: - cd $(DIR); cargo miri test --package arena + cd $(DIR); cargo clippy --all-targets --all-features --workspace -- -D warnings -D clippy::dbg-macro \ + -A dead_code -A unused_variables -A clippy::unreachable # Remove these once we have a clean build ensure-disk-quota: bash ./scripts/free-disk-space.sh -tsbs: build - cd $(DIR); sh scripts/run-tsbs.sh - -# install dev dependencies -ifeq ($(shell uname), Darwin) -dev-setup: - echo "Detecting macOS system..." - brew --version >/dev/null 2>&1 || { echo "Error: Homebrew is not installed. Exiting..."; exit 1; } - echo "Installing dependencies using Homebrew..." - HOMEBREW_NO_AUTO_UPDATE=1 brew install git openssl protobuf cmake pre-commit - cargo install cargo-udeps - cargo install --git https://github.com/DevinR528/cargo-sort --rev 55ec890 --locked -else ifeq ($(shell uname), Linux) -dev-setup: - echo "Detecting Linux system..." - os_id=$(shell awk -F= '/^ID=/{print $$2}' /etc/os-release) && \ - if [ "$$os_id" = "ubuntu" ]; then \ - echo "Detected Ubuntu system..."; \ - echo "Installing dependencies using apt-get..."; \ - sudo apt-get update; \ - sudo apt install -y git gcc g++ libssl-dev pkg-config protobuf-compiler cmake pre-commit; \ - cargo install cargo-udeps; \ - cargo install --git https://github.com/DevinR528/cargo-sort --rev 55ec890 --locked; \ - else \ - echo "Error: Unsupported Linux distribution. 
Exiting..."; \ - exit 1; \ - fi -else -dev-setup: - echo "Error: Unsupported OS. Exiting..." - exit 1 -endif fix: cargo fmt @@ -157,7 +60,4 @@ fix: cargo clippy --fix --allow-staged --all-targets --all-features --workspace -- -D warnings update-licenses: - # Update Horaedb dependencies cargo install --locked cargo-deny && cargo deny list -f tsv -l crate > DEPENDENCIES.tsv - # Update Horaemeta dependencies - cd $(DIR)/horaemeta; go install github.com/google/go-licenses@latest && go-licenses report ./... > DEPENDENCIES.csv diff --git a/README-CN.md b/README-CN.md deleted file mode 100644 index 1cf9bba2e7..0000000000 --- a/README-CN.md +++ /dev/null @@ -1,89 +0,0 @@ -![HoraeDB](docs/logo/horaedb-banner-white-small.jpg) - -![License](https://img.shields.io/badge/license-Apache--2.0-green.svg) -[![CI](https://github.com/apache/horaedb/actions/workflows/ci.yml/badge.svg)](https://github.com/apache/horaedb/actions/workflows/ci.yml) -[![OpenIssue](https://img.shields.io/github/issues/apache/horaedb)](https://github.com/apache/horaedb/issues) -[![HoraeDB Docker](https://img.shields.io/docker/v/apache/horaedb-server?logo=docker&label=horaedb-server)](https://hub.docker.com/r/apache/horaedb-server) -[![HoraeMeta Docker](https://img.shields.io/docker/v/apache/horaemeta-server?logo=docker&label=horaemeta-server)](https://hub.docker.com/r/apache/horaemeta-server) - -[English](./README.md) - -Apache HoraeDB (incubating) 是一款高性能、分布式的云原生时序数据库。 - -## 文档 - -- [User Guide](https://horaedb.apache.org/docs/getting-started/) -- [Development Guide](https://horaedb.apache.org/docs/dev/) - -## 快速开始 -### 通过 Docker 运行 -#### 使用 Docker 运行单机版 HoraeDB -``` -docker run -d --name horaedb-server \ - -p 8831:8831 \ - -p 3307:3307 \ - -p 5440:5440 \ - ghcr.io/apache/horaedb-server:nightly-20231222-f57b3827 -``` - -#### 使用 docker compose 运行集群,包含两个 horaedb 节点和一个 horaemeta 节点 - -``` -docker compose -f docker/docker-compose.yaml up -``` - -### 基本操作 - -创建表 -``` -curl --location --request POST 'http://127.0.0.1:5440/sql' \ --d ' -CREATE TABLE `demo` ( - `name` string TAG, - `value` double NOT NULL, - `t` timestamp NOT NULL, - timestamp KEY (t)) -ENGINE=Analytic - with -(enable_ttl="false") -' -``` - -数据写入 -``` -curl --location --request POST 'http://127.0.0.1:5440/sql' \ --d ' -INSERT INTO demo (t, name, value) - VALUES (1702224000000, "horaedb", 100) -' -``` - -数据查询 -``` -curl --location --request POST 'http://127.0.0.1:5440/sql' \ --d ' -SELECT * FROM `demo` -' -``` - -删除表 - -``` -curl --location --request POST 'http://127.0.0.1:5440/sql' \ --d ' -Drop TABLE `demo` -' -``` - -## 开发者社区 - -与来自世界各地的用户和开发人员一起在 Apache HoraeDB (incubating) 社区中茁壮成长。 - -- https://horaedb.apache.org/community/ - -## 致谢 - -在开发程中,我们受到很多开源项目的影响和启发,例如 [influxdb_iox](https://github.com/influxdata/influxdb/tree/main), [tikv](https://github.com/tikv/tikv) 等等,感谢这些杰出的项目。 - -## 开源许可 -[Apache License 2.0](./LICENSE) diff --git a/README.md b/README.md index 423838c192..f50bb0d7d5 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,6 @@ [![HoraeDB Docker](https://img.shields.io/docker/v/apache/horaedb-server?logo=docker&label=horaedb-server)](https://hub.docker.com/r/apache/horaedb-server) [![HoraeMeta Docker](https://img.shields.io/docker/v/apache/horaemeta-server?logo=docker&label=horaemeta-server)](https://hub.docker.com/r/apache/horaemeta-server) -[中文](./README-CN.md) - Apache HoraeDB (incubating) is a high-performance, distributed, cloud native time-series database. 
> [!IMPORTANT] @@ -16,80 +14,11 @@ Apache HoraeDB (incubating) is a high-performance, distributed, cloud native tim > > Please read the [DISCLAIMER](DISCLAIMER) and a full explanation of ["incubating"](https://incubator.apache.org/policy/incubation.html). -## Documentation - -- [User Guide](https://horaedb.apache.org/docs/getting-started/) -- [Development Guide](https://horaedb.apache.org/docs/dev/) - -## Quick Start - -### Run with Docker - -#### Run HoraeDB standalone Server - -``` -docker run -d --name horaedb-server \ - -p 8831:8831 \ - -p 3307:3307 \ - -p 5440:5440 \ - ghcr.io/apache/horaedb-server:nightly-20231222-f57b3827 -``` - -#### Run HoraeDB cluster with two horaedb-server node and one horaemeta-server node. - -``` -docker compose -f docker/docker-compose.yaml up -``` - -### Run from source code - -Please read the [development guide](https://horaedb.apache.org/docs/dev/compile_run/) guide for instructions on how to build. - -### Create Table and Write/Read data -Create Table. - -``` -curl --location --request POST 'http://127.0.0.1:5440/sql' \ --d ' -CREATE TABLE `demo` ( - `name` string TAG, - `value` double NOT NULL, - `t` timestamp NOT NULL, - timestamp KEY (t)) -ENGINE=Analytic - with -(enable_ttl="false") -' -``` - -Write data with SQL. - -``` -curl --location --request POST 'http://127.0.0.1:5440/sql' \ --d ' -INSERT INTO demo (t, name, value) - VALUES (1702224000000, "horaedb", 100) -' -``` - -Read data with SQL. - -``` -curl --location --request POST 'http://127.0.0.1:5440/sql' \ --d ' -SELECT * FROM `demo` -' -``` - -Drop table. +## Notice -``` -curl --location --request POST 'http://127.0.0.1:5440/sql' \ --d ' -Drop TABLE `demo` -' -``` +The `main` branch is used for developing the new engine described in this [RFC](docs/rfcs/20240827-metric-engine.md). It is under rapid development and remains somewhat unstable; it is scheduled to become publicly available towards the end of 2025. +The legacy, stable engine is in the [analytic-engine](https://github.com/apache/horaedb/tree/analytic-engine) branch. ## Community @@ -103,7 +32,7 @@ Thrive together in Apache HoraeDB (incubating) community with users and develope ## Acknowledgment -When develop we benefit a lot from several other open source projects, such as [influxdb_iox](https://github.com/influxdata/influxdb/tree/main), [tikv](https://github.com/tikv/tikv) etc, thanks for their awesome work. +During development we benefited a lot from several other open source projects, such as [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics); thanks for their awesome work. ## License diff --git a/docker/basic.sh b/docker/basic.sh deleted file mode 100755 index de783585f4..0000000000 --- a/docker/basic.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - - -set -exo pipefail - -trap cleanup EXIT - -cleanup() { - exit_code=$? - # cat server log when script failed - if [[ ${exit_code} -ne 0 ]]; then - docker logs -n 200 ${SERVER_NAME} - fi -} - -ADDR=${HORAEDB_ADDR:-"127.0.0.1"} -PORT=${HORAEDB_PORT:-"5440"} - -URL="http://${ADDR}:${PORT}/sql" - -function horaedb_query { - sql=${1} - - curl --location --fail \ - --request POST ${URL} \ - --header 'Content-Type: application/json' \ - --data-raw '{ - "query": "'"${sql}"'" - }' -} - -horaedb_query 'CREATE TABLE `demo` (`name` string TAG, `value` double NOT NULL, `t` timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE=Analytic with (enable_ttl='\''false'\'')' - -horaedb_query 'INSERT INTO demo(t, name, value) VALUES(1651737067000, '\''horaedb'\'', 100)' - -horaedb_query 'select * from demo' - -horaedb_query 'show create table demo' - -horaedb_query 'DROP TABLE demo' diff --git a/docker/datasource.yml b/docker/datasource.yml deleted file mode 100644 index acdeadc546..0000000000 --- a/docker/datasource.yml +++ /dev/null @@ -1,31 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -apiVersion: 1 - -datasources: - - name: HoraeDB-mysql - type: mysql - access: proxy - url: localhost:3307 - isDefault: true - - - name: HoraeDB-influxdb - type: influxdb - access: proxy - url: http://localhost:5440/influxdb/v1/ - isDefault: false \ No newline at end of file diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml deleted file mode 100644 index 799ed963a6..0000000000 --- a/docker/docker-compose.yaml +++ /dev/null @@ -1,63 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -version: '2' - -services: - horaemeta: - image: ghcr.io/apache/horaemeta-server:nightly-20240103-24322bc7 - ports: - - "2379:2379" - volumes: - - ../docs/example-standalone.toml:/etc/horaemeta/horaemeta.toml - healthcheck: - test: [ "CMD-SHELL", "curl 0:2379" ] - interval: 10s - timeout: 5s - retries: 5 - - horaedb0: - image: ghcr.io/apache/horaedb-server:nightly-20240105-bd737b24 - restart: always - ports: - - "8831:8831" - - "5440:5440" - volumes: - - ../docs/example-cluster-0.toml:/etc/horaedb/horaedb.toml - environment: - - HORAEDB_SERVER_ADDR=horaedb0 - - HORAEMETA_SERVER_ADDR=http://horaemeta:2379 - - ETCD_ADDRS=http://horaemeta:2379 - depends_on: - horaemeta: - condition: service_healthy - - horaedb1: - image: ghcr.io/apache/horaedb-server:nightly-20240105-bd737b24 - restart: always - ports: - - "8832:8832" - - "5441:5441" - volumes: - - ../docs/example-cluster-1.toml:/etc/horaedb/horaedb.toml - environment: - - HORAEDB_SERVER_ADDR=horaedb1 - - HORAEMETA_SERVER_ADDR=http://horaemeta:2379 - - ETCD_ADDRS=http://horaemeta:2379 - depends_on: - horaemeta: - condition: service_healthy diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh deleted file mode 100755 index 19523793d8..0000000000 --- a/docker/entrypoint.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -set -exo pipefail - -## init varibles -USER="horae" -DATA_DIR="/home/${USER}/data" -DATA_PATH="${DATA_DIR}/horaedb" -CONFIG_FILE="/etc/horaedb/horaedb.toml" - -# enable jemalloc heap profiling -export MALLOC_CONF="prof:true,prof_active:false,lg_prof_sample:19" - -## data dir -mkdir -p ${DATA_DIR} -chmod +777 -R ${DATA_DIR} -chown -R ${USER}.${USER} ${DATA_DIR} - -# start grafana server -nohup /usr/share/grafana/bin/grafana server --homepath /usr/share/grafana/ & - -exec /usr/bin/horaedb-server --config ${CONFIG_FILE} diff --git a/docs/example-cluster-0.toml b/docs/example-cluster-0.toml deleted file mode 100644 index 169a9630af..0000000000 --- a/docs/example-cluster-0.toml +++ /dev/null @@ -1,59 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -[node] -addr = "127.0.0.1" - -[logger] -level = "info" - -[server] -bind_addr = "0.0.0.0" -http_port = 5440 -grpc_port = 8831 -mysql_port = 3307 -deploy_mode = "Cluster" - -[analytic.storage] -mem_cache_capacity = '1G' -mem_cache_partition_bits = 0 - -[analytic.storage.object_store] -type = "Local" -data_dir = "/tmp/horaedb0" - -[analytic.wal] -type = "Local" -data_dir = "/tmp/horaedb0" - -[cluster_deployment] -mode = "WithMeta" -cmd_channel_buffer_size = 10 - -[cluster_deployment.meta_client] -# Only support "defaultCluster" currently. -cluster_name = "defaultCluster" -meta_addr = "http://127.0.0.1:2379" -lease = "10s" -timeout = "5s" - -[cluster_deployment.etcd_client] -server_addrs = ['127.0.0.1:2379'] - -[limiter] -write_block_list = ['mytable1'] -read_block_list = ['mytable1'] diff --git a/docs/example-cluster-1.toml b/docs/example-cluster-1.toml deleted file mode 100644 index e293f7cc26..0000000000 --- a/docs/example-cluster-1.toml +++ /dev/null @@ -1,60 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[node] -addr = "127.0.0.1" - -[logger] -level = "debug" - -[server] -bind_addr = "0.0.0.0" -http_port = 5441 -grpc_port = 8832 -mysql_port = 13307 -postgresql_port = 15433 -deploy_mode = "Cluster" - -[analytic.storage] -mem_cache_capacity = '1G' -mem_cache_partition_bits = 0 - -[analytic.storage.object_store] -type = "Local" -data_dir = "/tmp/horaedb1" - -[analytic.wal] -type = "Local" -data_dir = "/tmp/horaedb1" - -[cluster_deployment] -mode = "WithMeta" -cmd_channel_buffer_size = 10 - -[cluster_deployment.meta_client] -# Only support "defaultCluster" currently. -cluster_name = "defaultCluster" -meta_addr = "http://127.0.0.1:2379" -lease = "10s" -timeout = "5s" - -[cluster_deployment.etcd_client] -server_addrs = ['127.0.0.1:2379'] - -[limiter] -write_block_list = ['mytable1'] -read_block_list = ['mytable1'] diff --git a/docs/example-standalone-static-routing.toml b/docs/example-standalone-static-routing.toml deleted file mode 100644 index 7c7d77caf2..0000000000 --- a/docs/example-standalone-static-routing.toml +++ /dev/null @@ -1,93 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[server] -bind_addr = "0.0.0.0" -http_port = 5440 -grpc_port = 8831 - -[logger] -level = "info" - -[runtime] -read_thread_num = 4 -write_thread_num = 4 -compact_thread_num = 2 -default_thread_num = 4 - -[analytic] -write_group_worker_num = 4 -replay_batch_size = 500 -max_replay_tables_per_batch = 1024 -write_group_command_channel_cap = 1024 - -[analytic.wal] -type = "Local" -data_dir = "/tmp/horaedb1" - -[analytic.storage] -mem_cache_capacity = '1G' -mem_cache_partition_bits = 0 - -[analytic.storage.object_store] -type = "Local" -data_dir = "/tmp/horaedb" - -[analytic.compaction_config] -schedule_channel_len = 4 -schedule_interval = "30m" -max_ongoing_tasks = 4 - -[cluster_deployment] -mode = "NoMeta" - -# Route&Shard: public -[[cluster_deployment.topology.schema_shards]] -schema = 'public' -[[cluster_deployment.topology.schema_shards.shard_views]] -shard_id = 0 -[cluster_deployment.topology.schema_shards.shard_views.endpoint] -addr = '127.0.0.1' -port = 8831 - -# Route&Shard: test -[[cluster_deployment.topology.schema_shards]] -schema = 'test' -[[cluster_deployment.topology.schema_shards.shard_views]] -shard_id = 0 -[cluster_deployment.topology.schema_shards.shard_views.endpoint] -addr = '127.0.0.1' -port = 8831 -[[cluster_deployment.topology.schema_shards.shard_views]] -shard_id = 1 -[cluster_deployment.topology.schema_shards.shard_views.endpoint] -addr = '127.0.0.2' -port = 8831 - -# Route rules: test -[[cluster_deployment.rules.prefix_rules]] -schema = 'test' -prefix = 'BIZ' -shard = 0 - -[[cluster_deployment.rules.hash_rules]] -schema = 'test' -shards = [ 1 ] - -[limiter] -write_block_list = ['mytable1'] -read_block_list = ['mytable1'] diff --git a/docs/minimal.toml b/docs/minimal.toml deleted file mode 100644 index de11aae7b5..0000000000 --- a/docs/minimal.toml +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[server] -bind_addr = "0.0.0.0" -http_port = 5440 -grpc_port = 8831 -postgresql_port = 5433 - -[logger] -level = "info" - -[tracing] -dir = "/tmp/horaedb" - -[analytic.storage.object_store] -type = "Local" -data_dir = "/tmp/horaedb" - -[analytic.wal] -type = "Local" -data_dir = "/tmp/horaedb" - -[analytic] -enable_primary_key_sampling = true diff --git a/horaectl/Cargo.toml b/horaectl/Cargo.toml deleted file mode 100644 index 580c0e7946..0000000000 --- a/horaectl/Cargo.toml +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "horaectl" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -anyhow = { workspace = true, features = ["backtrace"] } -chrono = { workspace = true } -clap = { workspace = true, features = ["env", "derive"] } -lazy_static = { workspace = true } -prettytable = "0.10.0" -reqwest = { workspace = true } -serde = { workspace = true } -shell-words = "1.1.0" -tokio = { workspace = true } diff --git a/horaectl/src/cmd/cluster.rs b/horaectl/src/cmd/cluster.rs deleted file mode 100644 index cd22c0ccc3..0000000000 --- a/horaectl/src/cmd/cluster.rs +++ /dev/null @@ -1,67 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use anyhow::Result; -use clap::Subcommand; - -use crate::operation::cluster::ClusterOp; - -#[derive(Subcommand)] -pub enum ClusterCommand { - /// List cluster - List, - - /// Diagnose cluster - Diagnose, - - /// Schedule cluster - Schedule { - #[clap(subcommand)] - cmd: Option, - }, -} - -#[derive(Subcommand)] -pub enum ScheduleCommand { - /// Get the schedule status - Get, - - /// Enable schedule - On, - - /// Disable schedule - Off, -} - -pub async fn run(cmd: ClusterCommand) -> Result<()> { - let op = ClusterOp::try_new()?; - match cmd { - ClusterCommand::List => op.list().await, - ClusterCommand::Diagnose => op.diagnose().await, - ClusterCommand::Schedule { cmd } => { - if let Some(cmd) = cmd { - match cmd { - ScheduleCommand::Get => op.get_schedule_status().await, - ScheduleCommand::On => op.update_schedule_status(true).await, - ScheduleCommand::Off => op.update_schedule_status(false).await, - } - } else { - op.get_schedule_status().await - } - } - } -} diff --git a/horaectl/src/cmd/mod.rs b/horaectl/src/cmd/mod.rs deleted file mode 100644 index 5906ef1822..0000000000 --- a/horaectl/src/cmd/mod.rs +++ /dev/null @@ -1,140 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -mod cluster; -use std::{io, io::Write}; - -use anyhow::Result; -use clap::{Args, Parser, Subcommand}; - -use crate::{ - cmd::cluster::ClusterCommand, - util::{CLUSTER_NAME, META_ADDR}, -}; - -#[derive(Parser)] -#[clap(name = "horaectl")] -#[clap(about = "HoraeCTL is a command line tool for HoraeDB", version)] -pub struct App { - #[clap(flatten)] - pub global_opts: GlobalOpts, - - /// Enter interactive mode - #[clap(short, long, default_value_t = false)] - pub interactive: bool, - - #[clap(subcommand)] - pub command: Option, -} - -#[derive(Debug, Args)] -pub struct GlobalOpts { - /// Meta addr - #[clap( - short, - long = "meta", - global = true, - env = "HORAECTL_META_ADDR", - default_value = "127.0.0.1:8080" - )] - pub meta_addr: String, - - /// Cluster name - #[clap( - short, - long = "cluster", - global = true, - env = "HORAECTL_CLUSTER", - default_value = "defaultCluster" - )] - pub cluster_name: String, -} - -#[derive(Subcommand)] -pub enum SubCommand { - /// Operations on cluster - #[clap(alias = "c")] - Cluster { - #[clap(subcommand)] - commands: ClusterCommand, - }, -} - -pub async fn run_command(cmd: SubCommand) -> Result<()> { - match cmd { - SubCommand::Cluster { commands } => cluster::run(commands).await, - } -} - -pub async fn repl_loop() { - loop { - print_prompt( - META_ADDR.lock().unwrap().as_str(), - CLUSTER_NAME.lock().unwrap().as_str(), - ); - - let args = match read_args() { - Ok(args) => args, - Err(e) => { - println!("Read input failed, err:{}", e); - continue; - } - }; - - if let Some(cmd) = args.get(1) { - if ["quit", "exit", "q"].iter().any(|v| v == cmd) { - break; - } - } - - match App::try_parse_from(args) { - Ok(horaectl) => { - if let Some(cmd) = horaectl.command { - if let Err(e) = match cmd { - SubCommand::Cluster { commands } => cluster::run(commands).await, - } { - println!("Run command failed, err:{e}"); - } - } - } - Err(e) => { - println!("Parse command failed, err:{e}"); - } - } - } -} - -fn read_args() -> Result, String> { - io::stdout().flush().unwrap(); - let mut input = String::new(); - io::stdin() - .read_line(&mut input) - .map_err(|e| e.to_string())?; - - let input = input.trim(); - if input.is_empty() { - return Err("No arguments provided".into()); - } - - let mut args = vec!["horaectl".to_string()]; - args.extend(shell_words::split(input).map_err(|e| e.to_string())?); - Ok(args) -} - -fn print_prompt(address: &str, cluster: &str) { - print!("{}({}) > ", address, cluster); -} diff --git a/horaectl/src/main.rs b/horaectl/src/main.rs deleted file mode 100644 index 24658ab626..0000000000 --- a/horaectl/src/main.rs +++ /dev/null @@ -1,54 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -mod cmd; -mod operation; -mod util; - -use clap::{CommandFactory, Parser}; - -use crate::{ - cmd::{repl_loop, run_command, App}, - util::{CLUSTER_NAME, META_ADDR}, -}; - -#[tokio::main] -async fn main() { - let app = App::parse(); - { - let mut meta_addr = META_ADDR.lock().unwrap(); - *meta_addr = app.global_opts.meta_addr; - } - { - let mut cluster_name = CLUSTER_NAME.lock().unwrap(); - *cluster_name = app.global_opts.cluster_name; - } - - if app.interactive { - repl_loop().await; - return; - } - - if let Some(cmd) = app.command { - if let Err(e) = run_command(cmd).await { - println!("Run command failed, err:{e}"); - std::process::exit(1); - } - } else { - App::command().print_help().expect("print help failed"); - } -} diff --git a/horaectl/src/operation/cluster.rs b/horaectl/src/operation/cluster.rs deleted file mode 100644 index 709d44be26..0000000000 --- a/horaectl/src/operation/cluster.rs +++ /dev/null @@ -1,147 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::time::Duration; - -use anyhow::Result; -use prettytable::row; -use reqwest::Client; - -use crate::{ - operation::{ - ClusterResponse, DiagnoseShardResponse, EnableScheduleRequest, EnableScheduleResponse, - }, - util::{ - format_time_milli, table_writer, API, CLUSTERS, CLUSTERS_DIAGNOSE_HEADER, - CLUSTERS_ENABLE_SCHEDULE_HEADER, CLUSTERS_LIST_HEADER, CLUSTER_NAME, DEBUG, HTTP, - META_ADDR, - }, -}; - -fn list_url() -> String { - HTTP.to_string() + META_ADDR.lock().unwrap().as_str() + API + CLUSTERS -} - -fn diagnose_url() -> String { - HTTP.to_string() - + META_ADDR.lock().unwrap().as_str() - + DEBUG - + "/diagnose" - + "/" - + CLUSTER_NAME.lock().unwrap().as_str() - + "/shards" -} - -fn schedule_url() -> String { - HTTP.to_string() - + META_ADDR.lock().unwrap().as_str() - + DEBUG - + CLUSTERS - + "/" - + CLUSTER_NAME.lock().unwrap().as_str() - + "/enableSchedule" -} - -pub struct ClusterOp { - http_client: Client, -} - -impl ClusterOp { - pub fn try_new() -> Result { - let hc = Client::builder() - .timeout(Duration::from_secs(30)) - .user_agent("horaectl") - .build()?; - - Ok(Self { http_client: hc }) - } - - pub async fn list(&self) -> Result<()> { - let res = self.http_client.get(list_url()).send().await?; - let response: ClusterResponse = res.json().await?; - - let mut table = table_writer(&CLUSTERS_LIST_HEADER); - for cluster in response.data { - table.add_row(row![ - cluster.id, - cluster.name, - cluster.shard_total.to_string(), - cluster.topology_type, - cluster.procedure_executing_batch_size.to_string(), - format_time_milli(cluster.created_at), - format_time_milli(cluster.modified_at) - ]); - } - table.printstd(); - - Ok(()) - } - - pub async fn diagnose(&self) -> Result<()> { - let res = self.http_client.get(diagnose_url()).send().await?; - let 
response: DiagnoseShardResponse = res.json().await?; - let mut table = table_writer(&CLUSTERS_DIAGNOSE_HEADER); - table.add_row(row![response - .data - .unregistered_shards - .iter() - .map(|shard_id| shard_id.to_string()) - .collect::<Vec<_>>() - .join(", ")]); - for (shard_id, data) in response.data.unready_shards { - table.add_row(row!["", shard_id, data.node_name, data.status]); - } - table.printstd(); - - Ok(()) - } - - pub async fn get_schedule_status(&self) -> Result<()> { - let res = self.http_client.get(schedule_url()).send().await?; - let response: EnableScheduleResponse = res.json().await?; - let mut table = table_writer(&CLUSTERS_ENABLE_SCHEDULE_HEADER); - let row = match response.data { - Some(data) => row![data], - None => row!["topology should in dynamic mode"], - }; - table.add_row(row); - table.printstd(); - - Ok(()) - } - - pub async fn update_schedule_status(&self, enable: bool) -> Result<()> { - let request = EnableScheduleRequest { enable }; - - let res = self - .http_client - .put(schedule_url()) - .json(&request) - .send() - .await?; - let response: EnableScheduleResponse = res.json().await?; - let mut table = table_writer(&CLUSTERS_ENABLE_SCHEDULE_HEADER); - let row = match response.data { - Some(data) => row![data], - None => row!["topology should in dynamic mode"], - }; - table.add_row(row); - table.printstd(); - - Ok(()) - } -} diff --git a/horaectl/src/operation/mod.rs b/horaectl/src/operation/mod.rs deleted file mode 100644 index f872346700..0000000000 --- a/horaectl/src/operation/mod.rs +++ /dev/null @@ -1,80 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License.
- -pub mod cluster; - -use std::collections::HashMap; - -use serde::{Deserialize, Serialize}; -#[derive(Deserialize, Debug)] -pub struct Cluster { - #[serde(rename = "ID")] - id: u32, - #[serde(rename = "Name")] - name: String, - #[serde(rename = "ShardTotal")] - shard_total: u32, - #[serde(rename = "TopologyType")] - topology_type: String, - #[serde(rename = "ProcedureExecutingBatchSize")] - procedure_executing_batch_size: u32, - #[serde(rename = "CreatedAt")] - created_at: i64, - #[serde(rename = "ModifiedAt")] - modified_at: i64, -} - -#[derive(Deserialize, Debug)] -pub struct ClusterResponse { - #[allow(unused)] - status: String, - data: Vec<Cluster>, -} - -#[derive(Deserialize, Debug)] -pub struct DiagnoseShardStatus { - #[serde(rename = "nodeName")] - node_name: String, - status: String, -} - -#[derive(Deserialize, Debug)] -pub struct DiagnoseShard { - #[serde(rename = "unregisteredShards")] - unregistered_shards: Vec<u32>, - #[serde(rename = "unreadyShards")] - unready_shards: HashMap<u32, DiagnoseShardStatus>, -} - -#[derive(Deserialize, Debug)] -pub struct DiagnoseShardResponse { - #[allow(unused)] - status: String, - data: DiagnoseShard, -} - -#[derive(Serialize)] -pub struct EnableScheduleRequest { - enable: bool, -} - -#[derive(Deserialize)] -pub struct EnableScheduleResponse { - #[allow(unused)] - status: String, - data: Option<bool>, -} diff --git a/horaectl/src/util/mod.rs b/horaectl/src/util/mod.rs deleted file mode 100644 index d6a4311365..0000000000 --- a/horaectl/src/util/mod.rs +++ /dev/null @@ -1,60 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Mutex; - -use chrono::{TimeZone, Utc}; -use lazy_static::lazy_static; -use prettytable::{Cell, Row, Table}; - -lazy_static!
{ - pub static ref META_ADDR: Mutex<String> = Mutex::new(String::new()); - pub static ref CLUSTER_NAME: Mutex<String> = Mutex::new(String::new()); -} - -pub const HTTP: &str = "http://"; -pub const API: &str = "/api/v1"; -pub const DEBUG: &str = "/debug"; -pub const CLUSTERS: &str = "/clusters"; -pub static CLUSTERS_LIST_HEADER: [&str; 7] = [ - "ID", - "Name", - "ShardTotal", - "TopologyType", - "ProcedureExecutingBatchSize", - "CreatedAt", - "ModifiedAt", -]; -pub static CLUSTERS_DIAGNOSE_HEADER: [&str; 4] = [ - "unregistered_shards", - "unready_shards:shard_id", - "unready_shards:node_name", - "unready_shards:status", -]; -pub static CLUSTERS_ENABLE_SCHEDULE_HEADER: [&str; 1] = ["enable_schedule"]; - -pub fn table_writer(header: &[&str]) -> Table { - let mut table = Table::new(); - let header_row = Row::from_iter(header.iter().map(|&entry| Cell::new(entry))); - table.add_row(header_row); - table -} - -pub fn format_time_milli(milli: i64) -> String { - let datetime = Utc.timestamp_millis_opt(milli).single().unwrap(); - datetime.format("%Y-%m-%d %H:%M:%S%.3f").to_string() -} diff --git a/horaedb/Cargo.lock b/horaedb/Cargo.lock deleted file mode 100644 index 6bb4454e9e..0000000000 --- a/horaedb/Cargo.lock +++ /dev/null @@ -1,3019 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 4 - -[[package]] -name = "addr2line" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5fb1d8e4442bd405fdfd1dacb42792696b0cf9cb15882e5d097b742a676d375" -dependencies = [ - "gimli", -] - -[[package]] -name = "adler2" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" - -[[package]] -name = "ahash" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" -dependencies = [ - "cfg-if", - "const-random", - "getrandom", - "once_cell", - "version_check", - "zerocopy", -] - -[[package]] -name = "aho-corasick" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" -dependencies = [ - "memchr", -] - -[[package]] -name = "alloc-no-stdlib" -version = "2.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" - -[[package]] -name = "alloc-stdlib" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" -dependencies = [ - "alloc-no-stdlib", -] - -[[package]] -name = "allocator-api2" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" - -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - -[[package]] -name = "anstream" -version = "0.6.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum
= "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" -dependencies = [ - "anstyle", - "anstyle-parse", - "anstyle-query", - "anstyle-wincon", - "colorchoice", - "is_terminal_polyfill", - "utf8parse", -] - -[[package]] -name = "anstyle" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" - -[[package]] -name = "anstyle-parse" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" -dependencies = [ - "utf8parse", -] - -[[package]] -name = "anstyle-query" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" -dependencies = [ - "windows-sys 0.59.0", -] - -[[package]] -name = "anstyle-wincon" -version = "3.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" -dependencies = [ - "anstyle", - "windows-sys 0.59.0", -] - -[[package]] -name = "anyhow" -version = "1.0.87" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10f00e1f6e58a40e807377c75c6a7f97bf9044fab57816f2414e6f5f4499d7b8" - -[[package]] -name = "arrayref" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d151e35f61089500b617991b791fc8bfd237ae50cd5950803758a179b41e67a" - -[[package]] -name = "arrayvec" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" - -[[package]] -name = "arrow" -version = "53.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4caf25cdc4a985f91df42ed9e9308e1adbcd341a31a72605c697033fcef163e3" -dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", -] - -[[package]] -name = "arrow-arith" -version = "53.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91f2dfd1a7ec0aca967dfaa616096aec49779adc8eccec005e2f5e4111b1192a" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "half", - "num", -] - -[[package]] -name = "arrow-array" -version = "53.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d39387ca628be747394890a6e47f138ceac1aa912eab64f02519fed24b637af8" -dependencies = [ - "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "chrono-tz", - "half", - "hashbrown", - "num", -] - -[[package]] -name = "arrow-buffer" -version = "53.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e51e05228852ffe3eb391ce7178a0f97d2cf80cc6ef91d3c4a6b3cb688049ec" -dependencies = [ - "bytes", - "half", - "num", -] - -[[package]] -name = "arrow-cast" -version = "53.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d09aea56ec9fa267f3f3f6cdab67d8a9974cbba90b3aa38c8fe9d0bb071bd8c1" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "atoi", - "base64", - "chrono", - "comfy-table", - "half", - "lexical-core", - "num", - "ryu", -] - -[[package]] -name = "arrow-csv" 
-version = "53.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c07b5232be87d115fde73e32f2ca7f1b353bff1b44ac422d3c6fc6ae38f11f0d" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "chrono", - "csv", - "csv-core", - "lazy_static", - "lexical-core", - "regex", -] - -[[package]] -name = "arrow-data" -version = "53.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b98ae0af50890b494cebd7d6b04b35e896205c1d1df7b29a6272c5d0d0249ef5" -dependencies = [ - "arrow-buffer", - "arrow-schema", - "half", - "num", -] - -[[package]] -name = "arrow-ipc" -version = "53.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ed91bdeaff5a1c00d28d8f73466bcb64d32bbd7093b5a30156b4b9f4dba3eee" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "flatbuffers", - "lz4_flex", -] - -[[package]] -name = "arrow-json" -version = "53.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0471f51260a5309307e5d409c9dc70aede1cd9cf1d4ff0f0a1e8e1a2dd0e0d3c" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "chrono", - "half", - "indexmap", - "lexical-core", - "num", - "serde", - "serde_json", -] - -[[package]] -name = "arrow-ord" -version = "53.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2883d7035e0b600fb4c30ce1e50e66e53d8656aa729f2bfa4b51d359cf3ded52" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "half", - "num", -] - -[[package]] -name = "arrow-row" -version = "53.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "552907e8e587a6fde4f8843fd7a27a576a260f65dab6c065741ea79f633fc5be" -dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "half", -] - -[[package]] -name = "arrow-schema" -version = "53.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "539ada65246b949bd99ffa0881a9a15a4a529448af1a07a9838dd78617dafab1" - -[[package]] -name = "arrow-select" -version = "53.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6259e566b752da6dceab91766ed8b2e67bf6270eb9ad8a6e07a33c1bede2b125" -dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "num", -] - -[[package]] -name = "arrow-string" -version = "53.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3179ccbd18ebf04277a095ba7321b93fd1f774f18816bd5f6b3ce2f594edb6c" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "memchr", - "num", - "regex", - "regex-syntax 0.8.4", -] - -[[package]] -name = "async-compression" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fec134f64e2bc57411226dfc4e52dec859ddfc7e711fc5e07b612584f000e4aa" -dependencies = [ - "bzip2", - "flate2", - "futures-core", - "futures-io", - "memchr", - "pin-project-lite", - "tokio", - "xz2", - "zstd", - "zstd-safe", -] - -[[package]] -name = "async-scoped" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4042078ea593edffc452eef14e99fdb2b120caa4ad9618bcdeabc4a023b98740" -dependencies = [ - "futures", - "pin-project", - "tokio", -] - -[[package]] -name = "async-trait" 
-version = "0.1.82" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a27b8a3a6e1a44fa4c8baf1f653e4172e81486d4941f2237e20dc2d0cf4ddff1" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "atoi" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" -dependencies = [ - "num-traits", -] - -[[package]] -name = "autocfg" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" - -[[package]] -name = "backtrace" -version = "0.3.74" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" -dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-targets", -] - -[[package]] -name = "base64" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" - -[[package]] -name = "blake2" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" -dependencies = [ - "digest", -] - -[[package]] -name = "blake3" -version = "1.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", -] - -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - -[[package]] -name = "brotli" -version = "7.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor", -] - -[[package]] -name = "brotli-decompressor" -version = "4.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", -] - -[[package]] -name = "bumpalo" -version = "3.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" - -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - -[[package]] -name = "bytes" -version = "1.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" - -[[package]] -name = "bytesize" -version = "1.3.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc" - -[[package]] -name = "bzip2" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" -dependencies = [ - "bzip2-sys", - "libc", -] - -[[package]] -name = "bzip2-sys" -version = "0.1.11+1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "cc" -version = "1.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b62ac837cdb5cb22e10a256099b4fc502b1dfe560cb282963a974d7abd80e476" -dependencies = [ - "jobserver", - "libc", - "shlex", -] - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "chrono" -version = "0.4.38" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" -dependencies = [ - "android-tzdata", - "iana-time-zone", - "num-traits", - "windows-targets", -] - -[[package]] -name = "chrono-tz" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd6dd8046d00723a59a2f8c5f295c515b9bb9a331ee4f8f3d4dd49e428acd3b6" -dependencies = [ - "chrono", - "chrono-tz-build", - "phf", -] - -[[package]] -name = "chrono-tz-build" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94fea34d77a245229e7746bd2beb786cd2a896f306ff491fb8cecb3074b10a7" -dependencies = [ - "parse-zoneinfo", - "phf_codegen", -] - -[[package]] -name = "colorchoice" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" - -[[package]] -name = "comfy-table" -version = "7.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" -dependencies = [ - "strum", - "strum_macros", - "unicode-width", -] - -[[package]] -name = "const-random" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" -dependencies = [ - "const-random-macro", -] - -[[package]] -name = "const-random-macro" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" -dependencies = [ - "getrandom", - "once_cell", - "tiny-keccak", -] - -[[package]] -name = "constant_time_eq" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" - -[[package]] -name = "core-foundation-sys" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" - -[[package]] -name = "cpufeatures" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" -dependencies = [ - "libc", -] - 
-[[package]] -name = "crc32fast" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" - -[[package]] -name = "crunchy" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" - -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "csv" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" -dependencies = [ - "csv-core", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "csv-core" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" -dependencies = [ - "memchr", -] - -[[package]] -name = "dashmap" -version = "6.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" -dependencies = [ - "cfg-if", - "crossbeam-utils", - "hashbrown", - "lock_api", - "once_cell", - "parking_lot_core", -] - -[[package]] -name = "datafusion" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" -dependencies = [ - "ahash", - "arrow", - "arrow-array", - "arrow-ipc", - "arrow-schema", - "async-compression", - "async-trait", - "bytes", - "bzip2", - "chrono", - "dashmap", - "datafusion-catalog", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-nested", - "datafusion-functions-window", - "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-optimizer", - "datafusion-physical-plan", - "datafusion-sql", - "flate2", - "futures", - "glob", - "half", - "hashbrown", - "indexmap", - "itertools 0.13.0", - "log", - "num_cpus", - "object_store", - "parking_lot", - "parquet", - "paste", - "pin-project-lite", - "rand", - "sqlparser", - "tempfile", - "tokio", - "tokio-util", - "url", - "uuid", - "xz2", - "zstd", -] - -[[package]] -name = "datafusion-catalog" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" -dependencies = [ - "arrow-schema", - "async-trait", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-plan", - "parking_lot", -] - -[[package]] -name = "datafusion-common" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" -dependencies = [ - "ahash", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", - "chrono", - "half", - "hashbrown", - "indexmap", - 
"instant", - "libc", - "num_cpus", - "object_store", - "parquet", - "paste", - "sqlparser", - "tokio", -] - -[[package]] -name = "datafusion-common-runtime" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" -dependencies = [ - "log", - "tokio", -] - -[[package]] -name = "datafusion-execution" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" -dependencies = [ - "arrow", - "chrono", - "dashmap", - "datafusion-common", - "datafusion-expr", - "futures", - "hashbrown", - "log", - "object_store", - "parking_lot", - "rand", - "tempfile", - "url", -] - -[[package]] -name = "datafusion-expr" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" -dependencies = [ - "ahash", - "arrow", - "arrow-array", - "arrow-buffer", - "chrono", - "datafusion-common", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-functions-window-common", - "datafusion-physical-expr-common", - "indexmap", - "paste", - "serde_json", - "sqlparser", - "strum", - "strum_macros", -] - -[[package]] -name = "datafusion-expr-common" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" -dependencies = [ - "arrow", - "datafusion-common", - "itertools 0.13.0", - "paste", -] - -[[package]] -name = "datafusion-functions" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" -dependencies = [ - "arrow", - "arrow-buffer", - "base64", - "blake2", - "blake3", - "chrono", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "hashbrown", - "hex", - "itertools 0.13.0", - "log", - "md-5", - "rand", - "regex", - "sha2", - "unicode-segmentation", - "uuid", -] - -[[package]] -name = "datafusion-functions-aggregate" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" -dependencies = [ - "ahash", - "arrow", - "arrow-schema", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "half", - "indexmap", - "log", - "paste", -] - -[[package]] -name = "datafusion-functions-aggregate-common" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" -dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-expr-common", - "datafusion-physical-expr-common", - "rand", -] - -[[package]] -name = "datafusion-functions-nested" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fca25bbb87323716d05e54114666e942172ccca23c5a507e9c7851db6e965317" -dependencies = [ - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-physical-expr-common", - "itertools 0.13.0", - "log", - "paste", - "rand", 
-] - -[[package]] -name = "datafusion-functions-window" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" -dependencies = [ - "datafusion-common", - "datafusion-expr", - "datafusion-functions-window-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "log", - "paste", -] - -[[package]] -name = "datafusion-functions-window-common" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" -dependencies = [ - "datafusion-common", - "datafusion-physical-expr-common", -] - -[[package]] -name = "datafusion-optimizer" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" -dependencies = [ - "arrow", - "async-trait", - "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr", - "hashbrown", - "indexmap", - "itertools 0.13.0", - "log", - "paste", - "regex-syntax 0.8.4", -] - -[[package]] -name = "datafusion-physical-expr" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" -dependencies = [ - "ahash", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", - "arrow-string", - "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr-common", - "half", - "hashbrown", - "indexmap", - "itertools 0.13.0", - "log", - "paste", - "petgraph", -] - -[[package]] -name = "datafusion-physical-expr-common" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" -dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-expr-common", - "hashbrown", - "rand", -] - -[[package]] -name = "datafusion-physical-optimizer" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" -dependencies = [ - "arrow", - "arrow-schema", - "datafusion-common", - "datafusion-execution", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-plan", - "itertools 0.13.0", -] - -[[package]] -name = "datafusion-physical-plan" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" -dependencies = [ - "ahash", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", - "async-trait", - "chrono", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-functions-window-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "futures", - "half", - "hashbrown", - "indexmap", - "itertools 0.13.0", - "log", - "once_cell", - "parking_lot", - "pin-project-lite", - "rand", - "tokio", -] - -[[package]] -name = "datafusion-sql" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" -dependencies = [ - "arrow", - "arrow-array", - 
"arrow-schema", - "datafusion-common", - "datafusion-expr", - "indexmap", - "log", - "regex", - "sqlparser", - "strum", -] - -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", - "subtle", -] - -[[package]] -name = "either" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" - -[[package]] -name = "env_filter" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab" -dependencies = [ - "log", -] - -[[package]] -name = "env_logger" -version = "0.11.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d" -dependencies = [ - "anstream", - "anstyle", - "env_filter", - "log", -] - -[[package]] -name = "equivalent" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" - -[[package]] -name = "errno" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "fastrand" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" - -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - -[[package]] -name = "flatbuffers" -version = "24.3.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8add37afff2d4ffa83bc748a70b4b1370984f6980768554182424ef71447c35f" -dependencies = [ - "bitflags 1.3.2", - "rustc_version", -] - -[[package]] -name = "flate2" -version = "1.0.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "324a1be68054ef05ad64b861cc9eaf1d623d2d8cb25b4bf2cb9cdd902b4bf253" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - -[[package]] -name = "form_urlencoded" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" -dependencies = [ - "percent-encoding", -] - -[[package]] -name = "futures" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-channel" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = "futures-core" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" - -[[package]] -name = 
"futures-executor" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-io" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" - -[[package]] -name = "futures-macro" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "futures-sink" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" - -[[package]] -name = "futures-task" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" - -[[package]] -name = "futures-util" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-macro", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "getrandom" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "gimli" -version = "0.31.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32085ea23f3234fc7846555e85283ba4de91e21016dc0455a16286d87a292d64" - -[[package]] -name = "glob" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" - -[[package]] -name = "half" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" -dependencies = [ - "cfg-if", - "crunchy", - "num-traits", -] - -[[package]] -name = "hashbrown" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", - "allocator-api2", -] - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - -[[package]] -name = 
"horaedb-server" -version = "2.0.0" -dependencies = [ - "futures", - "metric_engine", - "tokio", - "tracing", - "tracing-subscriber", -] - -[[package]] -name = "humantime" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" - -[[package]] -name = "iana-time-zone" -version = "0.1.60" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - -[[package]] -name = "idna" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" -dependencies = [ - "unicode-bidi", - "unicode-normalization", -] - -[[package]] -name = "indexmap" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" -dependencies = [ - "equivalent", - "hashbrown", -] - -[[package]] -name = "instant" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" -dependencies = [ - "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "integer-encoding" -version = "3.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" - -[[package]] -name = "is_terminal_polyfill" -version = "1.70.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" - -[[package]] -name = "itertools" -version = "0.3.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b73f1c685cfd8ff8d75698ed87e6188cd09944b30c0863d45c2c3699d1da0c" - -[[package]] -name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" - -[[package]] -name = "jobserver" -version = "0.1.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" -dependencies = [ - "libc", -] - -[[package]] -name = "js-sys" -version = "0.3.70" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" -dependencies = [ - "wasm-bindgen", -] - -[[package]] -name = "lazy_static" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" - -[[package]] -name = "lexical-core" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" -dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", -] - -[[package]] -name = "lexical-parse-float" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" -dependencies = [ - "lexical-parse-integer", - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-parse-integer" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-util" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" -dependencies = [ - "static_assertions", -] - -[[package]] -name = "lexical-write-float" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" -dependencies = [ - "lexical-util", - "lexical-write-integer", - "static_assertions", -] - -[[package]] -name = "lexical-write-integer" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "libc" -version = "0.2.158" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" - -[[package]] -name = "libm" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" - -[[package]] -name = "linux-raw-sys" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" - -[[package]] -name = "lock_api" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" - -[[package]] -name = "lz4_flex" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" -dependencies = [ - "twox-hash", -] - -[[package]] -name = "lzma-sys" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "macros" -version = "2.2.0-dev" - -[[package]] -name = "matchers" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" -dependencies = [ - "regex-automata 0.1.10", -] - -[[package]] -name = "md-5" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" -dependencies = [ - "cfg-if", - "digest", -] - -[[package]] -name = "memchr" -version = "2.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" - -[[package]] -name = "metric_engine" -version = "2.0.0" -dependencies = [ - "anyhow", - "arrow", - "arrow-schema", - "async-scoped", - "async-trait", - "byteorder", - "bytes", - "bytesize", - "datafusion", - "futures", - "itertools 0.3.25", - "lazy_static", - "macros", - "object_store", - "parquet", - "pb_types", - "prost", - "temp-dir", - "test-log", - "thiserror", - "tokio", - "tracing", -] - -[[package]] -name = "miniz_oxide" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" -dependencies = [ - "adler2", -] - -[[package]] -name = "mio" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" -dependencies = [ - "hermit-abi", - "libc", - "wasi", - "windows-sys 0.52.0", -] - -[[package]] -name = "multimap" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" - -[[package]] -name = "nu-ansi-term" -version = "0.46.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" -dependencies = [ - "overload", - "winapi", -] - -[[package]] -name = "num" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - -[[package]] -name = "num-bigint" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" -dependencies = [ - "num-integer", - "num-traits", -] - -[[package]] -name = "num-complex" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-integer" -version = "0.1.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", - "libm", -] - -[[package]] -name = "num_cpus" -version = "1.16.0" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "object" -version = "0.36.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a" -dependencies = [ - "memchr", -] - -[[package]] -name = "object_store" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25a0c4b3a0e31f8b66f71ad8064521efa773910196e2cde791436f13409f3b45" -dependencies = [ - "async-trait", - "bytes", - "chrono", - "futures", - "humantime", - "itertools 0.13.0", - "parking_lot", - "percent-encoding", - "snafu", - "tokio", - "tracing", - "url", - "walkdir", -] - -[[package]] -name = "once_cell" -version = "1.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" - -[[package]] -name = "ordered-float" -version = "2.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" -dependencies = [ - "num-traits", -] - -[[package]] -name = "overload" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" - -[[package]] -name = "parking_lot" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-targets", -] - -[[package]] -name = "parquet" -version = "53.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dea02606ba6f5e856561d8d507dba8bac060aefca2a6c0f1aa1d361fed91ff3e" -dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", - "base64", - "brotli", - "bytes", - "chrono", - "flate2", - "futures", - "half", - "hashbrown", - "lz4_flex", - "num", - "num-bigint", - "object_store", - "paste", - "seq-macro", - "snap", - "thrift", - "tokio", - "twox-hash", - "zstd", - "zstd-sys", -] - -[[package]] -name = "parse-zoneinfo" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" -dependencies = [ - "regex", -] - -[[package]] -name = "paste" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" - -[[package]] -name = "pb_types" -version = "2.0.0" -dependencies = [ - "prost", - "prost-build", -] - -[[package]] -name = "percent-encoding" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" - -[[package]] -name = "petgraph" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" -dependencies = [ - "fixedbitset", - "indexmap", -] - -[[package]] -name = "phf" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_codegen" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" -dependencies = [ - "phf_generator", - "phf_shared", -] - -[[package]] -name = "phf_generator" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" -dependencies = [ - "phf_shared", - "rand", -] - -[[package]] -name = "phf_shared" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" -dependencies = [ - "siphasher", -] - -[[package]] -name = "pin-project" -version = "1.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be57f64e946e500c8ee36ef6331845d40a93055567ec57e8fae13efd33759b95" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "pin-project-lite" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "pkg-config" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" - -[[package]] -name = "ppv-lite86" -version = "0.2.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" -dependencies = [ - "zerocopy", -] - -[[package]] -name = "prettyplease" -version = "0.2.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "910d41a655dac3b764f1ade94821093d3610248694320cd072303a8eedcf221d" -dependencies = [ - "proc-macro2", - "syn", -] - -[[package]] -name = "proc-macro2" -version = "1.0.86" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "prost" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" -dependencies = [ - "bytes", - "prost-derive", -] - -[[package]] -name = "prost-build" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" -dependencies = [ - "bytes", - "heck", - "itertools 0.13.0", - "log", - "multimap", - "once_cell", - "petgraph", - "prettyplease", - "prost", - "prost-types", 
- "regex", - "syn", - "tempfile", -] - -[[package]] -name = "prost-derive" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" -dependencies = [ - "anyhow", - "itertools 0.13.0", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "prost-types" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" -dependencies = [ - "prost", -] - -[[package]] -name = "quote" -version = "1.0.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - -[[package]] -name = "redox_syscall" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" -dependencies = [ - "bitflags 2.6.0", -] - -[[package]] -name = "regex" -version = "1.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata 0.4.7", - "regex-syntax 0.8.4", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" -dependencies = [ - "regex-syntax 0.6.29", -] - -[[package]] -name = "regex-automata" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax 0.8.4", -] - -[[package]] -name = "regex-syntax" -version = "0.6.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - -[[package]] -name = "regex-syntax" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" - -[[package]] -name = "rustc-demangle" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" - -[[package]] -name = "rustc_version" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" -dependencies = [ - "semver", -] - -[[package]] -name = "rustix" -version = "0.38.36" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f55e80d50763938498dd5ebb18647174e0c76dc38c5505294bb224624f30f36" -dependencies = [ - "bitflags 2.6.0", - "errno", - "libc", - "linux-raw-sys", - "windows-sys 0.52.0", -] - -[[package]] -name = "rustversion" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" - -[[package]] -name = "ryu" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "semver" -version = "1.0.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" - -[[package]] -name = "seq-macro" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" - -[[package]] -name = "serde" -version = "1.0.210" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.210" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.128" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" -dependencies = [ - "itoa", - "memchr", - "ryu", - "serde", -] - -[[package]] -name = "sha2" -version = "0.10.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "sharded-slab" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" -dependencies = [ - "lazy_static", -] - -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - -[[package]] -name = "signal-hook-registry" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" -dependencies = [ - "libc", -] - -[[package]] -name = "siphasher" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" - -[[package]] -name = "slab" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" -dependencies = [ - "autocfg", -] - -[[package]] -name = "smallvec" -version = "1.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" - -[[package]] -name = "snafu" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b835cb902660db3415a672d862905e791e54d306c6e8189168c7f3d9ae1c79d" -dependencies = [ - "snafu-derive", -] - -[[package]] -name = "snafu-derive" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d1e02fca405f6280643174a50c942219f0bbf4dbf7d480f1dd864d6f211ae5" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "snap" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" - -[[package]] -name = "socket2" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "sqlparser" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" -dependencies = [ - "log", - "sqlparser_derive", -] - -[[package]] -name = "sqlparser_derive" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - -[[package]] -name = "strum" -version = "0.26.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" -dependencies = [ - "strum_macros", -] - -[[package]] -name = "strum_macros" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn", -] - -[[package]] -name = "subtle" -version = "2.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" - -[[package]] -name = "syn" -version = "2.0.82" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83540f837a8afc019423a8edb95b52a8effe46957ee402287f4292fae35be021" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "temp-dir" -version = "0.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc1ee6eef34f12f765cb94725905c6312b6610ab2b0940889cfe58dae7bc3c72" - -[[package]] -name = "tempfile" -version = "3.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" -dependencies = [ - "cfg-if", - "fastrand", - "once_cell", - "rustix", - "windows-sys 0.59.0", -] - -[[package]] -name = "test-log" -version = "0.2.16" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dffced63c2b5c7be278154d76b479f9f9920ed34e7574201407f0b14e2bbb93" -dependencies = [ - "env_logger", - "test-log-macros", - "tracing-subscriber", -] - -[[package]] -name = "test-log-macros" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5999e24eaa32083191ba4e425deb75cdf25efefabe5aaccb7446dd0d4122a3f5" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "thiserror" -version = "1.0.63" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.63" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "thread_local" -version = "1.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" -dependencies = [ - "cfg-if", - "once_cell", -] - -[[package]] -name = "thrift" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" -dependencies = [ - "byteorder", - "integer-encoding", - "ordered-float", -] - -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - -[[package]] -name = "tinyvec" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - -[[package]] -name = "tokio" -version = "1.40.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" -dependencies = [ - "backtrace", - "bytes", - "libc", - "mio", - "parking_lot", - "pin-project-lite", - "signal-hook-registry", - "socket2", - "tokio-macros", - "windows-sys 0.52.0", -] - -[[package]] -name = "tokio-macros" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "tokio-util" -version = "0.7.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" -dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "tracing" -version = "0.1.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" -dependencies = [ - "pin-project-lite", - "tracing-attributes", - "tracing-core", -] - -[[package]] -name = "tracing-attributes" -version = "0.1.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "tracing-core" -version = "0.1.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" -dependencies = [ - "once_cell", - "valuable", -] - -[[package]] -name = "tracing-log" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" -dependencies = [ - "log", - "once_cell", - "tracing-core", -] - -[[package]] -name = "tracing-subscriber" -version = "0.3.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" -dependencies = [ - "matchers", - "nu-ansi-term", - "once_cell", - "regex", - "sharded-slab", - "smallvec", - "thread_local", - "tracing", - "tracing-core", - "tracing-log", -] - -[[package]] -name = "twox-hash" -version = "1.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" -dependencies = [ - "cfg-if", - "static_assertions", -] - -[[package]] -name = "typenum" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" - -[[package]] -name = "unicode-bidi" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" - -[[package]] -name = "unicode-ident" -version = "1.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" - -[[package]] -name = "unicode-normalization" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" -dependencies = [ - "tinyvec", -] - -[[package]] -name = "unicode-segmentation" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" - -[[package]] -name = "unicode-width" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" - -[[package]] -name = "url" -version = "2.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", -] - -[[package]] -name = "utf8parse" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" - -[[package]] -name = "uuid" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" -dependencies = [ - "getrandom", -] - -[[package]] -name = "valuable" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" - -[[package]] -name = "version_check" -version = "0.9.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" - -[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - "winapi-util", -] - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "wasm-bindgen" -version = "0.2.93" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" -dependencies = [ - "cfg-if", - "once_cell", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.93" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" -dependencies = [ - "bumpalo", - "log", - "once_cell", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.93" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.93" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.93" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" - -[[package]] -name = "web-sys" -version = "0.3.70" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" -dependencies = [ - "windows-sys 0.59.0", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-core" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" -dependencies = [ - "windows-targets", -] - -[[package]] -name = "windows-sys" -version = "0.52.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets", -] - -[[package]] -name = "windows-sys" -version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" -dependencies = [ - "windows-targets", -] - -[[package]] -name = "windows-targets" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" - -[[package]] -name = "xz2" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] - -[[package]] -name = "zerocopy" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" -dependencies = [ - "byteorder", - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "zstd" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" -dependencies = [ - "zstd-safe", -] - -[[package]] -name = "zstd-safe" 
-version = "7.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" -dependencies = [ - "zstd-sys", -] - -[[package]] -name = "zstd-sys" -version = "2.0.12+zstd.1.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" -dependencies = [ - "cc", - "pkg-config", -] diff --git a/horaedb/Cargo.toml b/horaedb/Cargo.toml deleted file mode 100644 index 0bc58ea0e8..0000000000 --- a/horaedb/Cargo.toml +++ /dev/null @@ -1,68 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[workspace.package] -version = "2.0.0" -authors = ["HoraeDB Authors"] -edition = "2021" -license = "Apache-2.0" - -[workspace] -resolver = "2" -members = ["metric_engine", "pb_types", "server"] - -[workspace.dependencies] -anyhow = { version = "1.0" } -metric_engine = { path = "metric_engine" } -thiserror = "1" -bytes = "1" -byteorder = "1" -datafusion = "43" -parquet = { version = "53" } -object_store = { version = "0.11" } -macros = { path = "../src/components/macros" } -pb_types = { path = "pb_types" } -prost = { version = "0.13" } -arrow = { version = "53", features = ["prettyprint"] } -bytesize = "1" -arrow-schema = "53" -tokio = { version = "1", features = ["full"] } -async-trait = "0.1" -async-stream = "0.3" -futures = "0.3" -temp-dir = "0.1" -itertools = "0.3" -lazy_static = "1" -tracing = "0.1" -tracing-subscriber = "0.3" -async-scoped = { version = "0.9.0", features = ["use-tokio"] } -test-log = "0.2" - -# This profile optimizes for good runtime performance. -[profile.release] -# reference: https://doc.rust-lang.org/rustc/codegen-options/index.html#codegen-units -codegen-units = 1 -debug = true -overflow-checks = true - -# This profile is used to produce a smaller (no symbols) binary with a little bit poorer performance, -# but with a faster speed and low memory consumption required by compiling. -[profile.release-slim] -inherits = "release" -codegen-units = 16 -debug = false -strip = true diff --git a/horaedb/Makefile b/horaedb/Makefile deleted file mode 100644 index 189b1c2b22..0000000000 --- a/horaedb/Makefile +++ /dev/null @@ -1,35 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -SHELL = /bin/bash - -clippy: - cargo clippy --all-targets --all-features -- -D warnings \ - -A dead_code -A unused_variables -A clippy::unreachable # Remove these once we have a clean build - -sort: - cargo sort --workspace --check - -fmt: - cargo fmt -- --check - -fix: - cargo fmt - cargo sort --workspace - -test: - cargo test --workspace diff --git a/horaedb/rust-toolchain.toml b/horaedb/rust-toolchain.toml deleted file mode 100644 index 4c621ca810..0000000000 --- a/horaedb/rust-toolchain.toml +++ /dev/null @@ -1,20 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[toolchain] -channel = "nightly-2024-10-15" -components = [ "rustfmt", "clippy" ] diff --git a/horaedb/server/Cargo.toml b/horaedb/server/Cargo.toml deleted file mode 100644 index 65dbc0bc18..0000000000 --- a/horaedb/server/Cargo.toml +++ /dev/null @@ -1,38 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "horaedb-server" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -futures = { workspace = true } -metric_engine = { workspace = true } -tokio = { workspace = true } -tracing = { workspace = true } -tracing-subscriber = { workspace = true } diff --git a/horaemeta/.golangci.yml b/horaemeta/.golangci.yml deleted file mode 100644 index 590dffc165..0000000000 --- a/horaemeta/.golangci.yml +++ /dev/null @@ -1,62 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -run: - timeout: '5m' - -output: - sort-results: true - -linters: - disable-all: true - enable: - - bodyclose - - dogsled - - errcheck - - goconst - - gocritic - - goimports - - goprintffuncname - - gosec - - gosimple - - govet - - ineffassign - - misspell - - nakedret - - exportloopref - - staticcheck - - stylecheck - - typecheck - - unconvert - - unused - - whitespace - - gocyclo - - exhaustive - - typecheck - - asciicheck - #- errorlint" # disbale it for now, because the error style seems inconsistent with the one required by the linter - - revive - - exhaustruct - -linters-settings: - revive: - ignore-generated-header: false - severity: warning - confidence: 3 - exhaustruct: - include: - - 'github.com.apache.incubator-horaedb-meta.*' diff --git a/horaemeta/CONTRIBUTING.md b/horaemeta/CONTRIBUTING.md deleted file mode 100644 index e361cf0b59..0000000000 --- a/horaemeta/CONTRIBUTING.md +++ /dev/null @@ -1,77 +0,0 @@ -# Contributing - -Thank you for thinking of contributing! We very much welcome contributions from the community. -To make the process easier and more valuable for everyone involved we have a few rules and guidelines to follow. - -## Submitting Issues and Feature Requests - -Before you file an [issue](https://github.com/apache/horaedb/issues/new), please search existing issues in case the same or similar issues have already been filed. -If you find an existing open ticket covering your issue then please avoid adding "👍" or "me too" comments; Github notifications can cause a lot of noise for the project maintainers who triage the back-log. -However, if you have a new piece of information for an existing ticket and you think it may help the investigation or resolution, then please do add it as a comment! -You can signal to the team that you're experiencing an existing issue with one of Github's emoji reactions (these are a good way to add "weight" to an issue from a prioritisation perspective). - -### Submitting an Issue - -The [New Issue]((https://github.com/apache/horaedb/issues/new)) page has templates for both bug reports and feature requests. -Please fill one of them out! -The issue templates provide details on what information we will find useful to help us fix an issue. -In short though, the more information you can provide us about your environment and what behaviour you're seeing, the easier we can fix the issue. -If you can push a PR with test cases that trigger a defect or bug, even better! - -As well as bug reports we also welcome feature requests (there is a dedicated issue template for these). -Typically, the maintainers will periodically review community feature requests and make decisions about if we want to add them. -For features we don't plan to support we will close the feature request ticket (so, again, please check closed tickets for feature requests before submitting them). 
- -## Contributing Changes - -Please see the [Style Guide](docs/style_guide.md) for more details. - -### Making a PR - -To open a PR you will need to have a Github account. -Fork the `horaemeta` repo and work on a branch on your fork. -When you have completed your changes, or you want some incremental feedback make a Pull Request to HoraeDB [here](https://github.com/apache/horaedb/compare). - -If you want to discuss some work in progress then please prefix `[WIP]` to the -PR title. - -For PRs that you consider ready for review, verify the following locally before you submit it: - -* you have a coherent set of logical commits, with messages conforming to the [Conventional Commits](https://horaedb.apache.org/docs/dev/conventional_commit/) specification; -* all the tests and/or benchmarks pass, including documentation tests; -* the code is correctly formatted and all linter checks pass; and -* you haven't left any "code cruft" (commented out code blocks etc). - -There are some tips on verifying the above in the [next section](#running-tests). - -**After** submitting a PR, you should: - -* verify that all CI status checks pass and the PR is 💚; -* ask for help on the PR if any of the status checks are 🔴, and you don't know why; -* wait patiently for one of the team to review your PR, which could take a few days. - -## Building the project - -The rule for building the project has been configured in the [Makefile](./Makefile) so just run: - -```shell -make build -``` - -## Running Tests - -The rule for running test has been configured in the [Makefile](./Makefile) so just run: - -```shell -make test -``` - -## Running linter check - -CI will check the code formatting and best practices by some specific linters. - -And you can run the check locally by the command: - -```shell -make check -``` diff --git a/horaemeta/DEPENDENCIES.csv b/horaemeta/DEPENDENCIES.csv deleted file mode 100644 index bd0189b283..0000000000 --- a/horaemeta/DEPENDENCIES.csv +++ /dev/null @@ -1,77 +0,0 @@ -github.com/apache/incubator-horaedb-meta,https://github.com/apache/incubator-horaedb-meta/blob/HEAD/licenserc.toml,Apache-2.0 -github.com/apache/incubator-horaedb-proto/golang/pkg,https://github.com/apache/incubator-horaedb-proto/blob/92152841fc8a/golang/LICENSE,Apache-2.0 -github.com/beorn7/perks/quantile,https://github.com/beorn7/perks/blob/v1.0.1/LICENSE,MIT -github.com/caarlos0/env/v6,https://github.com/caarlos0/env/blob/v6.10.1/LICENSE.md,MIT -github.com/cenkalti/backoff/v4,https://github.com/cenkalti/backoff/blob/v4.2.1/LICENSE,MIT -github.com/cespare/xxhash/v2,https://github.com/cespare/xxhash/blob/v2.2.0/LICENSE.txt,MIT -github.com/coreos/go-semver/semver,https://github.com/coreos/go-semver/blob/v0.3.1/LICENSE,Apache-2.0 -github.com/coreos/go-systemd/v22/journal,https://github.com/coreos/go-systemd/blob/v22.5.0/LICENSE,Apache-2.0 -github.com/davecgh/go-spew/spew,https://github.com/davecgh/go-spew/blob/v1.1.1/LICENSE,ISC -github.com/dustin/go-humanize,https://github.com/dustin/go-humanize/blob/v1.0.1/LICENSE,MIT -github.com/go-logr/logr,https://github.com/go-logr/logr/blob/v1.4.1/LICENSE,Apache-2.0 -github.com/go-logr/stdr,https://github.com/go-logr/stdr/blob/v1.2.2/LICENSE,Apache-2.0 -github.com/gogo/protobuf,https://github.com/gogo/protobuf/blob/v1.3.2/LICENSE,BSD-3-Clause -github.com/golang-jwt/jwt/v4,https://github.com/golang-jwt/jwt/blob/v4.5.0/LICENSE,MIT -github.com/golang/protobuf,https://github.com/golang/protobuf/blob/v1.5.4/LICENSE,BSD-3-Clause 
-github.com/google/btree,https://github.com/google/btree/blob/v1.1.2/LICENSE,Apache-2.0 -github.com/gorilla/websocket,https://github.com/gorilla/websocket/blob/v1.5.1/LICENSE,BSD-3-Clause -github.com/grpc-ecosystem/go-grpc-middleware,https://github.com/grpc-ecosystem/go-grpc-middleware/blob/v1.4.0/LICENSE,Apache-2.0 -github.com/grpc-ecosystem/go-grpc-prometheus,https://github.com/grpc-ecosystem/go-grpc-prometheus/blob/v1.2.0/LICENSE,Apache-2.0 -github.com/grpc-ecosystem/grpc-gateway,https://github.com/grpc-ecosystem/grpc-gateway/blob/v1.16.0/LICENSE.txt,BSD-3-Clause -github.com/grpc-ecosystem/grpc-gateway/v2,https://github.com/grpc-ecosystem/grpc-gateway/blob/v2.19.1/LICENSE,BSD-3-Clause -github.com/jonboulle/clockwork,https://github.com/jonboulle/clockwork/blob/v0.4.0/LICENSE,Apache-2.0 -github.com/json-iterator/go,https://github.com/json-iterator/go/blob/v1.1.12/LICENSE,MIT -github.com/julienschmidt/httprouter,https://github.com/julienschmidt/httprouter/blob/v1.3.0/LICENSE,BSD-3-Clause -github.com/looplab/fsm,https://github.com/looplab/fsm/blob/v0.3.0/LICENSE,Apache-2.0 -github.com/modern-go/concurrent,https://github.com/modern-go/concurrent/blob/bacd9c7ef1dd/LICENSE,Apache-2.0 -github.com/modern-go/reflect2,https://github.com/modern-go/reflect2/blob/v1.0.2/LICENSE,Apache-2.0 -github.com/pelletier/go-toml/v2,https://github.com/pelletier/go-toml/blob/v2.0.6/LICENSE,MIT -github.com/pingcap/log,https://github.com/pingcap/log/blob/v1.1.0/LICENSE,Apache-2.0 -github.com/pkg/errors,https://github.com/pkg/errors/blob/v0.9.1/LICENSE,BSD-2-Clause -github.com/pmezard/go-difflib/difflib,https://github.com/pmezard/go-difflib/blob/v1.0.0/LICENSE,BSD-3-Clause -github.com/prometheus/client_golang/prometheus,https://github.com/prometheus/client_golang/blob/v1.19.0/LICENSE,Apache-2.0 -github.com/prometheus/client_model/go,https://github.com/prometheus/client_model/blob/v0.6.0/LICENSE,Apache-2.0 -github.com/prometheus/common,https://github.com/prometheus/common/blob/v0.50.0/LICENSE,Apache-2.0 -github.com/prometheus/common/internal/bitbucket.org/ww/goautoneg,https://github.com/prometheus/common/blob/v0.50.0/internal/bitbucket.org/ww/goautoneg/README.txt,BSD-3-Clause -github.com/prometheus/procfs,https://github.com/prometheus/procfs/blob/v0.13.0/LICENSE,Apache-2.0 -github.com/sirupsen/logrus,https://github.com/sirupsen/logrus/blob/v1.9.3/LICENSE,MIT -github.com/soheilhy/cmux,https://github.com/soheilhy/cmux/blob/v0.1.5/LICENSE,Apache-2.0 -github.com/spaolacci/murmur3,https://github.com/spaolacci/murmur3/blob/v1.1.0/LICENSE,BSD-3-Clause -github.com/spf13/pflag,https://github.com/spf13/pflag/blob/v1.0.5/LICENSE,BSD-3-Clause -github.com/stretchr/testify,https://github.com/stretchr/testify/blob/v1.8.4/LICENSE,MIT -github.com/tikv/pd/pkg/tempurl,https://github.com/tikv/pd/blob/v2.1.19/LICENSE,Apache-2.0 -github.com/tmc/grpc-websocket-proxy/wsproxy,https://github.com/tmc/grpc-websocket-proxy/blob/673ab2c3ae75/LICENSE,MIT -github.com/xiang90/probing,https://github.com/xiang90/probing/blob/a49e3df8f510/LICENSE,MIT -go.etcd.io/bbolt,https://github.com/etcd-io/bbolt/blob/v1.3.9/LICENSE,MIT -go.etcd.io/etcd/api/v3,https://github.com/etcd-io/etcd/blob/api/v3.5.12/api/LICENSE,Apache-2.0 -go.etcd.io/etcd/client/pkg/v3,https://github.com/etcd-io/etcd/blob/client/pkg/v3.5.12/client/pkg/LICENSE,Apache-2.0 -go.etcd.io/etcd/client/v2,https://github.com/etcd-io/etcd/blob/client/v2.305.12/client/v2/LICENSE,Apache-2.0 -go.etcd.io/etcd/client/v3,https://github.com/etcd-io/etcd/blob/client/v3.5.12/client/v3/LICENSE,Apache-2.0 
-go.etcd.io/etcd/pkg/v3,https://github.com/etcd-io/etcd/blob/pkg/v3.5.12/pkg/LICENSE,Apache-2.0 -go.etcd.io/etcd/raft/v3,https://github.com/etcd-io/etcd/blob/raft/v3.5.12/raft/LICENSE,Apache-2.0 -go.etcd.io/etcd/server/v3,https://github.com/etcd-io/etcd/blob/server/v3.5.12/server/LICENSE,Apache-2.0 -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc,https://github.com/open-telemetry/opentelemetry-go-contrib/blob/instrumentation/google.golang.org/grpc/otelgrpc/v0.49.0/instrumentation/google.golang.org/grpc/otelgrpc/LICENSE,Apache-2.0 -go.opentelemetry.io/otel,https://github.com/open-telemetry/opentelemetry-go/blob/v1.24.0/LICENSE,Apache-2.0 -go.opentelemetry.io/otel/exporters/otlp/otlptrace,https://github.com/open-telemetry/opentelemetry-go/blob/exporters/otlp/otlptrace/v1.24.0/exporters/otlp/otlptrace/LICENSE,Apache-2.0 -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc,https://github.com/open-telemetry/opentelemetry-go/blob/exporters/otlp/otlptrace/otlptracegrpc/v1.24.0/exporters/otlp/otlptrace/otlptracegrpc/LICENSE,Apache-2.0 -go.opentelemetry.io/otel/metric,https://github.com/open-telemetry/opentelemetry-go/blob/metric/v1.24.0/metric/LICENSE,Apache-2.0 -go.opentelemetry.io/otel/sdk,https://github.com/open-telemetry/opentelemetry-go/blob/sdk/v1.24.0/sdk/LICENSE,Apache-2.0 -go.opentelemetry.io/otel/trace,https://github.com/open-telemetry/opentelemetry-go/blob/trace/v1.24.0/trace/LICENSE,Apache-2.0 -go.opentelemetry.io/proto/otlp,https://github.com/open-telemetry/opentelemetry-proto-go/blob/otlp/v1.1.0/otlp/LICENSE,Apache-2.0 -go.uber.org/multierr,https://github.com/uber-go/multierr/blob/v1.11.0/LICENSE.txt,MIT -go.uber.org/zap,https://github.com/uber-go/zap/blob/v1.27.0/LICENSE,MIT -golang.org/x/crypto,https://cs.opensource.google/go/x/crypto/+/v0.21.0:LICENSE,BSD-3-Clause -golang.org/x/net,https://cs.opensource.google/go/x/net/+/v0.22.0:LICENSE,BSD-3-Clause -golang.org/x/sync/errgroup,https://cs.opensource.google/go/x/sync/+/v0.6.0:LICENSE,BSD-3-Clause -golang.org/x/sys/unix,https://cs.opensource.google/go/x/sys/+/v0.18.0:LICENSE,BSD-3-Clause -golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.14.0:LICENSE,BSD-3-Clause -golang.org/x/time/rate,https://cs.opensource.google/go/x/time/+/v0.5.0:LICENSE,BSD-3-Clause -google.golang.org/genproto/googleapis/api,https://github.com/googleapis/go-genproto/blob/29370a3891b7/googleapis/api/LICENSE,Apache-2.0 -google.golang.org/genproto/googleapis/rpc,https://github.com/googleapis/go-genproto/blob/29370a3891b7/googleapis/rpc/LICENSE,Apache-2.0 -google.golang.org/genproto/protobuf/field_mask,https://github.com/googleapis/go-genproto/blob/29370a3891b7/LICENSE,Apache-2.0 -google.golang.org/grpc,https://github.com/grpc/grpc-go/blob/v1.62.1/LICENSE,Apache-2.0 -google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.33.0/LICENSE,BSD-3-Clause -gopkg.in/natefinch/lumberjack.v2,https://github.com/natefinch/lumberjack/blob/v2.2.1/LICENSE,MIT -gopkg.in/yaml.v3,https://github.com/go-yaml/yaml/blob/v3.0.1/LICENSE,MIT -sigs.k8s.io/yaml,https://github.com/kubernetes-sigs/yaml/blob/v1.4.0/LICENSE,Apache-2.0 -sigs.k8s.io/yaml/goyaml.v2,https://github.com/kubernetes-sigs/yaml/blob/v1.4.0/goyaml.v2/LICENSE,Apache-2.0 diff --git a/horaemeta/Dockerfile b/horaemeta/Dockerfile deleted file mode 100644 index 6983d744af..0000000000 --- a/horaemeta/Dockerfile +++ /dev/null @@ -1,53 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -## Builder -ARG GOLANG_VERSION=1.21.3 -FROM golang:${GOLANG_VERSION}-bullseye as build - -# cache mounts below may already exist and owned by root -USER root - -RUN apt update && apt install --yes gcc g++ libssl-dev pkg-config cmake && rm -rf /var/lib/apt/lists/* - -COPY . /horaemeta -WORKDIR /horaemeta - -RUN make build - -## HoraeMeta -FROM ubuntu:20.04 - -RUN useradd -m -s /bin/bash horae - -RUN apt update && \ - apt install --yes curl gdb iotop cron vim less net-tools && \ - apt clean - -COPY --from=build /horaemeta/bin/horaemeta-server /usr/bin/horaemeta-server -COPY --from=build /horaemeta/docker/entrypoint.sh /entrypoint.sh -COPY --from=build /horaemeta/config/example-standalone.toml /etc/horaemeta/horaemeta.toml - -RUN chmod +x /usr/bin/horaemeta-server - -ARG TINI_VERSION=v0.19.0 -ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini -RUN chmod +x /tini - -ARG USER horae - -ENTRYPOINT ["/tini", "--", "/entrypoint.sh"] diff --git a/horaemeta/Makefile b/horaemeta/Makefile deleted file mode 100644 index b2a8e55903..0000000000 --- a/horaemeta/Makefile +++ /dev/null @@ -1,48 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -GO_TOOLS_BIN_PATH := $(shell pwd)/.tools/bin -PATH := $(GO_TOOLS_BIN_PATH):$(PATH) -SHELL := env PATH='$(PATH)' GOBIN='$(GO_TOOLS_BIN_PATH)' $(shell which bash) -ROOT = $(shell pwd) - -COMMIT_ID := $(shell git rev-parse HEAD) -BRANCH_NAME := $(shell git rev-parse --abbrev-ref HEAD) -BUILD_DATE := $(shell date +'%Y/%m/%dT%H:%M:%S') - -default: build - -install-tools: - @mkdir -p $(GO_TOOLS_BIN_PATH) - @(which golangci-lint && golangci-lint version | grep '1.54') >/dev/null 2>&1 || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(GO_TOOLS_BIN_PATH) v1.54.2 - -META_PKG := github.com/apache/incubator-horaedb-meta -PACKAGES := $(shell go list ./... | tail -n +2) -PACKAGE_DIRECTORIES := $(subst $(META_PKG)/,,$(PACKAGES)) - -check: - @ echo "gofmt ..." 
- @ gofmt -s -l -d $(PACKAGE_DIRECTORIES) 2>&1 | awk '{ print } END { if (NR > 0) { exit 1 } }' - @ echo "golangci-lint ..." - @ golangci-lint run $(PACKAGE_DIRECTORIES) --config .golangci.yml - -test: - @ echo "go test ..." - @ go test -timeout 5m -coverprofile=coverage.txt -covermode=atomic $(PACKAGES) - -build: - @ go build -ldflags="-X main.commitID=$(COMMIT_ID) -X main.branchName=$(BRANCH_NAME) -X main.buildDate=$(BUILD_DATE)" -o bin/horaemeta-server ./cmd/horaemeta-server diff --git a/horaemeta/README.md b/horaemeta/README.md deleted file mode 100644 index 79dd40effd..0000000000 --- a/horaemeta/README.md +++ /dev/null @@ -1,51 +0,0 @@ -# HoraeMeta - -[![codecov](https://codecov.io/gh/apache/incubator-horaedb-meta/branch/main/graph/badge.svg?token=VTYXEAB2WU)](https://codecov.io/gh/apache/incubator-horaedb-meta) -![License](https://img.shields.io/badge/license-Apache--2.0-green.svg) - -HoraeMeta is the meta service for managing the Apache HoraeDB (incubating) cluster. - -## Status -The project is in a very early stage. - -## Quick Start -### Build HoraeMeta binary -```bash -make build -``` - -### Standalone Mode -Although HoraeMeta is designed to deployed as a cluster with three or more instances, it can also be started standalone: -```bash -# HoraeMeta0 -mkdir /tmp/meta0 -./bin/horaemeta-server --config ./config/example-standalone.toml -``` - -### Cluster mode -Here is an example for starting HoraeMeta in cluster mode (three instances) on single machine by using different ports: -```bash -# Create directories. -mkdir /tmp/meta0 -mkdir /tmp/meta1 -mkdir /tmp/meta2 - -# horaemeta0 -./bin/horaemeta-server --config ./config/exampl-cluster0.toml - -# horaemeta1 -./bin/horaemeta-server --config ./config/exampl-cluster1.toml - -# horaemeta2 -./bin/horaemeta-server --config ./config/exampl-cluster2.toml -``` - -## Acknowledgment -HoraeMeta refers to the excellent project [pd](https://github.com/tikv/pd) in design and some module and codes are forked from [pd](https://github.com/tikv/pd), thanks to the TiKV team. - -## Contributing -The project is under rapid development so that any contribution is welcome. -Check our [Contributing Guide](https://github.com/apache/horaedb/blob/main/horaemeta/CONTRIBUTING.md) and make your first contribution! - -## License -[Apache License 2.0](./LICENSE) diff --git a/horaemeta/cmd/horaemeta-server/main.go b/horaemeta/cmd/horaemeta-server/main.go deleted file mode 100644 index b9c79bf03b..0000000000 --- a/horaemeta/cmd/horaemeta-server/main.go +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package main - -import ( - "context" - "fmt" - "os" - "os/signal" - "syscall" - - "github.com/apache/incubator-horaedb-meta/pkg/coderr" - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server" - "github.com/apache/incubator-horaedb-meta/server/config" - "github.com/pelletier/go-toml/v2" - "go.uber.org/zap" -) - -var ( - buildDate string - branchName string - commitID string -) - -func buildVersion() string { - return fmt.Sprintf("HoraeMeta Server\nGit commit:%s\nGit branch:%s\nBuild date:%s", commitID, branchName, buildDate) -} - -func panicf(format string, args ...any) { - msg := fmt.Sprintf(format, args...) - panic(msg) -} - -func main() { - cfgParser, err := config.MakeConfigParser() - if err != nil { - panicf("fail to generate config builder, err:%v", err) - } - - cfg, err := cfgParser.Parse(os.Args[1:]) - if coderr.Is(err, coderr.PrintHelpUsage) { - return - } - - if err != nil { - panicf("fail to parse config from command line params, err:%v", err) - } - - if cfgParser.NeedPrintVersion() { - println(buildVersion()) - return - } - - if err := cfg.ValidateAndAdjust(); err != nil { - panicf("invalid config, err:%v", err) - } - - if err := cfgParser.ParseConfigFromToml(); err != nil { - panicf("fail to parse config from toml file, err:%v", err) - } - - if err := cfgParser.ParseConfigFromEnv(); err != nil { - panicf("fail to parse config from environment variable, err:%v", err) - } - - cfgByte, err := toml.Marshal(cfg) - if err != nil { - panicf("fail to marshal server config, err:%v", err) - } - - if err = os.MkdirAll(cfg.DataDir, os.ModePerm); err != nil { - panicf("fail to create data dir, data_dir:%v, err:%v", cfg.DataDir, err) - } - - logger, err := log.InitGlobalLogger(&cfg.Log) - if err != nil { - panicf("fail to init global logger, err:%v", err) - } - defer logger.Sync() //nolint:errcheck - log.Info(fmt.Sprintf("server start with version: %s", buildVersion())) - // TODO: Do adjustment to config for preparing joining existing cluster. - log.Info("server start with config", zap.String("config", string(cfgByte))) - - srv, err := server.CreateServer(cfg) - if err != nil { - log.Error("fail to create server", zap.Error(err)) - return - } - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - sc := make(chan os.Signal, 1) - signal.Notify(sc, - syscall.SIGHUP, - syscall.SIGINT, - syscall.SIGTERM, - syscall.SIGQUIT) - - var sig os.Signal - go func() { - sig = <-sc - cancel() - }() - - if err := srv.Run(ctx); err != nil { - log.Error("fail to run server", zap.Error(err)) - return - } - - <-ctx.Done() - log.Info("got signal to exit", zap.Any("signal", sig)) - - srv.Close() -} diff --git a/horaemeta/config/example-cluster0.toml b/horaemeta/config/example-cluster0.toml deleted file mode 100644 index 0ca39143a7..0000000000 --- a/horaemeta/config/example-cluster0.toml +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -etcd-start-timeout-ms = 30000 -peer-urls = "http://127.0.0.1:2380" -advertise-client-urls = "http://127.0.0.1:2379" -advertise-peer-urls = "http://127.0.0.1:2380" -client-urls = "http://127.0.0.1:2379" -data-dir = "/tmp/meta0" -node-name = "meta0" -initial-cluster = "meta0=http://127.0.0.1:2380,meta1=http://127.0.0.1:12380,meta2=http://127.0.0.1:22380" -default-cluster-node-count = 2 -http-port = 8080 -grpc-port = 2379 - -[log] -level = "info" - -[etcd-log] -level = "info" diff --git a/horaemeta/config/example-cluster1.toml b/horaemeta/config/example-cluster1.toml deleted file mode 100644 index de359fba62..0000000000 --- a/horaemeta/config/example-cluster1.toml +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -etcd-start-timeout-ms = 30000 -peer-urls = "http://127.0.0.1:12380" -advertise-client-urls = "http://127.0.0.1:12379" -advertise-peer-urls = "http://127.0.0.1:12380" -client-urls = "http://127.0.0.1:12379" -data-dir = "/tmp/meta1" -node-name = "meta1" -initial-cluster = "meta0=http://127.0.0.1:2380,meta1=http://127.0.0.1:12380,meta2=http://127.0.0.1:22380" -default-cluster-node-count = 2 -http-port = 8081 -grpc-port = 12379 - -[log] -level = "info" - -[etcd-log] -level = "info" diff --git a/horaemeta/config/example-cluster2.toml b/horaemeta/config/example-cluster2.toml deleted file mode 100644 index 36461d0d6f..0000000000 --- a/horaemeta/config/example-cluster2.toml +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -etcd-start-timeout-ms = 30000 -peer-urls = "http://127.0.0.1:22380" -advertise-client-urls = "http://127.0.0.1:22379" -advertise-peer-urls = "http://127.0.0.1:22380" -client-urls = "http://127.0.0.1:22379" -data-dir = "/tmp/meta2" -node-name = "meta2" -initial-cluster = "meta0=http://127.0.0.1:2380,meta1=http://127.0.0.1:12380,meta2=http://127.0.0.1:22380" -default-cluster-node-count = 2 -http-port = 8082 -grpc-port = 22379 - -[log] -level = "info" - -[etcd-log] -level = "info" diff --git a/horaemeta/config/example-standalone.toml b/horaemeta/config/example-standalone.toml deleted file mode 100644 index 6650bdd270..0000000000 --- a/horaemeta/config/example-standalone.toml +++ /dev/null @@ -1,32 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -etcd-start-timeout-ms = 30000 -peer-urls = "http://127.0.0.1:2380" -advertise-client-urls = "http://127.0.0.1:2379" -advertise-peer-urls = "http://127.0.0.1:2380" -client-urls = "http://127.0.0.1:2379" -data-dir = "/tmp/meta0" -node-name = "meta0" -initial-cluster = "meta0=http://127.0.0.1:2380" -default-cluster-node-count = 1 - -[log] -level = "info" - -[etcd-log] -level = "info" diff --git a/horaemeta/docker/entrypoint.sh b/horaemeta/docker/entrypoint.sh deleted file mode 100755 index d080b7c285..0000000000 --- a/horaemeta/docker/entrypoint.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-
-
-
-
-set -exo pipefail
-
-## init variables
-USER="horae"
-DATA_DIR="/tmp/horaemeta0"
-CONFIG_FILE="/etc/horaemeta/horaemeta.toml"
-
-## data dir
-mkdir -p ${DATA_DIR}
-chmod +777 -R ${DATA_DIR}
-chown -R ${USER}.${USER} ${DATA_DIR}
-
-exec /usr/bin/horaemeta-server --config ${CONFIG_FILE}
diff --git a/horaemeta/docs/style_guide.md b/horaemeta/docs/style_guide.md
deleted file mode 100644
index 8f79804144..0000000000
--- a/horaemeta/docs/style_guide.md
+++ /dev/null
@@ -1,111 +0,0 @@
-# Style Guide
-HoraeMeta is written in Golang, so the basic code style we adhere to is the [CodeReviewComments](https://github.com/golang/go/wiki/CodeReviewComments).
-
-Besides the [CodeReviewComments](https://github.com/golang/go/wiki/CodeReviewComments), there are also some custom rules for the project:
-- Error Handling
-- Logging
-
-## Error Handling
-### Principles
-- Global error code:
-  - Any error defined in the repo should be assigned an error code.
-  - An error code can be shared by multiple different errors.
-  - The error codes are defined in the single global package [coderr](https://github.com/apache/incubator-horaedb-meta/tree/main/pkg/coderr).
-- Construct: define leaf errors at the package level (often in a separate `error.go` file) with the package [coderr](https://github.com/apache/incubator-horaedb-meta/tree/main/pkg/coderr).
-- Wrap: wrap errors with `errors.WithMessage` or `errors.WithMessagef`.
-- Check: test the error identity by calling `coderr.Is`.
-- Log: only log the error in the top-level package.
-- Respond: respond with the `CodeError` (defined in the package [coderr](https://github.com/apache/incubator-horaedb-meta/tree/main/pkg/coderr)), unwrapped by `errors.Cause`, to the client at the service level.
-
-### Example
-`errors.go` in the package `server`:
-```go
-var ErrStartEtcd = coderr.NewCodeError(coderr.Internal, "start embed etcd")
-var ErrStartEtcdTimeout = coderr.NewCodeError(coderr.Internal, "start etcd server timeout")
-```
-
-`server.go` in the package `server`:
-```go
-func (srv *Server) startEtcd() error {
-	etcdSrv, err := embed.StartEtcd(srv.etcdCfg)
-	if err != nil {
-		return ErrStartEtcd.WithCause(err)
-	}
-
-	newCtx, cancel := context.WithTimeout(srv.ctx, srv.cfg.EtcdStartTimeout())
-	defer cancel()
-
-	select {
-	case <-etcdSrv.Server.ReadyNotify():
-	case <-newCtx.Done():
-		return ErrStartEtcdTimeout.WithCausef("timeout is:%v", srv.cfg.EtcdStartTimeout())
-	}
-
-	return nil
-}
-```
-
-`main.go` in the package `main`:
-```go
-func main() {
-	err := srv.startEtcd()
-	if err == nil {
-		return
-	}
-	if coderr.Is(err, coderr.Internal) {
-		log.Error("internal error")
-	}
-
-	cerr, ok := err.(coderr.CodeError)
-	if ok {
-		log.Error("found a CodeError", zap.Int("code", int(cerr.Code())))
-	} else {
-		log.Error("not a CodeError")
-	}
-
-	return
-}
-```
-
-## Logging
-### Principles
-- Structured logging via [zap](https://github.com/uber-go/zap).
-- Use the package `github.com/apache/incubator-horaedb-meta/pkg/log`, which is based on [zap](https://github.com/uber-go/zap).
-- Create a local logger with common fields if necessary.
-
-### Example
-Normal usage:
-```go
-import "github.com/apache/incubator-horaedb-meta/pkg/log"
-
-func main() {
-	if err := srv.Run(); err != nil {
-		log.Error("fail to run server", zap.Error(err))
-		return
-	}
-}
-```
-
-Local logger:
-```go
-import "github.com/apache/incubator-horaedb-meta/pkg/log"
-
-type lease struct {
-	ID     int64
-	logger *zap.Logger
-}
-
-func NewLease(ID int64) *lease {
-	logger := log.With(zap.Int64("lease-id", ID))
-
-	return &lease{
-		ID,
-		logger,
-	}
-}
-
-func (l *lease) Close() {
-	l.logger.Info("lease is closed")
-	l.ID = 0
-}
-```
diff --git a/horaemeta/go.mod b/horaemeta/go.mod
deleted file mode 100644
index b60d1e357b..0000000000
--- a/horaemeta/go.mod
+++ /dev/null
@@ -1,100 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-module github.com/apache/incubator-horaedb-meta
-
-go 1.21
-
-require (
-	github.com/apache/incubator-horaedb-proto/golang v0.0.0-20231228071726-92152841fc8a
-	github.com/caarlos0/env/v6 v6.10.1
-	github.com/julienschmidt/httprouter v1.3.0
-	github.com/looplab/fsm v0.3.0
-	github.com/pelletier/go-toml/v2 v2.0.6
-	github.com/pkg/errors v0.9.1
-	github.com/spaolacci/murmur3 v1.1.0
-	github.com/stretchr/testify v1.8.4
-	github.com/tikv/pd v2.1.19+incompatible
-	go.etcd.io/etcd/api/v3 v3.5.12
-	go.etcd.io/etcd/client/pkg/v3 v3.5.12
-	go.etcd.io/etcd/client/v3 v3.5.12
-	go.etcd.io/etcd/server/v3 v3.5.12
-	go.uber.org/zap v1.27.0
-	golang.org/x/sync v0.10.0
-	golang.org/x/time v0.5.0
-	google.golang.org/grpc v1.62.1
-	google.golang.org/protobuf v1.33.0
-)
-
-require (
-	github.com/beorn7/perks v1.0.1 // indirect
-	github.com/cenkalti/backoff/v4 v4.2.1 // indirect
-	github.com/cespare/xxhash/v2 v2.2.0 // indirect
-	github.com/coreos/go-semver v0.3.1 // indirect
-	github.com/coreos/go-systemd/v22 v22.5.0 // indirect
-	github.com/davecgh/go-spew v1.1.1 // indirect
-	github.com/dustin/go-humanize v1.0.1 // indirect
-	github.com/go-logr/logr v1.4.1 // indirect
-	github.com/go-logr/stdr v1.2.2 // indirect
-	github.com/gogo/protobuf v1.3.2 // indirect
-	github.com/golang-jwt/jwt/v4 v4.5.1 // indirect
-	github.com/golang/protobuf v1.5.4 // indirect
-	github.com/google/btree v1.1.2 // indirect
-	github.com/gorilla/websocket v1.5.1 // indirect
-	github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 // indirect
-	github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect
-	github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect
-	github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.1 // indirect
-	github.com/jonboulle/clockwork v0.4.0 // indirect
-	github.com/json-iterator/go v1.1.12 // indirect
-	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
-	github.com/modern-go/reflect2 v1.0.2 // indirect
-	github.com/pingcap/log v1.1.0 // indirect
-	github.com/pmezard/go-difflib v1.0.0 // indirect
-
github.com/prometheus/client_golang v1.19.0 // indirect - github.com/prometheus/client_model v0.6.0 // indirect - github.com/prometheus/common v0.50.0 // indirect - github.com/prometheus/procfs v0.13.0 // indirect - github.com/sirupsen/logrus v1.9.3 // indirect - github.com/soheilhy/cmux v0.1.5 // indirect - github.com/spf13/pflag v1.0.5 // indirect - github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 // indirect - github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510 // indirect - go.etcd.io/bbolt v1.3.9 // indirect - go.etcd.io/etcd/client/v2 v2.305.12 // indirect - go.etcd.io/etcd/pkg/v3 v3.5.12 // indirect - go.etcd.io/etcd/raft/v3 v3.5.12 // indirect - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 // indirect - go.opentelemetry.io/otel v1.24.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.24.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.24.0 // indirect - go.opentelemetry.io/otel/metric v1.24.0 // indirect - go.opentelemetry.io/otel/sdk v1.24.0 // indirect - go.opentelemetry.io/otel/trace v1.24.0 // indirect - go.opentelemetry.io/proto/otlp v1.1.0 // indirect - go.uber.org/multierr v1.11.0 // indirect - golang.org/x/crypto v0.31.0 // indirect - golang.org/x/net v0.23.0 // indirect - golang.org/x/sys v0.28.0 // indirect - golang.org/x/text v0.21.0 // indirect - google.golang.org/genproto v0.0.0-20240308144416-29370a3891b7 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20240308144416-29370a3891b7 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240308144416-29370a3891b7 // indirect - gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect - sigs.k8s.io/yaml v1.4.0 // indirect -) diff --git a/horaemeta/go.sum b/horaemeta/go.sum deleted file mode 100644 index e5cc41809e..0000000000 --- a/horaemeta/go.sum +++ /dev/null @@ -1,308 +0,0 @@ -cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= -github.com/apache/incubator-horaedb-proto/golang v0.0.0-20231228071726-92152841fc8a h1:lQVr4wkixN4N5dOKRUBU1lnqkiHTFg6Im+tqJ8Y7XOE= -github.com/apache/incubator-horaedb-proto/golang v0.0.0-20231228071726-92152841fc8a/go.mod h1:Ch92HPIAoGbrgFCtpSgxcYSRgWdpNsIcPG1lfv24Ufs= -github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= -github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= -github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/caarlos0/env/v6 v6.10.1 h1:t1mPSxNpei6M5yAeu1qtRdPAK29Nbcf/n3G7x+b3/II= -github.com/caarlos0/env/v6 v6.10.1/go.mod h1:hvp/ryKXKipEkcuYjs9mI4bBCg+UI0Yhgm5Zu0ddvwc= -github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= -github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= -github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/client9/misspell v0.3.4/go.mod 
h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cockroachdb/datadriven v1.0.2 h1:H9MtNqVoVhvd9nCBwOyDjUEdZCREqbIdCJD93PBm/jA= -github.com/cockroachdb/datadriven v1.0.2/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU= -github.com/coreos/go-semver v0.3.1 h1:yi21YpKnrx1gt5R+la8n5WgS0kCrsPp33dmEyHReZr4= -github.com/coreos/go-semver v0.3.1/go.mod h1:irMmmIw/7yzSRPWryHsK7EYSg09caPQL03VsM8rvUec= -github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= -github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= -github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= -github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= -github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= -github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= -github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= -github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= -github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= -github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang-jwt/jwt/v4 v4.5.1 h1:JdqV9zKUdtaa9gdPlywC3aeoEsR681PlKC+4F5gQgeo= -github.com/golang-jwt/jwt/v4 v4.5.1/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= -github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= -github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU= -github.com/google/btree 
v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= -github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= -github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/QY= -github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY= -github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 h1:UH//fgunKIs4JdUbpDl1VZCDaL56wXCB/5+wF6uHfaI= -github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod h1:g5qyo/la0ALbONm6Vbp88Yd8NsDy6rZz+RcrMPxvld8= -github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= -github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= -github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= -github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.1 h1:/c3QmbOGMGTOumP2iT/rCwB7b0QDGLKzqOmktBjT+Is= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.1/go.mod h1:5SN9VR2LTsRFsrEC6FHgRbTWrTHu6tqPeKxEQv15giM= -github.com/jonboulle/clockwork v0.4.0 h1:p4Cf1aMWXnXAUh8lVfewRBx1zaTSYKrKMF2g3ST4RZ4= -github.com/jonboulle/clockwork v0.4.0/go.mod h1:xgRqUGwRcjKCO1vbZUEtSLrqKoPSsUpK7fnezOII0kc= -github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= -github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= -github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4dN7jwJOQ1U= -github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= -github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= -github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= -github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/looplab/fsm v0.3.0 h1:kIgNS3Yyud1tyxhG8kDqh853B7QqwnlWdgL3TD2s3Sw= -github.com/looplab/fsm v0.3.0/go.mod h1:PmD3fFvQEIsjMEfvZdrCDZ6y8VwKTwWNjlpEr6IKPO4= -github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd 
h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= -github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= -github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/pelletier/go-toml/v2 v2.0.6 h1:nrzqCb7j9cDFj2coyLNLaZuJTLjWjlaz6nvTvIwycIU= -github.com/pelletier/go-toml/v2 v2.0.6/go.mod h1:eumQOmlWiOPt5WriQQqoM5y18pDHwha2N+QD+EUNTek= -github.com/pingcap/errors v0.11.0 h1:DCJQB8jrHbQ1VVlMFIrbj2ApScNNotVmkSNplu2yUt4= -github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= -github.com/pingcap/log v1.1.0 h1:ELiPxACz7vdo1qAvvaWJg1NrYFoY6gqAh/+Uo6aXdD8= -github.com/pingcap/log v1.1.0/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= -github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= -github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k= -github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.6.0 h1:k1v3CzpSRUTrKMppY35TLwPvxHqBu0bYgxZzqGIgaos= -github.com/prometheus/client_model v0.6.0/go.mod h1:NTQHnmxFpouOD0DpvP4XujX3CdOAGQPoaGhyTchlyt8= -github.com/prometheus/common v0.50.0 h1:YSZE6aa9+luNa2da6/Tik0q0A5AbR+U003TItK57CPQ= -github.com/prometheus/common v0.50.0/go.mod h1:wHFBCEVWVmHMUpg7pYcOm2QUR/ocQdYSJVQJKnHc3xQ= -github.com/prometheus/procfs v0.13.0 h1:GqzLlQyfsPbaEHaQkO7tbDlriv/4o5Hudv6OXHGKX7o= -github.com/prometheus/procfs v0.13.0/go.mod h1:cd4PFCR54QLnGKPaKGA6l+cfuNXtht43ZKY6tow0Y1g= -github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= -github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= -github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= -github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= -github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js= -github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0= -github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= -github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.1.1/go.mod 
h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/tikv/pd v2.1.19+incompatible h1:rqjHqO7t/STke/R2Yz6+lQj6NPA8u7G2Otwqup4K+P8= -github.com/tikv/pd v2.1.19+incompatible/go.mod h1:v6C/D7ONC49SgjI4jbGnooSizvijaO/bdIm62DVR4tI= -github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 h1:6fotK7otjonDflCTK0BCfls4SPy3NcCVb5dqqmbRknE= -github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75/go.mod h1:KO6IkyS8Y3j8OdNO85qEYBsRPuteD+YciPomcXdrMnk= -github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510 h1:S2dVYn90KE98chqDkyE9Z4N61UnQd+KOfgp5Iu53llk= -github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= -github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -go.etcd.io/bbolt v1.3.9 h1:8x7aARPEXiXbHmtUwAIv7eV2fQFHrLLavdiJ3uzJXoI= -go.etcd.io/bbolt v1.3.9/go.mod h1:zaO32+Ti0PK1ivdPtgMESzuzL2VPoIG1PCQNvOdo/dE= -go.etcd.io/etcd/api/v3 v3.5.12 h1:W4sw5ZoU2Juc9gBWuLk5U6fHfNVyY1WC5g9uiXZio/c= -go.etcd.io/etcd/api/v3 v3.5.12/go.mod h1:Ot+o0SWSyT6uHhA56al1oCED0JImsRiU9Dc26+C2a+4= -go.etcd.io/etcd/client/pkg/v3 v3.5.12 h1:EYDL6pWwyOsylrQyLp2w+HkQ46ATiOvoEdMarindU2A= -go.etcd.io/etcd/client/pkg/v3 v3.5.12/go.mod h1:seTzl2d9APP8R5Y2hFL3NVlD6qC/dOT+3kvrqPyTas4= -go.etcd.io/etcd/client/v2 v2.305.12 h1:0m4ovXYo1CHaA/Mp3X/Fak5sRNIWf01wk/X1/G3sGKI= -go.etcd.io/etcd/client/v2 v2.305.12/go.mod h1:aQ/yhsxMu+Oht1FOupSr60oBvcS9cKXHrzBpDsPTf9E= -go.etcd.io/etcd/client/v3 v3.5.12 h1:v5lCPXn1pf1Uu3M4laUE2hp/geOTc5uPcYYsNe1lDxg= -go.etcd.io/etcd/client/v3 v3.5.12/go.mod h1:tSbBCakoWmmddL+BKVAJHa9km+O/E+bumDe9mSbPiqw= -go.etcd.io/etcd/pkg/v3 v3.5.12 h1:OK2fZKI5hX/+BTK76gXSTyZMrbnARyX9S643GenNGb8= -go.etcd.io/etcd/pkg/v3 v3.5.12/go.mod h1:UVwg/QIMoJncyeb/YxvJBJCE/NEwtHWashqc8A1nj/M= -go.etcd.io/etcd/raft/v3 v3.5.12 h1:7r22RufdDsq2z3STjoR7Msz6fYH8tmbkdheGfwJNRmU= -go.etcd.io/etcd/raft/v3 v3.5.12/go.mod h1:ERQuZVe79PI6vcC3DlKBukDCLja/L7YMu29B74Iwj4U= -go.etcd.io/etcd/server/v3 v3.5.12 h1:EtMjsbfyfkwZuA2JlKOiBfuGkFCekv5H178qjXypbG8= -go.etcd.io/etcd/server/v3 v3.5.12/go.mod h1:axB0oCjMy+cemo5290/CutIjoxlfA6KVYKD1w0uue10= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 h1:4Pp6oUg3+e/6M4C0A/3kJ2VYa++dsWVTtGgLVj5xtHg= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0/go.mod h1:Mjt1i1INqiaoZOMGR1RIUJN+i3ChKoFRqzrRQhlkbs0= -go.opentelemetry.io/otel v1.24.0 
h1:0LAOdjNmQeSTzGBzduGe/rU4tZhMwL5rWgtp9Ku5Jfo= -go.opentelemetry.io/otel v1.24.0/go.mod h1:W7b9Ozg4nkF5tWI5zsXkaKKDjdVjpD4oAt9Qi/MArHo= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.24.0 h1:t6wl9SPayj+c7lEIFgm4ooDBZVb01IhLB4InpomhRw8= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.24.0/go.mod h1:iSDOcsnSA5INXzZtwaBPrKp/lWu/V14Dd+llD0oI2EA= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.24.0 h1:Mw5xcxMwlqoJd97vwPxA8isEaIoxsta9/Q51+TTJLGE= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.24.0/go.mod h1:CQNu9bj7o7mC6U7+CA/schKEYakYXWr79ucDHTMGhCM= -go.opentelemetry.io/otel/metric v1.24.0 h1:6EhoGWWK28x1fbpA4tYTOWBkPefTDQnb8WSGXlc88kI= -go.opentelemetry.io/otel/metric v1.24.0/go.mod h1:VYhLe1rFfxuTXLgj4CBiyz+9WYBA8pNGJgDcSFRKBco= -go.opentelemetry.io/otel/sdk v1.24.0 h1:YMPPDNymmQN3ZgczicBY3B6sf9n62Dlj9pWD3ucgoDw= -go.opentelemetry.io/otel/sdk v1.24.0/go.mod h1:KVrIYw6tEubO9E96HQpcmpTKDVn9gdv35HoYiQWGDFg= -go.opentelemetry.io/otel/trace v1.24.0 h1:CsKnnL4dUAr/0llH9FKuc698G04IrpWV0MQA/Y1YELI= -go.opentelemetry.io/otel/trace v1.24.0/go.mod h1:HPc3Xr/cOApsBI154IU0OI0HJexz+aw5uPdbs3UCjNU= -go.opentelemetry.io/proto/otlp v1.1.0 h1:2Di21piLrCqJ3U3eXGCTPHE9R8Nh+0uglSnOyxikMeI= -go.opentelemetry.io/proto/otlp v1.1.0/go.mod h1:GpBHCBWiqvVLDqmHZsoMM3C5ySeKTC7ej/RNTae6MdY= -go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= -go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= -go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= -go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= -go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= -go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= -go.uber.org/multierr v1.7.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= -go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= -go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= -go.uber.org/zap v1.18.1/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= -go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= -go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= -go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= -golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= -golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.3.0/go.mod 
h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20211123203042-d83791d6bcd9/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= -golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys 
v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= -golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= -golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= -golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= -golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= -golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20240308144416-29370a3891b7 h1:5cmXPmmYZddhZs05mvqVzGwPsoE/uq+1YBCeRmBDyMo= -google.golang.org/genproto v0.0.0-20240308144416-29370a3891b7/go.mod h1:yA7a1bW1kwl459Ol0m0lV4hLTfrL/7Bkk4Mj2Ir1mWI= -google.golang.org/genproto/googleapis/api v0.0.0-20240308144416-29370a3891b7 
h1:bITUotW/BD35GhBwrwGexWa8/P5CKHXACICrmuFJBa8= -google.golang.org/genproto/googleapis/api v0.0.0-20240308144416-29370a3891b7/go.mod h1:O1cOfN1Cy6QEYr7VxtjOyP5AdAuR0aJ/MYZaaof623Y= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240308144416-29370a3891b7 h1:em/y72n4XlYRtayY/cVj6pnVzHa//BDA1BdoO+z9mdE= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240308144416-29370a3891b7/go.mod h1:UCOku4NytXMJuLQE5VuqA5lX3PcHCBo8pxNyvkf4xBs= -google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= -google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= -google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= -google.golang.org/grpc v1.62.1 h1:B4n+nfKzOICUXMgyrNd19h/I9oH0L1pizfk1d4zSgTk= -google.golang.org/grpc v1.62.1/go.mod h1:IWTG0VlJLCh1SkC58F7np9ka9mx/WNkjl4PGJaiq+QE= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= -gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= -gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc= -gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= -sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/horaemeta/pkg/assert/assert.go b/horaemeta/pkg/assert/assert.go deleted file mode 100644 index ae9e0a5adc..0000000000 --- a/horaemeta/pkg/assert/assert.go +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package assert - -import "fmt" - -// Assertf panics and prints the appended message if the cond is false. -func Assertf(cond bool, format string, a ...any) { - if !cond { - msg := fmt.Sprintf(format, a...) - panic(msg) - } -} - -// Assertf panics and prints the appended message if the cond is false. -func Assert(cond bool) { - Assertf(cond, "unexpected case") -} diff --git a/horaemeta/pkg/coderr/code.go b/horaemeta/pkg/coderr/code.go deleted file mode 100644 index b941013bbf..0000000000 --- a/horaemeta/pkg/coderr/code.go +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package coderr - -import "net/http" - -type Code int - -const ( - Invalid Code = -1 - Ok = 0 - InvalidParams = http.StatusBadRequest - BadRequest = http.StatusBadRequest - NotFound = http.StatusNotFound - TooManyRequests = http.StatusTooManyRequests - Internal = http.StatusInternalServerError - ErrNotImplemented = http.StatusNotImplemented - - // HTTPCodeUpperBound is a bound under which any Code should have the same meaning with the http status code. - HTTPCodeUpperBound = Code(1000) - PrintHelpUsage = 1001 - ClusterAlreadyExists = 1002 -) - -// ToHTTPCode converts the Code to http code. -// The Code below the HTTPCodeUpperBound has the same meaning as the http status code. However, for the other codes, we -// should define the conversion rules by ourselves. -func (c Code) ToHTTPCode() int { - if c < HTTPCodeUpperBound { - return int(c) - } - - // TODO: use switch to convert the code to http code. - return int(c) -} diff --git a/horaemeta/pkg/coderr/error.go b/horaemeta/pkg/coderr/error.go deleted file mode 100644 index 5977214c2b..0000000000 --- a/horaemeta/pkg/coderr/error.go +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package coderr - -import ( - "fmt" - - "github.com/pkg/errors" -) - -var _ CodeError = &codeError{code: 0, desc: "", cause: nil} - -// CodeError is an error with code. -type CodeError interface { - error - Code() Code - // WithCausef should generate a new CodeError instance with the provided cause details. - WithCausef(format string, a ...any) CodeError - // WithCause should generate a new CodeError instance with the provided cause details. - WithCause(cause error) CodeError -} - -// Is checks whether the cause of `err` is the kind of error specified by the `expectCode`. -// Returns false if the cause of `err` is not CodeError. -func Is(err error, expectCode Code) bool { - code, b := GetCauseCode(err) - if b && code == expectCode { - return true - } - - return false -} - -func GetCauseCode(err error) (Code, bool) { - if err == nil { - return Invalid, false - } - - cause := errors.Cause(err) - cerr, ok := cause.(CodeError) - if !ok { - return Invalid, false - } - return cerr.Code(), true -} - -// NewCodeError creates a base CodeError definition. -// The provided code should be defined in the code.go in this package. -func NewCodeError(code Code, desc string) CodeError { - return &codeError{ - code: code, - desc: desc, - cause: nil, - } -} - -// codeError is the default implementation of CodeError. -type codeError struct { - code Code - desc string - cause error -} - -func (e *codeError) Error() string { - return fmt.Sprintf("(#%d)%s, cause:%+v", e.code, e.desc, e.cause) -} - -func (e *codeError) Code() Code { - return e.code -} - -func (e *codeError) WithCausef(format string, a ...any) CodeError { - errMsg := fmt.Sprintf(format, a...) - causeWithStack := errors.WithStack(errors.New(errMsg)) - return &codeError{ - code: e.code, - desc: e.desc, - cause: causeWithStack, - } -} - -func (e *codeError) WithCause(cause error) CodeError { - causeWithStack := errors.WithStack(cause) - return &codeError{ - code: e.code, - desc: e.desc, - cause: causeWithStack, - } -} diff --git a/horaemeta/pkg/log/config.go b/horaemeta/pkg/log/config.go deleted file mode 100644 index cc30a2a26d..0000000000 --- a/horaemeta/pkg/log/config.go +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package log - -import ( - "go.uber.org/zap" - "go.uber.org/zap/zapcore" -) - -const ( - DefaultLogLevel = "info" - DefaultLogFile = "stdout" -) - -type Config struct { - Level string `toml:"level" env:"LEVEL"` - File string -} - -// DefaultZapLoggerConfig defines default zap logger configuration. -var DefaultZapLoggerConfig = zap.Config{ - Level: zap.NewAtomicLevelAt(zapcore.InfoLevel), - Development: false, - Sampling: &zap.SamplingConfig{ - Initial: 100, - Thereafter: 100, - }, - Encoding: "console", - EncoderConfig: zapcore.EncoderConfig{ - TimeKey: "ts", - LevelKey: "level", - NameKey: "logger", - CallerKey: "caller", - MessageKey: "msg", - StacktraceKey: "stacktrace", - LineEnding: zapcore.DefaultLineEnding, - EncodeLevel: zapcore.LowercaseLevelEncoder, - EncodeTime: zapcore.ISO8601TimeEncoder, - EncodeDuration: zapcore.StringDurationEncoder, - EncodeCaller: zapcore.ShortCallerEncoder, - }, - OutputPaths: []string{"stdout"}, - ErrorOutputPaths: []string{"stdout"}, -} diff --git a/horaemeta/pkg/log/global.go b/horaemeta/pkg/log/global.go deleted file mode 100644 index 0b4a207bac..0000000000 --- a/horaemeta/pkg/log/global.go +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package log - -import ( - "go.uber.org/zap" - "go.uber.org/zap/zapcore" -) - -func Debug(msg string, fields ...zap.Field) { - globalLogger.WithOptions(zap.AddCallerSkip(1)).Debug(msg, fields...) -} - -func Info(msg string, fields ...zap.Field) { - globalLogger.WithOptions(zap.AddCallerSkip(1)).Info(msg, fields...) -} - -func Warn(msg string, fields ...zap.Field) { - globalLogger.WithOptions(zap.AddCallerSkip(1)).Warn(msg, fields...) -} - -func Error(msg string, fields ...zap.Field) { - globalLogger.WithOptions(zap.AddCallerSkip(1)).Error(msg, fields...) -} - -func Panic(msg string, fields ...zap.Field) { - globalLogger.WithOptions(zap.AddCallerSkip(1)).Panic(msg, fields...) -} - -func Fatal(msg string, fields ...zap.Field) { - globalLogger.Fatal(msg, fields...) -} - -func With(fields ...zap.Field) *zap.Logger { - return globalLogger.With(fields...) -} - -func SetLevel(lvl string) error { - level, err := zapcore.ParseLevel(lvl) - if err != nil { - return err - } - globalLoggerCfg.Level.SetLevel(level) - return nil -} - -func GetLevel() zapcore.Level { - return globalLoggerCfg.Level.Level() -} diff --git a/horaemeta/pkg/log/log.go b/horaemeta/pkg/log/log.go deleted file mode 100644 index c372631b5f..0000000000 --- a/horaemeta/pkg/log/log.go +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package log - -import ( - "fmt" - - "go.uber.org/zap" - "go.uber.org/zap/zapcore" -) - -func init() { - defaultConfig := &Config{ - Level: "info", - File: "stdout", - } - _, err := InitGlobalLogger(defaultConfig) - if err != nil { - fmt.Println("fail to init global logger, err:", err) - } -} - -var ( - globalLogger *zap.Logger - globalLoggerCfg *zap.Config -) - -// InitGlobalLogger initializes the global logger with Config. -func InitGlobalLogger(cfg *Config) (*zap.Logger, error) { - zapCfg := DefaultZapLoggerConfig - - level, err := zapcore.ParseLevel(cfg.Level) - if err != nil { - return nil, err - } - zapCfg.Level.SetLevel(level) - - if len(cfg.File) > 0 { - zapCfg.OutputPaths = []string{cfg.File} - zapCfg.ErrorOutputPaths = []string{cfg.File} - } - - logger, err := zapCfg.Build() - if err != nil { - return nil, err - } - - globalLogger = logger - globalLoggerCfg = &zapCfg - return logger, nil -} - -func GetLogger() *zap.Logger { - return globalLogger -} - -func GetLoggerConfig() *zap.Config { - return globalLoggerCfg -} diff --git a/horaemeta/server/cluster/cluster.go b/horaemeta/server/cluster/cluster.go deleted file mode 100644 index 48ff07abb6..0000000000 --- a/horaemeta/server/cluster/cluster.go +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package cluster - -import ( - "context" - "strings" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator" - "github.com/apache/incubator-horaedb-meta/server/coordinator/eventdispatch" - "github.com/apache/incubator-horaedb-meta/server/coordinator/inspector" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler/manager" - "github.com/apache/incubator-horaedb-meta/server/id" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/pkg/errors" - clientv3 "go.etcd.io/etcd/client/v3" - "go.uber.org/zap" -) - -const ( - defaultProcedurePrefixKey = "ProcedureID" - defaultAllocStep = 50 -) - -type Cluster struct { - logger *zap.Logger - metadata *metadata.ClusterMetadata - - procedureFactory *coordinator.Factory - procedureManager procedure.Manager - schedulerManager manager.SchedulerManager - nodeInspector *inspector.NodeInspector -} - -func NewCluster(logger *zap.Logger, metadata *metadata.ClusterMetadata, client *clientv3.Client, rootPath string) (*Cluster, error) { - procedureStorage := procedure.NewEtcdStorageImpl(client, rootPath, uint32(metadata.GetClusterID())) - procedureManager, err := procedure.NewManagerImpl(logger, metadata) - if err != nil { - return nil, errors.WithMessage(err, "create procedure manager") - } - dispatch := eventdispatch.NewDispatchImpl() - - procedureIDRootPath := strings.Join([]string{rootPath, metadata.Name(), defaultProcedurePrefixKey}, "/") - procedureFactory := coordinator.NewFactory(logger, id.NewAllocatorImpl(logger, client, procedureIDRootPath, defaultAllocStep), dispatch, procedureStorage, metadata) - - schedulerManager := manager.NewManager(logger, procedureManager, procedureFactory, metadata, client, rootPath, metadata.GetTopologyType(), metadata.GetProcedureExecutingBatchSize()) - - nodeInspector := inspector.NewNodeInspector(logger, metadata) - - return &Cluster{ - logger: logger, - metadata: metadata, - procedureFactory: procedureFactory, - procedureManager: procedureManager, - schedulerManager: schedulerManager, - nodeInspector: nodeInspector, - }, nil -} - -func (c *Cluster) Start(ctx context.Context) error { - if err := c.procedureManager.Start(ctx); err != nil { - return errors.WithMessage(err, "start procedure manager") - } - if err := c.schedulerManager.Start(ctx); err != nil { - return errors.WithMessage(err, "start scheduler manager") - } - if err := c.nodeInspector.Start(ctx); err != nil { - return errors.WithMessage(err, "start node inspector") - } - return nil -} - -func (c *Cluster) Stop(ctx context.Context) error { - if err := c.procedureManager.Stop(ctx); err != nil { - return errors.WithMessage(err, "stop procedure manager") - } - if err := c.schedulerManager.Stop(ctx); err != nil { - return errors.WithMessage(err, "stop scheduler manager") - } - if err := c.nodeInspector.Stop(ctx); err != nil { - return errors.WithMessage(err, "stop node inspector") - } - return nil -} - -func (c *Cluster) GetMetadata() *metadata.ClusterMetadata { - return c.metadata -} - -func (c *Cluster) GetProcedureManager() procedure.Manager { - return c.procedureManager -} - -func (c *Cluster) GetProcedureFactory() *coordinator.Factory { - return c.procedureFactory -} - -func (c *Cluster) GetSchedulerManager() manager.SchedulerManager { - return c.schedulerManager -} - -func (c *Cluster) GetShards() []storage.ShardID { - return c.metadata.GetShards() -} - -func (c 
*Cluster) GetShardNodes() metadata.GetShardNodesResult { - return c.metadata.GetShardNodes() -} diff --git a/horaemeta/server/cluster/manager.go b/horaemeta/server/cluster/manager.go deleted file mode 100644 index 648236bdfd..0000000000 --- a/horaemeta/server/cluster/manager.go +++ /dev/null @@ -1,528 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package cluster - -import ( - "context" - "fmt" - "path" - "sync" - "time" - - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/id" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/pkg/errors" - clientv3 "go.etcd.io/etcd/client/v3" - "go.uber.org/zap" -) - -const ( - AllocClusterIDPrefix = "ClusterID" -) - -type Manager interface { - // Start must be called before manager is used. - Start(ctx context.Context) error - // Stop must be called before manager is dropped. - Stop(ctx context.Context) error - - ListClusters(ctx context.Context) ([]*Cluster, error) - CreateCluster(ctx context.Context, clusterName string, opts metadata.CreateClusterOpts) (*Cluster, error) - UpdateCluster(ctx context.Context, clusterName string, opt metadata.UpdateClusterOpts) error - GetCluster(ctx context.Context, clusterName string) (*Cluster, error) - // AllocSchemaID means get or create schema. - // The second output parameter bool: Returns true if the table was newly created. - AllocSchemaID(ctx context.Context, clusterName, schemaName string) (storage.SchemaID, bool, error) - GetTables(clusterName, schemaName string, tableNames []string) ([]metadata.TableInfo, error) - GetTablesByIDs(clusterName string, tableID []storage.TableID) ([]metadata.TableInfo, error) - GetTablesByShardIDs(clusterName, nodeName string, shardIDs []storage.ShardID) (map[storage.ShardID]metadata.ShardTables, error) - DropTable(ctx context.Context, clusterName, schemaName, tableName string) error - RouteTables(ctx context.Context, clusterName, schemaName string, tableNames []string) (metadata.RouteTablesResult, error) - GetNodeShards(ctx context.Context, clusterName string) (metadata.GetNodeShardsResult, error) - - RegisterNode(ctx context.Context, clusterName string, registeredNode metadata.RegisteredNode) error - GetRegisteredNode(ctx context.Context, clusterName string, node string) (metadata.RegisteredNode, error) - ListRegisteredNodes(ctx context.Context, clusterName string) ([]metadata.RegisteredNode, error) -} - -type managerImpl struct { - // RWMutex is used to protect clusters when creating new cluster. 
- lock sync.RWMutex - running bool - clusters map[string]*Cluster - - storage storage.Storage - kv clientv3.KV - client *clientv3.Client - alloc id.Allocator - rootPath string - idAllocatorStep uint - - // TODO: topologyType is used to be compatible with cluster data changes and needs to be deleted later. - topologyType storage.TopologyType -} - -func NewManagerImpl(storage storage.Storage, kv clientv3.KV, client *clientv3.Client, rootPath string, idAllocatorStep uint, topologyType storage.TopologyType) (Manager, error) { - alloc := id.NewAllocatorImpl(log.GetLogger(), kv, path.Join(rootPath, AllocClusterIDPrefix), idAllocatorStep) - - manager := &managerImpl{ - lock: sync.RWMutex{}, - running: false, - clusters: map[string]*Cluster{}, - - kv: kv, - storage: storage, - client: client, - alloc: alloc, - rootPath: rootPath, - idAllocatorStep: idAllocatorStep, - topologyType: topologyType, - } - - return manager, nil -} - -func (m *managerImpl) ListClusters(_ context.Context) ([]*Cluster, error) { - m.lock.RLock() - defer m.lock.RUnlock() - - clusters := make([]*Cluster, 0, len(m.clusters)) - for _, cluster := range m.clusters { - clusters = append(clusters, cluster) - } - return clusters, nil -} - -func (m *managerImpl) CreateCluster(ctx context.Context, clusterName string, opts metadata.CreateClusterOpts) (*Cluster, error) { - if opts.NodeCount < 1 { - log.Error("cluster's nodeCount must > 0", zap.String("clusterName", clusterName)) - return nil, metadata.ErrCreateCluster.WithCausef("nodeCount must > 0") - } - - m.lock.Lock() - defer m.lock.Unlock() - - cluster, ok := m.clusters[clusterName] - if ok { - return cluster, metadata.ErrClusterAlreadyExists - } - - clusterID, err := m.allocClusterID(ctx) - if err != nil { - log.Error("fail to alloc cluster id", zap.Error(err), zap.String("clusterName", clusterName)) - return nil, errors.WithMessagef(err, "cluster manager CreateCluster, clusterName:%s", clusterName) - } - - createTime := time.Now().UnixMilli() - clusterMetadataStorage := storage.Cluster{ - ID: clusterID, - Name: clusterName, - MinNodeCount: opts.NodeCount, - ShardTotal: opts.ShardTotal, - TopologyType: opts.TopologyType, - ProcedureExecutingBatchSize: opts.ProcedureExecutingBatchSize, - CreatedAt: uint64(createTime), - ModifiedAt: uint64(createTime), - } - err = m.storage.CreateCluster(ctx, storage.CreateClusterRequest{ - Cluster: clusterMetadataStorage, - }) - if err != nil { - log.Error("fail to create cluster", zap.Error(err), zap.String("clusterName", clusterName)) - return nil, errors.WithMessage(err, "cluster create cluster") - } - - logger := log.With(zap.String("clusterName", clusterName)) - - clusterMetadata := metadata.NewClusterMetadata(logger, clusterMetadataStorage, m.storage, m.kv, m.rootPath, m.idAllocatorStep) - - if err = clusterMetadata.Init(ctx); err != nil { - log.Error("fail to init cluster", zap.Error(err), zap.String("clusterName", clusterName)) - return nil, errors.WithMessage(err, "cluster init") - } - - if err := clusterMetadata.Load(ctx); err != nil { - log.Error("fail to load cluster", zap.Error(err), zap.String("clusterName", clusterName)) - return nil, errors.WithMessage(err, "cluster load") - } - - c, err := NewCluster(logger, clusterMetadata, m.client, m.rootPath) - if err != nil { - return nil, errors.WithMessage(err, "new cluster") - } - m.clusters[clusterName] = c - - if err := c.Start(ctx); err != nil { - return nil, errors.WithMessage(err, "start cluster") - } - - return c, nil -} - -func (m *managerImpl) UpdateCluster(ctx context.Context, 
clusterName string, opt metadata.UpdateClusterOpts) error { - c, err := m.getCluster(clusterName) - if err != nil { - log.Error("get cluster", zap.Error(err)) - return err - } - - err = m.storage.UpdateCluster(ctx, storage.UpdateClusterRequest{Cluster: storage.Cluster{ - ID: c.GetMetadata().GetClusterID(), - Name: c.GetMetadata().Name(), - MinNodeCount: c.GetMetadata().GetClusterMinNodeCount(), - ShardTotal: c.GetMetadata().GetTotalShardNum(), - TopologyType: opt.TopologyType, - ProcedureExecutingBatchSize: opt.ProcedureExecutingBatchSize, - CreatedAt: c.GetMetadata().GetCreateTime(), - ModifiedAt: uint64(time.Now().UnixMilli()), - }}) - if err != nil { - log.Error("update cluster", zap.Error(err)) - return err - } - - if err := c.GetMetadata().LoadMetadata(ctx); err != nil { - log.Error("fail to load cluster", zap.Error(err), zap.String("clusterName", clusterName)) - return err - } - - return nil -} - -func (m *managerImpl) GetCluster(_ context.Context, clusterName string) (*Cluster, error) { - m.lock.RLock() - defer m.lock.RUnlock() - - cluster, exist := m.clusters[clusterName] - if exist { - return cluster, nil - } - return nil, metadata.ErrClusterNotFound -} - -func (m *managerImpl) AllocSchemaID(ctx context.Context, clusterName, schemaName string) (storage.SchemaID, bool, error) { - cluster, err := m.getCluster(clusterName) - if err != nil { - return 0, false, errors.WithMessage(err, "get cluster") - } - - // create new schema - schema, exists, err := cluster.metadata.GetOrCreateSchema(ctx, schemaName) - if err != nil { - log.Error("fail to create schema", zap.Error(err)) - return 0, false, errors.WithMessage(err, "get or create schema") - } - return schema.ID, exists, nil -} - -func (m *managerImpl) GetTables(clusterName, schemaName string, tableNames []string) ([]metadata.TableInfo, error) { - cluster, err := m.getCluster(clusterName) - if err != nil { - return []metadata.TableInfo{}, errors.WithMessage(err, "get cluster") - } - - tables, err := cluster.metadata.GetTables(schemaName, tableNames) - if err != nil { - return []metadata.TableInfo{}, errors.WithMessage(err, "metadata get tables") - } - - tableInfos := make([]metadata.TableInfo, 0, len(tables)) - for _, table := range tables { - tableInfos = append(tableInfos, metadata.TableInfo{ - ID: table.ID, - Name: table.Name, - SchemaID: table.SchemaID, - SchemaName: schemaName, - CreatedAt: table.CreatedAt, - PartitionInfo: table.PartitionInfo, - }) - } - return tableInfos, nil -} - -func (m *managerImpl) GetTablesByIDs(clusterName string, tableIDs []storage.TableID) ([]metadata.TableInfo, error) { - cluster, err := m.getCluster(clusterName) - if err != nil { - return []metadata.TableInfo{}, errors.WithMessage(err, "get cluster") - } - - tables := cluster.metadata.GetTablesByIDs(tableIDs) - tableInfos := make([]metadata.TableInfo, 0, len(tables)) - for _, table := range tables { - tableInfos = append(tableInfos, metadata.TableInfo{ - ID: table.ID, - Name: table.Name, - SchemaID: table.SchemaID, - // FIXME: We need the schema name here. 
- SchemaName: "", - PartitionInfo: table.PartitionInfo, - CreatedAt: table.CreatedAt, - }) - } - return tableInfos, nil -} - -func (m *managerImpl) GetTablesByShardIDs(clusterName, _ string, shardIDs []storage.ShardID) (map[storage.ShardID]metadata.ShardTables, error) { - cluster, err := m.getCluster(clusterName) - if err != nil { - return nil, errors.WithMessage(err, "get cluster") - } - - shardTables := cluster.metadata.GetShardTables(shardIDs) - return shardTables, nil -} - -// DropTable is only used for the HTTP interface. -// It only deletes the table data in ETCD and does not initiate a table deletion request to HoraeDB. -func (m *managerImpl) DropTable(ctx context.Context, clusterName, schemaName, tableName string) error { - cluster, err := m.getCluster(clusterName) - if err != nil { - return errors.WithMessage(err, "get cluster") - } - - table, ok, err := cluster.metadata.GetTable(schemaName, tableName) - if !ok { - return metadata.ErrTableNotFound - } - if err != nil { - return errors.WithMessage(err, "get table") - } - - // If the table is partitioned, delete the table metadata directly. - if table.IsPartitioned() { - _, err = cluster.metadata.DropTableMetadata(ctx, schemaName, tableName) - if err != nil { - return errors.WithMessage(err, "cluster drop table metadata") - } - return nil - } - - // If the table is not a partition table, delete the table metadata and remove the table from the shard. - // So we need to check if the table has been assigned to a shard. - getShardNodeResult, err := cluster.metadata.GetShardNodeByTableIDs([]storage.TableID{table.ID}) - if err != nil { - return errors.WithMessage(err, "get shard node by tableID") - } - - if _, ok := getShardNodeResult.ShardNodes[table.ID]; !ok { - return metadata.ErrShardNotFound - } - - if len(getShardNodeResult.ShardNodes[table.ID]) != 1 || len(getShardNodeResult.Version) != 1 { - return metadata.ErrShardNotFound - } - - shardID := getShardNodeResult.ShardNodes[table.ID][0].ID - version, ok := getShardNodeResult.Version[shardID] - if !ok { - return metadata.ErrVersionNotFound - } - - err = cluster.metadata.DropTable(ctx, metadata.DropTableRequest{ - SchemaName: schemaName, - TableName: tableName, - ShardID: shardID, - LatestVersion: version, - }) - if err != nil { - return errors.WithMessage(err, "cluster drop table") - } - - return nil -} - -func (m *managerImpl) RegisterNode(ctx context.Context, clusterName string, registeredNode metadata.RegisteredNode) error { - m.lock.RLock() - defer m.lock.RUnlock() - - if !m.running { - return nil - } - - cluster, err := m.getCluster(clusterName) - if err != nil { - return errors.WithMessage(err, "get cluster") - } - - err = cluster.metadata.RegisterNode(ctx, registeredNode) - - if err != nil { - return errors.WithMessage(err, "cluster register node") - } - - return nil -} - -func (m *managerImpl) GetRegisteredNode(_ context.Context, clusterName string, nodeName string) (metadata.RegisteredNode, error) { - var registeredNode metadata.RegisteredNode - cluster, err := m.getCluster(clusterName) - if err != nil { - log.Error("get cluster", zap.Error(err), zap.String("clusterName", clusterName)) - return registeredNode, errors.WithMessage(err, "get cluster") - } - - registeredNode, ok := cluster.metadata.GetRegisteredNodeByName(nodeName) - if !ok { - return registeredNode, metadata.ErrNodeNotFound.WithCausef("registeredNode is not found, registeredNode:%s, cluster:%s", nodeName, clusterName) - } - - return registeredNode, nil -} - -func (m *managerImpl) ListRegisteredNodes(_ 
context.Context, clusterName string) ([]metadata.RegisteredNode, error) { - cluster, err := m.getCluster(clusterName) - if err != nil { - return []metadata.RegisteredNode{}, errors.WithMessage(err, "get cluster") - } - - nodes := cluster.metadata.GetRegisteredNodes() - return nodes, nil -} - -func (m *managerImpl) getCluster(clusterName string) (*Cluster, error) { - m.lock.RLock() - cluster, ok := m.clusters[clusterName] - m.lock.RUnlock() - if !ok { - return nil, metadata.ErrClusterNotFound.WithCausef("cluster name:%s", clusterName) - } - return cluster, nil -} - -func (m *managerImpl) allocClusterID(ctx context.Context) (storage.ClusterID, error) { - ID, err := m.alloc.Alloc(ctx) - if err != nil { - return 0, errors.WithMessagef(err, "alloc cluster id") - } - return storage.ClusterID(ID), nil -} - -func (m *managerImpl) Start(ctx context.Context) error { - m.lock.Lock() - defer m.lock.Unlock() - - if m.running { - log.Warn("cluster manager has already been started") - return nil - } - - clusters, err := m.storage.ListClusters(ctx) - if err != nil { - log.Error("cluster manager fail to start, fail to list clusters", zap.Error(err)) - return errors.WithMessage(err, "cluster manager start") - } - - m.clusters = make(map[string]*Cluster, len(clusters.Clusters)) - for _, metadataStorage := range clusters.Clusters { - logger := log.With(zap.String("clusterName", metadataStorage.Name)) - clusterMetadata := metadata.NewClusterMetadata(logger, metadataStorage, m.storage, m.kv, m.rootPath, m.idAllocatorStep) - if err = clusterMetadata.Load(ctx); err != nil { - log.Error("fail to load cluster", zap.String("cluster", clusterMetadata.Name()), zap.Error(err)) - return errors.WithMessage(err, "fail to load cluster") - } - - // TODO: topologyType is used to be compatible with cluster data changes and needs to be deleted later - if clusterMetadata.GetStorageMetadata().TopologyType == storage.TopologyTypeUnknown { - req := storage.UpdateClusterRequest{ - Cluster: storage.Cluster{ - ID: metadataStorage.ID, - Name: metadataStorage.Name, - MinNodeCount: metadataStorage.MinNodeCount, - ShardTotal: metadataStorage.ShardTotal, - TopologyType: m.topologyType, - ProcedureExecutingBatchSize: metadataStorage.ProcedureExecutingBatchSize, - CreatedAt: metadataStorage.CreatedAt, - ModifiedAt: uint64(time.Now().UnixMilli()), - }, - } - if err := m.storage.UpdateCluster(ctx, req); err != nil { - return errors.WithMessagef(err, "update cluster topology type failed, clusterName:%s", clusterMetadata.Name()) - } - log.Info("update cluster topology type successfully", zap.String("request", fmt.Sprintf("%v", req))) - if err := clusterMetadata.LoadMetadata(ctx); err != nil { - log.Error("fail to load cluster", zap.String("clusterName", clusterMetadata.Name()), zap.Error(err)) - return err - } - } - - log.Info("open cluster successfully", zap.String("cluster", clusterMetadata.Name())) - c, err := NewCluster(logger, clusterMetadata, m.client, m.rootPath) - if err != nil { - return errors.WithMessage(err, "new cluster") - } - m.clusters[clusterMetadata.Name()] = c - if err := c.Start(ctx); err != nil { - return errors.WithMessage(err, "start cluster") - } - } - - m.running = true - - return nil -} - -func (m *managerImpl) Stop(ctx context.Context) error { - m.lock.Lock() - defer m.lock.Unlock() - - if !m.running { - return nil - } - - for _, cluster := range m.clusters { - if err := cluster.Stop(ctx); err != nil { - return errors.WithMessage(err, "stop cluster") - } - } - - m.clusters = make(map[string]*Cluster) - m.running = 
false - return nil -} - -func (m *managerImpl) RouteTables(ctx context.Context, clusterName, schemaName string, tableNames []string) (metadata.RouteTablesResult, error) { - cluster, err := m.getCluster(clusterName) - if err != nil { - return metadata.RouteTablesResult{}, errors.WithMessage(err, "get cluster") - } - - ret, err := cluster.metadata.RouteTables(ctx, schemaName, tableNames) - if err != nil { - return metadata.RouteTablesResult{}, errors.WithMessage(err, "cluster route tables") - } - - return ret, nil -} - -func (m *managerImpl) GetNodeShards(ctx context.Context, clusterName string) (metadata.GetNodeShardsResult, error) { - cluster, err := m.getCluster(clusterName) - if err != nil { - return metadata.GetNodeShardsResult{}, errors.WithMessage(err, "get cluster") - } - - ret, err := cluster.metadata.GetNodeShards(ctx) - if err != nil { - return metadata.GetNodeShardsResult{}, errors.WithMessage(err, "cluster get NodeShards") - } - - return ret, nil -} diff --git a/horaemeta/server/cluster/manager_test.go b/horaemeta/server/cluster/manager_test.go deleted file mode 100644 index 779e1cac62..0000000000 --- a/horaemeta/server/cluster/manager_test.go +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package cluster_test - -import ( - "context" - "crypto/rand" - "fmt" - "math/big" - "testing" - "time" - - "github.com/apache/incubator-horaedb-meta/server/cluster" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/etcdutil" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/stretchr/testify/require" - clientv3 "go.etcd.io/etcd/client/v3" -) - -const ( - defaultTimeout = time.Second * 20 - cluster1 = "testCluster1" - defaultSchema = "testSchema" - defaultNodeCount = 2 - defaultShardTotal = 8 - defaultProcedureExecutingBatchSize = 100 - defaultTopologyType = storage.TopologyTypeStatic - node1 = "127.0.0.1:8081" - node2 = "127.0.0.2:8081" - defaultSchemaID = 0 - testRootPath = "/rootPath" - defaultIDAllocatorStep = 20 -) - -func newTestStorage(t *testing.T) (storage.Storage, clientv3.KV, *clientv3.Client, etcdutil.CloseFn) { - _, client, closeSrv := etcdutil.PrepareEtcdServerAndClient(t) - storage := storage.NewStorageWithEtcdBackend(client, testRootPath, storage.Options{ - MaxScanLimit: 100, MinScanLimit: 10, MaxOpsPerTxn: 32, - }) - return storage, client, client, closeSrv -} - -func newClusterManagerWithStorage(storage storage.Storage, kv clientv3.KV, client *clientv3.Client) (cluster.Manager, error) { - return cluster.NewManagerImpl(storage, kv, client, testRootPath, defaultIDAllocatorStep, defaultTopologyType) -} - -func TestClusterManager(t *testing.T) { - re := require.New(t) - ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout) - defer cancel() - - s, kv, client, closeSrv := newTestStorage(t) - defer closeSrv() - manager, err := newClusterManagerWithStorage(s, kv, client) - re.NoError(err) - - re.NoError(manager.Start(ctx)) - - testCreateCluster(ctx, re, manager, cluster1) - - testRegisterNode(ctx, re, manager, cluster1, node1) - testRegisterNode(ctx, re, manager, cluster1, node2) - - testInitShardView(ctx, re, manager, cluster1) - - testGetNodeAndShard(ctx, re, manager, cluster1) - - testGetTables(re, manager, node1, cluster1, 0) - - testAllocSchemaID(ctx, re, manager, cluster1, defaultSchema, defaultSchemaID) - testAllocSchemaID(ctx, re, manager, cluster1, defaultSchema, defaultSchemaID) - - var testTableNames []string - for i := uint64(0); i < 5; i++ { - testTableName := fmt.Sprintf("testTable%d", i) - testTableNames = append(testTableNames, testTableName) - testCreateTable(ctx, re, manager, cluster1, defaultSchema, testTableName, storage.ShardID(i)) - } - - testRouteTables(ctx, re, manager, cluster1, defaultSchema, testTableNames) - - for _, tableName := range testTableNames { - testDropTable(ctx, re, manager, cluster1, defaultSchema, tableName) - } - - re.NoError(manager.Stop(ctx)) -} - -func testGetNodeAndShard(ctx context.Context, re *require.Assertions, manager cluster.Manager, clusterName string) { - c, err := manager.GetCluster(ctx, clusterName) - re.NoError(err) - - nodes, err := manager.ListRegisteredNodes(ctx, cluster1) - re.NoError(err) - re.Equal(2, len(nodes)) - - node, err := manager.GetRegisteredNode(ctx, cluster1, node1) - re.NoError(err) - re.Equal(node1, node.Node.Name) - - nodShards, err := manager.GetNodeShards(ctx, cluster1) - re.NoError(err) - re.Equal(int(c.GetMetadata().GetTotalShardNum()), len(nodShards.NodeShards)) -} - -func testInitShardView(ctx context.Context, re *require.Assertions, manager cluster.Manager, clusterName string) { - c, err := manager.GetCluster(ctx, clusterName) - re.NoError(err) - snapshot := 
c.GetMetadata().GetClusterSnapshot() - shardNodes := make([]storage.ShardNode, 0, c.GetMetadata().GetTotalShardNum()) - for _, shardView := range snapshot.Topology.ShardViewsMapping { - selectNodeIdx, err := rand.Int(rand.Reader, big.NewInt(int64(len(snapshot.RegisteredNodes)))) - re.NoError(err) - shardNodes = append(shardNodes, storage.ShardNode{ - ID: shardView.ShardID, - ShardRole: storage.ShardRoleLeader, - NodeName: snapshot.RegisteredNodes[selectNodeIdx.Int64()].Node.Name, - }) - } - err = c.GetMetadata().UpdateClusterView(ctx, storage.ClusterStateStable, shardNodes) - re.NoError(err) -} - -func testCreateCluster(ctx context.Context, re *require.Assertions, manager cluster.Manager, clusterName string) { - _, err := manager.CreateCluster(ctx, clusterName, metadata.CreateClusterOpts{ - NodeCount: defaultNodeCount, - EnableSchedule: false, - ShardTotal: defaultShardTotal, - TopologyType: defaultTopologyType, - ProcedureExecutingBatchSize: defaultProcedureExecutingBatchSize, - }) - re.NoError(err) -} - -func testRegisterNode(ctx context.Context, re *require.Assertions, manager cluster.Manager, - clusterName, nodeName string, -) { - node := metadata.RegisteredNode{ - Node: storage.Node{ - Name: nodeName, - LastTouchTime: uint64(time.Now().UnixMilli()), - State: storage.NodeStateOnline, - NodeStats: storage.NewEmptyNodeStats(), - }, ShardInfos: []metadata.ShardInfo{}, - } - err := manager.RegisterNode(ctx, clusterName, node) - re.NoError(err) -} - -func testAllocSchemaID(ctx context.Context, re *require.Assertions, manager cluster.Manager, - cluster, schema string, schemaID uint32, -) { - id, _, err := manager.AllocSchemaID(ctx, cluster, schema) - re.NoError(err) - re.Equal(storage.SchemaID(schemaID), id) -} - -func testCreateTable(ctx context.Context, re *require.Assertions, manager cluster.Manager, - clusterName, schema, tableName string, shardID storage.ShardID, -) { - c, err := manager.GetCluster(ctx, clusterName) - re.NoError(err) - _, err = c.GetMetadata().CreateTable(ctx, metadata.CreateTableRequest{ - ShardID: shardID, - LatestVersion: 0, - SchemaName: schema, - TableName: tableName, - PartitionInfo: storage.PartitionInfo{Info: nil}, - }) - re.NoError(err) -} - -func testGetTables(re *require.Assertions, manager cluster.Manager, node, cluster string, num int) { - shardIDs := make([]storage.ShardID, 0, defaultShardTotal) - for i := 0; i < defaultShardTotal; i++ { - shardIDs = append(shardIDs, storage.ShardID(i)) - } - shardTables, err := manager.GetTablesByShardIDs(cluster, node, shardIDs) - re.NoError(err) - re.Equal(defaultShardTotal, len(shardTables)) - - tableNum := 0 - for _, tables := range shardTables { - re.Equal(storage.ShardRoleLeader, tables.Shard.Role) - tableNum += len(tables.Tables) - } - re.Equal(num, tableNum) -} - -func testRouteTables(ctx context.Context, re *require.Assertions, manager cluster.Manager, cluster, schema string, tableNames []string) { - ret, err := manager.RouteTables(ctx, cluster, schema, tableNames) - re.NoError(err) - re.Equal(len(tableNames), len(ret.RouteEntries)) - for _, entry := range ret.RouteEntries { - re.Equal(1, len(entry.NodeShards)) - re.Equal(storage.ShardRoleLeader, entry.NodeShards[0].ShardNode.ShardRole) - } -} - -func testDropTable(ctx context.Context, re *require.Assertions, manager cluster.Manager, clusterName string, schemaName string, tableName string) { - err := manager.DropTable(ctx, clusterName, schemaName, tableName) - re.NoError(err) -} diff --git a/horaemeta/server/cluster/metadata/cluster_metadata.go 
b/horaemeta/server/cluster/metadata/cluster_metadata.go deleted file mode 100644 index d68b4d9b36..0000000000 --- a/horaemeta/server/cluster/metadata/cluster_metadata.go +++ /dev/null @@ -1,815 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package metadata - -import ( - "context" - "crypto/rand" - "fmt" - "math/big" - "path" - "sort" - "sync" - - "github.com/apache/incubator-horaedb-meta/server/id" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/pkg/errors" - clientv3 "go.etcd.io/etcd/client/v3" - "go.uber.org/zap" -) - -const ( - AllocSchemaIDPrefix = "SchemaID" - AllocTableIDPrefix = "TableID" -) - -type ClusterMetadata struct { - logger *zap.Logger - clusterID storage.ClusterID - - // RWMutex is used to protect following fields. - // TODO: Encapsulated maps as a specific struct. - lock sync.RWMutex - metaData storage.Cluster - - tableManager TableManager - topologyManager TopologyManager - - // Manage the registered nodes from heartbeat. - registeredNodesCache map[string]RegisteredNode // nodeName -> NodeName - - storage storage.Storage - kv clientv3.KV - shardIDAlloc id.Allocator -} - -func NewClusterMetadata(logger *zap.Logger, meta storage.Cluster, storage storage.Storage, kv clientv3.KV, rootPath string, idAllocatorStep uint) *ClusterMetadata { - schemaIDAlloc := id.NewAllocatorImpl(logger, kv, path.Join(rootPath, meta.Name, AllocSchemaIDPrefix), idAllocatorStep) - tableIDAlloc := id.NewAllocatorImpl(logger, kv, path.Join(rootPath, meta.Name, AllocTableIDPrefix), idAllocatorStep) - // FIXME: Load ShardTopology when cluster create, pass exist ShardID to allocator. - shardIDAlloc := id.NewReusableAllocatorImpl([]uint64{}, MinShardID) - - cluster := &ClusterMetadata{ - logger: logger, - clusterID: meta.ID, - lock: sync.RWMutex{}, - metaData: meta, - tableManager: NewTableManagerImpl(logger, storage, meta.ID, schemaIDAlloc, tableIDAlloc), - topologyManager: NewTopologyManagerImpl(logger, storage, meta.ID, shardIDAlloc), - registeredNodesCache: map[string]RegisteredNode{}, - storage: storage, - kv: kv, - shardIDAlloc: shardIDAlloc, - } - - return cluster -} - -// Initialize the cluster view and shard view of the cluster. -// It will be used when we create the cluster. 
-func (c *ClusterMetadata) Init(ctx context.Context) error { - c.lock.Lock() - defer c.lock.Unlock() - - createShardViews := make([]CreateShardView, 0, c.metaData.ShardTotal) - for i := uint32(0); i < c.metaData.ShardTotal; i++ { - shardID, err := c.AllocShardID(ctx) - if err != nil { - return errors.WithMessage(err, "alloc shard id failed") - } - createShardViews = append(createShardViews, CreateShardView{ - ShardID: storage.ShardID(shardID), - Tables: []storage.TableID{}, - }) - } - if err := c.topologyManager.CreateShardViews(ctx, createShardViews); err != nil { - return errors.WithMessage(err, "create shard view") - } - - return c.topologyManager.InitClusterView(ctx) -} - -// Load cluster NodeName from storage into memory. -func (c *ClusterMetadata) Load(ctx context.Context) error { - c.lock.Lock() - defer c.lock.Unlock() - - if err := c.tableManager.Load(ctx); err != nil { - return errors.WithMessage(err, "load table manager") - } - - schemas := c.tableManager.GetSchemas() - if err := c.topologyManager.Load(ctx, schemas); err != nil { - return errors.WithMessage(err, "load topology manager") - } - - return nil -} - -func (c *ClusterMetadata) GetClusterID() storage.ClusterID { - c.lock.RLock() - defer c.lock.RUnlock() - - return c.clusterID -} - -func (c *ClusterMetadata) Name() string { - c.lock.RLock() - defer c.lock.RUnlock() - - return c.metaData.Name -} - -func (c *ClusterMetadata) GetShardTables(shardIDs []storage.ShardID) map[storage.ShardID]ShardTables { - shardTableIDs := c.topologyManager.GetTableIDs(shardIDs) - - result := make(map[storage.ShardID]ShardTables, len(shardIDs)) - - schemas := c.tableManager.GetSchemas() - schemaByID := make(map[storage.SchemaID]storage.Schema) - for _, schema := range schemas { - schemaByID[schema.ID] = schema - } - - for shardID, shardTableID := range shardTableIDs { - tables := c.tableManager.GetTablesByIDs(shardTableID.TableIDs) - tableInfos := make([]TableInfo, 0, len(tables)) - for _, table := range tables { - schema, ok := schemaByID[table.SchemaID] - if !ok { - c.logger.Warn("schema not exits", zap.Uint64("schemaID", uint64(table.SchemaID))) - } - tableInfos = append(tableInfos, TableInfo{ - ID: table.ID, - Name: table.Name, - SchemaID: table.SchemaID, - SchemaName: schema.Name, - PartitionInfo: table.PartitionInfo, - CreatedAt: table.CreatedAt, - }) - } - result[shardID] = ShardTables{ - Shard: ShardInfo{ - ID: shardID, - Role: storage.ShardRoleLeader, - Version: shardTableID.Version, - Status: storage.ShardStatusUnknown, - }, - Tables: tableInfos, - } - } - - for _, shardID := range shardIDs { - _, exists := result[shardID] - if !exists { - result[shardID] = ShardTables{ - Shard: ShardInfo{ - ID: shardID, - Role: storage.ShardRoleLeader, - Version: 0, - Status: storage.ShardStatusUnknown, - }, - Tables: []TableInfo{}, - } - } - } - return result -} - -// DropTable will drop table metadata and all mapping of this table. -// If the table to be dropped has been opened multiple times, all its mapping will be dropped. 
-func (c *ClusterMetadata) DropTable(ctx context.Context, request DropTableRequest) error { - c.logger.Info("drop table start", zap.String("cluster", c.Name()), zap.String("schemaName", request.SchemaName), zap.String("tableName", request.TableName)) - - if !c.ensureClusterStable() { - return errors.WithMessage(ErrClusterStateInvalid, "invalid cluster state, cluster state must be stable") - } - - table, ok, err := c.tableManager.GetTable(request.SchemaName, request.TableName) - if err != nil { - return errors.WithMessage(err, "get table") - } - - if !ok { - return ErrTableNotFound - } - - // Drop table. - err = c.tableManager.DropTable(ctx, request.SchemaName, request.TableName) - if err != nil { - return errors.WithMessage(err, "table manager drop table") - } - - // Remove dropped table in shard view. - err = c.topologyManager.RemoveTable(ctx, request.ShardID, request.LatestVersion, []storage.TableID{table.ID}) - if err != nil { - return errors.WithMessage(err, "topology manager remove table") - } - - c.logger.Info("drop table success", zap.String("cluster", c.Name()), zap.String("schemaName", request.SchemaName), zap.String("tableName", request.TableName)) - - return nil -} - -// MigrateTable used to migrate tables from old shard to new shard. -// The mapping relationship between table and shard will be modified. -func (c *ClusterMetadata) MigrateTable(ctx context.Context, request MigrateTableRequest) error { - c.logger.Info("migrate table", zap.String("request", fmt.Sprintf("%v", request))) - - if !c.ensureClusterStable() { - return errors.WithMessage(ErrClusterStateInvalid, "invalid cluster state, cluster state must be stable") - } - - tables := make([]storage.Table, 0, len(request.TableNames)) - tableIDs := make([]storage.TableID, 0, len(request.TableNames)) - - for _, tableName := range request.TableNames { - table, exists, err := c.tableManager.GetTable(request.SchemaName, tableName) - if err != nil { - c.logger.Error("get table", zap.Error(err), zap.String("schemaName", request.SchemaName), zap.String("tableName", tableName)) - return err - } - - if !exists { - c.logger.Error("the table to be closed does not exist", zap.String("schemaName", request.SchemaName), zap.String("tableName", tableName)) - return errors.WithMessagef(ErrTableNotFound, "table not exists, schemaName:%s, tableName:%s", request.SchemaName, tableName) - } - - tables = append(tables, table) - tableIDs = append(tableIDs, table.ID) - } - - if err := c.topologyManager.RemoveTable(ctx, request.OldShardID, request.latestOldShardVersion, tableIDs); err != nil { - c.logger.Error("remove table from topology") - return err - } - - if err := c.topologyManager.AddTable(ctx, request.NewShardID, request.latestNewShardVersion, tables); err != nil { - c.logger.Error("add table from topology") - return err - } - - c.logger.Info("migrate table finish", zap.String("request", fmt.Sprintf("%v", request))) - return nil -} - -// GetOrCreateSchema the second output parameter bool: returns true if the schema was newly created. -func (c *ClusterMetadata) GetOrCreateSchema(ctx context.Context, schemaName string) (storage.Schema, bool, error) { - return c.tableManager.GetOrCreateSchema(ctx, schemaName) -} - -// GetTable the second output parameter bool: returns true if the table exists. -func (c *ClusterMetadata) GetTable(schemaName, tableName string) (storage.Table, bool, error) { - return c.tableManager.GetTable(schemaName, tableName) -} - -// GetTableShard get the shard where the table actually exists. 
-func (c *ClusterMetadata) GetTableShard(ctx context.Context, table storage.Table) (storage.ShardID, bool) { - return c.topologyManager.GetTableShardID(ctx, table) -} - -func (c *ClusterMetadata) CreateTableMetadata(ctx context.Context, request CreateTableMetadataRequest) (CreateTableMetadataResult, error) { - c.logger.Info("create table start", zap.String("cluster", c.Name()), zap.String("schemaName", request.SchemaName), zap.String("tableName", request.TableName)) - - if !c.ensureClusterStable() { - return CreateTableMetadataResult{}, errors.WithMessage(ErrClusterStateInvalid, "invalid cluster state, cluster state must be stable") - } - - _, exists, err := c.tableManager.GetTable(request.SchemaName, request.TableName) - if err != nil { - return CreateTableMetadataResult{}, err - } - - if exists { - return CreateTableMetadataResult{}, errors.WithMessagef(ErrTableAlreadyExists, "tableName:%s", request.TableName) - } - - // Create table in table manager. - table, err := c.tableManager.CreateTable(ctx, request.SchemaName, request.TableName, request.PartitionInfo) - if err != nil { - return CreateTableMetadataResult{}, errors.WithMessage(err, "table manager create table") - } - - res := CreateTableMetadataResult{ - Table: table, - } - - c.logger.Info("create table metadata succeed", zap.String("cluster", c.Name()), zap.String("result", fmt.Sprintf("%+v", res))) - return res, nil -} - -func (c *ClusterMetadata) AddTableTopology(ctx context.Context, shardVersionUpdate ShardVersionUpdate, table storage.Table) error { - c.logger.Info("add table topology start", zap.String("cluster", c.Name()), zap.String("tableName", table.Name)) - - if !c.ensureClusterStable() { - return errors.WithMessage(ErrClusterStateInvalid, "invalid cluster state, cluster state must be stable") - } - - // Add table to topology manager. 
- err := c.topologyManager.AddTable(ctx, shardVersionUpdate.ShardID, shardVersionUpdate.LatestVersion, []storage.Table{table}) - if err != nil { - return errors.WithMessage(err, "topology manager add table") - } - - c.logger.Info("add table topology succeed", zap.String("cluster", c.Name()), zap.String("table", fmt.Sprintf("%+v", table)), zap.String("shardVersionUpdate", fmt.Sprintf("%+v", shardVersionUpdate))) - return nil -} - -func (c *ClusterMetadata) DropTableMetadata(ctx context.Context, schemaName, tableName string) (DropTableMetadataResult, error) { - c.logger.Info("drop table start", zap.String("cluster", c.Name()), zap.String("schemaName", schemaName), zap.String("tableName", tableName)) - - var dropRes DropTableMetadataResult - if !c.ensureClusterStable() { - return dropRes, errors.WithMessage(ErrClusterStateInvalid, "invalid cluster state, cluster state must be stable") - } - - table, ok, err := c.tableManager.GetTable(schemaName, tableName) - if err != nil { - return dropRes, errors.WithMessage(err, "get table") - } - - if !ok { - return dropRes, ErrTableNotFound - } - - err = c.tableManager.DropTable(ctx, schemaName, tableName) - if err != nil { - return dropRes, errors.WithMessage(err, "table manager drop table") - } - - c.logger.Info("drop table metadata success", zap.String("cluster", c.Name()), zap.String("schemaName", schemaName), zap.String("tableName", tableName), zap.String("result", fmt.Sprintf("%+v", table))) - dropRes = DropTableMetadataResult{Table: table} - return dropRes, nil -} - -func (c *ClusterMetadata) CreateTable(ctx context.Context, request CreateTableRequest) (CreateTableResult, error) { - c.logger.Info("create table start", zap.String("cluster", c.Name()), zap.String("schemaName", request.SchemaName), zap.String("tableName", request.TableName)) - - if !c.ensureClusterStable() { - return CreateTableResult{}, errors.WithMessage(ErrClusterStateInvalid, "invalid cluster state, cluster state must be stable") - } - - _, exists, err := c.tableManager.GetTable(request.SchemaName, request.TableName) - if err != nil { - return CreateTableResult{}, err - } - - if exists { - return CreateTableResult{}, errors.WithMessagef(ErrTableAlreadyExists, "tableName:%s", request.TableName) - } - - // Create table in table manager. - table, err := c.tableManager.CreateTable(ctx, request.SchemaName, request.TableName, request.PartitionInfo) - if err != nil { - return CreateTableResult{}, errors.WithMessage(err, "table manager create table") - } - - // Add table to topology manager. 
- err = c.topologyManager.AddTable(ctx, request.ShardID, request.LatestVersion, []storage.Table{table}) - if err != nil { - return CreateTableResult{}, errors.WithMessage(err, "topology manager add table") - } - - ret := CreateTableResult{ - Table: table, - ShardVersionUpdate: ShardVersionUpdate{ - ShardID: request.ShardID, - LatestVersion: request.LatestVersion, - }, - } - c.logger.Info("create table succeed", zap.String("cluster", c.Name()), zap.String("result", fmt.Sprintf("%+v", ret))) - return ret, nil -} - -func (c *ClusterMetadata) GetTableAssignedShard(ctx context.Context, schemaName string, tableName string) (storage.ShardID, bool, error) { - schema, exists := c.tableManager.GetSchema(schemaName) - if !exists { - return 0, false, errors.WithMessagef(ErrSchemaNotFound, "schema %s not found", schemaName) - } - shardIDs, exists := c.topologyManager.GetTableAssignedShard(ctx, schema.ID, tableName) - return shardIDs, exists, nil -} - -func (c *ClusterMetadata) AssignTableToShard(ctx context.Context, schemaName string, tableName string, shardID storage.ShardID) error { - schema, exists := c.tableManager.GetSchema(schemaName) - if !exists { - return errors.WithMessagef(ErrSchemaNotFound, "schema %s not found", schemaName) - } - return c.topologyManager.AssignTableToShard(ctx, schema.ID, tableName, shardID) -} - -func (c *ClusterMetadata) DeleteTableAssignedShard(ctx context.Context, schemaName string, tableName string) error { - schema, exists := c.tableManager.GetSchema(schemaName) - if !exists { - return errors.WithMessagef(ErrSchemaNotFound, "schema %s not found", schemaName) - } - return c.topologyManager.DeleteTableAssignedShard(ctx, schema.ID, tableName) -} - -func (c *ClusterMetadata) GetShards() []storage.ShardID { - return c.topologyManager.GetShards() -} - -func (c *ClusterMetadata) GetShardNodesByShardID(id storage.ShardID) ([]storage.ShardNode, error) { - return c.topologyManager.GetShardNodesByID(id) -} - -func (c *ClusterMetadata) GetShardNodeByTableIDs(tableIDs []storage.TableID) (GetShardNodesByTableIDsResult, error) { - return c.topologyManager.GetShardNodesByTableIDs(tableIDs) -} - -func (c *ClusterMetadata) RegisterNode(ctx context.Context, registeredNode RegisteredNode) error { - registeredNode.Node.State = storage.NodeStateOnline - err := c.storage.CreateOrUpdateNode(ctx, storage.CreateOrUpdateNodeRequest{ - ClusterID: c.clusterID, - Node: registeredNode.Node, - }) - if err != nil { - return errors.WithMessage(err, "create or update registered node") - } - - c.lock.Lock() - defer c.lock.Unlock() - - // When the number of nodes in the cluster reaches the threshold, modify the cluster status to prepare. - // TODO: Consider the design of the entire cluster state, which may require refactoring. - if uint32(len(c.registeredNodesCache)) >= c.metaData.MinNodeCount && c.topologyManager.GetClusterState() == storage.ClusterStateEmpty { - if err := c.UpdateClusterView(ctx, storage.ClusterStatePrepare, []storage.ShardNode{}); err != nil { - c.logger.Error("update cluster view failed", zap.Error(err)) - } - } - - // Update shard node mapping. - // Check whether to update persistence data. 
- oldCache, exists := c.registeredNodesCache[registeredNode.Node.Name] - c.registeredNodesCache[registeredNode.Node.Name] = registeredNode - enableUpdateWhenStable := c.metaData.TopologyType == storage.TopologyTypeDynamic - if !enableUpdateWhenStable && c.topologyManager.GetClusterState() == storage.ClusterStateStable { - return nil - } - if exists && !needUpdate(oldCache, registeredNode) { - // Check whether the shard versions need to be corrected. - c.maybeCorrectShardVersion(ctx, registeredNode) - return nil - } - - shardNodes := make(map[string][]storage.ShardNode, 1) - shardNodes[registeredNode.Node.Name] = make([]storage.ShardNode, 0, len(registeredNode.ShardInfos)) - for _, shardInfo := range registeredNode.ShardInfos { - shardNodes[registeredNode.Node.Name] = append(shardNodes[registeredNode.Node.Name], storage.ShardNode{ - ID: shardInfo.ID, - ShardRole: shardInfo.Role, - NodeName: registeredNode.Node.Name, - }) - } - - if err := c.UpdateClusterViewByNode(ctx, shardNodes); err != nil { - return errors.WithMessage(err, "update cluster view failed") - } - - return nil -} - -func (c *ClusterMetadata) GetRegisteredNodes() []RegisteredNode { - c.lock.RLock() - defer c.lock.RUnlock() - - nodes := make([]RegisteredNode, 0, len(c.registeredNodesCache)) - for _, node := range c.registeredNodesCache { - nodes = append(nodes, node) - } - return nodes -} - -func (c *ClusterMetadata) GetRegisteredNodeByName(nodeName string) (RegisteredNode, bool) { - c.lock.RLock() - defer c.lock.RUnlock() - - registeredNode, ok := c.registeredNodesCache[nodeName] - return registeredNode, ok -} - -func (c *ClusterMetadata) AllocShardID(ctx context.Context) (uint32, error) { - id, err := c.shardIDAlloc.Alloc(ctx) - if err != nil { - return 0, errors.WithMessage(err, "alloc shard id") - } - return uint32(id), nil -} - -func (c *ClusterMetadata) RouteTables(_ context.Context, schemaName string, tableNames []string) (RouteTablesResult, error) { - routeEntries := make(map[string]RouteEntry, len(tableNames)) - tables := make(map[storage.TableID]storage.Table, len(tableNames)) - tableIDs := make([]storage.TableID, 0, len(tableNames)) - for _, tableName := range tableNames { - table, exists, err := c.tableManager.GetTable(schemaName, tableName) - if err != nil { - return RouteTablesResult{}, errors.WithMessage(err, "table manager get table") - } - if !exists { - continue - } - - // TODO: Adapt to the current implementation of the partition table, which may need to be reconstructed later. 
- if !table.IsPartitioned() { - tables[table.ID] = table - tableIDs = append(tableIDs, table.ID) - } else { - routeEntries[table.Name] = RouteEntry{ - Table: TableInfo{ - ID: table.ID, - Name: table.Name, - SchemaID: table.SchemaID, - SchemaName: schemaName, - PartitionInfo: table.PartitionInfo, - CreatedAt: table.CreatedAt, - }, - NodeShards: nil, - } - } - } - - tableShardNodesWithShardViewVersion, err := c.topologyManager.GetShardNodesByTableIDs(tableIDs) - if err != nil { - return RouteTablesResult{}, errors.WithMessage(err, "topology get shard nodes by table ids") - } - for tableID, value := range tableShardNodesWithShardViewVersion.ShardNodes { - nodeShards := make([]ShardNodeWithVersion, 0, len(value)) - for _, shardNode := range value { - nodeShards = append(nodeShards, ShardNodeWithVersion{ - ShardInfo: ShardInfo{ - ID: shardNode.ID, - Role: shardNode.ShardRole, - Version: tableShardNodesWithShardViewVersion.Version[shardNode.ID], - Status: storage.ShardStatusUnknown, - }, - ShardNode: shardNode, - }) - } - // If nodeShards length bigger than 1, randomly select a nodeShard. - nodeShardsResult := nodeShards - if len(nodeShards) > 1 { - selectIndex, err2 := rand.Int(rand.Reader, big.NewInt(int64(len(nodeShards)))) - if err2 != nil { - return RouteTablesResult{}, errors.WithMessage(err2, "generate random node index") - } - nodeShardsResult = []ShardNodeWithVersion{nodeShards[selectIndex.Uint64()]} - } - table := tables[tableID] - routeEntries[table.Name] = RouteEntry{ - Table: TableInfo{ - ID: table.ID, - Name: table.Name, - SchemaID: table.SchemaID, - SchemaName: schemaName, - PartitionInfo: table.PartitionInfo, - CreatedAt: table.CreatedAt, - }, - NodeShards: nodeShardsResult, - } - } - return RouteTablesResult{ - ClusterViewVersion: c.topologyManager.GetVersion(), - RouteEntries: routeEntries, - }, nil -} - -func (c *ClusterMetadata) GetNodeShards(_ context.Context) (GetNodeShardsResult, error) { - getNodeShardsResult := c.topologyManager.GetShardNodes() - - shardNodesWithVersion := make([]ShardNodeWithVersion, 0, len(getNodeShardsResult.ShardNodes)) - - for _, shardNode := range getNodeShardsResult.ShardNodes { - shardNodesWithVersion = append(shardNodesWithVersion, ShardNodeWithVersion{ - ShardInfo: ShardInfo{ - ID: shardNode.ID, - Role: shardNode.ShardRole, - Version: getNodeShardsResult.Versions[shardNode.ID], - Status: storage.ShardStatusUnknown, - }, - ShardNode: shardNode, - }) - } - - return GetNodeShardsResult{ - ClusterTopologyVersion: c.topologyManager.GetVersion(), - NodeShards: shardNodesWithVersion, - }, nil -} - -func (c *ClusterMetadata) GetClusterViewVersion() uint64 { - c.lock.RLock() - defer c.lock.RUnlock() - - return c.topologyManager.GetVersion() -} - -func (c *ClusterMetadata) GetClusterMinNodeCount() uint32 { - c.lock.RLock() - defer c.lock.RUnlock() - - return c.metaData.MinNodeCount -} - -func (c *ClusterMetadata) GetTotalShardNum() uint32 { - c.lock.RLock() - defer c.lock.RUnlock() - - return c.metaData.ShardTotal -} - -func (c *ClusterMetadata) GetTopologyType() storage.TopologyType { - c.lock.RLock() - defer c.lock.RUnlock() - - return c.metaData.TopologyType -} - -func (c *ClusterMetadata) GetProcedureExecutingBatchSize() uint32 { - c.lock.RLock() - defer c.lock.RUnlock() - - return c.metaData.ProcedureExecutingBatchSize -} - -func (c *ClusterMetadata) GetCreateTime() uint64 { - c.lock.RLock() - defer c.lock.RUnlock() - - return c.metaData.CreatedAt -} - -func (c *ClusterMetadata) GetClusterState() storage.ClusterState { - c.lock.RLock() - defer 
c.lock.RUnlock() - - return c.topologyManager.GetClusterState() -} - -func (c *ClusterMetadata) ensureClusterStable() bool { - return c.GetClusterState() == storage.ClusterStateStable -} - -func (c *ClusterMetadata) GetClusterView() storage.ClusterView { - return c.topologyManager.GetClusterView() -} - -func (c *ClusterMetadata) UpdateClusterView(ctx context.Context, state storage.ClusterState, shardNodes []storage.ShardNode) error { - if err := c.topologyManager.UpdateClusterView(ctx, state, shardNodes); err != nil { - return errors.WithMessage(err, "update cluster view") - } - return nil -} - -func (c *ClusterMetadata) UpdateClusterViewByNode(ctx context.Context, shardNodes map[string][]storage.ShardNode) error { - if err := c.topologyManager.UpdateClusterViewByNode(ctx, shardNodes); err != nil { - return errors.WithMessage(err, "update cluster view") - } - return nil -} - -func (c *ClusterMetadata) DropShardNodes(ctx context.Context, shardNodes []storage.ShardNode) error { - if err := c.topologyManager.DropShardNodes(ctx, shardNodes); err != nil { - return errors.WithMessage(err, "drop shard nodes") - } - return nil -} - -func (c *ClusterMetadata) CreateShardViews(ctx context.Context, views []CreateShardView) error { - if err := c.topologyManager.CreateShardViews(ctx, views); err != nil { - return errors.WithMessage(err, "topology manager create shard views") - } - - return nil -} - -func (c *ClusterMetadata) GetClusterSnapshot() Snapshot { - return Snapshot{ - Topology: c.topologyManager.GetTopology(), - RegisteredNodes: c.GetRegisteredNodes(), - } -} - -func (c *ClusterMetadata) GetStorageMetadata() storage.Cluster { - c.lock.RLock() - defer c.lock.RUnlock() - - return c.metaData -} - -// LoadMetadata load cluster metadata from storage. -func (c *ClusterMetadata) LoadMetadata(ctx context.Context) error { - c.lock.Lock() - defer c.lock.Unlock() - - metadata, err := c.storage.GetCluster(ctx, c.clusterID) - if err != nil { - return errors.WithMessage(err, "get cluster") - } - c.metaData = metadata - return nil -} - -func (c *ClusterMetadata) GetShardNodes() GetShardNodesResult { - return c.topologyManager.GetShardNodes() -} - -func (c *ClusterMetadata) GetTables(schemaName string, tableNames []string) ([]storage.Table, error) { - return c.tableManager.GetTables(schemaName, tableNames) -} - -func (c *ClusterMetadata) GetTablesByIDs(tableIDs []storage.TableID) []storage.Table { - return c.tableManager.GetTablesByIDs(tableIDs) -} - -func needUpdate(oldCache RegisteredNode, registeredNode RegisteredNode) bool { - if len(oldCache.ShardInfos) >= 50 { - return !sortCompare(oldCache.ShardInfos, registeredNode.ShardInfos) - } - return !simpleCompare(oldCache.ShardInfos, registeredNode.ShardInfos) -} - -// sortCompare compare if they are the same by sorted slice, return true when they are the same. 
-func sortCompare(oldShardInfos, newShardInfos []ShardInfo) bool { - if len(oldShardInfos) != len(newShardInfos) { - return false - } - oldShardIDs := make([]storage.ShardID, 0, len(oldShardInfos)) - for i := 0; i < len(oldShardInfos); i++ { - oldShardIDs = append(oldShardIDs, oldShardInfos[i].ID) - } - sort.Slice(oldShardIDs, func(i, j int) bool { - return oldShardIDs[i] < oldShardIDs[j] - }) - curShardIDs := make([]storage.ShardID, 0, len(newShardInfos)) - for i := 0; i < len(newShardInfos); i++ { - curShardIDs = append(curShardIDs, newShardInfos[i].ID) - } - sort.Slice(curShardIDs, func(i, j int) bool { - return curShardIDs[i] < curShardIDs[j] - }) - for i := 0; i < len(curShardIDs); i++ { - if curShardIDs[i] != oldShardIDs[i] { - return false - } - } - return true -} - -// simpleCompare compare if they are the same by simple loop, return true when they are the same. -func simpleCompare(oldShardInfos, newShardInfos []ShardInfo) bool { - if len(oldShardInfos) != len(newShardInfos) { - return false - } -L1: - for i := 0; i < len(newShardInfos); i++ { - for j := 0; j < len(newShardInfos); j++ { - if oldShardInfos[i].ID == newShardInfos[j].ID { - continue L1 - } - } - return false - } - - return true -} - -func (c *ClusterMetadata) maybeCorrectShardVersion(ctx context.Context, node RegisteredNode) { - topology := c.topologyManager.GetTopology() - for _, shardInfo := range node.ShardInfos { - oldShardView, ok := topology.ShardViewsMapping[shardInfo.ID] - if !ok { - c.logger.Error("shard out found in topology", zap.Uint32("shardID", uint32(shardInfo.ID))) - return - } - if oldShardView.Version != shardInfo.Version { - c.logger.Warn("shard version mismatch", zap.Uint32("shardID", uint32(shardInfo.ID)), zap.Uint64("metaVersion", oldShardView.Version), zap.Uint64("nodeVersion", shardInfo.Version)) - } - if oldShardView.Version < shardInfo.Version { - // Shard version in meta not equal to HoraeDB, it is needed to be corrected. - // Update with expect value. - c.logger.Info("try to update shard version", zap.Uint32("shardID", uint32(shardInfo.ID)), zap.Uint64("expectVersion", oldShardView.Version), zap.Uint64("newVersion", shardInfo.Version)) - if err := c.topologyManager.UpdateShardVersionWithExpect(ctx, shardInfo.ID, shardInfo.Version, oldShardView.Version); err != nil { - c.logger.Warn("update shard version with expect failed", zap.Uint32("shardID", uint32(shardInfo.ID)), zap.Uint64("expectVersion", oldShardView.Version), zap.Uint64("newVersion", shardInfo.Version)) - } - // TODO: Maybe we need do some thing to ensure HoraeDB status after update shard version. - } - } -} diff --git a/horaemeta/server/cluster/metadata/cluster_metadata_test.go b/horaemeta/server/cluster/metadata/cluster_metadata_test.go deleted file mode 100644 index e29761491c..0000000000 --- a/horaemeta/server/cluster/metadata/cluster_metadata_test.go +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package metadata_test - -import ( - "context" - "fmt" - "testing" - "time" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/test" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/stretchr/testify/require" -) - -func TestClusterMetadata(t *testing.T) { - ctx := context.Background() - re := require.New(t) - - metadata := test.InitStableCluster(ctx, t).GetMetadata() - - testUpdateClusterView(ctx, re, metadata) - testRegisterNode(ctx, re, metadata) - testTableOperation(ctx, re, metadata) - testShardOperation(ctx, re, metadata) - testMetadataOperation(ctx, re, metadata) -} - -func testUpdateClusterView(ctx context.Context, re *require.Assertions, m *metadata.ClusterMetadata) { - // Remove a shard on node. - currentShardNodes := m.GetClusterSnapshot().Topology.ClusterView.ShardNodes - removeTarget := currentShardNodes[0] - newShardNodes := make(map[string][]storage.ShardNode) - newShardNodes[removeTarget.NodeName] = []storage.ShardNode{} - - for i := 1; i < len(currentShardNodes); i++ { - if removeTarget.NodeName == currentShardNodes[i].NodeName { - if removeTarget.ID != currentShardNodes[i].ID { - newShardNodes[currentShardNodes[i].NodeName] = append(newShardNodes[currentShardNodes[i].NodeName], currentShardNodes[i]) - } - } - } - err := m.UpdateClusterViewByNode(ctx, newShardNodes) - re.NoError(err) - // New topology shard not contains the target shardNode. - for _, shardNode := range m.GetClusterSnapshot().Topology.ClusterView.ShardNodes { - re.NotEqualf(removeTarget.ID, shardNode.ID, fmt.Sprintf("%v \n %v", m.GetClusterSnapshot().Topology.ClusterView.ShardNodes, newShardNodes)) - } - re.Equal(len(currentShardNodes)-1, len(m.GetClusterSnapshot().Topology.ClusterView.ShardNodes)) - - // Update cluster state and reset shardNodes. - err = m.UpdateClusterView(ctx, storage.ClusterStateEmpty, currentShardNodes) - re.NoError(err) - re.Equal(storage.ClusterStateEmpty, m.GetClusterState()) - re.Equal(len(currentShardNodes), len(m.GetClusterSnapshot().Topology.ClusterView.ShardNodes)) -} - -func testRegisterNode(ctx context.Context, re *require.Assertions, m *metadata.ClusterMetadata) { - currentShardNodes := m.GetClusterSnapshot().Topology.ClusterView.ShardNodes - currentRegisterNodes := m.GetRegisteredNodes() - // Register node with empty shard. - newNodeName := "testRegisterNode" - lastTouchTime := uint64(time.Now().UnixMilli()) - err := m.RegisterNode(ctx, metadata.RegisteredNode{ - Node: storage.Node{ - Name: newNodeName, - NodeStats: storage.NewEmptyNodeStats(), - LastTouchTime: lastTouchTime, - State: 0, - }, - ShardInfos: nil, - }) - re.NoError(err) - re.Equal(len(currentRegisterNodes)+1, len(m.GetRegisteredNodes())) - node, exists := m.GetRegisteredNodeByName(newNodeName) - re.True(exists) - re.Equal(lastTouchTime, node.Node.LastTouchTime) - - // Update lastTouchTime. 
- lastTouchTime = uint64(time.Now().UnixMilli()) - node.Node.LastTouchTime = lastTouchTime - err = m.RegisterNode(ctx, node) - re.NoError(err) - re.Equal(len(currentRegisterNodes)+1, len(m.GetRegisteredNodes())) - node, exists = m.GetRegisteredNodeByName(newNodeName) - re.True(exists) - re.Equal(lastTouchTime, node.Node.LastTouchTime) - - // Reset shardNodes. - err = m.UpdateClusterView(ctx, storage.ClusterStateStable, currentShardNodes) - re.NoError(err) - re.Equal(len(currentShardNodes), len(m.GetClusterSnapshot().Topology.ClusterView.ShardNodes)) -} - -func testTableOperation(ctx context.Context, re *require.Assertions, m *metadata.ClusterMetadata) { - testSchema := "testSchemaName" - testTableName := "testTableName0" - // Test create schema. - schema, _, err := m.GetOrCreateSchema(ctx, testSchema) - re.NoError(err) - re.Equal(testSchema, schema.Name) - - // Test create table metadata. - createMetadataResult, err := m.CreateTableMetadata(ctx, metadata.CreateTableMetadataRequest{ - SchemaName: testSchema, - TableName: testTableName, - PartitionInfo: storage.PartitionInfo{Info: nil}, - }) - re.NoError(err) - re.Equal(createMetadataResult.Table.Name, testTableName) - - // Table metadata exists. - t, exists, err := m.GetTable(testSchema, testTableName) - re.NoError(err) - re.True(exists) - re.Equal(testTableName, t.Name) - - // Route table returns empty when the table is not assigned to any node. - routeTable, err := m.RouteTables(ctx, testSchema, []string{testTableName}) - re.NoError(err) - re.Equal(0, len(routeTable.RouteEntries[testTableName].NodeShards)) - - // Test drop table metadata. - dropMetadataResult, err := m.DropTableMetadata(ctx, testSchema, testTableName) - re.NoError(err) - re.Equal(testTableName, dropMetadataResult.Table.Name) - - // Table metadata no longer exists. - t, exists, err = m.GetTable(testSchema, testTableName) - re.NoError(err) - re.False(exists) - - // Test create table. - createResult, err := m.CreateTable(ctx, metadata.CreateTableRequest{ - ShardID: 0, - LatestVersion: 0, - SchemaName: testSchema, - TableName: testTableName, - PartitionInfo: storage.PartitionInfo{Info: nil}, - }) - re.NoError(err) - re.Equal(testTableName, createResult.Table.Name) - - // Test route table; it should return the shardNode. - routeResult, err := m.RouteTables(ctx, testSchema, []string{testTableName}) - re.NoError(err) - re.Equal(1, len(routeResult.RouteEntries)) - - // Migrate this table to another shard. - err = m.MigrateTable(ctx, metadata.MigrateTableRequest{ - SchemaName: testSchema, - TableNames: []string{testTableName}, - OldShardID: 0, - NewShardID: 1, - }) - re.NoError(err) - - // Check the migrate result; route table should return the new shard. - routeResult, err = m.RouteTables(ctx, testSchema, []string{testTableName}) - re.NoError(err) - re.Equal(1, len(routeResult.RouteEntries)) - re.Equal(storage.ShardID(1), routeResult.RouteEntries[testTableName].NodeShards[0].ShardInfo.ID) - - // Drop the table created above.
- err = m.DropTable(ctx, metadata.DropTableRequest{ - SchemaName: testSchema, - TableName: testTableName, - ShardID: storage.ShardID(1), - LatestVersion: 0, - }) - re.NoError(err) -} - -func testShardOperation(ctx context.Context, re *require.Assertions, m *metadata.ClusterMetadata) { - newID, err := m.AllocShardID(ctx) - re.NoError(err) - - err = m.CreateShardViews(ctx, []metadata.CreateShardView{{ - ShardID: storage.ShardID(newID), - Tables: nil, - }}) - re.NoError(err) - - shardNodeResult, err := m.GetNodeShards(ctx) - re.NoError(err) - - shardNodes, err := m.GetShardNodesByShardID(shardNodeResult.NodeShards[0].ShardInfo.ID) - re.NoError(err) - re.Equal(1, len(shardNodes)) - - shardTables := m.GetShardTables([]storage.ShardID{shardNodeResult.NodeShards[0].ShardInfo.ID}) - re.Equal(1, len(shardTables)) - - _, err = m.GetShardNodeByTableIDs([]storage.TableID{}) - re.NoError(err) - - err = m.DropShardNodes(ctx, []storage.ShardNode{{ - ID: shardNodeResult.NodeShards[0].ShardNode.ID, - ShardRole: shardNodeResult.NodeShards[0].ShardNode.ShardRole, - NodeName: shardNodeResult.NodeShards[0].ShardNode.NodeName, - }}) - re.NoError(err) -} - -func testMetadataOperation(ctx context.Context, re *require.Assertions, m *metadata.ClusterMetadata) { - // Init cluster metadata; it will return an error because it has already been initialized. - err := m.Init(ctx) - re.Error(err) - - err = m.Load(ctx) - re.NoError(err) - - // Load metadata from storage; it will return an error because it has not been persisted. - err = m.LoadMetadata(ctx) - re.Error(err) -} diff --git a/horaemeta/server/cluster/metadata/compare_benchmark_test.go b/horaemeta/server/cluster/metadata/compare_benchmark_test.go deleted file mode 100644 index 1780980ae6..0000000000 --- a/horaemeta/server/cluster/metadata/compare_benchmark_test.go +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License.
- */ - -package metadata - -import ( - "testing" - - "github.com/apache/incubator-horaedb-meta/server/storage" -) - -func buildRegisterNode(shardNumber int) RegisteredNode { - shardInfos := make([]ShardInfo, 0, shardNumber) - for i := shardNumber; i > 0; i-- { - shardInfos = append(shardInfos, ShardInfo{ - ID: storage.ShardID(i), - Role: 0, - Version: 0, - Status: storage.ShardStatusUnknown, - }) - } - return RegisteredNode{ - Node: storage.Node{ - Name: "", - NodeStats: storage.NewEmptyNodeStats(), - LastTouchTime: 0, - State: storage.NodeStateUnknown, - }, - ShardInfos: shardInfos, - } -} - -func BenchmarkSortWith10Shards(b *testing.B) { - registerNode := buildRegisterNode(10) - oldCache := registerNode - for i := 0; i < b.N; i++ { - sortCompare(oldCache.ShardInfos, registerNode.ShardInfos) - } -} - -func BenchmarkCompareWith10Shards(b *testing.B) { - registerNode := buildRegisterNode(10) - oldCache := registerNode - for i := 0; i < b.N; i++ { - simpleCompare(oldCache.ShardInfos, registerNode.ShardInfos) - } -} - -func BenchmarkSortWith50Shards(b *testing.B) { - registerNode := buildRegisterNode(50) - oldCache := registerNode - for i := 0; i < b.N; i++ { - sortCompare(oldCache.ShardInfos, registerNode.ShardInfos) - } -} - -func BenchmarkCompareWith50Shards(b *testing.B) { - registerNode := buildRegisterNode(50) - oldCache := registerNode - for i := 0; i < b.N; i++ { - simpleCompare(oldCache.ShardInfos, registerNode.ShardInfos) - } -} - -func BenchmarkSortWith100Shards(b *testing.B) { - registerNode := buildRegisterNode(100) - oldCache := registerNode - for i := 0; i < b.N; i++ { - sortCompare(oldCache.ShardInfos, registerNode.ShardInfos) - } -} - -func BenchmarkCompareWith100Shards(b *testing.B) { - registerNode := buildRegisterNode(100) - oldCache := registerNode - for i := 0; i < b.N; i++ { - simpleCompare(oldCache.ShardInfos, registerNode.ShardInfos) - } -} diff --git a/horaemeta/server/cluster/metadata/error.go b/horaemeta/server/cluster/metadata/error.go deleted file mode 100644 index b0cb5ed69f..0000000000 --- a/horaemeta/server/cluster/metadata/error.go +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package metadata - -import "github.com/apache/incubator-horaedb-meta/pkg/coderr" - -var ( - ErrCreateCluster = coderr.NewCodeError(coderr.BadRequest, "create cluster") - ErrUpdateCluster = coderr.NewCodeError(coderr.Internal, "update cluster") - ErrStartCluster = coderr.NewCodeError(coderr.Internal, "start cluster") - ErrClusterAlreadyExists = coderr.NewCodeError(coderr.ClusterAlreadyExists, "cluster already exists") - ErrClusterNotFound = coderr.NewCodeError(coderr.NotFound, "cluster not found") - ErrClusterStateInvalid = coderr.NewCodeError(coderr.Internal, "cluster state invalid") - ErrSchemaNotFound = coderr.NewCodeError(coderr.NotFound, "schema not found") - ErrTableNotFound = coderr.NewCodeError(coderr.NotFound, "table not found") - ErrShardNotFound = coderr.NewCodeError(coderr.NotFound, "shard not found") - ErrVersionNotFound = coderr.NewCodeError(coderr.NotFound, "version not found") - ErrNodeNotFound = coderr.NewCodeError(coderr.NotFound, "NodeName not found") - ErrTableAlreadyExists = coderr.NewCodeError(coderr.Internal, "table already exists") - ErrOpenTable = coderr.NewCodeError(coderr.Internal, "open table") - ErrParseTopologyType = coderr.NewCodeError(coderr.Internal, "parse topology type") -) diff --git a/horaemeta/server/cluster/metadata/table_manager.go b/horaemeta/server/cluster/metadata/table_manager.go deleted file mode 100644 index 806aeeff88..0000000000 --- a/horaemeta/server/cluster/metadata/table_manager.go +++ /dev/null @@ -1,375 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package metadata - -import ( - "context" - "fmt" - "sync" - "time" - - "github.com/apache/incubator-horaedb-meta/server/id" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/pkg/errors" - "go.uber.org/zap" -) - -// TableManager manages table metadata by schema. -type TableManager interface { - // Load load table meta data from storage. - Load(ctx context.Context) error - // GetTable get table with schemaName and tableName, the second output parameter bool: returns true if the table exists. - GetTable(schemaName string, tableName string) (storage.Table, bool, error) - // GetTables get tables with schemaName and tableNames. - GetTables(schemaName string, tableNames []string) ([]storage.Table, error) - // GetTablesByIDs get tables with tableIDs. - GetTablesByIDs(tableIDs []storage.TableID) []storage.Table - // CreateTable create table with schemaName and tableName. - CreateTable(ctx context.Context, schemaName string, tableName string, partitionInfo storage.PartitionInfo) (storage.Table, error) - // DropTable drop table with schemaName and tableName. - DropTable(ctx context.Context, schemaName string, tableName string) error - // GetSchema get schema with schemaName. 
- GetSchema(schemaName string) (storage.Schema, bool) - // GetSchemaByID get schema with schemaName. - GetSchemaByID(schemaID storage.SchemaID) (storage.Schema, bool) - // GetSchemas get all schemas in cluster. - GetSchemas() []storage.Schema - // GetOrCreateSchema get or create schema with schemaName. - GetOrCreateSchema(ctx context.Context, schemaName string) (storage.Schema, bool, error) -} - -type Tables struct { - tables map[string]storage.Table // tableName -> table - tablesByID map[storage.TableID]storage.Table // tableID -> table -} - -type TableManagerImpl struct { - logger *zap.Logger - storage storage.Storage - clusterID storage.ClusterID - schemaIDAlloc id.Allocator - tableIDAlloc id.Allocator - - // RWMutex is used to protect following fields. - lock sync.RWMutex - schemas map[string]storage.Schema // schemaName -> schema - schemaTables map[storage.SchemaID]*Tables // schemaName -> tables -} - -func NewTableManagerImpl(logger *zap.Logger, storage storage.Storage, clusterID storage.ClusterID, schemaIDAlloc id.Allocator, tableIDAlloc id.Allocator) TableManager { - return &TableManagerImpl{ - logger: logger, - storage: storage, - clusterID: clusterID, - schemaIDAlloc: schemaIDAlloc, - tableIDAlloc: tableIDAlloc, - lock: sync.RWMutex{}, - // It will be initialized in loadSchemas. - schemas: nil, - // It will be initialized in loadTables. - schemaTables: nil, - } -} - -func (m *TableManagerImpl) Load(ctx context.Context) error { - m.lock.Lock() - defer m.lock.Unlock() - - if err := m.loadSchemas(ctx); err != nil { - return errors.WithMessage(err, "load schemas") - } - - if err := m.loadTables(ctx); err != nil { - return errors.WithMessage(err, "load tables") - } - - return nil -} - -func (m *TableManagerImpl) GetTable(schemaName, tableName string) (storage.Table, bool, error) { - m.lock.RLock() - defer m.lock.RUnlock() - - return m.getTable(schemaName, tableName) -} - -func (m *TableManagerImpl) GetTables(schemaName string, tableNames []string) ([]storage.Table, error) { - m.lock.RLock() - defer m.lock.RUnlock() - - return m.getTables(schemaName, tableNames) -} - -func (m *TableManagerImpl) GetTablesByIDs(tableIDs []storage.TableID) []storage.Table { - m.lock.RLock() - defer m.lock.RUnlock() - - result := make([]storage.Table, 0, len(tableIDs)) - for _, tables := range m.schemaTables { - for _, tableID := range tableIDs { - table, ok := tables.tablesByID[tableID] - if !ok { - m.logger.Warn("table not exists", zap.Uint64("tableID", uint64(tableID))) - continue - } - result = append(result, table) - } - } - - return result -} - -func (m *TableManagerImpl) CreateTable(ctx context.Context, schemaName string, tableName string, partitionInfo storage.PartitionInfo) (storage.Table, error) { - m.lock.Lock() - defer m.lock.Unlock() - - var emptyTable storage.Table - _, exists, err := m.getTable(schemaName, tableName) - if err != nil { - return emptyTable, errors.WithMessage(err, "get table") - } - - if exists { - return emptyTable, errors.WithMessagef(ErrTableAlreadyExists, "tableName:%s", tableName) - } - - // Create table in storage. 
- schema, ok := m.schemas[schemaName] - if !ok { - return emptyTable, ErrSchemaNotFound.WithCausef("schema name:%s", schemaName) - } - - id, err := m.tableIDAlloc.Alloc(ctx) - if err != nil { - return emptyTable, errors.WithMessagef(err, "alloc table id, table name:%s", tableName) - } - - table := storage.Table{ - ID: storage.TableID(id), - Name: tableName, - SchemaID: schema.ID, - CreatedAt: uint64(time.Now().UnixMilli()), - PartitionInfo: partitionInfo, - } - err = m.storage.CreateTable(ctx, storage.CreateTableRequest{ - ClusterID: m.clusterID, - SchemaID: schema.ID, - Table: table, - }) - - if err != nil { - return emptyTable, errors.WithMessage(err, "storage create table") - } - - // Update table in memory. - _, ok = m.schemaTables[schema.ID] - if !ok { - m.schemaTables[schema.ID] = &Tables{ - tables: make(map[string]storage.Table), - tablesByID: make(map[storage.TableID]storage.Table), - } - } - tables := m.schemaTables[schema.ID] - tables.tables[tableName] = table - tables.tablesByID[table.ID] = table - - return table, nil -} - -func (m *TableManagerImpl) DropTable(ctx context.Context, schemaName string, tableName string) error { - m.lock.Lock() - defer m.lock.Unlock() - - schema, ok := m.schemas[schemaName] - if !ok { - return nil - } - - table, ok := m.schemaTables[schema.ID].tables[tableName] - if !ok { - return nil - } - - // Delete table in storage. - err := m.storage.DeleteTable(ctx, storage.DeleteTableRequest{ - ClusterID: m.clusterID, - SchemaID: schema.ID, - TableName: tableName, - }) - if err != nil { - return errors.WithMessagef(err, "storage delete table") - } - - tables := m.schemaTables[schema.ID] - delete(tables.tables, tableName) - delete(tables.tablesByID, table.ID) - return nil -} - -func (m *TableManagerImpl) GetSchema(schemaName string) (storage.Schema, bool) { - m.lock.RLock() - defer m.lock.RUnlock() - - schema, ok := m.schemas[schemaName] - return schema, ok -} - -func (m *TableManagerImpl) GetSchemaByID(schemaID storage.SchemaID) (storage.Schema, bool) { - m.lock.RLock() - defer m.lock.RUnlock() - - for _, schema := range m.schemas { - if schema.ID == schemaID { - return schema, true - } - } - - var emptySchema storage.Schema - return emptySchema, false -} - -func (m *TableManagerImpl) GetSchemas() []storage.Schema { - m.lock.RLock() - defer m.lock.RUnlock() - - schemas := make([]storage.Schema, len(m.schemas)) - - for _, schema := range m.schemas { - schemas = append(schemas, schema) - } - - return schemas -} - -func (m *TableManagerImpl) GetOrCreateSchema(ctx context.Context, schemaName string) (storage.Schema, bool, error) { - m.lock.Lock() - defer m.lock.Unlock() - - schema, ok := m.schemas[schemaName] - if ok { - return schema, true, nil - } - - id, err := m.schemaIDAlloc.Alloc(ctx) - if err != nil { - return storage.Schema{}, false, errors.WithMessage(err, "alloc schema id") - } - - schema = storage.Schema{ - ID: storage.SchemaID(id), - ClusterID: m.clusterID, - Name: schemaName, - CreatedAt: uint64(time.Now().UnixMilli()), - } - - // Create schema in storage. - if err = m.storage.CreateSchema(ctx, storage.CreateSchemaRequest{ - ClusterID: m.clusterID, - Schema: schema, - }); err != nil { - return storage.Schema{}, false, errors.WithMessage(err, "storage create schema") - } - // Update schema in memory. 
- m.schemas[schemaName] = schema - return schema, false, nil -} - -func (m *TableManagerImpl) loadSchemas(ctx context.Context) error { - schemasResult, err := m.storage.ListSchemas(ctx, storage.ListSchemasRequest{ClusterID: m.clusterID}) - if err != nil { - return errors.WithMessage(err, "list schemas") - } - m.logger.Debug("load schema", zap.String("data", fmt.Sprintf("%+v", schemasResult))) - - // Reset data in memory. - m.schemas = make(map[string]storage.Schema, len(schemasResult.Schemas)) - for _, schema := range schemasResult.Schemas { - m.schemas[schema.Name] = schema - } - - return nil -} - -func (m *TableManagerImpl) loadTables(ctx context.Context) error { - // Reset data in memory. - m.schemaTables = make(map[storage.SchemaID]*Tables, len(m.schemas)) - for _, schema := range m.schemas { - tablesResult, err := m.storage.ListTables(ctx, storage.ListTableRequest{ - ClusterID: m.clusterID, - SchemaID: schema.ID, - }) - if err != nil { - return errors.WithMessage(err, "list tables") - } - m.logger.Debug("load table", zap.String("schema", fmt.Sprintf("%+v", schema)), zap.String("tables", fmt.Sprintf("%+v", tablesResult))) - - for _, table := range tablesResult.Tables { - tables, ok := m.schemaTables[table.SchemaID] - if !ok { - tables = &Tables{ - tables: make(map[string]storage.Table, 0), - tablesByID: make(map[storage.TableID]storage.Table, 0), - } - m.schemaTables[table.SchemaID] = tables - } - - tables.tables[table.Name] = table - tables.tablesByID[table.ID] = table - } - } - return nil -} - -func (m *TableManagerImpl) getTable(schemaName, tableName string) (storage.Table, bool, error) { - schema, ok := m.schemas[schemaName] - var emptyTable storage.Table - if !ok { - return emptyTable, false, ErrSchemaNotFound.WithCausef("schema name", schemaName) - } - - tables, ok := m.schemaTables[schema.ID] - if !ok { - return emptyTable, false, nil - } - - table, ok := tables.tables[tableName] - return table, ok, nil -} - -func (m *TableManagerImpl) getTables(schemaName string, tableNames []string) ([]storage.Table, error) { - schema, ok := m.schemas[schemaName] - if !ok { - return []storage.Table{}, ErrSchemaNotFound.WithCausef("schema name", schemaName) - } - - schemaTables, ok := m.schemaTables[schema.ID] - if !ok { - return []storage.Table{}, nil - } - - tables := make([]storage.Table, 0, len(tableNames)) - for _, tableName := range tableNames { - if table, ok := schemaTables.tables[tableName]; ok { - tables = append(tables, table) - } - } - - return tables, nil -} diff --git a/horaemeta/server/cluster/metadata/table_manager_test.go b/horaemeta/server/cluster/metadata/table_manager_test.go deleted file mode 100644 index 56cc0400e3..0000000000 --- a/horaemeta/server/cluster/metadata/table_manager_test.go +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package metadata_test - -import ( - "context" - "path" - "testing" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/etcdutil" - "github.com/apache/incubator-horaedb-meta/server/id" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/stretchr/testify/require" - "go.uber.org/zap" -) - -const ( - TestRootPath = "/testRootPath" - TestClusterID = 0 - TestClusterName = "TestClusterName" - TestSchemaIDPrefix = "TestSchemaIDPrefix" - TestTableIDPrefix = "TestTableIDPrefix" - TestIDAllocatorStep = 5 - TestSchemaName = "TestSchemaName" - TestTableName = "TestTableName" -) - -func TestTableManager(t *testing.T) { - ctx := context.Background() - re := require.New(t) - - _, client, _ := etcdutil.PrepareEtcdServerAndClient(t) - clusterStorage := storage.NewStorageWithEtcdBackend(client, TestRootPath, storage.Options{ - MaxScanLimit: 100, MinScanLimit: 10, MaxOpsPerTxn: 10, - }) - - schemaIDAlloc := id.NewAllocatorImpl(zap.NewNop(), client, path.Join(TestRootPath, TestClusterName, TestSchemaIDPrefix), TestIDAllocatorStep) - tableIDAlloc := id.NewAllocatorImpl(zap.NewNop(), client, path.Join(TestRootPath, TestClusterName, TestTableIDPrefix), TestIDAllocatorStep) - tableManager := metadata.NewTableManagerImpl(zap.NewNop(), clusterStorage, storage.ClusterID(TestClusterID), schemaIDAlloc, tableIDAlloc) - err := tableManager.Load(ctx) - re.NoError(err) - - testSchema(ctx, re, tableManager) - testCreateAndDropTable(ctx, re, tableManager) -} - -func testSchema(ctx context.Context, re *require.Assertions, manager metadata.TableManager) { - _, exists := manager.GetSchema(TestSchemaName) - re.False(exists) - schema, exists, err := manager.GetOrCreateSchema(ctx, TestSchemaName) - re.NoError(err) - re.False(exists) - re.Equal(TestSchemaName, schema.Name) -} - -func testCreateAndDropTable(ctx context.Context, re *require.Assertions, manager metadata.TableManager) { - _, exists, err := manager.GetTable(TestSchemaName, TestTableName) - re.NoError(err) - re.False(exists) - - t, err := manager.CreateTable(ctx, TestSchemaName, TestTableName, storage.PartitionInfo{Info: nil}) - re.NoError(err) - re.Equal(TestTableName, t.Name) - - t, exists, err = manager.GetTable(TestSchemaName, TestTableName) - re.NoError(err) - re.True(exists) - re.Equal(TestTableName, t.Name) - - err = manager.DropTable(ctx, TestSchemaName, TestTableName) - re.NoError(err) - - _, exists, err = manager.GetTable(TestSchemaName, TestTableName) - re.NoError(err) - re.False(exists) -} diff --git a/horaemeta/server/cluster/metadata/topology_manager.go b/horaemeta/server/cluster/metadata/topology_manager.go deleted file mode 100644 index f744a5d9a2..0000000000 --- a/horaemeta/server/cluster/metadata/topology_manager.go +++ /dev/null @@ -1,691 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package metadata - -import ( - "context" - "fmt" - "sync" - - "github.com/apache/incubator-horaedb-meta/server/id" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/pkg/errors" - "go.uber.org/zap" -) - -// TopologyManager manages the cluster topology, including the mapping relationship between shards, nodes, and tables. -type TopologyManager interface { - // Load load cluster topology from storage. - Load(ctx context.Context, schemas []storage.Schema) error - // GetVersion get cluster view version. - GetVersion() uint64 - // GetClusterState get cluster view state. - GetClusterState() storage.ClusterState - // GetTableIDs get shardNode and tablesIDs with shardID and nodeName. - GetTableIDs(shardIDs []storage.ShardID) map[storage.ShardID]ShardTableIDs - // AddTable add table to cluster topology. - AddTable(ctx context.Context, shardID storage.ShardID, latestVersion uint64, tables []storage.Table) error - // RemoveTable remove table on target shards from cluster topology. - RemoveTable(ctx context.Context, shardID storage.ShardID, latestVersion uint64, tableIDs []storage.TableID) error - // GetTableShardID get the shardID of the shard where the table is located. - GetTableShardID(ctx context.Context, table storage.Table) (storage.ShardID, bool) - // AssignTableToShard persistent table shard mapping, it is used to store assign results and make the table creation idempotent. - AssignTableToShard(ctx context.Context, schemaID storage.SchemaID, tableName string, shardID storage.ShardID) error - // GetTableAssignedShard get table assign result. - GetTableAssignedShard(ctx context.Context, schemaID storage.SchemaID, tableName string) (storage.ShardID, bool) - // DeleteTableAssignedShard delete table assign result. - DeleteTableAssignedShard(ctx context.Context, schemaID storage.SchemaID, tableName string) error - // GetShards get all shards in cluster topology. - GetShards() []storage.ShardID - // GetShardNodesByID get shardNodes with shardID. - GetShardNodesByID(shardID storage.ShardID) ([]storage.ShardNode, error) - // GetShardNodesByTableIDs get shardNodes with tableIDs. - GetShardNodesByTableIDs(tableID []storage.TableID) (GetShardNodesByTableIDsResult, error) - // GetShardNodes get all shardNodes in cluster topology. - GetShardNodes() GetShardNodesResult - // DropShardNodes drop target shardNodes in cluster topology. - DropShardNodes(ctx context.Context, shardNodes []storage.ShardNode) error - // InitClusterView init cluster view when create new cluster. - InitClusterView(ctx context.Context) error - // UpdateClusterView update cluster view with shardNodes. - UpdateClusterView(ctx context.Context, state storage.ClusterState, shardNodes []storage.ShardNode) error - // UpdateClusterViewByNode update cluster view with target shardNodes, it will only update shardNodes corresponding the node name. - UpdateClusterViewByNode(ctx context.Context, shardNodes map[string][]storage.ShardNode) error - // GetClusterView return current cluster view. - GetClusterView() storage.ClusterView - // CreateShardViews create shardViews. 
- CreateShardViews(ctx context.Context, shardViews []CreateShardView) error - // UpdateShardVersionWithExpect update shard version when pre version is same as expect version. - UpdateShardVersionWithExpect(ctx context.Context, shardID storage.ShardID, version uint64, expect uint64) error - // GetTopology get current topology snapshot. - GetTopology() Topology -} - -type ShardTableIDs struct { - TableIDs []storage.TableID - Version uint64 -} - -type GetShardTablesByNodeResult struct { - ShardTableIDs map[storage.ShardID]ShardTableIDs -} - -type GetShardNodesByTableIDsResult struct { - ShardNodes map[storage.TableID][]storage.ShardNode - Version map[storage.ShardID]uint64 -} - -type GetShardNodesResult struct { - ShardNodes []storage.ShardNode - Versions map[storage.ShardID]uint64 -} - -type CreateShardView struct { - ShardID storage.ShardID - Tables []storage.TableID -} - -type Topology struct { - ShardViewsMapping map[storage.ShardID]storage.ShardView - ClusterView storage.ClusterView -} - -func (t *Topology) IsStable() bool { - if t.ClusterView.State != storage.ClusterStateStable { - return false - } - if len(t.ClusterView.ShardNodes) != len(t.ShardViewsMapping) { - return false - } - return true -} - -func (t *Topology) IsPrepareFinished() bool { - if t.ClusterView.State != storage.ClusterStatePrepare { - return false - } - if len(t.ShardViewsMapping) != len(t.ClusterView.ShardNodes) { - return false - } - return true -} - -type TopologyManagerImpl struct { - logger *zap.Logger - storage storage.Storage - clusterID storage.ClusterID - shardIDAlloc id.Allocator - - // RWMutex is used to protect following fields. - lock sync.RWMutex - clusterView *storage.ClusterView // ClusterView in memory. - shardNodesMapping map[storage.ShardID][]storage.ShardNode // ShardID -> nodes of the shard - nodeShardsMapping map[string][]storage.ShardNode // nodeName -> shards of the NodeName - // ShardView in memory. - shardTablesMapping map[storage.ShardID]*storage.ShardView // ShardID -> shardTopology - tableShardMapping map[storage.TableID][]storage.ShardID // tableID -> ShardID - // Table assign result in memory. - tableAssignMapping map[storage.SchemaID]map[string]storage.ShardID // tableName -> shardID - - nodes map[string]storage.Node // NodeName in memory. -} - -func NewTopologyManagerImpl(logger *zap.Logger, storage storage.Storage, clusterID storage.ClusterID, shardIDAlloc id.Allocator) TopologyManager { - return &TopologyManagerImpl{ - logger: logger, - storage: storage, - clusterID: clusterID, - shardIDAlloc: shardIDAlloc, - lock: sync.RWMutex{}, - // The following fields will be initialized in the Load method. 
- clusterView: nil, - shardNodesMapping: nil, - nodeShardsMapping: nil, - shardTablesMapping: nil, - tableShardMapping: nil, - tableAssignMapping: nil, - nodes: nil, - } -} - -func (m *TopologyManagerImpl) Load(ctx context.Context, schemas []storage.Schema) error { - m.lock.Lock() - defer m.lock.Unlock() - - if err := m.loadClusterView(ctx); err != nil { - return errors.WithMessage(err, "load cluster view") - } - - if err := m.loadShardViews(ctx); err != nil { - return errors.WithMessage(err, "load shard views") - } - - if err := m.loadNodes(ctx); err != nil { - return errors.WithMessage(err, "load nodes") - } - - if err := m.loadAssignTable(ctx, schemas); err != nil { - return errors.WithMessage(err, "load assign table") - } - - return nil -} - -func (m *TopologyManagerImpl) GetVersion() uint64 { - m.lock.RLock() - defer m.lock.RUnlock() - - return m.clusterView.Version -} - -func (m *TopologyManagerImpl) GetClusterState() storage.ClusterState { - m.lock.RLock() - defer m.lock.RUnlock() - - return m.clusterView.State -} - -func (m *TopologyManagerImpl) GetTableIDs(shardIDs []storage.ShardID) map[storage.ShardID]ShardTableIDs { - m.lock.RLock() - defer m.lock.RUnlock() - - shardTableIDs := make(map[storage.ShardID]ShardTableIDs, len(shardIDs)) - for _, shardID := range shardIDs { - shardView := m.shardTablesMapping[shardID] - shardTableIDs[shardID] = ShardTableIDs{ - TableIDs: shardView.TableIDs, - Version: shardView.Version, - } - } - - return shardTableIDs -} - -func (m *TopologyManagerImpl) AddTable(ctx context.Context, shardID storage.ShardID, latestVersion uint64, tables []storage.Table) error { - m.lock.Lock() - defer m.lock.Unlock() - - shardView, ok := m.shardTablesMapping[shardID] - if !ok { - return ErrShardNotFound.WithCausef("shard id:%d", shardID) - } - - tableIDsToAdd := make([]storage.TableID, 0, len(tables)) - for _, table := range tables { - tableIDsToAdd = append(tableIDsToAdd, table.ID) - } - - tableIDs := make([]storage.TableID, 0, len(shardView.TableIDs)+1) - tableIDs = append(tableIDs, shardView.TableIDs...) - tableIDs = append(tableIDs, tableIDsToAdd...) - - newShardView := storage.NewShardView(shardID, latestVersion, tableIDs) - - // Update shard view in storage. - err := m.storage.UpdateShardView(ctx, storage.UpdateShardViewRequest{ - ClusterID: m.clusterID, - ShardView: newShardView, - PrevVersion: shardView.Version, - }) - if err != nil { - return errors.WithMessage(err, "storage update shard view") - } - - // Update shard view in memory. - m.shardTablesMapping[shardID] = &newShardView - for _, tableID := range tableIDsToAdd { - _, exists := m.tableShardMapping[tableID] - if !exists { - m.tableShardMapping[tableID] = []storage.ShardID{shardID} - } else { - m.tableShardMapping[tableID] = append(m.tableShardMapping[tableID], shardID) - } - } - - return nil -} - -func (m *TopologyManagerImpl) RemoveTable(ctx context.Context, shardID storage.ShardID, latestVersion uint64, tableIDs []storage.TableID) error { - m.lock.Lock() - defer m.lock.Unlock() - - shardView, ok := m.shardTablesMapping[shardID] - if !ok { - return ErrShardNotFound.WithCausef("shard id:%d", shardID) - } - - newTableIDs := make([]storage.TableID, 0, len(shardView.TableIDs)) - for _, tableID := range shardView.TableIDs { - for _, tableIDToRemove := range tableIDs { - if tableID != tableIDToRemove { - newTableIDs = append(newTableIDs, tableID) - } - } - } - - // Update shardView in storage. 
- newShardView := storage.NewShardView(shardView.ShardID, latestVersion, newTableIDs) - if err := m.storage.UpdateShardView(ctx, storage.UpdateShardViewRequest{ - ClusterID: m.clusterID, - ShardView: newShardView, - PrevVersion: shardView.Version, - }); err != nil { - return errors.WithMessage(err, "storage update shard view") - } - - // Update shardView in memory. - shardView.Version = latestVersion - shardView.TableIDs = newTableIDs - for _, tableID := range tableIDs { - delete(m.tableShardMapping, tableID) - } - - for i, tableID := range m.shardTablesMapping[shardID].TableIDs { - for _, tableIDToRemove := range tableIDs { - if tableIDToRemove == tableID { - lastElementIndex := len(m.shardTablesMapping[shardID].TableIDs) - 1 - m.shardTablesMapping[shardID].TableIDs[i] = m.shardTablesMapping[shardID].TableIDs[lastElementIndex] - m.shardTablesMapping[shardID].TableIDs = append(m.shardTablesMapping[shardID].TableIDs[:lastElementIndex], m.shardTablesMapping[shardID].TableIDs[lastElementIndex+1:]...) - } - } - } - - return nil -} - -func (m *TopologyManagerImpl) GetTableShardID(_ context.Context, table storage.Table) (storage.ShardID, bool) { - m.lock.RLock() - defer m.lock.RUnlock() - - shardIDs, exists := m.tableShardMapping[table.ID] - if exists { - return shardIDs[0], true - } - - return 0, false -} - -func (m *TopologyManagerImpl) AssignTableToShard(ctx context.Context, schemaID storage.SchemaID, tableName string, shardID storage.ShardID) error { - m.lock.Lock() - defer m.lock.Unlock() - - if err := m.storage.AssignTableToShard(ctx, storage.AssignTableToShardRequest{ - ClusterID: m.clusterID, - SchemaID: schemaID, - TableName: tableName, - ShardID: shardID, - }); err != nil { - return errors.WithMessage(err, "storage assign table") - } - - // Update cache im memory. - if _, exists := m.tableAssignMapping[schemaID]; !exists { - m.tableAssignMapping[schemaID] = make(map[string]storage.ShardID, 0) - } - - m.tableAssignMapping[schemaID][tableName] = shardID - - return nil -} - -func (m *TopologyManagerImpl) GetTableAssignedShard(_ context.Context, schemaID storage.SchemaID, tableName string) (storage.ShardID, bool) { - assignResult, exists := m.tableAssignMapping[schemaID][tableName] - return assignResult, exists -} - -func (m *TopologyManagerImpl) DeleteTableAssignedShard(ctx context.Context, schemaID storage.SchemaID, tableName string) error { - m.lock.Lock() - defer m.lock.Unlock() - - if err := m.storage.DeleteTableAssignedShard(ctx, storage.DeleteTableAssignedRequest{ - ClusterID: m.clusterID, - SchemaID: schemaID, - TableName: tableName, - }); err != nil { - return errors.WithMessage(err, "storage delete assign table") - } - - // Update cache im memory. 
- delete(m.tableAssignMapping[schemaID], tableName) - - return nil -} - -func (m *TopologyManagerImpl) GetShards() []storage.ShardID { - m.lock.RLock() - defer m.lock.RUnlock() - - shards := make([]storage.ShardID, 0, len(m.shardTablesMapping)) - for _, shardView := range m.shardTablesMapping { - shards = append(shards, shardView.ShardID) - } - - return shards -} - -func (m *TopologyManagerImpl) GetShardNodesByID(shardID storage.ShardID) ([]storage.ShardNode, error) { - m.lock.RLock() - defer m.lock.RUnlock() - - shardNodes, ok := m.shardNodesMapping[shardID] - if !ok { - return nil, ErrShardNotFound.WithCausef("shard id:%d", shardID) - } - - return shardNodes, nil -} - -func (m *TopologyManagerImpl) GetShardNodesByTableIDs(tableIDs []storage.TableID) (GetShardNodesByTableIDsResult, error) { - m.lock.RLock() - defer m.lock.RUnlock() - - tableShardNodes := make(map[storage.TableID][]storage.ShardNode, len(tableIDs)) - shardViewVersions := make(map[storage.ShardID]uint64, 0) - for _, tableID := range tableIDs { - shardIDs, ok := m.tableShardMapping[tableID] - // If the table is not assigned to any shard, return an empty slice. - if !ok { - tableShardNodes[tableID] = []storage.ShardNode{} - continue - } - - for _, shardID := range shardIDs { - shardNodes, ok := m.shardNodesMapping[shardID] - if !ok { - // If the shard is not assigned to any node, return an empty slice. - tableShardNodes[tableID] = []storage.ShardNode{} - continue - } - - if _, exists := tableShardNodes[tableID]; !exists { - tableShardNodes[tableID] = shardNodes - } else { - tableShardNodes[tableID] = append(tableShardNodes[tableID], shardNodes...) - } - - _, ok = shardViewVersions[shardID] - if !ok { - shardViewVersions[shardID] = m.shardTablesMapping[shardID].Version - } - } - } - - return GetShardNodesByTableIDsResult{ - ShardNodes: tableShardNodes, - Version: shardViewVersions, - }, nil -} - -func (m *TopologyManagerImpl) GetShardNodes() GetShardNodesResult { - m.lock.RLock() - defer m.lock.RUnlock() - - shardNodes := make([]storage.ShardNode, 0, len(m.shardNodesMapping)) - shardViewVersions := make(map[storage.ShardID]uint64, len(m.shardTablesMapping)) - for _, shardNode := range m.shardNodesMapping { - shardNodes = append(shardNodes, shardNode...) 
- } - for shardID, shardView := range m.shardTablesMapping { - shardViewVersions[shardID] = shardView.Version - } - - return GetShardNodesResult{ - ShardNodes: shardNodes, - Versions: shardViewVersions, - } -} - -func (m *TopologyManagerImpl) DropShardNodes(ctx context.Context, shardNodes []storage.ShardNode) error { - m.lock.Lock() - defer m.lock.Unlock() - - newShardNodes := make([]storage.ShardNode, 0, len(m.clusterView.ShardNodes)) - - for i := 0; i < len(m.clusterView.ShardNodes); i++ { - if !contains(shardNodes, m.clusterView.ShardNodes[i]) { - newShardNodes = append(newShardNodes, m.clusterView.ShardNodes[i]) - } - } - - return m.updateClusterViewWithLock(ctx, m.clusterView.State, newShardNodes) -} - -func contains(shardNodes []storage.ShardNode, originShardNode storage.ShardNode) bool { - for _, dropShardNode := range shardNodes { - if originShardNode.NodeName == dropShardNode.NodeName && originShardNode.ID == dropShardNode.ID { - return true - } - } - return false -} - -func (m *TopologyManagerImpl) InitClusterView(ctx context.Context) error { - clusterView := storage.NewClusterView(m.clusterID, 0, storage.ClusterStateEmpty, []storage.ShardNode{}) - - err := m.storage.CreateClusterView(ctx, storage.CreateClusterViewRequest{ClusterView: clusterView}) - if err != nil { - return errors.WithMessage(err, "storage create cluster view") - } - - m.lock.Lock() - defer m.lock.Unlock() - // Load cluster view into memory. - if err := m.loadClusterView(ctx); err != nil { - return errors.WithMessage(err, "load cluster view") - } - return nil -} - -func (m *TopologyManagerImpl) UpdateClusterView(ctx context.Context, state storage.ClusterState, shardNodes []storage.ShardNode) error { - m.lock.Lock() - defer m.lock.Unlock() - - return m.updateClusterViewWithLock(ctx, state, shardNodes) -} - -func (m *TopologyManagerImpl) updateClusterViewWithLock(ctx context.Context, state storage.ClusterState, shardNodes []storage.ShardNode) error { - // Update cluster view in storage. - newClusterView := storage.NewClusterView(m.clusterID, m.clusterView.Version+1, state, shardNodes) - if err := m.storage.UpdateClusterView(ctx, storage.UpdateClusterViewRequest{ - ClusterID: m.clusterID, - ClusterView: newClusterView, - LatestVersion: m.clusterView.Version, - }); err != nil { - return errors.WithMessage(err, "storage update cluster view") - } - - // Load cluster view into memory. - if err := m.loadClusterView(ctx); err != nil { - return errors.WithMessage(err, "load cluster view") - } - return nil -} - -func (m *TopologyManagerImpl) UpdateClusterViewByNode(ctx context.Context, shardNodes map[string][]storage.ShardNode) error { - m.lock.Lock() - defer m.lock.Unlock() - - newShardNodes := make([]storage.ShardNode, 0, len(m.clusterView.ShardNodes)) - for _, shardNode := range shardNodes { - newShardNodes = append(newShardNodes, shardNode...) - } - - originShardNodes := m.clusterView.ShardNodes - for _, shardNode := range originShardNodes { - if _, exists := shardNodes[shardNode.NodeName]; !exists { - newShardNodes = append(newShardNodes, shardNode) - } - } - - return m.updateClusterViewWithLock(ctx, m.clusterView.State, newShardNodes) -} - -func (m *TopologyManagerImpl) GetClusterView() storage.ClusterView { - m.lock.RLock() - defer m.lock.RUnlock() - - return *m.clusterView -} - -func (m *TopologyManagerImpl) CreateShardViews(ctx context.Context, createShardViews []CreateShardView) error { - m.lock.Lock() - defer m.lock.Unlock() - - // Create shard view in storage. 
- shardViews := make([]storage.ShardView, 0, len(createShardViews)) - for _, createShardView := range createShardViews { - shardViews = append(shardViews, storage.NewShardView(createShardView.ShardID, 0, createShardView.Tables)) - } - if err := m.storage.CreateShardViews(ctx, storage.CreateShardViewsRequest{ - ClusterID: m.clusterID, - ShardViews: shardViews, - }); err != nil { - return errors.WithMessage(err, "storage create shard view") - } - - // Load shard view into memory. - if err := m.loadShardViews(ctx); err != nil { - return errors.WithMessage(err, "load shard view") - } - return nil -} - -func (m *TopologyManagerImpl) UpdateShardVersionWithExpect(ctx context.Context, shardID storage.ShardID, version uint64, expect uint64) error { - m.lock.Lock() - defer m.lock.Unlock() - - shardView, ok := m.shardTablesMapping[shardID] - if !ok { - return ErrShardNotFound.WithCausef("shard id:%d", shardID) - } - - newShardView := storage.NewShardView(shardID, version, shardView.TableIDs) - if err := m.storage.UpdateShardView(ctx, storage.UpdateShardViewRequest{ - ClusterID: m.clusterID, - ShardView: newShardView, - PrevVersion: expect, - }); err != nil { - return errors.WithMessage(err, "storage update shard view") - } - - // Update shard view into memory. - m.shardTablesMapping[shardID] = &newShardView - - return nil -} - -func (m *TopologyManagerImpl) GetTopology() Topology { - m.lock.RLock() - defer m.lock.RUnlock() - - shardViewsMapping := make(map[storage.ShardID]storage.ShardView, len(m.shardTablesMapping)) - for shardID, view := range m.shardTablesMapping { - shardViewsMapping[shardID] = storage.ShardView{ - ShardID: view.ShardID, - Version: view.Version, - TableIDs: view.TableIDs, - CreatedAt: view.CreatedAt, - } - } - - return Topology{ - ShardViewsMapping: shardViewsMapping, - ClusterView: *m.clusterView, - } -} - -func (m *TopologyManagerImpl) loadClusterView(ctx context.Context) error { - clusterViewResult, err := m.storage.GetClusterView(ctx, storage.GetClusterViewRequest{ - ClusterID: m.clusterID, - }) - if err != nil { - return errors.WithMessage(err, "storage get cluster view") - } - m.logger.Debug("load cluster view", zap.String("clusterViews", fmt.Sprintf("%+v", clusterViewResult))) - - m.shardNodesMapping = make(map[storage.ShardID][]storage.ShardNode, len(clusterViewResult.ClusterView.ShardNodes)) - m.nodeShardsMapping = make(map[string][]storage.ShardNode, len(clusterViewResult.ClusterView.ShardNodes)) - for _, shardNode := range clusterViewResult.ClusterView.ShardNodes { - m.shardNodesMapping[shardNode.ID] = append(m.shardNodesMapping[shardNode.ID], shardNode) - m.nodeShardsMapping[shardNode.NodeName] = append(m.nodeShardsMapping[shardNode.NodeName], shardNode) - } - m.clusterView = &clusterViewResult.ClusterView - - return nil -} - -func (m *TopologyManagerImpl) loadShardViews(ctx context.Context) error { - shardViewsResult, err := m.storage.ListShardViews(ctx, storage.ListShardViewsRequest{ - ClusterID: m.clusterID, - ShardIDs: []storage.ShardID{}, - }) - if err != nil { - return errors.WithMessage(err, "storage list shard views") - } - m.logger.Debug("load shard views", zap.Int32("clusterID", int32(m.clusterID)), zap.String("shardViews", fmt.Sprintf("%+v", shardViewsResult))) - - // Reset data in memory. 
- m.shardTablesMapping = make(map[storage.ShardID]*storage.ShardView, len(shardViewsResult.ShardViews)) - m.tableShardMapping = make(map[storage.TableID][]storage.ShardID, 0) - for _, shardView := range shardViewsResult.ShardViews { - view := shardView - m.shardTablesMapping[shardView.ShardID] = &view - for _, tableID := range shardView.TableIDs { - if _, exists := m.tableShardMapping[tableID]; !exists { - m.tableShardMapping[tableID] = []storage.ShardID{} - } - m.tableShardMapping[tableID] = append(m.tableShardMapping[tableID], shardView.ShardID) - } - } - - return nil -} - -func (m *TopologyManagerImpl) loadAssignTable(ctx context.Context, schemas []storage.Schema) error { - m.tableAssignMapping = make(map[storage.SchemaID]map[string]storage.ShardID, len(schemas)) - for _, schema := range schemas { - m.tableAssignMapping[schema.ID] = make(map[string]storage.ShardID, 0) - - listAssignTableResult, err := m.storage.ListTableAssignedShard(ctx, storage.ListAssignTableRequest{ClusterID: m.clusterID, SchemaID: schema.ID}) - if err != nil { - return errors.WithMessage(err, "storage list assign table") - } - for _, assignTable := range listAssignTableResult.TableAssigns { - m.tableAssignMapping[schema.ID][assignTable.TableName] = assignTable.ShardID - } - } - - return nil -} - -func (m *TopologyManagerImpl) loadNodes(ctx context.Context) error { - nodesResult, err := m.storage.ListNodes(ctx, storage.ListNodesRequest{ClusterID: m.clusterID}) - if err != nil { - return errors.WithMessage(err, "storage list nodes") - } - - // Reset data in memory. - m.nodes = make(map[string]storage.Node, len(nodesResult.Nodes)) - for _, node := range nodesResult.Nodes { - m.nodes[node.Name] = node - } - - return nil -} diff --git a/horaemeta/server/cluster/metadata/topology_manager_test.go b/horaemeta/server/cluster/metadata/topology_manager_test.go deleted file mode 100644 index be38195f58..0000000000 --- a/horaemeta/server/cluster/metadata/topology_manager_test.go +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package metadata_test - -import ( - "context" - "testing" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/etcdutil" - "github.com/apache/incubator-horaedb-meta/server/id" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/stretchr/testify/require" - "go.uber.org/zap" -) - -const ( - TestMinShardID = 0 - TestShardID = 0 - TestShardID1 = 1 - TestShardID2 = 2 - TestNodeName = "TestNodeName" - TestNodeName1 = "TestNodeName1" - TestTableID = 0 - TestSchemaID = 0 -) - -func TestTopologyManager(t *testing.T) { - ctx := context.Background() - re := require.New(t) - - _, client, _ := etcdutil.PrepareEtcdServerAndClient(t) - clusterStorage := storage.NewStorageWithEtcdBackend(client, TestRootPath, storage.Options{ - MaxScanLimit: 100, MinScanLimit: 10, MaxOpsPerTxn: 32, - }) - shardIDAlloc := id.NewReusableAllocatorImpl([]uint64{}, TestMinShardID) - - topologyManager := metadata.NewTopologyManagerImpl(zap.NewNop(), clusterStorage, TestClusterID, shardIDAlloc) - - err := topologyManager.InitClusterView(ctx) - re.NoError(err) - err = topologyManager.CreateShardViews(ctx, []metadata.CreateShardView{ - { - ShardID: TestShardID, - Tables: nil, - }, - }) - re.NoError(err) - - testTableTopology(ctx, re, topologyManager) - testShardTopology(ctx, re, topologyManager) -} - -func testTableTopology(ctx context.Context, re *require.Assertions, manager metadata.TopologyManager) { - err := manager.AddTable(ctx, TestShardID, 0, []storage.Table{{ - ID: TestTableID, - Name: TestTableName, - SchemaID: TestSchemaID, - CreatedAt: 0, - PartitionInfo: storage.PartitionInfo{Info: nil}, - }}) - re.NoError(err) - - shardTables := manager.GetTableIDs([]storage.ShardID{TestShardID}) - found := foundTable(TestTableID, shardTables, TestTableID) - re.Equal(true, found) - - err = manager.RemoveTable(ctx, TestShardID, 0, []storage.TableID{TestTableID}) - re.NoError(err) - - shardTables = manager.GetTableIDs([]storage.ShardID{TestTableID}) - found = foundTable(TestTableID, shardTables, TestTableID) - re.Equal(false, found) - - err = manager.AddTable(ctx, TestShardID, 0, []storage.Table{{ - ID: TestTableID, - Name: TestTableName, - SchemaID: TestSchemaID, - CreatedAt: 0, - PartitionInfo: storage.PartitionInfo{Info: nil}, - }}) - re.NoError(err) - - shardTables = manager.GetTableIDs([]storage.ShardID{TestTableID}) - found = foundTable(TestTableID, shardTables, TestTableID) - re.Equal(true, found) - - err = manager.RemoveTable(ctx, TestTableID, 0, []storage.TableID{TestTableID}) - re.NoError(err) -} - -func foundTable(targetTableID storage.TableID, shardTables map[storage.ShardID]metadata.ShardTableIDs, shardID storage.ShardID) bool { - tableIDs := shardTables[shardID].TableIDs - for _, tableID := range tableIDs { - if tableID == targetTableID { - return true - } - } - return false -} - -func testShardTopology(ctx context.Context, re *require.Assertions, manager metadata.TopologyManager) { - var shardNodes []storage.ShardNode - shardNodes = append(shardNodes, storage.ShardNode{ - ID: TestShardID1, - ShardRole: 0, - NodeName: TestNodeName, - }) - shardNodes = append(shardNodes, storage.ShardNode{ - ID: TestShardID2, - ShardRole: 0, - NodeName: TestNodeName1, - }) - - err := manager.UpdateClusterView(ctx, storage.ClusterStateStable, shardNodes) - re.NoError(err) - re.Equal(2, len(manager.GetTopology().ClusterView.ShardNodes)) - - shardNodeMapping := map[string][]storage.ShardNode{} - shardNodeMapping[TestNodeName] = 
[]storage.ShardNode{} - err = manager.UpdateClusterViewByNode(ctx, shardNodeMapping) - re.NoError(err) - re.Equal(1, len(manager.GetTopology().ClusterView.ShardNodes)) - - err = manager.DropShardNodes(ctx, []storage.ShardNode{ - { - ID: TestShardID2, - ShardRole: 0, - NodeName: TestNodeName1, - }, - }) - re.NoError(err) - re.Equal(0, len(manager.GetTopology().ClusterView.ShardNodes)) -} diff --git a/horaemeta/server/cluster/metadata/types.go b/horaemeta/server/cluster/metadata/types.go deleted file mode 100644 index f45117b268..0000000000 --- a/horaemeta/server/cluster/metadata/types.go +++ /dev/null @@ -1,215 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package metadata - -import ( - "time" - - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaservicepb" - "github.com/pkg/errors" -) - -const ( - expiredThreshold = time.Second * 10 - MinShardID = 0 -) - -type Snapshot struct { - Topology Topology - RegisteredNodes []RegisteredNode -} - -type TableInfo struct { - ID storage.TableID - Name string - SchemaID storage.SchemaID - SchemaName string - PartitionInfo storage.PartitionInfo - CreatedAt uint64 -} - -type ShardTables struct { - Shard ShardInfo - Tables []TableInfo -} - -type ShardInfo struct { - ID storage.ShardID - Role storage.ShardRole - // ShardViewVersion - Version uint64 - // The open state of the shard, which is used to determine whether the shard needs to be opened again. 
- Status storage.ShardStatus -} - -type ShardNodeWithVersion struct { - ShardInfo ShardInfo - ShardNode storage.ShardNode -} - -type CreateClusterOpts struct { - NodeCount uint32 - ShardTotal uint32 - EnableSchedule bool - TopologyType storage.TopologyType - ProcedureExecutingBatchSize uint32 -} - -type UpdateClusterOpts struct { - TopologyType storage.TopologyType - ProcedureExecutingBatchSize uint32 -} - -type CreateTableMetadataRequest struct { - SchemaName string - TableName string - PartitionInfo storage.PartitionInfo -} - -type CreateTableMetadataResult struct { - Table storage.Table -} - -type CreateTableRequest struct { - ShardID storage.ShardID - LatestVersion uint64 - SchemaName string - TableName string - PartitionInfo storage.PartitionInfo -} - -type CreateTableResult struct { - Table storage.Table - ShardVersionUpdate ShardVersionUpdate -} - -type DropTableRequest struct { - SchemaName string - TableName string - ShardID storage.ShardID - LatestVersion uint64 -} - -type DropTableMetadataResult struct { - Table storage.Table -} - -type OpenTableRequest struct { - SchemaName string - TableName string - ShardID storage.ShardID - NodeName string -} - -type CloseTableRequest struct { - SchemaName string - TableName string - ShardID storage.ShardID - NodeName string -} - -type MigrateTableRequest struct { - SchemaName string - TableNames []string - OldShardID storage.ShardID - // TODO: refactor migrate table request, simplify params. - latestOldShardVersion uint64 - NewShardID storage.ShardID - latestNewShardVersion uint64 -} - -type ShardVersionUpdate struct { - ShardID storage.ShardID - LatestVersion uint64 -} - -type RouteEntry struct { - Table TableInfo - NodeShards []ShardNodeWithVersion -} - -type RouteTablesResult struct { - ClusterViewVersion uint64 - RouteEntries map[string]RouteEntry -} - -type GetNodeShardsResult struct { - ClusterTopologyVersion uint64 - NodeShards []ShardNodeWithVersion -} - -type RegisteredNode struct { - Node storage.Node - ShardInfos []ShardInfo -} - -func NewRegisteredNode(meta storage.Node, shardInfos []ShardInfo) RegisteredNode { - return RegisteredNode{ - meta, - shardInfos, - } -} - -func (n RegisteredNode) IsExpired(now time.Time) bool { - expiredTime := time.UnixMilli(int64(n.Node.LastTouchTime)).Add(expiredThreshold) - - return now.After(expiredTime) -} - -func ConvertShardsInfoToPB(shard ShardInfo) *metaservicepb.ShardInfo { - status := storage.ConvertShardStatusToPB(shard.Status) - return &metaservicepb.ShardInfo{ - Id: uint32(shard.ID), - Role: storage.ConvertShardRoleToPB(shard.Role), - Version: shard.Version, - Status: &status, - } -} - -func ConvertShardsInfoPB(shard *metaservicepb.ShardInfo) ShardInfo { - return ShardInfo{ - ID: storage.ShardID(shard.Id), - Role: storage.ConvertShardRolePB(shard.Role), - Version: shard.Version, - Status: storage.ConvertShardStatusPB(shard.Status), - } -} - -func ConvertTableInfoToPB(table TableInfo) *metaservicepb.TableInfo { - return &metaservicepb.TableInfo{ - Id: uint64(table.ID), - Name: table.Name, - SchemaId: uint32(table.SchemaID), - SchemaName: table.SchemaName, - PartitionInfo: table.PartitionInfo.Info, - } -} - -func ParseTopologyType(rawString string) (storage.TopologyType, error) { - switch rawString { - case storage.TopologyTypeStatic: - return storage.TopologyTypeStatic, nil - case storage.TopologyTypeDynamic: - return storage.TopologyTypeDynamic, nil - } - - return "", errors.WithMessagef(ErrParseTopologyType, "could not be parsed to topologyType, rawString:%s", rawString) -} diff --git 
a/horaemeta/server/config/config.go b/horaemeta/server/config/config.go deleted file mode 100644 index 81ebdb9fe5..0000000000 --- a/horaemeta/server/config/config.go +++ /dev/null @@ -1,402 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package config - -import ( - "flag" - "fmt" - "math" - "os" - "strings" - "time" - - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/caarlos0/env/v6" - "github.com/pelletier/go-toml/v2" - "github.com/pkg/errors" - "go.etcd.io/etcd/server/v3/embed" - "go.uber.org/zap" -) - -const ( - defaultEnableEmbedEtcd bool = true - defaultEtcdCaCertPath = "" - defaultEtcdKeyPath = "" - defaultEtcdCertPath = "" - - defaultEnableLimiter bool = true - defaultInitialLimiterCapacity int = 100 * 1000 - defaultInitialLimiterRate int = 10 * 1000 - defaultEtcdStartTimeoutMs int64 = 60 * 1000 - defaultCallTimeoutMs = 5 * 1000 - defaultEtcdMaxTxnOps = 128 - defaultEtcdLeaseTTLSec = 10 - - defaultGrpcHandleTimeoutMs int = 60 * 1000 - // GrpcServiceMaxSendMsgSize controls the max size of the sent message(200MB by default). - defaultGrpcServiceMaxSendMsgSize int = 200 * 1024 * 1024 - // GrpcServiceMaxRecvMsgSize controls the max size of the received message(100MB by default). - defaultGrpcServiceMaxRecvMsgSize int = 100 * 1024 * 1024 - // GrpcServiceKeepAlivePingMinIntervalSec controls the min interval for one keepalive ping. - defaultGrpcServiceKeepAlivePingMinIntervalSec int = 20 - - defaultNodeNamePrefix = "horaemeta" - defaultEndpoint = "127.0.0.1" - defaultRootPath = "/horaedb" - defaultClientUrls = "http://0.0.0.0:2379" - defaultPeerUrls = "http://0.0.0.0:2380" - defaultInitialClusterState = embed.ClusterStateFlagNew - defaultInitialClusterToken = "horaemeta-cluster" //#nosec G101 - defaultCompactionMode = "periodic" - defaultAutoCompactionRetention = "1h" - - defaultTickIntervalMs int64 = 500 - defaultElectionTimeoutMs = 3000 - defaultQuotaBackendBytes = 8 * 1024 * 1024 * 1024 // 8GB - - defaultMaxRequestBytes uint = 2 * 1024 * 1024 // 2MB - - defaultMaxScanLimit int = 100 - defaultMinScanLimit int = 20 - defaultMaxOpsPerTxn int = 32 - defaultIDAllocatorStep uint = 20 - - DefaultClusterName = "defaultCluster" - defaultClusterNodeCount = 2 - defaultClusterShardTotal = 8 - enableSchedule = true - // topologyType is used to determine the scheduling cluster strategy of HoraeMeta. It should be determined according to the storage method of HoraeDB. The default is static to support local storage. 
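The topology type named in the comment above is just a validated string: metadata.ParseTopologyType, shown earlier in this patch, accepts only "static" or "dynamic" and rejects everything else. Below is a minimal, self-contained sketch of that validate-then-branch pattern; the type and constants are local stand-ins, not the real storage package identifiers.

package main

import (
	"errors"
	"fmt"
)

// TopologyType mirrors the string-typed topology kind used by the removed
// config and metadata code; these constants are local stand-ins.
type TopologyType string

const (
	TopologyTypeStatic  TopologyType = "static"
	TopologyTypeDynamic TopologyType = "dynamic"
)

var errParseTopologyType = errors.New("could not be parsed to topologyType")

// parseTopologyType validates the raw string taken from the config file or env.
func parseTopologyType(raw string) (TopologyType, error) {
	switch TopologyType(raw) {
	case TopologyTypeStatic, TopologyTypeDynamic:
		return TopologyType(raw), nil
	}
	return "", fmt.Errorf("%w, rawString:%q", errParseTopologyType, raw)
}

func main() {
	for _, raw := range []string{"static", "dynamic", "typo"} {
		if tt, err := parseTopologyType(raw); err != nil {
			fmt.Println("rejected:", err)
		} else {
			fmt.Println("accepted:", tt)
		}
	}
}

The config defaults that follow pick the static topology, matching the comment above.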
- defaultTopologyType = "static" - defaultProcedureExecutingBatchSize = math.MaxUint32 - - defaultHTTPPort = 8080 - defaultGrpcPort = 2379 - - defaultDataDir = "/tmp/horaemeta" - - defaultEtcdDataDir = "/etcd" - defaultWalDir = "/wal" - - defaultEtcdLogFile = "/etcd.log" -) - -type LimiterConfig struct { - // Enable is used to control the switch of the limiter. - Enable bool `toml:"enable" env:"FLOW_LIMITER_ENABLE"` - // Limit is the updated rate of tokens. - Limit int `toml:"limit" env:"FLOW_LIMITER_LIMIT"` - // Burst is the maximum number of tokens. - Burst int `toml:"burst" env:"FLOW_LIMITER_BURST"` -} - -// Config is server start config, it has three input modes: -// 1. toml config file -// 2. env variables -// Their loading has priority, and low priority configurations will be overwritten by high priority configurations. -// The priority from high to low is: env variables > toml config file. -type Config struct { - Log log.Config `toml:"log" env:"LOG"` - EtcdLog log.Config `toml:"etcd-log" env:"ETCD_LOG"` - FlowLimiter LimiterConfig `toml:"flow-limiter" env:"FLOW_LIMITER"` - - EnableEmbedEtcd bool `toml:"enable-embed-etcd" env:"ENABLE_EMBED_ETCD"` - EtcdCaCertPath string `toml:"etcd-ca-cert-path" env:"ETCD_CA_CERT_PATH"` - EtcdKeyPath string `toml:"etcd-key-path" env:"ETCD_KEY_PATH"` - EtcdCertPath string `toml:"etcd-cert-path" env:"ETCD_CERT_PATH"` - - EtcdStartTimeoutMs int64 `toml:"etcd-start-timeout-ms" env:"ETCD_START_TIMEOUT_MS"` - EtcdCallTimeoutMs int64 `toml:"etcd-call-timeout-ms" env:"ETCD_CALL_TIMEOUT_MS"` - EtcdMaxTxnOps int64 `toml:"etcd-max-txn-ops" env:"ETCD_MAX_TXN_OPS"` - - GrpcHandleTimeoutMs int `toml:"grpc-handle-timeout-ms" env:"GRPC_HANDLER_TIMEOUT_MS"` - GrpcServiceMaxSendMsgSize int `toml:"grpc-service-max-send-msg-size" env:"GRPC_SERVICE_MAX_SEND_MSG_SIZE"` - GrpcServiceMaxRecvMsgSize int `toml:"grpc-service-max-recv-msg-size" env:"GRPC_SERVICE_MAX_RECV_MSG_SIZE"` - GrpcServiceKeepAlivePingMinIntervalSec int `toml:"grpc-service-keep-alive-ping-min-interval-sec" env:"GRPC_SERVICE_KEEP_ALIVE_PING_MIN_INTERVAL_SEC"` - - LeaseTTLSec int64 `toml:"lease-sec" env:"LEASE_SEC"` - - NodeName string `toml:"node-name" env:"NODE_NAME"` - Addr string `toml:"addr" env:"ADDR"` - DataDir string `toml:"data-dir" env:"DATA_DIR"` - StorageRootPath string `toml:"storage-root-path" env:"STORAGE_ROOT_PATH"` - InitialCluster string `toml:"initial-cluster" env:"INITIAL_CLUSTER"` - InitialClusterState string `toml:"initial-cluster-state" env:"INITIAL_CLUSTER_STATE"` - InitialClusterToken string `toml:"initial-cluster-token" env:"INITIAL_CLUSTER_TOKEN"` - // TickInterval is the interval for etcd Raft tick. - TickIntervalMs int64 `toml:"tick-interval-ms" env:"TICK_INTERVAL_MS"` - ElectionTimeoutMs int64 `toml:"election-timeout-ms" env:"ELECTION_TIMEOUT_MS"` - // QuotaBackendBytes Raise alarms when backend size exceeds the given quota. 0 means use the default quota. - // the default size is 2GB, the maximum is 8GB. - QuotaBackendBytes int64 `toml:"quota-backend-bytes" env:"QUOTA_BACKEND_BYTES"` - // AutoCompactionMode is either 'periodic' or 'revision'. The default value is 'periodic'. - AutoCompactionMode string `toml:"auto-compaction-mode" env:"AUTO-COMPACTION-MODE"` - // AutoCompactionRetention is either duration string with time unit - // (e.g. '5m' for 5-minute), or revision unit (e.g. '5000'). - // If no time unit is provided and compaction mode is 'periodic', - // the unit defaults to hour. For example, '5' translates into 5-hour. - // The default retention is 1 hour. 
- // Before etcd v3.3.x, the type of retention is int. We add 'v2' suffix to make it backward compatible. - AutoCompactionRetention string `toml:"auto-compaction-retention" env:"AUTO_COMPACTION_RETENTION"` - MaxRequestBytes uint `toml:"max-request-bytes" env:"MAX_REQUEST_BYTES"` - MaxScanLimit int `toml:"max-scan-limit" env:"MAX_SCAN_LIMIT"` - MinScanLimit int `toml:"min-scan-limit" env:"MIN_SCAN_LIMIT"` - MaxOpsPerTxn int `toml:"max-ops-per-txn" env:"MAX_OPS_PER_TXN"` - IDAllocatorStep uint `toml:"id-allocator-step" env:"ID_ALLOCATOR_STEP"` - - // Following fields are the settings for the default cluster. - DefaultClusterName string `toml:"default-cluster-name" env:"DEFAULT_CLUSTER_NAME"` - DefaultClusterNodeCount int `toml:"default-cluster-node-count" env:"DEFAULT_CLUSTER_NODE_COUNT"` - DefaultClusterShardTotal int `toml:"default-cluster-shard-total" env:"DEFAULT_CLUSTER_SHARD_TOTAL"` - - // When the EnableSchedule is turned on, the failover scheduling will be turned on, which is used for HoraeDB cluster publishing and using local storage. - EnableSchedule bool `toml:"enable-schedule" env:"ENABLE_SCHEDULE"` - // TopologyType indicates the schedule type used by the HoraeDB cluster, it will determine the strategy of HoraeMeta scheduling cluster. - TopologyType string `toml:"topology-type" env:"TOPOLOGY_TYPE"` - // ProcedureExecutingBatchSize determines the maximum number of shards in a single batch when opening shards concurrently. - ProcedureExecutingBatchSize uint32 `toml:"procedure-executing-batch-size" env:"PROCEDURE_EXECUTING_BATCH_SIZE"` - - ClientUrls string `toml:"client-urls" env:"CLIENT_URLS"` - PeerUrls string `toml:"peer-urls" env:"PEER_URLS"` - AdvertiseClientUrls string `toml:"advertise-client-urls" env:"ADVERTISE_CLIENT_URLS"` - AdvertisePeerUrls string `toml:"advertise-peer-urls" env:"ADVERTISE_PEER_URLS"` - - HTTPPort int `toml:"http-port" env:"HTTP_PORT"` - GrpcPort int `toml:"grpc-port" env:"GRPC_PORT"` -} - -func (c *Config) GrpcHandleTimeout() time.Duration { - return time.Duration(c.GrpcHandleTimeoutMs) * time.Millisecond -} - -func (c *Config) EtcdStartTimeout() time.Duration { - return time.Duration(c.EtcdStartTimeoutMs) * time.Millisecond -} - -func (c *Config) EtcdCallTimeout() time.Duration { - return time.Duration(c.EtcdCallTimeoutMs) * time.Millisecond -} - -// ValidateAndAdjust validates the config fields and adjusts some fields which should be adjusted. -// Return error if any field is invalid. 
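Putting the struct tags above together: the removed code layers its configuration sources, starting from the hard-coded defaults built in MakeConfigParser further below, then overlaying the toml file (ParseConfigFromToml), then the environment (ParseConfigFromEnv), so that env variables win, as the doc comment on Config states. The sketch below shows that layering with the same go-toml and caarlos0/env libraries the file imports; the struct and values here are illustrative only.

package main

import (
	"fmt"

	"github.com/caarlos0/env/v6"
	"github.com/pelletier/go-toml/v2"
)

// serverConfig is a tiny illustrative stand-in for the removed Config struct.
type serverConfig struct {
	Addr     string `toml:"addr" env:"ADDR"`
	HTTPPort int    `toml:"http-port" env:"HTTP_PORT"`
}

// loadConfig layers the three sources: defaults, then toml, then env.
func loadConfig(tomlData []byte) (serverConfig, error) {
	// 1. Hard-coded defaults (lowest priority).
	cfg := serverConfig{Addr: "127.0.0.1", HTTPPort: 8080}

	// 2. Values present in the toml file overwrite the defaults.
	if err := toml.Unmarshal(tomlData, &cfg); err != nil {
		return cfg, err
	}

	// 3. Env variables overwrite both (highest priority).
	if err := env.Parse(&cfg); err != nil {
		return cfg, err
	}
	return cfg, nil
}

func main() {
	cfg, err := loadConfig([]byte("addr = \"10.0.0.1\"\nhttp-port = 9090\n"))
	if err != nil {
		panic(err)
	}
	// With HTTP_PORT=8000 exported, the final port is 8000; otherwise the
	// toml value 9090 wins over the default 8080.
	fmt.Printf("%+v\n", cfg)
}

ValidateAndAdjust, next, is the hook where such a merged config would be sanity-checked; in the removed code it is still a no-op.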
-func (c *Config) ValidateAndAdjust() error { - return nil -} - -func (c *Config) GenEtcdConfig() (*embed.Config, error) { - cfg := embed.NewConfig() - - cfg.Name = c.NodeName - cfg.Dir = strings.Join([]string{c.DataDir, defaultEtcdDataDir}, "") - cfg.WalDir = strings.Join([]string{c.DataDir, defaultWalDir}, "") - cfg.InitialCluster = c.InitialCluster - cfg.ClusterState = c.InitialClusterState - cfg.InitialClusterToken = c.InitialClusterToken - cfg.EnablePprof = true - cfg.TickMs = uint(c.TickIntervalMs) - cfg.ElectionMs = uint(c.ElectionTimeoutMs) - cfg.AutoCompactionMode = c.AutoCompactionMode - cfg.AutoCompactionRetention = c.AutoCompactionRetention - cfg.QuotaBackendBytes = c.QuotaBackendBytes - cfg.MaxRequestBytes = c.MaxRequestBytes - cfg.MaxTxnOps = uint(c.EtcdMaxTxnOps) - - var err error - cfg.ListenPeerUrls, err = parseUrls(c.PeerUrls) - if err != nil { - return nil, err - } - - cfg.AdvertisePeerUrls, err = parseUrls(c.AdvertisePeerUrls) - if err != nil { - return nil, err - } - - cfg.ListenClientUrls, err = parseUrls(c.ClientUrls) - if err != nil { - return nil, err - } - - cfg.AdvertiseClientUrls, err = parseUrls(c.AdvertiseClientUrls) - if err != nil { - return nil, err - } - - cfg.Logger = "zap" - cfg.LogOutputs = []string{strings.Join([]string{c.DataDir, defaultEtcdLogFile}, "")} - cfg.LogLevel = c.EtcdLog.Level - - return cfg, nil -} - -// Parser builds the config from the flags. -type Parser struct { - flagSet *flag.FlagSet - cfg *Config - configFilePath string - version *bool -} - -func (p *Parser) Parse(arguments []string) (*Config, error) { - if err := p.flagSet.Parse(arguments); err != nil { - if errors.Is(err, flag.ErrHelp) { - return nil, ErrHelpRequested.WithCause(err) - } - return nil, ErrInvalidCommandArgs.WithCausef("fail to parse flag arguments:%v, err:%v", arguments, err) - } - return p.cfg, nil -} - -func (p *Parser) NeedPrintVersion() bool { - return *p.version -} - -func makeDefaultNodeName() (string, error) { - host, err := os.Hostname() - if err != nil { - return "", ErrRetrieveHostname.WithCause(err) - } - - return fmt.Sprintf("%s-%s", defaultNodeNamePrefix, host), nil -} - -func makeDefaultInitialCluster(nodeName string) string { - return fmt.Sprintf("%s=%s", nodeName, defaultPeerUrls) -} - -func MakeConfigParser() (*Parser, error) { - defaultNodeName, err := makeDefaultNodeName() - if err != nil { - return nil, err - } - defaultInitialCluster := makeDefaultInitialCluster(defaultNodeName) - - fs, cfg := flag.NewFlagSet("meta", flag.ContinueOnError), &Config{ - Log: log.Config{ - Level: log.DefaultLogLevel, - File: log.DefaultLogFile, - }, - EtcdLog: log.Config{ - Level: log.DefaultLogLevel, - File: log.DefaultLogFile, - }, - FlowLimiter: LimiterConfig{ - Enable: defaultEnableLimiter, - Limit: defaultInitialLimiterRate, - Burst: defaultInitialLimiterCapacity, - }, - - EnableEmbedEtcd: defaultEnableEmbedEtcd, - EtcdCaCertPath: defaultEtcdCaCertPath, - EtcdCertPath: defaultEtcdCertPath, - EtcdKeyPath: defaultEtcdKeyPath, - - EtcdStartTimeoutMs: defaultEtcdStartTimeoutMs, - EtcdCallTimeoutMs: defaultCallTimeoutMs, - EtcdMaxTxnOps: defaultEtcdMaxTxnOps, - - GrpcHandleTimeoutMs: defaultGrpcHandleTimeoutMs, - GrpcServiceMaxSendMsgSize: defaultGrpcServiceMaxSendMsgSize, - GrpcServiceMaxRecvMsgSize: defaultGrpcServiceMaxRecvMsgSize, - GrpcServiceKeepAlivePingMinIntervalSec: defaultGrpcServiceKeepAlivePingMinIntervalSec, - - LeaseTTLSec: defaultEtcdLeaseTTLSec, - - NodeName: defaultNodeName, - Addr: defaultEndpoint, - DataDir: defaultDataDir, - StorageRootPath: 
defaultRootPath, - - InitialCluster: defaultInitialCluster, - InitialClusterState: defaultInitialClusterState, - InitialClusterToken: defaultInitialClusterToken, - - ClientUrls: defaultClientUrls, - AdvertiseClientUrls: defaultClientUrls, - PeerUrls: defaultPeerUrls, - AdvertisePeerUrls: defaultPeerUrls, - - TickIntervalMs: defaultTickIntervalMs, - ElectionTimeoutMs: defaultElectionTimeoutMs, - - QuotaBackendBytes: defaultQuotaBackendBytes, - AutoCompactionMode: defaultCompactionMode, - AutoCompactionRetention: defaultAutoCompactionRetention, - MaxRequestBytes: defaultMaxRequestBytes, - MaxScanLimit: defaultMaxScanLimit, - MinScanLimit: defaultMinScanLimit, - MaxOpsPerTxn: defaultMaxOpsPerTxn, - IDAllocatorStep: defaultIDAllocatorStep, - - DefaultClusterName: DefaultClusterName, - DefaultClusterNodeCount: defaultClusterNodeCount, - DefaultClusterShardTotal: defaultClusterShardTotal, - EnableSchedule: enableSchedule, - TopologyType: defaultTopologyType, - ProcedureExecutingBatchSize: defaultProcedureExecutingBatchSize, - - HTTPPort: defaultHTTPPort, - GrpcPort: defaultGrpcPort, - } - - version := fs.Bool("version", false, "print version information") - - builder := &Parser{ - flagSet: fs, - cfg: cfg, - version: version, - configFilePath: "", - } - - fs.StringVar(&builder.configFilePath, "config", "", "config file path") - - return builder, nil -} - -// ParseConfigFromToml read configuration from the toml file, if the config item already exists, it will be overwritten. -func (p *Parser) ParseConfigFromToml() error { - if len(p.configFilePath) == 0 { - log.Info("no config file specified, skip parse config") - return nil - } - log.Info("get config from toml", zap.String("configFile", p.configFilePath)) - - file, err := os.ReadFile(p.configFilePath) - if err != nil { - log.Error("err", zap.Error(err)) - return errors.WithMessage(err, fmt.Sprintf("read config file, configFile:%s", p.configFilePath)) - } - log.Info("toml config value", zap.String("config", string(file))) - - err = toml.Unmarshal(file, p.cfg) - if err != nil { - log.Error("err", zap.Error(err)) - return errors.WithMessagef(err, "unmarshal toml config, configFile:%s", p.configFilePath) - } - - return nil -} - -func (p *Parser) ParseConfigFromEnv() error { - err := env.Parse(p.cfg) - if err != nil { - return errors.WithMessagef(err, "parse config from env variables") - } - return nil -} diff --git a/horaemeta/server/config/error.go b/horaemeta/server/config/error.go deleted file mode 100644 index f266fa551c..0000000000 --- a/horaemeta/server/config/error.go +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package config - -import ( - "github.com/apache/incubator-horaedb-meta/pkg/coderr" -) - -var ( - ErrHelpRequested = coderr.NewCodeError(coderr.PrintHelpUsage, "help requested") - ErrInvalidPeerURL = coderr.NewCodeError(coderr.InvalidParams, "invalid peers url") - ErrInvalidCommandArgs = coderr.NewCodeError(coderr.InvalidParams, "invalid command arguments") - ErrRetrieveHostname = coderr.NewCodeError(coderr.Internal, "retrieve local hostname") -) diff --git a/horaemeta/server/config/util.go b/horaemeta/server/config/util.go deleted file mode 100644 index 68b1b37e6e..0000000000 --- a/horaemeta/server/config/util.go +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package config - -import ( - "net/url" - "strings" -) - -// parseUrls parse a string into multiple urls. -func parseUrls(s string) ([]url.URL, error) { - items := strings.Split(s, ",") - urls := make([]url.URL, 0, len(items)) - for _, item := range items { - u, err := url.Parse(item) - if err != nil { - return nil, ErrInvalidPeerURL.WithCausef("original url:%s, parsed item:%v, parse err:%v", s, item, err) - } - - urls = append(urls, *u) - } - - return urls, nil -} diff --git a/horaemeta/server/coordinator/error.go b/horaemeta/server/coordinator/error.go deleted file mode 100644 index 4f22da9b5d..0000000000 --- a/horaemeta/server/coordinator/error.go +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package coordinator - -import "github.com/apache/incubator-horaedb-meta/pkg/coderr" - -var ( - ErrNodeNumberNotEnough = coderr.NewCodeError(coderr.Internal, "node number not enough") - ErrPickNode = coderr.NewCodeError(coderr.Internal, "no node is picked") -) diff --git a/horaemeta/server/coordinator/eventdispatch/dispatch.go b/horaemeta/server/coordinator/eventdispatch/dispatch.go deleted file mode 100644 index f9065409f2..0000000000 --- a/horaemeta/server/coordinator/eventdispatch/dispatch.go +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package eventdispatch - -import ( - "context" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" -) - -type Dispatch interface { - OpenShard(context context.Context, address string, request OpenShardRequest) error - CloseShard(context context.Context, address string, request CloseShardRequest) error - CreateTableOnShard(context context.Context, address string, request CreateTableOnShardRequest) (uint64, error) - DropTableOnShard(context context.Context, address string, request DropTableOnShardRequest) (uint64, error) - OpenTableOnShard(ctx context.Context, address string, request OpenTableOnShardRequest) error - CloseTableOnShard(context context.Context, address string, request CloseTableOnShardRequest) error -} - -type OpenShardRequest struct { - Shard metadata.ShardInfo -} - -type CloseShardRequest struct { - ShardID uint32 -} - -type UpdateShardInfo struct { - CurrShardInfo metadata.ShardInfo -} - -type CreateTableOnShardRequest struct { - UpdateShardInfo UpdateShardInfo - TableInfo metadata.TableInfo - EncodedSchema []byte - Engine string - CreateIfNotExist bool - Options map[string]string -} - -type DropTableOnShardRequest struct { - UpdateShardInfo UpdateShardInfo - TableInfo metadata.TableInfo -} - -type OpenTableOnShardRequest struct { - UpdateShardInfo UpdateShardInfo - TableInfo metadata.TableInfo -} - -type CloseTableOnShardRequest struct { - UpdateShardInfo UpdateShardInfo - TableInfo metadata.TableInfo -} diff --git a/horaemeta/server/coordinator/eventdispatch/dispatch_impl.go b/horaemeta/server/coordinator/eventdispatch/dispatch_impl.go deleted file mode 100644 index c9dc07f9a2..0000000000 --- a/horaemeta/server/coordinator/eventdispatch/dispatch_impl.go +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package eventdispatch - -import ( - "context" - "sync" - - "github.com/apache/incubator-horaedb-meta/pkg/coderr" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/service" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaeventpb" - "github.com/pkg/errors" - "google.golang.org/grpc" -) - -var ErrDispatch = coderr.NewCodeError(coderr.Internal, "event dispatch failed") - -type DispatchImpl struct { - conns sync.Map -} - -func NewDispatchImpl() *DispatchImpl { - return &DispatchImpl{ - conns: sync.Map{}, - } -} - -func (d *DispatchImpl) OpenShard(ctx context.Context, addr string, request OpenShardRequest) error { - client, err := d.getMetaEventClient(ctx, addr) - if err != nil { - return err - } - resp, err := client.OpenShard(ctx, &metaeventpb.OpenShardRequest{ - Shard: metadata.ConvertShardsInfoToPB(request.Shard), - }) - if err != nil { - return errors.WithMessagef(err, "open shard, addr:%s, request:%v", addr, request) - } - if resp.GetHeader().Code != 0 { - return ErrDispatch.WithCausef("open shard, addr:%s, request:%v, err:%s", addr, request, resp.GetHeader().GetError()) - } - return nil -} - -func (d *DispatchImpl) CloseShard(ctx context.Context, addr string, request CloseShardRequest) error { - client, err := d.getMetaEventClient(ctx, addr) - if err != nil { - return err - } - resp, err := client.CloseShard(ctx, &metaeventpb.CloseShardRequest{ - ShardId: request.ShardID, - }) - if err != nil { - return errors.WithMessagef(err, "close shard, addr:%s, request:%v", addr, request) - } - if resp.GetHeader().Code != 0 { - return ErrDispatch.WithCausef("close shard, addr:%s, request:%v, err:%s", addr, request, resp.GetHeader().GetError()) - } - return nil -} - -func (d *DispatchImpl) CreateTableOnShard(ctx context.Context, addr string, request CreateTableOnShardRequest) (uint64, error) { - client, err := d.getMetaEventClient(ctx, addr) - if err != nil { - return 0, err - } - resp, err := client.CreateTableOnShard(ctx, convertCreateTableOnShardRequestToPB(request)) - if err != nil { - return 0, errors.WithMessagef(err, "create table on shard, addr:%s, request:%v", addr, request) - } - if resp.GetHeader().Code != 0 { - return 0, ErrDispatch.WithCausef("create table on shard, addr:%s, request:%v, err:%s", addr, request, resp.GetHeader().GetError()) - } - return resp.GetLatestShardVersion(), nil -} - -func (d *DispatchImpl) DropTableOnShard(ctx context.Context, addr string, request DropTableOnShardRequest) (uint64, error) { - client, err := d.getMetaEventClient(ctx, addr) - if err != nil { - return 0, err - } - resp, err := client.DropTableOnShard(ctx, convertDropTableOnShardRequestToPB(request)) - if err != nil { - return 0, errors.WithMessagef(err, "drop table on shard, addr:%s, request:%v", addr, request) - } - if resp.GetHeader().Code != 0 { - return 0, ErrDispatch.WithCausef("drop table on shard, addr:%s, request:%v, err:%s", addr, request, resp.GetHeader().GetError()) - } - return resp.GetLatestShardVersion(), nil -} - -func (d *DispatchImpl) OpenTableOnShard(ctx context.Context, addr 
string, request OpenTableOnShardRequest) error { - client, err := d.getMetaEventClient(ctx, addr) - if err != nil { - return err - } - - resp, err := client.OpenTableOnShard(ctx, convertOpenTableOnShardRequestToPB(request)) - if err != nil { - return errors.WithMessagef(err, "open table on shard, addr:%s, request:%v", addr, request) - } - if resp.GetHeader().Code != 0 { - return ErrDispatch.WithCausef("open table on shard, addr:%s, request:%v, err:%s", addr, request, resp.GetHeader().GetError()) - } - return nil -} - -func (d *DispatchImpl) CloseTableOnShard(ctx context.Context, addr string, request CloseTableOnShardRequest) error { - client, err := d.getMetaEventClient(ctx, addr) - if err != nil { - return err - } - - resp, err := client.CloseTableOnShard(ctx, convertCloseTableOnShardRequestToPB(request)) - if err != nil { - return errors.WithMessagef(err, "close table on shard, addr:%s, request:%v", addr, request) - } - if resp.GetHeader().Code != 0 { - return ErrDispatch.WithCausef("close table on shard, addr:%s, request:%v, err:%s", addr, request, resp.GetHeader().GetError()) - } - return nil -} - -func (d *DispatchImpl) getGrpcClient(ctx context.Context, addr string) (*grpc.ClientConn, error) { - client, ok := d.conns.Load(addr) - if !ok { - cc, err := service.GetClientConn(ctx, addr) - if err != nil { - return nil, err - } - client = cc - d.conns.Store(addr, cc) - } - return client.(*grpc.ClientConn), nil -} - -func (d *DispatchImpl) getMetaEventClient(ctx context.Context, addr string) (metaeventpb.MetaEventServiceClient, error) { - client, err := d.getGrpcClient(ctx, addr) - if err != nil { - return nil, errors.WithMessagef(err, "get meta event client, addr:%s", addr) - } - return metaeventpb.NewMetaEventServiceClient(client), nil -} - -func convertCreateTableOnShardRequestToPB(request CreateTableOnShardRequest) *metaeventpb.CreateTableOnShardRequest { - return &metaeventpb.CreateTableOnShardRequest{ - UpdateShardInfo: convertUpdateShardInfoToPB(request.UpdateShardInfo), - TableInfo: metadata.ConvertTableInfoToPB(request.TableInfo), - EncodedSchema: request.EncodedSchema, - Engine: request.Engine, - CreateIfNotExist: request.CreateIfNotExist, - Options: request.Options, - } -} - -func convertDropTableOnShardRequestToPB(request DropTableOnShardRequest) *metaeventpb.DropTableOnShardRequest { - return &metaeventpb.DropTableOnShardRequest{ - UpdateShardInfo: convertUpdateShardInfoToPB(request.UpdateShardInfo), - TableInfo: metadata.ConvertTableInfoToPB(request.TableInfo), - } -} - -func convertCloseTableOnShardRequestToPB(request CloseTableOnShardRequest) *metaeventpb.CloseTableOnShardRequest { - return &metaeventpb.CloseTableOnShardRequest{ - UpdateShardInfo: convertUpdateShardInfoToPB(request.UpdateShardInfo), - TableInfo: metadata.ConvertTableInfoToPB(request.TableInfo), - } -} - -func convertOpenTableOnShardRequestToPB(request OpenTableOnShardRequest) *metaeventpb.OpenTableOnShardRequest { - return &metaeventpb.OpenTableOnShardRequest{ - UpdateShardInfo: convertUpdateShardInfoToPB(request.UpdateShardInfo), - TableInfo: metadata.ConvertTableInfoToPB(request.TableInfo), - } -} - -func convertUpdateShardInfoToPB(updateShardInfo UpdateShardInfo) *metaeventpb.UpdateShardInfo { - return &metaeventpb.UpdateShardInfo{ - CurrShardInfo: metadata.ConvertShardsInfoToPB(updateShardInfo.CurrShardInfo), - } -} diff --git a/horaemeta/server/coordinator/factory.go b/horaemeta/server/coordinator/factory.go deleted file mode 100644 index 9e12c7f109..0000000000 --- 
a/horaemeta/server/coordinator/factory.go +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package coordinator - -import ( - "context" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator/eventdispatch" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/ddl/createpartitiontable" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/ddl/createtable" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/ddl/droppartitiontable" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/ddl/droptable" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/operation/split" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/operation/transferleader" - "github.com/apache/incubator-horaedb-meta/server/id" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaservicepb" - "github.com/pkg/errors" - "go.uber.org/zap" -) - -type Factory struct { - logger *zap.Logger - idAllocator id.Allocator - dispatch eventdispatch.Dispatch - storage procedure.Storage - shardPicker *PersistShardPicker -} - -type CreateTableRequest struct { - ClusterMetadata *metadata.ClusterMetadata - SourceReq *metaservicepb.CreateTableRequest - - OnSucceeded func(metadata.CreateTableResult) error - OnFailed func(error) error -} - -func (request *CreateTableRequest) isPartitionTable() bool { - return request.SourceReq.PartitionTableInfo != nil -} - -type DropTableRequest struct { - ClusterMetadata *metadata.ClusterMetadata - ClusterSnapshot metadata.Snapshot - SourceReq *metaservicepb.DropTableRequest - - OnSucceeded func(metadata.TableInfo) error - OnFailed func(error) error -} - -func (d DropTableRequest) IsPartitionTable() bool { - return d.SourceReq.PartitionTableInfo != nil -} - -type TransferLeaderRequest struct { - Snapshot metadata.Snapshot - ShardID storage.ShardID - OldLeaderNodeName string - NewLeaderNodeName string -} - -type SplitRequest struct { - ClusterMetadata *metadata.ClusterMetadata - SchemaName string - TableNames []string - Snapshot metadata.Snapshot - ShardID storage.ShardID - NewShardID storage.ShardID - TargetNodeName string -} - -type CreatePartitionTableRequest struct { - ClusterMetadata *metadata.ClusterMetadata - SourceReq *metaservicepb.CreateTableRequest - - OnSucceeded func(metadata.CreateTableResult) error - OnFailed func(error) error -} - -type BatchRequest struct { - Batch []procedure.Procedure - BatchType procedure.Kind -} - -func NewFactory(logger *zap.Logger, allocator 
id.Allocator, dispatch eventdispatch.Dispatch, storage procedure.Storage, clusterMetadata *metadata.ClusterMetadata) *Factory { - return &Factory{ - idAllocator: allocator, - dispatch: dispatch, - storage: storage, - logger: logger, - shardPicker: NewPersistShardPicker(clusterMetadata, NewLeastTableShardPicker()), - } -} - -func (f *Factory) MakeCreateTableProcedure(ctx context.Context, request CreateTableRequest) (procedure.Procedure, error) { - isPartitionTable := request.isPartitionTable() - - if isPartitionTable { - req := CreatePartitionTableRequest(request) - return f.makeCreatePartitionTableProcedure(ctx, req) - } - - return f.makeCreateTableProcedure(ctx, request) -} - -func (f *Factory) makeCreateTableProcedure(ctx context.Context, request CreateTableRequest) (procedure.Procedure, error) { - id, err := f.allocProcedureID(ctx) - if err != nil { - return nil, err - } - snapshot := request.ClusterMetadata.GetClusterSnapshot() - - var targetShardID storage.ShardID - shardID, exists, err := request.ClusterMetadata.GetTableAssignedShard(ctx, request.SourceReq.SchemaName, request.SourceReq.Name) - if err != nil { - return nil, err - } - if exists { - targetShardID = shardID - } else { - shards, err := f.shardPicker.PickShards(ctx, snapshot, request.SourceReq.GetSchemaName(), []string{request.SourceReq.GetName()}) - if err != nil { - f.logger.Error("pick table shard", zap.Error(err)) - return nil, errors.WithMessage(err, "pick table shard") - } - if len(shards) != 1 { - f.logger.Error("pick table shards length not equal 1", zap.Int("shards", len(shards))) - return nil, errors.WithMessagef(procedure.ErrPickShard, "pick table shard, shards length:%d", len(shards)) - } - targetShardID = shards[request.SourceReq.GetName()].ID - } - - return createtable.NewProcedure(createtable.ProcedureParams{ - Dispatch: f.dispatch, - ClusterMetadata: request.ClusterMetadata, - ClusterSnapshot: snapshot, - ID: id, - ShardID: targetShardID, - SourceReq: request.SourceReq, - OnSucceeded: request.OnSucceeded, - OnFailed: request.OnFailed, - }) -} - -func (f *Factory) makeCreatePartitionTableProcedure(ctx context.Context, request CreatePartitionTableRequest) (procedure.Procedure, error) { - id, err := f.allocProcedureID(ctx) - if err != nil { - return nil, err - } - - snapshot := request.ClusterMetadata.GetClusterSnapshot() - - nodeNames := make(map[string]int, len(snapshot.Topology.ClusterView.ShardNodes)) - for _, shardNode := range snapshot.Topology.ClusterView.ShardNodes { - nodeNames[shardNode.NodeName] = 1 - } - - subTableShards, err := f.shardPicker.PickShards(ctx, snapshot, request.SourceReq.GetSchemaName(), request.SourceReq.PartitionTableInfo.SubTableNames) - if err != nil { - return nil, errors.WithMessage(err, "pick sub table shards") - } - - shardNodesWithVersion := make([]metadata.ShardNodeWithVersion, 0, len(subTableShards)) - for _, subTableShard := range subTableShards { - shardView, exists := snapshot.Topology.ShardViewsMapping[subTableShard.ID] - if !exists { - return nil, errors.WithMessagef(metadata.ErrShardNotFound, "shard not found, shardID:%d", subTableShard.ID) - } - shardNodesWithVersion = append(shardNodesWithVersion, metadata.ShardNodeWithVersion{ - ShardInfo: metadata.ShardInfo{ - ID: shardView.ShardID, - Role: subTableShard.ShardRole, - Version: shardView.Version, - Status: storage.ShardStatusUnknown, - }, - ShardNode: subTableShard, - }) - } - - return createpartitiontable.NewProcedure(createpartitiontable.ProcedureParams{ - ID: id, - ClusterMetadata: request.ClusterMetadata, - 
ClusterSnapshot: snapshot, - Dispatch: f.dispatch, - Storage: f.storage, - SourceReq: request.SourceReq, - SubTablesShards: shardNodesWithVersion, - OnSucceeded: request.OnSucceeded, - OnFailed: request.OnFailed, - }) -} - -// CreateDropTableProcedure creates a procedure to do drop table. -// -// And if no error is thrown, the returned boolean value is used to tell whether the procedure is created. -// In some cases, e.g. the table doesn't exist, it should not be an error and false will be returned. -func (f *Factory) CreateDropTableProcedure(ctx context.Context, request DropTableRequest) (procedure.Procedure, bool, error) { - id, err := f.allocProcedureID(ctx) - if err != nil { - return nil, false, err - } - - snapshot := request.ClusterMetadata.GetClusterSnapshot() - - if request.IsPartitionTable() { - return droppartitiontable.NewProcedure(droppartitiontable.ProcedureParams{ - ID: id, - ClusterMetadata: request.ClusterMetadata, - ClusterSnapshot: request.ClusterSnapshot, - Dispatch: f.dispatch, - Storage: f.storage, - SourceReq: request.SourceReq, - OnSucceeded: request.OnSucceeded, - OnFailed: request.OnFailed, - }) - } - - return droptable.NewDropTableProcedure(droptable.ProcedureParams{ - ID: id, - Dispatch: f.dispatch, - ClusterMetadata: request.ClusterMetadata, - ClusterSnapshot: snapshot, - SourceReq: request.SourceReq, - OnSucceeded: request.OnSucceeded, - OnFailed: request.OnFailed, - }) -} - -func (f *Factory) CreateTransferLeaderProcedure(ctx context.Context, request TransferLeaderRequest) (procedure.Procedure, error) { - id, err := f.allocProcedureID(ctx) - if err != nil { - return nil, err - } - - return transferleader.NewProcedure(transferleader.ProcedureParams{ - ID: id, - Dispatch: f.dispatch, - Storage: f.storage, - ClusterSnapshot: request.Snapshot, - ShardID: request.ShardID, - OldLeaderNodeName: request.OldLeaderNodeName, - NewLeaderNodeName: request.NewLeaderNodeName, - }) -} - -func (f *Factory) CreateSplitProcedure(ctx context.Context, request SplitRequest) (procedure.Procedure, error) { - id, err := f.allocProcedureID(ctx) - if err != nil { - return nil, err - } - - return split.NewProcedure( - split.ProcedureParams{ - ID: id, - Dispatch: f.dispatch, - Storage: f.storage, - ClusterMetadata: request.ClusterMetadata, - ClusterSnapshot: request.Snapshot, - ShardID: request.ShardID, - NewShardID: request.NewShardID, - SchemaName: request.SchemaName, - TableNames: request.TableNames, - TargetNodeName: request.TargetNodeName, - }, - ) -} - -func (f *Factory) CreateBatchTransferLeaderProcedure(ctx context.Context, request BatchRequest) (procedure.Procedure, error) { - id, err := f.allocProcedureID(ctx) - if err != nil { - return nil, err - } - - return transferleader.NewBatchTransferLeaderProcedure(id, request.Batch) -} - -func (f *Factory) allocProcedureID(ctx context.Context) (uint64, error) { - id, err := f.idAllocator.Alloc(ctx) - if err != nil { - return 0, errors.WithMessage(err, "alloc procedure id") - } - return id, nil -} diff --git a/horaemeta/server/coordinator/factory_test.go b/horaemeta/server/coordinator/factory_test.go deleted file mode 100644 index 04c83c67a5..0000000000 --- a/horaemeta/server/coordinator/factory_test.go +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package coordinator_test - -import ( - "context" - "testing" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/test" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaservicepb" - "github.com/stretchr/testify/require" - "go.uber.org/zap" -) - -func setupFactory(t *testing.T) (*coordinator.Factory, *metadata.ClusterMetadata) { - ctx := context.Background() - c := test.InitStableCluster(ctx, t) - - dispatch := test.MockDispatch{} - allocator := test.MockIDAllocator{} - storage := test.NewTestStorage(t) - f := coordinator.NewFactory(zap.NewNop(), allocator, dispatch, storage, c.GetMetadata()) - - return f, c.GetMetadata() -} - -func TestCreateTable(t *testing.T) { - re := require.New(t) - ctx := context.Background() - f, m := setupFactory(t) - // Create normal table procedure. - p, err := f.MakeCreateTableProcedure(ctx, coordinator.CreateTableRequest{ - ClusterMetadata: m, - SourceReq: &metaservicepb.CreateTableRequest{ - Header: nil, - SchemaName: test.TestSchemaName, - Name: "test1", - EncodedSchema: nil, - Engine: "", - CreateIfNotExist: false, - Options: nil, - PartitionTableInfo: nil, - }, - OnSucceeded: nil, - OnFailed: nil, - }) - re.NoError(err) - re.Equal(procedure.CreateTable, p.Kind()) - re.Equal(procedure.StateInit, string(p.State())) - - // Create partition table procedure. - p, err = f.MakeCreateTableProcedure(ctx, coordinator.CreateTableRequest{ - ClusterMetadata: m, - SourceReq: &metaservicepb.CreateTableRequest{ - Header: nil, - SchemaName: test.TestSchemaName, - Name: "test2", - EncodedSchema: nil, - Engine: "", - CreateIfNotExist: false, - Options: nil, - PartitionTableInfo: &metaservicepb.PartitionTableInfo{ - PartitionInfo: nil, - SubTableNames: []string{"test2-0,test2-1"}, - }, - }, - OnSucceeded: nil, - OnFailed: nil, - }) - re.NoError(err) - re.Equal(procedure.CreatePartitionTable, p.Kind()) - re.Equal(procedure.StateInit, string(p.State())) -} - -func TestDropTable(t *testing.T) { - re := require.New(t) - ctx := context.Background() - f, m := setupFactory(t) - // Drop normal table procedure. - p, ok, err := f.CreateDropTableProcedure(ctx, coordinator.DropTableRequest{ - ClusterMetadata: m, - ClusterSnapshot: m.GetClusterSnapshot(), - SourceReq: &metaservicepb.DropTableRequest{ - Header: nil, - SchemaName: test.TestSchemaName, - Name: "test1", - PartitionTableInfo: nil, - }, - OnSucceeded: nil, - OnFailed: nil, - }) - re.NoError(err) - re.False(ok) - re.Nil(p) - - // Create partition table procedure. 
- p, ok, err = f.CreateDropTableProcedure(ctx, coordinator.DropTableRequest{ - ClusterMetadata: m, - ClusterSnapshot: m.GetClusterSnapshot(), - SourceReq: &metaservicepb.DropTableRequest{ - Header: nil, - SchemaName: test.TestSchemaName, - Name: "test2", - PartitionTableInfo: &metaservicepb.PartitionTableInfo{ - PartitionInfo: nil, - SubTableNames: []string{"test2-0,test2-1"}, - }, - }, - OnSucceeded: nil, - OnFailed: nil, - }) - // Drop non-existing partition table. - re.NoError(err) - re.True(ok) - re.NotNil(p) -} - -func TestTransferLeader(t *testing.T) { - re := require.New(t) - ctx := context.Background() - f, m := setupFactory(t) - snapshot := m.GetClusterSnapshot() - p, err := f.CreateTransferLeaderProcedure(ctx, coordinator.TransferLeaderRequest{ - Snapshot: snapshot, - ShardID: 0, - OldLeaderNodeName: "", - NewLeaderNodeName: snapshot.RegisteredNodes[0].Node.Name, - }) - re.NoError(err) - re.Equal(procedure.TransferLeader, p.Kind()) - re.Equal(procedure.StateInit, string(p.State())) -} - -func TestSplit(t *testing.T) { - re := require.New(t) - ctx := context.Background() - f, m := setupFactory(t) - snapshot := m.GetClusterSnapshot() - p, err := f.CreateSplitProcedure(ctx, coordinator.SplitRequest{ - ClusterMetadata: nil, - SchemaName: "", - TableNames: nil, - Snapshot: snapshot, - ShardID: snapshot.Topology.ClusterView.ShardNodes[0].ID, - NewShardID: 100, - TargetNodeName: snapshot.Topology.ClusterView.ShardNodes[0].NodeName, - }) - re.NoError(err) - re.Equal(procedure.Split, p.Kind()) - re.Equal(procedure.StateInit, string(p.State())) -} diff --git a/horaemeta/server/coordinator/inspector/node_inspector.go b/horaemeta/server/coordinator/inspector/node_inspector.go deleted file mode 100644 index e4a3671c8a..0000000000 --- a/horaemeta/server/coordinator/inspector/node_inspector.go +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package inspector - -import ( - "context" - "sync" - "time" - - "github.com/apache/incubator-horaedb-meta/pkg/coderr" - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/storage" - "go.uber.org/zap" -) - -var ErrStartAgain = coderr.NewCodeError(coderr.Internal, "try to start again") -var ErrStopNotStart = coderr.NewCodeError(coderr.Internal, "try to stop a not-started inspector") - -const defaultInspectInterval = time.Second * 5 - -// NodeInspector will inspect node status and remove expired data. 
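Before the struct definition that follows, it helps to spell out the expiry test this inspector applies: a registered node counts as expired once its last heartbeat (LastTouchTime, in milliseconds) plus a fixed threshold lies in the past, and the shard nodes owned by such nodes are dropped (see RegisteredNode.IsExpired earlier in this patch and findExpiredShardNodes below). A self-contained sketch of the check, with a made-up threshold because the removed expiredThreshold constant is defined elsewhere:

package main

import (
	"fmt"
	"time"
)

// expiredThreshold is a stand-in value; the real constant lives in the removed
// metadata package and is not reproduced here.
const expiredThreshold = 10 * time.Second

// isExpired mirrors RegisteredNode.IsExpired: a node is expired when its last
// heartbeat plus the threshold is earlier than now.
func isExpired(lastTouchMillis uint64, now time.Time) bool {
	expiredTime := time.UnixMilli(int64(lastTouchMillis)).Add(expiredThreshold)
	return now.After(expiredTime)
}

func main() {
	now := time.Now()

	fresh := uint64(now.UnixMilli())                   // heartbeat just arrived
	stale := uint64(now.Add(-time.Minute).UnixMilli()) // heartbeat a minute ago

	fmt.Println(isExpired(fresh, now)) // false: still within the threshold
	fmt.Println(isExpired(stale, now)) // true: its shard nodes would be dropped
}

The NodeInspector type below runs this check on a timer and hands the expired shard nodes to DropShardNodes.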
-type NodeInspector struct { - logger *zap.Logger - clusterMetadata ClusterMetaDataManipulator - interval time.Duration - - starter sync.Once - // After `Start` is called, the following fields will be initialized - stopCtx context.Context - bgJobCancel context.CancelFunc -} - -// ClusterMetaDataManipulator provides the snapshot for NodeInspector to check and utilities of drop expired shard nodes. -type ClusterMetaDataManipulator interface { - GetClusterSnapshot() metadata.Snapshot - DropShardNodes(context.Context, []storage.ShardNode) error -} - -func NewNodeInspectorWithInterval(logger *zap.Logger, clusterMetadata ClusterMetaDataManipulator, inspectInterval time.Duration) *NodeInspector { - return &NodeInspector{ - logger: logger, - clusterMetadata: clusterMetadata, - interval: inspectInterval, - starter: sync.Once{}, - stopCtx: nil, - bgJobCancel: nil, - } -} - -func NewNodeInspector(logger *zap.Logger, clusterMetadata ClusterMetaDataManipulator) *NodeInspector { - return NewNodeInspectorWithInterval(logger, clusterMetadata, defaultInspectInterval) -} - -func (ni *NodeInspector) Start(ctx context.Context) error { - started := false - ni.starter.Do(func() { - log.Info("node inspector start") - started = true - ni.stopCtx, ni.bgJobCancel = context.WithCancel(ctx) - go func() { - for { - t := time.NewTimer(ni.interval) - select { - case <-ni.stopCtx.Done(): - ni.logger.Info("node inspector is stopped, cancel the bg inspecting") - if !t.Stop() { - <-t.C - } - return - case <-t.C: - } - - ni.inspect(ctx) - } - }() - }) - - if !started { - return ErrStartAgain - } - - return nil -} - -func (ni *NodeInspector) Stop(_ context.Context) error { - if ni.bgJobCancel != nil { - ni.bgJobCancel() - return nil - } - - return ErrStopNotStart -} - -func (ni *NodeInspector) inspect(ctx context.Context) { - // Get latest cluster snapshot. - snapshot := ni.clusterMetadata.GetClusterSnapshot() - expiredShardNodes := findExpiredShardNodes(snapshot) - if len(expiredShardNodes) == 0 { - return - } - - // Try to remove useless data if it exists. - if err := ni.clusterMetadata.DropShardNodes(ctx, expiredShardNodes); err != nil { - log.Error("drop shard node failed", zap.Error(err)) - } -} - -func findExpiredShardNodes(snapshot metadata.Snapshot) []storage.ShardNode { - // In most cases, there is no expired shard nodes so don't pre-allocate the memory here. - expiredNodes := make(map[string]struct{}, 0) - // Check node status. - now := time.Now() - for i := range snapshot.RegisteredNodes { - node := &snapshot.RegisteredNodes[i] - if node.IsExpired(now) { - expiredNodes[node.Node.Name] = struct{}{} - } - } - - expiredShardNodes := make([]storage.ShardNode, 0, len(expiredNodes)) - for _, shardNode := range snapshot.Topology.ClusterView.ShardNodes { - _, ok := expiredNodes[shardNode.NodeName] - if ok { - expiredShardNodes = append(expiredShardNodes, shardNode) - } - } - - return expiredShardNodes -} diff --git a/horaemeta/server/coordinator/inspector/node_inspector_test.go b/horaemeta/server/coordinator/inspector/node_inspector_test.go deleted file mode 100644 index f119299ed2..0000000000 --- a/horaemeta/server/coordinator/inspector/node_inspector_test.go +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package inspector - -import ( - "context" - "slices" - "sync" - "testing" - "time" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/stretchr/testify/assert" - "go.uber.org/zap" -) - -type mockClusterMetaDataManipulator struct { - snapshot metadata.Snapshot - lock sync.Mutex - droppedShardNodes [][]storage.ShardNode -} - -func newMockClusterMetaDataManipulator(shardNodes []storage.ShardNode, registeredNodes []metadata.RegisteredNode) *mockClusterMetaDataManipulator { - var clusterView storage.ClusterView - clusterView.ShardNodes = shardNodes - topology := metadata.Topology{ - ShardViewsMapping: nil, - ClusterView: clusterView, - } - - snapshot := metadata.Snapshot{ - Topology: topology, - RegisteredNodes: registeredNodes, - } - return &mockClusterMetaDataManipulator{ - snapshot: snapshot, - lock: sync.Mutex{}, - droppedShardNodes: make([][]storage.ShardNode, 0), - } -} - -func (n *mockClusterMetaDataManipulator) GetClusterSnapshot() metadata.Snapshot { - return n.snapshot -} - -func (n *mockClusterMetaDataManipulator) DropShardNodes(_ context.Context, shardNodes []storage.ShardNode) error { - n.lock.Lock() - defer n.lock.Unlock() - - n.droppedShardNodes = append(n.droppedShardNodes, shardNodes) - newShardNodes := make([]storage.ShardNode, 0, 2) - for _, node := range n.snapshot.Topology.ClusterView.ShardNodes { - dropped := slices.ContainsFunc(shardNodes, func(droppedNode storage.ShardNode) bool { - return node.NodeName == droppedNode.NodeName - }) - if !dropped { - newShardNodes = append(newShardNodes, node) - } - } - n.snapshot.Topology.ClusterView.ShardNodes = newShardNodes - return nil -} - -func (n *mockClusterMetaDataManipulator) CheckDroppedShardNodes(check func(droppedShardNodes [][]storage.ShardNode)) { - n.lock.Lock() - defer n.lock.Unlock() - - check(n.droppedShardNodes) -} - -func TestStartStopInspector(t *testing.T) { - inspector := NewNodeInspector(zap.NewNop(), newMockClusterMetaDataManipulator(nil, nil)) - - ctx := context.Background() - assert.NoError(t, inspector.Start(ctx)) - assert.Error(t, inspector.Start(ctx)) - - assert.NoError(t, inspector.Stop(ctx)) -} - -func TestInspect(t *testing.T) { - shardNodes := []storage.ShardNode{ - {ID: storage.ShardID(0), ShardRole: storage.ShardRoleLeader, NodeName: "192.168.1.102"}, - {ID: storage.ShardID(1), ShardRole: storage.ShardRoleLeader, NodeName: "192.168.1.102"}, - {ID: storage.ShardID(2), ShardRole: storage.ShardRoleLeader, NodeName: "192.168.1.103"}, - {ID: storage.ShardID(3), ShardRole: storage.ShardRoleLeader, NodeName: "192.168.1.103"}, - } - registeredNodes := []metadata.RegisteredNode{ - { - Node: storage.Node{ - Name: "192.168.1.102", - NodeStats: storage.NodeStats{Lease: 0, Zone: "", NodeVersion: ""}, - LastTouchTime: uint64(time.Now().UnixMilli()), - State: storage.NodeStateOnline, - }, - ShardInfos: nil, - }, - { - // This node should 
be outdated. - Node: storage.Node{ - Name: "192.168.1.103", - NodeStats: storage.NodeStats{Lease: 0, Zone: "", NodeVersion: ""}, - LastTouchTime: uint64(time.Now().UnixMilli()) - uint64((time.Second * 20)), - State: storage.NodeStateOnline, - }, - ShardInfos: nil, - }, - } - - metadata := newMockClusterMetaDataManipulator(shardNodes, registeredNodes) - inspector := NewNodeInspectorWithInterval(zap.NewNop(), metadata, time.Millisecond*100) - ctx := context.Background() - assert.NoError(t, inspector.Start(ctx)) - - // The inspect should be triggered after 200ms. - time.Sleep(time.Millisecond * 200) - - // The outdated node should be removed by triggered. - metadata.CheckDroppedShardNodes(func(droppedShardNodes [][]storage.ShardNode) { - assert.True(t, len(droppedShardNodes) == 1) - assert.True(t, len(droppedShardNodes[0]) == 2) - assert.Equal(t, droppedShardNodes[0][0], shardNodes[2]) - assert.Equal(t, droppedShardNodes[0][1], shardNodes[3]) - }) - - assert.NoError(t, inspector.Stop(ctx)) -} diff --git a/horaemeta/server/coordinator/lock/entry_lock.go b/horaemeta/server/coordinator/lock/entry_lock.go deleted file mode 100644 index e3851368ef..0000000000 --- a/horaemeta/server/coordinator/lock/entry_lock.go +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package lock - -import ( - "fmt" - "sync" -) - -type EntryLock struct { - lock sync.Mutex - entryLocks map[uint64]struct{} -} - -func NewEntryLock(initCapacity int) EntryLock { - return EntryLock{ - lock: sync.Mutex{}, - entryLocks: make(map[uint64]struct{}, initCapacity), - } -} - -func (l *EntryLock) TryLock(locks []uint64) bool { - l.lock.Lock() - defer l.lock.Unlock() - - for _, lock := range locks { - _, exists := l.entryLocks[lock] - if exists { - return false - } - } - - for _, lock := range locks { - l.entryLocks[lock] = struct{}{} - } - - return true -} - -func (l *EntryLock) UnLock(locks []uint64) { - l.lock.Lock() - defer l.lock.Unlock() - - for _, lock := range locks { - _, exists := l.entryLocks[lock] - if !exists { - panic(fmt.Sprintf("try to unlock nonexistent lock, exists locks:%v, unlock locks:%v", l.entryLocks, locks)) - } - } - - for _, lock := range locks { - delete(l.entryLocks, lock) - } -} diff --git a/horaemeta/server/coordinator/lock/entry_lock_test.go b/horaemeta/server/coordinator/lock/entry_lock_test.go deleted file mode 100644 index 7dd3a37f00..0000000000 --- a/horaemeta/server/coordinator/lock/entry_lock_test.go +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package lock - -import ( - "testing" - - "github.com/stretchr/testify/require" -) - -func TestEntryLock(t *testing.T) { - re := require.New(t) - - lock := NewEntryLock(3) - - lock1 := []uint64{1} - result := lock.TryLock(lock1) - re.Equal(true, result) - result = lock.TryLock(lock1) - re.Equal(false, result) - lock.UnLock(lock1) - result = lock.TryLock(lock1) - re.Equal(true, result) - lock.UnLock(lock1) - - lock2 := []uint64{2, 3, 4} - lock3 := []uint64{3, 4, 5} - result = lock.TryLock(lock2) - re.Equal(true, result) - result = lock.TryLock(lock2) - re.Equal(false, result) - result = lock.TryLock(lock3) - re.Equal(false, result) - lock.UnLock(lock2) - result = lock.TryLock(lock2) - re.Equal(true, result) - lock.UnLock(lock2) - - re.Panics(func() { - lock.UnLock(lock2) - }, "this function did not panic") -} diff --git a/horaemeta/server/coordinator/persist_shard_picker.go b/horaemeta/server/coordinator/persist_shard_picker.go deleted file mode 100644 index b368422177..0000000000 --- a/horaemeta/server/coordinator/persist_shard_picker.go +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package coordinator - -import ( - "context" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/storage" -) - -type PersistShardPicker struct { - cluster *metadata.ClusterMetadata - internal ShardPicker -} - -func NewPersistShardPicker(cluster *metadata.ClusterMetadata, internal ShardPicker) *PersistShardPicker { - return &PersistShardPicker{cluster: cluster, internal: internal} -} - -func (p *PersistShardPicker) PickShards(ctx context.Context, snapshot metadata.Snapshot, schemaName string, tableNames []string) (map[string]storage.ShardNode, error) { - result := map[string]storage.ShardNode{} - - shardNodeMap := make(map[storage.ShardID]storage.ShardNode, len(tableNames)) - for _, shardNode := range snapshot.Topology.ClusterView.ShardNodes { - shardNodeMap[shardNode.ID] = shardNode - } - - var missingTables []string - // If table assign has been created, just reuse it. 
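The EntryLock removed above gives the coordinator all-or-nothing locking over a set of shard IDs: TryLock either acquires every requested ID or none of them, and UnLock panics if asked to release an ID it never granted. Below is a minimal, hypothetical caller; the import path is the one this patch deletes, and the shard IDs are invented purely for illustration.

package main

import (
	"fmt"

	// Package removed by this patch; referenced here only to illustrate its API.
	"github.com/apache/incubator-horaedb-meta/server/coordinator/lock"
)

func main() {
	// Guard a procedure that needs exclusive access to shards 1 and 2.
	l := lock.NewEntryLock(4)

	shardIDs := []uint64{1, 2}
	if !l.TryLock(shardIDs) {
		fmt.Println("another procedure already holds these shards; retry later")
		return
	}
	// Release exactly the IDs that were acquired; UnLock panics on IDs
	// that were never locked, treating that as a bookkeeping bug.
	defer l.UnLock(shardIDs)

	fmt.Println("locked shards", shardIDs, "- safe to run the procedure")
}

The panic in UnLock is deliberate: releasing an ID that was never held points to a procedure bookkeeping error rather than a condition worth recovering from.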
- for i := 0; i < len(tableNames); i++ { - shardID, exists, err := p.cluster.GetTableAssignedShard(ctx, schemaName, tableNames[i]) - if err != nil { - return map[string]storage.ShardNode{}, err - } - if exists { - result[tableNames[i]] = shardNodeMap[shardID] - } else { - missingTables = append(missingTables, tableNames[i]) - } - } - - // All table has been assigned to shard. - if len(missingTables) == 0 { - return result, nil - } - - // No table assign has been created, try to pick shard and save table assigns. - shardNodes, err := p.internal.PickShards(ctx, snapshot, len(missingTables)) - if err != nil { - return map[string]storage.ShardNode{}, err - } - - for i, shardNode := range shardNodes { - result[missingTables[i]] = shardNode - err = p.cluster.AssignTableToShard(ctx, schemaName, missingTables[i], shardNode.ID) - if err != nil { - return map[string]storage.ShardNode{}, err - } - } - - return result, nil -} diff --git a/horaemeta/server/coordinator/persist_shard_picker_test.go b/horaemeta/server/coordinator/persist_shard_picker_test.go deleted file mode 100644 index 67e3aa7ef1..0000000000 --- a/horaemeta/server/coordinator/persist_shard_picker_test.go +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package coordinator_test - -import ( - "context" - "testing" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/test" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/stretchr/testify/require" -) - -func TestPersistShardPicker(t *testing.T) { - re := require.New(t) - ctx := context.Background() - - c := test.InitStableCluster(ctx, t) - - persistShardPicker := coordinator.NewPersistShardPicker(c.GetMetadata(), coordinator.NewLeastTableShardPicker()) - pickResult, err := persistShardPicker.PickShards(ctx, c.GetMetadata().GetClusterSnapshot(), test.TestSchemaName, []string{test.TestTableName0}) - re.NoError(err) - re.Equal(len(pickResult), 1) - - createResult, err := c.GetMetadata().CreateTable(ctx, metadata.CreateTableRequest{ - ShardID: pickResult[test.TestTableName0].ID, - LatestVersion: 0, - SchemaName: test.TestSchemaName, - TableName: test.TestTableName0, - PartitionInfo: storage.PartitionInfo{Info: nil}, - }) - re.NoError(err) - re.Equal(test.TestTableName0, createResult.Table.Name) - - // Try to pick shard for same table after the table is created. 
- newPickResult, err := persistShardPicker.PickShards(ctx, c.GetMetadata().GetClusterSnapshot(), test.TestSchemaName, []string{test.TestTableName0}) - re.NoError(err) - re.Equal(len(newPickResult), 1) - re.Equal(newPickResult[test.TestTableName0], pickResult[test.TestTableName0]) - - // Try to pick shard for another table. - pickResult, err = persistShardPicker.PickShards(ctx, c.GetMetadata().GetClusterSnapshot(), test.TestSchemaName, []string{test.TestTableName1}) - re.NoError(err) - re.Equal(len(pickResult), 1) - - err = c.GetMetadata().DropTable(ctx, metadata.DropTableRequest{ - SchemaName: test.TestSchemaName, - TableName: test.TestTableName0, - ShardID: pickResult[test.TestTableName0].ID, - LatestVersion: 0, - }) - re.NoError(err) - - // Try to pick shard for table1 after drop table0. - newPickResult, err = persistShardPicker.PickShards(ctx, c.GetMetadata().GetClusterSnapshot(), test.TestSchemaName, []string{test.TestTableName1}) - re.NoError(err) - re.Equal(len(pickResult), 1) - re.Equal(newPickResult[test.TestTableName1], pickResult[test.TestTableName1]) - - err = c.GetMetadata().DeleteTableAssignedShard(ctx, test.TestSchemaName, test.TestTableName1) - re.NoError(err) - - // Try to pick another for table1 after drop table1 assign result. - newPickResult, err = persistShardPicker.PickShards(ctx, c.GetMetadata().GetClusterSnapshot(), test.TestSchemaName, []string{test.TestTableName1}) - re.NoError(err) - re.Equal(len(pickResult), 1) - re.NotEqual(newPickResult[test.TestTableName1], pickResult[test.TestTableName1]) -} diff --git a/horaemeta/server/coordinator/procedure/ddl/common_util.go b/horaemeta/server/coordinator/procedure/ddl/common_util.go deleted file mode 100644 index aa55e7570a..0000000000 --- a/horaemeta/server/coordinator/procedure/ddl/common_util.go +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package ddl - -import ( - "context" - - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator/eventdispatch" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaservicepb" - "github.com/pkg/errors" - "go.uber.org/zap" -) - -func CreateTableOnShard(ctx context.Context, c *metadata.ClusterMetadata, dispatch eventdispatch.Dispatch, shardID storage.ShardID, request eventdispatch.CreateTableOnShardRequest) (uint64, error) { - log.Debug("CreateTableOnShard", zap.Uint64("version", request.UpdateShardInfo.CurrShardInfo.Version)) - shardNodes, err := c.GetShardNodesByShardID(shardID) - if err != nil { - return 0, errors.WithMessage(err, "cluster get shardNode by id") - } - // TODO: consider followers - var leader storage.ShardNode - found := false - for _, shardNode := range shardNodes { - if shardNode.ShardRole == storage.ShardRoleLeader { - found = true - leader = shardNode - break - } - } - if !found { - return 0, errors.WithMessagef(procedure.ErrShardLeaderNotFound, "shard node can't find leader, shardID:%d", shardID) - } - - latestVersion, err := dispatch.CreateTableOnShard(ctx, leader.NodeName, request) - if err != nil { - return 0, errors.WithMessage(err, "create table on shard") - } - return latestVersion, nil -} - -func BuildCreateTableRequest(table storage.Table, shardVersionUpdate metadata.ShardVersionUpdate, req *metaservicepb.CreateTableRequest) eventdispatch.CreateTableOnShardRequest { - return eventdispatch.CreateTableOnShardRequest{ - UpdateShardInfo: eventdispatch.UpdateShardInfo{ - CurrShardInfo: metadata.ShardInfo{ - ID: shardVersionUpdate.ShardID, - // TODO: dispatch CreateTableOnShard to followers? - Role: storage.ShardRoleLeader, - Version: shardVersionUpdate.LatestVersion, - // FIXME: There is no need to update status here, but it must be set. Shall we provide another struct without status field? - Status: storage.ShardStatusUnknown, - }, - }, - TableInfo: metadata.TableInfo{ - ID: table.ID, - Name: table.Name, - SchemaID: table.SchemaID, - SchemaName: req.GetSchemaName(), - PartitionInfo: table.PartitionInfo, - CreatedAt: table.CreatedAt, - }, - EncodedSchema: req.EncodedSchema, - Engine: req.Engine, - CreateIfNotExist: req.CreateIfNotExist, - Options: req.Options, - } -} - -func GetTableMetadata(clusterMetadata *metadata.ClusterMetadata, schemaName, tableName string) (storage.Table, error) { - table, exists, err := clusterMetadata.GetTable(schemaName, tableName) - if err != nil { - return storage.Table{}, err - } - if !exists { - return storage.Table{}, errors.WithMessagef(procedure.ErrTableNotExists, "table not exists, tableName:%s", tableName) - } - return table, nil -} - -// BuildShardVersionUpdate builds metadata.ShardVersionUpdate to assist DDL on the shard. -// -// And if no error is thrown, the returned boolean value is used to tell whether this table is allocated to shard. 
-// In some cases, we need to use this value to determine whether DDL can be executed normally。 -func BuildShardVersionUpdate(table storage.Table, clusterMetadata *metadata.ClusterMetadata, shardVersions map[storage.ShardID]uint64) (metadata.ShardVersionUpdate, bool, error) { - var versionUpdate metadata.ShardVersionUpdate - shardNodesResult, err := clusterMetadata.GetShardNodeByTableIDs([]storage.TableID{table.ID}) - if err != nil { - return versionUpdate, false, err - } - - var leader storage.ShardNode - found := false - for _, shardNode := range shardNodesResult.ShardNodes[table.ID] { - if shardNode.ShardRole == storage.ShardRoleLeader { - found = true - leader = shardNode - break - } - } - - if !found { - log.Warn("table can't find leader shard", zap.String("tableName", table.Name)) - return versionUpdate, false, nil - } - - latestVersion, exists := shardVersions[leader.ID] - if !exists { - return versionUpdate, false, errors.WithMessagef(metadata.ErrShardNotFound, "shard not found in shardVersions, shardID:%d", leader.ID) - } - - versionUpdate = metadata.ShardVersionUpdate{ - ShardID: leader.ID, - LatestVersion: latestVersion, - } - return versionUpdate, true, nil -} - -func DropTableOnShard(ctx context.Context, clusterMetadata *metadata.ClusterMetadata, dispatch eventdispatch.Dispatch, schemaName string, table storage.Table, version metadata.ShardVersionUpdate) (uint64, error) { - shardNodes, err := clusterMetadata.GetShardNodesByShardID(version.ShardID) - if err != nil { - return 0, errors.WithMessage(err, "cluster get shard by shard id") - } - - tableInfo := metadata.TableInfo{ - ID: table.ID, - Name: table.Name, - SchemaID: table.SchemaID, - SchemaName: schemaName, - PartitionInfo: storage.PartitionInfo{Info: nil}, - CreatedAt: 0, - } - - var latestVersion uint64 - for _, shardNode := range shardNodes { - latestVersion, err = dispatch.DropTableOnShard(ctx, shardNode.NodeName, eventdispatch.DropTableOnShardRequest{ - UpdateShardInfo: eventdispatch.UpdateShardInfo{ - CurrShardInfo: metadata.ShardInfo{ - ID: version.ShardID, - Role: storage.ShardRoleLeader, - Version: version.LatestVersion, - // FIXME: We have no need to update the status, but it must be set. Maybe we should provide another struct without status field. - Status: storage.ShardStatusUnknown, - }, - }, - TableInfo: tableInfo, - }) - if err != nil { - return 0, errors.WithMessage(err, "dispatch drop table on shard") - } - } - - return latestVersion, nil -} diff --git a/horaemeta/server/coordinator/procedure/ddl/createpartitiontable/create_partition_table.go b/horaemeta/server/coordinator/procedure/ddl/createpartitiontable/create_partition_table.go deleted file mode 100644 index cb7ac55fce..0000000000 --- a/horaemeta/server/coordinator/procedure/ddl/createpartitiontable/create_partition_table.go +++ /dev/null @@ -1,387 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package createpartitiontable - -import ( - "context" - "encoding/json" - "fmt" - "sync" - - "github.com/apache/incubator-horaedb-meta/pkg/assert" - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator/eventdispatch" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/ddl" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaservicepb" - "github.com/looplab/fsm" - "github.com/pkg/errors" - "go.uber.org/zap" -) - -// fsm state change: -// ┌────────┐ ┌──────────────────────┐ ┌────────────────────┐ ┌───────────┐ -// │ Begin ├─────▶ CreatePartitionTable ├─────▶ CreateDataTables ├──────▶ Finish │ -// └────────┘ └──────────────────────┘ └────────────────────┘ └───────────┘ -const ( - eventCreatePartitionTable = "EventCreatePartitionTable" - eventCreateSubTables = "EventCreateSubTables" - eventFinish = "EventFinish" - - stateBegin = "StateBegin" - stateCreatePartitionTable = "StateCreatePartitionTable" - stateCreateSubTables = "StateCreateSubTables" - stateFinish = "StateFinish" -) - -var ( - createPartitionTableEvents = fsm.Events{ - {Name: eventCreatePartitionTable, Src: []string{stateBegin}, Dst: stateCreatePartitionTable}, - {Name: eventCreateSubTables, Src: []string{stateCreatePartitionTable}, Dst: stateCreateSubTables}, - {Name: eventFinish, Src: []string{stateCreateSubTables}, Dst: stateFinish}, - } - createPartitionTableCallbacks = fsm.Callbacks{ - eventCreatePartitionTable: createPartitionTableCallback, - eventCreateSubTables: createDataTablesCallback, - eventFinish: finishCallback, - } -) - -type Procedure struct { - fsm *fsm.FSM - params ProcedureParams - relatedVersionInfo procedure.RelatedVersionInfo - createPartitionTableResult *metadata.CreateTableMetadataResult - - lock sync.RWMutex - state procedure.State -} - -type ProcedureParams struct { - ID uint64 - ClusterMetadata *metadata.ClusterMetadata - ClusterSnapshot metadata.Snapshot - Dispatch eventdispatch.Dispatch - Storage procedure.Storage - SourceReq *metaservicepb.CreateTableRequest - SubTablesShards []metadata.ShardNodeWithVersion - OnSucceeded func(metadata.CreateTableResult) error - OnFailed func(error) error -} - -func NewProcedure(params ProcedureParams) (procedure.Procedure, error) { - relatedVersionInfo, err := buildRelatedVersionInfo(params) - if err != nil { - return nil, err - } - - fsm := fsm.NewFSM( - stateBegin, - createPartitionTableEvents, - createPartitionTableCallbacks, - ) - - return &Procedure{ - fsm: fsm, - params: params, - relatedVersionInfo: relatedVersionInfo, - createPartitionTableResult: nil, - lock: sync.RWMutex{}, - state: procedure.StateInit, - }, nil -} - -func buildRelatedVersionInfo(params ProcedureParams) (procedure.RelatedVersionInfo, error) { - shardWithVersion := make(map[storage.ShardID]uint64, len(params.SubTablesShards)) - for _, subTableShard := range params.SubTablesShards { - shardView, exists := params.ClusterSnapshot.Topology.ShardViewsMapping[subTableShard.ShardInfo.ID] - if !exists { - return procedure.RelatedVersionInfo{}, errors.WithMessagef(metadata.ErrShardNotFound, "shard not found in topology, shardID:%d", subTableShard.ShardInfo.ID) - } - shardWithVersion[shardView.ShardID] = 
shardView.Version - } - - return procedure.RelatedVersionInfo{ - ClusterID: params.ClusterSnapshot.Topology.ClusterView.ClusterID, - ShardWithVersion: shardWithVersion, - ClusterVersion: params.ClusterSnapshot.Topology.ClusterView.Version, - }, nil -} - -func (p *Procedure) ID() uint64 { - return p.params.ID -} - -func (p *Procedure) Kind() procedure.Kind { - return procedure.CreatePartitionTable -} - -func (p *Procedure) RelatedVersionInfo() procedure.RelatedVersionInfo { - return p.relatedVersionInfo -} - -func (p *Procedure) Priority() procedure.Priority { - return procedure.PriorityLow -} - -func (p *Procedure) Start(ctx context.Context) error { - p.updateStateWithLock(procedure.StateRunning) - - createPartitionTableRequest := &callbackRequest{ - ctx: ctx, - p: p, - } - - for { - switch p.fsm.Current() { - case stateBegin: - if err := p.persist(ctx); err != nil { - return errors.WithMessage(err, "persist create partition table procedure") - } - if err := p.fsm.Event(eventCreatePartitionTable, createPartitionTableRequest); err != nil { - p.updateStateWithLock(procedure.StateFailed) - _ = p.params.OnFailed(err) - return errors.WithMessage(err, "create partition table") - } - case stateCreatePartitionTable: - if err := p.persist(ctx); err != nil { - return errors.WithMessage(err, "persist create partition table procedure") - } - if err := p.fsm.Event(eventCreateSubTables, createPartitionTableRequest); err != nil { - p.updateStateWithLock(procedure.StateFailed) - _ = p.params.OnFailed(err) - return errors.WithMessage(err, "create data tables") - } - case stateCreateSubTables: - if err := p.persist(ctx); err != nil { - return errors.WithMessage(err, "persist create partition table procedure") - } - if err := p.fsm.Event(eventFinish, createPartitionTableRequest); err != nil { - p.updateStateWithLock(procedure.StateFailed) - _ = p.params.OnFailed(err) - return errors.WithMessage(err, "update table shard metadata") - } - case stateFinish: - // TODO: The state update sequence here is inconsistent with the previous one. Consider reconstructing the state update logic of the state machine. - p.updateStateWithLock(procedure.StateFinished) - if err := p.persist(ctx); err != nil { - _ = p.params.OnFailed(err) - return errors.WithMessage(err, "create partition table procedure persist") - } - return nil - } - } -} - -func (p *Procedure) Cancel(_ context.Context) error { - p.updateStateWithLock(procedure.StateCancelled) - return nil -} - -func (p *Procedure) State() procedure.State { - p.lock.RLock() - defer p.lock.RUnlock() - - return p.state -} - -type callbackRequest struct { - ctx context.Context - p *Procedure -} - -// 1. Create partition table in target node. -func createPartitionTableCallback(event *fsm.Event) { - req, err := procedure.GetRequestFromEvent[*callbackRequest](event) - if err != nil { - procedure.CancelEventWithLog(event, err, "get request from event") - return - } - params := req.p.params - - createTableMetadataResult, err := params.ClusterMetadata.CreateTableMetadata(req.ctx, metadata.CreateTableMetadataRequest{ - SchemaName: params.SourceReq.GetSchemaName(), - TableName: params.SourceReq.GetName(), - PartitionInfo: storage.PartitionInfo{Info: params.SourceReq.PartitionTableInfo.GetPartitionInfo()}, - }) - if err != nil { - procedure.CancelEventWithLog(event, err, "create table metadata") - return - } - req.p.createPartitionTableResult = &createTableMetadataResult -} - -// 2. Create data tables in target nodes. 
-func createDataTablesCallback(event *fsm.Event) { - req, err := procedure.GetRequestFromEvent[*callbackRequest](event) - if err != nil { - procedure.CancelEventWithLog(event, err, "get request from event") - return - } - params := req.p.params - if len(params.SubTablesShards) != len(params.SourceReq.GetPartitionTableInfo().SubTableNames) { - panic(fmt.Sprintf("shards number must be equal to sub tables number, shardNumber:%d, subTableNumber:%d", len(params.SubTablesShards), len(params.SourceReq.GetPartitionTableInfo().SubTableNames))) - } - - shardVersions := req.p.relatedVersionInfo.ShardWithVersion - shardTableMetaDatas := make(map[storage.ShardID][]metadata.CreateTableMetadataRequest, 0) - for i, subTableShard := range params.SubTablesShards { - tableMetaData := metadata.CreateTableMetadataRequest{ - SchemaName: params.SourceReq.GetSchemaName(), - TableName: params.SourceReq.GetPartitionTableInfo().SubTableNames[i], - PartitionInfo: storage.PartitionInfo{Info: nil}, - } - shardTableMetaDatas[subTableShard.ShardInfo.ID] = append(shardTableMetaDatas[subTableShard.ShardInfo.ID], tableMetaData) - } - succeedCh := make(chan bool) - errCh := make(chan error) - for shardID, tableMetaDatas := range shardTableMetaDatas { - shardVersion := shardVersions[shardID] - go createDataTables(req, shardID, tableMetaDatas, shardVersion, succeedCh, errCh) - } - - goRoutineNumber := len(shardTableMetaDatas) - for { - select { - case err := <-errCh: - procedure.CancelEventWithLog(event, err, "create data tables") - return - case <-succeedCh: - goRoutineNumber-- - if goRoutineNumber == 0 { - return - } - } - } -} - -func createDataTables(req *callbackRequest, shardID storage.ShardID, tableMetaDatas []metadata.CreateTableMetadataRequest, shardVersion uint64, succeedCh chan bool, errCh chan error) { - params := req.p.params - - for _, tableMetaData := range tableMetaDatas { - result, err := params.ClusterMetadata.CreateTableMetadata(req.ctx, tableMetaData) - if err != nil { - errCh <- errors.WithMessage(err, "create table metadata") - return - } - - shardVersionUpdate := metadata.ShardVersionUpdate{ - ShardID: shardID, - LatestVersion: shardVersion, - } - - latestShardVersion, err := ddl.CreateTableOnShard(req.ctx, params.ClusterMetadata, params.Dispatch, shardID, ddl.BuildCreateTableRequest(result.Table, shardVersionUpdate, params.SourceReq)) - if err != nil { - errCh <- errors.WithMessage(err, "dispatch create table on shard") - return - } - - err = params.ClusterMetadata.AddTableTopology(req.ctx, metadata.ShardVersionUpdate{ - ShardID: shardID, - LatestVersion: latestShardVersion, - }, result.Table) - if err != nil { - errCh <- errors.WithMessage(err, "create table metadata") - return - } - shardVersion++ - } - succeedCh <- true -} - -func finishCallback(event *fsm.Event) { - req, err := procedure.GetRequestFromEvent[*callbackRequest](event) - if err != nil { - procedure.CancelEventWithLog(event, err, "get request from event") - return - } - log.Info("create partition table finish", zap.String("tableName", req.p.params.SourceReq.GetName())) - - assert.Assert(req.p.createPartitionTableResult != nil) - var versionUpdate metadata.ShardVersionUpdate - if err := req.p.params.OnSucceeded(metadata.CreateTableResult{ - Table: req.p.createPartitionTableResult.Table, - ShardVersionUpdate: versionUpdate, - }); err != nil { - procedure.CancelEventWithLog(event, err, "create partition table on succeeded") - return - } -} - -func (p *Procedure) updateStateWithLock(state procedure.State) { - p.lock.Lock() - defer 
p.lock.Unlock() - - p.state = state -} - -func (p *Procedure) persist(ctx context.Context) error { - meta, err := p.convertToMeta() - if err != nil { - return errors.WithMessage(err, "convert to meta") - } - err = p.params.Storage.CreateOrUpdate(ctx, meta) - if err != nil { - return errors.WithMessage(err, "createOrUpdate procedure storage") - } - return nil -} - -// TODO: Replace rawData with structure defined by proto. -type rawData struct { - ID uint64 - FsmState string - State procedure.State - - CreateTableResult *metadata.CreateTableResult - PartitionTableShards []metadata.ShardNodeWithVersion - SubTablesShards []metadata.ShardNodeWithVersion -} - -func (p *Procedure) convertToMeta() (procedure.Meta, error) { - p.lock.RLock() - defer p.lock.RUnlock() - - rawData := rawData{ - ID: p.params.ID, - FsmState: p.fsm.Current(), - State: p.state, - CreateTableResult: nil, - PartitionTableShards: []metadata.ShardNodeWithVersion{}, - SubTablesShards: p.params.SubTablesShards, - } - rawDataBytes, err := json.Marshal(rawData) - if err != nil { - var emptyMeta procedure.Meta - return emptyMeta, procedure.ErrEncodeRawData.WithCausef("marshal raw data, procedureID:%v, err:%v", p.params.ID, err) - } - - meta := procedure.Meta{ - ID: p.params.ID, - Kind: procedure.CreatePartitionTable, - State: p.state, - - RawData: rawDataBytes, - } - - return meta, nil -} diff --git a/horaemeta/server/coordinator/procedure/ddl/createpartitiontable/create_partition_table_test.go b/horaemeta/server/coordinator/procedure/ddl/createpartitiontable/create_partition_table_test.go deleted file mode 100644 index 2c9f7922bb..0000000000 --- a/horaemeta/server/coordinator/procedure/ddl/createpartitiontable/create_partition_table_test.go +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package createpartitiontable_test - -import ( - "context" - "testing" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/ddl/createpartitiontable" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/test" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaservicepb" - "github.com/stretchr/testify/require" -) - -func TestCreatePartitionTable(t *testing.T) { - re := require.New(t) - ctx := context.Background() - dispatch := test.MockDispatch{} - s := test.NewTestStorage(t) - c := test.InitStableCluster(ctx, t) - - shardNode := c.GetMetadata().GetClusterSnapshot().Topology.ClusterView.ShardNodes[0] - - request := &metaservicepb.CreateTableRequest{ - Header: &metaservicepb.RequestHeader{ - Node: shardNode.NodeName, - ClusterName: test.ClusterName, - }, - PartitionTableInfo: &metaservicepb.PartitionTableInfo{ - SubTableNames: []string{"p1", "p2"}, - }, - SchemaName: test.TestSchemaName, - Name: test.TestTableName0, - } - - shardPicker := coordinator.NewLeastTableShardPicker() - subTableShards, err := shardPicker.PickShards(ctx, c.GetMetadata().GetClusterSnapshot(), len(request.GetPartitionTableInfo().SubTableNames)) - - shardNodesWithVersion := make([]metadata.ShardNodeWithVersion, 0, len(subTableShards)) - for _, subTableShard := range subTableShards { - shardView, exists := c.GetMetadata().GetClusterSnapshot().Topology.ShardViewsMapping[subTableShard.ID] - re.True(exists) - shardNodesWithVersion = append(shardNodesWithVersion, metadata.ShardNodeWithVersion{ - ShardInfo: metadata.ShardInfo{ - ID: shardView.ShardID, - Role: subTableShard.ShardRole, - Version: shardView.Version, - Status: storage.ShardStatusUnknown, - }, - ShardNode: subTableShard, - }) - } - - re.NoError(err) - procedure, err := createpartitiontable.NewProcedure(createpartitiontable.ProcedureParams{ - ID: 0, - ClusterMetadata: c.GetMetadata(), - ClusterSnapshot: c.GetMetadata().GetClusterSnapshot(), - Dispatch: dispatch, - Storage: s, - SourceReq: request, - SubTablesShards: shardNodesWithVersion, - OnSucceeded: func(result metadata.CreateTableResult) error { - return nil - }, - OnFailed: func(err error) error { - return nil - }, - }) - re.NoError(err) - - err = procedure.Start(ctx) - re.NoError(err) -} diff --git a/horaemeta/server/coordinator/procedure/ddl/createtable/create_table.go b/horaemeta/server/coordinator/procedure/ddl/createtable/create_table.go deleted file mode 100644 index 292f0900ae..0000000000 --- a/horaemeta/server/coordinator/procedure/ddl/createtable/create_table.go +++ /dev/null @@ -1,329 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package createtable - -import ( - "context" - "sync" - - "github.com/apache/incubator-horaedb-meta/pkg/assert" - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator/eventdispatch" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/ddl" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaservicepb" - "github.com/looplab/fsm" - "github.com/pkg/errors" - "go.uber.org/zap" -) - -const ( - eventCheckTableExists = "EventCheckTableExists" - eventCreateMetadata = "EventCreateMetadata" - eventCreateOnShard = "EventCreateOnShard" - eventFinish = "EventFinish" - - stateBegin = "StateBegin" - stateCheckTableExists = "StateCheckTableExists" - stateCreateMetadata = "StateCreateMetadata" - stateCreateOnShard = "StateCreateOnShard" - stateFinish = "StateFinish" -) - -var ( - createTableEvents = fsm.Events{ - {Name: eventCheckTableExists, Src: []string{stateBegin}, Dst: stateCheckTableExists}, - {Name: eventCreateMetadata, Src: []string{stateCheckTableExists}, Dst: stateCreateMetadata}, - {Name: eventCreateOnShard, Src: []string{stateCreateMetadata}, Dst: stateCreateOnShard}, - {Name: eventFinish, Src: []string{stateCreateOnShard}, Dst: stateFinish}, - } - createTableCallbacks = fsm.Callbacks{ - eventCheckTableExists: checkTableExists, - eventCreateMetadata: createMetadata, - eventCreateOnShard: createOnShard, - eventFinish: createFinish, - } -) - -func checkTableExists(event *fsm.Event) { - req, err := procedure.GetRequestFromEvent[*callbackRequest](event) - if err != nil { - procedure.CancelEventWithLog(event, err, "get request from event") - return - } - params := req.p.params - - // Check whether the table metadata already exists. - table, exists, err := params.ClusterMetadata.GetTable(params.SourceReq.GetSchemaName(), params.SourceReq.GetName()) - if err != nil { - procedure.CancelEventWithLog(event, err, "get table metadata") - return - } - if !exists { - return - } - - // Check whether the table shard mapping already exists. 
- _, exists = params.ClusterMetadata.GetTableShard(req.ctx, table) - if exists { - procedure.CancelEventWithLog(event, metadata.ErrTableAlreadyExists, "table shard already exists") - return - } -} - -func createMetadata(event *fsm.Event) { - req, err := procedure.GetRequestFromEvent[*callbackRequest](event) - if err != nil { - procedure.CancelEventWithLog(event, err, "get request from event") - return - } - params := req.p.params - - _, exists, err := params.ClusterMetadata.GetTable(params.SourceReq.GetSchemaName(), params.SourceReq.GetName()) - if err != nil { - procedure.CancelEventWithLog(event, err, "get table metadata") - return - } - if exists { - log.Info("table metadata already exists", zap.String("schemaName", params.SourceReq.GetSchemaName()), zap.String("tableName", params.SourceReq.GetName())) - return - } - - createTableMetadataRequest := metadata.CreateTableMetadataRequest{ - SchemaName: params.SourceReq.GetSchemaName(), - TableName: params.SourceReq.GetName(), - PartitionInfo: storage.PartitionInfo{Info: params.SourceReq.PartitionTableInfo.GetPartitionInfo()}, - } - _, err = params.ClusterMetadata.CreateTableMetadata(req.ctx, createTableMetadataRequest) - if err != nil { - procedure.CancelEventWithLog(event, err, "create table metadata") - return - } - - log.Debug("create table metadata finish", zap.String("tableName", createTableMetadataRequest.TableName)) -} - -func createOnShard(event *fsm.Event) { - req, err := procedure.GetRequestFromEvent[*callbackRequest](event) - if err != nil { - procedure.CancelEventWithLog(event, err, "get request from event") - return - } - params := req.p.params - - table, ok, err := params.ClusterMetadata.GetTable(params.SourceReq.GetSchemaName(), params.SourceReq.GetName()) - if err != nil { - procedure.CancelEventWithLog(event, err, "get table metadata failed", zap.String("schemaName", params.SourceReq.GetSchemaName()), zap.String("tableName", params.SourceReq.GetName())) - return - } - if !ok { - procedure.CancelEventWithLog(event, err, "table metadata not found", zap.String("schemaName", params.SourceReq.GetSchemaName()), zap.String("tableName", params.SourceReq.GetName())) - return - } - - shardVersionUpdate := metadata.ShardVersionUpdate{ - ShardID: params.ShardID, - LatestVersion: req.p.relatedVersionInfo.ShardWithVersion[params.ShardID], - } - - createTableRequest := ddl.BuildCreateTableRequest(table, shardVersionUpdate, params.SourceReq) - latestShardVersion, err := ddl.CreateTableOnShard(req.ctx, params.ClusterMetadata, params.Dispatch, params.ShardID, createTableRequest) - if err != nil { - procedure.CancelEventWithLog(event, err, "dispatch create table on shard") - return - } - - log.Debug("dispatch createTableOnShard finish", zap.String("tableName", table.Name)) - - shardVersionUpdate = metadata.ShardVersionUpdate{ - ShardID: params.ShardID, - LatestVersion: latestShardVersion, - } - - err = params.ClusterMetadata.AddTableTopology(req.ctx, shardVersionUpdate, table) - if err != nil { - procedure.CancelEventWithLog(event, err, "add table topology") - return - } - - req.createTableResult = &metadata.CreateTableResult{ - Table: table, - ShardVersionUpdate: shardVersionUpdate, - } - - log.Debug("add table topology finish", zap.String("tableName", table.Name)) -} - -func createFinish(event *fsm.Event) { - req, err := procedure.GetRequestFromEvent[*callbackRequest](event) - if err != nil { - procedure.CancelEventWithLog(event, err, "get request from event") - return - } - params := req.p.params - - if err := 
req.p.params.ClusterMetadata.DeleteTableAssignedShard(req.ctx, params.SourceReq.GetSchemaName(), params.SourceReq.GetName()); err != nil { - log.Warn("delete assign table failed", zap.String("schemaName", params.SourceReq.GetSchemaName()), zap.String("tableName", params.SourceReq.GetName())) - } - - assert.Assert(req.createTableResult != nil) - if err := req.p.params.OnSucceeded(*req.createTableResult); err != nil { - log.Error("exec success callback failed") - } -} - -// callbackRequest is fsm callbacks param. -type callbackRequest struct { - ctx context.Context - p *Procedure - - createTableResult *metadata.CreateTableResult -} - -type ProcedureParams struct { - Dispatch eventdispatch.Dispatch - ClusterMetadata *metadata.ClusterMetadata - ClusterSnapshot metadata.Snapshot - ID uint64 - ShardID storage.ShardID - SourceReq *metaservicepb.CreateTableRequest - OnSucceeded func(metadata.CreateTableResult) error - OnFailed func(error) error -} - -func NewProcedure(params ProcedureParams) (procedure.Procedure, error) { - fsm := fsm.NewFSM( - stateBegin, - createTableEvents, - createTableCallbacks, - ) - - relatedVersionInfo, err := buildRelatedVersionInfo(params) - if err != nil { - return nil, err - } - - return &Procedure{ - fsm: fsm, - params: params, - relatedVersionInfo: relatedVersionInfo, - state: procedure.StateInit, - lock: sync.RWMutex{}, - }, nil -} - -type Procedure struct { - fsm *fsm.FSM - params ProcedureParams - relatedVersionInfo procedure.RelatedVersionInfo - - // Protect the state. - lock sync.RWMutex - state procedure.State -} - -func (p *Procedure) RelatedVersionInfo() procedure.RelatedVersionInfo { - return p.relatedVersionInfo -} - -func buildRelatedVersionInfo(params ProcedureParams) (procedure.RelatedVersionInfo, error) { - shardWithVersion := make(map[storage.ShardID]uint64, 1) - shardView, exists := params.ClusterSnapshot.Topology.ShardViewsMapping[params.ShardID] - if !exists { - return procedure.RelatedVersionInfo{}, errors.WithMessagef(metadata.ErrShardNotFound, "shard not found in topology, shardID:%d", params.ShardID) - } - shardWithVersion[params.ShardID] = shardView.Version - return procedure.RelatedVersionInfo{ - ClusterID: params.ClusterSnapshot.Topology.ClusterView.ClusterID, - ShardWithVersion: shardWithVersion, - ClusterVersion: params.ClusterSnapshot.Topology.ClusterView.Version, - }, nil -} - -func (p *Procedure) Priority() procedure.Priority { - return procedure.PriorityLow -} - -func (p *Procedure) ID() uint64 { - return p.params.ID -} - -func (p *Procedure) Kind() procedure.Kind { - return procedure.CreateTable -} - -func (p *Procedure) Start(ctx context.Context) error { - p.updateState(procedure.StateRunning) - - req := &callbackRequest{ - ctx: ctx, - p: p, - createTableResult: nil, - } - - for { - switch p.fsm.Current() { - case stateBegin: - if err := p.fsm.Event(eventCheckTableExists, req); err != nil { - _ = p.params.OnFailed(err) - return err - } - case stateCheckTableExists: - if err := p.fsm.Event(eventCreateMetadata, req); err != nil { - _ = p.params.OnFailed(err) - return err - } - case stateCreateMetadata: - if err := p.fsm.Event(eventCreateOnShard, req); err != nil { - _ = p.params.OnFailed(err) - return err - } - case stateCreateOnShard: - if err := p.fsm.Event(eventFinish, req); err != nil { - _ = p.params.OnFailed(err) - return err - } - case stateFinish: - p.updateState(procedure.StateFinished) - return nil - } - } -} - -func (p *Procedure) Cancel(_ context.Context) error { - p.updateState(procedure.StateCancelled) - return nil -} - 
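Like the other removed procedures, create_table.go drives a looplab/fsm state machine through a fixed chain of events until it reaches its finish state. The sketch below reproduces only that drive-loop pattern against the pre-v1 looplab/fsm API these files build against; the state and event names are placeholders, not the ones used by the real procedures.

package main

import (
	"fmt"

	"github.com/looplab/fsm"
)

func main() {
	// Build an FSM with a linear event chain, mirroring createTableEvents
	// and createTableCallbacks above in miniature.
	f := fsm.NewFSM(
		"StateBegin",
		fsm.Events{
			{Name: "EventPrepare", Src: []string{"StateBegin"}, Dst: "StatePrepared"},
			{Name: "EventFinish", Src: []string{"StatePrepared"}, Dst: "StateFinish"},
		},
		fsm.Callbacks{
			"EventPrepare": func(e *fsm.Event) { fmt.Println("prepare step") },
			"EventFinish":  func(e *fsm.Event) { fmt.Println("finish step") },
		},
	)

	// Map each state to the event that moves it forward, the same role the
	// switch statement plays inside Procedure.Start.
	next := map[string]string{
		"StateBegin":    "EventPrepare",
		"StatePrepared": "EventFinish",
	}

	for f.Current() != "StateFinish" {
		if err := f.Event(next[f.Current()]); err != nil {
			fmt.Println("procedure failed:", err)
			return
		}
	}
	fmt.Println("procedure finished")
}

Keeping the transition table and per-event callbacks separate from the drive loop is presumably why each procedure serializes p.fsm.Current() into its persisted rawData: the stored FsmState string is enough to know which step comes next.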
-func (p *Procedure) State() procedure.State { - p.lock.RLock() - defer p.lock.RUnlock() - - return p.state -} - -func (p *Procedure) updateState(state procedure.State) { - p.lock.Lock() - defer p.lock.Unlock() - - p.state = state -} diff --git a/horaemeta/server/coordinator/procedure/ddl/createtable/create_table_test.go b/horaemeta/server/coordinator/procedure/ddl/createtable/create_table_test.go deleted file mode 100644 index 1dd08932b9..0000000000 --- a/horaemeta/server/coordinator/procedure/ddl/createtable/create_table_test.go +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package createtable_test - -import ( - "context" - "fmt" - "testing" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/ddl/createtable" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/test" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaservicepb" - "github.com/stretchr/testify/require" -) - -func TestCreateTable(t *testing.T) { - re := require.New(t) - ctx := context.Background() - dispatch := test.MockDispatch{} - c := test.InitStableCluster(ctx, t) - - // Select a shard to create table. - snapshot := c.GetMetadata().GetClusterSnapshot() - shardNode := snapshot.Topology.ClusterView.ShardNodes[0] - - // New CreateTableProcedure to create a new table. - p, err := createtable.NewProcedure(createtable.ProcedureParams{ - Dispatch: dispatch, - ClusterMetadata: c.GetMetadata(), - ClusterSnapshot: snapshot, - ID: uint64(1), - ShardID: shardNode.ID, - SourceReq: &metaservicepb.CreateTableRequest{ - Header: &metaservicepb.RequestHeader{ - Node: shardNode.NodeName, - ClusterName: test.ClusterName, - }, - SchemaName: test.TestSchemaName, - Name: test.TestTableName0, - }, - OnSucceeded: func(_ metadata.CreateTableResult) error { - return nil - }, - OnFailed: func(err error) error { - panic(fmt.Sprintf("create table failed, err:%v", err)) - }, - }) - re.NoError(err) - err = p.Start(context.Background()) - re.NoError(err) -} diff --git a/horaemeta/server/coordinator/procedure/ddl/droppartitiontable/create_drop_partition_table_test.go b/horaemeta/server/coordinator/procedure/ddl/droppartitiontable/create_drop_partition_table_test.go deleted file mode 100644 index 6ec07bff4e..0000000000 --- a/horaemeta/server/coordinator/procedure/ddl/droppartitiontable/create_drop_partition_table_test.go +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package droppartitiontable_test - -import ( - "context" - "fmt" - "testing" - - "github.com/apache/incubator-horaedb-meta/server/cluster" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator" - "github.com/apache/incubator-horaedb-meta/server/coordinator/eventdispatch" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/ddl/createpartitiontable" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/ddl/droppartitiontable" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/test" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/apache/incubator-horaedb-proto/golang/pkg/clusterpb" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaservicepb" - "github.com/stretchr/testify/require" -) - -func TestCreateAndDropPartitionTable(t *testing.T) { - re := require.New(t) - ctx := context.Background() - dispatch := test.MockDispatch{} - c := test.InitStableCluster(ctx, t) - s := test.NewTestStorage(t) - - shardNode := c.GetMetadata().GetClusterSnapshot().Topology.ClusterView.ShardNodes[0] - - shardPicker := coordinator.NewLeastTableShardPicker() - - testTableNum := 8 - testSubTableNum := 4 - - // Create table. - for i := 0; i < testTableNum; i++ { - tableName := fmt.Sprintf("%s_%d", test.TestTableName0, i) - subTableNames := genSubTables(tableName, testSubTableNum) - testCreatePartitionTable(ctx, t, dispatch, c, s, shardPicker, shardNode.NodeName, tableName, subTableNames) - } - - // Check get table. - for i := 0; i < testTableNum; i++ { - tableName := fmt.Sprintf("%s_%d", test.TestTableName0, i) - table := checkTable(t, c, tableName, true) - re.Equal(table.PartitionInfo.Info != nil, true) - subTableNames := genSubTables(tableName, testSubTableNum) - for _, subTableName := range subTableNames { - checkTable(t, c, subTableName, true) - } - } - - // Drop table. - for i := 0; i < testTableNum; i++ { - tableName := fmt.Sprintf("%s_%d", test.TestTableName0, i) - subTableNames := genSubTables(tableName, testSubTableNum) - testDropPartitionTable(t, dispatch, c, s, shardNode.NodeName, tableName, subTableNames) - } - - // Check table not exists. 
- for i := 0; i < testTableNum; i++ { - tableName := fmt.Sprintf("%s_%d", test.TestTableName0, i) - checkTable(t, c, tableName, false) - subTableNames := genSubTables(tableName, testSubTableNum) - for _, subTableName := range subTableNames { - checkTable(t, c, subTableName, false) - } - } -} - -func testCreatePartitionTable(ctx context.Context, t *testing.T, dispatch eventdispatch.Dispatch, c *cluster.Cluster, s procedure.Storage, shardPicker coordinator.ShardPicker, nodeName string, tableName string, subTableNames []string) { - re := require.New(t) - - partitionInfo := clusterpb.PartitionInfo{ - Info: nil, - } - request := &metaservicepb.CreateTableRequest{ - Header: &metaservicepb.RequestHeader{ - Node: nodeName, - ClusterName: test.ClusterName, - }, - PartitionTableInfo: &metaservicepb.PartitionTableInfo{ - SubTableNames: subTableNames, - PartitionInfo: &partitionInfo, - }, - SchemaName: test.TestSchemaName, - Name: tableName, - } - - subTableShards, err := shardPicker.PickShards(ctx, c.GetMetadata().GetClusterSnapshot(), len(request.GetPartitionTableInfo().SubTableNames)) - re.NoError(err) - - shardNodesWithVersion := make([]metadata.ShardNodeWithVersion, 0, len(subTableShards)) - for _, subTableShard := range subTableShards { - shardView, exists := c.GetMetadata().GetClusterSnapshot().Topology.ShardViewsMapping[subTableShard.ID] - re.True(exists) - shardNodesWithVersion = append(shardNodesWithVersion, metadata.ShardNodeWithVersion{ - ShardInfo: metadata.ShardInfo{ - ID: shardView.ShardID, - Role: subTableShard.ShardRole, - Version: shardView.Version, - Status: storage.ShardStatusUnknown, - }, - ShardNode: subTableShard, - }) - } - - procedure, err := createpartitiontable.NewProcedure(createpartitiontable.ProcedureParams{ - ID: 0, - ClusterMetadata: c.GetMetadata(), - ClusterSnapshot: c.GetMetadata().GetClusterSnapshot(), - Dispatch: dispatch, - Storage: s, - SourceReq: request, - SubTablesShards: shardNodesWithVersion, - OnSucceeded: func(_ metadata.CreateTableResult) error { - return nil - }, - OnFailed: func(err error) error { - return nil - }, - }) - re.NoError(err) - - err = procedure.Start(ctx) - re.NoError(err) -} - -func testDropPartitionTable(t *testing.T, dispatch eventdispatch.Dispatch, c *cluster.Cluster, s procedure.Storage, nodeName string, tableName string, subTableNames []string) { - re := require.New(t) - // Create DropPartitionTableProcedure to drop table. 
- partitionTableInfo := &metaservicepb.PartitionTableInfo{ - PartitionInfo: nil, - SubTableNames: subTableNames, - } - req := droppartitiontable.ProcedureParams{ - ID: uint64(1), Dispatch: dispatch, ClusterMetadata: c.GetMetadata(), ClusterSnapshot: c.GetMetadata().GetClusterSnapshot(), SourceReq: &metaservicepb.DropTableRequest{ - Header: &metaservicepb.RequestHeader{ - Node: nodeName, - ClusterName: test.ClusterName, - }, - SchemaName: test.TestSchemaName, - Name: tableName, - PartitionTableInfo: partitionTableInfo, - }, OnSucceeded: func(_ metadata.TableInfo) error { - return nil - }, OnFailed: func(_ error) error { - return nil - }, Storage: s, - } - - procedure, ok, err := droppartitiontable.NewProcedure(req) - re.NoError(err) - re.True(ok) - err = procedure.Start(context.Background()) - re.NoError(err) -} - -func genSubTables(tableName string, tableNum int) []string { - var subTableNames []string - for j := 0; j < tableNum; j++ { - subTableNames = append(subTableNames, fmt.Sprintf("%s_%d", tableName, j)) - } - return subTableNames -} - -func checkTable(t *testing.T, c *cluster.Cluster, tableName string, exist bool) storage.Table { - re := require.New(t) - table, b, err := c.GetMetadata().GetTable(test.TestSchemaName, tableName) - re.NoError(err) - re.Equal(b, exist) - return table -} diff --git a/horaemeta/server/coordinator/procedure/ddl/droppartitiontable/drop_partition_table.go b/horaemeta/server/coordinator/procedure/ddl/droppartitiontable/drop_partition_table.go deleted file mode 100644 index f47ba773ec..0000000000 --- a/horaemeta/server/coordinator/procedure/ddl/droppartitiontable/drop_partition_table.go +++ /dev/null @@ -1,424 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package droppartitiontable - -import ( - "context" - "encoding/json" - "fmt" - "sync" - - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator/eventdispatch" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/ddl" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaservicepb" - "github.com/looplab/fsm" - "github.com/pkg/errors" - "go.uber.org/zap" - "golang.org/x/sync/errgroup" -) - -// fsm state change: -// ┌────────┐ ┌────────────────┐ ┌────────────────────┐ ┌───────────┐ -// │ Begin ├─────▶ DropDataTable ├─────▶ DropPartitionTable ├──────▶ Finish │ -// └────────┘ └────────────────┘ └────────────────────┘ └───────────┘ -const ( - eventDropDataTable = "EventDropDataTable" - eventDropPartitionTable = "EventDropPartitionTable" - eventFinish = "EventFinish" - - stateBegin = "StateBegin" - stateDropDataTable = "StateDropDataTable" - stateDropPartitionTable = "StateDropPartitionTable" - stateFinish = "StateFinish" -) - -var ( - createDropPartitionTableEvents = fsm.Events{ - {Name: eventDropDataTable, Src: []string{stateBegin}, Dst: stateDropDataTable}, - {Name: eventDropPartitionTable, Src: []string{stateDropDataTable}, Dst: stateDropPartitionTable}, - {Name: eventFinish, Src: []string{stateDropPartitionTable}, Dst: stateFinish}, - } - createDropPartitionTableCallbacks = fsm.Callbacks{ - eventDropDataTable: dropDataTablesCallback, - eventDropPartitionTable: dropPartitionTableCallback, - eventFinish: finishCallback, - } -) - -type Procedure struct { - fsm *fsm.FSM - params ProcedureParams - relatedVersionInfo procedure.RelatedVersionInfo - - // Protect the state. 
- lock sync.RWMutex - state procedure.State -} - -type ProcedureParams struct { - ID uint64 - ClusterMetadata *metadata.ClusterMetadata - ClusterSnapshot metadata.Snapshot - Dispatch eventdispatch.Dispatch - Storage procedure.Storage - SourceReq *metaservicepb.DropTableRequest - OnSucceeded func(result metadata.TableInfo) error - OnFailed func(error) error -} - -func NewProcedure(params ProcedureParams) (*Procedure, bool, error) { - fsm := fsm.NewFSM( - stateBegin, - createDropPartitionTableEvents, - createDropPartitionTableCallbacks, - ) - relatedVersionInfo, err := buildRelatedVersionInfo(params) - if err != nil { - return nil, false, err - } - - return &Procedure{ - fsm: fsm, - params: params, - relatedVersionInfo: relatedVersionInfo, - lock: sync.RWMutex{}, - state: stateBegin, - }, true, nil -} - -func buildRelatedVersionInfo(params ProcedureParams) (procedure.RelatedVersionInfo, error) { - tableShardMapping := make(map[storage.TableID]storage.ShardID, len(params.SourceReq.PartitionTableInfo.GetSubTableNames())) - for shardID, shardView := range params.ClusterSnapshot.Topology.ShardViewsMapping { - for _, tableID := range shardView.TableIDs { - tableShardMapping[tableID] = shardID - } - } - shardViewWithVersion := make(map[storage.ShardID]uint64, 0) - for _, subTableName := range params.SourceReq.PartitionTableInfo.GetSubTableNames() { - table, exists, err := params.ClusterMetadata.GetTable(params.SourceReq.GetSchemaName(), subTableName) - if err != nil { - return procedure.RelatedVersionInfo{}, errors.WithMessagef(err, "get sub table, tableName:%s", subTableName) - } - if !exists { - continue - } - shardID, exists := tableShardMapping[table.ID] - if !exists { - continue - } - shardView, exists := params.ClusterSnapshot.Topology.ShardViewsMapping[shardID] - if !exists { - return procedure.RelatedVersionInfo{}, errors.WithMessagef(metadata.ErrShardNotFound, "shard not found in topology, shardID:%d", shardID) - } - shardViewWithVersion[shardID] = shardView.Version - } - - relatedVersionInfo := procedure.RelatedVersionInfo{ - ClusterID: params.ClusterSnapshot.Topology.ClusterView.ClusterID, - ShardWithVersion: shardViewWithVersion, - ClusterVersion: params.ClusterSnapshot.Topology.ClusterView.Version, - } - return relatedVersionInfo, nil -} - -func (p *Procedure) ID() uint64 { - return p.params.ID -} - -func (p *Procedure) Kind() procedure.Kind { - return procedure.DropPartitionTable -} - -func (p *Procedure) RelatedVersionInfo() procedure.RelatedVersionInfo { - return p.relatedVersionInfo -} - -func (p *Procedure) Priority() procedure.Priority { - return procedure.PriorityMed -} - -func (p *Procedure) Start(ctx context.Context) error { - p.updateStateWithLock(procedure.StateRunning) - - dropPartitionTableRequest := &callbackRequest{ - ctx: ctx, - p: p, - // FIXME: shall we initialize the table at the first? 
- table: nil, - } - - for { - switch p.fsm.Current() { - case stateBegin: - if err := p.persist(ctx); err != nil { - return errors.WithMessage(err, "drop partition table procedure persist") - } - if err := p.fsm.Event(eventDropDataTable, dropPartitionTableRequest); err != nil { - p.updateStateWithLock(procedure.StateFailed) - _ = p.params.OnFailed(err) - return errors.WithMessage(err, "drop partition table procedure") - } - case stateDropDataTable: - if err := p.persist(ctx); err != nil { - return errors.WithMessage(err, "drop partition table procedure persist") - } - if err := p.fsm.Event(eventDropPartitionTable, dropPartitionTableRequest); err != nil { - p.updateStateWithLock(procedure.StateFailed) - _ = p.params.OnFailed(err) - return errors.WithMessage(err, "drop partition table procedure drop data table") - } - case stateDropPartitionTable: - if err := p.persist(ctx); err != nil { - return errors.WithMessage(err, "drop partition table procedure persist") - } - if err := p.fsm.Event(eventFinish, dropPartitionTableRequest); err != nil { - p.updateStateWithLock(procedure.StateFailed) - _ = p.params.OnFailed(err) - return errors.WithMessage(err, "drop partition table procedure drop partition table") - } - case stateFinish: - p.updateStateWithLock(procedure.StateFinished) - if err := p.persist(ctx); err != nil { - _ = p.params.OnFailed(err) - return errors.WithMessage(err, "drop partition table procedure persist") - } - return nil - } - } -} - -func (p *Procedure) Cancel(_ context.Context) error { - p.updateStateWithLock(procedure.StateCancelled) - return nil -} - -func (p *Procedure) State() procedure.State { - p.lock.RLock() - defer p.lock.RUnlock() - - return p.state -} - -func (p *Procedure) updateStateWithLock(state procedure.State) { - p.lock.Lock() - defer p.lock.Unlock() - - p.state = state -} - -func (p *Procedure) persist(ctx context.Context) error { - meta, err := p.convertToMeta() - if err != nil { - return errors.WithMessage(err, "convert to meta") - } - err = p.params.Storage.CreateOrUpdate(ctx, meta) - if err != nil { - return errors.WithMessage(err, "createOrUpdate procedure storage") - } - return nil -} - -func (p *Procedure) convertToMeta() (procedure.Meta, error) { - p.lock.RLock() - defer p.lock.RUnlock() - - rawData := rawData{ - ID: p.params.ID, - FsmState: p.fsm.Current(), - State: p.state, - DropTableRequest: p.params.SourceReq, - } - rawDataBytes, err := json.Marshal(rawData) - if err != nil { - var emptyMeta procedure.Meta - return emptyMeta, procedure.ErrEncodeRawData.WithCausef("marshal raw data, procedureID:%d, err:%v", p.params.ID, err) - } - - meta := procedure.Meta{ - ID: p.params.ID, - Kind: procedure.DropPartitionTable, - State: p.state, - - RawData: rawDataBytes, - } - - return meta, nil -} - -type rawData struct { - ID uint64 - FsmState string - State procedure.State - - DropTableRequest *metaservicepb.DropTableRequest -} - -type callbackRequest struct { - ctx context.Context - p *Procedure - - table *storage.Table -} - -func (d *callbackRequest) schemaName() string { - return d.p.params.SourceReq.GetSchemaName() -} - -func (d *callbackRequest) tableName() string { - return d.p.params.SourceReq.GetName() -} - -// 1. Drop data tables in target nodes. 
-func dropDataTablesCallback(event *fsm.Event) { - req, err := procedure.GetRequestFromEvent[*callbackRequest](event) - if err != nil { - procedure.CancelEventWithLog(event, err, "get request from event") - return - } - params := req.p.params - - if len(params.SourceReq.PartitionTableInfo.SubTableNames) == 0 { - procedure.CancelEventWithLog(event, procedure.ErrEmptyPartitionNames, fmt.Sprintf("drop table, table:%s", params.SourceReq.Name)) - return - } - - shardVersions := req.p.relatedVersionInfo.ShardWithVersion - g, _ := errgroup.WithContext(req.ctx) - - // shardID -> tableNames - shardTables := make(map[storage.ShardID][]string) - for _, tableName := range params.SourceReq.PartitionTableInfo.GetSubTableNames() { - table, err := ddl.GetTableMetadata(params.ClusterMetadata, req.schemaName(), tableName) - if err != nil { - log.Warn("get table metadata failed", zap.String("tableName", tableName)) - continue - } - - shardVersionUpdate, shardExists, err := ddl.BuildShardVersionUpdate(table, params.ClusterMetadata, shardVersions) - if err != nil { - log.Error("get shard version by table", zap.String("tableName", tableName), zap.Error(err)) - procedure.CancelEventWithLog(event, err, "build shard version update", zap.String("tableName", tableName)) - return - } - // If the shard corresponding to this table does not exist, it means that the actual table creation failed. - // In order to ensure that the table can be deleted normally, we need to directly delete the metadata of the table. - if !shardExists { - _, err := params.ClusterMetadata.DropTableMetadata(req.ctx, req.schemaName(), tableName) - if err != nil { - procedure.CancelEventWithLog(event, err, "drop table metadata", zap.String("tableName", tableName)) - return - } - continue - } - - shardTables[shardVersionUpdate.ShardID] = append(shardTables[shardVersionUpdate.ShardID], tableName) - } - - for shardID, tableNames := range shardTables { - shardID := shardID - tableNames := tableNames - shardVersion := shardVersions[shardID] - g.Go(func() error { - return dispatchDropDataTable(req, params.Dispatch, params.ClusterMetadata, shardID, params.SourceReq.GetSchemaName(), tableNames, shardVersion) - }) - } - - err = g.Wait() - if err != nil { - procedure.CancelEventWithLog(event, err, "") - return - } -} - -// 2. Drop partition table in target node. 
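Note on the callback above: dropDataTablesCallback groups the partition's sub tables by shard and then drops each shard's tables concurrently through an errgroup, failing the whole step if any shard fails. A minimal standalone sketch of that fan-out pattern (the doDrop helper and the table names are illustrative, not taken from this patch):

package main

import (
	"context"
	"fmt"

	"golang.org/x/sync/errgroup"
)

// doDrop stands in for dispatchDropDataTable: it drops all tables that
// live on one shard, sequentially.
func doDrop(ctx context.Context, shardID uint32, tables []string) error {
	for _, t := range tables {
		fmt.Printf("drop table %s on shard %d\n", t, shardID)
	}
	return nil
}

func main() {
	// shardID -> tableNames, as built by the callback above.
	shardTables := map[uint32][]string{
		0: {"t_0", "t_1"},
		1: {"t_2"},
	}

	g, ctx := errgroup.WithContext(context.Background())
	for shardID, tables := range shardTables {
		shardID, tables := shardID, tables // capture loop variables (pre-Go 1.22 semantics)
		g.Go(func() error {
			return doDrop(ctx, shardID, tables)
		})
	}
	if err := g.Wait(); err != nil {
		// The first non-nil error cancels ctx and is returned here.
		fmt.Println("drop failed:", err)
	}
}

errgroup.WithContext cancels the shared context on the first error, which is why the real callback only needs to check the result of g.Wait().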
-func dropPartitionTableCallback(event *fsm.Event) { - req, err := procedure.GetRequestFromEvent[*callbackRequest](event) - if err != nil { - procedure.CancelEventWithLog(event, err, "get request from event") - return - } - - dropTableMetadataResult, err := req.p.params.ClusterMetadata.DropTableMetadata(req.ctx, req.schemaName(), req.tableName()) - if err != nil { - procedure.CancelEventWithLog(event, err, fmt.Sprintf("drop table, table:%s", req.tableName())) - return - } - - req.table = &dropTableMetadataResult.Table -} - -func finishCallback(event *fsm.Event) { - request, err := procedure.GetRequestFromEvent[*callbackRequest](event) - if err != nil { - procedure.CancelEventWithLog(event, err, "get request from event") - return - } - log.Info("drop partition table finish") - - tableInfo := metadata.TableInfo{ - ID: request.table.ID, - Name: request.table.Name, - SchemaID: request.table.SchemaID, - SchemaName: request.p.params.SourceReq.GetSchemaName(), - PartitionInfo: storage.PartitionInfo{Info: nil}, - CreatedAt: 0, - } - - if err = request.p.params.OnSucceeded(tableInfo); err != nil { - procedure.CancelEventWithLog(event, err, "drop partition table on succeeded") - return - } -} - -func dispatchDropDataTable(req *callbackRequest, dispatch eventdispatch.Dispatch, clusterMetadata *metadata.ClusterMetadata, shardID storage.ShardID, schema string, tableNames []string, shardVersion uint64) error { - for _, tableName := range tableNames { - table, err := ddl.GetTableMetadata(clusterMetadata, req.schemaName(), tableName) - if err != nil { - return errors.WithMessagef(err, "get table metadata, table:%s", tableName) - } - - shardVersionUpdate := metadata.ShardVersionUpdate{ - ShardID: shardID, - LatestVersion: shardVersion, - } - - latestShardVersion, err := ddl.DropTableOnShard(req.ctx, clusterMetadata, dispatch, schema, table, shardVersionUpdate) - if err != nil { - return errors.WithMessagef(err, "drop table, table:%s", tableName) - } - - err = clusterMetadata.DropTable(req.ctx, metadata.DropTableRequest{ - SchemaName: req.schemaName(), - TableName: tableName, - ShardID: shardID, - LatestVersion: latestShardVersion, - }) - if err != nil { - return errors.WithMessagef(err, "drop table, table:%s", tableName) - } - - shardVersion++ - } - return nil -} diff --git a/horaemeta/server/coordinator/procedure/ddl/droptable/create_drop_table_test.go b/horaemeta/server/coordinator/procedure/ddl/droptable/create_drop_table_test.go deleted file mode 100644 index 06f3a2f15f..0000000000 --- a/horaemeta/server/coordinator/procedure/ddl/droptable/create_drop_table_test.go +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package droptable_test - -import ( - "context" - "fmt" - "testing" - - "github.com/apache/incubator-horaedb-meta/server/cluster" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator/eventdispatch" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/ddl/createtable" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/ddl/droptable" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/test" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaservicepb" - "github.com/stretchr/testify/require" -) - -func TestCreateAndDropTable(t *testing.T) { - re := require.New(t) - ctx := context.Background() - dispatch := test.MockDispatch{} - c := test.InitStableCluster(ctx, t) - - nodeName := c.GetMetadata().GetClusterSnapshot().Topology.ClusterView.ShardNodes[0].NodeName - shardID := c.GetMetadata().GetClusterSnapshot().Topology.ClusterView.ShardNodes[0].ID - - testTableNum := 20 - // Create table. - for i := 0; i < testTableNum; i++ { - // Select a shard to open table. - snapshot := c.GetMetadata().GetClusterSnapshot() - tableName := fmt.Sprintf("%s_%d", test.TestTableName0, i) - testCreateTable(t, dispatch, c, snapshot, shardID, nodeName, tableName) - } - // Check get table. - for i := 0; i < testTableNum; i++ { - tableName := fmt.Sprintf("%s_%d", test.TestTableName0, i) - table, b, err := c.GetMetadata().GetTable(test.TestSchemaName, tableName) - re.NoError(err) - re.Equal(b, true) - re.NotNil(table) - } - - // Check tables by node. - var shardIDs []storage.ShardID - for i := 0; i < test.DefaultShardTotal; i++ { - shardIDs = append(shardIDs, storage.ShardID(i)) - } - shardTables := c.GetMetadata().GetShardTables(shardIDs) - tableTotal := 0 - for _, v := range shardTables { - tableTotal += len(v.Tables) - } - re.Equal(testTableNum, tableTotal) - - // Drop table. - for i := 0; i < testTableNum; i++ { - tableName := fmt.Sprintf("%s_%d", test.TestTableName0, i) - testDropTable(t, dispatch, c, nodeName, tableName) - } - // Check table not exists. - for i := 0; i < testTableNum; i++ { - tableName := fmt.Sprintf("%s_%d", test.TestTableName0, i) - _, b, err := c.GetMetadata().GetTable(test.TestSchemaName, tableName) - re.NoError(err) - re.Equal(b, false) - } - - // Check tables by node. - shardTables = c.GetMetadata().GetShardTables(shardIDs) - tableTotal = 0 - for _, v := range shardTables { - tableTotal += len(v.Tables) - } - re.Equal(tableTotal, 0) -} - -func testCreateTable(t *testing.T, dispatch eventdispatch.Dispatch, c *cluster.Cluster, snapshot metadata.Snapshot, shardID storage.ShardID, nodeName, tableName string) { - re := require.New(t) - // New CreateTableProcedure to create a new table. 
- p, err := createtable.NewProcedure(createtable.ProcedureParams{ - Dispatch: dispatch, - ClusterMetadata: c.GetMetadata(), - ClusterSnapshot: snapshot, - ID: uint64(1), - ShardID: shardID, - SourceReq: &metaservicepb.CreateTableRequest{ - Header: &metaservicepb.RequestHeader{ - Node: nodeName, - ClusterName: test.ClusterName, - }, - SchemaName: test.TestSchemaName, - Name: tableName, - }, - OnSucceeded: func(_ metadata.CreateTableResult) error { - return nil - }, - OnFailed: func(err error) error { - panic(fmt.Sprintf("create table failed, err:%v", err)) - }, - }) - re.NoError(err) - err = p.Start(context.Background()) - re.NoError(err) -} - -func testDropTable(t *testing.T, dispatch eventdispatch.Dispatch, c *cluster.Cluster, nodeName, tableName string) { - re := require.New(t) - // New DropTableProcedure to drop table. - procedure, ok, err := droptable.NewDropTableProcedure(droptable.ProcedureParams{ - ID: 0, - Dispatch: dispatch, - ClusterMetadata: c.GetMetadata(), - ClusterSnapshot: c.GetMetadata().GetClusterSnapshot(), - SourceReq: &metaservicepb.DropTableRequest{ - Header: &metaservicepb.RequestHeader{ - Node: nodeName, - ClusterName: test.ClusterName, - }, - SchemaName: test.TestSchemaName, - Name: tableName, - }, - OnSucceeded: func(_ metadata.TableInfo) error { - return nil - }, - OnFailed: func(_ error) error { - return nil - }, - }) - re.NoError(err) - re.True(ok) - err = procedure.Start(context.Background()) - re.NoError(err) -} diff --git a/horaemeta/server/coordinator/procedure/ddl/droptable/drop_table.go b/horaemeta/server/coordinator/procedure/ddl/droptable/drop_table.go deleted file mode 100644 index 6f704b7f7c..0000000000 --- a/horaemeta/server/coordinator/procedure/ddl/droptable/drop_table.go +++ /dev/null @@ -1,297 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package droptable - -import ( - "context" - "sync" - - "github.com/apache/incubator-horaedb-meta/pkg/assert" - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator/eventdispatch" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/ddl" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaservicepb" - "github.com/looplab/fsm" - "github.com/pkg/errors" - "go.uber.org/zap" -) - -const ( - eventPrepare = "EventPrepare" - eventFailed = "EventFailed" - eventSuccess = "EventSuccess" - - stateBegin = "StateBegin" - stateWaiting = "StateWaiting" - stateFinish = "StateFinish" - stateFailed = "StateFailed" -) - -var ( - dropTableEvents = fsm.Events{ - {Name: eventPrepare, Src: []string{stateBegin}, Dst: stateWaiting}, - {Name: eventSuccess, Src: []string{stateWaiting}, Dst: stateFinish}, - {Name: eventFailed, Src: []string{stateWaiting}, Dst: stateFailed}, - } - dropTableCallbacks = fsm.Callbacks{ - eventPrepare: prepareCallback, - eventFailed: failedCallback, - eventSuccess: successCallback, - } -) - -func prepareCallback(event *fsm.Event) { - req, err := procedure.GetRequestFromEvent[*callbackRequest](event) - if err != nil { - procedure.CancelEventWithLog(event, err, "get request from event") - return - } - params := req.p.params - - table, err := ddl.GetTableMetadata(params.ClusterMetadata, params.SourceReq.GetSchemaName(), params.SourceReq.GetName()) - if err != nil { - procedure.CancelEventWithLog(event, err, "get table metadata", zap.String("tableName", params.SourceReq.GetName()), zap.Error(err)) - return - } - req.droppedTable = &metadata.TableInfo{ - ID: table.ID, - Name: table.Name, - SchemaID: table.SchemaID, - SchemaName: params.SourceReq.GetSchemaName(), - PartitionInfo: table.PartitionInfo, - CreatedAt: table.CreatedAt, - } - - shardVersionUpdate, shardExists, err := ddl.BuildShardVersionUpdate(table, params.ClusterMetadata, req.p.relatedVersionInfo.ShardWithVersion) - if err != nil { - log.Error("get shard version by table", zap.String("tableName", params.SourceReq.GetName()), zap.Bool("shardExists", shardExists), zap.Error(err)) - procedure.CancelEventWithLog(event, err, "get shard version by table name", zap.String("tableName", params.SourceReq.GetName()), zap.Bool("shardExists", shardExists), zap.Error(err)) - return - } - // If the shard corresponding to this table does not exist, it means that the actual table creation failed. - // In order to ensure that the table can be deleted normally, we need to directly delete the metadata of the table. - if !shardExists { - // Try to drop table with the latest shard version. 
- err = params.ClusterMetadata.DropTable(req.ctx, metadata.DropTableRequest{ - SchemaName: params.SourceReq.GetSchemaName(), - TableName: params.SourceReq.GetName(), - ShardID: shardVersionUpdate.ShardID, - LatestVersion: shardVersionUpdate.LatestVersion, - }) - if err != nil { - procedure.CancelEventWithLog(event, err, "drop table metadata", zap.String("tableName", params.SourceReq.GetName())) - return - } - return - } - - latestShardVersion, err := ddl.DropTableOnShard(req.ctx, params.ClusterMetadata, params.Dispatch, params.SourceReq.GetSchemaName(), table, shardVersionUpdate) - if err != nil { - procedure.CancelEventWithLog(event, err, "dispatch drop table on shard") - return - } - - log.Debug("dispatch dropTableOnShard finish", zap.String("tableName", params.SourceReq.GetName()), zap.Uint64("procedureID", params.ID)) - - if err = params.ClusterMetadata.DropTable(req.ctx, metadata.DropTableRequest{ - SchemaName: params.SourceReq.GetSchemaName(), - TableName: params.SourceReq.GetName(), - ShardID: shardVersionUpdate.ShardID, - LatestVersion: latestShardVersion, - }); err != nil { - procedure.CancelEventWithLog(event, err, "cluster drop table") - return - } - - log.Debug("drop table finish", zap.String("tableName", params.SourceReq.GetName()), zap.Uint64("procedureID", params.ID)) -} - -func successCallback(event *fsm.Event) { - req := event.Args[0].(*callbackRequest) - - assert.Assert(req.droppedTable != nil) - if err := req.p.params.OnSucceeded(*req.droppedTable); err != nil { - log.Error("exec success callback failed") - } -} - -func failedCallback(event *fsm.Event) { - req := event.Args[0].(*callbackRequest) - - if err := req.p.params.OnFailed(event.Err); err != nil { - log.Error("exec failed callback failed") - } -} - -// callbackRequest is fsm callbacks param. 
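The callbacks in this file fetch their argument either through procedure.GetRequestFromEvent[*callbackRequest](event) or by asserting event.Args[0] directly. The generic helper itself lives in procedure/util.go, which this patch also deletes; the following is only a plausible reconstruction of its shape inferred from the call sites, not the original code:

// Hypothetical placement inside the procedure package; shown for orientation only.
package procedure

import (
	"fmt"

	"github.com/looplab/fsm"
)

// GetRequestFromEvent pulls the request that Start passed to fsm.Event out
// of event.Args and type-asserts it to the expected type. This is an
// assumed reconstruction; the real helper may differ in details.
func GetRequestFromEvent[T any](event *fsm.Event) (T, error) {
	var zero T
	if len(event.Args) != 1 {
		return zero, fmt.Errorf("expect exactly 1 event arg, got %d", len(event.Args))
	}
	req, ok := event.Args[0].(T)
	if !ok {
		return zero, fmt.Errorf("unexpected event arg type %T", event.Args[0])
	}
	return req, nil
}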
-type callbackRequest struct { - ctx context.Context - p *Procedure - - droppedTable *metadata.TableInfo -} - -type ProcedureParams struct { - ID uint64 - Dispatch eventdispatch.Dispatch - ClusterMetadata *metadata.ClusterMetadata - ClusterSnapshot metadata.Snapshot - - SourceReq *metaservicepb.DropTableRequest - OnSucceeded func(metadata.TableInfo) error - OnFailed func(error) error -} - -func NewDropTableProcedure(params ProcedureParams) (procedure.Procedure, bool, error) { - table, exists, err := params.ClusterMetadata.GetTable(params.SourceReq.GetSchemaName(), params.SourceReq.GetName()) - if err != nil { - log.Error("get table", zap.Error(err)) - return nil, false, err - } - if !exists { - log.Warn("drop non-existing table", zap.String("schema", params.SourceReq.GetSchemaName()), zap.String("table", params.SourceReq.GetName())) - return nil, false, nil - } - - shardID, err := findShardID(table.ID, params) - if err != nil { - return nil, false, err - } - - relatedVersionInfo, err := buildRelatedVersionInfo(params, shardID) - if err != nil { - return nil, false, err - } - - fsm := fsm.NewFSM( - stateBegin, - dropTableEvents, - dropTableCallbacks, - ) - - return &Procedure{ - fsm: fsm, - shardID: shardID, - relatedVersionInfo: relatedVersionInfo, - params: params, - lock: sync.RWMutex{}, - state: procedure.StateInit, - }, true, nil -} - -func buildRelatedVersionInfo(params ProcedureParams, shardID storage.ShardID) (procedure.RelatedVersionInfo, error) { - shardWithVersion := make(map[storage.ShardID]uint64, 1) - shardView, exists := params.ClusterSnapshot.Topology.ShardViewsMapping[shardID] - if !exists { - return procedure.RelatedVersionInfo{}, errors.WithMessagef(metadata.ErrShardNotFound, "shard not found in topology, shardID:%d", shardID) - } - shardWithVersion[shardID] = shardView.Version - return procedure.RelatedVersionInfo{ - ClusterID: params.ClusterSnapshot.Topology.ClusterView.ClusterID, - ShardWithVersion: shardWithVersion, - ClusterVersion: params.ClusterSnapshot.Topology.ClusterView.Version, - }, nil -} - -func findShardID(tableID storage.TableID, params ProcedureParams) (storage.ShardID, error) { - for _, shardView := range params.ClusterSnapshot.Topology.ShardViewsMapping { - for _, id := range shardView.TableIDs { - if tableID == id { - return shardView.ShardID, nil - } - } - } - - return 0, errors.WithMessagef(metadata.ErrShardNotFound, "The shard corresponding to the table was not found, schema:%s, table:%s", params.SourceReq.GetSchemaName(), params.SourceReq.GetName()) -} - -type Procedure struct { - fsm *fsm.FSM - shardID storage.ShardID - relatedVersionInfo procedure.RelatedVersionInfo - params ProcedureParams - - lock sync.RWMutex - state procedure.State -} - -func (p *Procedure) RelatedVersionInfo() procedure.RelatedVersionInfo { - return p.relatedVersionInfo -} - -func (p *Procedure) Priority() procedure.Priority { - return procedure.PriorityLow -} - -func (p *Procedure) ID() uint64 { - return p.params.ID -} - -func (p *Procedure) Kind() procedure.Kind { - return procedure.DropTable -} - -func (p *Procedure) Start(ctx context.Context) error { - p.updateState(procedure.StateRunning) - - req := &callbackRequest{ - ctx: ctx, - p: p, - droppedTable: nil, - } - - if err := p.fsm.Event(eventPrepare, req); err != nil { - err1 := p.fsm.Event(eventFailed, req) - p.updateState(procedure.StateFailed) - if err1 != nil { - err = errors.WithMessagef(err, "send eventFailed, err:%v", err1) - } - return errors.WithMessage(err, "send eventPrepare") - } - - if err := 
p.fsm.Event(eventSuccess, req); err != nil { - return errors.WithMessage(err, "send eventSuccess") - } - - p.updateState(procedure.StateFinished) - return nil -} - -func (p *Procedure) Cancel(_ context.Context) error { - p.updateState(procedure.StateCancelled) - return nil -} - -func (p *Procedure) State() procedure.State { - p.lock.RLock() - defer p.lock.RUnlock() - - return p.state -} - -func (p *Procedure) updateState(state procedure.State) { - p.lock.Lock() - defer p.lock.Unlock() - - p.state = state -} diff --git a/horaemeta/server/coordinator/procedure/delay_queue.go b/horaemeta/server/coordinator/procedure/delay_queue.go deleted file mode 100644 index 82d8d6a3db..0000000000 --- a/horaemeta/server/coordinator/procedure/delay_queue.go +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package procedure - -import ( - "container/heap" - "fmt" - "sync" - "time" - - "github.com/pkg/errors" -) - -type procedureScheduleEntry struct { - procedure Procedure - runAfter time.Time -} - -type DelayQueue struct { - maxLen int - - // This lock is used to protect the following fields. - lock sync.RWMutex - heapQueue *heapPriorityQueue - // existingProcs is used to record procedures has been pushed into the queue, - // and they will be used to verify the addition of duplicate elements. - existingProcs map[uint64]struct{} -} - -// heapPriorityQueue is no internal lock, -// and its thread safety is guaranteed by the external caller. -type heapPriorityQueue struct { - procedures []*procedureScheduleEntry -} - -func (q *heapPriorityQueue) Len() int { - return len(q.procedures) -} - -// The dequeue order of elements is determined by the less method. -// When return procedures[i].runAfter < procedures[j].runAfter, the element with smallest will be pop first. 
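As the comment above and the Less method just below spell out, the delay queue is a min-heap keyed on runAfter, so the entry that becomes runnable earliest is popped first. A small self-contained illustration of that ordering with container/heap (the entry type here is a stand-in for procedureScheduleEntry):

package main

import (
	"container/heap"
	"fmt"
	"time"
)

type entry struct{ runAfter time.Time }

// entryHeap keeps the entry with the earliest runAfter at the root.
type entryHeap []entry

func (h entryHeap) Len() int           { return len(h) }
func (h entryHeap) Less(i, j int) bool { return h[i].runAfter.Before(h[j].runAfter) }
func (h entryHeap) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }
func (h *entryHeap) Push(x any)        { *h = append(*h, x.(entry)) }
func (h *entryHeap) Pop() any {
	old := *h
	n := len(old)
	item := old[n-1]
	*h = old[:n-1]
	return item
}

func main() {
	now := time.Now()
	h := &entryHeap{}
	heap.Init(h)
	heap.Push(h, entry{runAfter: now.Add(40 * time.Millisecond)})
	heap.Push(h, entry{runAfter: now.Add(10 * time.Millisecond)})
	heap.Push(h, entry{runAfter: now.Add(20 * time.Millisecond)})

	// Pops come out in runAfter order: +10ms, +20ms, +40ms.
	for h.Len() > 0 {
		e := heap.Pop(h).(entry)
		fmt.Println(e.runAfter.Sub(now))
	}
}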
-func (q *heapPriorityQueue) Less(i, j int) bool { - return q.procedures[i].runAfter.Before(q.procedures[j].runAfter) -} - -func (q *heapPriorityQueue) Swap(i, j int) { - q.procedures[i], q.procedures[j] = q.procedures[j], q.procedures[i] -} - -func (q *heapPriorityQueue) Push(x any) { - item := x.(*procedureScheduleEntry) - q.procedures = append(q.procedures, item) -} - -func (q *heapPriorityQueue) Pop() any { - length := len(q.procedures) - if length == 0 { - return nil - } - item := q.procedures[length-1] - q.procedures = q.procedures[:length-1] - return item -} - -func (q *heapPriorityQueue) Peek() any { - length := len(q.procedures) - if length == 0 { - return nil - } - item := q.procedures[0] - return item -} - -func NewProcedureDelayQueue(maxLen int) *DelayQueue { - return &DelayQueue{ - maxLen: maxLen, - - lock: sync.RWMutex{}, - heapQueue: &heapPriorityQueue{procedures: []*procedureScheduleEntry{}}, - existingProcs: map[uint64]struct{}{}, - } -} - -func (q *DelayQueue) Len() int { - q.lock.RLock() - defer q.lock.RUnlock() - - return q.heapQueue.Len() -} - -func (q *DelayQueue) Push(p Procedure, delay time.Duration) error { - q.lock.Lock() - defer q.lock.Unlock() - - if q.heapQueue.Len() >= q.maxLen { - return errors.WithMessage(ErrQueueFull, fmt.Sprintf("queue max length is %d", q.maxLen)) - } - - if _, exists := q.existingProcs[p.ID()]; exists { - return errors.WithMessage(ErrPushDuplicatedProcedure, fmt.Sprintf("procedure has been pushed, %v", p)) - } - - heap.Push(q.heapQueue, &procedureScheduleEntry{ - procedure: p, - runAfter: time.Now().Add(delay), - }) - q.existingProcs[p.ID()] = struct{}{} - - return nil -} - -func (q *DelayQueue) Pop() Procedure { - q.lock.Lock() - defer q.lock.Unlock() - - if q.heapQueue.Len() == 0 { - return nil - } - - entry := q.heapQueue.Peek().(*procedureScheduleEntry) - if time.Now().Before(entry.runAfter) { - return nil - } - - heap.Pop(q.heapQueue) - delete(q.existingProcs, entry.procedure.ID()) - - return entry.procedure -} diff --git a/horaemeta/server/coordinator/procedure/delay_queue_test.go b/horaemeta/server/coordinator/procedure/delay_queue_test.go deleted file mode 100644 index a84c5192d3..0000000000 --- a/horaemeta/server/coordinator/procedure/delay_queue_test.go +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package procedure - -import ( - "context" - "testing" - "time" - - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/stretchr/testify/require" -) - -type TestProcedure struct{ ProcedureID uint64 } - -func (t TestProcedure) RelatedVersionInfo() RelatedVersionInfo { - return RelatedVersionInfo{ - ClusterID: 0, - ShardWithVersion: map[storage.ShardID]uint64{}, - ClusterVersion: 0, - } -} - -func (t TestProcedure) Priority() Priority { - return PriorityLow -} - -func (t TestProcedure) ID() uint64 { - return t.ProcedureID -} - -func (t TestProcedure) Kind() Kind { - return CreateTable -} - -func (t TestProcedure) Start(_ context.Context) error { - return nil -} - -func (t TestProcedure) Cancel(_ context.Context) error { - return nil -} - -func (t TestProcedure) State() State { - return StateInit -} - -func TestDelayQueue(t *testing.T) { - re := require.New(t) - - testProcedure0 := TestProcedure{ProcedureID: 0} - testProcedure1 := TestProcedure{ProcedureID: 1} - testProcedure2 := TestProcedure{ProcedureID: 2} - testProcedure3 := TestProcedure{ProcedureID: 3} - - queue := NewProcedureDelayQueue(3) - err := queue.Push(testProcedure0, time.Millisecond*40) - re.NoError(err) - err = queue.Push(testProcedure0, time.Millisecond*30) - re.Error(err) - err = queue.Push(testProcedure1, time.Millisecond*10) - re.NoError(err) - err = queue.Push(testProcedure2, time.Millisecond*20) - re.NoError(err) - err = queue.Push(testProcedure3, time.Millisecond*20) - re.Error(err) - re.Equal(3, queue.Len()) - - po := queue.Pop() - re.Nil(po) - - time.Sleep(time.Millisecond * 100) - - p0 := queue.Pop() - re.Equal(uint64(1), p0.ID()) - p1 := queue.Pop() - re.Equal(uint64(2), p1.ID()) - p2 := queue.Pop() - re.Equal(uint64(0), p2.ID()) - p := queue.Pop() - re.Nil(p) - - err = queue.Push(testProcedure0, time.Millisecond*20) - re.NoError(err) - - time.Sleep(time.Millisecond * 10) - p0 = queue.Pop() - re.Nil(p0) - - time.Sleep(time.Millisecond * 10) - p0 = queue.Pop() - re.Equal(uint64(0), p0.ID()) -} diff --git a/horaemeta/server/coordinator/procedure/error.go b/horaemeta/server/coordinator/procedure/error.go deleted file mode 100644 index c59dc8fcdb..0000000000 --- a/horaemeta/server/coordinator/procedure/error.go +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package procedure - -import "github.com/apache/incubator-horaedb-meta/pkg/coderr" - -var ( - ErrShardLeaderNotFound = coderr.NewCodeError(coderr.Internal, "shard leader not found") - ErrShardNotMatch = coderr.NewCodeError(coderr.Internal, "target shard not match to persis data") - ErrProcedureNotFound = coderr.NewCodeError(coderr.Internal, "procedure not found") - ErrClusterConfigChanged = coderr.NewCodeError(coderr.Internal, "cluster config changed") - ErrTableNotExists = coderr.NewCodeError(coderr.Internal, "table not exists") - ErrTableAlreadyExists = coderr.NewCodeError(coderr.Internal, "table already exists") - ErrListRunningProcedure = coderr.NewCodeError(coderr.Internal, "procedure type not match") - ErrListProcedure = coderr.NewCodeError(coderr.Internal, "list running procedure") - ErrDecodeRawData = coderr.NewCodeError(coderr.Internal, "decode raw data") - ErrEncodeRawData = coderr.NewCodeError(coderr.Internal, "encode raw data") - ErrGetRequest = coderr.NewCodeError(coderr.Internal, "get request from event") - ErrNodeNumberNotEnough = coderr.NewCodeError(coderr.Internal, "node number not enough") - ErrEmptyPartitionNames = coderr.NewCodeError(coderr.Internal, "partition names is empty") - ErrDropTableResult = coderr.NewCodeError(coderr.Internal, "length of shard not correct") - ErrPickShard = coderr.NewCodeError(coderr.Internal, "pick shard failed") - ErrSubmitProcedure = coderr.NewCodeError(coderr.Internal, "submit new procedure") - ErrQueueFull = coderr.NewCodeError(coderr.Internal, "queue is full, unable to offer more data") - ErrPushDuplicatedProcedure = coderr.NewCodeError(coderr.Internal, "try to push duplicated procedure") - ErrShardNumberNotEnough = coderr.NewCodeError(coderr.Internal, "shard number not enough") - ErrEmptyBatchProcedure = coderr.NewCodeError(coderr.Internal, "procedure batch is empty") - ErrMergeBatchProcedure = coderr.NewCodeError(coderr.Internal, "failed to merge procedures batch") -) diff --git a/horaemeta/server/coordinator/procedure/manager.go b/horaemeta/server/coordinator/procedure/manager.go deleted file mode 100644 index c542130f98..0000000000 --- a/horaemeta/server/coordinator/procedure/manager.go +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package procedure - -import ( - "context" -) - -type Manager interface { - // Start must be called before manager is used. - Start(ctx context.Context) error - // Stop must be called before manager is dropped. - Stop(ctx context.Context) error - - // Submit procedure to be executed asynchronously. - // TODO: change result type, add channel to get whether the procedure executed successfully - Submit(ctx context.Context, procedure Procedure) error - // ListRunningProcedure return immutable procedures info. 
- ListRunningProcedure(ctx context.Context) ([]*Info, error) -} diff --git a/horaemeta/server/coordinator/procedure/manager_impl.go b/horaemeta/server/coordinator/procedure/manager_impl.go deleted file mode 100644 index 97e3d5e20c..0000000000 --- a/horaemeta/server/coordinator/procedure/manager_impl.go +++ /dev/null @@ -1,259 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package procedure - -import ( - "context" - "sync" - "time" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator/lock" - "github.com/apache/incubator-horaedb-meta/server/storage" - "go.uber.org/zap" -) - -const ( - metaListBatchSize = 100 - defaultWaitingQueueLen = 1000 - defaultWaitingQueueDelay = time.Millisecond * 500 - defaultPromoteDelay = time.Millisecond * 100 - defaultProcedureWorkerChanBufSiz = 10 -) - -type ManagerImpl struct { - logger *zap.Logger - metadata *metadata.ClusterMetadata - - // ProcedureShardLock is used to ensure the consistency of procedures' concurrent running on shard, that is to say, only one procedure is allowed to run on a specific shard. - procedureShardLock *lock.EntryLock - // All procedure will be put into waiting queue first, when runningProcedure is empty, try to promote some waiting procedures to new running procedures. - waitingProcedures *DelayQueue - // ProcedureWorkerChan is used to notify that a procedure has been submitted or completed, and the manager will perform promote after receiving the signal. - procedureWorkerChan chan struct{} - - // This lock is used to protect the following fields. - lock sync.RWMutex - running bool - // There is only one procedure running for every shard. - // It will be removed when the procedure is finished or failed. - runningProcedures map[storage.ShardID]Procedure -} - -func (m *ManagerImpl) Start(ctx context.Context) error { - m.lock.Lock() - defer m.lock.Unlock() - - if m.running { - m.logger.Warn("procedure manager has already been started") - return nil - } - - m.procedureWorkerChan = make(chan struct{}, defaultProcedureWorkerChanBufSiz) - go m.startProcedurePromote(ctx, m.procedureWorkerChan) - - m.running = true - - return nil -} - -func (m *ManagerImpl) Stop(ctx context.Context) error { - m.lock.Lock() - defer m.lock.Unlock() - - for _, procedure := range m.runningProcedures { - if procedure.State() == StateRunning { - err := procedure.Cancel(ctx) - m.logger.Error("cancel procedure failed", zap.Error(err), zap.Uint64("procedureID", procedure.ID())) - // TODO: consider whether a single procedure cancel failed should return directly. - return err - } - } - - m.running = false - - return nil -} - -// TODO: Filter duplicate submitted Procedure. 
-func (m *ManagerImpl) Submit(_ context.Context, procedure Procedure) error { - if err := m.waitingProcedures.Push(procedure, 0); err != nil { - return err - } - - select { - case m.procedureWorkerChan <- struct{}{}: - default: - } - - return nil -} - -func (m *ManagerImpl) ListRunningProcedure(_ context.Context) ([]*Info, error) { - m.lock.RLock() - defer m.lock.RUnlock() - - procedureInfos := make([]*Info, 0, len(m.runningProcedures)) - for _, procedure := range m.runningProcedures { - if procedure.State() == StateRunning { - procedureInfos = append(procedureInfos, &Info{ - ID: procedure.ID(), - Kind: procedure.Kind(), - State: procedure.State(), - }) - } - } - return procedureInfos, nil -} - -func NewManagerImpl(logger *zap.Logger, metadata *metadata.ClusterMetadata) (Manager, error) { - entryLock := lock.NewEntryLock(10) - manager := &ManagerImpl{ - logger: logger, - metadata: metadata, - procedureShardLock: &entryLock, - waitingProcedures: NewProcedureDelayQueue(defaultWaitingQueueLen), - procedureWorkerChan: make(chan struct{}), - lock: sync.RWMutex{}, - running: false, - runningProcedures: map[storage.ShardID]Procedure{}, - } - return manager, nil -} - -func (m *ManagerImpl) startProcedurePromote(ctx context.Context, procedureWorkerChan chan struct{}) { - ticker := time.NewTicker(defaultPromoteDelay) - defer ticker.Stop() - for { - select { - case <-ticker.C: - m.startProcedurePromoteInternal(ctx, procedureWorkerChan) - case <-procedureWorkerChan: - m.startProcedurePromoteInternal(ctx, procedureWorkerChan) - case <-ctx.Done(): - return - } - } -} - -func (m *ManagerImpl) startProcedurePromoteInternal(ctx context.Context, procedureWorkerChan chan struct{}) { - newProcedures, err := m.promoteProcedure(ctx) - if err != nil { - m.logger.Error("promote procedure failed", zap.Error(err)) - return - } - - m.lock.Lock() - for _, newProcedure := range newProcedures { - for shardID := range newProcedure.RelatedVersionInfo().ShardWithVersion { - m.runningProcedures[shardID] = newProcedure - } - } - m.lock.Unlock() - - for _, newProcedure := range newProcedures { - m.logger.Info("promote procedure", zap.Uint64("procedureID", newProcedure.ID())) - m.startProcedureWorker(ctx, newProcedure, procedureWorkerChan) - } -} - -func (m *ManagerImpl) startProcedureWorker(ctx context.Context, newProcedure Procedure, procedureWorkerChan chan struct{}) { - go func() { - start := time.Now() - m.logger.Info("procedure start", zap.Uint64("procedureID", newProcedure.ID())) - err := newProcedure.Start(ctx) - if err != nil { - m.logger.Error("procedure start failed", zap.Error(err), zap.Int64("costTime", time.Since(start).Milliseconds())) - } else { - m.logger.Info("procedure start finish", zap.Uint64("procedureID", newProcedure.ID()), zap.Int64("costTime", time.Since(start).Milliseconds())) - } - for shardID := range newProcedure.RelatedVersionInfo().ShardWithVersion { - m.lock.Lock() - delete(m.runningProcedures, shardID) - m.lock.Unlock() - - m.procedureShardLock.UnLock([]uint64{uint64(shardID)}) - } - select { - case procedureWorkerChan <- struct{}{}: - default: - } - }() -} - -// Whether a waiting procedure could be running procedure. -func checkValid(p Procedure, clusterMetadata *metadata.ClusterMetadata) bool { - // ClusterVersion and ShardVersion in this procedure must be same with current cluster topology. 
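Both Submit and startProcedureWorker above wake the promote loop with a non-blocking send on procedureWorkerChan, so a full channel only means a promotion is already pending and the sender never blocks. A tiny standalone sketch of that select/default idiom (channel size, message count, and the consumer are placeholders, not values from this patch):

package main

import (
	"fmt"
	"time"
)

func main() {
	notify := make(chan struct{}, 1)

	// Consumer: promote on every signal (stand-in for startProcedurePromote).
	go func() {
		for range notify {
			fmt.Println("promote waiting procedures")
		}
	}()

	// Producers: fire many signals; extra ones are dropped rather than blocked on.
	for i := 0; i < 5; i++ {
		select {
		case notify <- struct{}{}:
		default:
		}
	}

	time.Sleep(10 * time.Millisecond) // let the consumer drain (demo only)
}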
- snapshot := clusterMetadata.GetClusterSnapshot() - curClusterVersion := snapshot.Topology.ClusterView.Version - curShardViews := snapshot.Topology.ShardViewsMapping - - relatedVersionInfo := p.RelatedVersionInfo() - if relatedVersionInfo.ClusterVersion != curClusterVersion { - return false - } - for shardID, version := range relatedVersionInfo.ShardWithVersion { - shardView, exists := curShardViews[shardID] - if !exists { - return false - } - if shardView.Version != version { - return false - } - } - return true -} - -// Promote a waiting procedure to be a running procedure. -// One procedure may be related with multiple shards. -func (m *ManagerImpl) promoteProcedure(_ context.Context) ([]Procedure, error) { - // Get waiting procedures, it has been sorted in queue. - queue := m.waitingProcedures - - var readyProcs []Procedure - // Find next valid procedure. - for { - p := queue.Pop() - if p == nil { - return readyProcs, nil - } - - if !checkValid(p, m.metadata) { - // This procedure is invalid, just remove it. - continue - } - - // Try to get shard locks. - shardIDs := make([]uint64, 0, len(p.RelatedVersionInfo().ShardWithVersion)) - for shardID := range p.RelatedVersionInfo().ShardWithVersion { - shardIDs = append(shardIDs, uint64(shardID)) - } - lockResult := m.procedureShardLock.TryLock(shardIDs) - if lockResult { - // Get lock success, procedure will be executed. - readyProcs = append(readyProcs, p) - } else { - // Get lock failed, procedure will be put back into the queue. - if err := queue.Push(p, defaultWaitingQueueDelay); err != nil { - return nil, err - } - } - } -} diff --git a/horaemeta/server/coordinator/procedure/manager_test.go b/horaemeta/server/coordinator/procedure/manager_test.go deleted file mode 100644 index 831601f529..0000000000 --- a/horaemeta/server/coordinator/procedure/manager_test.go +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package procedure_test - -import ( - "context" - "testing" - "time" - - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/test" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/stretchr/testify/require" - "go.uber.org/zap" -) - -type MockProcedure struct { - id uint64 - state procedure.State - relatedVersionInfo procedure.RelatedVersionInfo - execTime time.Duration -} - -func (m *MockProcedure) ID() uint64 { - return m.id -} - -func (m *MockProcedure) Kind() procedure.Kind { - return procedure.CreateTable -} - -func (m *MockProcedure) Start(_ context.Context) error { - m.state = procedure.StateRunning - // Mock procedure execute time. 
- time.Sleep(m.execTime) - m.state = procedure.StateFinished - return nil -} - -func (m *MockProcedure) Cancel(_ context.Context) error { - return nil -} - -func (m *MockProcedure) State() procedure.State { - return m.state -} - -func (m *MockProcedure) RelatedVersionInfo() procedure.RelatedVersionInfo { - return m.relatedVersionInfo -} - -func (m *MockProcedure) Priority() procedure.Priority { - return procedure.PriorityMed -} - -func TestManager(t *testing.T) { - ctx := context.Background() - re := require.New(t) - - c := test.InitStableCluster(ctx, t) - manager, err := procedure.NewManagerImpl(zap.NewNop(), c.GetMetadata()) - re.NoError(err) - - err = manager.Start(ctx) - re.NoError(err) - - procedureID := uint64(0) - // Test submit multi single shard procedure. - snapshot := c.GetMetadata().GetClusterSnapshot() - for shardID, shardView := range snapshot.Topology.ShardViewsMapping { - err = manager.Submit(ctx, &MockProcedure{ - id: procedureID, - state: procedure.StateInit, - relatedVersionInfo: procedure.RelatedVersionInfo{ClusterID: c.GetMetadata().GetClusterID(), ShardWithVersion: map[storage.ShardID]uint64{shardID: shardView.Version}, ClusterVersion: c.GetMetadata().GetClusterViewVersion()}, - execTime: time.Millisecond * 50, - }) - procedureID++ - re.NoError(err) - } - // Procedures state is init, list result length should be 0. - infos, err := manager.ListRunningProcedure(ctx) - re.NoError(err) - re.Equal(0, len(infos)) - time.Sleep(time.Millisecond * 10) - // Procedures state is running, list result length should be len(shardViews). - infos, err = manager.ListRunningProcedure(ctx) - re.NoError(err) - re.Equal(len(snapshot.Topology.ShardViewsMapping), len(infos)) - time.Sleep(time.Millisecond * 100) - // Procedures state is finish, list result length should be 0. - infos, err = manager.ListRunningProcedure(ctx) - re.NoError(err) - re.Equal(0, len(infos)) - - { - // Test schedule procedure with multi shards. - // 1. Submit the procedure of all shard, and all shards are locked at this time. - // 2. Submit procedures with single shard. - // 3. Because of the earliest submitted procedure, all subsequent procedures cannot be executed at this time. - // 4. Wait for the execution of the first procedure to complete, and the remaining procedures will be promoted and executed in parallel. - // 5. All procedures are executed finish. - - // Test submit procedure with multi shards. - shardWithVersions := map[storage.ShardID]uint64{} - for id, view := range snapshot.Topology.ShardViewsMapping { - shardWithVersions[id] = view.Version - } - err = manager.Submit(ctx, &MockProcedure{ - id: procedureID, - state: procedure.StateInit, - relatedVersionInfo: procedure.RelatedVersionInfo{ClusterID: c.GetMetadata().GetClusterID(), ShardWithVersion: shardWithVersions, ClusterVersion: c.GetMetadata().GetClusterViewVersion()}, - execTime: time.Millisecond * 100, - }) - re.NoError(err) - procedureID++ - for shardID, shardView := range snapshot.Topology.ShardViewsMapping { - err = manager.Submit(ctx, &MockProcedure{ - id: procedureID, - state: procedure.StateInit, - relatedVersionInfo: procedure.RelatedVersionInfo{ClusterID: c.GetMetadata().GetClusterID(), ShardWithVersion: map[storage.ShardID]uint64{shardID: shardView.Version}, ClusterVersion: c.GetMetadata().GetClusterViewVersion()}, - execTime: time.Millisecond * 50, - }) - procedureID++ - re.NoError(err) - } - - // Procedure with multi shard is running, all shard is locked. 
- time.Sleep(time.Millisecond * 10) - infos, err = manager.ListRunningProcedure(ctx) - re.NoError(err) - re.Equal(len(snapshot.Topology.ShardViewsMapping), len(infos)) - - // Procedure with multi shard is still running, all shard is still locked. - time.Sleep(time.Millisecond * 50) - infos, err = manager.ListRunningProcedure(ctx) - re.NoError(err) - re.Equal(len(snapshot.Topology.ShardViewsMapping), len(infos)) - - // Procedure with multi shard is finished. - time.Sleep(time.Millisecond * 50) - infos, err = manager.ListRunningProcedure(ctx) - re.NoError(err) - re.Equal(0, len(infos)) - - // Waiting for next promote, procedures with single shard will be scheduled. - time.Sleep(time.Millisecond * 500) - infos, err = manager.ListRunningProcedure(ctx) - re.NoError(err) - re.Equal(len(snapshot.Topology.ShardViewsMapping), len(infos)) - - // procedures with single shard is finished. - time.Sleep(time.Millisecond * 50) - infos, err = manager.ListRunningProcedure(ctx) - re.NoError(err) - re.Equal(0, len(infos)) - - err = manager.Stop(ctx) - re.NoError(err) - } -} diff --git a/horaemeta/server/coordinator/procedure/operation/split/split.go b/horaemeta/server/coordinator/procedure/operation/split/split.go deleted file mode 100644 index 8efd0e158b..0000000000 --- a/horaemeta/server/coordinator/procedure/operation/split/split.go +++ /dev/null @@ -1,370 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package split - -import ( - "context" - "encoding/json" - "sync" - - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator/eventdispatch" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/looplab/fsm" - "github.com/pkg/errors" - "go.uber.org/zap" -) - -// Fsm state change: begin -> CreateNewShardView -> UpdateShardTables -> OpenNewShard -> Finish -// CreateNewShardView will create new shard metadata. -// UpdateShardTables will update shard tables mapping between the old and new shard. -// OpenNewShard will send open shard request to new shard leader. 
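The comment block above pins down the split flow as a strictly linear FSM: Begin, CreateNewShardView, UpdateShardTables, OpenNewShard, Finish. A compact sketch of declaring and driving such a chain with looplab/fsm, assuming the same library version as this file (Event takes no context argument and callbacks receive *fsm.Event); the callback body is a placeholder:

package main

import (
	"fmt"

	"github.com/looplab/fsm"
)

func main() {
	f := fsm.NewFSM(
		"StateBegin",
		fsm.Events{
			{Name: "EventCreateNewShardView", Src: []string{"StateBegin"}, Dst: "StateCreateNewShardView"},
			{Name: "EventUpdateShardTables", Src: []string{"StateCreateNewShardView"}, Dst: "StateUpdateShardTables"},
			{Name: "EventOpenNewShard", Src: []string{"StateUpdateShardTables"}, Dst: "StateOpenNewShard"},
			{Name: "EventFinish", Src: []string{"StateOpenNewShard"}, Dst: "StateFinish"},
		},
		fsm.Callbacks{
			// In the real procedure each callback does the actual work
			// (create shard view, migrate tables, open the new shard).
			"EventCreateNewShardView": func(e *fsm.Event) { fmt.Println("callback:", e.Event) },
		},
	)

	// Drive the machine through the linear chain of events.
	for _, ev := range []string{"EventCreateNewShardView", "EventUpdateShardTables", "EventOpenNewShard", "EventFinish"} {
		if err := f.Event(ev); err != nil {
			fmt.Println("transition failed:", err)
			return
		}
		fmt.Println("now in", f.Current())
	}
}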
-const ( - eventCreateNewShardView = "EventCreateNewShardView" - eventUpdateShardTables = "EventUpdateShardTables" - eventOpenNewShard = "EventOpenNewShard" - eventFinish = "EventFinish" - - stateBegin = "StateBegin" - stateCreateNewShardView = "StateCreateNewShardView" - stateUpdateShardTables = "StateUpdateShardTables" - stateOpenNewShard = "StateOpenNewShard" - stateFinish = "StateFinish" -) - -var ( - splitEvents = fsm.Events{ - {Name: eventCreateNewShardView, Src: []string{stateBegin}, Dst: stateCreateNewShardView}, - {Name: eventUpdateShardTables, Src: []string{stateCreateNewShardView}, Dst: stateUpdateShardTables}, - {Name: eventOpenNewShard, Src: []string{stateUpdateShardTables}, Dst: stateOpenNewShard}, - {Name: eventFinish, Src: []string{stateOpenNewShard}, Dst: stateFinish}, - } - splitCallbacks = fsm.Callbacks{ - eventCreateNewShardView: createShardViewCallback, - eventUpdateShardTables: updateShardTablesCallback, - eventOpenNewShard: openShardCallback, - eventFinish: finishCallback, - } -) - -type Procedure struct { - fsm *fsm.FSM - params ProcedureParams - relatedVersionInfo procedure.RelatedVersionInfo - - // Protect the state. - lock sync.RWMutex - state procedure.State -} - -type ProcedureParams struct { - ID uint64 - - Dispatch eventdispatch.Dispatch - Storage procedure.Storage - - ClusterMetadata *metadata.ClusterMetadata - ClusterSnapshot metadata.Snapshot - - ShardID storage.ShardID - NewShardID storage.ShardID - - SchemaName string - TableNames []string - TargetNodeName string -} - -func NewProcedure(params ProcedureParams) (procedure.Procedure, error) { - if err := validateClusterTopology(params.ClusterSnapshot.Topology, params.ShardID); err != nil { - return nil, err - } - - relatedVersionInfo, err := buildRelatedVersionInfo(params) - if err != nil { - return nil, err - } - - splitFsm := fsm.NewFSM( - stateBegin, - splitEvents, - splitCallbacks, - ) - - return &Procedure{ - fsm: splitFsm, - params: params, - relatedVersionInfo: relatedVersionInfo, - lock: sync.RWMutex{}, - state: procedure.StateInit, - }, nil -} - -func buildRelatedVersionInfo(params ProcedureParams) (procedure.RelatedVersionInfo, error) { - shardWithVersion := make(map[storage.ShardID]uint64, 0) - - shardView, exists := params.ClusterSnapshot.Topology.ShardViewsMapping[params.ShardID] - if !exists { - return procedure.RelatedVersionInfo{}, errors.WithMessagef(metadata.ErrShardNotFound, "shard not found in topology, shardID:%d", params.ShardID) - } - shardWithVersion[params.ShardID] = shardView.Version - shardWithVersion[params.NewShardID] = 0 - - relatedVersionInfo := procedure.RelatedVersionInfo{ - ClusterID: params.ClusterSnapshot.Topology.ClusterView.ClusterID, - ShardWithVersion: shardWithVersion, - ClusterVersion: params.ClusterSnapshot.Topology.ClusterView.Version, - } - return relatedVersionInfo, nil -} - -func validateClusterTopology(topology metadata.Topology, shardID storage.ShardID) error { - // Validate cluster state. 
- curState := topology.ClusterView.State - if curState != storage.ClusterStateStable { - log.Error("cluster state must be stable", zap.Error(metadata.ErrClusterStateInvalid)) - return metadata.ErrClusterStateInvalid - } - - _, found := topology.ShardViewsMapping[shardID] - - if !found { - log.Error("shard not found", zap.Uint64("shardID", uint64(shardID)), zap.Error(metadata.ErrShardNotFound)) - return metadata.ErrShardNotFound - } - - found = false - for _, shardNode := range topology.ClusterView.ShardNodes { - if shardNode.ShardRole == storage.ShardRoleLeader { - found = true - } - } - if !found { - log.Error("shard leader not found", zap.Error(procedure.ErrShardLeaderNotFound)) - return procedure.ErrShardLeaderNotFound - } - return nil -} - -type callbackRequest struct { - ctx context.Context - p *Procedure -} - -func (p *Procedure) ID() uint64 { - return p.params.ID -} - -func (p *Procedure) Kind() procedure.Kind { - return procedure.Split -} - -func (p *Procedure) RelatedVersionInfo() procedure.RelatedVersionInfo { - return p.relatedVersionInfo -} - -func (p *Procedure) Priority() procedure.Priority { - return procedure.PriorityHigh -} - -func (p *Procedure) Start(ctx context.Context) error { - p.updateStateWithLock(procedure.StateRunning) - - splitCallbackRequest := callbackRequest{ - ctx: ctx, - p: p, - } - - for { - switch p.fsm.Current() { - case stateBegin: - if err := p.persist(ctx); err != nil { - return errors.WithMessage(err, "split procedure persist") - } - if err := p.fsm.Event(eventCreateNewShardView, splitCallbackRequest); err != nil { - p.updateStateWithLock(procedure.StateFailed) - return errors.WithMessage(err, "split procedure create new shard view") - } - case stateCreateNewShardView: - if err := p.persist(ctx); err != nil { - return errors.WithMessage(err, "split procedure persist") - } - if err := p.fsm.Event(eventUpdateShardTables, splitCallbackRequest); err != nil { - p.updateStateWithLock(procedure.StateFailed) - return errors.WithMessage(err, "split procedure create new shard") - } - case stateUpdateShardTables: - if err := p.persist(ctx); err != nil { - return errors.WithMessage(err, "split procedure persist") - } - if err := p.fsm.Event(eventOpenNewShard, splitCallbackRequest); err != nil { - p.updateStateWithLock(procedure.StateFailed) - return errors.WithMessage(err, "split procedure create shard tables") - } - case stateOpenNewShard: - if err := p.persist(ctx); err != nil { - return errors.WithMessage(err, "split procedure persist") - } - if err := p.fsm.Event(eventFinish, splitCallbackRequest); err != nil { - p.updateStateWithLock(procedure.StateFailed) - return errors.WithMessage(err, "split procedure delete shard tables") - } - case stateFinish: - // TODO: The state update sequence here is inconsistent with the previous one. Consider reconstructing the state update logic of the state machine. 
- p.updateStateWithLock(procedure.StateFinished) - if err := p.persist(ctx); err != nil { - return errors.WithMessage(err, "split procedure persist") - } - return nil - } - } -} - -func (p *Procedure) Cancel(_ context.Context) error { - p.updateStateWithLock(procedure.StateCancelled) - return nil -} - -func (p *Procedure) State() procedure.State { - p.lock.RLock() - defer p.lock.RUnlock() - return p.state -} - -func (p *Procedure) updateStateWithLock(state procedure.State) { - p.lock.Lock() - defer p.lock.Unlock() - - p.state = state -} - -func createShardViewCallback(event *fsm.Event) { - req, err := procedure.GetRequestFromEvent[callbackRequest](event) - if err != nil { - procedure.CancelEventWithLog(event, err, "get request from event") - return - } - - if err := req.p.params.ClusterMetadata.CreateShardViews(req.ctx, []metadata.CreateShardView{{ - ShardID: req.p.params.NewShardID, - Tables: []storage.TableID{}, - }}); err != nil { - procedure.CancelEventWithLog(event, err, "create shard views") - return - } -} - -func updateShardTablesCallback(event *fsm.Event) { - request, err := procedure.GetRequestFromEvent[callbackRequest](event) - if err != nil { - procedure.CancelEventWithLog(event, err, "get request from event") - return - } - - if err := request.p.params.ClusterMetadata.MigrateTable(request.ctx, metadata.MigrateTableRequest{ - SchemaName: request.p.params.SchemaName, - TableNames: request.p.params.TableNames, - OldShardID: request.p.params.ShardID, - NewShardID: request.p.params.NewShardID, - }); err != nil { - procedure.CancelEventWithLog(event, err, "update shard tables") - return - } -} - -func openShardCallback(event *fsm.Event) { - request, err := procedure.GetRequestFromEvent[callbackRequest](event) - if err != nil { - procedure.CancelEventWithLog(event, err, "get request from event") - return - } - ctx := request.ctx - - // Send open new shard request to CSE. 
- if err := request.p.params.Dispatch.OpenShard(ctx, request.p.params.TargetNodeName, eventdispatch.OpenShardRequest{ - Shard: metadata.ShardInfo{ - ID: request.p.params.NewShardID, - Role: storage.ShardRoleLeader, - Version: 0, - Status: storage.ShardStatusUnknown, - }, - }); err != nil { - procedure.CancelEventWithLog(event, err, "open shard failed") - return - } -} - -func finishCallback(event *fsm.Event) { - request, err := procedure.GetRequestFromEvent[callbackRequest](event) - if err != nil { - procedure.CancelEventWithLog(event, err, "get request from event") - return - } - log.Info("split procedure finish", zap.Uint32("shardID", uint32(request.p.params.ShardID)), zap.Uint32("newShardID", uint32(request.p.params.NewShardID))) -} - -func (p *Procedure) persist(ctx context.Context) error { - meta, err := p.convertToMeta() - if err != nil { - return errors.WithMessage(err, "convert to meta") - } - err = p.params.Storage.CreateOrUpdate(ctx, meta) - if err != nil { - return errors.WithMessage(err, "createOrUpdate procedure storage") - } - return nil -} - -type rawData struct { - SchemaName string - TableNames []string - ShardID uint32 - NewShardID uint32 - TargetNodeName string -} - -func (p *Procedure) convertToMeta() (procedure.Meta, error) { - p.lock.RLock() - defer p.lock.RUnlock() - - rawData := rawData{ - SchemaName: p.params.SchemaName, - TableNames: p.params.TableNames, - ShardID: uint32(p.params.ShardID), - NewShardID: uint32(p.params.NewShardID), - TargetNodeName: p.params.TargetNodeName, - } - rawDataBytes, err := json.Marshal(rawData) - if err != nil { - var emptyMeta procedure.Meta - return emptyMeta, procedure.ErrEncodeRawData.WithCausef("marshal raw data, procedureID:%v, err:%v", p.params.ShardID, err) - } - - meta := procedure.Meta{ - ID: p.params.ID, - Kind: procedure.Split, - State: p.state, - - RawData: rawDataBytes, - } - - return meta, nil -} diff --git a/horaemeta/server/coordinator/procedure/operation/split/split_test.go b/horaemeta/server/coordinator/procedure/operation/split/split_test.go deleted file mode 100644 index f16c7059ab..0000000000 --- a/horaemeta/server/coordinator/procedure/operation/split/split_test.go +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package split_test - -import ( - "context" - "testing" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/operation/split" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/test" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/stretchr/testify/require" -) - -func TestSplit(t *testing.T) { - re := require.New(t) - ctx := context.Background() - dispatch := test.MockDispatch{} - c := test.InitStableCluster(ctx, t) - s := test.NewTestStorage(t) - - snapshot := c.GetMetadata().GetClusterSnapshot() - - // Randomly select a shardNode to split. - createTableNodeShard := snapshot.Topology.ClusterView.ShardNodes[0] - - // Create some tables in this shard. - _, err := c.GetMetadata().CreateTable(ctx, metadata.CreateTableRequest{ - ShardID: createTableNodeShard.ID, - LatestVersion: 0, - SchemaName: test.TestSchemaName, - TableName: test.TestTableName0, - PartitionInfo: storage.PartitionInfo{Info: nil}, - }) - re.NoError(err) - _, err = c.GetMetadata().CreateTable(ctx, metadata.CreateTableRequest{ - ShardID: createTableNodeShard.ID, - LatestVersion: 0, - SchemaName: test.TestSchemaName, - TableName: test.TestTableName1, - PartitionInfo: storage.PartitionInfo{Info: nil}, - }) - re.NoError(err) - - // Split one table from this shard. - targetShardNode := c.GetMetadata().GetClusterSnapshot().Topology.ClusterView.ShardNodes[0] - newShardID, err := c.GetMetadata().AllocShardID(ctx) - re.NoError(err) - p, err := split.NewProcedure(split.ProcedureParams{ - ID: 0, - Dispatch: dispatch, - Storage: s, - ClusterMetadata: c.GetMetadata(), - ClusterSnapshot: c.GetMetadata().GetClusterSnapshot(), - ShardID: createTableNodeShard.ID, - NewShardID: storage.ShardID(newShardID), - SchemaName: test.TestSchemaName, - TableNames: []string{test.TestTableName0}, - TargetNodeName: createTableNodeShard.NodeName, - }) - re.NoError(err) - err = p.Start(ctx) - re.NoError(err) - - // Validate split result: - // 1. Shards on node, split shard and new shard must be all exists. - // 2. Tables mapping of split shard and new shard must be all exists. - // 3. Tables in table mapping must be correct, the split table only exists on the new shard. - snapshot = c.GetMetadata().GetClusterSnapshot() - - splitShard, exists := snapshot.Topology.ShardViewsMapping[targetShardNode.ID] - re.True(exists) - newShard, exists := snapshot.Topology.ShardViewsMapping[storage.ShardID(newShardID)] - re.True(exists) - re.NotNil(splitShard) - re.NotNil(newShard) - - splitShardTables := splitShard.TableIDs - newShardTables := newShard.TableIDs - re.NotNil(splitShardTables) - re.NotNil(newShardTables) -} diff --git a/horaemeta/server/coordinator/procedure/operation/transferleader/batch_transfer_leader.go b/horaemeta/server/coordinator/procedure/operation/transferleader/batch_transfer_leader.go deleted file mode 100644 index dabd4bbe38..0000000000 --- a/horaemeta/server/coordinator/procedure/operation/transferleader/batch_transfer_leader.go +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package transferleader - -import ( - "context" - "fmt" - "sync" - - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/pkg/errors" - "go.uber.org/zap" - "golang.org/x/sync/errgroup" -) - -// BatchTransferLeaderProcedure is a proxy procedure contains a batch of TransferLeaderProcedure. -// It is used to support concurrent execution of a batch of TransferLeaderProcedure with same version. -type BatchTransferLeaderProcedure struct { - id uint64 - batch []procedure.Procedure - relatedVersionInfo procedure.RelatedVersionInfo - - // Protect the state. - lock sync.RWMutex - state procedure.State -} - -func NewBatchTransferLeaderProcedure(id uint64, batch []procedure.Procedure) (procedure.Procedure, error) { - if len(batch) == 0 { - return nil, procedure.ErrEmptyBatchProcedure - } - - relateVersionInfo, err := buildBatchRelatedVersionInfo(batch) - if err != nil { - return nil, err - } - - return &BatchTransferLeaderProcedure{ - id: id, - batch: batch, - relatedVersionInfo: relateVersionInfo, - lock: sync.RWMutex{}, - state: procedure.StateInit, - }, nil -} - -func buildBatchRelatedVersionInfo(batch []procedure.Procedure) (procedure.RelatedVersionInfo, error) { - var emptyInfo procedure.RelatedVersionInfo - if len(batch) == 0 { - return emptyInfo, nil - } - - result := procedure.RelatedVersionInfo{ - ClusterID: batch[0].RelatedVersionInfo().ClusterID, - ShardWithVersion: map[storage.ShardID]uint64{}, - ClusterVersion: batch[0].RelatedVersionInfo().ClusterVersion, - } - - // The version of this batch of procedures must be the same. - for _, p := range batch { - if p.RelatedVersionInfo().ClusterID != result.ClusterID { - return emptyInfo, errors.WithMessage(procedure.ErrMergeBatchProcedure, "procedure clusterID in the same batch is inconsistent") - } - if p.RelatedVersionInfo().ClusterVersion != result.ClusterVersion { - return emptyInfo, errors.WithMessage(procedure.ErrMergeBatchProcedure, "procedure clusterVersion in the same batch is inconsistent") - } - // The ShardVersion of the same shard must be consistent. - for shardID, version := range p.RelatedVersionInfo().ShardWithVersion { - if resultVersion, exists := result.ShardWithVersion[shardID]; exists { - if version != resultVersion { - return emptyInfo, errors.WithMessage(procedure.ErrMergeBatchProcedure, fmt.Sprintf("procedure shardVersion in the same batch is inconsistent, shardID:%d, expectedShardVersion:%d, shardVersion:%d", shardID, version, resultVersion)) - } - } else { - result.ShardWithVersion[shardID] = version - } - } - } - - return result, nil -} - -func (p *BatchTransferLeaderProcedure) ID() uint64 { - return p.id -} - -func (p *BatchTransferLeaderProcedure) Kind() procedure.Kind { - return procedure.TransferLeader -} - -func (p *BatchTransferLeaderProcedure) Start(ctx context.Context) error { - // Start procedures with multiple goroutine. 
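[Editor's note] The `Start` method that follows fans the sub-procedures out with `golang.org/x/sync/errgroup` and fails fast on the first error. A stripped-down sketch of that fan-out pattern is shown below, using a hypothetical `task` type rather than the real `procedure.Procedure` interface.

```go
package main

import (
	"context"
	"fmt"

	"golang.org/x/sync/errgroup"
)

// task stands in for a sub-procedure; the real code runs procedure.Procedure values.
type task func(ctx context.Context) error

func runBatch(ctx context.Context, batch []task) error {
	g, ctx := errgroup.WithContext(ctx)
	for _, t := range batch {
		t := t // capture the loop variable (needed before Go 1.22, as in the removed code)
		g.Go(func() error {
			return t(ctx)
		})
	}
	// Wait blocks for every goroutine and returns the first non-nil error.
	return g.Wait()
}

func main() {
	batch := []task{
		func(context.Context) error { return nil },
		func(context.Context) error { return fmt.Errorf("transfer leader for shard 1 failed") },
	}
	fmt.Println(runBatch(context.Background(), batch))
}
```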
- g, _ := errgroup.WithContext(ctx) - for _, p := range p.batch { - p := p - g.Go(func() error { - err := p.Start(ctx) - if err != nil { - log.Error("procedure start failed", zap.Error(err), zap.Uint64("procedureID", p.ID()), zap.Error(err)) - } - return err - }) - } - - if err := g.Wait(); err != nil { - p.updateStateWithLock(procedure.StateFailed) - return err - } - - p.updateStateWithLock(procedure.StateFinished) - return nil -} - -func (p *BatchTransferLeaderProcedure) Cancel(_ context.Context) error { - p.updateStateWithLock(procedure.StateCancelled) - return nil -} - -func (p *BatchTransferLeaderProcedure) State() procedure.State { - return p.state -} - -func (p *BatchTransferLeaderProcedure) RelatedVersionInfo() procedure.RelatedVersionInfo { - return p.relatedVersionInfo -} - -func (p *BatchTransferLeaderProcedure) Priority() procedure.Priority { - return p.batch[0].Priority() -} - -func (p *BatchTransferLeaderProcedure) updateStateWithLock(state procedure.State) { - p.lock.Lock() - defer p.lock.Unlock() - - p.state = state -} diff --git a/horaemeta/server/coordinator/procedure/operation/transferleader/batch_transfer_leader_test.go b/horaemeta/server/coordinator/procedure/operation/transferleader/batch_transfer_leader_test.go deleted file mode 100644 index 1226a7eace..0000000000 --- a/horaemeta/server/coordinator/procedure/operation/transferleader/batch_transfer_leader_test.go +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package transferleader_test - -import ( - "context" - "testing" - - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/operation/transferleader" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/stretchr/testify/require" -) - -type mockProcedure struct { - ClusterID storage.ClusterID - clusterVersion uint64 - kind procedure.Kind - ShardWithVersion map[storage.ShardID]uint64 -} - -func (m mockProcedure) ID() uint64 { - return 0 -} - -func (m mockProcedure) Kind() procedure.Kind { - return m.kind -} - -func (m mockProcedure) Start(_ context.Context) error { - return nil -} - -func (m mockProcedure) Cancel(_ context.Context) error { - return nil -} - -func (m mockProcedure) State() procedure.State { - return procedure.StateInit -} - -func (m mockProcedure) RelatedVersionInfo() procedure.RelatedVersionInfo { - return procedure.RelatedVersionInfo{ - ClusterID: m.ClusterID, - ShardWithVersion: m.ShardWithVersion, - ClusterVersion: m.clusterVersion, - } -} - -func (m mockProcedure) Priority() procedure.Priority { - return procedure.PriorityLow -} - -func TestBatchProcedure(t *testing.T) { - re := require.New(t) - var procedures []procedure.Procedure - - // Procedures with same type and version. - for i := 0; i < 3; i++ { - shardWithVersion := map[storage.ShardID]uint64{} - shardWithVersion[storage.ShardID(i)] = 0 - p := CreateMockProcedure(storage.ClusterID(0), 0, 0, shardWithVersion) - procedures = append(procedures, p) - } - _, err := transferleader.NewBatchTransferLeaderProcedure(0, procedures) - re.NoError(err) - - // Procedure with different clusterID. - for i := 0; i < 3; i++ { - shardWithVersion := map[storage.ShardID]uint64{} - shardWithVersion[storage.ShardID(i)] = 0 - p := CreateMockProcedure(storage.ClusterID(i), 0, procedure.TransferLeader, shardWithVersion) - procedures = append(procedures, p) - } - _, err = transferleader.NewBatchTransferLeaderProcedure(0, procedures) - re.Error(err) - - // Procedures with different type. - for i := 0; i < 3; i++ { - shardWithVersion := map[storage.ShardID]uint64{} - shardWithVersion[storage.ShardID(i)] = 0 - p := CreateMockProcedure(0, 0, procedure.Kind(i), shardWithVersion) - procedures = append(procedures, p) - } - _, err = transferleader.NewBatchTransferLeaderProcedure(0, procedures) - re.Error(err) - - // Procedures with different version. 
- for i := 0; i < 3; i++ { - shardWithVersion := map[storage.ShardID]uint64{} - shardWithVersion[storage.ShardID(0)] = uint64(i) - p := CreateMockProcedure(0, 0, procedure.Kind(i), shardWithVersion) - procedures = append(procedures, p) - } - _, err = transferleader.NewBatchTransferLeaderProcedure(0, procedures) - re.Error(err) -} - -func CreateMockProcedure(clusterID storage.ClusterID, clusterVersion uint64, typ procedure.Kind, shardWithVersion map[storage.ShardID]uint64) procedure.Procedure { - return mockProcedure{ - ClusterID: clusterID, - clusterVersion: clusterVersion, - kind: typ, - ShardWithVersion: shardWithVersion, - } -} diff --git a/horaemeta/server/coordinator/procedure/operation/transferleader/transfer_leader.go b/horaemeta/server/coordinator/procedure/operation/transferleader/transfer_leader.go deleted file mode 100644 index 1e2bdf2814..0000000000 --- a/horaemeta/server/coordinator/procedure/operation/transferleader/transfer_leader.go +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package transferleader - -import ( - "context" - "sync" - - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator/eventdispatch" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/looplab/fsm" - "github.com/pkg/errors" - "go.uber.org/zap" -) - -// Fsm state change: Begin -> CloseOldLeader -> OpenNewLeader -> Finish. -// CloseOldLeader will send close shard request if the old leader node exists. -// OpenNewLeader will send open shard request to new leader node. -const ( - eventCloseOldLeader = "EventCloseOldLeader" - eventOpenNewLeader = "EventOpenNewLeader" - eventFinish = "EventFinish" - - stateBegin = "StateBegin" - stateCloseOldLeader = "StateCloseOldLeader" - stateOpenNewLeader = "StateOpenNewLeader" - stateFinish = "StateFinish" -) - -var ( - transferLeaderEvents = fsm.Events{ - {Name: eventCloseOldLeader, Src: []string{stateBegin}, Dst: stateCloseOldLeader}, - {Name: eventOpenNewLeader, Src: []string{stateCloseOldLeader}, Dst: stateOpenNewLeader}, - {Name: eventFinish, Src: []string{stateOpenNewLeader}, Dst: stateFinish}, - } - transferLeaderCallbacks = fsm.Callbacks{ - eventCloseOldLeader: closeOldLeaderCallback, - eventOpenNewLeader: openNewShardCallback, - eventFinish: finishCallback, - } -) - -// Procedure will not persist. -// TODO: After supporting the expiration cleanup mechanism of Procedure, we can consider persisting it to facilitate tracing historical information. 
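[Editor's note] The transfer-leader procedure defined next closes the shard on the old leader only when one is known, then opens it on the new leader. A hedged sketch of that two-step flow follows, written against a hypothetical `dispatch` interface rather than the real `eventdispatch.Dispatch`.

```go
package transferexample

import (
	"context"
	"fmt"
)

// dispatch is a stand-in for the real eventdispatch.Dispatch interface.
type dispatch interface {
	CloseShard(ctx context.Context, node string, shardID uint32) error
	OpenShard(ctx context.Context, node string, shardID uint32) error
}

// transferLeader closes the shard on the old leader (if one is known) and
// then opens it on the new leader, mirroring the callback order of the procedure.
func transferLeader(ctx context.Context, d dispatch, shardID uint32, oldLeader, newLeader string) error {
	if oldLeader != "" { // an empty old leader means the shard is not open anywhere yet
		if err := d.CloseShard(ctx, oldLeader, shardID); err != nil {
			return fmt.Errorf("close shard %d on %s: %w", shardID, oldLeader, err)
		}
	}
	if err := d.OpenShard(ctx, newLeader, shardID); err != nil {
		return fmt.Errorf("open shard %d on %s: %w", shardID, newLeader, err)
	}
	return nil
}
```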
-type Procedure struct { - fsm *fsm.FSM - params ProcedureParams - relatedVersionInfo procedure.RelatedVersionInfo - - // Protect the state. - // FIXME: the procedure should be executed sequentially, so any need to use a lock to protect it? - lock sync.RWMutex - state procedure.State -} - -// callbackRequest is fsm callbacks param. -type callbackRequest struct { - ctx context.Context - p *Procedure -} - -type ProcedureParams struct { - ID uint64 - - Dispatch eventdispatch.Dispatch - Storage procedure.Storage - - ClusterSnapshot metadata.Snapshot - - ShardID storage.ShardID - OldLeaderNodeName string - NewLeaderNodeName string -} - -func NewProcedure(params ProcedureParams) (procedure.Procedure, error) { - if err := validateClusterTopology(params.ClusterSnapshot.Topology, params.ShardID, params.OldLeaderNodeName); err != nil { - return nil, err - } - - relatedVersionInfo, err := buildRelatedVersionInfo(params) - if err != nil { - return nil, err - } - - transferLeaderOperationFsm := fsm.NewFSM( - stateBegin, - transferLeaderEvents, - transferLeaderCallbacks, - ) - - return &Procedure{ - fsm: transferLeaderOperationFsm, - params: params, - relatedVersionInfo: relatedVersionInfo, - lock: sync.RWMutex{}, - state: procedure.StateInit, - }, nil -} - -func buildRelatedVersionInfo(params ProcedureParams) (procedure.RelatedVersionInfo, error) { - shardViewWithVersion := make(map[storage.ShardID]uint64, 0) - shardView, exists := params.ClusterSnapshot.Topology.ShardViewsMapping[params.ShardID] - if !exists { - return procedure.RelatedVersionInfo{}, errors.WithMessagef(metadata.ErrShardNotFound, "shard not found in topology, shardID:%d", params.ShardID) - } - shardViewWithVersion[params.ShardID] = shardView.Version - - relatedVersionInfo := procedure.RelatedVersionInfo{ - ClusterID: params.ClusterSnapshot.Topology.ClusterView.ClusterID, - ShardWithVersion: shardViewWithVersion, - ClusterVersion: params.ClusterSnapshot.Topology.ClusterView.Version, - } - return relatedVersionInfo, nil -} - -func validateClusterTopology(topology metadata.Topology, shardID storage.ShardID, oldLeaderNodeName string) error { - _, found := topology.ShardViewsMapping[shardID] - if !found { - log.Error("shard not found", zap.Uint64("shardID", uint64(shardID))) - return metadata.ErrShardNotFound - } - if len(oldLeaderNodeName) == 0 { - return nil - } - shardNodes := topology.ClusterView.ShardNodes - if len(shardNodes) == 0 { - log.Error("shard not exist in any node", zap.Uint32("shardID", uint32(shardID))) - return metadata.ErrShardNotFound - } - for _, shardNode := range shardNodes { - if shardNode.ID == shardID { - leaderNodeName := shardNode.NodeName - if leaderNodeName != oldLeaderNodeName { - log.Error("shard leader node not match", zap.String("requestOldLeaderNodeName", oldLeaderNodeName), zap.String("actualOldLeaderNodeName", leaderNodeName)) - return metadata.ErrNodeNotFound - } - } - } - return nil -} - -func (p *Procedure) ID() uint64 { - return p.params.ID -} - -func (p *Procedure) Kind() procedure.Kind { - return procedure.TransferLeader -} - -func (p *Procedure) RelatedVersionInfo() procedure.RelatedVersionInfo { - return p.relatedVersionInfo -} - -func (p *Procedure) Priority() procedure.Priority { - return procedure.PriorityHigh -} - -func (p *Procedure) Start(ctx context.Context) error { - p.updateStateWithLock(procedure.StateRunning) - - transferLeaderRequest := callbackRequest{ - ctx: ctx, - p: p, - } - - for { - switch p.fsm.Current() { - case stateBegin: - if err := p.fsm.Event(eventCloseOldLeader, 
transferLeaderRequest); err != nil { - p.updateStateWithLock(procedure.StateFailed) - return errors.WithMessage(err, "transferLeader procedure close old leader") - } - case stateCloseOldLeader: - if err := p.fsm.Event(eventOpenNewLeader, transferLeaderRequest); err != nil { - p.updateStateWithLock(procedure.StateFailed) - return errors.WithMessage(err, "transferLeader procedure open new leader") - } - case stateOpenNewLeader: - if err := p.fsm.Event(eventFinish, transferLeaderRequest); err != nil { - p.updateStateWithLock(procedure.StateFailed) - return errors.WithMessage(err, "transferLeader procedure finish") - } - case stateFinish: - // TODO: The state update sequence here is inconsistent with the previous one. Consider reconstructing the state update logic of the state machine. - p.updateStateWithLock(procedure.StateFinished) - return nil - } - } -} - -func (p *Procedure) Cancel(_ context.Context) error { - p.updateStateWithLock(procedure.StateCancelled) - return nil -} - -func (p *Procedure) State() procedure.State { - p.lock.RLock() - defer p.lock.RUnlock() - return p.state -} - -func closeOldLeaderCallback(event *fsm.Event) { - req, err := procedure.GetRequestFromEvent[callbackRequest](event) - if err != nil { - procedure.CancelEventWithLog(event, err, "get request from event") - return - } - ctx := req.ctx - - if len(req.p.params.OldLeaderNodeName) == 0 { - return - } - - log.Info("try to close shard", zap.Uint64("procedureID", req.p.ID()), zap.Uint64("shardID", uint64(req.p.params.ShardID)), zap.String("oldLeader", req.p.params.OldLeaderNodeName)) - - closeShardRequest := eventdispatch.CloseShardRequest{ - ShardID: uint32(req.p.params.ShardID), - } - if err := req.p.params.Dispatch.CloseShard(ctx, req.p.params.OldLeaderNodeName, closeShardRequest); err != nil { - procedure.CancelEventWithLog(event, err, "close shard", zap.Uint32("shardID", uint32(req.p.params.ShardID)), zap.String("oldLeaderName", req.p.params.OldLeaderNodeName)) - return - } - - log.Info("close shard finish", zap.Uint64("procedureID", req.p.ID()), zap.Uint64("shardID", req.p.params.ID), zap.String("oldLeader", req.p.params.OldLeaderNodeName)) -} - -func openNewShardCallback(event *fsm.Event) { - req, err := procedure.GetRequestFromEvent[callbackRequest](event) - if err != nil { - procedure.CancelEventWithLog(event, err, "get request from event") - return - } - ctx := req.ctx - - shardView, exists := req.p.params.ClusterSnapshot.Topology.ShardViewsMapping[req.p.params.ShardID] - if !exists { - procedure.CancelEventWithLog(event, metadata.ErrShardNotFound, "shard not found in topology", zap.Uint64("shardID", uint64(req.p.params.ShardID))) - return - } - - openShardRequest := eventdispatch.OpenShardRequest{ - Shard: metadata.ShardInfo{ - ID: req.p.params.ShardID, - Role: storage.ShardRoleLeader, - Version: shardView.Version, - Status: storage.ShardStatusUnknown, - }, - } - - log.Info("try to open shard", zap.Uint64("procedureID", req.p.ID()), zap.Uint64("shardID", uint64(req.p.params.ShardID)), zap.String("newLeader", req.p.params.NewLeaderNodeName)) - - if err := req.p.params.Dispatch.OpenShard(ctx, req.p.params.NewLeaderNodeName, openShardRequest); err != nil { - procedure.CancelEventWithLog(event, err, "open shard", zap.Uint32("shardID", uint32(req.p.params.ShardID)), zap.String("newLeaderNode", req.p.params.NewLeaderNodeName)) - return - } - - log.Info("open shard finish", zap.Uint64("procedureID", req.p.ID()), zap.Uint64("shardID", uint64(req.p.params.ShardID)), zap.String("newLeader", 
req.p.params.NewLeaderNodeName)) -} - -func finishCallback(event *fsm.Event) { - req, err := procedure.GetRequestFromEvent[callbackRequest](event) - if err != nil { - procedure.CancelEventWithLog(event, err, "get request from event") - return - } - - log.Info("transfer leader finish", zap.Uint64("procedureID", req.p.ID()), zap.Uint32("shardID", uint32(req.p.params.ShardID)), zap.String("oldLeaderNode", req.p.params.OldLeaderNodeName), zap.String("newLeaderNode", req.p.params.NewLeaderNodeName)) -} - -func (p *Procedure) updateStateWithLock(state procedure.State) { - p.lock.Lock() - defer p.lock.Unlock() - - p.state = state -} diff --git a/horaemeta/server/coordinator/procedure/operation/transferleader/trasnfer_leader_test.go b/horaemeta/server/coordinator/procedure/operation/transferleader/trasnfer_leader_test.go deleted file mode 100644 index 25951a3cd6..0000000000 --- a/horaemeta/server/coordinator/procedure/operation/transferleader/trasnfer_leader_test.go +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package transferleader_test - -import ( - "context" - "testing" - - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/operation/transferleader" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/test" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/stretchr/testify/require" -) - -func TestTransferLeader(t *testing.T) { - re := require.New(t) - ctx := context.Background() - dispatch := test.MockDispatch{} - c := test.InitEmptyCluster(ctx, t) - s := test.NewTestStorage(t) - - snapshot := c.GetMetadata().GetClusterSnapshot() - - var targetShardID storage.ShardID - for shardID := range snapshot.Topology.ShardViewsMapping { - targetShardID = shardID - break - } - newLeaderNodeName := snapshot.RegisteredNodes[0].Node.Name - - p, err := transferleader.NewProcedure(transferleader.ProcedureParams{ - ID: 0, - Dispatch: dispatch, - Storage: s, - ClusterSnapshot: snapshot, - ShardID: targetShardID, - OldLeaderNodeName: "", - NewLeaderNodeName: newLeaderNodeName, - }) - re.NoError(err) - - err = p.Start(ctx) - re.NoError(err) -} diff --git a/horaemeta/server/coordinator/procedure/procedure.go b/horaemeta/server/coordinator/procedure/procedure.go deleted file mode 100644 index 79f09119c5..0000000000 --- a/horaemeta/server/coordinator/procedure/procedure.go +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package procedure - -import ( - "context" - - "github.com/apache/incubator-horaedb-meta/server/storage" -) - -type State string - -const ( - StateInit = "init" - StateRunning = "running" - StateFinished = "finished" - StateFailed = "failed" - StateCancelled = "cancelled" -) - -type Kind uint - -const ( - // Cluster Operation - Create Kind = iota - Delete - TransferLeader - Migrate - Split - Merge - Scatter - - // DDL - CreateTable - DropTable - CreatePartitionTable - DropPartitionTable -) - -type Priority uint32 - -// Lower value means higher priority. -const ( - PriorityHigh Priority = 3 - PriorityMed Priority = 5 - PriorityLow Priority = 10 -) - -// Procedure is used to describe how to execute a set of operations from the scheduler, e.g. SwitchLeaderProcedure, MergeShardProcedure. -type Procedure interface { - // ID of the procedure. - ID() uint64 - - // Kind of the procedure. - Kind() Kind - - // Start the procedure. - Start(ctx context.Context) error - - // Cancel the procedure. - Cancel(ctx context.Context) error - - // State of the procedure. Retrieve the state of this procedure. - State() State - - // RelatedVersionInfo return the related shard and version information corresponding to this procedure for verifying whether the procedure can be executed. - RelatedVersionInfo() RelatedVersionInfo - - // Priority present the priority of this procedure, the procedure with high level priority will be executed first. - Priority() Priority -} - -// Info is used to provide immutable description procedure information. -type Info struct { - ID uint64 - Kind Kind - State State -} - -type RelatedVersionInfo struct { - ClusterID storage.ClusterID - // shardWithVersion return the shardID associated with this procedure. - ShardWithVersion map[storage.ShardID]uint64 - // clusterVersion return the cluster version when the procedure is created. - // When performing cluster operation, it is necessary to ensure cluster version consistency. - ClusterVersion uint64 -} diff --git a/horaemeta/server/coordinator/procedure/storage.go b/horaemeta/server/coordinator/procedure/storage.go deleted file mode 100644 index 47133a70d0..0000000000 --- a/horaemeta/server/coordinator/procedure/storage.go +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package procedure - -import ( - "context" -) - -type Write interface { - CreateOrUpdate(ctx context.Context, meta Meta) error - CreateOrUpdateWithTTL(ctx context.Context, meta Meta, ttlSec int64) error -} - -type Meta struct { - ID uint64 - Kind Kind - State State - RawData []byte -} - -type Storage interface { - Write - List(ctx context.Context, procedureType Kind, batchSize int) ([]*Meta, error) - Delete(ctx context.Context, procedureType Kind, id uint64) error - MarkDeleted(ctx context.Context, procedureType Kind, id uint64) error -} diff --git a/horaemeta/server/coordinator/procedure/storage_impl.go b/horaemeta/server/coordinator/procedure/storage_impl.go deleted file mode 100644 index 3d60cd0585..0000000000 --- a/horaemeta/server/coordinator/procedure/storage_impl.go +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package procedure - -import ( - "context" - "encoding/json" - "fmt" - "math" - "path" - "strconv" - - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/etcdutil" - "github.com/pkg/errors" - clientv3 "go.etcd.io/etcd/client/v3" - "go.etcd.io/etcd/client/v3/clientv3util" - "go.uber.org/zap" -) - -const ( - Version = "v1" - PathProcedure = "procedure" - PathDeletedProcedure = "deletedProcedure" -) - -type EtcdStorageImpl struct { - client *clientv3.Client - clusterID uint32 - rootPath string -} - -func NewEtcdStorageImpl(client *clientv3.Client, rootPath string, clusterID uint32) Storage { - return &EtcdStorageImpl{ - client: client, - clusterID: clusterID, - rootPath: rootPath, - } -} - -// CreateOrUpdate example: -// /{rootPath}/v1/procedure/{procedureType}/{procedureID} -> {procedureState} + {data} -// ttl is only valid when greater than 0, if it is less than or equal to 0, it will be ignored. -func (e EtcdStorageImpl) CreateOrUpdate(ctx context.Context, meta Meta) error { - s, err := encode(&meta) - if err != nil { - return errors.WithMessage(err, "encode meta failed") - } - - keyPath := e.generaNormalKeyPath(meta.Kind, meta.ID) - - opPut := clientv3.OpPut(keyPath, s) - - if _, err = e.client.Do(ctx, opPut); err != nil { - return errors.WithMessage(err, "etcd put data failed") - } - - return nil -} - -// CreateOrUpdateWithTTL -// ttl is only valid when greater than 0, if it is less than or equal to 0, it will be ignored. 
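[Editor's note] `CreateOrUpdateWithTTL`, defined next, attaches each write to its own etcd lease so the key is dropped when the TTL expires, and the key path mirrors `generateKeyPath` further below. The following sketch shows the key layout and the lease-backed put; it assumes the standard etcd `clientv3` API, and the cluster/procedure IDs in the example comment are made up.

```go
package storageexample

import (
	"context"
	"fmt"
	"path"
	"strconv"

	clientv3 "go.etcd.io/etcd/client/v3"
)

func fmtID(id uint64) string { return fmt.Sprintf("%020d", id) }

// putWithTTL writes a procedure meta under a lease so etcd removes the key
// when the TTL expires. The key layout mirrors generateKeyPath, e.g.
// /rootPath/v1/procedure/00000000000000000001/2/00000000000000000042
func putWithTTL(ctx context.Context, cli *clientv3.Client, rootPath string, clusterID uint32, kind int, procedureID uint64, value string, ttlSec int64) error {
	key := path.Join(rootPath, "v1", "procedure",
		fmtID(uint64(clusterID)), strconv.Itoa(kind), fmtID(procedureID))

	// One lease per write, as in the removed implementation (see its TODO about lease pressure).
	lease, err := cli.Grant(ctx, ttlSec)
	if err != nil {
		return err
	}
	_, err = cli.Do(ctx, clientv3.OpPut(key, value, clientv3.WithLease(lease.ID)))
	return err
}
```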
-func (e EtcdStorageImpl) CreateOrUpdateWithTTL(ctx context.Context, meta Meta, ttlSec int64) error { - s, err := encode(&meta) - if err != nil { - return errors.WithMessage(err, "encode meta failed") - } - - keyPath := e.generaNormalKeyPath(meta.Kind, meta.ID) - - // TODO: This implementation will cause each procedure to correspond to an etcd lease, which may cause too much pressure on etcd? May need to optimize implementation. - resp, err := e.client.Grant(ctx, ttlSec) - if err != nil { - return errors.WithMessage(err, "etcd get lease failed") - } - opPut := clientv3.OpPut(keyPath, s, clientv3.WithLease(resp.ID)) - - if _, err = e.client.Do(ctx, opPut); err != nil { - return errors.WithMessage(err, "etcd put data failed") - } - - return nil -} - -// Delete will delete the specified procedure, and try to delete its corresponding history procedure if it exists. -func (e EtcdStorageImpl) Delete(ctx context.Context, procedureType Kind, id uint64) error { - keyPath := e.generaNormalKeyPath(procedureType, id) - opDelete := clientv3.OpDelete(keyPath) - - if _, err := e.client.Txn(ctx).Then(opDelete).Commit(); err != nil { - return err - } - - deletedKeyPath := e.generaDeletedKeyPath(procedureType, id) - opDeleteMark := clientv3.OpDelete(deletedKeyPath) - // Try to delete history procedure if it exists. - keyExists := clientv3util.KeyExists(deletedKeyPath) - if _, err := e.client.Txn(ctx).If(keyExists).Then(opDeleteMark).Commit(); err != nil { - log.Warn("drop history procedure failed", zap.String("deletedKeyPath", deletedKeyPath), zap.Error(err)) - } - - return nil -} - -// MarkDeleted Do a soft deletion, and the deleted key's format is: -// /{rootPath}/v1/historyProcedure/{clusterID}/{procedureID} -func (e EtcdStorageImpl) MarkDeleted(ctx context.Context, procedureType Kind, id uint64) error { - keyPath := e.generaNormalKeyPath(procedureType, id) - meta, err := etcdutil.Get(ctx, e.client, keyPath) - if err != nil { - return errors.WithMessage(err, "get meta failed") - } - - deletedKeyPath := e.generaDeletedKeyPath(procedureType, id) - opDelete := clientv3.OpDelete(keyPath) - opPut := clientv3.OpPut(deletedKeyPath, meta) - - _, err = e.client.Txn(ctx).Then(opDelete, opPut).Commit() - - return err -} - -func (e EtcdStorageImpl) List(ctx context.Context, procedureType Kind, batchSize int) ([]*Meta, error) { - var metas []*Meta - do := func(key string, value []byte) error { - meta, err := decodeMeta(string(value)) - if err != nil { - return errors.WithMessagef(err, "decode meta failed, key:%s, value:%v", key, value) - } - - metas = append(metas, meta) - return nil - } - - startKey := e.generaNormalKeyPath(procedureType, uint64(0)) - endKey := e.generaNormalKeyPath(procedureType, math.MaxUint64) - - err := etcdutil.Scan(ctx, e.client, startKey, endKey, batchSize, do) - if err != nil { - return nil, errors.WithMessage(err, "scan procedure failed") - } - return metas, nil -} - -func (e EtcdStorageImpl) generaNormalKeyPath(procedureType Kind, procedureID uint64) string { - return e.generateKeyPath(procedureID, procedureType, false) -} - -func (e EtcdStorageImpl) generaDeletedKeyPath(procedureType Kind, procedureID uint64) string { - return e.generateKeyPath(procedureID, procedureType, true) -} - -func (e EtcdStorageImpl) generateKeyPath(procedureID uint64, procedureType Kind, isDeleted bool) string { - var procedurePath string - if isDeleted { - procedurePath = PathDeletedProcedure - } else { - procedurePath = PathProcedure - } - return path.Join(e.rootPath, Version, procedurePath, 
fmtID(uint64(e.clusterID)), strconv.Itoa(int(procedureType)), fmtID(procedureID)) -} - -func fmtID(id uint64) string { - return fmt.Sprintf("%020d", id) -} - -// TODO: Use proto.Marshal replace json.Marshal -func encode(meta *Meta) (string, error) { - bytes, err := json.Marshal(meta) - if err != nil { - return "", err - } - return string(bytes), nil -} - -// TODO: Use proto.Unmarshal replace json.unmarshal -func decodeMeta(meta string) (*Meta, error) { - var m Meta - err := json.Unmarshal([]byte(meta), &m) - return &m, err -} diff --git a/horaemeta/server/coordinator/procedure/storage_test.go b/horaemeta/server/coordinator/procedure/storage_test.go deleted file mode 100644 index 5ee496d69e..0000000000 --- a/horaemeta/server/coordinator/procedure/storage_test.go +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package procedure - -import ( - "context" - "testing" - "time" - - "github.com/apache/incubator-horaedb-meta/server/etcdutil" - "github.com/stretchr/testify/require" -) - -const ( - TestClusterID = 1 - DefaultTimeout = time.Second * 10 - DefaultScanBatchSie = 100 - TestRootPath = "/rootPath" -) - -func testWrite(t *testing.T, storage Storage) { - re := require.New(t) - ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout) - defer cancel() - - testMeta1 := Meta{ - ID: uint64(1), - Kind: TransferLeader, - State: StateInit, - RawData: []byte("test"), - } - - // Test create new procedure - err := storage.CreateOrUpdate(ctx, testMeta1) - re.NoError(err) - - testMeta2 := Meta{ - ID: uint64(2), - Kind: TransferLeader, - State: StateInit, - RawData: []byte("test"), - } - err = storage.CreateOrUpdate(ctx, testMeta2) - re.NoError(err) - - // Test update procedure - testMeta2.RawData = []byte("test update") - err = storage.CreateOrUpdate(ctx, testMeta2) - re.NoError(err) -} - -func testScan(t *testing.T, storage Storage) { - re := require.New(t) - ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout) - defer cancel() - - metas, err := storage.List(ctx, TransferLeader, DefaultScanBatchSie) - re.NoError(err) - re.Equal(2, len(metas)) - re.Equal("test", string(metas[0].RawData)) - re.Equal("test update", string(metas[1].RawData)) -} - -func testDelete(t *testing.T, storage Storage) { - re := require.New(t) - ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout) - defer cancel() - - testMeta1 := &Meta{ - ID: uint64(1), - Kind: TransferLeader, - State: StateInit, - RawData: []byte("test"), - } - err := storage.MarkDeleted(ctx, TransferLeader, testMeta1.ID) - re.NoError(err) - - metas, err := storage.List(ctx, TransferLeader, DefaultScanBatchSie) - re.NoError(err) - re.Equal(1, len(metas)) - - testMeta2 := Meta{ - ID: uint64(2), - Kind: TransferLeader, - 
State: StateInit, - RawData: []byte("test"), - } - err = storage.Delete(ctx, TransferLeader, testMeta2.ID) - re.NoError(err) - - metas, err = storage.List(ctx, TransferLeader, DefaultScanBatchSie) - re.NoError(err) - re.Equal(0, len(metas)) -} - -func NewTestStorage(t *testing.T) Storage { - _, client, _ := etcdutil.PrepareEtcdServerAndClient(t) - storage := NewEtcdStorageImpl(client, TestRootPath, TestClusterID) - return storage -} - -func TestStorage(t *testing.T) { - storage := NewTestStorage(t) - testWrite(t, storage) - testScan(t, storage) - testDelete(t, storage) -} diff --git a/horaemeta/server/coordinator/procedure/test/common.go b/horaemeta/server/coordinator/procedure/test/common.go deleted file mode 100644 index 7550601226..0000000000 --- a/horaemeta/server/coordinator/procedure/test/common.go +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package test - -import ( - "context" - "crypto/rand" - "fmt" - "math" - "math/big" - "testing" - "time" - - "github.com/apache/incubator-horaedb-meta/server/cluster" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator/eventdispatch" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler/nodepicker" - "github.com/apache/incubator-horaedb-meta/server/etcdutil" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/stretchr/testify/require" - "go.uber.org/zap" -) - -const ( - TestTableName0 = "table0" - TestTableName1 = "table1" - TestSchemaName = "TestSchemaName" - TestRootPath = "/rootPath" - DefaultIDAllocatorStep = 20 - ClusterName = "testCluster1" - DefaultNodeCount = 2 - DefaultShardTotal = 4 - DefaultSchedulerOperator = true - DefaultTopologyType = "static" - DefaultProcedureExecutingBatchSize = math.MaxUint32 -) - -type MockDispatch struct{} - -func (m MockDispatch) OpenShard(_ context.Context, _ string, _ eventdispatch.OpenShardRequest) error { - return nil -} - -func (m MockDispatch) CloseShard(_ context.Context, _ string, _ eventdispatch.CloseShardRequest) error { - return nil -} - -func (m MockDispatch) CreateTableOnShard(_ context.Context, _ string, _ eventdispatch.CreateTableOnShardRequest) (uint64, error) { - return 0, nil -} - -func (m MockDispatch) DropTableOnShard(_ context.Context, _ string, _ eventdispatch.DropTableOnShardRequest) (uint64, error) { - return 0, nil -} - -func (m MockDispatch) OpenTableOnShard(_ context.Context, _ string, _ eventdispatch.OpenTableOnShardRequest) error { - return nil -} - -func (m MockDispatch) CloseTableOnShard(_ context.Context, _ string, _ 
eventdispatch.CloseTableOnShardRequest) error { - return nil -} - -type MockStorage struct{} - -func (m MockStorage) CreateOrUpdate(_ context.Context, _ procedure.Meta) error { - return nil -} - -func (m MockStorage) CreateOrUpdateWithTTL(_ context.Context, _ procedure.Meta, _ int64) error { - return nil -} - -func (m MockStorage) List(_ context.Context, _ procedure.Kind, _ int) ([]*procedure.Meta, error) { - return nil, nil -} - -func (m MockStorage) Delete(_ context.Context, _ procedure.Kind, _ uint64) error { - return nil -} - -func (m MockStorage) MarkDeleted(_ context.Context, _ procedure.Kind, _ uint64) error { - return nil -} - -func NewTestStorage(_ *testing.T) procedure.Storage { - return MockStorage{} -} - -type MockIDAllocator struct{} - -func (m MockIDAllocator) Alloc(_ context.Context) (uint64, error) { - return 0, nil -} - -func (m MockIDAllocator) Collect(_ context.Context, _ uint64) error { - return nil -} - -// InitEmptyCluster will return a cluster that has created shards and nodes, but it does not have any shard node mapping. -func InitEmptyCluster(ctx context.Context, t *testing.T) *cluster.Cluster { - re := require.New(t) - - _, client, _ := etcdutil.PrepareEtcdServerAndClient(t) - clusterStorage := storage.NewStorageWithEtcdBackend(client, TestRootPath, storage.Options{ - MaxScanLimit: 100, MinScanLimit: 10, MaxOpsPerTxn: 10, - }) - - logger := zap.NewNop() - - clusterMetadata := metadata.NewClusterMetadata(logger, storage.Cluster{ - ID: 0, - Name: ClusterName, - MinNodeCount: DefaultNodeCount, - ShardTotal: DefaultShardTotal, - TopologyType: DefaultTopologyType, - ProcedureExecutingBatchSize: DefaultProcedureExecutingBatchSize, - CreatedAt: 0, - ModifiedAt: 0, - }, clusterStorage, client, TestRootPath, DefaultIDAllocatorStep) - - err := clusterMetadata.Init(ctx) - re.NoError(err) - - err = clusterMetadata.Load(ctx) - re.NoError(err) - - c, err := cluster.NewCluster(logger, clusterMetadata, client, TestRootPath) - re.NoError(err) - - _, _, err = c.GetMetadata().GetOrCreateSchema(ctx, TestSchemaName) - re.NoError(err) - - lastTouchTime := time.Now().UnixMilli() - for i := 0; i < DefaultNodeCount; i++ { - node := storage.Node{ - Name: fmt.Sprintf("node%d", i), - NodeStats: storage.NewEmptyNodeStats(), - LastTouchTime: uint64(lastTouchTime), - State: storage.NodeStateUnknown, - } - err = c.GetMetadata().RegisterNode(ctx, metadata.RegisteredNode{ - Node: node, - ShardInfos: nil, - }) - re.NoError(err) - } - - return c -} - -func InitEmptyClusterWithConfig(ctx context.Context, t *testing.T, shardNumber int, nodeNumber int) *cluster.Cluster { - re := require.New(t) - - _, client, _ := etcdutil.PrepareEtcdServerAndClient(t) - clusterStorage := storage.NewStorageWithEtcdBackend(client, TestRootPath, storage.Options{ - MaxScanLimit: 100, MinScanLimit: 10, MaxOpsPerTxn: 32, - }) - - logger := zap.NewNop() - - clusterMetadata := metadata.NewClusterMetadata(logger, storage.Cluster{ - ID: 0, - Name: ClusterName, - MinNodeCount: uint32(nodeNumber), - ShardTotal: uint32(shardNumber), - TopologyType: DefaultTopologyType, - ProcedureExecutingBatchSize: DefaultProcedureExecutingBatchSize, - CreatedAt: 0, - ModifiedAt: 0, - }, clusterStorage, client, TestRootPath, DefaultIDAllocatorStep) - - err := clusterMetadata.Init(ctx) - re.NoError(err) - - err = clusterMetadata.Load(ctx) - re.NoError(err) - - c, err := cluster.NewCluster(logger, clusterMetadata, client, TestRootPath) - re.NoError(err) - - _, _, err = c.GetMetadata().GetOrCreateSchema(ctx, TestSchemaName) - re.NoError(err) - - 
lastTouchTime := time.Now().UnixMilli() - for i := 0; i < nodeNumber; i++ { - node := storage.Node{ - Name: fmt.Sprintf("node%d", i), - NodeStats: storage.NewEmptyNodeStats(), - LastTouchTime: uint64(lastTouchTime), - State: storage.NodeStateUnknown, - } - err = c.GetMetadata().RegisterNode(ctx, metadata.RegisteredNode{ - Node: node, - ShardInfos: []metadata.ShardInfo{}, - }) - re.NoError(err) - } - - return c -} - -// InitPrepareCluster will return a cluster that has created shards and nodes, and cluster state is prepare. -func InitPrepareCluster(ctx context.Context, t *testing.T) *cluster.Cluster { - re := require.New(t) - c := InitEmptyCluster(ctx, t) - - err := c.GetMetadata().UpdateClusterView(ctx, storage.ClusterStatePrepare, []storage.ShardNode{}) - re.NoError(err) - - return c -} - -// InitStableCluster will return a cluster that has created shards and nodes, and shards have been assigned to existing nodes. -func InitStableCluster(ctx context.Context, t *testing.T) *cluster.Cluster { - re := require.New(t) - c := InitEmptyCluster(ctx, t) - snapshot := c.GetMetadata().GetClusterSnapshot() - shardNodes := make([]storage.ShardNode, 0, DefaultShardTotal) - for _, shardView := range snapshot.Topology.ShardViewsMapping { - selectNodeIdx, err := rand.Int(rand.Reader, big.NewInt(int64(len(snapshot.RegisteredNodes)))) - re.NoError(err) - shardNodes = append(shardNodes, storage.ShardNode{ - ID: shardView.ShardID, - ShardRole: storage.ShardRoleLeader, - NodeName: snapshot.RegisteredNodes[selectNodeIdx.Int64()].Node.Name, - }) - } - - err := c.GetMetadata().UpdateClusterView(ctx, storage.ClusterStateStable, shardNodes) - re.NoError(err) - - return c -} - -func InitStableClusterWithConfig(ctx context.Context, t *testing.T, nodeNumber int, shardNumber int) *cluster.Cluster { - re := require.New(t) - c := InitEmptyClusterWithConfig(ctx, t, shardNumber, nodeNumber) - snapshot := c.GetMetadata().GetClusterSnapshot() - shardNodes := make([]storage.ShardNode, 0, DefaultShardTotal) - nodePicker := nodepicker.NewConsistentUniformHashNodePicker(zap.NewNop()) - var unAssignedShardIDs []storage.ShardID - for i := 0; i < shardNumber; i++ { - unAssignedShardIDs = append(unAssignedShardIDs, storage.ShardID(i)) - } - shardNodeMapping, err := nodePicker.PickNode(ctx, nodepicker.Config{ - NumTotalShards: uint32(shardNumber), - ShardAffinityRule: map[storage.ShardID]scheduler.ShardAffinity{}, - }, unAssignedShardIDs, snapshot.RegisteredNodes) - re.NoError(err) - - for shardID, node := range shardNodeMapping { - shardNodes = append(shardNodes, storage.ShardNode{ - ID: shardID, - ShardRole: storage.ShardRoleLeader, - NodeName: node.Node.Name, - }) - } - - err = c.GetMetadata().UpdateClusterView(ctx, storage.ClusterStateStable, shardNodes) - re.NoError(err) - - return c -} diff --git a/horaemeta/server/coordinator/procedure/util.go b/horaemeta/server/coordinator/procedure/util.go deleted file mode 100644 index 2fa808c4ce..0000000000 --- a/horaemeta/server/coordinator/procedure/util.go +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package procedure - -import ( - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/looplab/fsm" - "github.com/pkg/errors" - "go.uber.org/zap" -) - -// CancelEventWithLog Cancel event when error is not nil. If error is nil, do nothing. -func CancelEventWithLog(event *fsm.Event, err error, msg string, fields ...zap.Field) { - if err == nil { - return - } - fields = append(fields, zap.Error(err)) - log.Error(msg, fields...) - event.Cancel(errors.WithMessage(err, msg)) -} - -// nolint -func GetRequestFromEvent[T any](event *fsm.Event) (T, error) { - if len(event.Args) != 1 { - return *new(T), ErrGetRequest.WithCausef("event args length must be 1, actual length:%v", len(event.Args)) - } - - switch request := event.Args[0].(type) { - case T: - return request, nil - default: - return *new(T), ErrGetRequest.WithCausef("event arg type must be same as return type") - } -} diff --git a/horaemeta/server/coordinator/scheduler/manager/error.go b/horaemeta/server/coordinator/scheduler/manager/error.go deleted file mode 100644 index a88dc60a04..0000000000 --- a/horaemeta/server/coordinator/scheduler/manager/error.go +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package manager - -import "github.com/apache/incubator-horaedb-meta/pkg/coderr" - -var ErrInvalidTopologyType = coderr.NewCodeError(coderr.InvalidParams, "invalid topology type") diff --git a/horaemeta/server/coordinator/scheduler/manager/scheduler_manager.go b/horaemeta/server/coordinator/scheduler/manager/scheduler_manager.go deleted file mode 100644 index 51246a569b..0000000000 --- a/horaemeta/server/coordinator/scheduler/manager/scheduler_manager.go +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package manager - -import ( - "context" - "fmt" - "reflect" - "sync" - "sync/atomic" - "time" - - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler/nodepicker" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler/rebalanced" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler/reopen" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler/static" - "github.com/apache/incubator-horaedb-meta/server/coordinator/watch" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/pkg/errors" - clientv3 "go.etcd.io/etcd/client/v3" - "go.uber.org/zap" -) - -const ( - schedulerInterval = time.Second * 5 -) - -// SchedulerManager used to manage schedulers, it will register all schedulers when it starts. -// -// Each registered scheduler will generate procedures if the cluster topology matches the scheduling condition. -type SchedulerManager interface { - ListScheduler() []scheduler.Scheduler - - Start(ctx context.Context) error - - Stop(ctx context.Context) error - - // UpdateEnableSchedule can only be used in dynamic mode, it will throw error when topology type is static. - // when enableSchedule is true, shard topology will not be updated, it is usually used in scenarios such as cluster deploy. - UpdateEnableSchedule(ctx context.Context, enable bool) error - - // GetEnableSchedule can only be used in dynamic mode, it will throw error when topology type is static. - GetEnableSchedule(ctx context.Context) (bool, error) - - // AddShardAffinityRule adds a shard affinity rule to the manager, and then apply it to the underlying schedulers. - AddShardAffinityRule(ctx context.Context, rule scheduler.ShardAffinityRule) error - - // Remove the shard rules applied to some specific rule. - RemoveShardAffinityRule(ctx context.Context, shardID storage.ShardID) error - - // ListShardAffinityRules lists all the rules about shard affinity of all the registered schedulers. - ListShardAffinityRules(ctx context.Context) (map[string]scheduler.ShardAffinityRule, error) - - // Scheduler will be called when received new heartbeat, every scheduler registered in schedulerManager will be called to generate procedures. - // Scheduler cloud be schedule with fix time interval or heartbeat. - Scheduler(ctx context.Context, clusterSnapshot metadata.Snapshot) []scheduler.ScheduleResult -} - -type schedulerManagerImpl struct { - logger *zap.Logger - procedureManager procedure.Manager - factory *coordinator.Factory - nodePicker nodepicker.NodePicker - client *clientv3.Client - clusterMetadata *metadata.ClusterMetadata - rootPath string - - // This lock is used to protect the following field. 
- lock sync.RWMutex - registerSchedulers []scheduler.Scheduler - shardWatch watch.ShardWatch - isRunning atomic.Bool - topologyType storage.TopologyType - procedureExecutingBatchSize uint32 - enableSchedule bool - shardAffinities map[storage.ShardID]scheduler.ShardAffinityRule -} - -func NewManager(logger *zap.Logger, procedureManager procedure.Manager, factory *coordinator.Factory, clusterMetadata *metadata.ClusterMetadata, client *clientv3.Client, rootPath string, topologyType storage.TopologyType, procedureExecutingBatchSize uint32) SchedulerManager { - var shardWatch watch.ShardWatch - switch topologyType { - case storage.TopologyTypeDynamic: - shardWatch = watch.NewEtcdShardWatch(logger, clusterMetadata.Name(), rootPath, client) - shardWatch.RegisteringEventCallback(&schedulerWatchCallback{c: clusterMetadata}) - case storage.TopologyTypeStatic: - shardWatch = watch.NewNoopShardWatch() - } - - return &schedulerManagerImpl{ - logger: logger, - procedureManager: procedureManager, - factory: factory, - nodePicker: nodepicker.NewConsistentUniformHashNodePicker(logger), - client: client, - clusterMetadata: clusterMetadata, - rootPath: rootPath, - lock: sync.RWMutex{}, - registerSchedulers: []scheduler.Scheduler{}, - shardWatch: shardWatch, - isRunning: atomic.Bool{}, - topologyType: topologyType, - procedureExecutingBatchSize: procedureExecutingBatchSize, - enableSchedule: false, - shardAffinities: make(map[storage.ShardID]scheduler.ShardAffinityRule), - } -} - -func (m *schedulerManagerImpl) Stop(ctx context.Context) error { - m.lock.Lock() - defer m.lock.Unlock() - - if m.isRunning.Load() { - m.registerSchedulers = m.registerSchedulers[:0] - m.isRunning.Store(false) - if err := m.shardWatch.Stop(ctx); err != nil { - return errors.WithMessage(err, "stop shard watch failed") - } - } - - return nil -} - -func (m *schedulerManagerImpl) Start(ctx context.Context) error { - m.lock.Lock() - defer m.lock.Unlock() - - if m.isRunning.Load() { - return nil - } - - m.initRegister() - - if err := m.shardWatch.Start(ctx); err != nil { - return errors.WithMessage(err, "start shard watch failed") - } - - go func() { - m.isRunning.Store(true) - for { - if !m.isRunning.Load() { - m.logger.Info("scheduler manager is canceled") - return - } - - time.Sleep(schedulerInterval) - // Get latest cluster snapshot. 
- clusterSnapshot := m.clusterMetadata.GetClusterSnapshot() - m.logger.Debug("scheduler manager invoke", zap.String("clusterSnapshot", fmt.Sprintf("%v", clusterSnapshot))) - - if clusterSnapshot.Topology.IsPrepareFinished() { - m.logger.Info("try to update cluster state to stable") - if err := m.clusterMetadata.UpdateClusterView(ctx, storage.ClusterStateStable, clusterSnapshot.Topology.ClusterView.ShardNodes); err != nil { - m.logger.Error("update cluster view failed", zap.Error(err)) - } - continue - } - - results := m.Scheduler(ctx, clusterSnapshot) - for _, result := range results { - if result.Procedure != nil { - m.logger.Info("scheduler submit new procedure", zap.Uint64("ProcedureID", result.Procedure.ID()), zap.String("Reason", result.Reason)) - if err := m.procedureManager.Submit(ctx, result.Procedure); err != nil { - m.logger.Error("scheduler submit new procedure failed", zap.Uint64("ProcedureID", result.Procedure.ID()), zap.Error(err)) - } - } - } - } - }() - - return nil -} - -type schedulerWatchCallback struct { - c *metadata.ClusterMetadata -} - -func (callback *schedulerWatchCallback) OnShardRegistered(_ context.Context, _ watch.ShardRegisterEvent) error { - return nil -} - -func (callback *schedulerWatchCallback) OnShardExpired(ctx context.Context, event watch.ShardExpireEvent) error { - oldLeader := event.OldLeaderNode - shardID := event.ShardID - return callback.c.DropShardNodes(ctx, []storage.ShardNode{ - { - ID: shardID, - ShardRole: storage.ShardRoleLeader, - NodeName: oldLeader, - }, - }) -} - -// Schedulers should to be initialized and registered here. -func (m *schedulerManagerImpl) initRegister() { - var schedulers []scheduler.Scheduler - switch m.topologyType { - case storage.TopologyTypeDynamic: - schedulers = m.createDynamicTopologySchedulers() - case storage.TopologyTypeStatic: - schedulers = m.createStaticTopologySchedulers() - } - for i := 0; i < len(schedulers); i++ { - m.registerScheduler(schedulers[i]) - } -} - -func (m *schedulerManagerImpl) createStaticTopologySchedulers() []scheduler.Scheduler { - staticTopologyShardScheduler := static.NewShardScheduler(m.factory, m.nodePicker, m.procedureExecutingBatchSize) - reopenShardScheduler := reopen.NewShardScheduler(m.factory, m.procedureExecutingBatchSize) - return []scheduler.Scheduler{staticTopologyShardScheduler, reopenShardScheduler} -} - -func (m *schedulerManagerImpl) createDynamicTopologySchedulers() []scheduler.Scheduler { - rebalancedShardScheduler := rebalanced.NewShardScheduler(m.logger, m.factory, m.nodePicker, m.procedureExecutingBatchSize) - reopenShardScheduler := reopen.NewShardScheduler(m.factory, m.procedureExecutingBatchSize) - return []scheduler.Scheduler{rebalancedShardScheduler, reopenShardScheduler} -} - -func (m *schedulerManagerImpl) registerScheduler(scheduler scheduler.Scheduler) { - m.logger.Info("register new scheduler", zap.String("schedulerName", reflect.TypeOf(scheduler).String()), zap.Int("totalSchedulerLen", len(m.registerSchedulers))) - m.registerSchedulers = append(m.registerSchedulers, scheduler) -} - -func (m *schedulerManagerImpl) ListScheduler() []scheduler.Scheduler { - m.lock.RLock() - defer m.lock.RUnlock() - - return m.registerSchedulers -} - -func (m *schedulerManagerImpl) Scheduler(ctx context.Context, clusterSnapshot metadata.Snapshot) []scheduler.ScheduleResult { - // TODO: Every scheduler should run in an independent goroutine. 
- results := make([]scheduler.ScheduleResult, 0, len(m.registerSchedulers)) - for _, scheduler := range m.registerSchedulers { - result, err := scheduler.Schedule(ctx, clusterSnapshot) - if err != nil { - m.logger.Error("scheduler failed", zap.Error(err)) - continue - } - results = append(results, result) - } - return results -} - -func (m *schedulerManagerImpl) UpdateEnableSchedule(ctx context.Context, enable bool) error { - m.lock.Lock() - defer m.lock.Unlock() - - if m.topologyType != storage.TopologyTypeDynamic { - return ErrInvalidTopologyType.WithCausef("deploy mode could only update when topology type is dynamic") - } - - m.enableSchedule = enable - for _, scheduler := range m.registerSchedulers { - scheduler.UpdateEnableSchedule(ctx, enable) - } - - return nil -} - -func (m *schedulerManagerImpl) GetEnableSchedule(_ context.Context) (bool, error) { - m.lock.RLock() - defer m.lock.RUnlock() - - if m.topologyType != storage.TopologyTypeDynamic { - return false, ErrInvalidTopologyType.WithCausef("deploy mode could only get when topology type is dynamic") - } - - return m.enableSchedule, nil -} - -func (m *schedulerManagerImpl) AddShardAffinityRule(ctx context.Context, rule scheduler.ShardAffinityRule) error { - var lastErr error - for _, scheduler := range m.registerSchedulers { - if err := scheduler.AddShardAffinityRule(ctx, rule); err != nil { - log.Error("failed to add shard affinity rule of a scheduler", zap.String("scheduler", scheduler.Name()), zap.Error(err)) - lastErr = err - } - } - - return lastErr -} - -func (m *schedulerManagerImpl) RemoveShardAffinityRule(ctx context.Context, shardID storage.ShardID) error { - var lastErr error - for _, scheduler := range m.registerSchedulers { - if err := scheduler.RemoveShardAffinityRule(ctx, shardID); err != nil { - log.Error("failed to remove shard affinity rule of a scheduler", zap.String("scheduler", scheduler.Name()), zap.Error(err)) - lastErr = err - } - } - - return lastErr -} - -func (m *schedulerManagerImpl) ListShardAffinityRules(ctx context.Context) (map[string]scheduler.ShardAffinityRule, error) { - rules := make(map[string]scheduler.ShardAffinityRule, len(m.registerSchedulers)) - var lastErr error - - for _, scheduler := range m.registerSchedulers { - rule, err := scheduler.ListShardAffinityRule(ctx) - if err != nil { - log.Error("failed to list shard affinity rule of a scheduler", zap.String("scheduler", scheduler.Name()), zap.Error(err)) - lastErr = err - } - - rules[scheduler.Name()] = rule - } - - return rules, lastErr -} diff --git a/horaemeta/server/coordinator/scheduler/manager/scheduler_manager_test.go b/horaemeta/server/coordinator/scheduler/manager/scheduler_manager_test.go deleted file mode 100644 index 7edbf5ec05..0000000000 --- a/horaemeta/server/coordinator/scheduler/manager/scheduler_manager_test.go +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package manager_test - -import ( - "context" - "testing" - - "github.com/apache/incubator-horaedb-meta/server/coordinator" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/test" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler/manager" - "github.com/apache/incubator-horaedb-meta/server/etcdutil" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/stretchr/testify/require" - "go.uber.org/zap" -) - -func TestSchedulerManager(t *testing.T) { - ctx := context.Background() - re := require.New(t) - - // Init dependencies for scheduler manager. - c := test.InitStableCluster(ctx, t) - procedureManager, err := procedure.NewManagerImpl(zap.NewNop(), c.GetMetadata()) - re.NoError(err) - dispatch := test.MockDispatch{} - allocator := test.MockIDAllocator{} - s := test.NewTestStorage(t) - f := coordinator.NewFactory(zap.NewNop(), allocator, dispatch, s, c.GetMetadata()) - _, client, _ := etcdutil.PrepareEtcdServerAndClient(t) - - // Create scheduler manager with enableScheduler equal to false. - schedulerManager := manager.NewManager(zap.NewNop(), procedureManager, f, c.GetMetadata(), client, "/rootPath", storage.TopologyTypeStatic, 1) - err = schedulerManager.Start(ctx) - re.NoError(err) - err = schedulerManager.Stop(ctx) - re.NoError(err) - - // Create scheduler manager with static topology. - schedulerManager = manager.NewManager(zap.NewNop(), procedureManager, f, c.GetMetadata(), client, "/rootPath", storage.TopologyTypeStatic, 1) - err = schedulerManager.Start(ctx) - re.NoError(err) - schedulers := schedulerManager.ListScheduler() - re.Equal(2, len(schedulers)) - err = schedulerManager.Stop(ctx) - re.NoError(err) - - // Create scheduler manager with dynamic topology. - schedulerManager = manager.NewManager(zap.NewNop(), procedureManager, f, c.GetMetadata(), client, "/rootPath", storage.TopologyTypeDynamic, 1) - err = schedulerManager.Start(ctx) - re.NoError(err) - schedulers = schedulerManager.ListScheduler() - re.Equal(2, len(schedulers)) - err = schedulerManager.Stop(ctx) - re.NoError(err) -} diff --git a/horaemeta/server/coordinator/scheduler/nodepicker/error.go b/horaemeta/server/coordinator/scheduler/nodepicker/error.go deleted file mode 100644 index d154162ea5..0000000000 --- a/horaemeta/server/coordinator/scheduler/nodepicker/error.go +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package nodepicker - -import "github.com/apache/incubator-horaedb-meta/pkg/coderr" - -var ErrNoAliveNodes = coderr.NewCodeError(coderr.InvalidParams, "no alive nodes is found") diff --git a/horaemeta/server/coordinator/scheduler/nodepicker/hash/consistent_uniform.go b/horaemeta/server/coordinator/scheduler/nodepicker/hash/consistent_uniform.go deleted file mode 100644 index ce25e59403..0000000000 --- a/horaemeta/server/coordinator/scheduler/nodepicker/hash/consistent_uniform.go +++ /dev/null @@ -1,380 +0,0 @@ -// Copyright (c) 2018 Burak Sezer -// All rights reserved. -// -// This code is licensed under the MIT License. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files(the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions : -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -// This file is copied from: -// https://github.com/buraksezer/consistent/blob/4516339c49db00f725fa89d0e3e7e970e4039af0/consistent.go -// Package hash provides a consistent hashing function with bounded loads. -// For more information about the underlying algorithm, please take a look at -// https://research.googleblog.com/2017/04/consistent-hashing-with-bounded-loads.html -// -// We optimized and simplify this hash algorithm [implementation](https://github.com/buraksezer/consistent/issues/13) -package hash - -import ( - "encoding/binary" - "errors" - "fmt" - "math" - "slices" - "sort" - - "github.com/apache/incubator-horaedb-meta/pkg/assert" - "github.com/apache/incubator-horaedb-meta/pkg/log" - "go.uber.org/zap" -) - -// TODO: Modify these error definitions to coderr. -var ( - // ErrInsufficientMemberCount represents an error which means there are not enough members to complete the task. - ErrInsufficientMemberCount = errors.New("insufficient member count") - - // ErrMemberNotFound represents an error which means requested member could not be found in consistent hash ring. - ErrMemberNotFound = errors.New("member could not be found in ring") - - // ErrHasherNotProvided will be thrown if the hasher is not provided. - ErrHasherNotProvided = errors.New("hasher is required") - - // ErrInvalidReplication will be thrown if the replication factor is zero or negative. 
- ErrInvalidReplicationFactor = errors.New("positive replication factor is required") - - // ErrInvalidNumPartitions will be thrown if the number of partitions is negative. - ErrInvalidNumPartitions = errors.New("invalid number of the partitions") - - // ErrEmptyMembers will be thrown if no member is provided. - ErrEmptyMembers = errors.New("at least one member is required") -) - -// hashSeparator is used to building the virtual node name for member. -// With this special separator, it will be hard to generate duplicate virtual node names. -const hashSeparator = "@$" - -type Hasher interface { - Sum64([]byte) uint64 -} - -// Member interface represents a member in consistent hash ring. -type Member interface { - String() string -} - -type PartitionAffinity struct { - PartitionID int - NumAllowedOtherPartitions uint -} - -// Config represents a structure to control consistent package. -type Config struct { - // Hasher is responsible for generating unsigned, 64 bit hash of provided byte slice. - Hasher Hasher - - // Keys are distributed among partitions. Prime numbers are good to - // distribute keys uniformly. Select a big PartitionCount if you have - // too many keys. - ReplicationFactor int - - // The rule describes the partition affinity. - PartitionAffinities []PartitionAffinity -} - -type virtualNode uint64 - -// ConsistentUniformHash generates a uniform distribution of partitions over the members, and this distribution will keep as -// consistent as possible while the members has some tiny changes. -type ConsistentUniformHash struct { - config Config - minLoad int - maxLoad int - numPartitions uint32 - // Member name => Member - members map[string]Member - // Member name => Partitions allocated to this member - memPartitions map[string]map[int]struct{} - // Partition ID => index of the virtualNode in the sortedRing - partitionDist map[int]int - // The nodeToMems contains all the virtual nodes - nodeToMems map[virtualNode]Member - sortedRing []virtualNode -} - -func (c *Config) Sanitize() error { - if c.Hasher == nil { - return ErrHasherNotProvided - } - - if c.ReplicationFactor <= 0 { - return ErrInvalidReplicationFactor - } - - return nil -} - -// BuildConsistentUniformHash creates and returns a new hash which is ensured to be uniform and as consistent as possible. -func BuildConsistentUniformHash(numPartitions int, members []Member, config Config) (*ConsistentUniformHash, error) { - if err := config.Sanitize(); err != nil { - return nil, err - } - if numPartitions < 0 { - return nil, ErrInvalidNumPartitions - } - if len(members) == 0 { - return nil, ErrEmptyMembers - } - - numReplicatedNodes := len(members) * config.ReplicationFactor - avgLoad := float64(numPartitions) / float64(len(members)) - minLoad := int(math.Floor(avgLoad)) - maxLoad := int(math.Ceil(avgLoad)) - - memPartitions := make(map[string]map[int]struct{}, len(members)) - for _, mem := range members { - memPartitions[mem.String()] = make(map[int]struct{}, maxLoad) - } - - // Sort the affinity rule to ensure consistency. 
- sort.Slice(config.PartitionAffinities, func(i, j int) bool { - return config.PartitionAffinities[i].PartitionID < config.PartitionAffinities[j].PartitionID - }) - c := &ConsistentUniformHash{ - config: config, - minLoad: minLoad, - maxLoad: maxLoad, - numPartitions: uint32(numPartitions), - sortedRing: make([]virtualNode, 0, numReplicatedNodes), - memPartitions: memPartitions, - members: make(map[string]Member, len(members)), - partitionDist: make(map[int]int, numPartitions), - nodeToMems: make(map[virtualNode]Member, numReplicatedNodes), - } - - c.initializeVirtualNodes(members) - c.distributePartitions() - c.ensureAffinity() - return c, nil -} - -func (c *ConsistentUniformHash) distributePartitionWithLoad(partID, virtualNodeIdx int, allowedLoad int) bool { - // A fast path to avoid unnecessary loop. - if allowedLoad == 0 { - return false - } - - var count int - for { - count++ - if count > len(c.sortedRing) { - return false - } - i := c.sortedRing[virtualNodeIdx] - member := c.nodeToMems[i] - partitions, ok := c.memPartitions[member.String()] - assert.Assert(ok) - - if len(partitions)+1 <= allowedLoad { - c.partitionDist[partID] = virtualNodeIdx - partitions[partID] = struct{}{} - return true - } - virtualNodeIdx++ - if virtualNodeIdx >= len(c.sortedRing) { - virtualNodeIdx = 0 - } - } -} - -func (c *ConsistentUniformHash) distributePartition(partID, virtualNodeIdx int) { - ok := c.distributePartitionWithLoad(partID, virtualNodeIdx, c.minLoad) - if ok { - return - } - - ok = c.distributePartitionWithLoad(partID, virtualNodeIdx, c.maxLoad) - assert.Assertf(ok, "not enough room to distribute partitions") -} - -func (c *ConsistentUniformHash) distributePartitions() { - bs := make([]byte, 8) - for partID := uint32(0); partID < c.numPartitions; partID++ { - binary.LittleEndian.PutUint32(bs, partID) - key := c.config.Hasher.Sum64(bs) - idx := sort.Search(len(c.sortedRing), func(i int) bool { - return c.sortedRing[i] >= virtualNode(key) - }) - if idx >= len(c.sortedRing) { - idx = 0 - } - c.distributePartition(int(partID), idx) - } -} - -func (c *ConsistentUniformHash) MinLoad() uint { - return uint(c.minLoad) -} - -func (c *ConsistentUniformHash) MaxLoad() uint { - return uint(c.maxLoad) -} - -// LoadDistribution exposes load distribution of members. -func (c *ConsistentUniformHash) LoadDistribution() map[string]uint { - loads := make(map[string]uint, len(c.memPartitions)) - for member, partitions := range c.memPartitions { - loads[member] = uint(len(partitions)) - } - return loads -} - -// GetPartitionOwner returns the owner of the given partition. -func (c *ConsistentUniformHash) GetPartitionOwner(partID int) Member { - virtualNodeIdx, ok := c.partitionDist[partID] - if !ok { - return nil - } - virtualNode := c.sortedRing[virtualNodeIdx] - mem, ok := c.nodeToMems[virtualNode] - assert.Assertf(ok, "member must exist for the virtual node") - return mem -} - -func (c *ConsistentUniformHash) initializeVirtualNodes(members []Member) { - // Ensure the order of members to avoid inconsistency caused by hash collisions. - sort.Slice(members, func(i, j int) bool { - return members[i].String() < members[j].String() - }) - - for _, mem := range members { - for i := 0; i < c.config.ReplicationFactor; i++ { - // TODO: Shall use a more generic hasher which receives multiple slices or string? 
- key := []byte(fmt.Sprintf("%s%s%d", mem.String(), hashSeparator, i)) - h := virtualNode(c.config.Hasher.Sum64(key)) - - oldMem, ok := c.nodeToMems[h] - if ok { - log.Warn("found hash collision", zap.String("oldMem", oldMem.String()), zap.String("newMem", mem.String())) - } - - c.nodeToMems[h] = mem - c.sortedRing = append(c.sortedRing, h) - } - c.members[mem.String()] = mem - } - - sort.Slice(c.sortedRing, func(i int, j int) bool { - return c.sortedRing[i] < c.sortedRing[j] - }) -} - -func (c *ConsistentUniformHash) ensureAffinity() { - offloadedMems := make(map[string]struct{}, len(c.config.PartitionAffinities)) - - for _, affinity := range c.config.PartitionAffinities { - partID := affinity.PartitionID - vNodeIdx := c.partitionDist[partID] - vNode := c.sortedRing[vNodeIdx] - mem, ok := c.nodeToMems[vNode] - assert.Assert(ok) - offloadedMems[mem.String()] = struct{}{} - - allowedLoad := int(affinity.NumAllowedOtherPartitions) + 1 - memPartIDs, ok := c.memPartitions[mem.String()] - assert.Assert(ok) - memLoad := len(memPartIDs) - if memLoad > allowedLoad { - c.offloadMember(mem, memPartIDs, partID, allowedLoad, offloadedMems) - } - } -} - -// offloadMember tries to offload the given member by moving its partitions to other members. -func (c *ConsistentUniformHash) offloadMember(mem Member, memPartitions map[int]struct{}, retainedPartID, numAllowedParts int, offloadedMems map[string]struct{}) { - assert.Assertf(numAllowedParts >= 1, "At least the partition itself should be allowed") - partIDsToOffload := make([]int, 0, len(memPartitions)-numAllowedParts) - // The `retainedPartID` must be retained. - numRetainedParts := 1 - for partID := range memPartitions { - if partID == retainedPartID { - continue - } - - if numRetainedParts < numAllowedParts { - numRetainedParts++ - continue - } - - partIDsToOffload = append(partIDsToOffload, partID) - } - - slices.Sort(partIDsToOffload) - for _, partID := range partIDsToOffload { - c.offloadPartition(partID, mem, offloadedMems) - } -} - -func (c *ConsistentUniformHash) offloadPartition(sourcePartID int, sourceMem Member, blackedMembers map[string]struct{}) { - // Ensure all members' load smaller than the max load as much as possible. - loadUpperBound := c.numPartitions - for load := c.maxLoad; load < int(loadUpperBound); load++ { - if done := c.offloadPartitionWithAllowedLoad(sourcePartID, sourceMem, load, blackedMembers); done { - return - } - } - - log.Warn("failed to offload partition") -} - -func (c *ConsistentUniformHash) offloadPartitionWithAllowedLoad(sourcePartID int, sourceMem Member, allowedMaxLoad int, blackedMembers map[string]struct{}) bool { - vNodeIdx := c.partitionDist[sourcePartID] - // Skip the first member which must not be the target to move. - for loopCnt := 1; loopCnt < len(c.sortedRing); loopCnt++ { - vNodeIdx++ - if vNodeIdx == len(c.sortedRing) { - vNodeIdx = 0 - } - - vNode := c.sortedRing[vNodeIdx] - mem, ok := c.nodeToMems[vNode] - assert.Assert(ok) - - // Check whether this member is blacked. - if _, blacked := blackedMembers[mem.String()]; blacked { - continue - } - - memPartitions, ok := c.memPartitions[mem.String()] - assert.Assert(ok) - memLoad := len(memPartitions) - // Check whether the member's load is too allowed. - if memLoad+1 > allowedMaxLoad { - continue - } - - // The member meets the requirement, let's move the `sourcePartID` to this member. 
- memPartitions[sourcePartID] = struct{}{} - c.partitionDist[sourcePartID] = vNodeIdx - sourceMemPartitions, ok := c.memPartitions[sourceMem.String()] - assert.Assert(ok) - delete(sourceMemPartitions, sourcePartID) - return true - } - - return false -} diff --git a/horaemeta/server/coordinator/scheduler/nodepicker/hash/consistent_uniform_test.go b/horaemeta/server/coordinator/scheduler/nodepicker/hash/consistent_uniform_test.go deleted file mode 100644 index 5553630f0b..0000000000 --- a/horaemeta/server/coordinator/scheduler/nodepicker/hash/consistent_uniform_test.go +++ /dev/null @@ -1,311 +0,0 @@ -// Copyright (c) 2018 Burak Sezer -// All rights reserved. -// -// This code is licensed under the MIT License. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files(the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions : -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
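// Editor's illustration — a minimal, hedged sketch, not part of the deleted file above or
// the test file that follows: it shows how the consistent-uniform-hash API of this deleted
// package was typically exercised, mirroring that test. The names strMember, fnvHasher and
// the concrete numbers (256 partitions, 30 members, ReplicationFactor 127) are assumptions
// introduced here for illustration only; 127 matches the replication factor used by the
// node picker. With 256 partitions over 30 members the average load is 256/30 ≈ 8.53, so
// MinLoad() is 8, MaxLoad() is 9, and every member ends up owning 8 or 9 partitions.
package main

import (
	"fmt"
	"hash/fnv"

	"github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler/nodepicker/hash"
)

// strMember is a trivial hash.Member backed by a plain string.
type strMember string

func (m strMember) String() string { return string(m) }

// fnvHasher satisfies hash.Hasher using the standard-library 64-bit FNV hash.
type fnvHasher struct{}

func (fnvHasher) Sum64(b []byte) uint64 {
	h := fnv.New64()
	_, _ = h.Write(b)
	return h.Sum64()
}

func main() {
	members := make([]hash.Member, 0, 30)
	for i := 0; i < 30; i++ {
		members = append(members, strMember(fmt.Sprintf("node-%d", i)))
	}

	cfg := hash.Config{Hasher: fnvHasher{}, ReplicationFactor: 127, PartitionAffinities: nil}
	c, err := hash.BuildConsistentUniformHash(256, members, cfg)
	if err != nil {
		panic(err)
	}

	// The owner of a partition is deterministic for a fixed member set, and every
	// member's load stays within [MinLoad(), MaxLoad()] == [8, 9].
	fmt.Println("owner of partition 0:", c.GetPartitionOwner(0).String())
	fmt.Println("min/max load:", c.MinLoad(), c.MaxLoad())
	for member, load := range c.LoadDistribution() {
		fmt.Printf("%s owns %d partitions\n", member, load)
	}
}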
- -package hash - -import ( - "fmt" - "hash/fnv" - "testing" - - "github.com/stretchr/testify/assert" -) - -type testMember string - -func (tm testMember) String() string { - return string(tm) -} - -type testHasher struct{} - -func (hs testHasher) Sum64(data []byte) uint64 { - h := fnv.New64() - _, _ = h.Write(data) - return h.Sum64() -} - -func buildTestMembers(n int) []Member { - members := []Member{} - for i := 0; i < n; i++ { - member := testMember(fmt.Sprintf("node-%d", i)) - members = append(members, member) - } - - return members -} - -func checkUniform(t *testing.T, numPartitions, numMembers int) { - members := buildTestMembers(numMembers) - cfg := Config{ - ReplicationFactor: 127, - Hasher: testHasher{}, - PartitionAffinities: []PartitionAffinity{}, - } - c, err := BuildConsistentUniformHash(numPartitions, members, cfg) - assert.NoError(t, err) - - minLoad := c.MinLoad() - maxLoad := c.MaxLoad() - loadDistribution := c.LoadDistribution() - for _, mem := range members { - load, ok := loadDistribution[mem.String()] - if ok { - assert.GreaterOrEqual(t, load, minLoad) - assert.LessOrEqual(t, load, maxLoad) - } else { - assert.Equal(t, 0.0, minLoad) - } - } -} - -func TestZeroReplicationFactor(t *testing.T) { - cfg := Config{ - ReplicationFactor: 0, - Hasher: testHasher{}, - PartitionAffinities: []PartitionAffinity{}, - } - _, err := BuildConsistentUniformHash(0, []Member{testMember("")}, cfg) - assert.Error(t, err) -} - -func TestEmptyHasher(t *testing.T) { - cfg := Config{ - Hasher: nil, - ReplicationFactor: 127, - PartitionAffinities: []PartitionAffinity{}, - } - _, err := BuildConsistentUniformHash(0, []Member{testMember("")}, cfg) - assert.Error(t, err) -} - -func TestEmptyMembers(t *testing.T) { - cfg := Config{ - Hasher: testHasher{}, - ReplicationFactor: 127, - PartitionAffinities: []PartitionAffinity{}, - } - _, err := BuildConsistentUniformHash(0, []Member{}, cfg) - assert.Error(t, err) -} - -func TestNegativeNumPartitions(t *testing.T) { - cfg := Config{ - Hasher: testHasher{}, - ReplicationFactor: 127, - PartitionAffinities: []PartitionAffinity{}, - } - _, err := BuildConsistentUniformHash(-1, []Member{testMember("")}, cfg) - assert.Error(t, err) -} - -func TestUniform(t *testing.T) { - checkUniform(t, 23, 8) - checkUniform(t, 128, 72) - checkUniform(t, 10, 72) - checkUniform(t, 1, 8) - checkUniform(t, 0, 8) - checkUniform(t, 100, 1) -} - -func computeDiffBetweenDist(t *testing.T, oldDist, newDist map[int]string) int { - numDiffs := 0 - assert.Equal(t, len(oldDist), len(newDist)) - for partID, oldMem := range oldDist { - newMem, ok := newDist[partID] - assert.True(t, ok) - if newMem != oldMem { - numDiffs++ - } - } - - return numDiffs -} - -func checkConsistent(t *testing.T, numPartitions, numMembers, maxDiff int) { - members := buildTestMembers(numMembers) - cfg := Config{ - Hasher: testHasher{}, - ReplicationFactor: 127, - PartitionAffinities: []PartitionAffinity{}, - } - c, err := BuildConsistentUniformHash(numPartitions, members, cfg) - assert.NoError(t, err) - - distribution := make(map[int]string, numPartitions) - for partID := 0; partID < numPartitions; partID++ { - distribution[partID] = c.GetPartitionOwner(partID).String() - } - sortedRing := c.sortedRing - nodeToMems := c.nodeToMems - - { - newMembers := make([]Member, 0, numMembers) - for i := numMembers - 1; i >= 0; i-- { - newMembers = append(newMembers, members[i]) - } - c, err := BuildConsistentUniformHash(numPartitions, newMembers, cfg) - assert.NoError(t, err) - - newSortedRing := c.sortedRing - 
assert.Equal(t, sortedRing, newSortedRing) - - newNodeToMems := c.nodeToMems - assert.Equal(t, nodeToMems, newNodeToMems) - - newDistribution := make(map[int]string, numPartitions) - for partID := 0; partID < numPartitions; partID++ { - newDistribution[partID] = c.GetPartitionOwner(partID).String() - } - numDiffs := computeDiffBetweenDist(t, distribution, newDistribution) - assert.Equal(t, numDiffs, 0) - } - - oldMem0 := members[0].String() - newMem0 := "new-node-0" - members[0] = testMember(newMem0) - c, err = BuildConsistentUniformHash(numPartitions, members, cfg) - assert.NoError(t, err) - - numDiffs := 0 - for partID := 0; partID < numPartitions; partID++ { - newMem := c.GetPartitionOwner(partID).String() - oldMem := distribution[partID] - if newMem0 == newMem && oldMem != oldMem0 { - numDiffs++ - continue - } - - if newMem != oldMem { - numDiffs++ - } - } - - assert.LessOrEqual(t, numDiffs, maxDiff) -} - -func TestConsistency(t *testing.T) { - checkConsistent(t, 120, 20, 12) - checkConsistent(t, 100, 20, 11) - checkConsistent(t, 128, 70, 10) - checkConsistent(t, 256, 30, 42) - checkConsistent(t, 17, 5, 10) -} - -func checkAffinity(t *testing.T, numPartitions, numMembers int, affinities []PartitionAffinity, revisedMaxLoad uint) { - members := buildTestMembers(numMembers) - cfg := Config{ - ReplicationFactor: 127, - Hasher: testHasher{}, - PartitionAffinities: affinities, - } - c, err := BuildConsistentUniformHash(numPartitions, members, cfg) - assert.NoError(t, err) - - minLoad := c.MinLoad() - maxLoad := c.MaxLoad() - if maxLoad < revisedMaxLoad { - maxLoad = revisedMaxLoad - } - loadDistribution := c.LoadDistribution() - for _, mem := range members { - load, ok := loadDistribution[mem.String()] - if !ok { - assert.Equal(t, 0.0, minLoad) - } - assert.LessOrEqual(t, load, maxLoad) - } - - for _, affinity := range affinities { - mem := c.GetPartitionOwner(affinity.PartitionID) - load := loadDistribution[mem.String()] - allowedMaxLoad := affinity.NumAllowedOtherPartitions + 1 - assert.LessOrEqual(t, load, allowedMaxLoad) - } - - distribution := make(map[int]string, numPartitions) - for partID := 0; partID < numPartitions; partID++ { - distribution[partID] = c.GetPartitionOwner(partID).String() - } - { - newMembers := make([]Member, 0, numMembers) - for i := numMembers - 1; i >= 0; i-- { - newMembers = append(newMembers, members[i]) - } - c, err := BuildConsistentUniformHash(numPartitions, newMembers, cfg) - assert.NoError(t, err) - - newDistribution := make(map[int]string, numPartitions) - for partID := 0; partID < numPartitions; partID++ { - newDistribution[partID] = c.GetPartitionOwner(partID).String() - } - numDiffs := computeDiffBetweenDist(t, distribution, newDistribution) - assert.Equal(t, numDiffs, 0) - } -} - -func TestAffinity(t *testing.T) { - rule := []PartitionAffinity{} - checkAffinity(t, 120, 72, rule, 0) - checkAffinity(t, 0, 72, rule, 0) - - rule = []PartitionAffinity{ - {0, 0}, - {1, 0}, - {2, 120}, - } - checkAffinity(t, 3, 72, rule, 0) - checkAffinity(t, 72, 72, rule, 0) - - rule = []PartitionAffinity{ - {7, 0}, - {31, 0}, - {41, 0}, - {45, 0}, - {58, 0}, - {81, 0}, - {87, 0}, - {88, 0}, - {89, 0}, - } - checkAffinity(t, 128, 72, rule, 0) -} - -func TestInvalidAffinity(t *testing.T) { - // This affinity rule requires at least 4 member, but it should work too. 
- rule := []PartitionAffinity{ - {0, 0}, - {1, 0}, - {2, 0}, - {3, 0}, - } - - members := buildTestMembers(3) - cfg := Config{ - ReplicationFactor: 127, - Hasher: testHasher{}, - PartitionAffinities: rule, - } - _, err := BuildConsistentUniformHash(4, members, cfg) - assert.NoError(t, err) -} diff --git a/horaemeta/server/coordinator/scheduler/nodepicker/node_picker.go b/horaemeta/server/coordinator/scheduler/nodepicker/node_picker.go deleted file mode 100644 index 1ac75b59a4..0000000000 --- a/horaemeta/server/coordinator/scheduler/nodepicker/node_picker.go +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package nodepicker - -import ( - "context" - "time" - - "github.com/apache/incubator-horaedb-meta/pkg/assert" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler/nodepicker/hash" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/spaolacci/murmur3" - "go.uber.org/zap" -) - -type Config struct { - NumTotalShards uint32 - ShardAffinityRule map[storage.ShardID]scheduler.ShardAffinity -} - -func (c Config) genPartitionAffinities() []hash.PartitionAffinity { - affinities := make([]hash.PartitionAffinity, 0, len(c.ShardAffinityRule)) - for shardID, affinity := range c.ShardAffinityRule { - partitionID := int(shardID) - affinities = append(affinities, hash.PartitionAffinity{ - PartitionID: partitionID, - NumAllowedOtherPartitions: affinity.NumAllowedOtherShards, - }) - } - - return affinities -} - -type NodePicker interface { - PickNode(ctx context.Context, config Config, shardIDs []storage.ShardID, registerNodes []metadata.RegisteredNode) (map[storage.ShardID]metadata.RegisteredNode, error) -} - -type ConsistentUniformHashNodePicker struct { - logger *zap.Logger -} - -func NewConsistentUniformHashNodePicker(logger *zap.Logger) NodePicker { - return &ConsistentUniformHashNodePicker{logger: logger} -} - -type nodeMember string - -var _ hash.Member = nodeMember("") - -func (m nodeMember) String() string { - return string(m) -} - -const uniformHashReplicationFactor int = 127 - -type hasher struct{} - -func (h hasher) Sum64(data []byte) uint64 { - return murmur3.Sum64(data) -} - -// filterExpiredNodes will retain the alive nodes only. 
-func filterExpiredNodes(nodes []metadata.RegisteredNode) map[string]metadata.RegisteredNode { - now := time.Now() - - aliveNodes := make(map[string]metadata.RegisteredNode, len(nodes)) - for _, node := range nodes { - if !node.IsExpired(now) { - aliveNodes[node.Node.Name] = node - } - } - - return aliveNodes -} - -func (p *ConsistentUniformHashNodePicker) PickNode(_ context.Context, config Config, shardIDs []storage.ShardID, registerNodes []metadata.RegisteredNode) (map[storage.ShardID]metadata.RegisteredNode, error) { - aliveNodes := filterExpiredNodes(registerNodes) - if len(aliveNodes) == 0 { - return nil, ErrNoAliveNodes.WithCausef("registerNodes:%+v", registerNodes) - } - - mems := make([]hash.Member, 0, len(aliveNodes)) - for _, node := range registerNodes { - if _, alive := aliveNodes[node.Node.Name]; alive { - mems = append(mems, nodeMember(node.Node.Name)) - } - } - - hashConf := hash.Config{ - ReplicationFactor: uniformHashReplicationFactor, - Hasher: hasher{}, - PartitionAffinities: config.genPartitionAffinities(), - } - h, err := hash.BuildConsistentUniformHash(int(config.NumTotalShards), mems, hashConf) - if err != nil { - return nil, err - } - - shardNodes := make(map[storage.ShardID]metadata.RegisteredNode, len(registerNodes)) - for _, shardID := range shardIDs { - assert.Assert(shardID < storage.ShardID(config.NumTotalShards)) - partID := int(shardID) - nodeName := h.GetPartitionOwner(partID).String() - node, ok := aliveNodes[nodeName] - assert.Assertf(ok, "node:%s must be in the aliveNodes:%v", nodeName, aliveNodes) - shardNodes[storage.ShardID(partID)] = node - - p.logger.Debug("shard is allocated to the node", zap.Uint32("shardID", uint32(shardID)), zap.String("node", nodeName)) - } - - return shardNodes, nil -} diff --git a/horaemeta/server/coordinator/scheduler/nodepicker/node_picker_test.go b/horaemeta/server/coordinator/scheduler/nodepicker/node_picker_test.go deleted file mode 100644 index 014aac10ec..0000000000 --- a/horaemeta/server/coordinator/scheduler/nodepicker/node_picker_test.go +++ /dev/null @@ -1,215 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package nodepicker_test - -import ( - "context" - "fmt" - "strconv" - "testing" - "time" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler/nodepicker" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/stretchr/testify/require" - "go.uber.org/zap" -) - -const ( - nodeLength = 3 - selectOnlineNodeIndex = 1 - defaultTotalShardNum = 10 -) - -func TestNodePicker(t *testing.T) { - re := require.New(t) - ctx := context.Background() - - nodePicker := nodepicker.NewConsistentUniformHashNodePicker(zap.NewNop()) - - var nodes []metadata.RegisteredNode - config := nodepicker.Config{ - NumTotalShards: defaultTotalShardNum, - ShardAffinityRule: nil, - } - _, err := nodePicker.PickNode(ctx, config, []storage.ShardID{0}, nodes) - re.Error(err) - - for i := 0; i < nodeLength; i++ { - node := storage.Node{ - Name: strconv.Itoa(i), - NodeStats: storage.NewEmptyNodeStats(), - LastTouchTime: generateLastTouchTime(time.Minute), - State: storage.NodeStateUnknown, - } - nodes = append(nodes, metadata.RegisteredNode{ - Node: node, - ShardInfos: nil, - }) - } - _, err = nodePicker.PickNode(ctx, config, []storage.ShardID{0}, nodes) - re.Error(err) - - nodes = nodes[:0] - for i := 0; i < nodeLength; i++ { - node := storage.Node{ - Name: strconv.Itoa(i), - NodeStats: storage.NewEmptyNodeStats(), - LastTouchTime: generateLastTouchTime(0), - State: storage.NodeStateUnknown, - } - nodes = append(nodes, metadata.RegisteredNode{ - Node: node, - ShardInfos: nil, - }) - } - _, err = nodePicker.PickNode(ctx, config, []storage.ShardID{0}, nodes) - re.NoError(err) - - nodes = nodes[:0] - for i := 0; i < nodeLength; i++ { - node := storage.Node{ - Name: strconv.Itoa(i), - NodeStats: storage.NewEmptyNodeStats(), - LastTouchTime: generateLastTouchTime(time.Minute), - State: storage.NodeStateUnknown, - } - nodes = append(nodes, metadata.RegisteredNode{ - Node: node, - ShardInfos: nil, - }) - } - nodes[selectOnlineNodeIndex].Node.LastTouchTime = uint64(time.Now().UnixMilli()) - shardNodeMapping, err := nodePicker.PickNode(ctx, config, []storage.ShardID{0}, nodes) - re.NoError(err) - re.Equal(strconv.Itoa(selectOnlineNodeIndex), shardNodeMapping[0].Node.Name) -} - -func TestUniformity(t *testing.T) { - re := require.New(t) - ctx := context.Background() - - nodePicker := nodepicker.NewConsistentUniformHashNodePicker(zap.NewNop()) - mapping := allocShards(ctx, nodePicker, 30, 256, re) - maxShardNum := 256/30 + 1 - for _, shards := range mapping { - re.LessOrEqual(len(shards), maxShardNum) - } - - // Verify that the result of hash remains unchanged through the same nodes and shards. - t.Log("Verify that the result of hash remains unchanged through the same nodes and shards") - newMapping := allocShards(ctx, nodePicker, 30, 256, re) - maxShardNum = 256/30 + 1 - for _, shards := range newMapping { - re.LessOrEqual(len(shards), maxShardNum) - } - - for nodeName, shardIds := range mapping { - newShardIDs := newMapping[nodeName] - diffShardID := diffShardIds(shardIds, newShardIDs) - println(fmt.Sprintf("diff shardID, nodeName:%s, diffShardIDs:%d", nodeName, len(diffShardID))) - re.Equal(0, len(diffShardID)) - } - - // Add new node and testing shard rebalanced. 
- t.Log("Add new node and testing shard rebalanced") - newMapping = allocShards(ctx, nodePicker, 31, 256, re) - maxShardNum = 256/31 + 1 - for _, shards := range newMapping { - re.LessOrEqual(len(shards), maxShardNum) - } - maxDiffNum := 5 - for nodeName, shardIds := range mapping { - newShardIDs := newMapping[nodeName] - diffShardID := diffShardIds(shardIds, newShardIDs) - re.LessOrEqual(len(diffShardID), maxDiffNum) - } - - // Add new shard and testing shard rebalanced. - t.Log("Add new shard and testing shard rebalanced") - newShardMapping := allocShards(ctx, nodePicker, 30, 257, re) - maxShardNum = 257/31 + 1 - for _, shards := range newShardMapping { - re.LessOrEqual(len(shards), maxShardNum) - } - maxDiffNum = 5 - for nodeName, shardIds := range newShardMapping { - newShardIDs := newMapping[nodeName] - diffShardID := diffShardIds(shardIds, newShardIDs) - re.LessOrEqual(len(diffShardID), maxDiffNum) - } -} - -func allocShards(ctx context.Context, nodePicker nodepicker.NodePicker, nodeNum int, shardNum int, re *require.Assertions) map[string][]int { - var nodes []metadata.RegisteredNode - for i := 0; i < nodeNum; i++ { - node := storage.Node{ - Name: strconv.Itoa(i), - NodeStats: storage.NewEmptyNodeStats(), - LastTouchTime: generateLastTouchTime(0), - State: storage.NodeStateUnknown, - } - nodes = append(nodes, metadata.RegisteredNode{ - Node: node, - ShardInfos: nil, - }) - } - mapping := make(map[string][]int, 0) - shardIDs := make([]storage.ShardID, 0, shardNum) - for i := 0; i < shardNum; i++ { - shardIDs = append(shardIDs, storage.ShardID(i)) - } - config := nodepicker.Config{ - NumTotalShards: uint32(shardNum), - ShardAffinityRule: nil, - } - shardNodeMapping, err := nodePicker.PickNode(ctx, config, shardIDs, nodes) - re.NoError(err) - for shardID, node := range shardNodeMapping { - mapping[node.Node.Name] = append(mapping[node.Node.Name], int(shardID)) - } - - return mapping -} - -func generateLastTouchTime(duration time.Duration) uint64 { - return uint64(time.Now().UnixMilli() - int64(duration)) -} - -func diffShardIds(oldShardIDs, newShardIDs []int) []int { - diff := make(map[int]bool, 0) - for i := 0; i < len(oldShardIDs); i++ { - diff[oldShardIDs[i]] = false - } - for i := 0; i < len(newShardIDs); i++ { - if diff[newShardIDs[i]] == false { - diff[newShardIDs[i]] = true - } - } - - var result []int - for k, v := range diff { - if !v { - result = append(result, k) - } - } - return result -} diff --git a/horaemeta/server/coordinator/scheduler/rebalanced/scheduler.go b/horaemeta/server/coordinator/scheduler/rebalanced/scheduler.go deleted file mode 100644 index 5a8a48f2ff..0000000000 --- a/horaemeta/server/coordinator/scheduler/rebalanced/scheduler.go +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package rebalanced - -import ( - "context" - "fmt" - "maps" - "strings" - "sync" - - "github.com/apache/incubator-horaedb-meta/pkg/assert" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler/nodepicker" - "github.com/apache/incubator-horaedb-meta/server/storage" - "go.uber.org/zap" -) - -type schedulerImpl struct { - logger *zap.Logger - factory *coordinator.Factory - nodePicker nodepicker.NodePicker - procedureExecutingBatchSize uint32 - - // The lock is used to protect following fields. - lock sync.Mutex - // latestShardNodeMapping is used to record last stable shard topology, - // when enableSchedule is true, rebalancedShardScheduler will recover cluster according to the topology. - latestShardNodeMapping map[storage.ShardID]metadata.RegisteredNode - // The `latestShardNodeMapping` will be used directly, if enableSchedule is set. - enableSchedule bool - // shardAffinityRule is used to control the shard distribution. - shardAffinityRule map[storage.ShardID]scheduler.ShardAffinity -} - -func NewShardScheduler(logger *zap.Logger, factory *coordinator.Factory, nodePicker nodepicker.NodePicker, procedureExecutingBatchSize uint32) scheduler.Scheduler { - return &schedulerImpl{ - logger: logger, - factory: factory, - nodePicker: nodePicker, - procedureExecutingBatchSize: procedureExecutingBatchSize, - lock: sync.Mutex{}, - latestShardNodeMapping: map[storage.ShardID]metadata.RegisteredNode{}, - enableSchedule: false, - shardAffinityRule: map[storage.ShardID]scheduler.ShardAffinity{}, - } -} - -func (r *schedulerImpl) Name() string { - return "rebalanced_scheduler" -} - -func (r *schedulerImpl) UpdateEnableSchedule(_ context.Context, enable bool) { - r.updateEnableSchedule(enable) -} - -func (r *schedulerImpl) AddShardAffinityRule(_ context.Context, rule scheduler.ShardAffinityRule) error { - r.lock.Lock() - defer r.lock.Unlock() - - for _, shardAffinity := range rule.Affinities { - r.shardAffinityRule[shardAffinity.ShardID] = shardAffinity - } - - return nil -} - -func (r *schedulerImpl) RemoveShardAffinityRule(_ context.Context, shardID storage.ShardID) error { - r.lock.Lock() - defer r.lock.Unlock() - - delete(r.shardAffinityRule, shardID) - - return nil -} - -func (r *schedulerImpl) ListShardAffinityRule(_ context.Context) (scheduler.ShardAffinityRule, error) { - r.lock.Lock() - defer r.lock.Unlock() - - affinities := make([]scheduler.ShardAffinity, 0, len(r.shardAffinityRule)) - for _, affinity := range r.shardAffinityRule { - affinities = append(affinities, affinity) - } - - return scheduler.ShardAffinityRule{Affinities: affinities}, nil -} - -func (r *schedulerImpl) Schedule(ctx context.Context, clusterSnapshot metadata.Snapshot) (scheduler.ScheduleResult, error) { - var emptySchedulerRes scheduler.ScheduleResult - // RebalancedShardScheduler can only be scheduled when the cluster is not empty. - if clusterSnapshot.Topology.ClusterView.State == storage.ClusterStateEmpty { - return emptySchedulerRes, nil - } - - var procedures []procedure.Procedure - var reasons strings.Builder - - // ShardNodeMapping only update when enableSchedule is false. 
- shardNodeMapping, err := r.generateLatestShardNodeMapping(ctx, clusterSnapshot) - if err != nil { - return emptySchedulerRes, nil - } - - numShards := uint32(len(clusterSnapshot.Topology.ShardViewsMapping)) - // Generate assigned shards mapping and transfer leader if node is changed. - assignedShardIDs := make(map[storage.ShardID]struct{}, numShards) - for _, shardNode := range clusterSnapshot.Topology.ClusterView.ShardNodes { - if len(procedures) >= int(r.procedureExecutingBatchSize) { - r.logger.Warn("procedure length reached procedure executing batch size", zap.Uint32("procedureExecutingBatchSize", r.procedureExecutingBatchSize)) - break - } - - // Mark the shard assigned. - assignedShardIDs[shardNode.ID] = struct{}{} - newLeaderNode, ok := shardNodeMapping[shardNode.ID] - assert.Assert(ok) - if newLeaderNode.Node.Name != shardNode.NodeName { - r.logger.Info("rebalanced shard scheduler try to assign shard to another node", zap.Uint64("shardID", uint64(shardNode.ID)), zap.String("originNode", shardNode.NodeName), zap.String("newNode", newLeaderNode.Node.Name)) - p, err := r.factory.CreateTransferLeaderProcedure(ctx, coordinator.TransferLeaderRequest{ - Snapshot: clusterSnapshot, - ShardID: shardNode.ID, - OldLeaderNodeName: shardNode.NodeName, - NewLeaderNodeName: newLeaderNode.Node.Name, - }) - if err != nil { - return emptySchedulerRes, err - } - - procedures = append(procedures, p) - reasons.WriteString(fmt.Sprintf("shard is transferred to another node, shardID:%d, oldNode:%s, newNode:%s\n", shardNode.ID, shardNode.NodeName, newLeaderNode.Node.Name)) - } - } - - // Check whether the assigned shard needs to be reopened. - for id := uint32(0); id < numShards; id++ { - if len(procedures) >= int(r.procedureExecutingBatchSize) { - r.logger.Warn("procedure length reached procedure executing batch size", zap.Uint32("procedureExecutingBatchSize", r.procedureExecutingBatchSize)) - break - } - - shardID := storage.ShardID(id) - if _, assigned := assignedShardIDs[shardID]; !assigned { - node, ok := r.latestShardNodeMapping[shardID] - assert.Assert(ok) - - r.logger.Info("rebalanced shard scheduler try to assign unassigned shard to node", zap.Uint32("shardID", id), zap.String("node", node.Node.Name)) - p, err := r.factory.CreateTransferLeaderProcedure(ctx, coordinator.TransferLeaderRequest{ - Snapshot: clusterSnapshot, - ShardID: shardID, - OldLeaderNodeName: "", - NewLeaderNodeName: node.Node.Name, - }) - if err != nil { - return emptySchedulerRes, err - } - - procedures = append(procedures, p) - reasons.WriteString(fmt.Sprintf("shard is assigned to a node, shardID:%d, node:%s\n", shardID, node.Node.Name)) - } - } - - if len(procedures) == 0 { - return emptySchedulerRes, nil - } - - batchProcedure, err := r.factory.CreateBatchTransferLeaderProcedure(ctx, coordinator.BatchRequest{ - Batch: procedures, - BatchType: procedure.TransferLeader, - }) - if err != nil { - return emptySchedulerRes, err - } - - return scheduler.ScheduleResult{Procedure: batchProcedure, Reason: reasons.String()}, nil -} - -func (r *schedulerImpl) generateLatestShardNodeMapping(ctx context.Context, snapshot metadata.Snapshot) (map[storage.ShardID]metadata.RegisteredNode, error) { - numShards := uint32(len(snapshot.Topology.ShardViewsMapping)) - // TODO: Improve scheduling efficiency and verify whether the topology changes. 
- shardIDs := make([]storage.ShardID, 0, numShards) - for shardID := range snapshot.Topology.ShardViewsMapping { - shardIDs = append(shardIDs, shardID) - } - - r.lock.Lock() - defer r.lock.Unlock() - var err error - shardNodeMapping := r.latestShardNodeMapping - if !r.enableSchedule { - pickConfig := nodepicker.Config{ - NumTotalShards: numShards, - ShardAffinityRule: maps.Clone(r.shardAffinityRule), - } - shardNodeMapping, err = r.nodePicker.PickNode(ctx, pickConfig, shardIDs, snapshot.RegisteredNodes) - if err != nil { - return nil, err - } - r.latestShardNodeMapping = shardNodeMapping - } - - return shardNodeMapping, nil -} - -func (r *schedulerImpl) updateEnableSchedule(enableSchedule bool) { - r.lock.Lock() - defer r.lock.Unlock() - - r.enableSchedule = enableSchedule -} diff --git a/horaemeta/server/coordinator/scheduler/rebalanced/scheduler_test.go b/horaemeta/server/coordinator/scheduler/rebalanced/scheduler_test.go deleted file mode 100644 index 8c9315724e..0000000000 --- a/horaemeta/server/coordinator/scheduler/rebalanced/scheduler_test.go +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package rebalanced_test - -import ( - "context" - "testing" - - "github.com/apache/incubator-horaedb-meta/server/coordinator" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/test" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler/nodepicker" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler/rebalanced" - "github.com/stretchr/testify/require" - "go.uber.org/zap" -) - -func TestRebalancedScheduler(t *testing.T) { - re := require.New(t) - ctx := context.Background() - - // EmptyCluster would be scheduled an empty procedure. - emptyCluster := test.InitEmptyCluster(ctx, t) - procedureFactory := coordinator.NewFactory(zap.NewNop(), test.MockIDAllocator{}, test.MockDispatch{}, test.NewTestStorage(t), emptyCluster.GetMetadata()) - s := rebalanced.NewShardScheduler(zap.NewNop(), procedureFactory, nodepicker.NewConsistentUniformHashNodePicker(zap.NewNop()), 1) - result, err := s.Schedule(ctx, emptyCluster.GetMetadata().GetClusterSnapshot()) - re.NoError(err) - re.Empty(result) - - // PrepareCluster would be scheduled an empty procedure. 
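For reference, the core behaviour of the rebalanced scheduler removed above can be summarised in a small standalone sketch. It uses simplified stand-in types (ShardID, Node, a pick function) rather than the real metadata/nodepicker packages, so it is illustrative only: the shard-to-node mapping is recomputed while enableSchedule is false, and frozen (reused as-is) once enableSchedule is set.

    package main

    import (
        "fmt"
        "sync"
    )

    // Simplified stand-ins for the real storage/metadata types.
    type ShardID uint32
    type Node string

    // rebalancer models the freeze-or-recompute decision of the removed scheduler.
    type rebalancer struct {
        mu             sync.Mutex
        enableSchedule bool                             // when true, the last mapping is reused as-is
        latestMapping  map[ShardID]Node                 // last stable shard -> node mapping
        pick           func([]ShardID) map[ShardID]Node // stand-in for nodepicker.PickNode
    }

    func (r *rebalancer) shardNodeMapping(shards []ShardID) map[ShardID]Node {
        r.mu.Lock()
        defer r.mu.Unlock()
        if r.enableSchedule {
            // Topology is locked: recover the cluster according to the recorded mapping.
            return r.latestMapping
        }
        // Otherwise recompute and remember the mapping as the new stable topology.
        r.latestMapping = r.pick(shards)
        return r.latestMapping
    }

    func main() {
        r := &rebalancer{
            latestMapping: map[ShardID]Node{},
            pick: func(shards []ShardID) map[ShardID]Node {
                m := make(map[ShardID]Node, len(shards))
                for _, s := range shards {
                    m[s] = Node(fmt.Sprintf("node-%d", int(s)%2)) // trivial picker for the demo
                }
                return m
            },
        }
        fmt.Println(r.shardNodeMapping([]ShardID{0, 1, 2}))
        r.enableSchedule = true
        fmt.Println(r.shardNodeMapping([]ShardID{0, 1, 2, 3})) // reuses the frozen mapping
    }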
- prepareCluster := test.InitPrepareCluster(ctx, t) - procedureFactory = coordinator.NewFactory(zap.NewNop(), test.MockIDAllocator{}, test.MockDispatch{}, test.NewTestStorage(t), prepareCluster.GetMetadata()) - s = rebalanced.NewShardScheduler(zap.NewNop(), procedureFactory, nodepicker.NewConsistentUniformHashNodePicker(zap.NewNop()), 1) - _, err = s.Schedule(ctx, prepareCluster.GetMetadata().GetClusterSnapshot()) - re.NoError(err) - - // StableCluster with all shards assigned would be scheduled a load balance procedure. - stableCluster := test.InitStableCluster(ctx, t) - procedureFactory = coordinator.NewFactory(zap.NewNop(), test.MockIDAllocator{}, test.MockDispatch{}, test.NewTestStorage(t), stableCluster.GetMetadata()) - s = rebalanced.NewShardScheduler(zap.NewNop(), procedureFactory, nodepicker.NewConsistentUniformHashNodePicker(zap.NewNop()), 1) - _, err = s.Schedule(ctx, stableCluster.GetMetadata().GetClusterSnapshot()) - re.NoError(err) -} diff --git a/horaemeta/server/coordinator/scheduler/reopen/scheduler.go b/horaemeta/server/coordinator/scheduler/reopen/scheduler.go deleted file mode 100644 index 74535e4825..0000000000 --- a/horaemeta/server/coordinator/scheduler/reopen/scheduler.go +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package reopen - -import ( - "context" - "fmt" - "strings" - "time" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler" - "github.com/apache/incubator-horaedb-meta/server/storage" -) - -// schedulerImpl used to reopen shards in status PartitionOpen. -type schedulerImpl struct { - factory *coordinator.Factory - procedureExecutingBatchSize uint32 -} - -func NewShardScheduler(factory *coordinator.Factory, procedureExecutingBatchSize uint32) scheduler.Scheduler { - return schedulerImpl{ - factory: factory, - procedureExecutingBatchSize: procedureExecutingBatchSize, - } -} - -func (r schedulerImpl) Name() string { - return "reopen_scheduler" -} - -func (r schedulerImpl) UpdateEnableSchedule(_ context.Context, _ bool) { - // ReopenShardScheduler do not need enableSchedule. 
-} - -func (r schedulerImpl) AddShardAffinityRule(_ context.Context, _ scheduler.ShardAffinityRule) error { - return nil -} - -func (r schedulerImpl) RemoveShardAffinityRule(_ context.Context, _ storage.ShardID) error { - return nil -} - -func (r schedulerImpl) ListShardAffinityRule(_ context.Context) (scheduler.ShardAffinityRule, error) { - return scheduler.ShardAffinityRule{Affinities: []scheduler.ShardAffinity{}}, nil -} - -func (r schedulerImpl) Schedule(ctx context.Context, clusterSnapshot metadata.Snapshot) (scheduler.ScheduleResult, error) { - var scheduleRes scheduler.ScheduleResult - // ReopenShardScheduler can only be scheduled when the cluster is stable. - if !clusterSnapshot.Topology.IsStable() { - return scheduleRes, nil - } - now := time.Now() - - var procedures []procedure.Procedure - var reasons strings.Builder - - for _, registeredNode := range clusterSnapshot.RegisteredNodes { - if registeredNode.IsExpired(now) { - continue - } - - for _, shardInfo := range registeredNode.ShardInfos { - if !needReopen(shardInfo) { - continue - } - p, err := r.factory.CreateTransferLeaderProcedure(ctx, coordinator.TransferLeaderRequest{ - Snapshot: clusterSnapshot, - ShardID: shardInfo.ID, - OldLeaderNodeName: "", - NewLeaderNodeName: registeredNode.Node.Name, - }) - if err != nil { - return scheduleRes, err - } - - procedures = append(procedures, p) - reasons.WriteString(fmt.Sprintf("the shard needs to be reopen , shardID:%d, shardStatus:%d, node:%s.", shardInfo.ID, shardInfo.Status, registeredNode.Node.Name)) - if len(procedures) >= int(r.procedureExecutingBatchSize) { - break - } - } - } - - if len(procedures) == 0 { - return scheduleRes, nil - } - - batchProcedure, err := r.factory.CreateBatchTransferLeaderProcedure(ctx, coordinator.BatchRequest{ - Batch: procedures, - BatchType: procedure.TransferLeader, - }) - if err != nil { - return scheduleRes, err - } - - scheduleRes = scheduler.ScheduleResult{ - Procedure: batchProcedure, - Reason: reasons.String(), - } - return scheduleRes, nil -} - -func needReopen(shardInfo metadata.ShardInfo) bool { - return shardInfo.Status == storage.ShardStatusPartialOpen -} diff --git a/horaemeta/server/coordinator/scheduler/reopen/scheduler_test.go b/horaemeta/server/coordinator/scheduler/reopen/scheduler_test.go deleted file mode 100644 index 2b23051a70..0000000000 --- a/horaemeta/server/coordinator/scheduler/reopen/scheduler_test.go +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
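As a companion note on the reopen scheduler deleted above: its selection logic boils down to skipping expired nodes and collecting shards reported as PartialOpen, bounded by the batch size. A minimal sketch with simplified stand-in types; the ExpiredAt field is an assumption standing in for the real RegisteredNode.IsExpired check.

    package main

    import (
        "fmt"
        "time"
    )

    // Simplified stand-ins for the removed metadata/storage types.
    type ShardStatus int

    const (
        ShardStatusReady ShardStatus = iota
        ShardStatusPartialOpen
    )

    type ShardInfo struct {
        ID     uint32
        Status ShardStatus
    }

    type RegisteredNode struct {
        Name       string
        ExpiredAt  time.Time // assumed field; the real type exposes IsExpired(now)
        ShardInfos []ShardInfo
    }

    // shardsToReopen mirrors the selection loop of the removed reopen scheduler:
    // skip expired nodes, then pick shards reported as PartialOpen, up to batchSize.
    func shardsToReopen(nodes []RegisteredNode, now time.Time, batchSize int) map[uint32]string {
        result := make(map[uint32]string)
        for _, n := range nodes {
            if now.After(n.ExpiredAt) { // node heartbeat expired
                continue
            }
            for _, s := range n.ShardInfos {
                if s.Status != ShardStatusPartialOpen {
                    continue
                }
                result[s.ID] = n.Name // reopen on the same node that reported it
                if len(result) >= batchSize {
                    return result
                }
            }
        }
        return result
    }

    func main() {
        now := time.Now()
        nodes := []RegisteredNode{
            {Name: "n1", ExpiredAt: now.Add(time.Minute), ShardInfos: []ShardInfo{{ID: 0, Status: ShardStatusReady}, {ID: 1, Status: ShardStatusPartialOpen}}},
            {Name: "n2", ExpiredAt: now.Add(-time.Minute), ShardInfos: []ShardInfo{{ID: 2, Status: ShardStatusPartialOpen}}},
        }
        fmt.Println(shardsToReopen(nodes, now, 10)) // map[1:n1] -- n2 is expired
    }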
- */ - -package reopen_test - -import ( - "context" - "testing" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/test" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler/reopen" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/stretchr/testify/require" - "go.uber.org/zap" -) - -func TestReopenShardScheduler(t *testing.T) { - re := require.New(t) - ctx := context.Background() - emptyCluster := test.InitEmptyCluster(ctx, t) - - procedureFactory := coordinator.NewFactory(zap.NewNop(), test.MockIDAllocator{}, test.MockDispatch{}, test.NewTestStorage(t), emptyCluster.GetMetadata()) - - s := reopen.NewShardScheduler(procedureFactory, 1) - - // ReopenShardScheduler should not schedule when cluster is not stable. - result, err := s.Schedule(ctx, emptyCluster.GetMetadata().GetClusterSnapshot()) - re.NoError(err) - re.Nil(result.Procedure) - - stableCluster := test.InitStableCluster(ctx, t) - snapshot := stableCluster.GetMetadata().GetClusterSnapshot() - - // Add shard with ready status. - snapshot.RegisteredNodes[0].ShardInfos = append(snapshot.RegisteredNodes[0].ShardInfos, metadata.ShardInfo{ - ID: 0, - Role: storage.ShardRoleLeader, - Version: 0, - Status: storage.ShardStatusReady, - }) - re.NoError(err) - re.Nil(result.Procedure) - - // Add shard with partitionOpen status. - snapshot.RegisteredNodes[0].ShardInfos = append(snapshot.RegisteredNodes[0].ShardInfos, metadata.ShardInfo{ - ID: 1, - Role: storage.ShardRoleLeader, - Version: 0, - Status: storage.ShardStatusPartialOpen, - }) - result, err = s.Schedule(ctx, snapshot) - re.NoError(err) - re.NotNil(result.Procedure) -} diff --git a/horaemeta/server/coordinator/scheduler/scheduler.go b/horaemeta/server/coordinator/scheduler/scheduler.go deleted file mode 100644 index 3d5d0fe5bc..0000000000 --- a/horaemeta/server/coordinator/scheduler/scheduler.go +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package scheduler - -import ( - "context" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/storage" -) - -type ScheduleResult struct { - Procedure procedure.Procedure - // The reason that the procedure is generated for. 
- Reason string -} - -type ShardAffinity struct { - ShardID storage.ShardID `json:"shardID"` - NumAllowedOtherShards uint `json:"numAllowedOtherShards"` -} - -type ShardAffinityRule struct { - Affinities []ShardAffinity -} - -type Scheduler interface { - Name() string - // Schedule will generate procedure based on current cluster snapshot, which will be submitted to ProcedureManager, and whether it is actually executed depends on the current state of ProcedureManager. - Schedule(ctx context.Context, clusterSnapshot metadata.Snapshot) (ScheduleResult, error) - // UpdateEnableSchedule is used to update enableSchedule for scheduler, - // EnableSchedule means that the cluster topology is locked and the mapping between shards and nodes cannot be changed. - UpdateEnableSchedule(ctx context.Context, enable bool) - AddShardAffinityRule(ctx context.Context, rule ShardAffinityRule) error - RemoveShardAffinityRule(ctx context.Context, shardID storage.ShardID) error - ListShardAffinityRule(ctx context.Context) (ShardAffinityRule, error) -} diff --git a/horaemeta/server/coordinator/scheduler/static/error.go b/horaemeta/server/coordinator/scheduler/static/error.go deleted file mode 100644 index 97bcf66a57..0000000000 --- a/horaemeta/server/coordinator/scheduler/static/error.go +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package static - -import "github.com/apache/incubator-horaedb-meta/pkg/coderr" - -var ErrNotImplemented = coderr.NewCodeError(coderr.ErrNotImplemented, "no") diff --git a/horaemeta/server/coordinator/scheduler/static/scheduler.go b/horaemeta/server/coordinator/scheduler/static/scheduler.go deleted file mode 100644 index 2da891dd7b..0000000000 --- a/horaemeta/server/coordinator/scheduler/static/scheduler.go +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
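The ShardAffinity struct removed above carries explicit JSON tags while the Affinities field of ShardAffinityRule is untagged, so it keeps its Go name when serialized. A small sketch with local copies of the types (storage.ShardID narrowed to a plain uint32 here) shows the JSON shape implied by those tags:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // Local copies of the removed scheduler types, keeping the original JSON tags.
    type ShardAffinity struct {
        ShardID               uint32 `json:"shardID"`
        NumAllowedOtherShards uint   `json:"numAllowedOtherShards"`
    }

    type ShardAffinityRule struct {
        Affinities []ShardAffinity
    }

    func main() {
        rule := ShardAffinityRule{Affinities: []ShardAffinity{{ShardID: 3, NumAllowedOtherShards: 0}}}
        b, err := json.Marshal(rule)
        if err != nil {
            panic(err)
        }
        // Prints: {"Affinities":[{"shardID":3,"numAllowedOtherShards":0}]}
        fmt.Println(string(b))
    }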
- */ - -package static - -import ( - "cmp" - "context" - "fmt" - "slices" - "strings" - "time" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler/nodepicker" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/pkg/errors" -) - -type schedulerImpl struct { - factory *coordinator.Factory - nodePicker nodepicker.NodePicker - procedureExecutingBatchSize uint32 -} - -func NewShardScheduler(factory *coordinator.Factory, nodePicker nodepicker.NodePicker, procedureExecutingBatchSize uint32) scheduler.Scheduler { - return schedulerImpl{factory: factory, nodePicker: nodePicker, procedureExecutingBatchSize: procedureExecutingBatchSize} -} - -func (s schedulerImpl) Name() string { - return "static_scheduler" -} - -func (s schedulerImpl) UpdateEnableSchedule(_ context.Context, _ bool) { - // StaticTopologyShardScheduler do not need EnableSchedule. -} - -func (s schedulerImpl) AddShardAffinityRule(_ context.Context, _ scheduler.ShardAffinityRule) error { - return ErrNotImplemented.WithCausef("static topology scheduler doesn't support shard affinity") -} - -func (s schedulerImpl) RemoveShardAffinityRule(_ context.Context, _ storage.ShardID) error { - return ErrNotImplemented.WithCausef("static topology scheduler doesn't support shard affinity") -} - -func (s schedulerImpl) ListShardAffinityRule(_ context.Context) (scheduler.ShardAffinityRule, error) { - var emptyRule scheduler.ShardAffinityRule - return emptyRule, ErrNotImplemented.WithCausef("static topology scheduler doesn't support shard affinity") -} - -func (s schedulerImpl) Schedule(ctx context.Context, clusterSnapshot metadata.Snapshot) (scheduler.ScheduleResult, error) { - var procedures []procedure.Procedure - var reasons strings.Builder - var emptyScheduleRes scheduler.ScheduleResult - - switch clusterSnapshot.Topology.ClusterView.State { - case storage.ClusterStateEmpty: - return emptyScheduleRes, nil - case storage.ClusterStatePrepare: - unassignedShardIds := make([]storage.ShardID, 0, len(clusterSnapshot.Topology.ShardViewsMapping)) - for _, shardView := range clusterSnapshot.Topology.ShardViewsMapping { - _, exists := findNodeByShard(shardView.ShardID, clusterSnapshot.Topology.ClusterView.ShardNodes) - if exists { - continue - } - unassignedShardIds = append(unassignedShardIds, shardView.ShardID) - } - pickConfig := nodepicker.Config{ - NumTotalShards: uint32(len(clusterSnapshot.Topology.ShardViewsMapping)), - ShardAffinityRule: map[storage.ShardID]scheduler.ShardAffinity{}, - } - // Assign shards - shardNodeMapping, err := s.nodePicker.PickNode(ctx, pickConfig, unassignedShardIds, clusterSnapshot.RegisteredNodes) - if err != nil { - return emptyScheduleRes, err - } - for shardID, node := range shardNodeMapping { - // Shard exists and ShardNode not exists. - p, err := s.factory.CreateTransferLeaderProcedure(ctx, coordinator.TransferLeaderRequest{ - Snapshot: clusterSnapshot, - ShardID: shardID, - OldLeaderNodeName: "", - NewLeaderNodeName: node.Node.Name, - }) - if err != nil { - return emptyScheduleRes, err - } - procedures = append(procedures, p) - reasons.WriteString(fmt.Sprintf("Cluster initialization, assign shard to node, shardID:%d, nodeName:%s. 
", shardID, node.Node.Name)) - if len(procedures) >= int(s.procedureExecutingBatchSize) { - break - } - } - case storage.ClusterStateStable: - for i := 0; i < len(clusterSnapshot.Topology.ClusterView.ShardNodes); i++ { - shardNode := clusterSnapshot.Topology.ClusterView.ShardNodes[i] - node, err := findOnlineNodeByName(shardNode.NodeName, clusterSnapshot.RegisteredNodes) - if err != nil { - continue - } - if !containsShard(node.ShardInfos, shardNode.ID) { - // Shard need to be reopened - p, err := s.factory.CreateTransferLeaderProcedure(ctx, coordinator.TransferLeaderRequest{ - Snapshot: clusterSnapshot, - ShardID: shardNode.ID, - OldLeaderNodeName: "", - NewLeaderNodeName: node.Node.Name, - }) - if err != nil { - return emptyScheduleRes, err - } - procedures = append(procedures, p) - reasons.WriteString(fmt.Sprintf("Cluster recover, assign shard to node, shardID:%d, nodeName:%s. ", shardNode.ID, node.Node.Name)) - if len(procedures) >= int(s.procedureExecutingBatchSize) { - break - } - } - } - } - - if len(procedures) == 0 { - return emptyScheduleRes, nil - } - - batchProcedure, err := s.factory.CreateBatchTransferLeaderProcedure(ctx, coordinator.BatchRequest{ - Batch: procedures, - BatchType: procedure.TransferLeader, - }) - if err != nil { - return emptyScheduleRes, err - } - - return scheduler.ScheduleResult{Procedure: batchProcedure, Reason: reasons.String()}, nil -} - -func findOnlineNodeByName(nodeName string, nodes []metadata.RegisteredNode) (metadata.RegisteredNode, error) { - now := time.Now() - for i := 0; i < len(nodes); i++ { - node := nodes[i] - if node.IsExpired(now) { - continue - } - if node.Node.Name == nodeName { - return node, nil - } - } - - return metadata.RegisteredNode{}, errors.WithMessagef(metadata.ErrNodeNotFound, "node:%s not found in topology", nodeName) -} - -func containsShard(shardInfos []metadata.ShardInfo, shardID storage.ShardID) bool { - for i := 0; i < len(shardInfos); i++ { - if shardInfos[i].ID == shardID { - return true - } - } - return false -} - -func findNodeByShard(shardID storage.ShardID, shardNodes []storage.ShardNode) (storage.ShardNode, bool) { - n, found := slices.BinarySearchFunc(shardNodes, shardID, func(node storage.ShardNode, id storage.ShardID) int { - return cmp.Compare(node.ID, id) - }) - if !found { - var emptyShardNode storage.ShardNode - return emptyShardNode, false - } - return shardNodes[n], true -} diff --git a/horaemeta/server/coordinator/scheduler/static/scheduler_test.go b/horaemeta/server/coordinator/scheduler/static/scheduler_test.go deleted file mode 100644 index d1c9b83eeb..0000000000 --- a/horaemeta/server/coordinator/scheduler/static/scheduler_test.go +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package static_test - -import ( - "context" - "testing" - - "github.com/apache/incubator-horaedb-meta/server/coordinator" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/test" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler/nodepicker" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler/static" - "github.com/stretchr/testify/require" - "go.uber.org/zap" -) - -func TestStaticTopologyScheduler(t *testing.T) { - re := require.New(t) - ctx := context.Background() - - // EmptyCluster would be scheduled an empty procedure. - emptyCluster := test.InitEmptyCluster(ctx, t) - procedureFactory := coordinator.NewFactory(zap.NewNop(), test.MockIDAllocator{}, test.MockDispatch{}, test.NewTestStorage(t), emptyCluster.GetMetadata()) - s := static.NewShardScheduler(procedureFactory, nodepicker.NewConsistentUniformHashNodePicker(zap.NewNop()), 1) - result, err := s.Schedule(ctx, emptyCluster.GetMetadata().GetClusterSnapshot()) - re.NoError(err) - re.Empty(result) - - // PrepareCluster would be scheduled a transfer leader procedure. - prepareCluster := test.InitPrepareCluster(ctx, t) - procedureFactory = coordinator.NewFactory(zap.NewNop(), test.MockIDAllocator{}, test.MockDispatch{}, test.NewTestStorage(t), prepareCluster.GetMetadata()) - s = static.NewShardScheduler(procedureFactory, nodepicker.NewConsistentUniformHashNodePicker(zap.NewNop()), 1) - result, err = s.Schedule(ctx, prepareCluster.GetMetadata().GetClusterSnapshot()) - re.NoError(err) - re.NotEmpty(result) - - // StableCluster with all shards assigned would be scheduled a transfer leader procedure by hash rule. - stableCluster := test.InitStableCluster(ctx, t) - procedureFactory = coordinator.NewFactory(zap.NewNop(), test.MockIDAllocator{}, test.MockDispatch{}, test.NewTestStorage(t), stableCluster.GetMetadata()) - s = static.NewShardScheduler(procedureFactory, nodepicker.NewConsistentUniformHashNodePicker(zap.NewNop()), 1) - result, err = s.Schedule(ctx, stableCluster.GetMetadata().GetClusterSnapshot()) - re.NoError(err) - re.NotEmpty(result) -} diff --git a/horaemeta/server/coordinator/shard_picker.go b/horaemeta/server/coordinator/shard_picker.go deleted file mode 100644 index bc5325f027..0000000000 --- a/horaemeta/server/coordinator/shard_picker.go +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package coordinator - -import ( - "context" - "sort" - - "github.com/apache/incubator-horaedb-meta/pkg/assert" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/pkg/errors" -) - -// ShardPicker is used to pick up the shards suitable for scheduling in the cluster. 
-// If expectShardNum bigger than cluster node number, the result depends on enableDuplicateNode: -// TODO: Consider refactor this interface, abstracts the parameters of PickShards as PickStrategy. -type ShardPicker interface { - PickShards(ctx context.Context, snapshot metadata.Snapshot, expectShardNum int) ([]storage.ShardNode, error) -} - -// LeastTableShardPicker selects shards based on the number of tables on the current shard, -// and always selects the shard with the smallest number of current tables. -type leastTableShardPicker struct{} - -func NewLeastTableShardPicker() ShardPicker { - return &leastTableShardPicker{} -} - -func (l leastTableShardPicker) PickShards(_ context.Context, snapshot metadata.Snapshot, expectShardNum int) ([]storage.ShardNode, error) { - if len(snapshot.Topology.ClusterView.ShardNodes) == 0 { - return nil, errors.WithMessage(ErrNodeNumberNotEnough, "no shard is assigned") - } - - shardNodeMapping := make(map[storage.ShardID]storage.ShardNode, len(snapshot.Topology.ShardViewsMapping)) - sortedShardsByTableCount := make([]storage.ShardID, 0, len(snapshot.Topology.ShardViewsMapping)) - for _, shardNode := range snapshot.Topology.ClusterView.ShardNodes { - shardNodeMapping[shardNode.ID] = shardNode - // Only collect the shards witch has been allocated to a node. - sortedShardsByTableCount = append(sortedShardsByTableCount, shardNode.ID) - } - - // Sort shard by table number, - // the shard with the smallest number of tables is at the front of the array. - sort.SliceStable(sortedShardsByTableCount, func(i, j int) bool { - shardView1 := snapshot.Topology.ShardViewsMapping[sortedShardsByTableCount[i]] - shardView2 := snapshot.Topology.ShardViewsMapping[sortedShardsByTableCount[j]] - // When the number of tables is the same, sort according to the size of ShardID. - if len(shardView1.TableIDs) == len(shardView2.TableIDs) { - return shardView1.ShardID < shardView2.ShardID - } - return len(shardView1.TableIDs) < len(shardView2.TableIDs) - }) - - result := make([]storage.ShardNode, 0, expectShardNum) - - for i := 0; i < expectShardNum; i++ { - selectShardID := sortedShardsByTableCount[i%len(sortedShardsByTableCount)] - shardNode, ok := shardNodeMapping[selectShardID] - assert.Assert(ok) - result = append(result, shardNode) - } - - return result, nil -} diff --git a/horaemeta/server/coordinator/shard_picker_test.go b/horaemeta/server/coordinator/shard_picker_test.go deleted file mode 100644 index 9cbcc4d1c0..0000000000 --- a/horaemeta/server/coordinator/shard_picker_test.go +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
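The least-table shard picker removed above reduces to: sort the shards that already have a node by table count (ties broken by shard ID), then hand them out round-robin until the requested number is reached. A self-contained sketch of that selection, with a plain map standing in for the topology snapshot:

    package main

    import (
        "fmt"
        "sort"
    )

    type ShardID uint32

    // pickLeastLoaded mirrors the removed leastTableShardPicker: sort the assigned
    // shards by table count (ties broken by shard ID), then pick round-robin until
    // expectShardNum picks are made.
    func pickLeastLoaded(tableCount map[ShardID]int, expectShardNum int) []ShardID {
        shards := make([]ShardID, 0, len(tableCount))
        for id := range tableCount {
            shards = append(shards, id)
        }
        sort.SliceStable(shards, func(i, j int) bool {
            if tableCount[shards[i]] == tableCount[shards[j]] {
                return shards[i] < shards[j]
            }
            return tableCount[shards[i]] < tableCount[shards[j]]
        })

        result := make([]ShardID, 0, expectShardNum)
        for i := 0; i < expectShardNum; i++ {
            result = append(result, shards[i%len(shards)])
        }
        return result
    }

    func main() {
        counts := map[ShardID]int{0: 3, 1: 0, 2: 1, 3: 0}
        // Shards with the fewest tables come first: 1, 3, 2, 0, then wrap around.
        fmt.Println(pickLeastLoaded(counts, 6)) // [1 3 2 0 1 3]
    }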
- */ - -package coordinator_test - -import ( - "context" - "sort" - "testing" - - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure/test" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/stretchr/testify/require" -) - -func TestLeastTableShardPicker(t *testing.T) { - re := require.New(t) - ctx := context.Background() - - c := test.InitStableCluster(ctx, t) - snapshot := c.GetMetadata().GetClusterSnapshot() - - shardPicker := coordinator.NewLeastTableShardPicker() - - shardNodes, err := shardPicker.PickShards(ctx, snapshot, 4) - re.NoError(err) - re.Equal(len(shardNodes), 4) - // Each shardNode should be different shard. - shardIDs := map[storage.ShardID]struct{}{} - for _, shardNode := range shardNodes { - shardIDs[shardNode.ID] = struct{}{} - } - re.Equal(len(shardIDs), 4) - - shardNodes, err = shardPicker.PickShards(ctx, snapshot, 7) - re.NoError(err) - re.Equal(len(shardNodes), 7) - // Each shardNode should be different shard. - shardIDs = map[storage.ShardID]struct{}{} - for _, shardNode := range shardNodes { - shardIDs[shardNode.ID] = struct{}{} - } - re.Equal(len(shardIDs), 4) - - // Create table on shard 0. - _, err = c.GetMetadata().CreateTable(ctx, metadata.CreateTableRequest{ - ShardID: 0, - LatestVersion: 0, - SchemaName: test.TestSchemaName, - TableName: "test", - PartitionInfo: storage.PartitionInfo{ - Info: nil, - }, - }) - re.NoError(err) - - // shard 0 should not exist in pick result. - shardNodes, err = shardPicker.PickShards(ctx, snapshot, 3) - re.NoError(err) - re.Equal(len(shardNodes), 3) - for _, shardNode := range shardNodes { - re.NotEqual(shardNode.ID, 0) - } - - // drop shard node 1, shard 1 should not be picked. 
- for _, shardNode := range snapshot.Topology.ClusterView.ShardNodes { - if shardNode.ID == 1 { - err = c.GetMetadata().DropShardNodes(ctx, []storage.ShardNode{shardNode}) - re.NoError(err) - } - } - shardNodes, err = shardPicker.PickShards(ctx, snapshot, 8) - re.NoError(err) - for _, shardNode := range shardNodes { - re.NotEqual(shardNode.ID, 1) - } - - checkPartitionTable(ctx, shardPicker, t, 50, 256, 20, 2) - checkPartitionTable(ctx, shardPicker, t, 50, 256, 30, 2) - checkPartitionTable(ctx, shardPicker, t, 50, 256, 40, 2) - checkPartitionTable(ctx, shardPicker, t, 50, 256, 50, 2) -} - -func checkPartitionTable(ctx context.Context, shardPicker coordinator.ShardPicker, t *testing.T, nodeNumber int, shardNumber int, subTableNumber int, maxDifference int) { - re := require.New(t) - - var shardNodes []storage.ShardNode - - c := test.InitStableClusterWithConfig(ctx, t, nodeNumber, shardNumber) - shardNodes, err := shardPicker.PickShards(ctx, c.GetMetadata().GetClusterSnapshot(), subTableNumber) - re.NoError(err) - - nodeTableCountMapping := make(map[string]int, 0) - for _, shardNode := range shardNodes { - nodeTableCountMapping[shardNode.NodeName]++ - } - - // Ensure the difference in the number of tables is no greater than maxDifference - var nodeTableNumberSlice []int - for _, tableNumber := range nodeTableCountMapping { - nodeTableNumberSlice = append(nodeTableNumberSlice, tableNumber) - } - sort.Ints(nodeTableNumberSlice) - minTableNumber := nodeTableNumberSlice[0] - maxTableNumber := nodeTableNumberSlice[len(nodeTableNumberSlice)-1] - re.LessOrEqual(maxTableNumber-minTableNumber, maxDifference) -} diff --git a/horaemeta/server/coordinator/watch/watch.go b/horaemeta/server/coordinator/watch/watch.go deleted file mode 100644 index 18b1a0eacd..0000000000 --- a/horaemeta/server/coordinator/watch/watch.go +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package watch - -import ( - "context" - "fmt" - "strconv" - "strings" - "sync" - - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaeventpb" - "github.com/pkg/errors" - "go.etcd.io/etcd/api/v3/mvccpb" - clientv3 "go.etcd.io/etcd/client/v3" - "go.uber.org/zap" - "google.golang.org/protobuf/proto" -) - -const ( - shardPath = "shards" - keySep = "/" -) - -type ShardRegisterEvent struct { - clusterName string - ShardID storage.ShardID - NewLeaderNode string -} - -type ShardExpireEvent struct { - clusterName string - ShardID storage.ShardID - OldLeaderNode string -} - -type ShardEventCallback interface { - OnShardRegistered(ctx context.Context, event ShardRegisterEvent) error - OnShardExpired(ctx context.Context, event ShardExpireEvent) error -} - -type ShardWatch interface { - Start(ctx context.Context) error - Stop(ctx context.Context) error - RegisteringEventCallback(eventCallback ShardEventCallback) -} - -// EtcdShardWatch used to watch the distributed lock of shard, and provide the corresponding callback function. -type EtcdShardWatch struct { - logger *zap.Logger - clusterName string - rootPath string - etcdClient *clientv3.Client - eventCallbacks []ShardEventCallback - - lock sync.RWMutex - isRunning bool - cancel context.CancelFunc -} - -type NoopShardWatch struct{} - -func NewNoopShardWatch() ShardWatch { - return NoopShardWatch{} -} - -func (n NoopShardWatch) Start(_ context.Context) error { - return nil -} - -func (n NoopShardWatch) Stop(_ context.Context) error { - return nil -} - -func (n NoopShardWatch) RegisteringEventCallback(_ ShardEventCallback) {} - -func NewEtcdShardWatch(logger *zap.Logger, clusterName string, rootPath string, client *clientv3.Client) ShardWatch { - return &EtcdShardWatch{ - logger: logger, - clusterName: clusterName, - rootPath: rootPath, - etcdClient: client, - eventCallbacks: []ShardEventCallback{}, - - lock: sync.RWMutex{}, - isRunning: false, - cancel: nil, - } -} - -func (w *EtcdShardWatch) Start(ctx context.Context) error { - w.lock.Lock() - defer w.lock.Unlock() - - shardKeyPrefix := encodeShardKeyPrefix(w.rootPath, w.clusterName, shardPath) - if err := w.startWatch(ctx, shardKeyPrefix); err != nil { - return errors.WithMessage(err, "etcd register watch failed") - } - if w.isRunning { - return nil - } - - w.isRunning = true - return nil -} - -func (w *EtcdShardWatch) Stop(_ context.Context) error { - w.lock.Lock() - defer w.lock.Unlock() - - w.cancel() - - w.isRunning = false - return nil -} - -func (w *EtcdShardWatch) RegisteringEventCallback(eventCallback ShardEventCallback) { - w.eventCallbacks = append(w.eventCallbacks, eventCallback) -} - -func (w *EtcdShardWatch) startWatch(ctx context.Context, path string) error { - w.logger.Info("register shard watch", zap.String("watchPath", path)) - go func() { - ctxWithCancel, cancel := context.WithCancel(ctx) - w.cancel = cancel - respChan := w.etcdClient.Watch(ctxWithCancel, path, clientv3.WithPrefix(), clientv3.WithPrevKV()) - for resp := range respChan { - for _, event := range resp.Events { - if err := w.processEvent(ctx, event); err != nil { - w.logger.Error("process event", zap.Error(err)) - } - } - } - }() - return nil -} - -func (w *EtcdShardWatch) processEvent(ctx context.Context, event *clientv3.Event) error { - switch event.Type { - case mvccpb.DELETE: - shardID, err := decodeShardKey(string(event.Kv.Key)) - if err != nil { - return err - } - shardLockValue, err := convertShardLockValueToPB(event.PrevKv.Value) - if 
err != nil { - return err - } - w.logger.Info("receive delete event", zap.String("preKV", fmt.Sprintf("%v", event.PrevKv)), zap.String("event", fmt.Sprintf("%v", event)), zap.Uint64("shardID", shardID), zap.String("oldLeader", shardLockValue.NodeName)) - for _, callback := range w.eventCallbacks { - if err := callback.OnShardExpired(ctx, ShardExpireEvent{ - clusterName: w.clusterName, - ShardID: storage.ShardID(shardID), - OldLeaderNode: shardLockValue.NodeName, - }); err != nil { - return err - } - } - case mvccpb.PUT: - shardID, err := decodeShardKey(string(event.Kv.Key)) - if err != nil { - return err - } - shardLockValue, err := convertShardLockValueToPB(event.Kv.Value) - if err != nil { - return err - } - w.logger.Info("receive put event", zap.String("event", fmt.Sprintf("%v", event)), zap.Uint64("shardID", shardID), zap.String("oldLeader", shardLockValue.NodeName)) - for _, callback := range w.eventCallbacks { - if err := callback.OnShardRegistered(ctx, ShardRegisterEvent{ - clusterName: w.clusterName, - ShardID: storage.ShardID(shardID), - NewLeaderNode: shardLockValue.NodeName, - }); err != nil { - return err - } - } - } - return nil -} - -func decodeShardKey(keyPath string) (uint64, error) { - pathList := strings.Split(keyPath, keySep) - shardID, err := strconv.ParseUint(pathList[len(pathList)-1], 10, 64) - if err != nil { - return 0, errors.WithMessage(err, "decode etcd event key failed") - } - return shardID, nil -} - -func encodeShardKeyPrefix(rootPath, shardPath, clusterName string) string { - return strings.Join([]string{rootPath, shardPath, clusterName}, keySep) -} - -func encodeShardKey(rootPath, shardPath, clusterName string, shardID uint64) string { - shardKeyPrefix := encodeShardKeyPrefix(rootPath, shardPath, clusterName) - return strings.Join([]string{shardKeyPrefix, strconv.FormatUint(shardID, 10)}, keySep) -} - -func convertShardLockValueToPB(value []byte) (*metaeventpb.ShardLockValue, error) { - shardLockValue := &metaeventpb.ShardLockValue{} - if err := proto.Unmarshal(value, shardLockValue); err != nil { - return shardLockValue, errors.WithMessage(err, "unmarshal shardLockValue failed") - } - return shardLockValue, nil -} diff --git a/horaemeta/server/coordinator/watch/watch_test.go b/horaemeta/server/coordinator/watch/watch_test.go deleted file mode 100644 index 41e6628d56..0000000000 --- a/horaemeta/server/coordinator/watch/watch_test.go +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
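For the shard watch removed above, the essential contract is the etcd key layout and the meaning of the two event types: the watched prefix ends with the shard ID, the value is a marshalled metaeventpb.ShardLockValue naming the owning node, a PUT signals the shard lock being taken and a DELETE (read via PrevKV) signals it expiring. Below is a small sketch of the key encoding/decoding only; the /rootPath/defaultCluster/shards/<id> layout is taken from how the removed test composed its keys and should be treated as illustrative.

    package main

    import (
        "fmt"
        "strconv"
        "strings"
    )

    const keySep = "/"

    // encodeShardKey/decodeShardKey mirror the key layout used by the removed etcd
    // shard watch: a watched prefix plus the shard ID as the last path segment.
    func encodeShardKey(prefix string, shardID uint64) string {
        return strings.Join([]string{prefix, strconv.FormatUint(shardID, 10)}, keySep)
    }

    func decodeShardKey(keyPath string) (uint64, error) {
        parts := strings.Split(keyPath, keySep)
        return strconv.ParseUint(parts[len(parts)-1], 10, 64)
    }

    func main() {
        key := encodeShardKey("/rootPath/defaultCluster/shards", 7)
        fmt.Println(key) // /rootPath/defaultCluster/shards/7

        id, err := decodeShardKey(key)
        if err != nil {
            panic(err)
        }
        fmt.Println(id) // 7

        // A PUT on such a key means a node took the shard lock (shard registered);
        // a DELETE, observed with PrevKV, means the lock expired on its previous owner.
    }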
- */ - -package watch - -import ( - "context" - "testing" - "time" - - "github.com/apache/incubator-horaedb-meta/server/etcdutil" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaeventpb" - "github.com/stretchr/testify/require" - clientv3 "go.etcd.io/etcd/client/v3" - "go.uber.org/zap" - "google.golang.org/protobuf/proto" -) - -const ( - TestClusterName = "defaultCluster" - TestRootPath = "/rootPath" - TestShardPath = "shards" - TestShardID = 1 - TestNodeName = "testNode" -) - -func TestWatch(t *testing.T) { - re := require.New(t) - ctx := context.Background() - - _, client, _ := etcdutil.PrepareEtcdServerAndClient(t) - watch := NewEtcdShardWatch(zap.NewNop(), TestClusterName, TestRootPath, client) - err := watch.Start(ctx) - re.NoError(err) - - testCallback := testShardEventCallback{ - result: 0, - re: re, - } - - watch.RegisteringEventCallback(&testCallback) - - // Valid that callback function is executed and the params are as expected. - b, err := proto.Marshal(&metaeventpb.ShardLockValue{NodeName: TestNodeName}) - re.NoError(err) - - keyPath := encodeShardKey(TestRootPath, TestClusterName, TestShardPath, TestShardID) - _, err = client.Put(ctx, keyPath, string(b)) - re.NoError(err) - time.Sleep(time.Millisecond * 10) - re.Equal(2, testCallback.result) - - _, err = client.Delete(ctx, keyPath, clientv3.WithPrevKV()) - re.NoError(err) - time.Sleep(time.Millisecond * 10) - re.Equal(1, testCallback.result) -} - -type testShardEventCallback struct { - result int - re *require.Assertions -} - -func (c *testShardEventCallback) OnShardRegistered(_ context.Context, event ShardRegisterEvent) error { - c.result = 2 - c.re.Equal(storage.ShardID(TestShardID), event.ShardID) - c.re.Equal(TestNodeName, event.NewLeaderNode) - return nil -} - -func (c *testShardEventCallback) OnShardExpired(_ context.Context, event ShardExpireEvent) error { - c.result = 1 - c.re.Equal(storage.ShardID(TestShardID), event.ShardID) - c.re.Equal(TestNodeName, event.OldLeaderNode) - return nil -} diff --git a/horaemeta/server/error.go b/horaemeta/server/error.go deleted file mode 100644 index 8d7113ddc3..0000000000 --- a/horaemeta/server/error.go +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package server - -import "github.com/apache/incubator-horaedb-meta/pkg/coderr" - -var ( - ErrCreateEtcdClient = coderr.NewCodeError(coderr.Internal, "create etcd etcdCli") - ErrStartEtcd = coderr.NewCodeError(coderr.Internal, "start embed etcd") - ErrStartEtcdTimeout = coderr.NewCodeError(coderr.Internal, "start etcd server timeout") - ErrStartServer = coderr.NewCodeError(coderr.Internal, "start server") - ErrFlowLimiterNotFound = coderr.NewCodeError(coderr.Internal, "flow limiter not found") -) diff --git a/horaemeta/server/etcdutil/config.go b/horaemeta/server/etcdutil/config.go deleted file mode 100644 index 6f271b4dec..0000000000 --- a/horaemeta/server/etcdutil/config.go +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package etcdutil - -import ( - "fmt" - "net/url" - "os" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/tikv/pd/pkg/tempurl" - clientv3 "go.etcd.io/etcd/client/v3" - "go.etcd.io/etcd/server/v3/embed" -) - -type CloseFn = func() - -// NewTestSingleConfig is used to create an etcd config for the unit test purpose. -func NewTestSingleConfig() *embed.Config { - cfg := embed.NewConfig() - cfg.Name = "test_etcd" - cfg.Dir, _ = os.MkdirTemp("/tmp", "test_etcd") - cfg.WalDir = "" - cfg.Logger = "zap" - cfg.LogOutputs = []string{"stdout"} - - pu, _ := url.Parse(tempurl.Alloc()) - cfg.ListenPeerUrls = []url.URL{*pu} - cfg.AdvertisePeerUrls = cfg.ListenPeerUrls - cu, _ := url.Parse(tempurl.Alloc()) - cfg.ListenClientUrls = []url.URL{*cu} - cfg.AdvertiseClientUrls = cfg.ListenClientUrls - - cfg.StrictReconfigCheck = false - cfg.InitialCluster = fmt.Sprintf("%s=%s", cfg.Name, &cfg.ListenPeerUrls[0]) - cfg.ClusterState = embed.ClusterStateFlagNew - return cfg -} - -// CleanConfig is used to clean the etcd data for the unit test purpose. -func CleanConfig(cfg *embed.Config) { - // Clean data directory - os.RemoveAll(cfg.Dir) -} - -// PrepareEtcdServerAndClient makes the server and client for testing. -// -// Caller should take responsibilities to close the server and client. 
-func PrepareEtcdServerAndClient(t *testing.T) (*embed.Etcd, *clientv3.Client, CloseFn) { - cfg := NewTestSingleConfig() - etcd, err := embed.StartEtcd(cfg) - assert.NoError(t, err) - - <-etcd.Server.ReadyNotify() - - endpoint := cfg.ListenClientUrls[0].String() - client, err := clientv3.New(clientv3.Config{ - Endpoints: []string{endpoint}, - }) - assert.NoError(t, err) - - closeSrv := func() { - etcd.Close() - CleanConfig(cfg) - } - return etcd, client, closeSrv -} diff --git a/horaemeta/server/etcdutil/error.go b/horaemeta/server/etcdutil/error.go deleted file mode 100644 index dd6bb6915a..0000000000 --- a/horaemeta/server/etcdutil/error.go +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package etcdutil - -import "github.com/apache/incubator-horaedb-meta/pkg/coderr" - -var ( - ErrEtcdKVGet = coderr.NewCodeError(coderr.Internal, "etcd KV get failed") - ErrEtcdKVGetResponse = coderr.NewCodeError(coderr.Internal, "etcd invalid get value response must only one") - ErrEtcdKVGetNotFound = coderr.NewCodeError(coderr.Internal, "etcd KV get value not found") -) diff --git a/horaemeta/server/etcdutil/get_leader.go b/horaemeta/server/etcdutil/get_leader.go deleted file mode 100644 index d7918f063a..0000000000 --- a/horaemeta/server/etcdutil/get_leader.go +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package etcdutil - -import ( - "go.etcd.io/etcd/server/v3/etcdserver" -) - -type EtcdLeaderGetter interface { - EtcdLeaderID() (uint64, error) -} - -type LeaderGetterWrapper struct { - Server *etcdserver.EtcdServer -} - -func (w *LeaderGetterWrapper) EtcdLeaderID() (uint64, error) { - return w.Server.Lead(), nil -} diff --git a/horaemeta/server/etcdutil/util.go b/horaemeta/server/etcdutil/util.go deleted file mode 100644 index 62ef077390..0000000000 --- a/horaemeta/server/etcdutil/util.go +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package etcdutil - -import ( - "context" - "path" - - "github.com/apache/incubator-horaedb-meta/pkg/log" - clientv3 "go.etcd.io/etcd/client/v3" - "go.uber.org/zap" -) - -func Get(ctx context.Context, client *clientv3.Client, key string) (string, error) { - resp, err := client.Get(ctx, key) - if err != nil { - return "", ErrEtcdKVGet.WithCause(err) - } - if n := len(resp.Kvs); n == 0 { - return "", ErrEtcdKVGetNotFound - } else if n > 1 { - return "", ErrEtcdKVGetResponse.WithCausef("%v", resp.Kvs) - } - - return string(resp.Kvs[0].Value), nil -} - -func List(ctx context.Context, client *clientv3.Client, prefix string) ([]string, error) { - resp, err := client.Get(ctx, prefix, clientv3.WithPrefix(), clientv3.WithKeysOnly()) - if err != nil { - return []string{}, ErrEtcdKVGet.WithCause(err) - } - var result []string - for _, kv := range resp.Kvs { - result = append(result, string(kv.Key)) - } - return result, nil -} - -func Scan(ctx context.Context, client *clientv3.Client, startKey, endKey string, batchSize int, do func(key string, val []byte) error) error { - withRange := clientv3.WithRange(endKey) - withLimit := clientv3.WithLimit(int64(batchSize)) - - // Take a special process for the first batch. - resp, err := client.Get(ctx, startKey, withRange, withLimit) - if err != nil { - return ErrEtcdKVGet.WithCause(err) - } - if len(resp.Kvs) == 0 { - return nil - } - - doIfNotEndKey := func(key, val []byte) error { - // TODO: avoid such a copy on key. - keyStr := string(key) - if keyStr == endKey { - return nil - } - - return do(keyStr, val) - } - - for _, item := range resp.Kvs { - err := doIfNotEndKey(item.Key, item.Value) - if err != nil { - return err - } - } - - lastKeyInPrevBatch := string(resp.Kvs[len(resp.Kvs)-1].Key) - // The following batches always contain one key in the previous batch, so we have to increment the batchSize to batchSize + 1; - withLimit = clientv3.WithLimit(int64(batchSize + 1)) - for { - if lastKeyInPrevBatch == endKey { - log.Warn("Stop scanning because the end key is reached", zap.String("endKey", endKey)) - return nil - } - startKey = lastKeyInPrevBatch - - // Get the keys range [startKey, endKey). 
- resp, err := client.Get(ctx, startKey, withRange, withLimit) - if err != nil { - return ErrEtcdKVGet.WithCause(err) - } - - select { - case <-ctx.Done(): - return ctx.Err() - default: - } - - if len(resp.Kvs) <= 1 { - // The only one key is `startKey` which is actually processed already. - return nil - } - - // Skip the first key which is processed already. - for _, item := range resp.Kvs[1:] { - err := doIfNotEndKey(item.Key, item.Value) - if err != nil { - return err - } - } - - // Check whether the keys are exhausted. - if len(resp.Kvs) < batchSize { - return nil - } - - lastKeyInPrevBatch = string(resp.Kvs[len(resp.Kvs)-1].Key) - } -} - -func ScanWithPrefix(ctx context.Context, client *clientv3.Client, prefix string, do func(key string, val []byte) error) error { - rangeEnd := clientv3.GetPrefixRangeEnd(prefix) - resp, err := client.Get(ctx, prefix, clientv3.WithRange(rangeEnd)) - if err != nil { - return ErrEtcdKVGet.WithCause(err) - } - // Check whether the keys are exhausted. - if len(resp.Kvs) == 0 { - return nil - } - - for _, item := range resp.Kvs { - err := do(string(item.Key), item.Value) - if err != nil { - return err - } - } - - return nil -} - -// GetLastPathSegment get the last path segment from completePath, path is split by '/'. -func GetLastPathSegment(completePath string) string { - return path.Base(path.Clean(completePath)) -} diff --git a/horaemeta/server/etcdutil/util_test.go b/horaemeta/server/etcdutil/util_test.go deleted file mode 100644 index 6b816a4a54..0000000000 --- a/horaemeta/server/etcdutil/util_test.go +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package etcdutil - -import ( - "context" - "fmt" - "testing" - - "github.com/stretchr/testify/require" -) - -func makeTestKeys(num int) []string { - keys := make([]string, 0, num) - for idx := 0; idx < num; idx++ { - keys = append(keys, fmt.Sprintf("%010d", idx)) - } - - return keys -} - -// Put some keys and scan all of them successfully. -func TestScanNormal(t *testing.T) { - r := require.New(t) - - _, client, closeSrv := PrepareEtcdServerAndClient(t) - defer closeSrv() - - keys := makeTestKeys(51) - lastKey := keys[len(keys)-1] - keys = keys[0 : len(keys)-1] - ctx := context.Background() - - // Put the keys. - for _, key := range keys { - // Let the value equal key for simplicity. - val := key - _, err := client.Put(ctx, key, val) - r.NoError(err) - } - - // Scan the keys with different batch size. 
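The trickiest part of the removed etcdutil.Scan is its pagination: the first request reads batchSize keys from startKey, and every later request restarts at the last key already processed, asks for batchSize+1 keys, and drops that duplicate first key. The sketch below models the same idea over an in-memory sorted key list (no etcd involved), so it is a simplification of the scheme rather than a port of the real function.

    package main

    import "fmt"

    // scanBatches models the pagination idea of the removed etcdutil.Scan over an
    // in-memory, sorted key list: later batches start at the last key already
    // processed, fetch one extra key, and skip that duplicate. endKey is exclusive.
    func scanBatches(keys []string, startKey, endKey string, batchSize int, do func(key string)) {
        // get returns up to limit keys in [from, endKey), in order.
        get := func(from string, limit int) []string {
            var out []string
            for _, k := range keys {
                if k >= from && k < endKey && len(out) < limit {
                    out = append(out, k)
                }
            }
            return out
        }

        batch := get(startKey, batchSize) // first batch starts exactly at startKey
        for len(batch) > 0 {
            for _, k := range batch {
                do(k)
            }
            // Re-read from the last processed key with limit batchSize+1 and drop
            // the duplicate -- the same trick the removed Scan used.
            next := get(batch[len(batch)-1], batchSize+1)
            if len(next) <= 1 {
                return // only the already-processed key remains
            }
            batch = next[1:]
        }
    }

    func main() {
        keys := []string{"a", "b", "c", "d", "e", "f"}
        scanBatches(keys, "a", "f", 2, func(k string) { fmt.Print(k, " ") }) // prints: a b c d e
        fmt.Println()
    }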
- batchSizes := []int{1, 10, 12, 30, 50, 90} - startKey, endKey := keys[0], lastKey - collectedKeys := make([]string, 0, len(keys)) - for _, batchSz := range batchSizes { - collectedKeys = collectedKeys[:0] - - do := func(key string, value []byte) error { - r.Equal(key, string(value)) - - collectedKeys = append(collectedKeys, key) - return nil - } - err := Scan(ctx, client, startKey, endKey, batchSz, do) - r.NoError(err) - - r.Equal(collectedKeys, keys) - } -} - -// Test the cases where scan fails. -func TestScanFailed(t *testing.T) { - r := require.New(t) - - _, client, closeSrv := PrepareEtcdServerAndClient(t) - defer closeSrv() - - keys := makeTestKeys(50) - ctx := context.Background() - - // Put the keys. - for _, key := range keys { - // Let the value equal key for simplicity. - val := key - _, err := client.Put(ctx, key, val) - r.NoError(err) - } - - fakeErr := fmt.Errorf("fake error for mock failed scan") - do := func(key string, value []byte) error { - if key > keys[len(keys)/2] { - return fakeErr - } - return nil - } - startKey, endKey := keys[0], keys[len(keys)-1] - err := Scan(ctx, client, startKey, endKey, 10, do) - r.Equal(fakeErr, err) -} - -func TestScanWithPrefix(t *testing.T) { - r := require.New(t) - - _, client, closeSrv := PrepareEtcdServerAndClient(t) - defer closeSrv() - ctx := context.Background() - - // Build keys with different prefix. - keys := []string{} - keys = append(keys, "/prefix/0") - keys = append(keys, "/prefix/1") - keys = append(keys, "/diff/0") - - // Put the keys. - for _, key := range keys { - // Let the value equal key for simplicity. - val := key - _, err := client.Put(ctx, key, val) - r.NoError(err) - } - - var scanResult []string - do := func(key string, value []byte) error { - scanResult = append(scanResult, key) - return nil - } - err := ScanWithPrefix(ctx, client, "/prefix", do) - r.NoError(err) - r.Equal(len(scanResult), 2) -} - -func TestGetLastPathSegment(t *testing.T) { - r := require.New(t) - - path := "/prefix/a/b/c" - lastPathSegment := GetLastPathSegment(path) - r.Equal("c", lastPathSegment) -} diff --git a/horaemeta/server/id/error.go b/horaemeta/server/id/error.go deleted file mode 100644 index af3a3ea518..0000000000 --- a/horaemeta/server/id/error.go +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package id - -import "github.com/apache/incubator-horaedb-meta/pkg/coderr" - -var ( - ErrTxnPutEndID = coderr.NewCodeError(coderr.Internal, "put end id in txn") - ErrAllocID = coderr.NewCodeError(coderr.Internal, "alloc id") - ErrCollectID = coderr.NewCodeError(coderr.Internal, "collect invalid id") - ErrCollectNotSupported = coderr.NewCodeError(coderr.Internal, "collect is not supported") -) diff --git a/horaemeta/server/id/id.go b/horaemeta/server/id/id.go deleted file mode 100644 index f9552187fc..0000000000 --- a/horaemeta/server/id/id.go +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package id - -import "context" - -// Allocator defines the id allocator on the horaedb cluster meta info. -type Allocator interface { - // Alloc allocs a unique id. - Alloc(ctx context.Context) (uint64, error) - - // Collect collect unused id to reused in alloc - Collect(ctx context.Context, id uint64) error -} diff --git a/horaemeta/server/id/id_impl.go b/horaemeta/server/id/id_impl.go deleted file mode 100644 index 002ffbb16b..0000000000 --- a/horaemeta/server/id/id_impl.go +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package id - -import ( - "context" - "fmt" - "strconv" - "sync" - - "github.com/apache/incubator-horaedb-meta/server/etcdutil" - "github.com/pkg/errors" - clientv3 "go.etcd.io/etcd/client/v3" - "go.etcd.io/etcd/client/v3/clientv3util" - "go.uber.org/zap" -) - -type AllocatorImpl struct { - logger *zap.Logger - // RWMutex is used to protect following fields. 
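[Editorial note] For context on the `AllocatorImpl` being removed here: it hands out IDs from an in-memory window and refills that window by advancing an end marker stored in etcd with a compare-and-put transaction. The sketch below is a minimal, hypothetical illustration of that rebase step, not the deleted implementation; the package and function names are invented.

```go
package idalloc

import (
	"context"
	"fmt"

	clientv3 "go.etcd.io/etcd/client/v3"
)

// advanceEnd bumps the stored high-water mark only if it still equals the
// value we last observed, so two allocators cannot hand out the same range.
func advanceEnd(ctx context.Context, kv clientv3.KV, key string, curEnd, step uint64) (uint64, error) {
	newEnd := curEnd + step
	resp, err := kv.Txn(ctx).
		If(clientv3.Compare(clientv3.Value(key), "=", fmt.Sprintf("%d", curEnd))).
		Then(clientv3.OpPut(key, fmt.Sprintf("%d", newEnd))).
		Commit()
	if err != nil {
		return 0, err
	}
	if !resp.Succeeded {
		return 0, fmt.Errorf("end id for key %q changed concurrently", key)
	}
	return newEnd, nil
}
```

When the transaction fails, the caller re-reads the current end value from etcd and retries, which is what the slow/fast rebase paths in the deleted code do.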
- lock sync.Mutex - base uint64 - end uint64 - - kv clientv3.KV - key string - allocStep uint - isInitialized bool -} - -func NewAllocatorImpl(logger *zap.Logger, kv clientv3.KV, key string, allocStep uint) Allocator { - return &AllocatorImpl{ - logger: logger, - lock: sync.Mutex{}, - base: 0, - end: 0, - kv: kv, - key: key, - allocStep: allocStep, - isInitialized: false, - } -} - -func (a *AllocatorImpl) isExhausted() bool { - return a.base == a.end -} - -func (a *AllocatorImpl) Alloc(ctx context.Context) (uint64, error) { - a.lock.Lock() - defer a.lock.Unlock() - - if !a.isInitialized { - if err := a.slowRebaseLocked(ctx); err != nil { - return 0, errors.WithMessage(err, "alloc id") - } - a.isInitialized = true - } - - if a.isExhausted() { - if err := a.fastRebaseLocked(ctx); err != nil { - a.logger.Warn("fast rebase failed", zap.Error(err)) - - if err = a.slowRebaseLocked(ctx); err != nil { - return 0, errors.WithMessage(err, "alloc id") - } - } - } - - ret := a.base - a.base++ - return ret, nil -} - -func (a *AllocatorImpl) Collect(_ context.Context, _ uint64) error { - return ErrCollectNotSupported -} - -func (a *AllocatorImpl) slowRebaseLocked(ctx context.Context) error { - resp, err := a.kv.Get(ctx, a.key) - if err != nil { - a.logger.Error("get end id", zap.String("resp", fmt.Sprintf("%v", resp)), zap.String("key", a.key)) - return errors.WithMessagef(err, "get end id failed, key:%s", a.key) - } - - if n := len(resp.Kvs); n > 1 { - return etcdutil.ErrEtcdKVGetResponse.WithCausef("%v", resp.Kvs) - } - - // Key is not exist, create key in kv storage. - if len(resp.Kvs) == 0 { - return a.firstDoRebaseLocked(ctx) - } - - currEnd := string(resp.Kvs[0].Value) - return a.doRebaseLocked(ctx, decodeID(a.logger, currEnd)) -} - -func (a *AllocatorImpl) fastRebaseLocked(ctx context.Context) error { - return a.doRebaseLocked(ctx, a.end) -} - -func (a *AllocatorImpl) firstDoRebaseLocked(ctx context.Context) error { - newEnd := a.allocStep - - keyMissing := clientv3util.KeyMissing(a.key) - opPutEnd := clientv3.OpPut(a.key, encodeID(uint64(newEnd))) - - resp, err := a.kv.Txn(ctx). - If(keyMissing). - Then(opPutEnd). - Commit() - if err != nil { - return errors.WithMessagef(err, "put end id failed, key:%s", a.key) - } else if !resp.Succeeded { - return ErrTxnPutEndID.WithCausef("txn put end id failed, key is exist, key:%s, resp:%v", a.key, resp) - } - - a.end = uint64(newEnd) - - a.logger.Info("Allocator allocates a new base id", zap.String("key", a.key), zap.Uint64("id", a.base)) - return nil -} - -func (a *AllocatorImpl) doRebaseLocked(ctx context.Context, currEnd uint64) error { - if currEnd < a.base { - return ErrAllocID.WithCausef("ID in storage can't less than memory, base:%d, end:%d", a.base, currEnd) - } - - newEnd := currEnd + uint64(a.allocStep) - - endEquals := clientv3.Compare(clientv3.Value(a.key), "=", encodeID(currEnd)) - opPutEnd := clientv3.OpPut(a.key, encodeID(newEnd)) - - resp, err := a.kv.Txn(ctx). - If(endEquals). - Then(opPutEnd). 
- Commit() - if err != nil { - return errors.WithMessagef(err, "put end id failed, key:%s, old value:%d, new value:%d", a.key, currEnd, newEnd) - } else if !resp.Succeeded { - return ErrTxnPutEndID.WithCausef("txn put end id failed, endEquals failed, key:%s, value:%d, resp:%v", a.key, currEnd, resp) - } - - a.base = currEnd - a.end = newEnd - - a.logger.Info("Allocator allocates a new base id", zap.String("key", a.key), zap.Uint64("id", a.base)) - - return nil -} - -func encodeID(value uint64) string { - return fmt.Sprintf("%d", value) -} - -func decodeID(logger *zap.Logger, value string) uint64 { - res, err := strconv.ParseUint(value, 10, 64) - if err != nil { - logger.Error("convert string to int failed", zap.Error(err), zap.String("val", value)) - } - return res -} diff --git a/horaemeta/server/id/id_test.go b/horaemeta/server/id/id_test.go deleted file mode 100644 index 7d42785b33..0000000000 --- a/horaemeta/server/id/id_test.go +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package id - -import ( - "context" - "testing" - "time" - - "github.com/apache/incubator-horaedb-meta/server/etcdutil" - "github.com/stretchr/testify/require" - clientv3 "go.etcd.io/etcd/client/v3" - "go.uber.org/zap" -) - -const ( - defaultRequestTimeout = time.Second * 30 - defaultStep = 100 - defaultRootPath = "/meta" - defaultAllocIDKey = "/id" -) - -func TestMultipleAllocBasedOnKV(t *testing.T) { - start := 0 - size := 201 - _, kv, closeSrv := etcdutil.PrepareEtcdServerAndClient(t) - defer closeSrv() - - testAllocIDValue(t, kv, start, size) - testAllocIDValue(t, kv, ((start+size)/defaultStep+1)*defaultStep, size) -} - -func testAllocIDValue(t *testing.T, kv clientv3.KV, start, size int) { - re := require.New(t) - alloc := NewAllocatorImpl(zap.NewNop(), kv, defaultRootPath+defaultAllocIDKey, defaultStep) - ctx, cancel := context.WithTimeout(context.Background(), defaultRequestTimeout) - defer cancel() - - for i := start; i < start+size; i++ { - value, err := alloc.Alloc(ctx) - re.NoError(err) - re.Equal(uint64(i), value) - } -} diff --git a/horaemeta/server/id/reusable_id_impl.go b/horaemeta/server/id/reusable_id_impl.go deleted file mode 100644 index 0512980c60..0000000000 --- a/horaemeta/server/id/reusable_id_impl.go +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package id - -import ( - "context" - "sort" - "sync" -) - -type ReusableAllocatorImpl struct { - // Mutex is used to protect following fields. - lock sync.Mutex - - minID uint64 - existIDs *OrderedList -} - -type OrderedList struct { - sorted []uint64 -} - -// FindMinHoleValueAndIndex Find the minimum hole value and its index. -// If the list is empty, then return min value and 0 as index; -// If no hole is found, then return the `last_value + 1` in the list and l.Len() as the index; -func (l *OrderedList) FindMinHoleValueAndIndex(min uint64) (uint64, int) { - if len(l.sorted) == 0 { - return min, 0 - } - if l.sorted[0] > min { - return min, 0 - } - if len(l.sorted) == 1 { - return l.sorted[0] + 1, 1 - } - - s := l.sorted - for i := 0; i < len(l.sorted)-1; i++ { - if s[i]+1 != s[i+1] { - return s[i] + 1, i + 1 - } - } - - return s[len(s)-1] + 1, len(s) -} - -// Insert the value at the idx whose correctness should be ensured by the caller. -func (l *OrderedList) Insert(v uint64, i int) { - if len(l.sorted) == i { - l.sorted = append(l.sorted, v) - } else { - l.sorted = append(l.sorted[:i+1], l.sorted[i:]...) - l.sorted[i] = v - } -} - -func (l *OrderedList) Remove(v uint64) int { - removeIndex := -1 - for i, value := range l.sorted { - if value == v { - removeIndex = i - } - } - l.sorted = append(l.sorted[:removeIndex], l.sorted[removeIndex+1:]...) - return removeIndex -} - -func NewReusableAllocatorImpl(existIDs []uint64, minID uint64) Allocator { - sort.Slice(existIDs, func(i, j int) bool { - return existIDs[i] < existIDs[j] - }) - return &ReusableAllocatorImpl{ - lock: sync.Mutex{}, - - minID: minID, - existIDs: &OrderedList{sorted: existIDs}, - } -} - -func (a *ReusableAllocatorImpl) Alloc(_ context.Context) (uint64, error) { - a.lock.Lock() - defer a.lock.Unlock() - // Find minimum unused ID bigger than minID - v, i := a.existIDs.FindMinHoleValueAndIndex(a.minID) - a.existIDs.Insert(v, i) - return v, nil -} - -func (a *ReusableAllocatorImpl) Collect(_ context.Context, id uint64) error { - a.lock.Lock() - defer a.lock.Unlock() - a.existIDs.Remove(id) - return nil -} diff --git a/horaemeta/server/id/reusable_id_test.go b/horaemeta/server/id/reusable_id_test.go deleted file mode 100644 index 78b30dde0e..0000000000 --- a/horaemeta/server/id/reusable_id_test.go +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package id - -import ( - "context" - "testing" - - "github.com/stretchr/testify/require" -) - -func TestAlloc(t *testing.T) { - re := require.New(t) - ctx := context.Background() - - allocor := NewReusableAllocatorImpl([]uint64{}, uint64(1)) - // IDs: [] - // Alloc: 1 - // IDs: [1] - id, err := allocor.Alloc(ctx) - re.NoError(err) - re.Equal(uint64(1), id) - - // IDs: [1] - // Alloc: 2 - // IDs: [1,2] - id, err = allocor.Alloc(ctx) - re.NoError(err) - re.Equal(uint64(2), id) - - // IDs: [1,2] - // Collect: 2 - // IDs: [1] - err = allocor.Collect(ctx, uint64(2)) - re.NoError(err) - - // IDs: [1] - // Alloc: 2 - // IDs: [1,2] - id, err = allocor.Alloc(ctx) - re.NoError(err) - re.Equal(uint64(2), id) - - // IDs: [1,2,3,5,6] - allocor = NewReusableAllocatorImpl([]uint64{1, 2, 3, 5, 6}, uint64(1)) - - // IDs: [1,2,3,5,6] - // Alloc: 4 - // IDs: [1,2,3,4,5,6] - id, err = allocor.Alloc(ctx) - re.NoError(err) - re.Equal(uint64(4), id) - - // IDs: [1,2,3,4,5,6] - // Alloc: 7 - // IDs: [1,2,3,4,5,6,7] - id, err = allocor.Alloc(ctx) - re.NoError(err) - re.Equal(uint64(7), id) - - // IDs: [1,2,3,4,5,6,7] - // Collect: 1 - // IDs: [2,3,4,5,6,7] - err = allocor.Collect(ctx, uint64(1)) - re.NoError(err) - - // IDs: [2,3,4,5,6,7] - // Alloc: 1 - // IDs: [1,2,3,4,5,6,7] - id, err = allocor.Alloc(ctx) - re.NoError(err) - re.Equal(uint64(1), id) -} diff --git a/horaemeta/server/limiter/limiter.go b/horaemeta/server/limiter/limiter.go deleted file mode 100644 index 3e3892930c..0000000000 --- a/horaemeta/server/limiter/limiter.go +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package limiter - -import ( - "sync" - - "github.com/apache/incubator-horaedb-meta/server/config" - "golang.org/x/time/rate" -) - -type FlowLimiter struct { - // enable is used to control the switch of the limiter. - enable bool - l *rate.Limiter - // RWMutex is used to protect following fields. - lock sync.RWMutex - // limit is the updated rate of tokens. - limit int - // burst is the maximum number of tokens. 
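[Editorial note] The `FlowLimiter` being removed above wraps `golang.org/x/time/rate`. A minimal usage sketch of that token-bucket limiter follows, purely for illustration; the limit and burst values are arbitrary.

```go
package main

import (
	"fmt"

	"golang.org/x/time/rate"
)

func main() {
	// Refill 1000 tokens per second and allow bursts of up to 100 requests.
	l := rate.NewLimiter(rate.Limit(1000), 100)

	allowed, rejected := 0, 0
	for i := 0; i < 500; i++ {
		if l.Allow() {
			allowed++
		} else {
			rejected++
		}
	}
	fmt.Println("allowed:", allowed, "rejected:", rejected)
}
```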
- burst int -} - -func NewFlowLimiter(config config.LimiterConfig) *FlowLimiter { - newLimiter := rate.NewLimiter(rate.Limit(config.Limit), config.Burst) - - return &FlowLimiter{ - enable: config.Enable, - l: newLimiter, - lock: sync.RWMutex{}, - limit: config.Limit, - burst: config.Burst, - } -} - -func (f *FlowLimiter) Allow() bool { - if !f.enable { - return true - } - return f.l.Allow() -} - -func (f *FlowLimiter) UpdateLimiter(config config.LimiterConfig) error { - f.lock.Lock() - defer f.lock.Unlock() - - f.enable = config.Enable - f.l.SetLimit(rate.Limit(config.Limit)) - f.l.SetBurst(config.Burst) - f.limit = config.Limit - f.burst = config.Burst - return nil -} - -func (f *FlowLimiter) GetConfig() *config.LimiterConfig { - return &config.LimiterConfig{ - Enable: f.enable, - Limit: f.limit, - Burst: f.burst, - } -} diff --git a/horaemeta/server/limiter/limiter_test.go b/horaemeta/server/limiter/limiter_test.go deleted file mode 100644 index 6e71816376..0000000000 --- a/horaemeta/server/limiter/limiter_test.go +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package limiter - -import ( - "testing" - "time" - - "github.com/apache/incubator-horaedb-meta/server/config" - "github.com/stretchr/testify/require" -) - -const ( - defaultInitialLimiterRate = 10 * 1000 - defaultInitialLimiterCapacity = 1000 - defaultEnableLimiter = true - defaultUpdateLimiterRate = 100 * 1000 - defaultUpdateLimiterCapacity = 100 * 1000 -) - -func TestFlowLimiter(t *testing.T) { - re := require.New(t) - flowLimiter := NewFlowLimiter(config.LimiterConfig{ - Limit: defaultInitialLimiterRate, - Burst: defaultInitialLimiterCapacity, - Enable: defaultEnableLimiter, - }) - - for i := 0; i < defaultInitialLimiterCapacity; i++ { - flag := flowLimiter.Allow() - re.Equal(true, flag) - } - - time.Sleep(time.Millisecond) - for i := 0; i < defaultInitialLimiterRate/1000; i++ { - flag := flowLimiter.Allow() - re.Equal(true, flag) - } - - err := flowLimiter.UpdateLimiter(config.LimiterConfig{ - Limit: defaultUpdateLimiterRate, - Burst: defaultUpdateLimiterCapacity, - Enable: defaultEnableLimiter, - }) - re.NoError(err) - - limiter := flowLimiter.GetConfig() - re.Equal(defaultUpdateLimiterRate, limiter.Limit) - re.Equal(defaultUpdateLimiterCapacity, limiter.Burst) - re.Equal(defaultEnableLimiter, limiter.Enable) - - time.Sleep(time.Millisecond) - for i := 0; i < defaultUpdateLimiterRate/1000; i++ { - flag := flowLimiter.Allow() - re.Equal(true, flag) - } -} diff --git a/horaemeta/server/member/error.go b/horaemeta/server/member/error.go deleted file mode 100644 index e7520fa545..0000000000 --- a/horaemeta/server/member/error.go +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package member - -import "github.com/apache/incubator-horaedb-meta/pkg/coderr" - -var ( - ErrResetLeader = coderr.NewCodeError(coderr.Internal, "reset leader by deleting leader key") - ErrGetLeader = coderr.NewCodeError(coderr.Internal, "get leader by querying leader key") - ErrTxnPutLeader = coderr.NewCodeError(coderr.Internal, "put leader key in txn") - ErrMultipleLeader = coderr.NewCodeError(coderr.Internal, "multiple leaders found") - ErrInvalidLeaderValue = coderr.NewCodeError(coderr.Internal, "invalid leader value") - ErrMarshalMember = coderr.NewCodeError(coderr.Internal, "marshal member information") - ErrGrantLease = coderr.NewCodeError(coderr.Internal, "grant lease") - ErrRevokeLease = coderr.NewCodeError(coderr.Internal, "revoke lease") - ErrCloseLease = coderr.NewCodeError(coderr.Internal, "close lease") -) diff --git a/horaemeta/server/member/lease.go b/horaemeta/server/member/lease.go deleted file mode 100644 index 306525c55b..0000000000 --- a/horaemeta/server/member/lease.go +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package member - -import ( - "context" - "sync" - "time" - - "github.com/apache/incubator-horaedb-meta/pkg/log" - clientv3 "go.etcd.io/etcd/client/v3" - "go.uber.org/zap" -) - -// lease helps use etcd lease by providing Grant, Close and auto renewing the lease. -type lease struct { - rawLease clientv3.Lease - // timeout is the rpc timeout and always equals to the ttlSec. - timeout time.Duration - ttlSec int64 - // logger will be updated after Grant is called. - logger *zap.Logger - - // The fields below are initialized after Grant is called. - ID clientv3.LeaseID - - expireTimeL sync.RWMutex - // expireTime helps determine the lease whether is expired. 
- expireTime time.Time -} - -func newLease(rawLease clientv3.Lease, ttlSec int64) *lease { - return &lease{ - rawLease: rawLease, - timeout: time.Duration(ttlSec) * time.Second, - ttlSec: ttlSec, - logger: log.GetLogger(), - - ID: 0, - expireTimeL: sync.RWMutex{}, - expireTime: time.Time{}, - } -} - -type renewLeaseResult int - -const ( - renewLeaseAlive renewLeaseResult = iota - renewLeaseFailed - renewLeaseExpired -) - -func (r renewLeaseResult) failed() bool { - return r == renewLeaseFailed -} - -func (r renewLeaseResult) alive() bool { - return r == renewLeaseAlive -} - -func (l *lease) Grant(ctx context.Context) error { - ctx, cancel := context.WithTimeout(ctx, l.timeout) - defer cancel() - leaseResp, err := l.rawLease.Grant(ctx, l.ttlSec) - if err != nil { - return ErrGrantLease.WithCause(err) - } - - l.ID = leaseResp.ID - l.logger = log.With(zap.Int64("lease-id", int64(leaseResp.ID))) - - expiredAt := time.Now().Add(time.Second * time.Duration(leaseResp.TTL)) - l.setExpireTime(expiredAt) - - l.logger.Debug("lease is granted", zap.Time("expired-at", expiredAt)) - return nil -} - -func (l *lease) Close(ctx context.Context) error { - // Check whether the lease was granted. - if l.ID == 0 { - return nil - } - - // Release and reset all the resources. - l.setExpireTime(time.Time{}) - ctx, cancel := context.WithTimeout(ctx, l.timeout) - defer cancel() - if _, err := l.rawLease.Revoke(ctx, l.ID); err != nil { - return ErrRevokeLease.WithCause(err) - } - if err := l.rawLease.Close(); err != nil { - return ErrCloseLease.WithCause(err) - } - - return nil -} - -// KeepAlive renews the lease until timeout for renewing lease. -func (l *lease) KeepAlive(ctx context.Context) { - // Used to receive the renewed event. - renewed := make(chan bool, 1) - ctx1, cancelRenewBg := context.WithCancel(ctx) - // Used to join with the renew goroutine in the background. - wg := sync.WaitGroup{} - wg.Add(1) - go func() { - l.renewLeaseBg(ctx1, l.timeout/3, renewed) - wg.Done() - }() - -L: - for { - select { - case alive := <-renewed: - l.logger.Debug("received renew result", zap.Bool("renew-alive", alive)) - if !alive { - break L - } - case <-time.After(l.timeout): - l.logger.Warn("lease timeout, stop keeping lease alive") - break L - case <-ctx.Done(): - l.logger.Info("stop keeping lease alive because ctx is done") - break L - } - } - - cancelRenewBg() - wg.Wait() -} - -// IsExpired is goroutine safe. -func (l *lease) IsExpired() bool { - expiredAt := l.getExpireTime() - return time.Now().After(expiredAt) -} - -func (l *lease) setExpireTime(newExpireTime time.Time) { - l.expireTimeL.Lock() - defer l.expireTimeL.Unlock() - - l.expireTime = newExpireTime -} - -// `setExpireTimeIfNewer` updates the l.expireTime only if the newExpireTime is after l.expireTime. -// Returns true if l.expireTime is updated. -func (l *lease) setExpireTimeIfNewer(newExpireTime time.Time) bool { - l.expireTimeL.Lock() - defer l.expireTimeL.Unlock() - - if newExpireTime.After(l.expireTime) { - l.expireTime = newExpireTime - return true - } - - return false -} - -func (l *lease) getExpireTime() time.Time { - l.expireTimeL.RLock() - defer l.expireTimeL.RUnlock() - - return l.expireTime -} - -// `renewLeaseBg` keeps the lease alive by periodically call `lease.KeepAliveOnce`. -// The l.expireTime will be updated during renewing and the renew lease result (whether alive) will be told to caller by `renewed` channel. 
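[Editorial note] The `lease` helper removed here layers expiry tracking and background renewal on top of etcd's lease API. The standalone sketch below shows only the underlying primitives it builds on (`Grant` and `KeepAliveOnce`); the endpoint and TTL are placeholders, and error handling is reduced to panics for brevity.

```go
package main

import (
	"context"
	"fmt"
	"time"

	clientv3 "go.etcd.io/etcd/client/v3"
)

func main() {
	cli, err := clientv3.New(clientv3.Config{
		Endpoints:   []string{"127.0.0.1:2379"}, // placeholder endpoint
		DialTimeout: 5 * time.Second,
	})
	if err != nil {
		panic(err)
	}
	defer cli.Close()

	lease := clientv3.NewLease(cli)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	grant, err := lease.Grant(ctx, 10) // 10-second TTL
	if err != nil {
		panic(err)
	}

	// Renew once; a real keeper renews periodically (e.g. every TTL/3) and
	// treats a non-positive TTL in the response as an expired lease.
	resp, err := lease.KeepAliveOnce(ctx, grant.ID)
	if err != nil {
		panic(err)
	}
	fmt.Printf("lease %d renewed, remaining TTL: %ds\n", grant.ID, resp.TTL)
}
```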
-func (l *lease) renewLeaseBg(ctx context.Context, interval time.Duration, renewed chan<- bool) { - l.logger.Info("start renewing lease background", zap.Duration("interval", interval)) - defer l.logger.Info("stop renewing lease background", zap.Duration("interval", interval)) - -L: - for { - renewOnce := func() renewLeaseResult { - start := time.Now() - ctx1, cancel := context.WithTimeout(ctx, l.timeout) - defer cancel() - resp, err := l.rawLease.KeepAliveOnce(ctx1, l.ID) - if err != nil { - l.logger.Error("lease keep alive failed", zap.Error(err)) - return renewLeaseFailed - } - if resp.TTL < 0 { - l.logger.Warn("lease is expired") - return renewLeaseExpired - } - - expireAt := start.Add(time.Duration(resp.TTL) * time.Second) - updated := l.setExpireTimeIfNewer(expireAt) - l.logger.Debug("got next expired time", zap.Time("expired-at", expireAt), zap.Bool("updated", updated)) - return renewLeaseAlive - } - - renewRes := renewOnce() - - // Init the timer for next keep alive action. - t := time.After(interval) - - if !renewRes.failed() { - // Notify result of the renew. - select { - case renewed <- renewRes.alive(): - case <-ctx.Done(): - break L - } - } - - // Wait for next keep alive action. - select { - case <-t: - case <-ctx.Done(): - break L - } - } -} diff --git a/horaemeta/server/member/member.go b/horaemeta/server/member/member.go deleted file mode 100644 index 9239498144..0000000000 --- a/horaemeta/server/member/member.go +++ /dev/null @@ -1,286 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package member - -import ( - "context" - "fmt" - "sync" - "time" - - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/etcdutil" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metastoragepb" - "github.com/pkg/errors" - "go.etcd.io/etcd/api/v3/mvccpb" - clientv3 "go.etcd.io/etcd/client/v3" - "go.uber.org/zap" - "google.golang.org/protobuf/proto" -) - -const leaderCheckInterval = time.Duration(100) * time.Millisecond - -// Member manages the leadership and the role of the node in the horaemeta cluster. 
-type Member struct { - ID uint64 - Name string - Endpoint string - rootPath string - leaderKey string - etcdCli *clientv3.Client - etcdLeaderGetter etcdutil.EtcdLeaderGetter - leader *metastoragepb.Member - rpcTimeout time.Duration - logger *zap.Logger -} - -func formatLeaderKey(rootPath string) string { - return fmt.Sprintf("%s/members/leader", rootPath) -} - -func NewMember(rootPath string, id uint64, name, endpoint string, etcdCli *clientv3.Client, etcdLeaderGetter etcdutil.EtcdLeaderGetter, rpcTimeout time.Duration) *Member { - leaderKey := formatLeaderKey(rootPath) - logger := log.With(zap.String("node-name", name), zap.Uint64("node-id", id)) - return &Member{ - ID: id, - Name: name, - Endpoint: endpoint, - rootPath: rootPath, - leaderKey: leaderKey, - etcdCli: etcdCli, - etcdLeaderGetter: etcdLeaderGetter, - leader: nil, - rpcTimeout: rpcTimeout, - logger: logger, - } -} - -// getLeader gets the leader of the cluster. -// getLeaderResp.Leader == nil if no leader found. -func (m *Member) getLeader(ctx context.Context) (*getLeaderResp, error) { - ctx, cancel := context.WithTimeout(ctx, m.rpcTimeout) - defer cancel() - resp, err := m.etcdCli.Get(ctx, m.leaderKey) - if err != nil { - return nil, ErrGetLeader.WithCause(err) - } - if len(resp.Kvs) > 1 { - return nil, ErrMultipleLeader - } - if len(resp.Kvs) == 0 { - return &getLeaderResp{ - Leader: nil, - Revision: 0, - IsLocal: false, - }, nil - } - - leaderKv := resp.Kvs[0] - leader := &metastoragepb.Member{} - err = proto.Unmarshal(leaderKv.Value, leader) - if err != nil { - return nil, ErrInvalidLeaderValue.WithCause(err) - } - - return &getLeaderResp{Leader: leader, Revision: leaderKv.ModRevision, IsLocal: leader.GetEndpoint() == m.Endpoint}, nil -} - -// GetLeaderAddr gets the leader address of the cluster with memory cache. -// return error if no leader found. -func (m *Member) GetLeaderAddr(_ context.Context) (GetLeaderAddrResp, error) { - if m.leader == nil { - return GetLeaderAddrResp{ - LeaderEndpoint: "", - IsLocal: false, - }, errors.WithMessage(ErrGetLeader, "no leader found") - } - return GetLeaderAddrResp{ - LeaderEndpoint: m.leader.Endpoint, - IsLocal: m.leader.Endpoint == m.Endpoint, - }, nil -} - -func (m *Member) ResetLeader(ctx context.Context) error { - ctx, cancel := context.WithTimeout(ctx, m.rpcTimeout) - defer cancel() - if _, err := m.etcdCli.Delete(ctx, m.leaderKey); err != nil { - return ErrResetLeader.WithCause(err) - } - return nil -} - -func (m *Member) WaitForLeaderChange(ctx context.Context, revision int64) { - watcher := clientv3.NewWatcher(m.etcdCli) - defer func() { - if err := watcher.Close(); err != nil { - m.logger.Error("close watcher failed", zap.Error(err)) - } - }() - - ctx, cancel := context.WithCancel(ctx) - defer cancel() - - for { - wch := watcher.Watch(ctx, m.leaderKey, clientv3.WithRev(revision)) - for resp := range wch { - // Meet compacted error, use the compact revision. 
- if resp.CompactRevision != 0 { - m.logger.Warn("required revision has been compacted, use the compact revision", - zap.Int64("required-revision", revision), - zap.Int64("compact-revision", resp.CompactRevision)) - revision = resp.CompactRevision - break - } - - if resp.Canceled { - m.logger.Error("watcher is cancelled", zap.Int64("revision", revision), zap.String("leader-key", m.leaderKey)) - return - } - - for _, ev := range resp.Events { - if ev.Type == mvccpb.DELETE { - m.logger.Info("current leader is deleted", zap.String("leader-key", m.leaderKey)) - return - } - } - } - - select { - case <-ctx.Done(): - return - default: - } - } -} - -func (m *Member) CampaignAndKeepLeader(ctx context.Context, leaseTTLSec int64, leadershipChecker LeadershipChecker, callbacks LeadershipEventCallbacks) error { - leaderVal, err := m.Marshal() - if err != nil { - return err - } - - rawLease := clientv3.NewLease(m.etcdCli) - newLease := newLease(rawLease, leaseTTLSec) - closeLeaseOnce := sync.Once{} - closeLeaseWg := sync.WaitGroup{} - closeLease := func() { - log.Debug("try to close lease") - ctx1, cancel := context.WithTimeout(context.Background(), m.rpcTimeout) - defer cancel() - if err := newLease.Close(ctx1); err != nil { - m.logger.Error("close lease failed", zap.Error(err)) - } - log.Debug("try to close lease finish") - } - defer closeLeaseOnce.Do(closeLease) - - ctx1, cancel := context.WithTimeout(ctx, m.rpcTimeout) - defer cancel() - if err := newLease.Grant(ctx1); err != nil { - return err - } - - // The leader key must not exist, so the CreateRevision is 0. - cmp := clientv3.Compare(clientv3.CreateRevision(m.leaderKey), "=", 0) - ctx1, cancel = context.WithTimeout(ctx, m.rpcTimeout) - defer cancel() - resp, err := m.etcdCli. - Txn(ctx1). - If(cmp). - Then(clientv3.OpPut(m.leaderKey, leaderVal, clientv3.WithLease(newLease.ID))). - Commit() - if err != nil { - return ErrTxnPutLeader.WithCause(err) - } else if !resp.Succeeded { - return ErrTxnPutLeader.WithCausef("txn put leader failed, resp:%v", resp) - } - - m.logger.Info("[SetLeader]", zap.String("leader-key", m.leaderKey), zap.String("leader", m.Name)) - // Update leader memory cache. - m.leader = &metastoragepb.Member{ - Name: m.Name, - Id: m.ID, - Endpoint: m.Endpoint, - } - - if callbacks != nil { - // The leader has been elected and trigger the callbacks. - callbacks.AfterElected(ctx) - // The leader will be transferred after exit this method. - defer func() { - callbacks.BeforeTransfer(ctx) - }() - } - - // Keep the leadership by renewing the lease periodically after success in campaigning leader. - closeLeaseWg.Add(1) - go func() { - newLease.KeepAlive(ctx) - closeLeaseWg.Done() - closeLeaseOnce.Do(closeLease) - }() - - // Check the leadership periodically and exit if it changes. 
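[Editorial note] `CampaignAndKeepLeader` above wins leadership by creating the leader key in a transaction guarded by `CreateRevision == 0` and bound to a lease. A condensed, hypothetical sketch of that campaign step follows; the package and function names are illustrative, not the removed implementation.

```go
package election

import (
	"context"

	clientv3 "go.etcd.io/etcd/client/v3"
)

// campaign atomically creates leaderKey bound to the given lease; it succeeds
// only when the key does not exist yet (CreateRevision == 0), so at most one
// contender wins, and the key disappears automatically if its lease expires.
func campaign(ctx context.Context, cli *clientv3.Client, leaderKey, self string, leaseID clientv3.LeaseID) (bool, error) {
	resp, err := cli.Txn(ctx).
		If(clientv3.Compare(clientv3.CreateRevision(leaderKey), "=", 0)).
		Then(clientv3.OpPut(leaderKey, self, clientv3.WithLease(leaseID))).
		Commit()
	if err != nil {
		return false, err
	}
	return resp.Succeeded, nil
}
```

If `resp.Succeeded` is false, another contender already holds the key and the caller falls back to watching it for deletion, as the watcher code in this file does.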
- leaderCheckTicker := time.NewTicker(leaderCheckInterval) - defer leaderCheckTicker.Stop() - - for { - select { - case <-leaderCheckTicker.C: - if newLease.IsExpired() { - m.logger.Info("no longer a leader because lease has expired") - return nil - } - - if !leadershipChecker.ShouldCampaign(m) { - m.logger.Info("etcd leader changed and should re-assign the leadership", zap.String("old-leader", m.Name)) - return nil - } - case <-ctx.Done(): - m.logger.Info("server is closed") - return nil - } - } -} - -func (m *Member) Marshal() (string, error) { - memPB := &metastoragepb.Member{ - Name: m.Name, - Id: m.ID, - Endpoint: m.Endpoint, - } - bs, err := proto.Marshal(memPB) - if err != nil { - return "", ErrMarshalMember.WithCause(err) - } - - return string(bs), nil -} - -type getLeaderResp struct { - Leader *metastoragepb.Member - Revision int64 - IsLocal bool -} - -type GetLeaderAddrResp struct { - LeaderEndpoint string - IsLocal bool -} diff --git a/horaemeta/server/member/watch_leader.go b/horaemeta/server/member/watch_leader.go deleted file mode 100644 index 8ca6ee0ca7..0000000000 --- a/horaemeta/server/member/watch_leader.go +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package member - -import ( - "context" - "time" - - "github.com/apache/incubator-horaedb-meta/pkg/assert" - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/etcdutil" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metastoragepb" - "go.uber.org/zap" -) - -const ( - watchLeaderFailInterval = time.Duration(200) * time.Millisecond - - waitReasonFailEtcd = "fail to access etcd" - waitReasonResetLeader = "leader is reset" - waitReasonElectLeader = "leader is electing" - waitReasonNoWait = "" -) - -type WatchContext interface { - etcdutil.EtcdLeaderGetter - ShouldStop() bool -} - -// LeaderWatcher watches the changes of the HoraeMeta cluster's leadership. -type LeaderWatcher struct { - watchCtx WatchContext - self *Member - leaseTTLSec int64 - - leadershipChecker LeadershipChecker -} - -type LeadershipEventCallbacks interface { - AfterElected(ctx context.Context) - BeforeTransfer(ctx context.Context) -} - -// LeadershipChecker tells which member should campaign the HoraeMeta cluster's leadership, and whether the current leader is valid. -type LeadershipChecker interface { - ShouldCampaign(self *Member) bool - IsValidLeader(memLeader *metastoragepb.Member) bool -} - -// embeddedEtcdLeadershipChecker ensures the HoraeMeta cluster's leader as the embedded ETCD cluster's leader. 
-type embeddedEtcdLeadershipChecker struct { - etcdLeaderGetter etcdutil.EtcdLeaderGetter -} - -func (c embeddedEtcdLeadershipChecker) ShouldCampaign(self *Member) bool { - etcdLeaderID, err := c.etcdLeaderGetter.EtcdLeaderID() - assert.Assertf(err == nil, "EtcdLeaderID must exist") - return self.ID == etcdLeaderID -} - -func (c embeddedEtcdLeadershipChecker) IsValidLeader(memLeader *metastoragepb.Member) bool { - etcdLeaderID, err := c.etcdLeaderGetter.EtcdLeaderID() - assert.Assertf(err == nil, "EtcdLeaderID must exist") - return memLeader.Id == etcdLeaderID -} - -// externalEtcdLeadershipChecker has no preference over the leadership of the HoraeMeta cluster, that is to say, the leadership is random. -type externalEtcdLeadershipChecker struct{} - -func (c externalEtcdLeadershipChecker) ShouldCampaign(_ *Member) bool { - return true -} - -func (c externalEtcdLeadershipChecker) IsValidLeader(_ *metastoragepb.Member) bool { - return true -} - -func NewLeaderWatcher(ctx WatchContext, self *Member, leaseTTLSec int64, embedEtcd bool) *LeaderWatcher { - var leadershipChecker LeadershipChecker - if embedEtcd { - leadershipChecker = embeddedEtcdLeadershipChecker{ - etcdLeaderGetter: ctx, - } - } else { - leadershipChecker = externalEtcdLeadershipChecker{} - } - - return &LeaderWatcher{ - ctx, - self, - leaseTTLSec, - leadershipChecker, - } -} - -// Watch watches the leader changes: -// 1. Check whether the leader is valid if leader exists. -// - Leader is valid: wait for the leader changes. -// - Leader is not valid: reset the leader by the current leader. -// 2. Campaign the leadership if leader does not exist. -// - Campaign the leader if this member should. -// - The leader keeps the leadership lease alive. -// - The other members keeps waiting for the leader changes. -// -// The LeadershipCallbacks `callbacks` will be triggered when specific events occur. -func (l *LeaderWatcher) Watch(ctx context.Context, callbacks LeadershipEventCallbacks) { - var wait string - logger := log.With(zap.String("self", l.self.Name)) - - for { - if l.watchCtx.ShouldStop() { - logger.Warn("stop watching leader because of server is closed") - return - } - - select { - case <-ctx.Done(): - logger.Warn("stop watching leader because ctx is done") - return - default: - } - - if wait != waitReasonNoWait { - logger.Warn("sleep a while during watch", zap.String("wait-reason", wait)) - time.Sleep(watchLeaderFailInterval) - wait = waitReasonNoWait - } - - // Check whether leader exists. - resp, err := l.self.getLeader(ctx) - if err != nil { - logger.Error("fail to get leader", zap.Error(err)) - wait = waitReasonFailEtcd - continue - } - - memLeader := resp.Leader - if memLeader == nil { - // Leader does not exist. - // A new leader should be elected and the etcd leader should be elected as the new leader. - if l.leadershipChecker.ShouldCampaign(l.self) { - // Campaign the leader and block until leader changes. - if err := l.self.CampaignAndKeepLeader(ctx, l.leaseTTLSec, l.leadershipChecker, callbacks); err != nil { - logger.Error("fail to campaign and keep leader", zap.Error(err)) - wait = waitReasonFailEtcd - } else { - logger.Info("stop keeping leader") - } - continue - } - - // For other nodes that is not etcd leader, just wait for the new leader elected. - wait = waitReasonElectLeader - } else { - // Cache leader in memory. - l.self.leader = memLeader - log.Info("update leader cache", zap.String("endpoint", memLeader.Endpoint)) - - // Leader does exist. 
- // A new leader should be elected (the leader should be reset by the current leader itself) if the leader is - // not the etcd leader. - if l.leadershipChecker.IsValidLeader(memLeader) { - // watch the leader and block until leader changes. - l.self.WaitForLeaderChange(ctx, resp.Revision) - logger.Warn("leader changes and stop watching") - continue - } - - // This leader is not valid, reset it if this member will campaign this leadership. - if l.leadershipChecker.ShouldCampaign(l.self) { - if err = l.self.ResetLeader(ctx); err != nil { - logger.Error("fail to reset leader", zap.Error(err)) - wait = waitReasonFailEtcd - } - continue - } - - // The leader is not etcd leader and this node is not the leader so just wait a moment and check leader again. - wait = waitReasonResetLeader - } - } -} diff --git a/horaemeta/server/member/watch_leader_test.go b/horaemeta/server/member/watch_leader_test.go deleted file mode 100644 index 78958a2f96..0000000000 --- a/horaemeta/server/member/watch_leader_test.go +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package member - -import ( - "context" - "testing" - "time" - - "github.com/apache/incubator-horaedb-meta/server/etcdutil" - "github.com/stretchr/testify/assert" - clientv3 "go.etcd.io/etcd/client/v3" - "go.etcd.io/etcd/server/v3/etcdserver" -) - -type mockWatchCtx struct { - stopped bool - client *clientv3.Client - srv *etcdserver.EtcdServer -} - -func (ctx *mockWatchCtx) ShouldStop() bool { - return ctx.stopped -} - -func (ctx *mockWatchCtx) EtcdLeaderID() (uint64, error) { - return ctx.srv.Lead(), nil -} - -func TestWatchLeaderSingle(t *testing.T) { - etcd, client, closeSrv := etcdutil.PrepareEtcdServerAndClient(t) - defer closeSrv() - - watchCtx := &mockWatchCtx{ - stopped: false, - client: client, - srv: etcd.Server, - } - leaderGetter := &etcdutil.LeaderGetterWrapper{Server: etcd.Server} - rpcTimeout := time.Duration(10) * time.Second - leaseTTLSec := int64(1) - mem := NewMember("", uint64(etcd.Server.ID()), "mem0", "", client, leaderGetter, rpcTimeout) - leaderWatcher := NewLeaderWatcher(watchCtx, mem, leaseTTLSec, true) - - ctx, cancelWatch := context.WithCancel(context.Background()) - watchedDone := make(chan struct{}, 1) - go func() { - leaderWatcher.Watch(ctx, nil) - watchedDone <- struct{}{} - }() - - // Wait for watcher starting - // TODO: This unit test may fail. Currently, it is solved by increasing the sleep time, and the code needs to be optimized in the future. 
- time.Sleep(time.Duration(2000) * time.Millisecond) - - // check the member has been the leader - ctx, cancel := context.WithTimeout(context.Background(), rpcTimeout) - defer cancel() - resp, err := mem.getLeader(ctx) - assert.NoError(t, err) - assert.NotNil(t, resp) - assert.Equal(t, resp.Leader.Id, mem.ID) - - // cancel the watch - cancelWatch() - <-watchedDone - - // check again whether the leader should be reset - ctx, cancel = context.WithTimeout(context.Background(), rpcTimeout) - defer cancel() - resp, err = mem.getLeader(ctx) - assert.NoError(t, err) - assert.NotNil(t, resp) - assert.Nil(t, resp.Leader) -} diff --git a/horaemeta/server/server.go b/horaemeta/server/server.go deleted file mode 100644 index 4ed6e39542..0000000000 --- a/horaemeta/server/server.go +++ /dev/null @@ -1,416 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package server - -import ( - "context" - "crypto/tls" - "fmt" - "net" - "sync" - "sync/atomic" - "time" - - "github.com/apache/incubator-horaedb-meta/pkg/coderr" - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/cluster" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/config" - "github.com/apache/incubator-horaedb-meta/server/etcdutil" - "github.com/apache/incubator-horaedb-meta/server/limiter" - "github.com/apache/incubator-horaedb-meta/server/member" - metagrpc "github.com/apache/incubator-horaedb-meta/server/service/grpc" - "github.com/apache/incubator-horaedb-meta/server/service/http" - "github.com/apache/incubator-horaedb-meta/server/status" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaservicepb" - "github.com/pkg/errors" - "go.etcd.io/etcd/client/pkg/v3/transport" - clientv3 "go.etcd.io/etcd/client/v3" - "go.etcd.io/etcd/server/v3/embed" - "go.uber.org/zap" - "google.golang.org/grpc" - "google.golang.org/grpc/keepalive" -) - -type Server struct { - isClosed int32 - status *status.ServerStatus - - cfg *config.Config - - etcdCfg *embed.Config - - // The fields below are initialized after Run of server is called. - clusterManager cluster.Manager - flowLimiter *limiter.FlowLimiter - - // member describes membership in horaemeta cluster. - member *member.Member - etcdCli *clientv3.Client - etcdSrv *embed.Etcd - - // httpService contains http server and api set. - httpService *http.Service - - // bgJobWg can be used to join with the background jobs. - bgJobWg sync.WaitGroup - // bgJobCancel can be used to cancel all pending background jobs. - bgJobCancel func() -} - -// CreateServer creates the server instance without starting any services or background jobs. 
-func CreateServer(cfg *config.Config) (*Server, error) { - etcdCfg, err := cfg.GenEtcdConfig() - if err != nil { - return nil, err - } - - srv := &Server{ - isClosed: 0, - status: status.NewServerStatus(), - cfg: cfg, - etcdCfg: etcdCfg, - - clusterManager: nil, - flowLimiter: nil, - member: nil, - etcdCli: nil, - etcdSrv: nil, - httpService: nil, - bgJobWg: sync.WaitGroup{}, - bgJobCancel: nil, - } - - grpcService := metagrpc.NewService(cfg.GrpcHandleTimeout(), srv) - etcdCfg.ServiceRegister = func(grpcSrv *grpc.Server) { - grpcSrv.RegisterService(&metaservicepb.MetaRpcService_ServiceDesc, grpcService) - } - - return srv, nil -} - -// Run runs the services and background jobs. -func (srv *Server) Run(ctx context.Context) error { - // If enableEmbedEtcd is true, the grpc server is started in the same process as the etcd server. - if srv.cfg.EnableEmbedEtcd { - if err := srv.startEmbedEtcd(ctx); err != nil { - srv.status.Set(status.Terminated) - return err - } - } else { - // If enableEmbedEtcd is false, the grpc server is started in a separate process. - go func() { - if err := srv.startGrpcServer(ctx); err != nil { - srv.status.Set(status.Terminated) - log.Fatal("Grpc serve failed", zap.Error(err)) - } - }() - } - if err := srv.initEtcdClient(srv.cfg.EnableEmbedEtcd); err != nil { - srv.status.Set(status.Terminated) - return err - } - - if err := srv.startServer(ctx); err != nil { - srv.status.Set(status.Terminated) - return err - } - - srv.startBgJobs(ctx) - srv.status.Set(status.StatusRunning) - return nil -} - -func (srv *Server) Close() { - atomic.StoreInt32(&srv.isClosed, 1) - - srv.stopBgJobs() - - if srv.etcdCli != nil { - err := srv.etcdCli.Close() - if err != nil { - log.Error("fail to close etcdCli", zap.Error(err)) - } - } - - err := srv.httpService.Stop() - if err != nil { - log.Error("fail to close http server", zap.Error(err)) - } -} - -func (srv *Server) IsClosed() bool { - return atomic.LoadInt32(&srv.isClosed) == 1 -} - -func (srv *Server) startEmbedEtcd(ctx context.Context) error { - etcdSrv, err := embed.StartEtcd(srv.etcdCfg) - if err != nil { - return ErrStartEtcd.WithCause(err) - } - - newCtx, cancel := context.WithTimeout(ctx, srv.cfg.EtcdStartTimeout()) - defer cancel() - - select { - case <-etcdSrv.Server.ReadyNotify(): - case <-newCtx.Done(): - return ErrStartEtcdTimeout.WithCausef("timeout is:%v", srv.cfg.EtcdStartTimeout()) - } - srv.etcdSrv = etcdSrv - - return nil -} - -func (srv *Server) initEtcdClient(enableEmbedEtcd bool) error { - // If enableEmbedEtcd is false, we should add tls config to connect remote Etcd server. 
- var tlsConfig *tls.Config - if !enableEmbedEtcd { - tlsInfo := transport.TLSInfo{ - TrustedCAFile: srv.cfg.EtcdCaCertPath, - CertFile: srv.cfg.EtcdCertPath, - KeyFile: srv.cfg.EtcdKeyPath, - } - clientConfig, err := tlsInfo.ClientConfig() - if err != nil { - return ErrCreateEtcdClient.WithCause(err) - } - tlsConfig = clientConfig - } - - etcdEndpoints := make([]string, 0, len(srv.etcdCfg.AdvertiseClientUrls)) - for _, url := range srv.etcdCfg.AdvertiseClientUrls { - etcdEndpoints = append(etcdEndpoints, url.String()) - } - lgc := log.GetLoggerConfig() - client, err := clientv3.New(clientv3.Config{ - Endpoints: etcdEndpoints, - DialTimeout: srv.cfg.EtcdCallTimeout(), - LogConfig: lgc, - TLS: tlsConfig, - }) - if err != nil { - return ErrCreateEtcdClient.WithCause(err) - } - srv.etcdCli = client - - if srv.etcdSrv != nil { - etcdLeaderGetter := &etcdutil.LeaderGetterWrapper{Server: srv.etcdSrv.Server} - srv.member = member.NewMember(srv.cfg.StorageRootPath, uint64(srv.etcdSrv.Server.ID()), srv.cfg.NodeName, srv.etcdCfg.AdvertiseClientUrls[0].String(), client, etcdLeaderGetter, srv.cfg.EtcdCallTimeout()) - } else { - endpoint := fmt.Sprintf("http://%s:%d", srv.cfg.Addr, srv.cfg.GrpcPort) - srv.member = member.NewMember(srv.cfg.StorageRootPath, 0, srv.cfg.NodeName, endpoint, client, nil, srv.cfg.EtcdCallTimeout()) - } - return nil -} - -func (srv *Server) startGrpcServer(_ context.Context) error { - opts := srv.buildGrpcOptions() - server := grpc.NewServer(opts...) - - grpcService := metagrpc.NewService(srv.cfg.GrpcHandleTimeout(), srv) - server.RegisterService(&metaservicepb.MetaRpcService_ServiceDesc, grpcService) - addr := fmt.Sprintf(":%d", srv.cfg.GrpcPort) - lis, err := net.Listen("tcp", addr) - if err != nil { - return errors.Wrapf(err, "listen on %s failed", addr) - } - - if err = server.Serve(lis); err != nil { - return errors.Wrap(err, "serve failed") - } - - return nil -} - -// startServer starts involved services. -func (srv *Server) startServer(_ context.Context) error { - if srv.cfg.MaxScanLimit <= 1 { - return ErrStartServer.WithCausef("scan limit must be greater than 1") - } - - storage := storage.NewStorageWithEtcdBackend(srv.etcdCli, srv.cfg.StorageRootPath, - storage.Options{ - MaxScanLimit: srv.cfg.MaxScanLimit, - MinScanLimit: srv.cfg.MinScanLimit, - MaxOpsPerTxn: srv.cfg.MaxOpsPerTxn, - }) - - topologyType, err := metadata.ParseTopologyType(srv.cfg.TopologyType) - if err != nil { - return err - } - - manager, err := cluster.NewManagerImpl(storage, srv.etcdCli, srv.etcdCli, srv.cfg.StorageRootPath, srv.cfg.IDAllocatorStep, topologyType) - if err != nil { - return err - } - srv.clusterManager = manager - srv.flowLimiter = limiter.NewFlowLimiter(srv.cfg.FlowLimiter) - - api := http.NewAPI(manager, srv.status, http.NewForwardClient(srv.member, srv.cfg.HTTPPort), srv.flowLimiter, srv.etcdCli) - httpService := http.NewHTTPService(srv.cfg.HTTPPort, time.Second*10, time.Second*10, api.NewAPIRouter()) - go func() { - err := httpService.Start() - if err != nil { - log.Error("start http service failed", zap.Error(err)) - } - }() - srv.httpService = httpService - - log.Info("server started") - return nil -} - -func (srv *Server) startBgJobs(ctx context.Context) { - var bgJobCtx context.Context - bgJobCtx, srv.bgJobCancel = context.WithCancel(ctx) - - go srv.watchLeader(bgJobCtx) - go srv.watchEtcdLeaderPriority(bgJobCtx) -} - -func (srv *Server) stopBgJobs() { - srv.bgJobCancel() - srv.bgJobWg.Wait() -} - -// watchLeader watches whether the leader of the cluster exists. 
-// Every node campaigns the leadership if it finds the leader is offline and the leader should keep the leadership after -// election. And Keep the leader node also be the leader of the etcd cluster during election. -func (srv *Server) watchLeader(ctx context.Context) { - srv.bgJobWg.Add(1) - defer srv.bgJobWg.Done() - - watchCtx := &leaderWatchContext{ - srv, - } - // If enable embed etcd, we should watch the leader of the etcd cluster. - watcher := member.NewLeaderWatcher(watchCtx, srv.member, srv.cfg.LeaseTTLSec, srv.cfg.EnableEmbedEtcd) - - callbacks := &leadershipEventCallbacks{ - srv: srv, - } - watcher.Watch(ctx, callbacks) -} - -func (srv *Server) watchEtcdLeaderPriority(_ context.Context) { - srv.bgJobWg.Add(1) - defer srv.bgJobWg.Done() -} - -func (srv *Server) createDefaultCluster(ctx context.Context) error { - resp, err := srv.member.GetLeaderAddr(ctx) - if err != nil { - log.Warn("get leader failed", zap.Error(err)) - } - - // Create default cluster by the leader. - if resp.IsLocal { - topologyType, err := metadata.ParseTopologyType(srv.cfg.TopologyType) - if err != nil { - return err - } - defaultCluster, err := srv.clusterManager.CreateCluster(ctx, srv.cfg.DefaultClusterName, - metadata.CreateClusterOpts{ - NodeCount: uint32(srv.cfg.DefaultClusterNodeCount), - ShardTotal: uint32(srv.cfg.DefaultClusterShardTotal), - EnableSchedule: srv.cfg.EnableSchedule, - TopologyType: topologyType, - ProcedureExecutingBatchSize: srv.cfg.ProcedureExecutingBatchSize, - }) - if err != nil { - log.Warn("create default cluster failed", zap.Error(err)) - if coderr.Is(err, metadata.ErrClusterAlreadyExists.Code()) { - _, err = srv.clusterManager.GetCluster(ctx, srv.cfg.DefaultClusterName) - if err != nil { - return errors.WithMessage(err, "get default cluster failed") - } - } - } else { - log.Info("create default cluster succeed", zap.String("cluster", defaultCluster.GetMetadata().Name())) - } - } - return nil -} - -func (srv *Server) buildGrpcOptions() []grpc.ServerOption { - keepalivePolicy := keepalive.EnforcementPolicy{ - MinTime: time.Duration(srv.cfg.GrpcServiceKeepAlivePingMinIntervalSec) * time.Second, - PermitWithoutStream: true, - } - opts := []grpc.ServerOption{ - grpc.MaxSendMsgSize(srv.cfg.GrpcServiceMaxSendMsgSize), - grpc.MaxRecvMsgSize(srv.cfg.GrpcServiceMaxSendMsgSize), - grpc.KeepaliveEnforcementPolicy(keepalivePolicy), - } - return opts -} - -type leaderWatchContext struct { - srv *Server -} - -func (ctx *leaderWatchContext) ShouldStop() bool { - return ctx.srv.IsClosed() -} - -func (ctx *leaderWatchContext) EtcdLeaderID() (uint64, error) { - if ctx.srv.etcdSrv != nil { - return ctx.srv.etcdSrv.Server.Lead(), nil - } - return 0, errors.WithMessage(member.ErrGetLeader, "no leader found") -} - -func (srv *Server) GetClusterManager() cluster.Manager { - return srv.clusterManager -} - -func (srv *Server) GetLeader(ctx context.Context) (member.GetLeaderAddrResp, error) { - // Get leader with cache. 
- return srv.member.GetLeaderAddr(ctx) -} - -func (srv *Server) GetFlowLimiter() (*limiter.FlowLimiter, error) { - if srv.flowLimiter == nil { - return nil, ErrFlowLimiterNotFound - } - return srv.flowLimiter, nil -} - -type leadershipEventCallbacks struct { - srv *Server -} - -func (c *leadershipEventCallbacks) AfterElected(ctx context.Context) { - if err := c.srv.clusterManager.Start(ctx); err != nil { - panic(fmt.Sprintf("cluster manager fail to start, err:%v", err)) - } - if err := c.srv.createDefaultCluster(ctx); err != nil { - panic(fmt.Sprintf("create default cluster failed, err:%v", err)) - } -} - -func (c *leadershipEventCallbacks) BeforeTransfer(ctx context.Context) { - if err := c.srv.clusterManager.Stop(ctx); err != nil { - panic(fmt.Sprintf("cluster manager fail to stop, err:%v", err)) - } -} diff --git a/horaemeta/server/service/grpc/error.go b/horaemeta/server/service/grpc/error.go deleted file mode 100644 index 44f6a4920a..0000000000 --- a/horaemeta/server/service/grpc/error.go +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package grpc - -import ( - "github.com/apache/incubator-horaedb-meta/pkg/coderr" -) - -var ( - ErrRecvHeartbeat = coderr.NewCodeError(coderr.Internal, "receive heartbeat") - ErrBindHeartbeatStream = coderr.NewCodeError(coderr.Internal, "bind heartbeat sender") - ErrUnbindHeartbeatStream = coderr.NewCodeError(coderr.Internal, "unbind heartbeat sender") - ErrForward = coderr.NewCodeError(coderr.Internal, "grpc forward") - ErrFlowLimit = coderr.NewCodeError(coderr.TooManyRequests, "flow limit") -) diff --git a/horaemeta/server/service/grpc/forward.go b/horaemeta/server/service/grpc/forward.go deleted file mode 100644 index b4507f362d..0000000000 --- a/horaemeta/server/service/grpc/forward.go +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package grpc - -import ( - "context" - - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/service" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaservicepb" - "github.com/pkg/errors" - "go.uber.org/zap" - "google.golang.org/grpc" -) - -// getForwardedMetaClient get forwarded horaemeta client. When current node is the leader, this func will return (nil,nil). -func (s *Service) getForwardedMetaClient(ctx context.Context) (metaservicepb.MetaRpcServiceClient, error) { - forwardedAddr, _, err := s.getForwardedAddr(ctx) - if err != nil { - return nil, errors.WithMessage(err, "get forwarded horaemeta client") - } - - if forwardedAddr != "" { - horaeClient, err := s.getMetaClient(ctx, forwardedAddr) - if err != nil { - return nil, errors.WithMessagef(err, "get forwarded horaemeta client, addr:%s", forwardedAddr) - } - return horaeClient, nil - } - return nil, nil -} - -func (s *Service) getMetaClient(ctx context.Context, addr string) (metaservicepb.MetaRpcServiceClient, error) { - client, err := s.getForwardedGrpcClient(ctx, addr) - if err != nil { - return nil, errors.WithMessagef(err, "get horaemeta client, addr:%s", addr) - } - return metaservicepb.NewMetaRpcServiceClient(client), nil -} - -func (s *Service) getForwardedGrpcClient(ctx context.Context, forwardedAddr string) (*grpc.ClientConn, error) { - client, ok := s.conns.Load(forwardedAddr) - if !ok { - log.Info("try to create horaemeta client", zap.String("addr", forwardedAddr)) - cc, err := service.GetClientConn(ctx, forwardedAddr) - if err != nil { - return nil, err - } - client = cc - s.conns.Store(forwardedAddr, cc) - } - return client.(*grpc.ClientConn), nil -} - -func (s *Service) getForwardedAddr(ctx context.Context) (string, bool, error) { - resp, err := s.h.GetLeader(ctx) - if err != nil { - return "", false, errors.WithMessage(err, "get forwarded addr") - } - if resp.IsLocal { - return "", true, nil - } - return resp.LeaderEndpoint, false, nil -} diff --git a/horaemeta/server/service/grpc/service.go b/horaemeta/server/service/grpc/service.go deleted file mode 100644 index d4693de4b6..0000000000 --- a/horaemeta/server/service/grpc/service.go +++ /dev/null @@ -1,458 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package grpc - -import ( - "context" - "fmt" - "strings" - "sync" - "time" - - "github.com/apache/incubator-horaedb-meta/pkg/coderr" - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/cluster" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/coordinator" - "github.com/apache/incubator-horaedb-meta/server/limiter" - "github.com/apache/incubator-horaedb-meta/server/member" - "github.com/apache/incubator-horaedb-meta/server/storage" - "github.com/apache/incubator-horaedb-proto/golang/pkg/clusterpb" - "github.com/apache/incubator-horaedb-proto/golang/pkg/commonpb" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaservicepb" - "github.com/pkg/errors" - "go.uber.org/zap" -) - -type Service struct { - metaservicepb.UnimplementedMetaRpcServiceServer - opTimeout time.Duration - h Handler - - // Store as map[string]*grpc.ClientConn - // TODO: remove unavailable connection - conns sync.Map -} - -func NewService(opTimeout time.Duration, h Handler) *Service { - return &Service{ - UnimplementedMetaRpcServiceServer: metaservicepb.UnimplementedMetaRpcServiceServer{}, - opTimeout: opTimeout, - h: h, - conns: sync.Map{}, - } -} - -// Handler is needed by grpc service to process the requests. -type Handler interface { - GetClusterManager() cluster.Manager - GetLeader(ctx context.Context) (member.GetLeaderAddrResp, error) - GetFlowLimiter() (*limiter.FlowLimiter, error) - // TODO: define the methods for handling other grpc requests. -} - -// NodeHeartbeat implements gRPC HoraeMetaServer. -func (s *Service) NodeHeartbeat(ctx context.Context, req *metaservicepb.NodeHeartbeatRequest) (*metaservicepb.NodeHeartbeatResponse, error) { - metaClient, err := s.getForwardedMetaClient(ctx) - if err != nil { - return &metaservicepb.NodeHeartbeatResponse{Header: responseHeader(err, "grpc heartbeat")}, nil - } - - // Forward request to the leader. - if metaClient != nil { - return metaClient.NodeHeartbeat(ctx, req) - } - - shardInfos := make([]metadata.ShardInfo, 0, len(req.Info.ShardInfos)) - for _, shardInfo := range req.Info.ShardInfos { - shardInfos = append(shardInfos, metadata.ConvertShardsInfoPB(shardInfo)) - } - - registeredNode := metadata.RegisteredNode{ - Node: storage.Node{ - Name: req.Info.Endpoint, - NodeStats: storage.NodeStats{ - Lease: req.GetInfo().Lease, - Zone: req.GetInfo().Zone, - NodeVersion: req.GetInfo().BinaryVersion, - }, - LastTouchTime: uint64(time.Now().UnixMilli()), - State: storage.NodeStateOnline, - }, ShardInfos: shardInfos, - } - - log.Info("[NodeHeartbeat]", zap.String("clusterName", req.GetHeader().ClusterName), zap.String("name", req.Info.Endpoint), zap.String("info", fmt.Sprintf("%+v", registeredNode))) - - err = s.h.GetClusterManager().RegisterNode(ctx, req.GetHeader().GetClusterName(), registeredNode) - if err != nil { - return &metaservicepb.NodeHeartbeatResponse{Header: responseHeader(err, "grpc heartbeat")}, nil - } - - return &metaservicepb.NodeHeartbeatResponse{ - Header: okResponseHeader(), - }, nil -} - -// AllocSchemaID implements gRPC HoraeMetaServer. -func (s *Service) AllocSchemaID(ctx context.Context, req *metaservicepb.AllocSchemaIdRequest) (*metaservicepb.AllocSchemaIdResponse, error) { - metaClient, err := s.getForwardedMetaClient(ctx) - if err != nil { - return &metaservicepb.AllocSchemaIdResponse{Header: responseHeader(err, "grpc alloc schema id")}, nil - } - - // Forward request to the leader. 
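For illustration only (this sketch is not part of the patch): every RPC handler in the removed service.go opens with the same forward-or-handle step noted in the comment above. The snippet below distills that pattern under hypothetical names (remoteClient, handle); the real handlers use the generated metaservicepb client returned by getForwardedMetaClient.

package main

import (
	"context"
	"fmt"
)

// remoteClient stands in for the forwarded gRPC client; a nil value means
// "this node is the leader, so handle the request locally".
type remoteClient interface {
	Do(ctx context.Context, req string) (string, error)
}

// handle mirrors the shape of the removed handlers: forward when a remote
// client was resolved, otherwise serve the request on this node.
func handle(ctx context.Context, forwarded remoteClient, req string) (string, error) {
	if forwarded != nil {
		return forwarded.Do(ctx, req)
	}
	return fmt.Sprintf("handled locally: %s", req), nil
}

func main() {
	resp, err := handle(context.Background(), nil, "AllocSchemaID")
	fmt.Println(resp, err)
}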
- if metaClient != nil { - return metaClient.AllocSchemaID(ctx, req) - } - - log.Info("[AllocSchemaID]", zap.String("schemaName", req.GetName()), zap.String("clusterName", req.GetHeader().GetClusterName())) - - schemaID, _, err := s.h.GetClusterManager().AllocSchemaID(ctx, req.GetHeader().GetClusterName(), req.GetName()) - if err != nil { - return &metaservicepb.AllocSchemaIdResponse{Header: responseHeader(err, "grpc alloc schema id")}, nil - } - - return &metaservicepb.AllocSchemaIdResponse{ - Header: okResponseHeader(), - Name: req.GetName(), - Id: uint32(schemaID), - }, nil -} - -// GetTablesOfShards implements gRPC HoraeMetaServer. -func (s *Service) GetTablesOfShards(ctx context.Context, req *metaservicepb.GetTablesOfShardsRequest) (*metaservicepb.GetTablesOfShardsResponse, error) { - metaClient, err := s.getForwardedMetaClient(ctx) - if err != nil { - return &metaservicepb.GetTablesOfShardsResponse{Header: responseHeader(err, "grpc get tables of shards")}, nil - } - - // Forward request to the leader. - if metaClient != nil { - return metaClient.GetTablesOfShards(ctx, req) - } - - log.Info("[GetTablesOfShards]", zap.String("clusterName", req.GetHeader().GetClusterName()), zap.String("shardIDs", fmt.Sprint(req.ShardIds))) - - shardIDs := make([]storage.ShardID, 0, len(req.GetShardIds())) - for _, shardID := range req.GetShardIds() { - shardIDs = append(shardIDs, storage.ShardID(shardID)) - } - - tables, err := s.h.GetClusterManager().GetTablesByShardIDs(req.GetHeader().GetClusterName(), req.GetHeader().GetNode(), shardIDs) - if err != nil { - return &metaservicepb.GetTablesOfShardsResponse{Header: responseHeader(err, "grpc get tables of shards")}, nil - } - - result := convertToGetTablesOfShardsResponse(tables) - return result, nil -} - -// CreateTable implements gRPC HoraeMetaServer. -func (s *Service) CreateTable(ctx context.Context, req *metaservicepb.CreateTableRequest) (*metaservicepb.CreateTableResponse, error) { - start := time.Now() - // Since there may be too many table creation requests, a flow limiter is added here. - if ok, err := s.allow(); !ok { - return &metaservicepb.CreateTableResponse{Header: responseHeader(err, "create table grpc request is rejected by flow limiter")}, nil - } - - metaClient, err := s.getForwardedMetaClient(ctx) - if err != nil { - return &metaservicepb.CreateTableResponse{Header: responseHeader(err, err.Error())}, nil - } - - // Forward request to the leader. 
- if metaClient != nil { - return metaClient.CreateTable(ctx, req) - } - - log.Info("[CreateTable]", zap.String("schemaName", req.SchemaName), zap.String("clusterName", req.GetHeader().ClusterName), zap.String("tableName", req.GetName())) - - clusterManager := s.h.GetClusterManager() - c, err := clusterManager.GetCluster(ctx, req.GetHeader().GetClusterName()) - if err != nil { - log.Error("fail to create table", zap.Error(err)) - return &metaservicepb.CreateTableResponse{Header: responseHeader(err, err.Error())}, nil - } - - errorCh := make(chan error, 1) - resultCh := make(chan metadata.CreateTableResult, 1) - - onSucceeded := func(ret metadata.CreateTableResult) error { - resultCh <- ret - return nil - } - onFailed := func(err error) error { - errorCh <- err - return nil - } - - p, err := c.GetProcedureFactory().MakeCreateTableProcedure(ctx, coordinator.CreateTableRequest{ - ClusterMetadata: c.GetMetadata(), - SourceReq: req, - OnSucceeded: onSucceeded, - OnFailed: onFailed, - }) - if err != nil { - log.Error("fail to create table, factory create procedure", zap.Error(err)) - return &metaservicepb.CreateTableResponse{Header: responseHeader(err, err.Error())}, nil - } - - err = c.GetProcedureManager().Submit(ctx, p) - if err != nil { - log.Error("fail to create table, manager submit procedure", zap.Error(err)) - return &metaservicepb.CreateTableResponse{Header: responseHeader(err, err.Error())}, nil - } - - select { - case ret := <-resultCh: - log.Info("create table finish", zap.String("tableName", req.Name), zap.Int64("costTime", time.Since(start).Milliseconds())) - return &metaservicepb.CreateTableResponse{ - Header: okResponseHeader(), - CreatedTable: &metaservicepb.TableInfo{ - Id: uint64(ret.Table.ID), - Name: ret.Table.Name, - SchemaId: uint32(ret.Table.SchemaID), - SchemaName: req.GetSchemaName(), - }, - ShardInfo: &metaservicepb.ShardInfo{ - Id: uint32(ret.ShardVersionUpdate.ShardID), - Role: clusterpb.ShardRole_LEADER, - Version: ret.ShardVersionUpdate.LatestVersion, - }, - }, nil - case err = <-errorCh: - log.Warn("create table failed", zap.String("tableName", req.Name), zap.Int64("costTime", time.Since(start).Milliseconds()), zap.Error(err)) - return &metaservicepb.CreateTableResponse{Header: responseHeader(err, err.Error())}, nil - } -} - -// DropTable implements gRPC HoraeMetaServer. -func (s *Service) DropTable(ctx context.Context, req *metaservicepb.DropTableRequest) (*metaservicepb.DropTableResponse, error) { - start := time.Now() - // Since there may be too many table dropping requests, a flow limiter is added here. - if ok, err := s.allow(); !ok { - return &metaservicepb.DropTableResponse{Header: responseHeader(err, "drop table grpc request is rejected by flow limiter")}, nil - } - - metaClient, err := s.getForwardedMetaClient(ctx) - if err != nil { - return &metaservicepb.DropTableResponse{Header: responseHeader(err, "drop table")}, nil - } - - // Forward request to the leader. 
- if metaClient != nil { - return metaClient.DropTable(ctx, req) - } - - log.Info("[DropTable]", zap.String("schemaName", req.SchemaName), zap.String("clusterName", req.GetHeader().ClusterName), zap.String("tableName", req.Name)) - - clusterManager := s.h.GetClusterManager() - c, err := clusterManager.GetCluster(ctx, req.GetHeader().GetClusterName()) - if err != nil { - log.Error("fail to drop table", zap.Error(err)) - return &metaservicepb.DropTableResponse{Header: responseHeader(err, "drop table")}, nil - } - - errorCh := make(chan error, 1) - resultCh := make(chan metadata.TableInfo, 1) - - onSucceeded := func(ret metadata.TableInfo) error { - resultCh <- ret - return nil - } - onFailed := func(err error) error { - errorCh <- err - return nil - } - procedure, ok, err := c.GetProcedureFactory().CreateDropTableProcedure(ctx, coordinator.DropTableRequest{ - ClusterMetadata: c.GetMetadata(), - ClusterSnapshot: c.GetMetadata().GetClusterSnapshot(), - SourceReq: req, - OnSucceeded: onSucceeded, - OnFailed: onFailed, - }) - if err != nil { - log.Error("fail to drop table", zap.Error(err)) - return &metaservicepb.DropTableResponse{Header: responseHeader(err, "drop table")}, nil - } - if !ok { - log.Warn("table may have been dropped already") - return &metaservicepb.DropTableResponse{Header: okResponseHeader()}, nil - } - - err = c.GetProcedureManager().Submit(ctx, procedure) - if err != nil { - log.Error("fail to drop table, manager submit procedure", zap.Error(err), zap.Int64("costTime", time.Since(start).Milliseconds())) - return &metaservicepb.DropTableResponse{Header: responseHeader(err, "drop table")}, nil - } - - select { - case ret := <-resultCh: - log.Info("drop table finish", zap.String("tableName", req.Name), zap.Int64("costTime", time.Since(start).Milliseconds())) - return &metaservicepb.DropTableResponse{ - Header: okResponseHeader(), - DroppedTable: metadata.ConvertTableInfoToPB(ret), - }, nil - case err = <-errorCh: - log.Info("drop table failed", zap.String("tableName", req.Name), zap.Int64("costTime", time.Since(start).Milliseconds())) - return &metaservicepb.DropTableResponse{Header: responseHeader(err, "drop table")}, nil - } -} - -// RouteTables implements gRPC HoraeMetaServer. -func (s *Service) RouteTables(ctx context.Context, req *metaservicepb.RouteTablesRequest) (*metaservicepb.RouteTablesResponse, error) { - // Since there may be too many table routing requests, a flow limiter is added here. - if ok, err := s.allow(); !ok { - return &metaservicepb.RouteTablesResponse{Header: responseHeader(err, "routeTables grpc request is rejected by flow limiter")}, nil - } - - metaClient, err := s.getForwardedMetaClient(ctx) - if err != nil { - return &metaservicepb.RouteTablesResponse{Header: responseHeader(err, "grpc routeTables")}, nil - } - - log.Debug("[RouteTable]", zap.String("schemaName", req.SchemaName), zap.String("clusterName", req.GetHeader().ClusterName), zap.String("tableNames", strings.Join(req.TableNames, ","))) - - // Forward request to the leader. - if metaClient != nil { - return metaClient.RouteTables(ctx, req) - } - - routeTableResult, err := s.h.GetClusterManager().RouteTables(ctx, req.GetHeader().GetClusterName(), req.GetSchemaName(), req.GetTableNames()) - if err != nil { - return &metaservicepb.RouteTablesResponse{Header: responseHeader(err, "grpc routeTables")}, nil - } - - return convertRouteTableResult(routeTableResult), nil -} - -// GetNodes implements gRPC HoraeMetaServer. 
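For illustration only (not part of the patch): the CreateTable and DropTable handlers above share a callback-plus-channel idiom for waiting on an asynchronously executed procedure. A minimal, self-contained sketch of that idiom follows; submit and result are hypothetical stand-ins for ProcedureManager.Submit and the procedure result types.

package main

import (
	"errors"
	"fmt"
)

type result struct{ tableID uint64 }

// submit stands in for submitting a procedure; it fires exactly one of the
// two callbacks, here synchronously for the sake of the example.
func submit(onSucceeded func(result) error, onFailed func(error) error, succeed bool) {
	if succeed {
		_ = onSucceeded(result{tableID: 42})
		return
	}
	_ = onFailed(errors.New("procedure failed"))
}

func main() {
	// Buffered channels of size 1 so the callbacks never block, even if the
	// caller has not reached the select yet.
	resultCh := make(chan result, 1)
	errorCh := make(chan error, 1)

	submit(
		func(r result) error { resultCh <- r; return nil },
		func(err error) error { errorCh <- err; return nil },
		true,
	)

	select {
	case r := <-resultCh:
		fmt.Println("procedure finished, table id:", r.tableID)
	case err := <-errorCh:
		fmt.Println("procedure failed:", err)
	}
}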
-func (s *Service) GetNodes(ctx context.Context, req *metaservicepb.GetNodesRequest) (*metaservicepb.GetNodesResponse, error) { - metaClient, err := s.getForwardedMetaClient(ctx) - if err != nil { - return &metaservicepb.GetNodesResponse{Header: responseHeader(err, "grpc get nodes")}, nil - } - - // Forward request to the leader. - if metaClient != nil { - return metaClient.GetNodes(ctx, req) - } - - log.Info("[GetNodes]", zap.String("clusterName", req.GetHeader().ClusterName)) - - nodesResult, err := s.h.GetClusterManager().GetNodeShards(ctx, req.GetHeader().GetClusterName()) - if err != nil { - log.Error("fail to get nodes", zap.Error(err)) - return &metaservicepb.GetNodesResponse{Header: responseHeader(err, "grpc get nodes")}, nil - } - - return convertToGetNodesResponse(nodesResult), nil -} - -func convertToGetTablesOfShardsResponse(shardTables map[storage.ShardID]metadata.ShardTables) *metaservicepb.GetTablesOfShardsResponse { - tablesByShard := make(map[uint32]*metaservicepb.TablesOfShard, len(shardTables)) - for id, shardTable := range shardTables { - tables := make([]*metaservicepb.TableInfo, 0, len(shardTable.Tables)) - for _, table := range shardTable.Tables { - tables = append(tables, metadata.ConvertTableInfoToPB(table)) - } - tablesByShard[uint32(id)] = &metaservicepb.TablesOfShard{ - ShardInfo: metadata.ConvertShardsInfoToPB(shardTable.Shard), - Tables: tables, - } - } - return &metaservicepb.GetTablesOfShardsResponse{ - Header: okResponseHeader(), - TablesByShard: tablesByShard, - } -} - -func convertRouteTableResult(routeTablesResult metadata.RouteTablesResult) *metaservicepb.RouteTablesResponse { - entries := make(map[string]*metaservicepb.RouteEntry, len(routeTablesResult.RouteEntries)) - for tableName, entry := range routeTablesResult.RouteEntries { - nodeShards := make([]*metaservicepb.NodeShard, 0, len(entry.NodeShards)) - for _, nodeShard := range entry.NodeShards { - nodeShards = append(nodeShards, &metaservicepb.NodeShard{ - Endpoint: nodeShard.ShardNode.NodeName, - ShardInfo: &metaservicepb.ShardInfo{ - Id: uint32(nodeShard.ShardNode.ID), - Role: storage.ConvertShardRoleToPB(nodeShard.ShardNode.ShardRole), - }, - }) - } - - entries[tableName] = &metaservicepb.RouteEntry{ - Table: metadata.ConvertTableInfoToPB(entry.Table), - NodeShards: nodeShards, - } - } - - return &metaservicepb.RouteTablesResponse{ - Header: okResponseHeader(), - ClusterTopologyVersion: routeTablesResult.ClusterViewVersion, - Entries: entries, - } -} - -func convertToGetNodesResponse(nodesResult metadata.GetNodeShardsResult) *metaservicepb.GetNodesResponse { - nodeShards := make([]*metaservicepb.NodeShard, 0, len(nodesResult.NodeShards)) - for _, shardNodeWithVersion := range nodesResult.NodeShards { - nodeShards = append(nodeShards, &metaservicepb.NodeShard{ - Endpoint: shardNodeWithVersion.ShardNode.NodeName, - ShardInfo: &metaservicepb.ShardInfo{ - Id: uint32(shardNodeWithVersion.ShardNode.ID), - Role: storage.ConvertShardRoleToPB(shardNodeWithVersion.ShardNode.ShardRole), - }, - }) - } - return &metaservicepb.GetNodesResponse{ - Header: okResponseHeader(), - ClusterTopologyVersion: nodesResult.ClusterTopologyVersion, - NodeShards: nodeShards, - } -} - -func okResponseHeader() *commonpb.ResponseHeader { - return responseHeader(nil, "") -} - -func responseHeader(err error, msg string) *commonpb.ResponseHeader { - if err == nil { - return &commonpb.ResponseHeader{Code: coderr.Ok, Error: msg} - } - - code, ok := coderr.GetCauseCode(err) - if ok { - return &commonpb.ResponseHeader{Code: 
uint32(code), Error: msg} - } - - return &commonpb.ResponseHeader{Code: coderr.Internal, Error: msg} -} - -func (s *Service) allow() (bool, error) { - flowLimiter, err := s.h.GetFlowLimiter() - if err != nil { - return false, errors.WithMessage(err, "get flow limiter failed") - } - if !flowLimiter.Allow() { - return false, ErrFlowLimit.WithCausef("the current flow has reached the threshold") - } - return true, nil -} diff --git a/horaemeta/server/service/http/api.go b/horaemeta/server/service/http/api.go deleted file mode 100644 index 5f4a36eaee..0000000000 --- a/horaemeta/server/service/http/api.go +++ /dev/null @@ -1,777 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package http - -import ( - "bytes" - "context" - "encoding/json" - "fmt" - "io" - "net/http" - "net/http/pprof" - - "github.com/apache/incubator-horaedb-meta/pkg/coderr" - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/cluster" - "github.com/apache/incubator-horaedb-meta/server/cluster/metadata" - "github.com/apache/incubator-horaedb-meta/server/config" - "github.com/apache/incubator-horaedb-meta/server/coordinator" - "github.com/apache/incubator-horaedb-meta/server/coordinator/procedure" - "github.com/apache/incubator-horaedb-meta/server/coordinator/scheduler" - "github.com/apache/incubator-horaedb-meta/server/limiter" - "github.com/apache/incubator-horaedb-meta/server/member" - "github.com/apache/incubator-horaedb-meta/server/status" - "github.com/apache/incubator-horaedb-meta/server/storage" - clientv3 "go.etcd.io/etcd/client/v3" - "go.uber.org/zap" -) - -func NewAPI(clusterManager cluster.Manager, serverStatus *status.ServerStatus, forwardClient *ForwardClient, flowLimiter *limiter.FlowLimiter, etcdClient *clientv3.Client) *API { - return &API{ - clusterManager: clusterManager, - serverStatus: serverStatus, - forwardClient: forwardClient, - flowLimiter: flowLimiter, - etcdAPI: NewEtcdAPI(etcdClient, forwardClient), - } -} - -func (a *API) NewAPIRouter() *Router { - router := New().WithPrefix(apiPrefix).WithInstrumentation(printRequestInfo) - - // Register API. 
- router.Post("/getShardTables", wrap(a.getShardTables, true, a.forwardClient)) - router.Post("/transferLeader", wrap(a.transferLeader, true, a.forwardClient)) - router.Post("/split", wrap(a.split, true, a.forwardClient)) - router.Post("/route", wrap(a.route, true, a.forwardClient)) - router.Del("/table", wrap(a.dropTable, true, a.forwardClient)) - router.Post("/getNodeShards", wrap(a.getNodeShards, true, a.forwardClient)) - router.Del("/nodeShards", wrap(a.dropNodeShards, true, a.forwardClient)) - router.Get("/flowLimiter", wrap(a.getFlowLimiter, true, a.forwardClient)) - router.Put("/flowLimiter", wrap(a.updateFlowLimiter, true, a.forwardClient)) - router.Get("/health", wrap(a.health, false, a.forwardClient)) - - // Register cluster API. - router.Get("/clusters", wrap(a.listClusters, true, a.forwardClient)) - router.Post("/clusters", wrap(a.createCluster, true, a.forwardClient)) - router.Put(fmt.Sprintf("/clusters/:%s", clusterNameParam), wrap(a.updateCluster, true, a.forwardClient)) - router.Get(fmt.Sprintf("/clusters/:%s/procedure", clusterNameParam), wrap(a.listProcedures, true, a.forwardClient)) - router.Get(fmt.Sprintf("/clusters/:%s/shardAffinities", clusterNameParam), wrap(a.listShardAffinities, true, a.forwardClient)) - router.Post(fmt.Sprintf("/clusters/:%s/shardAffinities", clusterNameParam), wrap(a.addShardAffinities, true, a.forwardClient)) - router.Del(fmt.Sprintf("/clusters/:%s/shardAffinities", clusterNameParam), wrap(a.removeShardAffinities, true, a.forwardClient)) - router.Post("/table/query", wrap(a.queryTable, true, a.forwardClient)) - - // Register debug API. - router.DebugGet("/pprof/profile", pprof.Profile) - router.DebugGet("/pprof/symbol", pprof.Symbol) - router.DebugGet("/pprof/trace", pprof.Trace) - router.DebugGet("/pprof/heap", a.pprofHeap) - router.DebugGet("/pprof/allocs", a.pprofAllocs) - router.DebugGet("/pprof/block", a.pprofBlock) - router.DebugGet("/pprof/goroutine", a.pprofGoroutine) - router.DebugGet("/pprof/threadCreate", a.pprofThreadCreate) - router.DebugGet(fmt.Sprintf("/diagnose/:%s/shards", clusterNameParam), wrap(a.diagnoseShards, true, a.forwardClient)) - router.DebugGet("/leader", wrap(a.getLeader, false, a.forwardClient)) - router.DebugGet(fmt.Sprintf("/clusters/:%s/enableSchedule", clusterNameParam), wrap(a.getEnableSchedule, true, a.forwardClient)) - router.DebugPut(fmt.Sprintf("/clusters/:%s/enableSchedule", clusterNameParam), wrap(a.updateEnableSchedule, true, a.forwardClient)) - - // Register ETCD API. 
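For illustration only (not part of the patch): each route registered above is wrapped so that a follower node proxies the call to the current leader (see the wrap helper later in this file). The sketch below shows the same idea with only the standard library; leaderAddr is a hypothetical stand-in for the member lookup, the path is illustrative, and it uses httputil.ReverseProxy whereas the removed code forwards with its own http.Client.

package main

import (
	"fmt"
	"net/http"
	"net/http/httputil"
	"net/url"
)

// leaderAddr reports the leader's HTTP address and whether this node is the
// leader. In the removed code this information comes from the member module.
func leaderAddr() (string, bool) { return "http://127.0.0.1:8080", true }

// forwardToLeader serves the request locally on the leader and proxies it to
// the leader everywhere else.
func forwardToLeader(next http.HandlerFunc) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		addr, isLeader := leaderAddr()
		if !isLeader {
			target, err := url.Parse(addr)
			if err != nil {
				http.Error(w, err.Error(), http.StatusInternalServerError)
				return
			}
			httputil.NewSingleHostReverseProxy(target).ServeHTTP(w, r)
			return
		}
		next(w, r)
	}
}

func main() {
	// Illustrative route; the real router applies a configured API prefix.
	http.HandleFunc("/clusters", forwardToLeader(func(w http.ResponseWriter, _ *http.Request) {
		fmt.Fprintln(w, "served by the leader")
	}))
	// http.ListenAndServe(":8080", nil) would start the server; omitted here.
}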
- router.Post("/etcd/promoteLearner", wrap(a.etcdAPI.promoteLearner, false, a.forwardClient)) - router.Put("/etcd/member", wrap(a.etcdAPI.addMember, false, a.forwardClient)) - router.Get("/etcd/member", wrap(a.etcdAPI.getMember, false, a.forwardClient)) - router.Post("/etcd/member", wrap(a.etcdAPI.updateMember, false, a.forwardClient)) - router.Del("/etcd/member", wrap(a.etcdAPI.removeMember, false, a.forwardClient)) - router.Post("/etcd/moveLeader", wrap(a.etcdAPI.moveLeader, false, a.forwardClient)) - - return router -} - -func (a *API) getLeader(req *http.Request) apiFuncResult { - leaderAddr, err := a.forwardClient.GetLeaderAddr(req.Context()) - if err != nil { - log.Error("get leader addr failed", zap.Error(err)) - return errResult(member.ErrGetLeader, err.Error()) - } - return okResult(leaderAddr) -} - -func (a *API) getShardTables(req *http.Request) apiFuncResult { - var getShardTablesReq GetShardTablesRequest - err := json.NewDecoder(req.Body).Decode(&getShardTablesReq) - if err != nil { - return errResult(ErrParseRequest, err.Error()) - } - - c, err := a.clusterManager.GetCluster(req.Context(), getShardTablesReq.ClusterName) - if err != nil { - return errResult(ErrParseRequest, err.Error()) - } - - // If ShardIDs in the request is empty, query with all shardIDs in the cluster. - shardIDs := make([]storage.ShardID, len(getShardTablesReq.ShardIDs)) - if len(getShardTablesReq.ShardIDs) != 0 { - for _, shardID := range getShardTablesReq.ShardIDs { - shardIDs = append(shardIDs, storage.ShardID(shardID)) - } - } else { - shardViewsMapping := c.GetMetadata().GetClusterSnapshot().Topology.ShardViewsMapping - for shardID := range shardViewsMapping { - shardIDs = append(shardIDs, shardID) - } - } - - shardTables := c.GetMetadata().GetShardTables(shardIDs) - return okResult(shardTables) -} - -func (a *API) transferLeader(req *http.Request) apiFuncResult { - var transferLeaderRequest TransferLeaderRequest - err := json.NewDecoder(req.Body).Decode(&transferLeaderRequest) - if err != nil { - return errResult(ErrParseRequest, err.Error()) - } - log.Info("transfer leader request", zap.String("request", fmt.Sprintf("%+v", transferLeaderRequest))) - - c, err := a.clusterManager.GetCluster(req.Context(), transferLeaderRequest.ClusterName) - if err != nil { - log.Error("get cluster failed", zap.String("clusterName", transferLeaderRequest.ClusterName), zap.Error(err)) - return errResult(ErrGetCluster, fmt.Sprintf("clusterName: %s, err: %s", transferLeaderRequest.ClusterName, err.Error())) - } - - transferLeaderProcedure, err := c.GetProcedureFactory().CreateTransferLeaderProcedure(req.Context(), coordinator.TransferLeaderRequest{ - Snapshot: c.GetMetadata().GetClusterSnapshot(), - ShardID: storage.ShardID(transferLeaderRequest.ShardID), - OldLeaderNodeName: transferLeaderRequest.OldLeaderNodeName, - NewLeaderNodeName: transferLeaderRequest.NewLeaderNodeName, - }) - if err != nil { - log.Error("create transfer leader procedure failed", zap.Error(err)) - return errResult(ErrCreateProcedure, err.Error()) - } - err = c.GetProcedureManager().Submit(req.Context(), transferLeaderProcedure) - if err != nil { - log.Error("submit transfer leader procedure failed", zap.Error(err)) - return errResult(ErrSubmitProcedure, err.Error()) - } - - return okResult(statusSuccess) -} - -func (a *API) route(req *http.Request) apiFuncResult { - var routeRequest RouteRequest - err := json.NewDecoder(req.Body).Decode(&routeRequest) - if err != nil { - return errResult(ErrParseRequest, err.Error()) - } - - result, err := 
a.clusterManager.RouteTables(context.Background(), routeRequest.ClusterName, routeRequest.SchemaName, routeRequest.Tables) - if err != nil { - log.Error("route tables failed", zap.Error(err)) - return errResult(ErrRoute, err.Error()) - } - - return okResult(result) -} - -func (a *API) getNodeShards(req *http.Request) apiFuncResult { - var nodeShardsRequest NodeShardsRequest - err := json.NewDecoder(req.Body).Decode(&nodeShardsRequest) - if err != nil { - return errResult(ErrParseRequest, err.Error()) - } - - result, err := a.clusterManager.GetNodeShards(context.Background(), nodeShardsRequest.ClusterName) - if err != nil { - log.Error("get node shards failed", zap.Error(err)) - return errResult(ErrGetNodeShards, err.Error()) - } - - return okResult(result) -} - -func (a *API) dropNodeShards(req *http.Request) apiFuncResult { - var dropNodeShardsRequest DropNodeShardsRequest - err := json.NewDecoder(req.Body).Decode(&dropNodeShardsRequest) - if err != nil { - return errResult(ErrParseRequest, err.Error()) - } - - c, err := a.clusterManager.GetCluster(req.Context(), dropNodeShardsRequest.ClusterName) - if err != nil { - log.Error("get cluster failed", zap.String("clusterName", dropNodeShardsRequest.ClusterName), zap.Error(err)) - return errResult(ErrGetCluster, fmt.Sprintf("clusterName: %s, err: %s", dropNodeShardsRequest.ClusterName, err.Error())) - } - - targetShardNodes := make([]storage.ShardNode, 0, len(dropNodeShardsRequest.ShardIDs)) - getShardNodeResult := c.GetMetadata().GetShardNodes() - for _, shardNode := range getShardNodeResult.ShardNodes { - for _, shardID := range dropNodeShardsRequest.ShardIDs { - if shardNode.ID == storage.ShardID(shardID) { - targetShardNodes = append(targetShardNodes, shardNode) - } - } - } - - if err := c.GetMetadata().DropShardNodes(req.Context(), targetShardNodes); err != nil { - log.Error("drop node shards failed", zap.Error(err)) - return errResult(ErrDropNodeShards, err.Error()) - } - - return okResult(targetShardNodes) -} - -func (a *API) dropTable(req *http.Request) apiFuncResult { - var dropTableRequest DropTableRequest - err := json.NewDecoder(req.Body).Decode(&dropTableRequest) - if err != nil { - return errResult(ErrParseRequest, err.Error()) - } - log.Info("drop table request", zap.String("request", fmt.Sprintf("%+v", dropTableRequest))) - - if err := a.clusterManager.DropTable(context.Background(), dropTableRequest.ClusterName, dropTableRequest.SchemaName, dropTableRequest.Table); err != nil { - log.Error("drop table failed", zap.Error(err)) - return errResult(ErrTable, err.Error()) - } - - return okResult(statusSuccess) -} - -func (a *API) split(req *http.Request) apiFuncResult { - var splitRequest SplitRequest - err := json.NewDecoder(req.Body).Decode(&splitRequest) - if err != nil { - return errResult(ErrParseRequest, err.Error()) - } - - log.Info("split request", zap.String("request", fmt.Sprintf("%+v", splitRequest))) - - ctx := context.Background() - - c, err := a.clusterManager.GetCluster(ctx, splitRequest.ClusterName) - if err != nil { - log.Error("get cluster failed", zap.String("clusterName", splitRequest.ClusterName), zap.Error(err)) - return errResult(ErrGetCluster, fmt.Sprintf("clusterName: %s, err: %s", splitRequest.ClusterName, err.Error())) - } - - newShardID, err := c.GetMetadata().AllocShardID(ctx) - if err != nil { - log.Error("alloc shard id failed", zap.Error(err)) - return errResult(ErrAllocShardID, err.Error()) - } - - splitProcedure, err := c.GetProcedureFactory().CreateSplitProcedure(ctx, coordinator.SplitRequest{ - 
ClusterMetadata: c.GetMetadata(), - SchemaName: splitRequest.SchemaName, - TableNames: splitRequest.SplitTables, - Snapshot: c.GetMetadata().GetClusterSnapshot(), - ShardID: storage.ShardID(splitRequest.ShardID), - NewShardID: storage.ShardID(newShardID), - TargetNodeName: splitRequest.NodeName, - }) - if err != nil { - log.Error("create split procedure failed", zap.Error(err)) - return errResult(ErrCreateProcedure, err.Error()) - } - - if err := c.GetProcedureManager().Submit(ctx, splitProcedure); err != nil { - log.Error("submit split procedure failed", zap.Error(err)) - return errResult(ErrSubmitProcedure, err.Error()) - } - - return okResult(newShardID) -} - -func (a *API) listClusters(req *http.Request) apiFuncResult { - clusters, err := a.clusterManager.ListClusters(req.Context()) - if err != nil { - return errResult(ErrGetCluster, err.Error()) - } - - clusterMetadatas := make([]storage.Cluster, 0, len(clusters)) - for i := 0; i < len(clusters); i++ { - storageMetadata := clusters[i].GetMetadata().GetStorageMetadata() - clusterMetadatas = append(clusterMetadatas, storageMetadata) - } - - return okResult(clusterMetadatas) -} - -func (a *API) createCluster(req *http.Request) apiFuncResult { - var createClusterRequest CreateClusterRequest - err := json.NewDecoder(req.Body).Decode(&createClusterRequest) - if err != nil { - return errResult(ErrParseRequest, err.Error()) - } - - log.Info("create cluster request", zap.String("request", fmt.Sprintf("%+v", createClusterRequest))) - - if createClusterRequest.ProcedureExecutingBatchSize == 0 { - return errResult(ErrInvalidParamsForCreateCluster, "expect positive procedureExecutingBatchSize") - } - - if _, err := a.clusterManager.GetCluster(req.Context(), createClusterRequest.Name); err == nil { - log.Error("cluster already exists", zap.String("clusterName", createClusterRequest.Name)) - return errResult(ErrGetCluster, fmt.Sprintf("cluster: %s already exists", createClusterRequest.Name)) - } - - topologyType, err := metadata.ParseTopologyType(createClusterRequest.TopologyType) - if err != nil { - log.Error("parse topology type failed", zap.Error(err)) - return errResult(ErrParseRequest, err.Error()) - } - - ctx := context.Background() - createClusterOpts := metadata.CreateClusterOpts{ - NodeCount: createClusterRequest.NodeCount, - ShardTotal: createClusterRequest.ShardTotal, - EnableSchedule: createClusterRequest.EnableSchedule, - TopologyType: topologyType, - ProcedureExecutingBatchSize: createClusterRequest.ProcedureExecutingBatchSize, - } - c, err := a.clusterManager.CreateCluster(ctx, createClusterRequest.Name, createClusterOpts) - if err != nil { - log.Error("create cluster failed", zap.Error(err)) - return errResult(metadata.ErrCreateCluster, err.Error()) - } - - return okResult(c.GetMetadata().GetClusterID()) -} - -func (a *API) updateCluster(req *http.Request) apiFuncResult { - clusterName := Param(req.Context(), clusterNameParam) - if len(clusterName) == 0 { - return errResult(ErrParseRequest, "clusterName could not be empty") - } - - var updateClusterRequest UpdateClusterRequest - err := json.NewDecoder(req.Body).Decode(&updateClusterRequest) - if err != nil { - return errResult(ErrParseRequest, err.Error()) - } - - log.Info("update cluster request", zap.String("request", fmt.Sprintf("%+v", updateClusterRequest))) - - c, err := a.clusterManager.GetCluster(req.Context(), clusterName) - if err != nil { - log.Error("get cluster failed", zap.Error(err)) - return errResult(ErrGetCluster, fmt.Sprintf("clusterName: %s, err: %s", clusterName, 
err.Error())) - } - - topologyType, err := metadata.ParseTopologyType(updateClusterRequest.TopologyType) - if err != nil { - log.Error("parse topology type", zap.Error(err)) - return errResult(ErrParseTopology, err.Error()) - } - - if err := a.clusterManager.UpdateCluster(req.Context(), clusterName, metadata.UpdateClusterOpts{ - TopologyType: topologyType, - ProcedureExecutingBatchSize: updateClusterRequest.ProcedureExecutingBatchSize, - }); err != nil { - log.Error("update cluster failed", zap.Error(err)) - return errResult(metadata.ErrUpdateCluster, err.Error()) - } - - return okResult(c.GetMetadata().GetClusterID()) -} - -func (a *API) getFlowLimiter(_ *http.Request) apiFuncResult { - limiter := a.flowLimiter.GetConfig() - return okResult(limiter) -} - -func (a *API) updateFlowLimiter(req *http.Request) apiFuncResult { - var updateFlowLimiterRequest UpdateFlowLimiterRequest - err := json.NewDecoder(req.Body).Decode(&updateFlowLimiterRequest) - if err != nil { - log.Error("decode request body failed", zap.Error(err)) - return errResult(ErrParseRequest, err.Error()) - } - - log.Info("update flow limiter request", zap.String("request", fmt.Sprintf("%+v", updateFlowLimiterRequest))) - - newLimiterConfig := config.LimiterConfig{ - Enable: updateFlowLimiterRequest.Enable, - Limit: updateFlowLimiterRequest.Limit, - Burst: updateFlowLimiterRequest.Burst, - } - - if err := a.flowLimiter.UpdateLimiter(newLimiterConfig); err != nil { - log.Error("update flow limiter failed", zap.Error(err)) - return errResult(ErrUpdateFlowLimiter, err.Error()) - } - - return okResult(statusSuccess) -} - -func (a *API) listProcedures(req *http.Request) apiFuncResult { - ctx := req.Context() - clusterName := Param(ctx, clusterNameParam) - if len(clusterName) == 0 { - return errResult(ErrParseRequest, "clusterName could not be empty") - } - - c, err := a.clusterManager.GetCluster(ctx, clusterName) - if err != nil { - return errResult(ErrGetCluster, fmt.Sprintf("clusterName: %s, err: %s", clusterName, err.Error())) - } - - infos, err := c.GetProcedureManager().ListRunningProcedure(ctx) - if err != nil { - log.Error("list running procedure failed", zap.Error(err)) - return errResult(procedure.ErrListRunningProcedure, fmt.Sprintf("clusterName: %s", clusterName)) - } - - return okResult(infos) -} - -func (a *API) listShardAffinities(req *http.Request) apiFuncResult { - ctx := req.Context() - clusterName := Param(ctx, clusterNameParam) - if len(clusterName) == 0 { - return errResult(ErrParseRequest, "clusterName could not be empty") - } - - c, err := a.clusterManager.GetCluster(ctx, clusterName) - if err != nil { - return errResult(ErrGetCluster, fmt.Sprintf("clusterName: %s, err: %s", clusterName, err.Error())) - } - - affinityRules, err := c.GetSchedulerManager().ListShardAffinityRules(ctx) - if err != nil { - return errResult(ErrListAffinityRules, fmt.Sprintf("err: %v", err)) - } - - return okResult(affinityRules) -} - -func (a *API) addShardAffinities(req *http.Request) apiFuncResult { - ctx := req.Context() - clusterName := Param(ctx, clusterNameParam) - if len(clusterName) == 0 { - return errResult(ErrParseRequest, "clusterName could not be empty") - } - - var affinities []scheduler.ShardAffinity - err := json.NewDecoder(req.Body).Decode(&affinities) - if err != nil { - log.Error("decode request body failed", zap.Error(err)) - return errResult(ErrParseRequest, err.Error()) - } - - log.Info("try to apply shard affinity rule", zap.String("cluster", clusterName), zap.String("affinity", fmt.Sprintf("%+v", affinities))) 
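For illustration only (not part of the patch): the getFlowLimiter and updateFlowLimiter handlers above manage a token-bucket limiter configured by enable, limit, and burst. The rough sketch below assumes golang.org/x/time/rate and hypothetical field names; it is not the removed limiter.FlowLimiter implementation.

package main

import (
	"fmt"

	"golang.org/x/time/rate"
)

// flowLimiter is a simplified token-bucket gate: limit is the sustained
// requests-per-second rate and burst is the bucket capacity.
type flowLimiter struct {
	enabled bool
	bucket  *rate.Limiter
}

func newFlowLimiter(enabled bool, limit float64, burst int) *flowLimiter {
	return &flowLimiter{enabled: enabled, bucket: rate.NewLimiter(rate.Limit(limit), burst)}
}

// Allow reports whether one more request may proceed right now.
func (f *flowLimiter) Allow() bool {
	if !f.enabled {
		return true
	}
	return f.bucket.Allow()
}

func main() {
	fl := newFlowLimiter(true, 100, 10) // illustrative limit and burst values
	fmt.Println("request admitted:", fl.Allow())
}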
- - c, err := a.clusterManager.GetCluster(ctx, clusterName) - if err != nil { - return errResult(ErrGetCluster, fmt.Sprintf("clusterName: %s, err: %s", clusterName, err.Error())) - } - - err = c.GetSchedulerManager().AddShardAffinityRule(ctx, scheduler.ShardAffinityRule{Affinities: affinities}) - if err != nil { - log.Error("failed to apply shard affinity rule", zap.String("cluster", clusterName), zap.String("affinity", fmt.Sprintf("%+v", affinities))) - return errResult(ErrAddAffinityRule, fmt.Sprintf("err: %v", err)) - } - - log.Info("finish applying shard affinity rule", zap.String("cluster", clusterName), zap.String("rules", fmt.Sprintf("%+v", affinities))) - - return okResult(nil) -} - -func (a *API) removeShardAffinities(req *http.Request) apiFuncResult { - ctx := req.Context() - clusterName := Param(ctx, clusterNameParam) - if len(clusterName) == 0 { - return errResult(ErrParseRequest, "clusterName could not be empty") - } - - var decodedReq RemoveShardAffinitiesRequest - err := json.NewDecoder(req.Body).Decode(&decodedReq) - if err != nil { - log.Error("decode request body failed", zap.Error(err)) - return errResult(ErrParseRequest, err.Error()) - } - - c, err := a.clusterManager.GetCluster(ctx, clusterName) - if err != nil { - return errResult(ErrGetCluster, fmt.Sprintf("clusterName: %s, err: %s", clusterName, err.Error())) - } - - for _, shardID := range decodedReq.ShardIDs { - log.Info("try to remove shard affinity rule", zap.String("cluster", clusterName), zap.Int("shardID", int(shardID))) - err := c.GetSchedulerManager().RemoveShardAffinityRule(ctx, shardID) - if err != nil { - log.Error("failed to remove shard affinity rule", zap.String("cluster", clusterName), zap.Int("shardID", int(shardID)), zap.Error(err)) - return errResult(ErrRemoveAffinityRule, fmt.Sprintf("err: %s", err)) - } - } - - return okResult(nil) -} - -func (a *API) queryTable(r *http.Request) apiFuncResult { - var req QueryTableRequest - err := json.NewDecoder(r.Body).Decode(&req) - if err != nil { - return errResult(ErrParseRequest, err.Error()) - } - - if len(req.Names) != 0 { - tables, err := a.clusterManager.GetTables(req.ClusterName, req.SchemaName, req.Names) - if err != nil { - return errResult(ErrTable, err.Error()) - } - return okResult(tables) - } - - ids := make([]storage.TableID, 0, len(req.IDs)) - for _, id := range req.IDs { - ids = append(ids, storage.TableID(id)) - } - - tables, err := a.clusterManager.GetTablesByIDs(req.ClusterName, ids) - if err != nil { - return errResult(ErrTable, err.Error()) - } - return okResult(tables) -} - -func (a *API) getEnableSchedule(r *http.Request) apiFuncResult { - ctx := r.Context() - clusterName := Param(ctx, clusterNameParam) - if len(clusterName) == 0 { - clusterName = config.DefaultClusterName - } - - c, err := a.clusterManager.GetCluster(ctx, clusterName) - if err != nil { - return errResult(ErrGetCluster, fmt.Sprintf("clusterName: %s, err: %s", clusterName, err.Error())) - } - - enableSchedule, err := c.GetSchedulerManager().GetEnableSchedule(r.Context()) - if err != nil { - return errResult(ErrGetEnableSchedule, err.Error()) - } - - return okResult(enableSchedule) -} - -func (a *API) updateEnableSchedule(r *http.Request) apiFuncResult { - ctx := r.Context() - clusterName := Param(ctx, clusterNameParam) - if len(clusterName) == 0 { - clusterName = config.DefaultClusterName - } - - c, err := a.clusterManager.GetCluster(ctx, clusterName) - if err != nil { - return errResult(ErrGetCluster, fmt.Sprintf("clusterName: %s, err: %s", clusterName, err.Error())) 
- } - - var req UpdateEnableScheduleRequest - err = json.NewDecoder(r.Body).Decode(&req) - if err != nil { - return errResult(ErrParseRequest, err.Error()) - } - - err = c.GetSchedulerManager().UpdateEnableSchedule(r.Context(), req.Enable) - if err != nil { - return errResult(ErrUpdateEnableSchedule, err.Error()) - } - - return okResult(req.Enable) -} - -func (a *API) diagnoseShards(req *http.Request) apiFuncResult { - ctx := req.Context() - clusterName := Param(ctx, clusterNameParam) - if len(clusterName) == 0 { - clusterName = config.DefaultClusterName - } - - c, err := a.clusterManager.GetCluster(ctx, clusterName) - if err != nil { - return errResult(ErrGetCluster, fmt.Sprintf("clusterName: %s, err: %s", clusterName, err.Error())) - } - - registeredNodes, err := a.clusterManager.ListRegisteredNodes(ctx, clusterName) - if err != nil { - return errResult(ErrGetCluster, fmt.Sprintf("clusterName: %s, err: %s", clusterName, err.Error())) - } - - ret := DiagnoseShardResult{ - UnregisteredShards: []storage.ShardID{}, - UnreadyShards: make(map[storage.ShardID]DiagnoseShardStatus), - } - shards := c.GetShards() - - registeredShards := make(map[storage.ShardID]struct{}, len(shards)) - // Check if there are unready shards. - for _, node := range registeredNodes { - for _, shardInfo := range node.ShardInfos { - if shardInfo.Status != storage.ShardStatusReady { - ret.UnreadyShards[shardInfo.ID] = DiagnoseShardStatus{ - NodeName: node.Node.Name, - Status: storage.ConvertShardStatusToString(shardInfo.Status), - } - } - registeredShards[shardInfo.ID] = struct{}{} - } - } - - // Check if there are unregistered shards. - for _, shard := range shards { - if _, ok := registeredShards[shard]; !ok { - ret.UnregisteredShards = append(ret.UnregisteredShards, shard) - } - } - - return okResult(ret) -} - -func (a *API) health(_ *http.Request) apiFuncResult { - isServerHealthy := a.serverStatus.IsHealthy() - if isServerHealthy { - return okResult(nil) - } - return errResult(ErrHealthCheck, fmt.Sprintf("server heath check failed, status is %v", a.serverStatus.Get())) -} - -func (a *API) pprofHeap(writer http.ResponseWriter, req *http.Request) { - pprof.Handler("heap").ServeHTTP(writer, req) -} - -func (a *API) pprofAllocs(writer http.ResponseWriter, req *http.Request) { - pprof.Handler("allocs").ServeHTTP(writer, req) -} - -func (a *API) pprofBlock(writer http.ResponseWriter, req *http.Request) { - pprof.Handler("block").ServeHTTP(writer, req) -} - -func (a *API) pprofGoroutine(writer http.ResponseWriter, req *http.Request) { - pprof.Handler("goroutine").ServeHTTP(writer, req) -} - -func (a *API) pprofThreadCreate(writer http.ResponseWriter, req *http.Request) { - pprof.Handler("threadcreate").ServeHTTP(writer, req) -} - -// printRequestInfo used for printing every request information. 
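For illustration only (not part of the patch): the printRequestInfo middleware that follows logs the request body and therefore must put a fresh reader back on request.Body, since the body can be consumed only once. A standalone sketch of that read-and-restore idiom:

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
	"strings"
)

// logBody drains the request body for logging and then restores it so that
// downstream handlers can still read it.
func logBody(r *http.Request) (string, error) {
	b, err := io.ReadAll(r.Body)
	if err != nil {
		return "", err
	}
	r.Body = io.NopCloser(bytes.NewReader(b))
	return string(b), nil
}

func main() {
	req, _ := http.NewRequest(http.MethodPost, "http://example.invalid/demo", strings.NewReader(`{"name":"demo"}`))
	body, _ := logBody(req)
	fmt.Println("logged body:", body)

	again, _ := io.ReadAll(req.Body) // the body is still readable afterwards
	fmt.Println("downstream sees:", string(again))
}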
-func printRequestInfo(handlerName string, handler http.HandlerFunc) http.HandlerFunc { - return func(writer http.ResponseWriter, request *http.Request) { - body := "" - bodyByte, err := io.ReadAll(request.Body) - if err != nil { - log.Error("read request body failed", zap.Error(err)) - return - } - body = string(bodyByte) - newBody := io.NopCloser(bytes.NewReader(bodyByte)) - request.Body = newBody - log.Info("receive http request", zap.String("handlerName", handlerName), zap.String("client host", request.RemoteAddr), zap.String("method", request.Method), zap.String("params", request.Form.Encode()), zap.String("body", body)) - handler.ServeHTTP(writer, request) - } -} - -func respondForward(w http.ResponseWriter, response *http.Response) { - b, err := io.ReadAll(response.Body) - if err != nil { - log.Error("read response failed", zap.Error(err)) - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - for key, valArr := range response.Header { - for _, val := range valArr { - w.Header().Add(key, val) - } - } - w.WriteHeader(response.StatusCode) - if n, err := w.Write(b); err != nil { - log.Error("write response failed", zap.Int("msg", n), zap.Error(err)) - } -} - -func respond(w http.ResponseWriter, data interface{}) { - statusMessage := statusSuccess - b, err := json.Marshal(&response{ - Status: statusMessage, - Data: data, - Error: "", - Msg: "", - }) - if err != nil { - log.Error("marshal json response failed", zap.Error(err)) - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusOK) - if n, err := w.Write(b); err != nil { - log.Error("write response failed", zap.Int("msg", n), zap.Error(err)) - } -} - -func respondError(w http.ResponseWriter, apiErr coderr.CodeError, msg string) { - b, err := json.Marshal(&response{ - Status: statusError, - Data: nil, - Error: apiErr.Error(), - Msg: msg, - }) - if err != nil { - log.Error("marshal json response failed", zap.Error(err)) - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(apiErr.Code().ToHTTPCode()) - if n, err := w.Write(b); err != nil { - log.Error("write response failed", zap.Int("msg", n), zap.Error(err)) - } -} - -func wrap(f apiFunc, needForward bool, forwardClient *ForwardClient) http.HandlerFunc { - hf := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if needForward { - resp, isLeader, err := forwardClient.forwardToLeader(r) - if err != nil { - log.Error("forward to leader failed", zap.Error(err)) - respondError(w, ErrForwardToLeader, err.Error()) - return - } - if !isLeader { - // nolint:staticcheck - defer resp.Body.Close() - respondForward(w, resp) - return - } - } - result := f(r) - if result.err != nil { - respondError(w, result.err, result.errMsg) - return - } - respond(w, result.data) - }) - return hf -} diff --git a/horaemeta/server/service/http/error.go b/horaemeta/server/service/http/error.go deleted file mode 100644 index 70cc83c8e4..0000000000 --- a/horaemeta/server/service/http/error.go +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package http - -import "github.com/apache/incubator-horaedb-meta/pkg/coderr" - -var ( - ErrParseRequest = coderr.NewCodeError(coderr.BadRequest, "parse request params") - ErrInvalidParamsForCreateCluster = coderr.NewCodeError(coderr.BadRequest, "invalid params to create cluster") - ErrTable = coderr.NewCodeError(coderr.Internal, "table") - ErrRoute = coderr.NewCodeError(coderr.Internal, "route table") - ErrGetNodeShards = coderr.NewCodeError(coderr.Internal, "get node shards") - ErrDropNodeShards = coderr.NewCodeError(coderr.Internal, "drop node shards") - ErrCreateProcedure = coderr.NewCodeError(coderr.Internal, "create procedure") - ErrSubmitProcedure = coderr.NewCodeError(coderr.Internal, "submit procedure") - ErrGetCluster = coderr.NewCodeError(coderr.Internal, "get cluster") - ErrAllocShardID = coderr.NewCodeError(coderr.Internal, "alloc shard id") - ErrForwardToLeader = coderr.NewCodeError(coderr.Internal, "forward to leader") - ErrParseLeaderAddr = coderr.NewCodeError(coderr.Internal, "parse leader addr") - ErrHealthCheck = coderr.NewCodeError(coderr.Internal, "server health check") - ErrParseTopology = coderr.NewCodeError(coderr.Internal, "parse topology type") - ErrUpdateFlowLimiter = coderr.NewCodeError(coderr.Internal, "update flow limiter") - ErrGetEnableSchedule = coderr.NewCodeError(coderr.Internal, "get enableSchedule") - ErrUpdateEnableSchedule = coderr.NewCodeError(coderr.Internal, "update enableSchedule") - ErrAddLearner = coderr.NewCodeError(coderr.Internal, "add member as learner") - ErrListMembers = coderr.NewCodeError(coderr.Internal, "get member list") - ErrRemoveMembers = coderr.NewCodeError(coderr.Internal, "remove member") - ErrGetMember = coderr.NewCodeError(coderr.Internal, "get member") - ErrListAffinityRules = coderr.NewCodeError(coderr.Internal, "list affinity rules") - ErrAddAffinityRule = coderr.NewCodeError(coderr.Internal, "add affinity rule") - ErrRemoveAffinityRule = coderr.NewCodeError(coderr.Internal, "remove affinity rule") -) diff --git a/horaemeta/server/service/http/etcd_api.go b/horaemeta/server/service/http/etcd_api.go deleted file mode 100644 index c8c190c986..0000000000 --- a/horaemeta/server/service/http/etcd_api.go +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package http - -import ( - "encoding/json" - "fmt" - "net/http" - - "github.com/apache/incubator-horaedb-meta/pkg/log" - clientv3 "go.etcd.io/etcd/client/v3" - "go.uber.org/zap" -) - -type EtcdAPI struct { - etcdClient *clientv3.Client - forwardClient *ForwardClient -} - -type AddMemberRequest struct { - MemberAddrs []string `json:"memberAddrs"` -} - -type UpdateMemberRequest struct { - OldMemberName string `json:"oldMemberName"` - NewMemberAddr []string `json:"newMemberAddr"` -} - -type RemoveMemberRequest struct { - MemberName string `json:"memberName"` -} - -type PromoteLearnerRequest struct { - LearnerName string `json:"learnerName"` -} - -type MoveLeaderRequest struct { - MemberName string `json:"memberName"` -} - -func NewEtcdAPI(etcdClient *clientv3.Client, forwardClient *ForwardClient) EtcdAPI { - return EtcdAPI{ - etcdClient: etcdClient, - forwardClient: forwardClient, - } -} - -func (a *EtcdAPI) addMember(req *http.Request) apiFuncResult { - var addMemberRequest AddMemberRequest - err := json.NewDecoder(req.Body).Decode(&addMemberRequest) - if err != nil { - log.Error("decode request body failed", zap.Error(err)) - return errResult(ErrParseRequest, err.Error()) - } - - resp, err := a.etcdClient.MemberAdd(req.Context(), addMemberRequest.MemberAddrs) - if err != nil { - log.Error("member add as learner failed", zap.Error(err)) - return errResult(ErrAddLearner, err.Error()) - } - - return okResult(resp) -} - -func (a *EtcdAPI) getMember(req *http.Request) apiFuncResult { - resp, err := a.etcdClient.MemberList(req.Context()) - if err != nil { - log.Error("list member failed", zap.Error(err)) - return errResult(ErrListMembers, err.Error()) - } - - return okResult(resp) -} - -func (a *EtcdAPI) updateMember(req *http.Request) apiFuncResult { - var updateMemberRequest UpdateMemberRequest - err := json.NewDecoder(req.Body).Decode(&updateMemberRequest) - if err != nil { - log.Error("decode request body failed", zap.Error(err)) - return errResult(ErrParseTopology, err.Error()) - } - - memberListResp, err := a.etcdClient.MemberList(req.Context()) - if err != nil { - log.Error("list members failed", zap.Error(err)) - return errResult(ErrListMembers, err.Error()) - } - - for _, member := range memberListResp.Members { - if member.Name == updateMemberRequest.OldMemberName { - _, err := a.etcdClient.MemberUpdate(req.Context(), member.ID, updateMemberRequest.NewMemberAddr) - if err != nil { - log.Error("remove learner failed", zap.Error(err)) - return errResult(ErrRemoveMembers, err.Error()) - } - return okResult("ok") - } - } - - return errResult(ErrGetMember, fmt.Sprintf("member not found, member name: %s", updateMemberRequest.OldMemberName)) -} - -func (a *EtcdAPI) removeMember(req *http.Request) apiFuncResult { - var removeMemberRequest RemoveMemberRequest - err := json.NewDecoder(req.Body).Decode(&removeMemberRequest) - if err != nil { - log.Error("decode request body failed", zap.Error(err)) - return errResult(ErrParseRequest, err.Error()) - } - - memberListResp, err := a.etcdClient.MemberList(req.Context()) - if err != nil { - log.Error("list members failed", zap.Error(err)) - return errResult(ErrListMembers, err.Error()) - } - - for _, member := range memberListResp.Members { - if member.Name == removeMemberRequest.MemberName { - _, err := a.etcdClient.MemberRemove(req.Context(), member.ID) - if err != nil { - log.Error("remove learner failed", zap.Error(err)) - return 
errResult(ErrRemoveMembers, err.Error()) - } - - return okResult("ok") - } - } - - return errResult(ErrGetMember, fmt.Sprintf("member not found, member name: %s", removeMemberRequest.MemberName)) -} - -func (a *EtcdAPI) promoteLearner(req *http.Request) apiFuncResult { - var promoteLearnerRequest PromoteLearnerRequest - err := json.NewDecoder(req.Body).Decode(&promoteLearnerRequest) - if err != nil { - log.Error("decode request body failed", zap.Error(err)) - return errResult(ErrParseRequest, err.Error()) - } - - memberListResp, err := a.etcdClient.MemberList(req.Context()) - if err != nil { - log.Error("list members failed", zap.Error(err)) - return errResult(ErrListMembers, err.Error()) - } - - for _, member := range memberListResp.Members { - if member.Name == promoteLearnerRequest.LearnerName { - _, err := a.etcdClient.MemberPromote(req.Context(), member.ID) - if err != nil { - log.Error("remove learner failed", zap.Error(err)) - return errResult(ErrRemoveMembers, err.Error()) - } - return okResult("ok") - } - } - - return errResult(ErrGetMember, fmt.Sprintf("learner not found, learner name: %s", promoteLearnerRequest.LearnerName)) -} - -func (a *EtcdAPI) moveLeader(req *http.Request) apiFuncResult { - var moveLeaderRequest MoveLeaderRequest - err := json.NewDecoder(req.Body).Decode(&moveLeaderRequest) - if err != nil { - log.Error("decode request body failed", zap.Error(err)) - return errResult(ErrParseRequest, err.Error()) - } - - memberListResp, err := a.etcdClient.MemberList(req.Context()) - if err != nil { - log.Error("list members failed", zap.Error(err)) - return errResult(ErrListMembers, err.Error()) - } - - for _, member := range memberListResp.Members { - if member.Name == moveLeaderRequest.MemberName { - moveLeaderResp, err := a.etcdClient.MoveLeader(req.Context(), member.ID) - if err != nil { - log.Error("remove learner failed", zap.Error(err)) - return errResult(ErrRemoveMembers, err.Error()) - } - log.Info("move leader", zap.String("moveLeaderResp", fmt.Sprintf("%v", moveLeaderResp))) - return okResult("ok") - } - } - - return errResult(ErrGetMember, fmt.Sprintf("member not found, member name: %s", moveLeaderRequest.MemberName)) -} diff --git a/horaemeta/server/service/http/forward.go b/horaemeta/server/service/http/forward.go deleted file mode 100644 index 01874aa7af..0000000000 --- a/horaemeta/server/service/http/forward.go +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package http - -import ( - "context" - "net" - "net/http" - "net/url" - "strconv" - "strings" - "time" - - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/member" - "github.com/apache/incubator-horaedb-meta/server/service" - "github.com/pkg/errors" - "go.uber.org/zap" -) - -type ForwardClient struct { - member *member.Member - client *http.Client - port int -} - -func NewForwardClient(member *member.Member, port int) *ForwardClient { - return &ForwardClient{ - member: member, - client: getForwardedHTTPClient(), - port: port, - } -} - -func (s *ForwardClient) GetLeaderAddr(ctx context.Context) (string, error) { - resp, err := s.member.GetLeaderAddr(ctx) - if err != nil { - return "", err - } - - return resp.LeaderEndpoint, nil -} - -func (s *ForwardClient) getForwardedAddr(ctx context.Context) (string, bool, error) { - resp, err := s.member.GetLeaderAddr(ctx) - if err != nil { - return "", false, errors.WithMessage(err, "get forwarded addr") - } - if resp.IsLocal { - return "", true, nil - } - // TODO: In the current implementation, if the HTTP port of each node of HoraeMeta is inconsistent, the forwarding address will be wrong - httpAddr, err := formatHTTPAddr(resp.LeaderEndpoint, s.port) - if err != nil { - return "", false, errors.WithMessage(err, "format http addr") - } - log.Info("getForwardedAddr", zap.String("leaderAddr", httpAddr), zap.Int("port", s.port)) - return httpAddr, false, nil -} - -func (s *ForwardClient) forwardToLeader(req *http.Request) (*http.Response, bool, error) { - addr, isLeader, err := s.getForwardedAddr(req.Context()) - if err != nil { - log.Error("get forward addr failed", zap.Error(err)) - return nil, false, err - } - if isLeader { - return nil, true, nil - } - - // Update remote host - req.RequestURI = "" - if req.TLS == nil { - req.URL.Scheme = "http" - } else { - req.URL.Scheme = "https" - } - req.URL.Host = addr - - resp, err := s.client.Do(req) - if err != nil { - log.Error("forward client send request failed", zap.Error(err)) - return nil, false, err - } - - return resp, false, nil -} - -func getForwardedHTTPClient() *http.Client { - return &http.Client{ - Transport: &http.Transport{ - Proxy: http.ProxyFromEnvironment, - DialContext: (&net.Dialer{ - Timeout: 30 * time.Second, - Deadline: time.Time{}, - KeepAlive: 30 * time.Second, - }).DialContext, - TLSHandshakeTimeout: 10 * time.Second, - }, - } -} - -// formatHttpAddr convert grpcAddr(http://127.0.0.1:8831) httpPort(5000) to httpAddr(127.0.0.1:5000). -func formatHTTPAddr(grpcAddr string, httpPort int) (string, error) { - url, err := url.Parse(grpcAddr) - if err != nil { - return "", service.ErrParseURL.WithCause(err) - } - hostAndPort := strings.Split(url.Host, ":") - if len(hostAndPort) != 2 { - return "", errors.WithMessagef(ErrParseLeaderAddr, "parse leader addr, grpcAdd:%s", grpcAddr) - } - hostAndPort[1] = strconv.Itoa(httpPort) - httpAddr := strings.Join(hostAndPort, ":") - return httpAddr, nil -} diff --git a/horaemeta/server/service/http/route.go b/horaemeta/server/service/http/route.go deleted file mode 100644 index cb4245ef79..0000000000 --- a/horaemeta/server/service/http/route.go +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright 2015 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// This file is copied from: -// https://github.com/prometheus/common/blob/8c9cb3fa6d01832ea16937b20ea561eed81abd2f/route/route.go - -package http - -import ( - "context" - "net/http" - - "github.com/julienschmidt/httprouter" -) - -type param string - -const DebugPrefix = "/debug" - -// Router wraps httprouter.Router and adds support for prefixed sub-routers, -// per-request context injections and instrumentation. -type Router struct { - rtr *httprouter.Router - prefix string - instrh func(handlerName string, handler http.HandlerFunc) http.HandlerFunc -} - -func New() *Router { - return &Router{ - rtr: httprouter.New(), - prefix: "", - instrh: nil, - } -} - -// WithPrefix returns a router that prefixes all registered routes with prefix. -func (r *Router) WithPrefix(prefix string) *Router { - return &Router{rtr: r.rtr, prefix: r.prefix + prefix, instrh: r.instrh} -} - -// WithInstrumentation returns a router with instrumentation support. -func (r *Router) WithInstrumentation(instrh func(handlerName string, handler http.HandlerFunc) http.HandlerFunc) *Router { - if r.instrh != nil { - newInstrh := instrh - instrh = func(handlerName string, handler http.HandlerFunc) http.HandlerFunc { - return newInstrh(handlerName, r.instrh(handlerName, handler)) - } - } - return &Router{rtr: r.rtr, prefix: r.prefix, instrh: instrh} -} - -// ServeHTTP implements http.Handler. -func (r *Router) ServeHTTP(w http.ResponseWriter, req *http.Request) { - r.rtr.ServeHTTP(w, req) -} - -// Get registers a new GET route. -func (r *Router) Get(path string, h http.HandlerFunc) { - r.rtr.GET(r.prefix+path, r.handle(path, h)) -} - -// DebugGet registers a new GET route without prefix. -func (r *Router) DebugGet(path string, h http.HandlerFunc) { - r.rtr.GET(DebugPrefix+path, r.handle(path, h)) -} - -// Options registers a new OPTIONS route. -func (r *Router) Options(path string, h http.HandlerFunc) { - r.rtr.OPTIONS(r.prefix+path, r.handle(path, h)) -} - -// Del registers a new DELETE route. -func (r *Router) Del(path string, h http.HandlerFunc) { - r.rtr.DELETE(r.prefix+path, r.handle(path, h)) -} - -// Put registers a new PUT route. -func (r *Router) Put(path string, h http.HandlerFunc) { - r.rtr.PUT(r.prefix+path, r.handle(path, h)) -} - -// DebugPut registers a new PUT route without prefix. -func (r *Router) DebugPut(path string, h http.HandlerFunc) { - r.rtr.PUT(DebugPrefix+path, r.handle(path, h)) -} - -// Post registers a new POST route. -func (r *Router) Post(path string, h http.HandlerFunc) { - r.rtr.POST(r.prefix+path, r.handle(path, h)) -} - -// Head registers a new HEAD route. -func (r *Router) Head(path string, h http.HandlerFunc) { - r.rtr.HEAD(r.prefix+path, r.handle(path, h)) -} - -// handle turns a HandlerFunc into a httprouter.Handle. -func (r *Router) handle(handlerName string, h http.HandlerFunc) httprouter.Handle { - if r.instrh != nil { - // This needs to be outside the closure to avoid data race when reading and writing to 'h'. 
- h = r.instrh(handlerName, h) - } - return func(w http.ResponseWriter, req *http.Request, params httprouter.Params) { - ctx, cancel := context.WithCancel(req.Context()) - defer cancel() - - for _, p := range params { - ctx = context.WithValue(ctx, param(p.Key), p.Value) - } - h(w, req.WithContext(ctx)) - } -} - -// Param returns param p for the context, or the empty string when -// param does not exist in context. -func Param(ctx context.Context, p string) string { - if v := ctx.Value(param(p)); v != nil { - return v.(string) - } - return "" -} diff --git a/horaemeta/server/service/http/service.go b/horaemeta/server/service/http/service.go deleted file mode 100644 index e40c5df96c..0000000000 --- a/horaemeta/server/service/http/service.go +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package http - -import ( - "fmt" - "net/http" - "time" -) - -const defaultReadHeaderTimeout time.Duration = time.Duration(5) * time.Second - -// Service is wrapper for http.Server -type Service struct { - port int - readTimeout time.Duration - writeTimeout time.Duration - - router *Router - server http.Server -} - -func NewHTTPService(port int, readTimeout time.Duration, writeTimeout time.Duration, router *Router) *Service { - return &Service{ - port: port, - readTimeout: readTimeout, - writeTimeout: writeTimeout, - router: router, - server: http.Server{ - ReadHeaderTimeout: defaultReadHeaderTimeout, - }, - } -} - -func (s *Service) Start() error { - s.server.ReadTimeout = s.readTimeout - s.server.WriteTimeout = s.writeTimeout - s.server.Addr = fmt.Sprintf(":%d", s.port) - s.server.Handler = s.router - - return s.server.ListenAndServe() -} - -func (s *Service) Stop() error { - return s.server.Close() -} diff --git a/horaemeta/server/service/http/types.go b/horaemeta/server/service/http/types.go deleted file mode 100644 index 183257eb1a..0000000000 --- a/horaemeta/server/service/http/types.go +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package http - -import ( - "net/http" - - "github.com/apache/incubator-horaedb-meta/pkg/coderr" - "github.com/apache/incubator-horaedb-meta/server/cluster" - "github.com/apache/incubator-horaedb-meta/server/limiter" - "github.com/apache/incubator-horaedb-meta/server/status" - "github.com/apache/incubator-horaedb-meta/server/storage" -) - -const ( - statusSuccess string = "success" - statusError string = "error" - clusterNameParam string = "cluster" - - apiPrefix string = "/api/v1" -) - -type response struct { - Status string `json:"status"` - Data interface{} `json:"data,omitempty"` - Error string `json:"error,omitempty"` - Msg string `json:"msg,omitempty"` -} - -type apiFuncResult struct { - data interface{} - err coderr.CodeError - errMsg string -} - -func okResult(data interface{}) apiFuncResult { - return apiFuncResult{ - data: data, - err: nil, - errMsg: "", - } -} - -func errResult(err coderr.CodeError, errMsg string) apiFuncResult { - return apiFuncResult{ - data: nil, - err: err, - errMsg: errMsg, - } -} - -type apiFunc func(r *http.Request) apiFuncResult - -type API struct { - clusterManager cluster.Manager - - serverStatus *status.ServerStatus - - forwardClient *ForwardClient - flowLimiter *limiter.FlowLimiter - - etcdAPI EtcdAPI -} - -type DiagnoseShardStatus struct { - NodeName string `json:"nodeName"` - Status string `json:"status"` -} - -type DiagnoseShardResult struct { - // shardID -> nodeName - UnregisteredShards []storage.ShardID `json:"unregisteredShards"` - UnreadyShards map[storage.ShardID]DiagnoseShardStatus `json:"unreadyShards"` -} - -type QueryTableRequest struct { - ClusterName string `json:"clusterName"` - SchemaName string `json:"schemaName"` - Names []string `json:"names"` - IDs []uint64 `json:"ids"` -} - -type GetShardTablesRequest struct { - ClusterName string `json:"clusterName"` - ShardIDs []uint32 `json:"shardIDs"` -} - -type TransferLeaderRequest struct { - ClusterName string `json:"clusterName"` - ShardID uint32 `json:"shardID"` - OldLeaderNodeName string `json:"OldLeaderNodeName"` - NewLeaderNodeName string `json:"newLeaderNodeName"` -} - -type RouteRequest struct { - ClusterName string `json:"clusterName"` - SchemaName string `json:"schemaName"` - Tables []string `json:"table"` -} - -type NodeShardsRequest struct { - ClusterName string `json:"clusterName"` -} - -type DropNodeShardsRequest struct { - ClusterName string `json:"clusterName"` - ShardIDs []uint32 `json:"shardIDs"` -} - -type DropTableRequest struct { - ClusterName string `json:"clusterName"` - SchemaName string `json:"schemaName"` - Table string `json:"table"` -} - -type SplitRequest struct { - ClusterName string `json:"clusterName"` - SchemaName string `json:"schemaName"` - ShardID uint32 `json:"shardID"` - SplitTables []string `json:"splitTables"` - NodeName string `json:"nodeName"` -} - -type CreateClusterRequest struct { - Name string `json:"Name"` - NodeCount uint32 `json:"NodeCount"` - ShardTotal uint32 `json:"ShardTotal"` - EnableSchedule bool `json:"enableSchedule"` - TopologyType string `json:"topologyType"` - ProcedureExecutingBatchSize uint32 `json:"procedureExecutingBatchSize"` -} - -type UpdateClusterRequest struct { - NodeCount uint32 `json:"nodeCount"` - ShardTotal uint32 `json:"shardTotal"` - EnableSchedule bool `json:"enableSchedule"` - TopologyType string `json:"topologyType"` - ProcedureExecutingBatchSize uint32 `json:"procedureExecutingBatchSize"` -} - -type 
UpdateFlowLimiterRequest struct { - Enable bool `json:"enable"` - Limit int `json:"limit"` - Burst int `json:"burst"` -} - -type UpdateEnableScheduleRequest struct { - Enable bool `json:"enable"` -} - -type RemoveShardAffinitiesRequest struct { - ShardIDs []storage.ShardID `json:"shardIDs"` -} diff --git a/horaemeta/server/service/util.go b/horaemeta/server/service/util.go deleted file mode 100644 index 60a3430f0e..0000000000 --- a/horaemeta/server/service/util.go +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package service - -import ( - "context" - "net/url" - "strings" - - "github.com/apache/incubator-horaedb-meta/pkg/coderr" - "google.golang.org/grpc" - "google.golang.org/grpc/credentials/insecure" -) - -var ( - ErrParseURL = coderr.NewCodeError(coderr.Internal, "parse url") - ErrGRPCDial = coderr.NewCodeError(coderr.Internal, "grpc dial") -) - -// GetClientConn returns a gRPC client connection. -func GetClientConn(ctx context.Context, addr string) (*grpc.ClientConn, error) { - opt := grpc.WithTransportCredentials(insecure.NewCredentials()) - - host := addr - if strings.HasPrefix(addr, "http") { - u, err := url.Parse(addr) - if err != nil { - return nil, ErrParseURL.WithCause(err) - } - host = u.Host - } - - cc, err := grpc.DialContext(ctx, host, opt) - if err != nil { - return nil, ErrGRPCDial.WithCause(err) - } - return cc, nil -} diff --git a/horaemeta/server/status/status.go b/horaemeta/server/status/status.go deleted file mode 100644 index 8bd989d50d..0000000000 --- a/horaemeta/server/status/status.go +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package status - -import "sync/atomic" - -type Status int32 - -const ( - StatusWaiting Status = iota - StatusRunning - Terminated -) - -type ServerStatus struct { - status Status -} - -func NewServerStatus() *ServerStatus { - return &ServerStatus{ - status: StatusWaiting, - } -} - -func (s *ServerStatus) Set(status Status) { - atomic.StoreInt32((*int32)(&s.status), int32(status)) -} - -func (s *ServerStatus) Get() Status { - return Status(atomic.LoadInt32((*int32)(&s.status))) -} - -func (s *ServerStatus) IsHealthy() bool { - return s.Get() == StatusRunning -} diff --git a/horaemeta/server/storage/error.go b/horaemeta/server/storage/error.go deleted file mode 100644 index a70a3290cf..0000000000 --- a/horaemeta/server/storage/error.go +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package storage - -import "github.com/apache/incubator-horaedb-meta/pkg/coderr" - -var ( - ErrEncode = coderr.NewCodeError(coderr.Internal, "storage encode") - ErrDecode = coderr.NewCodeError(coderr.Internal, "storage decode") - - ErrCreateSchemaAgain = coderr.NewCodeError(coderr.Internal, "storage create schemas") - ErrCreateClusterAgain = coderr.NewCodeError(coderr.Internal, "storage create cluster") - ErrUpdateCluster = coderr.NewCodeError(coderr.Internal, "storage update cluster") - ErrCreateClusterViewAgain = coderr.NewCodeError(coderr.Internal, "storage create cluster view") - ErrUpdateClusterViewConflict = coderr.NewCodeError(coderr.Internal, "storage update cluster view") - ErrCreateTableAgain = coderr.NewCodeError(coderr.Internal, "storage create tables") - ErrDeleteTableAgain = coderr.NewCodeError(coderr.Internal, "storage delete table") - ErrCreateShardViewAgain = coderr.NewCodeError(coderr.Internal, "storage create shard view") - ErrUpdateShardViewConflict = coderr.NewCodeError(coderr.Internal, "storage update shard view") -) diff --git a/horaemeta/server/storage/key_path.go b/horaemeta/server/storage/key_path.go deleted file mode 100644 index 4627bc0b89..0000000000 --- a/horaemeta/server/storage/key_path.go +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package storage - -import ( - "fmt" - "path" - "strings" -) - -const ( - version = "v1" - cluster = "cluster" - schema = "schema" - table = "table" - tableNameToID = "table_name_to_id" - node = "node" - clusterView = "cluster_view" - shardView = "shard_view" - latestVersion = "latest_version" - info = "info" - tableAssign = "table_assign" -) - -// makeSchemaKey returns the key path to the schema meta info. -func makeSchemaKey(rootPath string, clusterID uint32, schemaID uint32) string { - // Example: - // v1/cluster/1/schema/info/1 -> pb.Schema - // v1/cluster/1/schema/info/2 -> pb.Schema - // v1/cluster/1/schema/info/3 -> pb.Schema - return path.Join(rootPath, version, cluster, fmtID(uint64(clusterID)), schema, info, fmtID(uint64(schemaID))) -} - -// makeClusterKey returns the cluster meta info key path. -func makeClusterKey(rootPath string, clusterID uint32) string { - // Example: - // v1/cluster/info/1 -> pb.Cluster - // v1/cluster/info/2 -> pb.Cluster - // v1/cluster/info/3 -> pb.Cluster - return path.Join(rootPath, version, cluster, info, fmtID(uint64(clusterID))) -} - -// makeClusterViewLatestVersionKey returns the latest version info key path of cluster clusterView. -func makeClusterViewLatestVersionKey(rootPath string, clusterID uint32) string { - // Example: - // v1/cluster/1/clusterView/latest_version -> pb.ClusterTopologyLatestVersion - // v1/cluster/2/clusterView/latest_version -> pb.ClusterTopologyLatestVersion - // v1/cluster/3/clusterView/latest_version -> pb.ClusterTopologyLatestVersion - return path.Join(rootPath, version, cluster, fmtID(uint64(clusterID)), clusterView, latestVersion) -} - -// makeClusterViewKey returns the cluster view meta info key path. -func makeClusterViewKey(rootPath string, clusterID uint32, latestVersion string) string { - // Example: - // v1/cluster/1/clusterView/1 -> pb.ClusterTopology - // v1/cluster/1/clusterView/2 -> pb.ClusterTopology - // v1/cluster/1/clusterView/3 -> pb.ClusterTopology - return path.Join(rootPath, version, cluster, fmtID(uint64(clusterID)), clusterView, latestVersion) -} - -func makeShardViewVersionKey(rootPath string, clusterID uint32) string { - return path.Join(rootPath, version, cluster, fmtID(uint64(clusterID)), shardView) -} - -// makeShardViewLatestVersionKey returns the latest version info key path of shard. -func makeShardViewLatestVersionKey(rootPath string, clusterID uint32, shardID uint32) string { - // Example: - // v1/cluster/1/shard_view/1/latest_version -> pb.ShardLatestVersion - // v1/cluster/1/shard_view/2/latest_version -> pb.ShardLatestVersion - // v1/cluster/1/shard_view/3/latest_version -> pb.ShardLatestVersion - return path.Join(rootPath, version, cluster, fmtID(uint64(clusterID)), shardView, fmtID(uint64(shardID)), latestVersion) -} - -func decodeShardViewVersionKey(key string) (string, error) { - sequences := strings.Split(key, "/") - shardID := sequences[len(sequences)-2] - return shardID, nil -} - -// makeShardViewKey returns the shard meta info key path. 
-func makeShardViewKey(rootPath string, clusterID uint32, shardID uint32, latestVersion string) string { - // Example: - // v1/cluster/1/shard_view/1/1 -> pb.ShardTopology - // v1/cluster/1/shard_view/2/1 -> pb.ShardTopology - // v1/cluster/1/shard_view/3/1 -> pb.ShardTopology - return path.Join(rootPath, version, cluster, fmtID(uint64(clusterID)), shardView, fmtID(uint64(shardID)), latestVersion) -} - -// makeNodeKey returns the node meta info key path. -func makeNodeKey(rootPath string, clusterID uint32, nodeName string) string { - // Example: - // v1/cluster/1/node/127.0.0.1:8081 -> pb.NodeName - // v1/cluster/1/node/127.0.0.2:8081 -> pb.NodeName - // v1/cluster/1/node/127.0.0.3:8081 -> pb.NodeName - return path.Join(rootPath, version, cluster, fmtID(uint64(clusterID)), node, nodeName) -} - -// makeTableKey returns the table meta info key path. -func makeTableKey(rootPath string, clusterID uint32, schemaID uint32, tableID uint64) string { - // Example: - // v1/cluster/1/schema/1/table/1 -> pb.Table - // v1/cluster/1/schema/1/table/2 -> pb.Table - // v1/cluster/1/schema/1/table/3 -> pb.Table - return path.Join(rootPath, version, cluster, fmtID(uint64(clusterID)), schema, fmtID(uint64(schemaID)), table, fmtID(tableID)) -} - -// makeNameToIDKey return the table id key path. -func makeNameToIDKey(rootPath string, clusterID uint32, schemaID uint32, tableName string) string { - // Example: - // v1/cluster/1/schema/1/table_name_to_id/table1 -> 1 - // v1/cluster/1/schema/1/table_name_to_id/table2 -> 2 - return path.Join(rootPath, version, cluster, fmtID(uint64(clusterID)), schema, fmtID(uint64(schemaID)), tableNameToID, tableName) -} - -// makeTableAssignKey return the tableAssign key path. -func makeTableAssignKey(rootPath string, clusterID uint32, schemaID uint32, tableName string) string { - // Example: - // v1/cluster/1/schema/1/table_assign/tableName1 -> shardID1 - // v1/cluster/1/schema/1/table_assign/tableName2 -> shardID2 - return path.Join(rootPath, version, cluster, fmtID(uint64(clusterID)), schema, fmtID(uint64(schemaID)), tableAssign, tableName) -} - -// makeTableAssignPrefixKey return the tableAssign prefix key path. -func makeTableAssignPrefixKey(rootPath string, clusterID uint32, schemaID uint32) string { - return path.Join(rootPath, version, cluster, fmtID(uint64(clusterID)), schema, fmtID(uint64(schemaID)), tableAssign) -} - -func fmtID(id uint64) string { - return fmt.Sprintf("%020d", id) -} diff --git a/horaemeta/server/storage/meta.go b/horaemeta/server/storage/meta.go deleted file mode 100644 index b56da68256..0000000000 --- a/horaemeta/server/storage/meta.go +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package storage - -import ( - "context" - - clientv3 "go.etcd.io/etcd/client/v3" -) - -// Storage defines the storage operations on the HoraeDB cluster meta info. -type Storage interface { - // GetCluster get cluster metadata by clusterID. - GetCluster(ctx context.Context, clusterID ClusterID) (Cluster, error) - // ListClusters list all clusters. - ListClusters(ctx context.Context) (ListClustersResult, error) - // CreateCluster create new cluster, return error if cluster already exists. - CreateCluster(ctx context.Context, req CreateClusterRequest) error - // UpdateCluster update cluster metadata. - UpdateCluster(ctx context.Context, req UpdateClusterRequest) error - - // CreateClusterView create cluster view. - CreateClusterView(ctx context.Context, req CreateClusterViewRequest) error - // GetClusterView get cluster view by cluster id. - GetClusterView(ctx context.Context, req GetClusterViewRequest) (GetClusterViewResult, error) - // UpdateClusterView update cluster view. - UpdateClusterView(ctx context.Context, req UpdateClusterViewRequest) error - - // ListSchemas list all schemas in specified cluster. - ListSchemas(ctx context.Context, req ListSchemasRequest) (ListSchemasResult, error) - // CreateSchema create schema in specified cluster. - CreateSchema(ctx context.Context, req CreateSchemaRequest) error - - // CreateTable create new table in specified cluster and schema, return error if table already exists. - CreateTable(ctx context.Context, req CreateTableRequest) error - // GetTable get table by table name in specified cluster and schema. - GetTable(ctx context.Context, req GetTableRequest) (GetTableResult, error) - // ListTables list all tables in specified cluster and schema. - ListTables(ctx context.Context, req ListTableRequest) (ListTablesResult, error) - // DeleteTable delete table by table name in specified cluster and schema. - DeleteTable(ctx context.Context, req DeleteTableRequest) error - - // AssignTableToShard save table assign result. - AssignTableToShard(ctx context.Context, req AssignTableToShardRequest) error - // DeleteTableAssignedShard delete table assign result. - DeleteTableAssignedShard(ctx context.Context, req DeleteTableAssignedRequest) error - // ListTableAssignedShard list table assign result. - ListTableAssignedShard(ctx context.Context, req ListAssignTableRequest) (ListTableAssignedShardResult, error) - - // CreateShardViews create shard views in specified cluster. - CreateShardViews(ctx context.Context, req CreateShardViewsRequest) error - // ListShardViews list all shard views in specified cluster. - ListShardViews(ctx context.Context, req ListShardViewsRequest) (ListShardViewsResult, error) - // UpdateShardView update shard views in specified cluster. - UpdateShardView(ctx context.Context, req UpdateShardViewRequest) error - - // ListNodes list all nodes in specified cluster. - ListNodes(ctx context.Context, req ListNodesRequest) (ListNodesResult, error) - // CreateOrUpdateNode create or update node in specified cluster. - CreateOrUpdateNode(ctx context.Context, req CreateOrUpdateNodeRequest) error -} - -// NewStorageWithEtcdBackend creates a new storage with etcd backend. 
-func NewStorageWithEtcdBackend(client *clientv3.Client, rootPath string, opts Options) Storage { - return newEtcdStorage(client, rootPath, opts) -} diff --git a/horaemeta/server/storage/storage_impl.go b/horaemeta/server/storage/storage_impl.go deleted file mode 100644 index e6b130e88c..0000000000 --- a/horaemeta/server/storage/storage_impl.go +++ /dev/null @@ -1,661 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package storage - -import ( - "context" - "math" - "strconv" - "strings" - - "github.com/apache/incubator-horaedb-meta/pkg/log" - "github.com/apache/incubator-horaedb-meta/server/etcdutil" - "github.com/apache/incubator-horaedb-proto/golang/pkg/clusterpb" - "github.com/pkg/errors" - clientv3 "go.etcd.io/etcd/client/v3" - "go.etcd.io/etcd/client/v3/clientv3util" - "go.uber.org/zap" - "google.golang.org/protobuf/proto" -) - -type Options struct { - - // MaxScanLimit is the max limit of the number of keys in a scan. - MaxScanLimit int - // MinScanLimit is the min limit of the number of keys in a scan. - MinScanLimit int - // MaxOpsPerTxn is th max number of the operations allowed in a txn. - MaxOpsPerTxn int -} - -// metaStorageImpl is the base underlying storage endpoint for all other upper -// specific storage backends. It should define some common storage interfaces and operations, -// which provIDes the default implementations for all kinds of storages. -type metaStorageImpl struct { - client *clientv3.Client - - opts Options - - rootPath string -} - -// newEtcdBackend is used to create a new etcd backend. 
-func newEtcdStorage(client *clientv3.Client, rootPath string, opts Options) Storage { - return &metaStorageImpl{client, opts, rootPath} -} - -func (s *metaStorageImpl) GetCluster(ctx context.Context, clusterID ClusterID) (Cluster, error) { - clusterKey := makeClusterKey(s.rootPath, uint32(clusterID)) - - var cluster Cluster - value, err := etcdutil.Get(ctx, s.client, clusterKey) - if err != nil { - return cluster, errors.WithMessagef(err, "get cluster, clusterID:%d, key:%s", clusterID, clusterKey) - } - - clusterProto := &clusterpb.Cluster{} - if err = proto.Unmarshal([]byte(value), clusterProto); err != nil { - return cluster, ErrDecode.WithCausef("decode cluster view, clusterID:%d, err:%v", clusterID, err) - } - - cluster = convertClusterPB(clusterProto) - return cluster, nil -} - -func (s *metaStorageImpl) ListClusters(ctx context.Context) (ListClustersResult, error) { - startKey := makeClusterKey(s.rootPath, 0) - endKey := makeClusterKey(s.rootPath, math.MaxUint32) - rangeLimit := s.opts.MaxScanLimit - - var clusters []Cluster - do := func(key string, value []byte) error { - cluster := &clusterpb.Cluster{} - if err := proto.Unmarshal(value, cluster); err != nil { - return ErrDecode.WithCausef("decode cluster, key:%s, value:%v, err:%v", key, value, err) - } - - clusters = append(clusters, convertClusterPB(cluster)) - return nil - } - - err := etcdutil.Scan(ctx, s.client, startKey, endKey, rangeLimit, do) - if err != nil { - return ListClustersResult{}, errors.WithMessagef(err, "etcd scan clusters, start key:%s, end key:%s, range limit:%d", startKey, endKey, rangeLimit) - } - - return ListClustersResult{ - Clusters: clusters, - }, nil -} - -// CreateCluster return error if the cluster already exists. -func (s *metaStorageImpl) CreateCluster(ctx context.Context, req CreateClusterRequest) error { - c := convertClusterToPB(req.Cluster) - value, err := proto.Marshal(&c) - if err != nil { - return ErrEncode.WithCausef("encode cluster,clusterID:%d, err:%v", req.Cluster.ID, err) - } - - key := makeClusterKey(s.rootPath, c.Id) - - // Check if the key exists, if not,create cluster; Otherwise, the cluster already exists and return an error. - keyMissing := clientv3util.KeyMissing(key) - opCreateCluster := clientv3.OpPut(key, string(value)) - - resp, err := s.client.Txn(ctx). - If(keyMissing). - Then(opCreateCluster). - Commit() - if err != nil { - return errors.WithMessagef(err, "create cluster, clusterID:%d, key:%s", req.Cluster.ID, key) - } - if !resp.Succeeded { - return ErrCreateClusterAgain.WithCausef("cluster may already exist, clusterID:%d, key:%s, resp:%v", req.Cluster.ID, key, resp) - } - return nil -} - -// UpdateCluster return an error if the cluster does not exist. -func (s *metaStorageImpl) UpdateCluster(ctx context.Context, req UpdateClusterRequest) error { - c := convertClusterToPB(req.Cluster) - value, err := proto.Marshal(&c) - if err != nil { - return ErrEncode.WithCausef("encode cluster,clusterID:%d, err:%v", req.Cluster.ID, err) - } - - key := makeClusterKey(s.rootPath, c.Id) - - keyExists := clientv3util.KeyExists(key) - opUpdateCluster := clientv3.OpPut(key, string(value)) - - resp, err := s.client.Txn(ctx). - If(keyExists). - Then(opUpdateCluster). 
- Commit() - if err != nil { - return errors.WithMessagef(err, "update cluster, clusterID:%d, key:%s", req.Cluster.ID, key) - } - if !resp.Succeeded { - return ErrUpdateCluster.WithCausef("update cluster failed, clusterID:%d, key:%s, resp:%v", req.Cluster.ID, key, resp) - } - return nil -} - -// CreateClusterView return error if the cluster view already exists. -func (s *metaStorageImpl) CreateClusterView(ctx context.Context, req CreateClusterViewRequest) error { - clusterViewPB := convertClusterViewToPB(req.ClusterView) - value, err := proto.Marshal(&clusterViewPB) - if err != nil { - return ErrEncode.WithCausef("encode cluster view, clusterID:%d, err:%v", clusterViewPB.ClusterId, err) - } - - key := makeClusterViewKey(s.rootPath, clusterViewPB.ClusterId, fmtID(clusterViewPB.Version)) - latestVersionKey := makeClusterViewLatestVersionKey(s.rootPath, clusterViewPB.ClusterId) - - // Check if the key and latest version key exists, if not,create cluster view and latest version; Otherwise, the cluster view already exists and return an error. - latestVersionKeyMissing := clientv3util.KeyMissing(latestVersionKey) - keyMissing := clientv3util.KeyMissing(key) - opCreateClusterTopology := clientv3.OpPut(key, string(value)) - opCreateClusterTopologyLatestVersion := clientv3.OpPut(latestVersionKey, fmtID(clusterViewPB.Version)) - - resp, err := s.client.Txn(ctx). - If(latestVersionKeyMissing, keyMissing). - Then(opCreateClusterTopology, opCreateClusterTopologyLatestVersion). - Commit() - if err != nil { - return errors.WithMessagef(err, "create cluster view, clusterID:%d, key:%s", clusterViewPB.ClusterId, key) - } - if !resp.Succeeded { - return ErrCreateClusterViewAgain.WithCausef("cluster view may already exist, clusterID:%d, key:%s, resp:%v", clusterViewPB.ClusterId, key, resp) - } - return nil -} - -func (s *metaStorageImpl) GetClusterView(ctx context.Context, req GetClusterViewRequest) (GetClusterViewResult, error) { - var viewRes GetClusterViewResult - key := makeClusterViewLatestVersionKey(s.rootPath, uint32(req.ClusterID)) - version, err := etcdutil.Get(ctx, s.client, key) - if err != nil { - return viewRes, errors.WithMessagef(err, "get cluster view latest version, clusterID:%d, key:%s", req.ClusterID, key) - } - - key = makeClusterViewKey(s.rootPath, uint32(req.ClusterID), version) - value, err := etcdutil.Get(ctx, s.client, key) - if err != nil { - return viewRes, errors.WithMessagef(err, "get cluster view, clusterID:%d, key:%s", req.ClusterID, key) - } - - clusterView := &clusterpb.ClusterView{} - if err = proto.Unmarshal([]byte(value), clusterView); err != nil { - return viewRes, ErrDecode.WithCausef("decode cluster view, clusterID:%d, err:%v", req.ClusterID, err) - } - - viewRes = GetClusterViewResult{ - ClusterView: convertClusterViewPB(clusterView), - } - return viewRes, nil -} - -func (s *metaStorageImpl) UpdateClusterView(ctx context.Context, req UpdateClusterViewRequest) error { - clusterViewPB := convertClusterViewToPB(req.ClusterView) - - value, err := proto.Marshal(&clusterViewPB) - if err != nil { - return ErrEncode.WithCausef("encode cluster view, clusterID:%d, err:%v", req.ClusterID, err) - } - - key := makeClusterViewKey(s.rootPath, uint32(req.ClusterID), fmtID(clusterViewPB.Version)) - latestVersionKey := makeClusterViewLatestVersionKey(s.rootPath, uint32(req.ClusterID)) - - // Check whether the latest version is equal to that in etcd. If it is equal,update cluster view and latest version; Otherwise, return an error. 
- latestVersionEquals := clientv3.Compare(clientv3.Value(latestVersionKey), "=", fmtID(req.LatestVersion)) - opPutClusterTopology := clientv3.OpPut(key, string(value)) - opPutLatestVersion := clientv3.OpPut(latestVersionKey, fmtID(clusterViewPB.Version)) - - resp, err := s.client.Txn(ctx). - If(latestVersionEquals). - Then(opPutClusterTopology, opPutLatestVersion). - Commit() - if err != nil { - return errors.WithMessagef(err, "put cluster view, clusterID:%d, key:%s", req.ClusterID, key) - } - if !resp.Succeeded { - return ErrUpdateClusterViewConflict.WithCausef("cluster view may have been modified, clusterID:%d, key:%s, resp:%v", req.ClusterID, key, resp) - } - - return nil -} - -func (s *metaStorageImpl) ListSchemas(ctx context.Context, req ListSchemasRequest) (ListSchemasResult, error) { - startKey := makeSchemaKey(s.rootPath, uint32(req.ClusterID), 0) - endKey := makeSchemaKey(s.rootPath, uint32(req.ClusterID), math.MaxUint32) - rangeLimit := s.opts.MaxScanLimit - - var schemas []Schema - do := func(key string, value []byte) error { - schema := &clusterpb.Schema{} - if err := proto.Unmarshal(value, schema); err != nil { - return ErrDecode.WithCausef("decode schema, key:%s, value:%v, clusterID:%d, err:%v", key, value, req.ClusterID, err) - } - - schemas = append(schemas, convertSchemaPB(schema)) - return nil - } - - err := etcdutil.Scan(ctx, s.client, startKey, endKey, rangeLimit, do) - if err != nil { - return ListSchemasResult{}, errors.WithMessagef(err, "scan schemas, clusterID:%d, start key:%s, end key:%s, range limit:%d", req.ClusterID, startKey, endKey, rangeLimit) - } - - return ListSchemasResult{Schemas: schemas}, nil -} - -// CreateSchema return error if the schema already exists. -func (s *metaStorageImpl) CreateSchema(ctx context.Context, req CreateSchemaRequest) error { - schema := convertSchemaToPB(req.Schema) - value, err := proto.Marshal(&schema) - if err != nil { - return ErrDecode.WithCausef("encode schema, clusterID:%d, schemaID:%d, err:%v", req.ClusterID, schema.Id, err) - } - - key := makeSchemaKey(s.rootPath, uint32(req.ClusterID), schema.Id) - - // Check if the key exists, if not,create schema; Otherwise, the schema already exists and return an error. - keyMissing := clientv3util.KeyMissing(key) - opCreateSchema := clientv3.OpPut(key, string(value)) - - resp, err := s.client.Txn(ctx). - If(keyMissing). - Then(opCreateSchema). - Commit() - if err != nil { - return errors.WithMessagef(err, "create schema, clusterID:%d, schemaID:%d, key:%s", req.ClusterID, schema.Id, key) - } - if !resp.Succeeded { - return ErrCreateSchemaAgain.WithCausef("schema may already exist, clusterID:%d, schemaID:%d, key:%s, resp:%v", req.ClusterID, schema.Id, key, resp) - } - return nil -} - -// CreateTable return error if the table already exists. -func (s *metaStorageImpl) CreateTable(ctx context.Context, req CreateTableRequest) error { - table := convertTableToPB(req.Table) - value, err := proto.Marshal(&table) - if err != nil { - return ErrEncode.WithCausef("encode table, clusterID:%d, schemaID:%d, tableID:%d, err:%v", req.ClusterID, req.Table.ID, table.Id, err) - } - - key := makeTableKey(s.rootPath, uint32(req.ClusterID), uint32(req.SchemaID), table.Id) - nameToIDKey := makeNameToIDKey(s.rootPath, uint32(req.ClusterID), uint32(req.SchemaID), table.Name) - - // Check if the key and the name to id key exists, if not,create table; Otherwise, the table already exists and return an error. 
- idKeyMissing := clientv3util.KeyMissing(key) - nameKeyMissing := clientv3util.KeyMissing(nameToIDKey) - opCreateTable := clientv3.OpPut(key, string(value)) - opCreateNameToID := clientv3.OpPut(nameToIDKey, fmtID(table.Id)) - - resp, err := s.client.Txn(ctx). - If(nameKeyMissing, idKeyMissing). - Then(opCreateTable, opCreateNameToID). - Commit() - if err != nil { - return errors.WithMessagef(err, "create table, clusterID:%d, schemaID:%d, tableID:%d, key:%s", req.ClusterID, req.SchemaID, table.Id, key) - } - if !resp.Succeeded { - return ErrCreateTableAgain.WithCausef("table may already exist, clusterID:%d, schemaID:%d, tableID:%d, key:%s, resp:%v", req.ClusterID, req.SchemaID, table.Id, key, resp) - } - return nil -} - -func (s *metaStorageImpl) GetTable(ctx context.Context, req GetTableRequest) (GetTableResult, error) { - var res GetTableResult - value, err := etcdutil.Get(ctx, s.client, makeNameToIDKey(s.rootPath, uint32(req.ClusterID), uint32(req.SchemaID), req.TableName)) - if err == etcdutil.ErrEtcdKVGetNotFound { - res.Exists = false - return res, nil - } - if err != nil { - return res, errors.WithMessagef(err, "get table id, clusterID:%d, schemaID:%d, table name:%s", req.ClusterID, req.SchemaID, req.TableName) - } - - tableID, err := strconv.ParseUint(value, 10, 64) - if err != nil { - return res, errors.WithMessagef(err, "string to int failed") - } - - key := makeTableKey(s.rootPath, uint32(req.ClusterID), uint32(req.SchemaID), tableID) - value, err = etcdutil.Get(ctx, s.client, key) - if err != nil { - return res, errors.WithMessagef(err, "get table, clusterID:%d, schemaID:%d, tableID:%d, key:%s", req.ClusterID, req.SchemaID, tableID, key) - } - - table := &clusterpb.Table{} - if err = proto.Unmarshal([]byte(value), table); err != nil { - return res, ErrDecode.WithCausef("decode table, clusterID:%d, schemaID:%d, tableID:%d, err:%v", req.ClusterID, req.SchemaID, tableID, err) - } - - res = GetTableResult{ - Table: convertTablePB(table), - Exists: true, - } - return res, nil -} - -func (s *metaStorageImpl) ListTables(ctx context.Context, req ListTableRequest) (ListTablesResult, error) { - startKey := makeTableKey(s.rootPath, uint32(req.ClusterID), uint32(req.SchemaID), 0) - endKey := makeTableKey(s.rootPath, uint32(req.ClusterID), uint32(req.SchemaID), math.MaxUint64) - rangeLimit := s.opts.MaxScanLimit - - var tables []Table - do := func(key string, value []byte) error { - tablePB := &clusterpb.Table{} - if err := proto.Unmarshal(value, tablePB); err != nil { - return ErrDecode.WithCausef("decode table, key:%s, value:%v, clusterID:%d, schemaID:%d, err:%v", key, value, req.ClusterID, req.SchemaID, err) - } - table := convertTablePB(tablePB) - tables = append(tables, table) - return nil - } - err := etcdutil.Scan(ctx, s.client, startKey, endKey, rangeLimit, do) - if err != nil { - return ListTablesResult{}, errors.WithMessagef(err, "scan tables, clusterID:%d, schemaID:%d, start key:%s, end key:%s, range limit:%d", req.ClusterID, req.SchemaID, startKey, endKey, rangeLimit) - } - - return ListTablesResult{ - Tables: tables, - }, nil -} - -func (s *metaStorageImpl) DeleteTable(ctx context.Context, req DeleteTableRequest) error { - nameKey := makeNameToIDKey(s.rootPath, uint32(req.ClusterID), uint32(req.SchemaID), req.TableName) - - value, err := etcdutil.Get(ctx, s.client, nameKey) - if err != nil { - return errors.WithMessagef(err, "get table id, clusterID:%d, schemaID:%d, table name:%s", req.ClusterID, req.SchemaID, req.TableName) - } - - tableID, err := strconv.ParseUint(value, 10, 
64) - if err != nil { - return errors.WithMessagef(err, "string to int failed") - } - - key := makeTableKey(s.rootPath, uint32(req.ClusterID), uint32(req.SchemaID), tableID) - - nameKeyExists := clientv3util.KeyExists(nameKey) - idKeyExists := clientv3util.KeyExists(key) - - opDeleteNameToID := clientv3.OpDelete(nameKey) - opDeleteTable := clientv3.OpDelete(key) - - resp, err := s.client.Txn(ctx). - If(nameKeyExists, idKeyExists). - Then(opDeleteNameToID, opDeleteTable). - Commit() - if err != nil { - return errors.WithMessagef(err, "delete table, clusterID:%d, schemaID:%d, tableID:%d, tableName:%s", req.ClusterID, req.SchemaID, tableID, req.TableName) - } - if !resp.Succeeded { - return ErrDeleteTableAgain.WithCausef("table may have been deleted, clusterID:%d, schemaID:%d, tableID:%d, tableName:%s", req.ClusterID, req.SchemaID, tableID, req.TableName) - } - - return nil -} - -func (s *metaStorageImpl) AssignTableToShard(ctx context.Context, req AssignTableToShardRequest) error { - key := makeTableAssignKey(s.rootPath, uint32(req.ClusterID), uint32(req.SchemaID), req.TableName) - - // Check if the key exists, if not,save table assign result; Otherwise, the table assign result already exists and return an error. - keyMissing := clientv3util.KeyMissing(key) - opCreateAssignTable := clientv3.OpPut(key, strconv.Itoa(int(req.ShardID))) - - resp, err := s.client.Txn(ctx). - If(keyMissing). - Then(opCreateAssignTable). - Commit() - if err != nil { - return errors.WithMessagef(err, "create assign table, clusterID:%d, schemaID:%d, key:%s", req.ClusterID, req.ShardID, key) - } - if !resp.Succeeded { - return ErrCreateSchemaAgain.WithCausef("assign table may already exist, clusterID:%d, schemaID:%d, key:%s, resp:%v", req.ClusterID, req.SchemaID, key, resp) - } - - return nil -} - -func (s *metaStorageImpl) DeleteTableAssignedShard(ctx context.Context, req DeleteTableAssignedRequest) error { - key := makeTableAssignKey(s.rootPath, uint32(req.ClusterID), uint32(req.SchemaID), req.TableName) - - keyExists := clientv3util.KeyExists(key) - opDeleteAssignTable := clientv3.OpDelete(key) - - resp, err := s.client.Txn(ctx). - If(keyExists). - Then(opDeleteAssignTable). 
- Commit() - if err != nil { - return errors.WithMessagef(err, "delete assign table, clusterID:%d, schemaID:%d, tableName:%s", req.ClusterID, req.SchemaID, req.TableName) - } - if !resp.Succeeded { - return ErrDeleteTableAgain.WithCausef("assign table may have been deleted, clusterID:%d, schemaID:%d, tableName:%s", req.ClusterID, req.SchemaID, req.TableName) - } - - return nil -} - -func (s *metaStorageImpl) ListTableAssignedShard(ctx context.Context, req ListAssignTableRequest) (ListTableAssignedShardResult, error) { - key := makeTableAssignPrefixKey(s.rootPath, uint32(req.ClusterID), uint32(req.SchemaID)) - rangeLimit := s.opts.MaxScanLimit - - var tableAssigns []TableAssign - do := func(key string, value []byte) error { - tableName := etcdutil.GetLastPathSegment(key) - shardIDStr := string(value) - shardID, err := strconv.ParseUint(shardIDStr, 10, 32) - if err != nil { - return err - } - tableAssigns = append(tableAssigns, TableAssign{ - TableName: tableName, - ShardID: ShardID(shardID), - }) - return nil - } - - if err := etcdutil.ScanWithPrefix(ctx, s.client, key, do); err != nil { - return ListTableAssignedShardResult{}, errors.WithMessagef(err, "scan tables, clusterID:%d, schemaID:%d, prefix key:%s, range limit:%d", req.ClusterID, req.SchemaID, key, rangeLimit) - } - - return ListTableAssignedShardResult{TableAssigns: tableAssigns}, nil -} - -func (s *metaStorageImpl) createNShardViews(ctx context.Context, clusterID ClusterID, shardViews []ShardView, ifConds []clientv3.Cmp, opCreates []clientv3.Op) error { - for _, shardView := range shardViews { - shardViewPB := convertShardViewToPB(shardView) - value, err := proto.Marshal(&shardViewPB) - if err != nil { - return ErrEncode.WithCausef("encode shard clusterView, clusterID:%d, shardID:%d, err:%v", clusterID, shardView.ShardID, err) - } - - key := makeShardViewKey(s.rootPath, uint32(clusterID), uint32(shardView.ShardID), fmtID(shardView.Version)) - latestVersionKey := makeShardViewLatestVersionKey(s.rootPath, uint32(clusterID), uint32(shardView.ShardID)) - - // Check if the key and latest version key exists, if not,create shard clusterView and latest version; Otherwise, the shard clusterView already exists and return an error. - ifConds = append(ifConds, clientv3util.KeyMissing(key), clientv3util.KeyMissing(latestVersionKey)) - opCreates = append(opCreates, clientv3.OpPut(key, string(value)), clientv3.OpPut(latestVersionKey, fmtID(shardView.Version))) - } - - resp, err := s.client.Txn(ctx). - If(ifConds...). - Then(opCreates...). 
- Commit() - if err != nil { - return errors.WithMessagef(err, "create shard view, clusterID:%d", clusterID) - } - if !resp.Succeeded { - return ErrCreateShardViewAgain.WithCausef("shard view may already exist, clusterID:%d, resp:%v", clusterID, resp) - } - - return nil -} - -func (s *metaStorageImpl) CreateShardViews(ctx context.Context, req CreateShardViewsRequest) error { - ifConds := make([]clientv3.Cmp, 0, s.opts.MaxOpsPerTxn) - opCreates := make([]clientv3.Op, 0, s.opts.MaxOpsPerTxn) - numShardViews := len(req.ShardViews) - for start := 0; start < numShardViews; start += s.opts.MaxOpsPerTxn { - end := start + s.opts.MaxOpsPerTxn - if end > numShardViews { - end = numShardViews - } - - if err := s.createNShardViews(ctx, req.ClusterID, req.ShardViews[start:end], ifConds, opCreates); err != nil { - return err - } - ifConds = ifConds[:0] - opCreates = opCreates[:0] - } - - return nil -} - -func (s *metaStorageImpl) ListShardViews(ctx context.Context, req ListShardViewsRequest) (ListShardViewsResult, error) { - var listRes ListShardViewsResult - var shardViews []ShardView - prefix := makeShardViewVersionKey(s.rootPath, uint32(req.ClusterID)) - keys, err := etcdutil.List(ctx, s.client, prefix) - if err != nil { - return listRes, errors.WithMessagef(err, "list shard view, clusterID:%d", req.ClusterID) - } - for _, key := range keys { - if strings.HasSuffix(key, latestVersion) { - shardIDKey, err := decodeShardViewVersionKey(key) - if err != nil { - return listRes, errors.WithMessagef(err, "list shard view latest version, clusterID:%d, shardIDKey:%s, key:%s", req.ClusterID, shardIDKey, key) - } - shardID, err := strconv.ParseUint(shardIDKey, 10, 32) - if err != nil { - return listRes, errors.WithMessagef(err, "list shard view latest version, clusterID:%d, shardID:%d, key:%s", req.ClusterID, shardID, key) - } - - version, err := etcdutil.Get(ctx, s.client, key) - if err != nil { - return listRes, errors.WithMessagef(err, "list shard view latest version, clusterID:%d, shardID:%d, key:%s", req.ClusterID, shardID, key) - } - - key = makeShardViewKey(s.rootPath, uint32(req.ClusterID), uint32(shardID), version) - value, err := etcdutil.Get(ctx, s.client, key) - if err != nil { - return listRes, errors.WithMessagef(err, "list shard view, clusterID:%d, shardID:%d, key:%s", req.ClusterID, shardID, key) - } - - shardViewPB := &clusterpb.ShardView{} - if err = proto.Unmarshal([]byte(value), shardViewPB); err != nil { - return listRes, ErrDecode.WithCausef("decode shard view, clusterID:%d, shardID:%d, err:%v", req.ClusterID, shardID, err) - } - shardView := convertShardViewPB(shardViewPB) - shardViews = append(shardViews, shardView) - } - } - - listRes = ListShardViewsResult{ - ShardViews: shardViews, - } - return listRes, nil -} - -func (s *metaStorageImpl) UpdateShardView(ctx context.Context, req UpdateShardViewRequest) error { - shardViewPB := convertShardViewToPB(req.ShardView) - value, err := proto.Marshal(&shardViewPB) - if err != nil { - return ErrEncode.WithCausef("encode shard view, clusterID:%d, shardID:%d, err:%v", req.ClusterID, req.ShardView.ShardID, err) - } - - key := makeShardViewKey(s.rootPath, uint32(req.ClusterID), shardViewPB.ShardId, fmtID(shardViewPB.GetVersion())) - oldTopologyKey := makeShardViewKey(s.rootPath, uint32(req.ClusterID), shardViewPB.ShardId, fmtID(req.PrevVersion)) - latestVersionKey := makeShardViewLatestVersionKey(s.rootPath, uint32(req.ClusterID), shardViewPB.ShardId) - - // Check whether the latest version is equal to that in etcd. 
If it is equal,update shard clusterView and latest version; Otherwise, return an error. - opPutLatestVersion := clientv3.OpPut(latestVersionKey, fmtID(shardViewPB.Version)) - opPutShardTopology := clientv3.OpPut(key, string(value)) - - resp, err := s.client.Txn(ctx). - Then(opPutLatestVersion, opPutShardTopology). - Commit() - if err != nil { - return errors.WithMessagef(err, "fail to put shard clusterView, clusterID:%d, shardID:%d, key:%s", req.ClusterID, shardViewPB.ShardId, key) - } - if !resp.Succeeded { - return ErrUpdateShardViewConflict.WithCausef("shard view may have been modified, clusterID:%d, shardID:%d, key:%s, resp:%v", req.ClusterID, shardViewPB.ShardId, key, resp) - } - - // Try to remove expired shard view. - if req.PrevVersion != shardViewPB.Version { - opDelShardTopology := clientv3.OpDelete(oldTopologyKey) - if _, err := s.client.Do(ctx, opDelShardTopology); err != nil { - log.Warn("remove expired shard view failed", zap.Error(err), zap.String("oldTopologyKey", oldTopologyKey)) - } - } - - return nil -} - -func (s *metaStorageImpl) ListNodes(ctx context.Context, req ListNodesRequest) (ListNodesResult, error) { - startKey := makeNodeKey(s.rootPath, uint32(req.ClusterID), string([]byte{0})) - endKey := makeNodeKey(s.rootPath, uint32(req.ClusterID), string([]byte{255})) - rangeLimit := s.opts.MaxScanLimit - - var nodes []Node - do := func(key string, value []byte) error { - nodePB := &clusterpb.Node{} - if err := proto.Unmarshal(value, nodePB); err != nil { - return ErrDecode.WithCausef("decode node, key:%s, value:%v, clusterID:%d, err:%v", key, value, req.ClusterID, err) - } - node := convertNodePB(nodePB) - nodes = append(nodes, node) - return nil - } - - err := etcdutil.Scan(ctx, s.client, startKey, endKey, rangeLimit, do) - if err != nil { - return ListNodesResult{}, errors.WithMessagef(err, "scan nodes, clusterID:%d, start key:%s, end key:%s, range limit:%d", req.ClusterID, startKey, endKey, rangeLimit) - } - - return ListNodesResult{ - Nodes: nodes, - }, nil -} - -func (s *metaStorageImpl) CreateOrUpdateNode(ctx context.Context, req CreateOrUpdateNodeRequest) error { - nodePB := convertNodeToPB(req.Node) - - key := makeNodeKey(s.rootPath, uint32(req.ClusterID), req.Node.Name) - - value, err := proto.Marshal(&nodePB) - if err != nil { - return ErrEncode.WithCausef("encode node, clusterID:%d, node name:%s, err:%v", req.ClusterID, req.Node.Name, err) - } - - _, err = s.client.Put(ctx, key, string(value)) - if err != nil { - return errors.WithMessagef(err, "create or update node, clusterID:%d, node name:%s, key:%s", req.ClusterID, req.Node.Name, key) - } - - return nil -} diff --git a/horaemeta/server/storage/storage_test.go b/horaemeta/server/storage/storage_test.go deleted file mode 100644 index 16ed22ed02..0000000000 --- a/horaemeta/server/storage/storage_test.go +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package storage - -import ( - "context" - "fmt" - "testing" - "time" - - "github.com/apache/incubator-horaedb-meta/server/etcdutil" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - clientv3 "go.etcd.io/etcd/client/v3" - "go.etcd.io/etcd/server/v3/embed" -) - -const ( - defaultRootPath = "/meta" - name0 = "name0" - nameFormat = "name%d" - defaultClusterID = 1 - defaultSchemaID = 1 - defaultVersion = 0 - defaultCount = 10 - defaultRequestTimeout = time.Second * 100 -) - -func TestStorage_CreateAndListCluster(t *testing.T) { - re := require.New(t) - s := newTestStorage(t) - ctx := context.Background() - - // Test to create expectClusters. - expectClusters := make([]Cluster, 0, defaultCount) - for i := 0; i < defaultCount; i++ { - cluster := Cluster{ - ID: ClusterID(i), - Name: fmt.Sprintf(nameFormat, i), - MinNodeCount: uint32(i), - ShardTotal: uint32(i), - TopologyType: TopologyTypeStatic, - ProcedureExecutingBatchSize: 100, - CreatedAt: uint64(time.Now().UnixMilli()), - ModifiedAt: 0, - } - req := CreateClusterRequest{ - Cluster: cluster, - } - - err := s.CreateCluster(ctx, req) - re.NoError(err) - expectClusters = append(expectClusters, cluster) - } - - // Test to list expectClusters. - ret, err := s.ListClusters(ctx) - re.NoError(err) - - clusters := ret.Clusters - for i := 0; i < defaultCount; i++ { - re.Equal(expectClusters[i].ID, clusters[i].ID) - re.Equal(expectClusters[i].Name, clusters[i].Name) - re.Equal(expectClusters[i].MinNodeCount, clusters[i].MinNodeCount) - re.Equal(expectClusters[i].CreatedAt, clusters[i].CreatedAt) - re.Equal(expectClusters[i].ShardTotal, clusters[i].ShardTotal) - } -} - -func TestStorage_CreateAndGetClusterView(t *testing.T) { - re := require.New(t) - s := newTestStorage(t) - ctx, cancel := context.WithTimeout(context.Background(), defaultRequestTimeout) - defer cancel() - - // Test to create cluster view. - expectClusterView := ClusterView{ - ClusterID: defaultClusterID, - Version: defaultVersion, - State: ClusterStateEmpty, - ShardNodes: nil, - CreatedAt: uint64(time.Now().UnixMilli()), - } - - req := CreateClusterViewRequest{ - ClusterView: expectClusterView, - } - err := s.CreateClusterView(ctx, req) - re.NoError(err) - - // Test to get cluster view. - ret, err := s.GetClusterView(ctx, GetClusterViewRequest{ - ClusterID: defaultClusterID, - }) - re.NoError(err) - re.Equal(expectClusterView.ClusterID, ret.ClusterView.ClusterID) - re.Equal(expectClusterView.Version, ret.ClusterView.Version) - re.Equal(expectClusterView.CreatedAt, ret.ClusterView.CreatedAt) - - // Test to put cluster view. 
- expectClusterView.Version = uint64(1) - putReq := UpdateClusterViewRequest{ - ClusterID: defaultClusterID, - ClusterView: expectClusterView, - LatestVersion: 0, - } - err = s.UpdateClusterView(ctx, putReq) - re.NoError(err) - - ret, err = s.GetClusterView(ctx, GetClusterViewRequest{ - ClusterID: defaultClusterID, - }) - re.NoError(err) - re.Equal(expectClusterView.ClusterID, ret.ClusterView.ClusterID) - re.Equal(expectClusterView.Version, ret.ClusterView.Version) - re.Equal(expectClusterView.CreatedAt, ret.ClusterView.CreatedAt) -} - -func TestStorage_CreateAndListScheme(t *testing.T) { - re := require.New(t) - s := newTestStorage(t) - ctx, cancel := context.WithTimeout(context.Background(), defaultRequestTimeout) - defer cancel() - - // Test to create expectSchemas. - expectSchemas := make([]Schema, 0, defaultCount) - for i := 0; i < defaultCount; i++ { - schema := Schema{ - ID: SchemaID(i), - ClusterID: defaultClusterID, - Name: fmt.Sprintf(nameFormat, i), - CreatedAt: uint64(time.Now().UnixMilli()), - } - req := CreateSchemaRequest{ - ClusterID: defaultClusterID, - Schema: schema, - } - err := s.CreateSchema(ctx, req) - re.NoError(err) - expectSchemas = append(expectSchemas, schema) - } - - // Test to list expectSchemas. - ret, err := s.ListSchemas(ctx, ListSchemasRequest{ - ClusterID: defaultClusterID, - }) - re.NoError(err) - for i := 0; i < defaultCount; i++ { - re.Equal(expectSchemas[i].ID, ret.Schemas[i].ID) - re.Equal(expectSchemas[i].ClusterID, ret.Schemas[i].ClusterID) - re.Equal(expectSchemas[i].Name, ret.Schemas[i].Name) - re.Equal(expectSchemas[i].CreatedAt, ret.Schemas[i].CreatedAt) - } -} - -func TestStorage_CreateAndGetAndListTable(t *testing.T) { - re := require.New(t) - s := newTestStorage(t) - ctx, cancel := context.WithTimeout(context.Background(), defaultRequestTimeout*100) - defer cancel() - - // Test to create tables. - expectTables := make([]Table, 0, defaultCount) - for i := 0; i < defaultCount; i++ { - table := Table{ - ID: TableID(i), - Name: fmt.Sprintf(nameFormat, i), - SchemaID: defaultSchemaID, - CreatedAt: 0, - PartitionInfo: PartitionInfo{Info: nil}, - } - req := CreateTableRequest{ - ClusterID: defaultClusterID, - SchemaID: defaultSchemaID, - Table: table, - } - err := s.CreateTable(ctx, req) - re.NoError(err) - expectTables = append(expectTables, table) - } - - // Test to get table. - tableResult, err := s.GetTable(ctx, GetTableRequest{ - ClusterID: defaultClusterID, - SchemaID: defaultSchemaID, - TableName: name0, - }) - re.NoError(err) - re.True(tableResult.Exists) - re.Equal(expectTables[0].ID, tableResult.Table.ID) - re.Equal(expectTables[0].Name, tableResult.Table.Name) - re.Equal(expectTables[0].SchemaID, tableResult.Table.SchemaID) - re.Equal(expectTables[0].CreatedAt, tableResult.Table.CreatedAt) - - // Test to list tables. - tablesResult, err := s.ListTables(ctx, ListTableRequest{ - ClusterID: defaultClusterID, - SchemaID: defaultSchemaID, - }) - re.NoError(err) - - for i := 0; i < defaultCount; i++ { - re.True(tableResult.Exists) - re.Equal(expectTables[i].ID, tablesResult.Tables[i].ID) - re.Equal(expectTables[i].Name, tablesResult.Tables[i].Name) - re.Equal(expectTables[i].SchemaID, tablesResult.Tables[i].SchemaID) - re.Equal(expectTables[i].CreatedAt, tablesResult.Tables[i].CreatedAt) - } - - // Test to delete table. 
- err = s.DeleteTable(ctx, DeleteTableRequest{ - ClusterID: defaultClusterID, - SchemaID: defaultSchemaID, - TableName: name0, - }) - re.NoError(err) - - tableResult, err = s.GetTable(ctx, GetTableRequest{ - ClusterID: defaultClusterID, - SchemaID: defaultSchemaID, - TableName: name0, - }) - re.NoError(err) - re.Empty(tableResult.Table) - re.True(!tableResult.Exists) -} - -func TestStorage_CreateAndListShardView(t *testing.T) { - re := require.New(t) - s := newTestStorage(t) - ctx, cancel := context.WithTimeout(context.Background(), defaultRequestTimeout) - defer cancel() - - // Test to create shard topologies. - expectShardViews := make([]ShardView, 0, defaultCount) - var shardIDs []ShardID - for i := 0; i < defaultCount; i++ { - shardView := ShardView{ - ShardID: ShardID(i), - Version: defaultVersion, - TableIDs: nil, - CreatedAt: uint64(time.Now().UnixMilli()), - } - expectShardViews = append(expectShardViews, shardView) - shardIDs = append(shardIDs, ShardID(i)) - } - err := s.CreateShardViews(ctx, CreateShardViewsRequest{ - ClusterID: defaultClusterID, - ShardViews: expectShardViews, - }) - re.NoError(err) - - // Test to list shard topologies. - ret, err := s.ListShardViews(ctx, ListShardViewsRequest{ - ClusterID: defaultClusterID, - ShardIDs: shardIDs, - }) - re.NoError(err) - for i := 0; i < defaultCount; i++ { - re.Equal(expectShardViews[i].ShardID, ret.ShardViews[i].ShardID) - re.Equal(expectShardViews[i].Version, ret.ShardViews[i].Version) - re.Equal(expectShardViews[i].CreatedAt, ret.ShardViews[i].CreatedAt) - } - - newVersion := uint64(1) - // Test to put shard topologies. - for i := 0; i < defaultCount; i++ { - expectShardViews[i].Version = newVersion - err = s.UpdateShardView(ctx, UpdateShardViewRequest{ - ClusterID: defaultClusterID, - ShardView: expectShardViews[i], - PrevVersion: defaultVersion, - }) - re.NoError(err) - } - - ret, err = s.ListShardViews(ctx, ListShardViewsRequest{ - ClusterID: defaultClusterID, - ShardIDs: shardIDs, - }) - re.NoError(err) - for i := 0; i < defaultCount; i++ { - re.Equal(expectShardViews[i].ShardID, ret.ShardViews[i].ShardID) - re.Equal(expectShardViews[i].Version, ret.ShardViews[i].Version) - re.Equal(expectShardViews[i].CreatedAt, ret.ShardViews[i].CreatedAt) - } -} - -func TestStorage_CreateOrUpdateNode(t *testing.T) { - re := require.New(t) - s := newTestStorage(t) - ctx, cancel := context.WithTimeout(context.Background(), defaultRequestTimeout) - defer cancel() - - // Test to create nodes. - expectNodes := make([]Node, 0, defaultCount) - for i := 0; i < defaultCount; i++ { - var nodeStats NodeStats - node := Node{ - Name: fmt.Sprintf(nameFormat, i), - NodeStats: nodeStats, - LastTouchTime: uint64(time.Now().UnixMilli()), - State: NodeStateOnline, - } - err := s.CreateOrUpdateNode(ctx, CreateOrUpdateNodeRequest{ - ClusterID: defaultClusterID, - Node: node, - }) - re.NoError(err) - expectNodes = append(expectNodes, node) - } - - // Test to list nodes. 
- ret, err := s.ListNodes(ctx, ListNodesRequest{ - ClusterID: defaultClusterID, - }) - re.NoError(err) - - re.Equal(len(ret.Nodes), defaultCount) - for i := 0; i < defaultCount; i++ { - re.Equal(ret.Nodes[i].Name, expectNodes[i].Name) - re.Equal(ret.Nodes[i].LastTouchTime, expectNodes[i].LastTouchTime) - } -} - -func newTestStorage(t *testing.T) Storage { - cfg := etcdutil.NewTestSingleConfig() - etcd, err := embed.StartEtcd(cfg) - assert.NoError(t, err) - - <-etcd.Server.ReadyNotify() - - endpoint := cfg.ListenClientUrls[0].String() - client, err := clientv3.New(clientv3.Config{ - Endpoints: []string{endpoint}, - }) - assert.NoError(t, err) - - ops := Options{MaxScanLimit: 100, MinScanLimit: 10, MaxOpsPerTxn: 32} - - return newEtcdStorage(client, defaultRootPath, ops) -} diff --git a/horaemeta/server/storage/types.go b/horaemeta/server/storage/types.go deleted file mode 100644 index 5711283a7c..0000000000 --- a/horaemeta/server/storage/types.go +++ /dev/null @@ -1,605 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package storage - -import ( - "fmt" - "time" - - "github.com/apache/incubator-horaedb-proto/golang/pkg/clusterpb" - "github.com/apache/incubator-horaedb-proto/golang/pkg/metaservicepb" -) - -type ( - ClusterID uint32 - SchemaID uint32 - ShardID uint32 - TableID uint64 - ClusterState int - ShardRole int - ShardStatus int - NodeState int - TopologyType string -) - -const ( - ClusterStateEmpty ClusterState = iota + 1 - ClusterStateStable - ClusterStatePrepare - - TopologyTypeUnknown = "unknown" - TopologyTypeStatic = "static" - TopologyTypeDynamic = "dynamic" -) - -const ( - ShardRoleLeader ShardRole = iota + 1 - ShardRoleFollower -) - -const ( - ShardStatusUnknown ShardStatus = iota - ShardStatusReady - ShardStatusPartialOpen -) - -const ( - NodeStateUnknown NodeState = iota - NodeStateOnline - NodeStateOffline -) - -type ListClustersResult struct { - Clusters []Cluster -} - -type CreateClusterRequest struct { - Cluster Cluster -} - -type UpdateClusterRequest struct { - Cluster Cluster -} - -type CreateClusterViewRequest struct { - ClusterView ClusterView -} - -type GetClusterViewRequest struct { - ClusterID ClusterID -} - -type GetClusterViewResult struct { - ClusterView ClusterView -} - -type UpdateClusterViewRequest struct { - ClusterID ClusterID - ClusterView ClusterView - LatestVersion uint64 -} - -type ListSchemasRequest struct { - ClusterID ClusterID -} - -type ListSchemasResult struct { - Schemas []Schema -} - -type CreateSchemaRequest struct { - ClusterID ClusterID - Schema Schema -} - -type CreateTableRequest struct { - ClusterID ClusterID - SchemaID SchemaID - Table Table -} - -type GetTableRequest struct { - ClusterID ClusterID - SchemaID SchemaID - TableName string -} - -type GetTableResult struct { - Table Table - Exists bool -} - -type ListTableRequest struct { - ClusterID ClusterID - SchemaID SchemaID -} - -type ListTablesResult struct { - Tables []Table -} - -type DeleteTableRequest struct { - ClusterID ClusterID - SchemaID SchemaID - TableName string -} - -type AssignTableToShardRequest struct { - ClusterID ClusterID - SchemaID SchemaID - TableName string - ShardID ShardID -} - -type DeleteTableAssignedRequest struct { - ClusterID ClusterID - SchemaID SchemaID - TableName string -} - -type ListAssignTableRequest struct { - ClusterID ClusterID - SchemaID SchemaID -} - -type ListTableAssignedShardResult struct { - TableAssigns []TableAssign -} - -type CreateShardViewsRequest struct { - ClusterID ClusterID - ShardViews []ShardView -} - -type ListShardViewsRequest struct { - ClusterID ClusterID - ShardIDs []ShardID -} - -type ListShardViewsResult struct { - ShardViews []ShardView -} - -type UpdateShardViewRequest struct { - ClusterID ClusterID - ShardView ShardView - PrevVersion uint64 -} - -type ListNodesRequest struct { - ClusterID ClusterID -} - -type ListNodesResult struct { - Nodes []Node -} - -type CreateOrUpdateNodeRequest struct { - ClusterID ClusterID - Node Node -} - -type Cluster struct { - ID ClusterID - Name string - MinNodeCount uint32 - ShardTotal uint32 - TopologyType TopologyType - ProcedureExecutingBatchSize uint32 - CreatedAt uint64 - ModifiedAt uint64 -} - -type ShardNode struct { - ID ShardID - ShardRole ShardRole - NodeName string -} - -type ClusterView struct { - ClusterID ClusterID - Version uint64 - State ClusterState - ShardNodes []ShardNode - CreatedAt uint64 -} - -func NewClusterView(clusterID ClusterID, version uint64, state ClusterState, shardNodes []ShardNode) ClusterView { - return ClusterView{ - ClusterID: clusterID, - Version: version, 
- State: state, - ShardNodes: shardNodes, - CreatedAt: uint64(time.Now().UnixMilli()), - } -} - -type Schema struct { - ID SchemaID - ClusterID ClusterID - Name string - CreatedAt uint64 -} - -type PartitionInfo struct { - Info *clusterpb.PartitionInfo `json:"info,omitempty"` -} - -type Table struct { - ID TableID - Name string - SchemaID SchemaID - CreatedAt uint64 - PartitionInfo PartitionInfo -} - -func (t Table) IsPartitioned() bool { - return t.PartitionInfo.Info != nil -} - -type TableAssign struct { - TableName string - ShardID ShardID -} - -type ShardView struct { - ShardID ShardID - Version uint64 - TableIDs []TableID - CreatedAt uint64 -} - -func NewShardView(shardID ShardID, version uint64, tableIDs []TableID) ShardView { - return ShardView{ - ShardID: shardID, - Version: version, - TableIDs: tableIDs, - CreatedAt: uint64(time.Now().UnixMilli()), - } -} - -type NodeStats struct { - Lease uint32 - Zone string - NodeVersion string -} - -func NewEmptyNodeStats() NodeStats { - var stats NodeStats - return stats -} - -type Node struct { - Name string - NodeStats NodeStats - LastTouchTime uint64 - State NodeState -} - -func ConvertShardRolePB(role clusterpb.ShardRole) ShardRole { - switch role { - case clusterpb.ShardRole_LEADER: - return ShardRoleLeader - case clusterpb.ShardRole_FOLLOWER: - return ShardRoleFollower - } - return ShardRoleFollower -} - -func convertNodeToPB(node Node) clusterpb.Node { - nodeStats := convertNodeStatsToPB(node.NodeStats) - return clusterpb.Node{ - Name: node.Name, - Stats: &nodeStats, - LastTouchTime: node.LastTouchTime, - State: convertNodeStateToPB(node.State), - } -} - -func convertClusterPB(cluster *clusterpb.Cluster) Cluster { - return Cluster{ - ID: ClusterID(cluster.Id), - Name: cluster.Name, - MinNodeCount: cluster.MinNodeCount, - ShardTotal: cluster.ShardTotal, - TopologyType: convertTopologyTypePB(cluster.TopologyType), - ProcedureExecutingBatchSize: cluster.ProcedureExecutingBatchSize, - CreatedAt: cluster.CreatedAt, - ModifiedAt: cluster.ModifiedAt, - } -} - -func convertClusterToPB(cluster Cluster) clusterpb.Cluster { - return clusterpb.Cluster{ - Id: uint32(cluster.ID), - Name: cluster.Name, - MinNodeCount: cluster.MinNodeCount, - ShardTotal: cluster.ShardTotal, - // TODO: add EnableSchedule to cluster - EnableSchedule: false, - TopologyType: convertTopologyTypeToPB(cluster.TopologyType), - ProcedureExecutingBatchSize: cluster.ProcedureExecutingBatchSize, - CreatedAt: cluster.CreatedAt, - ModifiedAt: cluster.ModifiedAt, - } -} - -func convertTopologyTypeToPB(topologyType TopologyType) clusterpb.Cluster_TopologyType { - switch topologyType { - case TopologyTypeUnknown: - return clusterpb.Cluster_UNKNOWN - case TopologyTypeStatic: - return clusterpb.Cluster_STATIC - case TopologyTypeDynamic: - return clusterpb.Cluster_DYNAMIC - } - return clusterpb.Cluster_STATIC -} - -func convertTopologyTypePB(topologyType clusterpb.Cluster_TopologyType) TopologyType { - switch topologyType { - case clusterpb.Cluster_UNKNOWN: - return TopologyTypeUnknown - case clusterpb.Cluster_STATIC: - return TopologyTypeStatic - case clusterpb.Cluster_DYNAMIC: - return TopologyTypeDynamic - } - return TopologyTypeStatic -} - -func convertClusterStateToPB(state ClusterState) clusterpb.ClusterView_ClusterState { - switch state { - case ClusterStateEmpty: - return clusterpb.ClusterView_EMPTY - case ClusterStateStable: - return clusterpb.ClusterView_STABLE - case ClusterStatePrepare: - return clusterpb.ClusterView_PREPARE_REBALANCE - } - return clusterpb.ClusterView_EMPTY 
-} - -func convertClusterStatePB(state clusterpb.ClusterView_ClusterState) ClusterState { - switch state { - case clusterpb.ClusterView_EMPTY, clusterpb.ClusterView_AWAITING_OPEN, clusterpb.ClusterView_AWAITING_CLOSE: - return ClusterStateEmpty - case clusterpb.ClusterView_PREPARE_REBALANCE: - return ClusterStatePrepare - case clusterpb.ClusterView_STABLE: - return ClusterStateStable - } - - panic(fmt.Sprintf("invalid state:%v", state)) -} - -func ConvertShardRoleToPB(role ShardRole) clusterpb.ShardRole { - switch role { - case ShardRoleLeader: - return clusterpb.ShardRole_LEADER - case ShardRoleFollower: - return clusterpb.ShardRole_FOLLOWER - } - return clusterpb.ShardRole_FOLLOWER -} - -func ConvertShardStatusPB(status *metaservicepb.ShardInfo_Status) ShardStatus { - if status == nil { - return ShardStatusReady - } - switch *status { - case metaservicepb.ShardInfo_PartialOpen: - return ShardStatusPartialOpen - case metaservicepb.ShardInfo_Ready: - return ShardStatusReady - } - return ShardStatusReady -} - -func ConvertShardStatusToPB(status ShardStatus) metaservicepb.ShardInfo_Status { - switch status { - case ShardStatusPartialOpen: - return metaservicepb.ShardInfo_PartialOpen - case ShardStatusReady: - return metaservicepb.ShardInfo_Ready - case ShardStatusUnknown: - // FIXME: shall we introduce unknown state to the pb definitions. - return metaservicepb.ShardInfo_Ready - } - return metaservicepb.ShardInfo_Ready -} - -func convertShardNodeToPB(shardNode ShardNode) clusterpb.ShardNode { - return clusterpb.ShardNode{ - Id: uint32(shardNode.ID), - ShardRole: ConvertShardRoleToPB(shardNode.ShardRole), - Node: shardNode.NodeName, - } -} - -func convertShardNodePB(shardNode *clusterpb.ShardNode) ShardNode { - return ShardNode{ - ID: ShardID(shardNode.Id), - ShardRole: ConvertShardRolePB(shardNode.ShardRole), - NodeName: shardNode.Node, - } -} - -func convertClusterViewToPB(view ClusterView) clusterpb.ClusterView { - shardViews := make([]*clusterpb.ShardNode, 0, len(view.ShardNodes)) - for _, shardNode := range view.ShardNodes { - shardNodePB := convertShardNodeToPB(shardNode) - shardViews = append(shardViews, &shardNodePB) - } - - return clusterpb.ClusterView{ - ClusterId: uint32(view.ClusterID), - Version: view.Version, - State: convertClusterStateToPB(view.State), - ShardNodes: shardViews, - Cause: "", - CreatedAt: view.CreatedAt, - } -} - -func convertClusterViewPB(view *clusterpb.ClusterView) ClusterView { - shardNodes := make([]ShardNode, 0, len(view.ShardNodes)) - for _, shardNodePB := range view.ShardNodes { - shardNode := convertShardNodePB(shardNodePB) - shardNodes = append(shardNodes, shardNode) - } - - return ClusterView{ - ClusterID: ClusterID(view.ClusterId), - Version: view.Version, - State: convertClusterStatePB(view.State), - ShardNodes: shardNodes, - CreatedAt: view.CreatedAt, - } -} - -func convertSchemaToPB(schema Schema) clusterpb.Schema { - return clusterpb.Schema{ - Id: uint32(schema.ID), - ClusterId: uint32(schema.ClusterID), - Name: schema.Name, - CreatedAt: schema.CreatedAt, - } -} - -func convertSchemaPB(schema *clusterpb.Schema) Schema { - return Schema{ - ID: SchemaID(schema.Id), - ClusterID: ClusterID(schema.ClusterId), - Name: schema.Name, - CreatedAt: schema.CreatedAt, - } -} - -func convertTableToPB(table Table) clusterpb.Table { - return clusterpb.Table{ - Id: uint64(table.ID), - Name: table.Name, - SchemaId: uint32(table.SchemaID), - Desc: "", - CreatedAt: table.CreatedAt, - PartitionInfo: table.PartitionInfo.Info, - } -} - -func convertTablePB(table 
*clusterpb.Table) Table { - return Table{ - ID: TableID(table.Id), - Name: table.Name, - SchemaID: SchemaID(table.SchemaId), - CreatedAt: table.CreatedAt, - PartitionInfo: PartitionInfo{ - Info: table.PartitionInfo, - }, - } -} - -func convertShardViewToPB(view ShardView) clusterpb.ShardView { - tableIDs := make([]uint64, 0, len(view.TableIDs)) - for _, id := range view.TableIDs { - tableIDs = append(tableIDs, uint64(id)) - } - - return clusterpb.ShardView{ - ShardId: uint32(view.ShardID), - TableIds: tableIDs, - Version: view.Version, - CreatedAt: view.CreatedAt, - } -} - -func convertShardViewPB(shardTopology *clusterpb.ShardView) ShardView { - tableIDs := make([]TableID, 0, len(shardTopology.TableIds)) - for _, id := range shardTopology.TableIds { - tableIDs = append(tableIDs, TableID(id)) - } - - return ShardView{ - ShardID: ShardID(shardTopology.ShardId), - Version: shardTopology.Version, - TableIDs: tableIDs, - CreatedAt: shardTopology.CreatedAt, - } -} - -func convertNodeStatsToPB(stats NodeStats) clusterpb.NodeStats { - return clusterpb.NodeStats{ - Lease: stats.Lease, - Zone: stats.Zone, - NodeVersion: stats.NodeVersion, - } -} - -func convertNodeStatsPB(stats *clusterpb.NodeStats) NodeStats { - return NodeStats{ - Lease: stats.Lease, - Zone: stats.Zone, - NodeVersion: stats.NodeVersion, - } -} - -func convertNodeStateToPB(state NodeState) clusterpb.NodeState { - switch state { - case NodeStateUnknown: - return clusterpb.NodeState_OFFLINE - case NodeStateOnline: - return clusterpb.NodeState_ONLINE - case NodeStateOffline: - return clusterpb.NodeState_OFFLINE - } - return clusterpb.NodeState_OFFLINE -} - -func convertNodeStatePB(state clusterpb.NodeState) NodeState { - switch state { - case clusterpb.NodeState_ONLINE: - return NodeStateOnline - case clusterpb.NodeState_OFFLINE: - return NodeStateOffline - } - return NodeStateOffline -} - -func convertNodePB(node *clusterpb.Node) Node { - nodeStats := convertNodeStatsPB(node.Stats) - return Node{ - Name: node.Name, - NodeStats: nodeStats, - LastTouchTime: node.LastTouchTime, - State: convertNodeStatePB(node.State), - } -} - -func ConvertShardStatusToString(status ShardStatus) string { - switch status { - case ShardStatusUnknown: - return "unknown" - case ShardStatusReady: - return "ready" - case ShardStatusPartialOpen: - return "partialOpen" - } - return "unknown" -} diff --git a/integration_tests/.gitignore b/integration_tests/.gitignore deleted file mode 100644 index 6ac71c3f80..0000000000 --- a/integration_tests/.gitignore +++ /dev/null @@ -1 +0,0 @@ -horaemeta diff --git a/integration_tests/Cargo.toml b/integration_tests/Cargo.toml deleted file mode 100644 index b3019b65e9..0000000000 --- a/integration_tests/Cargo.toml +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[package] -name = "horaedb-test" - -[package.license] -workspace = true - -[package.edition] -workspace = true - -[package.version] -workspace = true - -[dependencies] -anyhow = { workspace = true } -async-trait = { workspace = true } -horaedb-client = { workspace = true } -local-ip-address = "0.5" -reqwest = { workspace = true } -serde = { workspace = true } -sqlness = "0.6" -tokio = { workspace = true } -uuid = { workspace = true, features = ["v4"] } diff --git a/integration_tests/Makefile b/integration_tests/Makefile deleted file mode 100644 index 505f8380d5..0000000000 --- a/integration_tests/Makefile +++ /dev/null @@ -1,129 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -MODE ?= debug -ROOT = $(shell pwd) -HORAEDB_DATA_DIR = /tmp/horaedb -HORAEDB_DATA_DIR_0 = /tmp/horaedb0 -HORAEDB_DATA_DIR_1 = /tmp/horaedb1 -HORAEMETA_DATA_DIR = /tmp/horaemeta -HORAEDB_DATA_DIR_2 = /tmp/compaction-offload - -export HORAEDB_TEST_CASE_PATH ?= $(ROOT)/cases/env -export HORAEDB_TEST_BINARY ?= $(ROOT)/../target/$(MODE)/horaedb-test - -# Environment variables for standalone -export HORAEDB_SERVER_GRPC_ENDPOINT ?= 127.0.0.1:8831 -export HORAEDB_SERVER_HTTP_ENDPOINT ?= 127.0.0.1:5440 -export HORAEDB_BINARY_PATH ?= $(ROOT)/../target/$(MODE)/horaedb-server -export HORAEDB_STDOUT_FILE ?= /tmp/horaedb-stdout.log -export HORAEDB_CONFIG_FILE ?= $(ROOT)/../docs/minimal.toml - -# Environment variables for cluster -export HORAEMETA_BINARY_PATH ?= $(ROOT)/../target/horaemeta-server -export HORAEMETA_CONFIG_PATH ?= $(ROOT)/config/horaemeta.toml -export HORAEMETA_STDOUT_FILE ?= /tmp/horaemeta-stdout.log -export HORAEDB_CONFIG_FILE_0 ?= $(ROOT)/config/horaedb-cluster-0.toml -export HORAEDB_CONFIG_FILE_1 ?= $(ROOT)/config/horaedb-cluster-1.toml -export CLUSTER_HORAEDB_STDOUT_FILE_0 ?= /tmp/horaedb-stdout-0.log -export CLUSTER_HORAEDB_STDOUT_FILE_1 ?= /tmp/horaedb-stdout-1.log -export RUST_BACKTRACE=1 - -# Environment variables for compaction offload -export HORAEDB_STDOUT_FILE_2 ?= /tmp/horaedb-stdout-2.log -export HORAEDB_CONFIG_FILE_2 ?= $(ROOT)/config/compaction-offload.toml - -# Whether update related repos -# We don't want to rebuild the binaries and data on sometimes(e.g. debugging in local), -# and we can set it to false. -export UPDATE_REPOS_TO_LATEST ?= true - -clean: - rm -rf $(HORAEDB_DATA_DIR) $(HORAEDB_DATA_DIR_0) $(HORAEDB_DATA_DIR_1) $(HORAEMETA_DATA_DIR) $(HORAEDB_DATA_DIR_2) - -build-meta: - ./build_meta.sh - -build-horaedb: - cd .. 
&& make build-debug - -build-test: - cargo build - -build: build-horaedb build-test - -kill-old-horaemeta: - killall horaemeta-server | true - -kill-old-horaedb: - killall horaedb-server | true - -kill-old-process: kill-old-horaemeta kill-old-horaedb - -prepare: clean build kill-old-process - -run-horaemeta: build-meta - nohup $(HORAEMETA_BINARY_PATH) --config ${HORAEMETA_CONFIG_PATH} > /tmp/horaemeta-stdout.log 2>&1 & - sleep 10 - -run-horaedb-cluster: build-horaedb - nohup ${HORAEDB_BINARY_PATH} --config ${HORAEDB_CONFIG_FILE_0} > ${CLUSTER_HORAEDB_STDOUT_FILE_0} 2>&1 & - nohup ${HORAEDB_BINARY_PATH} --config ${HORAEDB_CONFIG_FILE_1} > ${CLUSTER_HORAEDB_STDOUT_FILE_1} 2>&1 & - sleep 30 - -run: - make run-local - make run-cluster - make run-compaction-offload - -run-local: prepare - HORAEDB_ENV_FILTER=local $(HORAEDB_TEST_BINARY) - -run-cluster: prepare build-meta - HORAEDB_ENV_FILTER=cluster $(HORAEDB_TEST_BINARY) - -run-compaction-offload: prepare - HORAEDB_ENV_FILTER=compaction_offload $(HORAEDB_TEST_BINARY) - -run-java: - java -version - cd sdk/java && MAVEN_OPTS="--add-opens=java.base/java.nio=ALL-UNNAMED" mvn clean compile exec:java - -run-go: - cd sdk/go && go run . - -run-rust: - cd sdk/rust && cargo run - -run-mysql: - cd mysql && ./basic.sh - -run-postgresql: - cd postgresql && ./basic.sh - -run-prom: - cd prom && ./run-tests.sh - -run-opentsdb: - cd opentsdb && ./run-tests.sh - -run-recovery: clean build-horaedb kill-old-process - cd recovery && ./run.sh && ./run.sh shard_based - -run-dist-query: prepare build-meta - HORAEDB_INTEGRATION_TEST_BIN_RUN_MODE=build_cluster $(HORAEDB_TEST_BINARY) - cd dist_query && ./run.sh diff --git a/integration_tests/README.md b/integration_tests/README.md deleted file mode 100644 index a3dc758392..0000000000 --- a/integration_tests/README.md +++ /dev/null @@ -1,41 +0,0 @@ -# Integration tests suite for HoraeDB - -## Running test - -There are three Makefile commands to run integration test: -```sh -# All envs -make run - -# Only local env -make run-local - -# Only cluster env -make run-cluster - -# Only compaction offload env -make run-compaction-offload -``` - -`horaedb-test` will recursively find all the files end with `.sql` and run it. Each file will be treated as a case. A file can contain multiple SQLs. When finished it will tell how many cases it run, and display the diff set if there is any. An example with one case: -``` -Server from "/path/to/horaedb/target/debug/horaedb-server" is starting ... -Takes 49.020203ms. Diff: false. Test case "/path/to/horaedb/tests/cases/example.sql" finished. -Run 1 finished. 0 cases are different. -``` - -Users can set `HORAEDB_ENV_FILTER` variables to filter env to run. For example: -``` -HORAEDB_ENV_FILTER=local make run -``` -This command will only run cases in `local`. - -## Add a test - -Please refer README of https://github.com/CeresDB/sqlness - -## Test case organization - -Cases are grouped by scenario. SQLs used to test one feature are put in one file. Like `top_k.sql` for `TopK` operator and `limit.sql` for `LIMIT` function. - -On top of files, we organize them by deployment. Like `local/` contains all the cases run in a standalone server. diff --git a/integration_tests/build_meta.sh b/integration_tests/build_meta.sh deleted file mode 100755 index 1aebbb70e2..0000000000 --- a/integration_tests/build_meta.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -set -o errexit -set -o nounset -set -o pipefail - -SCRIPT_DIR=$(cd "$(dirname "$0")"; pwd) -TARGET="${SCRIPT_DIR}/../target" - -META_BIN_PATH=${META_BIN_PATH:-""} - -if [[ -z "${META_BIN_PATH}" ]]; then - echo "Build horaemeta-server..." - make -C "${SCRIPT_DIR}/../horaemeta" build - META_BIN_PATH="${SCRIPT_DIR}/../horaemeta/bin/horaemeta-server" -fi - -mkdir -p ${TARGET} -cp ${META_BIN_PATH} ${TARGET}/horaemeta-server diff --git a/integration_tests/cases/common/basic.result b/integration_tests/cases/common/basic.result deleted file mode 100644 index 60c5e6ee2e..0000000000 --- a/integration_tests/cases/common/basic.result +++ /dev/null @@ -1,105 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- -DROP TABLE IF EXISTS `demo`; - -affected_rows: 0 - -CREATE TABLE demo ( - name string TAG, - value double NOT NULL, - t timestamp NOT NULL, - timestamp KEY (t)) ENGINE = Analytic WITH ( - enable_ttl = 'false' -); - -affected_rows: 0 - -INSERT INTO demo (t, name, value) - VALUES (1651737067000, 'horaedb', 100); - -affected_rows: 1 - -SELECT * FROM demo; - -tsid,t,name,value, -UInt64(1023872802579860359),Timestamp(1651737067000),String("horaedb"),Double(100.0), - - -INSERT INTO demo (t, name, value) - VALUES (1651737067001, "horaedb", 100); - -affected_rows: 1 - -SELECT * FROM demo; - -tsid,t,name,value, -UInt64(1023872802579860359),Timestamp(1651737067000),String("horaedb"),Double(100.0), -UInt64(1023872802579860359),Timestamp(1651737067001),String("horaedb"),Double(100.0), - - -DROP TABLE IF EXISTS `demo`; - -affected_rows: 0 - -CREATE TABLE `DeMo` ( - `nAmE` string TAG, - value double NOT NULL, - t timestamp NOT NULL, - timestamp KEY (t)) ENGINE = Analytic WITH ( - enable_ttl = 'false' -); - -affected_rows: 0 - -SELECT `nAmE` FROM `DeMo`; - -affected_rows: 0 - -DROP TABLE `DeMo`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `binary_demo`; - -affected_rows: 0 - -CREATE TABLE `binary_demo` ( - `name` string TAG, - `value` varbinary NOT NULL, - `t` timestamp NOT NULL, - timestamp KEY (t)) ENGINE=Analytic WITH ( - enable_ttl = 'false' -); - -affected_rows: 0 - -INSERT INTO binary_demo(t, name, value) VALUES(1667374200022, 'horaedb', x'11'); - -affected_rows: 1 - -SELECT * FROM binary_demo WHERE value = x'11'; - -tsid,t,name,value, -UInt64(1023872802579860359),Timestamp(1667374200022),String("horaedb"),Varbinary([17]), - - -DROP TABLE `binary_demo`; - -affected_rows: 0 - diff --git a/integration_tests/cases/common/basic.sql b/integration_tests/cases/common/basic.sql deleted file mode 100644 index 9b540c51f7..0000000000 --- a/integration_tests/cases/common/basic.sql +++ /dev/null @@ -1,71 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- - -DROP TABLE IF EXISTS `demo`; - -CREATE TABLE demo ( - name string TAG, - value double NOT NULL, - t timestamp NOT NULL, - timestamp KEY (t)) ENGINE = Analytic WITH ( - enable_ttl = 'false' -); - - -INSERT INTO demo (t, name, value) - VALUES (1651737067000, 'horaedb', 100); - - -SELECT * FROM demo; - -INSERT INTO demo (t, name, value) - VALUES (1651737067001, "horaedb", 100); - -SELECT * FROM demo; - -DROP TABLE IF EXISTS `demo`; - -CREATE TABLE `DeMo` ( - `nAmE` string TAG, - value double NOT NULL, - t timestamp NOT NULL, - timestamp KEY (t)) ENGINE = Analytic WITH ( - enable_ttl = 'false' -); - - -SELECT `nAmE` FROM `DeMo`; - -DROP TABLE `DeMo`; - -DROP TABLE IF EXISTS `binary_demo`; - -CREATE TABLE `binary_demo` ( - `name` string TAG, - `value` varbinary NOT NULL, - `t` timestamp NOT NULL, - timestamp KEY (t)) ENGINE=Analytic WITH ( - enable_ttl = 'false' -); - -INSERT INTO binary_demo(t, name, value) VALUES(1667374200022, 'horaedb', x'11'); - -SELECT * FROM binary_demo WHERE value = x'11'; - -DROP TABLE `binary_demo`; diff --git a/integration_tests/cases/common/dml/case_sensitive.result b/integration_tests/cases/common/dml/case_sensitive.result deleted file mode 100644 index 414523e819..0000000000 --- a/integration_tests/cases/common/dml/case_sensitive.result +++ /dev/null @@ -1,121 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- -DROP TABLE IF EXISTS case_SENSITIVE_table1; - -affected_rows: 0 - -CREATE TABLE case_SENSITIVE_table1 ( - ts timestamp NOT NULL, - VALUE1 double, - timestamp KEY (ts)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -affected_rows: 0 - -INSERT INTO case_SENSITIVE_table1 (ts, VALUE1) - VALUES (1, 10), (2, 20), (3, 30); - -affected_rows: 3 - -SELECT - * -FROM - case_SENSITIVE_table1; - -tsid,ts,VALUE1, -UInt64(0),Timestamp(1),Double(10.0), -UInt64(0),Timestamp(2),Double(20.0), -UInt64(0),Timestamp(3),Double(30.0), - - -SELECT - * -FROM - CASE_SENSITIVE_TABLE1; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. Caused by: Failed to create plan, err:Failed to generate datafusion plan, err:Execution error: Table is not found, \"table:CASE_SENSITIVE_TABLE1\" sql:SELECT\n *\nFROM\n CASE_SENSITIVE_TABLE1;" }) - -SELECT - * -FROM - `case_SENSITIVE_table1`; - -tsid,ts,VALUE1, -UInt64(0),Timestamp(1),Double(10.0), -UInt64(0),Timestamp(2),Double(20.0), -UInt64(0),Timestamp(3),Double(30.0), - - -SELECT - * -FROM - `CASE_SENSITIVE_TABLE1`; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. 
Caused by: Failed to create plan, err:Failed to generate datafusion plan, err:Execution error: Table is not found, \"table:CASE_SENSITIVE_TABLE1\" sql:SELECT\n *\nFROM\n `CASE_SENSITIVE_TABLE1`;" }) - -SHOW CREATE TABLE case_SENSITIVE_table1; - -Table,Create Table, -String("case_SENSITIVE_table1"),String("CREATE TABLE `case_SENSITIVE_table1` (`tsid` uint64 NOT NULL, `ts` timestamp NOT NULL, `VALUE1` double, PRIMARY KEY(tsid,ts), TIMESTAMP KEY(ts)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='false', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -SHOW CREATE TABLE CASE_SENSITIVE_TABLE1; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. Caused by: Failed to create plan, err:Table not found, table:CASE_SENSITIVE_TABLE1 sql:SHOW CREATE TABLE CASE_SENSITIVE_TABLE1;" }) - -SHOW CREATE TABLE `case_SENSITIVE_table1`; - -Table,Create Table, -String("case_SENSITIVE_table1"),String("CREATE TABLE `case_SENSITIVE_table1` (`tsid` uint64 NOT NULL, `ts` timestamp NOT NULL, `VALUE1` double, PRIMARY KEY(tsid,ts), TIMESTAMP KEY(ts)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='false', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -SHOW CREATE TABLE `CASE_SENSITIVE_TABLE1`; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. Caused by: Failed to create plan, err:Table not found, table:CASE_SENSITIVE_TABLE1 sql:SHOW CREATE TABLE `CASE_SENSITIVE_TABLE1`;" }) - -DESC case_SENSITIVE_table1; - -name,type,is_primary,is_nullable,is_tag,is_dictionary, -String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("ts"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("VALUE1"),String("double"),Boolean(false),Boolean(true),Boolean(false),Boolean(false), - - -DESC CASE_SENSITIVE_TABLE1; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. Caused by: Failed to create plan, err:Table not found, table:CASE_SENSITIVE_TABLE1 sql:DESC CASE_SENSITIVE_TABLE1;" }) - -DESC `case_SENSITIVE_table1`; - -name,type,is_primary,is_nullable,is_tag,is_dictionary, -String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("ts"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("VALUE1"),String("double"),Boolean(false),Boolean(true),Boolean(false),Boolean(false), - - -DESC `CASE_SENSITIVE_TABLE1`; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. 
Caused by: Failed to create plan, err:Table not found, table:CASE_SENSITIVE_TABLE1 sql:DESC `CASE_SENSITIVE_TABLE1`;" }) - -DROP TABLE IF EXISTS case_SENSITIVE_table1; - -affected_rows: 0 - diff --git a/integration_tests/cases/common/dml/case_sensitive.sql b/integration_tests/cases/common/dml/case_sensitive.sql deleted file mode 100644 index 031bf2a262..0000000000 --- a/integration_tests/cases/common/dml/case_sensitive.sql +++ /dev/null @@ -1,72 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - - -DROP TABLE IF EXISTS case_SENSITIVE_table1; - - -CREATE TABLE case_SENSITIVE_table1 ( - ts timestamp NOT NULL, - VALUE1 double, - timestamp KEY (ts)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -INSERT INTO case_SENSITIVE_table1 (ts, VALUE1) - VALUES (1, 10), (2, 20), (3, 30); - - -SELECT - * -FROM - case_SENSITIVE_table1; - -SELECT - * -FROM - CASE_SENSITIVE_TABLE1; - -SELECT - * -FROM - `case_SENSITIVE_table1`; - -SELECT - * -FROM - `CASE_SENSITIVE_TABLE1`; - -SHOW CREATE TABLE case_SENSITIVE_table1; - -SHOW CREATE TABLE CASE_SENSITIVE_TABLE1; - -SHOW CREATE TABLE `case_SENSITIVE_table1`; - -SHOW CREATE TABLE `CASE_SENSITIVE_TABLE1`; - -DESC case_SENSITIVE_table1; - -DESC CASE_SENSITIVE_TABLE1; - -DESC `case_SENSITIVE_table1`; - -DESC `CASE_SENSITIVE_TABLE1`; - -DROP TABLE IF EXISTS case_SENSITIVE_table1; diff --git a/integration_tests/cases/common/dml/insert_mode.result b/integration_tests/cases/common/dml/insert_mode.result deleted file mode 100644 index bdf9a520f9..0000000000 --- a/integration_tests/cases/common/dml/insert_mode.result +++ /dev/null @@ -1,235 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- --- overwrite -DROP TABLE IF EXISTS `03_dml_insert_mode_table1`; - -affected_rows: 0 - -CREATE TABLE `03_dml_insert_mode_table1` ( - `timestamp` timestamp NOT NULL, - `value` double, - `dic` string dictionary, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false', - update_mode='OVERWRITE' -); - -affected_rows: 0 - -INSERT INTO `03_dml_insert_mode_table1` (`timestamp`, `value`, `dic`) - VALUES (1, +10, "d1"), (2, 0, "d2"), (3, -30, "d1"); - -affected_rows: 3 - -SELECT - * -FROM - `03_dml_insert_mode_table1` -ORDER BY - `value` ASC; - -tsid,timestamp,value,dic, -UInt64(0),Timestamp(3),Double(-30.0),String("d1"), -UInt64(0),Timestamp(2),Double(0.0),String("d2"), -UInt64(0),Timestamp(1),Double(10.0),String("d1"), - - -INSERT INTO `03_dml_insert_mode_table1` (`timestamp`, `value`) - VALUES (1, 100), (2, 200), (3, 300); - -affected_rows: 3 - -SELECT - * -FROM - `03_dml_insert_mode_table1` -ORDER BY - `value` ASC; - -tsid,timestamp,value,dic, -UInt64(0),Timestamp(1),Double(100.0),String(""), -UInt64(0),Timestamp(2),Double(200.0),String(""), -UInt64(0),Timestamp(3),Double(300.0),String(""), - - -DROP TABLE `03_dml_insert_mode_table1`; - -affected_rows: 0 - --- append -DROP TABLE IF EXISTS `03_dml_insert_mode_table2`; - -affected_rows: 0 - -CREATE TABLE `03_dml_insert_mode_table2` ( - `timestamp` timestamp NOT NULL, - `value` double, - `dic` string dictionary, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false', - update_mode='APPEND' -); - -affected_rows: 0 - -INSERT INTO `03_dml_insert_mode_table2` (`timestamp`, `value`, `dic`) - VALUES (1, 10, "d1"), (2, 20, ""), (3, 30, "d2"); - -affected_rows: 3 - -SELECT - * -FROM - `03_dml_insert_mode_table2` -ORDER BY - `value` ASC; - -tsid,timestamp,value,dic, -UInt64(0),Timestamp(1),Double(10.0),String("d1"), -UInt64(0),Timestamp(2),Double(20.0),String(""), -UInt64(0),Timestamp(3),Double(30.0),String("d2"), - - -INSERT INTO `03_dml_insert_mode_table2` (`timestamp`, `value`, `dic`) - VALUES (1, 100, "d2"), (2, 200, "d1"), (3, 300, ""); - -affected_rows: 3 - -SELECT - * -FROM - `03_dml_insert_mode_table2` -ORDER BY - `value` ASC; - -tsid,timestamp,value,dic, -UInt64(0),Timestamp(1),Double(10.0),String("d1"), -UInt64(0),Timestamp(2),Double(20.0),String(""), -UInt64(0),Timestamp(3),Double(30.0),String("d2"), -UInt64(0),Timestamp(1),Double(100.0),String("d2"), -UInt64(0),Timestamp(2),Double(200.0),String("d1"), -UInt64(0),Timestamp(3),Double(300.0),String(""), - - -DROP TABLE `03_dml_insert_mode_table2`; - -affected_rows: 0 - --- default(overwrite) -DROP TABLE IF EXISTS `03_dml_insert_mode_table3`; - -affected_rows: 0 - -CREATE TABLE `03_dml_insert_mode_table3` ( - `timestamp` timestamp NOT NULL, - `value` double, - `dic` string dictionary, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -affected_rows: 0 - -INSERT INTO `03_dml_insert_mode_table3` (`timestamp`, `value`, `dic`) - VALUES (1, 100, "d2"), (2, 200, "d1"), (3, 300, "d1"); - -affected_rows: 3 - --- TODO support insert Null --- INSERT INTO `03_dml_insert_mode_table3` (`timestamp`, `value`, `dic`) VALUES (1, 100, "d2"), (2, 200, "d1"), (3, 300, Null); -SELECT - * -FROM - `03_dml_insert_mode_table3` -ORDER BY - `value` ASC; - -tsid,timestamp,value,dic, -UInt64(0),Timestamp(1),Double(100.0),String("d2"), -UInt64(0),Timestamp(2),Double(200.0),String("d1"), -UInt64(0),Timestamp(3),Double(300.0),String("d1"), - - -INSERT INTO `03_dml_insert_mode_table3` (`timestamp`, `value`) - VALUES (1, 100, "d5"), (2, 200, "d6"), (3, 300, "d7"); 
- -affected_rows: 3 - -SELECT - * -FROM - `03_dml_insert_mode_table3` -ORDER BY - `value` ASC; - -tsid,timestamp,value,dic, -UInt64(0),Timestamp(1),Double(100.0),String(""), -UInt64(0),Timestamp(2),Double(200.0),String(""), -UInt64(0),Timestamp(3),Double(300.0),String(""), - - -DROP TABLE `03_dml_insert_mode_table3`; - -affected_rows: 0 - --- insert with missing columns -DROP TABLE IF EXISTS `03_dml_insert_mode_table4`; - -affected_rows: 0 - -CREATE TABLE `03_dml_insert_mode_table4` ( - `timestamp` timestamp NOT NULL, - `c1` uint32, - `c2` string default '123', - `c3` uint32 default c1 + 1, - `c4` uint32 default c3 + 1, - `c5` uint32 default c3 + 10, - `c6` string default "default", - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -affected_rows: 0 - -INSERT INTO `03_dml_insert_mode_table4` (`timestamp`, `c1`, `c5`) - VALUES (1, 10, 3), (2, 20, 4), (3, 30, 5); - -affected_rows: 3 - -SELECT - * -FROM - `03_dml_insert_mode_table4` -ORDER BY - `c1` ASC; - -tsid,timestamp,c1,c2,c3,c4,c5,c6, -UInt64(0),Timestamp(1),UInt32(10),String("123"),UInt32(11),UInt32(12),UInt32(3),String("default"), -UInt64(0),Timestamp(2),UInt32(20),String("123"),UInt32(21),UInt32(22),UInt32(4),String("default"), -UInt64(0),Timestamp(3),UInt32(30),String("123"),UInt32(31),UInt32(32),UInt32(5),String("default"), - - -DROP TABLE IF EXISTS `03_dml_insert_mode_table4`; - -affected_rows: 0 - diff --git a/integration_tests/cases/common/dml/insert_mode.sql b/integration_tests/cases/common/dml/insert_mode.sql deleted file mode 100644 index 9327cb71b9..0000000000 --- a/integration_tests/cases/common/dml/insert_mode.sql +++ /dev/null @@ -1,161 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- - --- overwrite -DROP TABLE IF EXISTS `03_dml_insert_mode_table1`; - -CREATE TABLE `03_dml_insert_mode_table1` ( - `timestamp` timestamp NOT NULL, - `value` double, - `dic` string dictionary, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false', - update_mode='OVERWRITE' -); - - -INSERT INTO `03_dml_insert_mode_table1` (`timestamp`, `value`, `dic`) - VALUES (1, +10, "d1"), (2, 0, "d2"), (3, -30, "d1"); - - -SELECT - * -FROM - `03_dml_insert_mode_table1` -ORDER BY - `value` ASC; - - -INSERT INTO `03_dml_insert_mode_table1` (`timestamp`, `value`) - VALUES (1, 100), (2, 200), (3, 300); - - -SELECT - * -FROM - `03_dml_insert_mode_table1` -ORDER BY - `value` ASC; - -DROP TABLE `03_dml_insert_mode_table1`; - --- append -DROP TABLE IF EXISTS `03_dml_insert_mode_table2`; - -CREATE TABLE `03_dml_insert_mode_table2` ( - `timestamp` timestamp NOT NULL, - `value` double, - `dic` string dictionary, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false', - update_mode='APPEND' -); - - -INSERT INTO `03_dml_insert_mode_table2` (`timestamp`, `value`, `dic`) - VALUES (1, 10, "d1"), (2, 20, ""), (3, 30, "d2"); - -SELECT - * -FROM - `03_dml_insert_mode_table2` -ORDER BY - `value` ASC; - -INSERT INTO `03_dml_insert_mode_table2` (`timestamp`, `value`, `dic`) - VALUES (1, 100, "d2"), (2, 200, "d1"), (3, 300, ""); - -SELECT - * -FROM - `03_dml_insert_mode_table2` -ORDER BY - `value` ASC; - -DROP TABLE `03_dml_insert_mode_table2`; - --- default(overwrite) -DROP TABLE IF EXISTS `03_dml_insert_mode_table3`; - -CREATE TABLE `03_dml_insert_mode_table3` ( - `timestamp` timestamp NOT NULL, - `value` double, - `dic` string dictionary, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - - -INSERT INTO `03_dml_insert_mode_table3` (`timestamp`, `value`, `dic`) - VALUES (1, 100, "d2"), (2, 200, "d1"), (3, 300, "d1"); - --- TODO support insert Null --- INSERT INTO `03_dml_insert_mode_table3` (`timestamp`, `value`, `dic`) VALUES (1, 100, "d2"), (2, 200, "d1"), (3, 300, Null); - -SELECT - * -FROM - `03_dml_insert_mode_table3` -ORDER BY - `value` ASC; - -INSERT INTO `03_dml_insert_mode_table3` (`timestamp`, `value`) - VALUES (1, 100, "d5"), (2, 200, "d6"), (3, 300, "d7"); - - -SELECT - * -FROM - `03_dml_insert_mode_table3` -ORDER BY - `value` ASC; - -DROP TABLE `03_dml_insert_mode_table3`; - --- insert with missing columns -DROP TABLE IF EXISTS `03_dml_insert_mode_table4`; - -CREATE TABLE `03_dml_insert_mode_table4` ( - `timestamp` timestamp NOT NULL, - `c1` uint32, - `c2` string default '123', - `c3` uint32 default c1 + 1, - `c4` uint32 default c3 + 1, - `c5` uint32 default c3 + 10, - `c6` string default "default", - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -INSERT INTO `03_dml_insert_mode_table4` (`timestamp`, `c1`, `c5`) - VALUES (1, 10, 3), (2, 20, 4), (3, 30, 5); - -SELECT - * -FROM - `03_dml_insert_mode_table4` -ORDER BY - `c1` ASC; - - -DROP TABLE IF EXISTS `03_dml_insert_mode_table4`; diff --git a/integration_tests/cases/common/dml/issue-1087.result b/integration_tests/cases/common/dml/issue-1087.result deleted file mode 100644 index f9f41029ff..0000000000 --- a/integration_tests/cases/common/dml/issue-1087.result +++ /dev/null @@ -1,122 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. 
The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- -CREATE TABLE `issue_1087` ( - `name` string TAG NULL, - `value` double NOT NULL, - `t` timestamp NOT NULL, - timestamp KEY (t)) - ENGINE=Analytic with (enable_ttl='false'); - -affected_rows: 0 - --- Check which optimizer rules we are using now -explain verbose select * from issue_1087; - -plan_type,plan, -String("initial_logical_plan"),String("Projection: issue_1087.tsid, issue_1087.t, issue_1087.name, issue_1087.value\n TableScan: issue_1087"), -String("logical_plan after horaedb_type_conversion"),String("SAME TEXT AS ABOVE"), -String("logical_plan after inline_table_scan"),String("SAME TEXT AS ABOVE"), -String("logical_plan after type_coercion"),String("SAME TEXT AS ABOVE"), -String("logical_plan after count_wildcard_rule"),String("SAME TEXT AS ABOVE"), -String("analyzed_logical_plan"),String("SAME TEXT AS ABOVE"), -String("logical_plan after eliminate_nested_union"),String("SAME TEXT AS ABOVE"), -String("logical_plan after simplify_expressions"),String("SAME TEXT AS ABOVE"), -String("logical_plan after unwrap_cast_in_comparison"),String("SAME TEXT AS ABOVE"), -String("logical_plan after replace_distinct_aggregate"),String("SAME TEXT AS ABOVE"), -String("logical_plan after eliminate_join"),String("SAME TEXT AS ABOVE"), -String("logical_plan after decorrelate_predicate_subquery"),String("SAME TEXT AS ABOVE"), -String("logical_plan after scalar_subquery_to_join"),String("SAME TEXT AS ABOVE"), -String("logical_plan after extract_equijoin_predicate"),String("SAME TEXT AS ABOVE"), -String("logical_plan after simplify_expressions"),String("SAME TEXT AS ABOVE"), -String("logical_plan after merge_projection"),String("SAME TEXT AS ABOVE"), -String("logical_plan after rewrite_disjunctive_predicate"),String("SAME TEXT AS ABOVE"), -String("logical_plan after eliminate_duplicated_expr"),String("SAME TEXT AS ABOVE"), -String("logical_plan after eliminate_filter"),String("SAME TEXT AS ABOVE"), -String("logical_plan after eliminate_cross_join"),String("SAME TEXT AS ABOVE"), -String("logical_plan after common_sub_expression_eliminate"),String("SAME TEXT AS ABOVE"), -String("logical_plan after eliminate_limit"),String("SAME TEXT AS ABOVE"), -String("logical_plan after propagate_empty_relation"),String("SAME TEXT AS ABOVE"), -String("logical_plan after eliminate_one_union"),String("SAME TEXT AS ABOVE"), -String("logical_plan after filter_null_join_keys"),String("SAME TEXT AS ABOVE"), -String("logical_plan after eliminate_outer_join"),String("SAME TEXT AS ABOVE"), -String("logical_plan after push_down_limit"),String("SAME TEXT AS ABOVE"), -String("logical_plan after push_down_filter"),String("SAME TEXT AS ABOVE"), -String("logical_plan after single_distinct_aggregation_to_group_by"),String("SAME TEXT AS ABOVE"), -String("logical_plan after simplify_expressions"),String("SAME TEXT AS ABOVE"), -String("logical_plan after unwrap_cast_in_comparison"),String("SAME TEXT AS ABOVE"), -String("logical_plan after 
common_sub_expression_eliminate"),String("SAME TEXT AS ABOVE"), -String("logical_plan after push_down_projection"),String("Projection: issue_1087.tsid, issue_1087.t, issue_1087.name, issue_1087.value\n TableScan: issue_1087 projection=[tsid, t, name, value]"), -String("logical_plan after eliminate_projection"),String("TableScan: issue_1087 projection=[tsid, t, name, value]"), -String("logical_plan after push_down_limit"),String("SAME TEXT AS ABOVE"), -String("logical_plan after influx_regex_to_datafusion_regex"),String("SAME TEXT AS ABOVE"), -String("logical_plan after handle_gap_fill"),String("SAME TEXT AS ABOVE"), -String("logical_plan after eliminate_nested_union"),String("SAME TEXT AS ABOVE"), -String("logical_plan after simplify_expressions"),String("SAME TEXT AS ABOVE"), -String("logical_plan after unwrap_cast_in_comparison"),String("SAME TEXT AS ABOVE"), -String("logical_plan after replace_distinct_aggregate"),String("SAME TEXT AS ABOVE"), -String("logical_plan after eliminate_join"),String("SAME TEXT AS ABOVE"), -String("logical_plan after decorrelate_predicate_subquery"),String("SAME TEXT AS ABOVE"), -String("logical_plan after scalar_subquery_to_join"),String("SAME TEXT AS ABOVE"), -String("logical_plan after extract_equijoin_predicate"),String("SAME TEXT AS ABOVE"), -String("logical_plan after simplify_expressions"),String("SAME TEXT AS ABOVE"), -String("logical_plan after merge_projection"),String("SAME TEXT AS ABOVE"), -String("logical_plan after rewrite_disjunctive_predicate"),String("SAME TEXT AS ABOVE"), -String("logical_plan after eliminate_duplicated_expr"),String("SAME TEXT AS ABOVE"), -String("logical_plan after eliminate_filter"),String("SAME TEXT AS ABOVE"), -String("logical_plan after eliminate_cross_join"),String("SAME TEXT AS ABOVE"), -String("logical_plan after common_sub_expression_eliminate"),String("SAME TEXT AS ABOVE"), -String("logical_plan after eliminate_limit"),String("SAME TEXT AS ABOVE"), -String("logical_plan after propagate_empty_relation"),String("SAME TEXT AS ABOVE"), -String("logical_plan after eliminate_one_union"),String("SAME TEXT AS ABOVE"), -String("logical_plan after filter_null_join_keys"),String("SAME TEXT AS ABOVE"), -String("logical_plan after eliminate_outer_join"),String("SAME TEXT AS ABOVE"), -String("logical_plan after push_down_limit"),String("SAME TEXT AS ABOVE"), -String("logical_plan after push_down_filter"),String("SAME TEXT AS ABOVE"), -String("logical_plan after single_distinct_aggregation_to_group_by"),String("SAME TEXT AS ABOVE"), -String("logical_plan after simplify_expressions"),String("SAME TEXT AS ABOVE"), -String("logical_plan after unwrap_cast_in_comparison"),String("SAME TEXT AS ABOVE"), -String("logical_plan after common_sub_expression_eliminate"),String("SAME TEXT AS ABOVE"), -String("logical_plan after push_down_projection"),String("SAME TEXT AS ABOVE"), -String("logical_plan after eliminate_projection"),String("SAME TEXT AS ABOVE"), -String("logical_plan after push_down_limit"),String("SAME TEXT AS ABOVE"), -String("logical_plan after influx_regex_to_datafusion_regex"),String("SAME TEXT AS ABOVE"), -String("logical_plan after handle_gap_fill"),String("SAME TEXT AS ABOVE"), -String("logical_plan"),String("TableScan: issue_1087 projection=[tsid, t, name, value]"), -String("initial_physical_plan"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), -String("initial_physical_plan_with_stats"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low, 
partition_count=UnknownPartitioning(8), statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:)]]\n"), -String("physical_plan after OutputRequirements"),String("OutputRequirementExec\n ScanTable: table=issue_1087, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), -String("physical_plan after aggregate_statistics"),String("SAME TEXT AS ABOVE"), -String("physical_plan after join_selection"),String("SAME TEXT AS ABOVE"), -String("physical_plan after LimitedDistinctAggregation"),String("SAME TEXT AS ABOVE"), -String("physical_plan after EnforceDistribution"),String("SAME TEXT AS ABOVE"), -String("physical_plan after CombinePartialFinalAggregate"),String("SAME TEXT AS ABOVE"), -String("physical_plan after EnforceSorting"),String("SAME TEXT AS ABOVE"), -String("physical_plan after coalesce_batches"),String("SAME TEXT AS ABOVE"), -String("physical_plan after OutputRequirements"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), -String("physical_plan after PipelineChecker"),String("SAME TEXT AS ABOVE"), -String("physical_plan after LimitAggregation"),String("SAME TEXT AS ABOVE"), -String("physical_plan after ProjectionPushdown"),String("SAME TEXT AS ABOVE"), -String("physical_plan"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), -String("physical_plan_with_stats"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:)]]\n"), - - -DROP TABLE `issue_1087`; - -affected_rows: 0 - diff --git a/integration_tests/cases/common/dml/issue-1087.sql b/integration_tests/cases/common/dml/issue-1087.sql deleted file mode 100644 index 7aa026024c..0000000000 --- a/integration_tests/cases/common/dml/issue-1087.sql +++ /dev/null @@ -1,31 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - -CREATE TABLE `issue_1087` ( - `name` string TAG NULL, - `value` double NOT NULL, - `t` timestamp NOT NULL, - timestamp KEY (t)) - ENGINE=Analytic with (enable_ttl='false'); - - --- Check which optimizer rules we are using now -explain verbose select * from issue_1087; - -DROP TABLE `issue_1087`; diff --git a/integration_tests/cases/common/dml/issue-302.result b/integration_tests/cases/common/dml/issue-302.result deleted file mode 100644 index 74407607bc..0000000000 --- a/integration_tests/cases/common/dml/issue-302.result +++ /dev/null @@ -1,40 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. 
The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- -DROP TABLE IF EXISTS issue302; - -affected_rows: 0 - -CREATE TABLE `issue302` (`name` string TAG NULL, `value` double NOT NULL, `t` timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE=Analytic with (enable_ttl='false'); - -affected_rows: 0 - -INSERT INTO issue302(t, value) VALUES(1651737067000, 100); - -affected_rows: 1 - -select `t`, count(distinct name) from issue302 group by `t`; - -t,COUNT(DISTINCT issue302.name), -Timestamp(1651737067000),Int64(0), - - -DROP TABLE IF EXISTS issue302; - -affected_rows: 0 - diff --git a/integration_tests/cases/common/dml/issue-302.sql b/integration_tests/cases/common/dml/issue-302.sql deleted file mode 100644 index bb8b51b484..0000000000 --- a/integration_tests/cases/common/dml/issue-302.sql +++ /dev/null @@ -1,28 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - -DROP TABLE IF EXISTS issue302; - -CREATE TABLE `issue302` (`name` string TAG NULL, `value` double NOT NULL, `t` timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE=Analytic with (enable_ttl='false'); - -INSERT INTO issue302(t, value) VALUES(1651737067000, 100); - -select `t`, count(distinct name) from issue302 group by `t`; - -DROP TABLE IF EXISTS issue302; diff --git a/integration_tests/cases/common/dml/issue-341.result b/integration_tests/cases/common/dml/issue-341.result deleted file mode 100644 index 0b4ffed96b..0000000000 --- a/integration_tests/cases/common/dml/issue-341.result +++ /dev/null @@ -1,161 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. 
See the License for the --- specific language governing permissions and limitations --- under the License. --- -DROP TABLE IF EXISTS `issue341_t1`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `issue341_t2`; - -affected_rows: 0 - -CREATE TABLE `issue341_t1` ( - `timestamp` timestamp NOT NULL, - `value` int, - `tag1` string tag, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false', - update_mode='append' -); - -affected_rows: 0 - -INSERT INTO `issue341_t1` (`timestamp`, `value`, `tag1`) - VALUES (1, 1, "t1"), (2, 2, "t2"), (3, 3, "t3"); - -affected_rows: 3 - -SELECT - `timestamp`, - `value` -FROM - `issue341_t1`; - -timestamp,value, -Timestamp(1),Int32(1), -Timestamp(3),Int32(3), -Timestamp(2),Int32(2), - - -SELECT - `timestamp`, - `value` -FROM - `issue341_t1` -WHERE - `value` = 3; - -timestamp,value, -Timestamp(3),Int32(3), - - --- FilterExec node should not be in plan. -EXPLAIN SELECT - `timestamp`, - `value` -FROM - `issue341_t1` -WHERE - `value` = 3; - -plan_type,plan, -String("logical_plan"),String("TableScan: issue341_t1 projection=[timestamp, value], full_filters=[issue341_t1.value = Int32(3)]"), -String("physical_plan"),String("ScanTable: table=issue341_t1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), - - --- FilterExec node should not be in plan. -EXPLAIN SELECT - `timestamp`, - `value` -FROM - `issue341_t1` -WHERE - tag1 = "t3"; - -plan_type,plan, -String("logical_plan"),String("TableScan: issue341_t1 projection=[timestamp, value], full_filters=[issue341_t1.tag1 = Utf8(\"t3\")]"), -String("physical_plan"),String("ProjectionExec: expr=[timestamp@0 as timestamp, value@1 as value]\n ScanTable: table=issue341_t1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), - - --- Repeat operations above, but with overwrite table -CREATE TABLE `issue341_t2` ( - `timestamp` timestamp NOT NULL, - `value` double, - `tag1` string tag, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false', - update_mode='overwrite' -); - -affected_rows: 0 - -INSERT INTO `issue341_t2` (`timestamp`, `value`, `tag1`) - VALUES (1, 1, "t1"), (2, 2, "t2"), (3, 3, "t3"); - -affected_rows: 3 - -SELECT - `timestamp`, - `value` -FROM - `issue341_t2` -WHERE - `value` = 3; - -timestamp,value, -Timestamp(3),Double(3.0), - - --- FilterExec node should be in plan. -EXPLAIN SELECT - `timestamp`, - `value` -FROM - `issue341_t2` -WHERE - `value` = 3; - -plan_type,plan, -String("logical_plan"),String("Filter: issue341_t2.value = Float64(3)\n TableScan: issue341_t2 projection=[timestamp, value], partial_filters=[issue341_t2.value = Float64(3)]"), -String("physical_plan"),String("CoalesceBatchesExec: target_batch_size=8192\n FilterExec: value@1 = 3\n ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), - - --- When using tag as filter, FilterExec node should not be in plan. 
-EXPLAIN SELECT - `timestamp`, - `value` -FROM - `issue341_t2` -WHERE - tag1 = "t3"; - -plan_type,plan, -String("logical_plan"),String("TableScan: issue341_t2 projection=[timestamp, value], full_filters=[issue341_t2.tag1 = Utf8(\"t3\")]"), -String("physical_plan"),String("ProjectionExec: expr=[timestamp@0 as timestamp, value@1 as value]\n ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), - - -DROP TABLE IF EXISTS `issue341_t1`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `issue341_t2`; - -affected_rows: 0 - diff --git a/integration_tests/cases/common/dml/issue-341.sql b/integration_tests/cases/common/dml/issue-341.sql deleted file mode 100644 index 9836fe03f5..0000000000 --- a/integration_tests/cases/common/dml/issue-341.sql +++ /dev/null @@ -1,111 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - - -DROP TABLE IF EXISTS `issue341_t1`; -DROP TABLE IF EXISTS `issue341_t2`; - -CREATE TABLE `issue341_t1` ( - `timestamp` timestamp NOT NULL, - `value` int, - `tag1` string tag, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false', - update_mode='append' -); - -INSERT INTO `issue341_t1` (`timestamp`, `value`, `tag1`) - VALUES (1, 1, "t1"), (2, 2, "t2"), (3, 3, "t3"); - -SELECT - `timestamp`, - `value` -FROM - `issue341_t1`; - -SELECT - `timestamp`, - `value` -FROM - `issue341_t1` -WHERE - `value` = 3; - --- FilterExec node should not be in plan. -EXPLAIN SELECT - `timestamp`, - `value` -FROM - `issue341_t1` -WHERE - `value` = 3; - --- FilterExec node should not be in plan. -EXPLAIN SELECT - `timestamp`, - `value` -FROM - `issue341_t1` -WHERE - tag1 = "t3"; - --- Repeat operations above, but with overwrite table - -CREATE TABLE `issue341_t2` ( - `timestamp` timestamp NOT NULL, - `value` double, - `tag1` string tag, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false', - update_mode='overwrite' -); - -INSERT INTO `issue341_t2` (`timestamp`, `value`, `tag1`) - VALUES (1, 1, "t1"), (2, 2, "t2"), (3, 3, "t3"); - -SELECT - `timestamp`, - `value` -FROM - `issue341_t2` -WHERE - `value` = 3; - --- FilterExec node should be in plan. -EXPLAIN SELECT - `timestamp`, - `value` -FROM - `issue341_t2` -WHERE - `value` = 3; - --- When using tag as filter, FilterExec node should not be in plan. 
-EXPLAIN SELECT - `timestamp`, - `value` -FROM - `issue341_t2` -WHERE - tag1 = "t3"; - -DROP TABLE IF EXISTS `issue341_t1`; -DROP TABLE IF EXISTS `issue341_t2`; diff --git a/integration_tests/cases/common/dml/issue-59.result b/integration_tests/cases/common/dml/issue-59.result deleted file mode 100644 index 7888f8d843..0000000000 --- a/integration_tests/cases/common/dml/issue-59.result +++ /dev/null @@ -1,52 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- -DROP TABLE IF EXISTS issue59; - -affected_rows: 0 - -CREATE TABLE issue59 ( - ts timestamp NOT NULL, - id int, - account string, - timestamp KEY (ts)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -affected_rows: 0 - -SELECT id+1, count(distinct(account)) -FROM issue59 -GROUP BY id+1; - -affected_rows: 0 - -explain SELECT id+1, count(distinct(account)) -FROM issue59 -GROUP BY id+1; - -plan_type,plan, -String("logical_plan"),String("Projection: group_alias_0 AS issue59.id + Int64(1), COUNT(alias1) AS COUNT(DISTINCT issue59.account)\n Aggregate: groupBy=[[group_alias_0]], aggr=[[COUNT(alias1)]]\n Aggregate: groupBy=[[CAST(issue59.id AS Int64) + Int64(1) AS group_alias_0, issue59.account AS alias1]], aggr=[[]]\n TableScan: issue59 projection=[id, account]"), -String("physical_plan"),String("ProjectionExec: expr=[group_alias_0@0 as issue59.id + Int64(1), COUNT(alias1)@1 as COUNT(DISTINCT issue59.account)]\n AggregateExec: mode=FinalPartitioned, gby=[group_alias_0@0 as group_alias_0], aggr=[COUNT(alias1)]\n CoalesceBatchesExec: target_batch_size=8192\n RepartitionExec: partitioning=Hash([group_alias_0@0], 8), input_partitions=8\n AggregateExec: mode=Partial, gby=[group_alias_0@0 as group_alias_0], aggr=[COUNT(alias1)]\n AggregateExec: mode=FinalPartitioned, gby=[group_alias_0@0 as group_alias_0, alias1@1 as alias1], aggr=[]\n CoalesceBatchesExec: target_batch_size=8192\n RepartitionExec: partitioning=Hash([group_alias_0@0, alias1@1], 8), input_partitions=8\n AggregateExec: mode=Partial, gby=[CAST(id@0 AS Int64) + 1 as group_alias_0, account@1 as alias1], aggr=[]\n ScanTable: table=issue59, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), - - -DROP TABLE IF EXISTS issue59; - -affected_rows: 0 - diff --git a/integration_tests/cases/common/dml/issue-59.sql b/integration_tests/cases/common/dml/issue-59.sql deleted file mode 100644 index b9d1a5f8f2..0000000000 --- a/integration_tests/cases/common/dml/issue-59.sql +++ /dev/null @@ -1,39 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. 
The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - -DROP TABLE IF EXISTS issue59; - -CREATE TABLE issue59 ( - ts timestamp NOT NULL, - id int, - account string, - timestamp KEY (ts)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -SELECT id+1, count(distinct(account)) -FROM issue59 -GROUP BY id+1; - -explain SELECT id+1, count(distinct(account)) -FROM issue59 -GROUP BY id+1; - -DROP TABLE IF EXISTS issue59; diff --git a/integration_tests/cases/common/dml/issue-637.result b/integration_tests/cases/common/dml/issue-637.result deleted file mode 100644 index 93d018c2b1..0000000000 --- a/integration_tests/cases/common/dml/issue-637.result +++ /dev/null @@ -1,92 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- -DROP TABLE IF EXISTS `issue637`; - -affected_rows: 0 - -CREATE TABLE IF NOT EXISTS `issue637` -( - str_tag string TAG, - int_tag int32 TAG, - var_tag VARBINARY TAG, - str_field string, - int_field int32, - bin_field string, - t timestamp NOT NULL, - TIMESTAMP KEY (t) -) ENGINE=Analytic with(enable_ttl = 'false'); - -affected_rows: 0 - -INSERT INTO issue637 - (`str_tag`,`int_tag`,`var_tag`,`str_field`,`int_field`,`bin_field`,`t`) -VALUES - ("t1",1,"v1","s1",1,"b1",1651737067000); - -affected_rows: 1 - -SELECT * FROM `issue637`; - -tsid,t,str_tag,int_tag,var_tag,str_field,int_field,bin_field, -UInt64(15527369105987057363),Timestamp(1651737067000),String("t1"),Int32(1),Varbinary([118, 49]),String("s1"),Int32(1),String("b1"), - - --- Test all data types mentioned in our user document. 
-CREATE TABLE IF NOT EXISTS `issue637_1` -( - t timestamp NOT NULL, - double_filed double, - float_filed float, - str_field string, - var_field varbinary, - u64_field uint64, - u32_field uint32, - u16_field uint16, - u8_field uint8, - i64_field int64, - i32_field int32, - i16_field int16, - i8_field int8, - bool_field boolean, - TIMESTAMP KEY (t) -) ENGINE=Analytic with(enable_ttl = 'false'); - -affected_rows: 0 - -INSERT INTO issue637_1 - (`t`,`double_filed`,`float_filed`,`str_field`,`var_field`,`u64_field`,`u32_field`,`u16_field`,`u8_field`,`i64_field`,`i32_field`,`i16_field`,`i8_field`,`bool_field`) -VALUES - (1651737067000,100,100,"s","v",100,100,100,100,100,100,100,100,false); - -affected_rows: 1 - -SELECT * FROM `issue637_1`; - -tsid,t,double_filed,float_filed,str_field,var_field,u64_field,u32_field,u16_field,u8_field,i64_field,i32_field,i16_field,i8_field,bool_field, -UInt64(0),Timestamp(1651737067000),Double(100.0),Float(100.0),String("s"),Varbinary([118]),UInt64(100),UInt32(100),UInt16(100),UInt8(100),Int64(100),Int32(100),Int16(100),Int8(100),Boolean(false), - - -DROP TABLE IF EXISTS issue637; - -affected_rows: 0 - -DROP TABLE IF EXISTS issue637_1; - -affected_rows: 0 - diff --git a/integration_tests/cases/common/dml/issue-637.sql b/integration_tests/cases/common/dml/issue-637.sql deleted file mode 100644 index ff20b53b94..0000000000 --- a/integration_tests/cases/common/dml/issue-637.sql +++ /dev/null @@ -1,71 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - -DROP TABLE IF EXISTS `issue637`; - -CREATE TABLE IF NOT EXISTS `issue637` -( - str_tag string TAG, - int_tag int32 TAG, - var_tag VARBINARY TAG, - str_field string, - int_field int32, - bin_field string, - t timestamp NOT NULL, - TIMESTAMP KEY (t) -) ENGINE=Analytic with(enable_ttl = 'false'); - - -INSERT INTO issue637 - (`str_tag`,`int_tag`,`var_tag`,`str_field`,`int_field`,`bin_field`,`t`) -VALUES - ("t1",1,"v1","s1",1,"b1",1651737067000); - -SELECT * FROM `issue637`; - --- Test all data types mentioned in our user document. 
-CREATE TABLE IF NOT EXISTS `issue637_1` -( - t timestamp NOT NULL, - double_filed double, - float_filed float, - str_field string, - var_field varbinary, - u64_field uint64, - u32_field uint32, - u16_field uint16, - u8_field uint8, - i64_field int64, - i32_field int32, - i16_field int16, - i8_field int8, - bool_field boolean, - TIMESTAMP KEY (t) -) ENGINE=Analytic with(enable_ttl = 'false'); - -INSERT INTO issue637_1 - (`t`,`double_filed`,`float_filed`,`str_field`,`var_field`,`u64_field`,`u32_field`,`u16_field`,`u8_field`,`i64_field`,`i32_field`,`i16_field`,`i8_field`,`bool_field`) -VALUES - (1651737067000,100,100,"s","v",100,100,100,100,100,100,100,100,false); - -SELECT * FROM `issue637_1`; - -DROP TABLE IF EXISTS issue637; - -DROP TABLE IF EXISTS issue637_1; diff --git a/integration_tests/cases/common/dml/select_filter.result b/integration_tests/cases/common/dml/select_filter.result deleted file mode 100644 index fe2f7bee6b..0000000000 --- a/integration_tests/cases/common/dml/select_filter.result +++ /dev/null @@ -1,81 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- -DROP TABLE IF EXISTS `03_dml_select_filter_table1`; - -affected_rows: 0 - -CREATE TABLE `03_dml_select_filter_table1` ( - `timestamp` timestamp NOT NULL, - `value` int, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -affected_rows: 0 - -INSERT INTO `03_dml_select_filter_table1` - (`timestamp`, `value`) -VALUES - (1, 100), - (2, 1000), - (3, 200), - (4, 30000), - (5, 4400), - (6, 400); - -affected_rows: 6 - -SELECT - `timestamp`, - `value` -FROM - `03_dml_select_filter_table1` -where `value` > 50+50 -ORDER BY - `value` ASC; - -timestamp,value, -Timestamp(3),Int32(200), -Timestamp(6),Int32(400), -Timestamp(2),Int32(1000), -Timestamp(5),Int32(4400), -Timestamp(4),Int32(30000), - - -SELECT - `timestamp`, - `value` -FROM - `03_dml_select_filter_table1` -where `value` > 50+50 and `value` <= 4400 -ORDER BY - `value` ASC; - -timestamp,value, -Timestamp(3),Int32(200), -Timestamp(6),Int32(400), -Timestamp(2),Int32(1000), -Timestamp(5),Int32(4400), - - -DROP TABLE `03_dml_select_filter_table1`; - -affected_rows: 0 - diff --git a/integration_tests/cases/common/dml/select_filter.sql b/integration_tests/cases/common/dml/select_filter.sql deleted file mode 100644 index 9541916487..0000000000 --- a/integration_tests/cases/common/dml/select_filter.sql +++ /dev/null @@ -1,62 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. 
The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - - -DROP TABLE IF EXISTS `03_dml_select_filter_table1`; - -CREATE TABLE `03_dml_select_filter_table1` ( - `timestamp` timestamp NOT NULL, - `value` int, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - - -INSERT INTO `03_dml_select_filter_table1` - (`timestamp`, `value`) -VALUES - (1, 100), - (2, 1000), - (3, 200), - (4, 30000), - (5, 4400), - (6, 400); - - -SELECT - `timestamp`, - `value` -FROM - `03_dml_select_filter_table1` -where `value` > 50+50 -ORDER BY - `value` ASC; - - -SELECT - `timestamp`, - `value` -FROM - `03_dml_select_filter_table1` -where `value` > 50+50 and `value` <= 4400 -ORDER BY - `value` ASC; - -DROP TABLE `03_dml_select_filter_table1`; diff --git a/integration_tests/cases/common/dml/select_having.result b/integration_tests/cases/common/dml/select_having.result deleted file mode 100644 index 18b3340c0d..0000000000 --- a/integration_tests/cases/common/dml/select_having.result +++ /dev/null @@ -1,80 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- -DROP TABLE IF EXISTS `03_dml_select_having_table1`; - -affected_rows: 0 - -CREATE TABLE `03_dml_select_having_table1` ( - `timestamp` timestamp NOT NULL, - `value` int, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -affected_rows: 0 - -INSERT INTO `03_dml_select_having_table1` - (`timestamp`, `value`) -VALUES - (1, 101), - (2, 1002), - (3, 203), - (4, 30004), - (5, 4405), - (6, 406); - -affected_rows: 6 - -SELECT - `value` % 3, - MAX(`value`) AS max -FROM - `03_dml_select_having_table1` -GROUP BY - `value` % 3 -ORDER BY - max ASC; - -03_dml_select_having_table1.value % Int64(3),max, -Int64(2),Int32(203), -Int64(0),Int32(1002), -Int64(1),Int32(30004), - - -SELECT - `value` % 3, - MAX(`value`) AS max -FROM - `03_dml_select_having_table1` -GROUP BY - `value` % 3 -HAVING - max > 10000 -ORDER BY - max ASC; - -03_dml_select_having_table1.value % Int64(3),max, -Int64(1),Int32(30004), - - -DROP TABLE `03_dml_select_having_table1`; - -affected_rows: 0 - diff --git a/integration_tests/cases/common/dml/select_having.sql b/integration_tests/cases/common/dml/select_having.sql deleted file mode 100644 index 8f009adf74..0000000000 --- a/integration_tests/cases/common/dml/select_having.sql +++ /dev/null @@ -1,65 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - -DROP TABLE IF EXISTS `03_dml_select_having_table1`; - -CREATE TABLE `03_dml_select_having_table1` ( - `timestamp` timestamp NOT NULL, - `value` int, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - - -INSERT INTO `03_dml_select_having_table1` - (`timestamp`, `value`) -VALUES - (1, 101), - (2, 1002), - (3, 203), - (4, 30004), - (5, 4405), - (6, 406); - - -SELECT - `value` % 3, - MAX(`value`) AS max -FROM - `03_dml_select_having_table1` -GROUP BY - `value` % 3 -ORDER BY - max ASC; - - -SELECT - `value` % 3, - MAX(`value`) AS max -FROM - `03_dml_select_having_table1` -GROUP BY - `value` % 3 -HAVING - max > 10000 -ORDER BY - max ASC; - -DROP TABLE `03_dml_select_having_table1`; diff --git a/integration_tests/cases/common/dml/select_order.result b/integration_tests/cases/common/dml/select_order.result deleted file mode 100644 index 2daf44fe10..0000000000 --- a/integration_tests/cases/common/dml/select_order.result +++ /dev/null @@ -1,81 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. 
You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- -DROP TABLE IF EXISTS `03_dml_select_order_table1`; - -affected_rows: 0 - -CREATE TABLE `03_dml_select_order_table1` ( - `timestamp` timestamp NOT NULL, - `value` int, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -affected_rows: 0 - -INSERT INTO `03_dml_select_order_table1` (`timestamp`, `value`) -VALUES - (1, 100), - (2, 1000), - (3, 200), - (4, 30000), - (5, 4400), - (6, 400); - -affected_rows: 6 - -SELECT - `timestamp`, - `value` -FROM - `03_dml_select_order_table1` -ORDER BY - `value` ASC; - -timestamp,value, -Timestamp(1),Int32(100), -Timestamp(3),Int32(200), -Timestamp(6),Int32(400), -Timestamp(2),Int32(1000), -Timestamp(5),Int32(4400), -Timestamp(4),Int32(30000), - - -SELECT - `timestamp`, - `value` -FROM - `03_dml_select_order_table1` -ORDER BY - `value` DESC; - -timestamp,value, -Timestamp(4),Int32(30000), -Timestamp(5),Int32(4400), -Timestamp(2),Int32(1000), -Timestamp(6),Int32(400), -Timestamp(3),Int32(200), -Timestamp(1),Int32(100), - - -DROP TABLE `03_dml_select_order_table1`; - -affected_rows: 0 - diff --git a/integration_tests/cases/common/dml/select_order.sql b/integration_tests/cases/common/dml/select_order.sql deleted file mode 100644 index 9a45515782..0000000000 --- a/integration_tests/cases/common/dml/select_order.sql +++ /dev/null @@ -1,58 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- - -DROP TABLE IF EXISTS `03_dml_select_order_table1`; - -CREATE TABLE `03_dml_select_order_table1` ( - `timestamp` timestamp NOT NULL, - `value` int, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - - -INSERT INTO `03_dml_select_order_table1` (`timestamp`, `value`) -VALUES - (1, 100), - (2, 1000), - (3, 200), - (4, 30000), - (5, 4400), - (6, 400); - - -SELECT - `timestamp`, - `value` -FROM - `03_dml_select_order_table1` -ORDER BY - `value` ASC; - - -SELECT - `timestamp`, - `value` -FROM - `03_dml_select_order_table1` -ORDER BY - `value` DESC; - -DROP TABLE `03_dml_select_order_table1`; diff --git a/integration_tests/cases/common/dummy/select_1.result b/integration_tests/cases/common/dummy/select_1.result deleted file mode 100644 index f88c94793c..0000000000 --- a/integration_tests/cases/common/dummy/select_1.result +++ /dev/null @@ -1,81 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- -SELECT 1; - -Int64(1), -Int64(1), - - -SELECT x; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. Caused by: Failed to create plan, err:Failed to generate datafusion plan, err:Schema error: No field named x. sql:SELECT x;" }) - -SELECT 'a'; - -Utf8("a"), -String("a"), - - -SELECT NOT(1=1); - -NOT Int64(1) = Int64(1), -Boolean(false), - - -SELECT TRUE; - -Boolean(true), -Boolean(true), - - -SELECT FALSE; - -Boolean(false), -Boolean(false), - - -SELECT NOT(TRUE); - -NOT Boolean(true), -Boolean(false), - - -SELECT 10 - 2 * 3; - -Int64(10) - Int64(2) * Int64(3), -Int64(4), - - -SELECT (10 - 2) * 3; - -Int64(10) - Int64(2) * Int64(3), -Int64(24), - - --- FIXME -SELECT "That is not good."; - -Utf8("That is not good."), -String("That is not good."), - - -SELECT *; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. Caused by: Failed to create plan, err:Failed to generate datafusion plan, err:Error during planning: SELECT * with no tables specified is not valid sql:SELECT *;" }) - diff --git a/integration_tests/cases/common/dummy/select_1.sql b/integration_tests/cases/common/dummy/select_1.sql deleted file mode 100644 index e43d60ffdd..0000000000 --- a/integration_tests/cases/common/dummy/select_1.sql +++ /dev/null @@ -1,41 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. 
You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - -SELECT 1; - -SELECT x; - -SELECT 'a'; - -SELECT NOT(1=1); - -SELECT TRUE; - -SELECT FALSE; - -SELECT NOT(TRUE); - -SELECT 10 - 2 * 3; - -SELECT (10 - 2) * 3; - --- FIXME -SELECT "That is not good."; - -SELECT *; diff --git a/integration_tests/cases/common/explain/explain.result b/integration_tests/cases/common/explain/explain.result deleted file mode 100644 index da50788b66..0000000000 --- a/integration_tests/cases/common/explain/explain.result +++ /dev/null @@ -1,37 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- -DROP TABLE `04_explain_t`; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. Caused by: Failed to create plan, err:Table not found, table:04_explain_t sql:DROP TABLE `04_explain_t`;" }) - -CREATE TABLE `04_explain_t` (t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE=Analytic; - -affected_rows: 0 - -EXPLAIN SELECT t FROM `04_explain_t`; - -plan_type,plan, -String("logical_plan"),String("TableScan: 04_explain_t projection=[t]"), -String("physical_plan"),String("ScanTable: table=04_explain_t, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), - - -DROP TABLE `04_explain_t`; - -affected_rows: 0 - diff --git a/integration_tests/cases/common/explain/explain.sql b/integration_tests/cases/common/explain/explain.sql deleted file mode 100644 index ed1d443b71..0000000000 --- a/integration_tests/cases/common/explain/explain.sql +++ /dev/null @@ -1,26 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- - -DROP TABLE `04_explain_t`; - -CREATE TABLE `04_explain_t` (t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE=Analytic; - -EXPLAIN SELECT t FROM `04_explain_t`; - -DROP TABLE `04_explain_t`; \ No newline at end of file diff --git a/integration_tests/cases/common/function/aggregate.result b/integration_tests/cases/common/function/aggregate.result deleted file mode 100644 index 8f956cf933..0000000000 --- a/integration_tests/cases/common/function/aggregate.result +++ /dev/null @@ -1,172 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- -DROP TABLE IF EXISTS `02_function_aggregate_table1`; - -affected_rows: 0 - -CREATE TABLE `02_function_aggregate_table1` ( - `timestamp` timestamp NOT NULL, - `arch` string TAG, - `datacenter` string TAG, - `value` int, - `uvalue` uint64, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -affected_rows: 0 - -INSERT INTO `02_function_aggregate_table1` - (`timestamp`, `arch`, `datacenter`, `value`, `uvalue`) -VALUES - (1658304762, 'x86-64', 'china', 100, 10), - (1658304763, 'x86-64', 'china', 200, 10), - (1658304762, 'arm64', 'china', 110, 0), - (1658304763, 'arm64', 'china', 210, 0); - -affected_rows: 4 - -SELECT sum(`value`) FROM `02_function_aggregate_table1`; - -SUM(02_function_aggregate_table1.value), -Int64(620), - - -SELECT - `arch`, - sum(`value`) -FROM - `02_function_aggregate_table1` -WHERE - `timestamp` BETWEEN 1658304763 AND 1658304763 -GROUP BY - `arch` -ORDER BY - `arch` DESC; - -arch,SUM(02_function_aggregate_table1.value), -String("x86-64"),Int64(200), -String("arm64"),Int64(210), - - -SELECT count(`value`) FROM `02_function_aggregate_table1`; - -COUNT(02_function_aggregate_table1.value), -Int64(4), - - -SELECT avg(`value`) FROM `02_function_aggregate_table1`; - -AVG(02_function_aggregate_table1.value), -Double(155.0), - - -SELECT max(`value`) FROM `02_function_aggregate_table1`; - -MAX(02_function_aggregate_table1.value), -Int32(210), - - -SELECT min(`value`) FROM `02_function_aggregate_table1`; - -MIN(02_function_aggregate_table1.value), -Int32(100), - - -SELECT min(`uvalue`) - max(`uvalue`) FROM `02_function_aggregate_table1`; - -MIN(02_function_aggregate_table1.uvalue) - MAX(02_function_aggregate_table1.uvalue), -UInt64(18446744073709551606), - - --- duplicate with last insert -INSERT INTO `02_function_aggregate_table1` - (`timestamp`, `arch`, `datacenter`, `value`) -VALUES - (1658304762, 'x86-64', 'china', 100); - -affected_rows: 1 - -SELECT count(`arch`) FROM `02_function_aggregate_table1`; - -COUNT(02_function_aggregate_table1.arch), -Int64(4), - - -SELECT distinct(`arch`) FROM `02_function_aggregate_table1` ORDER BY `arch` DESC; - -arch, -String("x86-64"), -String("arm64"), - - -SELECT count(distinct(`arch`)) FROM `02_function_aggregate_table1`; 
- -COUNT(DISTINCT 02_function_aggregate_table1.arch), -Int64(2), - - -CREATE TABLE `02_function_aggregate_table2` ( - `timestamp` timestamp NOT NULL, - `arch` string TAG, - `datacenter` string TAG, - `value` int, - `uvalue` uint64, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false', - update_mode = 'append' -); - -affected_rows: 0 - -INSERT INTO `02_function_aggregate_table2` - (`timestamp`, `arch`, `datacenter`, `value`, `uvalue`) -VALUES - (1658304762, 'x86-64', 'china', 100, 10), - (1658304763, 'x86-64', 'china', 200, 10), - (1658304762, 'arm64', 'china', 110, 0), - (1658304763, 'arm64', 'china', 210, 0); - -affected_rows: 4 - --- The should select empty column -SELECT count(*) FROM `02_function_aggregate_table1`; - -COUNT(*), -Int64(4), - - --- Same with before, but query from sst --- SQLNESS ARG pre_cmd=flush -SELECT count(*) FROM `02_function_aggregate_table1`; - -COUNT(*), -Int64(4), - - -DROP TABLE `02_function_aggregate_table1`; - -affected_rows: 0 - -DROP TABLE `02_function_aggregate_table2`; - -affected_rows: 0 - diff --git a/integration_tests/cases/common/function/aggregate.sql b/integration_tests/cases/common/function/aggregate.sql deleted file mode 100644 index 94b4a21c45..0000000000 --- a/integration_tests/cases/common/function/aggregate.sql +++ /dev/null @@ -1,107 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- - -DROP TABLE IF EXISTS `02_function_aggregate_table1`; - -CREATE TABLE `02_function_aggregate_table1` ( - `timestamp` timestamp NOT NULL, - `arch` string TAG, - `datacenter` string TAG, - `value` int, - `uvalue` uint64, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -INSERT INTO `02_function_aggregate_table1` - (`timestamp`, `arch`, `datacenter`, `value`, `uvalue`) -VALUES - (1658304762, 'x86-64', 'china', 100, 10), - (1658304763, 'x86-64', 'china', 200, 10), - (1658304762, 'arm64', 'china', 110, 0), - (1658304763, 'arm64', 'china', 210, 0); - - -SELECT sum(`value`) FROM `02_function_aggregate_table1`; - -SELECT - `arch`, - sum(`value`) -FROM - `02_function_aggregate_table1` -WHERE - `timestamp` BETWEEN 1658304763 AND 1658304763 -GROUP BY - `arch` -ORDER BY - `arch` DESC; - - -SELECT count(`value`) FROM `02_function_aggregate_table1`; - -SELECT avg(`value`) FROM `02_function_aggregate_table1`; - -SELECT max(`value`) FROM `02_function_aggregate_table1`; - -SELECT min(`value`) FROM `02_function_aggregate_table1`; - -SELECT min(`uvalue`) - max(`uvalue`) FROM `02_function_aggregate_table1`; - --- duplicate with last insert -INSERT INTO `02_function_aggregate_table1` - (`timestamp`, `arch`, `datacenter`, `value`) -VALUES - (1658304762, 'x86-64', 'china', 100); - -SELECT count(`arch`) FROM `02_function_aggregate_table1`; - -SELECT distinct(`arch`) FROM `02_function_aggregate_table1` ORDER BY `arch` DESC; - -SELECT count(distinct(`arch`)) FROM `02_function_aggregate_table1`; - -CREATE TABLE `02_function_aggregate_table2` ( - `timestamp` timestamp NOT NULL, - `arch` string TAG, - `datacenter` string TAG, - `value` int, - `uvalue` uint64, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false', - update_mode = 'append' -); - -INSERT INTO `02_function_aggregate_table2` - (`timestamp`, `arch`, `datacenter`, `value`, `uvalue`) -VALUES - (1658304762, 'x86-64', 'china', 100, 10), - (1658304763, 'x86-64', 'china', 200, 10), - (1658304762, 'arm64', 'china', 110, 0), - (1658304763, 'arm64', 'china', 210, 0); - --- The should select empty column -SELECT count(*) FROM `02_function_aggregate_table1`; - --- Same with before, but query from sst --- SQLNESS ARG pre_cmd=flush -SELECT count(*) FROM `02_function_aggregate_table1`; - -DROP TABLE `02_function_aggregate_table1`; -DROP TABLE `02_function_aggregate_table2`; diff --git a/integration_tests/cases/common/function/date_bin.result b/integration_tests/cases/common/function/date_bin.result deleted file mode 100644 index 69985c9254..0000000000 --- a/integration_tests/cases/common/function/date_bin.result +++ /dev/null @@ -1,81 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- -DROP TABLE IF EXISTS `02_function_date_bin_table`; - -affected_rows: 0 - -CREATE TABLE `02_function_date_bin_table` ( - `timestamp` timestamp NOT NULL, - `value` int, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -affected_rows: 0 - -INSERT INTO `02_function_date_bin_table` -(`timestamp`, `value`) -VALUES - (1659577423000, 1), - (1659577422000, 2), - (1659577320000, 3), - (1659571200000, 4), - (1659484800000, 5), - (1656777600000, 6); - -affected_rows: 6 - -SELECT `timestamp`, DATE_BIN(INTERVAL '30' second, `timestamp`, TIMESTAMP '2001-01-01T00:00:00Z') as `time` FROM `02_function_date_bin_table` order by `timestamp`; - -timestamp,time, -Timestamp(1656777600000),Timestamp(1656777600000), -Timestamp(1659484800000),Timestamp(1659484800000), -Timestamp(1659571200000),Timestamp(1659571200000), -Timestamp(1659577320000),Timestamp(1659577320000), -Timestamp(1659577422000),Timestamp(1659577410000), -Timestamp(1659577423000),Timestamp(1659577410000), - - -SELECT `timestamp`, DATE_BIN(INTERVAL '15' minute, `timestamp`, TIMESTAMP '2001-01-01T00:00:00Z') as `time` FROM `02_function_date_bin_table` order by `timestamp`; - -timestamp,time, -Timestamp(1656777600000),Timestamp(1656777600000), -Timestamp(1659484800000),Timestamp(1659484800000), -Timestamp(1659571200000),Timestamp(1659571200000), -Timestamp(1659577320000),Timestamp(1659576600000), -Timestamp(1659577422000),Timestamp(1659576600000), -Timestamp(1659577423000),Timestamp(1659576600000), - - -SELECT `timestamp`, DATE_BIN(INTERVAL '2' hour, `timestamp`, TIMESTAMP '2001-01-01T00:00:00Z') as `time` FROM `02_function_date_bin_table` order by `timestamp`; - -timestamp,time, -Timestamp(1656777600000),Timestamp(1656777600000), -Timestamp(1659484800000),Timestamp(1659484800000), -Timestamp(1659571200000),Timestamp(1659571200000), -Timestamp(1659577320000),Timestamp(1659571200000), -Timestamp(1659577422000),Timestamp(1659571200000), -Timestamp(1659577423000),Timestamp(1659571200000), - - -DROP TABLE `02_function_date_bin_table`; - -affected_rows: 0 - diff --git a/integration_tests/cases/common/function/date_bin.sql b/integration_tests/cases/common/function/date_bin.sql deleted file mode 100644 index f9b9680a48..0000000000 --- a/integration_tests/cases/common/function/date_bin.sql +++ /dev/null @@ -1,44 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- - -DROP TABLE IF EXISTS `02_function_date_bin_table`; - -CREATE TABLE `02_function_date_bin_table` ( - `timestamp` timestamp NOT NULL, - `value` int, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -INSERT INTO `02_function_date_bin_table` -(`timestamp`, `value`) -VALUES - (1659577423000, 1), - (1659577422000, 2), - (1659577320000, 3), - (1659571200000, 4), - (1659484800000, 5), - (1656777600000, 6); - -SELECT `timestamp`, DATE_BIN(INTERVAL '30' second, `timestamp`, TIMESTAMP '2001-01-01T00:00:00Z') as `time` FROM `02_function_date_bin_table` order by `timestamp`; -SELECT `timestamp`, DATE_BIN(INTERVAL '15' minute, `timestamp`, TIMESTAMP '2001-01-01T00:00:00Z') as `time` FROM `02_function_date_bin_table` order by `timestamp`; -SELECT `timestamp`, DATE_BIN(INTERVAL '2' hour, `timestamp`, TIMESTAMP '2001-01-01T00:00:00Z') as `time` FROM `02_function_date_bin_table` order by `timestamp`; - -DROP TABLE `02_function_date_bin_table`; diff --git a/integration_tests/cases/common/function/thetasketch_distinct.result b/integration_tests/cases/common/function/thetasketch_distinct.result deleted file mode 100644 index 3aa8aae81b..0000000000 --- a/integration_tests/cases/common/function/thetasketch_distinct.result +++ /dev/null @@ -1,467 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- -DROP TABLE IF EXISTS `02_function_thetasketch_distinct_table`; - -affected_rows: 0 - -CREATE TABLE `02_function_thetasketch_distinct_table` ( - `timestamp` timestamp NOT NULL, - `arch` string TAG, - `datacenter` string TAG, - `value` int, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -affected_rows: 0 - -INSERT INTO `02_function_thetasketch_distinct_table` -(`timestamp`, `arch`, `datacenter`, `value`) -VALUES - (1658304762, 'x86', 'china', 75), - (1658304763, 'x86', 'china', 15), - (1658304764, 'x86', 'china', 87), - (1658304765, 'x86', 'china', 6), - (1658304766, 'x86', 'china', 112), - (1658304767, 'x86', 'china', 5), - (1658304768, 'x86', 'china', 67), - (1658304769, 'x86', 'china', 41), - (1658304770, 'x86', 'china', 93), - (1658304771, 'x86', 'china', 9), - (1658304772, 'x86', 'china', 82), - (1658304773, 'x86', 'china', 97), - (1658304774, 'x86', 'china', 121), - (1658304775, 'x86', 'china', 97), - (1658304776, 'x86', 'china', 114), - (1658304777, 'x86', 'china', 8), - (1658304778, 'x86', 'china', 9), - (1658304779, 'x86', 'china', 13), - (1658304780, 'x86', 'china', 8), - (1658304781, 'x86', 'china', 146), - (1658304782, 'x86', 'china', 131), - (1658304783, 'x86', 'china', 45), - (1658304784, 'x86', 'china', 57), - (1658304785, 'x86', 'china', 74), - (1658304786, 'x86', 'china', 25), - (1658304787, 'x86', 'china', 67), - (1658304788, 'x86', 'china', 108), - (1658304789, 'x86', 'china', 32), - (1658304790, 'x86', 'china', 67), - (1658304791, 'x86', 'china', 61), - (1658304792, 'x86', 'china', 7), - (1658304793, 'x86', 'china', 107), - (1658304794, 'x86', 'china', 18), - (1658304795, 'x86', 'china', 127), - (1658304796, 'x86', 'china', 88), - (1658304797, 'x86', 'china', 21), - (1658304798, 'x86', 'china', 85), - (1658304799, 'x86', 'china', 117), - (1658304800, 'x86', 'china', 51), - (1658304801, 'x86', 'china', 23), - (1658304802, 'x86', 'china', 74), - (1658304803, 'x86', 'china', 77), - (1658304804, 'x86', 'china', 66), - (1658304805, 'x86', 'china', 84), - (1658304806, 'x86', 'china', 1), - (1658304807, 'x86', 'china', 108), - (1658304808, 'x86', 'china', 50), - (1658304809, 'x86', 'china', 80), - (1658304810, 'x86', 'china', 30), - (1658304811, 'x86', 'china', 36), - (1658304812, 'x86', 'china', 110), - (1658304813, 'x86', 'china', 22), - (1658304814, 'x86', 'china', 126), - (1658304815, 'x86', 'china', 142), - (1658304816, 'x86', 'china', 17), - (1658304817, 'x86', 'china', 38), - (1658304818, 'x86', 'china', 40), - (1658304819, 'x86', 'china', 114), - (1658304820, 'x86', 'china', 18), - (1658304821, 'x86', 'china', 8), - (1658304822, 'x86', 'china', 144), - (1658304823, 'x86', 'china', 61), - (1658304824, 'x86', 'china', 49), - (1658304825, 'x86', 'china', 28), - (1658304826, 'x86', 'china', 80), - (1658304827, 'x86', 'china', 149), - (1658304828, 'x86', 'china', 144), - (1658304829, 'x86', 'china', 74), - (1658304830, 'x86', 'china', 130), - (1658304831, 'x86', 'china', 116), - (1658304832, 'x86', 'china', 72), - (1658304833, 'x86', 'china', 61), - (1658304834, 'x86', 'china', 24), - (1658304835, 'x86', 'china', 47), - (1658304836, 'x86', 'china', 35), - (1658304837, 'x86', 'china', 62), - (1658304838, 'x86', 'china', 148), - (1658304839, 'x86', 'china', 111), - (1658304840, 'x86', 'china', 86), - (1658304841, 'x86', 'china', 139), - (1658304842, 'x86', 'china', 83), - (1658304843, 'x86', 'china', 144), - (1658304844, 'x86', 'china', 87), - (1658304845, 'x86', 'china', 108), - (1658304846, 'x86', 'china', 105), - (1658304847, 'x86', 
'china', 20), - (1658304848, 'x86', 'china', 35), - (1658304849, 'x86', 'china', 132), - (1658304850, 'x86', 'china', 80), - (1658304851, 'x86', 'china', 24), - (1658304852, 'x86', 'china', 139), - (1658304853, 'x86', 'china', 43), - (1658304854, 'x86', 'china', 68), - (1658304855, 'x86', 'china', 65), - (1658304856, 'x86', 'china', 83), - (1658304857, 'x86', 'china', 101), - (1658304858, 'x86', 'china', 148), - (1658304859, 'x86', 'china', 112), - (1658304860, 'x86', 'china', 33), - (1658304861, 'x86', 'china', 19), - (1658304862, 'x86', 'china', 6), - (1658304863, 'x86', 'china', 73), - (1658304864, 'x86', 'china', 134), - (1658304865, 'x86', 'china', 61), - (1658304866, 'x86', 'china', 9), - (1658304867, 'x86', 'china', 103), - (1658304868, 'x86', 'china', 55), - (1658304869, 'x86', 'china', 62), - (1658304870, 'x86', 'china', 58), - (1658304871, 'x86', 'china', 85), - (1658304872, 'x86', 'china', 83), - (1658304873, 'x86', 'china', 39), - (1658304874, 'x86', 'china', 48), - (1658304875, 'x86', 'china', 95), - (1658304876, 'x86', 'china', 16), - (1658304877, 'x86', 'china', 144), - (1658304878, 'x86', 'china', 60), - (1658304879, 'x86', 'china', 83), - (1658304880, 'x86', 'china', 140), - (1658304881, 'x86', 'china', 48), - (1658304882, 'x86', 'china', 117), - (1658304883, 'x86', 'china', 146), - (1658304884, 'x86', 'china', 31), - (1658304885, 'x86', 'china', 143), - (1658304886, 'x86', 'china', 102), - (1658304887, 'x86', 'china', 98), - (1658304888, 'x86', 'china', 41), - (1658304889, 'x86', 'china', 50), - (1658304890, 'x86', 'china', 46), - (1658304891, 'x86', 'china', 143), - (1658304892, 'x86', 'china', 82), - (1658304893, 'x86', 'china', 142), - (1658304894, 'x86', 'china', 110), - (1658304895, 'x86', 'china', 113), - (1658304896, 'x86', 'china', 89), - (1658304897, 'x86', 'china', 38), - (1658304898, 'x86', 'china', 92), - (1658304899, 'x86', 'china', 137), - (1658304900, 'x86', 'china', 120), - (1658304901, 'x86', 'china', 74), - (1658304902, 'x86', 'china', 64), - (1658304903, 'x86', 'china', 134), - (1658304904, 'x86', 'china', 121), - (1658304905, 'x86', 'china', 40), - (1658304906, 'x86', 'china', 148), - (1658304907, 'x86', 'china', 37), - (1658304908, 'x86', 'china', 72), - (1658304909, 'x86', 'china', 131), - (1658304910, 'x86', 'china', 142), - (1658304911, 'x86', 'china', 53), - (1658304912, 'x86', 'china', 46), - (1658304913, 'x86', 'china', 83), - (1658304914, 'x86', 'china', 52), - (1658304915, 'x86', 'china', 83), - (1658304916, 'x86', 'china', 150), - (1658304917, 'x86', 'china', 13), - (1658304918, 'x86', 'china', 133), - (1658304919, 'x86', 'china', 69), - (1658304920, 'x86', 'china', 46), - (1658304921, 'x86', 'china', 13), - (1658304922, 'x86', 'china', 4), - (1658304923, 'x86', 'china', 84), - (1658304924, 'x86', 'china', 116), - (1658304925, 'x86', 'china', 54), - (1658304926, 'x86', 'china', 38), - (1658304927, 'x86', 'china', 125), - (1658304928, 'x86', 'china', 28), - (1658304929, 'x86', 'china', 45), - (1658304930, 'x86', 'china', 147), - (1658304931, 'x86', 'china', 36), - (1658304932, 'x86', 'china', 6), - (1658304933, 'x86', 'china', 2), - (1658304934, 'x86', 'china', 16), - (1658304935, 'x86', 'china', 149), - (1658304936, 'x86', 'china', 109), - (1658304937, 'x86', 'china', 22), - (1658304938, 'x86', 'china', 143), - (1658304939, 'x86', 'china', 46), - (1658304940, 'x86', 'china', 139), - (1658304941, 'x86', 'china', 128), - (1658304942, 'x86', 'china', 5), - (1658304943, 'x86', 'china', 9), - (1658304944, 'x86', 'china', 80), - (1658304945, 'x86', 
'china', 56), - (1658304946, 'x86', 'china', 94), - (1658304947, 'x86', 'china', 115), - (1658304948, 'x86', 'china', 9), - (1658304949, 'x86', 'china', 111), - (1658304950, 'x86', 'china', 118), - (1658304951, 'x86', 'china', 77), - (1658304952, 'x86', 'china', 43), - (1658304953, 'x86', 'china', 65), - (1658304954, 'x86', 'china', 129), - (1658304955, 'x86', 'china', 118), - (1658304956, 'x86', 'china', 68), - (1658304957, 'x86', 'china', 10), - (1658304958, 'x86', 'china', 11), - (1658304959, 'x86', 'china', 82), - (1658304960, 'x86', 'china', 115), - (1658304961, 'x86', 'china', 86), - (1658304762, 'arm', 'china', 145), - (1658304763, 'arm', 'china', 36), - (1658304764, 'arm', 'china', 11), - (1658304765, 'arm', 'china', 3), - (1658304766, 'arm', 'china', 135), - (1658304767, 'arm', 'china', 117), - (1658304768, 'arm', 'china', 137), - (1658304769, 'arm', 'china', 140), - (1658304770, 'arm', 'china', 135), - (1658304771, 'arm', 'china', 91), - (1658304772, 'arm', 'china', 143), - (1658304773, 'arm', 'china', 25), - (1658304774, 'arm', 'china', 93), - (1658304775, 'arm', 'china', 137), - (1658304776, 'arm', 'china', 10), - (1658304777, 'arm', 'china', 15), - (1658304778, 'arm', 'china', 106), - (1658304779, 'arm', 'china', 144), - (1658304780, 'arm', 'china', 40), - (1658304781, 'arm', 'china', 118), - (1658304782, 'arm', 'china', 5), - (1658304783, 'arm', 'china', 37), - (1658304784, 'arm', 'china', 61), - (1658304785, 'arm', 'china', 103), - (1658304786, 'arm', 'china', 73), - (1658304787, 'arm', 'china', 133), - (1658304788, 'arm', 'china', 147), - (1658304789, 'arm', 'china', 2), - (1658304790, 'arm', 'china', 132), - (1658304791, 'arm', 'china', 52), - (1658304792, 'arm', 'china', 85), - (1658304793, 'arm', 'china', 38), - (1658304794, 'arm', 'china', 137), - (1658304795, 'arm', 'china', 67), - (1658304796, 'arm', 'china', 47), - (1658304797, 'arm', 'china', 46), - (1658304798, 'arm', 'china', 85), - (1658304799, 'arm', 'china', 113), - (1658304800, 'arm', 'china', 7), - (1658304801, 'arm', 'china', 125), - (1658304802, 'arm', 'china', 65), - (1658304803, 'arm', 'china', 89), - (1658304804, 'arm', 'china', 36), - (1658304805, 'arm', 'china', 123), - (1658304806, 'arm', 'china', 40), - (1658304807, 'arm', 'china', 101), - (1658304808, 'arm', 'china', 39), - (1658304809, 'arm', 'china', 0), - (1658304810, 'arm', 'china', 120), - (1658304811, 'arm', 'china', 100), - (1658304812, 'arm', 'china', 127), - (1658304813, 'arm', 'china', 58), - (1658304814, 'arm', 'china', 110), - (1658304815, 'arm', 'china', 107), - (1658304816, 'arm', 'china', 25), - (1658304817, 'arm', 'china', 85), - (1658304818, 'arm', 'china', 61), - (1658304819, 'arm', 'china', 19), - (1658304820, 'arm', 'china', 150), - (1658304821, 'arm', 'china', 30), - (1658304822, 'arm', 'china', 47), - (1658304823, 'arm', 'china', 65), - (1658304824, 'arm', 'china', 70), - (1658304825, 'arm', 'china', 52), - (1658304826, 'arm', 'china', 54), - (1658304827, 'arm', 'china', 145), - (1658304828, 'arm', 'china', 0), - (1658304829, 'arm', 'china', 148), - (1658304830, 'arm', 'china', 76), - (1658304831, 'arm', 'china', 59), - (1658304832, 'arm', 'china', 47), - (1658304833, 'arm', 'china', 12), - (1658304834, 'arm', 'china', 83), - (1658304835, 'arm', 'china', 135), - (1658304836, 'arm', 'china', 37), - (1658304837, 'arm', 'china', 94), - (1658304838, 'arm', 'china', 129), - (1658304839, 'arm', 'china', 105), - (1658304840, 'arm', 'china', 0), - (1658304841, 'arm', 'china', 5), - (1658304842, 'arm', 'china', 84), - (1658304843, 
'arm', 'china', 74), - (1658304844, 'arm', 'china', 132), - (1658304845, 'arm', 'china', 138), - (1658304846, 'arm', 'china', 75), - (1658304847, 'arm', 'china', 92), - (1658304848, 'arm', 'china', 55), - (1658304849, 'arm', 'china', 51), - (1658304850, 'arm', 'china', 103), - (1658304851, 'arm', 'china', 20), - (1658304852, 'arm', 'china', 7), - (1658304853, 'arm', 'china', 19), - (1658304854, 'arm', 'china', 97), - (1658304855, 'arm', 'china', 15), - (1658304856, 'arm', 'china', 28), - (1658304857, 'arm', 'china', 60), - (1658304858, 'arm', 'china', 56), - (1658304859, 'arm', 'china', 23), - (1658304860, 'arm', 'china', 146), - (1658304861, 'arm', 'china', 78), - (1658304862, 'arm', 'china', 106), - (1658304863, 'arm', 'china', 128), - (1658304864, 'arm', 'china', 119), - (1658304865, 'arm', 'china', 84), - (1658304866, 'arm', 'china', 7), - (1658304867, 'arm', 'china', 147), - (1658304868, 'arm', 'china', 124), - (1658304869, 'arm', 'china', 130), - (1658304870, 'arm', 'china', 20), - (1658304871, 'arm', 'china', 44), - (1658304872, 'arm', 'china', 82), - (1658304873, 'arm', 'china', 148), - (1658304874, 'arm', 'china', 84), - (1658304875, 'arm', 'china', 125), - (1658304876, 'arm', 'china', 130), - (1658304877, 'arm', 'china', 136), - (1658304878, 'arm', 'china', 6), - (1658304879, 'arm', 'china', 41), - (1658304880, 'arm', 'china', 29), - (1658304881, 'arm', 'china', 122), - (1658304882, 'arm', 'china', 3), - (1658304883, 'arm', 'china', 115), - (1658304884, 'arm', 'china', 101), - (1658304885, 'arm', 'china', 132), - (1658304886, 'arm', 'china', 7), - (1658304887, 'arm', 'china', 81), - (1658304888, 'arm', 'china', 57), - (1658304889, 'arm', 'china', 115), - (1658304890, 'arm', 'china', 132), - (1658304891, 'arm', 'china', 100), - (1658304892, 'arm', 'china', 20), - (1658304893, 'arm', 'china', 147), - (1658304894, 'arm', 'china', 42), - (1658304895, 'arm', 'china', 95), - (1658304896, 'arm', 'china', 21), - (1658304897, 'arm', 'china', 75), - (1658304898, 'arm', 'china', 86), - (1658304899, 'arm', 'china', 63), - (1658304900, 'arm', 'china', 45), - (1658304901, 'arm', 'china', 6), - (1658304902, 'arm', 'china', 37), - (1658304903, 'arm', 'china', 111), - (1658304904, 'arm', 'china', 106), - (1658304905, 'arm', 'china', 86), - (1658304906, 'arm', 'china', 76), - (1658304907, 'arm', 'china', 52), - (1658304908, 'arm', 'china', 38), - (1658304909, 'arm', 'china', 78), - (1658304910, 'arm', 'china', 111), - (1658304911, 'arm', 'china', 121), - (1658304912, 'arm', 'china', 67), - (1658304913, 'arm', 'china', 134), - (1658304914, 'arm', 'china', 105), - (1658304915, 'arm', 'china', 105), - (1658304916, 'arm', 'china', 70), - (1658304917, 'arm', 'china', 116), - (1658304918, 'arm', 'china', 81), - (1658304919, 'arm', 'china', 102), - (1658304920, 'arm', 'china', 87), - (1658304921, 'arm', 'china', 85), - (1658304922, 'arm', 'china', 87), - (1658304923, 'arm', 'china', 90), - (1658304924, 'arm', 'china', 33), - (1658304925, 'arm', 'china', 14), - (1658304926, 'arm', 'china', 54), - (1658304927, 'arm', 'china', 61), - (1658304928, 'arm', 'china', 64), - (1658304929, 'arm', 'china', 74), - (1658304930, 'arm', 'china', 141), - (1658304931, 'arm', 'china', 9), - (1658304932, 'arm', 'china', 40), - (1658304933, 'arm', 'china', 139), - (1658304934, 'arm', 'china', 55), - (1658304935, 'arm', 'china', 92), - (1658304936, 'arm', 'china', 99), - (1658304937, 'arm', 'china', 70), - (1658304938, 'arm', 'china', 79), - (1658304939, 'arm', 'china', 10), - (1658304940, 'arm', 'china', 89), - (1658304941, 
'arm', 'china', 90), - (1658304942, 'arm', 'china', 71), - (1658304943, 'arm', 'china', 0), - (1658304944, 'arm', 'china', 101), - (1658304945, 'arm', 'china', 83), - (1658304946, 'arm', 'china', 92), - (1658304947, 'arm', 'china', 148), - (1658304948, 'arm', 'china', 22), - (1658304949, 'arm', 'china', 79), - (1658304950, 'arm', 'china', 120), - (1658304951, 'arm', 'china', 59), - (1658304952, 'arm', 'china', 136), - (1658304953, 'arm', 'china', 47), - (1658304954, 'arm', 'china', 122), - (1658304955, 'arm', 'china', 60), - (1658304956, 'arm', 'china', 108), - (1658304957, 'arm', 'china', 96), - (1658304958, 'arm', 'china', 74), - (1658304959, 'arm', 'china', 50), - (1658304960, 'arm', 'china', 46), - (1658304961, 'arm', 'china', 82); - -affected_rows: 400 - -SELECT thetasketch_distinct(`value`) FROM `02_function_thetasketch_distinct_table`; - -thetasketch_distinct(02_function_thetasketch_distinct_table.value), -UInt64(148), - - -SELECT - `arch`, - thetasketch_distinct(`value`) -FROM - `02_function_thetasketch_distinct_table` -WHERE - `timestamp` BETWEEN 1658304766 AND 1658304961 -GROUP BY - `arch` -ORDER BY - `arch` DESC; - -arch,thetasketch_distinct(02_function_thetasketch_distinct_table.value), -String("x86"),UInt64(113), -String("arm"),UInt64(117), - - -DROP TABLE `02_function_thetasketch_distinct_table`; - -affected_rows: 0 - diff --git a/integration_tests/cases/common/function/thetasketch_distinct.sql b/integration_tests/cases/common/function/thetasketch_distinct.sql deleted file mode 100644 index e3d2d39ca0..0000000000 --- a/integration_tests/cases/common/function/thetasketch_distinct.sql +++ /dev/null @@ -1,450 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- - -DROP TABLE IF EXISTS `02_function_thetasketch_distinct_table`; - -CREATE TABLE `02_function_thetasketch_distinct_table` ( - `timestamp` timestamp NOT NULL, - `arch` string TAG, - `datacenter` string TAG, - `value` int, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -INSERT INTO `02_function_thetasketch_distinct_table` -(`timestamp`, `arch`, `datacenter`, `value`) -VALUES - (1658304762, 'x86', 'china', 75), - (1658304763, 'x86', 'china', 15), - (1658304764, 'x86', 'china', 87), - (1658304765, 'x86', 'china', 6), - (1658304766, 'x86', 'china', 112), - (1658304767, 'x86', 'china', 5), - (1658304768, 'x86', 'china', 67), - (1658304769, 'x86', 'china', 41), - (1658304770, 'x86', 'china', 93), - (1658304771, 'x86', 'china', 9), - (1658304772, 'x86', 'china', 82), - (1658304773, 'x86', 'china', 97), - (1658304774, 'x86', 'china', 121), - (1658304775, 'x86', 'china', 97), - (1658304776, 'x86', 'china', 114), - (1658304777, 'x86', 'china', 8), - (1658304778, 'x86', 'china', 9), - (1658304779, 'x86', 'china', 13), - (1658304780, 'x86', 'china', 8), - (1658304781, 'x86', 'china', 146), - (1658304782, 'x86', 'china', 131), - (1658304783, 'x86', 'china', 45), - (1658304784, 'x86', 'china', 57), - (1658304785, 'x86', 'china', 74), - (1658304786, 'x86', 'china', 25), - (1658304787, 'x86', 'china', 67), - (1658304788, 'x86', 'china', 108), - (1658304789, 'x86', 'china', 32), - (1658304790, 'x86', 'china', 67), - (1658304791, 'x86', 'china', 61), - (1658304792, 'x86', 'china', 7), - (1658304793, 'x86', 'china', 107), - (1658304794, 'x86', 'china', 18), - (1658304795, 'x86', 'china', 127), - (1658304796, 'x86', 'china', 88), - (1658304797, 'x86', 'china', 21), - (1658304798, 'x86', 'china', 85), - (1658304799, 'x86', 'china', 117), - (1658304800, 'x86', 'china', 51), - (1658304801, 'x86', 'china', 23), - (1658304802, 'x86', 'china', 74), - (1658304803, 'x86', 'china', 77), - (1658304804, 'x86', 'china', 66), - (1658304805, 'x86', 'china', 84), - (1658304806, 'x86', 'china', 1), - (1658304807, 'x86', 'china', 108), - (1658304808, 'x86', 'china', 50), - (1658304809, 'x86', 'china', 80), - (1658304810, 'x86', 'china', 30), - (1658304811, 'x86', 'china', 36), - (1658304812, 'x86', 'china', 110), - (1658304813, 'x86', 'china', 22), - (1658304814, 'x86', 'china', 126), - (1658304815, 'x86', 'china', 142), - (1658304816, 'x86', 'china', 17), - (1658304817, 'x86', 'china', 38), - (1658304818, 'x86', 'china', 40), - (1658304819, 'x86', 'china', 114), - (1658304820, 'x86', 'china', 18), - (1658304821, 'x86', 'china', 8), - (1658304822, 'x86', 'china', 144), - (1658304823, 'x86', 'china', 61), - (1658304824, 'x86', 'china', 49), - (1658304825, 'x86', 'china', 28), - (1658304826, 'x86', 'china', 80), - (1658304827, 'x86', 'china', 149), - (1658304828, 'x86', 'china', 144), - (1658304829, 'x86', 'china', 74), - (1658304830, 'x86', 'china', 130), - (1658304831, 'x86', 'china', 116), - (1658304832, 'x86', 'china', 72), - (1658304833, 'x86', 'china', 61), - (1658304834, 'x86', 'china', 24), - (1658304835, 'x86', 'china', 47), - (1658304836, 'x86', 'china', 35), - (1658304837, 'x86', 'china', 62), - (1658304838, 'x86', 'china', 148), - (1658304839, 'x86', 'china', 111), - (1658304840, 'x86', 'china', 86), - (1658304841, 'x86', 'china', 139), - (1658304842, 'x86', 'china', 83), - (1658304843, 'x86', 'china', 144), - (1658304844, 'x86', 'china', 87), - (1658304845, 'x86', 'china', 108), - (1658304846, 'x86', 'china', 105), - (1658304847, 'x86', 'china', 20), - (1658304848, 'x86', 
'china', 35), - (1658304849, 'x86', 'china', 132), - (1658304850, 'x86', 'china', 80), - (1658304851, 'x86', 'china', 24), - (1658304852, 'x86', 'china', 139), - (1658304853, 'x86', 'china', 43), - (1658304854, 'x86', 'china', 68), - (1658304855, 'x86', 'china', 65), - (1658304856, 'x86', 'china', 83), - (1658304857, 'x86', 'china', 101), - (1658304858, 'x86', 'china', 148), - (1658304859, 'x86', 'china', 112), - (1658304860, 'x86', 'china', 33), - (1658304861, 'x86', 'china', 19), - (1658304862, 'x86', 'china', 6), - (1658304863, 'x86', 'china', 73), - (1658304864, 'x86', 'china', 134), - (1658304865, 'x86', 'china', 61), - (1658304866, 'x86', 'china', 9), - (1658304867, 'x86', 'china', 103), - (1658304868, 'x86', 'china', 55), - (1658304869, 'x86', 'china', 62), - (1658304870, 'x86', 'china', 58), - (1658304871, 'x86', 'china', 85), - (1658304872, 'x86', 'china', 83), - (1658304873, 'x86', 'china', 39), - (1658304874, 'x86', 'china', 48), - (1658304875, 'x86', 'china', 95), - (1658304876, 'x86', 'china', 16), - (1658304877, 'x86', 'china', 144), - (1658304878, 'x86', 'china', 60), - (1658304879, 'x86', 'china', 83), - (1658304880, 'x86', 'china', 140), - (1658304881, 'x86', 'china', 48), - (1658304882, 'x86', 'china', 117), - (1658304883, 'x86', 'china', 146), - (1658304884, 'x86', 'china', 31), - (1658304885, 'x86', 'china', 143), - (1658304886, 'x86', 'china', 102), - (1658304887, 'x86', 'china', 98), - (1658304888, 'x86', 'china', 41), - (1658304889, 'x86', 'china', 50), - (1658304890, 'x86', 'china', 46), - (1658304891, 'x86', 'china', 143), - (1658304892, 'x86', 'china', 82), - (1658304893, 'x86', 'china', 142), - (1658304894, 'x86', 'china', 110), - (1658304895, 'x86', 'china', 113), - (1658304896, 'x86', 'china', 89), - (1658304897, 'x86', 'china', 38), - (1658304898, 'x86', 'china', 92), - (1658304899, 'x86', 'china', 137), - (1658304900, 'x86', 'china', 120), - (1658304901, 'x86', 'china', 74), - (1658304902, 'x86', 'china', 64), - (1658304903, 'x86', 'china', 134), - (1658304904, 'x86', 'china', 121), - (1658304905, 'x86', 'china', 40), - (1658304906, 'x86', 'china', 148), - (1658304907, 'x86', 'china', 37), - (1658304908, 'x86', 'china', 72), - (1658304909, 'x86', 'china', 131), - (1658304910, 'x86', 'china', 142), - (1658304911, 'x86', 'china', 53), - (1658304912, 'x86', 'china', 46), - (1658304913, 'x86', 'china', 83), - (1658304914, 'x86', 'china', 52), - (1658304915, 'x86', 'china', 83), - (1658304916, 'x86', 'china', 150), - (1658304917, 'x86', 'china', 13), - (1658304918, 'x86', 'china', 133), - (1658304919, 'x86', 'china', 69), - (1658304920, 'x86', 'china', 46), - (1658304921, 'x86', 'china', 13), - (1658304922, 'x86', 'china', 4), - (1658304923, 'x86', 'china', 84), - (1658304924, 'x86', 'china', 116), - (1658304925, 'x86', 'china', 54), - (1658304926, 'x86', 'china', 38), - (1658304927, 'x86', 'china', 125), - (1658304928, 'x86', 'china', 28), - (1658304929, 'x86', 'china', 45), - (1658304930, 'x86', 'china', 147), - (1658304931, 'x86', 'china', 36), - (1658304932, 'x86', 'china', 6), - (1658304933, 'x86', 'china', 2), - (1658304934, 'x86', 'china', 16), - (1658304935, 'x86', 'china', 149), - (1658304936, 'x86', 'china', 109), - (1658304937, 'x86', 'china', 22), - (1658304938, 'x86', 'china', 143), - (1658304939, 'x86', 'china', 46), - (1658304940, 'x86', 'china', 139), - (1658304941, 'x86', 'china', 128), - (1658304942, 'x86', 'china', 5), - (1658304943, 'x86', 'china', 9), - (1658304944, 'x86', 'china', 80), - (1658304945, 'x86', 'china', 56), - (1658304946, 'x86', 
'china', 94), - (1658304947, 'x86', 'china', 115), - (1658304948, 'x86', 'china', 9), - (1658304949, 'x86', 'china', 111), - (1658304950, 'x86', 'china', 118), - (1658304951, 'x86', 'china', 77), - (1658304952, 'x86', 'china', 43), - (1658304953, 'x86', 'china', 65), - (1658304954, 'x86', 'china', 129), - (1658304955, 'x86', 'china', 118), - (1658304956, 'x86', 'china', 68), - (1658304957, 'x86', 'china', 10), - (1658304958, 'x86', 'china', 11), - (1658304959, 'x86', 'china', 82), - (1658304960, 'x86', 'china', 115), - (1658304961, 'x86', 'china', 86), - (1658304762, 'arm', 'china', 145), - (1658304763, 'arm', 'china', 36), - (1658304764, 'arm', 'china', 11), - (1658304765, 'arm', 'china', 3), - (1658304766, 'arm', 'china', 135), - (1658304767, 'arm', 'china', 117), - (1658304768, 'arm', 'china', 137), - (1658304769, 'arm', 'china', 140), - (1658304770, 'arm', 'china', 135), - (1658304771, 'arm', 'china', 91), - (1658304772, 'arm', 'china', 143), - (1658304773, 'arm', 'china', 25), - (1658304774, 'arm', 'china', 93), - (1658304775, 'arm', 'china', 137), - (1658304776, 'arm', 'china', 10), - (1658304777, 'arm', 'china', 15), - (1658304778, 'arm', 'china', 106), - (1658304779, 'arm', 'china', 144), - (1658304780, 'arm', 'china', 40), - (1658304781, 'arm', 'china', 118), - (1658304782, 'arm', 'china', 5), - (1658304783, 'arm', 'china', 37), - (1658304784, 'arm', 'china', 61), - (1658304785, 'arm', 'china', 103), - (1658304786, 'arm', 'china', 73), - (1658304787, 'arm', 'china', 133), - (1658304788, 'arm', 'china', 147), - (1658304789, 'arm', 'china', 2), - (1658304790, 'arm', 'china', 132), - (1658304791, 'arm', 'china', 52), - (1658304792, 'arm', 'china', 85), - (1658304793, 'arm', 'china', 38), - (1658304794, 'arm', 'china', 137), - (1658304795, 'arm', 'china', 67), - (1658304796, 'arm', 'china', 47), - (1658304797, 'arm', 'china', 46), - (1658304798, 'arm', 'china', 85), - (1658304799, 'arm', 'china', 113), - (1658304800, 'arm', 'china', 7), - (1658304801, 'arm', 'china', 125), - (1658304802, 'arm', 'china', 65), - (1658304803, 'arm', 'china', 89), - (1658304804, 'arm', 'china', 36), - (1658304805, 'arm', 'china', 123), - (1658304806, 'arm', 'china', 40), - (1658304807, 'arm', 'china', 101), - (1658304808, 'arm', 'china', 39), - (1658304809, 'arm', 'china', 0), - (1658304810, 'arm', 'china', 120), - (1658304811, 'arm', 'china', 100), - (1658304812, 'arm', 'china', 127), - (1658304813, 'arm', 'china', 58), - (1658304814, 'arm', 'china', 110), - (1658304815, 'arm', 'china', 107), - (1658304816, 'arm', 'china', 25), - (1658304817, 'arm', 'china', 85), - (1658304818, 'arm', 'china', 61), - (1658304819, 'arm', 'china', 19), - (1658304820, 'arm', 'china', 150), - (1658304821, 'arm', 'china', 30), - (1658304822, 'arm', 'china', 47), - (1658304823, 'arm', 'china', 65), - (1658304824, 'arm', 'china', 70), - (1658304825, 'arm', 'china', 52), - (1658304826, 'arm', 'china', 54), - (1658304827, 'arm', 'china', 145), - (1658304828, 'arm', 'china', 0), - (1658304829, 'arm', 'china', 148), - (1658304830, 'arm', 'china', 76), - (1658304831, 'arm', 'china', 59), - (1658304832, 'arm', 'china', 47), - (1658304833, 'arm', 'china', 12), - (1658304834, 'arm', 'china', 83), - (1658304835, 'arm', 'china', 135), - (1658304836, 'arm', 'china', 37), - (1658304837, 'arm', 'china', 94), - (1658304838, 'arm', 'china', 129), - (1658304839, 'arm', 'china', 105), - (1658304840, 'arm', 'china', 0), - (1658304841, 'arm', 'china', 5), - (1658304842, 'arm', 'china', 84), - (1658304843, 'arm', 'china', 74), - (1658304844, 
'arm', 'china', 132), - (1658304845, 'arm', 'china', 138), - (1658304846, 'arm', 'china', 75), - (1658304847, 'arm', 'china', 92), - (1658304848, 'arm', 'china', 55), - (1658304849, 'arm', 'china', 51), - (1658304850, 'arm', 'china', 103), - (1658304851, 'arm', 'china', 20), - (1658304852, 'arm', 'china', 7), - (1658304853, 'arm', 'china', 19), - (1658304854, 'arm', 'china', 97), - (1658304855, 'arm', 'china', 15), - (1658304856, 'arm', 'china', 28), - (1658304857, 'arm', 'china', 60), - (1658304858, 'arm', 'china', 56), - (1658304859, 'arm', 'china', 23), - (1658304860, 'arm', 'china', 146), - (1658304861, 'arm', 'china', 78), - (1658304862, 'arm', 'china', 106), - (1658304863, 'arm', 'china', 128), - (1658304864, 'arm', 'china', 119), - (1658304865, 'arm', 'china', 84), - (1658304866, 'arm', 'china', 7), - (1658304867, 'arm', 'china', 147), - (1658304868, 'arm', 'china', 124), - (1658304869, 'arm', 'china', 130), - (1658304870, 'arm', 'china', 20), - (1658304871, 'arm', 'china', 44), - (1658304872, 'arm', 'china', 82), - (1658304873, 'arm', 'china', 148), - (1658304874, 'arm', 'china', 84), - (1658304875, 'arm', 'china', 125), - (1658304876, 'arm', 'china', 130), - (1658304877, 'arm', 'china', 136), - (1658304878, 'arm', 'china', 6), - (1658304879, 'arm', 'china', 41), - (1658304880, 'arm', 'china', 29), - (1658304881, 'arm', 'china', 122), - (1658304882, 'arm', 'china', 3), - (1658304883, 'arm', 'china', 115), - (1658304884, 'arm', 'china', 101), - (1658304885, 'arm', 'china', 132), - (1658304886, 'arm', 'china', 7), - (1658304887, 'arm', 'china', 81), - (1658304888, 'arm', 'china', 57), - (1658304889, 'arm', 'china', 115), - (1658304890, 'arm', 'china', 132), - (1658304891, 'arm', 'china', 100), - (1658304892, 'arm', 'china', 20), - (1658304893, 'arm', 'china', 147), - (1658304894, 'arm', 'china', 42), - (1658304895, 'arm', 'china', 95), - (1658304896, 'arm', 'china', 21), - (1658304897, 'arm', 'china', 75), - (1658304898, 'arm', 'china', 86), - (1658304899, 'arm', 'china', 63), - (1658304900, 'arm', 'china', 45), - (1658304901, 'arm', 'china', 6), - (1658304902, 'arm', 'china', 37), - (1658304903, 'arm', 'china', 111), - (1658304904, 'arm', 'china', 106), - (1658304905, 'arm', 'china', 86), - (1658304906, 'arm', 'china', 76), - (1658304907, 'arm', 'china', 52), - (1658304908, 'arm', 'china', 38), - (1658304909, 'arm', 'china', 78), - (1658304910, 'arm', 'china', 111), - (1658304911, 'arm', 'china', 121), - (1658304912, 'arm', 'china', 67), - (1658304913, 'arm', 'china', 134), - (1658304914, 'arm', 'china', 105), - (1658304915, 'arm', 'china', 105), - (1658304916, 'arm', 'china', 70), - (1658304917, 'arm', 'china', 116), - (1658304918, 'arm', 'china', 81), - (1658304919, 'arm', 'china', 102), - (1658304920, 'arm', 'china', 87), - (1658304921, 'arm', 'china', 85), - (1658304922, 'arm', 'china', 87), - (1658304923, 'arm', 'china', 90), - (1658304924, 'arm', 'china', 33), - (1658304925, 'arm', 'china', 14), - (1658304926, 'arm', 'china', 54), - (1658304927, 'arm', 'china', 61), - (1658304928, 'arm', 'china', 64), - (1658304929, 'arm', 'china', 74), - (1658304930, 'arm', 'china', 141), - (1658304931, 'arm', 'china', 9), - (1658304932, 'arm', 'china', 40), - (1658304933, 'arm', 'china', 139), - (1658304934, 'arm', 'china', 55), - (1658304935, 'arm', 'china', 92), - (1658304936, 'arm', 'china', 99), - (1658304937, 'arm', 'china', 70), - (1658304938, 'arm', 'china', 79), - (1658304939, 'arm', 'china', 10), - (1658304940, 'arm', 'china', 89), - (1658304941, 'arm', 'china', 90), - (1658304942, 
'arm', 'china', 71), - (1658304943, 'arm', 'china', 0), - (1658304944, 'arm', 'china', 101), - (1658304945, 'arm', 'china', 83), - (1658304946, 'arm', 'china', 92), - (1658304947, 'arm', 'china', 148), - (1658304948, 'arm', 'china', 22), - (1658304949, 'arm', 'china', 79), - (1658304950, 'arm', 'china', 120), - (1658304951, 'arm', 'china', 59), - (1658304952, 'arm', 'china', 136), - (1658304953, 'arm', 'china', 47), - (1658304954, 'arm', 'china', 122), - (1658304955, 'arm', 'china', 60), - (1658304956, 'arm', 'china', 108), - (1658304957, 'arm', 'china', 96), - (1658304958, 'arm', 'china', 74), - (1658304959, 'arm', 'china', 50), - (1658304960, 'arm', 'china', 46), - (1658304961, 'arm', 'china', 82); - -SELECT thetasketch_distinct(`value`) FROM `02_function_thetasketch_distinct_table`; - -SELECT - `arch`, - thetasketch_distinct(`value`) -FROM - `02_function_thetasketch_distinct_table` -WHERE - `timestamp` BETWEEN 1658304766 AND 1658304961 -GROUP BY - `arch` -ORDER BY - `arch` DESC; - -DROP TABLE `02_function_thetasketch_distinct_table`; diff --git a/integration_tests/cases/common/function/time_bucket.result b/integration_tests/cases/common/function/time_bucket.result deleted file mode 100644 index d5783a6f1d..0000000000 --- a/integration_tests/cases/common/function/time_bucket.result +++ /dev/null @@ -1,161 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- -DROP TABLE IF EXISTS `02_function_time_bucket_table`; - -affected_rows: 0 - -CREATE TABLE `02_function_time_bucket_table` ( - `timestamp` timestamp NOT NULL, - `value` int, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -affected_rows: 0 - -INSERT INTO `02_function_time_bucket_table` -(`timestamp`, `value`) -VALUES - (1659577423000, 1), - (1659577422000, 2), - (1659577320000, 3), - (1659571200000, 4), - (1659484800000, 5), - (1656777600000, 6); - -affected_rows: 6 - --- Test all time granularity. 
-SELECT time_bucket(`timestamp`, 'P1Y') FROM `02_function_time_bucket_table`; - -time_bucket(02_function_time_bucket_table.timestamp,Utf8("P1Y")), -Timestamp(1640966400000), -Timestamp(1640966400000), -Timestamp(1640966400000), -Timestamp(1640966400000), -Timestamp(1640966400000), -Timestamp(1640966400000), - - -SELECT time_bucket(`timestamp`, 'P1M') FROM `02_function_time_bucket_table`; - -time_bucket(02_function_time_bucket_table.timestamp,Utf8("P1M")), -Timestamp(1656604800000), -Timestamp(1659283200000), -Timestamp(1659283200000), -Timestamp(1659283200000), -Timestamp(1659283200000), -Timestamp(1659283200000), - - -SELECT time_bucket(`timestamp`, 'P1W') FROM `02_function_time_bucket_table`; - -time_bucket(02_function_time_bucket_table.timestamp,Utf8("P1W")), -Timestamp(1652918400000), -Timestamp(1657756800000), -Timestamp(1657756800000), -Timestamp(1657756800000), -Timestamp(1657756800000), -Timestamp(1657756800000), - - -SELECT time_bucket(`timestamp`, 'P1D') FROM `02_function_time_bucket_table`; - -time_bucket(02_function_time_bucket_table.timestamp,Utf8("P1D")), -Timestamp(1656777600000), -Timestamp(1659456000000), -Timestamp(1659542400000), -Timestamp(1659542400000), -Timestamp(1659542400000), -Timestamp(1659542400000), - - -SELECT time_bucket(`timestamp`, 'PT1H') FROM `02_function_time_bucket_table`; - -time_bucket(02_function_time_bucket_table.timestamp,Utf8("PT1H")), -Timestamp(1656777600000), -Timestamp(1659484800000), -Timestamp(1659571200000), -Timestamp(1659574800000), -Timestamp(1659574800000), -Timestamp(1659574800000), - - -SELECT time_bucket(`timestamp`, 'PT1M') FROM `02_function_time_bucket_table`; - -time_bucket(02_function_time_bucket_table.timestamp,Utf8("PT1M")), -Timestamp(1656777600000), -Timestamp(1659484800000), -Timestamp(1659571200000), -Timestamp(1659577320000), -Timestamp(1659577380000), -Timestamp(1659577380000), - - -SELECT time_bucket(`timestamp`, 'PT1S') FROM `02_function_time_bucket_table`; - -time_bucket(02_function_time_bucket_table.timestamp,Utf8("PT1S")), -Timestamp(1656777600000), -Timestamp(1659484800000), -Timestamp(1659571200000), -Timestamp(1659577320000), -Timestamp(1659577422000), -Timestamp(1659577423000), - - --- Test various parameters. --- NOTICE: customizing format is not supported now. 
-SELECT time_bucket(`timestamp`, 'PT1H', 'yyyy-MM-dd HH:mm:ss') FROM `02_function_time_bucket_table`; - -time_bucket(02_function_time_bucket_table.timestamp,Utf8("PT1H"),Utf8("yyyy-MM-dd HH:mm:ss")), -Timestamp(1656777600000), -Timestamp(1659484800000), -Timestamp(1659571200000), -Timestamp(1659574800000), -Timestamp(1659574800000), -Timestamp(1659574800000), - - -SELECT time_bucket(`timestamp`, 'PT1H', 'yyyy-MM-dd HH:mm:ss', '+0800') FROM `02_function_time_bucket_table`; - -time_bucket(02_function_time_bucket_table.timestamp,Utf8("PT1H"),Utf8("yyyy-MM-dd HH:mm:ss"),Utf8("+0800")), -Timestamp(1656777600000), -Timestamp(1659484800000), -Timestamp(1659571200000), -Timestamp(1659574800000), -Timestamp(1659574800000), -Timestamp(1659574800000), - - -SELECT time_bucket(`timestamp`, 'PT1H', 'yyyy-MM-dd HH:mm:ss', '+0800', 'yyyy-MM-dd HH') FROM `02_function_time_bucket_table`; - -time_bucket(02_function_time_bucket_table.timestamp,Utf8("PT1H"),Utf8("yyyy-MM-dd HH:mm:ss"),Utf8("+0800"),Utf8("yyyy-MM-dd HH")), -Timestamp(1656777600000), -Timestamp(1659484800000), -Timestamp(1659571200000), -Timestamp(1659574800000), -Timestamp(1659574800000), -Timestamp(1659574800000), - - -DROP TABLE `02_function_time_bucket_table`; - -affected_rows: 0 - diff --git a/integration_tests/cases/common/function/time_bucket.sql b/integration_tests/cases/common/function/time_bucket.sql deleted file mode 100644 index 2eeeb123f2..0000000000 --- a/integration_tests/cases/common/function/time_bucket.sql +++ /dev/null @@ -1,56 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - -DROP TABLE IF EXISTS `02_function_time_bucket_table`; - -CREATE TABLE `02_function_time_bucket_table` ( - `timestamp` timestamp NOT NULL, - `value` int, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -INSERT INTO `02_function_time_bucket_table` -(`timestamp`, `value`) -VALUES - (1659577423000, 1), - (1659577422000, 2), - (1659577320000, 3), - (1659571200000, 4), - (1659484800000, 5), - (1656777600000, 6); - --- Test all time granularity. -SELECT time_bucket(`timestamp`, 'P1Y') FROM `02_function_time_bucket_table`; -SELECT time_bucket(`timestamp`, 'P1M') FROM `02_function_time_bucket_table`; -SELECT time_bucket(`timestamp`, 'P1W') FROM `02_function_time_bucket_table`; -SELECT time_bucket(`timestamp`, 'P1D') FROM `02_function_time_bucket_table`; -SELECT time_bucket(`timestamp`, 'PT1H') FROM `02_function_time_bucket_table`; -SELECT time_bucket(`timestamp`, 'PT1M') FROM `02_function_time_bucket_table`; -SELECT time_bucket(`timestamp`, 'PT1S') FROM `02_function_time_bucket_table`; - --- Test various parameters. --- NOTICE: customizing format is not supported now. 
-SELECT time_bucket(`timestamp`, 'PT1H', 'yyyy-MM-dd HH:mm:ss') FROM `02_function_time_bucket_table`; -SELECT time_bucket(`timestamp`, 'PT1H', 'yyyy-MM-dd HH:mm:ss', '+0800') FROM `02_function_time_bucket_table`; -SELECT time_bucket(`timestamp`, 'PT1H', 'yyyy-MM-dd HH:mm:ss', '+0800', 'yyyy-MM-dd HH') FROM `02_function_time_bucket_table`; - - -DROP TABLE `02_function_time_bucket_table`; diff --git a/integration_tests/cases/common/optimizer/optimizer.result b/integration_tests/cases/common/optimizer/optimizer.result deleted file mode 100644 index 7d58e01436..0000000000 --- a/integration_tests/cases/common/optimizer/optimizer.result +++ /dev/null @@ -1,37 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- -DROP TABLE `07_optimizer_t`; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. Caused by: Failed to create plan, err:Table not found, table:07_optimizer_t sql:DROP TABLE `07_optimizer_t`;" }) - -CREATE TABLE `07_optimizer_t` (name string TAG, value double NOT NULL, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE=Analytic with (enable_ttl='false'); - -affected_rows: 0 - -EXPLAIN SELECT max(value) AS c1, avg(value) AS c2 FROM `07_optimizer_t` GROUP BY name; - -plan_type,plan, -String("logical_plan"),String("Projection: MAX(07_optimizer_t.value) AS c1, AVG(07_optimizer_t.value) AS c2\n Aggregate: groupBy=[[07_optimizer_t.name]], aggr=[[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]]\n TableScan: 07_optimizer_t projection=[name, value]"), -String("physical_plan"),String("ProjectionExec: expr=[MAX(07_optimizer_t.value)@1 as c1, AVG(07_optimizer_t.value)@2 as c2]\n AggregateExec: mode=FinalPartitioned, gby=[name@0 as name], aggr=[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]\n CoalesceBatchesExec: target_batch_size=8192\n RepartitionExec: partitioning=Hash([name@0], 8), input_partitions=8\n AggregateExec: mode=Partial, gby=[name@0 as name], aggr=[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]\n ScanTable: table=07_optimizer_t, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), - - -DROP TABLE `07_optimizer_t`; - -affected_rows: 0 - diff --git a/integration_tests/cases/common/optimizer/optimizer.sql b/integration_tests/cases/common/optimizer/optimizer.sql deleted file mode 100644 index 35e4076d98..0000000000 --- a/integration_tests/cases/common/optimizer/optimizer.sql +++ /dev/null @@ -1,26 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. 
The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - -DROP TABLE `07_optimizer_t`; - -CREATE TABLE `07_optimizer_t` (name string TAG, value double NOT NULL, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE=Analytic with (enable_ttl='false'); - -EXPLAIN SELECT max(value) AS c1, avg(value) AS c2 FROM `07_optimizer_t` GROUP BY name; - -DROP TABLE `07_optimizer_t`; \ No newline at end of file diff --git a/integration_tests/cases/common/show/show_create_table.result b/integration_tests/cases/common/show/show_create_table.result deleted file mode 100644 index efac075143..0000000000 --- a/integration_tests/cases/common/show/show_create_table.result +++ /dev/null @@ -1,72 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- -DROP TABLE IF EXISTS `06_show_a`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `06_show_b`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `06_show_c`; - -affected_rows: 0 - -CREATE TABLE `06_show_a` (a bigint, b int default 3, c string default 'x', d smallint null, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -affected_rows: 0 - -SHOW CREATE TABLE `06_show_a`; - -Table,Create Table, -String("06_show_a"),String("CREATE TABLE `06_show_a` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `a` bigint, `b` int DEFAULT 3, `c` string DEFAULT 'x', `d` smallint, PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -CREATE TABLE `06_show_b` (a bigint, b int null default null, c string, d smallint null, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -affected_rows: 0 - -SHOW CREATE TABLE `06_show_b`; - -Table,Create Table, -String("06_show_b"),String("CREATE TABLE `06_show_b` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `a` bigint, `b` int DEFAULT NULL, `c` string, `d` smallint, PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -CREATE TABLE `06_show_c` (a int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -affected_rows: 0 - -SHOW CREATE TABLE `06_show_c`; - -Table,Create Table, -String("06_show_c"),String("CREATE TABLE `06_show_c` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `a` int, PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -DROP TABLE `06_show_a`; - -affected_rows: 0 - -DROP TABLE `06_show_b`; - -affected_rows: 0 - -DROP TABLE `06_show_c`; - -affected_rows: 0 - diff --git a/integration_tests/cases/common/show/show_create_table.sql b/integration_tests/cases/common/show/show_create_table.sql deleted file mode 100644 index 44ad61dfd7..0000000000 --- a/integration_tests/cases/common/show/show_create_table.sql +++ /dev/null @@ -1,35 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. 
You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - -DROP TABLE IF EXISTS `06_show_a`; -DROP TABLE IF EXISTS `06_show_b`; -DROP TABLE IF EXISTS `06_show_c`; - -CREATE TABLE `06_show_a` (a bigint, b int default 3, c string default 'x', d smallint null, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; -SHOW CREATE TABLE `06_show_a`; - -CREATE TABLE `06_show_b` (a bigint, b int null default null, c string, d smallint null, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; -SHOW CREATE TABLE `06_show_b`; - -CREATE TABLE `06_show_c` (a int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; -SHOW CREATE TABLE `06_show_c`; - -DROP TABLE `06_show_a`; -DROP TABLE `06_show_b`; -DROP TABLE `06_show_c`; \ No newline at end of file diff --git a/integration_tests/cases/env/cluster/common b/integration_tests/cases/env/cluster/common deleted file mode 120000 index dc879abe93..0000000000 --- a/integration_tests/cases/env/cluster/common +++ /dev/null @@ -1 +0,0 @@ -../../common \ No newline at end of file diff --git a/integration_tests/cases/env/cluster/ddl/alter_table.result b/integration_tests/cases/env/cluster/ddl/alter_table.result deleted file mode 100644 index 9a9eef72f2..0000000000 --- a/integration_tests/cases/env/cluster/ddl/alter_table.result +++ /dev/null @@ -1,129 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- -DROP TABLE IF EXISTS `05_alter_table_t0`; - -affected_rows: 0 - -CREATE TABLE `05_alter_table_t0`(a int, t timestamp NOT NULL, dic string dictionary, TIMESTAMP KEY(t)) ENGINE = Analytic with (enable_ttl='false'); - -affected_rows: 0 - -INSERT INTO TABLE `05_alter_table_t0`(a, t, dic) values(1, 1 , "d1"); - -affected_rows: 1 - -SELECT * FROM `05_alter_table_t0`; - -tsid,t,a,dic, -UInt64(0),Timestamp(1),Int32(1),String("d1"), - - --- doesn't support rename -ALTER TABLE `05_alter_table_t0` RENAME TO `t1`; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. 
Caused by: Failed to create plan, err:Unsupported SQL statement sql:ALTER TABLE `05_alter_table_t0` RENAME TO `t1`;" }) - -ALTER TABLE `05_alter_table_t0` add COLUMN (b string); - -affected_rows: 0 - -DESCRIBE TABLE `05_alter_table_t0`; - -name,type,is_primary,is_nullable,is_tag,is_dictionary, -String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("t"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("a"),String("int"),Boolean(false),Boolean(true),Boolean(false),Boolean(false), -String("dic"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(true), -String("b"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(false), - - -INSERT INTO TABLE `05_alter_table_t0`(a, b, t, dic) values (2, '2', 2, "d2"); - -affected_rows: 1 - -SELECT * FROM `05_alter_table_t0`; - -tsid,t,a,dic,b, -UInt64(0),Timestamp(1),Int32(1),String("d1"),String(""), -UInt64(0),Timestamp(2),Int32(2),String("d2"),String("2"), - - --- doesn't support drop column -ALTER TABLE `05_alter_table_t0` DROP COLUMN b; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. Caused by: Failed to create plan, err:Unsupported SQL statement sql:ALTER TABLE `05_alter_table_t0` DROP COLUMN b;" }) - -DESCRIBE TABLE `05_alter_table_t0`; - -name,type,is_primary,is_nullable,is_tag,is_dictionary, -String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("t"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("a"),String("int"),Boolean(false),Boolean(true),Boolean(false),Boolean(false), -String("dic"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(true), -String("b"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(false), - - -SELECT * FROM `05_alter_table_t0`; - -tsid,t,a,dic,b, -UInt64(0),Timestamp(1),Int32(1),String("d1"),String(""), -UInt64(0),Timestamp(2),Int32(2),String("d2"),String("2"), - - -DROP TABLE `05_alter_table_t0`; - -affected_rows: 0 - --- alter table options -CREATE TABLE `05_alter_table_t1`(a int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -affected_rows: 0 - -ALTER TABLE `05_alter_table_t1` MODIFY SETTING write_buffer_size='300M'; - -affected_rows: 0 - -show create table 05_alter_table_t1; - -Table,Create Table, -String("05_alter_table_t1"),String("CREATE TABLE `05_alter_table_t1` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `a` int, PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='314572800')"), - - -drop table 05_alter_table_t1; - -affected_rows: 0 - -CREATE TABLE `05_alter_table_t1` (`sid` uint64 NOT NULL, `t` timestamp NOT NULL, `a` int, PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='314572800'); - -affected_rows: 0 - -ALTER TABLE `05_alter_table_t1` MODIFY SETTING ttl='10d'; - -affected_rows: 0 - -show create table 05_alter_table_t1; - -Table,Create Table, 
-String("05_alter_table_t1"),String("CREATE TABLE `05_alter_table_t1` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `sid` uint64 NOT NULL, `a` int, PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='10d', update_mode='OVERWRITE', write_buffer_size='314572800')"), - - -drop table 05_alter_table_t1; - -affected_rows: 0 - diff --git a/integration_tests/cases/env/cluster/ddl/alter_table.sql b/integration_tests/cases/env/cluster/ddl/alter_table.sql deleted file mode 100644 index f93930d9b4..0000000000 --- a/integration_tests/cases/env/cluster/ddl/alter_table.sql +++ /dev/null @@ -1,53 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - -DROP TABLE IF EXISTS `05_alter_table_t0`; - -CREATE TABLE `05_alter_table_t0`(a int, t timestamp NOT NULL, dic string dictionary, TIMESTAMP KEY(t)) ENGINE = Analytic with (enable_ttl='false'); -INSERT INTO TABLE `05_alter_table_t0`(a, t, dic) values(1, 1 , "d1"); -SELECT * FROM `05_alter_table_t0`; - --- doesn't support rename -ALTER TABLE `05_alter_table_t0` RENAME TO `t1`; - -ALTER TABLE `05_alter_table_t0` add COLUMN (b string); -DESCRIBE TABLE `05_alter_table_t0`; -INSERT INTO TABLE `05_alter_table_t0`(a, b, t, dic) values (2, '2', 2, "d2"); -SELECT * FROM `05_alter_table_t0`; - --- doesn't support drop column -ALTER TABLE `05_alter_table_t0` DROP COLUMN b; -DESCRIBE TABLE `05_alter_table_t0`; -SELECT * FROM `05_alter_table_t0`; - -DROP TABLE `05_alter_table_t0`; - --- alter table options -CREATE TABLE `05_alter_table_t1`(a int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; -ALTER TABLE `05_alter_table_t1` MODIFY SETTING write_buffer_size='300M'; - -show create table 05_alter_table_t1; -drop table 05_alter_table_t1; - -CREATE TABLE `05_alter_table_t1` (`sid` uint64 NOT NULL, `t` timestamp NOT NULL, `a` int, PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='314572800'); -ALTER TABLE `05_alter_table_t1` MODIFY SETTING ttl='10d'; -show create table 05_alter_table_t1; -drop table 05_alter_table_t1; - - diff --git a/integration_tests/cases/env/cluster/ddl/create_tables.result b/integration_tests/cases/env/cluster/ddl/create_tables.result deleted file mode 100644 index 5fb213e8af..0000000000 --- a/integration_tests/cases/env/cluster/ddl/create_tables.result +++ /dev/null @@ 
-1,338 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- -DROP TABLE IF EXISTS `05_create_tables_t`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t2`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t3`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t4`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t5`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t6`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t7`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t8`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t9`; - -affected_rows: 0 - --- no TIMESTAMP column -CREATE TABLE `05_create_tables_t`(c1 int) ENGINE = Analytic; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. Caused by: Failed to create plan, err:Table must contain timestamp constraint sql:CREATE TABLE `05_create_tables_t`(c1 int) ENGINE = Analytic;" }) - -CREATE TABLE `05_create_tables_t`(c1 int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -affected_rows: 0 - -CREATE TABLE IF NOT EXISTS `05_create_tables_t`(c1 int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute create table, err:Failed to create table by table manipulator, err:Failed to create table, msg:failed to create table by meta client, req:CreateTableRequest { schema_name: \"public\", name: \"05_create_tables_t\", engine: \"Analytic\", create_if_not_exist: true, options: {}, partition_table_info: None }, err:Bad response, resp code:500, msg:table shard already exists: (#500)table already exists, cause:. sql:CREATE TABLE IF NOT EXISTS `05_create_tables_t`(c1 int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic;" }) - --- table already exist -CREATE TABLE `05_create_tables_t`(c1 int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute create table, err:Failed to create table by table manipulator, err:Failed to create table, msg:failed to create table by meta client, req:CreateTableRequest { schema_name: \"public\", name: \"05_create_tables_t\", engine: \"Analytic\", create_if_not_exist: false, options: {}, partition_table_info: None }, err:Bad response, resp code:500, msg:table shard already exists: (#500)table already exists, cause:. 
sql:CREATE TABLE `05_create_tables_t`(c1 int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic;" }) - -create table `05_create_tables_t2`(a int, b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic with (enable_ttl='false'); - -affected_rows: 0 - -insert into `05_create_tables_t2`(a, b, t) values(1,1,1),(2,2,2); - -affected_rows: 2 - -select a+b from `05_create_tables_t2`; - -05_create_tables_t2.a + 05_create_tables_t2.b, -Int32(2), -Int32(4), - - --- table already exist -create table `05_create_tables_t2`(a int,b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute create table, err:Failed to create table by table manipulator, err:Failed to create table, msg:failed to create table by meta client, req:CreateTableRequest { schema_name: \"public\", name: \"05_create_tables_t2\", engine: \"Analytic\", create_if_not_exist: false, options: {}, partition_table_info: None }, err:Bad response, resp code:500, msg:table shard already exists: (#500)table already exists, cause:. sql:create table `05_create_tables_t2`(a int,b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic;" }) - --- table already exist -create table `05_create_tables_t2`(a int,b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute create table, err:Failed to create table by table manipulator, err:Failed to create table, msg:failed to create table by meta client, req:CreateTableRequest { schema_name: \"public\", name: \"05_create_tables_t2\", engine: \"Analytic\", create_if_not_exist: false, options: {}, partition_table_info: None }, err:Bad response, resp code:500, msg:table shard already exists: (#500)table already exists, cause:. 
sql:create table `05_create_tables_t2`(a int,b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic;" }) - -create table `05_create_tables_t3`(a int,b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -affected_rows: 0 - -create table `05_create_tables_t4`(`a` int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -affected_rows: 0 - -describe table `05_create_tables_t4`; - -name,type,is_primary,is_nullable,is_tag,is_dictionary, -String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("t"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("a"),String("int"),Boolean(false),Boolean(true),Boolean(false),Boolean(false), - - -show create table `05_create_tables_t4`; - -Table,Create Table, -String("05_create_tables_t4"),String("CREATE TABLE `05_create_tables_t4` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `a` int, PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - --- TIMESTAMP KEY -CREATE TABLE `05_create_tables_t5`(c1 int, t timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; - -affected_rows: 0 - -describe table `05_create_tables_t5`; - -name,type,is_primary,is_nullable,is_tag,is_dictionary, -String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("t"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("c1"),String("int"),Boolean(false),Boolean(true),Boolean(false),Boolean(false), - - -show create table `05_create_tables_t5`; - -Table,Create Table, -String("05_create_tables_t5"),String("CREATE TABLE `05_create_tables_t5` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `c1` int, PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - --- Multiple TIMESTAMP KEYs -CREATE TABLE `05_create_tables_t6`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY, t2 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. 
Caused by: Failed to create plan, err:Table must contain only one timestamp key and it's data type must be TIMESTAMP sql:CREATE TABLE `05_create_tables_t6`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY, t2 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic;" }) - --- Column with comment -CREATE TABLE `05_create_tables_t7`(c1 int COMMENT 'id', t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -affected_rows: 0 - -describe table `05_create_tables_t7`; - -name,type,is_primary,is_nullable,is_tag,is_dictionary, -String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("t"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("c1"),String("int"),Boolean(false),Boolean(true),Boolean(false),Boolean(false), - - -show create table `05_create_tables_t7`; - -Table,Create Table, -String("05_create_tables_t7"),String("CREATE TABLE `05_create_tables_t7` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `c1` int COMMENT 'id', PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - --- StorageFormat -CREATE TABLE `05_create_tables_t8`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; - -affected_rows: 0 - -show create table `05_create_tables_t8`; - -Table,Create Table, -String("05_create_tables_t8"),String("CREATE TABLE `05_create_tables_t8` (`tsid` uint64 NOT NULL, `t1` timestamp NOT NULL, `c1` int, PRIMARY KEY(tsid,t1), TIMESTAMP KEY(t1)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -drop table `05_create_tables_t8`; - -affected_rows: 0 - -CREATE TABLE `05_create_tables_t8`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic with (storage_format= 'columnar'); - -affected_rows: 0 - -show create table `05_create_tables_t8`; - -Table,Create Table, -String("05_create_tables_t8"),String("CREATE TABLE `05_create_tables_t8` (`tsid` uint64 NOT NULL, `t1` timestamp NOT NULL, `c1` int, PRIMARY KEY(tsid,t1), TIMESTAMP KEY(t1)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='COLUMNAR', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -drop table `05_create_tables_t8`; - -affected_rows: 0 - -CREATE TABLE `05_create_tables_t9`(c1 int, d string dictionary, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic with (storage_format= 'columnar'); - -affected_rows: 0 - -show create table `05_create_tables_t9`; - -Table,Create Table, -String("05_create_tables_t9"),String("CREATE TABLE `05_create_tables_t9` (`tsid` uint64 NOT NULL, `t1` timestamp NOT NULL, `c1` int, `d` string DICTIONARY, PRIMARY KEY(tsid,t1), TIMESTAMP KEY(t1)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', 
compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='COLUMNAR', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -drop table `05_create_tables_t9`; - -affected_rows: 0 - -CREATE TABLE `05_create_tables_t9`(c1 int, d string dictionary, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; - -affected_rows: 0 - -show create table `05_create_tables_t9`; - -Table,Create Table, -String("05_create_tables_t9"),String("CREATE TABLE `05_create_tables_t9` (`tsid` uint64 NOT NULL, `t1` timestamp NOT NULL, `c1` int, `d` string DICTIONARY, PRIMARY KEY(tsid,t1), TIMESTAMP KEY(t1)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -drop table `05_create_tables_t9`; - -affected_rows: 0 - --- Error: dictionary must be string type -CREATE TABLE `05_create_tables_t9`(c1 int, d double dictionary, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "parse table name. Caused by: Invalid sql, sql:CREATE TABLE `05_create_tables_t9`(c1 int, d double dictionary, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic;, err:sql parser error: Only string column can be dictionary encoded: \"d DOUBLE DICTIONARY\" sql:CREATE TABLE `05_create_tables_t9`(c1 int, d double dictionary, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic;" }) - --- Ignore now, table_id is not stable now --- CREATE TABLE `05_create_tables_t8`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic with (storage_format= 'unknown'); --- Default value options -CREATE TABLE `05_create_tables_t9`(c1 int, c2 bigint default 0, c3 uint32 default 1 + 1, c4 string default 'xxx', c5 uint32 default c3*2 + 1, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; - -affected_rows: 0 - -show create table `05_create_tables_t9`; - -Table,Create Table, -String("05_create_tables_t9"),String("CREATE TABLE `05_create_tables_t9` (`tsid` uint64 NOT NULL, `t1` timestamp NOT NULL, `c1` int, `c2` bigint DEFAULT 0, `c3` uint32 DEFAULT 1 + 1, `c4` string DEFAULT 'xxx', `c5` uint32 DEFAULT c3 * 2 + 1, PRIMARY KEY(tsid,t1), TIMESTAMP KEY(t1)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -drop table `05_create_tables_t9`; - -affected_rows: 0 - --- Explicit primary key with tsid -CREATE TABLE `05_create_tables_t10`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY, PRIMARY KEY(tsid, t1)) ENGINE = Analytic; - -affected_rows: 0 - -show create table `05_create_tables_t10`; - -Table,Create Table, -String("05_create_tables_t10"),String("CREATE TABLE `05_create_tables_t10` (`tsid` uint64 NOT NULL, `t1` timestamp NOT NULL, `c1` int, PRIMARY KEY(tsid,t1), TIMESTAMP KEY(t1)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', 
layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -drop table `05_create_tables_t10`; - -affected_rows: 0 - --- Explicit primary key with tsid -CREATE TABLE `05_create_tables_t11`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY, PRIMARY KEY(t1, tsid)) ENGINE = Analytic; - -affected_rows: 0 - -show create table `05_create_tables_t11`; - -Table,Create Table, -String("05_create_tables_t11"),String("CREATE TABLE `05_create_tables_t11` (`t1` timestamp NOT NULL, `tsid` uint64 NOT NULL, `c1` int, PRIMARY KEY(t1,tsid), TIMESTAMP KEY(t1)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -drop table `05_create_tables_t11`; - -affected_rows: 0 - -CREATE TABLE `05_create_tables_t12`(c1 int not null, t1 timestamp NOT NULL TIMESTAMP KEY, PRIMARY KEY(tsid, t1, c1)) ENGINE = Analytic; - -affected_rows: 0 - -show create table `05_create_tables_t12`; - -Table,Create Table, -String("05_create_tables_t12"),String("CREATE TABLE `05_create_tables_t12` (`tsid` uint64 NOT NULL, `t1` timestamp NOT NULL, `c1` int NOT NULL, PRIMARY KEY(tsid,t1,c1), TIMESTAMP KEY(t1)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -drop table `05_create_tables_t12`; - -affected_rows: 0 - --- Timestamp not in primary key -CREATE TABLE `05_timestamp_not_in_primary_key`(c1 int NOT NULL, t timestamp NOT NULL, TIMESTAMP KEY(t), PRIMARY KEY(c1)) ENGINE = Analytic; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. Caused by: Failed to create plan, err:Failed to build schema, err:Timestamp not in primary key. 
sql:CREATE TABLE `05_timestamp_not_in_primary_key`(c1 int NOT NULL, t timestamp NOT NULL, TIMESTAMP KEY(t), PRIMARY KEY(c1)) ENGINE = Analytic;" }) - -DROP TABLE IF EXISTS `05_create_tables_t`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t2`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t3`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t4`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t5`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t6`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t7`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t8`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t9`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t10`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t11`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t12`; - -affected_rows: 0 - diff --git a/integration_tests/cases/env/cluster/ddl/create_tables.sql b/integration_tests/cases/env/cluster/ddl/create_tables.sql deleted file mode 100644 index 260c7e2bcc..0000000000 --- a/integration_tests/cases/env/cluster/ddl/create_tables.sql +++ /dev/null @@ -1,124 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- - -DROP TABLE IF EXISTS `05_create_tables_t`; -DROP TABLE IF EXISTS `05_create_tables_t2`; -DROP TABLE IF EXISTS `05_create_tables_t3`; -DROP TABLE IF EXISTS `05_create_tables_t4`; -DROP TABLE IF EXISTS `05_create_tables_t5`; -DROP TABLE IF EXISTS `05_create_tables_t6`; -DROP TABLE IF EXISTS `05_create_tables_t7`; -DROP TABLE IF EXISTS `05_create_tables_t8`; -DROP TABLE IF EXISTS `05_create_tables_t9`; - --- no TIMESTAMP column -CREATE TABLE `05_create_tables_t`(c1 int) ENGINE = Analytic; - -CREATE TABLE `05_create_tables_t`(c1 int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -CREATE TABLE IF NOT EXISTS `05_create_tables_t`(c1 int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - --- table already exist -CREATE TABLE `05_create_tables_t`(c1 int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -create table `05_create_tables_t2`(a int, b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic with (enable_ttl='false'); -insert into `05_create_tables_t2`(a, b, t) values(1,1,1),(2,2,2); -select a+b from `05_create_tables_t2`; - --- table already exist -create table `05_create_tables_t2`(a int,b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; --- table already exist -create table `05_create_tables_t2`(a int,b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -create table `05_create_tables_t3`(a int,b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -create table `05_create_tables_t4`(`a` int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; -describe table `05_create_tables_t4`; -show create table `05_create_tables_t4`; - --- TIMESTAMP KEY -CREATE TABLE `05_create_tables_t5`(c1 int, t timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; -describe table `05_create_tables_t5`; -show create table `05_create_tables_t5`; - --- Multiple TIMESTAMP KEYs -CREATE TABLE `05_create_tables_t6`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY, t2 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; - --- Column with comment -CREATE TABLE `05_create_tables_t7`(c1 int COMMENT 'id', t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; -describe table `05_create_tables_t7`; -show create table `05_create_tables_t7`; - --- StorageFormat -CREATE TABLE `05_create_tables_t8`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; -show create table `05_create_tables_t8`; -drop table `05_create_tables_t8`; - -CREATE TABLE `05_create_tables_t8`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic with (storage_format= 'columnar'); -show create table `05_create_tables_t8`; -drop table `05_create_tables_t8`; - -CREATE TABLE `05_create_tables_t9`(c1 int, d string dictionary, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic with (storage_format= 'columnar'); -show create table `05_create_tables_t9`; -drop table `05_create_tables_t9`; - -CREATE TABLE `05_create_tables_t9`(c1 int, d string dictionary, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; -show create table `05_create_tables_t9`; -drop table `05_create_tables_t9`; - --- Error: dictionary must be string type -CREATE TABLE `05_create_tables_t9`(c1 int, d double dictionary, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; - --- Ignore now, table_id is not stable now --- CREATE TABLE `05_create_tables_t8`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic with (storage_format= 'unknown'); - --- Default value options -CREATE TABLE `05_create_tables_t9`(c1 int, c2 bigint default 0, c3 uint32 default 1 + 1, 
c4 string default 'xxx', c5 uint32 default c3*2 + 1, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; -show create table `05_create_tables_t9`; -drop table `05_create_tables_t9`; - --- Explicit primary key with tsid -CREATE TABLE `05_create_tables_t10`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY, PRIMARY KEY(tsid, t1)) ENGINE = Analytic; -show create table `05_create_tables_t10`; -drop table `05_create_tables_t10`; - --- Explicit primary key with tsid -CREATE TABLE `05_create_tables_t11`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY, PRIMARY KEY(t1, tsid)) ENGINE = Analytic; -show create table `05_create_tables_t11`; -drop table `05_create_tables_t11`; - -CREATE TABLE `05_create_tables_t12`(c1 int not null, t1 timestamp NOT NULL TIMESTAMP KEY, PRIMARY KEY(tsid, t1, c1)) ENGINE = Analytic; -show create table `05_create_tables_t12`; -drop table `05_create_tables_t12`; - --- Timestamp not in primary key -CREATE TABLE `05_timestamp_not_in_primary_key`(c1 int NOT NULL, t timestamp NOT NULL, TIMESTAMP KEY(t), PRIMARY KEY(c1)) ENGINE = Analytic; - -DROP TABLE IF EXISTS `05_create_tables_t`; -DROP TABLE IF EXISTS `05_create_tables_t2`; -DROP TABLE IF EXISTS `05_create_tables_t3`; -DROP TABLE IF EXISTS `05_create_tables_t4`; -DROP TABLE IF EXISTS `05_create_tables_t5`; -DROP TABLE IF EXISTS `05_create_tables_t6`; -DROP TABLE IF EXISTS `05_create_tables_t7`; -DROP TABLE IF EXISTS `05_create_tables_t8`; -DROP TABLE IF EXISTS `05_create_tables_t9`; -DROP TABLE IF EXISTS `05_create_tables_t10`; -DROP TABLE IF EXISTS `05_create_tables_t11`; -DROP TABLE IF EXISTS `05_create_tables_t12`; diff --git a/integration_tests/cases/env/cluster/ddl/partition_table.result b/integration_tests/cases/env/cluster/ddl/partition_table.result deleted file mode 100644 index d576d93bd7..0000000000 --- a/integration_tests/cases/env/cluster/ddl/partition_table.result +++ /dev/null @@ -1,252 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- -DROP TABLE IF EXISTS `partition_table_t`; - -affected_rows: 0 - -CREATE TABLE `partition_table_t`( - `name`string TAG, - `id` int TAG, - `value` double NOT NULL, - `t` timestamp NOT NULL, - TIMESTAMP KEY(t) -) PARTITION BY KEY(name) PARTITIONS 4 ENGINE = Analytic with (enable_ttl='false'); - -affected_rows: 0 - -SHOW CREATE TABLE partition_table_t; - -Table,Create Table, -String("partition_table_t"),String("CREATE TABLE `partition_table_t` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `name` string TAG, `id` int TAG, `value` double NOT NULL, PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) PARTITION BY KEY(name) PARTITIONS 4 ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='false', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -INSERT INTO partition_table_t (t, name, value) -VALUES (1651737067000, "horaedb0", 100), - (1651737067000, "horaedb1", 101), - (1651737067000, "horaedb2", 102), - (1651737067000, "horaedb3", 103), - (1651737067000, "horaedb4", 104), - (1651737067000, "horaedb5", 105), - (1651737067000, "horaedb6", 106), - (1651737067000, "horaedb7", 107), - (1651737067000, "horaedb8", 108), - (1651737067000, "horaedb9", 109), - (1651737067000, "horaedb10", 110); - -affected_rows: 11 - -SELECT * from partition_table_t where name = "horaedb0"; - -tsid,t,name,id,value, -UInt64(14003629709952370168),Timestamp(1651737067000),String("horaedb0"),Int32(0),Double(100.0), - - -SELECT * from partition_table_t where name = "horaedb1"; - -tsid,t,name,id,value, -UInt64(16657478399506126519),Timestamp(1651737067000),String("horaedb1"),Int32(0),Double(101.0), - - -SELECT * from partition_table_t where name = "horaedb2"; - -tsid,t,name,id,value, -UInt64(13157642818866239218),Timestamp(1651737067000),String("horaedb2"),Int32(0),Double(102.0), - - -SELECT * from partition_table_t where name = "horaedb3"; - -tsid,t,name,id,value, -UInt64(17269220762189063056),Timestamp(1651737067000),String("horaedb3"),Int32(0),Double(103.0), - - -SELECT * from partition_table_t where name in ("horaedb0", "horaedb1", "horaedb2", "horaedb3", "horaedb4") order by name; - -tsid,t,name,id,value, -UInt64(14003629709952370168),Timestamp(1651737067000),String("horaedb0"),Int32(0),Double(100.0), -UInt64(16657478399506126519),Timestamp(1651737067000),String("horaedb1"),Int32(0),Double(101.0), -UInt64(13157642818866239218),Timestamp(1651737067000),String("horaedb2"),Int32(0),Double(102.0), -UInt64(17269220762189063056),Timestamp(1651737067000),String("horaedb3"),Int32(0),Double(103.0), -UInt64(7518868672723248583),Timestamp(1651737067000),String("horaedb4"),Int32(0),Double(104.0), - - -SELECT * from partition_table_t where name in ("horaedb5", "horaedb6", "horaedb7","horaedb8", "horaedb9", "horaedb10") order by name; - -tsid,t,name,id,value, -UInt64(8750817885369790753),Timestamp(1651737067000),String("horaedb10"),Int32(0),Double(110.0), -UInt64(11618546385801067358),Timestamp(1651737067000),String("horaedb5"),Int32(0),Double(105.0), -UInt64(14251454593639082376),Timestamp(1651737067000),String("horaedb6"),Int32(0),Double(106.0), -UInt64(5459144247315085218),Timestamp(1651737067000),String("horaedb7"),Int32(0),Double(107.0), -UInt64(18050575420545011296),Timestamp(1651737067000),String("horaedb8"),Int32(0),Double(108.0), 
-UInt64(16367588166920223437),Timestamp(1651737067000),String("horaedb9"),Int32(0),Double(109.0), - - --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx --- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx --- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx --- SQLNESS REPLACE scan_memtable_\d+ scan_memtable_n -EXPLAIN ANALYZE SELECT * from partition_table_t where name = "ceresdb0"; - -plan_type,plan, -String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:1, metrics=[\npartition_table_t:\n __partition_table_t_1:\n poll_duration=xxs\n total_duration=xxs\n wait_duration=xxs\n\n__partition_table_t_1:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n ScanTable: table=__partition_table_t_1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name = Utf8(\"ceresdb0\")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_n, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"), - - --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx --- SQLNESS REPLACE __partition_table_t_\d __partition_table_t_x --- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx --- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx --- SQLNESS REPLACE scan_memtable_\d+ scan_memtable_n -EXPLAIN ANALYZE SELECT * from partition_table_t where name in ("ceresdb0", "ceresdb1", "ceresdb2", "ceresdb3", "ceresdb4"); - -plan_type,plan, -String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:3, metrics=[\npartition_table_t:\n __partition_table_t_x:\n poll_duration=xxs\n total_duration=xxs\n wait_duration=xxs\n __partition_table_t_x:\n poll_duration=xxs\n total_duration=xxs\n wait_duration=xxs\n __partition_table_t_x:\n poll_duration=xxs\n total_duration=xxs\n wait_duration=xxs\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb2\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_n, fetched_columns:[tsid,t,name,id,value]:\n=0]\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n 
scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_n, fetched_columns:[tsid,t,name,id,value]:\n=0]\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb1\"), Utf8(\"ceresdb3\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_n, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"), - - -ALTER TABLE partition_table_t ADD COLUMN (b string); - -affected_rows: 0 - --- SQLNESS REPLACE endpoint:(.*?), endpoint:xx, -INSERT INTO partition_table_t (t, id, name, value) VALUES (1651737067000, 10, "horaedb0", 100); - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute insert, err:Failed to write table, err:Failed to write tables in batch, tables:[\"__partition_table_t_1\"], err:Failed to query from table in server, table_idents:[TableIdentifier { catalog: \"horaedb\", schema: \"public\", table: \"__partition_table_t_1\" }], endpoint:xx, code:401, msg:failed to decode row group payload. Caused by: Schema mismatch with the write request, msg:expect 6 columns, but got 5. sql:INSERT INTO partition_table_t (t, id, name, value) VALUES (1651737067000, 10, \"horaedb0\", 100);" }) - --- SQLNESS REPLACE endpoint:(.*?), endpoint:xx, -INSERT INTO partition_table_t (t, id, name, value) VALUES (1651737067000, 10, "ceresdb0", 100); - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute insert, err:Failed to write table, err:Failed to write tables in batch, tables:[\"__partition_table_t_1\"], err:Failed to query from table in server, table_idents:[TableIdentifier { catalog: \"horaedb\", schema: \"public\", table: \"__partition_table_t_1\" }], endpoint:xx, code:401, msg:failed to decode row group payload. Caused by: Schema mismatch with the write request, msg:expect 6 columns, but got 5. 
sql:INSERT INTO partition_table_t (t, id, name, value) VALUES (1651737067000, 10, \"ceresdb0\", 100);" }) - -ALTER TABLE partition_table_t MODIFY SETTING enable_ttl='true'; - -affected_rows: 0 - -SHOW CREATE TABLE __partition_table_t_0; - -Table,Create Table, -String("__partition_table_t_0"),String("CREATE TABLE `__partition_table_t_0` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `name` string TAG, `id` int TAG, `value` double NOT NULL, `b` string, PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='2h', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -SHOW CREATE TABLE __partition_table_t_1; - -Table,Create Table, -String("__partition_table_t_1"),String("CREATE TABLE `__partition_table_t_1` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `name` string TAG, `id` int TAG, `value` double NOT NULL, `b` string, PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='2h', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -SHOW CREATE TABLE __partition_table_t_2; - -Table,Create Table, -String("__partition_table_t_2"),String("CREATE TABLE `__partition_table_t_2` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `name` string TAG, `id` int TAG, `value` double NOT NULL, `b` string, PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='2h', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -SHOW CREATE TABLE __partition_table_t_3; - -Table,Create Table, -String("__partition_table_t_3"),String("CREATE TABLE `__partition_table_t_3` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `name` string TAG, `id` int TAG, `value` double NOT NULL, `b` string, PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='2h', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -DROP TABLE IF EXISTS `partition_table_t`; - -affected_rows: 0 - -SHOW CREATE TABLE partition_table_t; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. 
Caused by: Failed to create plan, err:Table not found, table:partition_table_t sql:SHOW CREATE TABLE partition_table_t;" }) - -DROP TABLE IF EXISTS `random_partition_table_t`; - -affected_rows: 0 - -CREATE TABLE `random_partition_table_t`( - `name`string TAG, - `id` int TAG, - `value` double NOT NULL, - `t` timestamp NOT NULL, - TIMESTAMP KEY(t) -) PARTITION BY RANDOM PARTITIONS 4 ENGINE = Analytic with (enable_ttl='false', update_mode="APPEND"); - -affected_rows: 0 - -SHOW CREATE TABLE random_partition_table_t; - -Table,Create Table, -String("random_partition_table_t"),String("CREATE TABLE `random_partition_table_t` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `name` string TAG, `id` int TAG, `value` double NOT NULL, PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) PARTITION BY RANDOM PARTITIONS 4 ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='false', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='APPEND', write_buffer_size='33554432')"), - - -INSERT INTO random_partition_table_t (t, name, value) -VALUES (1651737067000, "horaedb0", 100), - (1651737067000, "horaedb1", 101), - (1651737067000, "horaedb2", 102), - (1651737067000, "horaedb3", 103), - (1651737067000, "horaedb4", 104), - (1651737067000, "horaedb5", 105), - (1651737067000, "horaedb6", 106), - (1651737067000, "horaedb7", 107), - (1651737067000, "horaedb8", 108), - (1651737067000, "horaedb9", 109), - (1651737067000, "horaedb10", 110); - -affected_rows: 11 - -SELECT * from random_partition_table_t where name = "horaedb0"; - -tsid,t,name,id,value, -UInt64(14003629709952370168),Timestamp(1651737067000),String("horaedb0"),Int32(0),Double(100.0), - - -SELECT * from random_partition_table_t where name = "horaedb5"; - -tsid,t,name,id,value, -UInt64(11618546385801067358),Timestamp(1651737067000),String("horaedb5"),Int32(0),Double(105.0), - - -SELECT - time_bucket (t, "PT1M") AS ts, - approx_percentile_cont (value, 0.9) AS value -FROM - random_partition_table_t -GROUP BY - time_bucket (t, "PT1M"); - -ts,value, -Timestamp(1651737060000),Double(109.4), - - -DROP TABLE IF EXISTS `random_partition_table_t`; - -affected_rows: 0 - -SHOW CREATE TABLE random_partition_table_t; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. Caused by: Failed to create plan, err:Table not found, table:random_partition_table_t sql:SHOW CREATE TABLE random_partition_table_t;" }) - -DROP TABLE IF EXISTS `random_partition_table_t_overwrite`; - -affected_rows: 0 - -CREATE TABLE `random_partition_table_t_overwrite`( - `name`string TAG, - `id` int TAG, - `value` double NOT NULL, - `t` timestamp NOT NULL, - TIMESTAMP KEY(t) -) PARTITION BY RANDOM PARTITIONS 4 ENGINE = Analytic with (enable_ttl='false', update_mode="OVERWRITE"); - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. 
Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute create table, err:Failed to create table by table manipulator, err:Failed to create table, msg:invalid parameters to create table, plan:CreateTablePlan { engine: \"Analytic\", if_not_exists: false, table: \"random_partition_table_t_overwrite\", table_schema: Schema { timestamp_index: 1, tsid_index: Some(0), column_schemas: ColumnSchemas { columns: [ColumnSchema { id: 1, name: \"tsid\", data_type: UInt64, is_nullable: false, is_tag: false, is_dictionary: false, comment: \"\", escaped_name: \"tsid\", default_value: None }, ColumnSchema { id: 2, name: \"t\", data_type: Timestamp, is_nullable: false, is_tag: false, is_dictionary: false, comment: \"\", escaped_name: \"t\", default_value: None }, ColumnSchema { id: 3, name: \"name\", data_type: String, is_nullable: true, is_tag: true, is_dictionary: false, comment: \"\", escaped_name: \"name\", default_value: None }, ColumnSchema { id: 4, name: \"id\", data_type: Int32, is_nullable: true, is_tag: true, is_dictionary: false, comment: \"\", escaped_name: \"id\", default_value: None }, ColumnSchema { id: 5, name: \"value\", data_type: Double, is_nullable: false, is_tag: false, is_dictionary: false, comment: \"\", escaped_name: \"value\", default_value: None }] }, version: 1, primary_key_indexes: [0, 1] }, options: {\"enable_ttl\": \"false\", \"update_mode\": \"OVERWRITE\"} }, err:Invalid arguments, table:random_partition_table_t_overwrite, err:Try to create a random partition table in overwrite mode, table:random_partition_table_t_overwrite. sql:CREATE TABLE `random_partition_table_t_overwrite`(\n `name`string TAG,\n `id` int TAG,\n `value` double NOT NULL,\n `t` timestamp NOT NULL,\n TIMESTAMP KEY(t)\n) PARTITION BY RANDOM PARTITIONS 4 ENGINE = Analytic with (enable_ttl='false', update_mode=\"OVERWRITE\");" }) - diff --git a/integration_tests/cases/env/cluster/ddl/partition_table.sql b/integration_tests/cases/env/cluster/ddl/partition_table.sql deleted file mode 100644 index f06dee2ea8..0000000000 --- a/integration_tests/cases/env/cluster/ddl/partition_table.sql +++ /dev/null @@ -1,143 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- - -DROP TABLE IF EXISTS `partition_table_t`; - -CREATE TABLE `partition_table_t`( - `name`string TAG, - `id` int TAG, - `value` double NOT NULL, - `t` timestamp NOT NULL, - TIMESTAMP KEY(t) -) PARTITION BY KEY(name) PARTITIONS 4 ENGINE = Analytic with (enable_ttl='false'); - -SHOW CREATE TABLE partition_table_t; - -INSERT INTO partition_table_t (t, name, value) -VALUES (1651737067000, "horaedb0", 100), - (1651737067000, "horaedb1", 101), - (1651737067000, "horaedb2", 102), - (1651737067000, "horaedb3", 103), - (1651737067000, "horaedb4", 104), - (1651737067000, "horaedb5", 105), - (1651737067000, "horaedb6", 106), - (1651737067000, "horaedb7", 107), - (1651737067000, "horaedb8", 108), - (1651737067000, "horaedb9", 109), - (1651737067000, "horaedb10", 110); - -SELECT * from partition_table_t where name = "horaedb0"; - -SELECT * from partition_table_t where name = "horaedb1"; - -SELECT * from partition_table_t where name = "horaedb2"; - -SELECT * from partition_table_t where name = "horaedb3"; - -SELECT * from partition_table_t where name in ("horaedb0", "horaedb1", "horaedb2", "horaedb3", "horaedb4") order by name; - -SELECT * from partition_table_t where name in ("horaedb5", "horaedb6", "horaedb7","horaedb8", "horaedb9", "horaedb10") order by name; - --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx --- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx --- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx --- SQLNESS REPLACE scan_memtable_\d+ scan_memtable_n -EXPLAIN ANALYZE SELECT * from partition_table_t where name = "ceresdb0"; - --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx --- SQLNESS REPLACE __partition_table_t_\d __partition_table_t_x --- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx --- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx --- SQLNESS REPLACE scan_memtable_\d+ scan_memtable_n -EXPLAIN ANALYZE SELECT * from partition_table_t where name in ("ceresdb0", "ceresdb1", "ceresdb2", "ceresdb3", "ceresdb4"); - -ALTER TABLE partition_table_t ADD COLUMN (b string); - --- SQLNESS REPLACE endpoint:(.*?), endpoint:xx, -INSERT INTO partition_table_t (t, id, name, value) VALUES (1651737067000, 10, "horaedb0", 100); - --- SQLNESS REPLACE endpoint:(.*?), endpoint:xx, -INSERT INTO partition_table_t (t, id, name, value) VALUES (1651737067000, 10, "ceresdb0", 100); - -ALTER TABLE partition_table_t MODIFY SETTING enable_ttl='true'; - -SHOW CREATE TABLE __partition_table_t_0; - -SHOW CREATE TABLE __partition_table_t_1; - -SHOW CREATE TABLE __partition_table_t_2; - -SHOW CREATE TABLE __partition_table_t_3; - -DROP TABLE IF EXISTS `partition_table_t`; - -SHOW CREATE TABLE partition_table_t; - -DROP TABLE IF EXISTS `random_partition_table_t`; - -CREATE TABLE `random_partition_table_t`( - `name`string TAG, - `id` int TAG, - `value` double NOT NULL, - `t` timestamp NOT NULL, - TIMESTAMP KEY(t) -) PARTITION BY RANDOM PARTITIONS 4 ENGINE = Analytic with (enable_ttl='false', update_mode="APPEND"); - -SHOW CREATE TABLE random_partition_table_t; - -INSERT INTO random_partition_table_t (t, name, value) -VALUES (1651737067000, "horaedb0", 100), - (1651737067000, "horaedb1", 101), - (1651737067000, "horaedb2", 102), - (1651737067000, "horaedb3", 103), - (1651737067000, "horaedb4", 104), - (1651737067000, "horaedb5", 105), - (1651737067000, "horaedb6", 106), - (1651737067000, "horaedb7", 107), - (1651737067000, "horaedb8", 108), - (1651737067000, 
"horaedb9", 109), - (1651737067000, "horaedb10", 110); - -SELECT * from random_partition_table_t where name = "horaedb0"; - -SELECT * from random_partition_table_t where name = "horaedb5"; - -SELECT - time_bucket (t, "PT1M") AS ts, - approx_percentile_cont (value, 0.9) AS value -FROM - random_partition_table_t -GROUP BY - time_bucket (t, "PT1M"); - -DROP TABLE IF EXISTS `random_partition_table_t`; - -SHOW CREATE TABLE random_partition_table_t; - -DROP TABLE IF EXISTS `random_partition_table_t_overwrite`; - -CREATE TABLE `random_partition_table_t_overwrite`( - `name`string TAG, - `id` int TAG, - `value` double NOT NULL, - `t` timestamp NOT NULL, - TIMESTAMP KEY(t) -) PARTITION BY RANDOM PARTITIONS 4 ENGINE = Analytic with (enable_ttl='false', update_mode="OVERWRITE"); diff --git a/integration_tests/cases/env/compaction_offload/compact/compact.result b/integration_tests/cases/env/compaction_offload/compact/compact.result deleted file mode 100644 index 9f4d91b488..0000000000 --- a/integration_tests/cases/env/compaction_offload/compact/compact.result +++ /dev/null @@ -1,110 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- -DROP TABLE IF EXISTS `compact_table1`; - -affected_rows: 0 - -CREATE TABLE `compact_table1` ( - `timestamp` timestamp NOT NULL, - `value` double, - `dic` string dictionary, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false', - update_mode='OVERWRITE' -); - -affected_rows: 0 - -INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) - VALUES (1, 100, "d1"), (2, 200, "d2"), (3, 300, "d3"); - -affected_rows: 3 - --- SQLNESS ARG pre_cmd=flush -INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) - VALUES (1, 100, "update_d1"), (2, 200, "update_d2"), (3, 300, "update_d3"); - -affected_rows: 3 - --- SQLNESS ARG pre_cmd=flush -INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) - VALUES (4, 400, "d4"), (5, 500, "d5"), (6, 600, "d6"); - -affected_rows: 3 - --- SQLNESS ARG pre_cmd=flush -INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) - VALUES (4, 400, "update_d4"), (5, 500, "update_d5"), (6, 600, "update_d6"); - -affected_rows: 3 - --- SQLNESS ARG pre_cmd=flush -INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) - VALUES (7, 700, "d7"), (8, 800, "d8"), (9, 900, "d9"); - -affected_rows: 3 - --- SQLNESS ARG pre_cmd=flush -INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) - VALUES (7, 700, "update_d7"), (8, 800, "update_d8"), (9, 900, "update_d9"); - -affected_rows: 3 - --- SQLNESS ARG pre_cmd=flush -INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) - VALUES (10, 1000, "d10"), (11, 1100, "d11"), (12, 1200, "d12"); - -affected_rows: 3 - --- SQLNESS ARG pre_cmd=flush -INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) - VALUES (10, 1000, "update_d10"), (11, 1100, "update_d11"), (12, 1200, "update_d12"); - -affected_rows: 3 - --- trigger manual compaction after flush memtable --- SQLNESS ARG pre_cmd=flush --- SQLNESS ARG pre_cmd=compact -SELECT - * -FROM - `compact_table1` -ORDER BY - `value` ASC; - -tsid,timestamp,value,dic, -UInt64(0),Timestamp(1),Double(100.0),String("update_d1"), -UInt64(0),Timestamp(2),Double(200.0),String("update_d2"), -UInt64(0),Timestamp(3),Double(300.0),String("update_d3"), -UInt64(0),Timestamp(4),Double(400.0),String("update_d4"), -UInt64(0),Timestamp(5),Double(500.0),String("update_d5"), -UInt64(0),Timestamp(6),Double(600.0),String("update_d6"), -UInt64(0),Timestamp(7),Double(700.0),String("update_d7"), -UInt64(0),Timestamp(8),Double(800.0),String("update_d8"), -UInt64(0),Timestamp(9),Double(900.0),String("update_d9"), -UInt64(0),Timestamp(10),Double(1000.0),String("update_d10"), -UInt64(0),Timestamp(11),Double(1100.0),String("update_d11"), -UInt64(0),Timestamp(12),Double(1200.0),String("update_d12"), - - -DROP TABLE `compact_table1`; - -affected_rows: 0 - diff --git a/integration_tests/cases/env/compaction_offload/compact/compact.sql b/integration_tests/cases/env/compaction_offload/compact/compact.sql deleted file mode 100644 index f0aa46fbb5..0000000000 --- a/integration_tests/cases/env/compaction_offload/compact/compact.sql +++ /dev/null @@ -1,76 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. 
You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - -DROP TABLE IF EXISTS `compact_table1`; - -CREATE TABLE `compact_table1` ( - `timestamp` timestamp NOT NULL, - `value` double, - `dic` string dictionary, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false', - update_mode='OVERWRITE' -); - - -INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) - VALUES (1, 100, "d1"), (2, 200, "d2"), (3, 300, "d3"); - --- SQLNESS ARG pre_cmd=flush -INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) - VALUES (1, 100, "update_d1"), (2, 200, "update_d2"), (3, 300, "update_d3"); - --- SQLNESS ARG pre_cmd=flush -INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) - VALUES (4, 400, "d4"), (5, 500, "d5"), (6, 600, "d6"); - --- SQLNESS ARG pre_cmd=flush -INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) - VALUES (4, 400, "update_d4"), (5, 500, "update_d5"), (6, 600, "update_d6"); - --- SQLNESS ARG pre_cmd=flush -INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) - VALUES (7, 700, "d7"), (8, 800, "d8"), (9, 900, "d9"); - --- SQLNESS ARG pre_cmd=flush -INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) - VALUES (7, 700, "update_d7"), (8, 800, "update_d8"), (9, 900, "update_d9"); - --- SQLNESS ARG pre_cmd=flush -INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) - VALUES (10, 1000, "d10"), (11, 1100, "d11"), (12, 1200, "d12"); - --- SQLNESS ARG pre_cmd=flush -INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) - VALUES (10, 1000, "update_d10"), (11, 1100, "update_d11"), (12, 1200, "update_d12"); - - --- trigger manual compaction after flush memtable --- SQLNESS ARG pre_cmd=flush --- SQLNESS ARG pre_cmd=compact -SELECT - * -FROM - `compact_table1` -ORDER BY - `value` ASC; - - -DROP TABLE `compact_table1`; diff --git a/integration_tests/cases/env/local/common b/integration_tests/cases/env/local/common deleted file mode 120000 index dc879abe93..0000000000 --- a/integration_tests/cases/env/local/common +++ /dev/null @@ -1 +0,0 @@ -../../common \ No newline at end of file diff --git a/integration_tests/cases/env/local/ddl/alter_table.result b/integration_tests/cases/env/local/ddl/alter_table.result deleted file mode 100644 index fda3c80a5e..0000000000 --- a/integration_tests/cases/env/local/ddl/alter_table.result +++ /dev/null @@ -1,133 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- -DROP TABLE IF EXISTS `05_alter_table_t0`; - -affected_rows: 0 - -CREATE TABLE `05_alter_table_t0`(a int, t timestamp NOT NULL, dic string dictionary, TIMESTAMP KEY(t)) ENGINE = Analytic with (enable_ttl='false', update_mode='OVERWRITE'); - -affected_rows: 0 - -INSERT INTO TABLE `05_alter_table_t0`(a, t, dic) values(1, 1 , "d1"); - -affected_rows: 1 - -SELECT * FROM `05_alter_table_t0`; - -tsid,t,a,dic, -UInt64(0),Timestamp(1),Int32(1),String("d1"), - - --- doesn't support rename -ALTER TABLE `05_alter_table_t0` RENAME TO `t1`; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. Caused by: Failed to create plan, err:Unsupported SQL statement sql:ALTER TABLE `05_alter_table_t0` RENAME TO `t1`;" }) - -ALTER TABLE `05_alter_table_t0` add COLUMN (b string); - -affected_rows: 0 - -DESCRIBE TABLE `05_alter_table_t0`; - -name,type,is_primary,is_nullable,is_tag,is_dictionary, -String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("t"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("a"),String("int"),Boolean(false),Boolean(true),Boolean(false),Boolean(false), -String("dic"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(true), -String("b"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(false), - - -INSERT INTO TABLE `05_alter_table_t0`(a, b, t, dic) values (2, '2', 2, "d2"); - -affected_rows: 1 - -SELECT * FROM `05_alter_table_t0`; - -tsid,t,a,dic,b, -UInt64(0),Timestamp(1),Int32(1),String("d1"),String(""), -UInt64(0),Timestamp(2),Int32(2),String("d2"),String("2"), - - -ALTER TABLE `05_alter_table_t0` add COLUMN (add_dic string dictionary); - -affected_rows: 0 - -DESCRIBE TABLE `05_alter_table_t0`; - -name,type,is_primary,is_nullable,is_tag,is_dictionary, -String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("t"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("a"),String("int"),Boolean(false),Boolean(true),Boolean(false),Boolean(false), -String("dic"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(true), -String("b"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(false), -String("add_dic"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(true), - - -INSERT INTO TABLE `05_alter_table_t0` (a, b, t, dic, add_dic) - VALUES (2, '2', 2, "d11", "d22"), - (3, '3', 3, "d22", "d33"); - -affected_rows: 2 - -SELECT * FROM `05_alter_table_t0`; - -tsid,t,a,dic,b,add_dic, -UInt64(0),Timestamp(1),Int32(1),String("d1"),String(""),String(""), -UInt64(0),Timestamp(2),Int32(2),String("d11"),String("2"),String("d22"), -UInt64(0),Timestamp(3),Int32(3),String("d22"),String("3"),String("d33"), - - --- doesn't support drop column -ALTER TABLE `05_alter_table_t0` DROP COLUMN b; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. 
Caused by: Failed to create plan, err:Unsupported SQL statement sql:ALTER TABLE `05_alter_table_t0` DROP COLUMN b;" }) - -DESCRIBE TABLE `05_alter_table_t0`; - -name,type,is_primary,is_nullable,is_tag,is_dictionary, -String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("t"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("a"),String("int"),Boolean(false),Boolean(true),Boolean(false),Boolean(false), -String("dic"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(true), -String("b"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(false), -String("add_dic"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(true), - - -SELECT * FROM `05_alter_table_t0`; - -tsid,t,a,dic,b,add_dic, -UInt64(0),Timestamp(1),Int32(1),String("d1"),String(""),String(""), -UInt64(0),Timestamp(2),Int32(2),String("d11"),String("2"),String("d22"), -UInt64(0),Timestamp(3),Int32(3),String("d22"),String("3"),String("d33"), - - --- try to enable layered memtable with invalid 0 mutable switch threshold -ALTER TABLE `05_alter_table_t0` MODIFY SETTING layered_enable='true',layered_mutable_switch_threshold='0'; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute alter table, err:Failed to alter table options, err:Failed to alter options, table:05_alter_table_t0, err:Found invalid table options, reason:layered memtable is enabled but mutable_switch_threshold is 0, layered_memtable_opts:LayeredMemtableOptions { enable: true, mutable_segment_switch_threshold: ReadableSize(0) }. sql:ALTER TABLE `05_alter_table_t0` MODIFY SETTING layered_enable='true',layered_mutable_switch_threshold='0';" }) - --- try to enable layered memtable for overwrite mode table -ALTER TABLE `05_alter_table_t0` MODIFY SETTING layered_enable='true',layered_mutable_switch_threshold='3MB'; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute alter table, err:Failed to alter table options, err:Failed to alter options, table:05_alter_table_t0, err:Found invalid table options, reason:layered memtable is enabled for table needing dedup, layered_memtable_opts:LayeredMemtableOptions { enable: true, mutable_segment_switch_threshold: ReadableSize(3145728) }, update_mode:Overwrite. sql:ALTER TABLE `05_alter_table_t0` MODIFY SETTING layered_enable='true',layered_mutable_switch_threshold='3MB';" }) - -DROP TABLE `05_alter_table_t0`; - -affected_rows: 0 - diff --git a/integration_tests/cases/env/local/ddl/alter_table.sql b/integration_tests/cases/env/local/ddl/alter_table.sql deleted file mode 100644 index 4ecdbe5e86..0000000000 --- a/integration_tests/cases/env/local/ddl/alter_table.sql +++ /dev/null @@ -1,54 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. 
You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - -DROP TABLE IF EXISTS `05_alter_table_t0`; - -CREATE TABLE `05_alter_table_t0`(a int, t timestamp NOT NULL, dic string dictionary, TIMESTAMP KEY(t)) ENGINE = Analytic with (enable_ttl='false', update_mode='OVERWRITE'); -INSERT INTO TABLE `05_alter_table_t0`(a, t, dic) values(1, 1 , "d1"); -SELECT * FROM `05_alter_table_t0`; - --- doesn't support rename -ALTER TABLE `05_alter_table_t0` RENAME TO `t1`; - -ALTER TABLE `05_alter_table_t0` add COLUMN (b string); -DESCRIBE TABLE `05_alter_table_t0`; -INSERT INTO TABLE `05_alter_table_t0`(a, b, t, dic) values (2, '2', 2, "d2"); -SELECT * FROM `05_alter_table_t0`; - -ALTER TABLE `05_alter_table_t0` add COLUMN (add_dic string dictionary); -DESCRIBE TABLE `05_alter_table_t0`; -INSERT INTO TABLE `05_alter_table_t0` (a, b, t, dic, add_dic) - VALUES (2, '2', 2, "d11", "d22"), - (3, '3', 3, "d22", "d33"); - - -SELECT * FROM `05_alter_table_t0`; - --- doesn't support drop column -ALTER TABLE `05_alter_table_t0` DROP COLUMN b; -DESCRIBE TABLE `05_alter_table_t0`; -SELECT * FROM `05_alter_table_t0`; - --- try to enable layered memtable with invalid 0 mutable switch threshold -ALTER TABLE `05_alter_table_t0` MODIFY SETTING layered_enable='true',layered_mutable_switch_threshold='0'; - --- try to enable layered memtable for overwrite mode table -ALTER TABLE `05_alter_table_t0` MODIFY SETTING layered_enable='true',layered_mutable_switch_threshold='3MB'; - -DROP TABLE `05_alter_table_t0`; diff --git a/integration_tests/cases/env/local/ddl/create_tables.result b/integration_tests/cases/env/local/ddl/create_tables.result deleted file mode 100644 index 0f8e0bc8f3..0000000000 --- a/integration_tests/cases/env/local/ddl/create_tables.result +++ /dev/null @@ -1,362 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- -DROP TABLE IF EXISTS `05_create_tables_t`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t2`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t3`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t4`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t5`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t6`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t7`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t8`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t9`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t10`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t11`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_timestamp_not_in_primary_key`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_enable_layered_memtable_for_append`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_enable_layered_memtable_for_overwrite`; - -affected_rows: 0 - --- no TIMESTAMP column -CREATE TABLE `05_create_tables_t`(c1 int) ENGINE = Analytic; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. Caused by: Failed to create plan, err:Table must contain timestamp constraint sql:CREATE TABLE `05_create_tables_t`(c1 int) ENGINE = Analytic;" }) - -CREATE TABLE `05_create_tables_t`(c1 int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -affected_rows: 0 - -CREATE TABLE IF NOT EXISTS `05_create_tables_t`(c1 int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -affected_rows: 0 - --- table already exist -CREATE TABLE `05_create_tables_t`(c1 int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute create table, err:Failed to create table by table manipulator, err:Failed to operate table, err:Failed to operate table, msg:Some(\"failed to create table on shard, request:CreateTableRequest { params: CreateTableParams { catalog_name: \\\"horaedb\\\", schema_name: \\\"public\\\", table_name: \\\"05_create_tables_t\\\", table_options: [], table_schema: Schema { timestamp_index: 1, tsid_index: Some(0), column_schemas: ColumnSchemas { columns: [ColumnSchema { id: 1, name: \\\"tsid\\\", data_type: UInt64, is_nullable: false, is_tag: false, is_dictionary: false, comment: \\\"\\\", escaped_name: \\\"tsid\\\", default_value: None }, ColumnSchema { id: 2, name: \\\"t\\\", data_type: Timestamp, is_nullable: false, is_tag: false, is_dictionary: false, comment: \\\"\\\", escaped_name: \\\"t\\\", default_value: None }, ColumnSchema { id: 3, name: \\\"c1\\\", data_type: Int32, is_nullable: true, is_tag: false, is_dictionary: false, comment: \\\"\\\", escaped_name: \\\"c1\\\", default_value: None }] }, version: 1, primary_key_indexes: [0, 1] }, partition_info: None, engine: \\\"Analytic\\\" }, table_id: None, state: Stable, shard_id: 0 }\"), err:Failed to create table, table already exists, table:05_create_tables_t. 
sql:CREATE TABLE `05_create_tables_t`(c1 int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic;" }) - -create table `05_create_tables_t2`(a int, b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic with (enable_ttl='false'); - -affected_rows: 0 - -insert into `05_create_tables_t2`(a, b, t) values(1,1,1),(2,2,2); - -affected_rows: 2 - -select a+b from `05_create_tables_t2`; - -05_create_tables_t2.a + 05_create_tables_t2.b, -Int32(2), -Int32(4), - - --- table already exist -create table `05_create_tables_t2`(a int,b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute create table, err:Failed to create table by table manipulator, err:Failed to operate table, err:Failed to operate table, msg:Some(\"failed to create table on shard, request:CreateTableRequest { params: CreateTableParams { catalog_name: \\\"horaedb\\\", schema_name: \\\"public\\\", table_name: \\\"05_create_tables_t2\\\", table_options: [], table_schema: Schema { timestamp_index: 1, tsid_index: Some(0), column_schemas: ColumnSchemas { columns: [ColumnSchema { id: 1, name: \\\"tsid\\\", data_type: UInt64, is_nullable: false, is_tag: false, is_dictionary: false, comment: \\\"\\\", escaped_name: \\\"tsid\\\", default_value: None }, ColumnSchema { id: 2, name: \\\"t\\\", data_type: Timestamp, is_nullable: false, is_tag: false, is_dictionary: false, comment: \\\"\\\", escaped_name: \\\"t\\\", default_value: None }, ColumnSchema { id: 3, name: \\\"a\\\", data_type: Int32, is_nullable: true, is_tag: false, is_dictionary: false, comment: \\\"\\\", escaped_name: \\\"a\\\", default_value: None }, ColumnSchema { id: 4, name: \\\"b\\\", data_type: Int32, is_nullable: true, is_tag: false, is_dictionary: false, comment: \\\"\\\", escaped_name: \\\"b\\\", default_value: None }] }, version: 1, primary_key_indexes: [0, 1] }, partition_info: None, engine: \\\"Analytic\\\" }, table_id: None, state: Stable, shard_id: 0 }\"), err:Failed to create table, table already exists, table:05_create_tables_t2. sql:create table `05_create_tables_t2`(a int,b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic;" }) - --- table already exist -create table `05_create_tables_t2`(a int,b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. 
Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute create table, err:Failed to create table by table manipulator, err:Failed to operate table, err:Failed to operate table, msg:Some(\"failed to create table on shard, request:CreateTableRequest { params: CreateTableParams { catalog_name: \\\"horaedb\\\", schema_name: \\\"public\\\", table_name: \\\"05_create_tables_t2\\\", table_options: [], table_schema: Schema { timestamp_index: 1, tsid_index: Some(0), column_schemas: ColumnSchemas { columns: [ColumnSchema { id: 1, name: \\\"tsid\\\", data_type: UInt64, is_nullable: false, is_tag: false, is_dictionary: false, comment: \\\"\\\", escaped_name: \\\"tsid\\\", default_value: None }, ColumnSchema { id: 2, name: \\\"t\\\", data_type: Timestamp, is_nullable: false, is_tag: false, is_dictionary: false, comment: \\\"\\\", escaped_name: \\\"t\\\", default_value: None }, ColumnSchema { id: 3, name: \\\"a\\\", data_type: Int32, is_nullable: true, is_tag: false, is_dictionary: false, comment: \\\"\\\", escaped_name: \\\"a\\\", default_value: None }, ColumnSchema { id: 4, name: \\\"b\\\", data_type: Int32, is_nullable: true, is_tag: false, is_dictionary: false, comment: \\\"\\\", escaped_name: \\\"b\\\", default_value: None }] }, version: 1, primary_key_indexes: [0, 1] }, partition_info: None, engine: \\\"Analytic\\\" }, table_id: None, state: Stable, shard_id: 0 }\"), err:Failed to create table, table already exists, table:05_create_tables_t2. sql:create table `05_create_tables_t2`(a int,b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic;" }) - -create table `05_create_tables_t3`(a int,b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -affected_rows: 0 - -create table `05_create_tables_t4`(`a` int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -affected_rows: 0 - -describe table `05_create_tables_t4`; - -name,type,is_primary,is_nullable,is_tag,is_dictionary, -String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("t"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("a"),String("int"),Boolean(false),Boolean(true),Boolean(false),Boolean(false), - - -show create table `05_create_tables_t4`; - -Table,Create Table, -String("05_create_tables_t4"),String("CREATE TABLE `05_create_tables_t4` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `a` int, PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - --- TIMESTAMP KEY -CREATE TABLE `05_create_tables_t5`(c1 int, t timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; - -affected_rows: 0 - -describe table `05_create_tables_t5`; - -name,type,is_primary,is_nullable,is_tag,is_dictionary, -String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("t"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("c1"),String("int"),Boolean(false),Boolean(true),Boolean(false),Boolean(false), - - -show create table `05_create_tables_t5`; - -Table,Create Table, -String("05_create_tables_t5"),String("CREATE TABLE `05_create_tables_t5` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `c1` int, PRIMARY 
KEY(tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - --- Multiple TIMESTAMP KEYs -CREATE TABLE `05_create_tables_t6`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY, t2 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. Caused by: Failed to create plan, err:Table must contain only one timestamp key and it's data type must be TIMESTAMP sql:CREATE TABLE `05_create_tables_t6`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY, t2 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic;" }) - --- Column with comment -CREATE TABLE `05_create_tables_t7`(c1 int COMMENT 'id', t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -affected_rows: 0 - -describe table `05_create_tables_t7`; - -name,type,is_primary,is_nullable,is_tag,is_dictionary, -String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("t"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),Boolean(false), -String("c1"),String("int"),Boolean(false),Boolean(true),Boolean(false),Boolean(false), - - -show create table `05_create_tables_t7`; - -Table,Create Table, -String("05_create_tables_t7"),String("CREATE TABLE `05_create_tables_t7` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `c1` int COMMENT 'id', PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - --- StorageFormat -CREATE TABLE `05_create_tables_t8`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; - -affected_rows: 0 - -show create table `05_create_tables_t8`; - -Table,Create Table, -String("05_create_tables_t8"),String("CREATE TABLE `05_create_tables_t8` (`tsid` uint64 NOT NULL, `t1` timestamp NOT NULL, `c1` int, PRIMARY KEY(tsid,t1), TIMESTAMP KEY(t1)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -drop table `05_create_tables_t8`; - -affected_rows: 0 - -CREATE TABLE `05_create_tables_t8`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic with (storage_format= 'columnar'); - -affected_rows: 0 - -show create table `05_create_tables_t8`; - -Table,Create Table, -String("05_create_tables_t8"),String("CREATE TABLE `05_create_tables_t8` (`tsid` uint64 NOT NULL, `t1` timestamp NOT NULL, `c1` int, PRIMARY KEY(tsid,t1), TIMESTAMP KEY(t1)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='COLUMNAR', ttl='7d', 
update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -drop table `05_create_tables_t8`; - -affected_rows: 0 - -CREATE TABLE `05_create_tables_t9`(c1 int, d string dictionary, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic with (storage_format= 'columnar'); - -affected_rows: 0 - -show create table `05_create_tables_t9`; - -Table,Create Table, -String("05_create_tables_t9"),String("CREATE TABLE `05_create_tables_t9` (`tsid` uint64 NOT NULL, `t1` timestamp NOT NULL, `c1` int, `d` string DICTIONARY, PRIMARY KEY(tsid,t1), TIMESTAMP KEY(t1)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='COLUMNAR', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -drop table `05_create_tables_t9`; - -affected_rows: 0 - -CREATE TABLE `05_create_tables_t9`(c1 int, d string dictionary, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; - -affected_rows: 0 - -show create table `05_create_tables_t9`; - -Table,Create Table, -String("05_create_tables_t9"),String("CREATE TABLE `05_create_tables_t9` (`tsid` uint64 NOT NULL, `t1` timestamp NOT NULL, `c1` int, `d` string DICTIONARY, PRIMARY KEY(tsid,t1), TIMESTAMP KEY(t1)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -drop table `05_create_tables_t9`; - -affected_rows: 0 - --- Error: dictionary must be string type -CREATE TABLE `05_create_tables_t9`(c1 int, d double dictionary, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "parse table name. 
Caused by: Invalid sql, sql:CREATE TABLE `05_create_tables_t9`(c1 int, d double dictionary, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic;, err:sql parser error: Only string column can be dictionary encoded: \"d DOUBLE DICTIONARY\" sql:CREATE TABLE `05_create_tables_t9`(c1 int, d double dictionary, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic;" }) - --- Ignore now, table_id is not stable now --- CREATE TABLE `05_create_tables_t8`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic with (storage_format= 'unknown'); --- Default value options -CREATE TABLE `05_create_tables_t9`(c1 int, c2 bigint default 0, c3 uint32 default 1 + 1, c4 string default 'xxx', c5 uint32 default c3*2 + 1, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; - -affected_rows: 0 - -show create table `05_create_tables_t9`; - -Table,Create Table, -String("05_create_tables_t9"),String("CREATE TABLE `05_create_tables_t9` (`tsid` uint64 NOT NULL, `t1` timestamp NOT NULL, `c1` int, `c2` bigint DEFAULT 0, `c3` uint32 DEFAULT 1 + 1, `c4` string DEFAULT 'xxx', `c5` uint32 DEFAULT c3 * 2 + 1, PRIMARY KEY(tsid,t1), TIMESTAMP KEY(t1)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -drop table `05_create_tables_t9`; - -affected_rows: 0 - --- Explicit primary key with tsid -CREATE TABLE `05_create_tables_t10`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY, PRIMARY KEY(tsid, t1)) ENGINE = Analytic; - -affected_rows: 0 - -show create table `05_create_tables_t10`; - -Table,Create Table, -String("05_create_tables_t10"),String("CREATE TABLE `05_create_tables_t10` (`tsid` uint64 NOT NULL, `t1` timestamp NOT NULL, `c1` int, PRIMARY KEY(tsid,t1), TIMESTAMP KEY(t1)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -drop table `05_create_tables_t10`; - -affected_rows: 0 - --- Explicit primary key with tsid -CREATE TABLE `05_create_tables_t11`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY, PRIMARY KEY(t1, tsid)) ENGINE = Analytic; - -affected_rows: 0 - -show create table `05_create_tables_t11`; - -Table,Create Table, -String("05_create_tables_t11"),String("CREATE TABLE `05_create_tables_t11` (`t1` timestamp NOT NULL, `tsid` uint64 NOT NULL, `c1` int, PRIMARY KEY(t1,tsid), TIMESTAMP KEY(t1)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='true', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='OVERWRITE', write_buffer_size='33554432')"), - - -drop table `05_create_tables_t11`; - -affected_rows: 0 - --- Timestamp not in primary key -CREATE TABLE `05_timestamp_not_in_primary_key`(c1 int NOT NULL, t timestamp NOT NULL, TIMESTAMP KEY(t), PRIMARY KEY(c1)) ENGINE = Analytic; - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan. 
Caused by: Failed to create plan, err:Failed to build schema, err:Timestamp not in primary key. sql:CREATE TABLE `05_timestamp_not_in_primary_key`(c1 int NOT NULL, t timestamp NOT NULL, TIMESTAMP KEY(t), PRIMARY KEY(c1)) ENGINE = Analytic;" }) - --- Valid, try to create append mode table with invalid layered memtable enabling -CREATE TABLE `05_enable_layered_memtable_for_append`(c1 int NOT NULL, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic with (layered_enable='true', layered_mutable_switch_threshold='3MB', update_mode='APPEND'); - -affected_rows: 0 - --- Invalid, try to create overwrite mode table with invalid layered memtable enabling -CREATE TABLE `05_enable_layered_memtable_for_overwrite`(c1 int NOT NULL, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic with (layered_enable='true', layered_mutable_switch_threshold='3MB', update_mode='OVERWRITE'); - -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute create table, err:Failed to create table by table manipulator, err:Failed to operate table, err:Failed to operate table, msg:Some(\"failed to create table on shard, request:CreateTableRequest { params: CreateTableParams { catalog_name: \\\"horaedb\\\", schema_name: \\\"public\\\", table_name: \\\"05_enable_layered_memtable_for_overwrite\\\", table_options: [(\\\"layered_enable\\\", \\\"true\\\"), (\\\"layered_mutable_switch_threshold\\\", \\\"3MB\\\"), (\\\"update_mode\\\", \\\"OVERWRITE\\\")], table_schema: Schema { timestamp_index: 1, tsid_index: Some(0), column_schemas: ColumnSchemas { columns: [ColumnSchema { id: 1, name: \\\"tsid\\\", data_type: UInt64, is_nullable: false, is_tag: false, is_dictionary: false, comment: \\\"\\\", escaped_name: \\\"tsid\\\", default_value: None }, ColumnSchema { id: 2, name: \\\"t\\\", data_type: Timestamp, is_nullable: false, is_tag: false, is_dictionary: false, comment: \\\"\\\", escaped_name: \\\"t\\\", default_value: None }, ColumnSchema { id: 3, name: \\\"c1\\\", data_type: Int32, is_nullable: false, is_tag: false, is_dictionary: false, comment: \\\"\\\", escaped_name: \\\"c1\\\", default_value: None }] }, version: 1, primary_key_indexes: [0, 1] }, partition_info: None, engine: \\\"Analytic\\\" }, table_id: None, state: Stable, shard_id: 0 }\"), err:Failed to create table, err:Unexpected error, err:Found invalid table options, reason:layered memtable is enabled for table needing dedup, layered_memtable_opts:LayeredMemtableOptions { enable: true, mutable_segment_switch_threshold: ReadableSize(3145728) }, update_mode:Overwrite. 
sql:CREATE TABLE `05_enable_layered_memtable_for_overwrite`(c1 int NOT NULL, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic with (layered_enable='true', layered_mutable_switch_threshold='3MB', update_mode='OVERWRITE');" }) - -DROP TABLE IF EXISTS `05_create_tables_t`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t2`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t3`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t4`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t5`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t6`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t7`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t8`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t9`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t10`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_create_tables_t11`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_timestamp_not_in_primary_key`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_enable_layered_memtable_for_append`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `05_enable_layered_memtable_for_overwrite`; - -affected_rows: 0 - diff --git a/integration_tests/cases/env/local/ddl/create_tables.sql b/integration_tests/cases/env/local/ddl/create_tables.sql deleted file mode 100644 index 569365056f..0000000000 --- a/integration_tests/cases/env/local/ddl/create_tables.sql +++ /dev/null @@ -1,133 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- - -DROP TABLE IF EXISTS `05_create_tables_t`; -DROP TABLE IF EXISTS `05_create_tables_t2`; -DROP TABLE IF EXISTS `05_create_tables_t3`; -DROP TABLE IF EXISTS `05_create_tables_t4`; -DROP TABLE IF EXISTS `05_create_tables_t5`; -DROP TABLE IF EXISTS `05_create_tables_t6`; -DROP TABLE IF EXISTS `05_create_tables_t7`; -DROP TABLE IF EXISTS `05_create_tables_t8`; -DROP TABLE IF EXISTS `05_create_tables_t9`; -DROP TABLE IF EXISTS `05_create_tables_t10`; -DROP TABLE IF EXISTS `05_create_tables_t11`; -DROP TABLE IF EXISTS `05_timestamp_not_in_primary_key`; -DROP TABLE IF EXISTS `05_enable_layered_memtable_for_append`; -DROP TABLE IF EXISTS `05_enable_layered_memtable_for_overwrite`; - --- no TIMESTAMP column -CREATE TABLE `05_create_tables_t`(c1 int) ENGINE = Analytic; - -CREATE TABLE `05_create_tables_t`(c1 int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -CREATE TABLE IF NOT EXISTS `05_create_tables_t`(c1 int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - --- table already exist -CREATE TABLE `05_create_tables_t`(c1 int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -create table `05_create_tables_t2`(a int, b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic with (enable_ttl='false'); -insert into `05_create_tables_t2`(a, b, t) values(1,1,1),(2,2,2); -select a+b from `05_create_tables_t2`; - --- table already exist -create table `05_create_tables_t2`(a int,b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; --- table already exist -create table `05_create_tables_t2`(a int,b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -create table `05_create_tables_t3`(a int,b int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; - -create table `05_create_tables_t4`(`a` int, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; -describe table `05_create_tables_t4`; -show create table `05_create_tables_t4`; - --- TIMESTAMP KEY -CREATE TABLE `05_create_tables_t5`(c1 int, t timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; -describe table `05_create_tables_t5`; -show create table `05_create_tables_t5`; - --- Multiple TIMESTAMP KEYs -CREATE TABLE `05_create_tables_t6`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY, t2 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; - --- Column with comment -CREATE TABLE `05_create_tables_t7`(c1 int COMMENT 'id', t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic; -describe table `05_create_tables_t7`; -show create table `05_create_tables_t7`; - --- StorageFormat -CREATE TABLE `05_create_tables_t8`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; -show create table `05_create_tables_t8`; -drop table `05_create_tables_t8`; - -CREATE TABLE `05_create_tables_t8`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic with (storage_format= 'columnar'); -show create table `05_create_tables_t8`; -drop table `05_create_tables_t8`; - -CREATE TABLE `05_create_tables_t9`(c1 int, d string dictionary, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic with (storage_format= 'columnar'); -show create table `05_create_tables_t9`; -drop table `05_create_tables_t9`; - -CREATE TABLE `05_create_tables_t9`(c1 int, d string dictionary, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; -show create table `05_create_tables_t9`; -drop table `05_create_tables_t9`; - --- Error: dictionary must be string type -CREATE TABLE `05_create_tables_t9`(c1 int, d double dictionary, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; - --- Ignore now, 
table_id is not stable now --- CREATE TABLE `05_create_tables_t8`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic with (storage_format= 'unknown'); - --- Default value options -CREATE TABLE `05_create_tables_t9`(c1 int, c2 bigint default 0, c3 uint32 default 1 + 1, c4 string default 'xxx', c5 uint32 default c3*2 + 1, t1 timestamp NOT NULL TIMESTAMP KEY) ENGINE = Analytic; -show create table `05_create_tables_t9`; -drop table `05_create_tables_t9`; - --- Explicit primary key with tsid -CREATE TABLE `05_create_tables_t10`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY, PRIMARY KEY(tsid, t1)) ENGINE = Analytic; -show create table `05_create_tables_t10`; -drop table `05_create_tables_t10`; - --- Explicit primary key with tsid -CREATE TABLE `05_create_tables_t11`(c1 int, t1 timestamp NOT NULL TIMESTAMP KEY, PRIMARY KEY(t1, tsid)) ENGINE = Analytic; -show create table `05_create_tables_t11`; -drop table `05_create_tables_t11`; - --- Timestamp not in primary key -CREATE TABLE `05_timestamp_not_in_primary_key`(c1 int NOT NULL, t timestamp NOT NULL, TIMESTAMP KEY(t), PRIMARY KEY(c1)) ENGINE = Analytic; - --- Valid, try to create append mode table with invalid layered memtable enabling -CREATE TABLE `05_enable_layered_memtable_for_append`(c1 int NOT NULL, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic with (layered_enable='true', layered_mutable_switch_threshold='3MB', update_mode='APPEND'); - --- Invalid, try to create overwrite mode table with invalid layered memtable enabling -CREATE TABLE `05_enable_layered_memtable_for_overwrite`(c1 int NOT NULL, t timestamp NOT NULL, TIMESTAMP KEY(t)) ENGINE = Analytic with (layered_enable='true', layered_mutable_switch_threshold='3MB', update_mode='OVERWRITE'); - -DROP TABLE IF EXISTS `05_create_tables_t`; -DROP TABLE IF EXISTS `05_create_tables_t2`; -DROP TABLE IF EXISTS `05_create_tables_t3`; -DROP TABLE IF EXISTS `05_create_tables_t4`; -DROP TABLE IF EXISTS `05_create_tables_t5`; -DROP TABLE IF EXISTS `05_create_tables_t6`; -DROP TABLE IF EXISTS `05_create_tables_t7`; -DROP TABLE IF EXISTS `05_create_tables_t8`; -DROP TABLE IF EXISTS `05_create_tables_t9`; -DROP TABLE IF EXISTS `05_create_tables_t10`; -DROP TABLE IF EXISTS `05_create_tables_t11`; -DROP TABLE IF EXISTS `05_timestamp_not_in_primary_key`; -DROP TABLE IF EXISTS `05_enable_layered_memtable_for_append`; -DROP TABLE IF EXISTS `05_enable_layered_memtable_for_overwrite`; diff --git a/integration_tests/cases/env/local/ddl/query-plan.result b/integration_tests/cases/env/local/ddl/query-plan.result deleted file mode 100644 index ee1e27c0d0..0000000000 --- a/integration_tests/cases/env/local/ddl/query-plan.result +++ /dev/null @@ -1,199 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- -DROP TABLE IF EXISTS `03_dml_select_real_time_range`; - -affected_rows: 0 - -DROP TABLE IF EXISTS `03_append_mode_table`; - -affected_rows: 0 - -CREATE TABLE `03_dml_select_real_time_range` ( - name string TAG, - value double NOT NULL, - t timestamp NOT NULL, - timestamp KEY (t)) ENGINE = Analytic WITH ( - enable_ttl = 'false', - segment_duration='2h' -); - -affected_rows: 0 - -INSERT INTO `03_dml_select_real_time_range` (t, name, value) - VALUES - (1695348000000, "horaedb", 100), - (1695348001000, "horaedb", 200), - (1695348002000, "horaedb", 300); - -affected_rows: 3 - --- This query should include memtable --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -explain analyze select t from `03_dml_select_real_time_range` -where t > 1695348001000; - -plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_164, fetched_columns:[tsid,t]:\n=0]\n"), - - --- This query should have higher priority --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -explain analyze select t from `03_dml_select_real_time_range` -where t >= 1695348001000 and t < 1695348002000; - -plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), t < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_164, fetched_columns:[tsid,t]:\n=0]\n"), - - --- This query should have higher priority --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -explain analyze select name from `03_dml_select_real_time_range` -where t >= 1695348001000 and t < 1695348002000; - -plan_type,plan, -String("Plan with Metrics"),String("ProjectionExec: expr=[name@0 as name], metrics=xx\n ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), t < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_164, fetched_columns:[tsid,t,name]:\n=0]\n"), - - --- This query should not include memtable --- SQLNESS 
REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -explain analyze select t from `03_dml_select_real_time_range` -where t > 1695348002000; - -plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=0\n=0]\n"), - - --- SQLNESS ARG pre_cmd=flush --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE project_record_batch=\d+.?\d*(µ|m|n) project_record_batch=xx --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx --- This query should include SST -explain analyze select t from `03_dml_select_real_time_range` -where t > 1695348001000; - -plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=1\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_sst_1, fetched_columns:[tsid,t]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=320\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n=0]\n"), - - --- This query should not include SST --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -explain analyze select t from `03_dml_select_real_time_range` -where t > 1695348002000; - -plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=0\n=0]\n"), - - --- Table with an 'append' update mode -CREATE TABLE `03_append_mode_table` ( - name string TAG, - value double NOT NULL, - t timestamp NOT NULL, - timestamp KEY (t)) ENGINE = Analytic WITH ( - enable_ttl = 'false', - segment_duration = '2h', - update_mode = 'append' -); - -affected_rows: 0 - -INSERT INTO `03_append_mode_table` (t, name, value) - VALUES - (1695348000000, "ceresdb", 100), - (1695348001000, "ceresdb", 200), - (1695348002000, "ceresdb", 300); - -affected_rows: 3 - --- Should just fetch projected columns from memtable --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE since_create=\d+.?\d*(µ|m|n) since_create=xx --- SQLNESS REPLACE since_init=\d+.?\d*(µ|m|n) since_init=xx --- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -explain analyze select t from `03_append_mode_table` -where t >= 1695348001000 and name = 'ceresdb'; - -plan_type,plan, -String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=xx\n ScanTable: 
table=03_append_mode_table, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=1\n num_ssts=0\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_memtable_166, fetched_columns:[t,name]:\n=0]\n"), - - --- Should just fetch projected columns from SST --- SQLNESS ARG pre_cmd=flush --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE since_create=\d+.?\d*(µ|m|n) since_create=xx --- SQLNESS REPLACE since_init=\d+.?\d*(µ|m|n) since_init=xx --- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx --- SQLNESS REPLACE project_record_batch=\d+.?\d*(µ|m|n) project_record_batch=xx --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -explain analyze select t from `03_append_mode_table` -where t >= 1695348001000 and name = 'ceresdb'; - -plan_type,plan, -String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=xx\n ScanTable: table=03_append_mode_table, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=0\n num_ssts=1\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_sst_1, fetched_columns:[t,name]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=408\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n=0]\n"), - - -CREATE TABLE `TEST_QUERY_PRIORITY` ( - NAME string TAG, - VALUE double NOT NULL, - TS timestamp NOT NULL, - timestamp KEY (TS)) ENGINE = Analytic WITH ( - enable_ttl = 'false', - segment_duration = '2h', - update_mode = 'append' -); - -affected_rows: 0 - --- This query should have higher priority --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -explain analyze select TS from `TEST_QUERY_PRIORITY` -where TS >= 1695348001000 and TS < 1695348002000; - -plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=TEST_QUERY_PRIORITY, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[TS >= TimestampMillisecond(1695348001000, None), TS < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=false\n=0]\n"), - - --- This query should have higher priority --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -explain analyze select TS from `TEST_QUERY_PRIORITY` -where TS >= 1695348001000; - -plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=TEST_QUERY_PRIORITY, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[TS >= TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: 
Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=false\n=0]\n"), - - -DROP TABLE `03_dml_select_real_time_range`; - -affected_rows: 0 - -DROP TABLE `03_append_mode_table`; - -affected_rows: 0 - -DROP TABLE `TEST_QUERY_PRIORITY`; - -affected_rows: 0 - diff --git a/integration_tests/cases/env/local/ddl/query-plan.sql b/integration_tests/cases/env/local/ddl/query-plan.sql deleted file mode 100644 index b88d432eec..0000000000 --- a/integration_tests/cases/env/local/ddl/query-plan.sql +++ /dev/null @@ -1,136 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - -DROP TABLE IF EXISTS `03_dml_select_real_time_range`; -DROP TABLE IF EXISTS `03_append_mode_table`; - -CREATE TABLE `03_dml_select_real_time_range` ( - name string TAG, - value double NOT NULL, - t timestamp NOT NULL, - timestamp KEY (t)) ENGINE = Analytic WITH ( - enable_ttl = 'false', - segment_duration='2h' -); - -INSERT INTO `03_dml_select_real_time_range` (t, name, value) - VALUES - (1695348000000, "horaedb", 100), - (1695348001000, "horaedb", 200), - (1695348002000, "horaedb", 300); - --- This query should include memtable --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -explain analyze select t from `03_dml_select_real_time_range` -where t > 1695348001000; - --- This query should have higher priority --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -explain analyze select t from `03_dml_select_real_time_range` -where t >= 1695348001000 and t < 1695348002000; - --- This query should have higher priority --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -explain analyze select name from `03_dml_select_real_time_range` -where t >= 1695348001000 and t < 1695348002000; - --- This query should not include memtable --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -explain analyze select t from `03_dml_select_real_time_range` -where t > 1695348002000; - --- SQLNESS ARG pre_cmd=flush --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE project_record_batch=\d+.?\d*(µ|m|n) project_record_batch=xx --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx --- This query should include SST -explain analyze select t from `03_dml_select_real_time_range` -where t > 1695348001000; - --- This query should not include SST --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -explain analyze select t from `03_dml_select_real_time_range` -where t > 1695348002000; - --- Table with an 'append' update mode -CREATE TABLE `03_append_mode_table` ( - name string TAG, - value double NOT NULL, - t timestamp NOT 
NULL, - timestamp KEY (t)) ENGINE = Analytic WITH ( - enable_ttl = 'false', - segment_duration = '2h', - update_mode = 'append' -); - -INSERT INTO `03_append_mode_table` (t, name, value) - VALUES - (1695348000000, "ceresdb", 100), - (1695348001000, "ceresdb", 200), - (1695348002000, "ceresdb", 300); - --- Should just fetch projected columns from memtable --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE since_create=\d+.?\d*(µ|m|n) since_create=xx --- SQLNESS REPLACE since_init=\d+.?\d*(µ|m|n) since_init=xx --- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -explain analyze select t from `03_append_mode_table` -where t >= 1695348001000 and name = 'ceresdb'; - --- Should just fetch projected columns from SST --- SQLNESS ARG pre_cmd=flush --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE since_create=\d+.?\d*(µ|m|n) since_create=xx --- SQLNESS REPLACE since_init=\d+.?\d*(µ|m|n) since_init=xx --- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx --- SQLNESS REPLACE project_record_batch=\d+.?\d*(µ|m|n) project_record_batch=xx --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -explain analyze select t from `03_append_mode_table` -where t >= 1695348001000 and name = 'ceresdb'; - -CREATE TABLE `TEST_QUERY_PRIORITY` ( - NAME string TAG, - VALUE double NOT NULL, - TS timestamp NOT NULL, - timestamp KEY (TS)) ENGINE = Analytic WITH ( - enable_ttl = 'false', - segment_duration = '2h', - update_mode = 'append' -); - --- This query should have higher priority --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -explain analyze select TS from `TEST_QUERY_PRIORITY` -where TS >= 1695348001000 and TS < 1695348002000; - --- This query should have higher priority --- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx --- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -explain analyze select TS from `TEST_QUERY_PRIORITY` -where TS >= 1695348001000; - -DROP TABLE `03_dml_select_real_time_range`; -DROP TABLE `03_append_mode_table`; -DROP TABLE `TEST_QUERY_PRIORITY`; diff --git a/integration_tests/cases/env/local/ddl/sampling-primary-key.result b/integration_tests/cases/env/local/ddl/sampling-primary-key.result deleted file mode 100644 index 2a8040584a..0000000000 --- a/integration_tests/cases/env/local/ddl/sampling-primary-key.result +++ /dev/null @@ -1,82 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- -DROP TABLE IF EXISTS `sampling_primary_key_table`; - -affected_rows: 0 - -CREATE TABLE `sampling_primary_key_table` ( - v1 double, - v2 double, - v3 double, - v5 double, - name string TAG, - myVALUE int64 NOT NULL, - t timestamp NOT NULL, - timestamp KEY (t)) ENGINE = Analytic WITH ( - update_mode='append', - enable_ttl = 'false' -); - -affected_rows: 0 - -show create table `sampling_primary_key_table`; - -Table,Create Table, -String("sampling_primary_key_table"),String("CREATE TABLE `sampling_primary_key_table` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `v1` double, `v2` double, `v3` double, `v5` double, `name` string TAG, `myVALUE` bigint NOT NULL, PRIMARY KEY(tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='false', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='', storage_format='AUTO', ttl='7d', update_mode='APPEND', write_buffer_size='33554432')"), - - -INSERT INTO `sampling_primary_key_table` (t, name, myVALUE) - VALUES - (1695348000000, "horaedb2", 200), - (1695348000005, "horaedb2", 100), - (1695348000001, "horaedb1", 100), - (1695348000003, "horaedb3", 200); - -affected_rows: 4 - -select * from `sampling_primary_key_table`; - -tsid,t,v1,v2,v3,v5,name,myVALUE, -UInt64(955822522188633507),Timestamp(1695348000003),Double(0.0),Double(0.0),Double(0.0),Double(0.0),String("horaedb3"),Int64(200), -UInt64(9464314370308696884),Timestamp(1695348000001),Double(0.0),Double(0.0),Double(0.0),Double(0.0),String("horaedb1"),Int64(100), -UInt64(14649097417416496686),Timestamp(1695348000000),Double(0.0),Double(0.0),Double(0.0),Double(0.0),String("horaedb2"),Int64(200), -UInt64(14649097417416496686),Timestamp(1695348000005),Double(0.0),Double(0.0),Double(0.0),Double(0.0),String("horaedb2"),Int64(100), - - --- After flush, its primary key should changed. 
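The expectation stated above — that the sampled primary key changes once the memtable is flushed — can also be checked outside the sqlness harness. Below is a minimal standalone sketch, not part of this patch, written in the style of the Python integration tests later in this diff; the HTTP `/sql` endpoint, port 5440, and the `requests` usage are taken from those tests, and the PRIMARY KEY strings asserted are the ones shown in the results above. How the flush itself is triggered is left to the harness (`pre_cmd=flush`) and is not reproduced here.

# Sketch: inspect the primary key reported by SHOW CREATE TABLE over the
# HTTP /sql endpoint. Endpoint path, port and header mirror the Python
# integration tests elsewhere in this patch; table name matches the case above.
import requests

API_ROOT = 'http://localhost:5440'  # assumed local standalone server
HEADERS = {'Content-Type': 'application/json'}

def show_create_table(table):
    r = requests.post('{}/sql'.format(API_ROOT),
                      json={'query': 'SHOW CREATE TABLE `{}`'.format(table)},
                      headers=HEADERS)
    assert r.status_code == 200, r.text
    return r.text

if __name__ == '__main__':
    ddl = show_create_table('sampling_primary_key_table')
    # Before flush the case above reports PRIMARY KEY(tsid,t); after the
    # harness-triggered flush it is expected to become the sampled key.
    print('sampled key present:',
          'PRIMARY KEY(myVALUE,name,tsid,t)' in ddl)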
--- SQLNESS ARG pre_cmd=flush -show create table `sampling_primary_key_table`; - -Table,Create Table, -String("sampling_primary_key_table"),String("CREATE TABLE `sampling_primary_key_table` (`tsid` uint64 NOT NULL, `t` timestamp NOT NULL, `v1` double, `v2` double, `v3` double, `v5` double, `name` string TAG, `myVALUE` bigint NOT NULL, PRIMARY KEY(myVALUE,name,tsid,t), TIMESTAMP KEY(t)) ENGINE=Analytic WITH(arena_block_size='2097152', compaction_strategy='default', compression='ZSTD', enable_ttl='false', layered_enable='false', layered_mutable_switch_threshold='3145728', memtable_type='skiplist', num_rows_per_row_group='8192', segment_duration='2h', storage_format='AUTO', ttl='7d', update_mode='APPEND', write_buffer_size='33554432')"), - - -select * from `sampling_primary_key_table`; - -tsid,t,v1,v2,v3,v5,name,myVALUE, -UInt64(9464314370308696884),Timestamp(1695348000001),Double(0.0),Double(0.0),Double(0.0),Double(0.0),String("horaedb1"),Int64(100), -UInt64(14649097417416496686),Timestamp(1695348000005),Double(0.0),Double(0.0),Double(0.0),Double(0.0),String("horaedb2"),Int64(100), -UInt64(14649097417416496686),Timestamp(1695348000000),Double(0.0),Double(0.0),Double(0.0),Double(0.0),String("horaedb2"),Int64(200), -UInt64(955822522188633507),Timestamp(1695348000003),Double(0.0),Double(0.0),Double(0.0),Double(0.0),String("horaedb3"),Int64(200), - - -DROP TABLE IF EXISTS `sampling_primary_key_table`; - -affected_rows: 0 - diff --git a/integration_tests/cases/env/local/ddl/sampling-primary-key.sql b/integration_tests/cases/env/local/ddl/sampling-primary-key.sql deleted file mode 100644 index 7a34b66b15..0000000000 --- a/integration_tests/cases/env/local/ddl/sampling-primary-key.sql +++ /dev/null @@ -1,53 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - - -DROP TABLE IF EXISTS `sampling_primary_key_table`; - -CREATE TABLE `sampling_primary_key_table` ( - v1 double, - v2 double, - v3 double, - v5 double, - name string TAG, - myVALUE int64 NOT NULL, - t timestamp NOT NULL, - timestamp KEY (t)) ENGINE = Analytic WITH ( - update_mode='append', - enable_ttl = 'false' -); - -show create table `sampling_primary_key_table`; - -INSERT INTO `sampling_primary_key_table` (t, name, myVALUE) - VALUES - (1695348000000, "horaedb2", 200), - (1695348000005, "horaedb2", 100), - (1695348000001, "horaedb1", 100), - (1695348000003, "horaedb3", 200); - -select * from `sampling_primary_key_table`; - --- After flush, its primary key should changed. 
--- SQLNESS ARG pre_cmd=flush -show create table `sampling_primary_key_table`; - -select * from `sampling_primary_key_table`; - -DROP TABLE IF EXISTS `sampling_primary_key_table`; diff --git a/integration_tests/cases/env/local/dml/insert_into_select.result b/integration_tests/cases/env/local/dml/insert_into_select.result deleted file mode 100644 index 93fc82567c..0000000000 --- a/integration_tests/cases/env/local/dml/insert_into_select.result +++ /dev/null @@ -1,83 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- -DROP TABLE IF EXISTS `insert_into_select_table1`; - -affected_rows: 0 - -CREATE TABLE `insert_into_select_table1` ( - `timestamp` timestamp NOT NULL, - `value` int, - `name` string, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -affected_rows: 0 - -INSERT INTO `insert_into_select_table1` (`timestamp`, `value`, `name`) -VALUES - (1, 100, "s1"), - (2, 200, "s2"), - (3, 300, "s3"), - (4, 400, "s4"), - (5, 500, "s5"); - -affected_rows: 5 - -DROP TABLE IF EXISTS `insert_into_select_table2`; - -affected_rows: 0 - -CREATE TABLE `insert_into_select_table2` ( - `timestamp` timestamp NOT NULL, - `value` int, - `name` string NULL, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -affected_rows: 0 - -INSERT INTO `insert_into_select_table2` (`timestamp`, `value`) -SELECT `timestamp`, `value` -FROM `insert_into_select_table1`; - -affected_rows: 5 - -SELECT `timestamp`, `value`, `name` -FROM `insert_into_select_table2`; - -timestamp,value,name, -Timestamp(1),Int32(100),String(""), -Timestamp(2),Int32(200),String(""), -Timestamp(3),Int32(300),String(""), -Timestamp(4),Int32(400),String(""), -Timestamp(5),Int32(500),String(""), - - -DROP TABLE `insert_into_select_table1`; - -affected_rows: 0 - -DROP TABLE `insert_into_select_table2`; - -affected_rows: 0 - diff --git a/integration_tests/cases/env/local/dml/insert_into_select.sql b/integration_tests/cases/env/local/dml/insert_into_select.sql deleted file mode 100644 index 1a0d4a1da0..0000000000 --- a/integration_tests/cases/env/local/dml/insert_into_select.sql +++ /dev/null @@ -1,59 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. 
You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - -DROP TABLE IF EXISTS `insert_into_select_table1`; - -CREATE TABLE `insert_into_select_table1` ( - `timestamp` timestamp NOT NULL, - `value` int, - `name` string, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -INSERT INTO `insert_into_select_table1` (`timestamp`, `value`, `name`) -VALUES - (1, 100, "s1"), - (2, 200, "s2"), - (3, 300, "s3"), - (4, 400, "s4"), - (5, 500, "s5"); - -DROP TABLE IF EXISTS `insert_into_select_table2`; - -CREATE TABLE `insert_into_select_table2` ( - `timestamp` timestamp NOT NULL, - `value` int, - `name` string NULL, - timestamp KEY (timestamp)) ENGINE=Analytic -WITH( - enable_ttl='false' -); - -INSERT INTO `insert_into_select_table2` (`timestamp`, `value`) -SELECT `timestamp`, `value` -FROM `insert_into_select_table1`; - -SELECT `timestamp`, `value`, `name` -FROM `insert_into_select_table2`; - -DROP TABLE `insert_into_select_table1`; - -DROP TABLE `insert_into_select_table2`; diff --git a/integration_tests/cases/env/local/influxql/basic.result b/integration_tests/cases/env/local/influxql/basic.result deleted file mode 100644 index 61b6f1fdb6..0000000000 --- a/integration_tests/cases/env/local/influxql/basic.result +++ /dev/null @@ -1,95 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- -DROP TABLE IF EXISTS `h2o_feet`; - -affected_rows: 0 - -CREATE TABLE `h2o_feet` ( - `time` timestamp NOT NULL, - `level_description` string TAG, - `location` string TAG, - `water_level` double, - timestamp KEY (time)) ENGINE = Analytic WITH ( - enable_ttl = 'false' -); - -affected_rows: 0 - --- Insert Records: --- ("2015-08-18T00:00:00Z", "between 6 and 9 feet", "coyote_creek", 8.12), --- ("2015-08-18T00:00:00Z", "below 3 feet", "santa_monica", 2.064), --- ("2015-08-18T00:06:00Z", "between 6 and 9 feet", "coyote_creek", 8.005), --- ("2015-08-18T00:06:00Z", "below 3 feet", "santa_monica", 2.116), --- ("2015-08-18T00:12:00Z", "between 6 and 9 feet", "coyote_creek", 7.887), --- ("2015-08-18T00:12:00Z", "below 3 feet", "santa_monica", 2.028); -INSERT INTO h2o_feet(time, level_description, location, water_level) - VALUES - (1439827200000, "between 6 and 9 feet", "coyote_creek", 8.12), - (1439827200000, "below 3 feet", "santa_monica", 2.064), - (1439827560000, "between 6 and 9 feet", "coyote_creek", 8.005), - (1439827560000, "below 3 feet", "santa_monica", 2.116), - (1439827620000, "between 6 and 9 feet", "coyote_creek", 7.887), - (1439827620000, "below 3 feet", "santa_monica", 2.028); - -affected_rows: 6 - --- SQLNESS ARG protocol=influxql -SELECT * FROM "h2o_feet"; - -{"results":[{"statement_id":0,"series":[{"name":"h2o_feet","columns":["time","level_description","location","tsid","water_level"],"values":[[1439827200000,"below 3 feet","santa_monica",8247797837995683878,2.064],[1439827200000,"between 6 and 9 feet","coyote_creek",4483051411356144610,8.12],[1439827560000,"below 3 feet","santa_monica",8247797837995683878,2.116],[1439827560000,"between 6 and 9 feet","coyote_creek",4483051411356144610,8.005],[1439827620000,"below 3 feet","santa_monica",8247797837995683878,2.028],[1439827620000,"between 6 and 9 feet","coyote_creek",4483051411356144610,7.887]]}]}]} - --- SQLNESS ARG protocol=influxql method=get -SELECT * FROM "h2o_feet"; - -{"results":[{"statement_id":0,"series":[{"name":"h2o_feet","columns":["time","level_description","location","tsid","water_level"],"values":[[1439827200000,"below 3 feet","santa_monica",8247797837995683878,2.064],[1439827200000,"between 6 and 9 feet","coyote_creek",4483051411356144610,8.12],[1439827560000,"below 3 feet","santa_monica",8247797837995683878,2.116],[1439827560000,"between 6 and 9 feet","coyote_creek",4483051411356144610,8.005],[1439827620000,"below 3 feet","santa_monica",8247797837995683878,2.028],[1439827620000,"between 6 and 9 feet","coyote_creek",4483051411356144610,7.887]]}]}]} - --- SQLNESS ARG protocol=influxql -SELECT "level_description", location, water_level FROM "h2o_feet" where location = 'santa_monica'; - -{"results":[{"statement_id":0,"series":[{"name":"h2o_feet","columns":["time","level_description","location","water_level"],"values":[[1439827200000,"below 3 feet","santa_monica",2.064],[1439827560000,"below 3 feet","santa_monica",2.116],[1439827620000,"below 3 feet","santa_monica",2.028]]}]}]} - --- SQLNESS ARG protocol=influxql -show measurements; - -{"results":[{"statement_id":0,"series":[{"name":"measurements","columns":["name"],"values":[["h2o_feet"]]}]}]} - --- SQLNESS ARG protocol=influxql -SELECT count(water_level) FROM "h2o_feet" -group by location; - -{"results":[{"statement_id":0,"series":[{"name":"h2o_feet","columns":["time","location","count"],"values":[[0,"coyote_creek",3],[0,"santa_monica",3]]}]}]} - --- SQLNESS ARG protocol=influxql -SELECT count(water_level) FROM "h2o_feet" -where time < 1439828400000ms -group by 
location, time(5m); - -{"results":[{"statement_id":0,"series":[{"name":"h2o_feet","columns":["time","location","count"],"values":[[1439827200000,"coyote_creek",1],[1439827500000,"coyote_creek",2],[1439827800000,"coyote_creek",null],[1439828100000,"coyote_creek",null],[1439827200000,"santa_monica",1],[1439827500000,"santa_monica",2],[1439827800000,"santa_monica",null],[1439828100000,"santa_monica",null]]}]}]} - --- SQLNESS ARG protocol=influxql -SELECT count(water_level) FROM "h2o_feet" -where time < 1439828400000ms -group by location, time(5m) fill(666); - -{"results":[{"statement_id":0,"series":[{"name":"h2o_feet","columns":["time","location","count"],"values":[[1439827200000,"coyote_creek",1],[1439827500000,"coyote_creek",2],[1439827800000,"coyote_creek",666],[1439828100000,"coyote_creek",666],[1439827200000,"santa_monica",1],[1439827500000,"santa_monica",2],[1439827800000,"santa_monica",666],[1439828100000,"santa_monica",666]]}]}]} - -DROP TABLE IF EXISTS `h2o_feet`; - -affected_rows: 0 - diff --git a/integration_tests/cases/env/local/influxql/basic.sql b/integration_tests/cases/env/local/influxql/basic.sql deleted file mode 100644 index 50481e9a43..0000000000 --- a/integration_tests/cases/env/local/influxql/basic.sql +++ /dev/null @@ -1,74 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- - -DROP TABLE IF EXISTS `h2o_feet`; - -CREATE TABLE `h2o_feet` ( - `time` timestamp NOT NULL, - `level_description` string TAG, - `location` string TAG, - `water_level` double, - timestamp KEY (time)) ENGINE = Analytic WITH ( - enable_ttl = 'false' -); - --- Insert Records: --- ("2015-08-18T00:00:00Z", "between 6 and 9 feet", "coyote_creek", 8.12), --- ("2015-08-18T00:00:00Z", "below 3 feet", "santa_monica", 2.064), --- ("2015-08-18T00:06:00Z", "between 6 and 9 feet", "coyote_creek", 8.005), --- ("2015-08-18T00:06:00Z", "below 3 feet", "santa_monica", 2.116), --- ("2015-08-18T00:12:00Z", "between 6 and 9 feet", "coyote_creek", 7.887), --- ("2015-08-18T00:12:00Z", "below 3 feet", "santa_monica", 2.028); -INSERT INTO h2o_feet(time, level_description, location, water_level) - VALUES - (1439827200000, "between 6 and 9 feet", "coyote_creek", 8.12), - (1439827200000, "below 3 feet", "santa_monica", 2.064), - (1439827560000, "between 6 and 9 feet", "coyote_creek", 8.005), - (1439827560000, "below 3 feet", "santa_monica", 2.116), - (1439827620000, "between 6 and 9 feet", "coyote_creek", 7.887), - (1439827620000, "below 3 feet", "santa_monica", 2.028); - - --- SQLNESS ARG protocol=influxql -SELECT * FROM "h2o_feet"; - --- SQLNESS ARG protocol=influxql method=get -SELECT * FROM "h2o_feet"; - --- SQLNESS ARG protocol=influxql -SELECT "level_description", location, water_level FROM "h2o_feet" where location = 'santa_monica'; - --- SQLNESS ARG protocol=influxql -show measurements; - --- SQLNESS ARG protocol=influxql -SELECT count(water_level) FROM "h2o_feet" -group by location; - --- SQLNESS ARG protocol=influxql -SELECT count(water_level) FROM "h2o_feet" -where time < 1439828400000ms -group by location, time(5m); - --- SQLNESS ARG protocol=influxql -SELECT count(water_level) FROM "h2o_feet" -where time < 1439828400000ms -group by location, time(5m) fill(666); - -DROP TABLE IF EXISTS `h2o_feet`; diff --git a/integration_tests/cases/env/local/opentsdb/basic.result b/integration_tests/cases/env/local/opentsdb/basic.result deleted file mode 100644 index d3f7f444a2..0000000000 --- a/integration_tests/cases/env/local/opentsdb/basic.result +++ /dev/null @@ -1,106 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- -DROP TABLE IF EXISTS `opentsdb_table1`; - -affected_rows: 0 - -CREATE TABLE `opentsdb_table1` ( - `time` timestamp NOT NULL, - `level_description` string TAG, - `location` string TAG, - `value` double, - timestamp KEY (time)) ENGINE = Analytic WITH ( - enable_ttl = 'false' -); - -affected_rows: 0 - --- Insert Records: --- ("2015-08-18T00:00:00Z", "between 6 and 9 feet", "coyote_creek", 8.12), --- ("2015-08-18T00:00:00Z", "below 3 feet", "santa_monica", 2.064), --- ("2015-08-18T00:06:00Z", "between 6 and 9 feet", "coyote_creek", 8.005), --- ("2015-08-18T00:06:00Z", "below 3 feet", "santa_monica", 2.116), --- ("2015-08-18T00:12:00Z", "between 6 and 9 feet", "coyote_creek", 7.887), --- ("2015-08-18T00:12:00Z", "below 3 feet", "santa_monica", 2.028); -INSERT INTO opentsdb_table1(time, level_description, location, value) - VALUES - (1439827200000, "between 6 and 9 feet", "coyote_creek", 8.12), - (1439827200000, "below 3 feet", "santa_monica", 2.064), - (1439827560000, "between 6 and 9 feet", "coyote_creek", 8.005), - (1439827560000, "below 3 feet", "santa_monica", 2.116), - (1439827620000, "between 6 and 9 feet", "coyote_creek", 7.887), - (1439827620000, "below 3 feet", "santa_monica", 2.028); - -affected_rows: 6 - --- SQLNESS ARG protocol=opentsdb -{ - "start": 1439827200000, - "end": 1439827620000, - "queries": [ - { - "aggregator": "none", - "metric": "opentsdb_table1", - "tags": {} - } - ] -} -; - -[{"metric":"opentsdb_table1","tags":{"level_description":"below 3 feet","location":"santa_monica"},"aggregatedTags":[],"dps":{"1439827200000":2.064,"1439827560000":2.116,"1439827620000":2.028}},{"metric":"opentsdb_table1","tags":{"level_description":"between 6 and 9 feet","location":"coyote_creek"},"aggregatedTags":[],"dps":{"1439827200000":8.12,"1439827560000":8.005,"1439827620000":7.887}}] - --- SQLNESS ARG protocol=opentsdb -{ - "start": 1439827200000, - "end": 1439827620000, - "queries": [ - { - "aggregator": "none", - "metric": "opentsdb_table1", - "tags": { - "location": "coyote_creek" - } - } - ] -} -; - -[{"metric":"opentsdb_table1","tags":{"level_description":"between 6 and 9 feet","location":"coyote_creek"},"aggregatedTags":[],"dps":{"1439827200000":8.12,"1439827560000":8.005,"1439827620000":7.887}}] - --- SQLNESS ARG protocol=opentsdb -{ - "start": 1439827200000, - "end": 1439827620000, - "queries": [ - { - "aggregator": "sum", - "metric": "opentsdb_table1", - "tags": { - } - } - ] -} -; - -[{"metric":"opentsdb_table1","tags":{},"aggregatedTags":[],"dps":{"1439827200000":10.184,"1439827560000":10.121,"1439827620000":9.915}}] - -DROP TABLE IF EXISTS `opentsdb_table1`; - -affected_rows: 0 - diff --git a/integration_tests/cases/env/local/opentsdb/basic.sql b/integration_tests/cases/env/local/opentsdb/basic.sql deleted file mode 100644 index 5ab91ec51e..0000000000 --- a/integration_tests/cases/env/local/opentsdb/basic.sql +++ /dev/null @@ -1,93 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. 
You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - -DROP TABLE IF EXISTS `opentsdb_table1`; - -CREATE TABLE `opentsdb_table1` ( - `time` timestamp NOT NULL, - `level_description` string TAG, - `location` string TAG, - `value` double, - timestamp KEY (time)) ENGINE = Analytic WITH ( - enable_ttl = 'false' -); - --- Insert Records: --- ("2015-08-18T00:00:00Z", "between 6 and 9 feet", "coyote_creek", 8.12), --- ("2015-08-18T00:00:00Z", "below 3 feet", "santa_monica", 2.064), --- ("2015-08-18T00:06:00Z", "between 6 and 9 feet", "coyote_creek", 8.005), --- ("2015-08-18T00:06:00Z", "below 3 feet", "santa_monica", 2.116), --- ("2015-08-18T00:12:00Z", "between 6 and 9 feet", "coyote_creek", 7.887), --- ("2015-08-18T00:12:00Z", "below 3 feet", "santa_monica", 2.028); -INSERT INTO opentsdb_table1(time, level_description, location, value) - VALUES - (1439827200000, "between 6 and 9 feet", "coyote_creek", 8.12), - (1439827200000, "below 3 feet", "santa_monica", 2.064), - (1439827560000, "between 6 and 9 feet", "coyote_creek", 8.005), - (1439827560000, "below 3 feet", "santa_monica", 2.116), - (1439827620000, "between 6 and 9 feet", "coyote_creek", 7.887), - (1439827620000, "below 3 feet", "santa_monica", 2.028); - - --- SQLNESS ARG protocol=opentsdb -{ - "start": 1439827200000, - "end": 1439827620000, - "queries": [ - { - "aggregator": "none", - "metric": "opentsdb_table1", - "tags": {} - } - ] -} -; - --- SQLNESS ARG protocol=opentsdb -{ - "start": 1439827200000, - "end": 1439827620000, - "queries": [ - { - "aggregator": "none", - "metric": "opentsdb_table1", - "tags": { - "location": "coyote_creek" - } - } - ] -} -; - --- SQLNESS ARG protocol=opentsdb -{ - "start": 1439827200000, - "end": 1439827620000, - "queries": [ - { - "aggregator": "sum", - "metric": "opentsdb_table1", - "tags": { - } - } - ] -} -; - -DROP TABLE IF EXISTS `opentsdb_table1`; diff --git a/integration_tests/cases/env/local/system/system_tables.result b/integration_tests/cases/env/local/system/system_tables.result deleted file mode 100644 index 3ac0d06484..0000000000 --- a/integration_tests/cases/env/local/system/system_tables.result +++ /dev/null @@ -1,51 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
--- -DROP TABLE IF EXISTS `01_system_table1`; - -affected_rows: 0 - -CREATE TABLE `01_system_table1` ( - `timestamp` timestamp NOT NULL, - `arch` string TAG, - `datacenter` string TAG, - `hostname` string TAG, - `value` double, - timestamp KEY (timestamp)) ENGINE=Analytic; - -affected_rows: 0 - --- TODO: when query table in system catalog, it will throw errors now --- Couldn't find table in table container --- SELECT --- `timestamp`, --- `catalog`, --- `schema`, --- `table_name`, --- `engine` --- FROM --- system.public.tables --- WHERE --- table_name = '01_system_table1'; --- FIXME -SHOW TABLES LIKE '01%'; - -Tables, -String("01_system_table1"), - - diff --git a/integration_tests/cases/env/local/system/system_tables.sql b/integration_tests/cases/env/local/system/system_tables.sql deleted file mode 100644 index a5778a8903..0000000000 --- a/integration_tests/cases/env/local/system/system_tables.sql +++ /dev/null @@ -1,47 +0,0 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. --- - - -DROP TABLE IF EXISTS `01_system_table1`; - -CREATE TABLE `01_system_table1` ( - `timestamp` timestamp NOT NULL, - `arch` string TAG, - `datacenter` string TAG, - `hostname` string TAG, - `value` double, - timestamp KEY (timestamp)) ENGINE=Analytic; - - --- TODO: when query table in system catalog, it will throw errors now --- Couldn't find table in table container --- SELECT --- `timestamp`, --- `catalog`, --- `schema`, --- `table_name`, --- `engine` --- FROM --- system.public.tables --- WHERE --- table_name = '01_system_table1'; - - --- FIXME -SHOW TABLES LIKE '01%'; diff --git a/integration_tests/config/compaction-offload.toml b/integration_tests/config/compaction-offload.toml deleted file mode 100644 index 1cb8fbb214..0000000000 --- a/integration_tests/config/compaction-offload.toml +++ /dev/null @@ -1,44 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[server] -bind_addr = "0.0.0.0" -http_port = 5440 -grpc_port = 8831 -postgresql_port = 5433 - -[logger] -level = "info" - -[tracing] -dir = "/tmp/compaction-offload" - -[analytic.storage.object_store] -type = "Local" -data_dir = "/tmp/compaction-offload" - -[analytic.wal] -type = "Local" -data_dir = "/tmp/compaction-offload" - -[analytic.compaction_mode] -compaction_mode = "Offload" -node_picker = "Local" -endpoint = "127.0.0.1:8831" - -[analytic] -enable_primary_key_sampling = true diff --git a/integration_tests/config/horaedb-cluster-0.toml b/integration_tests/config/horaedb-cluster-0.toml deleted file mode 100644 index 3a585bdfda..0000000000 --- a/integration_tests/config/horaedb-cluster-0.toml +++ /dev/null @@ -1,59 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[logger] -level = "debug" - -[server] -bind_addr = "0.0.0.0" -http_port = 5440 -grpc_port = 8831 -mysql_port = 3307 -deploy_mode = "Cluster" - -[tracing] -dir = "/tmp/horaedb0" - -[analytic.storage] -mem_cache_capacity = '1G' -mem_cache_partition_bits = 0 - -[analytic.storage.object_store] -type = "Local" -data_dir = "/tmp/horaedb0" - -[analytic.wal] -type = "Local" -data_dir = "/tmp/horaedb0" - -[cluster_deployment] -mode = "WithMeta" -cmd_channel_buffer_size = 10 - -[cluster_deployment.meta_client] -# Only support "defaultCluster" currently. -cluster_name = "defaultCluster" -meta_addr = "http://127.0.0.1:2379" -lease = "10s" -timeout = "5s" - -[cluster_deployment.etcd_client] -server_addrs = ['127.0.0.1:2379'] - -[limiter] -write_block_list = ['block_test_table'] -read_block_list = ['block_test_table'] diff --git a/integration_tests/config/horaedb-cluster-1.toml b/integration_tests/config/horaedb-cluster-1.toml deleted file mode 100644 index e3943a6983..0000000000 --- a/integration_tests/config/horaedb-cluster-1.toml +++ /dev/null @@ -1,60 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[logger] -level = "debug" - -[server] -bind_addr = "0.0.0.0" -http_port = 5441 -grpc_port = 8832 -mysql_port = 13307 -postgresql_port = 15433 -deploy_mode = "Cluster" - -[tracing] -dir = "/tmp/horaedb1" - -[analytic.storage] -mem_cache_capacity = '1G' -mem_cache_partition_bits = 0 - -[analytic.storage.object_store] -type = "Local" -data_dir = "/tmp/horaedb1" - -[analytic.wal] -type = "Local" -data_dir = "/tmp/horaedb1" - -[cluster_deployment] -mode = "WithMeta" -cmd_channel_buffer_size = 10 - -[cluster_deployment.meta_client] -# Only support "defaultCluster" currently. -cluster_name = "defaultCluster" -meta_addr = "http://127.0.0.1:2379" -lease = "10s" -timeout = "5s" - -[cluster_deployment.etcd_client] -server_addrs = ['127.0.0.1:2379'] - -[limiter] -write_block_list = ['block_test_table'] -read_block_list = ['block_test_table'] diff --git a/integration_tests/config/horaemeta.toml b/integration_tests/config/horaemeta.toml deleted file mode 100644 index 2785e50b1c..0000000000 --- a/integration_tests/config/horaemeta.toml +++ /dev/null @@ -1,32 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -etcd-start-timeout-ms = 30000 -peer-urls = "http://127.0.0.1:2380" -advertise-client-urls = "http://127.0.0.1:2379" -advertise-peer-urls = "http://127.0.0.1:2380" -client-urls = "http://127.0.0.1:2379" -data-dir = "/tmp/horaemeta" -node-name = "meta0" -initial-cluster = "meta0=http://127.0.0.1:2380" -default-cluster-node-count = 2 - -[log] -level = "info" - -[etcd-log] -level = "info" diff --git a/integration_tests/config/shard-based-recovery.toml b/integration_tests/config/shard-based-recovery.toml deleted file mode 100644 index 92e56f6a18..0000000000 --- a/integration_tests/config/shard-based-recovery.toml +++ /dev/null @@ -1,38 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[server] -bind_addr = "0.0.0.0" -http_port = 5440 -grpc_port = 8831 - -[logger] -level = "info" - -[tracing] -dir = "/tmp/horaedb" - -[analytic] -recover_mode = "ShardBased" - -[analytic.storage.object_store] -type = "Local" -data_dir = "/tmp/horaedb" - -[analytic.wal] -type = "Local" -data_dir = "/tmp/horaedb" diff --git a/integration_tests/dist_query/diff.py b/integration_tests/dist_query/diff.py deleted file mode 100644 index 0bb90e232e..0000000000 --- a/integration_tests/dist_query/diff.py +++ /dev/null @@ -1,64 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import argparse -import difflib - -def get_args(): - parser = argparse.ArgumentParser(description='cmd args') - parser.add_argument('--expected', '-e', type=str, help='expected queries result file') - parser.add_argument('--actual', '-a', type=str, help='actual queries result file') - args = vars(parser.parse_args()) - return args - -def main(): - args = get_args() - - # Load queries results. - f_expected_path = args['expected'] - f_actual_path = args['actual'] - - f_expected = open(f_expected_path, "r") - expecteds = f_expected.readlines() - - f_actual = open(f_actual_path, "r") - actuals = f_actual.readlines() - - # Diff them. - diffs = difflib.context_diff(expecteds, actuals) - diff_num = 0 - for diff in diffs: - diff_num += 1 - print(diff) - - f_expected.close() - f_actual.close() - - # If diff exists, write the actual to expected, we can use `git diff` to inspect the detail diffs. - if diff_num != 0: - f = open(f_expected_path, "w") - f.writelines(actuals) - f.close() - # Test failed, just panic - print("Test failed...") - assert(False) - - # Haha, test passed! - print("Test passed...") - -if __name__ == '__main__': - main() diff --git a/integration_tests/dist_query/run.sh b/integration_tests/dist_query/run.sh deleted file mode 100755 index 4bdf6e06cc..0000000000 --- a/integration_tests/dist_query/run.sh +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- - -# Get current dir -export CURR_DIR=$(pwd) - -# Related components -TSBS_REPO_PATH=${CURR_DIR}/tsbs -DATA_REPO_PATH=${CURR_DIR}/dist-query-testing -# Case contexts -CASE_DIR=tsbs-cpu-only -CASE_DATASOURCE=data.out -CASE_QUERY=single-groupby-5-8-1-queries.gz -CASE_QUERY_RESULT=queries.result - -# Test params -export RESULT_FILE=${RESULT_FILE:-${DEFAULT_RESULT_FILE}} -export OUTPUT_DIR=${OUTPUT_DIR:-${CURR_DIR}/output} -export HORAEDB_ADDR=${HORAEDB_ADDR:-127.0.0.1:8831} -export HORAEDB_HTTP_ADDR=${HORAEDB_HTTP_ADDR:-127.0.0.1:5440} -export WRITE_WORKER_NUM=${WRITE_WORKER_NUM:-36} -export WRITE_BATCH_SIZE=${WRITE_BATCH_SIZE:-500} -## Where generated data stored -export DATA_FILE=${DATA_FILE:-${CURR_DIR}/dist-query-testing/${CASE_DIR}/${CASE_DATASOURCE}} -## How many values in host tag -export HOST_NUM=${HOST_NUM:-10000} -export BULK_DATA_DIR=${CURR_DIR}/dist-query-testing/${CASE_DIR} -## Used for `generate_queries.sh` end. -export QUERY_TYPES="\ -single-groupby-1-1-1 \ -single-groupby-1-8-1 \ -single-groupby-5-1-1 \ -single-groupby-5-8-1" -## Where query results stored -export QUERY_RESULTS_FILE=${CURR_DIR}/output/queries.reuslt.tmp -export QUERY_EXPECTED_RESULTS_FILE=${QUERY_EXPECTED_RESULTS_FILE:-${CURR_DIR}/dist-query-testing/${CASE_DIR}/${CASE_QUERY_RESULT}} - -set -x - -mkdir -p ${OUTPUT_DIR} - -# Prepare components -## Tsbs -if [[ -d ${TSBS_REPO_PATH} ]] && [[ ${UPDATE_REPOS_TO_LATEST} == 'true' ]]; then - echo "Remove old tsbs..." - rm -rf ${TSBS_REPO_PATH} -fi - -if [[ ! -d ${TSBS_REPO_PATH} ]]; then - echo "Pull tsbs repo and build..." - git clone -b feat-ceresdb --depth 1 --single-branch https://github.com/CeresDB/tsbs.git - cd tsbs - go build ./cmd/tsbs_generate_data - go build ./cmd/tsbs_load_ceresdb - go build ./cmd/tsbs_generate_queries - go build ./cmd/tsbs_run_queries_ceresdb - cd .. -fi - -## Data -if [[ -d ${DATA_REPO_PATH} ]] && [[ ${UPDATE_REPOS_TO_LATEST} == 'true' ]]; then - echo "Remove old dist query testing..." - rm -rf ${DATA_REPO_PATH} -fi - -if [[ ! -d ${DATA_REPO_PATH} ]]; then - echo "Pull dist query testing repo..." - git clone -b main --depth 1 --single-branch https://github.com/CeresDB/dist-query-testing.git -fi - -# Clean old table if exist -curl -XPOST "${HORAEDB_HTTP_ADDR}/sql" -d 'DROP TABLE IF EXISTS `cpu`' - -# Write data to horaedb -${CURR_DIR}/tsbs/tsbs_load_ceresdb --ceresdb-addr=${HORAEDB_ADDR} --file ${DATA_FILE} --batch-size ${WRITE_BATCH_SIZE} --workers ${WRITE_WORKER_NUM} --access-mode proxy --partition-keys hostname --update-mode APPEND | tee ${OUTPUT_DIR}/${CASE_DIR}-${CASE_DATASOURCE}.log - -# Run queries against horaedb -# TODO: support more kinds of queries besides 5-8-1. -cat ${BULK_DATA_DIR}/${CASE_QUERY} | gunzip | ${CURR_DIR}/tsbs/tsbs_run_queries_ceresdb --ceresdb-addr=${HORAEDB_ADDR} --print-responses true --access-mode proxy --responses-file ${QUERY_RESULTS_FILE} | tee ${OUTPUT_DIR}/${CASE_DIR}-${CASE_QUERY}.log - -# Diff the results -python3 ${CURR_DIR}/diff.py --expected ${QUERY_EXPECTED_RESULTS_FILE} --actual ${QUERY_RESULTS_FILE} diff --git a/integration_tests/mysql/basic.sh b/integration_tests/mysql/basic.sh deleted file mode 100755 index e4b2e2b84c..0000000000 --- a/integration_tests/mysql/basic.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -# This only ensure query by mysql protocol is OK, -# Full SQL test in ensured by sqlness tests. -mysql -h 127.0.0.1 -P 3307 -e 'show tables' - -mysql -h 127.0.0.1 -P 3307 -e 'select 1, now()' - -mysql -h 127.0.0.1 -P 3307 -e 'CREATE TABLE `demo`(`name`string TAG,`id` int TAG,`value` double NOT NULL,`t` timestamp NOT NULL,TIMESTAMP KEY(t)) ENGINE = Analytic with(enable_ttl=false)' - -mysql -h 127.0.0.1 -P 3307 -e 'insert into demo (name,value,t)values("horaedb",1,1683280523000)' - -mysql -h 127.0.0.1 -P 3307 -e 'select * from demo' diff --git a/integration_tests/opentsdb/run-tests.sh b/integration_tests/opentsdb/run-tests.sh deleted file mode 100755 index dc7860d89b..0000000000 --- a/integration_tests/opentsdb/run-tests.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -python ./test-put.py \ No newline at end of file diff --git a/integration_tests/opentsdb/test-put.py b/integration_tests/opentsdb/test-put.py deleted file mode 100755 index f7986a8f2f..0000000000 --- a/integration_tests/opentsdb/test-put.py +++ /dev/null @@ -1,367 +0,0 @@ -#!/usr/bin/env python -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# coding: utf-8 - -import requests -import time - -api_root = 'http://localhost:5440' -headers = { - 'Content-Type': 'application/json' -} -table_prefix = 'opentsdb_test_' -table2_prefix = 'opentsdb_test2_' - - -def now(): - return int(time.time()) * 1000 - - -def execute_sql(sql): - r = requests.post('{}/sql'.format(api_root), json={'query': sql}, headers=headers) - return r - - -def drop_table(table_name): - sql = """ -DROP TABLE IF EXISTS {} - """.format(table_name) - r = execute_sql(sql) - assert r.status_code == 200, r.text - - -def show_create_table(table_name): - sql = """ -SHOW CREATE TABLE {} - """.format(table_name) - r = execute_sql(sql) - assert r.status_code == 200 - return r - - -def execute_sql_query(sql): - r = execute_sql(sql) - assert r.status_code == 200 - return r - - -def execute_put(points): - r = requests.post('{}/opentsdb/api/put'.format(api_root), data=points) - return r - - -def execute_put_then_assert_fail(points): - r = execute_put(points) - assert r.status_code == 500 - - -def execute_put_then_assert_success(points): - r = execute_put(points) - assert r.status_code == 204 - - -def test_put_validate_error(): - ts = now(); - # empty json string - execute_put_then_assert_fail("") - - # invalid json - execute_put_then_assert_fail("{xxx") - - # empty metric - execute_put_then_assert_fail(""" -{ - "metric": "", - "timestamp": {ts}, - "value": 18, - "tags": { - "host": "web01", - "dc": "lga" - } -} - """.replace('{ts}', str(ts))) - - # empty tag - execute_put_then_assert_fail(""" -{ - "metric": "sys.cpu.nice", - "timestamp": {ts}, - "value": 18, - "tags": { - } -} - """.replace('{ts}', str(ts))) - - # empty tag name - execute_put_then_assert_fail(""" -{ - "metric": "sys.cpu.nice", - "timestamp": {ts}, - "value": 18, - "tags": { - "": "web01", - "dc": "lga" - } -} - """.replace('{ts}', str(ts))) - - # too small timestamp - execute_put_then_assert_fail(""" -{ - "metric": "sys.cpu.nice", - "timestamp": 1, - "value": 18, - "tags": { - "host": "web01", - "dc": "lga" - } -} - """.replace('{ts}', str(ts))) - - # too big timestamp - execute_put_then_assert_fail(""" -{ - "metric": "sys.cpu.nice", - "timestamp": 10000000000000, - "value": 18, - "tags": { - "host": "web01", - "dc": "lga" - } -} - """.replace('{ts}', str(ts))) - - -def test_put_single_point_with_int_value(): - ts = now() - table_name = table_prefix + str(ts) - drop_table(table_name) - - execute_put_then_assert_success(""" -{ - "metric": "{metric}", - "timestamp": {ts}, - "value": 9527, - "tags": { - "host": "web01", - "dc": "lga" - } -} - """.replace('{metric}', table_name).replace('{ts}', str(ts))) - - r = show_create_table(table_name) - assert r.text.__contains__('`tsid` uint64 NOT NULL') - assert r.text.__contains__('`timestamp` timestamp NOT NULL') - assert r.text.__contains__('`dc` string TAG') - assert r.text.__contains__('`host` string TAG') - # value is a double column - assert r.text.__contains__('`value` double') - - r = execute_sql_query(""" -SELECT timestamp, dc, host, value FROM {metric} - """.replace('{metric}', table_name)) - assert r.text == """{"rows":[{"timestamp":{ts},"dc":"lga","host":"web01","value":9527.0}]}""".strip().replace('{ts}', str(ts)) - - -def test_put_single_point_with_float_value(): - ts = now() - table_name = table_prefix + str(ts) - drop_table(table_name) - - execute_put_then_assert_success(""" -{ - "metric": "{metric}", - "timestamp": {ts}, - "value": 95.27, - "tags": { - "host": "web01", - "dc": "lga" - } -} - """.replace('{metric}', table_name).replace('{ts}', str(ts))) - 
- r = show_create_table(table_name) - assert r.text.__contains__('`tsid` uint64 NOT NULL') - assert r.text.__contains__('`timestamp` timestamp NOT NULL') - assert r.text.__contains__('`dc` string TAG') - assert r.text.__contains__('`host` string TAG') - # value is a double column - assert r.text.__contains__('`value` double') - - r = execute_sql_query(""" -SELECT timestamp, dc, host, value FROM {metric} - """.replace('{metric}', table_name)) - assert r.text == """ -{"rows":[{"timestamp":{ts},"dc":"lga","host":"web01","value":95.27}]} - """.strip().replace('{ts}', str(ts)) - - -def test_put_single_point_with_second_timestamp(): - ts = now() - ts_in_seconds = ts // 1000; - table_name = table_prefix + str(ts) - drop_table(table_name) - - execute_put_then_assert_success(""" -{ - "metric": "{metric}", - "timestamp": {ts}, - "value": 95.27, - "tags": { - "host": "web01", - "dc": "lga" - } -} - """.replace('{metric}', table_name).replace('{ts}', str(ts_in_seconds))) - - r = execute_sql_query(""" -SELECT timestamp, dc, host, value FROM {metric} - """.replace('{metric}', table_name)) - assert r.text == """ -{"rows":[{"timestamp":{ts},"dc":"lga","host":"web01","value":95.27}]} - """.strip().replace('{ts}', str(ts)) - - -def test_put_multi_points_with_different_tags_in_one_table(): - ts = now() - table_name = table_prefix + str(ts) - drop_table(table_name) - - execute_put_then_assert_success(""" -[ - { - "metric": "{metric}", - "timestamp": {ts}, - "value": 18, - "tags": { - "host": "web01" - } - }, - { - "metric": "{metric}", - "timestamp": {ts}, - "value": 9, - "tags": { - "dc": "lga" - } - } -] - """.replace('{metric}', table_name).replace('{ts}', str(ts))) - - r = execute_sql_query(""" -SELECT timestamp, dc, host, value FROM {metric} ORDER BY value desc - """.replace('{metric}', table_name)) - assert r.text == """ -{"rows":[{"timestamp":{ts},"dc":null,"host":"web01","value":18.0},{"timestamp":{ts},"dc":"lga","host":null,"value":9.0}]} - """.strip().replace('{ts}', str(ts)) - - -# HoraeDB internal error: "Column: value in table: ??? 
data type is not same, expected: bigint, actual: double" -def test_put_multi_points_with_different_datatype_in_one_table(): - ts = now() - table_name = table_prefix + str(ts) - drop_table(table_name) - - execute_put_then_assert_success(""" -[ - { - "metric": "{metric}", - "timestamp": {ts}, - "value": 18, - "tags": { - "host": "web01", - "dc": "lga" - } - }, - { - "metric": "{metric}", - "timestamp": {ts}, - "value": 9.999, - "tags": { - "host": "web02", - "dc": "lga" - } - } -] - """.replace('{metric}', table_name).replace('{ts}', str(ts))) - - -def test_put_multi_points_in_multi_table(): - ts = now() - table_name = table_prefix + str(ts) - table2_name = table2_prefix + str(ts) - drop_table(table_name) - drop_table(table2_name) - - execute_put_then_assert_success(""" -[ - { - "metric": "{metric}", - "timestamp": {ts}, - "value": 18, - "tags": { - "host": "web01", - "dc": "lga" - } - }, - { - "metric": "{metric2}", - "timestamp": {ts}, - "value": 9, - "tags": { - "host": "web02", - "dc": "lga" - } - } -] - """.replace('{metric}', table_name).replace('{metric2}', table2_name).replace('{ts}', str(ts))) - - r = execute_sql_query(""" -SELECT timestamp, dc, host, value FROM {metric} - """.replace('{metric}', table_name)) - assert r.text == """ -{"rows":[{"timestamp":{ts},"dc":"lga","host":"web01","value":18.0}]} - """.strip().replace('{ts}', str(ts)) - - r = execute_sql_query(""" -SELECT timestamp, dc, host, value FROM {metric} - """.replace('{metric}', table2_name)) - assert r.text == """ -{"rows":[{"timestamp":{ts},"dc":"lga","host":"web02","value":9.0}]} - """.strip().replace('{ts}', str(ts)) - - -def main(): - print("OpenTSDB test start.") - - test_put_validate_error() - - test_put_single_point_with_int_value() - test_put_single_point_with_float_value() - test_put_single_point_with_second_timestamp() - - test_put_multi_points_with_different_tags_in_one_table() - test_put_multi_points_with_different_datatype_in_one_table() - test_put_multi_points_in_multi_table() - - print("OpenTSDB test finished.") - - -if __name__ == '__main__': - main() diff --git a/integration_tests/postgresql/basic.sh b/integration_tests/postgresql/basic.sh deleted file mode 100755 index f1f92a31a1..0000000000 --- a/integration_tests/postgresql/basic.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -# This only ensure query by postgresql protocol is OK, -# Full SQL test in ensured by sqlness tests. 
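For completeness, the same protocol smoke check can be scripted from Python when a shell is not convenient. The sketch below is an illustration only, under the assumptions that `psql` is on PATH and that the server from these integration-test configs is listening on 127.0.0.1:5433; it simply shells out to the same commands the script runs and fails fast on a non-zero exit code.

# Sketch: run the PostgreSQL-protocol smoke queries via subprocess.
# Assumes `psql` is installed and the server is listening on 127.0.0.1:5433.
import subprocess

QUERIES = [
    'show tables',
    'select 1, now();',
]

for q in QUERIES:
    subprocess.run(
        ['psql', '-h', '127.0.0.1', '-p', '5433', '-c', q],
        check=True,  # raise CalledProcessError if the round-trip fails
    )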
-psql -h 127.0.0.1 -p 5433 -c 'show tables' - -psql -h 127.0.0.1 -p 5433 -c 'select 1, now();' - -psql -h 127.0.0.1 -p 5433 -c 'drop table if exists demo;' - -psql -h 127.0.0.1 -p 5433 -c 'CREATE TABLE `demo`(`name`string TAG,`id` int TAG,`value` double NOT NULL,`t` timestamp NOT NULL,TIMESTAMP KEY(t)) ENGINE = Analytic with(enable_ttl=false);' - -psql -h 127.0.0.1 -p 5433 -c 'insert into demo (name,value,t)values("horaedb",1,1691116127622);' - -psql -h 127.0.0.1 -p 5433 -c 'select * from demo;' diff --git a/integration_tests/prom/prometheus.yml b/integration_tests/prom/prometheus.yml deleted file mode 100644 index a2a1174ea9..0000000000 --- a/integration_tests/prom/prometheus.yml +++ /dev/null @@ -1,20 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -remote_read: - - url: "http://127.0.0.1:5440/prom/v1/read" - read_recent: true diff --git a/integration_tests/prom/remote-query.py b/integration_tests/prom/remote-query.py deleted file mode 100755 index 0594df02b6..0000000000 --- a/integration_tests/prom/remote-query.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# coding: utf-8 - -import requests -import time - -api_root = 'http://localhost:5440' -prom_api_root = 'http://localhost:9090' -headers = { - 'Content-Type': 'application/json' -} - -def now(): - return int(time.time()) * 1000 - -table = 'prom_remote_query_test' + str(now()) -table2 = 'PROM_REMOTE_QUERY_TEST' + str(now()) - -def execute_sql(sql): - r = requests.post('{}/sql'.format(api_root), json={'query': sql}, headers=headers) - assert r.status_code == 200, r.text - -def execute_pql(pql): - r = requests.get('{}/api/v1/query?query={}'.format(prom_api_root, pql)) - assert r.status_code == 200, r.text - return r.json() - -def prepare_data(ts): - for t in [table, table2]: - execute_sql(""" -CREATE TABLE if not exists `{}` ( - `t` timestamp NOT NULL, - `tag1` string TAG, - `TAG2` string TAG, - `value` double NOT NULL, - `VALUE2` double NOT NULL, - timestamp KEY (t) -); - """.format(t)) - - execute_sql(""" -insert into {}(t, tag1, TAG2, value, VALUE2) -values -({}, "v1", "v2", 1, 2), -({}, "v1", "v2", 11, 22) - ; - """.format(table, ts-5000, ts)) - - execute_sql(""" -insert into {}(t, tag1, TAG2, value, VALUE2) -values -({}, "v1", "v2", 10, 20), -({}, "v1", "v2", 110, 220) - ; - """.format(table2, ts-5000, ts)) - - -def remote_query(ts): - ts = ts/1000 # prom return seconds - - r = execute_pql(table + '{tag1="v1"}[5m]') - result = r['data']['result'] - assert result == [{'metric': {'__name__': table, 'tag1': 'v1', 'TAG2': 'v2'}, 'values': [[ts-5, '1'], [ts, '11']]}] - - r = execute_pql(table + '{TAG2="v2"}[5m]') - result = r['data']['result'] - assert result == [{'metric': {'__name__': table, 'tag1': 'v1', 'TAG2': 'v2'}, 'values': [[ts-5, '1'], [ts, '11']]}] - - r = execute_pql(table + '{tag1=~"v1"}[5m]') - result = r['data']['result'] - assert result == [{'metric': {'__name__': table, 'tag1': 'v1', 'TAG2': 'v2'}, 'values': [[ts-5, '1'], [ts, '11']]}] - - r = execute_pql(table + '{tag1!="v1"}[5m]') - result = r['data']['result'] - assert result == [] - - r = execute_pql(table + '{tag1!~"v1"}[5m]') - result = r['data']['result'] - assert result == [] - - # uppercase field - r = execute_pql(table + '{tag1="v1",__horaedb_field__="VALUE2"}[5m]') - result = r['data']['result'] - assert result == [{'metric': {'__name__': table, 'tag1': 'v1', 'TAG2': 'v2'}, 'values': [[ts-5, '2'], [ts, '22']]}] - - # uppercase table - r = execute_pql(table2 + '{tag1="v1"}[5m]') - result = r['data']['result'] - assert result == [{'metric': {'__name__': table2, 'tag1': 'v1', 'TAG2': 'v2'}, 'values': [[ts-5, '10'], [ts, '110']]}] - -def main(): - ts = now() - prepare_data(ts) - remote_query(ts) - -if __name__ == '__main__': - main() diff --git a/integration_tests/prom/run-tests.sh b/integration_tests/prom/run-tests.sh deleted file mode 100755 index 81fdd29cea..0000000000 --- a/integration_tests/prom/run-tests.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -VERSION=prometheus-2.43.0.linux-amd64 -wget -q "https://github.com/prometheus/prometheus/releases/download/v2.43.0/${VERSION}.tar.gz" - -tar xvf prometheus*.tar.gz -nohup ./${VERSION}/prometheus --config.file ./prometheus.yml & -sleep 5 - -python ./remote-query.py diff --git a/integration_tests/recovery/check.py b/integration_tests/recovery/check.py deleted file mode 100644 index fae7b852f0..0000000000 --- a/integration_tests/recovery/check.py +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# coding: utf-8 - -import requests -import argparse - -api_root = 'http://localhost:5440' -headers = { - 'Content-Type': 'application/json' -} - -def get_test_tables(ts): - table = 'sql_test' + str(ts) - table2 = 'SQL_TEST' + str(ts) - return [table, table2] - -def get_args(): - parser = argparse.ArgumentParser(description='cmd args') - parser.add_argument('--timestamp', '-ts', type=int, help='timestamp') - parser.add_argument('--init_before_check', '-i', help='init_before_check', action="store_true") - args = vars(parser.parse_args()) - return args - - -def execute_sql(sql): - r = requests.post('{}/sql'.format(api_root), json={'query': sql}, headers=headers) - assert r.status_code == 200, r.text - return r.json() - -def prepare_data(ts, tables): - for t in tables: - execute_sql(""" -CREATE TABLE if not exists `{}` ( - `t` timestamp NOT NULL, - `tag1` string TAG, - `tag2` string TAG, - `value` double NOT NULL, - `VALUE2` double NOT NULL, - timestamp KEY (t) -); - """.format(t)) - - execute_sql(""" -insert into {}(t, tag1, tag2, value, VALUE2) -values -({}, "v1", "v2", 1, 2), -({}, "v1", "v2", 11, 22) - ; - """.format(tables[0], ts-5000, ts)) - - execute_sql(""" -insert into {}(t, tag1, tag2, value, VALUE2) -values -({}, "v1", "v2", 10, 20), -({}, "v1", "v2", 110, 220) - ; - """.format(tables[1], ts-5000, ts)) - -def query_and_check(ts, tables): - expected = {'rows': [{'tsid': 7518337278486593135, 't': ts - 5000, 'tag1': 'v1', 'tag2': 'v2', 'value': 1.0, 'VALUE2': 2.0},\ - {'tsid': 7518337278486593135, 't': ts, 'tag1': 'v1', 'tag2': 'v2', 'value': 11.0, 'VALUE2': 22.0}]} - expected2 = {'rows': [{'tsid': 7518337278486593135, 't': ts - 5000, 'tag1': 'v1', 'tag2': 'v2', 'value': 10.0, 'VALUE2': 20.0},\ - {'tsid': 7518337278486593135, 't': ts, 'tag1': 'v1', 'tag2': 'v2', 'value': 110.0, 'VALUE2': 220.0}]} 
- expecteds = [expected, expected2] - - for idx, t in enumerate(tables): - r = execute_sql("select * from {}".format(t)) - assert r == expecteds[idx] - - print('Restart test pass...') - -def main(): - args = get_args() - init_before_check = args['init_before_check'] - ts = args['timestamp'] - test_tables = get_test_tables(args['timestamp']) - - if init_before_check: - print("Init before check") - prepare_data(ts, test_tables) - query_and_check(ts, test_tables) - -if __name__ == '__main__': - main() diff --git a/integration_tests/recovery/run.sh b/integration_tests/recovery/run.sh deleted file mode 100755 index e716f7866b..0000000000 --- a/integration_tests/recovery/run.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -set -e - -ROOT=`pwd` -# For compatibility in macos, so convert to milliseconds by adding 3 zeros. -NOW=`date +%s000` -BINARY_PATH=${ROOT}/../../target/debug/horaedb-server -SERVER_HTTP_ENDPOINT=127.0.0.1:5440 - -CONFIG_FILE=${ROOT}/../../docs/minimal.toml -if [ ${1} == 'shard_based' ]; then - CONFIG_FILE=${ROOT}/../config/shard-based-recovery.toml -fi - -echo "Run with config: ${CONFIG_FILE}" -echo "First check..." -nohup ${BINARY_PATH} --config ${CONFIG_FILE} & -sleep 10 -python3 ./check.py -ts ${NOW} -i - -echo "Restart and check..." -killall horaedb-server | true -nohup ${BINARY_PATH} --config ${CONFIG_FILE} & -sleep 10 -python3 ./check.py -ts ${NOW} - -echo "Flush, restart and check..." -curl -XPOST ${SERVER_HTTP_ENDPOINT}/debug/flush_memtable -echo "\nFlush finish..." -killall horaedb-server | true -nohup ${BINARY_PATH} --config ${CONFIG_FILE} & -sleep 10 -python3 ./check.py -ts ${NOW} -echo "All finish..." diff --git a/integration_tests/sdk/go/alteraddcolumn.go b/integration_tests/sdk/go/alteraddcolumn.go deleted file mode 100644 index 6e97477c34..0000000000 --- a/integration_tests/sdk/go/alteraddcolumn.go +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */
-
-package main
-
-import (
- "context"
- "fmt"
-
- "github.com/apache/incubator-horaedb-client-go/horaedb"
-)
-
-const fieldName = "b"
-const tagName = "btag"
-const timestampName = "t"
-
-func checkPartitionTableAddColumn(ctx context.Context, client horaedb.Client) error {
- err := dropTable(ctx, client, partitionTable)
- if err != nil {
- return err
- }
-
- _, err = ddl(ctx, client, partitionTable, fmt.Sprintf(
- "CREATE TABLE `%s`( "+
- "`name`string TAG,"+
- "`id` int TAG,"+
- "`value` int64 NOT NULL,"+
- "`t` timestamp NOT NULL,"+
- "TIMESTAMP KEY(t)) "+
- "PARTITION BY KEY(name) PARTITIONS 4 ENGINE = Analytic", partitionTable))
- if err != nil {
- return err
- }
-
- _, err = ddl(ctx, client, partitionTable, fmt.Sprintf("ALTER TABLE `%s` ADD COLUMN (%s string);", partitionTable, fieldName))
- if err != nil {
- return err
- }
-
- ts := currentMS()
-
- // The first write will fail, because the schema is not updated yet.
- // Currently, horaedb will update the schema when a write fails.
- err = writePartitionTableNewField(ctx, client, ts, fieldName)
- if err == nil {
- panic("first write should fail")
- }
-
- if err := writePartitionTableNewField(ctx, client, ts, fieldName); err != nil {
- return err
- }
-
- _, err = ddl(ctx, client, partitionTable, fmt.Sprintf("ALTER TABLE `%s` ADD COLUMN (%s string TAG);", partitionTable, tagName))
- if err != nil {
- return err
- }
-
- // The first write will fail, because the schema is not updated yet.
- // Currently, a failed write will update the schema.
- err = writePartitionTableNewTag(ctx, client, ts, tagName)
- if err == nil {
- panic("first write should fail")
- }
-
- if err := writePartitionTableNewTag(ctx, client, ts, tagName); err != nil {
- return err
- }
-
- if err := queryPartitionTable(ctx, client, ts, timestampName); err != nil {
- return err
- }
-
- return nil
-}
-
-func writePartitionTableNewField(ctx context.Context, client horaedb.Client, ts int64, fieldName string) error {
- points := make([]horaedb.Point, 0, 2)
- for i := 0; i < 2; i++ {
- builder := horaedb.NewPointBuilder(partitionTable).
- SetTimestamp(ts).
- AddTag("name", horaedb.NewStringValue(fmt.Sprintf("tag-%d", i))).
- AddField("value", horaedb.NewInt64Value(int64(i))).
- AddField(fieldName, horaedb.NewStringValue("ss"))
-
- point, err := builder.Build()
-
- if err != nil {
- return err
- }
- points = append(points, point)
- }
-
- resp, err := client.Write(ctx, horaedb.WriteRequest{
- Points: points,
- })
- if err != nil {
- return err
- }
-
- if resp.Success != 2 {
- return fmt.Errorf("write failed, resp: %+v", resp)
- }
- return nil
-}
-
-func writePartitionTableNewTag(ctx context.Context, client horaedb.Client, ts int64, tagName string) error {
- points := make([]horaedb.Point, 0, 2)
- for i := 0; i < 2; i++ {
- builder := horaedb.NewPointBuilder(partitionTable).
- SetTimestamp(ts).
- AddTag("name", horaedb.NewStringValue(fmt.Sprintf("tag-%d", i))).
- AddField("value", horaedb.NewInt64Value(int64(i))).
- AddTag(tagName, horaedb.NewStringValue("sstag")).
- AddField(fieldName, horaedb.NewStringValue("ss"))
-
- point, err := builder.Build()
-
- if err != nil {
- return err
- }
- points = append(points, point)
- }
-
- resp, err := client.Write(ctx, horaedb.WriteRequest{
- Points: points,
- })
- if err != nil {
- return err
- }
-
- if resp.Success != 2 {
- return fmt.Errorf("write failed, resp: %+v", resp)
- }
- return nil
-}
-
-func queryPartitionTable(ctx context.Context, client horaedb.Client, ts int64, timestampName string) error {
- sql := fmt.Sprintf("select t, name, value,%s,%s from %s where %s = %d order by name,%s", fieldName, tagName, partitionTable, timestampName, ts, tagName)
-
- resp, err := client.SQLQuery(ctx, horaedb.SQLQueryRequest{
- Tables: []string{partitionTable},
- SQL: sql,
- })
- if err != nil {
- return err
- }
-
- if len(resp.Rows) != 4 {
- return fmt.Errorf("expect 4 rows, current: %+v", len(resp.Rows))
- }
-
- row0 := []horaedb.Value{
- horaedb.NewInt64Value(ts),
- horaedb.NewStringValue("tag-0"),
- horaedb.NewInt64Value(0),
- horaedb.NewStringValue("ss"),
- horaedb.NewStringValue("sstag"),
- }
-
- row1 := []horaedb.Value{
- horaedb.NewInt64Value(ts),
- horaedb.NewStringValue("tag-0"),
- horaedb.NewInt64Value(0),
- horaedb.NewStringValue("ss"),
- }
-
- row2 := []horaedb.Value{
- horaedb.NewInt64Value(ts),
- horaedb.NewStringValue("tag-1"),
- horaedb.NewInt64Value(1),
- horaedb.NewStringValue("ss"),
- horaedb.NewStringValue("sstag"),
- }
-
- row3 := []horaedb.Value{
- horaedb.NewInt64Value(ts),
- horaedb.NewStringValue("tag-1"),
- horaedb.NewInt64Value(1),
- horaedb.NewStringValue("ss"),
- }
-
- if err := ensureRow(row0,
- resp.Rows[0].Columns()); err != nil {
- return err
- }
- if err := ensureRow(row1,
- resp.Rows[1].Columns()); err != nil {
- return err
- }
- if err := ensureRow(row2,
- resp.Rows[2].Columns()); err != nil {
- return err
- }
-
- return ensureRow(row3, resp.Rows[3].Columns())
-}
diff --git a/integration_tests/sdk/go/autocreatetable.go b/integration_tests/sdk/go/autocreatetable.go
deleted file mode 100644
index 6c113c897d..0000000000
--- a/integration_tests/sdk/go/autocreatetable.go
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */ - -package main - -import ( - "context" - - "github.com/apache/incubator-horaedb-client-go/horaedb" -) - -func checkAutoAddColumns(ctx context.Context, client horaedb.Client) error { - timestampName := "timestamp" - err := dropTable(ctx, client, table) - if err != nil { - return err - } - - err = writeAndQuery(ctx, client, timestampName) - if err != nil { - return err - } - - return writeAndQueryWithNewColumns(ctx, client, timestampName) -} diff --git a/integration_tests/sdk/go/go.mod b/integration_tests/sdk/go/go.mod deleted file mode 100644 index 8bae76d8c9..0000000000 --- a/integration_tests/sdk/go/go.mod +++ /dev/null @@ -1,43 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -module go-sdk-test - -go 1.21 - -require github.com/apache/incubator-horaedb-client-go v1.2.0 - -require ( - github.com/CeresDB/horaedbproto/golang v0.0.0-20231129131648-5d5d868218c3 // indirect - github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect - github.com/golang/protobuf v1.5.3 // indirect - github.com/google/flatbuffers v2.0.0+incompatible // indirect - github.com/hashicorp/golang-lru v1.0.2 // indirect - github.com/klauspost/compress v1.15.14 // indirect - github.com/pierrec/lz4/v4 v4.1.8 // indirect - github.com/pkg/errors v0.9.1 // indirect - golang.org/x/net v0.23.0 // indirect - golang.org/x/sys v0.18.0 // indirect - golang.org/x/text v0.14.0 // indirect - golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect - google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect - google.golang.org/grpc v1.56.3 // indirect - google.golang.org/protobuf v1.33.0 // indirect -) - -// TODO: remove this when a new version is released. 
-replace github.com/apache/incubator-horaedb-client-go => github.com/CeresDB/horaedb-client-go v1.2.0 diff --git a/integration_tests/sdk/go/go.sum b/integration_tests/sdk/go/go.sum deleted file mode 100644 index 7e3ecaee7f..0000000000 --- a/integration_tests/sdk/go/go.sum +++ /dev/null @@ -1,230 +0,0 @@ -cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= -gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/CeresDB/horaedb-client-go v1.2.0 h1:Z4fnRxsocWDYuATljyocESK8Sjs0vYTiwRtJyhG1/D0= -github.com/CeresDB/horaedb-client-go v1.2.0/go.mod h1:sGKM3eO8SfwNt/0+u1vLM5VRHQSa49w67ps9n0T6oGA= -github.com/CeresDB/horaedbproto/golang v0.0.0-20231129131648-5d5d868218c3 h1:RgOKVfdbnF4W5/TVFf7cIahom5HcV1v0GP2BYDc0BEM= -github.com/CeresDB/horaedbproto/golang v0.0.0-20231129131648-5d5d868218c3/go.mod h1:RHgzmQBZC4P5+Jm58flXMynMFGbiVQrw1t2ce3wLVas= -github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= -github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= -github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 h1:q4dksr6ICHXqG5hm0ZW5IHyeEJXoIJSOZeBLmWPNeIQ= -github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40/go.mod h1:Q7yQnSMnLvcXlZ8RV+jwz/6y1rQTqbX6C82SndT52Zs= -github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= -github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= -github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= -github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= -github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= -github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ= -github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= 
-github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= -github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= -github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/go-fonts/dejavu v0.1.0/go.mod h1:4Wt4I4OU2Nq9asgDCteaAaWZOV24E+0/Pwo0gppep4g= -github.com/go-fonts/latin-modern v0.2.0/go.mod h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3T0ecnM9pNujks= -github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY= -github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY= -github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= -github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U= -github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= -github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= -github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= -github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= -github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= -github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= -github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/google/flatbuffers v2.0.0+incompatible h1:dicJ2oXwypfwUGnB2/TYWYEKiuk9eYQlQO/AnOHl5mI= -github.com/google/flatbuffers v2.0.0+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= -github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 
-github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= -github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iPY6p1c= -github.com/hashicorp/golang-lru v1.0.2/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= -github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= -github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= -github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= -github.com/klauspost/compress v1.15.14 h1:i7WCKDToww0wA+9qrUZ1xOjp218vfFo3nTU6UHp+gOc= -github.com/klauspost/compress v1.15.14/go.mod h1:QPwzmACJjUTFsnSHH934V6woptycfrDDJnH7hvFVbGM= -github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY= -github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= -github.com/pierrec/lz4/v4 v4.1.8 h1:ieHkV+i2BRzngO4Wd/3HGowuZStgq6QkPsD1eolNAO4= -github.com/pierrec/lz4/v4 v4.1.8/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= -github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= -github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp 
v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3 h1:n9HxLrNxWWtEb1cA950nuEEj3QnKbtsCJ6KjcgisNUs= -golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3/go.mod h1:NOZ3BPKG0ec/BKJQgnvsSFpcKLM5xXVWnvZS97DWHgE= -golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= -golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= -golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/image v0.0.0-20190910094157-69e4b8554b2a/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/image v0.0.0-20200430140353-33d19683fad8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/image v0.0.0-20200618115811-c13761719519/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/image v0.0.0-20201208152932-35266b937fa6/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/image v0.0.0-20210216034530-4410531fe030/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= -golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= -golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= -golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod 
h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210304124612-50617c2ba197/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= -golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= -golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools 
v0.0.0-20190927191325-030b2cf1153e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= -gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= -gonum.org/v1/gonum v0.9.3 h1:DnoIG+QAMaF5NvxnGe/oKsgKcAc6PcUyl8q0VetfQ8s= -gonum.org/v1/gonum v0.9.3/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0= -gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= -gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= -gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY= -google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20210630183607-d20f26d13c79/go.mod h1:yiaVoXHpRzHGyxV3o4DktVWY4mSUErTKaeEOq6C3t3U= -google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 h1:KpwkzHKEF7B9Zxg18WzOa7djJ+Ha5DzthMyZYQfEn2A= -google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1/go.mod h1:nKE/iIaLqn2bQwXBg8f1g2Ylh6r5MN5CmZvuzZCgsCU= -google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= -google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= -google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= -google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= -google.golang.org/grpc v1.56.3 h1:8I4C0Yq1EjstUzUJzpcRVbuYA2mODtEmpWiQoN/b2nc= -google.golang.org/grpc v1.56.3/go.mod h1:I9bI3vqKfayGqPUAwGdOSu7kt6oIJLixfffKrpXqQ9s= -google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod 
h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= -google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= -google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= -google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= -google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= -google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/integration_tests/sdk/go/issue-779.go b/integration_tests/sdk/go/issue-779.go deleted file mode 100644 index 5c8b9893b4..0000000000 --- a/integration_tests/sdk/go/issue-779.go +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package main - -import ( - "context" - - "github.com/apache/incubator-horaedb-client-go/horaedb" -) - -func checkAutoAddColumnsWithCreateTable(ctx context.Context, client horaedb.Client) error { - timestampName := "timestamp" - - err := dropTable(ctx, client, table) - if err != nil { - return err - } - - err = createTable(ctx, client, timestampName) - if err != nil { - return err - } - - err = writeAndQuery(ctx, client, timestampName) - if err != nil { - return err - } - - return writeAndQueryWithNewColumns(ctx, client, timestampName) -} diff --git a/integration_tests/sdk/go/main.go b/integration_tests/sdk/go/main.go deleted file mode 100644 index e4b4d75f29..0000000000 --- a/integration_tests/sdk/go/main.go +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package main - -import ( - "context" - "fmt" - "os" - "time" - - "github.com/apache/incubator-horaedb-client-go/horaedb" -) - -var endpoint = "127.0.0.1:8831" - -func init() { - if v := os.Getenv("HORAEDB_ADDR"); v != "" { - endpoint = v - } -} - -func main() { - fmt.Printf("Begin test, endpoint %s...\n", endpoint) - - client, err := horaedb.NewClient(endpoint, horaedb.Direct, - horaedb.WithDefaultDatabase("public"), - ) - if err != nil { - panic(err) - } - - ctx := context.TODO() - if err = checkAutoAddColumns(ctx, client); err != nil { - panic(err) - } - - if err = checkAutoAddColumnsWithCreateTable(ctx, client); err != nil { - panic(err) - } - - if err = checkPartitionTableAddColumn(ctx, client); err != nil { - panic(err) - } - - fmt.Println("Test done") -} - -func currentMS() int64 { - return time.Now().UnixMilli() -} diff --git a/integration_tests/sdk/go/util.go b/integration_tests/sdk/go/util.go deleted file mode 100644 index 17a8e8f060..0000000000 --- a/integration_tests/sdk/go/util.go +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package main - -import ( - "context" - "fmt" - - "github.com/apache/incubator-horaedb-client-go/horaedb" -) - -const table = "godemo" -const partitionTable = "godemoPartition" - -func createTable(ctx context.Context, client horaedb.Client, timestampName string) error { - _, err := ddl(ctx, client, table, fmt.Sprintf("create table %s (`%s` timestamp not null, name string tag, value int64,TIMESTAMP KEY(%s))", table, timestampName, timestampName)) - return err -} - -func write(ctx context.Context, client horaedb.Client, ts int64, addNewColumn bool) error { - points := make([]horaedb.Point, 0, 2) - for i := 0; i < 2; i++ { - builder := horaedb.NewPointBuilder(table). - SetTimestamp(ts). - AddTag("name", horaedb.NewStringValue(fmt.Sprintf("tag-%d", i))). - AddField("value", horaedb.NewInt64Value(int64(i))) - - if addNewColumn { - builder = builder.AddTag("new_tag", horaedb.NewStringValue(fmt.Sprintf("new-tag-%d", i))). - AddField("new_field", horaedb.NewInt64Value(int64(i))) - } - - point, err := builder.Build() - - if err != nil { - return err - } - points = append(points, point) - } - - resp, err := client.Write(ctx, horaedb.WriteRequest{ - Points: points, - }) - if err != nil { - return err - } - - if resp.Success != 2 { - return fmt.Errorf("write failed, resp: %+v", resp) - } - - return nil -} - -func ensureRow(expectedVals []horaedb.Value, actualRow []horaedb.Column) error { - for i, expected := range expectedVals { - if actual := actualRow[i].Value(); actual != expected { - return fmt.Errorf("expected: %+v, actual: %+v", expected, actual) - } - } - return nil - -} - -func query(ctx context.Context, client horaedb.Client, ts int64, timestampName string, addNewColumn bool) error { - sql := fmt.Sprintf("select timestamp, name, value from %s where %s = %d order by name", table, timestampName, ts) - if addNewColumn { - sql = fmt.Sprintf("select timestamp, name, value, new_tag, new_field from %s where %s = %d order by name", table, timestampName, ts) - } - resp, err := client.SQLQuery(ctx, horaedb.SQLQueryRequest{ - Tables: []string{table}, - SQL: sql, - }) - if err != nil { - return err - } - - if len(resp.Rows) != 2 { - return fmt.Errorf("expect 2 rows, current: %+v", len(resp.Rows)) - } - - row0 := []horaedb.Value{ - horaedb.NewInt64Value(ts), - horaedb.NewStringValue("tag-0"), - horaedb.NewInt64Value(0)} - - row1 := []horaedb.Value{ - horaedb.NewInt64Value(ts), - horaedb.NewStringValue("tag-1"), - horaedb.NewInt64Value(1), - } - - if addNewColumn { - row0 = append(row0, horaedb.NewStringValue("new-tag-0"), horaedb.NewInt64Value(0)) - row1 = append(row1, horaedb.NewStringValue("new-tag-1"), horaedb.NewInt64Value(1)) - } - - if err := ensureRow(row0, - resp.Rows[0].Columns()); err != nil { - return err - } - - return ensureRow(row1, resp.Rows[1].Columns()) -} - -func ddl(ctx context.Context, client horaedb.Client, tableName string, sql string) (uint32, error) { - resp, err := client.SQLQuery(ctx, horaedb.SQLQueryRequest{ - Tables: []string{tableName}, - SQL: sql, - }) - if err != nil { - return 0, err - } - - return resp.AffectedRows, nil -} - -func writeAndQuery(ctx context.Context, client horaedb.Client, timestampName string) error { - ts := currentMS() - if err := write(ctx, client, ts, false); err != nil { - return err - } - - if err := query(ctx, client, ts, timestampName, false); err != nil { - return err - } - - return nil -} - -func writeAndQueryWithNewColumns(ctx context.Context, client horaedb.Client, timestampName string) error { - ts := currentMS() - if err := write(ctx, 
client, ts, true); err != nil { - return err - } - - if err := query(ctx, client, ts, timestampName, true); err != nil { - return err - } - - return nil -} - -func dropTable(ctx context.Context, client horaedb.Client, table string) error { - affected, err := ddl(ctx, client, table, "drop table if exists "+table) - if err != nil { - return err - } - - if affected != 0 { - panic(fmt.Sprintf("drop table expected 0, actual is %d", affected)) - } - return nil -} diff --git a/integration_tests/sdk/java/pom.xml b/integration_tests/sdk/java/pom.xml deleted file mode 100644 index b852eefa08..0000000000 --- a/integration_tests/sdk/java/pom.xml +++ /dev/null @@ -1,92 +0,0 @@ - - - - - 4.0.0 - - io.ceresdb - tests - 1.0-SNAPSHOT - - ceresdb-integration-test - https://github.com/apache/incubator-horaedb/ - - - UTF-8 - 1.8 - 1.8 - 2.17.1 - 1.0.1 - - - - - org.apache.logging.log4j - log4j-core - ${log4j.version} - - - org.apache.logging.log4j - log4j-api - ${log4j.version} - - - org.apache.logging.log4j - log4j-slf4j-impl - ${log4j.version} - - - - io.ceresdb - ceresdb-all - ${ceresdb.version} - - - junit - junit - 4.13.1 - - - - - - - - org.codehaus.mojo - exec-maven-plugin - 1.6.0 - - - - java - - - - - io.ceresdb.App - - - - - - diff --git a/integration_tests/sdk/java/src/main/java/io/ceresdb/App.java b/integration_tests/sdk/java/src/main/java/io/ceresdb/App.java deleted file mode 100644 index 720404d17d..0000000000 --- a/integration_tests/sdk/java/src/main/java/io/ceresdb/App.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package io.ceresdb; - -import io.ceresdb.models.*; -import io.ceresdb.options.CeresDBOptions; -import org.junit.Assert; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.LinkedList; -import java.util.List; -import java.util.concurrent.CompletableFuture; - -import static io.ceresdb.RouteMode.DIRECT; - -public class App { - private static final Logger LOGGER = LoggerFactory.getLogger(App.class); - - private static String TABLE = "table_for_java_tests" + System.currentTimeMillis(); - private static String HOST = "localhost"; - private static int PORT = 8831; - private static CeresDBClient CLIENT; - - static { - final CeresDBOptions opts = CeresDBOptions.newBuilder(HOST, PORT, DIRECT) // CeresDB default grpc port 8831,use DIRECT RouteMode - .database("public") // use database for client, can be overridden by the RequestContext in request - // maximum retry times when write fails - // (only some error codes will be retried, such as the routing table failure) - .writeMaxRetries(1) - // maximum retry times when read fails - // (only some error codes will be retried, such as the routing table failure) - .readMaxRetries(1).build(); - - CLIENT = new CeresDBClient(); - if (!CLIENT.init(opts)) { - throw new IllegalStateException("Fail to start CeresDBClient"); - } - } - - private static void query(long now, boolean addNewColumn) throws Throwable { - final SqlQueryRequest queryRequest = SqlQueryRequest.newBuilder() - .forTables(TABLE) // table name is optional. If not provided, SQL parser will parse the `ssql` to get the table name and do the routing automaticly - .sql("select * from %s where timestamp = %d", TABLE, now) - .build(); - final CompletableFuture> qf = CLIENT.sqlQuery(queryRequest); - final Result queryResult = qf.get(); - - Assert.assertTrue(queryResult.isOk()); - - final SqlQueryOk queryOk = queryResult.getOk(); - // TODO: add row equal assert - LOGGER.warn("result {}", queryOk.getRowList()); - Assert.assertEquals(2, queryOk.getRowCount()); - } - - private static void write(long now, boolean addNewColumn) throws Throwable { - List points = new LinkedList<>(); - for (int i = 0; i < 2; i++) { - Point.PointBuilder pointBuilder = Point.newPointBuilder(TABLE) - .setTimestamp(now) - .addTag("tag", String.format("tag-%d", i)) - .addField("value", Value.withInt8(10 + i)); - if (addNewColumn) { - pointBuilder = pointBuilder - .addTag("new-tag", String.format("new-tag-%d", i)); - } - - points.add(pointBuilder.build()); - } - final CompletableFuture> wf = CLIENT.write(new WriteRequest(points)); - final Result writeResult = wf.get(); - Assert.assertTrue(writeResult.isOk()); - } - - private static void checkAutoCreateTable() throws Throwable { - long now = System.currentTimeMillis(); - write(now, false); - query(now, false); - } - - private static void checkAutoAddColumns() throws Throwable { - long now = System.currentTimeMillis(); - write(now, true); - query(now, true); - } - - private static void run() throws Throwable { - checkAutoCreateTable(); - checkAutoAddColumns(); - } - - public static void main(String[] args) { - LOGGER.warn("Begin tests, table:{}", TABLE); - try { - run(); - } catch (Throwable e) { - LOGGER.error("Test failed", e); - System.exit(1); - } - - LOGGER.warn("Test finish."); - System.exit(0); - } -} diff --git a/integration_tests/sdk/java/src/main/resources/log4j2.xml b/integration_tests/sdk/java/src/main/resources/log4j2.xml deleted file mode 100644 index 0afc2b6368..0000000000 --- a/integration_tests/sdk/java/src/main/resources/log4j2.xml 
+++ /dev/null @@ -1,33 +0,0 @@ - - - - - - - - - - - - - - \ No newline at end of file diff --git a/integration_tests/sdk/rust/Cargo.toml b/integration_tests/sdk/rust/Cargo.toml deleted file mode 100644 index dccd331537..0000000000 --- a/integration_tests/sdk/rust/Cargo.toml +++ /dev/null @@ -1,32 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "rust-sdk-test" - -[package.license] -workspace = true - -[package.edition] -workspace = true - -[package.version] -workspace = true - -[dependencies] -horaedb-client = { workspace = true } -tokio = { workspace = true, features = ["full"] } diff --git a/integration_tests/sdk/rust/src/main.rs b/integration_tests/sdk/rust/src/main.rs deleted file mode 100644 index 07698fdb2e..0000000000 --- a/integration_tests/sdk/rust/src/main.rs +++ /dev/null @@ -1,353 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
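For orientation, the SDK test below reduces to a write-then-query round trip over the gRPC client. The following is a condensed sketch of that flow using only the types the test itself exercises (`Builder`, `Mode::Direct`, `RpcContext`, `PointBuilder`, `WriteRequest`, `SqlQueryRequest`); the table name `sdk_demo` is illustrative, and exact signatures should be checked against the `horaedb-client` crate.

```rust
use horaedb_client::{
    db_client::{Builder, DbClient, Mode},
    model::{
        sql_query::Request as SqlQueryRequest,
        value::Value,
        write::{point::PointBuilder, Request as WriteRequest},
    },
    RpcContext,
};

#[tokio::main]
async fn main() {
    // Direct mode talks to a single standalone server, as the test below does.
    let client = Builder::new("127.0.0.1:8831".to_string(), Mode::Direct).build();
    let ctx = RpcContext::default().database("public".to_string());

    // Write one point; the server auto-creates the table on first write.
    let point = PointBuilder::new("sdk_demo".to_string()) // illustrative table name
        .timestamp(1_700_000_000_000)
        .tag("name", Value::String("host-1".to_string()))
        .field("value", Value::Double(0.42))
        .build()
        .unwrap();
    let mut write_req = WriteRequest::default();
    write_req.add_points(vec![point]);
    let write_resp = client.write(&ctx, &write_req).await.unwrap();
    println!("write ok, success:{} failed:{}", write_resp.success, write_resp.failed);

    // Read the point back over the same client.
    let query_req = SqlQueryRequest {
        tables: vec!["sdk_demo".to_string()],
        sql: "SELECT * FROM sdk_demo".to_string(),
    };
    let resp = client.sql_query(&ctx, &query_req).await.unwrap();
    println!("query ok, rows:{} affected:{}", resp.rows.len(), resp.affected_rows);
}
```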
- -use std::{ - sync::Arc, - time::{self, SystemTime}, -}; - -use horaedb_client::{ - db_client::{Builder, DbClient, Mode}, - model::{ - sql_query::{Request as SqlQueryRequest, Response as SqlQueryResponse}, - value::Value, - write::{point::PointBuilder, Request as WriteRequest}, - }, - RpcContext, -}; - -const ENDPOINT: &str = "127.0.0.1:8831"; -const BLOCKED_TABLE: &str = "block_test_table"; - -struct TestDatas { - col_names: Vec, - rows: Vec>, -} - -impl TestDatas { - fn pick_rows_for_write(&self, new_column: bool) -> Vec> { - if !new_column { - self.rows.iter().take(2).cloned().collect::>() - } else { - vec![self.rows[2].clone(), self.rows[3].clone()] - } - } - - fn pick_rows_for_query_check(&self, new_column: bool) -> Vec> { - let mut expected_rows = Vec::new(); - if !new_column { - let rows = self - .rows - .iter() - .take(2) - .map(|row| row.iter().take(4).cloned().collect::>()); - - for row in rows { - let col_names = self.col_names.iter().take(4).cloned(); - let row = col_names.zip(row.into_iter()).collect::>(); - expected_rows.push(row); - } - } else { - let rows = self.rows.iter().cloned(); - - for row in rows { - let col_names = self.col_names.iter().cloned(); - let row = col_names.zip(row.into_iter()).collect::>(); - expected_rows.push(row); - } - }; - - expected_rows - } -} - -#[tokio::main] -async fn main() { - println!("Begin test, endpoint:{ENDPOINT}"); - - let client = Builder::new(ENDPOINT.to_string(), Mode::Direct).build(); - let rpc_ctx = RpcContext::default().database("public".to_string()); - let now = current_timestamp_ms(); - - let test_datas = generate_test_datas(now); - test_auto_create_table(&client, &rpc_ctx, now, &test_datas).await; - test_add_column(&client, &rpc_ctx, now, &test_datas).await; - test_block_table(&client, &rpc_ctx, now).await; - - drop_test_table_if_exists(&client, &rpc_ctx, now).await; - drop_table_if_exists(&client, &rpc_ctx, BLOCKED_TABLE).await; - print!("Test done") -} - -async fn test_auto_create_table( - client: &Arc, - rpc_ctx: &RpcContext, - timestamp: i64, - test_datas: &TestDatas, -) { - println!("Test auto create table"); - - drop_test_table_if_exists(client, rpc_ctx, timestamp).await; - - write(client, rpc_ctx, timestamp, test_datas, false).await; - sql_query(client, rpc_ctx, timestamp, test_datas, false).await; -} - -async fn test_add_column( - client: &Arc, - rpc_ctx: &RpcContext, - timestamp: i64, - test_datas: &TestDatas, -) { - println!("Test add column"); - - write(client, rpc_ctx, timestamp, test_datas, true).await; - sql_query(client, rpc_ctx, timestamp, test_datas, true).await; -} - -async fn test_block_table(client: &Arc, rpc_ctx: &RpcContext, timestamp: i64) { - println!("Test auto create table"); - - drop_table_if_exists(client, rpc_ctx, BLOCKED_TABLE).await; - create_table(client, rpc_ctx, BLOCKED_TABLE).await; - - // try to write, should return table blocked error - let mut write_req = WriteRequest::default(); - let mut points = Vec::new(); - let builder = PointBuilder::new(BLOCKED_TABLE.to_string()) - .timestamp(timestamp) - .tag("name", Value::String("name1".to_string())) - .field("value", Value::Double(0.42)); - let point = builder.build().unwrap(); - points.push(point); - write_req.add_points(points); - if let Err(e) = client.write(rpc_ctx, &write_req).await { - let e = e.to_string(); - assert!(e.contains("Table operation is blocked")); - } else { - panic!("it should return blocked error"); - } - - // try to query, should be blocked, too - let query_req = SqlQueryRequest { - tables: vec![BLOCKED_TABLE.to_string()], 
- sql: format!("SELECT * from {}", BLOCKED_TABLE), - }; - if let Err(e) = client.sql_query(rpc_ctx, &query_req).await { - let e = e.to_string(); - assert!(e.contains("Table operation is blocked")); - } else { - panic!("it should return blocked error"); - } -} - -async fn drop_test_table_if_exists( - client: &Arc, - rpc_ctx: &RpcContext, - timestamp: i64, -) { - let test_table = format!("test_table_{timestamp}"); - drop_table_if_exists(client, rpc_ctx, &test_table).await; -} - -async fn drop_table_if_exists(client: &Arc, rpc_ctx: &RpcContext, table: &str) { - let query_req = SqlQueryRequest { - tables: vec![table.to_string()], - sql: format!("DROP TABLE IF EXISTS {table}"), - }; - let _ = client.sql_query(rpc_ctx, &query_req).await.unwrap(); -} - -async fn create_table(client: &Arc, rpc_ctx: &RpcContext, table: &str) { - let query_req = SqlQueryRequest { - tables: vec![table.to_string()], - sql: format!( - " - CREATE TABLE {} ( - name string TAG, - value double NOT NULL, - t timestamp NOT NULL, - timestamp KEY (t))", - table - ), - }; - let _ = client.sql_query(rpc_ctx, &query_req).await.unwrap(); -} - -async fn sql_query( - client: &Arc, - rpc_ctx: &RpcContext, - timestamp: i64, - test_data: &TestDatas, - new_column: bool, -) { - let all_columns = test_data.col_names.clone(); - let selections = if !new_column { - format!( - "`{}`,`{}`,`{}`,`{}`", - all_columns[0], all_columns[1], all_columns[2], all_columns[3] - ) - } else { - format!( - "`{}`,`{}`,`{}`,`{}`,`{}`,`{}`", - all_columns[0], - all_columns[1], - all_columns[2], - all_columns[3], - all_columns[4], - all_columns[5] - ) - }; - - let test_table = format!("test_table_{timestamp}"); - let query_req = SqlQueryRequest { - tables: vec![test_table.clone()], - sql: format!("SELECT {selections} from {test_table}"), - }; - let resp = client.sql_query(rpc_ctx, &query_req).await.unwrap(); - assert_eq!(resp.affected_rows, 0); - - let resp_rows = extract_rows_from_sql_query(&resp); - let expected_rows = test_data.pick_rows_for_query_check(new_column); - let expected = format_rows(&expected_rows); - let actual = format_rows(&resp_rows); - assert_eq!(expected, actual); -} - -async fn write( - client: &Arc, - rpc_ctx: &RpcContext, - timestamp: i64, - test_data: &TestDatas, - new_column: bool, -) { - let test_table = format!("test_table_{timestamp}"); - let mut write_req = WriteRequest::default(); - let mut points = Vec::new(); - - let rows = test_data.pick_rows_for_write(new_column); - for row in rows { - let point = { - let builder = PointBuilder::new(test_table.clone()) - .timestamp(timestamp) - .tag(test_data.col_names[1].clone(), row[1].clone()) - .field(test_data.col_names[2].clone(), row[2].clone()) - .field(test_data.col_names[3].clone(), row[3].clone()); - - if new_column { - builder - .tag(test_data.col_names[4].clone(), row[4].clone()) - .field(test_data.col_names[5].clone(), row[5].clone()) - .build() - .unwrap() - } else { - builder.build().unwrap() - } - }; - points.push(point); - } - write_req.add_points(points); - - let resp = client.write(rpc_ctx, &write_req).await.unwrap(); - assert_eq!(resp.success, 2); - assert_eq!(resp.failed, 0); -} - -fn generate_test_datas(timestamp: i64) -> TestDatas { - let col_names = vec![ - "timestamp".to_string(), - "old-tag".to_string(), - "old-field0".to_string(), - "old-field1".to_string(), - "new-tag".to_string(), - "new-field".to_string(), - ]; - - let rows = vec![ - vec![ - Value::Timestamp(timestamp), - Value::String("old-tagv0".to_string()), - Value::Int64(123), - Value::UInt64(1222223333334), 
- Value::String("".to_string()), - Value::UInt64(0), - ], - vec![ - Value::Timestamp(timestamp), - Value::String("old-tagv1".to_string()), - Value::Int64(124), - Value::UInt64(1222223333335), - Value::String("".to_string()), - Value::UInt64(0), - ], - vec![ - Value::Timestamp(timestamp), - Value::String("old-tagv0".to_string()), - Value::Int64(123), - Value::UInt64(1222223333334), - Value::String("new-tagv0".to_string()), - Value::UInt64(666666), - ], - vec![ - Value::Timestamp(timestamp), - Value::String("old-tagv1".to_string()), - Value::Int64(124), - Value::UInt64(1222223333335), - Value::String("new-tagv1".to_string()), - Value::UInt64(88888888), - ], - ]; - - TestDatas { col_names, rows } -} - -fn current_timestamp_ms() -> i64 { - SystemTime::now() - .duration_since(time::UNIX_EPOCH) - .map(|duration| duration.as_millis() as i64) - .unwrap_or(0) -} - -fn extract_rows_from_sql_query(resp: &SqlQueryResponse) -> Vec> { - let mut rows = Vec::with_capacity(resp.rows.len()); - for row in &resp.rows { - let col_vals = row - .columns() - .iter() - .map(|col| (col.name().to_string(), col.value().clone())) - .collect(); - rows.push(col_vals); - } - - rows -} - -fn format_rows(rows: &[Vec<(String, Value)>]) -> Vec { - let mut sorted_row_strs = rows - .iter() - .map(|row| { - let mut sorted_row = row.clone(); - sorted_row.sort_by(|col1, col2| col1.0.cmp(&col2.0)); - let sorted_row = sorted_row.into_iter().map(|col| col.1).collect::>(); - - format!("{sorted_row:?}") - }) - .collect::>(); - sorted_row_strs.sort(); - - sorted_row_strs -} diff --git a/integration_tests/src/database.rs b/integration_tests/src/database.rs deleted file mode 100644 index e598a46ae7..0000000000 --- a/integration_tests/src/database.rs +++ /dev/null @@ -1,489 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::{ - collections::HashMap, env, fmt::Display, fs::File, process::Child, sync::Arc, time::Duration, -}; - -use async_trait::async_trait; -use horaedb_client::{ - db_client::{Builder, DbClient, Mode}, - model::sql_query::{display::CsvFormatter, Request}, - RpcContext, -}; -use reqwest::{ClientBuilder, StatusCode, Url}; -use sqlness::{Database, QueryContext}; - -const SERVER_GRPC_ENDPOINT_ENV: &str = "HORAEDB_SERVER_GRPC_ENDPOINT"; -const SERVER_HTTP_ENDPOINT_ENV: &str = "HORAEDB_SERVER_HTTP_ENDPOINT"; -const HORAEDB_BINARY_PATH_ENV: &str = "HORAEDB_BINARY_PATH"; -const HORAEDB_STDOUT_FILE_ENV: &str = "HORAEDB_STDOUT_FILE"; -const HORAEDB_CONFIG_FILE_ENV: &str = "HORAEDB_CONFIG_FILE"; - -const HORAEMETA_BINARY_PATH_ENV: &str = "HORAEMETA_BINARY_PATH"; -const HORAEMETA_CONFIG_ENV: &str = "HORAEMETA_CONFIG_PATH"; -const HORAEMETA_STDOUT_FILE_ENV: &str = "HORAEMETA_STDOUT_FILE"; -const HORAEDB_CONFIG_FILE_0_ENV: &str = "HORAEDB_CONFIG_FILE_0"; -const HORAEDB_CONFIG_FILE_1_ENV: &str = "HORAEDB_CONFIG_FILE_1"; -const CLUSTER_HORAEDB_STDOUT_FILE_0_ENV: &str = "CLUSTER_HORAEDB_STDOUT_FILE_0"; -const CLUSTER_HORAEDB_STDOUT_FILE_1_ENV: &str = "CLUSTER_HORAEDB_STDOUT_FILE_1"; -const CLUSTER_HORAEDB_HEALTH_CHECK_INTERVAL_SECONDS: usize = 5; - -const HORAEDB_STDOUT_FILE_2_ENV: &str = "HORAEDB_STDOUT_FILE_2"; -const HORAEDB_CONFIG_FILE_2_ENV: &str = "HORAEDB_CONFIG_FILE_2"; - -const HORAEDB_SERVER_ADDR: &str = "HORAEDB_SERVER_ADDR"; - -// Used to access HoraeDB by http service. -#[derive(Clone)] -struct HttpClient { - client: reqwest::Client, - endpoint: String, -} - -impl HttpClient { - fn new(endpoint: String) -> Self { - let client = ClientBuilder::new() - .build() - .expect("should succeed to build http client"); - Self { client, endpoint } - } -} - -#[async_trait] -pub trait Backend { - fn start() -> Self; - async fn wait_for_ready(&self); - fn stop(&mut self); -} - -pub struct HoraeDBServer { - server_process: Child, -} - -pub struct HoraeDBCluster { - server0: HoraeDBServer, - server1: HoraeDBServer, - horaemeta_process: Child, - - /// Used in meta health check - db_client: Arc, - meta_stable_check_sql: String, -} - -pub struct HoraeDBCompactionOffload { - server: HoraeDBServer, -} - -impl HoraeDBServer { - fn spawn(bin: String, config: String, stdout: String) -> Self { - let local_ip = local_ip_address::local_ip() - .expect("fail to get local ip") - .to_string(); - println!("Start server at {bin} with config {config} and stdout {stdout}, with local ip:{local_ip}"); - - let stdout = File::create(stdout).expect("Failed to create stdout file"); - let server_process = std::process::Command::new(&bin) - .env(HORAEDB_SERVER_ADDR, local_ip) - .args(["--config", &config]) - .stdout(stdout) - .spawn() - .unwrap_or_else(|_| panic!("Failed to start server at {bin:?}")); - Self { server_process } - } -} - -#[async_trait] -impl Backend for HoraeDBServer { - fn start() -> Self { - let config = env::var(HORAEDB_CONFIG_FILE_ENV).expect("Cannot parse horaedb config env"); - let bin = env::var(HORAEDB_BINARY_PATH_ENV).expect("Cannot parse binary path env"); - let stdout = env::var(HORAEDB_STDOUT_FILE_ENV).expect("Cannot parse stdout env"); - Self::spawn(bin, config, stdout) - } - - async fn wait_for_ready(&self) { - tokio::time::sleep(Duration::from_secs(10)).await - } - - fn stop(&mut self) { - self.server_process.kill().expect("Failed to kill server"); - } -} - -impl HoraeDBCluster { - async fn check_meta_stable(&self) -> bool { - let query_ctx = RpcContext { - database: Some("public".to_string()), - 
timeout: None, - }; - - let query_req = Request { - tables: vec![], - sql: self.meta_stable_check_sql.clone(), - }; - - let result = self.db_client.sql_query(&query_ctx, &query_req).await; - result.is_ok() - } -} - -#[async_trait] -impl Backend for HoraeDBCluster { - fn start() -> Self { - let horaemeta_bin = - env::var(HORAEMETA_BINARY_PATH_ENV).expect("Cannot parse horaedb binary path env"); - let horaemeta_config = - env::var(HORAEMETA_CONFIG_ENV).expect("Cannot parse horaemeta config path env"); - let horaemeta_stdout = - env::var(HORAEMETA_STDOUT_FILE_ENV).expect("Cannot parse horaemeta stdout env"); - println!("Start horaemeta at {horaemeta_bin} with config {horaemeta_config} and stdout {horaemeta_stdout}"); - - let horaemeta_stdout = - File::create(horaemeta_stdout).expect("Cannot create horaemeta stdout"); - let horaemeta_process = std::process::Command::new(&horaemeta_bin) - .args(["--config", &horaemeta_config]) - .stdout(horaemeta_stdout) - .spawn() - .expect("Failed to spawn process to start server"); - - println!("wait for horaemeta ready...\n"); - std::thread::sleep(Duration::from_secs(10)); - - let horaedb_bin = - env::var(HORAEDB_BINARY_PATH_ENV).expect("Cannot parse horaedb binary path env"); - let horaedb_config_0 = - env::var(HORAEDB_CONFIG_FILE_0_ENV).expect("Cannot parse horaedb0 config env"); - let horaedb_config_1 = - env::var(HORAEDB_CONFIG_FILE_1_ENV).expect("Cannot parse horaedb1 config env"); - let stdout0 = - env::var(CLUSTER_HORAEDB_STDOUT_FILE_0_ENV).expect("Cannot parse horaedb0 stdout env"); - let stdout1 = - env::var(CLUSTER_HORAEDB_STDOUT_FILE_1_ENV).expect("Cannot parse horaedb1 stdout env"); - - let server0 = HoraeDBServer::spawn(horaedb_bin.clone(), horaedb_config_0, stdout0); - let server1 = HoraeDBServer::spawn(horaedb_bin, horaedb_config_1, stdout1); - - // Meta stable check context - let endpoint = env::var(SERVER_GRPC_ENDPOINT_ENV).unwrap_or_else(|_| { - panic!("Cannot read server endpoint from env {SERVER_GRPC_ENDPOINT_ENV:?}") - }); - let db_client = Builder::new(endpoint, Mode::Proxy).build(); - - let meta_stable_check_sql = format!( - r#"CREATE TABLE `stable_check_{}` - (`name` string TAG, `value` double NOT NULL, `t` timestamp NOT NULL, TIMESTAMP KEY(t))"#, - uuid::Uuid::new_v4() - ); - - Self { - server0, - server1, - horaemeta_process, - db_client, - meta_stable_check_sql, - } - } - - async fn wait_for_ready(&self) { - println!("wait for cluster service initialized..."); - tokio::time::sleep(Duration::from_secs(20_u64)).await; - - println!("wait for cluster service stable begin..."); - let mut wait_cnt = 0; - let wait_max = 6; - loop { - if wait_cnt >= wait_max { - println!( - "wait too long for cluster service stable, maybe somethings went wrong..." 
- ); - return; - } - - if self.check_meta_stable().await { - println!("wait for cluster service stable finished..."); - return; - } - - wait_cnt += 1; - let has_waited = wait_cnt * CLUSTER_HORAEDB_HEALTH_CHECK_INTERVAL_SECONDS; - println!("waiting for cluster service stable, has_waited:{has_waited}s"); - tokio::time::sleep(Duration::from_secs( - CLUSTER_HORAEDB_HEALTH_CHECK_INTERVAL_SECONDS as u64, - )) - .await; - } - } - - fn stop(&mut self) { - self.server0.stop(); - self.server1.stop(); - self.horaemeta_process - .kill() - .expect("Failed to kill horaemeta"); - } -} - -#[async_trait] -impl Backend for HoraeDBCompactionOffload { - fn start() -> Self { - let config = env::var(HORAEDB_CONFIG_FILE_2_ENV).expect("Cannot parse horaedb2 config env"); - let bin = env::var(HORAEDB_BINARY_PATH_ENV).expect("Cannot parse binary path env"); - let stdout = env::var(HORAEDB_STDOUT_FILE_2_ENV).expect("Cannot parse stdout2 env"); - Self { - server: HoraeDBServer::spawn(bin, config, stdout), - } - } - - async fn wait_for_ready(&self) { - tokio::time::sleep(Duration::from_secs(10)).await - } - - fn stop(&mut self) { - self.server - .server_process - .kill() - .expect("Failed to kill server"); - } -} - -pub struct HoraeDB { - backend: T, - db_client: Arc, - // FIXME: Currently, the new protocol does not support by the dbclient but is exposed by http - // service. And remove this client when the new protocol is supported by the dbclient. - http_client: HttpClient, -} - -#[derive(Debug, Clone, Copy)] -enum Protocol { - Sql, - InfluxQL, - OpenTSDB, -} - -impl TryFrom<&str> for Protocol { - type Error = String; - - fn try_from(s: &str) -> Result { - let protocol = match s { - "influxql" => Protocol::InfluxQL, - "sql" => Protocol::Sql, - "opentsdb" => Protocol::OpenTSDB, - _ => return Err(format!("unknown protocol:{s}")), - }; - - Ok(protocol) - } -} - -#[derive(Debug, Clone, Copy)] -enum Command { - Flush, - Compact, -} - -impl TryFrom<&str> for Command { - type Error = String; - - fn try_from(s: &str) -> Result { - let cmd = match s { - "flush" => Self::Flush, - "compact" => Self::Compact, - _ => return Err(format!("Unknown command:{s}")), - }; - - Ok(cmd) - } -} - -struct ProtocolParser; - -impl ProtocolParser { - fn parse_from_ctx(&self, ctx: &HashMap) -> Result { - ctx.get("protocol") - .map(|s| Protocol::try_from(s.as_str())) - .unwrap_or(Ok(Protocol::Sql)) - } -} - -#[async_trait] -impl Database for HoraeDB { - async fn query(&self, context: QueryContext, query: String) -> Box { - let protocol = ProtocolParser - .parse_from_ctx(&context.context) - .expect("parse protocol"); - - if let Some(pre_cmd) = Self::parse_pre_cmd(&context.context) { - let cmd = pre_cmd.expect("parse command"); - match cmd { - Command::Flush => { - println!("Flush memtable..."); - if let Err(e) = self.execute_flush().await { - panic!("Execute flush command failed, err:{e}"); - } - } - Command::Compact => { - println!("Compact table..."); - if let Err(e) = self.execute_compact().await { - panic!("Execute compact command failed, err:{e}"); - } - } - } - } - - match protocol { - Protocol::Sql => Self::execute_sql(query, self.db_client.clone()).await, - Protocol::InfluxQL => { - let http_client = self.http_client.clone(); - Self::execute_influxql(query, http_client, context.context).await - } - Protocol::OpenTSDB => { - let http_client = self.http_client.clone(); - Self::execute_opentsdb(query, http_client, context.context).await - } - } - } -} - -impl HoraeDB { - pub async fn create() -> HoraeDB { - let backend = T::start(); - 
backend.wait_for_ready().await; - - let endpoint = env::var(SERVER_GRPC_ENDPOINT_ENV).unwrap_or_else(|_| { - panic!("Cannot read server endpoint from env {SERVER_GRPC_ENDPOINT_ENV:?}") - }); - let db_client = Builder::new(endpoint, Mode::Proxy).build(); - let http_endpoint = env::var(SERVER_HTTP_ENDPOINT_ENV).unwrap_or_else(|_| { - panic!("Cannot read server endpoint from env {SERVER_HTTP_ENDPOINT_ENV:?}") - }); - - HoraeDB { - backend, - db_client, - http_client: HttpClient::new(http_endpoint), - } - } - - pub fn stop(&mut self) { - self.backend.stop(); - } -} - -impl HoraeDB { - fn parse_pre_cmd(ctx: &HashMap) -> Option> { - ctx.get("pre_cmd").map(|s| Command::try_from(s.as_str())) - } - - async fn execute_flush(&self) -> Result<(), String> { - let url = format!("http://{}/debug/flush_memtable", self.http_client.endpoint); - let resp = self.http_client.client.post(url).send().await.unwrap(); - - if resp.status() == StatusCode::OK { - return Ok(()); - } - - Err(resp.text().await.unwrap_or_else(|e| format!("{e:?}"))) - } - - async fn execute_compact(&self) -> Result<(), String> { - // TODO(leslie): Improve code reusability. The following code is similar to - // `execute_flush()`. - let url = format!("http://{}/debug/compact_table", self.http_client.endpoint); - let resp = self.http_client.client.post(url).send().await.unwrap(); - - if resp.status() == StatusCode::OK { - return Ok(()); - } - - Err(resp.text().await.unwrap_or_else(|e| format!("{e:?}"))) - } - - async fn execute_influxql( - query: String, - http_client: HttpClient, - params: HashMap, - ) -> Box { - let url = format!("http://{}/influxdb/v1/query", http_client.endpoint); - let resp = match params.get("method") { - Some(v) if v == "get" => { - let url = Url::parse_with_params(&url, &[("q", query)]).unwrap(); - http_client.client.get(url).send().await.unwrap() - } - _ => http_client - .client - .post(url) - .form(&[("q", query)]) - .send() - .await - .unwrap(), - }; - let query_res = match resp.text().await { - Ok(text) => text, - Err(e) => format!("Failed to do influxql query, err:{e:?}"), - }; - Box::new(query_res) - } - - async fn execute_opentsdb( - query: String, - http_client: HttpClient, - _params: HashMap, - ) -> Box { - let query = query.trim().trim_end_matches(';'); - let url = format!("http://{}/opentsdb/api/query", http_client.endpoint); - let resp = http_client - .client - .post(url) - .header("content-type", "application/json") - .body(query.to_string()) - .send() - .await - .unwrap(); - let query_res = match resp.text().await { - Ok(text) => text, - Err(e) => format!("Failed to do influxql query, err:{e:?}"), - }; - Box::new(query_res) - } - - async fn execute_sql(query: String, client: Arc) -> Box { - let query_ctx = RpcContext { - database: Some("public".to_string()), - timeout: None, - }; - - let query_req = Request { - tables: vec![], - sql: query, - }; - - let result = client.sql_query(&query_ctx, &query_req).await; - - Box::new(match result { - Ok(resp) => { - if resp.rows.is_empty() { - format!("affected_rows: {}", resp.affected_rows) - } else { - format!("{}", CsvFormatter { resp }) - } - } - Err(e) => format!("Failed to execute query, err: {e:?}"), - }) - } -} diff --git a/integration_tests/src/main.rs b/integration_tests/src/main.rs deleted file mode 100644 index e2c63f10c6..0000000000 --- a/integration_tests/src/main.rs +++ /dev/null @@ -1,119 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#![feature(let_chains)] - -use std::{env, fmt::Display, path::Path}; - -use anyhow::Result; -use async_trait::async_trait; -use database::{Backend, HoraeDB, HoraeDBCompactionOffload}; -use sqlness::{Database, EnvController, QueryContext, Runner}; - -use crate::database::{HoraeDBCluster, HoraeDBServer}; - -mod database; - -const CASE_ROOT_PATH_ENV: &str = "HORAEDB_TEST_CASE_PATH"; -const ENV_FILTER_ENV: &str = "HORAEDB_ENV_FILTER"; -const RUN_MODE: &str = "HORAEDB_INTEGRATION_TEST_BIN_RUN_MODE"; - -struct HoraeDBController; -struct UntypedHoraeDB { - db: DbRef, -} - -pub trait StoppableDatabase: Database { - fn stop(&mut self); -} - -pub type DbRef = Box; - -impl StoppableDatabase for HoraeDB { - fn stop(&mut self) { - self.stop(); - } -} - -#[async_trait] -impl Database for UntypedHoraeDB { - async fn query(&self, context: QueryContext, query: String) -> Box { - self.db.query(context, query).await - } -} - -#[async_trait] -impl EnvController for HoraeDBController { - type DB = UntypedHoraeDB; - - async fn start(&self, env: &str, _config: Option<&Path>) -> Self::DB { - println!("start with env {env}"); - let db = match env { - "local" => Box::new(HoraeDB::::create().await) as DbRef, - "cluster" => Box::new(HoraeDB::::create().await) as DbRef, - "compaction_offload" => { - Box::new(HoraeDB::::create().await) as DbRef - } - _ => panic!("invalid env {env}"), - }; - - UntypedHoraeDB { db } - } - - async fn stop(&self, env: &str, mut database: Self::DB) { - println!("stop with env {env}"); - database.db.stop(); - } -} - -#[tokio::main] -async fn main() -> Result<()> { - let controller = HoraeDBController; - let run_mode = env::var(RUN_MODE).unwrap_or_else(|_| "sql_test".to_string()); - - match run_mode.as_str() { - // Run sql tests powered by `sqlness`. - "sql_test" => { - let case_dir = env::var(CASE_ROOT_PATH_ENV)?; - let env_filter = env::var(ENV_FILTER_ENV).unwrap_or_else(|_| ".*".to_string()); - let config = sqlness::ConfigBuilder::default() - .case_dir(case_dir) - .env_filter(env_filter) - .follow_links(true) - .build()?; - let runner = Runner::new(config, controller); - runner.run().await?; - } - // Just build the cluster testing env. - "build_cluster" => { - let _ = controller.start("cluster", None).await; - } - // Just build the local testing env. - "build_local" => { - let _ = controller.start("local", None).await; - } - // Just build the compaction offload testing env. - "build_compaction_offload" => { - let _ = controller.start("compaction_offload", None).await; - } - other => { - panic!("Unknown run mode:{other}") - } - } - - Ok(()) -} diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 10480e03e6..4c621ca810 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -16,5 +16,5 @@ # under the License. 
[toolchain] -channel = "nightly-2024-01-28" +channel = "nightly-2024-10-15" components = [ "rustfmt", "clippy" ] diff --git a/rustfmt.toml b/rustfmt.toml index cf1940875c..6f3bc6a3ff 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -30,7 +30,3 @@ group_imports = "StdExternalCrate" normalize_comments = true normalize_doc_attributes = true - -ignore = [ - "proto", -] diff --git a/scripts/run-tsbs.sh b/scripts/run-tsbs.sh deleted file mode 100755 index d2e9e063d4..0000000000 --- a/scripts/run-tsbs.sh +++ /dev/null @@ -1,128 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -# This bash supports these settings by enviroment variables: -# - RESULT_FILE -# - DATA_FILE -# - LOG_DIR -# - HORAEDB_CONFIG_FILE -# - HORAEDB_ADDR -# - WRITE_WORKER_NUM -# - WRITE_BATCH_SIZE - -export CURR_DIR=$(pwd) -export DEFAULT_RESULT_FILE=${CURR_DIR}/tsbs/result.md -export RESULT_FILE=${RESULT_FILE:-${DEFAULT_RESULT_FILE}} -export HORAEDB_CONFIG_FILE=${HORAEDB_CONFIG_FILE:-docs/minimal.toml} -export LOG_DIR=${LOG_DIR:-${CURR_DIR}/logs} -export HORAEDB_ADDR=${HORAEDB_ADDR:-127.0.0.1:8831} -export HORAEDB_PID_FILE=${CURR_DIR}/horaedb-server.pid -export WRITE_WORKER_NUM=${WRITE_WORKER_NUM:-36} -export WRITE_BATCH_SIZE=${WRITE_BATCH_SIZE:-500} -# Where generated data stored -export DATA_FILE=${DATA_FILE:-data.out} -# How many values in host tag -export HOST_NUM=${HOST_NUM:-10000} - -# Used for `generate_queries.sh` start. -export TS_START="2022-09-05T00:00:00Z" -export TS_END="2022-09-05T12:00:01Z" -export EXE_FILE_NAME=${CURR_DIR}/tsbs/tsbs_generate_queries -# where generated queries stored -export BULK_DATA_DIR=${CURR_DIR}/tsbs/data -export FORMATS=ceresdb -export QUERY_TYPES="\ -single-groupby-1-1-1 \ -single-groupby-1-1-12 \ -single-groupby-1-8-1 \ -single-groupby-5-1-1 \ -single-groupby-5-1-12 \ -single-groupby-5-8-1" -# Used for `generate_queries.sh` end. - -set -x - -kill_ceresdb_server() { - if [ -f ${HORAEDB_PID_FILE} ]; then - pid=$(cat ${HORAEDB_PID_FILE}) - if kill -0 "$pid" 2>/dev/null; then - kill "$pid" - fi - fi -} - -trap cleanup EXIT -cleanup() { - ls -lha ${LOG_DIR} - ls -lha ${CURR_DIR}/tsbs - ls -lha ${BULK_DATA_DIR} - - kill_ceresdb_server -} - -mkdir -p ${LOG_DIR} - -kill_ceresdb_server -nohup ./target/release/horaedb-server -c ${HORAEDB_CONFIG_FILE} > ${LOG_DIR}/server.log & echo $! > ${HORAEDB_PID_FILE} - -git clone -b feat-ceresdb --depth 1 --single-branch https://github.com/CeresDB/tsbs.git - -cd tsbs -go build ./cmd/tsbs_generate_data -go build ./cmd/tsbs_load_ceresdb -go build ./cmd/tsbs_generate_queries -go build ./cmd/tsbs_run_queries_ceresdb - -if [ ! -f ${DATA_FILE} ]; then - # Generate benchmark data if it does not exist. 
- ./tsbs_generate_data \ - --use-case="cpu-only" \ - --seed=123 \ - --initial-scale=${HOST_NUM} \ - --scale=${HOST_NUM} \ - --timestamp-start="${TS_START}" \ - --timestamp-end="${TS_END}" \ - --log-interval="60s" \ - --format="${FORMATS}" > ${DATA_FILE} -fi - - -# Write data to horaedb -./tsbs_load_ceresdb --ceresdb-addr=${HORAEDB_ADDR} --file ${DATA_FILE} --batch-size ${WRITE_BATCH_SIZE} --workers ${WRITE_WORKER_NUM} | tee ${LOG_DIR}/write.log - -# Generate queries for query -./scripts/generate_queries.sh - -# Run queries against horaedb -# TODO: support more kinds of queries besides 5-8-1. -cat ${BULK_DATA_DIR}/ceresdb-single-groupby-5-8-1-queries.gz | gunzip | ./tsbs_run_queries_ceresdb --ceresdb-addr=${HORAEDB_ADDR} | tee ${LOG_DIR}/5-8-1.log - -# Clean the result file -rm ${RESULT_FILE} - -# Output write & query result -echo '# Write' >> ${RESULT_FILE} -echo '```bash' >> ${RESULT_FILE} -cat ${LOG_DIR}/write.log >> ${RESULT_FILE} -echo '```' >> ${RESULT_FILE} - -echo '# Query' >> ${RESULT_FILE} -echo '```bash' >> ${RESULT_FILE} -cat ${LOG_DIR}/5-8-1.log >> ${RESULT_FILE} -echo '```' >> ${RESULT_FILE} diff --git a/src/analytic_engine/Cargo.toml b/src/analytic_engine/Cargo.toml deleted file mode 100644 index d6c642eb75..0000000000 --- a/src/analytic_engine/Cargo.toml +++ /dev/null @@ -1,107 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[package] -name = "analytic_engine" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[features] -test = ["tempfile"] -wal-table-kv = ["wal/wal-table-kv"] -wal-message-queue = ["wal/wal-message-queue"] -wal-rocksdb = ["wal/wal-rocksdb"] -wal-local-storage = ["wal/wal-local-storage"] - -[dependencies] -# In alphabetical order -anyhow = { workspace = true } -arc-swap = "1.4.0" -arena = { workspace = true } -arrow = { workspace = true } -async-scoped = { version = "0.9.0", features = ["use-tokio"] } -async-stream = { workspace = true } -async-trait = { workspace = true } -atomic_enum = { workspace = true } -base64 = { workspace = true } -bytes_ext = { workspace = true } -cluster = { workspace = true } -codec = { workspace = true } -common_types = { workspace = true } -datafusion = { workspace = true } -future_ext = { workspace = true } -futures = { workspace = true } -generic_error = { workspace = true } -hash_ext = { workspace = true } -hex = { workspace = true } -horaedbproto = { workspace = true } -hyperloglog = { workspace = true } -id_allocator = { workspace = true } -itertools = { workspace = true } -lazy_static = { workspace = true } -logger = { workspace = true } -lru = { workspace = true } -macros = { workspace = true } -message_queue = { workspace = true } -meta_client = { workspace = true } -metric_ext = { workspace = true } -object_store = { workspace = true } -parquet = { workspace = true } -parquet_ext = { workspace = true } -prometheus = { workspace = true } -prost = { workspace = true } -remote_engine_client = { workspace = true } -reqwest = { workspace = true } -router = { workspace = true } -runtime = { workspace = true } -sampling_cache = { workspace = true } -serde = { workspace = true } -serde_json = { workspace = true } -size_ext = { workspace = true } -skiplist = { path = "../components/skiplist" } -smallvec = { workspace = true } -snafu = { workspace = true } -table_engine = { workspace = true } -table_kv = { workspace = true } -tempfile = { workspace = true, optional = true } -thiserror = { workspace = true } -time_ext = { workspace = true } -tokio = { workspace = true } -tonic = { workspace = true } -trace_metric = { workspace = true } -url = "2.2" -wal = { workspace = true } -xorfilter-rs = { workspace = true } - -[dev-dependencies] -common_types = { workspace = true, features = ["test"] } -env_logger = { workspace = true } -pin-project-lite = { workspace = true } -rand = { workspace = true } -tempfile = { workspace = true } -test_util = { workspace = true } -wal = { workspace = true, features = ["wal-message-queue", "wal-rocksdb", "wal-table-kv"] } diff --git a/src/analytic_engine/src/compaction/compactor.rs b/src/analytic_engine/src/compaction/compactor.rs deleted file mode 100644 index d367ea5625..0000000000 --- a/src/analytic_engine/src/compaction/compactor.rs +++ /dev/null @@ -1,268 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::cmp; - -use common_types::request_id::RequestId; -use logger::{debug, info}; -use snafu::ResultExt; - -use crate::{ - compaction::{ - runner::{ - CompactionRunner, CompactionRunnerPtr, CompactionRunnerResult, CompactionRunnerTask, - }, - CompactionInputFiles, CompactionTask, ExpiredFiles, - }, - instance::flush_compaction::{AllocFileId, Other, Result, StoreVersionEdit}, - manifest::{ - meta_edit::{MetaEdit, MetaEditRequest, MetaUpdate, VersionEditMeta}, - ManifestRef, - }, - sst::{factory::SstWriteOptions, file::FileMeta}, - table::{ - data::TableData, - version_edit::{AddFile, DeleteFile}, - }, -}; - -pub(crate) struct Compactor { - /// Sst files compaction runner - runner: Box, - - /// Manifest (or meta) stores meta data of the engine instance. - manifest: ManifestRef, -} - -impl Compactor { - pub fn new(runner: CompactionRunnerPtr, manifest: ManifestRef) -> Self { - Self { runner, manifest } - } - - pub async fn compact_table( - &self, - request_id: RequestId, - table_data: &TableData, - task: &CompactionTask, - sst_write_options: &SstWriteOptions, - ) -> Result<()> { - debug!( - "Begin compact table, table_name:{}, id:{}, task:{:?}", - table_data.name, table_data.id, task - ); - - if task.is_empty() { - // Nothing to compact. - debug!( - "Nothing to compact, table_name:{}, id:{}, task:{:?}", - table_data.name, table_data.id, task - ); - - return Ok(()); - } - - let inputs = task.inputs(); - let mut edit_meta = VersionEditMeta { - space_id: table_data.space_id, - table_id: table_data.id, - flushed_sequence: 0, - // Use the number of compaction inputs as the estimated number of files to add. 
- files_to_add: Vec::with_capacity(inputs.len()), - files_to_delete: vec![], - mems_to_remove: vec![], - max_file_id: 0, - }; - - for files in task.expired() { - self.delete_expired_files(table_data, &request_id, files, &mut edit_meta); - } - - for input in inputs { - self.compact_input_files( - request_id.clone(), - table_data, - input, - sst_write_options, - &mut edit_meta, - ) - .await?; - } - - if !table_data.allow_compaction() { - return Other { - msg: format!( - "Table status is not ok, unable to update manifest, table:{}, table_id:{}", - table_data.name, table_data.id - ), - } - .fail(); - } - - let edit_req = { - let meta_update = MetaUpdate::VersionEdit(edit_meta.clone()); - MetaEditRequest { - shard_info: table_data.shard_info, - meta_edit: MetaEdit::Update(meta_update), - table_catalog_info: table_data.table_catalog_info.clone(), - } - }; - self.manifest - .apply_edit(edit_req) - .await - .context(StoreVersionEdit)?; - - Ok(()) - } - - #[allow(clippy::too_many_arguments)] - pub async fn compact_input_files( - &self, - request_id: RequestId, - table_data: &TableData, - input: &CompactionInputFiles, - sst_write_options: &SstWriteOptions, - edit_meta: &mut VersionEditMeta, - ) -> Result<()> { - debug!( - "Compact input files, table_name:{}, id:{}, input::{:?}, edit_meta:{:?}", - table_data.name, table_data.id, input, edit_meta - ); - - if input.files.is_empty() { - return Ok(()); - } - - // Metrics - let _timer = table_data.metrics.start_compaction_timer(); - table_data - .metrics - .compaction_observe_sst_num(input.files.len()); - let mut sst_size = 0; - let mut sst_row_num = 0; - for file in &input.files { - sst_size += file.size(); - sst_row_num += file.row_num(); - } - table_data - .metrics - .compaction_observe_input_sst_size(sst_size); - table_data - .metrics - .compaction_observe_input_sst_row_num(sst_row_num); - - // TODO: seems should be debug log - info!( - "Begin to compact files of table, request_id:{}, table:{}, table_id:{}, input_files:{:?}", - request_id, table_data.name, table_data.id, input.files, - ); - - // Alloc file id for the merged sst. - let file_id = table_data - .alloc_file_id(&self.manifest) - .await - .context(AllocFileId)?; - - let task = CompactionRunnerTask::new( - request_id.clone(), - input.clone(), - table_data, - file_id, - sst_write_options.clone(), - ); - - let task_result = self.runner.run(task).await?; - let CompactionRunnerResult { - sst_info, - sst_meta, - output_file_path, - } = task_result; - - let sst_file_size = sst_info.file_size as u64; - let sst_row_num = sst_info.row_num as u64; - table_data - .metrics - .compaction_observe_output_sst_size(sst_file_size); - table_data - .metrics - .compaction_observe_output_sst_row_num(sst_row_num); - - // TODO: seems should be debug log - info!( - "Finish to compact files of table, request_id:{}, table:{}, table_id:{}, output_path:{}, input_files:{:?}, sst_meta:{:?}, sst_info:{:?}", - request_id, - table_data.name, - table_data.id, - output_file_path, - input.files, - sst_meta, - sst_info, - ); - - // Update the flushed sequence number. - edit_meta.flushed_sequence = cmp::max(sst_meta.max_sequence, edit_meta.flushed_sequence); - - // Store updates to edit_meta. - edit_meta.files_to_delete.reserve(input.files.len()); - // The compacted file can be deleted later. - for file in &input.files { - edit_meta.files_to_delete.push(DeleteFile { - level: input.level, - file_id: file.id(), - }); - } - - // Add the newly created file to meta. 
- edit_meta.files_to_add.push(AddFile { - level: input.output_level, - file: FileMeta { - id: file_id, - size: sst_file_size, - row_num: sst_row_num, - max_seq: sst_meta.max_sequence, - time_range: sst_meta.time_range, - storage_format: sst_info.storage_format, - associated_files: vec![sst_info.meta_path], - }, - }); - - Ok(()) - } - - pub fn delete_expired_files( - &self, - table_data: &TableData, - request_id: &RequestId, - expired: &ExpiredFiles, - edit_meta: &mut VersionEditMeta, - ) { - if !expired.files.is_empty() { - info!( - "Instance try to delete expired files, table:{}, table_id:{}, request_id:{}, level:{}, files:{:?}", - table_data.name, table_data.id, request_id, expired.level, expired.files, - ); - } - - let files = &expired.files; - edit_meta.files_to_delete.reserve(files.len()); - for file in files { - edit_meta.files_to_delete.push(DeleteFile { - level: expired.level, - file_id: file.id(), - }); - } - } -} diff --git a/src/analytic_engine/src/compaction/metrics.rs b/src/analytic_engine/src/compaction/metrics.rs deleted file mode 100644 index 38d08e4af1..0000000000 --- a/src/analytic_engine/src/compaction/metrics.rs +++ /dev/null @@ -1,30 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Metrics of compaction. - -use lazy_static::lazy_static; -use prometheus::{register_int_gauge, IntGauge}; - -lazy_static! { - // Counters: - pub static ref COMPACTION_PENDING_REQUEST_GAUGE: IntGauge = register_int_gauge!( - "compaction_pending_request_gauge", - "Pending request queue length of compaction" - ) - .unwrap(); -} diff --git a/src/analytic_engine/src/compaction/mod.rs b/src/analytic_engine/src/compaction/mod.rs deleted file mode 100644 index 8f63c93ece..0000000000 --- a/src/analytic_engine/src/compaction/mod.rs +++ /dev/null @@ -1,682 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Compaction. 
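The bookkeeping in `compact_input_files`/`delete_expired_files` above follows one shape: every input or expired file becomes a `files_to_delete` entry, the merged SST becomes a `files_to_add` entry, and the whole edit is applied to the manifest in one step. Below is a simplified, runnable sketch of that shape; the structs are stripped-down stand-ins for the engine's `VersionEditMeta`/`DeleteFile`/`AddFile`, keeping only enough fields to show the idea.

```rust
// Stripped-down stand-ins for the engine's version-edit types.
#[derive(Debug)]
struct DeleteFile { level: u16, file_id: u64 }

#[derive(Debug)]
struct AddFile { level: u16, file_id: u64, size: u64, row_num: u64 }

#[derive(Debug, Default)]
struct VersionEdit {
    files_to_delete: Vec<DeleteFile>,
    files_to_add: Vec<AddFile>,
}

/// Turn one compaction input (files on `input_level`) plus its merged output
/// into a single version edit, mirroring the flow in `compact_input_files`.
fn build_edit(
    input_level: u16,
    input_files: &[(u64, u64, u64)], // (file_id, size, row_num)
    output_level: u16,
    output: (u64, u64, u64),
) -> VersionEdit {
    let mut edit = VersionEdit::default();
    // All compacted inputs can be deleted once the merged file is persisted.
    for &(file_id, _, _) in input_files {
        edit.files_to_delete.push(DeleteFile { level: input_level, file_id });
    }
    // The newly written SST is registered at the output level.
    let (file_id, size, row_num) = output;
    edit.files_to_add.push(AddFile { level: output_level, file_id, size, row_num });
    edit
}

fn main() {
    let edit = build_edit(0, &[(1, 64, 1000), (2, 80, 1200)], 1, (3, 120, 2200));
    println!("{edit:?}");
}
```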
- -use std::{collections::HashMap, fmt, str::FromStr, sync::Arc}; - -use common_types::COMPACTION_STRATEGY; -use generic_error::{BoxError, GenericError}; -use macros::define_result; -use serde::{Deserialize, Serialize}; -use size_ext::ReadableSize; -use snafu::{ensure, Backtrace, GenerateBacktrace, OptionExt, ResultExt, Snafu}; -use time_ext::TimeUnit; -use tokio::sync::oneshot; - -use crate::{ - compaction::picker::{CommonCompactionPicker, CompactionPickerRef}, - sst::file::{FileHandle, FileMeta, FilePurgeQueue, Level}, - table::data::TableDataRef, -}; - -pub mod compactor; -mod metrics; -pub mod picker; -pub mod runner; -pub mod scheduler; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Unable to parse compaction strategy, value: {}", value))] - ParseStrategy { value: String, backtrace: Backtrace }, - #[snafu(display("Unable to parse float, key: {}, value: {}", key, value))] - ParseFloat { - key: String, - value: String, - source: std::num::ParseFloatError, - backtrace: Backtrace, - }, - #[snafu(display("Unable to parse int, key: {}, value: {}", key, value))] - ParseInt { - key: String, - value: String, - source: std::num::ParseIntError, - backtrace: Backtrace, - }, - #[snafu(display("Unable to parse readable size, key: {}, value: {}", key, value))] - ParseSize { - key: String, - value: String, - error: String, - backtrace: Backtrace, - }, - #[snafu(display("Unable to parse time unit, key: {}, value: {}", key, value))] - ParseTimeUnit { - key: String, - value: String, - error: String, - backtrace: Backtrace, - }, - #[snafu(display("Invalid compaction option value, err: {}", error))] - InvalidOption { error: String, backtrace: Backtrace }, - - #[snafu(display("Empty file meta.\nBacktrace:\n{}", backtrace))] - EmptyFileMeta { backtrace: Backtrace }, - - #[snafu(display("Failed to convert file meta, err:{}", source))] - ConvertFileMeta { source: GenericError }, - - #[snafu(display("Empty purge queue.\nBacktrace:\n{}", backtrace))] - EmptyPurgeQueue { backtrace: Backtrace }, - - #[snafu(display("Failed to convert level, err:{}", source))] - ConvertLevel { source: GenericError }, -} - -define_result!(Error); - -#[derive(Debug, Clone, Copy, Deserialize, Default, PartialEq, Serialize)] -pub enum CompactionStrategy { - #[default] - Default, - TimeWindow(TimeWindowCompactionOptions), - SizeTiered(SizeTieredCompactionOptions), -} - -#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Serialize)] -pub struct SizeTieredCompactionOptions { - pub bucket_low: f32, - pub bucket_high: f32, - pub min_sstable_size: ReadableSize, - pub min_threshold: usize, - pub max_threshold: usize, - pub max_input_sstable_size: ReadableSize, -} - -#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Serialize)] -pub struct TimeWindowCompactionOptions { - pub size_tiered: SizeTieredCompactionOptions, - // TODO(boyan) In fact right now we only supports TimeUnit::Milliseconds resolution. 
- pub timestamp_resolution: TimeUnit, -} - -// TODO: MAX_INPUT_SSTABLE_SIZE is a temp solution to control sst size -// Remove this when we can control compaction's output size -// https://github.com/apache/incubator-horaedb/issues/408 -pub fn get_max_input_sstable_size() -> ReadableSize { - match std::env::var("HORAEDB_MAX_INPUT_SSTABLE_SIZE") { - Ok(size) => ReadableSize::from_str(&size).unwrap_or_else(|_| ReadableSize::mb(1200)), - Err(_) => ReadableSize::mb(1200), - } -} - -impl Default for SizeTieredCompactionOptions { - fn default() -> Self { - Self { - bucket_low: 0.5, - bucket_high: 1.5, - min_sstable_size: ReadableSize::mb(50), - min_threshold: 4, - max_threshold: 16, - max_input_sstable_size: get_max_input_sstable_size(), - } - } -} - -impl Default for TimeWindowCompactionOptions { - fn default() -> Self { - Self { - size_tiered: SizeTieredCompactionOptions::default(), - timestamp_resolution: TimeUnit::Milliseconds, - } - } -} - -const BUCKET_LOW_KEY: &str = "compaction_bucket_low"; -const BUCKET_HIGH_KEY: &str = "compaction_bucket_high"; -const MIN_THRESHOLD_KEY: &str = "compaction_min_threshold"; -const MAX_THRESHOLD_KEY: &str = "compaction_max_threshold"; -const MIN_SSTABLE_SIZE_KEY: &str = "compaction_min_sstable_size"; -const TIMESTAMP_RESOLUTION_KEY: &str = "compaction_timestamp_resolution"; -const DEFAULT_STRATEGY: &str = "default"; -const STC_STRATEGY: &str = "size_tiered"; -const TWC_STRATEGY: &str = "time_window"; - -impl CompactionStrategy { - pub(crate) fn parse_from( - value: &str, - options: &HashMap, - ) -> Result { - match value.trim().to_lowercase().as_str() { - DEFAULT_STRATEGY => Ok(CompactionStrategy::Default), - STC_STRATEGY => Ok(CompactionStrategy::SizeTiered( - SizeTieredCompactionOptions::parse_from(options)?, - )), - TWC_STRATEGY => Ok(CompactionStrategy::TimeWindow( - TimeWindowCompactionOptions::parse_from(options)?, - )), - _ => ParseStrategy { - value: value.to_string(), - } - .fail(), - } - } - - pub(crate) fn fill_raw_map(&self, m: &mut HashMap) { - match self { - CompactionStrategy::Default => { - m.insert( - COMPACTION_STRATEGY.to_string(), - DEFAULT_STRATEGY.to_string(), - ); - } - CompactionStrategy::SizeTiered(opts) => { - m.insert(COMPACTION_STRATEGY.to_string(), STC_STRATEGY.to_string()); - opts.fill_raw_map(m); - } - CompactionStrategy::TimeWindow(opts) => { - m.insert(COMPACTION_STRATEGY.to_string(), TWC_STRATEGY.to_string()); - opts.fill_raw_map(m); - } - } - } -} - -impl SizeTieredCompactionOptions { - pub(crate) fn validate(&self) -> Result<()> { - ensure!( - self.bucket_high > self.bucket_low, - InvalidOption { - error: format!( - "{} value({}) is less than or equal to the {} value({}) ", - BUCKET_HIGH_KEY, self.bucket_high, BUCKET_LOW_KEY, self.bucket_low - ), - } - ); - - Ok(()) - } - - fn fill_raw_map(&self, m: &mut HashMap) { - m.insert(BUCKET_LOW_KEY.to_string(), format!("{}", self.bucket_low)); - m.insert(BUCKET_HIGH_KEY.to_string(), format!("{}", self.bucket_high)); - m.insert( - MIN_SSTABLE_SIZE_KEY.to_string(), - format!("{}", self.min_sstable_size.0), - ); - m.insert( - MAX_THRESHOLD_KEY.to_string(), - format!("{}", self.max_threshold), - ); - m.insert( - MIN_THRESHOLD_KEY.to_string(), - format!("{}", self.min_threshold), - ); - } - - pub(crate) fn parse_from( - options: &HashMap, - ) -> Result { - let mut opts = SizeTieredCompactionOptions::default(); - if let Some(v) = options.get(BUCKET_LOW_KEY) { - opts.bucket_low = v.parse().context(ParseFloat { - key: BUCKET_HIGH_KEY, - value: v, - })?; - } - if let Some(v) = 
options.get(BUCKET_HIGH_KEY) { - opts.bucket_high = v.parse().context(ParseFloat { - key: BUCKET_HIGH_KEY, - value: v, - })?; - } - if let Some(v) = options.get(MIN_SSTABLE_SIZE_KEY) { - opts.min_sstable_size = v.parse::().map_err(|err| Error::ParseSize { - key: MIN_SSTABLE_SIZE_KEY.to_string(), - value: v.to_string(), - error: err, - backtrace: Backtrace::generate(), - })?; - } - if let Some(v) = options.get(MAX_THRESHOLD_KEY) { - opts.max_threshold = v.parse().context(ParseInt { - key: MAX_THRESHOLD_KEY, - value: v, - })?; - } - if let Some(v) = options.get(MIN_THRESHOLD_KEY) { - opts.min_threshold = v.parse().context(ParseInt { - key: MIN_THRESHOLD_KEY, - value: v, - })?; - } - - opts.validate()?; - - Ok(opts) - } -} - -impl TimeWindowCompactionOptions { - /// TODO(boyan) In fact right now we only supports TimeUnit::Milliseconds - /// resolution. - fn valid_timestamp_unit(unit: TimeUnit) -> bool { - matches!( - unit, - TimeUnit::Seconds - | TimeUnit::Milliseconds - | TimeUnit::Microseconds - | TimeUnit::Nanoseconds - ) - } - - fn fill_raw_map(&self, m: &mut HashMap) { - self.size_tiered.fill_raw_map(m); - - m.insert( - TIMESTAMP_RESOLUTION_KEY.to_string(), - format!("{}", self.timestamp_resolution), - ); - } - - pub(crate) fn validate(&self) -> Result<()> { - if !Self::valid_timestamp_unit(self.timestamp_resolution) { - return InvalidOption { - error: format!( - "{:?} is not valid for {}) ", - self.timestamp_resolution, TIMESTAMP_RESOLUTION_KEY - ), - } - .fail(); - } - - Ok(()) - } - - pub(crate) fn parse_from( - options: &HashMap, - ) -> Result { - let mut opts = TimeWindowCompactionOptions { - size_tiered: SizeTieredCompactionOptions::parse_from(options)?, - ..Default::default() - }; - - if let Some(v) = options.get(TIMESTAMP_RESOLUTION_KEY) { - opts.timestamp_resolution = - v.parse::().map_err(|err| Error::ParseTimeUnit { - key: TIMESTAMP_RESOLUTION_KEY.to_string(), - value: v.to_string(), - error: err, - backtrace: Backtrace::generate(), - })?; - } - - opts.validate()?; - - Ok(opts) - } -} - -#[derive(Debug, Clone)] -pub struct CompactionInputFiles { - /// Level of the files to be compacted. - pub level: Level, - /// Files to be compacted. - pub files: Vec, - /// The output level of the merged file. - pub output_level: Level, -} - -impl TryFrom for CompactionInputFiles { - type Error = Error; - - fn try_from(value: horaedbproto::compaction_service::CompactionInputFiles) -> Result { - let level: Level = value.level.try_into().box_err().context(ConvertLevel)?; - let output_level: Level = value - .output_level - .try_into() - .box_err() - .context(ConvertLevel)?; - - let mut files: Vec = Vec::with_capacity(value.files.len()); - for file in value.files { - let meta: FileMeta = file - .meta - .context(EmptyFileMeta)? 
- .try_into() - .box_err() - .context(ConvertFileMeta)?; - - let purge_queue: FilePurgeQueue = file.purge_queue.context(EmptyPurgeQueue)?.into(); - - files.push({ - let handle = FileHandle::new(meta, purge_queue); - handle.set_being_compacted(file.being_compacted); - handle - }); - } - - Ok(CompactionInputFiles { - level, - files, - output_level, - }) - } -} - -impl From for horaedbproto::compaction_service::CompactionInputFiles { - fn from(value: CompactionInputFiles) -> Self { - let mut files = Vec::with_capacity(value.files.len()); - for file in value.files { - let handle = horaedbproto::compaction_service::FileHandle { - meta: Some(file.meta().into()), - purge_queue: Some(horaedbproto::compaction_service::FilePurgeQueue { - space_id: file.space_id(), - table_id: file.table_id().into(), - }), - being_compacted: file.being_compacted(), - metrics: Some(horaedbproto::compaction_service::SstMetrics {}), - }; - files.push(handle); - } - - Self { - level: value.level.as_u32(), - files, - output_level: value.output_level.as_u32(), - } - } -} - -#[derive(Debug, Default, Clone)] -pub struct ExpiredFiles { - /// Level of the expired files. - pub level: Level, - /// Expired files. - pub files: Vec, -} - -#[derive(Default, Clone)] -pub struct CompactionTask { - inputs: Vec, - expired: Vec, -} - -impl Drop for CompactionTask { - fn drop(&mut self) { - // When a CompactionTask is dropped, it means - // 1. the task finished successfully, or - // 2. the task is cancelled for some reason, like memory limit - // - // In case 2, we need to mark files as not compacted in order for them to be - // scheduled again. In case 1, the files will be moved out of level controller, - // so it doesn't care what the flag is, so it's safe to set false here. - self.mark_files_being_compacted(false); - } -} - -impl CompactionTask { - fn mark_files_being_compacted(&self, being_compacted: bool) { - for input in &self.inputs { - for file in &input.files { - file.set_being_compacted(being_compacted); - } - } - for expired in &self.expired { - for file in &expired.files { - file.set_being_compacted(being_compacted); - } - } - } - - // Estimate the size of the total input files. 
- #[inline] - pub fn estimated_total_input_file_size(&self) -> usize { - let total_input_size: u64 = self - .inputs - .iter() - .map(|v| v.files.iter().map(|f| f.size()).sum::()) - .sum(); - - total_input_size as usize - } - - #[inline] - pub fn num_compact_files(&self) -> usize { - self.inputs.iter().map(|v| v.files.len()).sum() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.is_input_empty() && self.expired.is_empty() - } - - #[inline] - pub fn is_input_empty(&self) -> bool { - self.inputs.is_empty() - } - - #[inline] - pub fn expired(&self) -> &[ExpiredFiles] { - &self.expired - } - - #[inline] - pub fn inputs(&self) -> &[CompactionInputFiles] { - &self.inputs - } - - #[inline] - pub fn contains_min_level(&self) -> bool { - for input in &self.inputs { - if input.level.is_min() { - return true; - } - } - - false - } -} - -pub struct CompactionTaskBuilder { - expired: Vec, - inputs: Vec, -} - -impl CompactionTaskBuilder { - pub fn with_expired(expired: Vec) -> Self { - Self { - expired, - inputs: Vec::new(), - } - } - - pub fn add_inputs(&mut self, files: CompactionInputFiles) { - self.inputs.push(files); - } - - pub fn build(self) -> CompactionTask { - let task = CompactionTask { - expired: self.expired, - inputs: self.inputs, - }; - - task.mark_files_being_compacted(true); - - task - } -} - -impl fmt::Debug for CompactionTask { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("CompactionTask") - .field("inputs", &self.inputs) - .field( - "expired", - &self - .expired - .iter() - .map(|expired| { - format!( - "level:{}, files:{:?}", - expired.level, - expired.files.iter().map(|f| f.id()) - ) - }) - // only print first 10 files - .take(10) - .collect::>(), - ) - .finish() - } -} - -#[derive(Default)] -pub struct PickerManager; - -impl PickerManager { - pub fn get_picker(&self, strategy: CompactionStrategy) -> CompactionPickerRef { - Arc::new(CommonCompactionPicker::new(strategy)) - } -} - -#[derive(Debug, Snafu)] -pub enum WaitError { - #[snafu(display("The compaction is canceled"))] - Canceled, - - #[snafu(display("Failed to compact, err:{}", source))] - Compaction { - source: Arc, - }, -} - -pub type WaitResult = std::result::Result; - -pub struct WaiterNotifier { - waiter: Option>>, -} - -impl WaiterNotifier { - pub fn new(waiter: Option>>) -> Self { - Self { waiter } - } - - pub fn notify_wait_result(mut self, res: WaitResult<()>) { - // Ignore error if failed to send result. - if let Some(waiter) = self.waiter.take() { - let _ = waiter.send(res); - } - } -} - -impl Drop for WaiterNotifier { - fn drop(&mut self) { - if let Some(waiter) = self.waiter.take() { - // The compaction result hasn't been sent before the notifier dropped, we - // send a canceled error to waiter. - let _ = waiter.send(Canceled.fail()); - } - } -} - -/// Request to compact single table. 
-pub struct TableCompactionRequest { - pub table_data: TableDataRef, - pub waiter: Option>>, -} - -impl TableCompactionRequest { - pub fn new(table_data: TableDataRef) -> (Self, oneshot::Receiver>) { - let (tx, rx) = oneshot::channel::>(); - let req = Self { - table_data, - waiter: Some(tx), - }; - - (req, rx) - } - - pub fn no_waiter(table_data: TableDataRef) -> Self { - TableCompactionRequest { - table_data, - waiter: None, - } - } -} - -#[cfg(test)] -mod tests { - use std::collections::HashMap; - - use super::*; - - #[test] - fn test_fill_raw_map_then_parse() { - let c = CompactionStrategy::Default; - let mut m = HashMap::new(); - c.fill_raw_map(&mut m); - assert_eq!(1, m.len()); - assert_eq!(m[COMPACTION_STRATEGY], "default"); - assert_eq!(c, CompactionStrategy::parse_from("default", &m).unwrap()); - - let opts = SizeTieredCompactionOptions { - bucket_low: 0.1, - min_sstable_size: ReadableSize(1024), - max_threshold: 10, - ..Default::default() - }; - - let c = CompactionStrategy::SizeTiered(opts); - let mut m = HashMap::new(); - c.fill_raw_map(&mut m); - assert_eq!(6, m.len()); - assert_eq!(m[COMPACTION_STRATEGY], "size_tiered"); - assert_eq!(m[BUCKET_LOW_KEY], "0.1"); - assert_eq!(m[BUCKET_HIGH_KEY], "1.5"); - assert_eq!(m[MIN_SSTABLE_SIZE_KEY], "1024"); - assert_eq!(m[MIN_THRESHOLD_KEY], "4"); - assert_eq!(m[MAX_THRESHOLD_KEY], "10"); - assert_eq!( - c, - CompactionStrategy::parse_from("size_tiered", &m).unwrap() - ); - - let twc_opts = TimeWindowCompactionOptions { - size_tiered: opts, - ..Default::default() - }; - let c = CompactionStrategy::TimeWindow(twc_opts); - let mut m = HashMap::new(); - c.fill_raw_map(&mut m); - - assert_eq!(7, m.len()); - assert_eq!(m[COMPACTION_STRATEGY], "time_window"); - assert_eq!(m[BUCKET_LOW_KEY], "0.1"); - assert_eq!(m[BUCKET_HIGH_KEY], "1.5"); - assert_eq!(m[MIN_SSTABLE_SIZE_KEY], "1024"); - assert_eq!(m[MIN_THRESHOLD_KEY], "4"); - assert_eq!(m[MAX_THRESHOLD_KEY], "10"); - assert_eq!(m[TIMESTAMP_RESOLUTION_KEY], "milliseconds"); - - assert_eq!( - c, - CompactionStrategy::parse_from("time_window", &m).unwrap() - ); - } -} diff --git a/src/analytic_engine/src/compaction/picker.rs b/src/analytic_engine/src/compaction/picker.rs deleted file mode 100644 index 994eaad4e6..0000000000 --- a/src/analytic_engine/src/compaction/picker.rs +++ /dev/null @@ -1,1087 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Compaction picker. 
- -use std::{ - collections::{BTreeMap, BTreeSet, HashMap}, - sync::Arc, - time::Duration, -}; - -use common_types::time::Timestamp; -use logger::{debug, info}; -use macros::define_result; -use snafu::Snafu; -use time_ext::TimeUnit; - -use crate::{ - compaction::{ - CompactionInputFiles, CompactionStrategy, CompactionTask, CompactionTaskBuilder, - SizeTieredCompactionOptions, TimeWindowCompactionOptions, - }, - sst::{ - file::{FileHandle, Level}, - manager::LevelsController, - }, -}; - -#[derive(Debug, Snafu)] -pub enum Error {} - -define_result!(Error); - -#[derive(Clone)] -pub struct PickerContext { - pub segment_duration: Duration, - /// The ttl of the data in sst. - pub ttl: Option, - pub strategy: CompactionStrategy, -} - -impl PickerContext { - fn size_tiered_opts(&self) -> SizeTieredCompactionOptions { - match self.strategy { - CompactionStrategy::SizeTiered(opts) => opts, - _ => SizeTieredCompactionOptions::default(), - } - } - - fn time_window_opts(&self) -> TimeWindowCompactionOptions { - match self.strategy { - CompactionStrategy::TimeWindow(opts) => opts, - _ => TimeWindowCompactionOptions::default(), - } - } -} - -pub trait CompactionPicker { - /// Pick candidate files for compaction. - /// - /// Note: files being compacted should be ignored. - fn pick_compaction( - &self, - ctx: PickerContext, - levels_controller: &mut LevelsController, - ) -> Result; -} - -pub type CompactionPickerRef = Arc; - -trait LevelPicker { - /// Pick candidate files for compaction at level - fn pick_candidates_at_level( - &self, - ctx: &PickerContext, - levels_controller: &LevelsController, - level: Level, - expire_time: Option, - ) -> Option>; -} - -type LevelPickerRef = Arc; - -pub struct CommonCompactionPicker { - level_picker: LevelPickerRef, -} - -impl CommonCompactionPicker { - pub fn new(strategy: CompactionStrategy) -> Self { - let level_picker: LevelPickerRef = match strategy { - CompactionStrategy::SizeTiered(_) => Arc::new(SizeTieredPicker::default()), - CompactionStrategy::TimeWindow(_) | CompactionStrategy::Default => { - Arc::new(TimeWindowPicker::default()) - } - }; - Self { level_picker } - } - - fn pick_compact_candidates( - &self, - ctx: &PickerContext, - levels_controller: &LevelsController, - expire_time: Option, - ) -> Option { - for level in levels_controller.levels() { - if let Some(files) = self.level_picker.pick_candidates_at_level( - ctx, - levels_controller, - level, - expire_time, - ) { - return Some(CompactionInputFiles { - level, - files, - output_level: level.next(), - }); - } - } - - None - } -} - -impl CompactionPicker for CommonCompactionPicker { - fn pick_compaction( - &self, - ctx: PickerContext, - levels_controller: &mut LevelsController, - ) -> Result { - let expire_time = ctx.ttl.map(Timestamp::expire_time); - let mut builder = - CompactionTaskBuilder::with_expired(levels_controller.expired_ssts(expire_time)); - - if let Some(input_files) = - self.pick_compact_candidates(&ctx, levels_controller, expire_time) - { - info!( - "Compaction strategy: {:?} picker pick files to compact, input_files:{:?}", - ctx.strategy, input_files - ); - - builder.add_inputs(input_files); - } - - Ok(builder.build()) - } -} - -#[inline] -fn find_uncompact_files( - levels_controller: &LevelsController, - level: Level, - expire_time: Option, -) -> Vec { - levels_controller - .iter_ssts_at_level(level) - // Only use files not being compacted and not expired. 
- .filter(|file| !file.being_compacted() && !file.time_range().is_expired(expire_time)) - .cloned() - .collect() -} - -// Trim the largest sstables off the end to meet the `max_threshold` and -// `max_input_sstable_size` -fn trim_to_threshold( - input_files: Vec, - max_threshold: usize, - max_input_sstable_size: u64, -) -> Vec { - let mut input_size = 0; - input_files - .into_iter() - .take(max_threshold) - .take_while(|f| { - input_size += f.size(); - input_size <= max_input_sstable_size - }) - .collect() -} - -// TODO: Remove this function when pick_by_seq is stable. -fn prefer_pick_by_seq() -> bool { - std::env::var("HORAEDB_COMPACT_PICK_BY_SEQ").unwrap_or_else(|_| "true".to_string()) == "true" -} - -/// Size tiered compaction strategy -/// -/// Origin solution[1] will only consider file size, but this will cause data -/// corrupt, see https://github.com/apache/incubator-horaedb/pull/1041 -/// -/// So we could only compact files with adjacent seq, or ssts without -/// overlapping key range among them. Currently solution is relative simple, -/// only pick adjacent sst. Maybe a better, but more complex solution could be -/// introduced later. -/// -/// [1]: https://github.com/jeffjirsa/twcs/blob/master/src/main/java/com/jeffjirsa/cassandra/db/compaction/SizeTieredCompactionStrategy.java -pub struct SizeTieredPicker { - pick_by_seq: bool, -} - -impl Default for SizeTieredPicker { - fn default() -> Self { - Self { - pick_by_seq: prefer_pick_by_seq(), - } - } -} - -/// Similar size files group -#[derive(Debug, Clone)] -struct Bucket { - pub avg_size: usize, - pub files: Vec, -} - -impl Bucket { - fn with_file(file: &FileHandle) -> Self { - Self { - avg_size: file.size() as usize, - files: vec![file.clone()], - } - } - - fn with_files(files: Vec) -> Self { - let total: usize = files.iter().map(|f| f.size() as usize).sum(); - let avg_size = if files.is_empty() { - 0 - } else { - total / files.len() - }; - Self { avg_size, files } - } - - fn insert_file(&mut self, file: &FileHandle) { - let total_size = self.files.len() * self.avg_size + file.size() as usize; - self.avg_size = total_size / (self.files.len() + 1); - self.files.push(file.clone()); - } - - fn get_hotness_map(&self) -> HashMap { - self.files - .iter() - .map(|f| (f.clone(), Self::hotness(f))) - .collect() - } - - #[inline] - fn hotness(f: &FileHandle) -> f64 { - // prevent NAN hotness - let row_num = f.row_num().max(1); - f.read_meter().h2_rate() / (row_num as f64) - } -} - -impl LevelPicker for SizeTieredPicker { - fn pick_candidates_at_level( - &self, - ctx: &PickerContext, - levels_controller: &LevelsController, - level: Level, - expire_time: Option, - ) -> Option> { - let files_by_segment = - Self::files_by_segment(levels_controller, level, ctx.segment_duration, expire_time); - if files_by_segment.is_empty() { - return None; - } - - let opts = ctx.size_tiered_opts(); - // Iterate the segment in reverse order, so newest segment is examined first. 
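`trim_to_threshold` above caps a candidate set both by file count and by cumulative input size. A self-contained sketch of the same logic on plain `u64` sizes (the values mirror the `test_size_tiered_picker` case further down, but are otherwise illustrative):

```rust
/// Keep at most `max_threshold` files whose cumulative size stays within
/// `max_input_sstable_size`, mirroring trim_to_threshold above.
fn trim_to_threshold(sizes: Vec<u64>, max_threshold: usize, max_input_sstable_size: u64) -> Vec<u64> {
    let mut input_size = 0u64;
    sizes
        .into_iter()
        .take(max_threshold)
        .take_while(|s| {
            input_size += *s;
            input_size <= max_input_sstable_size
        })
        .collect()
}

fn main() {
    // Capped by total input size: 100 + 110 fit, adding 200 would exceed 300.
    assert_eq!(trim_to_threshold(vec![100, 110, 200], 10, 300), vec![100, 110]);
    // Capped by file count only.
    assert_eq!(trim_to_threshold(vec![100, 110, 200], 2, 3000), vec![100, 110]);
}
```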
- for (idx, (segment_key, segment)) in files_by_segment.iter().rev().enumerate() { - let files = self.pick_ssts(segment.to_vec(), &opts); - if files.is_some() { - info!("Compact segment, idx:{idx}, segment_key:{segment_key:?}, files:{segment:?}"); - return files; - } - debug!("No compaction necessary for segment, idx:{idx}, segment_key:{segment_key:?}"); - } - - None - } -} - -impl SizeTieredPicker { - fn pick_ssts( - &self, - files: Vec, - opts: &SizeTieredCompactionOptions, - ) -> Option> { - if self.pick_by_seq { - return Self::pick_by_seq( - files, - opts.min_threshold, - opts.max_threshold, - opts.max_input_sstable_size.as_byte(), - ); - } - - Self::pick_by_size(files, opts) - } - - fn pick_by_seq( - mut files: Vec, - min_threshold: usize, - max_threshold: usize, - max_input_sstable_size: u64, - ) -> Option> { - // Sort files by max_seq desc. - files.sort_unstable_by_key(|b| std::cmp::Reverse(b.max_sequence())); - - 'outer: for start in 0..files.len() { - // Try max_threshold first, since we hope to compact as many small files as we - // can. - for step in (min_threshold..=max_threshold).rev() { - let end = (start + step).min(files.len()); - if end - start < min_threshold { - // too little files, switch to next loop and find again. - continue 'outer; - } - - let curr_size: u64 = files[start..end].iter().map(|f| f.size()).sum(); - if curr_size <= max_input_sstable_size { - return Some(files[start..end].to_vec()); - } - } - } - - None - } - - fn pick_by_size( - files: Vec, - opts: &SizeTieredCompactionOptions, - ) -> Option> { - let buckets = Self::get_buckets( - files, - opts.bucket_high, - opts.bucket_low, - opts.min_sstable_size.as_byte() as f32, - ); - - Self::most_interesting_bucket( - buckets, - opts.min_threshold, - opts.max_threshold, - opts.max_input_sstable_size.as_byte(), - ) - } - - /// Group files of similar size into buckets. 
- fn get_buckets( - mut files: Vec, - bucket_high: f32, - bucket_low: f32, - min_sst_size: f32, - ) -> Vec { - // sort by file length - files.sort_unstable_by_key(FileHandle::size); - - let mut buckets: Vec = Vec::new(); - 'outer: for sst in &files { - let size = sst.size() as f32; - // look for a bucket containing similar-sized files: - // group in the same bucket if it's w/in 50% of the average for this bucket, - // or this file and the bucket are all considered "small" (less than - // `min_sst_size`) - for bucket in buckets.iter_mut() { - let old_avg_size = bucket.avg_size as f32; - if (size > (old_avg_size * bucket_low) && size < (old_avg_size * bucket_high)) - || (size < min_sst_size && old_avg_size < min_sst_size) - { - // find a similar file, insert it into bucket - bucket.insert_file(sst); - continue 'outer; - } - } - - // no similar bucket found - // put it in a new bucket - buckets.push(Bucket::with_file(sst)); - } - - debug!("Group files of similar size into buckets: {:?}", buckets); - - buckets - } - - fn most_interesting_bucket( - buckets: Vec, - min_threshold: usize, - max_threshold: usize, - max_input_sstable_size: u64, - ) -> Option> { - debug!( - "Find most_interesting_bucket buckets:{:?}, min:{}, max:{}", - buckets, min_threshold, max_threshold - ); - - let mut pruned_bucket_and_hotness = Vec::with_capacity(buckets.len()); - // skip buckets containing less than min_threshold sstables, - // and limit other buckets to max_threshold sstables - for bucket in buckets { - let (bucket, hotness) = - Self::trim_to_threshold_with_hotness(bucket, max_threshold, max_input_sstable_size); - if bucket.files.len() >= min_threshold { - pruned_bucket_and_hotness.push((bucket, hotness)); - } - } - - if pruned_bucket_and_hotness.is_empty() { - return None; - } - - // Find the hottest bucket - if let Some((bucket, hotness)) = - pruned_bucket_and_hotness - .into_iter() - .max_by(|(b1, h1), (b2, h2)| { - let c = h1.partial_cmp(h2).unwrap(); - if !c.is_eq() { - return c; - } - // TODO(boyan), compacting smallest sstables first? - b1.avg_size.cmp(&b2.avg_size) - }) - { - debug!( - "Find the hottest bucket, hotness: {}, bucket: {:?}", - hotness, bucket - ); - Some(bucket.files) - } else { - None - } - } - - fn files_by_segment( - levels_controller: &LevelsController, - level: Level, - segment_duration: Duration, - expire_time: Option, - ) -> BTreeMap> { - let mut files_by_segment = BTreeMap::new(); - let uncompact_files = find_uncompact_files(levels_controller, level, expire_time); - for file in uncompact_files { - // We use the end time of the range to calculate segment. - let segment = file - .time_range() - .exclusive_end() - .truncate_by(segment_duration); - let files = files_by_segment.entry(segment).or_insert_with(Vec::new); - files.push(file); - } - - files_by_segment - } - - fn trim_to_threshold_with_hotness( - bucket: Bucket, - max_threshold: usize, - max_input_sstable_size: u64, - ) -> (Bucket, f64) { - let hotness_snapshot = bucket.get_hotness_map(); - - // Sort by sstable hotness (descending). 
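`get_buckets` above groups SSTs whose sizes are within `bucket_low`/`bucket_high` of a bucket's running average, with an extra rule that lets all sub-`min_sstable_size` files share one bucket. A simplified, runnable sketch on bare sizes, using 0.5/1.5 as illustrative thresholds (the "within 50% of the average" values the doc comment alludes to):

```rust
/// Simplified version of SizeTieredPicker::get_buckets on plain sizes.
fn get_buckets(mut sizes: Vec<u64>, bucket_low: f32, bucket_high: f32, min_sst_size: f32) -> Vec<Vec<u64>> {
    sizes.sort_unstable();

    // Each bucket keeps its member sizes plus a running average.
    let mut buckets: Vec<(f32, Vec<u64>)> = Vec::new();
    'outer: for size in sizes {
        let s = size as f32;
        for (avg, files) in buckets.iter_mut() {
            if (s > *avg * bucket_low && s < *avg * bucket_high)
                || (s < min_sst_size && *avg < min_sst_size)
            {
                files.push(size);
                *avg = files.iter().sum::<u64>() as f32 / files.len() as f32;
                continue 'outer;
            }
        }
        // No similar bucket found: start a new one.
        buckets.push((s, vec![size]));
    }
    buckets.into_iter().map(|(_, files)| files).collect()
}

fn main() {
    // 100 and 110 are similar, 400 starts its own bucket,
    // 10 and 20 are both below min_sst_size so they group together.
    let buckets = get_buckets(vec![110, 10, 400, 100, 20], 0.5, 1.5, 50.0);
    assert_eq!(buckets, vec![vec![10, 20], vec![100, 110], vec![400]]);
}
```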
- let mut sorted_files = bucket.files.to_vec(); - sorted_files.sort_unstable_by(|f1, f2| { - hotness_snapshot[f1] - .partial_cmp(&hotness_snapshot[f2]) - .unwrap() - .reverse() - }); - - let pruned_bucket = trim_to_threshold(sorted_files, max_threshold, max_input_sstable_size); - // bucket hotness is the sum of the hotness of all sstable members - let bucket_hotness = pruned_bucket.iter().map(Bucket::hotness).sum(); - - (Bucket::with_files(pruned_bucket), bucket_hotness) - } -} - -/// Time window compaction strategy -/// See https://github.com/jeffjirsa/twcs/blob/master/src/main/java/com/jeffjirsa/cassandra/db/compaction/TimeWindowCompactionStrategy.java -pub struct TimeWindowPicker { - pick_by_seq: bool, -} - -impl Default for TimeWindowPicker { - fn default() -> Self { - Self { - pick_by_seq: prefer_pick_by_seq(), - } - } -} - -impl TimeWindowPicker { - fn get_window_bounds_in_millis(window: &Duration, ts: i64) -> (i64, i64) { - let ts_secs = ts / 1000; - - let size = window.as_secs() as i64; - - let lower = ts_secs - (ts_secs % size); - let upper = lower + size - 1; - - (lower * 1000, upper * 1000) - } - - #[inline] - fn resolve_timestamp(ts: i64, timestamp_resolution: TimeUnit) -> i64 { - match timestamp_resolution { - TimeUnit::Microseconds => ts / 1000, - TimeUnit::Nanoseconds => ts / 1000000, - TimeUnit::Seconds => ts * 1000, - TimeUnit::Milliseconds => ts, - // the option is validated before, so it won't reach here - _ => unreachable!(), - } - } - - /// Group files of similar timestamp into buckets. - fn get_buckets( - files: &[FileHandle], - window: &Duration, - timestamp_resolution: TimeUnit, - ) -> (HashMap>, i64) { - let mut max_ts = 0i64; - let mut buckets: HashMap> = HashMap::new(); - for f in files { - let ts = f.time_range_ref().exclusive_end().as_i64(); - - let ts = Self::resolve_timestamp(ts, timestamp_resolution); - - let (left, _) = Self::get_window_bounds_in_millis(window, ts); - - let bucket_files = buckets.entry(left).or_default(); - - bucket_files.push(f.clone()); - - if left > max_ts { - max_ts = left; - } - } - - debug!( - "Group files of similar timestamp into buckets: {:?}", - buckets - ); - (buckets, max_ts) - } - - fn newest_bucket( - &self, - buckets: HashMap>, - size_tiered_opts: SizeTieredCompactionOptions, - now: i64, - ) -> Option> { - // If the current bucket has at least minThreshold SSTables, choose that one. - // For any other bucket, at least 2 SSTables is enough. - // In any case, limit to max_threshold SSTables. 
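The time-window grouping above hinges on `get_window_bounds_in_millis` and `resolve_timestamp`: every SST is keyed by the lower bound of the window containing its millisecond-resolved end timestamp. A runnable mirror of those two helpers with illustrative numbers (the string-based unit argument is a simplification of the crate's `TimeUnit`):

```rust
use std::time::Duration;

/// Mirror of TimeWindowPicker::get_window_bounds_in_millis.
fn get_window_bounds_in_millis(window: &Duration, ts_millis: i64) -> (i64, i64) {
    let ts_secs = ts_millis / 1000;
    let size = window.as_secs() as i64;
    let lower = ts_secs - (ts_secs % size);
    let upper = lower + size - 1;
    (lower * 1000, upper * 1000)
}

/// Mirror of TimeWindowPicker::resolve_timestamp: normalize to milliseconds.
fn resolve_to_millis(ts: i64, unit: &str) -> i64 {
    match unit {
        "microseconds" => ts / 1_000,
        "nanoseconds" => ts / 1_000_000,
        "seconds" => ts * 1_000,
        _ => ts, // already milliseconds
    }
}

fn main() {
    let window = Duration::from_secs(2);
    // 7.5s and 6.2s fall into the same [6s, 8s) window ...
    assert_eq!(get_window_bounds_in_millis(&window, 7_500).0, 6_000);
    assert_eq!(get_window_bounds_in_millis(&window, 6_200).0, 6_000);
    // ... while 8.1s starts the next window.
    assert_eq!(get_window_bounds_in_millis(&window, 8_100).0, 8_000);
    // A nanosecond timestamp is first resolved to milliseconds.
    assert_eq!(resolve_to_millis(7_500_000_000, "nanoseconds"), 7_500);
}
```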
- - let all_keys: BTreeSet<_> = buckets.keys().collect(); - - // First compact latest buckets - for key in all_keys.into_iter().rev() { - if let Some(bucket) = buckets.get(key) { - debug!("Newest bucket loop, key:{key}, now:{now}"); - - if bucket.len() >= size_tiered_opts.min_threshold && *key >= now { - // If we're in the newest bucket, we'll use STCS to prioritize sstables - let size_picker = SizeTieredPicker::default(); - let files = size_picker.pick_ssts(bucket.to_vec(), &size_tiered_opts); - - if files.is_some() { - return files; - } - } else if bucket.len() >= 2 && *key < now { - debug!("Bucket size {} >= 2 and not in current bucket, compacting what's here: {:?}", bucket.len(), bucket); - let files = self.pick_sst_for_old_bucket(bucket.to_vec(), &size_tiered_opts); - if files.is_some() { - return files; - } - } else { - debug!( - "No compaction necessary for bucket size {} , key {}, now {}", - bucket.len(), - key, - now - ); - } - } - } - - None - } - - fn pick_sst_for_old_bucket( - &self, - mut files: Vec, - size_tiered_opts: &SizeTieredCompactionOptions, - ) -> Option> { - let max_input_size = size_tiered_opts.max_input_sstable_size.as_byte(); - // For old bucket, sst is likely already compacted, so min_thresold is not very - // strict, and greedy as `size_tiered_opts`. - let min_threshold = 2; - if self.pick_by_seq { - return SizeTieredPicker::pick_by_seq( - files, - min_threshold, - size_tiered_opts.max_threshold, - max_input_size, - ); - } - - files.sort_unstable_by_key(FileHandle::size); - let candidate_files = - trim_to_threshold(files, size_tiered_opts.max_threshold, max_input_size); - if candidate_files.len() >= min_threshold { - return Some(candidate_files); - } - - None - } - - /// Get current window timestamp, the caller MUST ensure the level has ssts, - /// panic otherwise. 
- fn get_current_window( - levels_controller: &LevelsController, - level: Level, - window: &Duration, - timestamp_resolution: TimeUnit, - ) -> i64 { - // always find the latest sst here - let now = levels_controller - .latest_sst(level) - .unwrap() - .time_range() - .exclusive_end() - .as_i64(); - let now = Self::resolve_timestamp(now, timestamp_resolution); - Self::get_window_bounds_in_millis(window, now).0 - } -} - -impl LevelPicker for TimeWindowPicker { - fn pick_candidates_at_level( - &self, - ctx: &PickerContext, - levels_controller: &LevelsController, - level: Level, - expire_time: Option, - ) -> Option> { - let uncompact_files = find_uncompact_files(levels_controller, level, expire_time); - - if uncompact_files.is_empty() { - return None; - } - - let opts = ctx.time_window_opts(); - - debug!("TWCS compaction options: {:?}", opts); - - let (buckets, max_bucket_ts) = Self::get_buckets( - &uncompact_files, - &ctx.segment_duration, - opts.timestamp_resolution, - ); - - let now = Self::get_current_window( - levels_controller, - level, - &ctx.segment_duration, - opts.timestamp_resolution, - ); - debug!( - "TWCS current window is {}, max_bucket_ts: {}", - now, max_bucket_ts - ); - assert!(now >= max_bucket_ts); - - self.newest_bucket(buckets, opts.size_tiered, now) - } -} - -#[cfg(test)] -mod tests { - use std::time::Duration; - - use bytes_ext::Bytes; - use common_types::{ - tests::build_schema, - time::{TimeRange, Timestamp}, - }; - use macros::hash_map; - use tokio::sync::mpsc; - - use super::*; - use crate::{ - compaction::PickerManager, - sst::{ - file::{FileMeta, FilePurgeQueue}, - manager::{tests::LevelsControllerMockBuilder, LevelsController}, - meta_data::SstMetaData, - parquet::meta_data::ParquetMetaData, - }, - table_options::StorageFormat, - }; - - fn build_sst_meta_data(time_range: TimeRange) -> SstMetaData { - let parquet_meta_data = ParquetMetaData { - min_key: Bytes::from_static(b"100"), - max_key: Bytes::from_static(b"200"), - time_range, - max_sequence: 200, - schema: build_schema(), - parquet_filter: Default::default(), - column_values: None, - }; - - SstMetaData::Parquet(Arc::new(parquet_meta_data)) - } - - // testcase 0: file buckets: old bucket:[0,1] newest bucket:[2], expired:[3] - fn build_old_bucket_case(now: i64) -> LevelsController { - let builder = LevelsControllerMockBuilder::default(); - let sst_meta_vec = vec![ - build_sst_meta_data(TimeRange::new_unchecked( - Timestamp::new(now - 14000), - Timestamp::new(now - 13000), - )), - build_sst_meta_data(TimeRange::new_unchecked( - Timestamp::new(now - 14000), - Timestamp::new(now - 13000), - )), - build_sst_meta_data(TimeRange::new_unchecked( - Timestamp::new(now - 4000), - Timestamp::new(now - 3000), - )), - build_sst_meta_data(TimeRange::new_unchecked( - Timestamp::new(100), - Timestamp::new(200), - )), - ]; - builder.add_sst(sst_meta_vec).build() - } - - // testcase 1: file buckets: old bucket:[0,1] newest bucket:[2,3,4,5] - // default min_threshold=4 - fn build_newest_bucket_case(now: i64) -> LevelsController { - let builder = LevelsControllerMockBuilder::default(); - let sst_meta_vec = vec![ - build_sst_meta_data(TimeRange::new_unchecked( - Timestamp::new(now - 14000), - Timestamp::new(now - 13000), - )), - build_sst_meta_data(TimeRange::new_unchecked( - Timestamp::new(now - 14000), - Timestamp::new(now - 13000), - )), - build_sst_meta_data(TimeRange::new_unchecked( - Timestamp::new(now - 4000), - Timestamp::new(now - 3000), - )), - build_sst_meta_data(TimeRange::new_unchecked( - Timestamp::new(now - 4000), - 
Timestamp::new(now - 3000), - )), - build_sst_meta_data(TimeRange::new_unchecked( - Timestamp::new(now - 4000), - Timestamp::new(now - 3000), - )), - build_sst_meta_data(TimeRange::new_unchecked( - Timestamp::new(now - 4000), - Timestamp::new(now - 3000), - )), - ]; - builder.add_sst(sst_meta_vec).build() - } - - // testcase 2: file buckets: old bucket:[0] newest bucket:[1,2,3] - // default min_threshold=4 - fn build_newest_bucket_no_match_case(now: i64) -> LevelsController { - let builder = LevelsControllerMockBuilder::default(); - let sst_meta_vec = vec![ - build_sst_meta_data(TimeRange::new_unchecked( - Timestamp::new(now - 14000), - Timestamp::new(now - 13000), - )), - build_sst_meta_data(TimeRange::new_unchecked( - Timestamp::new(now - 4000), - Timestamp::new(now - 3000), - )), - build_sst_meta_data(TimeRange::new_unchecked( - Timestamp::new(now - 4000), - Timestamp::new(now - 3000), - )), - build_sst_meta_data(TimeRange::new_unchecked( - Timestamp::new(now - 4000), - Timestamp::new(now - 3000), - )), - ]; - builder.add_sst(sst_meta_vec).build() - } - - #[test] - fn test_time_window_picker() { - let picker_manager = PickerManager; - let twp = picker_manager.get_picker(CompactionStrategy::Default); - let mut ctx = PickerContext { - segment_duration: Duration::from_millis(1000), - ttl: Some(Duration::from_secs(100000)), - strategy: CompactionStrategy::Default, - }; - let now = Timestamp::now(); - { - let mut lc = build_old_bucket_case(now.as_i64()); - let task = twp.pick_compaction(ctx.clone(), &mut lc).unwrap(); - assert_eq!(task.inputs[0].files.len(), 2); - assert_eq!(task.inputs[0].files[0].id(), 0); - assert_eq!(task.inputs[0].files[1].id(), 1); - assert_eq!(task.expired[0].files.len(), 1); - assert_eq!(task.expired[0].files[0].id(), 3); - } - - { - let mut lc = build_newest_bucket_case(now.as_i64()); - let task = twp.pick_compaction(ctx.clone(), &mut lc).unwrap(); - assert_eq!(task.inputs[0].files.len(), 4); - assert_eq!(task.inputs[0].files[0].id(), 2); - assert_eq!(task.inputs[0].files[1].id(), 3); - assert_eq!(task.inputs[0].files[2].id(), 4); - assert_eq!(task.inputs[0].files[3].id(), 5); - } - - { - let mut lc = build_newest_bucket_no_match_case(now.as_i64()); - let task = twp.pick_compaction(ctx.clone(), &mut lc).unwrap(); - assert_eq!(task.inputs.len(), 0); - } - - // If ttl is None, then no file is expired. 
- ctx.ttl = None; - { - let mut lc = build_old_bucket_case(now.as_i64()); - let task = twp.pick_compaction(ctx, &mut lc).unwrap(); - assert_eq!(task.inputs[0].files.len(), 2); - assert_eq!(task.inputs[0].files[0].id(), 0); - assert_eq!(task.inputs[0].files[1].id(), 1); - assert!(task.expired[0].files.is_empty()); - } - } - - fn build_file_handles(sizes: Vec<(u64, TimeRange)>) -> Vec { - let (tx, _rx) = mpsc::unbounded_channel(); - - sizes - .into_iter() - .map(|(size, time_range)| { - let file_meta = FileMeta { - size, - time_range, - id: 1, - row_num: 0, - max_seq: 0, - storage_format: StorageFormat::default(), - associated_files: Vec::new(), - }; - let queue = FilePurgeQueue::new(1, 1.into(), tx.clone()); - FileHandle::new(file_meta, queue) - }) - .collect() - } - - fn build_file_handles_seq(sizes: Vec<(u64, u64)>) -> Vec { - let (tx, _rx) = mpsc::unbounded_channel(); - - sizes - .into_iter() - .map(|(size, max_seq)| { - let file_meta = FileMeta { - size, - time_range: TimeRange::new_unchecked_for_test(0, 1), - id: 1, - row_num: 0, - max_seq, - storage_format: StorageFormat::default(), - associated_files: Vec::new(), - }; - let queue = FilePurgeQueue::new(1, 1.into(), tx.clone()); - FileHandle::new(file_meta, queue) - }) - .collect() - } - - #[test] - fn test_size_tiered_picker() { - let time_range = TimeRange::empty(); - let bucket = Bucket::with_files(build_file_handles(vec![ - (100, time_range), - (110, time_range), - (200, time_range), - ])); - - let (out_bucket, _) = - SizeTieredPicker::trim_to_threshold_with_hotness(bucket.clone(), 10, 300); - // limited by max input size - assert_eq!( - vec![100, 110], - out_bucket - .files - .iter() - .map(|f| f.size()) - .collect::>() - ); - - // no limit - let (out_bucket, _) = - SizeTieredPicker::trim_to_threshold_with_hotness(bucket.clone(), 10, 3000); - assert_eq!( - vec![100, 110, 200], - out_bucket - .files - .iter() - .map(|f| f.size()) - .collect::>() - ); - - // limited by max_threshold - let (out_bucket, _) = SizeTieredPicker::trim_to_threshold_with_hotness(bucket, 2, 3000); - assert_eq!( - vec![100, 110], - out_bucket - .files - .iter() - .map(|f| f.size()) - .collect::>() - ); - } - - #[test] - fn empty_bucket() { - let bucket = Bucket::with_files(vec![]); - assert_eq!(bucket.avg_size, 0); - assert!(bucket.files.is_empty()); - } - - #[test] - fn test_time_window_newest_bucket() { - let size_tiered_opts = SizeTieredCompactionOptions::default(); - let tw_picker = TimeWindowPicker { pick_by_seq: false }; - // old bucket have enough sst for compaction - { - let old_bucket = build_file_handles(vec![ - (102, TimeRange::new_unchecked_for_test(100, 200)), - (100, TimeRange::new_unchecked_for_test(100, 200)), - (101, TimeRange::new_unchecked_for_test(100, 200)), - ]); - let new_bucket = build_file_handles(vec![ - (200, TimeRange::new_unchecked_for_test(200, 300)), - (201, TimeRange::new_unchecked_for_test(200, 300)), - ]); - - let buckets = hash_map! { 100 => old_bucket, 200 => new_bucket }; - let bucket = tw_picker - .newest_bucket(buckets, size_tiered_opts, 200) - .unwrap(); - assert_eq!( - vec![100, 101, 102], - bucket.into_iter().map(|f| f.size()).collect::>() - ); - } - - // old bucket have only 1 sst, which is not enough for compaction - { - let old_bucket = - build_file_handles(vec![(100, TimeRange::new_unchecked_for_test(100, 200))]); - let new_bucket = build_file_handles(vec![ - (200, TimeRange::new_unchecked_for_test(200, 300)), - (201, TimeRange::new_unchecked_for_test(200, 300)), - ]); - - let buckets = hash_map! 
{ 100 => old_bucket, 200 => new_bucket }; - let bucket = tw_picker.newest_bucket(buckets, size_tiered_opts, 200); - assert_eq!(None, bucket); - } - } - - #[test] - fn test_time_window_newest_bucket_for_seq() { - let size_tiered_opts = SizeTieredCompactionOptions::default(); - let tw_picker = TimeWindowPicker { pick_by_seq: true }; - // old bucket have enough sst for compaction - { - let old_bucket = build_file_handles(vec![ - (102, TimeRange::new_unchecked_for_test(100, 200)), - (100, TimeRange::new_unchecked_for_test(100, 200)), - (101, TimeRange::new_unchecked_for_test(100, 200)), - ]); - let new_bucket = build_file_handles(vec![ - (200, TimeRange::new_unchecked_for_test(200, 300)), - (201, TimeRange::new_unchecked_for_test(200, 300)), - ]); - - let buckets = hash_map! { 100 => old_bucket, 200 => new_bucket }; - let bucket = tw_picker - .newest_bucket(buckets, size_tiered_opts, 200) - .unwrap(); - assert_eq!( - vec![102, 100, 101], - bucket.into_iter().map(|f| f.size()).collect::>() - ); - } - - // old bucket have only 1 sst, which is not enough for compaction - { - let old_bucket = - build_file_handles(vec![(100, TimeRange::new_unchecked_for_test(100, 200))]); - let new_bucket = build_file_handles(vec![ - (200, TimeRange::new_unchecked_for_test(200, 300)), - (201, TimeRange::new_unchecked_for_test(200, 300)), - ]); - - let buckets = hash_map! { 100 => old_bucket, 200 => new_bucket }; - let bucket = tw_picker.newest_bucket(buckets, size_tiered_opts, 200); - assert_eq!(None, bucket); - } - } - - #[test] - fn test_size_pick_by_max_seq() { - let input_files = build_file_handles_seq(vec![ - // size, seq - (20, 10), - (10, 20), - (201, 25), - (100, 30), - (100, 40), - (100, 50), - ]); - - assert_eq!( - vec![50, 40, 30], - SizeTieredPicker::pick_by_seq(input_files.clone(), 2, 5, 300) - .unwrap() - .iter() - .map(|f| f.max_sequence()) - .collect::>() - ); - assert_eq!( - vec![50, 40, 30], - SizeTieredPicker::pick_by_seq(input_files.clone(), 2, 5, 500) - .unwrap() - .iter() - .map(|f| f.max_sequence()) - .collect::>() - ); - assert_eq!( - vec![50, 40, 30, 25], - SizeTieredPicker::pick_by_seq(input_files.clone(), 2, 5, 501) - .unwrap() - .iter() - .map(|f| f.max_sequence()) - .collect::>() - ); - assert_eq!( - vec![20, 10], - SizeTieredPicker::pick_by_seq(input_files, 2, 5, 30) - .unwrap() - .iter() - .map(|f| f.max_sequence()) - .collect::>() - ); - } -} diff --git a/src/analytic_engine/src/compaction/runner/local_runner.rs b/src/analytic_engine/src/compaction/runner/local_runner.rs deleted file mode 100644 index e379d78544..0000000000 --- a/src/analytic_engine/src/compaction/runner/local_runner.rs +++ /dev/null @@ -1,318 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::{collections::HashMap, sync::Arc}; - -use async_trait::async_trait; -use common_types::projected_schema::{ProjectedSchema, RowProjectorBuilder}; -use generic_error::BoxError; -use runtime::Runtime; -use snafu::ResultExt; -use table_engine::predicate::Predicate; - -use crate::{ - compaction::runner::{CompactionRunner, CompactionRunnerResult, CompactionRunnerTask}, - instance::flush_compaction::{ - BuildMergeIterator, CreateSstWriter, ReadSstMeta, Result, WriteSst, - }, - row_iter::{ - self, - dedup::DedupIterator, - merge::{MergeBuilder, MergeConfig}, - }, - sst::{ - factory::{ColumnStats, FactoryRef, ObjectStorePickerRef, ScanOptions, SstWriteOptions}, - meta_data::{cache::MetaCacheRef, SstMetaData, SstMetaReader}, - writer::MetaData, - }, - Config, ScanType, SstReadOptionsBuilder, -}; - -const MAX_RECORD_BATCHES_IN_FLIGHT_WHEN_COMPACTION_READ: usize = 64; - -/// Executor carrying for actual compaction work -#[derive(Clone)] -pub struct LocalCompactionRunner { - runtime: Arc, - scan_options: ScanOptions, - /// Sst factory - sst_factory: FactoryRef, - /// Store picker for persisting sst - store_picker: ObjectStorePickerRef, - // TODO: maybe not needed in compaction - sst_meta_cache: Option, -} - -impl LocalCompactionRunner { - pub fn new( - runtime: Arc, - config: &Config, - sst_factory: FactoryRef, - store_picker: ObjectStorePickerRef, - sst_meta_cache: Option, - ) -> Self { - let scan_options = ScanOptions { - background_read_parallelism: 1, - max_record_batches_in_flight: MAX_RECORD_BATCHES_IN_FLIGHT_WHEN_COMPACTION_READ, - num_streams_to_prefetch: config.num_streams_to_prefetch, - }; - - Self { - runtime, - scan_options, - sst_factory, - store_picker, - sst_meta_cache, - } - } -} - -#[async_trait] -impl CompactionRunner for LocalCompactionRunner { - async fn run(&self, task: CompactionRunnerTask) -> Result { - let projected_schema = ProjectedSchema::no_projection(task.schema.clone()); - let predicate = Arc::new(Predicate::empty()); - let sst_read_options_builder = SstReadOptionsBuilder::new( - ScanType::Compaction, - self.scan_options.clone(), - None, - task.input_ctx.num_rows_per_row_group, - predicate, - self.sst_meta_cache.clone(), - self.runtime.clone(), - ); - let fetched_schema = projected_schema.to_record_schema_with_key(); - let primary_key_indexes = fetched_schema.primary_key_idx().to_vec(); - let fetched_schema = fetched_schema.into_record_schema(); - let table_schema = projected_schema.table_schema().clone(); - let row_projector_builder = - RowProjectorBuilder::new(fetched_schema, table_schema, Some(primary_key_indexes)); - - let request_id = task.request_id; - let merge_iter = { - let mut builder = MergeBuilder::new(MergeConfig { - request_id: request_id.clone(), - metrics_collector: None, - // no need to set deadline for compaction - deadline: None, - space_id: task.space_id, - table_id: task.table_id, - sequence: task.sequence, - projected_schema, - predicate: Arc::new(Predicate::empty()), - sst_read_options_builder: sst_read_options_builder.clone(), - sst_factory: &self.sst_factory, - store_picker: &self.store_picker, - merge_iter_options: task.input_ctx.merge_iter_options.clone(), - need_dedup: task.input_ctx.need_dedup, - reverse: false, - }); - // Add all ssts in compaction input to builder. - builder - .mut_ssts_of_level(task.input_ctx.files.level) - .extend_from_slice(&task.input_ctx.files.files); - builder.build().await.context(BuildMergeIterator { - msg: format!("table_id:{}, space_id:{}", task.table_id, task.space_id), - })? 
- }; - - let record_batch_stream = if task.input_ctx.need_dedup { - row_iter::record_batch_with_key_iter_to_stream(DedupIterator::new( - request_id.clone(), - merge_iter, - task.input_ctx.merge_iter_options, - )) - } else { - row_iter::record_batch_with_key_iter_to_stream(merge_iter) - }; - - // TODO: eliminate the duplicated building of `SstReadOptions`. - let sst_read_options = sst_read_options_builder.build(row_projector_builder); - let (sst_meta, column_stats) = { - let meta_reader = SstMetaReader { - space_id: task.space_id, - table_id: task.table_id, - factory: self.sst_factory.clone(), - read_opts: sst_read_options, - store_picker: self.store_picker.clone(), - }; - let sst_metas = meta_reader - .fetch_metas(&task.input_ctx.files.files) - .await - .context(ReadSstMeta)?; - - let column_stats = collect_column_stats_from_meta_datas(&sst_metas); - let merged_meta = - MetaData::merge(sst_metas.into_iter().map(MetaData::from), task.schema); - (merged_meta, column_stats) - }; - - let sst_write_options = SstWriteOptions { - storage_format_hint: task.output_ctx.write_options.storage_format_hint, - num_rows_per_row_group: task.output_ctx.write_options.num_rows_per_row_group, - compression: task.output_ctx.write_options.compression, - max_buffer_size: task.output_ctx.write_options.max_buffer_size, - column_stats, - }; - - let mut sst_writer = self - .sst_factory - .create_writer( - &sst_write_options, - &task.output_ctx.file_path, - &self.store_picker, - task.input_ctx.files.output_level, - ) - .await - .context(CreateSstWriter { - storage_format_hint: task.output_ctx.write_options.storage_format_hint, - })?; - - let sst_info = sst_writer - .write(request_id, &sst_meta, record_batch_stream) - .await - .box_err() - .with_context(|| WriteSst { - path: task.output_ctx.file_path.to_string(), - })?; - - Ok(CompactionRunnerResult { - sst_info, - sst_meta, - output_file_path: task.output_ctx.file_path.clone(), - }) - } -} - -/// Collect the column stats from a batch of sst meta data. -fn collect_column_stats_from_meta_datas(metas: &[SstMetaData]) -> HashMap { - let mut low_cardinality_counts: HashMap = HashMap::new(); - for meta_data in metas { - let SstMetaData::Parquet(meta_data) = meta_data; - if let Some(column_values) = &meta_data.column_values { - for (col_idx, val_set) in column_values.iter().enumerate() { - let low_cardinality = val_set.is_some(); - if low_cardinality { - let col_name = meta_data.schema.column(col_idx).name.clone(); - low_cardinality_counts - .entry(col_name) - .and_modify(|v| *v += 1) - .or_insert(1); - } - } - } - } - - // Only the column whose cardinality is low in all the metas is a - // low-cardinality column. - // TODO: shall we merge all the distinct values of the column to check whether - // the cardinality is still thought to be low? 
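The rule implemented above is effectively an intersection: a column is reported as low-cardinality only when every input SST's meta data flags it. A stand-alone sketch of that counting step over plain column names:

```rust
use std::collections::HashMap;

/// Keep only columns flagged as low-cardinality in *every* input SST,
/// mirroring collect_column_stats_from_meta_datas above.
fn low_cardinality_columns(per_sst_flags: &[Vec<&str>]) -> Vec<String> {
    let mut counts: HashMap<&str, usize> = HashMap::new();
    for cols in per_sst_flags {
        for col in cols {
            *counts.entry(col).or_insert(0) += 1;
        }
    }
    let mut out: Vec<String> = counts
        .into_iter()
        .filter_map(|(col, cnt)| (cnt == per_sst_flags.len()).then(|| col.to_string()))
        .collect();
    out.sort();
    out
}

fn main() {
    // "tag" is flagged in all three SSTs, "value" only in two of them.
    let flags = vec![vec!["tag", "value"], vec!["tag"], vec!["tag", "value"]];
    assert_eq!(low_cardinality_columns(&flags), vec!["tag".to_string()]);
}
```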
- let low_cardinality_cols = low_cardinality_counts - .into_iter() - .filter_map(|(col_name, cnt)| { - (cnt == metas.len()).then_some(( - col_name, - ColumnStats { - low_cardinality: true, - }, - )) - }); - HashMap::from_iter(low_cardinality_cols) -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use bytes_ext::Bytes; - use common_types::{schema::Schema, tests::build_schema, time::TimeRange}; - - use crate::{ - compaction::runner::local_runner::collect_column_stats_from_meta_datas, - sst::{ - meta_data::SstMetaData, - parquet::meta_data::{ColumnValueSet, ParquetMetaData}, - }, - }; - - fn check_collect_column_stats( - schema: &Schema, - expected_low_cardinality_col_indexes: Vec, - meta_datas: Vec, - ) { - let column_stats = collect_column_stats_from_meta_datas(&meta_datas); - assert_eq!( - column_stats.len(), - expected_low_cardinality_col_indexes.len() - ); - - for col_idx in expected_low_cardinality_col_indexes { - let col_schema = schema.column(col_idx); - assert!(column_stats.contains_key(&col_schema.name)); - } - } - - #[test] - fn test_collect_column_stats_from_metadata() { - let schema = build_schema(); - let build_meta_data = |low_cardinality_col_indexes: Vec| { - let mut column_values = vec![None; 6]; - for idx in low_cardinality_col_indexes { - column_values[idx] = Some(ColumnValueSet::StringValue(Default::default())); - } - let parquet_meta_data = ParquetMetaData { - min_key: Bytes::new(), - max_key: Bytes::new(), - time_range: TimeRange::empty(), - max_sequence: 0, - schema: schema.clone(), - parquet_filter: None, - column_values: Some(column_values), - }; - SstMetaData::Parquet(Arc::new(parquet_meta_data)) - }; - - // Normal case 0 - let meta_datas = vec![ - build_meta_data(vec![0]), - build_meta_data(vec![0]), - build_meta_data(vec![0, 1]), - build_meta_data(vec![0, 2]), - build_meta_data(vec![0, 3]), - ]; - check_collect_column_stats(&schema, vec![0], meta_datas); - - // Normal case 1 - let meta_datas = vec![ - build_meta_data(vec![0]), - build_meta_data(vec![0]), - build_meta_data(vec![]), - build_meta_data(vec![1]), - build_meta_data(vec![3]), - ]; - check_collect_column_stats(&schema, vec![], meta_datas); - - // Normal case 2 - let meta_datas = vec![ - build_meta_data(vec![3, 5]), - build_meta_data(vec![0, 3, 5]), - build_meta_data(vec![0, 1, 2, 3, 5]), - build_meta_data(vec![1, 3, 5]), - ]; - check_collect_column_stats(&schema, vec![3, 5], meta_datas); - } -} diff --git a/src/analytic_engine/src/compaction/runner/mod.rs b/src/analytic_engine/src/compaction/runner/mod.rs deleted file mode 100644 index c8e34484cc..0000000000 --- a/src/analytic_engine/src/compaction/runner/mod.rs +++ /dev/null @@ -1,377 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -pub mod local_runner; -pub mod node_picker; -mod remote_client; -pub mod remote_runner; - -use std::sync::Arc; - -use async_trait::async_trait; -use common_types::{request_id::RequestId, schema::Schema, SequenceNumber}; -use generic_error::{BoxError, GenericError}; -use macros::define_result; -use object_store::Path; -use snafu::{Backtrace, OptionExt, ResultExt, Snafu}; -use table_engine::table::TableId; - -use crate::{ - compaction::CompactionInputFiles, - instance::flush_compaction, - row_iter::IterOptions, - space::SpaceId, - sst::{ - factory::SstWriteOptions, - writer::{MetaData, SstInfo}, - }, - table::data::TableData, -}; - -/// Compaction runner -#[async_trait] -pub trait CompactionRunner: Send + Sync + 'static { - async fn run( - &self, - task: CompactionRunnerTask, - ) -> flush_compaction::Result; -} - -pub type CompactionRunnerPtr = Box; -pub type CompactionRunnerRef = Arc; - -#[derive(Debug, Snafu)] -#[snafu(visibility = "pub")] -pub enum Error { - #[snafu(display("Empty table schema.\nBacktrace:\n{}", backtrace))] - EmptyTableSchema { backtrace: Backtrace }, - - #[snafu(display("Empty input context.\nBacktrace:\n{}", backtrace))] - EmptyInputContext { backtrace: Backtrace }, - - #[snafu(display("Empty ouput context.\nBacktrace:\n{}", backtrace))] - EmptyOuputContext { backtrace: Backtrace }, - - #[snafu(display("Empty compaction input files.\nBacktrace:\n{}", backtrace))] - EmptyCompactionInputFiles { backtrace: Backtrace }, - - #[snafu(display("Empty write options.\nBacktrace:\n{}", backtrace))] - EmptySstWriteOptions { backtrace: Backtrace }, - - #[snafu(display("Sst meta data is empty.\nBacktrace:\n{backtrace}"))] - EmptySstMeta { backtrace: Backtrace }, - - #[snafu(display("Empty sst info.\nBacktrace:\n{}", backtrace))] - EmptySstInfo { backtrace: Backtrace }, - - #[snafu(display("Empty compaction task exec result.\nBacktrace:\n{}", backtrace))] - EmptyExecResult { backtrace: Backtrace }, - - #[snafu(display("Failed to convert table schema, err:{}", source))] - ConvertTableSchema { source: GenericError }, - - #[snafu(display("Failed to convert input context, err:{}", source))] - ConvertInputContext { source: GenericError }, - - #[snafu(display("Failed to convert ouput context, err:{}", source))] - ConvertOuputContext { source: GenericError }, - - #[snafu(display("Failed to convert compaction input files, err:{}", source))] - ConvertCompactionInputFiles { source: GenericError }, - - #[snafu(display("Failed to convert write options, err:{}", source))] - ConvertSstWriteOptions { source: GenericError }, - - #[snafu(display("Failed to convert sst info, err:{}", source))] - ConvertSstInfo { source: GenericError }, - - #[snafu(display("Failed to convert sst meta, err:{}", source))] - ConvertSstMeta { source: GenericError }, - - #[snafu(display("Failed to connect the service endpoint:{}, err:{}", addr, source,))] - FailConnect { addr: String, source: GenericError }, - - #[snafu(display("Failed to execute compaction task, err:{}", source))] - FailExecuteCompactionTask { source: GenericError }, - - #[snafu(display("Missing header in rpc response.\nBacktrace:\n{}", backtrace))] - MissingHeader { backtrace: Backtrace }, - - #[snafu(display( - "Bad response, resp code:{}, msg:{}.\nBacktrace:\n{}", - code, - msg, - backtrace - ))] - BadResponse { - code: u32, - msg: String, - backtrace: Backtrace, - }, -} - -define_result!(Error); - -/// Compaction runner task -#[derive(Debug, Clone)] -pub struct CompactionRunnerTask { - // TODO: unused now, will be used in remote compaction. 
- #[allow(unused)] - pub task_key: String, - /// Trace id for this operation - pub request_id: RequestId, - - pub schema: Schema, - pub space_id: SpaceId, - pub table_id: TableId, - pub sequence: SequenceNumber, - - /// Input context - pub input_ctx: InputContext, - /// Output context - pub output_ctx: OutputContext, -} - -impl CompactionRunnerTask { - pub(crate) fn new( - request_id: RequestId, - input_files: CompactionInputFiles, - table_data: &TableData, - file_id: u64, - sst_write_options: SstWriteOptions, - ) -> Self { - // Create task key. - let task_key = table_data.compaction_task_key(file_id); - - // Create executor task. - let table_options = table_data.table_options(); - - let input_ctx = { - let iter_options = IterOptions { - batch_size: table_options.num_rows_per_row_group, - }; - - InputContext { - files: input_files, - num_rows_per_row_group: table_options.num_rows_per_row_group, - merge_iter_options: iter_options, - need_dedup: table_options.need_dedup(), - } - }; - - let output_ctx = { - let file_path = table_data.sst_file_path(file_id); - OutputContext { - file_path, - write_options: sst_write_options, - } - }; - - Self { - task_key, - request_id, - schema: table_data.schema(), - space_id: table_data.space_id, - table_id: table_data.id, - sequence: table_data.last_sequence(), - input_ctx, - output_ctx, - } - } -} - -impl TryFrom - for CompactionRunnerTask -{ - type Error = Error; - - fn try_from( - request: horaedbproto::compaction_service::ExecuteCompactionTaskRequest, - ) -> Result { - let task_key = request.task_key; - let request_id: RequestId = request.request_id.into(); - - let schema: Schema = request - .schema - .context(EmptyTableSchema)? - .try_into() - .box_err() - .context(ConvertTableSchema)?; - - let space_id: SpaceId = request.space_id; - let table_id: TableId = request.table_id.into(); - let sequence: SequenceNumber = request.sequence; - - let input_ctx: InputContext = request - .input_ctx - .context(EmptyInputContext)? - .try_into() - .box_err() - .context(ConvertInputContext)?; - - let output_ctx: OutputContext = request - .output_ctx - .context(EmptyOuputContext)? - .try_into() - .box_err() - .context(ConvertOuputContext)?; - - Ok(Self { - task_key, - request_id, - schema, - space_id, - table_id, - sequence, - input_ctx, - output_ctx, - }) - } -} - -impl From for horaedbproto::compaction_service::ExecuteCompactionTaskRequest { - fn from(task: CompactionRunnerTask) -> Self { - Self { - task_key: task.task_key, - request_id: task.request_id.into(), - schema: Some((&(task.schema)).into()), - space_id: task.space_id, - table_id: task.table_id.into(), - sequence: task.sequence, - input_ctx: Some(task.input_ctx.into()), - output_ctx: Some(task.output_ctx.into()), - } - } -} - -pub struct CompactionRunnerResult { - pub output_file_path: Path, - pub sst_info: SstInfo, - pub sst_meta: MetaData, -} - -impl TryFrom - for CompactionRunnerResult -{ - type Error = Error; - - fn try_from( - resp: horaedbproto::compaction_service::ExecuteCompactionTaskResponse, - ) -> Result { - let res = resp.result.context(EmptyExecResult)?; - let sst_info = res - .sst_info - .context(EmptySstInfo)? - .try_into() - .box_err() - .context(ConvertSstInfo)?; - let sst_meta = res - .sst_meta - .context(EmptySstMeta)? 
- .try_into() - .box_err() - .context(ConvertSstMeta)?; - - Ok(Self { - output_file_path: res.output_file_path.into(), - sst_info, - sst_meta, - }) - } -} - -#[derive(Debug, Clone)] -pub struct InputContext { - /// Input sst files in this compaction - pub files: CompactionInputFiles, - pub num_rows_per_row_group: usize, - pub merge_iter_options: IterOptions, - pub need_dedup: bool, -} - -impl TryFrom for InputContext { - type Error = Error; - - fn try_from(value: horaedbproto::compaction_service::InputContext) -> Result { - let num_rows_per_row_group: usize = value.num_rows_per_row_group as usize; - let merge_iter_options = IterOptions { - batch_size: value.merge_iter_options as usize, - }; - let need_dedup = value.need_dedup; - - let files: CompactionInputFiles = value - .files - .context(EmptyCompactionInputFiles)? - .try_into() - .box_err() - .context(ConvertCompactionInputFiles)?; - - Ok(InputContext { - files, - num_rows_per_row_group, - merge_iter_options, - need_dedup, - }) - } -} - -impl From for horaedbproto::compaction_service::InputContext { - fn from(value: InputContext) -> Self { - Self { - files: Some(value.files.into()), - num_rows_per_row_group: value.num_rows_per_row_group as u64, - merge_iter_options: value.merge_iter_options.batch_size as u64, - need_dedup: value.need_dedup, - } - } -} - -#[derive(Debug, Clone)] -pub struct OutputContext { - /// Output sst file path - pub file_path: Path, - /// Output sst write context - pub write_options: SstWriteOptions, -} - -impl TryFrom for OutputContext { - type Error = Error; - - fn try_from(value: horaedbproto::compaction_service::OutputContext) -> Result { - let file_path: Path = value.file_path.into(); - let write_options: SstWriteOptions = value - .write_options - .context(EmptySstWriteOptions)? - .try_into() - .box_err() - .context(ConvertSstWriteOptions)?; - - Ok(OutputContext { - file_path, - write_options, - }) - } -} - -impl From for horaedbproto::compaction_service::OutputContext { - fn from(value: OutputContext) -> Self { - Self { - file_path: value.file_path.into(), - write_options: Some(value.write_options.into()), - } - } -} diff --git a/src/analytic_engine/src/compaction/runner/node_picker.rs b/src/analytic_engine/src/compaction/runner/node_picker.rs deleted file mode 100644 index bf21787c71..0000000000 --- a/src/analytic_engine/src/compaction/runner/node_picker.rs +++ /dev/null @@ -1,88 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Remote compaction node picker. 
- -use std::sync::Arc; - -use async_trait::async_trait; -use macros::define_result; -use meta_client::{types::FetchCompactionNodeRequest, MetaClientRef}; -use serde::{Deserialize, Serialize}; -use snafu::{ResultExt, Snafu}; - -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(tag = "node_picker", content = "endpoint")] -pub enum NodePicker { - // Local node picker that specifies the local endpoint. - // The endpoint in the form `addr:port`. - Local(String), - Remote, -} - -#[async_trait] -pub trait CompactionNodePicker: Send + Sync { - /// Get the addr of the remote compaction node. - async fn get_compaction_node(&self) -> Result; -} - -pub type RemoteCompactionNodePickerRef = Arc; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Meta client fetch compaciton node failed, err:{source}."))] - FetchCompactionNodeFailure { source: meta_client::Error }, -} - -define_result!(Error); - -/// RemoteCompactionNodePickerImpl is an implementation of -/// [`CompactionNodePicker`] based [`MetaClient`]. -pub struct RemoteCompactionNodePickerImpl { - pub meta_client: MetaClientRef, -} - -#[async_trait] -impl CompactionNodePicker for RemoteCompactionNodePickerImpl { - /// Get proper remote compaction node info for compaction offload with meta - /// client. - async fn get_compaction_node(&self) -> Result { - let req = FetchCompactionNodeRequest::default(); - let resp = self - .meta_client - .fetch_compaction_node(req) - .await - .context(FetchCompactionNodeFailure)?; - - let compaction_node_addr = resp.endpoint; - Ok(compaction_node_addr) - } -} - -/// LocalCompactionNodePickerImpl is an implementation of -/// [`CompactionNodePicker`] mainly used for testing. -pub struct LocalCompactionNodePickerImpl { - pub endpoint: String, -} - -#[async_trait] -impl CompactionNodePicker for LocalCompactionNodePickerImpl { - /// Return the local addr and port of grpc service. - async fn get_compaction_node(&self) -> Result { - Ok(self.endpoint.clone()) - } -} diff --git a/src/analytic_engine/src/compaction/runner/remote_client.rs b/src/analytic_engine/src/compaction/runner/remote_client.rs deleted file mode 100644 index cf1f69be4a..0000000000 --- a/src/analytic_engine/src/compaction/runner/remote_client.rs +++ /dev/null @@ -1,148 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
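Since `NodePicker` above is an adjacently tagged serde enum, its configuration is a `node_picker` field plus an optional `endpoint`. A small sketch of the shape it accepts, using `serde_json` purely for illustration (assumes `serde` with the `derive` feature and `serde_json` as dependencies):

```rust
use serde::Deserialize;

// Same shape as the NodePicker enum above (adjacently tagged).
#[derive(Debug, Deserialize, PartialEq)]
#[serde(tag = "node_picker", content = "endpoint")]
enum NodePicker {
    Local(String),
    Remote,
}

fn main() {
    // Local picker carries the endpoint as its content field.
    let local: NodePicker =
        serde_json::from_str(r#"{ "node_picker": "Local", "endpoint": "127.0.0.1:7878" }"#).unwrap();
    assert_eq!(local, NodePicker::Local("127.0.0.1:7878".to_string()));

    // The Remote variant needs no endpoint; it is resolved via the meta client.
    let remote: NodePicker = serde_json::from_str(r#"{ "node_picker": "Remote" }"#).unwrap();
    assert_eq!(remote, NodePicker::Remote);
}
```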
- -use std::sync::Arc; - -use async_trait::async_trait; -use generic_error::BoxError; -use horaedbproto::{ - common::ResponseHeader, compaction_service::compaction_service_client::CompactionServiceClient, -}; -use logger::info; -use serde::{Deserialize, Serialize}; -use snafu::{OptionExt, ResultExt}; -use time_ext::ReadableDuration; - -use crate::compaction::runner::{ - BadResponse, FailConnect, FailExecuteCompactionTask, MissingHeader, Result, -}; - -type CompactionServiceGrpcClient = CompactionServiceClient; - -#[derive(Debug, Deserialize, Clone, Serialize)] -#[serde(default)] -pub struct CompactionClientConfig { - pub compaction_server_addr: String, - pub timeout: ReadableDuration, -} - -impl Default for CompactionClientConfig { - fn default() -> Self { - Self { - compaction_server_addr: "127.0.0.1:7878".to_string(), - timeout: ReadableDuration::secs(5), - } - } -} - -/// CompactionClient is the abstraction of client used for HoraeDB to -/// communicate with CompactionServer cluster. -#[async_trait] -pub trait CompactionClient: Send + Sync { - async fn execute_compaction_task( - &self, - req: horaedbproto::compaction_service::ExecuteCompactionTaskRequest, - ) -> Result; -} - -pub type CompactionClientRef = Arc; - -/// Default compaction client impl, will interact with the remote compaction -/// node. -pub struct CompactionClientImpl { - client: CompactionServiceGrpcClient, -} - -impl CompactionClientImpl { - pub async fn connect(config: CompactionClientConfig) -> Result { - let client = { - let endpoint = - tonic::transport::Endpoint::from_shared(config.compaction_server_addr.to_string()) - .box_err() - .context(FailConnect { - addr: &config.compaction_server_addr, - })? - .timeout(config.timeout.0); - CompactionServiceGrpcClient::connect(endpoint) - .await - .box_err() - .context(FailConnect { - addr: &config.compaction_server_addr, - })? - }; - - Ok(Self { client }) - } - - #[inline] - fn client(&self) -> CompactionServiceGrpcClient { - self.client.clone() - } -} - -#[async_trait] -impl CompactionClient for CompactionClientImpl { - async fn execute_compaction_task( - &self, - pb_req: horaedbproto::compaction_service::ExecuteCompactionTaskRequest, - ) -> Result { - // TODO(leslie): Add request header for ExecuteCompactionTaskRequest. - - info!( - "Compaction client try to execute compaction task in remote compaction node, req:{:?}", - pb_req - ); - - let pb_resp = self - .client() - .execute_compaction_task(pb_req) - .await - .box_err() - .context(FailExecuteCompactionTask)? - .into_inner(); - - info!( - "Compaction client finish executing compaction task in remote compaction node, req:{:?}", - pb_resp - ); - - check_response_header(&pb_resp.header)?; - Ok(pb_resp) - } -} - -// TODO(leslie): Consider to refactor and reuse the similar function in -// meta_client. 
-fn check_response_header(header: &Option) -> Result<()> { - let header = header.as_ref().context(MissingHeader)?; - if header.code == 0 { - Ok(()) - } else { - BadResponse { - code: header.code, - msg: header.error.clone(), - } - .fail() - } -} - -pub async fn build_compaction_client( - config: CompactionClientConfig, -) -> Result { - let compaction_client = CompactionClientImpl::connect(config).await?; - Ok(Arc::new(compaction_client)) -} diff --git a/src/analytic_engine/src/compaction/runner/remote_runner.rs b/src/analytic_engine/src/compaction/runner/remote_runner.rs deleted file mode 100644 index 59a70c2fc2..0000000000 --- a/src/analytic_engine/src/compaction/runner/remote_runner.rs +++ /dev/null @@ -1,116 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use async_trait::async_trait; -use generic_error::BoxError; -use logger::info; -use snafu::ResultExt; - -use super::{local_runner::LocalCompactionRunner, node_picker::RemoteCompactionNodePickerRef}; -use crate::{ - compaction::runner::{ - remote_client::{build_compaction_client, CompactionClientConfig, CompactionClientRef}, - CompactionRunner, CompactionRunnerResult, CompactionRunnerTask, - }, - instance::flush_compaction::{ - self, BuildCompactionClientFailed, ConvertCompactionTaskResponse, - GetCompactionClientFailed, PickCompactionNodeFailed, Result, - }, -}; - -pub struct RemoteCompactionRunner { - pub node_picker: RemoteCompactionNodePickerRef, - - pub fallback_local_when_failed: bool, - /// Responsible for executing compaction task locally if fail to remote - /// compact when `fallback_local_when_failed` is true, used for better fault - /// tolerance. - pub local_compaction_runner: LocalCompactionRunner, -} - -impl RemoteCompactionRunner { - async fn get_compaction_client(&self) -> Result { - let mut config = CompactionClientConfig::default(); - let endpoint = self - .node_picker - .get_compaction_node() - .await - .context(PickCompactionNodeFailed)?; - config.compaction_server_addr = make_formatted_endpoint(&endpoint); - - let client = build_compaction_client(config) - .await - .context(BuildCompactionClientFailed)?; - Ok(client) - } - - async fn local_compact(&self, task: CompactionRunnerTask) -> Result { - self.local_compaction_runner.run(task).await - } -} - -#[async_trait] -impl CompactionRunner for RemoteCompactionRunner { - /// Run the compaction task either on a remote node or fall back to local - /// compaction. 
- async fn run(&self, task: CompactionRunnerTask) -> Result { - let client = self - .get_compaction_client() - .await - .box_err() - .context(GetCompactionClientFailed); - - let pb_resp = match client { - Ok(client) => match client.execute_compaction_task(task.clone().into()).await { - Ok(resp) => resp, - Err(e) => { - if !self.fallback_local_when_failed { - return Err(flush_compaction::Error::RemoteCompactFailed { source: e }); - } - - info!( - "The compaction task falls back to local because of error:{}", - e - ); - return self.local_compact(task).await; - } - }, - Err(e) => { - if !self.fallback_local_when_failed { - return Err(e); - } - - info!( - "The compaction task falls back to local because of error:{}", - e - ); - return self.local_compact(task).await; - } - }; - - let resp = pb_resp - .try_into() - .box_err() - .context(ConvertCompactionTaskResponse)?; - - Ok(resp) - } -} - -fn make_formatted_endpoint(endpoint: &str) -> String { - format!("http://{endpoint}") -} diff --git a/src/analytic_engine/src/compaction/scheduler.rs b/src/analytic_engine/src/compaction/scheduler.rs deleted file mode 100644 index 215bd0ac94..0000000000 --- a/src/analytic_engine/src/compaction/scheduler.rs +++ /dev/null @@ -1,822 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// Compaction scheduler. 
- -use std::{ - collections::{HashMap, VecDeque}, - hash::Hash, - sync::{ - atomic::{AtomicBool, AtomicUsize, Ordering}, - Arc, RwLock, - }, - time::Duration, -}; - -use async_trait::async_trait; -use common_types::request_id::RequestId; -use futures::{stream::FuturesUnordered, StreamExt}; -use logger::{debug, error, info, warn}; -use macros::define_result; -use runtime::{JoinHandle, Runtime}; -use serde::{Deserialize, Serialize}; -use size_ext::ReadableSize; -use snafu::{ResultExt, Snafu}; -use table_engine::table::TableId; -use time_ext::{DurationExt, ReadableDuration}; -use tokio::{ - sync::{ - mpsc::{self, error::TrySendError, Receiver, Sender}, - Mutex, - }, - time, -}; - -use crate::{ - compaction::{ - compactor::Compactor, metrics::COMPACTION_PENDING_REQUEST_GAUGE, picker::PickerContext, - runner::CompactionRunnerPtr, CompactionTask, PickerManager, TableCompactionRequest, - WaitError, WaiterNotifier, - }, - instance::{ - flush_compaction::{Flusher, TableFlushOptions}, - SpaceStore, - }, - sst::factory::SstWriteOptions, - table::data::TableDataRef, - TableOptions, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to join compaction schedule worker, err:{}", source))] - JoinWorker { source: runtime::Error }, -} - -define_result!(Error); - -#[derive(Debug, Clone, Deserialize, Serialize)] -#[serde(default)] -pub struct SchedulerConfig { - pub schedule_channel_len: usize, - pub schedule_interval: ReadableDuration, - pub max_ongoing_tasks: usize, - pub max_unflushed_duration: ReadableDuration, - pub memory_limit: ReadableSize, - pub max_pending_compaction_tasks: usize, -} - -impl Default for SchedulerConfig { - fn default() -> Self { - Self { - schedule_channel_len: 16, - // 30 seconds schedule interval. - schedule_interval: ReadableDuration(Duration::from_secs(30)), - max_ongoing_tasks: 8, - // flush_interval default is 5h. - max_unflushed_duration: ReadableDuration(Duration::from_secs(60 * 60 * 5)), - memory_limit: ReadableSize::gb(4), - max_pending_compaction_tasks: 1024, - } - } -} - -enum ScheduleTask { - Request(TableCompactionRequest), - Schedule, - Exit, -} - -#[async_trait] -pub trait CompactionScheduler { - /// Stop the scheduler. - async fn stop_scheduler(&self) -> Result<()>; - - /// Schedule a compaction job to background workers. - async fn schedule_table_compaction(&self, request: TableCompactionRequest) -> bool; -} - -// A FIFO queue that remove duplicate values by key. -struct RequestQueue { - keys: VecDeque, - values: HashMap, -} - -impl Default for RequestQueue { - fn default() -> Self { - Self { - keys: VecDeque::default(), - values: HashMap::default(), - } - } -} - -impl RequestQueue { - fn push_back(&mut self, key: K, value: V) -> bool { - if self.values.insert(key.clone(), value).is_none() { - self.keys.push_back(key); - return true; - } - false - } - - fn pop_front(&mut self) -> Option { - if let Some(key) = self.keys.pop_front() { - return self.values.remove(&key); - } - None - } - - #[inline] - fn len(&self) -> usize { - self.values.len() - } - - #[inline] - fn is_empty(&self) -> bool { - self.values.is_empty() - } -} - -type RequestBuf = RwLock>; - -/// Combined with [`MemoryUsageToken`], [`MemoryLimit`] provides a mechanism to -/// impose limit on the memory usage. -#[derive(Clone, Debug)] -struct MemoryLimit { - usage: Arc, - // TODO: support to adjust this threshold dynamically. - limit: usize, -} - -/// The token for the memory usage, which should not derive Clone. 
-/// The applied memory will be subtracted from the global memory usage. -#[derive(Debug)] -struct MemoryUsageToken { - global_usage: Arc, - applied_usage: usize, -} - -impl Drop for MemoryUsageToken { - fn drop(&mut self) { - self.global_usage - .fetch_sub(self.applied_usage, Ordering::Relaxed); - } -} - -impl MemoryLimit { - fn new(limit: usize) -> Self { - Self { - usage: Arc::new(AtomicUsize::new(0)), - limit, - } - } - - /// Try to apply a token if possible. - fn try_apply_token(&self, bytes: usize) -> Option { - let token = self.apply_token(bytes); - if self.is_exceeded() { - None - } else { - Some(token) - } - } - - fn apply_token(&self, bytes: usize) -> MemoryUsageToken { - self.usage.fetch_add(bytes, Ordering::Relaxed); - - MemoryUsageToken { - global_usage: self.usage.clone(), - applied_usage: bytes, - } - } - - #[inline] - fn is_exceeded(&self) -> bool { - self.usage.load(Ordering::Relaxed) > self.limit - } -} - -struct OngoingTaskLimit { - ongoing_tasks: AtomicUsize, - /// Buffer to hold pending requests - request_buf: RequestBuf, - max_pending_compaction_tasks: usize, -} - -impl OngoingTaskLimit { - #[inline] - fn start_task(&self) { - self.ongoing_tasks.fetch_add(1, Ordering::SeqCst); - } - - #[inline] - fn finish_task(&self) { - self.ongoing_tasks.fetch_sub(1, Ordering::SeqCst); - } - - #[inline] - fn pending_task_size(&self) -> usize { - let buf = self.request_buf.read().unwrap(); - buf.len() - } - - #[inline] - fn add_request(&self, request: TableCompactionRequest) { - let mut dropped = 0; - - { - let mut req_buf = self.request_buf.write().unwrap(); - - // Remove older requests - if req_buf.len() >= self.max_pending_compaction_tasks { - while req_buf.len() >= self.max_pending_compaction_tasks { - req_buf.pop_front(); - dropped += 1; - } - COMPACTION_PENDING_REQUEST_GAUGE.sub(dropped) - } - - if req_buf.push_back(request.table_data.id, request) { - COMPACTION_PENDING_REQUEST_GAUGE.add(1) - } - } - - if dropped > 0 { - warn!( - "Too many compaction pending tasks, limit:{}, dropped:{}.", - self.max_pending_compaction_tasks, dropped, - ); - } - } - - fn drain_requests(&self, max_num: usize) -> Vec { - let mut result = Vec::with_capacity(max_num); - let mut req_buf = self.request_buf.write().unwrap(); - - while result.len() < max_num { - if let Some(req) = req_buf.pop_front() { - result.push(req); - } else { - break; - } - } - COMPACTION_PENDING_REQUEST_GAUGE.sub(result.len() as i64); - - result - } - - #[inline] - fn has_pending_requests(&self) -> bool { - !self.request_buf.read().unwrap().is_empty() - } - - #[inline] - fn request_buf_len(&self) -> usize { - self.request_buf.read().unwrap().len() - } - - #[inline] - fn ongoing_tasks(&self) -> usize { - self.ongoing_tasks.load(Ordering::SeqCst) - } -} - -pub type CompactionSchedulerRef = Arc; - -pub struct SchedulerImpl { - sender: Sender, - running: Arc, - handle: Mutex>, -} - -impl SchedulerImpl { - pub fn new( - space_store: Arc, - runner: CompactionRunnerPtr, - runtime: Arc, - config: SchedulerConfig, - write_sst_max_buffer_size: usize, - min_flush_interval_ms: u64, - ) -> Self { - let (tx, rx) = mpsc::channel(config.schedule_channel_len); - let running = Arc::new(AtomicBool::new(true)); - - let compactor = Arc::new(Compactor::new(runner, space_store.manifest.clone())); - let mut worker = ScheduleWorker { - sender: tx.clone(), - receiver: rx, - compactor, - space_store, - runtime: runtime.clone(), - schedule_interval: config.schedule_interval.0, - picker_manager: PickerManager, - max_ongoing_tasks: config.max_ongoing_tasks, 
- max_unflushed_duration: config.max_unflushed_duration.0, - write_sst_max_buffer_size, - min_flush_interval_ms, - limit: Arc::new(OngoingTaskLimit { - ongoing_tasks: AtomicUsize::new(0), - request_buf: RwLock::new(RequestQueue::default()), - max_pending_compaction_tasks: config.max_pending_compaction_tasks, - }), - running: running.clone(), - memory_limit: MemoryLimit::new(config.memory_limit.as_byte() as usize), - }; - - let handle = runtime.spawn(async move { - worker.schedule_loop().await; - }); - - Self { - sender: tx, - running, - handle: Mutex::new(handle), - } - } -} - -#[async_trait] -impl CompactionScheduler for SchedulerImpl { - async fn stop_scheduler(&self) -> Result<()> { - self.running.store(false, Ordering::Relaxed); - // Wake up the receiver, if the channel is full, the worker should be busy and - // check the running flag later. - let _ = self.sender.try_send(ScheduleTask::Exit); - - let mut handle = self.handle.lock().await; - (&mut *handle).await.context(JoinWorker)?; - - Ok(()) - } - - async fn schedule_table_compaction(&self, request: TableCompactionRequest) -> bool { - let send_res = self.sender.try_send(ScheduleTask::Request(request)); - - match send_res { - Err(TrySendError::Full(_)) => { - debug!("Compaction scheduler is busy, drop compaction request"); - false - } - Err(TrySendError::Closed(_)) => { - error!("Compaction scheduler is closed, drop compaction request"); - false - } - Ok(_) => true, - } - } -} - -struct OngoingTask { - limit: Arc, - sender: Sender, -} - -impl OngoingTask { - async fn schedule_worker_if_need(&self) { - if self.limit.has_pending_requests() { - if let Err(e) = self.sender.send(ScheduleTask::Schedule).await { - error!("Fail to schedule worker, err:{}", e); - } - } - } -} - -struct ScheduleWorker { - sender: Sender, - receiver: Receiver, - space_store: Arc, - compactor: Arc, - runtime: Arc, - schedule_interval: Duration, - max_unflushed_duration: Duration, - picker_manager: PickerManager, - max_ongoing_tasks: usize, - write_sst_max_buffer_size: usize, - min_flush_interval_ms: u64, - limit: Arc, - running: Arc, - memory_limit: MemoryLimit, -} - -#[inline] -async fn schedule_table_compaction(sender: Sender, request: TableCompactionRequest) { - if let Err(e) = sender.send(ScheduleTask::Request(request)).await { - error!("Fail to send table compaction request, err:{}", e); - } -} - -impl ScheduleWorker { - async fn schedule_loop(&mut self) { - while self.running.load(Ordering::Relaxed) { - // TODO(yingwen): Maybe add a random offset to the interval. - match time::timeout(self.schedule_interval, self.receiver.recv()).await { - Ok(Some(schedule_task)) => { - self.handle_schedule_task(schedule_task).await; - } - Ok(None) => { - // The channel is disconnected. - info!("Channel disconnected, compaction schedule worker exit"); - break; - } - Err(_) => { - // Timeout. - info!("Periodical compaction schedule start"); - - self.schedule().await; - - info!("Periodical compaction schedule end"); - } - } - } - - info!("Compaction schedule loop exit"); - } - - // This function is called sequentially, so we can mark files in compaction - // without race. 
- async fn handle_schedule_task(&self, schedule_task: ScheduleTask) { - let ongoing = self.limit.ongoing_tasks(); - match schedule_task { - ScheduleTask::Request(compact_req) => { - debug!("Ongoing compaction tasks:{ongoing}"); - if ongoing >= self.max_ongoing_tasks { - self.limit.add_request(compact_req); - warn!( - "Too many compaction ongoing tasks:{ongoing}, max:{}, buf_len:{}", - self.max_ongoing_tasks, - self.limit.request_buf_len() - ); - } else { - self.handle_table_compaction_request(compact_req).await; - } - } - ScheduleTask::Schedule => { - if self.max_ongoing_tasks > ongoing { - let pending = self.limit.drain_requests(self.max_ongoing_tasks - ongoing); - let mut futures: FuturesUnordered<_> = pending - .into_iter() - .map(|req| self.handle_table_compaction_request(req)) - .collect(); - - debug!("Scheduled {} pending compaction tasks.", futures.len()); - while (futures.next().await).is_some() {} - } else { - warn!( - "Too many compaction ongoing tasks:{ongoing}, max:{}, buf_len:{}", - self.max_ongoing_tasks, - self.limit.request_buf_len() - ); - } - } - ScheduleTask::Exit => (), - }; - } - - fn do_table_compaction_task( - &self, - table_data: TableDataRef, - compaction_task: CompactionTask, - waiter_notifier: WaiterNotifier, - token: MemoryUsageToken, - ) { - let keep_scheduling_compaction = - self.is_pending_queue_hungry() && compaction_task.contains_min_level(); - - let compactor = self.compactor.clone(); - self.limit.start_task(); - let task = OngoingTask { - sender: self.sender.clone(), - limit: self.limit.clone(), - }; - - let sender = self.sender.clone(); - let request_id = RequestId::next_id(); - let storage_format_hint = table_data.table_options().storage_format_hint; - let sst_write_options = SstWriteOptions { - storage_format_hint, - num_rows_per_row_group: table_data.table_options().num_rows_per_row_group, - compression: table_data.table_options().compression, - max_buffer_size: self.write_sst_max_buffer_size, - column_stats: Default::default(), - }; - - // Do actual costly compact job in background. - self.runtime.spawn(async move { - // Release the token after compaction finished. - let _token = token; - - // We will reschedule table with many l0 sst as fast as we can. - if keep_scheduling_compaction { - schedule_table_compaction( - sender, - TableCompactionRequest::no_waiter(table_data.clone()), - ) - .await; - } - let res = compactor - .compact_table( - request_id.clone(), - &table_data, - &compaction_task, - &sst_write_options, - ) - .await; - - task.limit.finish_task(); - task.schedule_worker_if_need().await; - - // Notify the background compact table result. - match res { - Ok(()) => { - waiter_notifier.notify_wait_result(Ok(())); - } - Err(e) => { - error!("Failed to compact table, table_name:{}, table_id:{}, request_id:{request_id}, err:{e}", table_data.name, table_data.id); - let e = Arc::new(e); - - let wait_err = WaitError::Compaction { source: e }; - waiter_notifier.notify_wait_result(Err(wait_err)); - } - } - }); - } - - // Try to apply the memory usage token. Return `None` if the current memory - // usage exceeds the limit. - fn try_apply_memory_usage_token_for_task( - &self, - task: &CompactionTask, - ) -> Option { - let input_size = task.estimated_total_input_file_size(); - // Currently sst build is in a streaming way, so it wouldn't consume memory more - // than its size. 
- let estimate_memory_usage = input_size; - - let token = self.memory_limit.try_apply_token(estimate_memory_usage); - - debug!( - "Apply memory for compaction, current usage:{}, applied:{}, applied_result:{:?}", - self.memory_limit.usage.load(Ordering::Relaxed), - estimate_memory_usage, - token, - ); - - token - } - - async fn handle_table_compaction_request(&self, compact_req: TableCompactionRequest) { - let table_data = compact_req.table_data.clone(); - if !table_data.allow_compaction() { - error!( - "Table status is not ok, unable to compact further, table:{}, table_id:{}", - table_data.name, table_data.id - ); - return; - } - - let table_options = table_data.table_options(); - let compaction_strategy = table_options.compaction_strategy; - let picker = self.picker_manager.get_picker(compaction_strategy); - let picker_ctx = match new_picker_context(&table_options) { - Some(v) => v, - None => { - warn!("No valid context can be created, compaction request will be ignored, table_id:{}, table_name:{}", - table_data.id, table_data.name); - return; - } - }; - let version = table_data.current_version(); - - // Pick compaction task. - let compaction_task = version.pick_for_compaction(picker_ctx, &picker); - let compaction_task = match compaction_task { - Ok(v) => v, - Err(e) => { - error!( - "Compaction scheduler failed to pick compaction, table:{}, table_id:{}, err:{}", - table_data.name, table_data.id, e - ); - // Now the error of picking compaction is considered not fatal and not sent to - // compaction notifier. - return; - } - }; - - let token = match self.try_apply_memory_usage_token_for_task(&compaction_task) { - Some(v) => v, - None => { - // Memory usage exceeds the threshold, let's put pack the - // request. - warn!( - "Compaction task is ignored, because of high memory usage:{}, task:{:?}, table:{}", - self.memory_limit.usage.load(Ordering::Relaxed), - compaction_task, table_data.name - ); - return; - } - }; - - let waiter_notifier = WaiterNotifier::new(compact_req.waiter); - - self.do_table_compaction_task(table_data, compaction_task, waiter_notifier, token); - } - - async fn schedule(&mut self) { - self.compact_tables().await; - self.flush_tables().await; - } - - async fn compact_tables(&mut self) { - let mut tables_buf = Vec::new(); - self.space_store.list_all_tables(&mut tables_buf); - - let request_id = RequestId::next_id(); - for table_data in tables_buf { - info!( - "Period purge, table:{}, table_id:{}, request_id:{}", - table_data.name, table_data.id, request_id - ); - - // This will add a compaction request to queue and avoid schedule thread - // blocked. 
- self.limit - .add_request(TableCompactionRequest::no_waiter(table_data)); - } - if let Err(e) = self.sender.send(ScheduleTask::Schedule).await { - error!("Fail to schedule table compaction request, err:{}", e); - } - } - - async fn flush_tables(&self) { - let mut tables_buf = Vec::new(); - self.space_store.list_all_tables(&mut tables_buf); - let flusher = Flusher { - space_store: self.space_store.clone(), - runtime: self.runtime.clone(), - write_sst_max_buffer_size: self.write_sst_max_buffer_size, - min_flush_interval_ms: Some(self.min_flush_interval_ms), - }; - - for table_data in &tables_buf { - let last_flush_time = table_data.last_flush_time(); - let flush_deadline_ms = last_flush_time + self.max_unflushed_duration.as_millis_u64(); - let now_ms = time_ext::current_time_millis(); - if now_ms > flush_deadline_ms { - info!( - "Scheduled flush is triggered, table:{}, last_flush_time:{last_flush_time}ms, max_unflushed_duration:{:?}", - table_data.name, - self.max_unflushed_duration, - ); - - let mut serial_exec = table_data.serial_exec.lock().await; - let flush_scheduler = serial_exec.flush_scheduler(); - // Instance flush the table asynchronously. - if let Err(e) = flusher - .schedule_flush(flush_scheduler, table_data, TableFlushOptions::default()) - .await - { - error!("Failed to flush table, err:{}", e); - } - } - } - } - - fn is_pending_queue_hungry(&self) -> bool { - // TODO: Currently we consider pending queue is hungry when number of pending - // tasks is less than `max_ongoing_tasks`, maybe we can add a new option - // to configure it. - self.limit.pending_task_size() < self.max_ongoing_tasks - } -} - -// If segment duration is None, then no compaction should be triggered, but we -// return a None context instead of panic here. -fn new_picker_context(table_opts: &TableOptions) -> Option { - table_opts - .segment_duration() - .map(|segment_duration| PickerContext { - segment_duration, - ttl: table_opts.ttl().map(|ttl| ttl.0), - strategy: table_opts.compaction_strategy, - }) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_memory_usage_limit_apply() { - let limit = MemoryLimit::new(100); - let cases = vec![ - // One case is (applied_requests, applied_results). - (vec![10, 20, 90, 30], vec![true, true, false, true]), - (vec![100, 10], vec![true, false]), - (vec![0, 90, 10], vec![true, true, true]), - ]; - - for (apply_requests, expect_applied_results) in cases { - assert_eq!(limit.usage.load(Ordering::Relaxed), 0); - - let mut applied_tokens = Vec::with_capacity(apply_requests.len()); - for bytes in &apply_requests { - let token = limit.try_apply_token(*bytes); - applied_tokens.push(token); - } - assert_eq!(applied_tokens.len(), expect_applied_results.len()); - assert_eq!(applied_tokens.len(), applied_tokens.len()); - - for (token, (apply_bytes, applied)) in applied_tokens.into_iter().zip( - apply_requests - .into_iter() - .zip(expect_applied_results.into_iter()), - ) { - if applied { - let token = token.unwrap(); - assert_eq!(token.applied_usage, apply_bytes); - assert_eq!( - token.global_usage.load(Ordering::Relaxed), - limit.usage.load(Ordering::Relaxed), - ); - } - } - } - } - - #[test] - fn test_memory_usage_limit_release() { - let limit = MemoryLimit::new(100); - - let cases = vec![ - // One case includes the operation consisting of (applied bytes, whether to keep the - // applied token) and final memory usage. 
- (vec![(10, false), (20, false)], 0), - (vec![(100, false), (10, true), (20, true), (30, true)], 60), - (vec![(0, false), (100, false), (20, true), (30, false)], 20), - ]; - - for (ops, expect_memory_usage) in cases { - assert_eq!(limit.usage.load(Ordering::Relaxed), 0); - - let mut tokens = Vec::new(); - for (applied_bytes, keep_token) in ops { - let token = limit.try_apply_token(applied_bytes); - if keep_token { - tokens.push(token); - } - } - - assert_eq!(limit.usage.load(Ordering::Relaxed), expect_memory_usage); - } - } - - #[test] - fn test_request_queue() { - let mut q: RequestQueue = RequestQueue::default(); - assert!(q.is_empty()); - assert_eq!(0, q.len()); - - q.push_back(1, "task1".to_string()); - q.push_back(2, "task2".to_string()); - q.push_back(3, "task3".to_string()); - - assert_eq!(3, q.len()); - assert!(!q.is_empty()); - - assert_eq!("task1", q.pop_front().unwrap()); - assert_eq!("task2", q.pop_front().unwrap()); - assert_eq!("task3", q.pop_front().unwrap()); - assert!(q.pop_front().is_none()); - assert!(q.is_empty()); - - q.push_back(1, "task1".to_string()); - q.push_back(2, "task2".to_string()); - q.push_back(3, "task3".to_string()); - q.push_back(1, "task11".to_string()); - q.push_back(3, "task33".to_string()); - q.push_back(3, "task333".to_string()); - - assert_eq!(3, q.len()); - assert_eq!("task11", q.pop_front().unwrap()); - assert_eq!("task2", q.pop_front().unwrap()); - assert_eq!("task333", q.pop_front().unwrap()); - assert!(q.pop_front().is_none()); - assert!(q.is_empty()); - assert_eq!(0, q.len()); - } -} diff --git a/src/analytic_engine/src/context.rs b/src/analytic_engine/src/context.rs deleted file mode 100644 index 1a3ba8a848..0000000000 --- a/src/analytic_engine/src/context.rs +++ /dev/null @@ -1,44 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Context for instance - -use std::{fmt, sync::Arc}; - -use table_engine::engine::EngineRuntimes; - -use crate::{sst::meta_data::cache::MetaCacheRef, Config}; - -/// Context for instance open -pub struct OpenContext { - /// Engine config - pub config: Config, - - /// Background job runtime - pub runtimes: Arc, - - /// Sst meta data cache. - pub meta_cache: Option, -} - -impl fmt::Debug for OpenContext { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("OpenContext") - .field("config", &self.config) - .finish() - } -} diff --git a/src/analytic_engine/src/engine.rs b/src/analytic_engine/src/engine.rs deleted file mode 100644 index 6f8735343a..0000000000 --- a/src/analytic_engine/src/engine.rs +++ /dev/null @@ -1,381 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Implements the TableEngine trait - -use std::{collections::HashMap, sync::Arc}; - -use async_trait::async_trait; -use common_types::table::ShardId; -use generic_error::BoxError; -use logger::{error, info}; -use prometheus::{core::Collector, HistogramVec, IntCounterVec}; -use snafu::{OptionExt, ResultExt}; -use table_engine::{ - engine::{ - Close, CloseShardRequest, CloseTableRequest, CreateTableParams, CreateTableRequest, - DropTableRequest, OpenShard, OpenShardRequest, OpenShardResult, OpenTableNoCause, - OpenTableRequest, OpenTableWithCause, Result, ShardStats, TableDef, TableEngine, - TableEngineStats, Unexpected, - }, - table::{SchemaId, TableRef}, - ANALYTIC_ENGINE_TYPE, -}; - -use crate::{ - instance::InstanceRef, - space::SpaceId, - sst::metrics::FETCHED_SST_BYTES_HISTOGRAM, - table::{metrics::TABLE_WRITE_BYTES_COUNTER, TableImpl}, -}; - -/// TableEngine implementation -pub struct TableEngineImpl { - /// Instance of the table engine - instance: InstanceRef, -} - -impl Clone for TableEngineImpl { - fn clone(&self) -> Self { - Self { - instance: self.instance.clone(), - } - } -} - -impl TableEngineImpl { - pub fn new(instance: InstanceRef) -> Self { - Self { instance } - } - - async fn close_tables_of_shard( - &self, - close_requests: Vec, - ) -> Vec> { - if close_requests.is_empty() { - return Vec::new(); - } - - let mut close_results = Vec::with_capacity(close_requests.len()); - for request in close_requests { - let result = self - .close_table(request.clone()) - .await - .map_err(|e| { - error!("Failed to close table, close_request:{request:?}, err:{e}"); - e - }) - .map(|_| request.table_name); - - close_results.push(result); - } - - close_results - } -} - -impl Drop for TableEngineImpl { - fn drop(&mut self) { - info!("Table engine dropped"); - } -} - -#[async_trait] -impl TableEngine for TableEngineImpl { - fn engine_type(&self) -> &str { - ANALYTIC_ENGINE_TYPE - } - - async fn close(&self) -> Result<()> { - info!("Try to close table engine"); - - // Close the instance. 
- self.instance.close().await.box_err().context(Close)?; - - info!("Table engine closed"); - - Ok(()) - } - - async fn validate_create_table(&self, params: &CreateTableParams) -> Result<()> { - self.instance.validate_create_table(params)?; - - Ok(()) - } - - async fn create_table(&self, request: CreateTableRequest) -> Result { - let space_id = build_space_id(request.schema_id); - - info!( - "Table engine impl create table, space_id:{}, request:{:?}", - space_id, request - ); - - let space_table = self.instance.create_table(space_id, request).await?; - - let table_impl: TableRef = Arc::new(TableImpl::new(self.instance.clone(), space_table)); - - Ok(table_impl) - } - - async fn drop_table(&self, request: DropTableRequest) -> Result { - let space_id = build_space_id(request.schema_id); - - info!( - "Table engine impl drop table, space_id:{}, request:{:?}", - space_id, request - ); - - let dropped = self.instance.drop_table(space_id, request).await?; - Ok(dropped) - } - - async fn open_table(&self, request: OpenTableRequest) -> Result> { - let shard_id = request.shard_id; - let space_id = build_space_id(request.schema_id); - let table_id = request.table_id; - - info!( - "Table engine impl open table, space_id:{}, request:{:?}", - space_id, request - ); - - let table_def = TableDef { - catalog_name: request.catalog_name, - schema_name: request.schema_name, - schema_id: request.schema_id, - id: table_id, - name: request.table_name, - }; - - let shard_request = OpenShardRequest { - shard_id, - table_defs: vec![table_def], - engine: request.engine, - }; - - let mut shard_result = self.instance.open_tables_of_shard(shard_request).await?; - let table_opt = shard_result.remove(&table_id).with_context(|| OpenTableNoCause { - msg: Some(format!("table not exist, table_id:{table_id}, space_id:{space_id}, shard_id:{shard_id}")), - })? 
- .box_err() - .context(OpenTableWithCause { - msg: None, - })?; - - let table_opt = table_opt - .map(|space_table| Arc::new(TableImpl::new(self.instance.clone(), space_table)) as _); - - Ok(table_opt) - } - - async fn close_table(&self, request: CloseTableRequest) -> Result<()> { - let space_id = build_space_id(request.schema_id); - - info!( - "Table engine impl close table, space_id:{}, request:{:?}", - space_id, request, - ); - - self.instance.close_table(space_id, request).await?; - - Ok(()) - } - - async fn open_shard(&self, request: OpenShardRequest) -> Result { - let shard_result = self - .instance - .open_tables_of_shard(request) - .await - .box_err() - .context(OpenShard)?; - - let mut engine_shard_result = OpenShardResult::with_capacity(shard_result.len()); - for (table_id, table_res) in shard_result { - match table_res.box_err() { - Ok(Some(space_table)) => { - let table_impl = Arc::new(TableImpl::new(self.instance.clone(), space_table)); - engine_shard_result.insert(table_id, Ok(Some(table_impl))); - } - Ok(None) => { - engine_shard_result.insert(table_id, Ok(None)); - } - Err(e) => { - engine_shard_result.insert(table_id, Err(e)); - } - } - } - - Ok(engine_shard_result) - } - - async fn close_shard( - &self, - request: CloseShardRequest, - ) -> Vec> { - let table_defs = request.table_defs; - let close_requests = table_defs - .into_iter() - .map(|def| CloseTableRequest { - catalog_name: def.catalog_name, - schema_name: def.schema_name, - schema_id: def.schema_id, - table_name: def.name, - table_id: def.id, - engine: request.engine.clone(), - }) - .collect(); - - self.close_tables_of_shard(close_requests).await - } - - async fn report_statistics(&self) -> Result> { - let table_engine_stats = - collect_stats_from_metric(&FETCHED_SST_BYTES_HISTOGRAM, &TABLE_WRITE_BYTES_COUNTER)?; - - Ok(Some(table_engine_stats)) - } -} - -/// Collect the table engine stats from the two provided metric. -fn collect_stats_from_metric( - fetched_bytes_hist: &HistogramVec, - written_bytes_counter: &IntCounterVec, -) -> Result { - let mut shard_stats: HashMap = HashMap::new(); - - // Collect the metrics for fetched bytes by shards. - for_shard_metric(fetched_bytes_hist, |shard_id, metric| { - let sum = metric.get_histogram().get_sample_sum() as u64; - let stats = shard_stats.entry(shard_id).or_default(); - stats.num_fetched_bytes += sum; - })?; - - // Collect the metrics for the written bytes by shards. - for_shard_metric(written_bytes_counter, |shard_id, metric| { - let sum = metric.get_counter().get_value() as u64; - let stats = shard_stats.entry(shard_id).or_default(); - stats.num_written_bytes += sum; - })?; - - Ok(TableEngineStats { shard_stats }) -} - -/// Iterate the metrics collected by `metric_collector`, and provide the metric -/// with a valid shard_id to the `f` closure. 
-fn for_shard_metric(metric_collector: &C, mut f: F) -> Result<()> -where - C: Collector, - F: FnMut(ShardId, &prometheus::proto::Metric), -{ - const SHARD_LABEL: &str = "shard_id"; - - let metric_families = metric_collector.collect(); - for metric_family in metric_families { - for metric in metric_family.get_metric() { - let labels = metric.get_label(); - let shard_id = labels - .iter() - .find_map(|pair| (pair.get_name() == SHARD_LABEL).then(|| pair.get_value())); - if let Some(raw_shard_id) = shard_id { - let shard_id: ShardId = str::parse(raw_shard_id).box_err().context(Unexpected)?; - f(shard_id, metric); - } - } - } - - Ok(()) -} - -/// Generate the space id from the schema id with assumption schema id is unique -/// globally. -#[inline] -pub fn build_space_id(schema_id: SchemaId) -> SpaceId { - schema_id.as_u32() -} - -#[cfg(test)] -mod tests { - use prometheus::{exponential_buckets, register_histogram_vec, register_int_counter_vec}; - - use super::*; - - #[test] - fn test_collect_table_engine_stats() { - let hist = register_histogram_vec!( - "fetched_bytes", - "Histogram for sst get range length", - &["shard_id", "table"], - // The buckets: [1MB, 2MB, 4MB, 8MB, ... , 8GB] - exponential_buckets(1024.0 * 1024.0, 2.0, 13).unwrap() - ) - .unwrap(); - - hist.with_label_values(&["0", "table_0"]).observe(1000.0); - hist.with_label_values(&["0", "table_1"]).observe(1000.0); - hist.with_label_values(&["0", "table_2"]).observe(1000.0); - hist.with_label_values(&["1", "table_3"]).observe(1000.0); - hist.with_label_values(&["1", "table_4"]).observe(1000.0); - hist.with_label_values(&["2", "table_5"]).observe(4000.0); - - let counter = register_int_counter_vec!( - "written_counter", - "Write bytes counter of table", - &["shard_id", "table"] - ) - .unwrap(); - - counter.with_label_values(&["0", "table_0"]).inc_by(100); - counter.with_label_values(&["0", "table_1"]).inc_by(100); - counter.with_label_values(&["0", "table_2"]).inc_by(100); - counter.with_label_values(&["1", "table_3"]).inc_by(100); - counter.with_label_values(&["1", "table_4"]).inc_by(100); - counter.with_label_values(&["2", "table_5"]).inc_by(400); - - let stats = collect_stats_from_metric(&hist, &counter).unwrap(); - - let expected_stats = { - let mut shard_stats: HashMap = HashMap::new(); - - shard_stats.insert( - 0, - ShardStats { - num_fetched_bytes: 3000, - num_written_bytes: 300, - }, - ); - shard_stats.insert( - 1, - ShardStats { - num_fetched_bytes: 2000, - num_written_bytes: 200, - }, - ); - shard_stats.insert( - 2, - ShardStats { - num_fetched_bytes: 4000, - num_written_bytes: 400, - }, - ); - - shard_stats - }; - - assert_eq!(stats.shard_stats, expected_stats); - } -} diff --git a/src/analytic_engine/src/error.rs b/src/analytic_engine/src/error.rs deleted file mode 100644 index 205ef1f0d0..0000000000 --- a/src/analytic_engine/src/error.rs +++ /dev/null @@ -1,23 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -/// Global Error type for analytic engine. -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum ErrorKind { - KeyTooLarge, - Internal, -} diff --git a/src/analytic_engine/src/instance/alter.rs b/src/analytic_engine/src/instance/alter.rs deleted file mode 100644 index 508ac49aa7..0000000000 --- a/src/analytic_engine/src/instance/alter.rs +++ /dev/null @@ -1,308 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Alter [Schema] and [TableOptions] logic of instance. - -use std::collections::HashMap; - -use generic_error::BoxError; -use logger::info; -use snafu::{ensure, ResultExt}; -use table_engine::table::AlterSchemaRequest; -use wal::{kv_encoder::LogBatchEncoder, manager::WriteContext}; - -use crate::{ - instance::{ - self, - engine::{ - AlterDroppedTable, EncodePayloads, FlushTable, InvalidOptions, InvalidPreVersion, - InvalidSchemaVersion, InvalidTableOptions, Result, WriteManifest, WriteWal, - }, - flush_compaction::TableFlushOptions, - serial_executor::TableOpSerialExecutor, - InstanceRef, - }, - manifest::meta_edit::{ - AlterOptionsMeta, AlterSchemaMeta, MetaEdit, MetaEditRequest, MetaUpdate, - }, - payload::WritePayload, - table::data::TableDataRef, - table_options, -}; - -pub struct Alterer<'a> { - table_data: TableDataRef, - serial_exec: &'a mut TableOpSerialExecutor, - - instance: InstanceRef, -} - -impl<'a> Alterer<'a> { - pub async fn new( - table_data: TableDataRef, - serial_exec: &'a mut TableOpSerialExecutor, - instance: InstanceRef, - ) -> Alterer<'a> { - assert_eq!(table_data.id, serial_exec.table_id()); - Self { - table_data, - serial_exec, - instance, - } - } -} - -impl<'a> Alterer<'a> { - // Alter schema need to be handled by write worker. - pub async fn alter_schema_of_table(&mut self, request: AlterSchemaRequest) -> Result<()> { - info!( - "Instance alter schema, table:{:?}, request:{:?}", - self.table_data.name, request - ); - - // Validate alter schema request. - // if the alter schema request is idempotent, we can skip the alter operation. - if self.validate_before_alter(&request)? 
{ - info!( - "Skip alter because of the altered schema is the same as the current, table:{}", - self.table_data.name - ); - return Ok(()); - } - - // Now we can persist and update the schema, since this function is called by - // write worker, so there is no other concurrent writer altering the - // schema. - - // First trigger a flush before alter schema, to ensure ensure all wal entries - // with old schema are flushed - let opts = TableFlushOptions::default(); - let flush_scheduler = self.serial_exec.flush_scheduler(); - let flusher = self.instance.make_flusher(); - flusher - .do_flush(flush_scheduler, &self.table_data, opts) - .await - .context(FlushTable { - space_id: self.table_data.space_id, - table: &self.table_data.name, - table_id: self.table_data.id, - })?; - - // Build alter op - let manifest_update = AlterSchemaMeta { - space_id: self.table_data.space_id, - table_id: self.table_data.id, - schema: request.schema.clone(), - pre_schema_version: request.pre_schema_version, - }; - - // Write AlterSchema to Data Wal - let alter_schema_pb = manifest_update.clone().into(); - let payload = WritePayload::AlterSchema(&alter_schema_pb); - - // Encode payloads - let table_location = self.table_data.table_location(); - let wal_location = - instance::create_wal_location(table_location.id, table_location.shard_info); - let log_batch_encoder = LogBatchEncoder::create(wal_location); - let log_batch = log_batch_encoder.encode(&payload).context(EncodePayloads { - table: &self.table_data.name, - wal_location, - })?; - - // Write log batch - let write_ctx = WriteContext::default(); - self.instance - .space_store - .wal_manager - .write(&write_ctx, &log_batch) - .await - .box_err() - .context(WriteWal { - space_id: self.table_data.space_id, - table: &self.table_data.name, - table_id: self.table_data.id, - })?; - - info!( - "Instance update table schema, new_schema:{:?}", - request.schema - ); - - // Write to Manifest - let edit_req = { - let meta_update = MetaUpdate::AlterSchema(manifest_update); - MetaEditRequest { - shard_info: self.table_data.shard_info, - meta_edit: MetaEdit::Update(meta_update), - table_catalog_info: self.table_data.table_catalog_info.clone(), - } - }; - self.instance - .space_store - .manifest - .apply_edit(edit_req) - .await - .context(WriteManifest { - space_id: self.table_data.space_id, - table: &self.table_data.name, - table_id: self.table_data.id, - })?; - - Ok(()) - } - - // Most validation should be done by catalog module, so we don't do too much - // duplicate check here, especially the schema compatibility. - // The returned value denotes whether the altered schema is same as the current - // one. - fn validate_before_alter(&self, request: &AlterSchemaRequest) -> Result { - ensure!( - !self.table_data.is_dropped(), - AlterDroppedTable { - table: &self.table_data.name, - } - ); - - if self.table_data.schema().columns() == request.schema.columns() { - return Ok(true); - } - - let current_version = self.table_data.schema_version(); - ensure!( - current_version < request.schema.version(), - InvalidSchemaVersion { - table: &self.table_data.name, - current_version, - given_version: request.schema.version(), - } - ); - - ensure!( - current_version == request.pre_schema_version, - InvalidPreVersion { - table: &self.table_data.name, - current_version, - pre_version: request.pre_schema_version, - } - ); - - Ok(false) - } - - pub async fn alter_options_of_table( - &self, - // todo: encapsulate this into a struct like other functions. 
- options: HashMap, - ) -> Result<()> { - info!( - "Instance alter options of table, table:{:?}, options:{:?}", - self.table_data.name, options - ); - - ensure!( - !self.table_data.is_dropped(), - AlterDroppedTable { - table: &self.table_data.name, - } - ); - - // AlterOptions doesn't need a flush. - - // Generate options after alter op - let current_table_options = self.table_data.table_options(); - info!( - "Instance alter options, space_id:{}, tables:{:?}, old_table_opts:{:?}, options:{:?}", - self.table_data.space_id, self.table_data.name, current_table_options, options - ); - let table_opts = { - let mut opts = - table_options::merge_table_options_for_alter(&options, ¤t_table_options) - .box_err() - .context(InvalidOptions { - table: &self.table_data.name, - })?; - opts.sanitize(); - opts - }; - - // We should check the options before altering - if let Some(reason) = table_opts.check_validity() { - return InvalidTableOptions { reason }.fail(); - } - - let manifest_update = AlterOptionsMeta { - space_id: self.table_data.space_id, - table_id: self.table_data.id, - options: table_opts.clone(), - }; - - // Now we can persist and update the options, since this function is called by - // write worker, so there is no other concurrent writer altering the - // options. - - // Write AlterOptions to Data Wal - let alter_options_pb = manifest_update.clone().into(); - let payload = WritePayload::AlterOption(&alter_options_pb); - - // Encode payload - let table_location = self.table_data.table_location(); - let wal_location = - instance::create_wal_location(table_location.id, table_location.shard_info); - let log_batch_encoder = LogBatchEncoder::create(wal_location); - let log_batch = log_batch_encoder.encode(&payload).context(EncodePayloads { - table: &self.table_data.name, - wal_location, - })?; - - // Write log batch - let write_ctx = WriteContext::default(); - self.instance - .space_store - .wal_manager - .write(&write_ctx, &log_batch) - .await - .box_err() - .context(WriteWal { - space_id: self.table_data.space_id, - table: &self.table_data.name, - table_id: self.table_data.id, - })?; - - // Write to Manifest - let edit_req = { - let meta_update = MetaUpdate::AlterOptions(manifest_update); - MetaEditRequest { - shard_info: self.table_data.shard_info, - meta_edit: MetaEdit::Update(meta_update), - table_catalog_info: self.table_data.table_catalog_info.clone(), - } - }; - self.instance - .space_store - .manifest - .apply_edit(edit_req) - .await - .context(WriteManifest { - space_id: self.table_data.space_id, - table: &self.table_data.name, - table_id: self.table_data.id, - })?; - - Ok(()) - } -} diff --git a/src/analytic_engine/src/instance/close.rs b/src/analytic_engine/src/instance/close.rs deleted file mode 100644 index 6f003a2456..0000000000 --- a/src/analytic_engine/src/instance/close.rs +++ /dev/null @@ -1,107 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Close table logic of instance - -use logger::{info, warn}; -use snafu::ResultExt; -use table_engine::engine::CloseTableRequest; - -use crate::{ - instance::{ - engine::{DoManifestSnapshot, FlushTable, Result}, - flush_compaction::{Flusher, TableFlushOptions}, - }, - manifest::{ManifestRef, SnapshotRequest}, - space::SpaceRef, - table::data::TableDataRef, -}; - -pub(crate) struct Closer { - pub space: SpaceRef, - pub manifest: ManifestRef, - - pub flusher: Flusher, -} - -impl Closer { - /// Close table need to be handled by write worker. - pub async fn close(&self, request: CloseTableRequest) -> Result<()> { - info!("Try to close table, request:{:?}", request); - - let table_data = match self.space.find_table_by_id(request.table_id) { - Some(v) => v, - None => { - warn!("try to close a closed table, request:{:?}", request); - return Ok(()); - } - }; - - // Do flush before close for the fast recovery during the following opening. - // And it should not stop closing if flush fails. - if let Err(e) = self.flush(&table_data).await { - warn!( - "Ignore the failure to flush data before close, table:{}, table_id:{}, err:{e}", - table_data.name, table_data.id - ); - } - - // Table has been closed so remove it from the space. - let removed_table = self.space.remove_table(&request.table_name); - assert!(removed_table.is_some()); - - // Table is already moved out of space, we should close it to stop background - // jobs. - table_data.set_closed(); - - info!( - "table:{}-{} has been removed from the space_id:{}", - table_data.name, table_data.id, self.space.id - ); - Ok(()) - } - - async fn flush(&self, table_data: &TableDataRef) -> Result<()> { - // Flush table. - let opts = TableFlushOptions::default(); - let mut serial_exec = table_data.serial_exec.lock().await; - let flush_scheduler = serial_exec.flush_scheduler(); - self.flusher - .do_flush(flush_scheduler, table_data, opts) - .await - .context(FlushTable { - space_id: self.space.id, - table: &table_data.name, - table_id: table_data.id, - })?; - - // Force manifest to do snapshot. - let snapshot_request = SnapshotRequest { - space_id: self.space.id, - table_id: table_data.id, - shard_id: table_data.shard_info.shard_id, - table_catalog_info: table_data.table_catalog_info.clone(), - }; - self.manifest - .do_snapshot(snapshot_request) - .await - .context(DoManifestSnapshot { - space_id: self.space.id, - table: &table_data.name, - }) - } -} diff --git a/src/analytic_engine/src/instance/create.rs b/src/analytic_engine/src/instance/create.rs deleted file mode 100644 index 232459a4d4..0000000000 --- a/src/analytic_engine/src/instance/create.rs +++ /dev/null @@ -1,133 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Create table logic of instance - -use generic_error::BoxError; -use logger::info; -use snafu::{ensure, OptionExt, ResultExt}; -use table_engine::{ - engine::{CreateTableParams, CreateTableRequest}, - partition::PartitionInfo, -}; - -use crate::{ - instance::{ - engine::{ - CreateOpenFailedTable, InvalidOptions, InvalidTableOptions, Result, TableNotExist, - TryCreateRandomPartitionTableInOverwriteMode, WriteManifest, - }, - Instance, - }, - manifest::meta_edit::{AddTableMeta, MetaEdit, MetaEditRequest, MetaUpdate}, - space::SpaceRef, - table::data::{TableCatalogInfo, TableDataRef, TableShardInfo}, - table_options, TableOptions, -}; - -impl Instance { - /// Validate the request of creating table. - pub fn validate_create_table(&self, params: &CreateTableParams) -> Result { - let table_opts = - table_options::merge_table_options_for_create(¶ms.table_options, &self.table_opts) - .box_err() - .context(InvalidOptions { - table: ¶ms.table_name, - })?; - - if let Some(reason) = table_opts.check_validity() { - return InvalidTableOptions { reason }.fail(); - } - - if let Some(partition_info) = ¶ms.partition_info { - let dedup_on_random_partition = - table_opts.need_dedup() && matches!(partition_info, PartitionInfo::Random(_)); - - ensure!( - !dedup_on_random_partition, - TryCreateRandomPartitionTableInOverwriteMode { - table: ¶ms.table_name, - } - ); - } - - Ok(table_opts) - } - - /// Create table need to be handled by write worker. - pub async fn do_create_table( - &self, - space: SpaceRef, - request: CreateTableRequest, - ) -> Result { - info!("Instance create table, request:{:?}", request); - - if space.is_open_failed_table(&request.params.table_name) { - return CreateOpenFailedTable { - table: request.params.table_name, - } - .fail(); - } - - let mut table_opts = self.validate_create_table(&request.params)?; - // Sanitize options before creating table. - table_opts.sanitize(); - - if let Some(table_data) = space.find_table_by_id(request.table_id) { - return Ok(table_data); - } - - // Store table info into meta both memory and storage. - let edit_req = { - let meta_update = MetaUpdate::AddTable(AddTableMeta { - space_id: space.id, - table_id: request.table_id, - table_name: request.params.table_name.clone(), - schema: request.params.table_schema, - opts: table_opts, - }); - MetaEditRequest { - shard_info: TableShardInfo::new(request.shard_id), - meta_edit: MetaEdit::Update(meta_update), - table_catalog_info: TableCatalogInfo { - schema_id: request.schema_id, - schema_name: request.params.schema_name, - catalog_name: request.params.catalog_name, - }, - } - }; - self.space_store - .manifest - .apply_edit(edit_req) - .await - .context(WriteManifest { - space_id: space.id, - table: &request.params.table_name, - table_id: request.table_id, - })?; - - // Table is sure to exist here. 
- space - .find_table_by_id(request.table_id) - .with_context(|| TableNotExist { - msg: format!( - "table not exist, space_id:{}, table_id:{}, table_name:{}", - space.id, request.table_id, request.params.table_name - ), - }) - } -} diff --git a/src/analytic_engine/src/instance/drop.rs b/src/analytic_engine/src/instance/drop.rs deleted file mode 100644 index e527e9f424..0000000000 --- a/src/analytic_engine/src/instance/drop.rs +++ /dev/null @@ -1,110 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Drop table logic of instance - -use common_types::MAX_SEQUENCE_NUMBER; -use logger::{info, warn}; -use snafu::ResultExt; -use table_engine::engine::DropTableRequest; - -use crate::{ - instance::{ - engine::{PurgeWal, Result, WriteManifest}, - SpaceStoreRef, - }, - manifest::meta_edit::{DropTableMeta, MetaEdit, MetaEditRequest, MetaUpdate}, - space::SpaceRef, - table::data::TableCatalogInfo, -}; - -pub(crate) struct Dropper { - pub space: SpaceRef, - pub space_store: SpaceStoreRef, -} - -impl Dropper { - /// Drop a table under given space - // TODO: Currently we first delete WAL then manifest, if wal is deleted but - // manifest failed to delete, it could cause the table in a unknown state, we - // should find a better way to deal with this. - pub async fn drop(&self, request: DropTableRequest) -> Result { - info!("Try to drop table, request:{:?}", request); - - let table_data = match self.space.find_table(&request.table_name) { - Some(v) => v, - None => { - warn!("No need to drop a dropped table, request:{:?}", request); - return Ok(false); - } - }; - - if table_data.is_dropped() { - warn!( - "Process drop table command tries to drop a dropped table, table:{:?}", - table_data.name, - ); - return Ok(false); - } - - // Mark table's WAL for deletable, memtable will also get freed automatically - // when table_data is dropped. - let table_location = table_data.table_location(); - let wal_location = - crate::instance::create_wal_location(table_location.id, table_location.shard_info); - // Use max to represent delete all WAL. - // TODO: add a method in wal_manager to delete all WAL with same prefix. 
- let sequence = MAX_SEQUENCE_NUMBER; - self.space_store - .wal_manager - .mark_delete_entries_up_to(wal_location, sequence) - .await - .context(PurgeWal { - wal_location, - sequence, - })?; - - // Store the dropping information into meta - let edit_req = { - let meta_update = MetaUpdate::DropTable(DropTableMeta { - space_id: self.space.id, - table_id: table_data.id, - table_name: table_data.name.clone(), - }); - MetaEditRequest { - shard_info: table_data.shard_info, - meta_edit: MetaEdit::Update(meta_update), - table_catalog_info: TableCatalogInfo { - schema_id: request.schema_id, - schema_name: request.schema_name, - catalog_name: request.catalog_name, - }, - } - }; - self.space_store - .manifest - .apply_edit(edit_req) - .await - .context(WriteManifest { - space_id: self.space.id, - table: &table_data.name, - table_id: table_data.id, - })?; - - Ok(true) - } -} diff --git a/src/analytic_engine/src/instance/engine.rs b/src/analytic_engine/src/instance/engine.rs deleted file mode 100644 index 537b83314f..0000000000 --- a/src/analytic_engine/src/instance/engine.rs +++ /dev/null @@ -1,474 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Table engine logic of instance - -use std::sync::Arc; - -use common_types::{schema::Version, SequenceNumber}; -use generic_error::GenericError; -use macros::define_result; -use snafu::{Backtrace, OptionExt, Snafu}; -use table_engine::{ - engine::{CloseTableRequest, CreateTableRequest, DropTableRequest, OpenShardRequest}, - table::TableId, -}; -use wal::manager::WalLocation; - -use crate::{ - engine::build_space_id, - instance::{ - close::Closer, - drop::Dropper, - open::{OpenTablesOfShardResult, TableContext, TablesOfShardContext}, - Instance, - }, - space::{MemSizeOptions, Space, SpaceAndTable, SpaceContext, SpaceId, SpaceRef}, -}; - -#[derive(Debug, Snafu)] -#[snafu(visibility(pub(crate)))] -pub enum Error { - #[snafu(display( - "The space of the table does not exist, space_id:{}, table:{}.\nBacktrace:\n{}", - space_id, - table, - backtrace, - ))] - SpaceNotExist { - space_id: SpaceId, - table: String, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to read meta update, table_id:{}, err:{}", table_id, source))] - ReadMetaUpdate { - table_id: TableId, - source: GenericError, - }, - - #[snafu(display( - "Failed to recover table data, space_id:{}, table:{}, err:{}", - space_id, - table, - source - ))] - RecoverTableData { - space_id: SpaceId, - table: String, - source: crate::table::data::Error, - }, - - #[snafu(display("Failed to read wal, err:{}", source))] - ReadWal { source: wal::manager::Error }, - - #[snafu(display( - "Failed to apply log entry to memtable, table:{}, table_id:{}, err:{}", - table, - table_id, - source - ))] - ApplyMemTable { - space_id: SpaceId, - table: String, - table_id: TableId, - source: crate::instance::write::Error, - }, - - #[snafu(display( - "Flush failed, space_id:{}, table:{}, table_id:{}, err:{}", - space_id, - table, - table_id, - source - ))] - FlushTable { - space_id: SpaceId, - table: String, - table_id: TableId, - source: crate::instance::flush_compaction::Error, - }, - - #[snafu(display( - "Failed to persist meta update to manifest, space_id:{}, table:{}, table_id:{}, err:{}", - space_id, - table, - table_id, - source - ))] - WriteManifest { - space_id: SpaceId, - table: String, - table_id: TableId, - source: GenericError, - }, - - #[snafu(display( - "Failed to persist meta update to WAL, space_id:{}, table:{}, table_id:{}, err:{}", - space_id, - table, - table_id, - source - ))] - WriteWal { - space_id: SpaceId, - table: String, - table_id: TableId, - source: GenericError, - }, - - #[snafu(display("Invalid options, table:{table}, err:{source}",))] - InvalidOptions { table: String, source: GenericError }, - - #[snafu(display( - "Failed to create table data, space_id:{}, table:{}, table_id:{}, err:{}", - space_id, - table, - table_id, - source - ))] - CreateTableData { - space_id: SpaceId, - table: String, - table_id: TableId, - source: crate::table::data::Error, - }, - - #[snafu(display( - "Try to update schema to elder version, table:{}, current_version:{}, given_version:{}.\nBacktrace:\n{}", - table, - current_version, - given_version, - backtrace, - ))] - InvalidSchemaVersion { - table: String, - current_version: Version, - given_version: Version, - backtrace: Backtrace, - }, - - #[snafu(display( - "Invalid previous schema version, table:{}, current_version:{}, pre_version:{}.\nBacktrace:\n{}", - table, - current_version, - pre_version, - backtrace, - ))] - InvalidPreVersion { - table: String, - current_version: Version, - pre_version: Version, - backtrace: Backtrace, - }, - - #[snafu(display( - "Alter schema of a dropped 
table:{}.\nBacktrace:\n{}", - table, - backtrace - ))] - AlterDroppedTable { table: String, backtrace: Backtrace }, - - #[snafu(display("Failed to store version edit, err:{}", source))] - StoreVersionEdit { source: GenericError }, - - #[snafu(display( - "Failed to encode payloads, table:{}, wal_location:{:?}, err:{}", - table, - wal_location, - source - ))] - EncodePayloads { - table: String, - wal_location: WalLocation, - source: wal::manager::Error, - }, - - #[snafu(display( - "Failed to do manifest snapshot for table, space_id:{}, table:{}, err:{}", - space_id, - table, - source - ))] - DoManifestSnapshot { - space_id: SpaceId, - table: String, - source: GenericError, - }, - - #[snafu(display( - "Table open failed and can not be created again, table:{}.\nBacktrace:\n{}", - table, - backtrace, - ))] - CreateOpenFailedTable { table: String, backtrace: Backtrace }, - - #[snafu(display("Failed to open manifest, err:{}", source))] - OpenManifest { source: crate::manifest::Error }, - - #[snafu(display("Failed to find table, msg:{}.\nBacktrace:\n{}", msg, backtrace))] - TableNotExist { msg: String, backtrace: Backtrace }, - - #[snafu(display("Failed to open shard, msg:{}.\nBacktrace:\n{}", msg, backtrace))] - OpenTablesOfShard { msg: String, backtrace: Backtrace }, - - #[snafu(display("Failed to replay wal, msg:{:?}, err:{}", msg, source))] - ReplayWalWithCause { - msg: Option, - source: GenericError, - }, - - #[snafu(display("Failed to replay wal, msg:{:?}.\nBacktrace:\n{}", msg, backtrace))] - ReplayWalNoCause { - msg: Option, - backtrace: Backtrace, - }, - - #[snafu(display( - "Try to create a random partition table in overwrite mode, table:{table}.\nBacktrace:\n{backtrace}", - ))] - TryCreateRandomPartitionTableInOverwriteMode { table: String, backtrace: Backtrace }, - - #[snafu(display("Found invalid table options, reason:{reason}.\nBacktrace:\n{backtrace}",))] - InvalidTableOptions { - reason: String, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to purge wal, wal_location:{:?}, sequence:{}", - wal_location, - sequence - ))] - PurgeWal { - wal_location: WalLocation, - sequence: SequenceNumber, - source: wal::manager::Error, - }, - - #[snafu(display( - "Failed to find meta client to construct remote compaction runner.\nBacktrace:\n{}", - backtrace - ))] - MetaClientNotExist { backtrace: Backtrace }, -} - -define_result!(Error); - -impl From for table_engine::engine::Error { - fn from(err: Error) -> Self { - match &err { - Error::InvalidOptions { table, .. } - | Error::SpaceNotExist { table, .. } - | Error::TryCreateRandomPartitionTableInOverwriteMode { table, .. } => { - Self::InvalidArguments { - table: table.clone(), - source: Box::new(err), - } - } - Error::WriteManifest { .. } => Self::WriteMeta { - source: Box::new(err), - }, - Error::WriteWal { .. } - | Error::InvalidSchemaVersion { .. } - | Error::InvalidPreVersion { .. } - | Error::CreateTableData { .. } - | Error::AlterDroppedTable { .. } - | Error::ReadMetaUpdate { .. } - | Error::RecoverTableData { .. } - | Error::ReadWal { .. } - | Error::ApplyMemTable { .. } - | Error::FlushTable { .. } - | Error::StoreVersionEdit { .. } - | Error::EncodePayloads { .. } - | Error::CreateOpenFailedTable { .. } - | Error::DoManifestSnapshot { .. } - | Error::OpenManifest { .. } - | Error::TableNotExist { .. } - | Error::MetaClientNotExist { .. } - | Error::OpenTablesOfShard { .. } - | Error::ReplayWalNoCause { .. } - | Error::PurgeWal { .. } - | Error::ReplayWalWithCause { .. } - | Error::InvalidTableOptions { .. 
} => Self::Unexpected { - source: Box::new(err), - }, - } - } -} - -impl Instance { - /// Find space by name, create if the space is not exists - pub async fn find_or_create_space( - self: &Arc, - space_id: SpaceId, - context: SpaceContext, - ) -> Result { - // Find space first - if let Some(space) = self.get_space_by_read_lock(space_id) { - return Ok(space); - } - - let mut spaces = self.space_store.spaces.write().unwrap(); - // The space may already been created by other thread - if let Some(space) = spaces.get_by_id(space_id) { - return Ok(space.clone()); - } - - // Now we are the one responsible to create and persist the space info into meta - - // Create space - let mem_size_options = MemSizeOptions { - write_buffer_size: self.space_write_buffer_size, - usage_collector: self.mem_usage_collector.clone(), - size_sampling_interval: self.mem_usage_sampling_interval, - }; - let space = Arc::new(Space::new(space_id, context, mem_size_options)); - - spaces.insert(space.clone()); - - Ok(space) - } - - /// Find space by id - pub fn find_space(&self, space_id: SpaceId) -> Option { - let spaces = self.space_store.spaces.read().unwrap(); - spaces.get_by_id(space_id).cloned() - } - - /// Create a table under given space - pub async fn create_table( - self: &Arc, - space_id: SpaceId, - request: CreateTableRequest, - ) -> Result { - let context = SpaceContext { - catalog_name: request.params.catalog_name.clone(), - schema_name: request.params.schema_name.clone(), - }; - let space = self.find_or_create_space(space_id, context).await?; - let table_data = self.do_create_table(space.clone(), request).await?; - - Ok(SpaceAndTable::new(space, table_data)) - } - - /// Find the table under given space by its table name - /// - /// Return None if space or table is not found - pub async fn find_table( - &self, - space_id: SpaceId, - table: &str, - ) -> Result> { - let space = match self.find_space(space_id) { - Some(s) => s, - None => return Ok(None), - }; - - let space_table = space - .find_table(table) - .map(|table_data| SpaceAndTable::new(space, table_data)); - - Ok(space_table) - } - - /// Drop a table under given space - pub async fn drop_table( - self: &Arc, - space_id: SpaceId, - request: DropTableRequest, - ) -> Result { - let space = self.find_space(space_id).context(SpaceNotExist { - space_id, - table: &request.table_name, - })?; - let dropper = Dropper { - space, - space_store: self.space_store.clone(), - }; - - dropper.drop(request).await - } - - /// Close the table under given space by its table name - pub async fn close_table( - self: &Arc, - space_id: SpaceId, - request: CloseTableRequest, - ) -> Result<()> { - let space = self.find_space(space_id).context(SpaceNotExist { - space_id, - table: &request.table_name, - })?; - - let closer = Closer { - space, - manifest: self.space_store.manifest.clone(), - flusher: self.make_flusher(), - }; - - closer.close(request).await - } - - /// Open tables of same shard together - // TODO: just return `TableRef` rather than `SpaceAndTable`. 
- pub async fn open_tables_of_shard( - self: &Arc, - request: OpenShardRequest, - ) -> Result { - let shard_id = request.shard_id; - let mut table_ctxs = Vec::with_capacity(request.table_defs.len()); - - let mut spaces_of_tables = Vec::with_capacity(request.table_defs.len()); - for table_def in request.table_defs { - let context = SpaceContext { - catalog_name: table_def.catalog_name.clone(), - schema_name: table_def.schema_name.clone(), - }; - - let space_id = build_space_id(table_def.schema_id); - let space = self.find_or_create_space(space_id, context).await?; - spaces_of_tables.push(((table_def.name.clone(), table_def.id), space.clone())); - table_ctxs.push(TableContext { table_def, space }); - } - let shard_ctx = TablesOfShardContext { - shard_id, - table_ctxs, - }; - - let shard_result = self.do_open_tables_of_shard(shard_ctx).await?; - - // Insert opened tables to spaces. - for ((table_name, table_id), space) in spaces_of_tables { - let table_result = shard_result - .get(&table_id) - .with_context(|| OpenTablesOfShard { - msg: format!( - "table not exist in result, table_id:{}, space_id:{shard_id}, shard_id:{}", - table_id, space.id - ), - })?; - - // TODO: should not modify space here, maybe should place it into manifest. - if table_result.is_err() { - space.insert_open_failed_table(table_name); - } - } - - Ok(shard_result) - } -} diff --git a/src/analytic_engine/src/instance/flush_compaction.rs b/src/analytic_engine/src/instance/flush_compaction.rs deleted file mode 100644 index 9deceff563..0000000000 --- a/src/analytic_engine/src/instance/flush_compaction.rs +++ /dev/null @@ -1,993 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -// Flush and compaction logic of instance - -use std::{cmp, collections::Bound, fmt}; - -use common_types::{ - projected_schema::{ProjectedSchema, RowProjectorBuilder}, - record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, - request_id::RequestId, - row::RowViewOnBatch, - time::TimeRange, - SequenceNumber, -}; -use futures::{ - channel::{mpsc, mpsc::channel}, - stream, SinkExt, StreamExt, TryStreamExt, -}; -use generic_error::{BoxError, GenericError}; -use logger::{debug, error, info}; -use macros::define_result; -use runtime::RuntimeRef; -use snafu::{Backtrace, ResultExt, Snafu}; -use time_ext::{self, ReadableDuration}; -use tokio::{sync::oneshot, time::Instant}; -use wal::manager::WalLocation; - -use crate::{ - compaction::runner::node_picker, - instance::{ - self, reorder_memtable::Reorder, serial_executor::TableFlushScheduler, SpaceStoreRef, - }, - manifest::meta_edit::{ - AlterOptionsMeta, AlterSchemaMeta, MetaEdit, MetaEditRequest, MetaUpdate, VersionEditMeta, - }, - memtable::{ColumnarIterPtr, MemTableRef, ScanContext, ScanRequest}, - sst::{ - factory::{self, SstWriteOptions}, - file::{FileMeta, Level}, - writer::MetaData, - }, - table::{ - data::{self, TableDataRef}, - version::{FlushableMemTables, MemTableState, SamplingMemTable}, - version_edit::AddFile, - }, - table_options::StorageFormatHint, -}; - -const DEFAULT_CHANNEL_SIZE: usize = 5; - -#[derive(Debug, Snafu)] -#[snafu(visibility = "pub")] -pub enum Error { - #[snafu(display("Failed to store version edit, err:{}", source))] - StoreVersionEdit { source: GenericError }, - - #[snafu(display("Failed to store schema edit, err:{}", source))] - StoreSchemaEdit { source: GenericError }, - - #[snafu(display( - "Failed to purge wal, wal_location:{:?}, sequence:{}", - wal_location, - sequence - ))] - PurgeWal { - wal_location: WalLocation, - sequence: SequenceNumber, - source: wal::manager::Error, - }, - - #[snafu(display("Failed to build mem table iterator, source:{}", source))] - InvalidMemIter { source: GenericError }, - - #[snafu(display("Failed to reorder mem table iterator, source:{}", source))] - ReorderMemIter { - source: crate::instance::reorder_memtable::Error, - }, - - #[snafu(display( - "Failed to create sst writer, storage_format_hint:{:?}, err:{}", - storage_format_hint, - source, - ))] - CreateSstWriter { - storage_format_hint: StorageFormatHint, - source: factory::Error, - }, - - #[snafu(display("Failed to write sst, file_path:{}, source:{}", path, source))] - WriteSst { path: String, source: GenericError }, - - #[snafu(display( - "Background flush failed, cannot write more data, retry_count:{}, err:{}.\nBacktrace:\n{}", - retry_count, - msg, - backtrace - ))] - BackgroundFlushFailed { - msg: String, - retry_count: usize, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to build merge iterator, mgs:{}, err:{}", msg, source))] - BuildMergeIterator { - msg: String, - source: crate::row_iter::merge::Error, - }, - - #[snafu(display("Failed to do manual compaction, err:{}", source))] - ManualCompactFailed { - source: crate::compaction::WaitError, - }, - - #[snafu(display("Failed to split record batch, source:{}", source))] - SplitRecordBatch { source: GenericError }, - - #[snafu(display("Failed to read sst meta, source:{}", source))] - ReadSstMeta { - source: crate::sst::meta_data::Error, - }, - - #[snafu(display("Failed to send to channel, source:{}", source))] - ChannelSend { source: mpsc::SendError }, - - #[snafu(display("Runtime join error, source:{}", source))] - RuntimeJoin { source: 
runtime::Error }, - - #[snafu(display("Other failure, msg:{}.\nBacktrace:\n{:?}", msg, backtrace))] - Other { msg: String, backtrace: Backtrace }, - - #[snafu(display("Failed to run flush job, msg:{:?}, err:{}", msg, source))] - FlushJobWithCause { - msg: Option, - source: GenericError, - }, - - #[snafu(display("Failed to run flush job, msg:{:?}.\nBacktrace:\n{}", msg, backtrace))] - FlushJobNoCause { - msg: Option, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to alloc file id, err:{}", source))] - AllocFileId { source: data::Error }, - - #[snafu(display("Failed to convert compaction task response, err:{}", source))] - ConvertCompactionTaskResponse { source: GenericError }, - - #[snafu(display("Failed to pick remote compaction node, err:{}", source))] - PickCompactionNodeFailed { source: node_picker::Error }, - - #[snafu(display("Failed to build compaction client, err:{}", source))] - BuildCompactionClientFailed { - source: crate::compaction::runner::Error, - }, - - #[snafu(display("Failed to get compaction client, err:{}", source))] - GetCompactionClientFailed { source: GenericError }, - - #[snafu(display("Failed to execute compaction task remotely, err:{}", source))] - RemoteCompactFailed { - source: crate::compaction::runner::Error, - }, -} - -define_result!(Error); - -/// Options to flush single table. -#[derive(Default)] -pub struct TableFlushOptions { - /// Flush result sender. - /// - /// Default is None. - pub res_sender: Option>>, - /// Max retry limit After flush failed - /// - /// Default is 0 - pub max_retry_flush_limit: usize, -} - -impl fmt::Debug for TableFlushOptions { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("TableFlushOptions") - .field("res_sender", &self.res_sender.is_some()) - .finish() - } -} - -/// Request to flush single table. -pub struct TableFlushRequest { - /// Table to flush. - pub table_data: TableDataRef, - /// Max sequence number to flush (inclusive). - pub max_sequence: SequenceNumber, - - /// We may suggest new primary keys in preflush. if suggestion happened, we - /// need to ensure data is in new order. - need_reorder: bool, -} - -#[derive(Clone)] -pub struct Flusher { - pub space_store: SpaceStoreRef, - - pub runtime: RuntimeRef, - pub write_sst_max_buffer_size: usize, - /// If the interval is set, it will generate a [`FlushTask`] with min flush - /// interval check. - pub min_flush_interval_ms: Option, -} - -struct FlushTask { - space_store: SpaceStoreRef, - table_data: TableDataRef, - runtime: RuntimeRef, - write_sst_max_buffer_size: usize, - // If the interval is set, it will be used to check whether flush is too frequent. - min_flush_interval_ms: Option, -} - -/// The checker to determine whether a flush is frequent. -struct FrequentFlushChecker { - min_flush_interval_ms: u64, - last_flush_time_ms: u64, -} - -impl FrequentFlushChecker { - #[inline] - fn is_frequent_flush(&self) -> bool { - let now = time_ext::current_time_millis(); - self.last_flush_time_ms + self.min_flush_interval_ms > now - } -} - -impl Flusher { - /// Schedule a flush request. - pub async fn schedule_flush( - &self, - flush_scheduler: &mut TableFlushScheduler, - table_data: &TableDataRef, - opts: TableFlushOptions, - ) -> Result<()> { - debug!( - "Instance flush table, table_data:{:?}, flush_opts:{:?}", - table_data, opts - ); - - self.schedule_table_flush(flush_scheduler, table_data.clone(), opts, false) - .await - } - - /// Do flush and wait for it to finish. 
- pub async fn do_flush( - &self, - flush_scheduler: &mut TableFlushScheduler, - table_data: &TableDataRef, - opts: TableFlushOptions, - ) -> Result<()> { - info!( - "Instance flush table, table_data:{:?}, flush_opts:{:?}", - table_data, opts - ); - - self.schedule_table_flush(flush_scheduler, table_data.clone(), opts, true) - .await - } - - /// Schedule table flush request to background workers - async fn schedule_table_flush( - &self, - flush_scheduler: &mut TableFlushScheduler, - table_data: TableDataRef, - opts: TableFlushOptions, - block_on: bool, - ) -> Result<()> { - let flush_task = FlushTask { - table_data: table_data.clone(), - space_store: self.space_store.clone(), - runtime: self.runtime.clone(), - write_sst_max_buffer_size: self.write_sst_max_buffer_size, - min_flush_interval_ms: self.min_flush_interval_ms, - }; - let flush_job = async move { flush_task.run().await }; - - flush_scheduler - .flush_sequentially(flush_job, block_on, opts, &self.runtime, table_data.clone()) - .await - } -} - -impl FlushTask { - /// Each table can only have one running flush task at the same time, which - /// should be ensured by the caller. - async fn run(&self) -> Result<()> { - let large_enough = self.table_data.should_flush_table(false); - if !large_enough && self.is_frequent_flush() { - debug!( - "Ignore flush task for too frequent flush of small memtable, table:{}", - self.table_data.name - ); - - return Ok(()); - } - - let instant = Instant::now(); - let flush_req = self.preprocess_flush(&self.table_data).await?; - - let current_version = self.table_data.current_version(); - let mems_to_flush = current_version.pick_memtables_to_flush(flush_req.max_sequence); - - if mems_to_flush.is_empty() { - return Ok(()); - } - - let request_id = RequestId::next_id(); - - // Start flush duration timer. - let local_metrics = self.table_data.metrics.local_flush_metrics(); - let _timer = local_metrics.start_flush_timer(); - self.dump_memtables(request_id.clone(), &mems_to_flush, flush_req.need_reorder) - .await - .box_err() - .context(FlushJobWithCause { - msg: Some(format!( - "table:{}, table_id:{}, request_id:{request_id}", - self.table_data.name, self.table_data.id - )), - })?; - - self.table_data - .set_last_flush_time(time_ext::current_time_millis()); - - info!( - "Instance flush memtables done, table:{}, table_id:{}, request_id:{}, cost:{}ms", - self.table_data.name, - self.table_data.id, - request_id, - instant.elapsed().as_millis() - ); - - Ok(()) - } - - fn is_frequent_flush(&self) -> bool { - if let Some(min_flush_interval_ms) = self.min_flush_interval_ms { - let checker = FrequentFlushChecker { - min_flush_interval_ms, - last_flush_time_ms: self.table_data.last_flush_time(), - }; - checker.is_frequent_flush() - } else { - false - } - } - - async fn preprocess_flush(&self, table_data: &TableDataRef) -> Result { - let current_version = table_data.current_version(); - let mut last_sequence = table_data.last_sequence(); - // Switch (freeze) all mutable memtables. And update segment duration if - // suggestion is returned. 
- let mut need_reorder = table_data.enable_layered_memtable; - if let Some(suggest_segment_duration) = current_version.suggest_duration() { - info!( - "Update segment duration, table:{}, table_id:{}, segment_duration:{:?}", - table_data.name, table_data.id, suggest_segment_duration - ); - assert!(!suggest_segment_duration.is_zero()); - - if let Some(pk_idx) = current_version.suggest_primary_key() { - need_reorder = true; - let mut schema = table_data.schema(); - info!( - "Update primary key, table:{}, table_id:{}, old:{:?}, new:{:?}", - table_data.name, - table_data.id, - schema.primary_key_indexes(), - pk_idx, - ); - - schema.reset_primary_key_indexes(pk_idx); - let pre_schema_version = schema.version(); - let meta_update = MetaUpdate::AlterSchema(AlterSchemaMeta { - space_id: table_data.space_id, - table_id: table_data.id, - schema, - pre_schema_version, - }); - let edit_req = MetaEditRequest { - shard_info: table_data.shard_info, - meta_edit: MetaEdit::Update(meta_update), - table_catalog_info: table_data.table_catalog_info.clone(), - }; - self.space_store - .manifest - .apply_edit(edit_req) - .await - .context(StoreSchemaEdit)?; - } - - let mut new_table_opts = (*table_data.table_options()).clone(); - new_table_opts.segment_duration = Some(ReadableDuration(suggest_segment_duration)); - - let edit_req = { - let meta_update = MetaUpdate::AlterOptions(AlterOptionsMeta { - space_id: table_data.space_id, - table_id: table_data.id, - options: new_table_opts.clone(), - }); - MetaEditRequest { - shard_info: table_data.shard_info, - meta_edit: MetaEdit::Update(meta_update), - table_catalog_info: table_data.table_catalog_info.clone(), - } - }; - self.space_store - .manifest - .apply_edit(edit_req) - .await - .context(StoreVersionEdit)?; - - // Now the segment duration is applied, we can stop sampling and freeze the - // sampling memtable. - if let Some(seq) = current_version.freeze_sampling_memtable() { - last_sequence = seq.max(last_sequence); - } - } else if let Some(seq) = current_version.switch_memtables() { - last_sequence = seq.max(last_sequence); - } - - info!("Try to trigger memtable flush of table, table:{}, table_id:{}, max_memtable_id:{}, last_sequence:{last_sequence}", - table_data.name, table_data.id, table_data.last_memtable_id()); - - // Try to flush all memtables of current table - Ok(TableFlushRequest { - table_data: table_data.clone(), - max_sequence: last_sequence, - need_reorder, - }) - } - - /// This will write picked memtables [FlushableMemTables] to level 0 sst - /// files. Sampling memtable may be dumped into multiple sst file according - /// to the sampled segment duration. - /// - /// Memtables will be removed after all of them are dumped. The max sequence - /// number in dumped memtables will be sent to the [WalManager]. - async fn dump_memtables( - &self, - request_id: RequestId, - mems_to_flush: &FlushableMemTables, - need_reorder: bool, - ) -> Result<()> { - let local_metrics = self.table_data.metrics.local_flush_metrics(); - let mut files_to_level0 = Vec::with_capacity(mems_to_flush.memtables.len()); - let mut flushed_sequence = 0; - let mut sst_num = 0; - - // process sampling memtable and frozen memtable - if let Some(sampling_mem) = &mems_to_flush.sampling_mem { - if let Some(seq) = self - .dump_sampling_memtable( - request_id.clone(), - sampling_mem, - &mut files_to_level0, - need_reorder, - ) - .await? 
- { - flushed_sequence = seq; - sst_num += files_to_level0.len(); - for add_file in &files_to_level0 { - local_metrics.observe_sst_size(add_file.file.size); - } - } - } - for mem in &mems_to_flush.memtables { - let file = self - .dump_normal_memtable(request_id.clone(), mem, need_reorder) - .await?; - if let Some(file) = file { - let sst_size = file.size; - files_to_level0.push(AddFile { - level: Level::MIN, - file, - }); - - // Set flushed sequence to max of the last_sequence of memtables. - flushed_sequence = cmp::max(flushed_sequence, mem.last_sequence()); - - sst_num += 1; - // Collect sst size metrics. - local_metrics.observe_sst_size(sst_size); - } - } - - // Collect sst num metrics. - local_metrics.observe_sst_num(sst_num); - - info!( - "Instance flush memtables to output, table:{}, table_id:{}, request_id:{}, mems_to_flush:{:?}, files_to_level0:{:?}, flushed_sequence:{}", - self.table_data.name, - self.table_data.id, - request_id, - mems_to_flush, - files_to_level0, - flushed_sequence - ); - - // Persist the flush result to manifest. - let edit_req = { - let edit_meta = VersionEditMeta { - space_id: self.table_data.space_id, - table_id: self.table_data.id, - flushed_sequence, - files_to_add: files_to_level0.clone(), - files_to_delete: vec![], - mems_to_remove: mems_to_flush.ids(), - max_file_id: 0, - }; - let meta_update = MetaUpdate::VersionEdit(edit_meta); - MetaEditRequest { - shard_info: self.table_data.shard_info, - meta_edit: MetaEdit::Update(meta_update), - table_catalog_info: self.table_data.table_catalog_info.clone(), - } - }; - // Update manifest and remove immutable memtables - self.space_store - .manifest - .apply_edit(edit_req) - .await - .context(StoreVersionEdit)?; - - // Mark sequence <= flushed_sequence to be deleted. - let table_location = self.table_data.table_location(); - let wal_location = - instance::create_wal_location(table_location.id, table_location.shard_info); - self.space_store - .wal_manager - .mark_delete_entries_up_to(wal_location, flushed_sequence) - .await - .context(PurgeWal { - wal_location, - sequence: flushed_sequence, - })?; - - Ok(()) - } - - /// Flush rows in sampling memtable to multiple ssts according to segment - /// duration. - /// - /// Returns flushed sequence. - async fn dump_sampling_memtable( - &self, - request_id: RequestId, - sampling_mem: &SamplingMemTable, - files_to_level0: &mut Vec, - need_reorder: bool, - ) -> Result> { - let (min_key, max_key) = match (sampling_mem.mem.min_key(), sampling_mem.mem.max_key()) { - (Some(min_key), Some(max_key)) => (min_key, max_key), - _ => { - // the memtable is empty and nothing needs flushing. 
- return Ok(None); - } - }; - - let max_sequence = sampling_mem.mem.last_sequence(); - let time_ranges = sampling_mem.sampler.ranges(); - - info!("Flush sampling memtable, table_id:{:?}, table_name:{:?}, request_id:{}, sampling memtable time_ranges:{:?}", - self.table_data.id, self.table_data.name, request_id, time_ranges); - - let mut batch_record_senders = Vec::with_capacity(time_ranges.len()); - let mut sst_handlers = Vec::with_capacity(time_ranges.len()); - let mut file_ids = Vec::with_capacity(time_ranges.len()); - - let sst_write_options = SstWriteOptions { - storage_format_hint: self.table_data.table_options().storage_format_hint, - num_rows_per_row_group: self.table_data.table_options().num_rows_per_row_group, - compression: self.table_data.table_options().compression, - max_buffer_size: self.write_sst_max_buffer_size, - column_stats: Default::default(), - }; - - for time_range in &time_ranges { - let (batch_record_sender, batch_record_receiver) = - channel::>(DEFAULT_CHANNEL_SIZE); - let file_id = self - .table_data - .alloc_file_id(&self.space_store.manifest) - .await - .context(AllocFileId)?; - - let sst_file_path = self.table_data.sst_file_path(file_id); - // TODO: `min_key` & `max_key` should be figured out when writing sst. - let sst_meta = MetaData { - min_key: min_key.clone(), - max_key: max_key.clone(), - time_range: *time_range, - max_sequence, - schema: self.table_data.schema(), - }; - - let store = self.space_store.clone(); - let storage_format_hint = self.table_data.table_options().storage_format_hint; - let sst_write_options = sst_write_options.clone(); - let request_id = request_id.clone(); - - // spawn build sst - let handler = self.runtime.spawn(async move { - let mut writer = store - .sst_factory - .create_writer( - &sst_write_options, - &sst_file_path, - store.store_picker(), - Level::MIN, - ) - .await - .context(CreateSstWriter { - storage_format_hint, - })?; - - let sst_info = writer - .write( - request_id, - &sst_meta, - Box::new(batch_record_receiver.map_err(|e| Box::new(e) as _)), - ) - .await - .map_err(|e| { - error!("Failed to write sst file, meta:{:?}, err:{}", sst_meta, e); - Box::new(e) as _ - }) - .with_context(|| WriteSst { - path: sst_file_path.to_string(), - })?; - - Ok((sst_info, sst_meta)) - }); - - batch_record_senders.push(batch_record_sender); - sst_handlers.push(handler); - file_ids.push(file_id); - } - - let iter = build_mem_table_iter(sampling_mem.mem.clone(), &self.table_data)?; - let timestamp_idx = self.table_data.schema().timestamp_index(); - if need_reorder { - let schema = self.table_data.schema(); - let primary_key_indexes = schema.primary_key_indexes(); - let reorder = Reorder { - iter, - schema: self.table_data.schema(), - order_by_col_indexes: primary_key_indexes.to_vec(), - }; - let mut stream = reorder.into_stream().await.context(ReorderMemIter)?; - while let Some(data) = stream.next().await { - for (idx, record_batch) in split_record_batch_with_time_ranges( - data.box_err().context(InvalidMemIter)?, - &time_ranges, - timestamp_idx, - )? - .into_iter() - .enumerate() - { - if !record_batch.is_empty() { - batch_record_senders[idx] - .send(Ok(record_batch)) - .await - .context(ChannelSend)?; - } - } - } - } else { - for data in iter { - for (idx, record_batch) in split_record_batch_with_time_ranges( - data.box_err().context(InvalidMemIter)?, - &time_ranges, - timestamp_idx, - )? 
- .into_iter() - .enumerate() - { - if !record_batch.is_empty() { - batch_record_senders[idx] - .send(Ok(record_batch)) - .await - .context(ChannelSend)?; - } - } - } - } - - batch_record_senders.clear(); - - for (idx, sst_handler) in sst_handlers.into_iter().enumerate() { - let info_and_metas = sst_handler.await.context(RuntimeJoin)?; - let (sst_info, sst_meta) = info_and_metas?; - files_to_level0.push(AddFile { - level: Level::MIN, - file: FileMeta { - id: file_ids[idx], - size: sst_info.file_size as u64, - row_num: sst_info.row_num as u64, - time_range: sst_info.time_range, - max_seq: sst_meta.max_sequence, - storage_format: sst_info.storage_format, - associated_files: vec![sst_info.meta_path], - }, - }) - } - - Ok(Some(max_sequence)) - } - - /// Flush rows in normal (non-sampling) memtable to at most one sst file. - async fn dump_normal_memtable( - &self, - request_id: RequestId, - memtable_state: &MemTableState, - need_reorder: bool, - ) -> Result> { - let (min_key, max_key) = match (memtable_state.mem.min_key(), memtable_state.mem.max_key()) - { - (Some(min_key), Some(max_key)) => (min_key, max_key), - _ => { - // the memtable is empty and nothing needs flushing. - return Ok(None); - } - }; - let max_sequence = memtable_state.last_sequence(); - let sst_meta = MetaData { - min_key, - max_key, - time_range: memtable_state.aligned_time_range, - max_sequence, - schema: self.table_data.schema(), - }; - - // Alloc file id for next sst file - let file_id = self - .table_data - .alloc_file_id(&self.space_store.manifest) - .await - .context(AllocFileId)?; - - let sst_file_path = self.table_data.sst_file_path(file_id); - let storage_format_hint = self.table_data.table_options().storage_format_hint; - let sst_write_options = SstWriteOptions { - storage_format_hint, - num_rows_per_row_group: self.table_data.table_options().num_rows_per_row_group, - compression: self.table_data.table_options().compression, - max_buffer_size: self.write_sst_max_buffer_size, - column_stats: Default::default(), - }; - let mut writer = self - .space_store - .sst_factory - .create_writer( - &sst_write_options, - &sst_file_path, - self.space_store.store_picker(), - Level::MIN, - ) - .await - .context(CreateSstWriter { - storage_format_hint, - })?; - - let iter = build_mem_table_iter(memtable_state.mem.clone(), &self.table_data)?; - - let record_batch_stream = if need_reorder { - let schema = self.table_data.schema(); - let primary_key_indexes = schema.primary_key_indexes().to_vec(); - let reorder = Reorder { - iter, - schema, - order_by_col_indexes: primary_key_indexes, - }; - Box::new( - reorder - .into_stream() - .await - .context(ReorderMemIter)? - .map(|batch| batch.box_err()), - ) as _ - } else { - Box::new(stream::iter(iter).map(|batch| batch.box_err())) as _ - }; - - let sst_info = writer - .write(request_id, &sst_meta, record_batch_stream) - .await - .box_err() - .with_context(|| WriteSst { - path: sst_file_path.to_string(), - })?; - - // update sst metadata by built info. 
- - Ok(Some(FileMeta { - id: file_id, - row_num: sst_info.row_num as u64, - size: sst_info.file_size as u64, - time_range: sst_info.time_range, - max_seq: memtable_state.last_sequence(), - storage_format: sst_info.storage_format, - associated_files: vec![sst_info.meta_path], - })) - } -} - -fn split_record_batch_with_time_ranges( - record_batch: FetchedRecordBatch, - time_ranges: &[TimeRange], - timestamp_idx: usize, -) -> Result> { - let fetched_schema = record_batch.schema(); - let primary_key_indexes = record_batch.primary_key_indexes(); - let mut builders: Vec = (0..time_ranges.len()) - .map(|_| { - let primary_key_indexes = primary_key_indexes.map(|idxs| idxs.to_vec()); - FetchedRecordBatchBuilder::new(fetched_schema.clone(), primary_key_indexes) - }) - .collect(); - - for row_idx in 0..record_batch.num_rows() { - let datum = record_batch.column(timestamp_idx).datum(row_idx); - let timestamp = datum.as_timestamp().unwrap(); - let mut idx = None; - for (i, time_range) in time_ranges.iter().enumerate() { - if time_range.contains(timestamp) { - idx = Some(i); - break; - } - } - - if let Some(idx) = idx { - let view = RowViewOnBatch { - record_batch: &record_batch, - row_idx, - }; - builders[idx] - .append_row_view(&view) - .box_err() - .context(SplitRecordBatch)?; - } else { - panic!( - "Record timestamp is not in time_ranges, timestamp:{timestamp:?}, time_ranges:{time_ranges:?}" - ); - } - } - let mut ret = Vec::with_capacity(builders.len()); - for mut builder in builders { - ret.push(builder.build().box_err().context(SplitRecordBatch)?); - } - Ok(ret) -} - -fn build_mem_table_iter( - memtable: MemTableRef, - table_data: &TableDataRef, -) -> Result { - let scan_ctx = ScanContext::default(); - let projected_schema = ProjectedSchema::no_projection(table_data.schema()); - let fetched_schema = projected_schema.to_record_schema_with_key(); - let primary_key_indexes = fetched_schema.primary_key_idx().to_vec(); - let fetched_schema = fetched_schema.into_record_schema(); - let table_schema = projected_schema.table_schema().clone(); - let row_projector_builder = - RowProjectorBuilder::new(fetched_schema, table_schema, Some(primary_key_indexes)); - let scan_req = ScanRequest { - start_user_key: Bound::Unbounded, - end_user_key: Bound::Unbounded, - sequence: common_types::MAX_SEQUENCE_NUMBER, - row_projector_builder, - need_dedup: table_data.dedup(), - reverse: false, - metrics_collector: None, - time_range: TimeRange::min_to_max(), - }; - memtable - .scan(scan_ctx, scan_req) - .box_err() - .context(InvalidMemIter) -} - -#[cfg(test)] -mod tests { - - use common_types::{ - tests::{ - build_fetched_record_batch_by_rows, build_row, build_row_opt, - check_record_batch_with_key_with_rows, - }, - time::TimeRange, - }; - - use super::FrequentFlushChecker; - use crate::instance::flush_compaction::split_record_batch_with_time_ranges; - - #[test] - fn test_split_record_batch_with_time_ranges() { - let rows0 = vec![build_row( - b"binary key", - 20, - 10.0, - "string value", - 1000, - 1_000_000, - )]; - let rows1 = vec![build_row( - b"binary key1", - 120, - 11.0, - "string value 1", - 1000, - 1_000_000, - )]; - let rows2 = vec![ - build_row_opt( - b"binary key2", - 220, - None, - Some("string value 2"), - Some(1000), - None, - ), - build_row_opt(b"binary key3", 250, Some(13.0), None, None, Some(1_000_000)), - ]; - - let rows = vec![rows0.clone(), rows1.clone(), rows2.clone()] - .into_iter() - .flatten() - .collect(); - let record_batch_with_key = build_fetched_record_batch_by_rows(rows); - let column_num = 
record_batch_with_key.num_columns(); - let time_ranges = vec![ - TimeRange::new_unchecked_for_test(0, 100), - TimeRange::new_unchecked_for_test(100, 200), - TimeRange::new_unchecked_for_test(200, 300), - ]; - - let timestamp_idx = 1; - let rets = - split_record_batch_with_time_ranges(record_batch_with_key, &time_ranges, timestamp_idx) - .unwrap(); - - check_record_batch_with_key_with_rows(&rets[0], rows0.len(), column_num, rows0); - check_record_batch_with_key_with_rows(&rets[1], rows1.len(), column_num, rows1); - check_record_batch_with_key_with_rows(&rets[2], rows2.len(), column_num, rows2); - } - - #[test] - fn test_frequent_flush() { - let now = time_ext::current_time_millis(); - let cases = vec![ - (now - 1000, 100, false), - (now - 1000, 2000, true), - (now - 10000, 200, false), - (now - 2000, 2000, false), - (now + 2000, 1000, true), - ]; - for (last_flush_time_ms, min_flush_interval_ms, expect) in cases { - let checker = FrequentFlushChecker { - min_flush_interval_ms, - last_flush_time_ms, - }; - - assert_eq!(expect, checker.is_frequent_flush()); - } - } -} diff --git a/src/analytic_engine/src/instance/mem_collector.rs b/src/analytic_engine/src/instance/mem_collector.rs deleted file mode 100644 index 905c2f2b49..0000000000 --- a/src/analytic_engine/src/instance/mem_collector.rs +++ /dev/null @@ -1,138 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, -}; - -use arena::{Collector, CollectorRef}; - -pub type MemUsageCollectorRef = Arc; - -/// Space memtable memory usage collector -pub struct MemUsageCollector { - /// Memory size allocated in bytes. - bytes_allocated: AtomicUsize, - /// Memory size used in bytes. 
- bytes_used: AtomicUsize, - parent: Option, -} - -impl Collector for MemUsageCollector { - fn on_alloc(&self, bytes: usize) { - self.bytes_allocated.fetch_add(bytes, Ordering::Relaxed); - - if let Some(c) = &self.parent { - c.on_alloc(bytes); - } - } - - fn on_used(&self, bytes: usize) { - self.bytes_used.fetch_add(bytes, Ordering::Relaxed); - - if let Some(c) = &self.parent { - c.on_used(bytes); - } - } - - fn on_free(&self, used: usize, allocated: usize) { - self.bytes_allocated.fetch_sub(allocated, Ordering::Relaxed); - self.bytes_used.fetch_sub(used, Ordering::Relaxed); - - if let Some(c) = &self.parent { - c.on_free(used, allocated); - } - } -} - -impl Default for MemUsageCollector { - fn default() -> Self { - Self { - bytes_allocated: AtomicUsize::new(0), - bytes_used: AtomicUsize::new(0), - parent: None, - } - } -} - -impl MemUsageCollector { - pub fn with_parent(collector: CollectorRef) -> Self { - Self { - bytes_allocated: AtomicUsize::new(0), - bytes_used: AtomicUsize::new(0), - parent: Some(collector), - } - } - - #[inline] - pub fn total_memory_allocated(&self) -> usize { - self.bytes_allocated.load(Ordering::Relaxed) - } -} - -#[cfg(test)] -mod tests { - use std::sync::{atomic::Ordering, Arc}; - - use super::*; - #[test] - fn test_collector() { - let collector = MemUsageCollector::default(); - - collector.on_alloc(1024); - collector.on_used(128); - assert_eq!(1024, collector.total_memory_allocated()); - assert_eq!(128, collector.bytes_used.load(Ordering::Relaxed)); - - collector.on_free(64, 512); - assert_eq!(512, collector.total_memory_allocated()); - assert_eq!(64, collector.bytes_used.load(Ordering::Relaxed)); - collector.on_free(64, 512); - assert_eq!(0, collector.total_memory_allocated()); - assert_eq!(0, collector.bytes_used.load(Ordering::Relaxed)); - } - - #[test] - fn test_collector_with_parent() { - let p = Arc::new(MemUsageCollector::default()); - let c1 = MemUsageCollector::with_parent(p.clone()); - let c2 = MemUsageCollector::with_parent(p.clone()); - - c1.on_alloc(1024); - c1.on_used(128); - c2.on_alloc(1024); - c2.on_used(128); - assert_eq!(1024, c1.total_memory_allocated()); - assert_eq!(128, c1.bytes_used.load(Ordering::Relaxed)); - assert_eq!(1024, c2.total_memory_allocated()); - assert_eq!(128, c2.bytes_used.load(Ordering::Relaxed)); - assert_eq!(2048, p.total_memory_allocated()); - assert_eq!(256, p.bytes_used.load(Ordering::Relaxed)); - - c1.on_free(64, 512); - assert_eq!(512, c1.total_memory_allocated()); - assert_eq!(64, c1.bytes_used.load(Ordering::Relaxed)); - assert_eq!(1536, p.total_memory_allocated()); - assert_eq!(192, p.bytes_used.load(Ordering::Relaxed)); - c2.on_free(64, 512); - assert_eq!(512, c2.total_memory_allocated()); - assert_eq!(64, c2.bytes_used.load(Ordering::Relaxed)); - assert_eq!(1024, p.total_memory_allocated()); - assert_eq!(128, p.bytes_used.load(Ordering::Relaxed)); - } -} diff --git a/src/analytic_engine/src/instance/mod.rs b/src/analytic_engine/src/instance/mod.rs deleted file mode 100644 index 56b91763a1..0000000000 --- a/src/analytic_engine/src/instance/mod.rs +++ /dev/null @@ -1,401 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! A table engine instance -//! -//! The root mod only contains common functions of instance, other logics are -//! divided into the sub crates - -pub(crate) mod alter; -mod close; -mod create; -mod drop; -pub mod engine; -pub mod flush_compaction; -pub(crate) mod mem_collector; -pub mod open; -mod read; -mod reorder_memtable; -pub(crate) mod serial_executor; -pub mod wal_replayer; -pub(crate) mod write; - -use std::sync::Arc; - -use common_types::{projected_schema::RowProjectorBuilder, table::TableId}; -use generic_error::{BoxError, GenericError}; -use logger::{error, info}; -use macros::define_result; -use mem_collector::MemUsageCollector; -use runtime::{PriorityRuntime, Runtime}; -use snafu::{ResultExt, Snafu}; -use table_engine::{engine::EngineRuntimes, predicate::PredicateRef, table::FlushRequest}; -use time_ext::ReadableDuration; -use tokio::sync::oneshot::{self, error::RecvError}; -use wal::manager::{WalLocation, WalManagerRef}; - -use self::flush_compaction::{Flusher, TableFlushOptions}; -use crate::{ - compaction::{scheduler::CompactionSchedulerRef, TableCompactionRequest}, - manifest::ManifestRef, - row_iter::IterOptions, - space::{SpaceId, SpaceRef, SpacesRef}, - sst::{ - factory::{ - FactoryRef as SstFactoryRef, ObjectStorePickerRef, ReadFrequency, ScanOptions, - SstReadOptions, - }, - file::FilePurgerRef, - meta_data::cache::MetaCacheRef, - metrics::MaybeTableLevelMetrics, - }, - table::data::{TableDataRef, TableShardInfo}, - RecoverMode, TableOptions, WalEncodeConfig, -}; - -#[allow(clippy::enum_variant_names)] -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to stop file purger, err:{}", source))] - StopFilePurger { source: crate::sst::file::Error }, - - #[snafu(display("Failed to stop compaction scheduler, err:{}", source))] - StopScheduler { - source: crate::compaction::scheduler::Error, - }, - - #[snafu(display("Failed to {} table manually, table:{}, err:{}", op, table, source))] - ManualOp { - op: String, - table: String, - source: GenericError, - }, - - #[snafu(display("Failed to receive {} result, table:{}, err:{}", op, table, source))] - RecvManualOpResult { - op: String, - table: String, - source: RecvError, - }, -} - -define_result!(Error); - -// TODO: `SpaceStore` seems not a good name -pub struct SpaceStore { - /// All spaces of the engine. - spaces: SpacesRef, - /// Manifest (or meta) stores meta data of the engine instance. - pub(crate) manifest: ManifestRef, - /// Wal of all tables - wal_manager: WalManagerRef, - /// Object store picker for persisting data. - store_picker: ObjectStorePickerRef, - /// Sst factory. - sst_factory: SstFactoryRef, -} - -pub type SpaceStoreRef = Arc; - -impl Drop for SpaceStore { - fn drop(&mut self) { - info!("SpaceStore dropped"); - } -} - -impl SpaceStore { - async fn close(&self) -> Result<()> { - // TODO: close all background jobs. 
- Ok(()) - } -} - -impl SpaceStore { - fn store_picker(&self) -> &ObjectStorePickerRef { - &self.store_picker - } - - /// List all tables of all spaces - pub fn list_all_tables(&self, tables: &mut Vec) { - let spaces = self.spaces.read().unwrap(); - spaces.list_all_tables(tables); - } - - /// Find the space which it's all memtables consumes maximum memory. - #[inline] - fn find_maximum_memory_usage_space(&self) -> Option { - let spaces = self.spaces.read().unwrap().list_all_spaces(); - spaces.into_iter().max_by_key(|t| t.memtable_memory_usage()) - } - - /// The memory space used by all tables in the space. - #[inline] - fn total_memory_usage_space(&self) -> usize { - let spaces = self.spaces.read().unwrap().list_all_spaces(); - spaces.into_iter().map(|t| t.memtable_memory_usage()).sum() - } -} - -/// Table engine instance -/// -/// Manages all spaces, also contains needed resources shared across all table -pub struct Instance { - /// Space storage - space_store: SpaceStoreRef, - /// Runtime to execute async tasks. - runtimes: Arc, - /// Global table options, overwrite mutable options in each table's - /// TableOptions. - table_opts: TableOptions, - - // End of write group options. - file_purger: FilePurgerRef, - compaction_scheduler: CompactionSchedulerRef, - - meta_cache: Option, - /// Engine memtable memory usage collector - mem_usage_collector: Arc, - pub(crate) max_rows_in_write_queue: usize, - /// Engine write buffer size - pub(crate) db_write_buffer_size: usize, - /// Space write buffer size - pub(crate) space_write_buffer_size: usize, - /// Replay wal batch size - pub(crate) replay_batch_size: usize, - /// Write sst max buffer size - pub(crate) write_sst_max_buffer_size: usize, - /// The min interval between flushes - pub(crate) min_flush_interval: ReadableDuration, - /// Max retry limit to flush memtables - pub(crate) max_retry_flush_limit: usize, - /// Max bytes per write batch - pub(crate) max_bytes_per_write_batch: Option, - /// The interval for sampling the mem size - pub(crate) mem_usage_sampling_interval: ReadableDuration, - /// Options for scanning sst - pub(crate) scan_options: ScanOptions, - pub(crate) iter_options: Option, - pub(crate) recover_mode: RecoverMode, - pub(crate) wal_encode: WalEncodeConfig, - pub(crate) disable_wal: bool, -} - -impl Instance { - /// Close the instance gracefully. - pub async fn close(&self) -> Result<()> { - self.file_purger.stop().await.context(StopFilePurger)?; - - self.space_store.close().await?; - - self.compaction_scheduler - .stop_scheduler() - .await - .context(StopScheduler) - } - - pub async fn manual_flush_table( - &self, - table_data: &TableDataRef, - request: FlushRequest, - ) -> Result<()> { - let mut rx_opt = None; - - let flush_opts = TableFlushOptions { - res_sender: if request.sync { - let (tx, rx) = oneshot::channel(); - rx_opt = Some(rx); - Some(tx) - } else { - None - }, - max_retry_flush_limit: 0, - }; - - let flusher = self.make_flusher(); - let mut serial_exec = table_data.serial_exec.lock().await; - let flush_scheduler = serial_exec.flush_scheduler(); - flusher - .schedule_flush(flush_scheduler, table_data, flush_opts) - .await - .box_err() - .context(ManualOp { - op: "flush", - table: &table_data.name, - })?; - - if let Some(rx) = rx_opt { - rx.await - .context(RecvManualOpResult { - op: "flush", - table: &table_data.name, - })? - .box_err() - .context(ManualOp { - op: "flush", - table: &table_data.name, - })?; - } - Ok(()) - } - - // This method will wait until compaction finished. 
- pub async fn manual_compact_table(&self, table_data: &TableDataRef) -> Result<()> { - let (request, rx) = TableCompactionRequest::new(table_data.clone()); - let succeed = self - .compaction_scheduler - .schedule_table_compaction(request) - .await; - if !succeed { - error!("Failed to schedule compaction, table:{}", table_data.name); - } - - rx.await - .context(RecvManualOpResult { - op: "compact", - table: &table_data.name, - })? - .box_err() - .context(ManualOp { - op: "compact", - table: &table_data.name, - }) - } -} - -// TODO(yingwen): Instance builder -impl Instance { - /// Find space using read lock - fn get_space_by_read_lock(&self, space: SpaceId) -> Option { - let spaces = self.space_store.spaces.read().unwrap(); - spaces.get_by_id(space).cloned() - } - - /// Returns true when engine instance's total memtable memory usage reaches - /// db_write_buffer_size limit. - #[inline] - fn should_flush_instance(&self) -> bool { - self.db_write_buffer_size > 0 - && self.space_store.total_memory_usage_space() >= self.db_write_buffer_size - } - - #[inline] - fn read_runtime(&self) -> &PriorityRuntime { - &self.runtimes.read_runtime - } - - #[inline] - pub fn write_runtime(&self) -> &Arc { - &self.runtimes.write_runtime - } - - #[inline] - fn make_flusher(&self) -> Flusher { - Flusher { - space_store: self.space_store.clone(), - // Do flush in write runtime - runtime: self.runtimes.write_runtime.clone(), - write_sst_max_buffer_size: self.write_sst_max_buffer_size, - min_flush_interval_ms: None, - } - } - - #[inline] - fn make_flusher_with_min_interval(&self) -> Flusher { - Flusher { - space_store: self.space_store.clone(), - // Do flush in write runtime - runtime: self.runtimes.write_runtime.clone(), - write_sst_max_buffer_size: self.write_sst_max_buffer_size, - min_flush_interval_ms: Some(self.min_flush_interval.as_millis()), - } - } - - #[inline] - fn max_retry_flush_limit(&self) -> usize { - self.max_retry_flush_limit - } -} - -#[derive(Debug, Clone)] -pub struct SstReadOptionsBuilder { - scan_type: ScanType, - scan_options: ScanOptions, - maybe_table_level_metrics: Option>, - num_rows_per_row_group: usize, - predicate: PredicateRef, - meta_cache: Option, - runtime: Arc, -} - -impl SstReadOptionsBuilder { - pub fn new( - scan_type: ScanType, - scan_options: ScanOptions, - maybe_table_level_metrics: Option>, - num_rows_per_row_group: usize, - predicate: PredicateRef, - meta_cache: Option, - runtime: Arc, - ) -> Self { - Self { - scan_type, - scan_options, - maybe_table_level_metrics, - num_rows_per_row_group, - predicate, - meta_cache, - runtime, - } - } - - pub fn build(self, row_projector_builder: RowProjectorBuilder) -> SstReadOptions { - SstReadOptions { - maybe_table_level_metrics: self.maybe_table_level_metrics.clone(), - num_rows_per_row_group: self.num_rows_per_row_group, - frequency: self.scan_type.into(), - row_projector_builder, - predicate: self.predicate, - meta_cache: self.meta_cache, - scan_options: self.scan_options, - runtime: self.runtime, - } - } -} - -/// Scan type which mapped to the low level `ReadFrequency` in sst reader. 
-#[derive(Debug, Clone, Copy)] -pub enum ScanType { - Query, - Compaction, -} - -impl From for ReadFrequency { - fn from(value: ScanType) -> Self { - match value { - ScanType::Query => ReadFrequency::Frequent, - ScanType::Compaction => ReadFrequency::Once, - } - } -} - -/// Instance reference -pub type InstanceRef = Arc; - -#[inline] -pub(crate) fn create_wal_location(table_id: TableId, shard_info: TableShardInfo) -> WalLocation { - WalLocation::new(shard_info.shard_id as u64, table_id) -} diff --git a/src/analytic_engine/src/instance/open.rs b/src/analytic_engine/src/instance/open.rs deleted file mode 100644 index 97717c5ab0..0000000000 --- a/src/analytic_engine/src/instance/open.rs +++ /dev/null @@ -1,558 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Open logic of instance - -use std::{ - collections::HashMap, - sync::{Arc, RwLock}, -}; - -use common_types::table::ShardId; -use logger::{error, info}; -use meta_client::MetaClientRef; -use object_store::ObjectStoreRef; -use snafu::{OptionExt, ResultExt}; -use table_engine::{engine::TableDef, table::TableId}; -use wal::manager::WalManagerRef; - -use crate::{ - compaction::{ - runner::{ - local_runner::LocalCompactionRunner, - node_picker::{ - LocalCompactionNodePickerImpl, NodePicker, RemoteCompactionNodePickerImpl, - }, - remote_runner::RemoteCompactionRunner, - CompactionRunnerPtr, CompactionRunnerRef, - }, - scheduler::SchedulerImpl, - }, - context::OpenContext, - engine, - instance::{ - engine::{MetaClientNotExist, OpenManifest, OpenTablesOfShard, ReadMetaUpdate, Result}, - flush_compaction::Flusher, - mem_collector::MemUsageCollector, - wal_replayer::{ReplayMode, WalReplayer}, - Instance, InstanceRef, SpaceStore, - }, - manifest::{details::ManifestImpl, LoadRequest, Manifest, ManifestRef}, - row_iter::IterOptions, - space::{SpaceAndTable, SpaceRef, Spaces}, - sst::{ - factory::{FactoryRef as SstFactoryRef, ObjectStorePickerRef, ScanOptions}, - file::FilePurger, - }, - table::data::{TableCatalogInfo, TableDataRef}, - table_meta_set_impl::TableMetaSetImpl, - CompactionMode, RecoverMode, -}; - -pub(crate) struct InstanceContext { - pub instance: InstanceRef, - // TODO: unused now, will be used in remote compaction. 
- pub local_compaction_runner: Option, -} - -impl InstanceContext { - pub async fn new( - ctx: OpenContext, - manifest_storages: ManifestStorages, - wal_manager: WalManagerRef, - store_picker: ObjectStorePickerRef, - sst_factory: SstFactoryRef, - meta_client: Option, - ) -> Result { - info!( - "Construct compaction runner with compaction_mode:{:?}", - ctx.config.compaction_mode - ); - - let local_compaction_runner = LocalCompactionRunner::new( - ctx.runtimes.compact_runtime.clone(), - &ctx.config, - sst_factory.clone(), - store_picker.clone(), - ctx.meta_cache.clone(), - ); - - let compaction_runner: CompactionRunnerPtr = match &ctx.config.compaction_mode { - CompactionMode::Offload(NodePicker::Local(endpoint)) => { - Box::new(RemoteCompactionRunner { - node_picker: Arc::new(LocalCompactionNodePickerImpl { - endpoint: endpoint.clone(), - }), - // This field is set to false here for testing. - fallback_local_when_failed: false, - local_compaction_runner: local_compaction_runner.clone(), - }) - } - CompactionMode::Offload(NodePicker::Remote) => Box::new(RemoteCompactionRunner { - node_picker: Arc::new(RemoteCompactionNodePickerImpl { - meta_client: meta_client.context(MetaClientNotExist)?, - }), - fallback_local_when_failed: true, - local_compaction_runner: local_compaction_runner.clone(), - }), - - CompactionMode::Local => Box::new(LocalCompactionRunner::new( - ctx.runtimes.compact_runtime.clone(), - &ctx.config, - sst_factory.clone(), - store_picker.clone(), - ctx.meta_cache.clone(), - )), - }; - - let instance = Instance::open( - ctx, - manifest_storages, - wal_manager, - store_picker, - sst_factory, - compaction_runner, - ) - .await?; - - Ok(Self { - instance, - local_compaction_runner: Some(Arc::new(local_compaction_runner)), - }) - } -} - -pub(crate) struct ManifestStorages { - pub wal_manager: WalManagerRef, - pub oss_storage: ObjectStoreRef, -} - -impl Instance { - /// Open a new instance - pub(crate) async fn open( - ctx: OpenContext, - manifest_storages: ManifestStorages, - wal_manager: WalManagerRef, - store_picker: ObjectStorePickerRef, - sst_factory: SstFactoryRef, - compaction_runner: CompactionRunnerPtr, - ) -> Result> { - let spaces: Arc> = Arc::new(RwLock::new(Spaces::default())); - let default_runtime = ctx.runtimes.default_runtime.clone(); - let file_purger = Arc::new(FilePurger::start( - &default_runtime, - store_picker.default_store().clone(), - )); - - let table_meta_set_impl = Arc::new(TableMetaSetImpl { - spaces: spaces.clone(), - file_purger: file_purger.clone(), - preflush_write_buffer_size_ratio: ctx.config.preflush_write_buffer_size_ratio, - manifest_snapshot_every_n_updates: ctx.config.manifest.snapshot_every_n_updates, - enable_primary_key_sampling: ctx.config.enable_primary_key_sampling, - try_compat_old_layered_memtable_opts: ctx.config.try_compat_old_layered_memtable_opts, - metrics_opt: ctx.config.metrics.clone(), - }); - let manifest = ManifestImpl::open( - ctx.config.manifest.clone(), - manifest_storages.wal_manager, - manifest_storages.oss_storage, - table_meta_set_impl, - ) - .await - .context(OpenManifest)?; - - let space_store = Arc::new(SpaceStore { - spaces, - manifest: Arc::new(manifest), - wal_manager: wal_manager.clone(), - store_picker: store_picker.clone(), - sst_factory, - }); - - let scheduler_config = ctx.config.compaction.clone(); - let compaction_runtime = ctx.runtimes.compact_runtime.clone(); - let compaction_scheduler = Arc::new(SchedulerImpl::new( - space_store.clone(), - compaction_runner, - compaction_runtime, - scheduler_config, - 
ctx.config.write_sst_max_buffer_size.as_byte() as usize, - ctx.config.min_flush_interval.as_millis(), - )); - - let scan_options = ScanOptions { - background_read_parallelism: ctx.config.sst_background_read_parallelism, - max_record_batches_in_flight: ctx.config.scan_max_record_batches_in_flight, - num_streams_to_prefetch: ctx.config.num_streams_to_prefetch, - }; - - let iter_options = ctx - .config - .scan_batch_size - .map(|batch_size| IterOptions { batch_size }); - let instance = Arc::new(Instance { - space_store, - runtimes: ctx.runtimes.clone(), - table_opts: ctx.config.table_opts.clone(), - - compaction_scheduler, - file_purger, - meta_cache: ctx.meta_cache.clone(), - mem_usage_collector: Arc::new(MemUsageCollector::default()), - max_rows_in_write_queue: ctx.config.max_rows_in_write_queue, - db_write_buffer_size: ctx.config.db_write_buffer_size, - space_write_buffer_size: ctx.config.space_write_buffer_size, - replay_batch_size: ctx.config.replay_batch_size, - write_sst_max_buffer_size: ctx.config.write_sst_max_buffer_size.as_byte() as usize, - min_flush_interval: ctx.config.min_flush_interval, - max_retry_flush_limit: ctx.config.max_retry_flush_limit, - mem_usage_sampling_interval: ctx.config.mem_usage_sampling_interval, - max_bytes_per_write_batch: ctx - .config - .max_bytes_per_write_batch - .map(|v| v.as_byte() as usize), - iter_options, - scan_options, - recover_mode: ctx.config.recover_mode, - wal_encode: ctx.config.wal_encode, - disable_wal: ctx.config.wal.disable_data, - }); - - Ok(instance) - } - - /// Open the table. - pub async fn do_open_tables_of_shard( - self: &Arc, - context: TablesOfShardContext, - ) -> Result { - let mut shard_opener = ShardOpener::init( - context, - self.space_store.manifest.clone(), - self.space_store.wal_manager.clone(), - self.replay_batch_size, - self.make_flusher(), - self.max_retry_flush_limit, - self.recover_mode, - )?; - - shard_opener.open().await - } -} - -#[derive(Debug, Clone)] -pub struct TablesOfShardContext { - /// Shard id - pub shard_id: ShardId, - /// Table infos - pub table_ctxs: Vec, -} - -#[derive(Clone, Debug)] -pub struct TableContext { - pub table_def: TableDef, - pub space: SpaceRef, -} - -#[derive(Debug)] -enum TableOpenStage { - RecoverTableMeta(RecoverTableMetaContext), - RecoverTableData(RecoverTableDataContext), - Failed(crate::instance::engine::Error), - Success(Option), -} - -#[derive(Debug)] -struct RecoverTableMetaContext { - table_def: TableDef, - space: SpaceRef, -} - -#[derive(Debug)] -struct RecoverTableDataContext { - table_data: TableDataRef, - space: SpaceRef, -} - -pub type OpenTablesOfShardResult = HashMap>>; - -/// Opener for tables of the same shard -struct ShardOpener { - shard_id: ShardId, - manifest: ManifestRef, - wal_manager: WalManagerRef, - stages: HashMap, - wal_replay_batch_size: usize, - flusher: Flusher, - max_retry_flush_limit: usize, - recover_mode: RecoverMode, -} - -impl ShardOpener { - fn init( - shard_context: TablesOfShardContext, - manifest: ManifestRef, - wal_manager: WalManagerRef, - wal_replay_batch_size: usize, - flusher: Flusher, - max_retry_flush_limit: usize, - recover_mode: RecoverMode, - ) -> Result { - let mut stages = HashMap::with_capacity(shard_context.table_ctxs.len()); - for table_ctx in shard_context.table_ctxs { - let space = &table_ctx.space; - let table_id = table_ctx.table_def.id; - let state = if let Some(table_data) = space.find_table_by_id(table_id) { - // Table is possible to have been opened, we just mark it ready and ignore in - // recovery. 
- TableOpenStage::Success(Some(SpaceAndTable::new(space.clone(), table_data))) - } else { - TableOpenStage::RecoverTableMeta(RecoverTableMetaContext { - table_def: table_ctx.table_def, - space: table_ctx.space, - }) - }; - stages.insert(table_id, state); - } - - Ok(Self { - shard_id: shard_context.shard_id, - manifest, - wal_manager, - stages, - wal_replay_batch_size, - flusher, - max_retry_flush_limit, - recover_mode, - }) - } - - async fn open(&mut self) -> Result { - // Recover tables' metadata. - self.recover_table_metas().await?; - - // Recover table' data. - self.recover_table_datas().await?; - - // Retrieve the table results and return. - let stages = std::mem::take(&mut self.stages); - let mut table_results = HashMap::with_capacity(stages.len()); - for (table_id, state) in stages { - match state { - TableOpenStage::Failed(e) => { - table_results.insert(table_id, Err(e)); - } - TableOpenStage::Success(data) => { - table_results.insert(table_id, Ok(data)); - } - TableOpenStage::RecoverTableMeta(_) | TableOpenStage::RecoverTableData(_) => { - return OpenTablesOfShard { - msg: format!( - "unexpected table state, state:{state:?}, table_id:{table_id}", - ), - } - .fail() - } - } - } - - Ok(table_results) - } - - /// Recover table meta data from manifest based on shard. - async fn recover_table_metas(&mut self) -> Result<()> { - let shard_id = self.shard_id; - let table_num = self.stages.len(); - info!("ShardOpener recover table metas begin, shard_id:{shard_id}, table_num:{table_num}"); - - for (table_id, state) in self.stages.iter_mut() { - match state { - // Only do the meta recovery work in `RecoverTableMeta` state. - TableOpenStage::RecoverTableMeta(RecoverTableMetaContext { table_def, space }) => { - match Self::recover_single_table_meta( - self.manifest.as_ref(), - shard_id, - table_def, - ) - .await - .map(|_| space.find_table_by_id(*table_id)) - { - Ok(Some(table_data)) => { - *state = TableOpenStage::RecoverTableData(RecoverTableDataContext { - table_data, - space: space.clone(), - }); - } - Ok(None) => { - error!("ShardOpener tried to open a dropped table, table:{table_def:?}, shard_id:{shard_id}"); - // TODO: is this an error? - *state = TableOpenStage::Success(None); - } - Err(e) => { - error!("ShardOpener recover single table meta failed, table:{table_def:?}, shard_id:{shard_id}, err:{e}"); - *state = TableOpenStage::Failed(e) - } - }; - } - // Table was found to be opened in init stage. - TableOpenStage::Success(_) => {} - TableOpenStage::RecoverTableData(_) | TableOpenStage::Failed(_) => { - return OpenTablesOfShard { - msg: format!("unexpected table state:{state:?}"), - } - .fail(); - } - } - } - - info!("ShardOpener recover table metas finish, shard_id:{shard_id}, table_num:{table_num}",); - Ok(()) - } - - /// Recover table data based on shard. - async fn recover_table_datas(&mut self) -> Result<()> { - info!( - "ShardOpener recover table datas begin, shard_id:{}", - self.shard_id - ); - - // Replay wal logs of tables. - let mut replay_table_datas = Vec::with_capacity(self.stages.len()); - for (table_id, stage) in self.stages.iter_mut() { - match stage { - // Only do the wal recovery work in `RecoverTableData` state. - TableOpenStage::RecoverTableData(ctx) => { - replay_table_datas.push(ctx.table_data.clone()); - } - // Table was found opened, or failed in meta recovery stage. 
- TableOpenStage::Failed(_) | TableOpenStage::Success(_) => {} - TableOpenStage::RecoverTableMeta(_) => { - return OpenTablesOfShard { - msg: format!( - "unexpected stage, stage:{stage:?}, table_id:{table_id}, shard_id:{}", - self.shard_id - ), - } - .fail(); - } - } - } - - if replay_table_datas.is_empty() { - info!( - "ShardOpener recover empty table datas finish, shard_id:{}", - self.shard_id - ); - - return Ok(()); - } - - let replay_mode = match self.recover_mode { - RecoverMode::TableBased => ReplayMode::TableBased, - RecoverMode::ShardBased => ReplayMode::RegionBased, - }; - let mut wal_replayer = WalReplayer::new( - &replay_table_datas, - self.shard_id, - self.wal_manager.clone(), - self.wal_replay_batch_size, - self.flusher.clone(), - self.max_retry_flush_limit, - replay_mode, - ); - let mut table_results = wal_replayer.replay().await?; - - // Process the replay results. - for table_data in replay_table_datas { - let table_id = table_data.id; - // Each `table_data` has its related `stage` in `stages`, impossible to panic - // here. - let stage = self.stages.get_mut(&table_id).unwrap(); - let failed_table_opt = table_results.remove(&table_id); - - match (&stage, failed_table_opt) { - (TableOpenStage::RecoverTableData(ctx), None) => { - let space_table = SpaceAndTable::new(ctx.space.clone(), ctx.table_data.clone()); - *stage = TableOpenStage::Success(Some(space_table)); - } - - (TableOpenStage::RecoverTableData(_), Some(e)) => { - error!("ShardOpener replay wals of single table failed, table:{}, table_id:{}, shard_id:{}, err:{e}", table_data.name, table_data.id, self.shard_id); - *stage = TableOpenStage::Failed(e); - } - - (other_stage, _) => { - return OpenTablesOfShard { - msg: format!("unexpected stage, stage:{other_stage:?}, table_id:{table_id}, shard_id:{}", self.shard_id), - }.fail(); - } - } - } - - info!( - "ShardOpener recover table datas finish, shard_id:{}", - self.shard_id - ); - Ok(()) - } - - /// Recover meta data from manifest. - /// - /// Return None if no meta data is found for the table. - async fn recover_single_table_meta( - manifest: &dyn Manifest, - shard_id: ShardId, - table_def: &TableDef, - ) -> Result<()> { - info!( - "Instance recover table meta begin, table_id:{}, table_name:{}, shard_id:{shard_id}", - table_def.id, table_def.name - ); - - // Load manifest, also create a new snapshot at startup. - let TableDef { - catalog_name, - schema_name, - schema_id, - id, - name: _, - } = table_def.clone(); - - let space_id = engine::build_space_id(schema_id); - let load_req = LoadRequest { - space_id, - table_id: id, - shard_id, - table_catalog_info: TableCatalogInfo { - schema_id, - schema_name, - catalog_name, - }, - }; - manifest.recover(&load_req).await.context(ReadMetaUpdate { - table_id: table_def.id, - })?; - - info!( - "Instance recover table meta end, table_id:{}, table_name:{}, shard_id:{shard_id}", - table_def.id, table_def.name - ); - - Ok(()) - } -} diff --git a/src/analytic_engine/src/instance/read.rs b/src/analytic_engine/src/instance/read.rs deleted file mode 100644 index 01065ea3bf..0000000000 --- a/src/analytic_engine/src/instance/read.rs +++ /dev/null @@ -1,453 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Read logic of instance - -use std::{ - collections::BTreeMap, - pin::Pin, - task::{Context, Poll}, -}; - -use async_stream::try_stream; -use common_types::{ - projected_schema::ProjectedSchema, - record_batch::{FetchedRecordBatch, RecordBatch}, - schema::RecordSchema, - time::TimeRange, -}; -use futures::stream::Stream; -use generic_error::BoxError; -use logger::debug; -use macros::define_result; -use snafu::{ResultExt, Snafu}; -use table_engine::{ - stream::{ - self, ErrWithSource, PartitionedStreams, RecordBatchStream, SendableRecordBatchStream, - }, - table::ReadRequest, -}; -use time_ext::current_time_millis; -use trace_metric::Metric; - -use crate::{ - instance::{Instance, ScanType, SstReadOptionsBuilder}, - row_iter::{ - chain, - chain::{ChainConfig, ChainIterator}, - dedup::DedupIterator, - merge::{MergeBuilder, MergeConfig, MergeIterator}, - FetchedRecordBatchIterator, IterOptions, - }, - table::{ - data::TableData, - version::{ReadView, TableVersion}, - }, - table_options::TableOptions, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to scan memtable, table:{}, err:{}", table, source))] - ScanMemTable { - table: String, - source: crate::memtable::Error, - }, - - #[snafu(display("Failed to build merge iterator, table:{}, err:{}", table, source))] - BuildMergeIterator { - table: String, - source: crate::row_iter::merge::Error, - }, - - #[snafu(display("Failed to build chain iterator, table:{}, err:{}", table, source))] - BuildChainIterator { - table: String, - source: crate::row_iter::chain::Error, - }, -} - -define_result!(Error); - -const MERGE_SORT_METRIC_NAME: &str = "do_merge_sort"; -const ITER_NUM_METRIC_NAME: &str = "iter_num"; -const MERGE_ITER_METRICS_COLLECTOR_NAME_PREFIX: &str = "merge_iter"; -const CHAIN_ITER_METRICS_COLLECTOR_NAME_PREFIX: &str = "chain_iter"; - -impl Instance { - /// Read data in multiple time range from table, and return - /// `read_parallelism` output streams. - pub async fn partitioned_read_from_table( - &self, - table_data: &TableData, - request: ReadRequest, - ) -> Result { - debug!( - "Instance read from table, space_id:{}, table:{}, table_id:{:?}, request:{:?}", - table_data.space_id, table_data.name, table_data.id, request - ); - - // Stats query time range information of table. - let time_range = request.predicate.time_range(); - let start_time = time_range.inclusive_start().as_i64(); - let end_time = time_range.exclusive_end().as_i64(); - let now = current_time_millis() as i64; - - let query_time_range = (end_time as f64 - start_time as f64) / 1000.0; - let table_metrics = table_data.metrics.maybe_table_level_metrics(); - table_metrics.query_time_range.observe(query_time_range); - let since_start = (now as f64 - start_time as f64) / 1000.0; - table_metrics - .duration_since_query_query_start_time - .observe(since_start); - - // Collect trace metrics. 
- let table_options = table_data.table_options(); - table_data.metrics.on_read_request_begin(); - let need_merge_sort = table_options.need_dedup(); - request.metrics_collector.collect(Metric::boolean( - MERGE_SORT_METRIC_NAME.to_string(), - need_merge_sort, - None, - )); - - let runtime = self - .read_runtime() - .choose_runtime(&request.priority) - .clone(); - let sst_read_options_builder = SstReadOptionsBuilder::new( - ScanType::Query, - self.scan_options.clone(), - Some(table_metrics.sst_metrics.clone()), - table_options.num_rows_per_row_group, - request.predicate.clone(), - self.meta_cache.clone(), - runtime, - ); - - if need_merge_sort { - let merge_iters = self - .build_merge_iters( - table_data, - &request, - &table_options, - sst_read_options_builder, - ) - .await?; - self.build_partitioned_streams(&request, merge_iters) - } else { - let chain_iters = self - .build_chain_iters( - table_data, - &request, - &table_options, - sst_read_options_builder, - ) - .await?; - self.build_partitioned_streams(&request, chain_iters) - } - } - - fn build_partitioned_streams( - &self, - request: &ReadRequest, - partitioned_iters: Vec, - ) -> Result { - let read_parallelism = request.opts.read_parallelism; - - // Split iterators into `read_parallelism` groups. - let mut splitted_iters: Vec<_> = std::iter::repeat_with(Vec::new) - .take(read_parallelism) - .collect(); - - for (i, time_aligned_iter) in partitioned_iters.into_iter().enumerate() { - splitted_iters[i % read_parallelism].push(time_aligned_iter); - } - - let mut streams = Vec::with_capacity(read_parallelism); - for iters in splitted_iters { - let stream = iters_to_stream(iters, request.projected_schema.clone()); - streams.push(stream); - } - - assert_eq!(read_parallelism, streams.len()); - - Ok(PartitionedStreams { streams }) - } - - async fn build_merge_iters( - &self, - table_data: &TableData, - request: &ReadRequest, - table_options: &TableOptions, - sst_read_options_builder: SstReadOptionsBuilder, - ) -> Result>> { - // Current visible sequence - let sequence = table_data.last_sequence(); - let time_range = request.predicate.time_range(); - let version = table_data.current_version(); - let read_views = self.partition_ssts_and_memtables(time_range, version, table_options); - let iter_options = self.make_iter_options(table_options.num_rows_per_row_group); - - let mut iters = Vec::with_capacity(read_views.len()); - for (idx, read_view) in read_views.into_iter().enumerate() { - let metrics_collector = request - .metrics_collector - .span(format!("{MERGE_ITER_METRICS_COLLECTOR_NAME_PREFIX}_{idx}")); - let merge_config = MergeConfig { - request_id: request.request_id.clone(), - metrics_collector: Some(metrics_collector), - deadline: request.opts.deadline, - space_id: table_data.space_id, - table_id: table_data.id, - sequence, - projected_schema: request.projected_schema.clone(), - predicate: request.predicate.clone(), - sst_factory: &self.space_store.sst_factory, - sst_read_options_builder: sst_read_options_builder.clone(), - store_picker: self.space_store.store_picker(), - merge_iter_options: iter_options.clone(), - need_dedup: table_options.need_dedup(), - reverse: false, - }; - - let merge_iter = MergeBuilder::new(merge_config) - .sampling_mem(read_view.sampling_mem) - .memtables(read_view.memtables) - .ssts_of_level(read_view.leveled_ssts) - .build() - .await - .context(BuildMergeIterator { - table: &table_data.name, - })?; - let dedup_iter = - DedupIterator::new(request.request_id.clone(), merge_iter, iter_options.clone()); - - 
iters.push(dedup_iter); - } - - request.metrics_collector.collect(Metric::number( - ITER_NUM_METRIC_NAME.to_string(), - iters.len(), - None, - )); - - Ok(iters) - } - - async fn build_chain_iters( - &self, - table_data: &TableData, - request: &ReadRequest, - table_options: &TableOptions, - sst_read_options_builder: SstReadOptionsBuilder, - ) -> Result> { - let projected_schema = request.projected_schema.clone(); - - let time_range = request.predicate.time_range(); - let version = table_data.current_version(); - let read_views = self.partition_ssts_and_memtables(time_range, version, table_options); - - let mut iters = Vec::with_capacity(read_views.len()); - for (idx, read_view) in read_views.into_iter().enumerate() { - let metrics_collector = request - .metrics_collector - .span(format!("{CHAIN_ITER_METRICS_COLLECTOR_NAME_PREFIX}_{idx}")); - let chain_config = ChainConfig { - request_id: request.request_id.clone(), - metrics_collector: Some(metrics_collector), - deadline: request.opts.deadline, - num_streams_to_prefetch: self.scan_options.num_streams_to_prefetch, - space_id: table_data.space_id, - table_id: table_data.id, - projected_schema: projected_schema.clone(), - predicate: request.predicate.clone(), - sst_read_options_builder: sst_read_options_builder.clone(), - sst_factory: &self.space_store.sst_factory, - store_picker: self.space_store.store_picker(), - }; - let builder = chain::Builder::new(chain_config); - let chain_iter = builder - .sampling_mem(read_view.sampling_mem) - .memtables(read_view.memtables) - .ssts(read_view.leveled_ssts) - .build() - .await - .context(BuildChainIterator { - table: &table_data.name, - })?; - - iters.push(chain_iter); - } - - Ok(iters) - } - - fn partition_ssts_and_memtables( - &self, - time_range: TimeRange, - version: &TableVersion, - table_options: &TableOptions, - ) -> Vec { - let read_view = version.pick_read_view(time_range); - - let segment_duration = match table_options.segment_duration { - Some(v) => v.0, - None => { - // Segment duration is unknown, the table maybe still in sampling phase - // or the segment duration is still not applied to the table options, - // just return one partition. - return vec![read_view]; - } - }; - if read_view.contains_sampling() { - // The table contains sampling memtable, just return one partition. - return vec![read_view]; - } - - // Collect the aligned ssts and memtables into the map. 
- // {aligned timestamp} => {read view} - let mut read_view_by_time = BTreeMap::new(); - for (level, leveled_ssts) in read_view.leveled_ssts.into_iter().enumerate() { - for file in leveled_ssts { - let aligned_ts = file - .time_range() - .inclusive_start() - .truncate_by(segment_duration); - let entry = read_view_by_time - .entry(aligned_ts) - .or_insert_with(ReadView::default); - entry.leveled_ssts[level].push(file); - } - } - - for memtable in read_view.memtables { - let aligned_ts = memtable - .aligned_time_range - .inclusive_start() - .truncate_by(segment_duration); - let entry = read_view_by_time - .entry(aligned_ts) - .or_insert_with(ReadView::default); - entry.memtables.push(memtable); - } - - read_view_by_time.into_values().collect() - } - - fn make_iter_options(&self, num_rows_per_row_group: usize) -> IterOptions { - self.iter_options.clone().unwrap_or(IterOptions { - batch_size: num_rows_per_row_group, - }) - } -} - -struct StreamStateOnMultiIters { - iters: Vec, - curr_iter_idx: usize, - projected_schema: ProjectedSchema, -} - -impl StreamStateOnMultiIters { - fn is_exhausted(&self) -> bool { - self.curr_iter_idx >= self.iters.len() - } - - fn advance(&mut self) { - self.curr_iter_idx += 1; - } - - fn curr_iter_mut(&mut self) -> &mut I { - &mut self.iters[self.curr_iter_idx] - } - - async fn fetch_next_batch( - &mut self, - ) -> Option> { - loop { - if self.is_exhausted() { - return None; - } - - let iter = self.curr_iter_mut(); - if let Some(v) = iter.next_batch().await.transpose() { - return Some(v); - } - - self.advance(); - } - } -} - -fn iters_to_stream( - iters: Vec, - projected_schema: ProjectedSchema, -) -> SendableRecordBatchStream { - let mut state = StreamStateOnMultiIters { - projected_schema: projected_schema.clone(), - iters, - curr_iter_idx: 0, - }; - - let record_batch_stream = try_stream! { - while let Some(value) = state.fetch_next_batch().await { - let record_batch = value - .box_err() - .context(ErrWithSource { - msg: "Read record batch", - }) - .and_then(|batch_with_key| { - // TODO(yingwen): Try to use projector to do this, which pre-compute row - // indexes to project. - batch_with_key - .try_project(&state.projected_schema) - .box_err() - .context(ErrWithSource { - msg: "Project record batch", - }) - }); - yield record_batch?; - } - }; - - let record_schema = projected_schema.to_record_schema(); - let stream_with_schema = RecordBatchStreamWithSchema { - schema: record_schema, - inner_stream: Box::pin(Box::pin(record_batch_stream)), - }; - Box::pin(stream_with_schema) -} - -pub struct RecordBatchStreamWithSchema { - schema: RecordSchema, - inner_stream: Pin> + Send + Unpin>>, -} - -impl Stream for RecordBatchStreamWithSchema { - type Item = stream::Result; - - fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - let this = self.get_mut(); - this.inner_stream.as_mut().poll_next(cx) - } -} - -impl RecordBatchStream for RecordBatchStreamWithSchema { - fn schema(&self) -> &RecordSchema { - &self.schema - } -} diff --git a/src/analytic_engine/src/instance/reorder_memtable.rs b/src/analytic_engine/src/instance/reorder_memtable.rs deleted file mode 100644 index d65bb92e0e..0000000000 --- a/src/analytic_engine/src/instance/reorder_memtable.rs +++ /dev/null @@ -1,297 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{ - any::Any, - fmt, - pin::Pin, - sync::{Arc, Mutex}, - task::{Context, Poll}, -}; - -pub use arrow::{ - datatypes::SchemaRef as ArrowSchemaRef, record_batch::RecordBatch as ArrowRecordBatch, -}; -use async_trait::async_trait; -use common_types::{ - record_batch::{FetchedRecordBatch, RecordBatchData}, - schema::Schema, -}; -use datafusion::{ - datasource::{DefaultTableSource, TableProvider}, - error::DataFusionError, - execution::{context::SessionState, runtime_env::RuntimeEnv, TaskContext}, - logical_expr::{LogicalPlan, LogicalPlanBuilder, TableType}, - physical_expr::PhysicalSortExpr, - physical_plan::{ - execute_stream, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - RecordBatchStream as DfRecordBatchStream, SendableRecordBatchStream, Statistics, - }, - prelude::{ident, Expr, SessionConfig, SessionContext}, - sql::TableReference, -}; -use futures::{Stream, StreamExt}; -use macros::define_result; -use snafu::{ResultExt, Snafu}; - -use crate::memtable::ColumnarIterPtr; - -const DUMMY_TABLE_NAME: &str = "memtable_iter"; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to build plan, source:{source}"))] - MemtableIter { source: crate::memtable::Error }, - - #[snafu(display("Failed to build plan, source:{source}"))] - BuildPlan { source: DataFusionError }, - - #[snafu(display("Failed to fetch record batch, source:{source}"))] - FetchRecordBatch { source: DataFusionError }, - - #[snafu(display("Failed to convert to record batch data, source:{source}"))] - ConvertRecordBatchData { - source: common_types::record_batch::Error, - }, -} - -define_result!(Error); - -pub type DfResult = std::result::Result; -type SendableFetchingRecordBatchStream = - Pin> + Send>>; - -impl From for Error { - fn from(df_err: DataFusionError) -> Self { - Error::BuildPlan { source: df_err } - } -} - -/// Reorder will sort `iter` by given indexes. 
-/// Currently leverage DataFusion to do the sort, we will build a plan like -/// this: -/// -/// ```plaintext -/// Sort: (given columns) asc -/// Project: -/// TableScan (based on memtable's iter) -/// ``` -pub struct Reorder { - pub(crate) iter: ColumnarIterPtr, - pub(crate) schema: Schema, - pub(crate) order_by_col_indexes: Vec, -} - -struct ScanMemIter { - arrow_schema: ArrowSchemaRef, - iter: Mutex>, -} - -impl ExecutionPlan for ScanMemIter { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> ArrowSchemaRef { - self.arrow_schema.clone() - } - - fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(1) - } - - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - None - } - - fn children(&self) -> Vec> { - // this is a leaf node and has no children - vec![] - } - - fn with_new_children( - self: Arc, - _: Vec>, - ) -> DfResult> { - Err(DataFusionError::Internal(format!( - "Children cannot be replaced in {self:?}" - ))) - } - - fn execute( - &self, - _partition: usize, - _context: Arc, - ) -> DfResult { - let mut iter = self.iter.lock().unwrap(); - let iter = iter.take().expect("only can execute once"); - - Ok(Box::pin(MemIterStream { - iter, - arrow_schema: self.arrow_schema.clone(), - })) - } - - fn statistics( - &self, - ) -> std::result::Result - { - Ok(Statistics::new_unknown(&self.schema())) - } -} - -struct MemIterStream { - iter: ColumnarIterPtr, - arrow_schema: ArrowSchemaRef, -} - -impl DfRecordBatchStream for MemIterStream { - fn schema(&self) -> ArrowSchemaRef { - self.arrow_schema.clone() - } -} - -impl Stream for MemIterStream { - type Item = DfResult; - - fn poll_next(mut self: Pin<&mut Self>, _ctx: &mut Context<'_>) -> Poll> { - let mut this = self.as_mut(); - Poll::Ready(this.iter.next().map(|batch| { - batch - .map(|batch| batch.into_arrow_record_batch()) - .map_err(|e| DataFusionError::External(Box::new(e))) - })) - } -} - -impl DisplayAs for ScanMemIter { - fn fmt_as(&self, _t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "ScanMemIter: table={}", DUMMY_TABLE_NAME) - } -} - -impl fmt::Debug for ScanMemIter { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("ScanMemIter") - .field("schema", &self.arrow_schema) - .finish() - } -} - -struct MemIterProvider { - arrow_schema: ArrowSchemaRef, - iter: Mutex>, -} - -#[async_trait] -impl TableProvider for MemIterProvider { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> ArrowSchemaRef { - self.arrow_schema.clone() - } - - fn table_type(&self) -> TableType { - TableType::Temporary - } - - // TODO: export stats - fn statistics(&self) -> Option { - None - } - - async fn scan( - &self, - _state: &SessionState, - _projection: Option<&Vec>, - // filters and limit can be used here to inject some push-down operations if needed - _filters: &[Expr], - _limit: Option, - ) -> DfResult> { - let mut iter = self.iter.lock().unwrap(); - let iter = iter.take().expect("only can scan once"); - - let plan = ScanMemIter { - arrow_schema: self.arrow_schema.clone(), - iter: Mutex::new(Some(iter)), - }; - Ok(Arc::new(plan)) - } -} - -impl Reorder { - fn build_logical_plan( - schema: &Schema, - sort_by_col_idx: &[usize], - table_provider: Arc, - ) -> Result { - let source = Arc::new(DefaultTableSource::new(table_provider)); - - let columns = schema.columns(); - let sort_exprs = sort_by_col_idx - .iter() - .map(|i| ident(&columns[*i].name).sort(true, true)) - .collect::>(); - let df_plan = 
LogicalPlanBuilder::scan(DUMMY_TABLE_NAME, source, None)? - .sort(sort_exprs)? - .build() - .context(BuildPlan)?; - - Ok(df_plan) - } - - // TODO: In theory we can construct a physical plan directly, here we choose - // logical because it has a convenient builder API for use. - pub async fn into_stream(self) -> Result { - // 1. Init datafusion context - let runtime = Arc::new(RuntimeEnv::default()); - let state = SessionState::new_with_config_rt(SessionConfig::new(), runtime); - let ctx = SessionContext::new_with_state(state); - let table_provider = Arc::new(MemIterProvider { - arrow_schema: self.schema.to_arrow_schema_ref(), - iter: Mutex::new(Some(self.iter)), - }); - ctx.register_table( - TableReference::from(DUMMY_TABLE_NAME), - table_provider.clone(), - ) - .context(BuildPlan)?; - - // 2. Build plan - let logical_plan = - Self::build_logical_plan(&self.schema, &self.order_by_col_indexes, table_provider)?; - let physical_plan = ctx.state().create_physical_plan(&logical_plan).await?; - - // 3. Execute plan and transform stream - let stream = execute_stream(physical_plan, ctx.task_ctx())?; - let record_schema = self.schema.to_record_schema(); - let stream = stream.map(move |batch| { - let batch = batch.context(FetchRecordBatch)?; - let data = RecordBatchData::try_from(batch).context(ConvertRecordBatchData)?; - - Ok(FetchedRecordBatch::new_from_parts( - record_schema.clone(), - None, - data, - )) - }); - - Ok(Box::pin(stream)) - } -} diff --git a/src/analytic_engine/src/instance/serial_executor.rs b/src/analytic_engine/src/instance/serial_executor.rs deleted file mode 100644 index b2b1ef8cc3..0000000000 --- a/src/analytic_engine/src/instance/serial_executor.rs +++ /dev/null @@ -1,261 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::{ - sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, Mutex, - }, - time::Instant, -}; - -use futures::Future; -use logger::{error, warn}; -use runtime::Runtime; -use table_engine::table::TableId; -use time_ext::InstantExt; -use tokio::sync::{ - oneshot, - watch::{self, Receiver, Sender}, -}; - -use crate::{ - instance::flush_compaction::{BackgroundFlushFailed, Other, Result, TableFlushOptions}, - table::data::TableData, -}; - -#[derive(Default)] -enum FlushState { - #[default] - Ready, - Flushing, - Failed { - err_msg: String, - }, -} - -type ScheduleSyncRef = Arc; - -struct ScheduleSync { - state: Mutex, - notifier: Sender<()>, - continuous_flush_failure_count: AtomicUsize, -} - -impl ScheduleSync { - #[inline] - pub fn should_retry_flush(&self, max_retry_limit: usize) -> bool { - self.continuous_flush_failure_count.load(Ordering::Relaxed) < max_retry_limit - } - - #[inline] - pub fn reset_flush_failure_count(&self) { - self.continuous_flush_failure_count - .store(0, Ordering::Relaxed); - } - - #[inline] - pub fn inc_flush_failure_count(&self) { - self.continuous_flush_failure_count - .fetch_add(1, Ordering::Relaxed); - } -} - -pub struct TableFlushScheduler { - schedule_sync: ScheduleSyncRef, - state_watcher: Receiver<()>, -} - -impl Default for TableFlushScheduler { - fn default() -> Self { - let (tx, rx) = watch::channel(()); - let schedule_sync = ScheduleSync { - state: Mutex::new(FlushState::Ready), - notifier: tx, - continuous_flush_failure_count: AtomicUsize::new(0), - }; - Self { - schedule_sync: Arc::new(schedule_sync), - state_watcher: rx, - } - } -} - -/// All operations on tables must hold the mutable reference of this -/// [TableOpSerialExecutor]. -/// -/// To ensure the consistency of a table's data, these rules are required: -/// - The write procedure (write wal + write memtable) should be serialized as a -/// whole, that is to say, it is not allowed to write wal and memtable -/// concurrently or interleave the two sub-procedures; -/// - Any operation that may change the data of a table should be serialized, -/// including altering table schema, dropping table, etc; -/// - The flush procedure of a table should be serialized; -pub struct TableOpSerialExecutor { - table_id: TableId, - flush_scheduler: TableFlushScheduler, -} - -impl TableOpSerialExecutor { - pub fn new(table_id: TableId) -> Self { - Self { - table_id, - flush_scheduler: TableFlushScheduler::default(), - } - } - - #[inline] - pub fn table_id(&self) -> TableId { - self.table_id - } -} - -impl TableOpSerialExecutor { - pub fn flush_scheduler(&mut self) -> &mut TableFlushScheduler { - &mut self.flush_scheduler - } -} - -impl TableFlushScheduler { - pub fn is_in_flush(&self) -> bool { - let state = self.schedule_sync.state.lock().unwrap(); - matches!(&*state, FlushState::Flushing) - } - - /// Control the flush procedure and ensure multiple flush procedures to be - /// sequential. - /// - /// REQUIRE: should only be called by the write thread. - pub async fn flush_sequentially( - &mut self, - flush_job: F, - block_on_write_thread: bool, - opts: TableFlushOptions, - runtime: &Runtime, - table_data: Arc, - ) -> Result<()> - where - F: Future> + Send + 'static, - { - let metrics = &table_data.metrics; - // If flush operation is running, then we need to wait for it to complete first. - // Actually, the loop waiting ensures the multiple flush procedures to be - // sequential, that is to say, at most one flush is being executed at - // the same time. 
- let mut stall_begin: Option = None; - - loop { - { - // Check if the flush procedure is running and the lock will be dropped when - // leaving the block. - let mut flush_state = self.schedule_sync.state.lock().unwrap(); - match &*flush_state { - FlushState::Ready => { - // Mark the worker is flushing. - *flush_state = FlushState::Flushing; - break; - } - FlushState::Flushing => {} - FlushState::Failed { err_msg } => { - if self - .schedule_sync - .should_retry_flush(opts.max_retry_flush_limit) - { - warn!("Re-flush memory tables after background flush failed:{err_msg}"); - // Mark the worker is flushing. - *flush_state = FlushState::Flushing; - break; - } else { - return BackgroundFlushFailed { - msg: err_msg, - retry_count: opts.max_retry_flush_limit, - } - .fail(); - } - } - } - - if stall_begin.is_none() { - stall_begin = Some(Instant::now()); - } - } - - if self.state_watcher.changed().await.is_err() { - return Other { - msg: "State notifier is dropped unexpectedly", - } - .fail(); - } - } - - // Record the write stall cost. - if let Some(stall_begin) = stall_begin { - let time = stall_begin.saturating_elapsed(); - metrics.on_write_stall(time); - } - - // TODO(yingwen): Store pending flush requests and retry flush on - // recoverable error, or try to recover from background - // error. - - let schedule_sync = self.schedule_sync.clone(); - let task = async move { - let flush_res = flush_job.await; - on_flush_finished(schedule_sync, &flush_res); - send_flush_result(opts.res_sender, flush_res); - }; - - if block_on_write_thread { - task.await; - } else { - runtime.spawn(task); - } - - Ok(()) - } -} - -fn on_flush_finished(schedule_sync: ScheduleSyncRef, res: &Result<()>) { - { - let mut flush_state = schedule_sync.state.lock().unwrap(); - match res { - Ok(()) => { - schedule_sync.reset_flush_failure_count(); - *flush_state = FlushState::Ready; - } - Err(e) => { - error!("Failed to run flush task, err:{e}"); - - schedule_sync.inc_flush_failure_count(); - let err_msg = e.to_string(); - *flush_state = FlushState::Failed { err_msg }; - } - } - } - - if schedule_sync.notifier.send(()).is_err() { - error!("Fail to notify flush state change, flush_res:{res:?}"); - } -} - -fn send_flush_result(res_sender: Option>>, res: Result<()>) { - if let Some(tx) = res_sender { - if let Err(send_res) = tx.send(res) { - error!("Fail to send flush result, send_res:{:?}", send_res); - } - } -} diff --git a/src/analytic_engine/src/instance/wal_replayer.rs b/src/analytic_engine/src/instance/wal_replayer.rs deleted file mode 100644 index 6c67414037..0000000000 --- a/src/analytic_engine/src/instance/wal_replayer.rs +++ /dev/null @@ -1,716 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Wal replayer - -use std::{ - collections::{HashMap, VecDeque}, - fmt::Display, - ops::Range, - sync::Arc, - time::Instant, -}; - -use async_trait::async_trait; -use common_types::{ - schema::{IndexInWriterSchema, Schema}, - table::ShardId, -}; -use generic_error::BoxError; -use lazy_static::lazy_static; -use logger::{debug, error, info, trace, warn}; -use prometheus::{exponential_buckets, register_histogram, Histogram}; -use snafu::ResultExt; -use table_engine::table::TableId; -use tokio::sync::{Mutex, MutexGuard, Semaphore}; -use wal::{ - log_batch::LogEntry, - manager::{ - ReadBoundary, ReadContext, ReadRequest, RegionId, ScanContext, ScanRequest, WalManagerRef, - }, -}; - -use crate::{ - instance::{ - self, - engine::{Error, ReplayWalWithCause, Result}, - flush_compaction::{Flusher, TableFlushOptions}, - serial_executor::TableOpSerialExecutor, - write::{Error as WriteError, MemTableWriter}, - }, - payload::{ReadPayload, SingleSchemaProviderAdapter, TableSchemaProvider, WalDecoder}, - table::data::TableDataRef, - ErrorKind, -}; - -// Metrics of wal replayer -lazy_static! { - static ref PULL_LOGS_DURATION_HISTOGRAM: Histogram = register_histogram!( - "wal_replay_pull_logs_duration", - "Histogram for pull logs duration in wal replay in seconds", - exponential_buckets(0.01, 2.0, 13).unwrap() - ) - .unwrap(); - static ref APPLY_LOGS_DURATION_HISTOGRAM: Histogram = register_histogram!( - "wal_replay_apply_logs_duration", - "Histogram for apply logs duration in wal replay in seconds", - exponential_buckets(0.01, 2.0, 13).unwrap() - ) - .unwrap(); -} - -const MAX_REPLAY_TASK_NUM: usize = 20; - -/// Wal replayer supporting both table based and region based -// TODO: limit the memory usage in `RegionBased` mode. -pub struct WalReplayer<'a> { - context: ReplayContext, - replay: Box, - table_datas: &'a [TableDataRef], -} - -impl<'a> WalReplayer<'a> { - pub fn new( - table_datas: &'a [TableDataRef], - shard_id: ShardId, - wal_manager: WalManagerRef, - wal_replay_batch_size: usize, - flusher: Flusher, - max_retry_flush_limit: usize, - replay_mode: ReplayMode, - ) -> Self { - let context = ReplayContext { - shard_id, - wal_manager, - wal_replay_batch_size, - flusher, - max_retry_flush_limit, - }; - - let replay = Self::build_replay(replay_mode); - - Self { - replay, - context, - table_datas, - } - } - - fn build_replay(mode: ReplayMode) -> Box { - info!("Replay wal in mode:{mode:?}"); - - match mode { - ReplayMode::RegionBased => Box::new(RegionBasedReplay), - ReplayMode::TableBased => Box::new(TableBasedReplay), - } - } - - /// Replay tables and return the failed tables and the causes. - pub async fn replay(&mut self) -> Result { - // Build replay action according to mode. 
- let table_num = self.table_datas.len(); - info!( - "Replay wal logs begin, context:{}, table_num:{table_num}, tables:{:?}", - self.context, self.table_datas - ); - let begin = Instant::now(); - let result = self.replay.run(&self.context, self.table_datas).await; - let cost = Instant::now().duration_since(begin); - info!("Replay wal logs finish, table_num:{table_num}, cost:{cost:?}"); - - result - } -} - -pub struct ReplayContext { - pub shard_id: ShardId, - pub wal_manager: WalManagerRef, - pub wal_replay_batch_size: usize, - pub flusher: Flusher, - pub max_retry_flush_limit: usize, -} - -impl Display for ReplayContext { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("ReplayContext") - .field("shard_id", &self.shard_id) - .field("replay_batch_size", &self.wal_replay_batch_size) - .field("max_retry_flush_limit", &self.max_retry_flush_limit) - .finish() - } -} - -#[derive(Debug, Clone, Copy)] -pub enum ReplayMode { - RegionBased, - TableBased, -} - -pub type FailedTables = HashMap; - -/// Replay action, the abstract of different replay strategies -#[async_trait] -trait Replay: Send + Sync + 'static { - async fn run( - &self, - context: &ReplayContext, - table_datas: &[TableDataRef], - ) -> Result; -} - -/// Table based wal replay -struct TableBasedReplay; - -#[async_trait] -impl Replay for TableBasedReplay { - async fn run( - &self, - context: &ReplayContext, - table_datas: &[TableDataRef], - ) -> Result { - debug!("Replay wal logs on table mode, context:{context}, tables:{table_datas:?}",); - - let mut failed_tables = HashMap::new(); - let read_ctx = ReadContext { - batch_size: context.wal_replay_batch_size, - ..Default::default() - }; - - let ((), results) = async_scoped::TokioScope::scope_and_block(|scope| { - // Limit the maximum number of concurrent tasks. - let semaphore = Arc::new(Semaphore::new(MAX_REPLAY_TASK_NUM)); - for table_data in table_datas { - let table_id = table_data.id; - let read_ctx = &read_ctx; - let semaphore = semaphore.clone(); - scope.spawn(async move { - let _permit = semaphore.acquire().await.unwrap(); - let ret = Self::recover_table_logs(context, table_data, read_ctx).await; - (table_id, ret) - }); - } - }); - - for result in results.into_iter().flatten() { - if let (table_id, Err(e)) = result { - // If occur error, mark this table as failed and store the cause. - failed_tables.insert(table_id, e); - } - } - - Ok(failed_tables) - } -} - -impl TableBasedReplay { - async fn recover_table_logs( - context: &ReplayContext, - table_data: &TableDataRef, - read_ctx: &ReadContext, - ) -> Result<()> { - let table_location = table_data.table_location(); - let wal_location = - instance::create_wal_location(table_location.id, table_location.shard_info); - let read_req = ReadRequest { - location: wal_location, - start: ReadBoundary::Excluded(table_data.current_version().flushed_sequence()), - end: ReadBoundary::Max, - }; - - // Read all wal of current table. 
- let mut log_iter = context - .wal_manager - .read_batch(read_ctx, &read_req) - .await - .box_err() - .context(ReplayWalWithCause { msg: None })?; - - let mut serial_exec = table_data.serial_exec.lock().await; - let mut log_entry_buf = VecDeque::with_capacity(context.wal_replay_batch_size); - loop { - // fetch entries to log_entry_buf - let _timer = PULL_LOGS_DURATION_HISTOGRAM.start_timer(); - let adapter = SingleSchemaProviderAdapter { - schema: table_data.schema(), - }; - let decoder = WalDecoder::new(adapter); - // All the logs should belong the table, so no need to check again. - let filter = |_| true; - log_entry_buf = log_iter - .next_log_entries(decoder, filter, log_entry_buf) - .await - .box_err() - .context(ReplayWalWithCause { msg: None })?; - - if log_entry_buf.is_empty() { - break; - } - - // Replay all log entries of current table - let _timer = APPLY_LOGS_DURATION_HISTOGRAM.start_timer(); - replay_table_log_entries( - &context.flusher, - context.max_retry_flush_limit, - &mut serial_exec, - table_data, - log_entry_buf.iter(), - ) - .await?; - } - - Ok(()) - } -} - -/// Region based wal replay -struct RegionBasedReplay; - -#[async_trait] -impl Replay for RegionBasedReplay { - async fn run( - &self, - context: &ReplayContext, - table_datas: &[TableDataRef], - ) -> Result { - debug!("Replay wal logs on region mode, context:{context}, tables:{table_datas:?}",); - - // Init all table results to be oks, and modify to errs when failed to replay. - let mut failed_tables = FailedTables::new(); - let scan_ctx = ScanContext { - batch_size: context.wal_replay_batch_size, - ..Default::default() - }; - - Self::replay_region_logs(context, table_datas, &scan_ctx, &mut failed_tables).await?; - - Ok(failed_tables) - } -} - -#[derive(Clone)] -struct TableSchemaProviderAdapter { - table_datas: Arc>, -} - -impl TableSchemaProvider for TableSchemaProviderAdapter { - fn table_schema(&self, table_id: common_types::table::TableId) -> Option { - self.table_datas.get(&table_id).map(|v| v.schema()) - } -} - -impl RegionBasedReplay { - /// Replay logs in same region. - /// - /// Steps: - /// + Scan all logs of region. - /// + Split logs according to table ids. - /// + Replay logs to recover data of tables. - async fn replay_region_logs( - context: &ReplayContext, - table_datas: &[TableDataRef], - scan_ctx: &ScanContext, - failed_tables: &mut FailedTables, - ) -> Result<()> { - // Scan all wal logs of current shard. - let scan_req = ScanRequest { - region_id: context.shard_id as RegionId, - }; - - let mut log_iter = context - .wal_manager - .scan(scan_ctx, &scan_req) - .await - .box_err() - .context(ReplayWalWithCause { msg: None })?; - let mut log_entry_buf = VecDeque::with_capacity(context.wal_replay_batch_size); - - // Lock all related tables. - let mut serial_exec_ctxs = HashMap::with_capacity(table_datas.len()); - let mut table_datas_by_id = HashMap::with_capacity(table_datas.len()); - for table_data in table_datas { - let serial_exec = table_data.serial_exec.lock().await; - let serial_exec_ctx = SerialExecContext { - table_data: table_data.clone(), - serial_exec, - }; - serial_exec_ctxs.insert(table_data.id, Mutex::new(serial_exec_ctx)); - table_datas_by_id.insert(table_data.id.as_u64(), table_data.clone()); - } - - let table_datas_by_id = Arc::new(table_datas_by_id); - let schema_provider = TableSchemaProviderAdapter { - table_datas: table_datas_by_id.clone(), - }; - let serial_exec_ctxs = serial_exec_ctxs; - // Split and replay logs. 
- loop { - let _timer = PULL_LOGS_DURATION_HISTOGRAM.start_timer(); - let decoder = WalDecoder::new(schema_provider.clone()); - let table_datas_for_filter = table_datas_by_id.clone(); - let log_filter = move |log_table_id| table_datas_for_filter.contains_key(&log_table_id); - log_entry_buf = log_iter - .next_log_entries(decoder, log_filter, log_entry_buf) - .await - .box_err() - .context(ReplayWalWithCause { msg: None })?; - - if log_entry_buf.is_empty() { - break; - } - - let _timer = APPLY_LOGS_DURATION_HISTOGRAM.start_timer(); - Self::replay_single_batch(context, &log_entry_buf, &serial_exec_ctxs, failed_tables) - .await?; - } - - Ok(()) - } - - async fn replay_single_batch( - context: &ReplayContext, - log_batch: &VecDeque>, - serial_exec_ctxs: &HashMap>>, - failed_tables: &mut FailedTables, - ) -> Result<()> { - let mut table_batches = Vec::new(); - // TODO: No `group_by` method in `VecDeque`, so implement it manually here... - Self::split_log_batch_by_table(log_batch, &mut table_batches); - - let ((), results) = async_scoped::TokioScope::scope_and_block(|scope| { - // Limit the maximum number of concurrent tasks. - let semaphore = Arc::new(Semaphore::new(MAX_REPLAY_TASK_NUM)); - - for table_batch in table_batches { - // Some tables may have failed in previous replay, ignore them. - if failed_tables.contains_key(&table_batch.table_id) { - continue; - } - let log_entries: Vec<_> = table_batch - .ranges - .iter() - .flat_map(|range| log_batch.range(range.clone())) - .collect(); - let semaphore = semaphore.clone(); - - scope.spawn(async move { - let _permit = semaphore.acquire().await.unwrap(); - // Some tables may have been moved to other shards or dropped, ignore such logs. - if let Some(ctx) = serial_exec_ctxs.get(&table_batch.table_id) { - let mut ctx = ctx.lock().await; - let table_data = ctx.table_data.clone(); - let result = replay_table_log_entries( - &context.flusher, - context.max_retry_flush_limit, - &mut ctx.serial_exec, - &table_data, - log_entries.into_iter(), - ) - .await; - (table_batch.table_id, Some(result)) - } else { - (table_batch.table_id, None) - } - }); - } - }); - - for result in results.into_iter().flatten() { - if let (table_id, Some(Err(e))) = result { - // If occur error, mark this table as failed and store the cause. - failed_tables.insert(table_id, e); - } - } - - Ok(()) - } - - fn split_log_batch_by_table
( - log_batch: &VecDeque>, - table_batches: &mut Vec, - ) { - table_batches.clear(); - - if log_batch.is_empty() { - return; - } - - // Split log batch by table id, for example: - // input batch: - // |1|1|2|2|2|3|3|3|3|1|1| - // - // output batches: - // |1|1|1|1|, |2|2|2|, |3|3|3|3| - let mut start_log_idx = 0usize; - let mut curr_log_idx = 0usize; - let mut start_table_id = log_batch.get(start_log_idx).unwrap().table_id; - let mut table_ranges = HashMap::new(); - loop { - let time_to_break = curr_log_idx == log_batch.len(); - let found_end_idx = if time_to_break { - true - } else { - let current_table_id = log_batch.get(curr_log_idx).unwrap().table_id; - current_table_id != start_table_id - }; - - if found_end_idx { - table_ranges - .entry(TableId::new(start_table_id)) - .or_insert(Vec::new()) - .push(start_log_idx..curr_log_idx); - - // Step to next start idx. - start_log_idx = curr_log_idx; - start_table_id = if time_to_break { - // The final round, just set it to max as an invalid flag. - u64::MAX - } else { - log_batch.get(start_log_idx).unwrap().table_id - }; - } - - if time_to_break { - break; - } - curr_log_idx += 1; - } - for (table_id, ranges) in table_ranges { - table_batches.push(TableBatch { table_id, ranges }); - } - } -} - -#[derive(Debug, Eq, PartialEq)] -struct TableBatch { - table_id: TableId, - ranges: Vec>, -} - -struct SerialExecContext<'a> { - table_data: TableDataRef, - serial_exec: MutexGuard<'a, TableOpSerialExecutor>, -} - -/// Replay all log entries into memtable and flush if necessary -async fn replay_table_log_entries( - flusher: &Flusher, - max_retry_flush_limit: usize, - serial_exec: &mut TableOpSerialExecutor, - table_data: &TableDataRef, - log_entries: impl Iterator>, -) -> Result<()> { - let flushed_sequence = table_data.current_version().flushed_sequence(); - debug!( - "Replay table log entries begin, table:{}, table_id:{:?}, last_sequence:{}, flushed_sequence:{flushed_sequence}", - table_data.name, table_data.id, table_data.last_sequence(), - ); - - for log_entry in log_entries { - let (sequence, payload) = (log_entry.sequence, &log_entry.payload); - - // Ignore too old logs(sequence <= `flushed_sequence`). - if sequence <= flushed_sequence { - continue; - } - - // Apply logs to memtable. - match payload { - ReadPayload::Write { row_group } => { - trace!( - "Instance replay row_group, table:{}, row_group:{:?}", - table_data.name, - row_group - ); - - // TODO: too strict check here, should be modified to like what in - // `ColumnSchema::compatible_for_write`.` - let table_schema_version = table_data.schema_version(); - if table_schema_version != row_group.schema().version() { - // Data with old schema should already been flushed, but we avoid panic - // here. 
- error!( - "Ignore data with mismatch schema version during replaying, \ - table:{}, \ - table_id:{:?}, \ - expect:{}, \ - actual:{}, \ - last_sequence:{}, \ - sequence:{}", - table_data.name, - table_data.id, - table_schema_version, - row_group.schema().version(), - table_data.last_sequence(), - sequence, - ); - - continue; - } - - let index_in_writer = - IndexInWriterSchema::for_same_schema(row_group.schema().num_columns()); - let memtable_writer = MemTableWriter::new(table_data.clone(), serial_exec); - let write_res = memtable_writer.write(sequence, row_group, index_in_writer); - if let Err(e) = write_res { - if matches!(e, WriteError::UpdateMemTableSequence { ref source } if source.kind() == ErrorKind::KeyTooLarge ) - { - // ignore this error - warn!("Unable to insert memtable, err:{e}"); - } else { - return Err(Error::ReplayWalWithCause { - msg: Some(format!( - "table_id:{}, table_name:{}, space_id:{}", - table_data.space_id, table_data.name, table_data.id - )), - source: Box::new(e), - }); - } - } - - // Flush the table if necessary. - let in_flush = serial_exec.flush_scheduler().is_in_flush(); - if table_data.should_flush_table(in_flush) { - let opts = TableFlushOptions { - res_sender: None, - max_retry_flush_limit, - }; - let flush_scheduler = serial_exec.flush_scheduler(); - flusher - .schedule_flush(flush_scheduler, table_data, opts) - .await - .box_err() - .context(ReplayWalWithCause { - msg: Some(format!( - "table_id:{}, table_name:{}, space_id:{}", - table_data.space_id, table_data.name, table_data.id - )), - })?; - } - } - ReadPayload::AlterSchema { .. } | ReadPayload::AlterOptions { .. } => { - // Ignore records except Data. - // - // - DDL (AlterSchema and AlterOptions) should be recovered from - // Manifest on start. - } - } - - table_data.set_last_sequence(sequence); - } - - debug!( - "Replay table log entries finish, table:{}, table_id:{:?}, last_sequence:{}, flushed_sequence:{}", - table_data.name, table_data.id, table_data.last_sequence(), table_data.current_version().flushed_sequence() - ); - - Ok(()) -} - -#[cfg(test)] -mod tests { - use std::collections::VecDeque; - - use table_engine::table::TableId; - use wal::log_batch::LogEntry; - - use crate::instance::wal_replayer::{RegionBasedReplay, TableBatch}; - - #[test] - fn test_split_log_batch_by_table() { - let test_set = test_set(); - for (test_batch, expected) in test_set { - check_split_result(&test_batch, &expected); - } - } - - #[allow(clippy::single_range_in_vec_init)] - fn test_set() -> Vec<(VecDeque>, Vec)> { - let test_log_batch1: VecDeque> = VecDeque::from([ - LogEntry { - table_id: 0, - sequence: 1, - payload: 0, - }, - LogEntry { - table_id: 0, - sequence: 2, - payload: 0, - }, - LogEntry { - table_id: 0, - sequence: 3, - payload: 0, - }, - LogEntry { - table_id: 1, - sequence: 1, - payload: 0, - }, - LogEntry { - table_id: 1, - sequence: 2, - payload: 0, - }, - LogEntry { - table_id: 2, - sequence: 1, - payload: 0, - }, - ]); - let expected1 = vec![ - TableBatch { - table_id: TableId::new(0), - ranges: vec![0..3], - }, - TableBatch { - table_id: TableId::new(1), - ranges: vec![3..5], - }, - TableBatch { - table_id: TableId::new(2), - ranges: vec![5..6], - }, - ]; - - let test_log_batch2: VecDeque> = VecDeque::from([LogEntry { - table_id: 0, - sequence: 1, - payload: 0, - }]); - let expected2 = vec![TableBatch { - table_id: TableId::new(0), - ranges: vec![0..1], - }]; - - let test_log_batch3: VecDeque> = VecDeque::default(); - let expected3 = vec![]; - - vec![ - (test_log_batch1, expected1), - 
(test_log_batch2, expected2), - (test_log_batch3, expected3), - ] - } - - fn check_split_result(batch: &VecDeque>, expected: &[TableBatch]) { - let mut table_batches = Vec::new(); - RegionBasedReplay::split_log_batch_by_table(batch, &mut table_batches); - // split_log_batch_by_table returns unordered results, so sort it here. - table_batches.sort_by_key(|tb| tb.table_id); - assert_eq!(&table_batches, expected); - } -} diff --git a/src/analytic_engine/src/instance/write.rs b/src/analytic_engine/src/instance/write.rs deleted file mode 100644 index 8a007d5c98..0000000000 --- a/src/analytic_engine/src/instance/write.rs +++ /dev/null @@ -1,831 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Write logic of instance - -use std::iter; - -use bytes_ext::ByteVec; -use codec::{ - columnar::{ColumnarEncoder, EncodeHint}, - row, -}; -use common_types::{ - row::RowGroup, - schema::{IndexInWriterSchema, Schema}, -}; -use horaedbproto::{schema as schema_pb, table_requests}; -use itertools::Itertools; -use logger::{debug, error, info, trace, warn}; -use macros::define_result; -use smallvec::SmallVec; -use snafu::{ensure, Backtrace, ResultExt, Snafu}; -use table_engine::table::WriteRequest; -use wal::{ - kv_encoder::LogBatchEncoder, - log_batch::Payload, - manager::{SequenceNumber, WalLocation, WriteContext}, -}; - -use crate::{ - instance, - instance::{ - flush_compaction::TableFlushOptions, serial_executor::TableOpSerialExecutor, InstanceRef, - }, - memtable::{key::KeySequence, PutContext}, - payload::WritePayload, - space::SpaceRef, - table::{data::TableDataRef, version::MemTableForWrite}, - WalEncodeConfig, WalEncodeFormat, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display( - "Failed to encode payloads, table:{}, wal_location:{:?}, err:{}", - table, - wal_location, - source - ))] - EncodePayloads { - table: String, - wal_location: WalLocation, - source: wal::manager::Error, - }, - - #[snafu(display("Failed to write to wal, table:{}, err:{}", table, source))] - WriteLogBatch { - table: String, - source: wal::manager::Error, - }, - - #[snafu(display("Failed to write to memtable, table:{}, err:{}", table, source))] - WriteMemTable { - table: String, - source: crate::table::version::Error, - }, - - #[snafu(display("Try to write to a dropped table, table:{}", table))] - WriteDroppedTable { table: String }, - - #[snafu(display( - "Too many rows to write (more than {}), table:{}, rows:{}.\nBacktrace:\n{}", - MAX_ROWS_TO_WRITE, - table, - rows, - backtrace, - ))] - TooManyRows { - table: String, - rows: usize, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to find mutable memtable, table:{}, err:{}", table, source))] - FindMutableMemTable { - table: String, - source: crate::table::data::Error, - }, - - 
#[snafu(display("Failed to flush table, table:{}, err:{}", table, source))] - FlushTable { - table: String, - source: crate::instance::flush_compaction::Error, - }, - - #[snafu(display( - "Background flush failed, cannot write more data, err:{}.\nBacktrace:\n{}", - msg, - backtrace - ))] - BackgroundFlushFailed { msg: String, backtrace: Backtrace }, - - #[snafu(display("Schema of request is incompatible with table, err:{}", source))] - IncompatSchema { - source: common_types::schema::CompatError, - }, - - #[snafu(display("Failed to encode row group, err:{}", source))] - EncodeRowGroup { source: codec::row::Error }, - - #[snafu(display("Failed to update sequence of memtable, err:{}", source))] - UpdateMemTableSequence { source: crate::memtable::Error }, -} - -define_result!(Error); - -/// Max rows in a write request, must less than [u32::MAX] -const MAX_ROWS_TO_WRITE: usize = 10_000_000; - -/// The version used for [`table_requests::WriteRequest.version`]. -#[derive(Clone, Copy, Debug)] -pub enum WalEncodeVersion { - RowWise = 0, - Columnar, -} - -impl WalEncodeVersion { - #[inline] - pub fn as_u32(self) -> u32 { - match self { - Self::RowWise => 0, - Self::Columnar => 1, - } - } - - #[inline] - pub fn try_from_u32(v: u32) -> Option { - match v { - 0 => Some(Self::RowWise), - 1 => Some(Self::Columnar), - _ => None, - } - } -} - -pub(crate) struct EncodeContext { - pub row_group: RowGroup, - pub index_in_writer: IndexInWriterSchema, -} - -enum EncodedPayload { - Cols(Vec), - Rows(Vec), -} -impl EncodeContext { - pub fn new(row_group: RowGroup) -> Self { - Self { - row_group, - index_in_writer: IndexInWriterSchema::default(), - } - } - - fn encode( - &mut self, - config: &WalEncodeConfig, - table_schema: &Schema, - ) -> Result { - match config.format { - WalEncodeFormat::Columnar => self.encode_cols(config).map(EncodedPayload::Cols), - WalEncodeFormat::RowWise => self.encode_rows(table_schema).map(EncodedPayload::Rows), - } - } - - fn encode_cols(&mut self, config: &WalEncodeConfig) -> Result> { - let row_group_schema = self.row_group.schema(); - let mut encoded_cols = Vec::with_capacity(row_group_schema.num_columns()); - - for col_idx in 0..row_group_schema.num_columns() { - let col_schema = row_group_schema.column(col_idx); - let col_iter = self.row_group.iter_column(col_idx).map(|v| v.as_view()); - let enc = ColumnarEncoder::new( - col_schema.id, - config.num_bytes_compress_threshold.as_byte() as usize, - ); - let mut hint = EncodeHint { - num_nulls: None, - num_datums: Some(self.row_group.num_rows()), - datum_kind: col_schema.data_type, - }; - let sz = enc.estimated_encoded_size(col_iter.clone(), &mut hint); - let mut buf = Vec::with_capacity(sz); - enc.encode(&mut buf, col_iter, &mut hint).unwrap(); - encoded_cols.push(buf); - } - - Ok(encoded_cols) - } - - fn encode_rows(&mut self, table_schema: &Schema) -> Result> { - let mut encoded_rows = Vec::new(); - row::encode_row_group_for_wal( - &self.row_group, - table_schema, - &self.index_in_writer, - &mut encoded_rows, - ) - .context(EncodeRowGroup)?; - - assert_eq!(self.row_group.num_rows(), encoded_rows.len()); - - Ok(encoded_rows) - } -} - -/// Split the write request into multiple batches whose size is determined by -/// the `max_bytes_per_batch`. -struct WriteRowGroupSplitter { - /// Max bytes per batch. Actually, the size of a batch is not exactly - /// ensured less than this `max_bytes_per_batch`, but it is guaranteed that - /// the batch contains at most one more row when its size exceeds this - /// `max_bytes_per_batch`. 
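// Illustrative, standalone sketch (not part of this patch): the greedy boundary
// computation described in the `WriteRowGroupSplitter` doc comment above, over plain
// row sizes instead of encoded rows. A batch is closed as soon as its accumulated
// size reaches the limit, so each batch overshoots the limit by at most one row.
fn batch_end_indexes(row_sizes: &[usize], max_bytes_per_batch: usize) -> Vec<usize> {
    let mut ends = Vec::new();
    let mut current = 0usize;
    for (idx, size) in row_sizes.iter().enumerate() {
        current += size;
        if current >= max_bytes_per_batch {
            // Close the batch right after the row that pushed it over the limit.
            ends.push(idx + 1);
            current = 0;
        }
    }
    if current > 0 {
        // Whatever is left forms the final (possibly small) batch.
        ends.push(row_sizes.len());
    }
    ends
}

fn main() {
    // Matches the expectation used by the tests of this module: with a 100-byte
    // limit, rows of 50/50/100/10 bytes split after rows 2, 3 and 4.
    assert_eq!(batch_end_indexes(&[50, 50, 100, 10], 100), vec![2, 3, 4]);
    // A single boundary means the request does not need to be split at all.
    assert_eq!(batch_end_indexes(&[50, 50, 100, 10], 1000), vec![4]);
}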
- max_bytes_per_batch: usize, -} - -enum SplitResult { - Splitted { encoded_batches: Vec> }, - Integrate { encoded_rows: Vec }, -} - -impl WriteRowGroupSplitter { - pub fn new(max_bytes_per_batch: usize) -> Self { - Self { - max_bytes_per_batch, - } - } - - /// Split the write request into multiple batches. - pub fn split(&self, encoded_rows: Vec) -> SplitResult { - let end_row_indexes = self.compute_batches(&encoded_rows); - if end_row_indexes.len() <= 1 { - // No need to split. - return SplitResult::Integrate { encoded_rows }; - } - - let mut prev_end_row_index = 0; - let mut encoded_batches = Vec::with_capacity(end_row_indexes.len()); - for end_row_index in &end_row_indexes { - let end_row_index = *end_row_index; - let curr_batch = Vec::with_capacity(end_row_index - prev_end_row_index); - encoded_batches.push(curr_batch); - prev_end_row_index = end_row_index; - } - - let mut current_batch_idx = 0; - for (row_idx, encoded_row) in encoded_rows.into_iter().enumerate() { - if row_idx >= end_row_indexes[current_batch_idx] { - current_batch_idx += 1; - } - encoded_batches[current_batch_idx].push(encoded_row); - } - - SplitResult::Splitted { encoded_batches } - } - - /// Compute the end row indexes in the original `encoded_rows` of each - /// batch. - fn compute_batches(&self, encoded_rows: &[ByteVec]) -> Vec { - let mut current_batch_size = 0; - let mut end_row_indexes = Vec::new(); - for (row_idx, encoded_row) in encoded_rows.iter().enumerate() { - let row_size = encoded_row.len(); - current_batch_size += row_size; - - // If the current batch size exceeds the `max_bytes_per_batch`, freeze this - // batch by recording its end row index. - // Note that such check may cause the batch size exceeds the - // `max_bytes_per_batch`. - if current_batch_size >= self.max_bytes_per_batch { - current_batch_size = 0; - end_row_indexes.push(row_idx + 1) - } - } - - if current_batch_size > 0 { - end_row_indexes.push(encoded_rows.len()); - } - - end_row_indexes - } -} - -pub struct Writer<'a> { - instance: InstanceRef, - space: SpaceRef, - table_data: TableDataRef, - serial_exec: &'a mut TableOpSerialExecutor, -} - -impl<'a> Writer<'a> { - pub fn new( - instance: InstanceRef, - space: SpaceRef, - table_data: TableDataRef, - serial_exec: &'a mut TableOpSerialExecutor, - ) -> Writer<'a> { - // Ensure the writer has permission to handle the write of the table. - assert_eq!(table_data.id, serial_exec.table_id()); - - Self { - instance, - space, - table_data, - serial_exec, - } - } -} - -pub(crate) struct MemTableWriter<'a> { - table_data: TableDataRef, - _serial_exec: &'a mut TableOpSerialExecutor, -} - -impl<'a> MemTableWriter<'a> { - pub fn new(table_data: TableDataRef, serial_exec: &'a mut TableOpSerialExecutor) -> Self { - Self { - table_data, - _serial_exec: serial_exec, - } - } - - // TODO(yingwen): How to trigger flush if we found memtables are full during - // inserting memtable? RocksDB checks memtable size in MemTableInserter - /// Write data into memtable. - /// - /// index_in_writer must match the schema in table_data. - pub fn write( - &self, - sequence: SequenceNumber, - row_group: &RowGroup, - index_in_writer: IndexInWriterSchema, - ) -> Result<()> { - let _timer = self.table_data.metrics.start_table_write_memtable_timer(); - if row_group.is_empty() { - return Ok(()); - } - - let schema = &self.table_data.schema(); - // Store all memtables we wrote and update their last sequence later. 
- let mut wrote_memtables: SmallVec<[_; 4]> = SmallVec::new(); - let mut last_mutable_mem: Option = None; - - let mut ctx = PutContext::new(index_in_writer); - for (row_idx, row) in row_group.iter().enumerate() { - // TODO(yingwen): Add RowWithSchema and take RowWithSchema as input, then remove - // this unwrap() - let timestamp = row.timestamp(schema).unwrap(); - // skip expired row - if self.table_data.is_expired(timestamp) { - trace!("Skip expired row when write to memtable, row:{:?}", row); - continue; - } - if last_mutable_mem.is_none() - || !last_mutable_mem - .as_ref() - .unwrap() - .accept_timestamp(timestamp) - { - // The time range is not processed by current memtable, find next one. - let mutable_mem = self - .table_data - .find_or_create_mutable(timestamp, schema) - .context(FindMutableMemTable { - table: &self.table_data.name, - })?; - wrote_memtables.push(mutable_mem.clone()); - last_mutable_mem = Some(mutable_mem); - } - - // We have check the row num is less than `MAX_ROWS_TO_WRITE`, it is safe to - // cast it to u32 here - let key_seq = KeySequence::new(sequence, row_idx as u32); - // TODO(yingwen): Batch sample timestamp in sampling phase. - last_mutable_mem - .as_ref() - .unwrap() - .put(&mut ctx, key_seq, row, schema, timestamp) - .context(WriteMemTable { - table: &self.table_data.name, - })?; - } - - // Update last sequence of memtable. - for mem_wrote in wrote_memtables { - mem_wrote - .set_last_sequence(sequence) - .context(UpdateMemTableSequence)?; - } - - Ok(()) - } -} - -impl<'a> Writer<'a> { - pub(crate) async fn write(&mut self, request: WriteRequest) -> Result { - let _timer = self.table_data.metrics.start_table_write_execute_timer(); - self.table_data.metrics.on_write_request_begin(); - - self.validate_before_write(&request)?; - let mut encode_ctx = EncodeContext::new(request.row_group); - - self.preprocess_write(&mut encode_ctx).await?; - - let table_data = self.table_data.clone(); - let seq = if self.instance.disable_wal { - // When wal is disabled, just update the last_seq one by one. - table_data.next_sequence() - } else { - let encoded_payload = { - let _timer = self.table_data.metrics.start_table_write_encode_timer(); - let schema = self.table_data.schema(); - encode_ctx.encode(&self.instance.wal_encode, &schema)? - }; - - match encoded_payload { - EncodedPayload::Rows(encoded_rows) => { - self.write_to_wal_in_rows(encoded_rows).await? - } - EncodedPayload::Cols(encoded_cols) => { - self.write_to_wal_in_cols(encoded_cols).await? - } - } - }; - - // Write the row group to the memtable and update the state in the mem. 
- let EncodeContext { - row_group, - index_in_writer, - } = encode_ctx; - self.write_to_mem(&table_data, &row_group, index_in_writer, seq) - .await?; - - Ok(row_group.num_rows()) - } - - async fn write_to_wal_in_rows(&self, encoded_rows: Vec) -> Result { - let split_res = self.maybe_split_write_request(encoded_rows); - match split_res { - SplitResult::Integrate { encoded_rows } => { - let write_req = self.make_rowwise_write_request(encoded_rows); - let payload = WritePayload::Write(&write_req); - self.write_to_wal(iter::once(payload)).await - } - SplitResult::Splitted { encoded_batches } => { - let write_reqs = encoded_batches - .into_iter() - .map(|v| self.make_rowwise_write_request(v)) - .collect_vec(); - - let payload = write_reqs.iter().map(WritePayload::Write); - self.write_to_wal(payload).await - } - } - } - - async fn write_to_wal_in_cols(&self, encoded_cols: Vec) -> Result { - let write_req = table_requests::WriteRequest { - version: WalEncodeVersion::Columnar.as_u32(), - schema: None, - rows: vec![], - cols: encoded_cols, - }; - let payload = WritePayload::Write(&write_req); - - self.write_to_wal(iter::once(payload)).await - } - - fn make_rowwise_write_request( - &self, - encoded_rows: Vec, - ) -> table_requests::WriteRequest { - table_requests::WriteRequest { - version: WalEncodeVersion::RowWise.as_u32(), - // Use the table schema instead of the schema in request to avoid schema - // mismatch during replaying - schema: Some(schema_pb::TableSchema::from(&self.table_data.schema())), - rows: encoded_rows, - cols: vec![], - } - } - - fn maybe_split_write_request(&self, encoded_rows: Vec) -> SplitResult { - if self.instance.max_bytes_per_write_batch.is_none() { - return SplitResult::Integrate { encoded_rows }; - } - - let splitter = WriteRowGroupSplitter::new(self.instance.max_bytes_per_write_batch.unwrap()); - splitter.split(encoded_rows) - } - - /// Write `row_group` to memtable and update the memory states. - async fn write_to_mem( - &mut self, - table_data: &TableDataRef, - row_group: &RowGroup, - index_in_writer: IndexInWriterSchema, - sequence: SequenceNumber, - ) -> Result<()> { - let memtable_writer = MemTableWriter::new(table_data.clone(), self.serial_exec); - - memtable_writer - .write(sequence, row_group, index_in_writer) - .map_err(|e| { - error!( - "Failed to write to memtable, table:{}, table_id:{}, err:{}", - table_data.name, table_data.id, e - ); - e - })?; - - debug!( - "Instance write finished, update sequence, table:{}, table_id:{} last_sequence:{}", - table_data.name, table_data.id, sequence - ); - - table_data.set_last_sequence(sequence); - - // Collect metrics. - let num_columns = row_group.schema().num_columns(); - let num_written_bytes: usize = row_group.iter().map(|row| row.size()).sum(); - table_data.metrics.on_write_request_done( - row_group.num_rows(), - num_columns, - num_written_bytes, - ); - - Ok(()) - } - - /// Return Ok if the request is valid, this is done before entering the - /// write thread. 
- fn validate_before_write(&self, request: &WriteRequest) -> Result<()> { - ensure!( - request.row_group.num_rows() < MAX_ROWS_TO_WRITE, - TooManyRows { - table: &self.table_data.name, - rows: request.row_group.num_rows(), - } - ); - - Ok(()) - } - - /// Preprocess before write, check: - /// - whether table is dropped - /// - memtable capacity and maybe trigger flush - /// - /// Fills [common_types::schema::IndexInWriterSchema] in [EncodeContext] - async fn preprocess_write(&mut self, encode_ctx: &mut EncodeContext) -> Result<()> { - let _total_timer = self.table_data.metrics.start_table_write_preprocess_timer(); - ensure!( - !self.table_data.is_dropped(), - WriteDroppedTable { - table: &self.table_data.name, - } - ); - - // Checks schema compatibility. - self.table_data - .schema() - .compatible_for_write( - encode_ctx.row_group.schema(), - &mut encode_ctx.index_in_writer, - ) - .context(IncompatSchema)?; - - if self.instance.should_flush_instance() { - if let Some(space) = self.instance.space_store.find_maximum_memory_usage_space() { - if let Some(table) = space.find_maximum_memory_usage_table() { - info!("Trying to flush table {} bytes {} in space {} because engine total memtable memory usage exceeds db_write_buffer_size {}.", - table.name, - table.memtable_memory_usage(), - space.id, - self.instance.db_write_buffer_size, - ); - let _timer = self - .table_data - .metrics - .start_table_write_instance_flush_wait_timer(); - self.handle_memtable_flush(&table).await?; - } - } - } - - if self.space.should_flush_space() { - if let Some(table) = self.space.find_maximum_memory_usage_table() { - info!("Trying to flush table {} bytes {} in space {} because space total memtable memory usage exceeds space_write_buffer_size {}.", - table.name, - table.memtable_memory_usage() , - self.space.id, - self.space.write_buffer_size, - ); - let _timer = self - .table_data - .metrics - .start_table_write_space_flush_wait_timer(); - self.handle_memtable_flush(&table).await?; - } - } - - let in_flush = self.serial_exec.flush_scheduler().is_in_flush(); - if self.table_data.should_flush_table(in_flush) { - let table_data = self.table_data.clone(); - let _timer = table_data.metrics.start_table_write_flush_wait_timer(); - self.handle_memtable_flush(&table_data).await?; - } - - Ok(()) - } - - /// Write log_batch into wal, return the sequence number of log_batch. - async fn write_to_wal(&self, payloads: I) -> Result - where - I: Iterator, - P: Payload, - { - let _timer = self.table_data.metrics.start_table_write_wal_timer(); - let table_location = self.table_data.table_location(); - let wal_location = - instance::create_wal_location(table_location.id, table_location.shard_info); - let log_batch_encoder = LogBatchEncoder::create(wal_location); - let log_batch = log_batch_encoder - .encode_batch(payloads) - .context(EncodePayloads { - table: &self.table_data.name, - wal_location, - })?; - - // Write to wal manager - let write_ctx = WriteContext::default(); - let sequence = self - .instance - .space_store - .wal_manager - .write(&write_ctx, &log_batch) - .await - .context(WriteLogBatch { - table: &self.table_data.name, - })?; - - Ok(sequence) - } - - /// Flush memtables of table in background. - /// - /// Note the table to flush may not the same as `self.table_data`. And if we - /// try to flush other table in this table's writer, the lock should be - /// acquired in advance. And in order to avoid deadlock, we should not wait - /// for the lock. 
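// Illustrative, standalone sketch (not part of this patch): the deadlock-avoidance
// rule from the doc comment above, reduced to `std::sync::Mutex::try_lock`. The real
// code holds its own table's serial executor and only *tries* to take the other
// table's executor, skipping the flush (it will be retried later) if that lock is
// currently held, instead of waiting and risking a lock-order deadlock.
use std::sync::Mutex;

struct Table {
    name: &'static str,
    // Stand-in for the per-table `TableOpSerialExecutor` guarded by a mutex.
    serial_exec: Mutex<()>,
}

fn try_flush_other_table(other: &Table) {
    match other.serial_exec.try_lock() {
        Ok(_guard) => {
            // Lock acquired without blocking: schedule the flush while holding it.
            println!("flush scheduled for table {}", other.name);
        }
        Err(_) => {
            // Someone else (e.g. that table's own writer) holds the lock: give up
            // rather than wait, mirroring the non-blocking branch of
            // `handle_memtable_flush` below.
            println!("skip flush for busy table {}", other.name);
        }
    }
}

fn main() {
    let other = Table { name: "t2", serial_exec: Mutex::new(()) };
    try_flush_other_table(&other); // lock free: flush scheduled
    let _held = other.serial_exec.lock().unwrap();
    try_flush_other_table(&other); // lock held: flush skipped
}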
- async fn handle_memtable_flush(&mut self, table_data: &TableDataRef) -> Result<()> { - let opts = TableFlushOptions { - res_sender: None, - max_retry_flush_limit: self.instance.max_retry_flush_limit(), - }; - let flusher = self.instance.make_flusher_with_min_interval(); - if table_data.id == self.table_data.id { - let flush_scheduler = self.serial_exec.flush_scheduler(); - // Set `block_on_write_thread` to false and let flush do in background. - return flusher - .schedule_flush(flush_scheduler, table_data, opts) - .await - .context(FlushTable { - table: &table_data.name, - }); - } - - debug!( - "Try to trigger flush of other table:{} from the write procedure of table:{}", - table_data.name, self.table_data.name - ); - match table_data.serial_exec.try_lock() { - Ok(mut serial_exec) => { - let flush_scheduler = serial_exec.flush_scheduler(); - // Set `block_on_write_thread` to false and let flush do in background. - flusher - .schedule_flush(flush_scheduler, table_data, opts) - .await - .context(FlushTable { - table: &table_data.name, - }) - } - Err(_) => { - warn!( - "Failed to acquire write lock for flush table:{}", - table_data.name, - ); - Ok(()) - } - } - } -} - -#[cfg(test)] -mod tests { - use common_types::{ - column_schema::Builder as ColumnSchemaBuilder, - datum::{Datum, DatumKind}, - row::Row, - schema::Builder as SchemaBuilder, - time::Timestamp, - }; - - use super::*; - - fn generate_rows_for_test(sizes: Vec) -> (Vec, RowGroup) { - let encoded_rows: Vec<_> = sizes.iter().map(|size| vec![0; *size]).collect(); - let rows: Vec<_> = sizes - .iter() - .map(|size| { - let datum = Datum::Timestamp(Timestamp::new(*size as i64)); - Row::from_datums(vec![datum]) - }) - .collect(); - - let column_schema = ColumnSchemaBuilder::new("ts".to_string(), DatumKind::Timestamp) - .build() - .unwrap(); - let schema = SchemaBuilder::new() - .add_key_column(column_schema) - .unwrap() - .primary_key_indexes(vec![0]) - .build() - .unwrap(); - let row_group = RowGroup::try_new(schema, rows).unwrap(); - - (encoded_rows, row_group) - } - - #[test] - fn test_write_split_compute_batches() { - let cases = vec![ - (2, vec![1, 2, 3, 4, 5], vec![2, 3, 4, 5]), - (100, vec![50, 50, 100, 10], vec![2, 3, 4]), - (1000, vec![50, 50, 100, 10], vec![4]), - (2, vec![10, 10, 0, 10], vec![1, 2, 4]), - (0, vec![10, 10, 0, 10], vec![1, 2, 3, 4]), - (0, vec![0, 0], vec![1, 2]), - (10, vec![], vec![]), - ]; - for (batch_size, sizes, expected_batch_indexes) in cases { - let (encoded_rows, _) = generate_rows_for_test(sizes); - let write_row_group_splitter = WriteRowGroupSplitter::new(batch_size); - let batch_indexes = write_row_group_splitter.compute_batches(&encoded_rows); - assert_eq!(batch_indexes, expected_batch_indexes); - } - } - - #[test] - fn test_write_split_row_group() { - let cases = vec![ - ( - 2, - vec![1, 2, 3, 4, 5], - vec![vec![1, 2], vec![3], vec![4], vec![5]], - ), - ( - 100, - vec![50, 50, 100, 10], - vec![vec![50, 50], vec![100], vec![10]], - ), - (1000, vec![50, 50, 100, 10], vec![vec![50, 50, 100, 10]]), - ( - 2, - vec![10, 10, 0, 10], - vec![vec![10], vec![10], vec![0, 10]], - ), - ( - 0, - vec![10, 10, 0, 10], - vec![vec![10], vec![10], vec![0], vec![10]], - ), - (0, vec![0, 0], vec![vec![0], vec![0]]), - (10, vec![], vec![]), - ]; - - let check_encoded_rows = |encoded_rows: &[ByteVec], expected_row_sizes: &[usize]| { - assert_eq!(encoded_rows.len(), expected_row_sizes.len()); - for (encoded_row, expected_row_size) in - encoded_rows.iter().zip(expected_row_sizes.iter()) - { - 
assert_eq!(encoded_row.len(), *expected_row_size); - } - }; - for (batch_size, sizes, expected_batches) in cases { - let (encoded_rows, _) = generate_rows_for_test(sizes.clone()); - let write_row_group_splitter = WriteRowGroupSplitter::new(batch_size); - let split_res = write_row_group_splitter.split(encoded_rows); - if expected_batches.is_empty() { - assert!(matches!(split_res, SplitResult::Integrate { .. })); - } else if expected_batches.len() == 1 { - assert!(matches!(split_res, SplitResult::Integrate { .. })); - if let SplitResult::Integrate { encoded_rows } = split_res { - check_encoded_rows(&encoded_rows, &expected_batches[0]); - } - } else { - assert!(matches!(split_res, SplitResult::Splitted { .. })); - if let SplitResult::Splitted { encoded_batches } = split_res { - assert_eq!(encoded_batches.len(), expected_batches.len()); - for (encoded_batch, expected_batch) in - encoded_batches.iter().zip(expected_batches.iter()) - { - check_encoded_rows(encoded_batch, expected_batch); - } - } - } - } - } -} diff --git a/src/analytic_engine/src/lib.rs b/src/analytic_engine/src/lib.rs deleted file mode 100644 index d83ec341df..0000000000 --- a/src/analytic_engine/src/lib.rs +++ /dev/null @@ -1,238 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Analytic table engine implementations - -#![feature(option_get_or_insert_default)] - -pub mod compaction; -mod context; -mod engine; -pub mod error; -mod instance; -mod manifest; -pub mod memtable; -mod payload; -pub mod prefetchable_stream; -pub mod row_iter; -mod sampler; -pub mod setup; -pub mod space; -pub mod sst; -pub mod table; -pub mod table_options; - -pub mod table_meta_set_impl; -#[cfg(any(test, feature = "test"))] -pub mod tests; - -use compaction::runner::node_picker::NodePicker; -use error::ErrorKind; -use manifest::details::Options as ManifestOptions; -use object_store::config::StorageOptions; -use serde::{Deserialize, Serialize}; -use size_ext::ReadableSize; -use time_ext::ReadableDuration; -use wal::config::Config as WalConfig; - -pub use crate::{ - compaction::scheduler::SchedulerConfig, - instance::{ScanType, SstReadOptionsBuilder}, - table_options::TableOptions, -}; - -/// The compaction mode decides compaction offload or not. -/// -/// [CompactionMode::Offload] means offload the compaction task -/// to a local or remote node. -/// -/// [CompactionMode::Local] means local compaction, no offloading. 
-#[derive(Clone, Default, Debug, Deserialize, Serialize)] -#[serde(tag = "compaction_mode")] -pub enum CompactionMode { - #[default] - Local, - Offload(NodePicker), -} - -/// Config of analytic engine -#[derive(Debug, Clone, Deserialize, Serialize)] -#[serde(default)] -pub struct Config { - /// Storage options of the engine - pub storage: StorageOptions, - - /// Batch size to read records from wal to replay - pub replay_batch_size: usize, - /// Batch size to replay tables - pub max_replay_tables_per_batch: usize, - - /// Default options for table - pub table_opts: TableOptions, - - /// Should we try to compat the `LayeredMemtableOptions` in `TableOptions` - /// The old one use if `mutable_segment_switch_threshold` > 0 to control - /// the on/off of layered memtable(`0`:off, `>0`:on). - /// The new one use a explicit flag `enable` to do that. - pub try_compat_old_layered_memtable_opts: bool, - - pub compaction: SchedulerConfig, - - /// Offload the compaction task or not. - pub compaction_mode: CompactionMode, - - /// sst meta cache capacity - pub sst_meta_cache_cap: Option, - /// sst data cache capacity - pub sst_data_cache_cap: Option, - - /// Manifest options - pub manifest: ManifestOptions, - - /// The maximum rows in the write queue. - pub max_rows_in_write_queue: usize, - /// The maximum write buffer size used for single space. - pub space_write_buffer_size: usize, - /// The maximum size of all Write Buffers across all spaces. - pub db_write_buffer_size: usize, - /// The ratio of table's write buffer size to trigger preflush, and it - /// should be in the range (0, 1]. - pub preflush_write_buffer_size_ratio: f32, - - pub enable_primary_key_sampling: bool, - - // Iterator scanning options - /// Batch size for iterator. - /// - /// The `num_rows_per_row_group` in `table options` will be used if this is - /// not set. - pub scan_batch_size: Option, - /// Max record batches in flight when scan - pub scan_max_record_batches_in_flight: usize, - /// Sst background reading parallelism - pub sst_background_read_parallelism: usize, - /// Number of streams to prefetch - pub num_streams_to_prefetch: usize, - /// Max buffer size for writing sst - pub write_sst_max_buffer_size: ReadableSize, - /// Max retry limit After flush failed - pub max_retry_flush_limit: usize, - /// The min interval between two consecutive flushes - pub min_flush_interval: ReadableDuration, - /// Max bytes per write batch. - /// - /// If this is set, the atomicity of write request will be broken. - pub max_bytes_per_write_batch: Option, - /// The interval for sampling the memory usage - pub mem_usage_sampling_interval: ReadableDuration, - /// The config for log in the wal. - // TODO: move this to WalConfig. - pub wal_encode: WalEncodeConfig, - - /// Wal storage config - /// - /// Now, following storages are supported: - /// + Local Disk - /// + RocksDB - /// + OBKV - /// + Kafka - pub wal: WalConfig, - - /// Recover mode - /// - /// + TableBased, tables on same shard will be recovered table by table. - /// + ShardBased, tables on same shard will be recovered together. 
- pub recover_mode: RecoverMode, - - pub remote_engine_client: remote_engine_client::config::Config, - - pub metrics: MetricsOptions, -} - -#[derive(Debug, Default, Clone, Deserialize, Serialize)] -#[serde(default)] -pub struct MetricsOptions { - enable_table_level_metrics: bool, -} - -#[derive(Debug, Clone, Copy, Deserialize, Serialize)] -pub enum RecoverMode { - TableBased, - ShardBased, -} - -#[derive(Debug, Clone, Copy, Deserialize, Serialize)] -pub enum WalEncodeFormat { - RowWise, - Columnar, -} -#[derive(Debug, Clone, Deserialize, Serialize)] -pub struct WalEncodeConfig { - /// The threshold of columnar bytes to do compression. - pub num_bytes_compress_threshold: ReadableSize, - /// Encode the data in a columnar layout if it is set. - pub format: WalEncodeFormat, -} - -impl Default for WalEncodeConfig { - fn default() -> Self { - Self { - num_bytes_compress_threshold: ReadableSize::kb(1), - format: WalEncodeFormat::RowWise, - } - } -} - -impl Default for Config { - fn default() -> Self { - Self { - storage: Default::default(), - replay_batch_size: 500, - max_replay_tables_per_batch: 64, - table_opts: TableOptions::default(), - try_compat_old_layered_memtable_opts: false, - compaction: SchedulerConfig::default(), - compaction_mode: CompactionMode::Local, - sst_meta_cache_cap: Some(1000), - sst_data_cache_cap: Some(1000), - manifest: ManifestOptions::default(), - max_rows_in_write_queue: 0, - // Zero means disabling this param, give a positive value to enable - // it. - space_write_buffer_size: 0, - // Zero means disabling this param, give a positive value to enable - // it. - db_write_buffer_size: 0, - preflush_write_buffer_size_ratio: 0.75, - enable_primary_key_sampling: false, - scan_batch_size: None, - sst_background_read_parallelism: 8, - num_streams_to_prefetch: 2, - scan_max_record_batches_in_flight: 1024, - write_sst_max_buffer_size: ReadableSize::mb(10), - max_retry_flush_limit: 0, - min_flush_interval: ReadableDuration::minutes(1), - max_bytes_per_write_batch: None, - mem_usage_sampling_interval: ReadableDuration::secs(0), - wal_encode: WalEncodeConfig::default(), - wal: WalConfig::default(), - remote_engine_client: remote_engine_client::config::Config::default(), - recover_mode: RecoverMode::ShardBased, - metrics: MetricsOptions::default(), - } - } -} diff --git a/src/analytic_engine/src/manifest/details.rs b/src/analytic_engine/src/manifest/details.rs deleted file mode 100644 index 4f999d967d..0000000000 --- a/src/analytic_engine/src/manifest/details.rs +++ /dev/null @@ -1,1544 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Implementation of Manifest - -use std::{ - collections::VecDeque, - fmt, mem, - num::NonZeroUsize, - sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, - }, -}; - -use async_trait::async_trait; -use generic_error::{BoxError, GenericResult}; -use horaedbproto::manifest as manifest_pb; -use lazy_static::lazy_static; -use logger::{debug, info, warn}; -use object_store::{ObjectStoreRef, Path}; -use parquet::data_type::AsBytes; -use prometheus::{exponential_buckets, register_histogram, Histogram}; -use prost::Message; -use serde::{Deserialize, Serialize}; -use table_engine::table::TableId; -use time_ext::ReadableDuration; -use tokio::sync::Mutex; -use wal::{ - kv_encoder::LogBatchEncoder, - log_batch::LogEntry, - manager::{ - BatchLogIteratorAdapter, ReadBoundary, ReadContext, ReadRequest, SequenceNumber, - WalLocation, WalManagerRef, WriteContext, - }, -}; - -use crate::{ - manifest::{ - meta_edit::{ - MetaEdit, MetaEditRequest, MetaUpdate, MetaUpdateDecoder, MetaUpdatePayload, Snapshot, - }, - meta_snapshot::{MetaSnapshot, MetaSnapshotBuilder}, - Error, LoadRequest, Manifest, Result, SnapshotRequest, - }, - space::SpaceId, - table::data::{TableDataRef, TableShardInfo}, -}; - -lazy_static! { - static ref RECOVER_TABLE_META_FROM_SNAPSHOT_DURATION: Histogram = register_histogram!( - "recover_table_meta_from_snapshot_duration", - "Histogram for recover table meta from snapshot in seconds", - exponential_buckets(0.01, 2.0, 13).unwrap() - ) - .unwrap(); - static ref RECOVER_TABLE_META_FROM_LOG_DURATION: Histogram = register_histogram!( - "recover_table_meta_from_log_duration", - "Histogram for recover table meta from log in seconds", - exponential_buckets(0.01, 2.0, 13).unwrap() - ) - .unwrap(); -} - -#[async_trait] -trait MetaUpdateLogEntryIterator { - async fn next_update(&mut self) -> Result>; -} - -/// Implementation of [`MetaUpdateLogEntryIterator`]. -#[derive(Debug)] -pub struct MetaUpdateReaderImpl { - iter: BatchLogIteratorAdapter, - has_next: bool, - buffer: VecDeque>, -} - -#[async_trait] -impl MetaUpdateLogEntryIterator for MetaUpdateReaderImpl { - async fn next_update(&mut self) -> Result> { - if !self.has_next { - return Ok(None); - } - - if self.buffer.is_empty() { - let decoder = MetaUpdateDecoder; - let buffer = mem::take(&mut self.buffer); - self.buffer = self - .iter - .next_log_entries(decoder, |_| true, buffer) - .await - .map_err(anyhow::Error::new)?; - } - - match self.buffer.pop_front() { - Some(entry) => Ok(Some((entry.sequence, entry.payload))), - None => { - self.has_next = false; - Ok(None) - } - } - } -} - -/// Table meta set -/// -/// Get snapshot of or modify table's metadata through it. -pub(crate) trait TableMetaSet: fmt::Debug + Send + Sync { - // Get snapshot of `TableData`. - fn get_table_snapshot( - &self, - space_id: SpaceId, - table_id: TableId, - ) -> Result>; - - // Apply update to `TableData` and return it. - fn apply_edit_to_table(&self, update: MetaEditRequest) -> Result; -} - -/// Snapshot recoverer -/// -/// Usually, it will recover the snapshot from storage(like disk, oss, etc). -// TODO: remove `LogStore` and related operations, it should be called directly but not in the -// `SnapshotRecover`. 
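// Illustrative, standalone sketch (not part of this patch): the recovery flow that
// `SnapshotRecoverer` below implements against real snapshot/log stores, reduced to
// in-memory data. State is rebuilt from the latest snapshot (if one exists) plus
// every logged update whose sequence is strictly greater than the snapshot's
// `end_seq`; the updates here are just strings appended to a Vec.
type Sequence = u64;

#[derive(Debug, Default)]
struct State {
    end_seq: Sequence,
    applied: Vec<String>,
}

fn recover(snapshot: Option<State>, logs: &[(Sequence, &str)]) -> State {
    let mut state = snapshot.unwrap_or_default();
    for (seq, update) in logs {
        // Logs already covered by the snapshot are skipped, the rest are replayed.
        if *seq > state.end_seq {
            state.applied.push((*update).to_string());
            state.end_seq = *seq;
        }
    }
    state
}

fn main() {
    let snapshot = Some(State { end_seq: 2, applied: vec!["add_table".into()] });
    let logs = [(1, "add_table"), (2, "version_edit"), (3, "alter_options")];
    let recovered = recover(snapshot, &logs);
    // Only the update after end_seq = 2 is replayed on top of the snapshot.
    assert_eq!(recovered.end_seq, 3);
    assert_eq!(
        recovered.applied,
        vec!["add_table".to_string(), "alter_options".to_string()]
    );
}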
-#[derive(Debug, Clone)] -struct SnapshotRecoverer { - table_id: TableId, - space_id: SpaceId, - log_store: LogStore, - snapshot_store: SnapshotStore, -} - -impl SnapshotRecoverer -where - LogStore: MetaUpdateLogStore + Send + Sync, - SnapshotStore: MetaUpdateSnapshotStore + Send + Sync, -{ - async fn recover(&self) -> Result> { - // Load the current snapshot first. - let snapshot_opt = { - let _timer = RECOVER_TABLE_META_FROM_SNAPSHOT_DURATION.start_timer(); - self.snapshot_store.load().await? - }; - - match snapshot_opt { - Some(v) => Ok(Some(self.create_latest_snapshot_with_prev(v).await?)), - None => self.create_latest_snapshot_without_prev().await, - } - } - - async fn create_latest_snapshot_with_prev(&self, prev_snapshot: Snapshot) -> Result { - debug!( - "Manifest recover with prev snapshot, snapshot:{:?}, table_id:{}, space_id:{}", - prev_snapshot, self.table_id, self.space_id - ); - - let log_start_boundary = ReadBoundary::Excluded(prev_snapshot.end_seq); - let mut reader = self.log_store.scan(log_start_boundary).await?; - - let mut latest_seq = prev_snapshot.end_seq; - let mut manifest_data_builder = if let Some(v) = prev_snapshot.data { - MetaSnapshotBuilder::new(Some(v.table_meta), v.version_meta) - } else { - MetaSnapshotBuilder::default() - }; - while let Some((seq, update)) = reader.next_update().await? { - let _timer = RECOVER_TABLE_META_FROM_LOG_DURATION.start_timer(); - - latest_seq = seq; - manifest_data_builder - .apply_update(update) - .map_err(anyhow::Error::new)?; - } - Ok(Snapshot { - end_seq: latest_seq, - data: manifest_data_builder.build(), - }) - } - - async fn create_latest_snapshot_without_prev(&self) -> Result> { - debug!( - "Manifest recover without prev snapshot, table_id:{}, space_id:{}", - self.table_id, self.space_id - ); - - let mut reader = self.log_store.scan(ReadBoundary::Min).await?; - - let mut latest_seq = SequenceNumber::MIN; - let mut manifest_data_builder = MetaSnapshotBuilder::default(); - let mut has_logs = false; - while let Some((seq, update)) = reader.next_update().await? { - let _timer = RECOVER_TABLE_META_FROM_LOG_DURATION.start_timer(); - - latest_seq = seq; - manifest_data_builder - .apply_update(update) - .map_err(anyhow::Error::new)?; - has_logs = true; - } - - if has_logs { - Ok(Some(Snapshot { - end_seq: latest_seq, - data: manifest_data_builder.build(), - })) - } else { - debug!( - "Manifest recover nothing, table_id:{}, space_id:{}", - self.table_id, self.space_id - ); - Ok(None) - } - } -} - -/// Snapshot creator -/// -/// Usually, it will get snapshot from memory, and store them to storage(like -/// disk, oss, etc). -// TODO: remove `LogStore` and related operations, it should be called directly but not in the -// `Snapshotter`. -#[derive(Debug, Clone)] -struct Snapshotter { - log_store: LogStore, - snapshot_store: SnapshotStore, - end_seq: SequenceNumber, - snapshot_data_provider: Arc, - space_id: SpaceId, - table_id: TableId, -} - -impl Snapshotter -where - LogStore: MetaUpdateLogStore + Send + Sync, - SnapshotStore: MetaUpdateSnapshotStore + Send + Sync, -{ - /// Create a latest snapshot of the current logs. - async fn snapshot(&self) -> Result> { - // Get snapshot data from memory. - let table_snapshot_opt = self - .snapshot_data_provider - .get_table_snapshot(self.space_id, self.table_id)?; - let snapshot = Snapshot { - end_seq: self.end_seq, - data: table_snapshot_opt, - }; - - // Update the current snapshot to the new one. 
- self.snapshot_store.store(&snapshot).await?; - // Delete the expired logs after saving the snapshot. - // TODO: Actually this operation can be performed background, and the failure of - // it can be ignored. - self.log_store.delete_up_to(snapshot.end_seq).await?; - - Ok(Some(snapshot)) - } -} - -/// Options for manifest -#[derive(Debug, Clone, Deserialize, Serialize)] -#[serde(default)] -pub struct Options { - /// Steps to do snapshot - // TODO: move this field to suitable place. - pub snapshot_every_n_updates: NonZeroUsize, - - /// Timeout to read manifest entries - pub scan_timeout: ReadableDuration, - - /// Batch size to read manifest entries - pub scan_batch_size: NonZeroUsize, - - /// Timeout to store manifest entries - pub store_timeout: ReadableDuration, -} - -impl Default for Options { - fn default() -> Self { - Self { - snapshot_every_n_updates: NonZeroUsize::new(100).unwrap(), - scan_timeout: ReadableDuration::secs(5), - scan_batch_size: NonZeroUsize::new(100).unwrap(), - store_timeout: ReadableDuration::secs(5), - } - } -} - -/// The implementation based on wal and object store of [`Manifest`]. -#[derive(Debug)] -pub struct ManifestImpl { - opts: Options, - wal_manager: WalManagerRef, - store: ObjectStoreRef, - - /// Number of updates wrote to wal since last snapshot. - num_updates_since_snapshot: Arc, - - /// Ensure the snapshot procedure is non-concurrent. - /// - /// Use tokio mutex because this guard protects the snapshot procedure which - /// contains io operations. - snapshot_write_guard: Arc>, - - table_meta_set: Arc, -} - -impl ManifestImpl { - pub(crate) async fn open( - opts: Options, - wal_manager: WalManagerRef, - store: ObjectStoreRef, - table_meta_set: Arc, - ) -> Result { - let manifest = Self { - opts, - wal_manager, - store, - num_updates_since_snapshot: Arc::new(AtomicUsize::new(0)), - snapshot_write_guard: Arc::new(Mutex::new(())), - table_meta_set, - }; - - Ok(manifest) - } - - async fn store_update_to_wal( - &self, - meta_update: MetaUpdate, - location: WalLocation, - ) -> Result { - let log_store = WalBasedLogStore { - opts: self.opts.clone(), - location, - wal_manager: self.wal_manager.clone(), - }; - let latest_sequence = log_store.append(meta_update).await?; - self.num_updates_since_snapshot - .fetch_add(1, Ordering::Relaxed); - - Ok(latest_sequence) - } - - /// Do snapshot if no other snapshot is triggered. - /// - /// Returns the latest snapshot if snapshot is done. - async fn do_snapshot_internal( - &self, - space_id: SpaceId, - table_id: TableId, - location: WalLocation, - ) -> Result> { - if let Ok(_guard) = self.snapshot_write_guard.try_lock() { - let log_store = WalBasedLogStore { - opts: self.opts.clone(), - location, - wal_manager: self.wal_manager.clone(), - }; - let snapshot_store = - ObjectStoreBasedSnapshotStore::new(space_id, table_id, self.store.clone()); - let end_seq = self.wal_manager.sequence_num(location).await.unwrap(); - let snapshotter = Snapshotter { - log_store, - snapshot_store, - end_seq, - snapshot_data_provider: self.table_meta_set.clone(), - space_id, - table_id, - }; - - let snapshot = snapshotter.snapshot().await?; - Ok(snapshot) - } else { - debug!("Avoid concurrent snapshot"); - Ok(None) - } - } -} - -#[async_trait] -impl Manifest for ManifestImpl { - async fn apply_edit(&self, request: MetaEditRequest) -> GenericResult<()> { - info!("Manifest store update, request:{:?}", request); - - // Update storage. 
- let MetaEditRequest { - shard_info, - meta_edit, - table_catalog_info: _, - } = request.clone(); - - let meta_update = MetaUpdate::try_from(meta_edit).box_err()?; - let table_id = meta_update.table_id(); - let shard_id = shard_info.shard_id; - let location = WalLocation::new(shard_id as u64, table_id.as_u64()); - let space_id = meta_update.space_id(); - - self.store_update_to_wal(meta_update, location).await?; - - // Update memory. - let table_data = self.table_meta_set.apply_edit_to_table(request).box_err()?; - - // Update manifest updates count. - table_data.increase_manifest_updates(1); - // Judge if snapshot is needed. - if table_data.should_do_manifest_snapshot() { - self.do_snapshot_internal(space_id, table_id, location) - .await?; - table_data.reset_manifest_updates(); - } - - Ok(()) - } - - async fn recover(&self, load_req: &LoadRequest) -> GenericResult<()> { - info!("Manifest recover begin, request:{load_req:?}"); - - // Load table meta snapshot from storage. - let location = WalLocation::new(load_req.shard_id as u64, load_req.table_id.as_u64()); - - let log_store = WalBasedLogStore { - opts: self.opts.clone(), - location, - wal_manager: self.wal_manager.clone(), - }; - let snapshot_store = ObjectStoreBasedSnapshotStore::new( - load_req.space_id, - load_req.table_id, - self.store.clone(), - ); - let recover = SnapshotRecoverer { - table_id: load_req.table_id, - space_id: load_req.space_id, - log_store, - snapshot_store, - }; - let meta_snapshot_opt = recover.recover().await?.and_then(|v| v.data); - let meta_snapshot_exists = meta_snapshot_opt.is_some(); - // Apply it to table. - if let Some(snapshot) = meta_snapshot_opt { - let meta_edit = MetaEdit::Snapshot(snapshot); - let request = MetaEditRequest { - shard_info: TableShardInfo::new(load_req.shard_id), - meta_edit, - table_catalog_info: load_req.table_catalog_info.clone(), - }; - self.table_meta_set.apply_edit_to_table(request)?; - } - - info!("Manifest recover finish, request:{load_req:?}, meta_snapshot_exist:{meta_snapshot_exists}"); - - Ok(()) - } - - async fn do_snapshot(&self, request: SnapshotRequest) -> GenericResult<()> { - info!("Manifest do snapshot, request:{:?}", request); - - let table_id = request.table_id; - let location = WalLocation::new(request.shard_id as u64, table_id.as_u64()); - let space_id = request.space_id; - let table_id = request.table_id; - - self.do_snapshot_internal(space_id, table_id, location) - .await - .box_err()?; - - Ok(()) - } -} - -#[async_trait] -trait MetaUpdateLogStore: std::fmt::Debug { - type Iter: MetaUpdateLogEntryIterator + Send; - async fn scan(&self, start: ReadBoundary) -> Result; - - async fn append(&self, meta_update: MetaUpdate) -> Result; - - async fn delete_up_to(&self, inclusive_end: SequenceNumber) -> Result<()>; -} - -#[async_trait] -trait MetaUpdateSnapshotStore: std::fmt::Debug { - async fn store(&self, snapshot: &Snapshot) -> Result<()>; - - async fn load(&self) -> Result>; -} - -#[derive(Debug)] -struct ObjectStoreBasedSnapshotStore { - store: ObjectStoreRef, - snapshot_path: Path, -} - -impl ObjectStoreBasedSnapshotStore { - const CURRENT_SNAPSHOT_NAME: &'static str = "current"; - const SNAPSHOT_PATH_PREFIX: &'static str = "manifest/snapshot"; - - pub fn new(space_id: SpaceId, table_id: TableId, store: ObjectStoreRef) -> Self { - let snapshot_path = Self::snapshot_path(space_id, table_id); - Self { - store, - snapshot_path, - } - } - - fn snapshot_path(space_id: SpaceId, table_id: TableId) -> Path { - format!( - "{}/{}/{}/{}", - Self::SNAPSHOT_PATH_PREFIX, - 
space_id, - table_id, - Self::CURRENT_SNAPSHOT_NAME, - ) - .into() - } -} - -#[async_trait] -impl MetaUpdateSnapshotStore for ObjectStoreBasedSnapshotStore { - /// Store the latest snapshot to the underlying store by overwriting the old - /// snapshot. - async fn store(&self, snapshot: &Snapshot) -> Result<()> { - let snapshot_pb = manifest_pb::Snapshot::from(snapshot.clone()); - let payload = snapshot_pb.encode_to_vec(); - // The atomic write is ensured by the [`ObjectStore`] implementation. - self.store - .put(&self.snapshot_path, payload.into()) - .await - .map_err(anyhow::Error::new)?; - - Ok(()) - } - - /// Load the `current_snapshot` file from the underlying store, and with the - /// mapping info in it load the latest snapshot file then. - async fn load(&self) -> Result> { - let get_res = self.store.get(&self.snapshot_path).await; - if let Err(object_store::ObjectStoreError::NotFound { path, source }) = &get_res { - warn!( - "Current snapshot file doesn't exist, path:{}, err:{}", - path, source - ); - return Ok(None); - }; - - // TODO: currently, this is just a workaround to handle the case where the error - // is not thrown as [object_store::ObjectStoreError::NotFound]. - if let Err(err) = &get_res { - let err_msg = err.to_string().to_lowercase(); - if err_msg.contains("404") || err_msg.contains("not found") { - warn!("Current snapshot file doesn't exist, err:{}", err); - return Ok(None); - } - } - - let payload = get_res - .map_err(anyhow::Error::new)? - .bytes() - .await - .map_err(anyhow::Error::new)?; - let snapshot_pb = - manifest_pb::Snapshot::decode(payload.as_bytes()).map_err(anyhow::Error::new)?; - let snapshot = Snapshot::try_from(snapshot_pb).map_err(anyhow::Error::new)?; - - Ok(Some(snapshot)) - } -} - -#[derive(Debug, Clone)] -struct WalBasedLogStore { - opts: Options, - location: WalLocation, - wal_manager: WalManagerRef, -} - -#[async_trait] -impl MetaUpdateLogStore for WalBasedLogStore { - type Iter = MetaUpdateReaderImpl; - - async fn scan(&self, start: ReadBoundary) -> Result { - let ctx = ReadContext { - timeout: self.opts.scan_timeout.0, - batch_size: self.opts.scan_batch_size.into(), - }; - - let read_req = ReadRequest { - location: self.location, - start, - end: ReadBoundary::Max, - }; - - let iter = self - .wal_manager - .read_batch(&ctx, &read_req) - .await - .map_err(anyhow::Error::new)?; - - Ok(MetaUpdateReaderImpl { - iter, - has_next: true, - buffer: VecDeque::with_capacity(ctx.batch_size), - }) - } - - async fn append(&self, meta_update: MetaUpdate) -> Result { - let payload = MetaUpdatePayload::from(meta_update); - let log_batch_encoder = LogBatchEncoder::create(self.location); - let log_batch = log_batch_encoder.encode(&payload).map_err(|e| { - anyhow::anyhow!( - "Failed to encode payloads, wal_location:{:?}, err:{}", - self.location, - e - ) - })?; - - let write_ctx = WriteContext { - timeout: self.opts.store_timeout.0, - }; - - self.wal_manager - .write(&write_ctx, &log_batch) - .await - .map_err(|e| Error::from(anyhow::Error::new(e))) - } - - async fn delete_up_to(&self, inclusive_end: SequenceNumber) -> Result<()> { - self.wal_manager - .mark_delete_entries_up_to(self.location, inclusive_end) - .await - .map_err(|e| Error::from(anyhow::Error::new(e))) - } -} - -#[cfg(test)] -mod tests { - use std::{num::NonZeroUsize, path::PathBuf, sync::Arc, vec}; - - use arena::NoopCollector; - use common_types::{ - column_schema, datum::DatumKind, schema, schema::Schema, table::DEFAULT_SHARD_ID, - }; - use futures::future::BoxFuture; - use 
object_store::local_file; - use runtime::Runtime; - use table_engine::table::{SchemaId, TableId, TableSeqGenerator}; - use wal::rocksdb_impl::manager::Builder as WalBuilder; - - use super::*; - use crate::{ - manifest::{ - details::{MetaUpdateLogEntryIterator, MetaUpdateLogStore}, - meta_edit::{ - AddTableMeta, AlterOptionsMeta, AlterSchemaMeta, DropTableMeta, MetaEdit, - MetaUpdate, VersionEditMeta, - }, - LoadRequest, Manifest, - }, - sst::file::tests::FilePurgerMocker, - table::data::{ - tests::default_schema, MemSizeOptions, TableCatalogInfo, TableConfig, TableData, - TableDesc, TableShardInfo, - }, - MetricsOptions, TableOptions, - }; - - fn build_altered_schema(schema: &Schema) -> Schema { - let mut builder = schema::Builder::new().auto_increment_column_id(true); - let old_pk_indexes = schema.primary_key_indexes(); - for column_schema in schema.key_columns() { - builder = builder - .add_key_column(column_schema.clone()) - .expect("should succeed to add key column"); - } - for column_schema in schema.normal_columns() { - builder = builder - .add_normal_column(column_schema.clone()) - .expect("should succeed to add normal column"); - } - builder - .add_normal_column( - column_schema::Builder::new("field5".to_string(), DatumKind::String) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .primary_key_indexes(old_pk_indexes.to_vec()) - .build() - .unwrap() - } - - fn build_runtime(thread_num: usize) -> Arc { - Arc::new( - runtime::Builder::default() - .worker_threads(thread_num) - .enable_all() - .build() - .unwrap(), - ) - } - - #[derive(Debug, Default)] - struct MockProviderImpl { - builder: std::sync::Mutex, - } - - impl TableMetaSet for MockProviderImpl { - fn get_table_snapshot( - &self, - _space_id: SpaceId, - _table_id: TableId, - ) -> Result> { - let builder = self.builder.lock().unwrap(); - Ok(builder.clone().build()) - } - - fn apply_edit_to_table(&self, request: MetaEditRequest) -> Result { - let mut builder = self.builder.lock().unwrap(); - let MetaEditRequest { - shard_info: _, - meta_edit, - table_catalog_info, - } = request; - - match meta_edit { - MetaEdit::Update(update) => { - builder.apply_update(update).unwrap(); - } - MetaEdit::Snapshot(meta_snapshot) => { - let MetaSnapshot { - table_meta, - version_meta, - } = meta_snapshot; - *builder = MetaSnapshotBuilder::new(Some(table_meta), version_meta); - } - } - - let TableCatalogInfo { - schema_id, - schema_name, - catalog_name, - } = table_catalog_info; - let table_opts = TableOptions::default(); - let purger = FilePurgerMocker::mock(); - let collector = Arc::new(NoopCollector); - let mem_size_options = MemSizeOptions { - collector, - size_sampling_interval: Default::default(), - }; - let test_data = TableData::new( - TableDesc { - space_id: 0, - id: TableId::new(0), - name: "test_table".to_string(), - schema: default_schema(), - schema_id, - schema_name, - catalog_name, - shard_id: 0, - }, - table_opts, - TableConfig { - preflush_write_buffer_size_ratio: 0.75, - manifest_snapshot_every_n_updates: NonZeroUsize::new(usize::MAX).unwrap(), - metrics_opt: MetricsOptions::default(), - enable_primary_key_sampling: false, - try_compat_old_layered_memtable_opts: false, - }, - &purger, - mem_size_options, - ) - .unwrap(); - - Ok(Arc::new(test_data)) - } - } - - struct TestContext { - dir: PathBuf, - runtime: Arc, - options: Options, - table_catalog_info: TableCatalogInfo, - table_seq_gen: TableSeqGenerator, - mock_provider: Arc, - } - - impl TestContext { - fn new(prefix: &str, schema_id: SchemaId) -> 
Self { - test_util::init_log_for_test(); - let dir = tempfile::Builder::new().prefix(prefix).tempdir().unwrap(); - let runtime = build_runtime(2); - - let options = Options { - snapshot_every_n_updates: NonZeroUsize::new(100).unwrap(), - ..Default::default() - }; - Self { - dir: dir.into_path(), - runtime, - options, - table_catalog_info: TableCatalogInfo { - schema_id, - catalog_name: "test_catalog".to_string(), - schema_name: "public".to_string(), - }, - table_seq_gen: TableSeqGenerator::default(), - mock_provider: Arc::new(MockProviderImpl::default()), - } - } - - fn alloc_table_id(&self) -> TableId { - TableId::with_seq( - self.table_catalog_info.schema_id, - self.table_seq_gen.alloc_table_seq().unwrap(), - ) - .unwrap() - } - - fn table_name_from_id(table_id: TableId) -> String { - format!("table_{table_id:?}") - } - - async fn open_manifest(&self) -> ManifestImpl { - let manifest_wal = WalBuilder::new(self.dir.clone(), self.runtime.clone()) - .build() - .unwrap(); - - let local_path = self.dir.to_string_lossy().to_string(); - let object_store = local_file::try_new_with_default(local_path).unwrap(); - - ManifestImpl::open( - self.options.clone(), - Arc::new(manifest_wal), - Arc::new(object_store), - self.mock_provider.clone(), - ) - .await - .unwrap() - } - - async fn check_table_manifest_data_with_manifest( - &self, - load_req: &LoadRequest, - expected: &Option, - manifest: &ManifestImpl, - ) { - manifest.recover(load_req).await.unwrap(); - let data = self.mock_provider.builder.lock().unwrap().clone().build(); - assert_eq!(&data, expected); - } - - async fn check_table_manifest_data( - &self, - load_req: &LoadRequest, - expected: &Option, - ) { - let manifest = self.open_manifest().await; - self.check_table_manifest_data_with_manifest(load_req, expected, &manifest) - .await; - } - - fn meta_update_add_table(&self, table_id: TableId) -> MetaUpdate { - let table_name = Self::table_name_from_id(table_id); - MetaUpdate::AddTable(AddTableMeta { - space_id: self.table_catalog_info.schema_id.as_u32(), - table_id, - table_name, - schema: common_types::tests::build_schema(), - opts: TableOptions::default(), - }) - } - - fn meta_update_drop_table(&self, table_id: TableId) -> MetaUpdate { - let table_name = Self::table_name_from_id(table_id); - MetaUpdate::DropTable(DropTableMeta { - space_id: self.table_catalog_info.schema_id.as_u32(), - table_id, - table_name, - }) - } - - fn meta_update_version_edit( - &self, - table_id: TableId, - flushed_seq: Option, - ) -> MetaUpdate { - MetaUpdate::VersionEdit(VersionEditMeta { - space_id: self.table_catalog_info.schema_id.as_u32(), - table_id, - flushed_sequence: flushed_seq.unwrap_or(100), - files_to_add: vec![], - files_to_delete: vec![], - mems_to_remove: vec![], - max_file_id: 0, - }) - } - - fn meta_update_alter_table_options(&self, table_id: TableId) -> MetaUpdate { - MetaUpdate::AlterOptions(AlterOptionsMeta { - space_id: self.table_catalog_info.schema_id.as_u32(), - table_id, - options: TableOptions { - enable_ttl: false, - ..Default::default() - }, - }) - } - - fn meta_update_alter_table_schema(&self, table_id: TableId) -> MetaUpdate { - MetaUpdate::AlterSchema(AlterSchemaMeta { - space_id: self.table_catalog_info.schema_id.as_u32(), - table_id, - schema: build_altered_schema(&common_types::tests::build_schema()), - pre_schema_version: 1, - }) - } - - async fn add_table_with_manifest( - &self, - table_id: TableId, - manifest_data_builder: &mut MetaSnapshotBuilder, - manifest: &ManifestImpl, - ) { - let shard_info = TableShardInfo { - shard_id: 
DEFAULT_SHARD_ID, - }; - - let add_table = self.meta_update_add_table(table_id); - let edit_req = { - MetaEditRequest { - shard_info, - meta_edit: MetaEdit::Update(add_table.clone()), - table_catalog_info: self.table_catalog_info.clone(), - } - }; - - manifest.apply_edit(edit_req).await.unwrap(); - manifest_data_builder - .apply_update(add_table.clone()) - .unwrap(); - } - - async fn drop_table_with_manifest( - &self, - table_id: TableId, - manifest_data_builder: &mut MetaSnapshotBuilder, - manifest: &ManifestImpl, - ) { - let shard_info = TableShardInfo { - shard_id: DEFAULT_SHARD_ID, - }; - - let drop_table = self.meta_update_drop_table(table_id); - let edit_req = { - MetaEditRequest { - shard_info, - meta_edit: MetaEdit::Update(drop_table.clone()), - table_catalog_info: self.table_catalog_info.clone(), - } - }; - manifest.apply_edit(edit_req).await.unwrap(); - manifest_data_builder - .apply_update(drop_table.clone()) - .unwrap(); - } - - async fn version_edit_table_with_manifest( - &self, - table_id: TableId, - flushed_seq: Option, - manifest_data_builder: &mut MetaSnapshotBuilder, - manifest: &ManifestImpl, - ) { - let shard_info = TableShardInfo { - shard_id: DEFAULT_SHARD_ID, - }; - - let version_edit = self.meta_update_version_edit(table_id, flushed_seq); - let edit_req = { - MetaEditRequest { - shard_info, - meta_edit: MetaEdit::Update(version_edit.clone()), - table_catalog_info: self.table_catalog_info.clone(), - } - }; - manifest.apply_edit(edit_req).await.unwrap(); - manifest_data_builder - .apply_update(version_edit.clone()) - .unwrap(); - } - - async fn add_table( - &self, - table_id: TableId, - manifest_data_builder: &mut MetaSnapshotBuilder, - ) { - let manifest = self.open_manifest().await; - self.add_table_with_manifest(table_id, manifest_data_builder, &manifest) - .await; - } - - async fn drop_table( - &self, - table_id: TableId, - manifest_data_builder: &mut MetaSnapshotBuilder, - ) { - let manifest = self.open_manifest().await; - - self.drop_table_with_manifest(table_id, manifest_data_builder, &manifest) - .await; - } - - async fn version_edit_table( - &self, - table_id: TableId, - manifest_data_builder: &mut MetaSnapshotBuilder, - ) { - let manifest = self.open_manifest().await; - self.version_edit_table_with_manifest(table_id, None, manifest_data_builder, &manifest) - .await; - } - - async fn alter_table_options( - &self, - table_id: TableId, - manifest_data_builder: &mut MetaSnapshotBuilder, - ) { - let manifest = self.open_manifest().await; - - let shard_info = TableShardInfo { - shard_id: DEFAULT_SHARD_ID, - }; - - let alter_options = self.meta_update_alter_table_options(table_id); - let edit_req = { - MetaEditRequest { - shard_info, - meta_edit: MetaEdit::Update(alter_options.clone()), - table_catalog_info: self.table_catalog_info.clone(), - } - }; - manifest.apply_edit(edit_req).await.unwrap(); - manifest_data_builder.apply_update(alter_options).unwrap(); - } - - async fn alter_table_schema( - &self, - table_id: TableId, - manifest_data_builder: &mut MetaSnapshotBuilder, - ) { - let manifest = self.open_manifest().await; - - let shard_info = TableShardInfo { - shard_id: DEFAULT_SHARD_ID, - }; - - let alter_schema = self.meta_update_alter_table_schema(table_id); - let edit_req = { - MetaEditRequest { - shard_info, - meta_edit: MetaEdit::Update(alter_schema.clone()), - table_catalog_info: self.table_catalog_info.clone(), - } - }; - - manifest.apply_edit(edit_req).await.unwrap(); - manifest_data_builder.apply_update(alter_schema).unwrap(); - } - } - - fn 
run_basic_manifest_test(ctx: TestContext, update_table_meta: F) - where - F: for<'a> FnOnce( - &'a TestContext, - TableId, - &'a mut MetaSnapshotBuilder, - ) -> BoxFuture<'a, ()>, - { - let runtime = ctx.runtime.clone(); - runtime.block_on(async move { - let table_id = ctx.alloc_table_id(); - let mut manifest_data_builder = MetaSnapshotBuilder::default(); - - update_table_meta(&ctx, table_id, &mut manifest_data_builder).await; - - let load_req = LoadRequest { - table_id, - shard_id: DEFAULT_SHARD_ID, - space_id: ctx.table_catalog_info.schema_id.as_u32(), - table_catalog_info: ctx.table_catalog_info.clone(), - }; - let expected_table_manifest_data = manifest_data_builder.build(); - ctx.check_table_manifest_data(&load_req, &expected_table_manifest_data) - .await; - }) - } - - #[test] - fn test_manifest_add_table() { - let ctx = TestContext::new("add_table", SchemaId::from_u32(0)); - run_basic_manifest_test(ctx, |ctx, table_id, manifest_data_builder| { - Box::pin(async move { - ctx.add_table(table_id, manifest_data_builder).await; - }) - }); - } - - #[test] - fn test_manifest_drop_table() { - let ctx = TestContext::new("drop_table", SchemaId::from_u32(0)); - run_basic_manifest_test(ctx, |ctx, table_id, manifest_data_builder| { - Box::pin(async move { - ctx.add_table(table_id, manifest_data_builder).await; - ctx.drop_table(table_id, manifest_data_builder).await; - }) - }); - } - - #[test] - fn test_manifest_version_edit() { - let ctx = TestContext::new("version_edit", SchemaId::from_u32(0)); - run_basic_manifest_test(ctx, |ctx, table_id, manifest_data_builder| { - Box::pin(async move { - ctx.add_table(table_id, manifest_data_builder).await; - ctx.version_edit_table(table_id, manifest_data_builder) - .await; - }) - }); - } - - #[test] - fn test_manifest_alter_options() { - let ctx = TestContext::new("version_edit", SchemaId::from_u32(0)); - run_basic_manifest_test(ctx, |ctx, table_id, manifest_data_builder| { - Box::pin(async move { - ctx.add_table(table_id, manifest_data_builder).await; - ctx.alter_table_options(table_id, manifest_data_builder) - .await; - }) - }); - } - - #[test] - fn test_manifest_alter_schema() { - let ctx = TestContext::new("version_edit", SchemaId::from_u32(0)); - run_basic_manifest_test(ctx, |ctx, table_id, manifest_data_builder| { - Box::pin(async move { - ctx.add_table(table_id, manifest_data_builder).await; - ctx.alter_table_schema(table_id, manifest_data_builder) - .await; - }) - }); - } - - #[test] - fn test_manifest_snapshot_one_table() { - let ctx = TestContext::new("snapshot_one_table", SchemaId::from_u32(0)); - let runtime = ctx.runtime.clone(); - - runtime.block_on(async move { - let table_id = ctx.alloc_table_id(); - let location = WalLocation::new(DEFAULT_SHARD_ID as u64, table_id.as_u64()); - let mut manifest_data_builder = MetaSnapshotBuilder::default(); - let manifest = ctx.open_manifest().await; - ctx.add_table_with_manifest(table_id, &mut manifest_data_builder, &manifest) - .await; - - manifest - .do_snapshot_internal( - ctx.table_catalog_info.schema_id.as_u32(), - table_id, - location, - ) - .await - .unwrap(); - - ctx.version_edit_table_with_manifest( - table_id, - None, - &mut manifest_data_builder, - &manifest, - ) - .await; - let load_req = LoadRequest { - space_id: ctx.table_catalog_info.schema_id.as_u32(), - table_catalog_info: ctx.table_catalog_info.clone(), - table_id, - shard_id: DEFAULT_SHARD_ID, - }; - ctx.check_table_manifest_data_with_manifest( - &load_req, - &manifest_data_builder.build(), - &manifest, - ) - .await; - }); - } - - #[test] - 
fn test_manifest_snapshot_one_table_massive_logs() { - let ctx = TestContext::new("snapshot_one_table_massive_logs", SchemaId::from_u32(0)); - let runtime = ctx.runtime.clone(); - runtime.block_on(async move { - let table_id = ctx.alloc_table_id(); - let load_req = LoadRequest { - space_id: ctx.table_catalog_info.schema_id.as_u32(), - table_catalog_info: ctx.table_catalog_info.clone(), - table_id, - shard_id: DEFAULT_SHARD_ID, - }; - let mut manifest_data_builder = MetaSnapshotBuilder::default(); - let manifest = ctx.open_manifest().await; - ctx.add_table_with_manifest(table_id, &mut manifest_data_builder, &manifest) - .await; - - for i in 0..500 { - ctx.version_edit_table_with_manifest( - table_id, - Some(i), - &mut manifest_data_builder, - &manifest, - ) - .await; - } - ctx.check_table_manifest_data_with_manifest( - &load_req, - &manifest_data_builder.clone().build(), - &manifest, - ) - .await; - - let location = WalLocation::new(DEFAULT_SHARD_ID as u64, table_id.as_u64()); - manifest - .do_snapshot_internal( - ctx.table_catalog_info.schema_id.as_u32(), - table_id, - location, - ) - .await - .unwrap(); - for i in 500..550 { - ctx.version_edit_table_with_manifest( - table_id, - Some(i), - &mut manifest_data_builder, - &manifest, - ) - .await; - } - ctx.check_table_manifest_data_with_manifest( - &load_req, - &manifest_data_builder.build(), - &manifest, - ) - .await; - }); - } - - #[derive(Debug, Clone)] - struct MemLogStore { - logs: Arc>>>, - } - - impl MemLogStore { - fn from_updates(updates: &[MetaUpdate]) -> Self { - let mut buf = Vec::with_capacity(updates.len()); - buf.extend(updates.iter().map(|v| Some(v.clone()))); - Self { - logs: Arc::new(std::sync::Mutex::new(buf)), - } - } - - async fn to_meta_updates(&self) -> Vec { - let logs = self.logs.lock().unwrap(); - logs.iter().filter_map(|v| v.clone()).collect() - } - - fn next_seq(&self) -> u64 { - let logs = self.logs.lock().unwrap(); - logs.len() as u64 - } - } - - #[async_trait] - impl MetaUpdateLogStore for MemLogStore { - type Iter = vec::IntoIter<(SequenceNumber, MetaUpdate)>; - - async fn scan(&self, start: ReadBoundary) -> Result { - let logs = self.logs.lock().unwrap(); - let start = start.as_start_sequence_number().unwrap() as usize; - - let mut exist_logs = Vec::new(); - let logs_with_idx = logs.iter().enumerate(); - for (idx, update) in logs_with_idx { - if idx < start || update.is_none() { - continue; - } - exist_logs.push((idx as u64, update.clone().unwrap())); - } - - Ok(exist_logs.into_iter()) - } - - async fn append(&self, meta_update: MetaUpdate) -> Result { - let mut logs = self.logs.lock().unwrap(); - let seq = logs.len() as u64; - logs.push(Some(meta_update)); - - Ok(seq) - } - - async fn delete_up_to(&self, inclusive_end: SequenceNumber) -> Result<()> { - let mut logs = self.logs.lock().unwrap(); - for i in 0..=inclusive_end { - logs[i as usize] = None; - } - - Ok(()) - } - } - - #[async_trait] - impl MetaUpdateLogEntryIterator for T - where - T: Iterator + Send + Sync, - { - async fn next_update(&mut self) -> Result> { - Ok(self.next()) - } - } - - #[derive(Debug, Clone)] - struct MemSnapshotStore { - curr_snapshot: Arc>>, - } - - impl MemSnapshotStore { - fn new() -> Self { - Self { - curr_snapshot: Arc::new(Mutex::new(None)), - } - } - } - - #[async_trait] - impl MetaUpdateSnapshotStore for MemSnapshotStore { - async fn store(&self, snapshot: &Snapshot) -> Result<()> { - let mut curr_snapshot = self.curr_snapshot.lock().await; - *curr_snapshot = Some(snapshot.clone()); - Ok(()) - } - - async fn load(&self) -> 
Result<Option<Snapshot>> {
-            let curr_snapshot = self.curr_snapshot.lock().await;
-            Ok(curr_snapshot.clone())
-        }
-    }
-
-    fn run_snapshot_test(
-        ctx: Arc<TestContext>,
-        table_id: TableId,
-        input_updates: Vec<MetaUpdate>,
-        updates_after_snapshot: Vec<MetaUpdate>,
-    ) {
-        let log_store = MemLogStore::from_updates(&input_updates);
-        let snapshot_store = MemSnapshotStore::new();
-        let snapshot_data_provider = ctx.mock_provider.clone();
-        let table_catalog_info = ctx.table_catalog_info.clone();
-
-        ctx.runtime.block_on(async move {
-            let log_store = log_store;
-            let snapshot_store = snapshot_store;
-            let snapshot_provider = snapshot_data_provider;
-
-            // 1. Test write and do snapshot
-            // Create and check the latest snapshot first.
-            let mut manifest_builder = MetaSnapshotBuilder::default();
-            for update in &input_updates {
-                manifest_builder.apply_update(update.clone()).unwrap();
-                let request = MetaEditRequest {
-                    shard_info: TableShardInfo::new(DEFAULT_SHARD_ID),
-                    meta_edit: MetaEdit::Update(update.clone()),
-                    table_catalog_info: table_catalog_info.clone(),
-                };
-                snapshot_provider.apply_edit_to_table(request).unwrap();
-            }
-            let expect_table_manifest_data = manifest_builder.clone().build();
-
-            // Do snapshot from memory and check the snapshot result.
-            let snapshot = build_and_store_snapshot(
-                &log_store,
-                &snapshot_store,
-                snapshot_provider.clone(),
-                table_id,
-            )
-            .await;
-            if input_updates.is_empty() {
-                assert!(snapshot.is_none());
-            } else {
-                assert!(snapshot.is_some());
-                let snapshot = snapshot.unwrap();
-                assert_eq!(snapshot.data, expect_table_manifest_data);
-                assert_eq!(snapshot.end_seq, log_store.next_seq() - 1);
-
-                let recovered_snapshot =
-                    recover_snapshot(table_id, 0, &log_store, &snapshot_store).await;
-                assert_eq!(snapshot, recovered_snapshot.unwrap());
-            }
-            // The logs in the log store should be cleared after snapshot.
-            let updates_in_log_store = log_store.to_meta_updates().await;
-            assert!(updates_in_log_store.is_empty());
-
-            // 2. Test write after snapshot, and do snapshot again
-            // Write the updates after snapshot.
-            for update in &updates_after_snapshot {
-                manifest_builder.apply_update(update.clone()).unwrap();
-                log_store.append(update.clone()).await.unwrap();
-                let request = MetaEditRequest {
-                    shard_info: TableShardInfo::new(DEFAULT_SHARD_ID),
-                    meta_edit: MetaEdit::Update(update.clone()),
-                    table_catalog_info: table_catalog_info.clone(),
-                };
-                snapshot_provider.apply_edit_to_table(request).unwrap();
-            }
-            let expect_table_manifest_data = manifest_builder.build();
-            // Do snapshot and check the snapshot result again.
-            let snapshot =
-                build_and_store_snapshot(&log_store, &snapshot_store, snapshot_provider, table_id)
-                    .await;
-
-            if input_updates.is_empty() && updates_after_snapshot.is_empty() {
-                assert!(snapshot.is_none());
-            } else {
-                assert!(snapshot.is_some());
-                let snapshot = snapshot.unwrap();
-                assert_eq!(snapshot.data, expect_table_manifest_data);
-                assert_eq!(snapshot.end_seq, log_store.next_seq() - 1);
-
-                let recovered_snapshot =
-                    recover_snapshot(table_id, 0, &log_store, &snapshot_store).await;
-                assert_eq!(snapshot, recovered_snapshot.unwrap());
-            }
-            // The logs in the log store should be cleared after snapshot.
- let updates_in_log_store = log_store.to_meta_updates().await; - assert!(updates_in_log_store.is_empty()); - }); - } - - async fn build_and_store_snapshot( - log_store: &MemLogStore, - snapshot_store: &MemSnapshotStore, - snapshot_provider: Arc, - table_id: TableId, - ) -> Option { - let end_seq = log_store.next_seq() - 1; - let snapshotter = Snapshotter { - log_store: log_store.clone(), - snapshot_store: snapshot_store.clone(), - end_seq, - snapshot_data_provider: snapshot_provider, - space_id: 0, - table_id, - }; - snapshotter.snapshot().await.unwrap() - } - - async fn recover_snapshot( - table_id: TableId, - space_id: SpaceId, - log_store: &MemLogStore, - snapshot_store: &MemSnapshotStore, - ) -> Option { - let recoverer = SnapshotRecoverer { - table_id, - space_id, - log_store: log_store.clone(), - snapshot_store: snapshot_store.clone(), - }; - recoverer.recover().await.unwrap() - } - - #[test] - fn test_simple_snapshot() { - let ctx = Arc::new(TestContext::new( - "snapshot_merge_no_snapshot", - SchemaId::from_u32(0), - )); - let table_id = ctx.alloc_table_id(); - let input_updates = vec![ - ctx.meta_update_add_table(table_id), - ctx.meta_update_version_edit(table_id, Some(1)), - ctx.meta_update_version_edit(table_id, Some(3)), - ]; - - run_snapshot_test(ctx, table_id, input_updates, vec![]); - } - - #[test] - fn test_snapshot_drop_table() { - let ctx = Arc::new(TestContext::new( - "snapshot_drop_table", - SchemaId::from_u32(0), - )); - let table_id = ctx.alloc_table_id(); - let input_updates = vec![ - ctx.meta_update_add_table(table_id), - ctx.meta_update_drop_table(table_id), - ]; - - run_snapshot_test(ctx, table_id, input_updates, vec![]); - } - - #[test] - fn test_snapshot_twice() { - let ctx = Arc::new(TestContext::new( - "snapshot_merge_no_snapshot", - SchemaId::from_u32(0), - )); - let table_id = ctx.alloc_table_id(); - let input_updates = vec![ - ctx.meta_update_add_table(table_id), - ctx.meta_update_version_edit(table_id, Some(1)), - ctx.meta_update_version_edit(table_id, Some(3)), - ]; - let updates_after_snapshot = vec![ - ctx.meta_update_version_edit(table_id, Some(4)), - ctx.meta_update_version_edit(table_id, Some(8)), - ]; - - run_snapshot_test(ctx, table_id, input_updates, updates_after_snapshot); - } -} diff --git a/src/analytic_engine/src/manifest/error.rs b/src/analytic_engine/src/manifest/error.rs deleted file mode 100644 index 632aaaad36..0000000000 --- a/src/analytic_engine/src/manifest/error.rs +++ /dev/null @@ -1,47 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
-
-use thiserror::Error;
-
-use crate::ErrorKind;
-
-#[derive(Debug, Error)]
-#[error(transparent)]
-pub struct Error(#[from] InnerError);
-
-impl From<anyhow::Error> for Error {
-    fn from(source: anyhow::Error) -> Self {
-        Self(InnerError::Other { source })
-    }
-}
-
-impl Error {
-    pub fn kind(&self) -> ErrorKind {
-        match self.0 {
-            InnerError::Other { .. } => ErrorKind::Internal,
-        }
-    }
-}
-
-#[derive(Error, Debug)]
-pub(crate) enum InnerError {
-    #[error(transparent)]
-    Other {
-        #[from]
-        source: anyhow::Error,
-    },
-}
diff --git a/src/analytic_engine/src/manifest/meta_edit.rs b/src/analytic_engine/src/manifest/meta_edit.rs
deleted file mode 100644
index 62a0848d67..0000000000
--- a/src/analytic_engine/src/manifest/meta_edit.rs
+++ /dev/null
@@ -1,495 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Update to meta
-
-use std::convert::TryFrom;
-
-use anyhow::Context;
-use bytes_ext::{Buf, BufMut};
-use common_types::{
-    schema::{Schema, Version},
-    SequenceNumber,
-};
-use horaedbproto::{manifest as manifest_pb, schema as schema_pb};
-use prost::Message;
-use table_engine::table::TableId;
-use wal::log_batch::{Payload, PayloadDecodeContext, PayloadDecoder};
-
-use crate::{
-    manifest::{meta_snapshot::MetaSnapshot, Error, Result},
-    space::SpaceId,
-    sst::manager::FileId,
-    table::{
-        data::{MemTableId, TableCatalogInfo, TableShardInfo},
-        version::TableVersionMeta,
-        version_edit::{AddFile, DeleteFile, VersionEdit},
-    },
-    TableOptions,
-};
-
-/// Modifications to meta data in meta
-#[derive(Debug, Clone)]
-pub enum MetaUpdate {
-    AddTable(AddTableMeta),
-    DropTable(DropTableMeta),
-    VersionEdit(VersionEditMeta),
-    AlterSchema(AlterSchemaMeta),
-    AlterOptions(AlterOptionsMeta),
-}
-
-impl From<MetaUpdate> for manifest_pb::MetaUpdate {
-    fn from(update: MetaUpdate) -> Self {
-        let meta = match update {
-            MetaUpdate::AddTable(v) => manifest_pb::meta_update::Meta::AddTable(v.into()),
-            MetaUpdate::VersionEdit(v) => manifest_pb::meta_update::Meta::VersionEdit(v.into()),
-            MetaUpdate::AlterSchema(v) => manifest_pb::meta_update::Meta::AlterSchema(v.into()),
-            MetaUpdate::AlterOptions(v) => manifest_pb::meta_update::Meta::AlterOptions(v.into()),
-            MetaUpdate::DropTable(v) => manifest_pb::meta_update::Meta::DropTable(v.into()),
-        };
-
-        manifest_pb::MetaUpdate { meta: Some(meta) }
-    }
-}
-
-impl MetaUpdate {
-    pub fn table_id(&self) -> TableId {
-        match self {
-            MetaUpdate::AddTable(v) => v.table_id,
-            MetaUpdate::VersionEdit(v) => v.table_id,
-            MetaUpdate::AlterSchema(v) => v.table_id,
-            MetaUpdate::AlterOptions(v) => v.table_id,
-            MetaUpdate::DropTable(v) => v.table_id,
-        }
-    }
-
-    pub fn space_id(&self) -> SpaceId {
-        match self {
-            MetaUpdate::AddTable(v) => v.space_id,
-            MetaUpdate::VersionEdit(v) => v.space_id,
-            MetaUpdate::AlterSchema(v) =>
v.space_id, - MetaUpdate::AlterOptions(v) => v.space_id, - MetaUpdate::DropTable(v) => v.space_id, - } - } -} - -impl TryFrom for MetaUpdate { - type Error = Error; - - fn try_from(src: manifest_pb::MetaUpdate) -> Result { - let meta_update = match src.meta.context("Empty meta update.")? { - manifest_pb::meta_update::Meta::AddTable(v) => { - let add_table = AddTableMeta::try_from(v)?; - MetaUpdate::AddTable(add_table) - } - manifest_pb::meta_update::Meta::VersionEdit(v) => { - let version_edit = VersionEditMeta::try_from(v)?; - MetaUpdate::VersionEdit(version_edit) - } - manifest_pb::meta_update::Meta::AlterSchema(v) => { - let alter_schema = AlterSchemaMeta::try_from(v)?; - MetaUpdate::AlterSchema(alter_schema) - } - manifest_pb::meta_update::Meta::AlterOptions(v) => { - let alter_options = AlterOptionsMeta::try_from(v)?; - MetaUpdate::AlterOptions(alter_options) - } - manifest_pb::meta_update::Meta::DropTable(v) => { - let drop_table = DropTableMeta::from(v); - MetaUpdate::DropTable(drop_table) - } - }; - - Ok(meta_update) - } -} - -/// Meta data for a new table -#[derive(Debug, Clone, PartialEq)] -pub struct AddTableMeta { - /// Space id of the table - pub space_id: SpaceId, - pub table_id: TableId, - pub table_name: String, - /// Schema of the table - pub schema: Schema, - // Options needed to persist - pub opts: TableOptions, -} - -impl From for manifest_pb::AddTableMeta { - fn from(v: AddTableMeta) -> Self { - manifest_pb::AddTableMeta { - space_id: v.space_id, - table_id: v.table_id.as_u64(), - table_name: v.table_name, - schema: Some(schema_pb::TableSchema::from(&v.schema)), - options: Some(manifest_pb::TableOptions::from(v.opts)), - // Deprecated. - partition_info: None, - } - } -} - -impl TryFrom for AddTableMeta { - type Error = Error; - - fn try_from(src: manifest_pb::AddTableMeta) -> Result { - let table_schema = src.schema.context("Empty table schema.")?; - let opts = src.options.context("Empty table options.")?; - - Ok(Self { - space_id: src.space_id, - table_id: TableId::from(src.table_id), - table_name: src.table_name, - schema: Schema::try_from(table_schema).map_err(anyhow::Error::new)?, - opts: TableOptions::try_from(opts).map_err(anyhow::Error::new)?, - }) - } -} - -/// Meta data for dropping a table -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct DropTableMeta { - /// Space id of the table - pub space_id: SpaceId, - pub table_id: TableId, - pub table_name: String, -} - -impl From for manifest_pb::DropTableMeta { - fn from(v: DropTableMeta) -> Self { - manifest_pb::DropTableMeta { - space_id: v.space_id, - table_id: v.table_id.as_u64(), - table_name: v.table_name, - } - } -} - -impl From for DropTableMeta { - fn from(src: manifest_pb::DropTableMeta) -> Self { - Self { - space_id: src.space_id, - table_id: TableId::from(src.table_id), - table_name: src.table_name, - } - } -} - -/// Meta data of version edit to table -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct VersionEditMeta { - pub space_id: SpaceId, - pub table_id: TableId, - /// Sequence number of the flushed data. Set to 0 if this edit is not - /// created by a flush request. - pub flushed_sequence: SequenceNumber, - pub files_to_add: Vec, - pub files_to_delete: Vec, - /// Id of memtables to remove from immutable memtable lists. - /// No need to persist. - pub mems_to_remove: Vec, - pub max_file_id: FileId, -} - -impl VersionEditMeta { - /// Convert into [crate::table::version_edit::VersionEdit]. The - /// `mems_to_remove` field is left empty. 
- pub fn into_version_edit(self) -> VersionEdit { - VersionEdit { - mems_to_remove: Vec::new(), - flushed_sequence: self.flushed_sequence, - files_to_add: self.files_to_add, - files_to_delete: self.files_to_delete, - max_file_id: self.max_file_id, - } - } -} - -impl From for manifest_pb::VersionEditMeta { - fn from(v: VersionEditMeta) -> Self { - let files_to_add = v.files_to_add.into_iter().map(|file| file.into()).collect(); - let files_to_delete = v - .files_to_delete - .into_iter() - .map(|file| file.into()) - .collect(); - manifest_pb::VersionEditMeta { - space_id: v.space_id, - table_id: v.table_id.as_u64(), - flushed_sequence: v.flushed_sequence, - files_to_add, - files_to_delete, - max_file_id: v.max_file_id, - } - } -} - -impl TryFrom for VersionEditMeta { - type Error = Error; - - fn try_from(src: manifest_pb::VersionEditMeta) -> Result { - let mut files_to_add = Vec::with_capacity(src.files_to_add.len()); - for file_meta in src.files_to_add { - files_to_add.push(AddFile::try_from(file_meta).map_err(anyhow::Error::new)?); - } - - let mut files_to_delete = Vec::with_capacity(src.files_to_delete.len()); - for file_meta in src.files_to_delete { - files_to_delete.push(DeleteFile::try_from(file_meta).map_err(anyhow::Error::new)?); - } - - Ok(Self { - space_id: src.space_id, - table_id: TableId::from(src.table_id), - flushed_sequence: src.flushed_sequence, - files_to_add, - files_to_delete, - mems_to_remove: Vec::default(), - max_file_id: src.max_file_id, - }) - } -} - -/// Meta data of schema update. -#[derive(Debug, Clone, PartialEq)] -pub struct AlterSchemaMeta { - pub space_id: SpaceId, - pub table_id: TableId, - pub schema: Schema, - pub pre_schema_version: Version, -} - -impl From for manifest_pb::AlterSchemaMeta { - fn from(v: AlterSchemaMeta) -> Self { - manifest_pb::AlterSchemaMeta { - space_id: v.space_id, - table_id: v.table_id.as_u64(), - schema: Some(schema_pb::TableSchema::from(&v.schema)), - pre_schema_version: v.pre_schema_version, - } - } -} - -impl TryFrom for AlterSchemaMeta { - type Error = Error; - - fn try_from(src: manifest_pb::AlterSchemaMeta) -> Result { - let table_schema = src.schema.context("Empty table schema.")?; - - Ok(Self { - space_id: src.space_id, - table_id: TableId::from(src.table_id), - schema: Schema::try_from(table_schema).map_err(anyhow::Error::new)?, - pre_schema_version: src.pre_schema_version, - }) - } -} - -/// Meta data of options update. 
-#[derive(Debug, Clone, PartialEq)] -pub struct AlterOptionsMeta { - pub space_id: SpaceId, - pub table_id: TableId, - pub options: TableOptions, -} - -impl From for manifest_pb::AlterOptionsMeta { - fn from(v: AlterOptionsMeta) -> Self { - manifest_pb::AlterOptionsMeta { - space_id: v.space_id, - table_id: v.table_id.as_u64(), - options: Some(manifest_pb::TableOptions::from(v.options)), - } - } -} - -impl TryFrom for AlterOptionsMeta { - type Error = Error; - - fn try_from(src: manifest_pb::AlterOptionsMeta) -> Result { - let table_options = src.options.context("Empty table options.")?; - - Ok(Self { - space_id: src.space_id, - table_id: TableId::from(src.table_id), - options: TableOptions::try_from(table_options).map_err(anyhow::Error::new)?, - }) - } -} - -/// An adapter to implement [wal::log_batch::Payload] for -/// [proto::meta_update::MetaUpdate] -#[derive(Debug)] -pub struct MetaUpdatePayload(manifest_pb::MetaUpdate); - -impl From for MetaUpdatePayload { - fn from(src: MetaUpdate) -> Self { - Self(src.into()) - } -} - -impl From<&MetaUpdate> for MetaUpdatePayload { - fn from(src: &MetaUpdate) -> Self { - Self::from(src.clone()) - } -} - -impl Payload for MetaUpdatePayload { - type Error = Error; - - fn encode_size(&self) -> usize { - self.0.encoded_len() - } - - fn encode_to(&self, buf: &mut B) -> Result<()> { - self.0.encode(buf).map_err(anyhow::Error::new)?; - Ok(()) - } -} - -/// Decoder to decode MetaUpdate from log entry -pub struct MetaUpdateDecoder; - -impl PayloadDecoder for MetaUpdateDecoder { - type Error = Error; - type Target = MetaUpdate; - - fn decode(&self, _ctx: &PayloadDecodeContext, buf: &mut B) -> Result { - let meta_update_pb = - manifest_pb::MetaUpdate::decode(buf.chunk()).map_err(anyhow::Error::new)?; - MetaUpdate::try_from(meta_update_pb) - } -} - -/// The snapshot for the current logs. -#[derive(Debug, Clone, PartialEq)] -pub struct Snapshot { - /// The end sequence of the logs that this snapshot covers. - /// Basically it is the latest sequence number of the logs when creating a - /// new snapshot. - pub end_seq: SequenceNumber, - /// The data of the snapshot. - /// None means the table not exists(maybe dropped or not created yet). 
- pub data: Option, -} - -impl TryFrom for Snapshot { - type Error = Error; - - fn try_from(src: manifest_pb::Snapshot) -> Result { - let meta = src.meta.map(AddTableMeta::try_from).transpose()?; - - let version_edit = src - .version_edit - .map(VersionEditMeta::try_from) - .transpose()?; - - let version_meta = version_edit.map(|v| { - let mut version_meta = TableVersionMeta::default(); - version_meta.apply_edit(v.into_version_edit()); - version_meta - }); - - let table_manifest_data = meta.map(|v| MetaSnapshot { - table_meta: v, - version_meta, - }); - Ok(Self { - end_seq: src.end_seq, - data: table_manifest_data, - }) - } -} - -impl From for manifest_pb::Snapshot { - fn from(src: Snapshot) -> Self { - if let Some((meta, version_edit)) = src.data.map(|v| { - let space_id = v.table_meta.space_id; - let table_id = v.table_meta.table_id; - let table_meta = manifest_pb::AddTableMeta::from(v.table_meta); - let version_edit = v.version_meta.map(|version_meta| VersionEditMeta { - space_id, - table_id, - flushed_sequence: version_meta.flushed_sequence, - files_to_add: version_meta.ordered_files(), - files_to_delete: vec![], - mems_to_remove: vec![], - max_file_id: version_meta.max_file_id, - }); - ( - table_meta, - version_edit.map(manifest_pb::VersionEditMeta::from), - ) - }) { - Self { - end_seq: src.end_seq, - meta: Some(meta), - version_edit, - } - } else { - Self { - end_seq: src.end_seq, - meta: None, - version_edit: None, - } - } - } -} - -#[derive(Debug, Clone)] -pub enum MetaEdit { - Update(MetaUpdate), - Snapshot(MetaSnapshot), -} - -impl TryFrom for MetaUpdate { - type Error = Error; - - fn try_from(value: MetaEdit) -> std::result::Result { - if let MetaEdit::Update(update) = value { - Ok(update) - } else { - Err(Self::Error::from(anyhow::anyhow!( - "Failed to convert meta edit, it is not the update type meta edit" - ))) - } - } -} - -impl TryFrom for MetaSnapshot { - type Error = Error; - - fn try_from(value: MetaEdit) -> std::result::Result { - if let MetaEdit::Snapshot(table_manifest_data) = value { - Ok(table_manifest_data) - } else { - Err(Self::Error::from(anyhow::anyhow!( - "Failed to convert meta edit, it is not the snapshot type meta edit" - ))) - } - } -} - -#[derive(Debug, Clone)] -pub struct MetaEditRequest { - pub shard_info: TableShardInfo, - pub meta_edit: MetaEdit, - pub table_catalog_info: TableCatalogInfo, -} diff --git a/src/analytic_engine/src/manifest/meta_snapshot.rs b/src/analytic_engine/src/manifest/meta_snapshot.rs deleted file mode 100644 index 2dc5a1b1c6..0000000000 --- a/src/analytic_engine/src/manifest/meta_snapshot.rs +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Meta data of manifest. 
-
-use logger::debug;
-use macros::ensure;
-
-use crate::{
-    manifest::{
-        meta_edit::{AddTableMeta, MetaUpdate},
-        Result,
-    },
-    table::version::TableVersionMeta,
-};
-
-#[derive(Debug, Clone, PartialEq)]
-pub struct MetaSnapshot {
-    pub table_meta: AddTableMeta,
-    pub version_meta: Option<TableVersionMeta>,
-}
-
-#[derive(Clone, Debug, Default)]
-pub struct MetaSnapshotBuilder {
-    table_meta: Option<AddTableMeta>,
-    version_meta: Option<TableVersionMeta>,
-}
-
-impl MetaSnapshotBuilder {
-    pub fn new(table_meta: Option<AddTableMeta>, version_meta: Option<TableVersionMeta>) -> Self {
-        Self {
-            table_meta,
-            version_meta,
-        }
-    }
-
-    pub fn build(mut self) -> Option<MetaSnapshot> {
-        let version_meta = self.version_meta.take();
-        self.table_meta.map(|v| MetaSnapshot {
-            table_meta: v,
-            version_meta,
-        })
-    }
-
-    #[inline]
-    pub fn is_table_exists(&self) -> bool {
-        self.table_meta.is_some()
-    }
-
-    /// Apply the meta update.
-    ///
-    /// Any update except [`MetaUpdate::AddTable`] on a non-exist table will
-    /// fail.
-    pub fn apply_update(&mut self, update: MetaUpdate) -> Result<()> {
-        debug!("Apply meta update, update:{:?}", update);
-
-        if let MetaUpdate::AddTable(_) = &update {
-        } else {
-            ensure!(
-                self.is_table_exists(),
-                "Apply update on non-exist table, meta update:{update:?}",
-            );
-        }
-
-        match update {
-            MetaUpdate::AddTable(meta) => {
-                self.table_meta = Some(meta);
-            }
-            MetaUpdate::VersionEdit(meta) => {
-                let edit = meta.into_version_edit();
-                let mut version = self.version_meta.take().unwrap_or_default();
-                version.apply_edit(edit);
-                self.version_meta = Some(version);
-            }
-            MetaUpdate::AlterSchema(meta) => {
-                let table_meta = self.table_meta.as_mut().unwrap();
-                table_meta.schema = meta.schema;
-            }
-            MetaUpdate::AlterOptions(meta) => {
-                let table_meta = self.table_meta.as_mut().unwrap();
-                table_meta.opts = meta.options;
-            }
-            MetaUpdate::DropTable(meta) => {
-                self.table_meta = None;
-                self.version_meta = None;
-                debug!(
-                    "Apply drop table meta update, removed table:{}",
-                    meta.table_name,
-                );
-            }
-        }
-
-        Ok(())
-    }
-}
diff --git a/src/analytic_engine/src/manifest/mod.rs b/src/analytic_engine/src/manifest/mod.rs
deleted file mode 100644
index 05a5419239..0000000000
--- a/src/analytic_engine/src/manifest/mod.rs
+++ /dev/null
@@ -1,59 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Manage meta data of the engine
-
-pub mod details;
-pub mod error;
-pub mod meta_edit;
-pub mod meta_snapshot;
-
-use std::{fmt, sync::Arc};
-
-use async_trait::async_trait;
-use common_types::table::ShardId;
-pub use error::Error;
-use generic_error::GenericResult;
-use macros::define_result;
-use table_engine::table::TableId;
-
-use crate::{manifest::meta_edit::MetaEditRequest, space::SpaceId, table::data::TableCatalogInfo};
-
-define_result!(error::Error);
-
-#[derive(Debug)]
-pub struct LoadRequest {
-    pub space_id: SpaceId,
-    pub table_id: TableId,
-    pub shard_id: ShardId,
-    pub table_catalog_info: TableCatalogInfo,
-}
-
-pub type SnapshotRequest = LoadRequest;
-/// Manifest holds meta data of all tables.
-#[async_trait]
-pub trait Manifest: Send + Sync + fmt::Debug {
-    /// Apply edit to table metas, store it to storage.
-    async fn apply_edit(&self, request: MetaEditRequest) -> GenericResult<()>;
-
-    /// Recover table metas from storage.
-    async fn recover(&self, load_request: &LoadRequest) -> GenericResult<()>;
-
-    async fn do_snapshot(&self, request: SnapshotRequest) -> GenericResult<()>;
-}
-
-pub type ManifestRef = Arc<dyn Manifest>;
diff --git a/src/analytic_engine/src/memtable/columnar/factory.rs b/src/analytic_engine/src/memtable/columnar/factory.rs
deleted file mode 100644
index 5b351f9e65..0000000000
--- a/src/analytic_engine/src/memtable/columnar/factory.rs
+++ /dev/null
@@ -1,53 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Columnar memtable factory
-
-use std::{
-    collections::HashMap,
-    sync::{
-        atomic::{AtomicU64, AtomicUsize},
-        Arc, RwLock,
-    },
-};
-
-use crate::memtable::{
-    columnar::ColumnarMemTable,
-    factory::{Factory, Options},
-    MemTableRef, Result,
-};
-/// Factory to create memtable
-#[derive(Debug)]
-pub struct ColumnarMemTableFactory;
-
-impl Factory for ColumnarMemTableFactory {
-    fn create_memtable(&self, opts: Options) -> Result<MemTableRef> {
-        let memtable = Arc::new(ColumnarMemTable {
-            memtable: Arc::new(RwLock::new(HashMap::with_capacity(
-                opts.schema.num_columns(),
-            ))),
-            schema: opts.schema.clone(),
-            last_sequence: AtomicU64::new(opts.creation_sequence),
-            row_num: AtomicUsize::new(0),
-            opts,
-            memtable_size: AtomicUsize::new(0),
-            metrics: Default::default(),
-        });
-
-        Ok(memtable)
-    }
-}
diff --git a/src/analytic_engine/src/memtable/columnar/iter.rs b/src/analytic_engine/src/memtable/columnar/iter.rs
deleted file mode 100644
index 5400fbd993..0000000000
--- a/src/analytic_engine/src/memtable/columnar/iter.rs
+++ /dev/null
@@ -1,399 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{ - cmp::Ordering, - collections::HashMap, - ops::Bound, - sync::{Arc, RwLock}, - time::Instant, -}; - -use anyhow::Context; -use arena::{Arena, BasicStats, MonoIncArena}; -use bytes_ext::{ByteVec, Bytes}; -use codec::{memcomparable::MemComparable, row, Encoder}; -use common_types::{ - column::Column, - column_schema::ColumnId, - datum::Datum, - projected_schema::RowProjector, - record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, - row::Row, - schema::Schema, - SequenceNumber, -}; -use logger::trace; -use macros::ensure; -use parquet::data_type::AsBytes; -use skiplist::{ArenaSlice, BytewiseComparator, IterRef, Skiplist}; - -use crate::memtable::{ - key::{self, KeySequence, SequenceCodec}, - Result, ScanContext, ScanRequest, -}; - -/// Iterator state -#[derive(Debug, PartialEq)] -enum State { - /// The iterator struct is created but not initialized - Uninitialized, - /// The iterator is initialized (seek) - Initialized, - /// No more element the iterator can return - Finished, -} - -/// Columnar iterator for [ColumnarMemTable] -pub struct ColumnarIterImpl + Clone + Sync + Send> { - memtable: Arc>>, - row_num: usize, - current_idx: usize, - // Schema related: - /// Schema of this memtable, used to decode row - memtable_schema: Schema, - /// Projection of schema to read - row_projector: RowProjector, - - // Options related: - batch_size: usize, - deadline: Option, - - start_user_key: Bound, - end_user_key: Bound, - /// The last sequence of the memtable. - last_sequence: SequenceNumber, - - /// State of iterator - state: State, - - /// Dedup rows with key - need_dedup: bool, - - skiplist: Skiplist, - /// The internal skiplist iter - iter: IterRef, BytewiseComparator, A>, - last_internal_key: Option>, -} - -impl + Clone + Sync + Send> ColumnarIterImpl { - /// Create a new [ColumnarIterImpl]. - pub fn new( - memtable: Arc>>, - row_num: usize, - schema: Schema, - ctx: ScanContext, - request: ScanRequest, - last_sequence: SequenceNumber, - skiplist: Skiplist, - ) -> Result { - let row_projector = request - .row_projector_builder - .build(&schema) - .context("ProjectSchema")?; - let mut columnar_iter = Self { - memtable, - row_num, - current_idx: 0, - memtable_schema: schema, - row_projector, - batch_size: ctx.batch_size, - deadline: ctx.deadline, - start_user_key: request.start_user_key, - end_user_key: request.end_user_key, - state: State::Uninitialized, - need_dedup: request.need_dedup, - iter: skiplist.iter(), - skiplist, - last_internal_key: None, - last_sequence, - }; - - columnar_iter.init()?; - - Ok(columnar_iter) - } - - /// Init the iterator, will seek to the proper position for first next() - /// call, so the first entry next() returned is after the - /// `start_user_key`, but we still need to check `end_user_key`. - fn init(&mut self) -> Result<()> { - self.current_idx = 0; - self.state = State::Initialized; - // If need_dedup is true, we need to build the skiplist to dedup. 
- if self.need_dedup { - // TODO: remove the lock or else it will block write. - let memtable = self.memtable.read().unwrap(); - let mut key_vec = vec![ByteVec::new(); self.row_num]; - let encoder = MemComparable; - - for idx in self.memtable_schema.primary_key_indexes() { - let column_schema = self.memtable_schema.column(*idx); - let column = memtable - .get(&column_schema.id) - .with_context(|| format!("column not found, column:{}", column_schema.name))?; - for (i, key) in key_vec.iter_mut().enumerate().take(self.row_num) { - let datum = column.get_datum(i); - encoder.encode(key, &datum).context("encode key")?; - } - } - - // TODO: Persist the skiplist. - for (i, mut key) in key_vec.into_iter().enumerate() { - SequenceCodec - .encode(&mut key, &KeySequence::new(self.last_sequence, i as u32)) - .context("encode key sequence")?; - self.skiplist.put(&key, (i as u32).to_le_bytes().as_slice()); - } - - match &self.start_user_key { - Bound::Included(user_key) => { - // Seek the skiplist - self.iter.seek(user_key.as_bytes()); - } - Bound::Excluded(user_key) => { - // Construct seek key, just seek to the key with next prefix, so there is no - // need to skip the key until we meet the first key > - // start_user_key - let seek_key = row::key_prefix_next(user_key); - - // Seek the skiplist - self.iter.seek(seek_key.as_ref()); - } - Bound::Unbounded => self.iter.seek_to_first(), - } - } - Ok(()) - } - - /// Fetch next record batch - fn fetch_next_record_batch(&mut self) -> Result> { - debug_assert_eq!(State::Initialized, self.state); - assert!(self.batch_size > 0); - let rows = if !self.need_dedup { - self.fetch_next_record_batch_rows_no_dedup()? - } else { - self.fetch_next_record_batch_rows()? - }; - - if !rows.is_empty() { - if let Some(deadline) = self.deadline { - let now = Instant::now(); - ensure!( - now < deadline, - "iter timeout, now:{now:?}, deadline:{deadline:?}" - ); - } - - let fetched_schema = self.row_projector.fetched_schema().clone(); - let primary_key_indexes = self - .row_projector - .primary_key_indexes() - .map(|idxs| idxs.to_vec()); - let mut builder = FetchedRecordBatchBuilder::with_capacity( - fetched_schema, - primary_key_indexes, - self.batch_size, - ); - for row in rows.into_iter() { - builder.append_row(row).context("AppendRow")?; - } - - let batch = builder.build().context("BuildRecordBatch")?; - trace!("column iterator send one batch:{:?}", batch); - Ok(Some(batch)) - } else { - // If iter is invalid after seek (nothing matched), then it may not be marked as - // finished yet. 
- self.finish(); - Ok(None) - } - } - - /// Fetch next row matched the given condition, the current entry of iter - /// will be considered - /// - /// REQUIRE: The iter is valid - fn fetch_next_row(&mut self) -> Result>> { - debug_assert_eq!(State::Initialized, self.state); - - // TODO(yingwen): Some operation like delete needs to be considered during - // iterating: we need to ignore this key if found a delete mark - while self.iter.valid() { - // Fetch current entry - let key = self.iter.key(); - let (user_key, _) = - key::user_key_from_internal_key(key).context("DecodeInternalKey")?; - - // Check user key is still in range - if self.is_after_end_bound(user_key) { - // Out of bound - self.finish(); - return Ok(None); - } - - if self.need_dedup { - // Whether this user key is already returned - let same_key = match &self.last_internal_key { - Some(last_internal_key) => { - // TODO(yingwen): Actually this call wont fail, only valid internal key will - // be set as last_internal_key so maybe we can just - // unwrap it? - let (last_user_key, _) = key::user_key_from_internal_key(last_internal_key) - .context("DecodeInternalKey")?; - user_key == last_user_key - } - // This is the first user key - None => false, - }; - - if same_key { - // We meet duplicate key, move forward and continue to find next user key - self.iter.next(); - continue; - } - // Now this is a new user key - } - - // This is the row we want - let row = self.iter.value_with_arena(); - - // Store the last key - self.last_internal_key = Some(self.iter.key_with_arena()); - // Move iter forward - self.iter.next(); - - return Ok(Some(row)); - } - - // No more row in range, we can stop the iterator - self.finish(); - Ok(None) - } - - fn fetch_next_record_batch_rows(&mut self) -> Result> { - let mut num_rows = 0; - let mut row_idxs = Vec::with_capacity(self.batch_size); - while self.iter.valid() && num_rows < self.batch_size { - if let Some(row) = self.fetch_next_row()? { - let mut buf = [0u8; 4]; - buf.copy_from_slice(&row); - let idx = u32::from_le_bytes(buf); - row_idxs.push(idx); - num_rows += 1; - } else { - // There is no more row to fetch. 
- self.finish(); - break; - } - } - - let memtable = self.memtable.read().unwrap(); - let mut rows = vec![ - Row::from_datums(vec![Datum::Null; self.memtable_schema.num_columns()]); - self.batch_size - ]; - for (col_idx, column_schema_idx) in self - .row_projector - .fetched_source_column_indexes() - .iter() - .enumerate() - { - if let Some(column_schema_idx) = column_schema_idx { - let column_schema = self.memtable_schema.column(*column_schema_idx); - if let Some(column) = memtable.get(&column_schema.id) { - for (i, row_idx) in row_idxs.iter().enumerate() { - let datum = column.get_datum(*row_idx as usize); - rows[i][col_idx] = datum; - } - } - } - } - rows.resize(num_rows, Row::from_datums(vec![])); - Ok(rows) - } - - /// Fetch next record batch - fn fetch_next_record_batch_rows_no_dedup(&mut self) -> Result> { - let mut num_rows = 0; - let memtable = self.memtable.read().unwrap(); - - let record_schema = self.row_projector.fetched_schema(); - let mut rows = - vec![Row::from_datums(vec![Datum::Null; record_schema.num_columns()]); self.batch_size]; - - for (col_idx, column_schema_idx) in self - .row_projector - .fetched_source_column_indexes() - .iter() - .enumerate() - { - if let Some(column_schema_idx) = column_schema_idx { - let column_schema = self.memtable_schema.column(*column_schema_idx); - if let Some(column) = memtable.get(&column_schema.id) { - for (i, row) in rows.iter_mut().enumerate().take(self.batch_size) { - let row_idx = self.current_idx + i; - if row_idx >= column.len() { - break; - } - if col_idx == 0 { - num_rows += 1; - } - let datum = column.get_datum(row_idx); - row[col_idx] = datum; - } - } - } - } - rows.resize(num_rows, Row::from_datums(vec![])); - self.current_idx += num_rows; - Ok(rows) - } - - /// Return true if the key is after the `end_user_key` bound - fn is_after_end_bound(&self, key: &[u8]) -> bool { - match &self.end_user_key { - Bound::Included(end) => match key.cmp(end) { - Ordering::Less | Ordering::Equal => false, - Ordering::Greater => true, - }, - Bound::Excluded(end) => match key.cmp(end) { - Ordering::Less => false, - Ordering::Equal | Ordering::Greater => true, - }, - // All key is valid - Bound::Unbounded => false, - } - } - - /// Mark the iterator state to finished and return None - fn finish(&mut self) { - self.state = State::Finished; - } -} - -impl Iterator for ColumnarIterImpl { - type Item = Result; - - fn next(&mut self) -> Option { - if self.state != State::Initialized { - return None; - } - - self.fetch_next_record_batch().transpose() - } -} diff --git a/src/analytic_engine/src/memtable/columnar/mod.rs b/src/analytic_engine/src/memtable/columnar/mod.rs deleted file mode 100644 index 33bd86bdff..0000000000 --- a/src/analytic_engine/src/memtable/columnar/mod.rs +++ /dev/null @@ -1,233 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{ - collections::HashMap, - sync::{ - atomic::{AtomicU64, AtomicUsize, Ordering}, - Arc, RwLock, - }, -}; - -use anyhow::Context; -use arena::MonoIncArena; -use bytes_ext::Bytes; -use common_types::{ - column::Column, column_schema::ColumnId, datum::Datum, row::Row, schema::Schema, - time::TimeRange, SequenceNumber, -}; -use logger::debug; -use macros::ensure; -use skiplist::{BytewiseComparator, Skiplist}; - -use crate::memtable::{ - columnar::iter::ColumnarIterImpl, factory::Options, key::KeySequence, - reversed_iter::ReversedColumnarIterator, ColumnarIterPtr, MemTable, Metrics as MemtableMetrics, - PutContext, Result, ScanContext, ScanRequest, -}; - -pub mod factory; -pub mod iter; - -#[derive(Default, Debug)] -struct Metrics { - row_raw_size: AtomicUsize, - row_count: AtomicUsize, -} - -pub struct ColumnarMemTable { - /// Schema of this memtable, is immutable. - schema: Schema, - memtable: Arc>>, - /// The last sequence of the rows in this memtable. Update to this field - /// require external synchronization. - last_sequence: AtomicU64, - row_num: AtomicUsize, - opts: Options, - memtable_size: AtomicUsize, - - metrics: Metrics, -} - -impl ColumnarMemTable { - // TODO: Optimize memtable size calculation. - fn memtable_size(&self) -> usize { - self.memtable - .read() - .unwrap() - .iter() - .map(|(_, column)| column.size()) - .sum() - } -} - -impl MemTable for ColumnarMemTable { - fn schema(&self) -> &Schema { - &self.schema - } - - fn min_key(&self) -> Option { - // TODO: columnar memtable should support min_key and max_key - Some(Bytes::from("0")) - } - - fn max_key(&self) -> Option { - // TODO: columnar memtable should support min_key and max_key - Some(Bytes::from("9")) - } - - // Now the caller is required to encode the row into the `value_buf` in - // PutContext first. - fn put( - &self, - ctx: &mut PutContext, - _sequence: KeySequence, - row: &Row, - schema: &Schema, - ) -> Result<()> { - let mut columns = HashMap::with_capacity(schema.num_columns()); - - for (i, column_schema) in schema.columns().iter().enumerate() { - let column = if let Some(column) = columns.get_mut(&column_schema.id) { - column - } else { - // TODO: impl append() one row in column, avoid memory expansion. - let column = Column::with_capacity(1, column_schema.data_type) - .context("new column failed")?; - columns.insert(column_schema.id, column); - columns - .get_mut(&column_schema.id) - .context("get column failed")? - }; - - if let Some(writer_index) = ctx.index_in_writer.column_index_in_writer(i) { - let datum = &row[writer_index]; - if datum == &Datum::Null { - column.append_nulls(1); - } else { - column - .append_datum_ref(&row[writer_index]) - .context("append datum failed")? - } - } else { - column.append_nulls(1); - } - } - { - let mut memtable = self.memtable.write().unwrap(); - for (k, v) in columns { - if let Some(column) = memtable.get_mut(&k) { - column.append_column(v).context("append column")?; - } else { - memtable.insert(k, v); - }; - } - } - - self.row_num.fetch_add(1, Ordering::Acquire); - - // May have performance issue. 
- self.memtable_size - .store(self.memtable_size(), Ordering::Relaxed); - - // Update metrics - self.metrics - .row_raw_size - .fetch_add(row.size(), Ordering::Relaxed); - self.metrics.row_count.fetch_add(1, Ordering::Relaxed); - - Ok(()) - } - - fn scan(&self, ctx: ScanContext, request: ScanRequest) -> Result { - debug!( - "Scan columnar memtable, ctx:{:?}, request:{:?}", - ctx, request - ); - - let timestamp_column = self - .schema - .columns() - .get(self.schema.timestamp_index()) - .context("timestamp column is missing")?; - - let num_rows = self - .memtable - .read() - .unwrap() - .get(×tamp_column.id) - .context("get timestamp column failed")? - .len(); - let (reverse, batch_size) = (request.reverse, ctx.batch_size); - let arena = MonoIncArena::with_collector( - self.opts.arena_block_size as usize, - self.opts.collector.clone(), - ); - let skiplist = Skiplist::with_arena(BytewiseComparator, arena); - let iter = ColumnarIterImpl::new( - self.memtable.clone(), - self.row_num.load(Ordering::Relaxed), - self.schema.clone(), - ctx, - request, - self.last_sequence.load(Ordering::Relaxed), - skiplist, - )?; - if reverse { - Ok(Box::new(ReversedColumnarIterator::new( - iter, num_rows, batch_size, - ))) - } else { - Ok(Box::new(iter)) - } - } - - fn approximate_memory_usage(&self) -> usize { - self.memtable_size.load(Ordering::Relaxed) - } - - fn set_last_sequence(&self, sequence: SequenceNumber) -> Result<()> { - let last = self.last_sequence(); - ensure!( - sequence >= last, - "invalid sequence, given:{sequence}, last:{last}" - ); - - self.last_sequence.store(sequence, Ordering::Relaxed); - - Ok(()) - } - - fn last_sequence(&self) -> SequenceNumber { - self.last_sequence.load(Ordering::Relaxed) - } - - // TODO: implement this. - fn time_range(&self) -> Option { - None - } - - fn metrics(&self) -> MemtableMetrics { - let row_raw_size = self.metrics.row_raw_size.load(Ordering::Relaxed); - let row_count = self.metrics.row_count.load(Ordering::Relaxed); - MemtableMetrics { - row_raw_size, - row_encoded_size: self.memtable_size.load(Ordering::Relaxed), - row_count, - } - } -} diff --git a/src/analytic_engine/src/memtable/error.rs b/src/analytic_engine/src/memtable/error.rs deleted file mode 100644 index 4389b0e41c..0000000000 --- a/src/analytic_engine/src/memtable/error.rs +++ /dev/null @@ -1,51 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use thiserror::Error; - -use crate::ErrorKind; - -#[derive(Debug, Error)] -#[error(transparent)] -pub struct Error(#[from] InnerError); - -impl From for Error { - fn from(source: anyhow::Error) -> Self { - Self(InnerError::Other { source }) - } -} - -impl Error { - pub fn kind(&self) -> ErrorKind { - match self.0 { - InnerError::KeyTooLarge { .. } => ErrorKind::KeyTooLarge, - InnerError::Other { .. 
} => ErrorKind::Internal, - } - } -} - -#[derive(Error, Debug)] -pub(crate) enum InnerError { - #[error("too large key, max:{max}, current:{current}")] - KeyTooLarge { current: usize, max: usize }, - - #[error(transparent)] - Other { - #[from] - source: anyhow::Error, - }, -} diff --git a/src/analytic_engine/src/memtable/factory.rs b/src/analytic_engine/src/memtable/factory.rs deleted file mode 100644 index 55bc8d6636..0000000000 --- a/src/analytic_engine/src/memtable/factory.rs +++ /dev/null @@ -1,47 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! MemTable factory - -use std::{fmt, sync::Arc}; - -use arena::CollectorRef; -use common_types::{schema::Schema, SequenceNumber}; - -use crate::memtable::{MemTableRef, Result}; - -/// MemTable options -#[derive(Clone)] -pub struct Options { - /// Schema of the skiplist. - pub schema: Schema, - /// Block size of arena in bytes. - pub arena_block_size: u32, - /// Log sequence at the memtable creation. - pub creation_sequence: SequenceNumber, - /// Memory usage collector - pub collector: CollectorRef, -} - -/// MemTable factory -pub trait Factory: fmt::Debug { - /// Create a new memtable instance - fn create_memtable(&self, opts: Options) -> Result; -} - -/// MemTable Factory reference -pub type FactoryRef = Arc; diff --git a/src/analytic_engine/src/memtable/key.rs b/src/analytic_engine/src/memtable/key.rs deleted file mode 100644 index ef9acb66cd..0000000000 --- a/src/analytic_engine/src/memtable/key.rs +++ /dev/null @@ -1,232 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Memtable key -//! -//! Some concepts: -//! - User key (row key) is a bytes encoded from the key columns of a row -//! - Internal key contains -//! - user key -//! - memtable key sequence -//! - sequence number -//! 
- index - -use std::mem; - -use bytes_ext::{BufMut, BytesMut, SafeBuf, SafeBufMut}; -use codec::{memcomparable::MemComparable, Decoder, Encoder}; -use common_types::{row::Row, schema::Schema, SequenceNumber}; -use macros::ensure; - -use crate::memtable::{Error, Result}; - -// u64 + u32 -const KEY_SEQUENCE_BYTES_LEN: usize = 12; - -/// Row index in the batch -pub type RowIndex = u32; - -/// Sequence number of row in memtable -/// -/// Contains: -/// - sequence number in wal (sequence number of the write batch) -/// - unique index of the row in the write batch -/// -/// Ordering: -/// 1. ordered by sequence desc -/// 2. ordered by index desc -/// -/// The desc order is implemented via MAX - seq -/// -/// The index is used to distinguish rows with same key of the same write batch -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct KeySequence(SequenceNumber, RowIndex); - -impl KeySequence { - pub fn new(sequence: SequenceNumber, index: RowIndex) -> Self { - Self(sequence, index) - } - - #[inline] - pub fn sequence(&self) -> SequenceNumber { - self.0 - } - - #[inline] - pub fn row_index(&self) -> RowIndex { - self.1 - } -} - -// TODO(yingwen): We also need opcode (PUT/DELETE), put it in key or row value -/// Comparable internal key encoder -/// -/// Key order: -/// 1. ordered by user key ascend (key parts of a row) -/// 2. ordered by sequence descend -/// -/// Encoding: -/// user_key + sequence -/// -/// REQUIRE: The schema of row to encode matches the Self::schema -pub struct ComparableInternalKey<'a> { - /// Sequence number of the row - sequence: KeySequence, - /// Schema of row - schema: &'a Schema, -} - -impl<'a> ComparableInternalKey<'a> { - pub fn new(sequence: KeySequence, schema: &'a Schema) -> Self { - Self { sequence, schema } - } -} - -impl<'a> Encoder for ComparableInternalKey<'a> { - type Error = Error; - - fn encode(&self, buf: &mut B, value: &Row) -> Result<()> { - let encoder = MemComparable; - for idx in self.schema.primary_key_indexes() { - encoder - .encode(buf, &value[*idx]) - .map_err(anyhow::Error::new)?; - } - SequenceCodec.encode(buf, &self.sequence)?; - - Ok(()) - } - - fn estimate_encoded_size(&self, value: &Row) -> usize { - let encoder = MemComparable; - let mut total_len = 0; - for idx in self.schema.primary_key_indexes() { - total_len += encoder.estimate_encoded_size(&value[*idx]); - } - total_len += KEY_SEQUENCE_BYTES_LEN; - - total_len - } -} - -pub struct SequenceCodec; - -impl Encoder for SequenceCodec { - type Error = Error; - - fn encode(&self, buf: &mut B, value: &KeySequence) -> Result<()> { - // Encode sequence number and index in descend order - encode_sequence_number(buf, value.sequence())?; - let reversed_index = RowIndex::MAX - value.row_index(); - buf.try_put_u32(reversed_index) - .map_err(anyhow::Error::new)?; - - Ok(()) - } - - fn estimate_encoded_size(&self, _value: &KeySequence) -> usize { - KEY_SEQUENCE_BYTES_LEN - } -} - -impl Decoder for SequenceCodec { - type Error = Error; - - fn decode(&self, buf: &mut B) -> Result { - let sequence = buf.try_get_u64().map_err(anyhow::Error::new)?; - // Reverse sequence - let sequence = SequenceNumber::MAX - sequence; - let row_index = buf.try_get_u32().map_err(anyhow::Error::new)?; - // Reverse row index - let row_index = RowIndex::MAX - row_index; - - Ok(KeySequence::new(sequence, row_index)) - } -} - -#[inline] -fn encode_sequence_number(buf: &mut B, sequence: SequenceNumber) -> Result<()> { - // The sequence need to encode in descend order - let reversed_sequence = SequenceNumber::MAX - sequence; - 
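    // Illustration (annotation, assuming the documented `MAX - seq` scheme): sequences
    // 5 and 7 become `MAX - 5` and `MAX - 7`; since `MAX - 7 < MAX - 5`, the row written
    // with the larger (newer) sequence compares as byte-wise smaller and is visited
    // first by the ascending skiplist iterator, which realizes the "ordered by
    // sequence desc" behaviour documented on `KeySequence`.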
// Encode sequence - buf.try_put_u64(reversed_sequence) - .map_err(anyhow::Error::new)?; - Ok(()) -} - -// TODO(yingwen): Maybe make decoded internal key a type? - -/// Encode internal key from user key for seek -/// -/// - user_key: the user key to encode -/// - sequence: the sequence number to encode into internal key -/// - scratch: buffer to store the encoded internal key, the scratch will be -/// clear -/// -/// Returns the slice to the encoded internal key -pub fn internal_key_for_seek<'a>( - user_key: &[u8], - sequence: SequenceNumber, - scratch: &'a mut BytesMut, -) -> Result<&'a [u8]> { - scratch.clear(); - - scratch.reserve(user_key.len() + mem::size_of::()); - scratch.extend_from_slice(user_key); - encode_sequence_number(scratch, sequence)?; - - Ok(&scratch[..]) -} - -/// Decode user key and sequence number from the internal key -pub fn user_key_from_internal_key(internal_key: &[u8]) -> Result<(&[u8], KeySequence)> { - // Empty user key is meaningless - ensure!( - internal_key.len() > KEY_SEQUENCE_BYTES_LEN, - anyhow::anyhow!( - "Insufficient internal key length, len:{}", - internal_key.len() - ) - ); - - let (left, mut right) = internal_key.split_at(internal_key.len() - KEY_SEQUENCE_BYTES_LEN); - // Decode sequence number from right part - let sequence = SequenceCodec.decode(&mut right)?; - - Ok((left, sequence)) -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_sequence_codec() { - let codec = SequenceCodec; - - let sequence = KeySequence::new(123, 456); - assert_eq!(12, codec.estimate_encoded_size(&sequence)); - let mut buf = Vec::new(); - codec.encode(&mut buf, &sequence).unwrap(); - assert_eq!(12, buf.len()); - - let mut b = &buf[..]; - let decoded_sequence = codec.decode(&mut b).unwrap(); - - assert_eq!(sequence, decoded_sequence); - } -} diff --git a/src/analytic_engine/src/memtable/layered/factory.rs b/src/analytic_engine/src/memtable/layered/factory.rs deleted file mode 100644 index 002943ab20..0000000000 --- a/src/analytic_engine/src/memtable/layered/factory.rs +++ /dev/null @@ -1,54 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Skiplist memtable factory - -use std::sync::Arc; - -use crate::memtable::{ - factory::{Factory, FactoryRef, Options}, - layered::LayeredMemTable, - MemTableRef, Result, -}; - -/// Factory to create memtable -#[derive(Debug)] -pub struct LayeredMemtableFactory { - inner_memtable_factory: FactoryRef, - mutable_switch_threshold: usize, -} - -impl LayeredMemtableFactory { - pub fn new(inner_memtable_factory: FactoryRef, mutable_switch_threshold: usize) -> Self { - Self { - inner_memtable_factory, - mutable_switch_threshold, - } - } -} - -impl Factory for LayeredMemtableFactory { - fn create_memtable(&self, opts: Options) -> Result { - let memtable = LayeredMemTable::new( - &opts, - self.inner_memtable_factory.clone(), - self.mutable_switch_threshold, - )?; - - Ok(Arc::new(memtable)) - } -} diff --git a/src/analytic_engine/src/memtable/layered/iter.rs b/src/analytic_engine/src/memtable/layered/iter.rs deleted file mode 100644 index 7b051966d1..0000000000 --- a/src/analytic_engine/src/memtable/layered/iter.rs +++ /dev/null @@ -1,120 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Skiplist memtable iterator - -use anyhow::Context; -use common_types::{record_batch::FetchedRecordBatch, schema::Schema, time::TimeRange}; - -use crate::memtable::{ - layered::{ImmutableSegment, MutableSegment}, - ColumnarIterPtr, Error, Result, ScanContext, ScanRequest, -}; - -/// Columnar iterator for [LayeredMemTable] -pub(crate) struct ColumnarIterImpl { - selected_batch_iter: ColumnarIterPtr, -} - -impl ColumnarIterImpl { - pub fn new( - memtable_schema: &Schema, - ctx: ScanContext, - request: ScanRequest, - mutable: &MutableSegment, - immutables: &[ImmutableSegment], - ) -> Result { - // Create projection for the memtable schema - let row_projector = request - .row_projector_builder - .build(memtable_schema) - .context("build row projector")?; - - let (maybe_mutable, selected_immutables) = - Self::filter_by_time_range(mutable, immutables, request.time_range); - - let immutable_batches = selected_immutables - .flat_map(|imm| { - imm.record_batches().iter().map(|batch| { - // TODO: reduce clone here. 
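                    // Annotation: the fetched schema and primary key indexes are re-cloned
                    // for every immutable batch on every scan; cloning the `ArrowRecordBatch`
                    // itself is shallow (it copies `Arc` handles to the column arrays, not
                    // the underlying data), which is presumably what the TODO above targets.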
- let fetched_schema = row_projector.fetched_schema().clone(); - let primary_key_indexes = row_projector - .primary_key_indexes() - .map(|idxs| idxs.to_vec()); - let fetched_column_indexes = row_projector.fetched_source_column_indexes(); - FetchedRecordBatch::try_new( - fetched_schema, - primary_key_indexes, - fetched_column_indexes, - batch.clone(), - ) - .map_err(|e| Error::from(anyhow::Error::new(e))) - }) - }) - .collect::>(); - - let immutable_iter = immutable_batches.into_iter(); - - let maybe_mutable_iter = match maybe_mutable { - Some(mutable) => Some(mutable.scan(ctx, request)?), - None => None, - }; - - let maybe_chained_iter = match maybe_mutable_iter { - Some(mutable_iter) => Box::new(mutable_iter.chain(immutable_iter)) as _, - None => Box::new(immutable_iter) as _, - }; - - Ok(Self { - selected_batch_iter: maybe_chained_iter, - }) - } - - fn filter_by_time_range<'a>( - mutable: &'a MutableSegment, - immutables: &'a [ImmutableSegment], - time_range: TimeRange, - ) -> ( - Option<&'a MutableSegment>, - impl Iterator, - ) { - let maybe_mutable = { - let mutable_time_range = mutable.time_range(); - mutable_time_range.and_then(|range| { - if range.intersect_with(time_range) { - Some(mutable) - } else { - None - } - }) - }; - - let selected_immutables = immutables - .iter() - .filter(move |imm| imm.time_range().intersect_with(time_range)); - - (maybe_mutable, selected_immutables) - } -} - -impl Iterator for ColumnarIterImpl { - type Item = Result; - - fn next(&mut self) -> Option { - self.selected_batch_iter.next() - } -} diff --git a/src/analytic_engine/src/memtable/layered/mod.rs b/src/analytic_engine/src/memtable/layered/mod.rs deleted file mode 100644 index 72e992bc43..0000000000 --- a/src/analytic_engine/src/memtable/layered/mod.rs +++ /dev/null @@ -1,724 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! MemTable based on skiplist - -pub mod factory; -pub mod iter; - -use std::{ - mem, - ops::{Bound, Deref}, - sync::{ - atomic::{self, AtomicU64}, - RwLock, - }, -}; - -use anyhow::Context; -use arena::CollectorRef; -use arrow::record_batch::RecordBatch as ArrowRecordBatch; -use bytes_ext::Bytes; -use common_types::{ - projected_schema::RowProjectorBuilder, row::Row, schema::Schema, time::TimeRange, - SequenceNumber, -}; -use logger::debug; -use skiplist::{BytewiseComparator, KeyComparator}; - -use crate::memtable::{ - factory::{FactoryRef, Options}, - key::KeySequence, - layered::iter::ColumnarIterImpl, - ColumnarIterPtr, MemTable, MemTableRef, Metrics as MemtableMetrics, PutContext, Result, - ScanContext, ScanRequest, -}; - -/// MemTable implementation based on skiplist -pub(crate) struct LayeredMemTable { - /// Schema of this memtable, is immutable. 
- schema: Schema, - - /// The last sequence of the rows in this memtable. Update to this field - /// require external synchronization. - last_sequence: AtomicU64, - - inner: RwLock, - - mutable_switch_threshold: usize, -} - -impl LayeredMemTable { - pub fn new( - opts: &Options, - inner_memtable_factory: FactoryRef, - mutable_switch_threshold: usize, - ) -> Result { - let inner = Inner::new(inner_memtable_factory, opts)?; - - Ok(Self { - schema: opts.schema.clone(), - last_sequence: AtomicU64::new(opts.creation_sequence), - inner: RwLock::new(inner), - mutable_switch_threshold, - }) - } - - // Used for testing only - #[cfg(test)] - fn force_switch_mutable_segment(&self) -> Result<()> { - let inner = &mut *self.inner.write().unwrap(); - inner.switch_mutable_segment(self.schema.clone()) - } -} - -impl MemTable for LayeredMemTable { - fn schema(&self) -> &Schema { - &self.schema - } - - fn min_key(&self) -> Option { - self.inner.read().unwrap().min_key() - } - - fn max_key(&self) -> Option { - self.inner.read().unwrap().max_key() - } - - fn put( - &self, - ctx: &mut PutContext, - sequence: KeySequence, - row: &Row, - schema: &Schema, - ) -> Result<()> { - let memory_usage = { - let inner = self.inner.read().unwrap(); - inner.put(ctx, sequence, row, schema)?; - inner.mutable_segment.0.approximate_memory_usage() - }; - - if memory_usage > self.mutable_switch_threshold { - debug!( - "LayeredMemTable put, memory_usage:{memory_usage}, mutable_switch_threshold:{}", - self.mutable_switch_threshold - ); - let inner = &mut *self.inner.write().unwrap(); - inner.switch_mutable_segment(self.schema.clone())?; - } - - Ok(()) - } - - fn scan(&self, ctx: ScanContext, request: ScanRequest) -> Result { - let inner = self.inner.read().unwrap(); - inner.scan(&self.schema, ctx, request) - } - - fn approximate_memory_usage(&self) -> usize { - self.inner.read().unwrap().approximate_memory_usage() - } - - fn set_last_sequence(&self, sequence: SequenceNumber) -> Result<()> { - self.last_sequence - .store(sequence, atomic::Ordering::Relaxed); - Ok(()) - } - - fn last_sequence(&self) -> SequenceNumber { - self.last_sequence.load(atomic::Ordering::Relaxed) - } - - fn time_range(&self) -> Option { - let inner = self.inner.read().unwrap(); - inner.time_range() - } - - fn metrics(&self) -> MemtableMetrics { - // FIXME: stats and return metrics - MemtableMetrics::default() - } -} - -/// Layered memtable inner -struct Inner { - mutable_segment_builder: MutableSegmentBuilder, - mutable_segment: MutableSegment, - immutable_segments: Vec, -} - -impl Inner { - fn new(memtable_factory: FactoryRef, opts: &Options) -> Result { - let builder_opts = MutableBuilderOptions { - schema: opts.schema.clone(), - arena_block_size: opts.arena_block_size, - collector: opts.collector.clone(), - }; - let mutable_segment_builder = MutableSegmentBuilder::new(memtable_factory, builder_opts); - - // Build the first mutable batch. - let init_mutable_segment = mutable_segment_builder.build()?; - - Ok(Self { - mutable_segment_builder, - mutable_segment: init_mutable_segment, - immutable_segments: vec![], - }) - } - - /// Scan batches including `mutable` and `immutable`s. 
- #[inline] - fn scan( - &self, - schema: &Schema, - ctx: ScanContext, - request: ScanRequest, - ) -> Result { - let iter = ColumnarIterImpl::new( - schema, - ctx, - request, - &self.mutable_segment, - &self.immutable_segments, - )?; - Ok(Box::new(iter)) - } - - #[inline] - fn put( - &self, - ctx: &mut PutContext, - sequence: KeySequence, - row: &Row, - schema: &Schema, - ) -> Result<()> { - self.mutable_segment.put(ctx, sequence, row, schema) - } - - fn switch_mutable_segment(&mut self, schema: Schema) -> Result<()> { - let imm_num = self.immutable_segments.len(); - debug!("LayeredMemTable switch_mutable_segment, imm_num:{imm_num}"); - - // Build a new mutable segment, and replace current's. - let new_mutable = self.mutable_segment_builder.build()?; - let current_mutable = mem::replace(&mut self.mutable_segment, new_mutable); - let fetched_schema = schema.to_record_schema(); - - // Convert current's to immutable. - let scan_ctx = ScanContext::default(); - let row_projector_builder = RowProjectorBuilder::new(fetched_schema, schema, None); - let scan_req = ScanRequest { - start_user_key: Bound::Unbounded, - end_user_key: Bound::Unbounded, - sequence: common_types::MAX_SEQUENCE_NUMBER, - need_dedup: false, - reverse: false, - metrics_collector: None, - time_range: TimeRange::min_to_max(), - row_projector_builder, - }; - - let immutable_batches = current_mutable - .scan(scan_ctx, scan_req)? - .map(|batch_res| batch_res.map(|batch| batch.into_arrow_record_batch())) - .collect::>>()?; - - let time_range = current_mutable - .time_range() - .context("failed to get time range from mutable segment")?; - let max_key = current_mutable - .max_key() - .context("failed to get max key from mutable segment")?; - let min_key = current_mutable - .min_key() - .context("failed to get min key from mutable segment")?; - let immutable = ImmutableSegment::new(immutable_batches, time_range, min_key, max_key); - - self.immutable_segments.push(immutable); - - Ok(()) - } - - pub fn min_key(&self) -> Option { - let comparator = BytewiseComparator; - - let mutable_min_key = self.mutable_segment.min_key(); - - let immutable_min_key = if self.immutable_segments.is_empty() { - None - } else { - let mut min_key = self.immutable_segments.first().unwrap().min_key(); - let mut imm_iter = self.immutable_segments.iter(); - let _ = imm_iter.next(); - for imm in imm_iter { - if let std::cmp::Ordering::Greater = comparator.compare_key(&min_key, &imm.min_key) - { - min_key = imm.min_key(); - } - } - - Some(min_key) - }; - - match (mutable_min_key, immutable_min_key) { - (None, None) => None, - (None, Some(key)) | (Some(key), None) => Some(key), - (Some(key1), Some(key2)) => Some(match comparator.compare_key(&key1, &key2) { - std::cmp::Ordering::Greater => key2, - std::cmp::Ordering::Less | std::cmp::Ordering::Equal => key1, - }), - } - } - - pub fn max_key(&self) -> Option { - let comparator = BytewiseComparator; - - let mutable_max_key = self.mutable_segment.max_key(); - - let immutable_max_key = if self.immutable_segments.is_empty() { - None - } else { - let mut max_key = self.immutable_segments.first().unwrap().max_key(); - let mut imm_iter = self.immutable_segments.iter(); - let _ = imm_iter.next(); - for imm in imm_iter { - if let std::cmp::Ordering::Less = comparator.compare_key(&max_key, &imm.max_key) { - max_key = imm.max_key(); - } - } - - Some(max_key) - }; - - match (mutable_max_key, immutable_max_key) { - (None, None) => None, - (None, Some(key)) | (Some(key), None) => Some(key), - (Some(key1), Some(key2)) => Some(match 
comparator.compare_key(&key1, &key2) { - std::cmp::Ordering::Less => key2, - std::cmp::Ordering::Greater | std::cmp::Ordering::Equal => key1, - }), - } - } - - pub fn time_range(&self) -> Option { - let mutable_time_range = self.mutable_segment.time_range(); - - let immutable_time_range = if self.immutable_segments.is_empty() { - None - } else { - let mut time_range = self.immutable_segments.first().unwrap().time_range(); - let mut imm_iter = self.immutable_segments.iter(); - let _ = imm_iter.next(); - for imm in imm_iter { - time_range = time_range.merge_range(imm.time_range()); - } - - Some(time_range) - }; - - match (mutable_time_range, immutable_time_range) { - (None, None) => None, - (None, Some(range)) | (Some(range), None) => Some(range), - (Some(range1), Some(range2)) => Some(range1.merge_range(range2)), - } - } - - fn approximate_memory_usage(&self) -> usize { - let mutable_mem_usage = self.mutable_segment.approximate_memory_usage(); - - let immutable_mem_usage = self - .immutable_segments - .iter() - .map(|imm| imm.approximate_memory_usage()) - .sum::(); - - mutable_mem_usage + immutable_mem_usage - } -} - -/// Mutable batch -pub(crate) struct MutableSegment(MemTableRef); - -impl Deref for MutableSegment { - type Target = MemTableRef; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -/// Builder for `MutableBatch` -struct MutableSegmentBuilder { - memtable_factory: FactoryRef, - opts: MutableBuilderOptions, -} - -impl MutableSegmentBuilder { - fn new(memtable_factory: FactoryRef, opts: MutableBuilderOptions) -> Self { - Self { - memtable_factory, - opts, - } - } - - fn build(&self) -> Result { - let memtable_opts = Options { - schema: self.opts.schema.clone(), - arena_block_size: self.opts.arena_block_size, - // `creation_sequence` is meaningless in inner memtable, just set it to min. - creation_sequence: SequenceNumber::MIN, - collector: self.opts.collector.clone(), - }; - - let memtable = self - .memtable_factory - .create_memtable(memtable_opts) - .context("failed to build mutable segment")?; - - Ok(MutableSegment(memtable)) - } -} - -struct MutableBuilderOptions { - pub schema: Schema, - - /// Block size of arena in bytes. - pub arena_block_size: u32, - - /// Memory usage collector - pub collector: CollectorRef, -} - -/// Immutable batch -pub(crate) struct ImmutableSegment { - /// Record batch converted from `MutableBatch` - record_batches: Vec, - - /// Min time of source `MutableBatch` - time_range: TimeRange, - - /// Min key of source `MutableBatch` - min_key: Bytes, - - /// Max key of source `MutableBatch` - max_key: Bytes, - - approximate_memory_size: usize, -} - -impl ImmutableSegment { - fn new( - record_batches: Vec, - time_range: TimeRange, - min_key: Bytes, - max_key: Bytes, - ) -> Self { - let approximate_memory_size = record_batches - .iter() - .map(|batch| batch.get_array_memory_size()) - .sum(); - - Self { - record_batches, - time_range, - min_key, - max_key, - approximate_memory_size, - } - } - - pub fn time_range(&self) -> TimeRange { - self.time_range - } - - pub fn min_key(&self) -> Bytes { - self.min_key.clone() - } - - pub fn max_key(&self) -> Bytes { - self.max_key.clone() - } - - // TODO: maybe return a iterator? 
- pub fn record_batches(&self) -> &[ArrowRecordBatch] { - &self.record_batches - } - - pub fn approximate_memory_usage(&self) -> usize { - self.approximate_memory_size - } -} - -#[cfg(test)] -mod tests { - - use std::{ops::Bound, sync::Arc}; - - use arena::NoopCollector; - use bytes_ext::ByteVec; - use codec::{memcomparable::MemComparable, Encoder}; - use common_types::{ - datum::Datum, - projected_schema::{ProjectedSchema, RowProjectorBuilder}, - record_batch::FetchedRecordBatch, - row::Row, - schema::IndexInWriterSchema, - tests::{build_row, build_schema}, - }; - - use super::*; - use crate::memtable::{ - factory::Options, - key::ComparableInternalKey, - skiplist::factory::SkiplistMemTableFactory, - test_util::{TestMemtableBuilder, TestUtil}, - MemTableRef, - }; - - struct TestMemtableBuilderImpl; - - impl TestMemtableBuilder for TestMemtableBuilderImpl { - fn build(&self, data: &[(KeySequence, Row)]) -> MemTableRef { - let schema = build_schema(); - let factory = SkiplistMemTableFactory; - let opts = Options { - schema: schema.clone(), - arena_block_size: 512, - creation_sequence: 1, - collector: Arc::new(NoopCollector {}), - }; - let memtable = LayeredMemTable::new(&opts, Arc::new(factory), usize::MAX).unwrap(); - - let mut ctx = - PutContext::new(IndexInWriterSchema::for_same_schema(schema.num_columns())); - let partitioned_data = data.chunks(3).collect::>(); - let chunk_num = partitioned_data.len(); - - for chunk in partitioned_data.iter().take(chunk_num - 1) { - for (seq, row) in *chunk { - memtable.put(&mut ctx, *seq, row, &schema).unwrap(); - } - memtable.force_switch_mutable_segment().unwrap(); - } - - let last_chunk = partitioned_data[chunk_num - 1]; - for (seq, row) in last_chunk { - memtable.put(&mut ctx, *seq, row, &schema).unwrap(); - } - - Arc::new(memtable) - } - } - - fn test_data() -> Vec<(KeySequence, Row)> { - vec![ - ( - KeySequence::new(1, 1), - build_row(b"a", 1, 10.0, "v1", 1000, 1_000_000), - ), - ( - KeySequence::new(1, 2), - build_row(b"b", 2, 10.0, "v2", 2000, 2_000_000), - ), - ( - KeySequence::new(1, 4), - build_row(b"c", 3, 10.0, "v3", 3000, 3_000_000), - ), - ( - KeySequence::new(2, 1), - build_row(b"d", 4, 10.0, "v4", 4000, 4_000_000), - ), - ( - KeySequence::new(2, 1), - build_row(b"e", 5, 10.0, "v5", 5000, 5_000_000), - ), - ( - KeySequence::new(2, 3), - build_row(b"f", 6, 10.0, "v6", 6000, 6_000_000), - ), - ( - KeySequence::new(3, 4), - build_row(b"g", 7, 10.0, "v7", 7000, 7_000_000), - ), - ] - } - - #[test] - fn test_memtable_scan() { - let builder = TestMemtableBuilderImpl; - let data = test_data(); - let test_util = TestUtil::new(builder, data); - let memtable = test_util.memtable(); - let schema = memtable.schema().clone(); - - // No projection. - let projection = (0..schema.num_columns()).collect::>(); - let expected = test_util.data(); - test_memtable_scan_internal( - schema.clone(), - projection, - TimeRange::min_to_max(), - memtable.clone(), - expected, - ); - - // Projection to first three. - let projection = vec![0, 1, 3]; - let expected = test_util - .data() - .iter() - .map(|row| { - let datums = vec![row[0].clone(), row[1].clone(), row[3].clone()]; - Row::from_datums(datums) - }) - .collect(); - test_memtable_scan_internal( - schema.clone(), - projection, - TimeRange::min_to_max(), - memtable.clone(), - expected, - ); - - // No projection. 
- let projection = (0..schema.num_columns()).collect::>(); - let time_range = TimeRange::new(2.into(), 7.into()).unwrap(); - // Memtable data after switching may be like(just showing timestamp column using - // to filter): [1, 2, 3], [4, 5, 6], [7] - // - // And the target time range is: [2, 7) - // - // So the filter result should be: [1, 2, 3], [4, 5, 6] - let expected = test_util - .data() - .iter() - .enumerate() - .filter_map(|(idx, row)| if idx < 6 { Some(row.clone()) } else { None }) - .collect(); - test_memtable_scan_internal( - schema.clone(), - projection, - time_range, - memtable.clone(), - expected, - ); - } - - #[test] - fn test_time_range() { - let builder = TestMemtableBuilderImpl; - let data = test_data(); - let test_util = TestUtil::new(builder, data); - let memtable = test_util.memtable(); - - assert_eq!(TimeRange::new(1.into(), 8.into()), memtable.time_range()); - } - - #[test] - fn test_min_max_key() { - let builder = TestMemtableBuilderImpl; - let data = test_data(); - let test_util = TestUtil::new(builder, data.clone()); - let memtable = test_util.memtable(); - let schema = memtable.schema(); - - // Min key - let key_encoder = ComparableInternalKey::new(data[0].0, schema); - let mut min_key = Vec::with_capacity(key_encoder.estimate_encoded_size(&data[0].1)); - key_encoder.encode(&mut min_key, &data[0].1).unwrap(); - let key_encoder = ComparableInternalKey::new(data[0].0, schema); - let mut min_key = Vec::with_capacity(key_encoder.estimate_encoded_size(&data[0].1)); - key_encoder.encode(&mut min_key, &data[0].1).unwrap(); - - // Max key - let key_encoder = ComparableInternalKey::new(data[6].0, schema); - let mut max_key = Vec::with_capacity(key_encoder.estimate_encoded_size(&data[6].1)); - key_encoder.encode(&mut max_key, &data[6].1).unwrap(); - let key_encoder = ComparableInternalKey::new(data[6].0, schema); - let mut max_key = Vec::with_capacity(key_encoder.estimate_encoded_size(&data[6].1)); - key_encoder.encode(&mut max_key, &data[6].1).unwrap(); - - assert_eq!(min_key, memtable.min_key().unwrap().to_vec()); - assert_eq!(max_key, memtable.max_key().unwrap().to_vec()); - } - - fn test_memtable_scan_internal( - schema: Schema, - projection: Vec, - time_range: TimeRange, - memtable: Arc, - expected: Vec, - ) { - let projected_schema = ProjectedSchema::new(schema, Some(projection)).unwrap(); - let fetched_schema = projected_schema.to_record_schema(); - let table_schema = projected_schema.table_schema(); - let row_projector_builder = - RowProjectorBuilder::new(fetched_schema, table_schema.clone(), None); - - // limited by sequence - let scan_request = ScanRequest { - start_user_key: Bound::Unbounded, - end_user_key: Bound::Unbounded, - sequence: SequenceNumber::MAX, - row_projector_builder, - need_dedup: false, - reverse: false, - metrics_collector: None, - time_range, - }; - let scan_ctx = ScanContext::default(); - let iter = memtable.scan(scan_ctx, scan_request).unwrap(); - check_iterator(iter, expected); - } - - fn check_iterator>>( - iter: T, - expected_rows: Vec, - ) { - // sort it first. 
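        // Annotation: a layered-memtable scan chains the mutable segment's iterator with
        // the frozen immutable batches, so rows are only ordered within each segment;
        // sorting by the mem-comparable (key, timestamp) prefix below makes the comparison
        // with `expected_rows` independent of segment boundaries.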
- let mut rows = Vec::new(); - for batch in iter { - let batch = batch.unwrap(); - for row_idx in 0..batch.num_rows() { - rows.push(batch.clone_row_at(row_idx)); - } - } - - rows.sort_by(|a, b| { - let key1 = build_scan_key( - &String::from_utf8_lossy(a[0].as_varbinary().unwrap()), - a[1].as_timestamp().unwrap().as_i64(), - ); - let key2 = build_scan_key( - &String::from_utf8_lossy(b[0].as_varbinary().unwrap()), - b[1].as_timestamp().unwrap().as_i64(), - ); - BytewiseComparator.compare_key(&key1, &key2) - }); - - assert_eq!(rows, expected_rows); - } - - fn build_scan_key(c1: &str, c2: i64) -> Bytes { - let mut buf = ByteVec::new(); - let encoder = MemComparable; - encoder.encode(&mut buf, &Datum::from(c1)).unwrap(); - encoder.encode(&mut buf, &Datum::from(c2)).unwrap(); - - Bytes::from(buf) - } -} diff --git a/src/analytic_engine/src/memtable/mod.rs b/src/analytic_engine/src/memtable/mod.rs deleted file mode 100644 index cdb216a4e3..0000000000 --- a/src/analytic_engine/src/memtable/mod.rs +++ /dev/null @@ -1,277 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! MemTable - -pub mod columnar; -pub mod error; -pub mod factory; -pub mod key; -pub mod layered; -mod reversed_iter; -pub mod skiplist; -pub mod test_util; - -use std::{ops::Bound, sync::Arc, time::Instant}; - -use bytes_ext::{ByteVec, Bytes}; -use common_types::{ - projected_schema::RowProjectorBuilder, - record_batch::FetchedRecordBatch, - row::Row, - schema::{IndexInWriterSchema, Schema}, - time::TimeRange, - SequenceNumber, -}; -pub use error::Error; -use horaedbproto::manifest; -use macros::define_result; -use serde::{Deserialize, Serialize}; -use size_ext::ReadableSize; -use trace_metric::MetricsCollector; - -use crate::memtable::key::KeySequence; - -const DEFAULT_SCAN_BATCH_SIZE: usize = 500; -const MEMTABLE_TYPE_SKIPLIST: &str = "skiplist"; -const MEMTABLE_TYPE_COLUMNAR: &str = "columnar"; - -#[derive(Debug, Clone, Deserialize, Eq, PartialEq, Serialize)] -pub enum MemtableType { - SkipList, - Column, -} - -impl MemtableType { - pub fn parse_from(s: &str) -> Self { - if s.eq_ignore_ascii_case(MEMTABLE_TYPE_COLUMNAR) { - MemtableType::Column - } else { - MemtableType::SkipList - } - } -} - -impl ToString for MemtableType { - fn to_string(&self) -> String { - match self { - MemtableType::SkipList => MEMTABLE_TYPE_SKIPLIST.to_string(), - MemtableType::Column => MEMTABLE_TYPE_COLUMNAR.to_string(), - } - } -} - -/// Layered memtable options -/// If `mutable_segment_switch_threshold` is set zero, layered memtable will be -/// disable. 
-#[derive(Debug, Clone, Deserialize, PartialEq, Serialize)] -#[serde(default)] -pub struct LayeredMemtableOptions { - pub enable: bool, - pub mutable_segment_switch_threshold: ReadableSize, -} - -impl LayeredMemtableOptions { - #[inline] - pub fn enable_layered_memtable(&self) -> bool { - self.enable && self.mutable_segment_switch_threshold.0 > 0 - } -} - -impl Default for LayeredMemtableOptions { - fn default() -> Self { - Self { - enable: false, - mutable_segment_switch_threshold: ReadableSize::mb(3), - } - } -} - -impl From for LayeredMemtableOptions { - fn from(value: manifest::LayeredMemtableOptions) -> Self { - // For compatibility here. - // Layered memtable is enabled default in former, - // so some horaedb service is running with layered memtable in production - // and we shouldn't make difference to such exist running services - // after switching to control layered memtable's on/off with the new added - // `enable` field in manifest(that says `enable` should assume to true when not - // exist). - // However, pb version used now don't support to define default value - // explicitly, and default value of bool is always false... - // So we use `disable` rather than `enable` in pb to reach it - // (disable: false --> enable: true). - let enable = !value.disable; - let mutable_segment_switch_threshold = ReadableSize(value.mutable_segment_switch_threshold); - - Self { - enable, - mutable_segment_switch_threshold, - } - } -} - -impl From for manifest::LayeredMemtableOptions { - fn from(value: LayeredMemtableOptions) -> Self { - Self { - mutable_segment_switch_threshold: value.mutable_segment_switch_threshold.0, - disable: !value.enable, - } - } -} - -define_result!(error::Error); - -/// Options for put and context for tracing -pub struct PutContext { - /// Buffer for encoding key, can reuse during put - pub key_buf: ByteVec, - /// Buffer for encoding value, can reuse during put - pub value_buf: ByteVec, - /// Used to encode row. - pub index_in_writer: IndexInWriterSchema, -} - -impl PutContext { - pub fn new(index_in_writer: IndexInWriterSchema) -> Self { - Self { - key_buf: ByteVec::new(), - value_buf: ByteVec::new(), - index_in_writer, - } - } -} - -/// Options for scan and context for tracing -#[derive(Debug, Clone)] -pub struct ScanContext { - /// Suggested row number per batch - pub batch_size: usize, - pub deadline: Option, -} - -impl Default for ScanContext { - fn default() -> Self { - Self { - batch_size: DEFAULT_SCAN_BATCH_SIZE, - deadline: None, - } - } -} - -/// Scan request -/// -/// Now we only support forward scan. -#[derive(Debug, Clone)] -pub struct ScanRequest { - /// The start key of the encoded user key (without sequence). - pub start_user_key: Bound, - /// The end key of the encoded user key (without sequence). - pub end_user_key: Bound, - /// Max visible sequence (inclusive), row key with sequence <= this can be - /// visible. - pub sequence: SequenceNumber, - /// Schema and projection to read. - pub row_projector_builder: RowProjectorBuilder, - pub need_dedup: bool, - pub reverse: bool, - /// Collector for scan metrics. - pub metrics_collector: Option, - pub time_range: TimeRange, -} - -/// In memory storage for table's data. -/// -/// # Concurrency -/// The memtable is designed for single-writer and multiple-reader usage, so -/// not all function supports concurrent writer, the caller should guarantee not -/// writing to the memtable concurrently. 
-// All operation is done in memory, no need to use async trait -pub trait MemTable { - /// Schema of this memtable - /// - /// The schema of a memtable is not allowed to change now. Modifying the - /// schema of a table requires a memtable switch and external - /// synchronization - fn schema(&self) -> &Schema; - - /// Peek the min key of this memtable. - fn min_key(&self) -> Option; - - /// Peek the max key of this memtable. - fn max_key(&self) -> Option; - - /// Insert one row into the memtable. - /// - /// .- ctx: The put context - /// - sequence: The sequence of the row - /// - row: The row to insert - /// - schema: The schema of the row - /// - /// REQUIRE: - /// - The schema of RowGroup must equal to the schema of memtable. How to - /// handle duplicate entries is implementation specific. - fn put( - &self, - ctx: &mut PutContext, - sequence: KeySequence, - row_group: &Row, - schema: &Schema, - ) -> Result<()>; - - /// Scan the memtable. - /// - /// Returns the data in columnar format. The returned rows is guaranteed - /// to be ordered by the primary key. - fn scan(&self, ctx: ScanContext, request: ScanRequest) -> Result; - - /// Returns an estimate of the number of bytes of data in used - fn approximate_memory_usage(&self) -> usize; - - /// Set last sequence of the memtable, returns error if the given `sequence` - /// is less than existing last sequence. - /// - /// REQUIRE: - /// - External synchronization is required. - fn set_last_sequence(&self, sequence: SequenceNumber) -> Result<()>; - - /// Returns the last sequence of the memtable. - /// - /// If the memtable is empty, then the last sequence is 0. - fn last_sequence(&self) -> SequenceNumber; - - /// Time range of written rows. - fn time_range(&self) -> Option; - - /// Metrics of inner state. - fn metrics(&self) -> Metrics; -} - -#[derive(Debug, Default)] -pub struct Metrics { - /// Size of original rows. - pub row_raw_size: usize, - /// Size of rows after encoded. - pub row_encoded_size: usize, - /// Row number count. - pub row_count: usize, -} - -/// A reference to memtable -pub type MemTableRef = Arc; - -/// A pointer to columnar iterator -pub type ColumnarIterPtr = Box> + Send + Sync>; diff --git a/src/analytic_engine/src/memtable/reversed_iter.rs b/src/analytic_engine/src/memtable/reversed_iter.rs deleted file mode 100644 index 14b3851f43..0000000000 --- a/src/analytic_engine/src/memtable/reversed_iter.rs +++ /dev/null @@ -1,82 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::iter::Rev; - -use common_types::record_batch::FetchedRecordBatch; - -use crate::memtable::{Error, Result}; - -/// Reversed columnar iterator. -// TODO(xikai): Now the implementation is not perfect: read all the entries -// into a buffer and reverse read it. 
The memtable should support scan in -// reverse order naturally. -pub struct ReversedColumnarIterator { - iter: I, - reversed_iter: Option>>>, - num_record_batch: usize, -} - -impl ReversedColumnarIterator -where - I: Iterator>, -{ - pub fn new(iter: I, num_rows: usize, batch_size: usize) -> Self { - Self { - iter, - reversed_iter: None, - num_record_batch: num_rows / batch_size, - } - } - - fn init_if_necessary(&mut self) { - if self.reversed_iter.is_some() { - return; - } - - let mut buf = Vec::with_capacity(self.num_record_batch); - for item in &mut self.iter { - buf.push(item); - } - self.reversed_iter = Some(buf.into_iter().rev()); - } -} - -impl Iterator for ReversedColumnarIterator -where - I: Iterator>, -{ - type Item = Result; - - fn next(&mut self) -> Option { - self.init_if_necessary(); - self.reversed_iter - .as_mut() - .unwrap() - .next() - .map(|v| match v { - Ok(mut batch_with_key) => { - batch_with_key - .reverse_data() - .map_err(|e| Error::from(anyhow::Error::new(e)))?; - - Ok(batch_with_key) - } - Err(e) => Err(e), - }) - } -} diff --git a/src/analytic_engine/src/memtable/skiplist/factory.rs b/src/analytic_engine/src/memtable/skiplist/factory.rs deleted file mode 100644 index 91733b6648..0000000000 --- a/src/analytic_engine/src/memtable/skiplist/factory.rs +++ /dev/null @@ -1,47 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Skiplist memtable factory - -use std::sync::{atomic::AtomicU64, Arc}; - -use arena::MonoIncArena; -use skiplist::{BytewiseComparator, Skiplist}; - -use crate::memtable::{ - factory::{Factory, Options}, - skiplist::SkiplistMemTable, - MemTableRef, Result, -}; - -/// Factory to create memtable -#[derive(Debug)] -pub struct SkiplistMemTableFactory; - -impl Factory for SkiplistMemTableFactory { - fn create_memtable(&self, opts: Options) -> Result { - let arena = MonoIncArena::with_collector(opts.arena_block_size as usize, opts.collector); - let skiplist = Skiplist::with_arena(BytewiseComparator, arena); - let memtable = Arc::new(SkiplistMemTable::new( - opts.schema, - skiplist, - AtomicU64::new(opts.creation_sequence), - )); - - Ok(memtable) - } -} diff --git a/src/analytic_engine/src/memtable/skiplist/iter.rs b/src/analytic_engine/src/memtable/skiplist/iter.rs deleted file mode 100644 index ab9b59da03..0000000000 --- a/src/analytic_engine/src/memtable/skiplist/iter.rs +++ /dev/null @@ -1,320 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Skiplist memtable iterator - -use std::{cmp::Ordering, ops::Bound, time::Instant}; - -use anyhow::Context; -use arena::{Arena, BasicStats}; -use bytes_ext::{Bytes, BytesMut}; -use codec::row; -use common_types::{ - projected_schema::RowProjector, - record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, - row::contiguous::{ContiguousRowReader, ProjectedContiguousRow}, - schema::Schema, - SequenceNumber, -}; -use logger::trace; -use macros::ensure; -use skiplist::{ArenaSlice, BytewiseComparator, IterRef, Skiplist}; - -use crate::memtable::{ - key::{self, KeySequence}, - skiplist::SkiplistMemTable, - Result, ScanContext, ScanRequest, -}; - -/// Iterator state -#[derive(Debug, PartialEq)] -enum State { - /// The iterator struct is created but not initialized - Uninitialized, - /// The iterator is initialized (seek) - Initialized, - /// No more element the iterator can return - Finished, -} - -/// Columnar iterator for [SkiplistMemTable] -pub struct ColumnarIterImpl + Clone + Sync + Send> { - /// The internal skiplist iter - iter: IterRef, BytewiseComparator, A>, - - // Schema related: - /// Schema of this memtable, used to decode row - memtable_schema: Schema, - /// Projection of schema to read - row_projector: RowProjector, - - // Options related: - batch_size: usize, - deadline: Option, - - start_user_key: Bound, - end_user_key: Bound, - /// Max visible sequence - sequence: SequenceNumber, - /// State of iterator - state: State, - /// Last internal key this iterator returned - // TODO(yingwen): Wrap a internal key struct? 
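    // Annotation: this is the raw encoded internal key (user key + reversed sequence)
    // borrowed from the skiplist arena; `fetch_next_row` decodes its user-key prefix and
    // compares it with the next entry to skip duplicate user keys when `need_dedup` is set.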
- last_internal_key: Option>, - - /// Dedup rows with key - need_dedup: bool, -} - -impl + Clone + Sync + Send> ColumnarIterImpl { - /// Create a new [ColumnarIterImpl] - pub fn new( - memtable: &SkiplistMemTable, - ctx: ScanContext, - request: ScanRequest, - ) -> Result { - // Create projection for the memtable schema - let row_projector = request - .row_projector_builder - .build(&memtable.schema) - .context("build projector")?; - - let iter = memtable.skiplist.iter(); - let mut columnar_iter = Self { - iter, - memtable_schema: memtable.schema.clone(), - row_projector, - batch_size: ctx.batch_size, - deadline: ctx.deadline, - start_user_key: request.start_user_key, - end_user_key: request.end_user_key, - sequence: request.sequence, - state: State::Uninitialized, - last_internal_key: None, - need_dedup: request.need_dedup, - }; - - columnar_iter.init()?; - - Ok(columnar_iter) - } - - /// Init the iterator, will seek to the proper position for first next() - /// call, so the first entry next() returned is after the - /// `start_user_key`, but we still need to check `end_user_key` - fn init(&mut self) -> Result<()> { - match &self.start_user_key { - Bound::Included(user_key) => { - // Construct seek key - let mut key_buf = BytesMut::new(); - let seek_key = key::internal_key_for_seek(user_key, self.sequence, &mut key_buf) - .context("encode internal key")?; - - // Seek the skiplist - self.iter.seek(seek_key); - } - Bound::Excluded(user_key) => { - // Construct seek key, just seek to the key with next prefix, so there is no - // need to skip the key until we meet the first key > - // start_user_key - let next_user_key = row::key_prefix_next(user_key); - let mut key_buf = BytesMut::new(); - let seek_key = - key::internal_key_for_seek(&next_user_key, self.sequence, &mut key_buf) - .context("encode internal key")?; - - // Seek the skiplist - self.iter.seek(seek_key); - } - Bound::Unbounded => self.iter.seek_to_first(), - } - - self.state = State::Initialized; - - Ok(()) - } - - /// Fetch next record batch - fn fetch_next_record_batch(&mut self) -> Result> { - debug_assert_eq!(State::Initialized, self.state); - assert!(self.batch_size > 0); - - let record_schema = self.row_projector.fetched_schema().clone(); - let is_empty_projection = record_schema.columns().is_empty(); - let primary_key_indexes = self - .row_projector - .primary_key_indexes() - .map(|idxs| idxs.to_vec()); - let mut builder = FetchedRecordBatchBuilder::with_capacity( - record_schema, - primary_key_indexes, - self.batch_size, - ); - let mut num_rows = 0; - while self.iter.valid() && num_rows < self.batch_size { - if let Some(row) = self.fetch_next_row()? 
{ - let row_reader = ContiguousRowReader::try_new(&row, &self.memtable_schema) - .context("decode continuous row")?; - let projected_row = ProjectedContiguousRow::new(row_reader, &self.row_projector); - - trace!("Column iterator fetch next row, row:{:?}", projected_row); - - builder - .append_projected_contiguous_row(&projected_row) - .context("append row")?; - num_rows += 1; - } else { - // There is no more row to fetch - self.finish(); - break; - } - } - - if is_empty_projection { - builder.inc_row_num(num_rows); - } - - if num_rows > 0 { - if let Some(deadline) = self.deadline { - let now = Instant::now(); - ensure!( - now < deadline, - "iter timeout, now:{now:?}, deadline:{deadline:?}" - ) - } - - let batch = builder.build().context("build record batch")?; - trace!("column iterator send one batch:{:?}", batch); - - Ok(Some(batch)) - } else { - // If iter is invalid after seek (nothing matched), then it may not be marked as - // finished yet - self.finish(); - Ok(None) - } - } - - /// Fetch next row matched the given condition, the current entry of iter - /// will be considered - /// - /// REQUIRE: The iter is valid - fn fetch_next_row(&mut self) -> Result>> { - debug_assert_eq!(State::Initialized, self.state); - - // TODO(yingwen): Some operation like delete needs to be considered during - // iterating: we need to ignore this key if found a delete mark - while self.iter.valid() { - // Fetch current entry - let key = self.iter.key(); - let (user_key, sequence) = - key::user_key_from_internal_key(key).context("DecodeInternalKey")?; - - // Check user key is still in range - if self.is_after_end_bound(user_key) { - // Out of bound - self.finish(); - return Ok(None); - } - - if self.need_dedup { - // Whether this user key is already returned - let same_key = match &self.last_internal_key { - Some(last_internal_key) => { - // TODO(yingwen): Actually this call wont fail, only valid internal key will - // be set as last_internal_key so maybe we can just - // unwrap it? 
- let (last_user_key, _) = key::user_key_from_internal_key(last_internal_key) - .context("DecodeInternalKey")?; - user_key == last_user_key - } - // This is the first user key - None => false, - }; - - if same_key { - // We meet duplicate key, move forward and continue to find next user key - self.iter.next(); - continue; - } - // Now this is a new user key - } - - // Check whether this key is visible - if !self.is_visible(sequence) { - // The sequence of this key is not visible, move forward - self.iter.next(); - continue; - } - - // This is the row we want - let row = self.iter.value_with_arena(); - - // Store the last key - self.last_internal_key = Some(self.iter.key_with_arena()); - // Move iter forward - self.iter.next(); - - return Ok(Some(row)); - } - - // No more row in range, we can stop the iterator - self.finish(); - Ok(None) - } - - /// Return true if the sequence is visible - #[inline] - fn is_visible(&self, sequence: KeySequence) -> bool { - sequence.sequence() <= self.sequence - } - - /// Return true if the key is after the `end_user_key` bound - fn is_after_end_bound(&self, key: &[u8]) -> bool { - match &self.end_user_key { - Bound::Included(end) => match key.cmp(end) { - Ordering::Less | Ordering::Equal => false, - Ordering::Greater => true, - }, - Bound::Excluded(end) => match key.cmp(end) { - Ordering::Less => false, - Ordering::Equal | Ordering::Greater => true, - }, - // All key is valid - Bound::Unbounded => false, - } - } - - /// Mark the iterator state to finished and return None - fn finish(&mut self) { - self.state = State::Finished; - } -} - -impl + Clone + Sync + Send> Iterator for ColumnarIterImpl { - type Item = Result; - - fn next(&mut self) -> Option { - if self.state != State::Initialized { - return None; - } - - self.fetch_next_record_batch().transpose() - } -} - -// TODO(yingwen): Test diff --git a/src/analytic_engine/src/memtable/skiplist/mod.rs b/src/analytic_engine/src/memtable/skiplist/mod.rs deleted file mode 100644 index ae5b37f89a..0000000000 --- a/src/analytic_engine/src/memtable/skiplist/mod.rs +++ /dev/null @@ -1,526 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
MemTable based on skiplist - -pub mod factory; -pub mod iter; - -use std::sync::atomic::{self, AtomicI64, AtomicU64, AtomicUsize}; - -use anyhow::Context; -use arena::{Arena, BasicStats}; -use bytes_ext::Bytes; -use codec::Encoder; -use common_types::{ - row::{contiguous::ContiguousRowWriter, Row}, - schema::Schema, - time::TimeRange, - SequenceNumber, -}; -use logger::{debug, trace}; -use macros::ensure; -use skiplist::{BytewiseComparator, Skiplist}; - -use crate::memtable::{ - error::InnerError, - key::{ComparableInternalKey, KeySequence}, - reversed_iter::ReversedColumnarIterator, - skiplist::iter::ColumnarIterImpl, - ColumnarIterPtr, MemTable, Metrics as MemtableMetrics, PutContext, Result, ScanContext, - ScanRequest, -}; - -#[derive(Default, Debug)] -struct Metrics { - row_raw_size: AtomicUsize, - row_encoded_size: AtomicUsize, - row_count: AtomicUsize, -} - -/// MemTable implementation based on skiplist -pub struct SkiplistMemTable { - /// Schema of this memtable, is immutable. - schema: Schema, - skiplist: Skiplist, - /// The last sequence of the rows in this memtable. Update to this field - /// require external synchronization. - last_sequence: AtomicU64, - - metrics: Metrics, - min_time: AtomicI64, - max_time: AtomicI64, -} - -impl Drop for SkiplistMemTable { - fn drop(&mut self) { - logger::debug!( - "Drop skiplist memtable, last_seq:{}, schema:{:?}", - self.last_sequence.load(atomic::Ordering::Relaxed), - self.schema - ); - } -} - -impl + Clone> SkiplistMemTable { - fn new( - schema: Schema, - skiplist: Skiplist, - last_sequence: AtomicU64, - ) -> Self { - Self { - schema, - skiplist, - last_sequence, - metrics: Metrics::default(), - // Init to max value first, so we can use `min(min_time, row.time)` to get real min - // time. - min_time: AtomicI64::new(i64::MAX), - max_time: AtomicI64::new(i64::MIN), - } - } -} - -impl + Clone + Sync + Send + 'static> MemTable - for SkiplistMemTable -{ - fn schema(&self) -> &Schema { - &self.schema - } - - fn min_key(&self) -> Option { - let mut iter = self.skiplist.iter(); - iter.seek_to_first(); - if !iter.valid() { - None - } else { - Some(iter.key().to_vec().into()) - } - } - - fn max_key(&self) -> Option { - let mut iter = self.skiplist.iter(); - iter.seek_to_last(); - if !iter.valid() { - None - } else { - Some(iter.key().to_vec().into()) - } - } - - // TODO(yingwen): Encode value if value_buf is not set. - // Now the caller is required to encode the row into the `value_buf` in - // PutContext first. - fn put( - &self, - ctx: &mut PutContext, - sequence: KeySequence, - row: &Row, - schema: &Schema, - ) -> Result<()> { - trace!("skiplist put row, sequence:{:?}, row:{:?}", sequence, row); - - let key_encoder = ComparableInternalKey::new(sequence, schema); - - let internal_key = &mut ctx.key_buf; - // Reset key buffer - internal_key.clear(); - // Reserve capacity for key - internal_key.reserve(key_encoder.estimate_encoded_size(row)); - // Encode key - key_encoder - .encode(internal_key, row) - .context("encode interval key")?; - - // TODO: we should check row's primary key size at the beginning of write - // process, so WAL and memtable can keep in sync. - ensure!( - internal_key.len() <= skiplist::MAX_KEY_SIZE as usize, - InnerError::KeyTooLarge { - current: internal_key.len(), - max: skiplist::MAX_KEY_SIZE as usize, - } - ); - - // Encode row value. The ContiguousRowWriter will clear the buf. 
- let row_value = &mut ctx.value_buf; - let mut row_writer = ContiguousRowWriter::new(row_value, schema, &ctx.index_in_writer); - row_writer.write_row(row).context("invalid row")?; - let encoded_size = internal_key.len() + row_value.len(); - self.skiplist.put(internal_key, row_value); - - // Update min/max time - let timestamp = row - .timestamp(schema) - .context("timestamp not found")? - .as_i64(); - _ = self - .min_time - .fetch_update(atomic::Ordering::Relaxed, atomic::Ordering::Relaxed, |v| { - if timestamp < v { - Some(timestamp) - } else { - None - } - }); - _ = self - .max_time - .fetch_update(atomic::Ordering::Relaxed, atomic::Ordering::Relaxed, |v| { - if timestamp > v { - Some(timestamp) - } else { - None - } - }); - - // Update metrics - self.metrics - .row_raw_size - .fetch_add(row.size(), atomic::Ordering::Relaxed); - self.metrics - .row_count - .fetch_add(1, atomic::Ordering::Relaxed); - self.metrics - .row_encoded_size - .fetch_add(encoded_size, atomic::Ordering::Relaxed); - - Ok(()) - } - - fn scan(&self, ctx: ScanContext, request: ScanRequest) -> Result { - debug!( - "Scan skiplist memtable, ctx:{:?}, request:{:?}", - ctx, request - ); - - let num_rows = self.skiplist.len(); - let (reverse, batch_size) = (request.reverse, ctx.batch_size); - let iter = ColumnarIterImpl::new(self, ctx, request)?; - if reverse { - Ok(Box::new(ReversedColumnarIterator::new( - iter, num_rows, batch_size, - ))) - } else { - Ok(Box::new(iter)) - } - } - - fn approximate_memory_usage(&self) -> usize { - let encoded_size = self - .metrics - .row_encoded_size - .load(atomic::Ordering::Relaxed); - let arena_block_size = self.skiplist.arena_block_size(); - - // Ceil to block_size - (encoded_size + arena_block_size - 1) / arena_block_size * arena_block_size - } - - fn set_last_sequence(&self, sequence: SequenceNumber) -> Result<()> { - let last = self.last_sequence(); - ensure!( - sequence >= last, - "invalid sequence, given:{sequence}, last:{last}" - ); - - self.last_sequence - .store(sequence, atomic::Ordering::Relaxed); - - Ok(()) - } - - fn last_sequence(&self) -> SequenceNumber { - self.last_sequence.load(atomic::Ordering::Relaxed) - } - - fn time_range(&self) -> Option { - let min_time = self.min_time.load(atomic::Ordering::Relaxed); - let max_time = self.max_time.load(atomic::Ordering::Relaxed); - TimeRange::new(min_time.into(), (max_time + 1).into()) - } - - fn metrics(&self) -> MemtableMetrics { - let row_raw_size = self.metrics.row_raw_size.load(atomic::Ordering::Relaxed); - let row_encoded_size = self - .metrics - .row_encoded_size - .load(atomic::Ordering::Relaxed); - let row_count = self.metrics.row_count.load(atomic::Ordering::Relaxed); - MemtableMetrics { - row_raw_size, - row_encoded_size, - row_count, - } - } -} - -#[cfg(test)] -mod tests { - - use std::{ops::Bound, sync::Arc}; - - use arena::NoopCollector; - use bytes_ext::ByteVec; - use codec::memcomparable::MemComparable; - use common_types::{ - datum::Datum, - projected_schema::{ProjectedSchema, RowProjectorBuilder}, - record_batch::FetchedRecordBatch, - row::Row, - schema::IndexInWriterSchema, - tests::{build_row, build_schema}, - time::Timestamp, - }; - - use super::*; - use crate::memtable::{ - factory::{Factory, Options}, - skiplist::factory::SkiplistMemTableFactory, - test_util::{TestMemtableBuilder, TestUtil}, - MemTableRef, - }; - - struct TestMemtableBuilderImpl; - - impl TestMemtableBuilder for TestMemtableBuilderImpl { - fn build(&self, data: &[(KeySequence, Row)]) -> MemTableRef { - let schema = build_schema(); - let 
factory = SkiplistMemTableFactory; - let memtable = factory - .create_memtable(Options { - schema: schema.clone(), - arena_block_size: 512, - creation_sequence: 1, - collector: Arc::new(NoopCollector {}), - }) - .unwrap(); - - let mut ctx = - PutContext::new(IndexInWriterSchema::for_same_schema(schema.num_columns())); - for (seq, row) in data { - memtable.put(&mut ctx, *seq, row, &schema).unwrap(); - } - - memtable - } - } - - fn test_memtable_scan_for_scan_request( - schema: Schema, - memtable: Arc, - ) { - let projection: Vec = (0..schema.num_columns()).collect(); - let projected_schema = ProjectedSchema::new(schema, Some(projection)).unwrap(); - let fetched_schema = projected_schema.to_record_schema(); - let table_schema = projected_schema.table_schema(); - let row_projector_builder = - RowProjectorBuilder::new(fetched_schema, table_schema.clone(), None); - let testcases = vec![ - ( - // limited by sequence - ScanRequest { - start_user_key: Bound::Unbounded, - end_user_key: Bound::Unbounded, - sequence: 2, - row_projector_builder: row_projector_builder.clone(), - need_dedup: true, - reverse: false, - metrics_collector: None, - time_range: TimeRange::min_to_max(), - }, - vec![ - build_row(b"a", 1, 10.0, "v1", 1000, 1_000_000), - build_row(b"b", 2, 10.0, "v2", 2000, 2_000_000), - build_row(b"c", 3, 10.0, "v3", 3000, 3_000_000), - build_row(b"d", 4, 10.0, "v4", 4000, 4_000_000), - build_row(b"e", 5, 10.0, "v5", 5000, 5_000_000), - build_row(b"f", 6, 10.0, "v6", 6000, 6_000_000), - ], - ), - ( - // limited by sequence and start/end key - ScanRequest { - start_user_key: Bound::Included(build_scan_key("a", 1)), - end_user_key: Bound::Excluded(build_scan_key("e", 5)), - sequence: 2, - row_projector_builder: row_projector_builder.clone(), - need_dedup: true, - reverse: false, - metrics_collector: None, - time_range: TimeRange::min_to_max(), - }, - vec![ - build_row(b"a", 1, 10.0, "v1", 1000, 1_000_000), - build_row(b"b", 2, 10.0, "v2", 2000, 2_000_000), - build_row(b"c", 3, 10.0, "v3", 3000, 3_000_000), - build_row(b"d", 4, 10.0, "v4", 4000, 4_000_000), - ], - ), - ( - // limited by sequence and start/end key - // but seq is one smaller than last case - ScanRequest { - start_user_key: Bound::Included(build_scan_key("a", 1)), - end_user_key: Bound::Excluded(build_scan_key("e", 5)), - sequence: 1, - row_projector_builder, - need_dedup: true, - reverse: false, - metrics_collector: None, - time_range: TimeRange::min_to_max(), - }, - vec![ - build_row(b"a", 1, 10.0, "v1", 1000, 1_000_000), - build_row(b"b", 2, 10.0, "v2", 2000, 2_000_000), - build_row(b"c", 3, 10.0, "v3", 3000, 3_000_000), - ], - ), - ]; - - for (req, expected) in testcases { - let scan_ctx = ScanContext::default(); - let iter = memtable.scan(scan_ctx, req).unwrap(); - check_iterator(iter, expected); - } - } - - fn test_memtable_scan_for_projection( - schema: Schema, - memtable: Arc, - ) { - let projection: Vec = (0..2).collect(); - let projected_schema = ProjectedSchema::new(schema, Some(projection)).unwrap(); - let fetched_schema = projected_schema.to_record_schema(); - let table_schema = projected_schema.table_schema(); - let row_projector_builder = - RowProjectorBuilder::new(fetched_schema, table_schema.clone(), None); - let testcases = vec![( - ScanRequest { - start_user_key: Bound::Included(build_scan_key("a", 1)), - end_user_key: Bound::Excluded(build_scan_key("e", 5)), - sequence: 2, - row_projector_builder, - need_dedup: true, - reverse: false, - metrics_collector: None, - time_range: TimeRange::min_to_max(), - }, - vec![ - 
build_row_for_two_column(b"a", 1), - build_row_for_two_column(b"b", 2), - build_row_for_two_column(b"c", 3), - build_row_for_two_column(b"d", 4), - ], - )]; - - for (req, expected) in testcases { - let scan_ctx = ScanContext::default(); - let iter = memtable.scan(scan_ctx, req).unwrap(); - check_iterator(iter, expected); - } - } - - #[test] - fn test_memtable_scan() { - let data = test_data(); - let builder = TestMemtableBuilderImpl; - let test_util = TestUtil::new(builder, data); - let memtable = test_util.memtable(); - let schema = memtable.schema().clone(); - - test_memtable_scan_for_scan_request(schema.clone(), memtable.clone()); - test_memtable_scan_for_projection(schema, memtable); - } - - fn check_iterator>>( - iter: T, - expected_rows: Vec, - ) { - let mut visited_rows = 0; - for batch in iter { - let batch = batch.unwrap(); - for row_idx in 0..batch.num_rows() { - assert_eq!(batch.clone_row_at(row_idx), expected_rows[visited_rows]); - visited_rows += 1; - } - } - - assert_eq!(visited_rows, expected_rows.len()); - } - - fn build_scan_key(c1: &str, c2: i64) -> Bytes { - let mut buf = ByteVec::new(); - let encoder = MemComparable; - encoder.encode(&mut buf, &Datum::from(c1)).unwrap(); - encoder.encode(&mut buf, &Datum::from(c2)).unwrap(); - - Bytes::from(buf) - } - - pub fn build_row_for_two_column(key1: &[u8], key2: i64) -> Row { - let datums = vec![ - Datum::Varbinary(Bytes::copy_from_slice(key1)), - Datum::Timestamp(Timestamp::new(key2)), - ]; - - Row::from_datums(datums) - } - - fn test_data() -> Vec<(KeySequence, Row)> { - vec![ - ( - KeySequence::new(1, 1), - build_row(b"a", 1, 10.0, "v1", 1000, 1_000_000), - ), - ( - KeySequence::new(1, 2), - build_row(b"b", 2, 10.0, "v2", 2000, 2_000_000), - ), - ( - KeySequence::new(1, 3), - build_row( - b"c", - 3, - 10.0, - "primary_key same with next row", - 3000, - 3_000_000, - ), - ), - ( - KeySequence::new(1, 4), - build_row(b"c", 3, 10.0, "v3", 3000, 3_000_000), - ), - ( - KeySequence::new(2, 1), - build_row(b"d", 4, 10.0, "v4", 4000, 4_000_000), - ), - ( - KeySequence::new(2, 1), - build_row(b"e", 5, 10.0, "v5", 5000, 5_000_000), - ), - ( - KeySequence::new(2, 3), - build_row(b"f", 6, 10.0, "v6", 6000, 6_000_000), - ), - ( - KeySequence::new(3, 4), - build_row(b"g", 7, 10.0, "v7", 7000, 7_000_000), - ), - ] - } -} diff --git a/src/analytic_engine/src/memtable/test_util.rs b/src/analytic_engine/src/memtable/test_util.rs deleted file mode 100644 index 18a26f5eed..0000000000 --- a/src/analytic_engine/src/memtable/test_util.rs +++ /dev/null @@ -1,45 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use common_types::row::Row; - -use crate::memtable::*; - -pub trait TestMemtableBuilder { - fn build(&self, data: &[(KeySequence, Row)]) -> MemTableRef; -} - -pub struct TestUtil { - memtable: MemTableRef, - data: Vec<(KeySequence, Row)>, -} - -impl TestUtil { - pub fn new(builder: B, data: Vec<(KeySequence, Row)>) -> Self { - let memtable = builder.build(&data); - - Self { memtable, data } - } - - pub fn memtable(&self) -> MemTableRef { - self.memtable.clone() - } - - pub fn data(&self) -> Vec { - self.data.iter().map(|d| d.1.clone()).collect() - } -} diff --git a/src/analytic_engine/src/payload.rs b/src/analytic_engine/src/payload.rs deleted file mode 100644 index d114163beb..0000000000 --- a/src/analytic_engine/src/payload.rs +++ /dev/null @@ -1,338 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Payloads to write to wal - -use bytes_ext::{Buf, BufMut, SafeBuf, SafeBufMut}; -use codec::{ - columnar::{ColumnarDecoder, DecodeContext, DecodeResult}, - row::WalRowDecoder, - Decoder, -}; -use common_types::{ - row::{RowGroup, RowGroupBuilderFromColumn}, - schema::Schema, - table::TableId, -}; -use horaedbproto::{manifest as manifest_pb, table_requests}; -use macros::define_result; -use prost::Message; -use snafu::{Backtrace, OptionExt, ResultExt, Snafu}; -use wal::log_batch::{Payload, PayloadDecodeContext, PayloadDecoder}; - -use crate::{instance::write::WalEncodeVersion, table_options, TableOptions}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to encode header, err:{}", source))] - EncodeHeader { source: bytes_ext::Error }, - - #[snafu(display("Failed to encode body, err:{}.\nBacktrace:\n{}", source, backtrace))] - EncodeBody { - source: prost::EncodeError, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to decode header, err:{}", source))] - DecodeHeader { source: bytes_ext::Error }, - - #[snafu(display( - "Invalid wal entry header, value:{}.\nBacktrace:\n{}", - value, - backtrace - ))] - InvalidHeader { value: u8, backtrace: Backtrace }, - - #[snafu(display("Failed to decode body, err:{}.\nBacktrace:\n{}", source, backtrace))] - DecodeBody { - source: prost::DecodeError, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to decode schema, err:{}", source))] - DecodeSchema { source: common_types::schema::Error }, - - #[snafu(display("Failed to decode row, err:{}", source))] - DecodeRow { source: codec::row::Error }, - - #[snafu(display("Failed to decode column, err:{}", source))] - DecodeColumn { source: codec::columnar::Error }, - - #[snafu(display("Failed to build row group, err:{}", source))] - BuildRowGroup { source: common_types::row::Error }, - - #[snafu(display( - "Invalid version of write request, version:{version}.\nBacktrace:\n{backtrace}" - ))] - InvalidWriteReqVersion { 
version: u32, backtrace: Backtrace }, - - #[snafu(display("Table schema is not found.\nBacktrace:\n{}", backtrace))] - TableSchemaNotFound { backtrace: Backtrace }, - - #[snafu(display( - "Table options is not found in the write request.\nBacktrace:\n{}", - backtrace - ))] - TableOptionsNotFound { backtrace: Backtrace }, - - #[snafu(display("Invalid table options, err:{}", source))] - InvalidTableOptions { source: table_options::Error }, -} - -define_result!(Error); - -/// Wal entry header -#[derive(Clone, Copy)] -enum Header { - Write = 1, - AlterSchema = 2, - AlterOption = 3, -} - -impl Header { - pub fn to_u8(self) -> u8 { - self as u8 - } - - pub fn from_u8(value: u8) -> Option { - match value { - value if value == Self::Write as u8 => Some(Self::Write), - value if value == Self::AlterSchema as u8 => Some(Self::AlterSchema), - value if value == Self::AlterOption as u8 => Some(Self::AlterOption), - _ => None, - } - } -} - -fn write_header(header: Header, buf: &mut B) -> Result<()> { - buf.try_put_u8(header.to_u8()).context(EncodeHeader) -} - -/// Header size in bytes -const HEADER_SIZE: usize = 1; - -/// Write request to persist in wal -#[derive(Debug)] -pub enum WritePayload<'a> { - Write(&'a table_requests::WriteRequest), - AlterSchema(&'a manifest_pb::AlterSchemaMeta), - AlterOption(&'a manifest_pb::AlterOptionsMeta), -} - -impl<'a> Payload for WritePayload<'a> { - type Error = Error; - - fn encode_size(&self) -> usize { - let body_size = match self { - WritePayload::Write(req) => req.encoded_len(), - WritePayload::AlterSchema(req) => req.encoded_len(), - WritePayload::AlterOption(req) => req.encoded_len(), - }; - - HEADER_SIZE + body_size - } - - fn encode_to(&self, buf: &mut B) -> Result<()> { - match self { - WritePayload::Write(req) => { - write_header(Header::Write, buf)?; - req.encode(buf).context(EncodeBody) - } - WritePayload::AlterSchema(req) => { - write_header(Header::AlterSchema, buf)?; - req.encode(buf).context(EncodeBody) - } - WritePayload::AlterOption(req) => { - write_header(Header::AlterOption, buf)?; - req.encode(buf).context(EncodeBody) - } - } - } -} - -impl<'a> From<&'a table_requests::WriteRequest> for WritePayload<'a> { - fn from(write_request: &'a table_requests::WriteRequest) -> Self { - Self::Write(write_request) - } -} - -/// Payload decoded from wal -#[derive(Debug)] -pub enum ReadPayload { - Write { row_group: RowGroup }, - AlterSchema { schema: Schema }, - AlterOptions { options: TableOptions }, -} - -impl ReadPayload { - fn decode_write_from_pb(schema: &Schema, buf: &[u8]) -> Result { - let write_req_pb: table_requests::WriteRequest = - Message::decode(buf).context(DecodeBody)?; - - let version = { - let version = write_req_pb.version; - WalEncodeVersion::try_from_u32(version).context(InvalidWriteReqVersion { version })? - }; - match version { - WalEncodeVersion::RowWise => Self::decode_rowwise_write_req(write_req_pb), - WalEncodeVersion::Columnar => { - Self::decode_columnar_write_req(schema.clone(), write_req_pb) - } - } - } - - fn decode_rowwise_write_req(write_req_pb: table_requests::WriteRequest) -> Result { - // Consume and convert schema in pb - let schema: Schema = write_req_pb - .schema - .context(TableSchemaNotFound)? 
- .try_into() - .context(DecodeSchema)?; - - // Consume and convert rows in pb - let encoded_rows = write_req_pb.rows; - let mut rows = Vec::with_capacity(encoded_rows.len()); - let row_decoder = WalRowDecoder::new(&schema); - for row_bytes in &encoded_rows { - let row = row_decoder - .decode(&mut row_bytes.as_slice()) - .context(DecodeRow)?; - // We skip schema check here - rows.push(row); - } - - // The `rows` are decoded according to the schema, so there is no need to do one - // more check here. - let row_group = RowGroup::new_unchecked(schema, rows); - Ok(Self::Write { row_group }) - } - - fn decode_columnar_write_req( - schema: Schema, - write_req_pb: table_requests::WriteRequest, - ) -> Result { - let encoded_cols = write_req_pb.cols; - let mut row_group_builder = - RowGroupBuilderFromColumn::with_capacity(schema, encoded_cols.len()); - let mut decode_buf = Vec::new(); - for encoded_col in encoded_cols { - let decoder = ColumnarDecoder; - let mut col_buf = encoded_col.as_slice(); - let decode_ctx = DecodeContext { - buf: &mut decode_buf, - }; - let DecodeResult { column_id, datums } = decoder - .decode(decode_ctx, &mut col_buf) - .context(DecodeColumn)?; - - row_group_builder - .try_add_column(column_id, datums) - .context(BuildRowGroup)?; - } - - let row_group = row_group_builder.build(); - Ok(Self::Write { row_group }) - } - - fn decode_alter_schema_from_pb(buf: &[u8]) -> Result { - let alter_schema_meta_pb: manifest_pb::AlterSchemaMeta = - Message::decode(buf).context(DecodeBody)?; - - // Consume and convert schema in pb - let schema: Schema = alter_schema_meta_pb - .schema - .context(TableSchemaNotFound)? - .try_into() - .context(DecodeSchema)?; - - Ok(Self::AlterSchema { schema }) - } - - fn decode_alter_option_from_pb(buf: &[u8]) -> Result { - let alter_option_meta_pb: manifest_pb::AlterOptionsMeta = - Message::decode(buf).context(DecodeBody)?; - - // Consume and convert options in pb - let options: TableOptions = alter_option_meta_pb - .options - .context(TableOptionsNotFound)? - .try_into() - .context(InvalidTableOptions)?; - - Ok(Self::AlterOptions { options }) - } -} - -/// The provider is used to provide the schema according to the table id. -pub trait TableSchemaProvider { - fn table_schema(&self, table_id: TableId) -> Option; -} - -pub struct SingleSchemaProviderAdapter { - pub schema: Schema, -} - -impl TableSchemaProvider for SingleSchemaProviderAdapter { - fn table_schema(&self, _table_id: TableId) -> Option { - Some(self.schema.clone()) - } -} - -/// Wal payload decoder -pub struct WalDecoder
<P> { - schema_provider: P, -} - -impl<P> WalDecoder<P> { - pub fn new(schema_provider: P) -> Self { - Self { schema_provider } - } -} - -impl<P> PayloadDecoder for WalDecoder<P>
-where - P: TableSchemaProvider + Send + Sync, -{ - type Error = Error; - type Target = ReadPayload; - - fn decode(&self, ctx: &PayloadDecodeContext, buf: &mut B) -> Result { - let header_value = buf.try_get_u8().context(DecodeHeader)?; - let header = match Header::from_u8(header_value) { - Some(header) => header, - None => { - return InvalidHeader { - value: header_value, - } - .fail() - } - }; - - let chunk = buf.chunk(); - let schema = self - .schema_provider - .table_schema(ctx.table_id) - .context(TableSchemaNotFound)?; - let payload = match header { - Header::Write => ReadPayload::decode_write_from_pb(&schema, chunk)?, - Header::AlterSchema => ReadPayload::decode_alter_schema_from_pb(chunk)?, - Header::AlterOption => ReadPayload::decode_alter_option_from_pb(chunk)?, - }; - - Ok(payload) - } -} diff --git a/src/analytic_engine/src/prefetchable_stream.rs b/src/analytic_engine/src/prefetchable_stream.rs deleted file mode 100644 index a49b33aa2f..0000000000 --- a/src/analytic_engine/src/prefetchable_stream.rs +++ /dev/null @@ -1,187 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// A stream can be prefetchable. - -use async_stream::stream; -use async_trait::async_trait; -use futures::{Stream, StreamExt}; - -pub type BoxedStream = Box + Send + Unpin>; - -#[async_trait] -pub trait PrefetchableStream: Send { - type Item; - - /// Start the prefetch procedure in background. In most implementation, this - /// method should not block the caller, that is to say, the prefetching - /// procedure should be run in the background. - async fn start_prefetch(&mut self); - - /// Fetch the next record batch. - /// - /// If None is returned, all the following batches will be None. - async fn fetch_next(&mut self) -> Option; -} - -pub trait PrefetchableStreamExt: PrefetchableStream { - fn into_boxed_stream(mut self) -> BoxedStream - where - Self: 'static + Sized, - Self::Item: Send, - { - let stream = stream! { - while let Some(v) = self.fetch_next().await { - yield v; - } - }; - - // FIXME: Will this conversion to a stream introduce overhead? - Box::new(Box::pin(stream)) - } - - fn filter_map(self, f: F) -> FilterMap - where - F: FnMut(Self::Item) -> Option, - Self: Sized, - { - FilterMap { stream: self, f } - } - - fn map(self, f: F) -> Map - where - F: FnMut(Self::Item) -> O, - Self: Sized, - { - Map { stream: self, f } - } -} - -impl PrefetchableStreamExt for T where T: PrefetchableStream {} - -#[async_trait] -impl PrefetchableStream for Box> { - type Item = T; - - async fn start_prefetch(&mut self) { - (**self).start_prefetch().await; - } - - async fn fetch_next(&mut self) -> Option { - (**self).fetch_next().await - } -} - -/// The implementation for `filter_map` operator on the PrefetchableStream. 
-pub struct FilterMap { - stream: St, - f: F, -} - -#[async_trait] -impl PrefetchableStream for FilterMap -where - St: PrefetchableStream, - F: FnMut(St::Item) -> Option + Send, - O: Send, -{ - type Item = O; - - async fn start_prefetch(&mut self) { - self.stream.start_prefetch().await; - } - - async fn fetch_next(&mut self) -> Option { - loop { - match self.stream.fetch_next().await { - Some(v) => { - let filtered_batch = (self.f)(v); - if filtered_batch.is_some() { - return filtered_batch; - } - // If the filtered batch is none, just continue to fetch and - // filter until the underlying stream is exhausted. - } - None => return None, - } - } - } -} - -/// The implementation for `map` operator on the PrefetchableStream. -pub struct Map { - stream: St, - f: F, -} - -#[async_trait] -impl PrefetchableStream for Map -where - St: PrefetchableStream, - F: FnMut(St::Item) -> O + Send, - O: Send, -{ - type Item = O; - - async fn start_prefetch(&mut self) { - self.stream.start_prefetch().await; - } - - async fn fetch_next(&mut self) -> Option { - self.stream.fetch_next().await.map(|v| (self.f)(v)) - } -} - -/// A noop prefetcher. -/// -/// A wrapper with a underlying stream without prefetch logic. -pub struct NoopPrefetcher(pub BoxedStream); - -#[async_trait] -impl PrefetchableStream for NoopPrefetcher { - type Item = T; - - async fn start_prefetch(&mut self) { - // It's just a noop operation. - } - - async fn fetch_next(&mut self) -> Option { - self.0.next().await - } -} - -#[cfg(test)] -mod tests { - use futures::stream; - - use super::*; - - #[tokio::test] - async fn test_trait_object_prefetchable_stream() { - let numbers = vec![1, 2, 3]; - let stream = stream::iter(numbers.clone()); - let stream = NoopPrefetcher(Box::new(stream)); - let mut stream: Box> = Box::new(stream); - - let mut fetched_numbers = Vec::with_capacity(numbers.len()); - while let Some(v) = stream.fetch_next().await { - fetched_numbers.push(v); - } - - assert_eq!(numbers, fetched_numbers); - } -} diff --git a/src/analytic_engine/src/row_iter/chain.rs b/src/analytic_engine/src/row_iter/chain.rs deleted file mode 100644 index 50d6c3aa39..0000000000 --- a/src/analytic_engine/src/row_iter/chain.rs +++ /dev/null @@ -1,499 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
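To make the intended call pattern of `PrefetchableStream` concrete, here is a small illustrative sketch (not part of this patch): a consumer that starts prefetching and then drains the stream, combined with the `map` adapter from `PrefetchableStreamExt`. The `drain_stream` and `example` helpers are invented for the example and assume they live somewhere with access to this module.

use futures::stream;

// Hypothetical helper: kick off background prefetching, then drain the stream.
async fn drain_stream<S: PrefetchableStream>(mut s: S) -> Vec<S::Item> {
    s.start_prefetch().await;
    let mut items = Vec::new();
    while let Some(item) = s.fetch_next().await {
        items.push(item);
    }
    items
}

// Mirrors the unit test above: wrap a plain futures stream in NoopPrefetcher
// and apply the `map` adapter before draining it.
async fn example() -> Vec<i32> {
    let inner = stream::iter(vec![1, 2, 3]);
    let prefetchable = NoopPrefetcher(Box::new(inner)).map(|v| v * 10);
    drain_stream(prefetchable).await
}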
- -use std::{ - fmt, - time::{Duration, Instant}, -}; - -use async_trait::async_trait; -use common_types::{ - projected_schema::{ProjectedSchema, RowProjectorBuilder}, - record_batch::FetchedRecordBatch, - request_id::RequestId, - schema::RecordSchemaWithKey, -}; -use generic_error::GenericError; -use logger::debug; -use macros::define_result; -use snafu::{ResultExt, Snafu}; -use table_engine::{predicate::PredicateRef, table::TableId}; -use trace_metric::{MetricsCollector, TraceMetricWhenDrop}; - -use crate::{ - instance::SstReadOptionsBuilder, - row_iter::{ - record_batch_stream::{ - self, BoxedPrefetchableRecordBatchStream, MemtableStreamContext, SstStreamContext, - }, - FetchedRecordBatchIterator, - }, - space::SpaceId, - sst::{ - factory::{FactoryRef as SstFactoryRef, ObjectStorePickerRef}, - file::FileHandle, - }, - table::version::{MemTableVec, SamplingMemTable}, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Fail to build stream from the memtable, err:{}", source))] - BuildStreamFromMemtable { - source: crate::row_iter::record_batch_stream::Error, - }, - - #[snafu(display("Fail to build stream from the sst file, err:{}", source))] - BuildStreamFromSst { - source: crate::row_iter::record_batch_stream::Error, - }, - - #[snafu(display("Fail to poll next record batch, err:{}", source))] - PollNextRecordBatch { source: GenericError }, -} - -define_result!(Error); - -/// Required parameters to construct the [Builder]. -#[derive(Clone, Debug)] -pub struct ChainConfig<'a> { - pub request_id: RequestId, - pub metrics_collector: Option, - pub deadline: Option, - pub space_id: SpaceId, - pub table_id: TableId, - /// The projected schema to read. - pub projected_schema: ProjectedSchema, - /// Predicate of the query. - pub predicate: PredicateRef, - pub num_streams_to_prefetch: usize, - - pub sst_read_options_builder: SstReadOptionsBuilder, - /// Sst factory - pub sst_factory: &'a SstFactoryRef, - /// Store picker for persisting sst. - pub store_picker: &'a ObjectStorePickerRef, -} - -/// Builder for [ChainIterator]. -#[must_use] -pub struct Builder<'a> { - config: ChainConfig<'a>, - /// Sampling memtable to read. 
- sampling_mem: Option, - memtables: MemTableVec, - ssts: Vec>, -} - -impl<'a> Builder<'a> { - pub fn new(config: ChainConfig<'a>) -> Self { - Self { - config, - sampling_mem: None, - memtables: Vec::new(), - ssts: Vec::new(), - } - } - - pub fn sampling_mem(mut self, sampling_mem: Option) -> Self { - self.sampling_mem = sampling_mem; - self - } - - pub fn memtables(mut self, memtables: MemTableVec) -> Self { - self.memtables = memtables; - self - } - - pub fn ssts(mut self, ssts: Vec>) -> Self { - self.ssts = ssts; - self - } -} - -impl<'a> Builder<'a> { - pub async fn build(self) -> Result { - let fetched_schema = self.config.projected_schema.to_record_schema(); - let table_schema = self.config.projected_schema.table_schema(); - let row_projector_builder = - RowProjectorBuilder::new(fetched_schema.clone(), table_schema.clone(), None); - let sst_read_options = self - .config - .sst_read_options_builder - .build(row_projector_builder.clone()); - - let memtable_stream_ctx = MemtableStreamContext { - row_projector_builder, - fetched_schema: fetched_schema.clone(), - predicate: self.config.predicate, - need_dedup: false, - reverse: false, - deadline: self.config.deadline, - }; - - let sst_stream_ctx = SstStreamContext { - sst_read_options, - fetched_schema, - }; - - let total_sst_streams: usize = self.ssts.iter().map(|v| v.len()).sum(); - let mut total_streams = self.memtables.len() + total_sst_streams; - if self.sampling_mem.is_some() { - total_streams += 1; - } - let mut streams = Vec::with_capacity(total_streams); - - if let Some(v) = &self.sampling_mem { - let stream = record_batch_stream::filtered_stream_from_memtable( - &v.mem, - &memtable_stream_ctx, - self.config.metrics_collector.clone(), - ) - .context(BuildStreamFromMemtable)?; - streams.push(stream); - } - - for memtable in &self.memtables { - let stream = record_batch_stream::filtered_stream_from_memtable( - // chain iterator only handle the case reading in no order so just read in asc - // order by default. - &memtable.mem, - &memtable_stream_ctx, - self.config.metrics_collector.clone(), - ) - .context(BuildStreamFromMemtable)?; - streams.push(stream); - } - - for leveled_ssts in &self.ssts { - for sst in leveled_ssts { - let stream = record_batch_stream::filtered_stream_from_sst_file( - self.config.space_id, - self.config.table_id, - sst, - self.config.sst_factory, - self.config.store_picker, - &sst_stream_ctx, - self.config.metrics_collector.clone(), - ) - .await - .context(BuildStreamFromSst)?; - streams.push(stream); - } - } - - debug!( - "Build chain iterator, table_id:{:?}, request_id:{}, memtables:{:?}, ssts:{:?}", - self.config.table_id, self.config.request_id, self.memtables, self.ssts - ); - - Ok(ChainIterator { - space_id: self.config.space_id, - table_id: self.config.table_id, - request_id: self.config.request_id, - schema: self.config.projected_schema.to_record_schema_with_key(), - streams, - num_streams_to_prefetch: self.config.num_streams_to_prefetch, - ssts: self.ssts, - next_stream_idx: 0, - next_prefetch_stream_idx: 0, - inited_at: None, - created_at: Instant::now(), - metrics: Metrics::new( - self.memtables.len(), - total_sst_streams, - self.config.metrics_collector.clone(), - ), - }) - } -} - -/// Metrics for [ChainIterator]. -#[derive(TraceMetricWhenDrop)] -struct Metrics { - #[metric(number)] - num_memtables: usize, - #[metric(number)] - num_ssts: usize, - /// Total batch fetched. - #[metric(number)] - total_batch_fetched: usize, - /// Total rows fetched. 
- #[metric(number)] - total_rows_fetched: usize, - /// Create time of the metrics. - #[metric(duration)] - since_create: Duration, - /// Inited time of the iterator. - #[metric(duration)] - since_init: Duration, - /// Actual scan duration. - #[metric(duration)] - scan_duration: Duration, - #[metric(collector)] - metrics_collector: Option, -} - -impl Metrics { - fn new( - num_memtables: usize, - num_ssts: usize, - metrics_collector: Option, - ) -> Self { - Self { - num_memtables, - num_ssts, - total_batch_fetched: 0, - total_rows_fetched: 0, - since_create: Duration::default(), - since_init: Duration::default(), - scan_duration: Duration::default(), - metrics_collector, - } - } -} - -impl fmt::Debug for Metrics { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Metrics") - .field("num_memtables", &self.num_memtables) - .field("num_ssts", &self.num_ssts) - .field("total_batch_fetched", &self.total_batch_fetched) - .field("total_rows_fetched", &self.total_rows_fetched) - .field("duration_since_create", &self.since_create) - .field("duration_since_init", &self.since_init) - .field("scan_duration", &self.scan_duration) - .finish() - } -} - -/// ChainIter chains memtables and ssts and reads the [RecordBatch] from them -/// batch by batch. -/// -/// Note: The chain order is `memtable -> sst level 0 -> sst_level 1`. -pub struct ChainIterator { - space_id: SpaceId, - table_id: TableId, - request_id: RequestId, - schema: RecordSchemaWithKey, - streams: Vec, - num_streams_to_prefetch: usize, - /// ssts are kept here to avoid them from being purged. - #[allow(dead_code)] - ssts: Vec>, - /// The range of the index is [0, streams.len()] and the iterator is - /// exhausted if it reaches `streams.len()`. - next_stream_idx: usize, - next_prefetch_stream_idx: usize, - - inited_at: Option, - created_at: Instant, - /// metrics for the iterator. - metrics: Metrics, -} - -impl ChainIterator { - fn init_if_necessary(&mut self) { - if self.inited_at.is_some() { - return; - } - self.inited_at = Some(Instant::now()); - - debug!("Init ChainIterator, space_id:{}, table_id:{:?}, request_id:{}, total_streams:{}, schema:{:?}", - self.space_id, self.table_id, self.request_id, self.streams.len(), self.schema - ); - } - - /// Maybe prefetch the necessary stream for future reading. - async fn maybe_prefetch(&mut self) { - while self.next_prefetch_stream_idx < self.next_stream_idx + self.num_streams_to_prefetch - && self.next_prefetch_stream_idx < self.streams.len() - { - self.streams[self.next_prefetch_stream_idx] - .start_prefetch() - .await; - self.next_prefetch_stream_idx += 1; - } - } - - async fn next_batch_internal(&mut self) -> Result> { - self.init_if_necessary(); - self.maybe_prefetch().await; - - while self.next_stream_idx < self.streams.len() { - let read_stream = &mut self.streams[self.next_stream_idx]; - let sequenced_record_batch = read_stream - .fetch_next() - .await - .transpose() - .context(PollNextRecordBatch)?; - - match sequenced_record_batch { - Some(v) => { - self.metrics.total_rows_fetched += v.num_rows(); - self.metrics.total_batch_fetched += 1; - - if v.num_rows() > 0 { - return Ok(Some(v.record_batch)); - } - } - // Fetch next stream only if the current sequence_record_batch is None. 
- None => { - self.next_stream_idx += 1; - self.maybe_prefetch().await; - } - } - } - - self.metrics.since_create = self.created_at.elapsed(); - self.metrics.since_init = self - .inited_at - .as_ref() - .map(|v| v.elapsed()) - .unwrap_or_default(); - - Ok(None) - } -} - -impl Drop for ChainIterator { - fn drop(&mut self) { - debug!( - "Chain iterator dropped, space_id:{}, table_id:{:?}, request_id:{}, inited_at:{:?}, metrics:{:?}", - self.space_id, self.table_id, self.request_id, self.inited_at, self.metrics, - ); - } -} - -#[async_trait] -impl FetchedRecordBatchIterator for ChainIterator { - type Error = Error; - - fn schema(&self) -> &RecordSchemaWithKey { - &self.schema - } - - async fn next_batch(&mut self) -> Result> { - let timer = Instant::now(); - let res = self.next_batch_internal().await; - self.metrics.scan_duration += timer.elapsed(); - - res - } -} - -#[cfg(test)] -mod tests { - use common_types::{ - self, - row::Row, - tests::{build_row, build_schema}, - SequenceNumber, - }; - - use super::*; - use crate::row_iter::tests::check_iterator; - - async fn run_and_check(testcases: Vec<(SequenceNumber, Vec)>) { - let schema = build_schema(); - - let expect_rows: Vec<_> = testcases - .iter() - .flat_map(|(_, rows)| rows.clone()) - .collect(); - - let streams = - record_batch_stream::tests::build_sequenced_record_batch_stream(&schema, testcases); - - let mut chain_iter = ChainIterator { - space_id: 0, - table_id: TableId::MIN, - request_id: RequestId::next_id(), - schema: schema.to_record_schema_with_key(), - streams, - num_streams_to_prefetch: 2, - ssts: Vec::new(), - next_stream_idx: 0, - next_prefetch_stream_idx: 0, - inited_at: None, - created_at: Instant::now(), - metrics: Metrics::new(0, 0, None), - }; - - check_iterator(&mut chain_iter, expect_rows).await; - } - - #[tokio::test] - async fn test_chain_multiple_streams() { - let testcases = vec![ - // (sequence, rows) - ( - 10, - vec![build_row(b"key4", 1000000, 10.0, "v4", 1000, 1_000_000)], - ), - ( - 20, - vec![build_row(b"key2", 1000000, 10.0, "v2", 2000, 2_000_000)], - ), - ( - 100, - vec![build_row(b"key3", 1000000, 10.0, "v3", 3000, 3_000_000)], - ), - ( - 1, - vec![build_row(b"key1", 1000000, 10.0, "v1", 4000, 4_000_000)], - ), - ]; - run_and_check(testcases).await; - } - - #[tokio::test] - async fn test_chain_empty_streams() { - let testcases = vec![ - // (sequence, rows) - (10, vec![]), - (20, vec![]), - (100, vec![]), - (1, vec![]), - ]; - run_and_check(testcases).await; - } - - #[tokio::test] - async fn test_chain_no_streams() { - let testcases = vec![]; - run_and_check(testcases).await; - } - - #[tokio::test] - async fn test_chain_half_empty_streams() { - let testcases = vec![ - // (sequence, rows) - ( - 10, - vec![build_row(b"key4", 1000000, 10.0, "v4", 1000, 1_000_000)], - ), - (20, vec![]), - (100, vec![]), - ( - 1, - vec![build_row(b"key1", 1000000, 10.0, "v1", 1000, 1_000_000)], - ), - ]; - run_and_check(testcases).await; - } -} diff --git a/src/analytic_engine/src/row_iter/dedup.rs b/src/analytic_engine/src/row_iter/dedup.rs deleted file mode 100644 index 83ee9b888a..0000000000 --- a/src/analytic_engine/src/row_iter/dedup.rs +++ /dev/null @@ -1,264 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::cmp::Ordering; - -use async_trait::async_trait; -use common_types::{ - record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, - request_id::RequestId, - row::{Row, RowViewOnBatch, RowWithMeta}, - schema::RecordSchemaWithKey, -}; -use generic_error::{BoxError, GenericError}; -use logger::{info, trace}; -use macros::define_result; -use snafu::{ResultExt, Snafu}; - -use crate::row_iter::{FetchedRecordBatchIterator, IterOptions}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to iterate column, error:{:?}", source))] - IterateColumn { source: common_types::row::Error }, - - #[snafu(display("Failed to build record batch, error:{:?}", source))] - BuildRecordBatch { - source: common_types::record_batch::Error, - }, - - #[snafu(display("Failed to append row, err:{:?}", source))] - AppendRow { - source: common_types::record_batch::Error, - }, - - #[snafu(display("Failed to read data from the sub iterator, err:{:?}", source))] - ReadFromSubIter { source: GenericError }, -} - -define_result!(Error); - -/// Dedup the elements from the `iter` by choosing the first one in the -/// duplicate rows. -pub struct DedupIterator { - request_id: RequestId, - schema: RecordSchemaWithKey, - record_batch_builder: FetchedRecordBatchBuilder, - iter: I, - /// Previous row returned. - prev_row: Option, - /// Store which row in record batch is keep, use Vec is a bit faster - /// than a bitmap. - selected_rows: Vec, - - // Metrics: - total_duplications: usize, - total_selected_rows: usize, -} - -impl DedupIterator { - pub fn new(request_id: RequestId, iter: I, iter_options: IterOptions) -> Self { - let schema_with_key = iter.schema(); - let primary_key_indexes = schema_with_key.primary_key_idx().to_vec(); - let fetched_schema = schema_with_key.to_record_schema(); - let record_batch_builder = FetchedRecordBatchBuilder::with_capacity( - fetched_schema, - Some(primary_key_indexes), - iter_options.batch_size, - ); - Self { - request_id, - schema: schema_with_key.clone(), - record_batch_builder, - iter, - prev_row: None, - selected_rows: Vec::new(), - total_duplications: 0, - total_selected_rows: 0, - } - } - - fn dedup_batch(&mut self, record_batch: FetchedRecordBatch) -> Result { - self.selected_rows.clear(); - // Ignore all rows by default. - self.selected_rows.resize(record_batch.num_rows(), false); - - if record_batch.is_empty() { - return Ok(record_batch); - } - - // Dedup batch. - for col_idx in self.schema.primary_key_idx() { - let column = record_batch.column(*col_idx); - - column.dedup(&mut self.selected_rows); - } - - // Dedup first row in record batch with previous row. - if let Some(prev_row) = &self.prev_row { - let prev_row_view = RowWithMeta { - row: prev_row, - schema: &self.schema, - }; - let curr_row_view = RowViewOnBatch { - record_batch: &record_batch, - // First row. - row_idx: 0, - }; - - let is_equal = matches!( - // TODO(yingwen): Compare row needs clone data of row. 
- self.schema.compare_row(&prev_row_view, &curr_row_view), - Ordering::Equal - ); - - if is_equal { - // Depulicate with previous row. - self.selected_rows[0] = false; - } - } - - let selected_num = self - .selected_rows - .iter() - .map(|v| if *v { 1 } else { 0 }) - .sum(); - - // Eventhough all rows are duplicate, we can still use row pointed by - // prev_row_idx because they have same row key. - self.prev_row = Some(record_batch.clone_row_at(record_batch.num_rows() - 1)); - - self.filter_batch(record_batch, selected_num) - } - - /// Filter batch by `selected_rows`. - fn filter_batch( - &mut self, - record_batch: FetchedRecordBatch, - selected_num: usize, - ) -> Result { - self.total_selected_rows += selected_num; - self.total_duplications += record_batch.num_rows() - selected_num; - - if selected_num == record_batch.num_rows() { - // No duplicate rows in batch. - return Ok(record_batch); - } - - self.record_batch_builder.clear(); - for (row_idx, selected) in self.selected_rows.iter().enumerate() { - if *selected { - self.record_batch_builder - .append_row_view(&RowViewOnBatch { - record_batch: &record_batch, - row_idx, - }) - .context(AppendRow)?; - } - } - - self.record_batch_builder.build().context(BuildRecordBatch) - } -} - -#[async_trait] -impl FetchedRecordBatchIterator for DedupIterator { - type Error = Error; - - fn schema(&self) -> &RecordSchemaWithKey { - &self.schema - } - - async fn next_batch(&mut self) -> Result> { - match self - .iter - .next_batch() - .await - .box_err() - .context(ReadFromSubIter)? - { - Some(record_batch) => { - trace!( - "DedupIterator received next record batch, request_id:{}, batch:{:?}", - self.request_id, - record_batch - ); - - self.dedup_batch(record_batch).map(Some) - } - None => { - info!( - "DedupIterator received none record batch, request_id:{}, total_duplications:{}, total_selected_rows:{}", - self.request_id, self.total_duplications, self.total_selected_rows, - ); - - Ok(None) - } - } - } -} - -#[cfg(test)] -mod tests { - use common_types::tests::{build_row, build_schema}; - - use super::*; - use crate::row_iter::tests::{ - build_fetched_record_batch_with_key, check_iterator, VectorIterator, - }; - - #[tokio::test] - async fn test_dedup_iterator() { - // first two columns are key columns - let schema = build_schema(); - let iter = VectorIterator::new( - schema.to_record_schema_with_key(), - vec![ - build_fetched_record_batch_with_key( - schema.clone(), - vec![ - build_row(b"a", 1, 10.0, "v1", 1000, 1_000_000), - build_row(b"a", 1, 10.0, "v", 1000, 1_000_000), - build_row(b"a", 2, 10.0, "v2", 2000, 2_000_000), - ], - ), - build_fetched_record_batch_with_key( - schema, - vec![ - build_row(b"a", 2, 10.0, "v", 2000, 2_000_000), - build_row(b"a", 3, 10.0, "v3", 3000, 3_000_000), - build_row(b"a", 3, 10.0, "v", 3000, 3_000_000), - build_row(b"a", 4, 10.0, "v4", 4000, 4_000_000), - ], - ), - ], - ); - - let mut iter = - DedupIterator::new(RequestId::next_id(), iter, IterOptions { batch_size: 500 }); - check_iterator( - &mut iter, - vec![ - build_row(b"a", 1, 10.0, "v1", 1000, 1_000_000), - build_row(b"a", 2, 10.0, "v2", 2000, 2_000_000), - build_row(b"a", 3, 10.0, "v3", 3000, 3_000_000), - build_row(b"a", 4, 10.0, "v4", 4000, 4_000_000), - ], - ) - .await; - } -} diff --git a/src/analytic_engine/src/row_iter/merge.rs b/src/analytic_engine/src/row_iter/merge.rs deleted file mode 100644 index 6df277843c..0000000000 --- a/src/analytic_engine/src/row_iter/merge.rs +++ /dev/null @@ -1,1036 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) 
under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{ - cmp, - cmp::Ordering, - collections::BinaryHeap, - mem, - ops::{Deref, DerefMut}, - time::{Duration, Instant}, -}; - -use async_trait::async_trait; -use common_types::{ - projected_schema::{ProjectedSchema, RowProjectorBuilder}, - record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, - request_id::RequestId, - row::RowViewOnBatch, - schema::RecordSchemaWithKey, - SequenceNumber, -}; -use futures::{stream::FuturesUnordered, StreamExt}; -use generic_error::GenericError; -use logger::{debug, trace}; -use macros::define_result; -use snafu::{ensure, Backtrace, ResultExt, Snafu}; -use table_engine::{predicate::PredicateRef, table::TableId}; -use trace_metric::{MetricsCollector, TraceMetricWhenDrop}; - -use crate::{ - instance::SstReadOptionsBuilder, - row_iter::{ - record_batch_stream::{ - self, BoxedPrefetchableRecordBatchStream, MemtableStreamContext, SequencedRecordBatch, - SstStreamContext, - }, - FetchedRecordBatchIterator, IterOptions, - }, - space::SpaceId, - sst::{ - factory::{FactoryRef as SstFactoryRef, ObjectStorePickerRef}, - file::{FileHandle, Level, SST_LEVEL_NUM}, - }, - table::version::{MemTableVec, SamplingMemTable}, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display( - "Expect the same schema, expect:{:?}, given:{:?}.\nBacktrace:\n{}", - expect, - given, - backtrace - ))] - MismatchedSchema { - expect: RecordSchemaWithKey, - given: RecordSchemaWithKey, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to pull record batch, error:{}", source))] - PullRecordBatch { source: GenericError }, - - #[snafu(display("Failed to build record batch, error:{}", source))] - BuildRecordBatch { - source: common_types::record_batch::Error, - }, - - #[snafu(display("Failed to append row, err:{:?}", source))] - AppendRow { - source: common_types::record_batch::Error, - }, - - #[snafu(display("Failed to build stream from memtable, err:{}", source))] - BuildStreamFromMemtable { - source: crate::row_iter::record_batch_stream::Error, - }, - - #[snafu(display("Failed to build record batch from sst, err:{}", source))] - BuildStreamFromSst { - source: crate::row_iter::record_batch_stream::Error, - }, -} - -define_result!(Error); - -/// Required parameters to construct the [MergeBuilder] -#[derive(Debug)] -pub struct MergeConfig<'a> { - pub request_id: RequestId, - pub metrics_collector: Option, - /// None for background jobs, such as: compaction - pub deadline: Option, - pub space_id: SpaceId, - pub table_id: TableId, - /// Max visible sequence (inclusive) - // TODO: remove it? seems useless. - pub sequence: SequenceNumber, - /// The projected schema to read. - pub projected_schema: ProjectedSchema, - /// The predicate of the query. 
- pub predicate: PredicateRef, - - pub sst_read_options_builder: SstReadOptionsBuilder, - /// Sst factory - pub sst_factory: &'a SstFactoryRef, - /// Store picker for persisting sst. - pub store_picker: &'a ObjectStorePickerRef, - - pub merge_iter_options: IterOptions, - - pub need_dedup: bool, - // TODO: Currently, the read the sst in a reverse order is not supported yet, that is to say, - // the output won't be expected if it is set. - pub reverse: bool, -} - -/// Builder for building merge stream from memtables and sst files. -#[must_use] -pub struct MergeBuilder<'a> { - config: MergeConfig<'a>, - - /// Sampling memtable to read. - sampling_mem: Option, - - /// MemTables to read. - memtables: MemTableVec, - - /// Ssts to read of each level. - ssts: Vec>, -} - -impl<'a> MergeBuilder<'a> { - pub fn new(config: MergeConfig<'a>) -> Self { - Self { - config, - sampling_mem: None, - memtables: Vec::new(), - ssts: vec![Vec::new(); SST_LEVEL_NUM], - } - } - - pub fn sampling_mem(mut self, sampling_mem: Option) -> Self { - self.sampling_mem = sampling_mem; - self - } - - pub fn memtables(mut self, memtables: MemTableVec) -> Self { - self.memtables = memtables; - self - } - - pub fn ssts_of_level(mut self, ssts: Vec>) -> Self { - self.ssts = ssts; - self - } - - pub fn mut_memtables(&mut self) -> &mut MemTableVec { - &mut self.memtables - } - - /// Returns file handles in `level`, panic if level >= MAX_LEVEL - pub fn mut_ssts_of_level(&mut self, level: Level) -> &mut Vec { - &mut self.ssts[level.as_usize()] - } - - pub async fn build(self) -> Result { - let fetched_schema = self.config.projected_schema.to_record_schema_with_key(); - let primary_key_indexes = fetched_schema.primary_key_idx().to_vec(); - let fetched_schema = fetched_schema.into_record_schema(); - let table_schema = self.config.projected_schema.table_schema(); - let row_projector_builder = RowProjectorBuilder::new( - fetched_schema.clone(), - table_schema.clone(), - Some(primary_key_indexes), - ); - let sst_read_options = self - .config - .sst_read_options_builder - .build(row_projector_builder.clone()); - - let memtable_stream_ctx = MemtableStreamContext { - row_projector_builder, - fetched_schema: fetched_schema.clone(), - predicate: self.config.predicate, - need_dedup: self.config.need_dedup, - reverse: self.config.reverse, - deadline: self.config.deadline, - }; - - let sst_stream_ctx = SstStreamContext { - sst_read_options, - fetched_schema, - }; - - let sst_streams_num: usize = self - .ssts - .iter() - .map(|leveled_ssts| leveled_ssts.len()) - .sum(); - let mut streams_num = sst_streams_num + self.memtables.len(); - if self.sampling_mem.is_some() { - streams_num += 1; - } - let mut streams = Vec::with_capacity(streams_num); - - debug!( - "Build merge iterator, table_id:{:?}, request_id:{}, sampling_mem:{:?}, memtables:{:?}, ssts:{:?}", - self.config.table_id, - self.config.request_id, - self.sampling_mem, - self.memtables, - self.ssts - ); - - if let Some(v) = &self.sampling_mem { - let stream = record_batch_stream::filtered_stream_from_memtable( - &v.mem, - &memtable_stream_ctx, - self.config.metrics_collector.clone(), - ) - .context(BuildStreamFromMemtable)?; - streams.push(stream); - } - - for memtable in &self.memtables { - let stream = record_batch_stream::filtered_stream_from_memtable( - &memtable.mem, - &memtable_stream_ctx, - self.config.metrics_collector.clone(), - ) - .context(BuildStreamFromMemtable)?; - streams.push(stream); - } - - let mut sst_ids = Vec::with_capacity(self.ssts.len()); - for leveled_ssts in 
&self.ssts { - for f in leveled_ssts { - let stream = record_batch_stream::filtered_stream_from_sst_file( - self.config.space_id, - self.config.table_id, - f, - self.config.sst_factory, - self.config.store_picker, - &sst_stream_ctx, - self.config.metrics_collector.clone(), - ) - .await - .context(BuildStreamFromSst)?; - streams.push(stream); - sst_ids.push(f.id()); - } - } - - Ok(MergeIterator::new( - self.config.table_id, - self.config.request_id, - // Use the schema after projection as the schema of the merge iterator. - self.config.projected_schema.to_record_schema_with_key(), - streams, - self.ssts, - self.config.merge_iter_options, - self.config.reverse, - Metrics::new( - self.memtables.len(), - sst_streams_num, - self.config.metrics_collector, - ), - )) - } -} - -struct BufferedStreamState { - /// Buffered record batch. - /// - /// invariant: `buffered_record_batch` is not empty. - buffered_record_batch: SequencedRecordBatch, - /// Cursor for reading buffered record batch. - /// - /// `cursor` increases monotonically from 0 to - /// `buffered_record_batch.num_rows()` and `cursor == - /// buffered_record_batch.num_rows()` means no more buffered rows to read. - cursor: usize, -} - -impl BufferedStreamState { - #[inline] - fn is_valid(&self) -> bool { - self.cursor < self.buffered_record_batch.num_rows() - } - - #[inline] - fn is_empty(&self) -> bool { - self.cursor >= self.buffered_record_batch.num_rows() - } - - #[inline] - fn sequence(&self) -> SequenceNumber { - self.buffered_record_batch.sequence - } - - #[inline] - fn first_row(&self) -> RowViewOnBatch<'_> { - assert!(self.is_valid()); - - RowViewOnBatch { - record_batch: &self.buffered_record_batch.record_batch, - row_idx: self.cursor, - } - } - - #[inline] - fn last_row(&self) -> RowViewOnBatch<'_> { - assert!(self.is_valid()); - - RowViewOnBatch { - record_batch: &self.buffered_record_batch.record_batch, - row_idx: self.buffered_record_batch.num_rows() - 1, - } - } - - /// Returns the next available row in the buffer and advance the cursor by - /// one step. - fn next_row(&mut self) -> Option> { - if self.cursor < self.buffered_record_batch.num_rows() { - let row_view = RowViewOnBatch { - record_batch: &self.buffered_record_batch.record_batch, - row_idx: self.cursor, - }; - self.cursor += 1; - Some(row_view) - } else { - None - } - } - - /// Append `len` rows from cursor to the `builder` and advance the cursor. - /// - /// Returns number of rows added. - fn append_rows_to( - &mut self, - builder: &mut FetchedRecordBatchBuilder, - len: usize, - ) -> Result { - let added = builder - .append_batch_range(&self.buffered_record_batch.record_batch, self.cursor, len) - .context(AppendRow)?; - self.cursor += added; - Ok(added) - } - - /// Take record batch slice with at most `len` rows from cursor and advance - /// the cursor. - fn take_record_batch_slice(&mut self, len: usize) -> FetchedRecordBatch { - let len_to_fetch = cmp::min( - self.buffered_record_batch.record_batch.num_rows() - self.cursor, - len, - ); - let record_batch = self - .buffered_record_batch - .record_batch - .slice(self.cursor, len_to_fetch); - self.cursor += record_batch.num_rows(); - record_batch - } - - #[inline] - fn reset(&mut self, record_batch: SequencedRecordBatch) { - self.buffered_record_batch = record_batch; - self.cursor = 0; - } -} - -struct BufferedStream { - schema: RecordSchemaWithKey, - stream: BoxedPrefetchableRecordBatchStream, - /// `None` state means the stream is exhausted. 
- state: Option, -} - -impl BufferedStream { - async fn build( - schema: RecordSchemaWithKey, - mut stream: BoxedPrefetchableRecordBatchStream, - ) -> Result { - let buffered_record_batch = Self::pull_next_non_empty_batch(&mut stream).await?; - let state = buffered_record_batch.map(|v| BufferedStreamState { - buffered_record_batch: v, - cursor: 0, - }); - - Ok(Self { - schema, - stream, - state, - }) - } - - fn sequence_in_buffer(&self) -> SequenceNumber { - self.state.as_ref().unwrap().sequence() - } - - /// REQUIRE: the buffer is not exhausted. - fn first_row_in_buffer(&self) -> RowViewOnBatch<'_> { - self.state.as_ref().unwrap().first_row() - } - - /// REQUIRE: the buffer is not exhausted. - fn last_row_in_buffer(&self) -> RowViewOnBatch<'_> { - self.state.as_ref().unwrap().last_row() - } - - /// REQUIRE: the buffer is not exhausted. - fn next_row_in_buffer(&mut self) -> Option> { - self.state.as_mut().unwrap().next_row() - } - - /// REQUIRE: the buffer is not exhausted. - fn append_rows_to( - &mut self, - builder: &mut FetchedRecordBatchBuilder, - len: usize, - ) -> Result { - self.state.as_mut().unwrap().append_rows_to(builder, len) - } - - /// REQUIRE: the buffer is not exhausted. - fn take_record_batch_slice(&mut self, len: usize) -> FetchedRecordBatch { - self.state.as_mut().unwrap().take_record_batch_slice(len) - } - - /// Pull the next non empty record batch. - /// - /// The returned record batch is ensured `num_rows() > 0`. - async fn pull_next_non_empty_batch( - stream: &mut BoxedPrefetchableRecordBatchStream, - ) -> Result> { - loop { - match stream - .fetch_next() - .await - .transpose() - .context(PullRecordBatch)? - { - Some(record_batch) => { - trace!( - "MergeIterator one record batch is fetched:{:?}", - record_batch - ); - - if record_batch.num_rows() > 0 { - return Ok(Some(record_batch)); - } - } - None => return Ok(None), - } - } - } - - /// Pull the next batch if the stream is not exhausted and the inner state - /// is empty. - async fn pull_next_batch_if_necessary(&mut self, metrics: &mut Metrics) -> Result { - let need_pull_new_batch = !self.is_exhausted() && self.state.as_ref().unwrap().is_empty(); - if !need_pull_new_batch { - return Ok(false); - } - - // TODO(xikai): do the metrics collection in the `pull_next_non_empty_batch`. - let pull_start = Instant::now(); - let pulled = match Self::pull_next_non_empty_batch(&mut self.stream).await? { - None => { - self.state = None; - Ok(false) - } - Some(record_batch) => { - self.state.as_mut().unwrap().reset(record_batch); - Ok(true) - } - }; - - metrics.scan_duration += pull_start.elapsed(); - metrics.scan_count += 1; - - pulled - } - - #[inline] - fn is_exhausted(&self) -> bool { - self.state.is_none() - } - - fn into_heaped(self, reverse: bool) -> HeapBufferedStream { - HeapBufferedStream { - stream: self, - reverse, - } - } - - #[inline] - fn schema(&self) -> &RecordSchemaWithKey { - &self.schema - } -} - -/// The wrapper struct determines the compare result for the min binary heap. -struct HeapBufferedStream { - stream: BufferedStream, - reverse: bool, -} - -impl HeapBufferedStream { - /// Check whether all the buffered rows in the `stream` is after the - /// `boundary_row`. - /// - /// NOTE: - /// - The first row in the stream is actually the max row if in reverse - /// order and should check whether it is smaller than `boundary_row`. - /// - The first row in the stream is actually the min row if in normal - /// order and should check whether it is greater than `boundary_row`. 
- fn is_after_boundary( - &self, - schema: &RecordSchemaWithKey, - boundary_row: &RowViewOnBatch, - ) -> bool { - if self.reverse { - // Compare the max row(the first row) in of the stream with the boundary row. - // The stream is after the boundary if the max row is smaller than boundary. - // is_after: (boundary_row) > [first_row in buffer] - matches!( - schema.compare_row(boundary_row, &self.first_row_in_buffer()), - Ordering::Greater - ) - } else { - // compare the min row(the first row) in of the stream with the boundary row. - // The stream is after the boundary if the min row is greater than boundary. - // is_after: (boundary_row) < [first_row in buffer] - matches!( - schema.compare_row(&self.first_row_in_buffer(), boundary_row), - Ordering::Greater - ) - } - } -} - -impl Deref for HeapBufferedStream { - type Target = BufferedStream; - - fn deref(&self) -> &BufferedStream { - &self.stream - } -} - -impl DerefMut for HeapBufferedStream { - fn deref_mut(&mut self) -> &mut BufferedStream { - &mut self.stream - } -} - -impl PartialEq for HeapBufferedStream { - fn eq(&self, other: &Self) -> bool { - let ordering = self - .schema - .compare_row(&self.first_row_in_buffer(), &other.first_row_in_buffer()); - if let Ordering::Equal = ordering { - self.sequence_in_buffer() == other.sequence_in_buffer() - } else { - false - } - } -} - -impl Eq for HeapBufferedStream {} - -impl PartialOrd for HeapBufferedStream { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for HeapBufferedStream { - fn cmp(&self, other: &Self) -> Ordering { - let ordering = if self.reverse { - // keep the original ordering so the greater row comes before the smaller one. - self.schema - .compare_row(&self.first_row_in_buffer(), &other.first_row_in_buffer()) - } else { - // reverse the original ordering so the smaller row comes before the greater - // one. - self.schema - .compare_row(&other.first_row_in_buffer(), &self.first_row_in_buffer()) - }; - - if let Ordering::Equal = ordering { - // The larger sequence number should always comes before the smaller one. - self.sequence_in_buffer().cmp(&other.sequence_in_buffer()) - } else { - ordering - } - } -} - -/// Metrics for merge iterator. -#[derive(TraceMetricWhenDrop)] -pub struct Metrics { - #[metric(number)] - num_memtables: usize, - #[metric(number)] - num_ssts: usize, - /// Total rows collected using fetch_rows_from_one_stream(). - #[metric(number)] - total_rows_fetch_from_one: usize, - /// Times to fetch rows from one stream. - #[metric(number)] - times_fetch_rows_from_one: usize, - /// Times to fetch one row from multiple stream. - #[metric(number)] - times_fetch_row_from_multiple: usize, - /// Init time cost of the metrics. - #[metric(duration)] - init_duration: Duration, - /// Scan time cost of the metrics. 
- #[metric(duration)] - scan_duration: Duration, - /// Scan count - #[metric(number)] - scan_count: usize, - #[metric(collector)] - metrics_collector: Option, -} - -impl Metrics { - fn new(num_memtables: usize, num_ssts: usize, collector: Option) -> Self { - Self { - num_memtables, - num_ssts, - times_fetch_rows_from_one: 0, - total_rows_fetch_from_one: 0, - times_fetch_row_from_multiple: 0, - init_duration: Duration::default(), - scan_duration: Duration::default(), - scan_count: 0, - metrics_collector: collector, - } - } -} - -pub struct MergeIterator { - table_id: TableId, - request_id: RequestId, - inited: bool, - schema: RecordSchemaWithKey, - record_batch_builder: FetchedRecordBatchBuilder, - origin_streams: Vec, - /// ssts are kept here to avoid them from being purged. - #[allow(dead_code)] - ssts: Vec>, - /// Any [BufferedStream] in the hot heap is not empty. - hot: BinaryHeap, - /// Any [BufferedStream] in the cold heap is not empty. - cold: BinaryHeap, - iter_options: IterOptions, - reverse: bool, - metrics: Metrics, -} - -impl MergeIterator { - #[allow(clippy::too_many_arguments)] - pub fn new( - table_id: TableId, - request_id: RequestId, - schema: RecordSchemaWithKey, - streams: Vec, - ssts: Vec>, - iter_options: IterOptions, - reverse: bool, - metrics: Metrics, - ) -> Self { - let heap_cap = streams.len(); - let primary_key_indexes = schema.primary_key_idx().to_vec(); - let fetched_schema = schema.to_record_schema(); - let record_batch_builder = FetchedRecordBatchBuilder::with_capacity( - fetched_schema, - Some(primary_key_indexes), - iter_options.batch_size, - ); - - Self { - table_id, - request_id, - inited: false, - schema, - ssts, - record_batch_builder, - origin_streams: streams, - hot: BinaryHeap::with_capacity(heap_cap), - cold: BinaryHeap::with_capacity(heap_cap), - iter_options, - reverse, - metrics, - } - } - - fn merge_window_end(&self) -> Option { - self.hot.peek().as_ref().map(|v| v.last_row_in_buffer()) - } - - async fn init_if_necessary(&mut self) -> Result<()> { - if self.inited { - return Ok(()); - } - - debug!( - "Merge iterator init, table_id:{:?}, request_id:{}, schema:{:?}", - self.table_id, self.request_id, self.schema - ); - let init_start = Instant::now(); - - // Initialize buffered streams concurrently. - let mut init_buffered_streams = FuturesUnordered::new(); - for origin_stream in mem::take(&mut self.origin_streams) { - let schema = self.schema.clone(); - init_buffered_streams.push(BufferedStream::build(schema, origin_stream)); - } - - let pull_start = Instant::now(); - self.metrics.scan_duration += pull_start.elapsed(); - self.metrics.scan_count += init_buffered_streams.len(); - - // Push streams to heap. 
- let current_schema = &self.schema; - while let Some(buffered_stream) = init_buffered_streams.next().await { - let buffered_stream = buffered_stream?; - let stream_schema = buffered_stream.schema(); - ensure!( - current_schema == stream_schema, - MismatchedSchema { - expect: current_schema.clone(), - given: stream_schema.clone(), - } - ); - - if !buffered_stream.is_exhausted() { - self.cold.push(buffered_stream.into_heaped(self.reverse)); - } - } - self.refill_hot(); - - self.inited = true; - self.metrics.init_duration = init_start.elapsed(); - - Ok(()) - } - - fn refill_hot(&mut self) { - while !self.cold.is_empty() { - if !self.hot.is_empty() { - let merge_window_end = self.merge_window_end().unwrap(); - let warmest = self.cold.peek().unwrap(); - if warmest.is_after_boundary(&self.schema, &merge_window_end) { - // if the warmest stream in the cold stream sets is totally after the - // merge_window_end then no need to add more streams into - // the hot stream sets for merge sorting. - break; - } - } - - let warmest = self.cold.pop().unwrap(); - self.hot.push(warmest); - } - } - - /// Pull the next batch Rearrange the heap - async fn reheap(&mut self, mut buffered_stream: HeapBufferedStream) -> Result<()> { - let pulled_new_batch = buffered_stream - .pull_next_batch_if_necessary(&mut self.metrics) - .await?; - - if buffered_stream.is_exhausted() { - self.refill_hot(); - } else if pulled_new_batch { - // TODO(xikai): it seems no need to decide to which heap push the - // `buffered_stream`. Just put the new batch into the cold heap if - // the max bound of the hottest batch is smaller than the min bound - // of new one. - let cold_new_batch = if let Some(hottest) = self.hot.peek() { - buffered_stream.is_after_boundary(&self.schema, &hottest.last_row_in_buffer()) - } else { - false - }; - - if cold_new_batch { - self.cold.push(buffered_stream); - } else { - self.hot.push(buffered_stream); - } - self.refill_hot(); - } else { - // No new batch is pulled and the `buffered_stream` is not exhausted so just put - // it back to the hot heap. - self.hot.push(buffered_stream); - } - - Ok(()) - } - - /// Fetch at most `num_rows_to_fetch` rows from the hottest - /// `BufferedStream`. - /// - /// If the inner builder is empty, returns a slice of the record batch in - /// stream. - async fn fetch_rows_from_one_stream( - &mut self, - num_rows_to_fetch: usize, - ) -> Result> { - assert_eq!(self.hot.len(), 1); - self.metrics.times_fetch_rows_from_one += 1; - - let mut buffered_stream = self.hot.pop().unwrap(); - - let record_batch = if self.record_batch_builder.is_empty() { - let record_batch = buffered_stream.take_record_batch_slice(num_rows_to_fetch); - - self.metrics.total_rows_fetch_from_one += record_batch.num_rows(); - - Some(record_batch) - } else { - let fetched_row_num = buffered_stream - .append_rows_to(&mut self.record_batch_builder, num_rows_to_fetch)?; - - self.metrics.total_rows_fetch_from_one += fetched_row_num; - - None - }; - - self.reheap(buffered_stream).await?; - - Ok(record_batch) - } - - /// Fetch one row from the hottest `BufferedStream`. - /// - /// REQUIRES: `self.hot` is not empty. 
- async fn fetch_one_row_from_multiple_streams(&mut self) -> Result<()> { - assert!(!self.hot.is_empty()); - self.metrics.times_fetch_row_from_multiple += 1; - - let mut hottest = self.hot.pop().unwrap(); - let row = hottest.next_row_in_buffer().unwrap(); - self.record_batch_builder - .append_row_view(&row) - .context(AppendRow)?; - self.reheap(hottest).await - } - - /// Fetch the next batch from the streams. - /// - /// `init_if_necessary` should be finished before this method. - async fn fetch_next_batch(&mut self) -> Result> { - self.init_if_necessary().await?; - - self.record_batch_builder.clear(); - - while !self.hot.is_empty() && self.record_batch_builder.len() < self.iter_options.batch_size - { - // no need to do merge sort if only one batch in the hot heap. - if self.hot.len() == 1 { - let fetch_row_num = self.iter_options.batch_size - self.record_batch_builder.len(); - - if let Some(record_batch) = self.fetch_rows_from_one_stream(fetch_row_num).await? { - // The builder is empty and we have fetch a record batch from this stream, just - // return that batch. - return Ok(Some(record_batch)); - } - // Else, some rows may have been pushed into the builder. - } else { - self.fetch_one_row_from_multiple_streams().await?; - } - } - - if self.record_batch_builder.is_empty() { - Ok(None) - } else { - let record_batch = self - .record_batch_builder - .build() - .context(BuildRecordBatch)?; - Ok(Some(record_batch)) - } - } -} - -#[async_trait] -impl FetchedRecordBatchIterator for MergeIterator { - type Error = Error; - - fn schema(&self) -> &RecordSchemaWithKey { - &self.schema - } - - async fn next_batch(&mut self) -> Result> { - let record_batch = self.fetch_next_batch().await?; - - trace!("MergeIterator send next record batch:{:?}", record_batch); - - Ok(record_batch) - } -} - -#[cfg(test)] -mod tests { - use common_types::{ - self, - tests::{build_row, build_schema}, - }; - - use super::*; - use crate::row_iter::tests::check_iterator; - - #[tokio::test] - async fn test_row_merge_iterator() { - // first two columns are key columns - let schema = build_schema(); - - let testcases = vec![ - // (sequence, rows) - ( - 10, - vec![build_row(b"y", 1000000, 10.0, "v4", 1000, 1_000_000)], - ), - ( - 20, - vec![build_row(b"y", 1000000, 10.0, "v3", 1000, 1_000_000)], - ), - ( - 100, - vec![build_row(b"b", 1000000, 10.0, "v2", 1000, 1_000_000)], - ), - ( - 1, - vec![build_row(b"a", 1000000, 10.0, "v1", 1000, 1_000_000)], - ), - ]; - - let streams = - record_batch_stream::tests::build_sequenced_record_batch_stream(&schema, testcases); - let mut iter = MergeIterator::new( - TableId::MIN, - RequestId::next_id(), - schema.to_record_schema_with_key(), - streams, - Vec::new(), - IterOptions { batch_size: 500 }, - false, - Metrics::new(1, 1, None), - ); - - check_iterator( - &mut iter, - vec![ - build_row(b"a", 1000000, 10.0, "v1", 1000, 1_000_000), - build_row(b"b", 1000000, 10.0, "v2", 1000, 1_000_000), - build_row(b"y", 1000000, 10.0, "v3", 1000, 1_000_000), - build_row(b"y", 1000000, 10.0, "v4", 1000, 1_000_000), - ], - ) - .await; - } - - #[tokio::test] - async fn test_row_merge_iterator_reverse() { - // first two columns are key columns - let schema = build_schema(); - - let testcases = vec![ - // (sequence, rows) - ( - 10, - vec![ - build_row(b"y", 1000001, 10.0, "v5", 1000, 1_000_000), - build_row(b"y", 1000000, 10.0, "v4", 1000, 1_000_000), - ], - ), - ( - 20, - vec![build_row(b"y", 1000000, 10.0, "v3", 1000, 1_000_000)], - ), - ( - 100, - vec![build_row(b"b", 1000000, 10.0, "v2", 1000, 
1_000_000)], - ), - ( - 1, - vec![build_row(b"a", 1000000, 10.0, "v1", 1000, 1_000_000)], - ), - ]; - - let streams = - record_batch_stream::tests::build_sequenced_record_batch_stream(&schema, testcases); - let mut iter = MergeIterator::new( - TableId::MIN, - RequestId::next_id(), - schema.to_record_schema_with_key(), - streams, - Vec::new(), - IterOptions { batch_size: 500 }, - true, - Metrics::new(1, 1, None), - ); - - check_iterator( - &mut iter, - vec![ - build_row(b"y", 1000001, 10.0, "v5", 1000, 1_000_000), - build_row(b"y", 1000000, 10.0, "v3", 1000, 1_000_000), - build_row(b"y", 1000000, 10.0, "v4", 1000, 1_000_000), - build_row(b"b", 1000000, 10.0, "v2", 1000, 1_000_000), - build_row(b"a", 1000000, 10.0, "v1", 1000, 1_000_000), - ], - ) - .await; - } -} diff --git a/src/analytic_engine/src/row_iter/mod.rs b/src/analytic_engine/src/row_iter/mod.rs deleted file mode 100644 index 0a76cfc033..0000000000 --- a/src/analytic_engine/src/row_iter/mod.rs +++ /dev/null @@ -1,61 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Iterators for row. - -use async_stream::try_stream; -use async_trait::async_trait; -use common_types::{record_batch::FetchedRecordBatch, schema::RecordSchemaWithKey}; -use generic_error::BoxError; - -use crate::sst::writer::RecordBatchStream; - -pub mod chain; -pub mod dedup; -pub mod merge; -pub mod record_batch_stream; -#[cfg(test)] -pub mod tests; - -#[derive(Debug, Clone)] -pub struct IterOptions { - pub batch_size: usize, -} - -/// The iterator for reading RecordBatch from a table. -/// -/// The `schema()` should be the same as the RecordBatch from `read()`. -/// The reader is exhausted if the `read()` returns the `Ok(None)`. -#[async_trait] -pub trait FetchedRecordBatchIterator: Send { - type Error: std::error::Error + Send + Sync + 'static; - - fn schema(&self) -> &RecordSchemaWithKey; - - async fn next_batch(&mut self) -> std::result::Result, Self::Error>; -} - -pub fn record_batch_with_key_iter_to_stream( - mut iter: I, -) -> RecordBatchStream { - let record_batch_stream = try_stream! { - while let Some(batch) = iter.next_batch().await.box_err().transpose() { - yield batch?; - } - }; - Box::new(Box::pin(record_batch_stream)) -} diff --git a/src/analytic_engine/src/row_iter/record_batch_stream.rs b/src/analytic_engine/src/row_iter/record_batch_stream.rs deleted file mode 100644 index 49c41f2432..0000000000 --- a/src/analytic_engine/src/row_iter/record_batch_stream.rs +++ /dev/null @@ -1,400 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{ - ops::{Bound, Not}, - sync::Arc, - time::Instant, -}; - -use arrow::{ - array::BooleanArray, - datatypes::{DataType as ArrowDataType, SchemaRef as ArrowSchemaRef}, -}; -use common_types::{ - projected_schema::RowProjectorBuilder, record_batch::FetchedRecordBatch, schema::RecordSchema, - SequenceNumber, -}; -use datafusion::{ - common::ToDFSchema, - error::DataFusionError, - optimizer::utils::conjunction, - physical_expr::{self, execution_props::ExecutionProps}, - physical_plan::PhysicalExpr, -}; -use futures::stream::{self, StreamExt}; -use generic_error::{BoxError, GenericResult}; -use itertools::Itertools; -use macros::define_result; -use snafu::{Backtrace, OptionExt, ResultExt, Snafu}; -use table_engine::{ - predicate::{Predicate, PredicateRef}, - table::TableId, -}; -use trace_metric::MetricsCollector; - -use crate::{ - memtable::{MemTableRef, ScanContext, ScanRequest}, - prefetchable_stream::{NoopPrefetcher, PrefetchableStream, PrefetchableStreamExt}, - space::SpaceId, - sst::{ - factory::{ - self, FactoryRef as SstFactoryRef, ObjectStorePickerRef, SstReadHint, SstReadOptions, - }, - file::FileHandle, - }, - table::sst_util, -}; - -#[derive(Debug, Snafu)] -#[snafu(visibility(pub))] -pub enum Error { - #[snafu(display("Failed to create sst reader, err:{:?}", source,))] - CreateSstReader { source: factory::Error }, - - #[snafu(display("Fail to read sst meta, err:{}", source))] - ReadSstMeta { source: crate::sst::reader::Error }, - - #[snafu(display("Fail to read sst data, err:{}", source))] - ReadSstData { source: crate::sst::reader::Error }, - - #[snafu(display("Fail to scan memtable, err:{}", source))] - ScanMemtable { source: crate::memtable::Error }, - - #[snafu(display( - "Fail to execute filter expression, err:{}.\nBacktrace:\n{}", - source, - backtrace - ))] - FilterExec { - source: DataFusionError, - backtrace: Backtrace, - }, - - #[snafu(display( - "Fail to downcast boolean array, actual data type:{:?}.\nBacktrace:\n{}", - data_type, - backtrace - ))] - DowncastBooleanArray { - data_type: ArrowDataType, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to get datafusion schema, err:{}.\nBacktrace:\n{}", - source, - backtrace - ))] - DatafusionSchema { - source: DataFusionError, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to generate datafusion physical expr, err:{}.\nBacktrace:\n{}", - source, - backtrace - ))] - DatafusionExpr { - source: DataFusionError, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to select from record batch, err:{}", source))] - SelectBatchData { - source: common_types::record_batch::Error, - }, - - #[snafu(display( - "Timeout when read record batch, err:{}.\nBacktrace:\n{}", - source, - backtrace - ))] - Timeout { - source: tokio::time::error::Elapsed, - backtrace: Backtrace, - }, -} - -define_result!(Error); - -// TODO(yingwen): Can we move sequence to FetchedRecordBatch and remove this -// struct? 
But what is the sequence after merge? -#[derive(Debug)] -pub struct SequencedRecordBatch { - pub record_batch: FetchedRecordBatch, - pub sequence: SequenceNumber, -} - -impl SequencedRecordBatch { - #[inline] - pub fn num_rows(&self) -> usize { - self.record_batch.num_rows() - } -} - -pub type SequencedRecordBatchRes = GenericResult; -pub type BoxedPrefetchableRecordBatchStream = - Box>; - -/// Filter the `sequenced_record_batch` according to the `predicate`. -fn filter_record_batch( - mut sequenced_record_batch: SequencedRecordBatch, - predicate: Arc, -) -> Result> { - let record_batch = sequenced_record_batch.record_batch.as_arrow_record_batch(); - let filter_array = predicate - .evaluate(record_batch) - .map(|v| v.into_array(record_batch.num_rows())) - .context(FilterExec)? - .context(FilterExec)?; - let selected_rows = filter_array - .as_any() - .downcast_ref::() - .context(DowncastBooleanArray { - data_type: filter_array.as_ref().data_type().clone(), - })?; - - sequenced_record_batch - .record_batch - .select_data(selected_rows) - .context(SelectBatchData)?; - - sequenced_record_batch - .record_batch - .is_empty() - .not() - .then_some(Ok(sequenced_record_batch)) - .transpose() -} - -/// Filter the sequenced record batch stream by applying the `predicate`. -pub fn filter_stream( - origin_stream: BoxedPrefetchableRecordBatchStream, - input_schema: ArrowSchemaRef, - predicate: &Predicate, -) -> Result { - let filter = match conjunction(predicate.exprs().to_owned()) { - Some(filter) => filter, - None => return Ok(origin_stream), - }; - - let input_df_schema = input_schema - .clone() - .to_dfschema() - .context(DatafusionSchema)?; - let execution_props = ExecutionProps::new(); - let predicate = physical_expr::create_physical_expr( - &filter, - &input_df_schema, - input_schema.as_ref(), - &execution_props, - ) - .context(DatafusionExpr)?; - - let stream = - origin_stream.filter_map(move |sequence_record_batch| match sequence_record_batch { - Ok(v) => filter_record_batch(v, predicate.clone()) - .box_err() - .transpose(), - Err(e) => Some(Err(e)), - }); - - Ok(Box::new(stream)) -} - -/// Build filtered (by `predicate`) [SequencedRecordBatchStream] from a -/// memtable. -pub fn filtered_stream_from_memtable( - memtable: &MemTableRef, - ctx: &MemtableStreamContext, - metrics_collector: Option, -) -> Result { - stream_from_memtable(memtable, ctx, metrics_collector).and_then(|origin_stream| { - filter_stream( - origin_stream, - ctx.fetched_schema.to_arrow_schema_ref(), - &ctx.predicate, - ) - }) -} - -/// Build [SequencedRecordBatchStream] from a memtable. 
-pub fn stream_from_memtable( - memtable: &MemTableRef, - ctx: &MemtableStreamContext, - metrics_collector: Option, -) -> Result { - let scan_ctx = ScanContext { - deadline: ctx.deadline, - ..Default::default() - }; - let max_seq = memtable.last_sequence(); - let fetched_cols = ctx - .fetched_schema - .columns() - .iter() - .format_with(",", |col, f| f(&format_args!("{}", col.name))); - let scan_memtable_desc = format!("scan_memtable_{max_seq}, fetched_columns:[{fetched_cols}]",); - let metrics_collector = metrics_collector.map(|v| v.span(scan_memtable_desc)); - let scan_req = ScanRequest { - start_user_key: Bound::Unbounded, - end_user_key: Bound::Unbounded, - sequence: max_seq, - row_projector_builder: ctx.row_projector_builder.clone(), - need_dedup: ctx.need_dedup, - reverse: ctx.reverse, - metrics_collector, - time_range: ctx.predicate.time_range(), - }; - - let iter = memtable.scan(scan_ctx, scan_req).context(ScanMemtable)?; - let stream = stream::iter(iter).map(move |v| { - v.map(|record_batch| SequencedRecordBatch { - record_batch, - sequence: max_seq, - }) - .box_err() - }); - - Ok(Box::new(NoopPrefetcher(Box::new(stream)))) -} - -pub struct MemtableStreamContext { - pub row_projector_builder: RowProjectorBuilder, - pub fetched_schema: RecordSchema, - pub predicate: PredicateRef, - pub need_dedup: bool, - pub reverse: bool, - pub deadline: Option, -} - -/// Build the filtered by `sst_read_options.predicate` -/// [SequencedRecordBatchStream] from a sst. -pub async fn filtered_stream_from_sst_file( - space_id: SpaceId, - table_id: TableId, - sst_file: &FileHandle, - sst_factory: &SstFactoryRef, - store_picker: &ObjectStorePickerRef, - ctx: &SstStreamContext, - metrics_collector: Option, -) -> Result { - stream_from_sst_file( - space_id, - table_id, - sst_file, - sst_factory, - store_picker, - ctx, - metrics_collector, - ) - .await - .and_then(|origin_stream| { - filter_stream( - origin_stream, - ctx.fetched_schema.to_arrow_schema_ref(), - &ctx.sst_read_options.predicate, - ) - }) -} - -/// Build the [SequencedRecordBatchStream] from a sst. 
-pub async fn stream_from_sst_file( - space_id: SpaceId, - table_id: TableId, - sst_file: &FileHandle, - sst_factory: &SstFactoryRef, - store_picker: &ObjectStorePickerRef, - ctx: &SstStreamContext, - metrics_collector: Option, -) -> Result { - sst_file.read_meter().mark(); - let path = sst_util::new_sst_file_path(space_id, table_id, sst_file.id()); - - let read_hint = SstReadHint { - file_size: Some(sst_file.size() as usize), - file_format: Some(sst_file.storage_format()), - }; - let fetched_cols = ctx - .fetched_schema - .columns() - .iter() - .format_with(",", |col, f| f(&format_args!("{}", col.name))); - let scan_sst_desc = format!( - "scan_sst_{}, fetched_columns:[{fetched_cols}]", - sst_file.id() - ); - let metrics_collector = metrics_collector.map(|v| v.span(scan_sst_desc)); - let mut sst_reader = sst_factory - .create_reader( - &path, - &ctx.sst_read_options, - read_hint, - store_picker, - metrics_collector, - ) - .await - .context(CreateSstReader)?; - let meta = sst_reader.meta_data().await.context(ReadSstMeta)?; - let max_seq = meta.max_sequence(); - let stream = sst_reader.read().await.context(ReadSstData)?; - let stream = stream.map(move |v| { - v.map(|record_batch| SequencedRecordBatch { - record_batch, - sequence: max_seq, - }) - .box_err() - }); - - Ok(Box::new(stream)) -} - -pub struct SstStreamContext { - pub sst_read_options: SstReadOptions, - pub fetched_schema: RecordSchema, -} - -#[cfg(test)] -pub mod tests { - use common_types::{row::Row, schema::Schema}; - - use super::*; - use crate::row_iter; - - /// Build [SequencedRecordBatchStream] from the sequenced rows. - pub fn build_sequenced_record_batch_stream( - schema: &Schema, - batches: Vec<(SequenceNumber, Vec)>, - ) -> Vec { - batches - .into_iter() - .map(|(seq, rows)| { - let batch = SequencedRecordBatch { - record_batch: row_iter::tests::build_fetched_record_batch_with_key( - schema.clone(), - rows, - ), - sequence: seq, - }; - let stream = Box::new(stream::iter(vec![Ok(batch)])); - Box::new(NoopPrefetcher(stream as _)) as BoxedPrefetchableRecordBatchStream - }) - .collect() - } -} diff --git a/src/analytic_engine/src/row_iter/tests.rs b/src/analytic_engine/src/row_iter/tests.rs deleted file mode 100644 index efa0a5617c..0000000000 --- a/src/analytic_engine/src/row_iter/tests.rs +++ /dev/null @@ -1,121 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use async_trait::async_trait; -use common_types::{ - projected_schema::{ProjectedSchema, RowProjector}, - record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, - row::{ - contiguous::{ContiguousRowReader, ContiguousRowWriter, ProjectedContiguousRow}, - Row, - }, - schema::{IndexInWriterSchema, RecordSchemaWithKey, Schema}, -}; -use macros::define_result; -use snafu::Snafu; - -use crate::row_iter::FetchedRecordBatchIterator; - -#[derive(Debug, Snafu)] -pub enum Error {} - -define_result!(Error); - -pub struct VectorIterator { - schema: RecordSchemaWithKey, - items: Vec>, - idx: usize, -} - -impl VectorIterator { - pub fn new(schema: RecordSchemaWithKey, items: Vec) -> Self { - Self { - schema, - items: items.into_iter().map(Some).collect(), - idx: 0, - } - } -} - -#[async_trait] -impl FetchedRecordBatchIterator for VectorIterator { - type Error = Error; - - fn schema(&self) -> &RecordSchemaWithKey { - &self.schema - } - - async fn next_batch(&mut self) -> Result> { - if self.idx == self.items.len() { - return Ok(None); - } - - let ret = Ok(self.items[self.idx].take()); - self.idx += 1; - - ret - } -} - -pub fn build_fetched_record_batch_with_key(schema: Schema, rows: Vec) -> FetchedRecordBatch { - assert!(schema.num_columns() > 1); - let projection: Vec = (0..schema.num_columns()).collect(); - let projected_schema = ProjectedSchema::new(schema.clone(), Some(projection)).unwrap(); - let fetched_schema = projected_schema.to_record_schema_with_key(); - let primary_key_indexes = fetched_schema.primary_key_idx().to_vec(); - let fetched_schema = fetched_schema.to_record_schema(); - let table_schema = projected_schema.table_schema(); - let row_projector = RowProjector::new( - &fetched_schema, - Some(primary_key_indexes), - table_schema, - table_schema, - ) - .unwrap(); - let primary_key_indexes = row_projector - .primary_key_indexes() - .map(|idxs| idxs.to_vec()); - let mut builder = - FetchedRecordBatchBuilder::with_capacity(fetched_schema, primary_key_indexes, 2); - let index_in_writer = IndexInWriterSchema::for_same_schema(schema.num_columns()); - - let mut buf = Vec::new(); - for row in rows { - let mut writer = ContiguousRowWriter::new(&mut buf, &schema, &index_in_writer); - - writer.write_row(&row).unwrap(); - - let source_row = ContiguousRowReader::try_new(&buf, &schema).unwrap(); - let projected_row = ProjectedContiguousRow::new(source_row, &row_projector); - builder - .append_projected_contiguous_row(&projected_row) - .unwrap(); - } - builder.build().unwrap() -} - -pub async fn check_iterator(iter: &mut T, expected_rows: Vec) { - let mut visited_rows = 0; - while let Some(batch) = iter.next_batch().await.unwrap() { - for row_idx in 0..batch.num_rows() { - assert_eq!(batch.clone_row_at(row_idx), expected_rows[visited_rows]); - visited_rows += 1; - } - } - - assert_eq!(visited_rows, expected_rows.len()); -} diff --git a/src/analytic_engine/src/sampler.rs b/src/analytic_engine/src/sampler.rs deleted file mode 100644 index 691d646add..0000000000 --- a/src/analytic_engine/src/sampler.rs +++ /dev/null @@ -1,618 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Segment duration sampler. - -use std::{ - collections::HashSet, - sync::{Arc, Mutex}, - time::Duration, -}; - -use common_types::{ - datum::DatumView, - row::Row, - schema::Schema, - time::{TimeRange, Timestamp}, -}; -use hyperloglog::HyperLogLog; -use macros::define_result; -use snafu::{ensure, Backtrace, Snafu}; - -use crate::table_options; - -/// Initial size of timestamps set. -const INIT_CAPACITY: usize = 1000; -const HOUR_MS: u64 = 3600 * 1000; -const DAY_MS: u64 = 24 * HOUR_MS; -const AVAILABLE_DURATIONS: [u64; 8] = [ - 2 * HOUR_MS, - DAY_MS, - 7 * DAY_MS, - 30 * DAY_MS, - 180 * DAY_MS, - 360 * DAY_MS, - 5 * 360 * DAY_MS, - 10 * 360 * DAY_MS, -]; -const INTERVAL_RATIO: f64 = 0.9; -/// Expected points per timeseries in a segment, used to pick a proper segment -/// duration. -const POINTS_PER_SERIES: u64 = 100; -/// Max timestamp that wont overflow even using max duration. -const MAX_TIMESTAMP_MS_FOR_DURATION: i64 = - i64::MAX - 2 * AVAILABLE_DURATIONS[AVAILABLE_DURATIONS.len() - 1] as i64; -/// Minimun sample timestamps to compute duration. -const MIN_SAMPLES: usize = 2; -const HLL_ERROR_RATE: f64 = 0.01; -pub const MAX_SUGGEST_PRIMARY_KEY_NUM: usize = 2; - -#[derive(Debug, Snafu)] -#[snafu(display( - "Invalid timestamp to collect, timestamp:{:?}.\nBacktrace:\n{}", - timestamp, - backtrace -))] -pub struct Error { - timestamp: Timestamp, - backtrace: Backtrace, -} - -define_result!(Error); - -/// Segment duration sampler. -/// -/// Collects all timestamps and then yield a suggested segment duration to hold -/// all data with similar timestamp interval. -pub trait DurationSampler { - /// Collect a timestamp. - fn collect(&self, timestamp: Timestamp) -> Result<()>; - - /// Returns a suggested duration to partition the timestamps or default - /// duration if no enough timestamp has been sampled. - /// - /// Note that this method may be invoked more than once. - fn suggest_duration(&self) -> Duration; - - /// Returns a vector of time range with suggested duration that can hold all - /// timestamps collected by this sampler. - fn ranges(&self) -> Vec; - - // TODO(yingwen): Memory usage. -} - -pub type SamplerRef = Arc; - -struct State { - /// Deduplicated timestamps. - deduped_timestamps: HashSet, - /// Cached suggested duration. - duration: Option, - /// Sorted timestamps cache, empty if `duration` is None. 
- sorted_timestamps: Vec, -} - -impl State { - fn clear_cache(&mut self) { - self.duration = None; - self.sorted_timestamps.clear(); - } -} - -pub struct DefaultSampler { - state: Mutex, -} - -impl Default for DefaultSampler { - fn default() -> Self { - Self { - state: Mutex::new(State { - deduped_timestamps: HashSet::with_capacity(INIT_CAPACITY), - duration: None, - sorted_timestamps: Vec::new(), - }), - } - } -} - -impl DurationSampler for DefaultSampler { - fn collect(&self, timestamp: Timestamp) -> Result<()> { - ensure!( - timestamp.as_i64() < MAX_TIMESTAMP_MS_FOR_DURATION, - Context { timestamp } - ); - - let mut state = self.state.lock().unwrap(); - state.deduped_timestamps.insert(timestamp); - state.clear_cache(); - - Ok(()) - } - - fn suggest_duration(&self) -> Duration { - if let Some(v) = self.duration() { - return v; - } - - let timestamps = self.compute_sorted_timestamps(); - let picked = match evaluate_interval(×tamps) { - Some(interval) => pick_duration(interval), - None => table_options::DEFAULT_SEGMENT_DURATION, - }; - - { - // Cache the picked duration. - let mut state = self.state.lock().unwrap(); - state.duration = Some(picked); - state.sorted_timestamps = timestamps; - } - - picked - } - - fn ranges(&self) -> Vec { - let duration = self.suggest_duration(); - let sorted_timestamps = self.cached_sorted_timestamps(); - // This type hint is needed to make `ranges.last()` work. - let mut ranges: Vec = Vec::new(); - - for ts in sorted_timestamps { - if let Some(range) = ranges.last() { - if range.contains(ts) { - continue; - } - } - - // collect() ensures timestamp won't overflow. - let range = TimeRange::bucket_of(ts, duration).unwrap(); - ranges.push(range); - } - - ranges - } -} - -impl DefaultSampler { - fn cached_sorted_timestamps(&self) -> Vec { - self.state.lock().unwrap().sorted_timestamps.clone() - } - - fn compute_sorted_timestamps(&self) -> Vec { - let mut timestamps: Vec<_> = { - let state = self.state.lock().unwrap(); - state.deduped_timestamps.iter().copied().collect() - }; - - timestamps.sort_unstable(); - - timestamps - } - - fn duration(&self) -> Option { - self.state.lock().unwrap().duration - } -} - -fn evaluate_interval(sorted_timestamps: &[Timestamp]) -> Option { - if sorted_timestamps.len() < MIN_SAMPLES { - return None; - } - - let mut intervals = Vec::with_capacity(sorted_timestamps.len()); - for i in 0..sorted_timestamps.len() - 1 { - let current = sorted_timestamps[i]; - let next = sorted_timestamps[i + 1]; - let interval = next.as_i64() - current.as_i64(); - intervals.push(interval); - } - - intervals.sort_unstable(); - - let mut index = (intervals.len() as f64 * INTERVAL_RATIO) as usize; - if index > 1 { - index -= 1; - }; - let selected = intervals[index]; - // Interval should larger than 0. - assert!(selected > 0); - - Some(selected as u64) -} - -fn pick_duration(interval: u64) -> Duration { - let scaled_interval = interval.checked_mul(POINTS_PER_SERIES).unwrap_or(u64::MAX); - for du_ms in AVAILABLE_DURATIONS { - if du_ms > scaled_interval { - return Duration::from_millis(du_ms); - } - } - - // No duration larger than scaled interval, returns the largest duration. 
- let du_ms = AVAILABLE_DURATIONS[AVAILABLE_DURATIONS.len() - 1]; - - Duration::from_millis(du_ms) -} - -#[derive(Clone)] -struct DistinctCounter { - hll: HyperLogLog, -} - -impl DistinctCounter { - fn new() -> Self { - Self { - hll: HyperLogLog::new(HLL_ERROR_RATE), - } - } - - fn insert(&mut self, bs: &DatumView) { - self.hll.insert(bs); - } - - fn len(&self) -> f64 { - self.hll.len() - } -} - -/// PrimaryKeySampler will sample written rows, and suggest new primary keys -/// based on column cardinality, column with lower cardinality should come first -/// since they are beneficial for sst prune. -/// -/// For special columns like tsid/timestmap, we ignore sampling them to save -/// CPU, and append to primary keys directly at last. -#[derive(Clone)] -pub struct PrimaryKeySampler { - // Currently all columns will share one big lock, which means decrease perf when we - // remove lock at the beginning of write process. - // This maybe acceptable, since this is only used in sampling memtable. - column_counters: Arc>>>, - timestamp_index: usize, - tsid_index: Option, - max_suggest_num: usize, - num_columns: usize, -} - -impl PrimaryKeySampler { - pub fn new(schema: &Schema, max_suggest_num: usize) -> Self { - let timestamp_index = schema.timestamp_index(); - let tsid_index = schema.index_of_tsid(); - let column_counters = schema - .columns() - .iter() - .enumerate() - .map(|(idx, col)| { - if col.data_type.is_timestamp() { - return None; - } - - if let Some(tsid_idx) = tsid_index { - if idx == tsid_idx { - return None; - } - } - - if col.data_type.is_key_kind() { - Some(DistinctCounter::new()) - } else { - None - } - }) - .collect::>(); - let num_columns = column_counters.len(); - let column_counters = Arc::new(Mutex::new(column_counters)); - - Self { - column_counters, - tsid_index, - timestamp_index, - max_suggest_num, - num_columns, - } - } - - pub fn collect(&self, row: &Row) { - assert_eq!(row.num_columns(), self.num_columns); - - let mut column_counters = self.column_counters.lock().unwrap(); - for (datum, counter) in row.iter().zip(column_counters.iter_mut()) { - if let Some(counter) = counter { - let view = datum.as_view(); - counter.insert(&view); - } - } - } - - pub fn suggest(&self) -> Vec { - let column_counters = self.column_counters.lock().unwrap(); - let mut col_idx_and_counts = column_counters - .iter() - .enumerate() - .filter_map(|(col_idx, values)| values.as_ref().map(|values| (col_idx, values.len()))) - .collect::>(); - - // sort asc and take first N columns as primary keys - col_idx_and_counts.sort_by(|a, b| a.1.total_cmp(&b.1)); - let mut pk_indexes = col_idx_and_counts - .iter() - .take(self.max_suggest_num) - .map(|v| v.0) - .collect::>(); - - if let Some(tsid_idx) = self.tsid_index { - pk_indexes.push(tsid_idx); - } - pk_indexes.push(self.timestamp_index); - - pk_indexes - } -} - -#[cfg(test)] -mod tests { - use common_types::tests::{build_row_for_cpu, build_schema_for_cpu}; - - use super::*; - - const SEC_MS: u64 = 1000; - const MIN_MS: u64 = 60 * SEC_MS; - - #[test] - fn test_pick_duration() { - let cases = [ - (1, 2 * HOUR_MS), - (5 * SEC_MS, 2 * HOUR_MS), - (15 * SEC_MS, 2 * HOUR_MS), - (MIN_MS, 2 * HOUR_MS), - (5 * MIN_MS, DAY_MS), - (10 * MIN_MS, DAY_MS), - (30 * MIN_MS, 7 * DAY_MS), - (HOUR_MS, 7 * DAY_MS), - (4 * HOUR_MS, 30 * DAY_MS), - (8 * HOUR_MS, 180 * DAY_MS), - (DAY_MS, 180 * DAY_MS), - (3 * DAY_MS, 360 * DAY_MS), - (7 * DAY_MS, 5 * 360 * DAY_MS), - (30 * DAY_MS, 10 * 360 * DAY_MS), - (360 * DAY_MS, 10 * 360 * DAY_MS), - (10 * 360 * DAY_MS, 10 * 360 * 
DAY_MS), - (20 * 360 * DAY_MS, 10 * 360 * DAY_MS), - ]; - - for (i, (interval, expect)) in cases.iter().enumerate() { - assert_eq!( - *expect, - pick_duration(*interval).as_millis() as u64, - "Case {i}" - ); - } - } - - #[test] - fn test_empty_sampler() { - let sampler = DefaultSampler::default(); - - assert_eq!( - table_options::DEFAULT_SEGMENT_DURATION, - sampler.suggest_duration() - ); - assert!(sampler.ranges().is_empty()); - } - - #[test] - fn test_one_sample() { - let sampler = DefaultSampler::default(); - - sampler.collect(Timestamp::new(0)).unwrap(); - - assert_eq!( - table_options::DEFAULT_SEGMENT_DURATION, - sampler.suggest_duration() - ); - let time_range = - TimeRange::bucket_of(Timestamp::new(0), table_options::DEFAULT_SEGMENT_DURATION) - .unwrap(); - assert_eq!(&[time_range], &sampler.ranges()[..]); - } - - #[test] - fn test_all_sample_same() { - let sampler = DefaultSampler::default(); - - let ts = Timestamp::now(); - for _ in 0..5 { - sampler.collect(ts).unwrap(); - } - - assert_eq!( - table_options::DEFAULT_SEGMENT_DURATION, - sampler.suggest_duration() - ); - let time_range = TimeRange::bucket_of(ts, table_options::DEFAULT_SEGMENT_DURATION).unwrap(); - assert_eq!(&[time_range], &sampler.ranges()[..]); - } - - #[test] - fn test_collect_invalid() { - let sampler = DefaultSampler::default(); - - assert!(sampler - .collect(Timestamp::new(MAX_TIMESTAMP_MS_FOR_DURATION - 1)) - .is_ok()); - assert!(sampler - .collect(Timestamp::new(MAX_TIMESTAMP_MS_FOR_DURATION)) - .is_err()); - } - - #[test] - fn test_sampler_cache() { - let sampler = DefaultSampler::default(); - - let ts1 = Timestamp::now(); - for i in 0..3 { - sampler - .collect(Timestamp::new(ts1.as_i64() + i * SEC_MS as i64)) - .unwrap(); - } - - assert_eq!( - table_options::DEFAULT_SEGMENT_DURATION, - sampler.suggest_duration() - ); - let time_range1 = - TimeRange::bucket_of(ts1, table_options::DEFAULT_SEGMENT_DURATION).unwrap(); - assert_eq!(&[time_range1], &sampler.ranges()[..]); - - // A new timestamp is sampled. 
- let ts2 = Timestamp::new(ts1.as_i64() + DAY_MS as i64); - sampler.collect(ts2).unwrap(); - - assert!(sampler.state.lock().unwrap().duration.is_none()); - assert!(sampler.state.lock().unwrap().sorted_timestamps.is_empty()); - - assert_eq!( - table_options::DEFAULT_SEGMENT_DURATION, - sampler.suggest_duration() - ); - let time_range2 = - TimeRange::bucket_of(ts2, table_options::DEFAULT_SEGMENT_DURATION).unwrap(); - assert_eq!(&[time_range1, time_range2], &sampler.ranges()[..]); - } - - fn test_suggest_duration_and_ranges_case( - timestamps: &[i64], - duration: u64, - ranges: &[(i64, i64)], - ) { - let sampler = DefaultSampler::default(); - - for ts in timestamps { - sampler.collect(Timestamp::new(*ts)).unwrap(); - } - - assert_eq!(Duration::from_millis(duration), sampler.suggest_duration()); - - let suggested_ranges = sampler.ranges(); - for (range, suggested_range) in ranges.iter().zip(suggested_ranges) { - assert_eq!(range.0, suggested_range.inclusive_start().as_i64()); - assert_eq!(range.1, suggested_range.exclusive_end().as_i64()); - } - } - - #[test] - fn test_suggest_duration_and_ranges() { - test_suggest_duration_and_ranges_case( - // Intervals: 3, 5 - &[100, 103, 108], - 2 * HOUR_MS, - &[(0, 2 * HOUR_MS as i64)], - ); - - let now = 1672502400000i64; - let now_ts = Timestamp::new(now); - let sec_ms_i64 = SEC_MS as i64; - - let bucket = TimeRange::bucket_of(now_ts, Duration::from_millis(2 * HOUR_MS)).unwrap(); - let expect_range = ( - bucket.inclusive_start().as_i64(), - bucket.exclusive_end().as_i64(), - ); - test_suggest_duration_and_ranges_case( - // Intervals: 5s, 5s, 5s, 5s, 100s, - &[ - now, - now + 5 * sec_ms_i64, - now + 2 * 5 * sec_ms_i64, - now + 3 * 5 * sec_ms_i64, - now + 4 * 5 * sec_ms_i64, - now + 4 * 5 * sec_ms_i64 + 100 * sec_ms_i64, - ], - 2 * HOUR_MS, - &[expect_range], - ); - - // Same with previous case, but shuffle the input timestamps. - test_suggest_duration_and_ranges_case( - &[ - now + 3 * 5 * sec_ms_i64, - now, - now + 5 * sec_ms_i64, - now + 4 * 5 * sec_ms_i64, - now + 2 * 5 * sec_ms_i64, - now + 4 * 5 * sec_ms_i64 + 100 * sec_ms_i64, - ], - 2 * HOUR_MS, - &[expect_range], - ); - - test_suggest_duration_and_ranges_case( - // Intervals: nine 5s and one 8h - &[ - now + 5 * 5 * sec_ms_i64 + 8 * HOUR_MS as i64, - now, - now + 5 * sec_ms_i64, - now + 2 * 5 * sec_ms_i64, - now + 7 * 5 * sec_ms_i64 + 8 * HOUR_MS as i64, - now + 3 * 5 * sec_ms_i64, - now + 4 * 5 * sec_ms_i64, - now + 4 * 5 * sec_ms_i64 + 8 * HOUR_MS as i64, - now + 6 * 5 * sec_ms_i64 + 8 * HOUR_MS as i64, - now + 8 * 5 * sec_ms_i64 + 8 * HOUR_MS as i64, - now + 9 * 5 * sec_ms_i64 + 8 * HOUR_MS as i64, - ], - 2 * HOUR_MS, - &[ - expect_range, - ( - expect_range.0 + 8 * HOUR_MS as i64, - expect_range.1 + 8 * HOUR_MS as i64, - ), - ], - ); - } - - #[test] - fn test_suggest_primary_keys() { - let schema = build_schema_for_cpu(); - // By default, primary keys are first two columns. 
- assert_eq!(&[0, 1], schema.primary_key_indexes()); - - let collect_and_suggest = |rows: Vec<(u64, i64, &str, &str, i8, f32)>, expected| { - let sampler = PrimaryKeySampler::new(&schema, 2); - for row in rows { - let row = build_row_for_cpu(row.0, row.1, row.2, row.3, row.4, row.5); - sampler.collect(&row); - } - assert_eq!(expected, sampler.suggest()); - }; - - let rows = vec![ - (1, 100, "horaedb", "a", 1, 1.0), - (2, 101, "horaedb", "a", 2, 1.0), - (3, 102, "horaedb", "a", 3, 1.0), - (4, 102, "horaedb", "b", 4, 1.0), - ]; - collect_and_suggest(rows, vec![2, 3, 0, 1]); - - let rows = vec![ - (1, 100, "horaedb", "a", 1, 1.0), - (2, 100, "horaedb", "a", 2, 1.0), - (3, 100, "horaedb", "a", 3, 1.0), - (4, 100, "horaedb", "b", 4, 1.0), - ]; - collect_and_suggest(rows, vec![2, 3, 0, 1]); - } -} diff --git a/src/analytic_engine/src/setup.rs b/src/analytic_engine/src/setup.rs deleted file mode 100644 index 4075e250db..0000000000 --- a/src/analytic_engine/src/setup.rs +++ /dev/null @@ -1,284 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Setup the analytic engine - -use std::{num::NonZeroUsize, path::Path, pin::Pin, sync::Arc}; - -use futures::Future; -use macros::define_result; -use meta_client::MetaClientRef; -use object_store::{ - aliyun, - config::{ObjectStoreOptions, StorageOptions}, - disk_cache::DiskCacheStore, - local_file, - mem_cache::{MemCache, MemCacheStore}, - metrics::StoreWithMetrics, - prefix::StoreWithPrefix, - s3, ObjectStoreRef, -}; -use snafu::{ResultExt, Snafu}; -use table_engine::engine::{EngineRuntimes, TableEngineRef}; -use wal::manager::{OpenedWals, WalManagerRef}; - -use crate::{ - compaction::runner::CompactionRunnerRef, - context::OpenContext, - engine::TableEngineImpl, - instance::open::{InstanceContext, ManifestStorages}, - sst::{ - factory::{FactoryImpl, ObjectStorePicker, ObjectStorePickerRef, ReadFrequency}, - meta_data::cache::{MetaCache, MetaCacheRef}, - }, - Config, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to open engine instance, err:{}", source))] - OpenInstance { - source: crate::instance::engine::Error, - }, - - #[snafu(display("Failed to execute in runtime, err:{}", source))] - RuntimeExec { source: runtime::Error }, - - #[snafu(display("Failed to open object store, err:{}", source))] - OpenObjectStore { - source: object_store::ObjectStoreError, - }, - - #[snafu(display("Failed to access object store by openDal , err:{}", source))] - OpenDal { source: object_store::OpenDalError }, - - #[snafu(display("Failed to create dir for {}, err:{}", path, source))] - CreateDir { - path: String, - source: std::io::Error, - }, - - #[snafu(display("Failed to create mem cache, err:{}", source))] - OpenMemCache { - source: object_store::mem_cache::Error, - }, -} - -define_result!(Error); - -const STORE_DIR_NAME: &str = "store"; -const DISK_CACHE_DIR_NAME: &str = "sst_cache"; - -pub struct TableEngineContext { - pub table_engine: TableEngineRef, - // TODO: unused now, will be used in remote compaction. - pub local_compaction_runner: Option, -} - -/// Builder for [TableEngine]. -/// -/// [TableEngine]: table_engine::engine::TableEngine -#[derive(Clone)] -pub struct EngineBuilder<'a> { - pub config: &'a Config, - pub engine_runtimes: Arc, - pub opened_wals: OpenedWals, - // Meta client is needed when compaction offload with remote node picker. 
- pub meta_client: Option, -} - -impl<'a> EngineBuilder<'a> { - pub async fn build(self) -> Result { - let opened_storages = - open_storage(self.config.storage.clone(), self.engine_runtimes.clone()).await?; - let manifest_storages = ManifestStorages { - wal_manager: self.opened_wals.manifest_wal.clone(), - oss_storage: opened_storages.default_store().clone(), - }; - - let InstanceContext { - instance, - local_compaction_runner, - } = build_instance_context( - self.config.clone(), - self.engine_runtimes, - self.opened_wals.data_wal, - manifest_storages, - Arc::new(opened_storages), - self.meta_client, - ) - .await?; - - let table_engine = Arc::new(TableEngineImpl::new(instance)); - - Ok(TableEngineContext { - table_engine, - local_compaction_runner, - }) - } -} - -async fn build_instance_context( - config: Config, - engine_runtimes: Arc, - wal_manager: WalManagerRef, - manifest_storages: ManifestStorages, - store_picker: ObjectStorePickerRef, - meta_client: Option, -) -> Result { - let meta_cache: Option = config - .sst_meta_cache_cap - .map(|cap| Arc::new(MetaCache::new(cap))); - - let open_ctx = OpenContext { - config, - runtimes: engine_runtimes, - meta_cache, - }; - - let instance_ctx = InstanceContext::new( - open_ctx, - manifest_storages, - wal_manager, - store_picker, - Arc::new(FactoryImpl), - meta_client.clone(), - ) - .await - .context(OpenInstance)?; - - Ok(instance_ctx) -} - -#[derive(Debug)] -struct OpenedStorages { - default_store: ObjectStoreRef, - store_with_readonly_cache: ObjectStoreRef, -} - -impl ObjectStorePicker for OpenedStorages { - fn default_store(&self) -> &ObjectStoreRef { - &self.default_store - } - - fn pick_by_freq(&self, freq: ReadFrequency) -> &ObjectStoreRef { - match freq { - ReadFrequency::Once => &self.store_with_readonly_cache, - ReadFrequency::Frequent => &self.default_store, - } - } -} - -// Build store in multiple layer, access speed decrease in turn. -// MemCacheStore → DiskCacheStore → real ObjectStore(OSS/S3...) -// MemCacheStore(ReadOnly) ↑ -// ```plaintext -// +-------------------------------+ -// | MemCacheStore | -// | +-----------------------+ -// | | DiskCacheStore | -// | | +----------------+ -// | | | | -// | | | OSS/S3.... | -// +-------+------+----------------+ -// ``` -fn open_storage( - opts: StorageOptions, - engine_runtimes: Arc, -) -> Pin> + Send>> { - Box::pin(async move { - let mut store = match opts.object_store { - ObjectStoreOptions::Local(mut local_opts) => { - let data_path = Path::new(&local_opts.data_dir); - let sst_path = data_path - .join(STORE_DIR_NAME) - .to_string_lossy() - .into_owned(); - tokio::fs::create_dir_all(&sst_path) - .await - .context(CreateDir { - path: sst_path.clone(), - })?; - local_opts.data_dir = sst_path; - - let store: ObjectStoreRef = - Arc::new(local_file::try_new(&local_opts).context(OpenDal)?); - Arc::new(store) as _ - } - ObjectStoreOptions::Aliyun(aliyun_opts) => { - let store: ObjectStoreRef = - Arc::new(aliyun::try_new(&aliyun_opts).context(OpenDal)?); - let store_with_prefix = StoreWithPrefix::new(aliyun_opts.prefix, store); - Arc::new(store_with_prefix.context(OpenObjectStore)?) as _ - } - ObjectStoreOptions::S3(s3_option) => { - let store: ObjectStoreRef = Arc::new(s3::try_new(&s3_option).context(OpenDal)?); - let store_with_prefix = StoreWithPrefix::new(s3_option.prefix, store); - Arc::new(store_with_prefix.context(OpenObjectStore)?) 
as _ - } - }; - - store = Arc::new(StoreWithMetrics::new( - store, - engine_runtimes.io_runtime.clone(), - )); - - if opts.disk_cache_capacity.as_byte() > 0 { - let path = Path::new(&opts.disk_cache_dir).join(DISK_CACHE_DIR_NAME); - tokio::fs::create_dir_all(&path).await.context(CreateDir { - path: path.to_string_lossy().into_owned(), - })?; - - // TODO: Consider the readonly cache. - store = Arc::new( - DiskCacheStore::try_new( - path.to_string_lossy().into_owned(), - opts.disk_cache_capacity.as_byte() as usize, - opts.disk_cache_page_size.as_byte() as usize, - store, - opts.disk_cache_partition_bits, - engine_runtimes.io_runtime.clone(), - ) - .await - .context(OpenObjectStore)?, - ) as _; - } - - if opts.mem_cache_capacity.as_byte() > 0 { - let mem_cache = Arc::new( - MemCache::try_new( - opts.mem_cache_partition_bits, - NonZeroUsize::new(opts.mem_cache_capacity.as_byte() as usize).unwrap(), - ) - .context(OpenMemCache)?, - ); - let default_store = Arc::new(MemCacheStore::new(mem_cache.clone(), store.clone())) as _; - let store_with_readonly_cache = - Arc::new(MemCacheStore::new_with_readonly_cache(mem_cache, store)) as _; - Ok(OpenedStorages { - default_store, - store_with_readonly_cache, - }) - } else { - let store_with_readonly_cache = store.clone(); - Ok(OpenedStorages { - default_store: store, - store_with_readonly_cache, - }) - } - }) -} diff --git a/src/analytic_engine/src/space.rs b/src/analytic_engine/src/space.rs deleted file mode 100644 index 1696d72109..0000000000 --- a/src/analytic_engine/src/space.rs +++ /dev/null @@ -1,271 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Table space -//! -//! A table space acts like a namespace of a bunch of tables, tables under -//! 
different space can use same table name - -use std::{ - collections::HashMap, - fmt, - sync::{Arc, RwLock}, -}; - -use sampling_cache::SamplingCachedUsize; -use table_engine::table::TableId; -use time_ext::ReadableDuration; - -use crate::{ - instance::mem_collector::{MemUsageCollector, MemUsageCollectorRef}, - table::data::{TableDataRef, TableDataSet}, -}; - -pub type SpaceId = u32; - -/// Holds references to the table data and its space -/// -/// REQUIRE: The table must belongs to the space -#[derive(Clone)] -pub struct SpaceAndTable { - /// The space of the table - space: SpaceRef, - /// Data of the table - table_data: TableDataRef, -} - -impl SpaceAndTable { - /// Create SpaceAndTable - /// - /// REQUIRE: The table must belongs to the space - pub fn new(space: SpaceRef, table_data: TableDataRef) -> Self { - // Checks table is in space - debug_assert!(space - .table_datas - .read() - .unwrap() - .find_table(&table_data.name) - .is_some()); - - Self { space, table_data } - } - - /// Get space info - #[inline] - pub fn space(&self) -> &SpaceRef { - &self.space - } - - /// Get table data - #[inline] - pub fn table_data(&self) -> &TableDataRef { - &self.table_data - } -} - -impl fmt::Debug for SpaceAndTable { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("SpaceAndTable") - .field("space_id", &self.space.id) - .field("table_id", &self.table_data.id) - .field("table_name", &self.table_data.name) - .finish() - } -} - -#[derive(Debug)] -pub struct SpaceContext { - /// Catalog name - pub catalog_name: String, - /// Schema name - pub schema_name: String, -} - -/// A space can hold multiple tables -pub struct Space { - /// Space id - pub id: SpaceId, - /// Space context - pub context: SpaceContext, - - /// Data of tables in this space - /// - /// Adding table into it should acquire the space lock first, then the write - /// lock - table_datas: RwLock, - - /// If table open failed, request of this table is not allowed, otherwise - /// schema may become inconsistent. - // TODO: engine should provide a repair method to fix those failed tables. - open_failed_tables: RwLock>, - - cached_mem_size: SamplingCachedUsize, - - /// Space memtable memory usage collector - pub mem_usage_collector: MemUsageCollectorRef, - /// The maximum write buffer size used for single space. - pub write_buffer_size: usize, - /// The interval for sampling mem usage - pub mem_usage_sampling_interval: ReadableDuration, -} - -pub struct MemSizeOptions { - pub write_buffer_size: usize, - pub usage_collector: MemUsageCollectorRef, - pub size_sampling_interval: ReadableDuration, -} - -impl Space { - pub fn new(id: SpaceId, context: SpaceContext, mem_size_options: MemSizeOptions) -> Self { - Self { - id, - context, - table_datas: Default::default(), - open_failed_tables: Default::default(), - mem_usage_sampling_interval: mem_size_options.size_sampling_interval, - cached_mem_size: SamplingCachedUsize::new( - mem_size_options.size_sampling_interval.as_millis(), - ), - mem_usage_collector: Arc::new(MemUsageCollector::with_parent( - mem_size_options.usage_collector, - )), - write_buffer_size: mem_size_options.write_buffer_size, - } - } - - /// Returns true when space total memtable memory usage reaches - /// space_write_buffer_size limit. - #[inline] - pub fn should_flush_space(&self) -> bool { - self.write_buffer_size > 0 && self.memtable_memory_usage() >= self.write_buffer_size - } - - /// Find the table whose memtable consumes the most memory in the space by - /// specifying Worker. 
- #[inline] - pub fn find_maximum_memory_usage_table(&self) -> Option { - self.table_datas - .read() - .unwrap() - .find_maximum_memory_usage_table() - } - - #[inline] - pub fn memtable_memory_usage(&self) -> usize { - let fetch_total_memory_usage = || -> std::result::Result { - Ok(self.table_datas.read().unwrap().total_memory_usage()) - }; - self.cached_mem_size.read(fetch_total_memory_usage).unwrap() - } - - /// Insert table data into space memory state if the table is - /// absent. For internal use only - /// - /// Panic if the table has already existed. - pub(crate) fn insert_table(&self, table_data: TableDataRef) { - let success = self - .table_datas - .write() - .unwrap() - .insert_if_absent(table_data); - assert!(success); - } - - pub(crate) fn insert_open_failed_table(&self, table_name: String) { - self.open_failed_tables.write().unwrap().push(table_name) - } - - pub(crate) fn is_open_failed_table(&self, table_name: &String) -> bool { - self.open_failed_tables.read().unwrap().contains(table_name) - } - - /// Find table under this space by table name - pub fn find_table(&self, table_name: &str) -> Option { - self.table_datas.read().unwrap().find_table(table_name) - } - - /// Find table under this space by its id - pub fn find_table_by_id(&self, table_id: TableId) -> Option { - self.table_datas.read().unwrap().find_table_by_id(table_id) - } - - /// Remove table under this space by table name - pub fn remove_table(&self, table_name: &str) -> Option { - self.table_datas.write().unwrap().remove_table(table_name) - } - - /// Returns the total table num in this space - pub fn table_num(&self) -> usize { - self.table_datas.read().unwrap().table_num() - } - - /// List all tables of this space to `tables` - pub fn list_all_tables(&self, tables: &mut Vec) { - self.table_datas.read().unwrap().list_all_tables(tables) - } - - pub fn space_id(&self) -> SpaceId { - self.id - } -} - -/// A reference to space -pub type SpaceRef = Arc; - -impl fmt::Debug for Space { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Space") - .field("id", &self.id) - .field("context", &self.context) - .field("write_buffer_size", &self.write_buffer_size) - .finish() - } -} - -/// Spaces states -#[derive(Default)] -pub(crate) struct Spaces { - /// Id to space - id_to_space: HashMap, -} - -impl Spaces { - /// Insert space by name, and also insert id to space mapping - pub fn insert(&mut self, space: SpaceRef) { - let space_id = space.id; - self.id_to_space.insert(space_id, space); - } - - pub fn get_by_id(&self, id: SpaceId) -> Option<&SpaceRef> { - self.id_to_space.get(&id) - } - - /// List all tables of all spaces - pub fn list_all_tables(&self, tables: &mut Vec) { - let total_tables = self.id_to_space.values().map(|s| s.table_num()).sum(); - tables.reserve(total_tables); - for space in self.id_to_space.values() { - space.list_all_tables(tables); - } - } - - pub fn list_all_spaces(&self) -> Vec { - self.id_to_space.values().cloned().collect() - } -} - -pub(crate) type SpacesRef = Arc>; diff --git a/src/analytic_engine/src/sst/factory.rs b/src/analytic_engine/src/sst/factory.rs deleted file mode 100644 index 1f17b8df1d..0000000000 --- a/src/analytic_engine/src/sst/factory.rs +++ /dev/null @@ -1,303 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Factory for different kinds sst writer and reader. - -use std::{collections::HashMap, fmt::Debug, sync::Arc}; - -use async_trait::async_trait; -use common_types::projected_schema::RowProjectorBuilder; -use generic_error::{BoxError, GenericError}; -use macros::define_result; -use object_store::{ObjectStoreRef, Path}; -use runtime::Runtime; -use snafu::{Backtrace, OptionExt, ResultExt, Snafu}; -use table_engine::predicate::PredicateRef; -use trace_metric::MetricsCollector; - -use super::parquet::encoding::ColumnEncoding; -use crate::{ - sst::{ - file::Level, - header, - header::HeaderParser, - meta_data::cache::MetaCacheRef, - metrics::MaybeTableLevelMetrics as SstMaybeTableLevelMetrics, - parquet::{ - writer::{ParquetSstWriter, WriteOptions}, - AsyncParquetReader, ThreadedReader, - }, - reader::SstReader, - writer::SstWriter, - }, - table_options::{Compression, StorageFormat, StorageFormatHint}, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to parse sst header, err:{}", source,))] - ParseHeader { source: header::Error }, - - #[snafu(display("Empty storage format hint.\nBacktrace:\n{}", backtrace))] - EmptyStorageFormatHint { backtrace: Backtrace }, - - #[snafu(display("Failed to convert storage format hint, err:{}", source))] - ConvertStorageFormatHint { source: GenericError }, - - #[snafu(display("Failed to convert compression, err:{}", source))] - ConvertCompression { source: GenericError }, -} - -define_result!(Error); - -/// Pick suitable object store for different scenes. -pub trait ObjectStorePicker: Send + Sync + Debug { - /// Just provide default object store for the scenes where user don't care - /// about it. - fn default_store(&self) -> &ObjectStoreRef; - - /// Pick an object store according to the read frequency. - fn pick_by_freq(&self, freq: ReadFrequency) -> &ObjectStoreRef; -} - -pub type ObjectStorePickerRef = Arc; - -/// For any [`ObjectStoreRef`], it can be used as an [`ObjectStorePicker`]. -impl ObjectStorePicker for ObjectStoreRef { - fn default_store(&self) -> &ObjectStoreRef { - self - } - - fn pick_by_freq(&self, _freq: ReadFrequency) -> &ObjectStoreRef { - self - } -} - -/// Sst factory reference -pub type FactoryRef = Arc; - -#[async_trait] -pub trait Factory: Send + Sync + Debug { - async fn create_reader<'a>( - &self, - path: &'a Path, - options: &SstReadOptions, - hint: SstReadHint, - store_picker: &'a ObjectStorePickerRef, - metrics_collector: Option, - ) -> Result>; - - async fn create_writer<'a>( - &self, - options: &SstWriteOptions, - path: &'a Path, - store_picker: &'a ObjectStorePickerRef, - level: Level, - ) -> Result>; -} - -/// The frequency of query execution may decide some behavior in the sst reader, -/// e.g. cache policy. -#[derive(Debug, Copy, Clone)] -pub enum ReadFrequency { - Once, - Frequent, -} - -#[derive(Clone, Copy, Debug, Default)] -pub struct SstReadHint { - /// Hint for the size of the sst file. 
It may avoid some io if provided. - pub file_size: Option, - /// Hint for the storage format of the sst file. It may avoid some io if - /// provided. - pub file_format: Option, -} - -#[derive(Debug, Clone)] -pub struct ScanOptions { - /// The suggested parallelism while reading sst - pub background_read_parallelism: usize, - /// The max record batches in flight - pub max_record_batches_in_flight: usize, - /// The number of streams to prefetch when scan - pub num_streams_to_prefetch: usize, -} - -impl Default for ScanOptions { - fn default() -> Self { - Self { - background_read_parallelism: 1, - max_record_batches_in_flight: 64, - num_streams_to_prefetch: 2, - } - } -} - -#[derive(Debug, Clone)] -pub struct SstReadOptions { - pub maybe_table_level_metrics: Option>, - - pub frequency: ReadFrequency, - pub num_rows_per_row_group: usize, - pub row_projector_builder: RowProjectorBuilder, - pub predicate: PredicateRef, - pub meta_cache: Option, - pub scan_options: ScanOptions, - - pub runtime: Arc, -} -#[derive(Clone, Debug)] -pub struct ColumnStats { - pub low_cardinality: bool, -} - -#[derive(Debug, Clone)] -pub struct SstWriteOptions { - pub storage_format_hint: StorageFormatHint, - pub num_rows_per_row_group: usize, - pub compression: Compression, - pub max_buffer_size: usize, - pub column_stats: HashMap, -} - -impl TryFrom for SstWriteOptions { - type Error = Error; - - fn try_from(value: horaedbproto::compaction_service::SstWriteOptions) -> Result { - let storage_format_hint: StorageFormatHint = value - .storage_format_hint - .context(EmptyStorageFormatHint)? - .try_into() - .box_err() - .context(ConvertStorageFormatHint)?; - - let num_rows_per_row_group = value.num_rows_per_row_group as usize; - let compression: Compression = value - .compression - .try_into() - .box_err() - .context(ConvertCompression)?; - let max_buffer_size = value.max_buffer_size as usize; - - let column_stats: HashMap = value - .column_stats - .into_iter() - .map(|(k, v)| (k, ColumnStats { low_cardinality: v })) - .collect(); - - Ok(SstWriteOptions { - storage_format_hint, - num_rows_per_row_group, - compression, - max_buffer_size, - column_stats, - }) - } -} - -impl From for horaedbproto::compaction_service::SstWriteOptions { - fn from(value: SstWriteOptions) -> Self { - let column_stats = value - .column_stats - .into_iter() - .map(|(k, v)| (k, v.low_cardinality)) - .collect(); - - Self { - storage_format_hint: Some(value.storage_format_hint.into()), - num_rows_per_row_group: value.num_rows_per_row_group as u64, - compression: value.compression.into(), - max_buffer_size: value.max_buffer_size as u64, - column_stats, - } - } -} - -impl From<&ColumnStats> for ColumnEncoding { - fn from(value: &ColumnStats) -> Self { - ColumnEncoding { - enable_dict: value.low_cardinality, - } - } -} - -#[derive(Debug, Default)] -pub struct FactoryImpl; - -#[async_trait] -impl Factory for FactoryImpl { - async fn create_reader<'a>( - &self, - path: &'a Path, - options: &SstReadOptions, - hint: SstReadHint, - store_picker: &'a ObjectStorePickerRef, - metrics_collector: Option, - ) -> Result> { - let storage_format = match hint.file_format { - Some(v) => v, - None => { - let header_parser = HeaderParser::new(path, store_picker.default_store()); - header_parser.parse().await.context(ParseHeader)? 
- } - }; - - match storage_format { - StorageFormat::Columnar => { - let reader = AsyncParquetReader::new( - path, - options, - hint.file_size, - store_picker, - metrics_collector, - ); - let reader = ThreadedReader::new( - reader, - options.runtime.clone(), - options.scan_options.background_read_parallelism, - options.scan_options.max_record_batches_in_flight, - ); - Ok(Box::new(reader)) - } - } - } - - async fn create_writer<'a>( - &self, - options: &SstWriteOptions, - path: &'a Path, - store_picker: &'a ObjectStorePickerRef, - level: Level, - ) -> Result> { - let column_encodings = - HashMap::from_iter(options.column_stats.iter().map(|(col_name, col_stats)| { - (col_name.to_owned(), ColumnEncoding::from(col_stats)) - })); - let write_options = WriteOptions { - num_rows_per_row_group: options.num_rows_per_row_group, - max_buffer_size: options.max_buffer_size, - compression: options.compression.into(), - sst_level: level, - column_encodings, - }; - Ok(Box::new(ParquetSstWriter::new( - path, - write_options, - store_picker, - ))) - } -} diff --git a/src/analytic_engine/src/sst/file.rs b/src/analytic_engine/src/sst/file.rs deleted file mode 100644 index a6cc336a31..0000000000 --- a/src/analytic_engine/src/sst/file.rs +++ /dev/null @@ -1,740 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Sst file and storage info - -use std::{ - borrow::Borrow, - collections::{BTreeMap, HashSet}, - fmt, - fmt::Debug, - hash::{Hash, Hasher}, - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, - }, - time::Duration, -}; - -use common_types::{ - time::{TimeRange, Timestamp}, - SequenceNumber, -}; -use future_ext::{retry_async, BackoffConfig, RetryConfig}; -use generic_error::{BoxError, GenericError}; -use logger::{error, info, trace, warn}; -use macros::define_result; -use metric_ext::Meter; -use object_store::{ObjectStoreRef, Path}; -use runtime::{JoinHandle, Runtime}; -use snafu::{Backtrace, OptionExt, ResultExt, Snafu}; -use table_engine::table::TableId; -use tokio::sync::{ - mpsc::{self, UnboundedReceiver, UnboundedSender}, - Mutex, -}; - -use crate::{space::SpaceId, sst::manager::FileId, table::sst_util, table_options::StorageFormat}; - -/// Error of sst file. 
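// Editor's sketch (illustrative addition, not part of the removed code): create_writer
// above turns per-column stats into per-column encodings — essentially "low cardinality
// implies dictionary encoding". The stand-in types below are hypothetical, not the
// crate's ColumnStats/ColumnEncoding.
use std::collections::HashMap;

struct StatsSketch { low_cardinality: bool }
struct EncodingSketch { enable_dict: bool }

fn sketch_column_encodings(stats: &HashMap<String, StatsSketch>) -> HashMap<String, EncodingSketch> {
    stats
        .iter()
        .map(|(name, s)| (name.clone(), EncodingSketch { enable_dict: s.low_cardinality }))
        .collect()
}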
-#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to join purger, err:{}", source))] - StopPurger { source: runtime::Error }, - - #[snafu(display("Empty time range.\nBacktrace:\n{}", backtrace))] - EmptyTimeRange { backtrace: Backtrace }, - - #[snafu(display("Failed to convert time range, err:{}", source))] - ConvertTimeRange { source: GenericError }, - - #[snafu(display("Failed to convert storage format, err:{}", source))] - ConvertStorageFormat { source: GenericError }, - - #[snafu(display("Converted overflow, err:{}", source))] - ConvertOverflow { source: GenericError }, -} - -define_result!(Error); - -pub const SST_LEVEL_NUM: usize = 2; - -#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)] -pub struct Level(u16); - -impl Level { - // Currently there are only two levels: 0, 1. - pub const MAX: Self = Self(1); - pub const MIN: Self = Self(0); - - pub fn next(&self) -> Self { - Self::MAX.0.min(self.0 + 1).into() - } - - pub fn is_min(&self) -> bool { - self == &Self::MIN - } - - pub fn as_usize(&self) -> usize { - self.0 as usize - } - - pub fn as_u32(&self) -> u32 { - self.0 as u32 - } - - pub fn as_u16(&self) -> u16 { - self.0 - } -} - -impl From for Level { - fn from(value: u16) -> Self { - Self(value) - } -} - -impl TryFrom for Level { - type Error = Error; - - fn try_from(value: u32) -> Result { - let value: u16 = value.try_into().box_err().context(ConvertOverflow)?; - Ok(value.into()) - } -} - -impl fmt::Display for Level { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.0) - } -} - -// TODO(yingwen): Order or split file by time range to speed up filter (even in -// level 0). -/// Manage files of single level -pub struct LevelHandler { - pub level: Level, - /// All files in current level. 
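// Editor's sketch (illustrative addition, not part of the removed code): the Level type
// above has exactly two levels and next() saturates at Level::MAX, so promotion from
// level 0 goes to level 1 and stays there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct LevelSketch(u16);

impl LevelSketch {
    const MAX: LevelSketch = LevelSketch(1);
    const MIN: LevelSketch = LevelSketch(0);

    fn next(self) -> Self {
        LevelSketch(Self::MAX.0.min(self.0 + 1))
    }
}

fn sketch_level_saturation() {
    assert_eq!(LevelSketch::MIN.next(), LevelSketch::MAX);
    assert_eq!(LevelSketch::MAX.next(), LevelSketch::MAX);
}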
- files: FileHandleSet, -} - -impl LevelHandler { - pub fn new(level: Level) -> Self { - Self { - level, - files: FileHandleSet::default(), - } - } - - #[inline] - pub fn insert(&mut self, file: FileHandle) { - self.files.insert(file); - } - - pub fn latest_sst(&self) -> Option { - self.files.latest() - } - - pub fn pick_ssts(&self, time_range: TimeRange) -> Vec { - self.files.files_by_time_range(time_range) - } - - #[inline] - pub fn remove_ssts(&mut self, file_ids: &[FileId]) { - self.files.remove_by_ids(file_ids); - } - - pub fn iter_ssts(&self) -> Iter { - let iter = self.files.file_map.values(); - Iter(iter) - } - - #[inline] - pub fn collect_expired( - &self, - expire_time: Option, - expired_files: &mut Vec, - ) { - self.files.collect_expired(expire_time, expired_files); - } - - #[inline] - pub fn has_expired_sst(&self, expire_time: Option) -> bool { - self.files.has_expired_sst(expire_time) - } -} - -pub struct Iter<'a>(std::collections::btree_map::Values<'a, FileOrdKey, FileHandle>); - -impl<'a> Iterator for Iter<'a> { - type Item = &'a FileHandle; - - fn next(&mut self) -> Option { - self.0.next() - } -} - -#[derive(Clone)] -pub struct FileHandle { - inner: Arc, -} - -impl PartialEq for FileHandle { - fn eq(&self, other: &Self) -> bool { - self.id() == other.id() - } -} - -impl Eq for FileHandle {} - -impl Hash for FileHandle { - fn hash(&self, state: &mut H) { - self.id().hash(state); - } -} - -impl FileHandle { - pub fn new(meta: FileMeta, purge_queue: FilePurgeQueue) -> Self { - Self { - inner: Arc::new(FileHandleInner { - meta, - purge_queue, - being_compacted: AtomicBool::new(false), - metrics: SstMetrics::default(), - }), - } - } - - #[inline] - pub fn space_id(&self) -> SpaceId { - self.inner.purge_queue.space_id() - } - - #[inline] - pub fn table_id(&self) -> TableId { - self.inner.purge_queue.table_id() - } - - #[inline] - pub fn read_meter(&self) -> Arc { - self.inner.metrics.read_meter.clone() - } - - #[inline] - pub fn row_num(&self) -> u64 { - self.inner.meta.row_num - } - - #[inline] - pub fn id(&self) -> FileId { - self.inner.meta.id - } - - #[inline] - pub fn id_ref(&self) -> &FileId { - &self.inner.meta.id - } - - #[inline] - pub fn intersect_with_time_range(&self, time_range: TimeRange) -> bool { - self.inner.meta.intersect_with_time_range(time_range) - } - - #[inline] - pub fn time_range(&self) -> TimeRange { - self.inner.meta.time_range - } - - #[inline] - pub fn time_range_ref(&self) -> &TimeRange { - &self.inner.meta.time_range - } - - #[inline] - pub fn max_sequence(&self) -> SequenceNumber { - self.inner.meta.max_seq - } - - #[inline] - pub fn being_compacted(&self) -> bool { - self.inner.being_compacted.load(Ordering::Relaxed) - } - - #[inline] - pub fn size(&self) -> u64 { - self.inner.meta.size - } - - #[inline] - pub fn set_being_compacted(&self, value: bool) { - self.inner.being_compacted.store(value, Ordering::Relaxed); - } - - #[inline] - pub fn storage_format(&self) -> StorageFormat { - self.inner.meta.storage_format - } - - #[inline] - pub fn meta(&self) -> FileMeta { - self.inner.meta.clone() - } -} - -impl fmt::Debug for FileHandle { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("FileHandle") - .field("meta", &self.inner.meta) - .field("being_compacted", &self.being_compacted()) - .finish() - } -} - -struct SstMetrics { - pub read_meter: Arc, - pub key_num: usize, -} - -impl Default for SstMetrics { - fn default() -> Self { - SstMetrics { - read_meter: Arc::new(Meter::new()), - key_num: 0, - } - } -} - -impl 
fmt::Debug for SstMetrics { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("SstMetrics") - .field("read_meter", &self.read_meter.h2_rate()) - .field("key_num", &self.key_num) - .finish() - } -} - -struct FileHandleInner { - meta: FileMeta, - purge_queue: FilePurgeQueue, - /// The file is being compacting. - being_compacted: AtomicBool, - metrics: SstMetrics, -} - -impl Drop for FileHandleInner { - fn drop(&mut self) { - info!("FileHandle is dropped, meta:{:?}", self.meta); - - // Push file cannot block or be async because we are in drop(). - self.purge_queue.push_file(&self.meta); - } -} - -/// Used to order [FileHandle] by (end_time, start_time, file_id) -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] -struct FileOrdKey { - exclusive_end: Timestamp, - inclusive_start: Timestamp, - file_id: FileId, -} - -impl FileOrdKey { - fn for_seek(exclusive_end: Timestamp) -> Self { - Self { - exclusive_end, - inclusive_start: Timestamp::MIN, - file_id: 0, - } - } - - fn key_of(file: &FileHandle) -> Self { - Self { - exclusive_end: file.time_range().exclusive_end(), - inclusive_start: file.time_range().inclusive_start(), - file_id: file.id(), - } - } -} - -/// Used to index [FileHandle] by file_id -struct FileHandleHash(FileHandle); - -impl PartialEq for FileHandleHash { - fn eq(&self, other: &Self) -> bool { - self.0.id() == other.0.id() - } -} - -impl Eq for FileHandleHash {} - -impl Hash for FileHandleHash { - fn hash(&self, state: &mut H) { - self.0.id().hash(state); - } -} - -impl Borrow for FileHandleHash { - #[inline] - fn borrow(&self) -> &FileId { - self.0.id_ref() - } -} - -#[derive(Default)] -struct FileHandleSet { - /// Files ordered by time range and id. - file_map: BTreeMap, - /// Files indexed by file id, used to speed up removal. - id_to_files: HashSet, -} - -impl FileHandleSet { - fn latest(&self) -> Option { - if let Some(file) = self.file_map.values().next_back() { - return Some(file.clone()); - } - None - } - - fn files_by_time_range(&self, time_range: TimeRange) -> Vec { - // Seek to first sst whose end time >= time_range.inclusive_start(). - trace!( - "Pick sst file by range for query, time_range:{time_range:?}, file_map:{:?}", - self.file_map - ); - let seek_key = FileOrdKey::for_seek(time_range.inclusive_start()); - self.file_map - .range(seek_key..) - .filter_map(|(_key, file)| { - if file.intersect_with_time_range(time_range) { - Some(file.clone()) - } else { - None - } - }) - .collect() - } - - fn insert(&mut self, file: FileHandle) { - self.file_map - .insert(FileOrdKey::key_of(&file), file.clone()); - self.id_to_files.insert(FileHandleHash(file)); - } - - fn remove_by_ids(&mut self, file_ids: &[FileId]) { - for file_id in file_ids { - if let Some(file) = self.id_to_files.take(file_id) { - let key = FileOrdKey::key_of(&file.0); - self.file_map.remove(&key); - } - } - } - - /// Collect ssts with time range is expired. - fn collect_expired(&self, expire_time: Option, expired_files: &mut Vec) { - for file in self.file_map.values() { - if file.time_range().is_expired(expire_time) { - expired_files.push(file.clone()); - } else { - // Files are sorted by end time first, so there is no more file whose end time - // is less than `expire_time`. - break; - } - } - } - - fn has_expired_sst(&self, expire_time: Option) -> bool { - // Files are sorted by end time first, so check first file is enough. 
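// Editor's sketch (illustrative addition, not part of the removed code): FileHandleSet
// above keys files by (exclusive_end, inclusive_start, file_id) in a BTreeMap, so a
// time-range query can seek to the first file whose end reaches the query start and
// scan forward, keeping only files that actually overlap. A simplified version of that
// lookup:
use std::collections::BTreeMap;

type Ts = i64;

fn sketch_files_overlapping(
    // (exclusive_end, inclusive_start, file_id) -> file name
    files: &BTreeMap<(Ts, Ts, u64), String>,
    query_start: Ts,
    query_end: Ts,
) -> Vec<String> {
    files
        .range((query_start, Ts::MIN, 0)..)
        .filter(|((end, start, _), _)| *end > query_start && *start < query_end)
        .map(|(_, name)| name.clone())
        .collect()
}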
- if let Some(file) = self.file_map.values().next() { - return file.time_range().is_expired(expire_time); - } - - false - } -} - -/// Meta of a sst file, immutable once created -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct FileMeta { - /// Id of the sst file - pub id: FileId, - /// File size in bytes - pub size: u64, - /// Total row number - pub row_num: u64, - /// The time range of the file. - pub time_range: TimeRange, - /// The max sequence number of the file. - pub max_seq: u64, - /// The format of the file. - pub storage_format: StorageFormat, - /// Associated files, such as: meta_path - pub associated_files: Vec, -} - -impl FileMeta { - pub fn intersect_with_time_range(&self, time_range: TimeRange) -> bool { - self.time_range.intersect_with(time_range) - } -} - -impl TryFrom for FileMeta { - type Error = Error; - - fn try_from(value: horaedbproto::compaction_service::FileMeta) -> Result { - let time_range: TimeRange = value - .time_range - .context(EmptyTimeRange)? - .try_into() - .box_err() - .context(ConvertTimeRange)?; - - let storage_format: StorageFormat = value - .storage_format - .try_into() - .box_err() - .context(ConvertStorageFormat)?; - let mut associated_files: Vec = Vec::with_capacity(value.associated_files.len()); - for file in value.associated_files { - associated_files.push(file); - } - - Ok(FileMeta { - id: value.file_id, - size: value.size, - row_num: value.row_num, - time_range, - max_seq: value.max_seq, - storage_format, - associated_files, - }) - } -} - -impl From for horaedbproto::compaction_service::FileMeta { - fn from(value: FileMeta) -> Self { - Self { - file_id: value.id, - max_seq: value.max_seq, - time_range: Some(value.time_range.into()), - size: value.size, - row_num: value.row_num, - storage_format: value.storage_format.into(), - associated_files: value.associated_files, - } - } -} - -// Queue to store files to be deleted for a table. -#[derive(Clone)] -pub struct FilePurgeQueue { - // Wrap a inner struct to avoid storing space/table ids for each file. - inner: Arc, -} - -impl FilePurgeQueue { - pub fn new(space_id: SpaceId, table_id: TableId, sender: UnboundedSender) -> Self { - Self { - inner: Arc::new(FilePurgeQueueInner { - space_id, - table_id, - sender, - closed: AtomicBool::new(false), - }), - } - } - - /// Close the purge queue, then all request pushed to this queue will be - /// ignored. This is mainly used to avoid files being deleted after the - /// db is closed. - pub fn close(&self) { - self.inner.closed.store(true, Ordering::SeqCst); - } - - fn push_file(&self, file_meta: &FileMeta) { - if self.inner.closed.load(Ordering::SeqCst) { - warn!("Purger closed, ignore file_id:{}", file_meta.id); - return; - } - - // Send the file id via a channel to file purger and delete the file from sst - // store in background. 
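// Editor's sketch (illustrative addition, not part of the removed code): the purge
// request is pushed through an unbounded channel so this path can run inside Drop
// without blocking or awaiting; a background task drains the queue and deletes files.
fn sketch_enqueue_purge(sender: &tokio::sync::mpsc::UnboundedSender<u64>, file_id: u64) {
    // send() on an unbounded channel never blocks; it only errors if the receiver is gone.
    if sender.send(file_id).is_err() {
        eprintln!("purger already stopped, ignoring purge request for file {file_id}");
    }
}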
- let request = FilePurgeRequest { - space_id: self.inner.space_id, - table_id: self.inner.table_id, - file_id: file_meta.id, - associated_files: file_meta.associated_files.clone(), - }; - - if let Err(send_res) = self.inner.sender.send(Request::Purge(request)) { - error!( - "Failed to send delete file request, request:{:?}", - send_res.0 - ); - } - } - - #[inline] - pub fn space_id(&self) -> SpaceId { - self.inner.space_id - } - - #[inline] - pub fn table_id(&self) -> TableId { - self.inner.table_id - } -} - -impl From for FilePurgeQueue { - fn from(value: horaedbproto::compaction_service::FilePurgeQueue) -> Self { - let (tx, _rx) = mpsc::unbounded_channel(); - FilePurgeQueue::new(value.space_id, value.table_id.into(), tx) - } -} - -struct FilePurgeQueueInner { - space_id: SpaceId, - table_id: TableId, - closed: AtomicBool, - sender: UnboundedSender, -} - -#[derive(Debug)] -pub struct FilePurgeRequest { - space_id: SpaceId, - table_id: TableId, - file_id: FileId, - associated_files: Vec, -} - -#[derive(Debug)] -pub enum Request { - Purge(FilePurgeRequest), - Exit, -} - -/// Background file purger. -pub struct FilePurger { - sender: UnboundedSender, - handle: Mutex>>, -} - -impl FilePurger { - const RETRY_CONFIG: RetryConfig = RetryConfig { - max_retries: 3, - backoff: BackoffConfig { - init_backoff: Duration::from_millis(500), - max_backoff: Duration::from_secs(5), - base: 3., - }, - }; - - pub fn start(runtime: &Runtime, store: ObjectStoreRef) -> Self { - // We must use unbound channel, so the sender wont block when the handle is - // dropped. - let (tx, rx) = mpsc::unbounded_channel(); - - // Spawn a background job to purge files. - let handle = runtime.spawn(async { - Self::purge_file_loop(store, rx).await; - }); - - Self { - sender: tx, - handle: Mutex::new(Some(handle)), - } - } - - pub async fn stop(&self) -> Result<()> { - info!("Try to stop file purger"); - - if self.sender.send(Request::Exit).is_err() { - error!("File purge task already exited"); - } - - let mut handle = self.handle.lock().await; - // Also clear the handle to avoid await a ready future. - if let Some(h) = handle.take() { - h.await.context(StopPurger)?; - } - - Ok(()) - } - - pub fn create_purge_queue(&self, space_id: SpaceId, table_id: TableId) -> FilePurgeQueue { - FilePurgeQueue::new(space_id, table_id, self.sender.clone()) - } - - // TODO: currently we ignore errors when delete. 
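// Editor's sketch (illustrative addition, not part of the removed code): RETRY_CONFIG
// above describes an exponential backoff with base 3 starting at 500ms and capped at
// 5s; materialized, the three retry delays are 500ms, 1.5s and 4.5s.
use std::time::Duration;

fn sketch_backoff_schedule(init: Duration, max: Duration, base: f64, retries: u32) -> Vec<Duration> {
    (0..retries)
        .map(|i| init.mul_f64(base.powi(i as i32)).min(max))
        .collect()
}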
- async fn delete_file(store: &ObjectStoreRef, path: &Path) { - if let Err(e) = retry_async(|| store.delete(path), &Self::RETRY_CONFIG).await { - error!("File purger failed to delete file, path:{path}, err:{e}"); - } - } - - async fn purge_file_loop(store: ObjectStoreRef, mut receiver: UnboundedReceiver) { - info!("File purger start"); - - while let Some(request) = receiver.recv().await { - match request { - Request::Purge(purge_request) => { - let sst_file_path = sst_util::new_sst_file_path( - purge_request.space_id, - purge_request.table_id, - purge_request.file_id, - ); - - info!( - "File purger delete file, purge_request:{:?}, sst_file_path:{}", - purge_request, - sst_file_path.to_string() - ); - - for path in purge_request.associated_files { - let path = Path::from(path); - Self::delete_file(&store, &path).await; - } - - Self::delete_file(&store, &sst_file_path).await; - } - Request::Exit => break, - } - } - - info!("File purger exit"); - } -} - -pub type FilePurgerRef = Arc; - -#[cfg(test)] -pub mod tests { - use super::*; - - pub struct FilePurgerMocker; - - impl FilePurgerMocker { - pub fn mock() -> FilePurger { - let (sender, _receiver) = mpsc::unbounded_channel(); - - FilePurger { - sender, - handle: Mutex::new(None), - } - } - } -} diff --git a/src/analytic_engine/src/sst/header.rs b/src/analytic_engine/src/sst/header.rs deleted file mode 100644 index ff459b87ca..0000000000 --- a/src/analytic_engine/src/sst/header.rs +++ /dev/null @@ -1,81 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// The header parser for one sst. - -use bytes_ext::Bytes; -use macros::define_result; -use object_store::{ObjectStoreRef, Path}; -use parquet::data_type::AsBytes; -use snafu::{Backtrace, ResultExt, Snafu}; - -use crate::table_options::StorageFormat; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to read header bytes, err:{}", source,))] - ReadHeaderBytes { - source: object_store::ObjectStoreError, - }, - - #[snafu(display( - "Unknown header, header value:{:?}.\nBacktrace:\n{}", - header_value, - backtrace - ))] - UnknownHeader { - header_value: Bytes, - backtrace: Backtrace, - }, -} - -define_result!(Error); - -/// A parser for decoding the header of SST. -/// -/// Assume that every SST shares the same encoding format: -/// -/// +------------+----------------------+ -/// | 4B(header) | Payload | -/// +------------+----------------------+ -pub struct HeaderParser<'a> { - path: &'a Path, - store: &'a ObjectStoreRef, -} - -impl<'a> HeaderParser<'a> { - const HEADER_LEN: usize = 4; - const PARQUET: &'static [u8] = b"PAR1"; - - pub fn new(path: &'a Path, store: &'a ObjectStoreRef) -> HeaderParser<'a> { - Self { path, store } - } - - /// Detect the storage format by parsing header of the sst. 
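// Editor's sketch (illustrative addition, not part of the removed code): the layout
// documented above is a 4-byte magic prefix followed by the payload, and parquet files
// begin with b"PAR1", so format detection only needs the first four bytes.
fn sketch_is_parquet_header(first_four_bytes: &[u8]) -> bool {
    first_four_bytes == b"PAR1"
}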
- pub async fn parse(&self) -> Result { - let header_value = self - .store - .get_range(self.path, 0..Self::HEADER_LEN) - .await - .context(ReadHeaderBytes)?; - - match header_value.as_bytes() { - Self::PARQUET => Ok(StorageFormat::Columnar), - _ => UnknownHeader { header_value }.fail(), - } - } -} diff --git a/src/analytic_engine/src/sst/manager.rs b/src/analytic_engine/src/sst/manager.rs deleted file mode 100644 index 3cac557d75..0000000000 --- a/src/analytic_engine/src/sst/manager.rs +++ /dev/null @@ -1,175 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Multi-level SST management - -use common_types::time::{TimeRange, Timestamp}; - -use crate::{ - compaction::ExpiredFiles, - sst::file::{FileHandle, FileMeta, FilePurgeQueue, Iter, Level, LevelHandler}, -}; - -/// Id for a sst file -pub type FileId = u64; - -/// A table level manager that manages all the sst files of the table -pub struct LevelsController { - levels: Vec, - purge_queue: FilePurgeQueue, -} - -impl Drop for LevelsController { - fn drop(&mut self) { - // Close the purge queue to avoid files being deleted. - self.purge_queue.close(); - } -} - -impl LevelsController { - /// Create an empty LevelsController - pub fn new(purge_queue: FilePurgeQueue) -> Self { - Self { - levels: (Level::MIN.as_u16()..=Level::MAX.as_u16()) - .map(|v| LevelHandler::new(v.into())) - .collect::>(), - purge_queue, - } - } - - /// Add sst file to level - /// - /// Panic: If the level is greater than the max level - pub fn add_sst_to_level(&mut self, level: Level, file_meta: FileMeta) { - let level_handler = &mut self.levels[level.as_usize()]; - let file = FileHandle::new(file_meta, self.purge_queue.clone()); - - level_handler.insert(file); - } - - pub fn latest_sst(&self, level: Level) -> Option { - self.levels[level.as_usize()].latest_sst() - } - - /// Pick the ssts and collect it by `append_sst`. - pub fn pick_ssts( - &self, - time_range: TimeRange, - mut append_sst: impl FnMut(Level, &[FileHandle]), - ) { - for level_handler in self.levels.iter() { - let ssts = level_handler.pick_ssts(time_range); - append_sst(level_handler.level, &ssts); - } - } - - /// Remove sst files from level. - /// - /// Panic: If the level is greater than the max level - pub fn remove_ssts_from_level(&mut self, level: Level, file_ids: &[FileId]) { - let level_handler = &mut self.levels[level.as_usize()]; - level_handler.remove_ssts(file_ids); - } - - pub fn levels(&self) -> impl Iterator + '_ { - self.levels.iter().map(|v| v.level) - } - - /// Iter ssts at given `level`. - /// - /// Panic if level is out of bound. 
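// Editor's sketch (illustrative addition, not part of the removed code): pick_ssts above
// walks every level and hands the per-level candidates to a caller-supplied callback
// instead of allocating one combined vector; the shape of that pattern is roughly:
fn sketch_pick_per_level<T>(levels: &[Vec<T>], mut visit: impl FnMut(usize, &[T])) {
    for (level, files) in levels.iter().enumerate() {
        visit(level, files);
    }
}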
- pub fn iter_ssts_at_level(&self, level: Level) -> Iter { - let level_handler = &self.levels[level.as_usize()]; - level_handler.iter_ssts() - } - - pub fn collect_expired_at_level( - &self, - level: Level, - expire_time: Option, - ) -> Vec { - let level_handler = &self.levels[level.as_usize()]; - let mut expired = Vec::new(); - level_handler.collect_expired(expire_time, &mut expired); - - expired - } - - pub fn has_expired_sst(&self, expire_time: Option) -> bool { - self.levels - .iter() - .any(|level_handler| level_handler.has_expired_sst(expire_time)) - } - - pub fn expired_ssts(&self, expire_time: Option) -> Vec { - self.levels() - .map(|level| { - let files = self.collect_expired_at_level(level, expire_time); - ExpiredFiles { level, files } - }) - .collect() - } -} - -#[cfg(test)] -pub mod tests { - use table_engine::table::TableId; - use tokio::sync::mpsc; - - use crate::{ - sst::{ - file::{FileMeta, FilePurgeQueue, Level}, - manager::{FileId, LevelsController}, - meta_data::SstMetaData, - }, - table_options::StorageFormat, - }; - - #[must_use] - #[derive(Default)] - pub struct LevelsControllerMockBuilder { - sst_meta_vec: Vec, - } - - impl LevelsControllerMockBuilder { - pub fn add_sst(mut self, mut sst_meta: Vec) -> Self { - self.sst_meta_vec.append(&mut sst_meta); - self - } - - pub fn build(self) -> LevelsController { - let (tx, _rx) = mpsc::unbounded_channel(); - let file_purge_queue = FilePurgeQueue::new(100, TableId::from(101), tx); - let mut levels_controller = LevelsController::new(file_purge_queue); - for (id, sst_meta) in self.sst_meta_vec.into_iter().enumerate() { - levels_controller.add_sst_to_level( - Level::MIN, - FileMeta { - id: id as FileId, - size: 0, - row_num: 0, - time_range: sst_meta.time_range(), - max_seq: sst_meta.max_sequence(), - storage_format: StorageFormat::Columnar, - associated_files: Vec::new(), - }, - ); - } - levels_controller - } - } -} diff --git a/src/analytic_engine/src/sst/meta_data/cache.rs b/src/analytic_engine/src/sst/meta_data/cache.rs deleted file mode 100644 index d90e71b089..0000000000 --- a/src/analytic_engine/src/sst/meta_data/cache.rs +++ /dev/null @@ -1,351 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::{ - fmt::Debug, - sync::{Arc, RwLock}, -}; - -use lru::LruCache; -use object_store::{ObjectStoreRef, Path}; -use parquet::{file::metadata::FileMetaData, format::KeyValue}; -use snafu::{ensure, OptionExt, ResultExt}; - -use crate::sst::{ - meta_data::{ - metadata_reader::parse_metadata, InvalidSize, KvMetaDataNotFound, KvMetaVersionEmpty, - ParquetMetaDataRef, Result, - }, - metrics::{META_DATA_CACHE_HIT_COUNTER, META_DATA_CACHE_MISS_COUNTER}, - parquet::encoding, -}; - -pub type MetaCacheRef = Arc; - -/// The metadata of one sst file, including the original metadata of parquet and -/// the custom metadata of horaedb. -#[derive(Debug, Clone)] -pub struct MetaData { - /// The extended information in the parquet is removed for less memory - /// consumption. - parquet: parquet_ext::ParquetMetaDataRef, - custom: ParquetMetaDataRef, -} - -impl MetaData { - /// Build [`MetaData`] from the original parquet_meta_data. - /// - /// After the building, a new parquet meta data will be generated which - /// contains no extended custom information. - // TODO: remove it and use the suggested api. - #[allow(deprecated)] - pub async fn try_new( - parquet_meta_data: &parquet_ext::ParquetMetaData, - ignore_sst_filter: bool, - store: ObjectStoreRef, - ) -> Result { - let file_meta_data = parquet_meta_data.file_metadata(); - let kv_metas = file_meta_data - .key_value_metadata() - .context(KvMetaDataNotFound)?; - - ensure!(!kv_metas.is_empty(), KvMetaDataNotFound); - - let mut meta_path = None; - let mut meta_size = None; - let mut other_kv_metas: Vec = Vec::with_capacity(kv_metas.len() - 1); - let mut custom_kv_meta = None; - let mut meta_version = encoding::META_VERSION_V1; // default is v1 - - for kv_meta in kv_metas { - if kv_meta.key == encoding::META_KEY { - custom_kv_meta = Some(kv_meta); - } else if kv_meta.key == encoding::META_PATH_KEY { - meta_path = kv_meta.value.as_ref().map(|path| Path::from(path.as_str())) - } else if kv_meta.key == encoding::META_VERSION_KEY { - meta_version = kv_meta.value.as_ref().context(KvMetaVersionEmpty)?; - } else if kv_meta.key == encoding::META_SIZE_KEY { - let size = kv_meta.value.as_ref().context(KvMetaVersionEmpty)?; - let size = size.parse::().context(InvalidSize { size })?; - meta_size = Some(size); - } else { - other_kv_metas.push(kv_meta.clone()); - } - } - - let custom = parse_metadata( - meta_version, - custom_kv_meta, - ignore_sst_filter, - meta_path.clone(), - meta_size, - store, - ) - .await?; - - // let's build a new parquet metadata without the extended key value - // metadata. - let other_kv_metas = if other_kv_metas.is_empty() { - None - } else { - Some(other_kv_metas) - }; - let parquet = { - let thin_file_meta_data = FileMetaData::new( - file_meta_data.version(), - file_meta_data.num_rows(), - file_meta_data.created_by().map(|v| v.to_string()), - other_kv_metas, - file_meta_data.schema_descr_ptr(), - file_meta_data.column_orders().cloned(), - ); - let thin_parquet_meta_data = parquet_ext::ParquetMetaData::new_with_page_index( - thin_file_meta_data, - parquet_meta_data.row_groups().to_vec(), - parquet_meta_data.page_indexes().cloned(), - parquet_meta_data.offset_indexes().cloned(), - ); - - Arc::new(thin_parquet_meta_data) - }; - Ok(Self { parquet, custom }) - } - - #[inline] - pub fn parquet(&self) -> &parquet_ext::ParquetMetaDataRef { - &self.parquet - } - - #[inline] - pub fn custom(&self) -> &ParquetMetaDataRef { - &self.custom - } -} - -/// A cache for storing [`MetaData`]. 
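// Editor's sketch (illustrative addition, not part of the removed code): the meta cache
// below is meant to be used in a get-or-load fashion — look the sst path up first, and
// only parse metadata (and put it back) on a miss. A simplified, non-LRU version:
use std::collections::HashMap;

fn sketch_get_or_load<V: Clone>(
    cache: &mut HashMap<String, V>,
    key: &str,
    load: impl FnOnce() -> V,
) -> V {
    if let Some(v) = cache.get(key) {
        return v.clone(); // hit
    }
    let v = load(); // miss: e.g. fetch and decode the sst metadata
    cache.insert(key.to_string(), v.clone());
    v
}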
-#[derive(Debug)] -pub struct MetaCache { - cache: RwLock>, -} - -impl MetaCache { - pub fn new(cap: usize) -> Self { - Self { - cache: RwLock::new(LruCache::new(cap)), - } - } - - pub fn get(&self, key: &str) -> Option { - let v = self.cache.write().unwrap().get(key).cloned(); - if v.is_some() { - META_DATA_CACHE_HIT_COUNTER.inc() - } else { - META_DATA_CACHE_MISS_COUNTER.inc() - } - - v - } - - pub fn put(&self, key: String, value: MetaData) { - self.cache.write().unwrap().put(key, value); - } -} - -#[cfg(test)] -mod tests { - use std::{fs::File, path::Path, sync::Arc}; - - use arrow::{ - array::UInt64Builder, - datatypes::{DataType, Field, Schema}, - record_batch::RecordBatch, - }; - use bytes_ext::Bytes; - use common_types::{ - column_schema::Builder as ColumnSchemaBuilder, - schema::Builder as CustomSchemaBuilder, - time::{TimeRange, Timestamp}, - }; - use object_store::{local_file, ObjectStoreRef}; - use parquet::{arrow::ArrowWriter, file::footer}; - use parquet_ext::ParquetMetaData; - - use super::*; - use crate::{ - sst::parquet::{ - encoding::{self, META_PATH_KEY, META_VERSION_KEY}, - meta_data::ParquetMetaData as CustomParquetMetaData, - }, - table::sst_util::new_metadata_path, - }; - - // TODO: remove it and use the suggested api. - #[allow(deprecated)] - fn check_parquet_meta_data(original: &ParquetMetaData, processed: &ParquetMetaData) { - assert_eq!(original.page_indexes(), processed.page_indexes()); - assert_eq!(original.offset_indexes(), processed.offset_indexes()); - assert_eq!(original.num_row_groups(), processed.num_row_groups()); - assert_eq!(original.row_groups(), processed.row_groups()); - - let original_file_md = original.file_metadata(); - let processed_file_md = processed.file_metadata(); - assert_eq!(original_file_md.num_rows(), processed_file_md.num_rows()); - assert_eq!(original_file_md.version(), processed_file_md.version()); - assert_eq!( - original_file_md.created_by(), - processed_file_md.created_by() - ); - assert_eq!(original_file_md.schema(), processed_file_md.schema()); - assert_eq!( - original_file_md.schema_descr(), - processed_file_md.schema_descr() - ); - assert_eq!( - original_file_md.schema_descr_ptr(), - processed_file_md.schema_descr_ptr() - ); - assert_eq!( - original_file_md.column_orders(), - processed_file_md.column_orders() - ); - - if let Some(kv_metas) = original_file_md.key_value_metadata() { - let processed_kv_metas = processed_file_md.key_value_metadata().unwrap(); - assert_eq!(kv_metas.len(), processed_kv_metas.len() + 2); - for kv in kv_metas { - match kv.key.as_str() { - "ARROW:schema" => { - // don't care this - } - encoding::META_KEY => assert!(kv.value.is_none()), - encoding::META_VERSION_KEY => assert_eq!("2", kv.value.clone().unwrap()), - encoding::META_PATH_KEY => { - let meta_path = kv.value.as_ref().unwrap(); - assert!(meta_path.ends_with(".metadata")); - } - _ => panic!("Unknown parquet kv, value:{kv:?}"), - } - } - } else { - assert!(processed_file_md.key_value_metadata().is_none()); - } - } - - async fn write_parquet_file_with_metadata( - store: ObjectStoreRef, - parquet_file_path: &Path, - custom_meta_data: &CustomParquetMetaData, - ) { - let tsid_array = { - let mut builder = UInt64Builder::new(); - builder.append_value(10); - builder.append_null(); - builder.append_value(11); - builder.finish() - }; - let timestamp_array = { - let mut builder = UInt64Builder::new(); - builder.append_value(1000); - builder.append_null(); - builder.append_value(1001); - builder.finish() - }; - let file = 
File::create(parquet_file_path).unwrap(); - let schema = Schema::new(vec![ - Field::new("tsid", DataType::UInt64, true), - Field::new("timestamp", DataType::UInt64, true), - ]); - - let batch = RecordBatch::try_new( - Arc::new(schema), - vec![Arc::new(tsid_array), Arc::new(timestamp_array)], - ) - .unwrap(); - let mut writer = ArrowWriter::try_new(file, batch.schema(), None).unwrap(); - let meta_path = new_metadata_path(parquet_file_path.to_str().unwrap()); - writer.append_key_value_metadata(parquet::format::KeyValue { - key: META_PATH_KEY.to_string(), - value: Some(meta_path.clone()), - }); - writer.append_key_value_metadata(parquet::format::KeyValue { - key: META_VERSION_KEY.to_string(), - value: Some("2".to_string()), - }); - writer.write(&batch).unwrap(); - writer.close().unwrap(); - - let bytes = encoding::encode_sst_meta_data(custom_meta_data.clone()).unwrap(); - let meta_path = object_store::Path::from(meta_path); - store.put(&meta_path, bytes.into()).await.unwrap(); - } - - #[tokio::test] - async fn test_arrow_meta_data() { - let temp_dir = tempfile::tempdir().unwrap(); - let parquet_file_path = temp_dir.path().join("test_arrow_meta_data.par"); - let schema = { - let tsid_column_schema = ColumnSchemaBuilder::new( - "tsid".to_string(), - common_types::datum::DatumKind::UInt64, - ) - .build() - .unwrap(); - let timestamp_column_schema = ColumnSchemaBuilder::new( - "timestamp".to_string(), - common_types::datum::DatumKind::Timestamp, - ) - .build() - .unwrap(); - CustomSchemaBuilder::new() - .auto_increment_column_id(true) - .add_key_column(tsid_column_schema) - .unwrap() - .add_key_column(timestamp_column_schema) - .unwrap() - .primary_key_indexes(vec![0, 1]) - .build() - .unwrap() - }; - let custom_meta_data = CustomParquetMetaData { - min_key: Bytes::from_static(&[0, 1]), - max_key: Bytes::from_static(&[2, 2]), - time_range: TimeRange::new_unchecked(Timestamp::new(0), Timestamp::new(10)), - max_sequence: 1001, - schema, - parquet_filter: None, - column_values: None, - }; - - let local_path = temp_dir.as_ref().to_string_lossy().to_string(); - let store = Arc::new(local_file::try_new_with_default(local_path).unwrap()); - write_parquet_file_with_metadata( - store.clone(), - parquet_file_path.as_path(), - &custom_meta_data, - ) - .await; - - let parquet_file = File::open(parquet_file_path.as_path()).unwrap(); - let parquet_meta_data = footer::parse_metadata(&parquet_file).unwrap(); - let meta_data = MetaData::try_new(&parquet_meta_data, false, store) - .await - .unwrap(); - - assert_eq!(**meta_data.custom(), custom_meta_data); - check_parquet_meta_data(&parquet_meta_data, meta_data.parquet()); - } -} diff --git a/src/analytic_engine/src/sst/meta_data/metadata_reader.rs b/src/analytic_engine/src/sst/meta_data/metadata_reader.rs deleted file mode 100644 index 79e48a1e12..0000000000 --- a/src/analytic_engine/src/sst/meta_data/metadata_reader.rs +++ /dev/null @@ -1,148 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use async_trait::async_trait; -use macros::define_result; -use object_store::{ObjectStoreRef, Path}; -use parquet::{data_type::AsBytes, file::metadata::KeyValue}; -use snafu::{ensure, OptionExt, ResultExt}; - -use crate::sst::{ - meta_data::{ - DecodeCustomMetaData, FetchAndDecodeSstMeta, FetchFromStore, KvMetaDataNotFound, - KvMetaPathEmpty, UnknownMetaVersion, - }, - parquet::{ - encoding::{self, decode_sst_meta_data_from_bytes, META_VERSION_CURRENT, META_VERSION_V1}, - meta_data::{ParquetMetaData, ParquetMetaDataRef}, - }, -}; - -define_result!(crate::sst::meta_data::Error); - -#[async_trait] -pub trait CustomMetadataReader { - async fn get_metadata(&self) -> Result; -} - -pub struct MetaV1Reader<'a> { - custom_kv_meta: Option<&'a KeyValue>, -} - -impl<'a> MetaV1Reader<'a> { - fn new(custom_kv_meta: Option<&'a KeyValue>) -> Self { - Self { custom_kv_meta } - } -} - -#[async_trait] -impl CustomMetadataReader for MetaV1Reader<'_> { - async fn get_metadata(&self) -> Result { - let custom_kv_meta = self.custom_kv_meta.context(KvMetaDataNotFound)?; - - encoding::decode_sst_meta_data_from_kv(custom_kv_meta).context(DecodeCustomMetaData) - } -} - -pub struct MetaV2Reader { - meta_path: Option, - meta_size: Option, - store: ObjectStoreRef, -} - -impl MetaV2Reader { - fn new(meta_path: Option, meta_size: Option, store: ObjectStoreRef) -> Self { - Self { - meta_path, - meta_size, - store, - } - } -} - -#[async_trait] -impl CustomMetadataReader for MetaV2Reader { - async fn get_metadata(&self) -> Result { - match &self.meta_path { - None => KvMetaPathEmpty {}.fail(), - Some(meta_path) => { - // TODO: The disk cache only works for `get_range` now, so here - // We prefer to use `get_range` to fetch metadata when possible. - // A better way is to fix https://github.com/apache/incubator-horaedb/issues/1473. - let metadata = match self.meta_size { - Some(size) => { - let all_range = 0..size; - self.store - .get_range(meta_path, all_range) - .await - .with_context(|| FetchFromStore { - file_path: meta_path.to_string(), - })? - } - None => self - .store - .get(meta_path) - .await - .with_context(|| FetchFromStore { - file_path: meta_path.to_string(), - })? 
- .bytes() - .await - .with_context(|| FetchAndDecodeSstMeta { - file_path: meta_path.to_string(), - })?, - }; - - decode_sst_meta_data_from_bytes(metadata.as_bytes()).context(DecodeCustomMetaData) - } - } - } -} - -pub async fn parse_metadata( - meta_version: &str, - custom_kv_meta: Option<&KeyValue>, - ignore_sst_filter: bool, - meta_path: Option, - meta_size: Option, - store: ObjectStoreRef, -) -> Result { - // Must ensure custom metadata only store in one place - ensure!( - custom_kv_meta.is_none() || meta_path.is_none(), - KvMetaDataNotFound - ); - - let reader: Box = match meta_version { - META_VERSION_V1 => Box::new(MetaV1Reader::new(custom_kv_meta)), - META_VERSION_CURRENT => Box::new(MetaV2Reader::new(meta_path, meta_size, store)), - _ => { - return UnknownMetaVersion { - version: meta_version, - } - .fail() - } - }; - let mut metadata = reader.get_metadata().await?; - if ignore_sst_filter { - metadata.parquet_filter = None; - } - - Ok(Arc::new(metadata)) -} diff --git a/src/analytic_engine/src/sst/meta_data/mod.rs b/src/analytic_engine/src/sst/meta_data/mod.rs deleted file mode 100644 index 5f55d2bbd2..0000000000 --- a/src/analytic_engine/src/sst/meta_data/mod.rs +++ /dev/null @@ -1,222 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -pub mod cache; -mod metadata_reader; - -use std::{str::Utf8Error, sync::Arc}; - -use common_types::{schema::Schema, time::TimeRange, SequenceNumber}; -use horaedbproto::sst as sst_pb; -use macros::define_result; -use snafu::{Backtrace, OptionExt, ResultExt, Snafu}; -use table_engine::table::TableId; - -use crate::{ - space::SpaceId, - sst::{ - factory, - factory::{FactoryRef, ObjectStorePickerRef, SstReadHint, SstReadOptions}, - file::FileHandle, - parquet::{ - self, encoding, - meta_data::{ParquetMetaData, ParquetMetaDataRef}, - }, - reader, - writer::MetaData, - }, - table::sst_util, -}; - -/// Error of sst file. 
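// Editor's sketch (illustrative addition, not part of the removed code): parse_metadata
// above chooses a reader by the meta version recorded in the parquet key-value metadata:
// v1 decodes the blob embedded in the footer, v2 fetches a sidecar metadata object by
// path. The version literals below are illustrative only.
enum MetaSourceSketch {
    Inline(Vec<u8>),  // v1: payload embedded in the parquet footer
    Sidecar(String),  // v2: path of the separate metadata object
}

fn sketch_choose_meta_source(
    version: &str,
    inline: Option<Vec<u8>>,
    sidecar_path: Option<String>,
) -> Result<MetaSourceSketch, String> {
    match version {
        "1" => inline
            .map(MetaSourceSketch::Inline)
            .ok_or_else(|| "missing inline kv metadata".to_string()),
        "2" => sidecar_path
            .map(MetaSourceSketch::Sidecar)
            .ok_or_else(|| "missing metadata path".to_string()),
        other => Err(format!("unknown meta version: {other}")),
    }
}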
-#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display( - "Key value metadata in parquet is not found.\nBacktrace\n:{}", - backtrace - ))] - KvMetaDataNotFound { backtrace: Backtrace }, - - #[snafu(display( - "Key value meta version in parquet is empty\nBacktrace\n:{}", - backtrace - ))] - KvMetaVersionEmpty { backtrace: Backtrace }, - - #[snafu(display("Key value meta path in parquet is empty\nBacktrace\n:{}", backtrace))] - KvMetaPathEmpty { backtrace: Backtrace }, - - #[snafu(display("Unknown meta version, value:{}.\nBacktrace\n:{}", version, backtrace))] - UnknownMetaVersion { - version: String, - backtrace: Backtrace, - }, - - #[snafu(display("Metadata in proto struct is not found.\nBacktrace\n:{}", backtrace))] - MetaDataNotFound { backtrace: Backtrace }, - - #[snafu(display("Failed to decode custom metadata in parquet, err:{}", source))] - DecodeCustomMetaData { source: encoding::Error }, - - #[snafu(display("Failed to create sst reader, err:{}", source))] - CreateSstReader { source: factory::Error }, - - #[snafu(display("Failed to read meta data from reader, err:{}", source))] - ReadMetaData { source: reader::Error }, - - #[snafu(display("Failed to convert parquet meta data, err:{}", source))] - ConvertParquetMetaData { source: parquet::meta_data::Error }, - - #[snafu(display( - "Failed to decode sst meta data, file_path:{file_path}, err:{source}.\nBacktrace:\n{backtrace:?}", - ))] - FetchAndDecodeSstMeta { - file_path: String, - source: object_store::ObjectStoreError, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to decode sst meta data, file_path:{file_path}, err:{source}.\nBacktrace:\n{backtrace:?}", - ))] - FetchFromStore { - file_path: String, - source: object_store::ObjectStoreError, - backtrace: Backtrace, - }, - - #[snafu(display("Meet a object store error, err:{source}\nBacktrace:\n{backtrace}"))] - Utf8ErrorWrapper { - source: Utf8Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Parse meta size failed, size:{size}, source:{source}.\nBacktrace:\n{backtrace}" - ))] - InvalidSize { - size: String, - source: std::num::ParseIntError, - backtrace: Backtrace, - }, -} - -define_result!(Error); - -#[derive(Debug, Clone, PartialEq)] -pub enum SstMetaData { - Parquet(ParquetMetaDataRef), -} - -impl SstMetaData { - #[inline] - pub fn schema(&self) -> &Schema { - match self { - Self::Parquet(v) => &v.schema, - } - } - - #[inline] - pub fn time_range(&self) -> TimeRange { - match self { - Self::Parquet(v) => v.time_range, - } - } - - #[inline] - pub fn max_sequence(&self) -> SequenceNumber { - match self { - Self::Parquet(v) => v.max_sequence, - } - } - - #[inline] - pub fn as_parquet(&self) -> Option { - match self { - Self::Parquet(v) => Some(v.clone()), - } - } -} - -impl From for sst_pb::SstMetaData { - fn from(src: SstMetaData) -> Self { - match src { - SstMetaData::Parquet(meta_data) => { - let meta_data = sst_pb::ParquetMetaData::from(meta_data.as_ref().to_owned()); - sst_pb::SstMetaData { - meta_data: Some(sst_pb::sst_meta_data::MetaData::Parquet(meta_data)), - } - } - } - } -} - -impl TryFrom for SstMetaData { - type Error = Error; - - fn try_from(src: sst_pb::SstMetaData) -> Result { - let meta_data = src.meta_data.context(MetaDataNotFound)?; - match meta_data { - sst_pb::sst_meta_data::MetaData::Parquet(meta_data) => { - let parquet_meta_data = - ParquetMetaData::try_from(meta_data).context(ConvertParquetMetaData)?; - - Ok(Self::Parquet(Arc::new(parquet_meta_data))) - } - } - } -} - -impl From for MetaData { - fn from(meta: SstMetaData) -> Self 
{ - match meta { - SstMetaData::Parquet(v) => Self::from(v.as_ref().clone()), - } - } -} - -/// A utility reader to fetch meta data of multiple sst files. -pub struct SstMetaReader { - pub space_id: SpaceId, - pub table_id: TableId, - pub factory: FactoryRef, - pub read_opts: SstReadOptions, - pub store_picker: ObjectStorePickerRef, -} - -impl SstMetaReader { - /// Fetch meta data of the `files` from object store. - pub async fn fetch_metas(&self, files: &[FileHandle]) -> Result> { - let mut sst_metas = Vec::with_capacity(files.len()); - for f in files { - let path = sst_util::new_sst_file_path(self.space_id, self.table_id, f.id()); - let read_hint = SstReadHint { - file_size: Some(f.size() as usize), - file_format: Some(f.storage_format()), - }; - let mut reader = self - .factory - .create_reader(&path, &self.read_opts, read_hint, &self.store_picker, None) - .await - .context(CreateSstReader)?; - let meta_data = reader.meta_data().await.context(ReadMetaData)?; - sst_metas.push(meta_data.clone()); - } - - Ok(sst_metas) - } -} diff --git a/src/analytic_engine/src/sst/metrics.rs b/src/analytic_engine/src/sst/metrics.rs deleted file mode 100644 index f2eff571f7..0000000000 --- a/src/analytic_engine/src/sst/metrics.rs +++ /dev/null @@ -1,101 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::atomic::{AtomicU64, Ordering}; - -use lazy_static::lazy_static; -use prometheus::{ - exponential_buckets, register_counter, register_histogram, register_histogram_vec, - register_int_counter_vec, Counter, Histogram, HistogramVec, IntCounter, IntCounterVec, -}; - -lazy_static! { - // Histogram: - // Buckets: 100B,200B,400B,...,2KB - pub static ref SST_GET_RANGE_HISTOGRAM: Histogram = register_histogram!( - "sst_get_range_length", - "Histogram for sst get range length", - exponential_buckets(100.0, 2.0, 5).unwrap() - ).unwrap(); - - pub static ref META_DATA_CACHE_HIT_COUNTER: Counter = register_counter!( - "META_DATA_CACHE_HIT", - "The counter for meta data cache hit" - ).unwrap(); - - pub static ref META_DATA_CACHE_MISS_COUNTER: Counter = register_counter!( - "META_DATA_CACHE_MISS", - "The counter for meta data cache miss" - ).unwrap(); - - static ref ROW_GROUP_BEFORE_PRUNE_COUNTER: IntCounterVec = register_int_counter_vec!( - "row_group_before_prune", - "The counter for row group before prune", - &["table"] - ).unwrap(); - - static ref ROW_GROUP_AFTER_PRUNE_COUNTER: IntCounterVec = register_int_counter_vec!( - "row_group_after_prune", - "The counter for row group after prune", - &["table"] - ).unwrap(); - - pub static ref FETCHED_SST_BYTES_HISTOGRAM: HistogramVec = register_histogram_vec!( - "fetched_sst_bytes", - "Histogram for sst get range length", - &["shard_id", "table"], - // The buckets: [1MB, 2MB, 4MB, 8MB, ... 
, 8GB] - exponential_buckets(1024.0 * 1024.0, 2.0, 13).unwrap() - ).unwrap(); -} - -#[derive(Debug)] -pub struct MaybeTableLevelMetrics { - pub row_group_before_prune_counter: IntCounter, - pub row_group_after_prune_counter: IntCounter, - pub num_fetched_sst_bytes_hist: Histogram, - pub num_fetched_sst_bytes: AtomicU64, -} - -impl MaybeTableLevelMetrics { - pub fn new(table: &str, shard_id_label: &str) -> Self { - Self { - row_group_before_prune_counter: ROW_GROUP_BEFORE_PRUNE_COUNTER - .with_label_values(&[table]), - row_group_after_prune_counter: ROW_GROUP_AFTER_PRUNE_COUNTER - .with_label_values(&[table]), - num_fetched_sst_bytes_hist: FETCHED_SST_BYTES_HISTOGRAM - .with_label_values(&[&shard_id_label, table]), - num_fetched_sst_bytes: AtomicU64::new(0), - } - } - - #[inline] - pub fn maybe_observe_num_fetched_sst_bytes(&self) { - let num_fetched_sst_bytes = self.num_fetched_sst_bytes.load(Ordering::Relaxed); - if num_fetched_sst_bytes != 0 { - self.num_fetched_sst_bytes_hist - .observe(num_fetched_sst_bytes as f64); - } - } -} - -impl Drop for MaybeTableLevelMetrics { - fn drop(&mut self) { - self.maybe_observe_num_fetched_sst_bytes(); - } -} diff --git a/src/analytic_engine/src/sst/mod.rs b/src/analytic_engine/src/sst/mod.rs deleted file mode 100644 index 64603aed62..0000000000 --- a/src/analytic_engine/src/sst/mod.rs +++ /dev/null @@ -1,28 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! SST (Sorted String Table) file - -pub mod factory; -pub mod file; -pub mod header; -pub mod manager; -pub mod meta_data; -pub mod metrics; -pub mod parquet; -pub mod reader; -pub mod writer; diff --git a/src/analytic_engine/src/sst/parquet/async_reader.rs b/src/analytic_engine/src/sst/parquet/async_reader.rs deleted file mode 100644 index 318926d169..0000000000 --- a/src/analytic_engine/src/sst/parquet/async_reader.rs +++ /dev/null @@ -1,922 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Sst reader implementation based on parquet. 
- -use std::{ - ops::Range, - pin::Pin, - sync::{atomic::Ordering, Arc}, - task::{Context, Poll}, - time::{Duration, Instant}, -}; - -use arrow::{datatypes::SchemaRef, record_batch::RecordBatch as ArrowRecordBatch}; -use async_trait::async_trait; -use bytes_ext::Bytes; -use common_types::{ - projected_schema::{RowProjector, RowProjectorBuilder}, - record_batch::FetchedRecordBatch, -}; -use datafusion::{ - common::ToDFSchema, - datasource::physical_plan::{parquet::page_filter::PagePruningPredicate, ParquetFileMetrics}, - physical_expr::{create_physical_expr, execution_props::ExecutionProps}, - physical_plan::metrics::ExecutionPlanMetricsSet, -}; -use futures::{Stream, StreamExt}; -use generic_error::{BoxError, GenericResult}; -use logger::{debug, error, warn}; -use object_store::{ObjectStoreRef, Path}; -use parquet::{ - arrow::{arrow_reader::RowSelection, ParquetRecordBatchStreamBuilder, ProjectionMask}, - file::metadata::RowGroupMetaData, -}; -use parquet_ext::{ - meta_data::ChunkReader, - reader::{MetricsObserver, ObjectStoreReader}, -}; -use runtime::{AbortOnDropMany, JoinHandle, Runtime}; -use snafu::ResultExt; -use table_engine::predicate::PredicateRef; -use time_ext::InstantExt; -use tokio::sync::{ - mpsc::{self, Receiver, Sender}, - watch, -}; -use trace_metric::{MetricsCollector, TraceMetricWhenDrop}; - -use crate::{ - prefetchable_stream::{NoopPrefetcher, PrefetchableStream}, - sst::{ - factory::{ObjectStorePickerRef, ReadFrequency, SstReadOptions}, - meta_data::{ - cache::{MetaCacheRef, MetaData}, - SstMetaData, - }, - metrics::MaybeTableLevelMetrics, - parquet::{ - encoding::ParquetDecoder, - meta_data::{filter::ParquetFilter, ColumnValueSet}, - row_group_pruner::RowGroupPruner, - }, - reader::{error::*, Result, SstReader}, - }, -}; - -const PRUNE_ROW_GROUPS_METRICS_COLLECTOR_NAME: &str = "prune_row_groups"; -type SendableRecordBatchStream = Pin> + Send>>; -type FetchedRecordBatchStream = Box> + Send + Unpin>; - -pub struct Reader<'a> { - /// The path where the data is persisted. - path: &'a Path, - /// The storage where the data is persist. - store: &'a ObjectStoreRef, - /// The hint for the sst file size. - file_size_hint: Option, - num_rows_per_row_group: usize, - meta_cache: Option, - predicate: PredicateRef, - /// Current frequency decides the cache policy. 
- frequency: ReadFrequency, - /// Init those fields in `init_if_necessary` - meta_data: Option, - - row_projector_builder: RowProjectorBuilder, - row_projector: Option, - - /// Options for `read_parallelly` - metrics: Metrics, - df_plan_metrics: ExecutionPlanMetricsSet, - - table_level_sst_metrics: Option>, -} - -#[derive(Default, Debug, Clone, TraceMetricWhenDrop)] -pub(crate) struct Metrics { - #[metric(boolean)] - pub meta_data_cache_hit: bool, - #[metric(duration)] - pub read_meta_data_duration: Duration, - #[metric(number)] - pub parallelism: usize, - #[metric(collector)] - pub metrics_collector: Option, -} - -impl<'a> Reader<'a> { - pub fn new( - path: &'a Path, - options: &SstReadOptions, - file_size_hint: Option, - store_picker: &'a ObjectStorePickerRef, - metrics_collector: Option, - ) -> Self { - let store = store_picker.pick_by_freq(options.frequency); - let df_plan_metrics = ExecutionPlanMetricsSet::new(); - let metrics = Metrics { - metrics_collector, - ..Default::default() - }; - - Self { - path, - store, - file_size_hint, - num_rows_per_row_group: options.num_rows_per_row_group, - meta_cache: options.meta_cache.clone(), - predicate: options.predicate.clone(), - frequency: options.frequency, - meta_data: None, - row_projector_builder: options.row_projector_builder.clone(), - row_projector: None, - metrics, - df_plan_metrics, - table_level_sst_metrics: options.maybe_table_level_metrics.clone(), - } - } - - async fn maybe_read_parallelly( - &mut self, - read_parallelism: usize, - ) -> Result> { - assert!(read_parallelism > 0); - - self.init_if_necessary().await?; - let streams = self.fetch_record_batch_streams(read_parallelism).await?; - if streams.is_empty() { - return Ok(Vec::new()); - } - - let row_projector = self.row_projector.take().unwrap(); - let streams: Vec<_> = streams - .into_iter() - .map(|stream| { - Box::new(RecordBatchProjector::new( - stream, - row_projector.clone(), - self.metrics.metrics_collector.clone(), - )) as _ - }) - .collect(); - - Ok(streams) - } - - fn prune_row_groups( - &self, - schema: SchemaRef, - row_groups: &[RowGroupMetaData], - parquet_filter: Option<&ParquetFilter>, - column_values: Option<&Vec>>, - ) -> Result> { - let metrics_collector = self - .metrics - .metrics_collector - .as_ref() - .map(|v| v.span(PRUNE_ROW_GROUPS_METRICS_COLLECTOR_NAME.to_string())); - let mut pruner = RowGroupPruner::try_new( - &schema, - row_groups, - parquet_filter, - self.predicate.exprs(), - metrics_collector, - column_values, - )?; - - Ok(pruner.prune()) - } - - /// The final parallelism is ensured in the range: [1, num_row_groups]. 
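    // Illustrative values for the clamping rule described above (example
    // numbers, not from the original sources): `decide_read_parallelism(8, 3)`
    // yields 3, `decide_read_parallelism(2, 10)` yields 2, and
    // `decide_read_parallelism(5, 0)` would still yield 1 because of the
    // trailing `.max(1)`.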
- #[inline] - fn decide_read_parallelism(suggested: usize, num_row_groups: usize) -> usize { - suggested.min(num_row_groups).max(1) - } - - fn build_row_selection( - &self, - arrow_schema: SchemaRef, - row_groups: &[usize], - file_metadata: &parquet_ext::ParquetMetaData, - ) -> Result> { - // TODO: remove fixed partition - let partition = 0; - let exprs = datafusion::optimizer::utils::conjunction(self.predicate.exprs().to_vec()); - let exprs = match exprs { - Some(exprs) => exprs, - None => return Ok(None), - }; - - let df_schema = arrow_schema - .clone() - .to_dfschema() - .context(DataFusionError)?; - let physical_expr = - create_physical_expr(&exprs, &df_schema, &arrow_schema, &ExecutionProps::new()) - .context(DataFusionError)?; - let page_predicate = PagePruningPredicate::try_new(&physical_expr, arrow_schema.clone()) - .context(DataFusionError)?; - - let metrics = ParquetFileMetrics::new(partition, self.path.as_ref(), &self.df_plan_metrics); - page_predicate - .prune(row_groups, file_metadata, &metrics) - .context(DataFusionError) - } - - // TODO: remove it and use the suggested api. - async fn fetch_record_batch_streams( - &mut self, - suggested_parallelism: usize, - ) -> Result> { - assert!(self.meta_data.is_some()); - - let meta_data = self.meta_data.as_ref().unwrap(); - let row_projector = self.row_projector.as_ref().unwrap(); - let arrow_schema = meta_data.custom().schema.to_arrow_schema_ref(); - // Get target row groups. - let target_row_groups = { - let custom = meta_data.custom(); - - self.prune_row_groups( - arrow_schema.clone(), - meta_data.parquet().row_groups(), - custom.parquet_filter.as_ref(), - custom.column_values.as_ref(), - )? - }; - - let num_row_group_before_prune = meta_data.parquet().num_row_groups(); - let num_row_group_after_prune = target_row_groups.len(); - // Maybe it is a sub table of partitioned table, try to extract its parent - // table. - if let Some(metrics) = &self.table_level_sst_metrics { - metrics - .row_group_before_prune_counter - .inc_by(num_row_group_before_prune as u64); - metrics - .row_group_after_prune_counter - .inc_by(num_row_group_after_prune as u64); - } - - debug!( - "Reader fetch record batches, path:{}, row_groups total:{num_row_group_before_prune}, after prune:{num_row_group_after_prune}", - self.path - ); - - if target_row_groups.is_empty() { - return Ok(Vec::new()); - } - - // Partition the batches by `read_parallelism`. - let parallelism = - Self::decide_read_parallelism(suggested_parallelism, target_row_groups.len()); - - // TODO: we only support read parallelly when `batch_size` == - // `num_rows_per_row_group`, so this placing method is ok, we should - // adjust it when supporting it other situations. 
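            // A worked example of the round-robin placement below (illustrative
            // numbers, not from the original sources): with target_row_groups =
            // [0, 1, 2, 3, 4] and parallelism = 2, chunk 0 receives row groups
            // [0, 2, 4] and chunk 1 receives [1, 3], so each sub-stream reads
            // its row groups in ascending order and the receiver can restore
            // the global order by polling the sub-streams round robin.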
- let chunks_num = parallelism; - let chunk_size = target_row_groups.len() / parallelism; - self.metrics.parallelism = parallelism; - - let mut target_row_group_chunks = vec![Vec::with_capacity(chunk_size); chunks_num]; - for (row_group_idx, row_group) in target_row_groups.into_iter().enumerate() { - let chunk_idx = row_group_idx % chunks_num; - target_row_group_chunks[chunk_idx].push(row_group); - } - - let parquet_metadata = meta_data.parquet(); - let proj_mask = ProjectionMask::leaves( - meta_data.parquet().file_metadata().schema_descr(), - row_projector.existed_source_projection().iter().copied(), - ); - debug!( - "Reader fetch record batches, parallelism suggest:{}, real:{}, chunk_size:{}, project:{:?}", - suggested_parallelism, parallelism, chunk_size, proj_mask - ); - - let mut streams = Vec::with_capacity(target_row_group_chunks.len()); - let metrics_collector = ObjectStoreMetricsObserver { - table_level_sst_metrics: self.table_level_sst_metrics.clone(), - }; - for chunk in target_row_group_chunks { - let object_store_reader = ObjectStoreReader::with_metrics( - self.store.clone(), - self.path.clone(), - parquet_metadata.clone(), - metrics_collector.clone(), - ); - let mut builder = ParquetRecordBatchStreamBuilder::new(object_store_reader) - .await - .with_context(|| ParquetError)?; - - let row_selection = - self.build_row_selection(arrow_schema.clone(), &chunk, parquet_metadata)?; - - debug!( - "Build row selection for file path:{}, result:{row_selection:?}, page indexes:{}", - self.path, - parquet_metadata.column_index().is_some() - ); - if let Some(selection) = row_selection { - builder = builder.with_row_selection(selection); - }; - - let stream = builder - .with_batch_size(self.num_rows_per_row_group) - .with_row_groups(chunk) - .with_projection(proj_mask.clone()) - .build() - .with_context(|| ParquetError)? - .map(|batch| batch.with_context(|| ParquetError)); - - streams.push(Box::pin(stream) as _); - } - - Ok(streams) - } - - async fn init_if_necessary(&mut self) -> Result<()> { - if self.meta_data.is_some() { - return Ok(()); - } - - let meta_data = { - let start = Instant::now(); - let meta_data = self.read_sst_meta().await?; - self.metrics.read_meta_data_duration = start.elapsed(); - meta_data - }; - - let row_projector = self - .row_projector_builder - .build(&meta_data.custom().schema) - .box_err() - .context(Projection)?; - - self.meta_data = Some(meta_data); - self.row_projector = Some(row_projector); - - Ok(()) - } - - async fn load_file_size(&self) -> Result { - let file_size = match self.file_size_hint { - Some(v) => v, - None => { - let object_meta = self.store.head(self.path).await.context(ObjectStoreError)?; - object_meta.size - } - }; - - Ok(file_size) - } - - async fn load_meta_data_from_storage(&self, ignore_sst_filter: bool) -> Result { - let file_size = self.load_file_size().await?; - let chunk_reader_adapter = ChunkReaderAdapter::new(self.path, self.store); - - let (parquet_meta_data, _) = - parquet_ext::meta_data::fetch_parquet_metadata(file_size, &chunk_reader_adapter) - .await - .with_context(|| FetchAndDecodeSstMeta { - file_path: self.path.to_string(), - })?; - - // TODO: Support page index until https://github.com/apache/incubator-horaedb/issues/1040 is fixed. 
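        // Descriptive note (not in the original sources): if loading the page
        // indexes below fails, the error is only logged as a warning and the
        // plain footer metadata fetched above is kept, so the query still runs
        // correctly but without page-level pruning.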
- let mut parquet_meta_data = Arc::new(parquet_meta_data); - let object_store_reader = parquet_ext::reader::ObjectStoreReader::new( - self.store.clone(), - self.path.clone(), - parquet_meta_data.clone(), - ); - - if let Ok(meta_data) = parquet_ext::meta_data::meta_with_page_indexes(object_store_reader) - .await - .map_err(|e| { - // When loading page indexes failed, we just log the error and continue querying - // TODO: Fix this in stream. https://github.com/apache/incubator-horaedb/issues/1040 - warn!( - "Fail to load page indexes, path:{}, err:{:?}.", - self.path, e - ); - e - }) - { - parquet_meta_data = meta_data; - } - - MetaData::try_new(&parquet_meta_data, ignore_sst_filter, self.store.clone()) - .await - .box_err() - .context(DecodeSstMeta) - } - - fn need_update_cache(&self) -> bool { - match self.frequency { - ReadFrequency::Once => false, - ReadFrequency::Frequent => true, - } - } - - async fn read_sst_meta(&mut self) -> Result { - if let Some(cache) = &self.meta_cache { - if let Some(meta_data) = cache.get(self.path.as_ref()) { - self.metrics.meta_data_cache_hit = true; - return Ok(meta_data); - } - } - - // The metadata can't be found in the cache, and let's fetch it from the - // storage. - let avoid_update_cache = !self.need_update_cache(); - let empty_predicate = self.predicate.exprs().is_empty(); - - let meta_data = { - let ignore_sst_filter = avoid_update_cache && empty_predicate; - self.load_meta_data_from_storage(ignore_sst_filter).await? - }; - - if avoid_update_cache || self.meta_cache.is_none() { - return Ok(meta_data); - } - - // Update the cache. - self.meta_cache - .as_ref() - .unwrap() - .put(self.path.to_string(), meta_data.clone()); - - Ok(meta_data) - } - - #[cfg(test)] - pub(crate) async fn row_groups(&mut self) -> Vec { - let meta_data = self.read_sst_meta().await.unwrap(); - meta_data.parquet().row_groups().to_vec() - } -} - -impl<'a> Drop for Reader<'a> { - fn drop(&mut self) { - debug!( - "Parquet reader dropped, path:{:?}, df_plan_metrics:{}", - self.path, - self.df_plan_metrics.clone_inner().to_string() - ); - } -} - -pub struct ChunkReaderAdapter<'a> { - path: &'a Path, - store: &'a ObjectStoreRef, -} - -impl<'a> ChunkReaderAdapter<'a> { - pub fn new(path: &'a Path, store: &'a ObjectStoreRef) -> Self { - Self { path, store } - } -} - -#[async_trait] -impl<'a> ChunkReader for ChunkReaderAdapter<'a> { - async fn get_bytes(&self, range: Range) -> GenericResult { - self.store.get_range(self.path, range).await.box_err() - } -} - -#[derive(Default, Debug, Clone, TraceMetricWhenDrop)] -pub(crate) struct ProjectorMetrics { - #[metric(number, sum)] - pub row_num: usize, - #[metric(number, sum)] - pub row_mem: usize, - #[metric(duration, sum)] - pub project_record_batch: Duration, - #[metric(collector)] - pub metrics_collector: Option, -} - -struct RecordBatchProjector { - stream: SendableRecordBatchStream, - row_projector: RowProjector, - - metrics: ProjectorMetrics, - start_time: Instant, -} - -impl RecordBatchProjector { - fn new( - stream: SendableRecordBatchStream, - row_projector: RowProjector, - metrics_collector: Option, - ) -> Self { - let metrics = ProjectorMetrics { - metrics_collector, - ..Default::default() - }; - - Self { - stream, - row_projector, - metrics, - start_time: Instant::now(), - } - } -} - -impl Stream for RecordBatchProjector { - type Item = Result; - - fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - let projector = self.get_mut(); - - match projector.stream.poll_next_unpin(cx) { - Poll::Ready(Some(record_batch)) 
=> { - match record_batch.box_err().context(DecodeRecordBatch {}) { - Err(e) => Poll::Ready(Some(Err(e))), - Ok(record_batch) => { - let parquet_decoder = ParquetDecoder::new(); - let record_batch = parquet_decoder - .decode_record_batch(record_batch) - .box_err() - .context(DecodeRecordBatch)?; - - for col in record_batch.columns() { - projector.metrics.row_mem += col.get_array_memory_size(); - } - projector.metrics.row_num += record_batch.num_rows(); - - let fetched_schema = projector.row_projector.fetched_schema().clone(); - let primary_key_indexes = projector - .row_projector - .primary_key_indexes() - .map(|idxs| idxs.to_vec()); - let fetching_column_indexes = - projector.row_projector.target_record_projection_remapping(); - let projected_batch = FetchedRecordBatch::try_new( - fetched_schema, - primary_key_indexes, - fetching_column_indexes, - record_batch, - ) - .box_err() - .context(DecodeRecordBatch {}); - - Poll::Ready(Some(projected_batch)) - } - } - } - Poll::Pending => Poll::Pending, - Poll::Ready(None) => { - projector.metrics.project_record_batch += projector.start_time.saturating_elapsed(); - Poll::Ready(None) - } - } - } - - fn size_hint(&self) -> (usize, Option) { - self.stream.size_hint() - } -} - -#[async_trait] -impl<'a> SstReader for Reader<'a> { - async fn meta_data(&mut self) -> Result { - self.init_if_necessary().await?; - - Ok(SstMetaData::Parquet( - self.meta_data.as_ref().unwrap().custom().clone(), - )) - } - - async fn read( - &mut self, - ) -> Result>>> { - let mut streams = self.maybe_read_parallelly(1).await?; - assert_eq!(streams.len(), 1); - let stream = streams.pop().expect("impossible to fetch no stream"); - - Ok(Box::new(NoopPrefetcher(stream))) - } -} - -struct RecordBatchReceiver { - bg_prefetch_tx: Option>, - rx_group: Vec>>, - cur_rx_idx: usize, - #[allow(dead_code)] - drop_helper: AbortOnDropMany<()>, -} - -#[async_trait] -impl PrefetchableStream for RecordBatchReceiver { - type Item = Result; - - async fn start_prefetch(&mut self) { - // Start the prefetch work in background when first poll is called. - if let Some(tx) = self.bg_prefetch_tx.take() { - if tx.send(()).is_err() { - error!("The receiver for start prefetched has been closed"); - } - } - } - - async fn fetch_next(&mut self) -> Option { - self.next().await - } -} - -impl Stream for RecordBatchReceiver { - type Item = Result; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - if self.rx_group.is_empty() { - return Poll::Ready(None); - } - - // Start the prefetch work in background when first poll is called. - if let Some(tx) = self.bg_prefetch_tx.take() { - if tx.send(()).is_err() { - error!("The receiver for start prefetched has been closed"); - } - } - - let cur_rx_idx = self.cur_rx_idx; - // `cur_rx_idx` is impossible to be out-of-range, because it is got by round - // robin. - let rx_group_len = self.rx_group.len(); - let cur_rx = self.rx_group.get_mut(cur_rx_idx).unwrap_or_else(|| { - panic!( - "cur_rx_idx is impossible to be out-of-range, cur_rx_idx:{cur_rx_idx}, rx_group len:{rx_group_len}" - ) - }); - let poll_result = cur_rx.poll_recv(cx); - - match poll_result { - Poll::Ready(result) => { - // If found `Poll::Pending`, we need to keep polling current rx - // until found `Poll::Ready` for ensuring the order of record batches, - // because batches are placed into each stream by round robin: - // +------+ +------+ +------+ - // | 1 | | 2 | | 3 | - // +------+ +------+ +------+ - // | 4 | | 5 | | 6 | - // +------+ +------+ +------+ - // | ... | | ... 
| | ... | - // +------+ +------+ +------+ - self.cur_rx_idx = (self.cur_rx_idx + 1) % self.rx_group.len(); - Poll::Ready(result) - } - Poll::Pending => Poll::Pending, - } - } - - fn size_hint(&self) -> (usize, Option) { - (0, None) - } -} - -/// Spawn a new thread to read record_batches -pub struct ThreadedReader<'a> { - inner: Reader<'a>, - runtime: Arc, - - channel_cap: usize, - read_parallelism: usize, -} - -impl<'a> ThreadedReader<'a> { - pub fn new( - reader: Reader<'a>, - runtime: Arc, - read_parallelism: usize, - channel_cap: usize, - ) -> Self { - assert!( - read_parallelism > 0, - "read parallelism must be greater than 0" - ); - - Self { - inner: reader, - runtime, - channel_cap, - read_parallelism, - } - } - - fn read_record_batches_from_sub_reader( - &mut self, - mut reader: Box> + Send + Unpin>, - tx: Sender>, - mut rx: watch::Receiver<()>, - ) -> JoinHandle<()> { - self.runtime.spawn(async move { - // Wait for the notification to start the bg prefetch work. - if rx.changed().await.is_err() { - error!("The prefetch notifier has been closed, exit the prefetch work"); - return; - } - - while let Some(batch) = reader.next().await { - if let Err(e) = tx.send(batch).await { - error!("fail to send the fetched record batch result, err:{}", e); - } - } - }) - } -} - -#[async_trait] -impl<'a> SstReader for ThreadedReader<'a> { - async fn meta_data(&mut self) -> Result { - self.inner.meta_data().await - } - - async fn read( - &mut self, - ) -> Result>>> { - // Get underlying sst readers and channels. - let sub_readers = self - .inner - .maybe_read_parallelly(self.read_parallelism) - .await?; - if sub_readers.is_empty() { - return Ok(Box::new(RecordBatchReceiver { - bg_prefetch_tx: None, - rx_group: Vec::new(), - cur_rx_idx: 0, - drop_helper: AbortOnDropMany(Vec::new()), - }) as _); - } - - let read_parallelism = sub_readers.len(); - debug!( - "ThreadedReader read, suggest read_parallelism:{}, actual:{}", - self.read_parallelism, read_parallelism - ); - - let channel_cap_per_sub_reader = self.channel_cap / sub_readers.len(); - let channel_cap_per_sub_reader = channel_cap_per_sub_reader.max(1); - let (tx_group, rx_group): (Vec<_>, Vec<_>) = (0..read_parallelism) - .map(|_| mpsc::channel::>(channel_cap_per_sub_reader)) - .unzip(); - - let (bg_prefetch_tx, bg_prefetch_rx) = watch::channel(()); - // Start the background readings. 
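        // Channel sizing above, with illustrative numbers (not from the
        // original sources): `channel_cap = 16` split across 5 sub-readers
        // gives 16 / 5 = 3 slots per channel, while `channel_cap = 2` across
        // 5 sub-readers gives 0 and is clamped back to 1 by `.max(1)`.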
- let mut handles = Vec::with_capacity(sub_readers.len()); - for (sub_reader, tx) in sub_readers.into_iter().zip(tx_group.into_iter()) { - let bg_prefetch_handle = - self.read_record_batches_from_sub_reader(sub_reader, tx, bg_prefetch_rx.clone()); - handles.push(bg_prefetch_handle); - } - - Ok(Box::new(RecordBatchReceiver { - bg_prefetch_tx: Some(bg_prefetch_tx), - rx_group, - cur_rx_idx: 0, - drop_helper: AbortOnDropMany(handles), - }) as _) - } -} - -#[derive(Clone)] -struct ObjectStoreMetricsObserver { - table_level_sst_metrics: Option>, -} - -impl MetricsObserver for ObjectStoreMetricsObserver { - fn elapsed(&self, path: &Path, elapsed: Duration) { - debug!("ObjectStoreReader dropped, path:{path}, elapsed:{elapsed:?}"); - } - - fn num_bytes_fetched(&self, _: &Path, num_bytes: usize) { - if let Some(metrics) = &self.table_level_sst_metrics { - metrics - .num_fetched_sst_bytes - .fetch_add(num_bytes as u64, Ordering::Relaxed); - } - } -} - -#[cfg(test)] -mod tests { - use std::{ - pin::Pin, - task::{Context, Poll}, - time::Duration, - }; - - use futures::{Stream, StreamExt}; - use tokio::sync::mpsc::{self, Receiver, Sender}; - - struct MockReceivers { - rx_group: Vec>, - cur_rx_idx: usize, - } - - impl Stream for MockReceivers { - type Item = u32; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - let cur_rx_idx = self.cur_rx_idx; - // `cur_rx_idx` is impossible to be out-of-range, because it is got by round - // robin. - let cur_rx = self.rx_group.get_mut(cur_rx_idx).unwrap(); - let poll_result = cur_rx.poll_recv(cx); - - match poll_result { - Poll::Ready(result) => { - self.cur_rx_idx = (self.cur_rx_idx + 1) % self.rx_group.len(); - Poll::Ready(result) - } - Poll::Pending => Poll::Pending, - } - } - - fn size_hint(&self) -> (usize, Option) { - (0, None) - } - } - - struct MockRandomSenders { - tx_group: Vec>, - test_datas: Vec>, - } - - impl MockRandomSenders { - fn start_to_send(&mut self) { - while let Some(tx) = self.tx_group.pop() { - let test_data = self.test_datas.pop().unwrap(); - tokio::spawn(async move { - for datum in test_data { - let random_millis = rand::random::() % 30; - tokio::time::sleep(Duration::from_millis(random_millis)).await; - tx.send(datum).await.unwrap(); - } - }); - } - } - } - - fn gen_test_data(amount: usize) -> Vec { - (0..amount).map(|_| rand::random::()).collect() - } - - // We mock a thread model same as the one in `ThreadedReader` to check its - // validity. - // TODO: we should make the `ThreadedReader` mockable and refactor this test - // using it. - #[tokio::test(flavor = "multi_thread", worker_threads = 4)] - async fn test_simulated_threaded_reader() { - let test_data = gen_test_data(123); - let expected = test_data.clone(); - let channel_cap_per_sub_reader = 10; - let reader_num = 5; - let (tx_group, rx_group): (Vec<_>, Vec<_>) = (0..reader_num) - .map(|_| mpsc::channel::(channel_cap_per_sub_reader)) - .unzip(); - - // Partition datas. - let chunk_len = reader_num; - let mut test_data_chunks = vec![Vec::new(); chunk_len]; - for (idx, datum) in test_data.into_iter().enumerate() { - let chunk_idx = idx % chunk_len; - test_data_chunks.get_mut(chunk_idx).unwrap().push(datum); - } - - // Start senders. - let mut mock_senders = MockRandomSenders { - tx_group, - test_datas: test_data_chunks, - }; - mock_senders.start_to_send(); - - // Poll receivers. 
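        // Descriptive note (not in the original sources): the assertion at the
        // end checks that round-robin receiving reproduces the original order
        // of `test_data`, even though every sender sleeps a random interval
        // before each send.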
- let mut actual = Vec::new(); - let mut mock_receivers = MockReceivers { - rx_group, - cur_rx_idx: 0, - }; - while let Some(datum) = mock_receivers.next().await { - actual.push(datum); - } - - assert_eq!(actual, expected); - } -} diff --git a/src/analytic_engine/src/sst/parquet/encoding.rs b/src/analytic_engine/src/sst/parquet/encoding.rs deleted file mode 100644 index 42f4883ce3..0000000000 --- a/src/analytic_engine/src/sst/parquet/encoding.rs +++ /dev/null @@ -1,431 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{collections::HashMap, convert::TryFrom}; - -use arrow::{compute, record_batch::RecordBatch as ArrowRecordBatch}; -use async_trait::async_trait; -use bytes::Bytes; -use bytes_ext::{BytesMut, SafeBufMut}; -use common_types::schema::{ArrowSchemaRef, Schema}; -use generic_error::{BoxError, GenericError}; -use horaedbproto::sst as sst_pb; -use macros::define_result; -use parquet::{ - arrow::AsyncArrowWriter, - basic::Compression, - file::{metadata::KeyValue, properties::WriterProperties}, - schema::types::ColumnPath, -}; -use prost::{bytes, Message}; -use snafu::{ensure, Backtrace, OptionExt, ResultExt, Snafu}; -use tokio::io::AsyncWrite; - -use crate::sst::parquet::meta_data::ParquetMetaData; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display( - "Failed to encode sst meta data, err:{}.\nBacktrace:\n{}", - source, - backtrace - ))] - EncodeIntoPb { - source: prost::EncodeError, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to decode sst meta data, base64 of meta value:{}, err:{}.\nBacktrace:\n{}", - meta_value, - source, - backtrace, - ))] - DecodeFromPb { - meta_value: String, - source: prost::DecodeError, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to decode sst meta data, bytes:{:?}, err:{}.\nBacktrace:\n{}", - bytes, - source, - backtrace, - ))] - DecodeFromBytes { - bytes: Vec, - source: prost::DecodeError, - backtrace: Backtrace, - }, - - #[snafu(display( - "Invalid meta key, expect:{}, given:{}.\nBacktrace:\n{}", - expect, - given, - backtrace - ))] - InvalidMetaKey { - expect: String, - given: String, - backtrace: Backtrace, - }, - - #[snafu(display("Base64 meta value not found.\nBacktrace:\n{}", backtrace))] - Base64MetaValueNotFound { backtrace: Backtrace }, - - #[snafu(display( - "Invalid base64 meta value length, base64 of meta value:{}.\nBacktrace:\n{}", - meta_value, - backtrace, - ))] - InvalidBase64MetaValueLen { - meta_value: String, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to decode base64 meta value, base64 of meta value:{}, err:{}", - meta_value, - source - ))] - DecodeBase64MetaValue { - meta_value: String, - source: base64::DecodeError, - }, - - #[snafu(display( - "Invalid meta value length, base64 of meta 
value:{}.\nBacktrace:\n{}", - meta_value, - backtrace - ))] - InvalidMetaValueLen { - meta_value: String, - backtrace: Backtrace, - }, - - #[snafu(display( - "Invalid meta value header, base64 of meta value:{}.\nBacktrace:\n{}", - meta_value, - backtrace - ))] - InvalidMetaValueHeader { - meta_value: String, - backtrace: Backtrace, - }, - - #[snafu(display( - "Invalid meta value header, bytes:{:?}.\nBacktrace:\n{}", - bytes, - backtrace - ))] - InvalidMetaBytesHeader { - bytes: Vec, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to convert sst meta data from protobuf, err:{}", source))] - ConvertSstMetaData { - source: crate::sst::parquet::meta_data::Error, - }, - - #[snafu(display( - "Failed to encode record batch into sst, err:{}.\nBacktrace:\n{}", - source, - backtrace - ))] - EncodeRecordBatch { - source: GenericError, - backtrace: Backtrace, - }, -} - -define_result!(Error); - -// In v1 format, our customized meta is encoded in parquet itself, this may -// incur storage overhead since parquet KV only accept string, so we need to -// base64 our meta. -// In v2, we save meta in another independent file on object_store, its path is -// encoded in parquet KV, which is identified by `meta_path`. -pub const META_VERSION_V1: &str = "1"; -pub const META_VERSION_CURRENT: &str = "2"; -pub const META_KEY: &str = "meta"; // used in v1 -pub const META_PATH_KEY: &str = "meta_path"; // used in v2 -pub const META_SIZE_KEY: &str = "meta_size"; // used in v2 -pub const META_VERSION_KEY: &str = "meta_version"; -pub const META_VALUE_HEADER: u8 = 0; - -/// Encode the sst custom meta data into binary key value pair. -pub fn encode_sst_meta_data(meta_data: ParquetMetaData) -> Result { - let meta_data_pb = sst_pb::ParquetMetaData::from(meta_data); - - let mut buf = BytesMut::with_capacity(meta_data_pb.encoded_len() + 1); - buf.try_put_u8(META_VALUE_HEADER) - .expect("Should write header into the buffer successfully"); - - // encode the sst custom meta data into protobuf binary - meta_data_pb.encode(&mut buf).context(EncodeIntoPb)?; - Ok(buf.into()) -} - -/// Decode the sst custom meta data from the binary key value pair. -pub fn decode_sst_meta_data_from_bytes(bytes: &[u8]) -> Result { - ensure!( - bytes[0] == META_VALUE_HEADER, - InvalidMetaBytesHeader { - bytes: bytes.to_vec() - } - ); - let meta_data_pb: sst_pb::ParquetMetaData = - Message::decode(&bytes[1..]).context(DecodeFromBytes { - bytes: bytes.to_vec(), - })?; - - ParquetMetaData::try_from(meta_data_pb).context(ConvertSstMetaData) -} - -/// Decode the sst meta data from the binary key value pair. -/// Used in v1 format. -pub fn decode_sst_meta_data_from_kv(kv: &KeyValue) -> Result { - ensure!( - kv.key == META_KEY, - InvalidMetaKey { - expect: META_KEY, - given: &kv.key, - } - ); - - let meta_value = kv.value.as_ref().context(Base64MetaValueNotFound)?; - ensure!( - !meta_value.is_empty(), - InvalidBase64MetaValueLen { meta_value } - ); - - let raw_bytes = base64::decode(meta_value).context(DecodeBase64MetaValue { meta_value })?; - - decode_sst_meta_data_from_bytes(&raw_bytes) -} - -/// RecordEncoder is used for encoding ArrowBatch. 
-/// -/// TODO: allow pre-allocate buffer -#[async_trait] -trait RecordEncoder { - /// Encode vector of arrow batch, return encoded row number - async fn encode(&mut self, record_batches: Vec) -> Result; - - fn set_meta_data_path(&mut self, metadata_path: Option) -> Result<()>; - fn set_meta_data_size(&mut self, size: usize) -> Result<()>; - - /// Return encoded bytes - /// Note: trait method cannot receive `self`, so take a &mut self here to - /// indicate this encoder is already consumed - async fn close(&mut self) -> Result<()>; -} - -struct ColumnarRecordEncoder { - // wrap in Option so ownership can be taken out behind `&mut self` - arrow_writer: Option>, - arrow_schema: ArrowSchemaRef, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ColumnEncoding { - pub enable_dict: bool, -} - -#[derive(Debug, Clone)] -pub struct EncodeOptions { - pub num_rows_per_row_group: usize, - pub max_buffer_size: usize, - pub compression: Compression, - pub column_encodings: HashMap, -} - -impl ColumnarRecordEncoder { - fn try_new(sink: W, schema: &Schema, options: &EncodeOptions) -> Result { - let arrow_schema = schema.to_arrow_schema_ref(); - - let write_props = { - let mut builder = WriterProperties::builder() - .set_max_row_group_size(options.num_rows_per_row_group) - .set_compression(options.compression); - - for (col_name, encoding) in &options.column_encodings { - let col_path = ColumnPath::new(vec![col_name.to_string()]); - builder = builder.set_column_dictionary_enabled(col_path, encoding.enable_dict); - } - - builder.build() - }; - - let arrow_writer = AsyncArrowWriter::try_new( - sink, - arrow_schema.clone(), - options.max_buffer_size, - Some(write_props), - ) - .box_err() - .context(EncodeRecordBatch)?; - - Ok(Self { - arrow_writer: Some(arrow_writer), - arrow_schema, - }) - } -} - -#[async_trait] -impl RecordEncoder for ColumnarRecordEncoder { - async fn encode(&mut self, arrow_record_batch_vec: Vec) -> Result { - assert!(self.arrow_writer.is_some()); - - let record_batch = compute::concat_batches(&self.arrow_schema, &arrow_record_batch_vec) - .box_err() - .context(EncodeRecordBatch)?; - - self.arrow_writer - .as_mut() - .unwrap() - .write(&record_batch) - .await - .box_err() - .context(EncodeRecordBatch)?; - - Ok(record_batch.num_rows()) - } - - fn set_meta_data_path(&mut self, metadata_path: Option) -> Result<()> { - let path_kv = KeyValue { - key: META_PATH_KEY.to_string(), - value: metadata_path, - }; - let version_kv = KeyValue { - key: META_VERSION_KEY.to_string(), - value: Some(META_VERSION_CURRENT.to_string()), - }; - let writer = self.arrow_writer.as_mut().unwrap(); - writer.append_key_value_metadata(path_kv); - writer.append_key_value_metadata(version_kv); - - Ok(()) - } - - fn set_meta_data_size(&mut self, size: usize) -> Result<()> { - let size_kv = KeyValue { - key: META_SIZE_KEY.to_string(), - value: Some(size.to_string()), - }; - let writer = self.arrow_writer.as_mut().unwrap(); - writer.append_key_value_metadata(size_kv); - - Ok(()) - } - - async fn close(&mut self) -> Result<()> { - assert!(self.arrow_writer.is_some()); - - let arrow_writer = self.arrow_writer.take().unwrap(); - arrow_writer - .close() - .await - .box_err() - .context(EncodeRecordBatch)?; - - Ok(()) - } -} - -pub struct ParquetEncoder { - record_encoder: Box, -} - -impl ParquetEncoder { - pub fn try_new( - sink: W, - schema: &Schema, - options: &EncodeOptions, - ) -> Result { - Ok(ParquetEncoder { - record_encoder: Box::new(ColumnarRecordEncoder::try_new(sink, schema, options)?), - }) - } - - /// 
Encode the record batch with [ArrowWriter] and the encoded contents is - /// written to the buffer. - pub async fn encode_record_batches( - &mut self, - arrow_record_batches: Vec, - ) -> Result { - if arrow_record_batches.is_empty() { - return Ok(0); - } - - self.record_encoder.encode(arrow_record_batches).await - } - - pub fn set_meta_data_path(&mut self, meta_data_path: Option) -> Result<()> { - self.record_encoder.set_meta_data_path(meta_data_path) - } - - pub fn set_meta_data_size(&mut self, size: usize) -> Result<()> { - self.record_encoder.set_meta_data_size(size) - } - - pub async fn close(mut self) -> Result<()> { - self.record_encoder.close().await - } -} - -/// RecordDecoder is used for decoding ArrowRecordBatch based on -/// `schema.StorageFormat` -trait RecordDecoder { - fn decode(&self, arrow_record_batch: ArrowRecordBatch) -> Result; -} - -struct ColumnarRecordDecoder {} - -impl RecordDecoder for ColumnarRecordDecoder { - fn decode(&self, arrow_record_batch: ArrowRecordBatch) -> Result { - Ok(arrow_record_batch) - } -} - -pub struct ParquetDecoder { - record_decoder: Box, -} - -impl Default for ParquetDecoder { - fn default() -> Self { - Self::new() - } -} - -impl ParquetDecoder { - pub fn new() -> Self { - Self { - record_decoder: Box::new(ColumnarRecordDecoder {}), - } - } - - pub fn decode_record_batch( - &self, - arrow_record_batch: ArrowRecordBatch, - ) -> Result { - self.record_decoder.decode(arrow_record_batch) - } -} diff --git a/src/analytic_engine/src/sst/parquet/meta_data/filter.rs b/src/analytic_engine/src/sst/parquet/meta_data/filter.rs deleted file mode 100644 index d64ad974cc..0000000000 --- a/src/analytic_engine/src/sst/parquet/meta_data/filter.rs +++ /dev/null @@ -1,372 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// TODO: Better module name should be index. - -use std::{fmt, ops::Index}; - -use common_types::{datum::DatumKind, schema::Schema}; -use horaedbproto::sst as sst_pb; -use snafu::ResultExt; -use xorfilter::xor8::{Xor8, Xor8Builder}; - -use crate::sst::parquet::meta_data::{BuildXor8Filter, Error, ParseXor8Filter, Result}; - -// TODO: move this to sst module, and add a FilterBuild trait -/// Filter can be used to test whether an element is a member of a set. -/// False positive matches are possible if space-efficient probabilistic data -/// structure are used. -trait Filter: fmt::Debug { - fn r#type(&self) -> FilterType; - - /// Check the key is in the bitmap index. - fn contains(&self, key: &[u8]) -> bool; - - /// Serialize the bitmap index to binary array. - fn to_bytes(&self) -> Vec; - - /// Serialized size - fn size(&self) -> usize { - self.to_bytes().len() - } - - /// Deserialize the binary array to specific filter. 
- fn from_bytes(buf: Vec) -> Result - where - Self: Sized; -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum FilterType { - Xor8, -} - -/// Filter based on https://docs.rs/xorfilter-rs/latest/xorfilter/struct.Xor8.html -#[derive(Default)] -struct Xor8Filter { - xor8: Xor8, -} - -impl fmt::Debug for Xor8Filter { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str("XorFilter") - } -} - -impl Filter for Xor8Filter { - fn r#type(&self) -> FilterType { - FilterType::Xor8 - } - - fn contains(&self, key: &[u8]) -> bool { - self.xor8.contains(key) - } - - fn to_bytes(&self) -> Vec { - self.xor8.to_bytes() - } - - fn from_bytes(buf: Vec) -> Result - where - Self: Sized, - { - Xor8::from_bytes(buf) - .context(ParseXor8Filter) - .map(|xor8| Self { xor8 }) - } -} - -pub struct RowGroupFilterBuilder { - builders: Vec>, -} - -impl RowGroupFilterBuilder { - pub(crate) fn new(schema: &Schema) -> Self { - let builders = schema - .columns() - .iter() - .enumerate() - .map(|(i, col)| { - // No need to create filter index over the timestamp column. - if schema.timestamp_index() == i { - return None; - } - - // No need to create filter index over the tsid column. - if schema.index_of_tsid().map(|idx| idx == i).unwrap_or(false) { - return None; - } - - if matches!( - col.data_type, - DatumKind::Null - | DatumKind::Double - | DatumKind::Float - | DatumKind::Varbinary - | DatumKind::Boolean - ) { - return None; - } - - Some(Xor8Builder::default()) - }) - .collect(); - - Self { builders } - } - - pub(crate) fn add_key(&mut self, col_idx: usize, key: &[u8]) { - if let Some(b) = self.builders[col_idx].as_mut() { - b.insert(key) - } - } - - pub(crate) fn build(self) -> Result { - self.builders - .into_iter() - .map(|b| { - b.map(|mut b| { - b.build() - .context(BuildXor8Filter) - .map(|xor8| Box::new(Xor8Filter { xor8 }) as _) - }) - .transpose() - }) - .collect::>>() - .map(|column_filters| RowGroupFilter { column_filters }) - } -} - -#[derive(Debug, Default)] -pub struct RowGroupFilter { - // The column filter can be None if the column is not indexed. - column_filters: Vec>>, -} - -impl PartialEq for RowGroupFilter { - fn eq(&self, other: &Self) -> bool { - if self.column_filters.len() != other.column_filters.len() { - return false; - } - - for (a, b) in self.column_filters.iter().zip(other.column_filters.iter()) { - if !a - .as_ref() - .map(|a| a.to_bytes()) - .eq(&b.as_ref().map(|b| b.to_bytes())) - { - return false; - } - } - - true - } -} - -impl Clone for RowGroupFilter { - fn clone(&self) -> Self { - let column_filters = self - .column_filters - .iter() - .map(|f| { - f.as_ref() - .map(|f| Box::new(Xor8Filter::from_bytes(f.to_bytes()).unwrap()) as Box<_>) - }) - .collect(); - - Self { column_filters } - } -} - -impl RowGroupFilter { - /// Return None if the column is not indexed. - pub fn contains_column_data(&self, column_idx: usize, data: &[u8]) -> Option { - self.column_filters[column_idx] - .as_ref() - .map(|v| v.contains(data)) - } - - fn size(&self) -> usize { - self.column_filters - .iter() - .map(|cf| cf.as_ref().map(|cf| cf.size()).unwrap_or(0)) - .sum() - } -} - -#[derive(Debug, Clone, PartialEq, Default)] -pub struct ParquetFilter { - /// Every filter is a row group filter consists of column filters. 
- row_group_filters: Vec, -} - -impl ParquetFilter { - pub fn push_row_group_filter(&mut self, row_group_filter: RowGroupFilter) { - self.row_group_filters.push(row_group_filter); - } - - pub fn len(&self) -> usize { - self.row_group_filters.len() - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn size(&self) -> usize { - self.row_group_filters.iter().map(|f| f.size()).sum() - } -} - -impl Index for ParquetFilter { - type Output = RowGroupFilter; - - fn index(&self, index: usize) -> &Self::Output { - &self.row_group_filters[index] - } -} - -impl From for sst_pb::ParquetFilter { - fn from(parquet_filter: ParquetFilter) -> Self { - let row_group_filters = parquet_filter - .row_group_filters - .into_iter() - .map(|row_group_filter| { - let column_filters = row_group_filter - .column_filters - .into_iter() - .map(|column_filter| match column_filter { - Some(v) => { - let encoded_filter = v.to_bytes(); - match v.r#type() { - FilterType::Xor8 => sst_pb::ColumnFilter { - filter: Some(sst_pb::column_filter::Filter::Xor( - encoded_filter, - )), - }, - } - } - None => sst_pb::ColumnFilter { filter: None }, - }) - .collect::>(); - - sst_pb::RowGroupFilter { column_filters } - }) - .collect::>(); - - sst_pb::ParquetFilter { row_group_filters } - } -} - -impl TryFrom for ParquetFilter { - type Error = Error; - - fn try_from(src: sst_pb::ParquetFilter) -> Result { - let row_group_filters = src - .row_group_filters - .into_iter() - .map(|row_group_filter| { - let column_filters = row_group_filter - .column_filters - .into_iter() - .map(|column_filter| match column_filter.filter { - Some(v) => match v { - sst_pb::column_filter::Filter::Xor(encoded_bytes) => { - Xor8Filter::from_bytes(encoded_bytes) - .map(|v| Some(Box::new(v) as _)) - } - }, - None => Ok(None), - }) - .collect::>>()?; - Ok(RowGroupFilter { column_filters }) - }) - .collect::>>()?; - - Ok(ParquetFilter { row_group_filters }) - } -} - -#[cfg(test)] -mod tests { - use common_types::tests::build_schema; - - use super::*; - - #[test] - fn test_conversion_parquet_filter() { - let parquet_filter = ParquetFilter { - row_group_filters: vec![ - RowGroupFilter { - column_filters: vec![None, Some(Box::::default() as _)], - }, - RowGroupFilter { - column_filters: vec![Some(Box::::default() as _), None], - }, - ], - }; - - let parquet_filter_pb: sst_pb::ParquetFilter = parquet_filter.clone().into(); - assert_eq!(parquet_filter_pb.row_group_filters.len(), 2); - assert_eq!( - parquet_filter_pb.row_group_filters[0].column_filters.len(), - 2 - ); - assert_eq!( - parquet_filter_pb.row_group_filters[1].column_filters.len(), - 2 - ); - assert!(parquet_filter_pb.row_group_filters[0].column_filters[0] - .filter - .is_none()); - assert!(parquet_filter_pb.row_group_filters[0].column_filters[1] - .filter - .is_some(),); - assert!(parquet_filter_pb.row_group_filters[1].column_filters[0] - .filter - .is_some(),); - assert!(parquet_filter_pb.row_group_filters[1].column_filters[1] - .filter - .is_none()); - - let decoded_parquet_filter = ParquetFilter::try_from(parquet_filter_pb).unwrap(); - assert_eq!(decoded_parquet_filter, parquet_filter); - } - - #[test] - fn test_row_group_filter_builder() { - // (key1(varbinary), key2(timestamp), field1(double), field2(string)) - let schema = build_schema(); - let mut builders = RowGroupFilterBuilder::new(&schema); - for key in ["host-123", "host-456", "host-789"] { - builders.add_key(3, key.as_bytes()); - } - let row_group_filter = builders.build().unwrap(); - for i in 0..3 { - 
assert!(row_group_filter.column_filters[i].is_none()); - } - - let testcase = [("host-123", true), ("host-321", false)]; - for (key, expected) in testcase { - let actual = row_group_filter - .contains_column_data(3, key.as_bytes()) - .unwrap(); - - assert_eq!(expected, actual); - } - } -} diff --git a/src/analytic_engine/src/sst/parquet/meta_data/mod.rs b/src/analytic_engine/src/sst/parquet/meta_data/mod.rs deleted file mode 100644 index 0120d64944..0000000000 --- a/src/analytic_engine/src/sst/parquet/meta_data/mod.rs +++ /dev/null @@ -1,247 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// MetaData for SST based on parquet. - -use std::{collections::HashSet, fmt, sync::Arc}; - -use bytes_ext::Bytes; -use common_types::{schema::Schema, time::TimeRange, SequenceNumber}; -use horaedbproto::{schema as schema_pb, sst as sst_pb}; -use macros::define_result; -use snafu::{Backtrace, OptionExt, ResultExt, Snafu}; - -use crate::sst::{parquet::meta_data::filter::ParquetFilter, writer::MetaData}; - -pub mod filter; - -/// Error of sst file. 
-#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Time range is not found.\nBacktrace\n:{}", backtrace))] - TimeRangeNotFound { backtrace: Backtrace }, - - #[snafu(display("Table schema is not found.\nBacktrace\n:{}", backtrace))] - TableSchemaNotFound { backtrace: Backtrace }, - - #[snafu(display( - "Failed to parse Xor8Filter from bytes, err:{}.\nBacktrace\n:{}", - source, - backtrace - ))] - ParseXor8Filter { - source: std::io::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to build Xor8Filter, err:{}.\nBacktrace\n:{}", - source, - backtrace - ))] - BuildXor8Filter { - source: xorfilter::Error, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to convert time range, err:{}", source))] - ConvertTimeRange { source: common_types::time::Error }, - - #[snafu(display("Failed to convert table schema, err:{}", source))] - ConvertTableSchema { source: common_types::schema::Error }, -} - -define_result!(Error); - -/// Meta data of a sst file -#[derive(Clone, PartialEq)] -pub struct ParquetMetaData { - pub min_key: Bytes, - pub max_key: Bytes, - /// Time Range of the sst - pub time_range: TimeRange, - /// Max sequence number in the sst - pub max_sequence: SequenceNumber, - pub schema: Schema, - pub parquet_filter: Option, - pub column_values: Option>>, -} - -pub type ParquetMetaDataRef = Arc; - -impl From<&MetaData> for ParquetMetaData { - fn from(meta: &MetaData) -> Self { - Self { - min_key: meta.min_key.clone(), - max_key: meta.max_key.clone(), - time_range: meta.time_range, - max_sequence: meta.max_sequence, - schema: meta.schema.clone(), - parquet_filter: None, - column_values: None, - } - } -} - -impl From for MetaData { - fn from(meta: ParquetMetaData) -> Self { - Self { - min_key: meta.min_key, - max_key: meta.max_key, - time_range: meta.time_range, - max_sequence: meta.max_sequence, - schema: meta.schema, - } - } -} - -impl From> for MetaData { - fn from(meta: Arc) -> Self { - Self { - min_key: meta.min_key.clone(), - max_key: meta.max_key.clone(), - time_range: meta.time_range, - max_sequence: meta.max_sequence, - schema: meta.schema.clone(), - } - } -} - -impl fmt::Debug for ParquetMetaData { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("ParquetMetaData") - .field("min_key", &hex::encode(&self.min_key)) - .field("max_key", &hex::encode(&self.max_key)) - .field("time_range", &self.time_range) - .field("max_sequence", &self.max_sequence) - .field("schema", &self.schema) - .field("column_values", &self.column_values) - .field( - "filter_size", - &self - .parquet_filter - .as_ref() - .map(|filter| filter.size()) - .unwrap_or(0), - ) - .finish() - } -} - -impl From for sst_pb::ParquetMetaData { - fn from(src: ParquetMetaData) -> Self { - let column_values = if let Some(v) = src.column_values { - v.into_iter() - .map(|col| sst_pb::ColumnValueSet { - value: col.map(|col| col.into()), - }) - .collect() - } else { - Vec::new() - }; - sst_pb::ParquetMetaData { - min_key: src.min_key.to_vec(), - max_key: src.max_key.to_vec(), - max_sequence: src.max_sequence, - time_range: Some(src.time_range.into()), - schema: Some(schema_pb::TableSchema::from(&src.schema)), - filter: src.parquet_filter.map(|v| v.into()), - // collapsible_cols_idx is used in hybrid format ,and it's deprecated. 
- collapsible_cols_idx: Vec::new(), - column_values, - } - } -} - -impl TryFrom for ParquetMetaData { - type Error = Error; - - fn try_from(src: sst_pb::ParquetMetaData) -> Result { - let time_range = { - let time_range = src.time_range.context(TimeRangeNotFound)?; - TimeRange::try_from(time_range).context(ConvertTimeRange)? - }; - let schema = { - let schema = src.schema.context(TableSchemaNotFound)?; - Schema::try_from(schema).context(ConvertTableSchema)? - }; - let parquet_filter = src.filter.map(ParquetFilter::try_from).transpose()?; - let column_values = if src.column_values.is_empty() { - // Old version sst don't has this, so set to none. - None - } else { - Some( - src.column_values - .into_iter() - .map(|v| v.value.map(|v| v.into())) - .collect(), - ) - }; - - Ok(Self { - min_key: src.min_key.into(), - max_key: src.max_key.into(), - time_range, - max_sequence: src.max_sequence, - schema, - parquet_filter, - column_values, - }) - } -} - -#[derive(Debug, PartialEq, Clone)] -pub enum ColumnValueSet { - StringValue(HashSet), -} - -impl ColumnValueSet { - pub fn is_empty(&self) -> bool { - match self { - Self::StringValue(sv) => sv.is_empty(), - } - } - - pub fn len(&self) -> usize { - match self { - Self::StringValue(sv) => sv.len(), - } - } -} - -impl From for sst_pb::column_value_set::Value { - fn from(value: ColumnValueSet) -> Self { - match value { - ColumnValueSet::StringValue(values) => { - let values = values.into_iter().collect(); - sst_pb::column_value_set::Value::StringSet(sst_pb::column_value_set::StringSet { - values, - }) - } - } - } -} - -impl From for ColumnValueSet { - fn from(value: sst_pb::column_value_set::Value) -> Self { - match value { - sst_pb::column_value_set::Value::StringSet(ss) => { - ColumnValueSet::StringValue(HashSet::from_iter(ss.values)) - } - } - } -} diff --git a/src/analytic_engine/src/sst/parquet/mod.rs b/src/analytic_engine/src/sst/parquet/mod.rs deleted file mode 100644 index bab5eaa07f..0000000000 --- a/src/analytic_engine/src/sst/parquet/mod.rs +++ /dev/null @@ -1,26 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Sst implementation based on parquet. - -pub mod async_reader; -pub mod encoding; -pub mod meta_data; -mod row_group_pruner; -pub mod writer; - -pub use async_reader::{Reader as AsyncParquetReader, ThreadedReader}; diff --git a/src/analytic_engine/src/sst/parquet/row_group_pruner.rs b/src/analytic_engine/src/sst/parquet/row_group_pruner.rs deleted file mode 100644 index 3aa0c43cc0..0000000000 --- a/src/analytic_engine/src/sst/parquet/row_group_pruner.rs +++ /dev/null @@ -1,415 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// Row group pruner. - -use std::{ - borrow::Cow, - cmp::Ordering, - collections::{HashMap, HashSet}, -}; - -use arrow::datatypes::SchemaRef; -use common_types::datum::Datum; -use datafusion::{ - logical_expr::Operator, - prelude::{lit, Expr}, - scalar::ScalarValue, -}; -use logger::debug; -use parquet::file::metadata::RowGroupMetaData; -use parquet_ext::prune::{ - equal::{self, ColumnPosition}, - min_max, -}; -use snafu::ensure; -use trace_metric::{MetricsCollector, TraceMetricWhenDrop}; - -use crate::sst::{ - parquet::meta_data::{filter::ParquetFilter, ColumnValueSet}, - reader::error::{OtherNoCause, Result}, -}; - -#[derive(Default, Debug, Clone, TraceMetricWhenDrop)] -struct Metrics { - #[metric(boolean)] - use_custom_filter: bool, - #[metric(number)] - total_row_groups: usize, - #[metric(number)] - row_groups_after_prune: usize, - #[metric(number)] - pruned_by_custom_filter: usize, - #[metric(number)] - pruned_by_min_max: usize, - #[metric(collector)] - collector: Option, -} - -/// RowGroupPruner is used to prune row groups according to the provided -/// predicates and filters. -/// -/// Currently, two kinds of filters will be applied to such filtering: -/// min max & parquet_filter. -pub struct RowGroupPruner<'a> { - schema: &'a SchemaRef, - row_groups: &'a [RowGroupMetaData], - parquet_filter: Option<&'a ParquetFilter>, - predicates: Cow<'a, [Expr]>, - metrics: Metrics, -} - -/// This functions will rewrite not related expr to its oppsite. -/// Current filter is based on bloom-filter like structure, it can give accurate -/// answer if an item doesn't exist in one collection, so by convert `col != -/// value` to `col == value2`, we can fully utilize this feature. 
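A minimal, std-only illustration of the complement rewrite described above (the `complement` helper and the `host` values here are assumptions for illustration; they mirror the cases exercised by `test_rewrite_not_expr` later in this file, while the real rewrite below operates on DataFusion `Expr` trees):

use std::collections::HashSet;

/// Positive value list for a negative predicate, given every value the column
/// is known to contain.
fn complement(all_values: &HashSet<String>, excluded: &HashSet<String>) -> HashSet<String> {
    all_values.difference(excluded).cloned().collect()
}

fn main() {
    // Values of `host` recorded for this SST.
    let all = HashSet::from(["web1".to_string(), "web2".to_string()]);

    // host NOT IN (web1, web3)  =>  host IN (web2)
    let excluded = HashSet::from(["web1".to_string(), "web3".to_string()]);
    assert_eq!(complement(&all, &excluded), HashSet::from(["web2".to_string()]));

    // host != web1  =>  host IN (web2)
    let excluded = HashSet::from(["web1".to_string()]);
    assert_eq!(complement(&all, &excluded), HashSet::from(["web2".to_string()]));
}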
-fn rewrite_not_expr(expr: Expr, column_values: &HashMap>) -> Expr { - let get_all_values = |column_name| { - column_values.get(column_name).and_then(|all_values| { - all_values.as_ref().map(|all| match all { - ColumnValueSet::StringValue(sv) => sv, - }) - }) - }; - let get_utf8_string = |expr| match expr { - Expr::Literal(scalar_value) => match scalar_value { - ScalarValue::Utf8(v) | ScalarValue::LargeUtf8(v) => v, - _ => None, - }, - _ => None, - }; - - let expr = match expr { - // Case1: not in - // - // ```plaintext - // [InList(InList { expr: Column(Column { relation: None, name: "name" }), - // list: [Literal(Utf8("v1")), Literal(Utf8("v2")), Literal(Utf8("v3"))], - // negated: true })] - // ``` - Expr::InList(in_list) => { - if !in_list.negated { - return Expr::InList(in_list); - } - - let column_name = match *in_list.expr.clone() { - Expr::Column(column) => column.name.to_string(), - _ => return Expr::InList(in_list), - }; - - let all_values = if let Some(v) = get_all_values(&column_name) { - v - } else { - return Expr::InList(in_list); - }; - - let mut not_values = HashSet::new(); - for item in &in_list.list { - if let Some(v) = get_utf8_string(item.clone()) { - not_values.insert(v); - } else { - return Expr::InList(in_list); - } - } - - let wanted_values = all_values.difference(¬_values); - let wanted_values = wanted_values.into_iter().map(lit).collect(); - datafusion::logical_expr::in_list(*in_list.expr, wanted_values, false) - } - // Case2: != - // - // ```plaintext - // [BinaryExpr(BinaryExpr { left: Column(Column { relation: None, name: "name" }), - // op: NotEq, - // right: Literal(Utf8("value")) })] - // ``` - Expr::BinaryExpr(binary_expr) => { - if binary_expr.op != Operator::NotEq { - return Expr::BinaryExpr(binary_expr); - } - - let column_name = match *binary_expr.left.clone() { - Expr::Column(column) => column.name.to_string(), - _ => return Expr::BinaryExpr(binary_expr), - }; - let all_values = if let Some(v) = get_all_values(&column_name) { - v - } else { - return Expr::BinaryExpr(binary_expr); - }; - - let not_value = if let Some(v) = get_utf8_string(*binary_expr.right.clone()) { - v - } else { - return Expr::BinaryExpr(binary_expr); - }; - - let wanted_values = all_values - .iter() - .filter_map(|value| { - if value == ¬_value { - None - } else { - Some(lit(value)) - } - }) - .collect(); - - datafusion::logical_expr::in_list(*binary_expr.left, wanted_values, false) - } - _ => expr, - }; - - expr -} -impl<'a> RowGroupPruner<'a> { - // TODO: DataFusion already change predicates to PhyscialExpr, we should keep up - // with upstream. 
- // https://github.com/apache/arrow-datafusion/issues/4695 - pub fn try_new( - schema: &'a SchemaRef, - row_groups: &'a [RowGroupMetaData], - parquet_filter: Option<&'a ParquetFilter>, - predicates: &'a [Expr], - metrics_collector: Option, - column_values: Option<&'a Vec>>, - ) -> Result { - if let Some(f) = parquet_filter { - ensure!(f.len() == row_groups.len(), OtherNoCause { - msg: format!("expect sst_filters.len() == row_groups.len(), num_sst_filters:{}, num_row_groups:{}", f.len(), row_groups.len()), - }); - } - - if let Some(values) = column_values { - ensure!(values.len() == schema.fields.len(), OtherNoCause { - msg: format!("expect column_value.len() == schema_fields.len(), num_sst_filters:{}, num_row_groups:{}", values.len(), schema.fields.len()), - }); - } - let predicates = if let Some(column_values) = column_values { - let column_values = schema - .fields - .iter() - .enumerate() - .map(|(i, f)| (f.name().to_string(), column_values[i].clone())) - .collect(); - - debug!("Pruner rewrite predicates, before:{predicates:?}"); - let predicates = predicates - .iter() - .map(|expr| rewrite_not_expr(expr.clone(), &column_values)) - .collect(); - debug!( - "Pruner rewrite predicates, after:{predicates:?}, column_values:{column_values:?}" - ); - - Cow::Owned(predicates) - } else { - Cow::Borrowed(predicates) - }; - - let metrics = Metrics { - use_custom_filter: parquet_filter.is_some(), - total_row_groups: row_groups.len(), - collector: metrics_collector, - ..Default::default() - }; - - Ok(Self { - schema, - row_groups, - parquet_filter, - predicates, - metrics, - }) - } - - pub fn prune(&mut self) -> Vec { - debug!( - "Begin to prune row groups, total_row_groups:{}, parquet_filter:{}, predicates:{:?}", - self.row_groups.len(), - self.parquet_filter.is_some(), - self.predicates, - ); - - let pruned0 = self.prune_by_min_max(); - self.metrics.pruned_by_min_max = self.row_groups.len() - pruned0.len(); - - let pruned = match self.parquet_filter { - Some(v) => { - // TODO: We can do continuous prune based on the `pruned0` to reduce the - // filtering cost. - let pruned1 = self.prune_by_filters(v); - let pruned = Self::intersect_pruned_row_groups(&pruned0, &pruned1); - - self.metrics.pruned_by_custom_filter = self.row_groups.len() - pruned1.len(); - debug!( - "Finish pruning row groups by parquet_filter and min_max, total_row_groups:{}, pruned_by_min_max:{}, pruned_by_blooms:{}, pruned_by_both:{}", - self.row_groups.len(), - pruned0.len(), - pruned1.len(), - pruned.len(), - ); - - pruned - } - None => { - debug!( - "Finish pruning row groups by min_max, total_row_groups:{}, pruned_row_groups:{}", - self.row_groups.len(), - pruned0.len(), - ); - pruned0 - } - }; - - self.metrics.row_groups_after_prune = pruned.len(); - pruned - } - - fn prune_by_min_max(&self) -> Vec { - min_max::prune_row_groups(self.schema.clone(), &self.predicates, self.row_groups) - } - - /// Prune row groups according to the filter. - fn prune_by_filters(&self, parquet_filter: &ParquetFilter) -> Vec { - let is_equal = - |col_pos: ColumnPosition, val: &ScalarValue, negated: bool| -> Option { - let datum = Datum::from_scalar_value(val)?; - let exist = parquet_filter[col_pos.row_group_idx] - .contains_column_data(col_pos.column_idx, &datum.to_bytes())?; - if exist { - // parquet_filter has false positivity, that is to say we are unsure whether - // this value exists even if the parquet_filter says it - // exists. 
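// In other words: because of false positives, a hit from the filter proves
// nothing, so the closure returns None ("unknown"); only a definite miss makes
// the predicate's truth value known, and it returns Some(negated): false for an
// equality predicate, true for a `!=` one.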
- None - } else { - Some(negated) - } - }; - - equal::prune_row_groups( - self.schema.clone(), - &self.predicates, - self.row_groups.len(), - is_equal, - ) - } - - /// Compute the intersection of the two row groups which are in increasing - /// order. - fn intersect_pruned_row_groups(row_groups0: &[usize], row_groups1: &[usize]) -> Vec { - let mut intersect = Vec::with_capacity(row_groups0.len().min(row_groups1.len())); - - let (mut i0, mut i1) = (0, 0); - while i0 < row_groups0.len() && i1 < row_groups1.len() { - let idx0 = row_groups0[i0]; - let idx1 = row_groups1[i1]; - - match idx0.cmp(&idx1) { - Ordering::Less => i0 += 1, - Ordering::Greater => i1 += 1, - Ordering::Equal => { - intersect.push(idx0); - i0 += 1; - i1 += 1; - } - } - } - - intersect - } -} - -#[cfg(test)] -mod tests { - use datafusion::prelude::col; - - use super::*; - - #[test] - fn test_intersect_row_groups() { - let test_cases = vec![ - (vec![0, 1, 2, 3, 4], vec![0, 3, 4, 5], vec![0, 3, 4]), - (vec![], vec![0, 3, 4, 5], vec![]), - (vec![1, 2, 3], vec![4, 5, 6], vec![]), - (vec![3], vec![1, 2, 3], vec![3]), - (vec![4, 5, 6], vec![4, 6, 7], vec![4, 6]), - ]; - - for (row_groups0, row_groups1, expect_row_groups) in test_cases { - let real_row_groups = - RowGroupPruner::intersect_pruned_row_groups(&row_groups0, &row_groups1); - assert_eq!(real_row_groups, expect_row_groups) - } - } - #[test] - fn test_rewrite_not_expr() { - let column_values = [("host", Some(["web1", "web2"])), ("ip", None)] - .into_iter() - .map(|(column_name, values)| { - ( - column_name.to_string(), - values.map(|vs| { - ColumnValueSet::StringValue(HashSet::from_iter( - vs.into_iter().map(|v| v.to_string()), - )) - }), - ) - }) - .collect(); - - let testcases = [ - (col("host").eq(lit("web1")), col("host").eq(lit("web1"))), - // Rewrite ok - ( - // host != web1 - col("host").not_eq(lit("web1")), - col("host").in_list(vec![lit("web2")], false), - ), - ( - // host not in (web1, web3) --> host in (web2) - col("host").in_list(vec![lit("web1"), lit("web3")], true), - col("host").in_list(vec![lit("web2")], false), - ), - ( - // host not in (web1, web2) --> host in () - col("host").in_list(vec![lit("web1"), lit("web2")], true), - col("host").in_list(vec![], false), - ), - // Can't rewrite since ip in column_values is None. - ( - // ip != 127.0.0.1 - col("ip").not_eq(lit("127.0.0.1")), - col("ip").not_eq(lit("127.0.0.1")), - ), - ( - // ip = 127.0.0.1 - col("ip").eq(lit("127.0.0.1")), - col("ip").eq(lit("127.0.0.1")), - ), - // Can't rewrite since host-not-exists is not in column_values. - ( - // ip != 127.0.0.1 - col("host-not-exists").not_eq(lit("web1")), - col("host-not-exists").not_eq(lit("web1")), - ), - ]; - for (input, expected) in testcases { - assert_eq!(expected, rewrite_not_expr(input, &column_values)); - } - } -} diff --git a/src/analytic_engine/src/sst/parquet/writer.rs b/src/analytic_engine/src/sst/parquet/writer.rs deleted file mode 100644 index 732753b773..0000000000 --- a/src/analytic_engine/src/sst/parquet/writer.rs +++ /dev/null @@ -1,1033 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Sst writer implementation based on parquet. - -use std::collections::{HashMap, HashSet}; - -use async_trait::async_trait; -use common_types::{ - datum::DatumKind, record_batch::FetchedRecordBatch, request_id::RequestId, schema::Schema, - time::TimeRange, -}; -use datafusion::parquet::basic::Compression; -use futures::StreamExt; -use generic_error::BoxError; -use logger::{debug, error}; -use object_store::{ - multi_part::{MultiUploadRef, MultiUploadWriter}, - ObjectStore, ObjectStoreRef, Path, -}; -use snafu::{OptionExt, ResultExt}; -use tokio::io::AsyncWrite; - -use crate::{ - sst::{ - factory::ObjectStorePickerRef, - file::Level, - parquet::{ - encoding::{encode_sst_meta_data, ColumnEncoding, EncodeOptions, ParquetEncoder}, - meta_data::{ - filter::{ParquetFilter, RowGroupFilter, RowGroupFilterBuilder}, - ColumnValueSet, ParquetMetaData, - }, - }, - writer::{ - BuildParquetFilter, EncodePbData, EncodeRecordBatch, ExpectTimestampColumn, MetaData, - PollRecordBatch, RecordBatchStream, Result, SstInfo, SstWriter, Storage, - }, - }, - table::sst_util, - table_options::StorageFormat, -}; - -const KEEP_COLUMN_VALUE_THRESHOLD: usize = 20; -/// Only the row group which contains at least -/// `MIN_NUM_ROWS_DICT_ENCODING_SAMPLE` rows can be sampling to decide whether -/// to do dictionary encoding. -const MIN_NUM_ROWS_SAMPLE_DICT_ENCODING: usize = 1024; -/// If the number of unique value exceeds -/// `total_num_values * MAX_UNIQUE_VALUE_RATIO_DICT_ENCODING`, there is no need -/// to do dictionary encoding for such column. -const MAX_UNIQUE_VALUE_RATIO_DICT_ENCODING: f64 = 0.12; - -/// The implementation of sst based on parquet and object storage. -#[derive(Debug)] -pub struct ParquetSstWriter<'a> { - /// The path where the data is persisted. - path: &'a Path, - /// The storage where the data is persist. - store: &'a ObjectStoreRef, - options: WriteOptions, -} - -impl<'a> ParquetSstWriter<'a> { - pub fn new( - path: &'a Path, - options: WriteOptions, - store_picker: &'a ObjectStorePickerRef, - ) -> Self { - let store = store_picker.default_store(); - Self { - path, - store, - options, - } - } -} - -/// The writer will reorganize the record batches into row groups, and then -/// encode them to parquet file. -struct RecordBatchGroupWriter<'a> { - request_id: RequestId, - input: RecordBatchStream, - meta_data: &'a MetaData, - options: WriteOptions, - - // inner status - input_exhausted: bool, - // Time range of rows, not aligned to segment. - real_time_range: Option, - // `column_values` is used to collect distinct values in each columns, - // its order is the same with schema's columns. 
- column_values: Option>>, -} - -#[derive(Clone, Debug)] -pub struct WriteOptions { - pub num_rows_per_row_group: usize, - pub max_buffer_size: usize, - pub compression: Compression, - pub sst_level: Level, - pub column_encodings: HashMap, -} - -impl WriteOptions { - #[inline] - pub fn need_custom_filter(&self) -> bool { - !self.sst_level.is_min() - } -} - -impl<'a> RecordBatchGroupWriter<'a> { - fn new( - request_id: RequestId, - input: RecordBatchStream, - meta_data: &'a MetaData, - options: WriteOptions, - ) -> Self { - // No need to build complex index for the min-level sst so there is no need to - // collect the column values. - let column_values = options.need_custom_filter().then(|| { - meta_data - .schema - .columns() - .iter() - .map(|col| { - // Only keep string values now. - if matches!(col.data_type, DatumKind::String) { - Some(ColumnValueSet::StringValue(HashSet::new())) - } else { - None - } - }) - .collect() - }); - - Self { - request_id, - input, - meta_data, - options, - input_exhausted: false, - real_time_range: None, - column_values, - } - } - - /// Fetch an integral row group from the `self.input`. - /// - /// Except the last one, every row group is ensured to contains exactly - /// `self.num_rows_per_row_group`. As for the last one, it will cover all - /// the left rows. - async fn fetch_next_row_group( - &mut self, - prev_record_batch: &mut Option, - ) -> Result> { - let mut curr_row_group = vec![]; - // Used to record the number of remaining rows to fill `curr_row_group`. - let mut remaining = self.options.num_rows_per_row_group; - - // Keep filling `curr_row_group` until `remaining` is zero. - while remaining > 0 { - // Use the `prev_record_batch` to fill `curr_row_group` if possible. - if let Some(v) = prev_record_batch { - let total_rows = v.num_rows(); - if total_rows <= remaining { - // The whole record batch is part of the `curr_row_group`, and let's feed it - // into `curr_row_group`. - curr_row_group.push(prev_record_batch.take().unwrap()); - remaining -= total_rows; - } else { - // Only first `remaining` rows of the record batch belongs to `curr_row_group`, - // the rest should be put to `prev_record_batch` for next row group. - curr_row_group.push(v.slice(0, remaining)); - *v = v.slice(remaining, total_rows - remaining); - remaining = 0; - } - - continue; - } - - if self.input_exhausted { - break; - } - - // Previous record batch has been exhausted, and let's fetch next record batch. - match self.input.next().await { - Some(v) => { - let v = v.context(PollRecordBatch)?; - debug_assert!( - !v.is_empty(), - "found empty record batch, request id:{}", - self.request_id - ); - - // Updated the exhausted `prev_record_batch`, and let next loop to continue to - // fill `curr_row_group`. - prev_record_batch.replace(v); - } - None => { - self.input_exhausted = true; - break; - } - }; - } - - Ok(curr_row_group) - } - - fn build_column_encodings( - &self, - sample_row_groups: &[FetchedRecordBatch], - column_encodings: &mut HashMap, - ) -> Result<()> { - let mut sampler = ColumnEncodingSampler { - sample_row_groups, - meta_data: self.meta_data, - min_num_sample_rows: MIN_NUM_ROWS_SAMPLE_DICT_ENCODING, - max_unique_value_ratio: MAX_UNIQUE_VALUE_RATIO_DICT_ENCODING, - column_encodings, - }; - sampler.sample() - } - - /// Build the parquet filter for the given `row_group`. 
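A row-count-only sketch of the regrouping performed by `fetch_next_row_group` above (the `regroup` helper is an assumption for illustration and ignores the slicing and carry-over of partial record batches; the cases mirror `test_fetch_row_group` later in this file):

fn regroup(batch_sizes: &[usize], rows_per_group: usize) -> Vec<usize> {
    let total: usize = batch_sizes.iter().sum();
    // Every row group is full except possibly the last one.
    let mut groups = vec![rows_per_group; total / rows_per_group];
    if total % rows_per_group != 0 {
        groups.push(total % rows_per_group);
    }
    groups
}

fn main() {
    assert_eq!(regroup(&[5, 6, 10], 10), vec![10, 10, 1]);
    assert_eq!(regroup(&[2, 2, 2, 2, 2], 10), vec![10]);
}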
- fn build_row_group_filter( - &self, - schema: &Schema, - row_group_batch: &[FetchedRecordBatch], - ) -> Result { - let mut builder = RowGroupFilterBuilder::new(schema); - - for partial_batch in row_group_batch { - for (col_idx, column) in partial_batch.columns().iter().enumerate() { - for row in 0..column.num_rows() { - let datum_view = column.datum_view(row); - datum_view.do_with_bytes(|bytes| { - builder.add_key(col_idx, bytes); - }); - } - } - } - - builder.build().box_err().context(BuildParquetFilter) - } - - fn update_column_values( - column_values: &mut [Option], - record_batch: &FetchedRecordBatch, - ) { - for (col_idx, col_values) in column_values.iter_mut().enumerate() { - let mut too_many_values = false; - { - let col_values = match col_values { - None => continue, - Some(v) => v, - }; - let rows_num = record_batch.num_rows(); - let column_block = record_batch.column(col_idx); - for row_idx in 0..rows_num { - match col_values { - ColumnValueSet::StringValue(ss) => { - let datum = column_block.datum(row_idx); - if let Some(v) = datum.as_str() { - ss.insert(v.to_string()); - } - } - } - - if row_idx % KEEP_COLUMN_VALUE_THRESHOLD == 0 - && col_values.len() > KEEP_COLUMN_VALUE_THRESHOLD - { - too_many_values = true; - break; - } - } - - // Do one last check. - if col_values.len() > KEEP_COLUMN_VALUE_THRESHOLD { - too_many_values = true; - } - } - - // When there are too many values, don't keep this column values - // any more. - if too_many_values { - *col_values = None; - } - } - } - - fn update_time_range(&mut self, current_range: Option) { - if let Some(current_range) = current_range { - if let Some(real_range) = self.real_time_range { - // Use current range to update real range, - // We should expand range as possible as we can. - self.real_time_range = Some(TimeRange::new_unchecked( - current_range - .inclusive_start() - .min(real_range.inclusive_start()), - current_range - .exclusive_end() - .max(real_range.exclusive_end()), - )); - } else { - self.real_time_range = Some(current_range); - } - } - } - - async fn write_all( - mut self, - sink: W, - meta_path: &Path, - ) -> Result<(usize, ParquetMetaData, ParquetEncoder)> { - let mut prev_record_batch: Option = None; - let mut arrow_row_group = Vec::new(); - let mut total_num_rows = 0; - - // Build the parquet encoder. 
- let mut row_group = self.fetch_next_row_group(&mut prev_record_batch).await?; - let mut column_encodings = std::mem::take(&mut self.options.column_encodings); - self.build_column_encodings(&row_group, &mut column_encodings)?; - let encode_options = EncodeOptions { - num_rows_per_row_group: self.options.num_rows_per_row_group, - max_buffer_size: self.options.max_buffer_size, - compression: self.options.compression, - column_encodings, - }; - let mut parquet_encoder = - ParquetEncoder::try_new(sink, &self.meta_data.schema, &encode_options) - .box_err() - .context(EncodeRecordBatch)?; - - let mut parquet_filter = self - .options - .need_custom_filter() - .then(ParquetFilter::default); - let timestamp_index = self.meta_data.schema.timestamp_index(); - while !row_group.is_empty() { - if let Some(filter) = &mut parquet_filter { - filter.push_row_group_filter( - self.build_row_group_filter(&self.meta_data.schema, &row_group)?, - ); - } - - let num_batches = row_group.len(); - for record_batch in row_group { - let column_block = record_batch.column(timestamp_index); - let ts_col = column_block.as_timestamp().context(ExpectTimestampColumn { - datum_kind: column_block.datum_kind(), - })?; - self.update_time_range(ts_col.time_range()); - if let Some(column_values) = self.column_values.as_mut() { - Self::update_column_values(column_values, &record_batch); - } - - arrow_row_group.push(record_batch.into_record_batch().into_arrow_record_batch()); - } - let num_rows = parquet_encoder - .encode_record_batches(arrow_row_group) - .await - .box_err() - .context(EncodeRecordBatch)?; - - // TODO: it will be better to use `arrow_row_group.clear()` to reuse the - // allocated memory. - arrow_row_group = Vec::with_capacity(num_batches); - total_num_rows += num_rows; - - row_group = self.fetch_next_row_group(&mut prev_record_batch).await?; - } - - let parquet_meta_data = { - let mut parquet_meta_data = ParquetMetaData::from(self.meta_data); - parquet_meta_data.parquet_filter = parquet_filter; - if let Some(range) = self.real_time_range { - parquet_meta_data.time_range = range; - } - // TODO: when all compaction input SST files already have column_values, we can - // merge them from meta_data directly, calculate them here waste CPU - // cycles. - parquet_meta_data.column_values = self.column_values; - parquet_meta_data - }; - - parquet_encoder - .set_meta_data_path(Some(meta_path.to_string())) - .box_err() - .context(EncodeRecordBatch)?; - - Ok((total_num_rows, parquet_meta_data, parquet_encoder)) - } -} - -async fn write_metadata( - meta_sink: MultiUploadWriter, - parquet_metadata: ParquetMetaData, -) -> Result { - let buf = encode_sst_meta_data(parquet_metadata).context(EncodePbData)?; - let buf_size = buf.len(); - let mut uploader = meta_sink.multi_upload.lock().await; - uploader.put(buf); - uploader.finish().await.context(Storage)?; - - Ok(buf_size) -} - -async fn multi_upload_abort(aborter: MultiUploadRef) { - // The uploading file will be leaked if failed to abort. A repair command - // will be provided to clean up the leaked files. 
- if let Err(e) = aborter.lock().await.abort().await { - error!("Failed to abort multi-upload sst, err:{}", e); - } -} - -#[async_trait] -impl<'a> SstWriter for ParquetSstWriter<'a> { - async fn write( - &mut self, - request_id: RequestId, - meta: &MetaData, - input: RecordBatchStream, - ) -> Result { - debug!( - "Build parquet file, request_id:{}, meta:{:?}, num_rows_per_row_group:{}", - request_id, meta, self.options.num_rows_per_row_group - ); - - let write_options = WriteOptions { - num_rows_per_row_group: self.options.num_rows_per_row_group, - max_buffer_size: self.options.max_buffer_size, - compression: self.options.compression, - sst_level: self.options.sst_level, - column_encodings: std::mem::take(&mut self.options.column_encodings), - }; - let group_writer = RecordBatchGroupWriter::new(request_id, input, meta, write_options); - - let sink = MultiUploadWriter::new(self.store, self.path) - .await - .context(Storage)?; - let aborter = sink.aborter(); - - let meta_path = Path::from(sst_util::new_metadata_path(self.path.as_ref())); - - let (total_num_rows, parquet_metadata, mut data_encoder) = - match group_writer.write_all(sink, &meta_path).await { - Ok(v) => v, - Err(e) => { - multi_upload_abort(aborter).await; - return Err(e); - } - }; - let time_range = parquet_metadata.time_range; - - let meta_sink = MultiUploadWriter::new(self.store, &meta_path) - .await - .context(Storage)?; - let meta_aborter = meta_sink.aborter(); - let meta_size = match write_metadata(meta_sink, parquet_metadata).await { - Ok(v) => v, - Err(e) => { - multi_upload_abort(aborter).await; - multi_upload_abort(meta_aborter).await; - return Err(e); - } - }; - - data_encoder - .set_meta_data_size(meta_size) - .box_err() - .context(EncodeRecordBatch)?; - - data_encoder - .close() - .await - .box_err() - .context(EncodeRecordBatch)?; - - let file_head = self.store.head(self.path).await.context(Storage)?; - Ok(SstInfo { - file_size: file_head.size, - row_num: total_num_rows, - storage_format: StorageFormat::Columnar, - meta_path: meta_path.to_string(), - time_range, - }) - } -} - -/// A sampler to decide the column encoding options (whether to do dictionary -/// encoding) with a bunch of sample row groups. 
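The decision rule used by the sampler defined below boils down to a single threshold; a std-only sketch (the `enable_dict` helper and the 2048-row sample are assumptions for illustration, the 0.12 ratio is `MAX_UNIQUE_VALUE_RATIO_DICT_ENCODING` declared earlier in this file):

fn enable_dict(num_unique_values: usize, num_total_rows: usize, max_unique_value_ratio: f64) -> bool {
    let max_unique_values = (num_total_rows as f64 * max_unique_value_ratio) as usize;
    num_unique_values < max_unique_values
}

fn main() {
    // A 2048-row sample with ratio 0.12 tolerates at most 245 distinct values
    // before dictionary encoding is disabled for the column.
    assert!(enable_dict(100, 2048, 0.12));
    assert!(!enable_dict(300, 2048, 0.12));
}

The real sampler additionally skips non-string/bytes columns, respects any pre-set column encodings, and stops hashing a column once the cap is reached.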
-struct ColumnEncodingSampler<'a> { - sample_row_groups: &'a [FetchedRecordBatch], - meta_data: &'a MetaData, - min_num_sample_rows: usize, - max_unique_value_ratio: f64, - column_encodings: &'a mut HashMap, -} - -impl<'a> ColumnEncodingSampler<'a> { - fn sample(&mut self) -> Result<()> { - let num_total_rows: usize = self.sample_row_groups.iter().map(|v| v.num_rows()).sum(); - let ignore_sampling = num_total_rows < self.min_num_sample_rows; - if ignore_sampling { - self.decide_column_encodings_by_data_type(); - return Ok(()); - } - - assert!(self.max_unique_value_ratio <= 1.0 && self.max_unique_value_ratio >= 0.0); - let max_unique_values = (num_total_rows as f64 * self.max_unique_value_ratio) as usize; - let mut column_hashes = HashSet::with_capacity(max_unique_values); - for (col_idx, col_schema) in self.meta_data.schema.columns().iter().enumerate() { - if !Self::is_dictionary_type(col_schema.data_type) { - self.column_encodings.insert( - col_schema.name.clone(), - ColumnEncoding { enable_dict: false }, - ); - continue; - } - - if self.column_encodings.contains_key(&col_schema.name) { - continue; - } - - for row_group in self.sample_row_groups { - let col_block = &row_group.columns()[col_idx]; - for idx in 0..row_group.num_rows() { - if column_hashes.len() >= max_unique_values { - break; - } - let datum_view = col_block.datum_view(idx); - datum_view.do_with_bytes(|val| { - let hash = hash_ext::hash64(val); - column_hashes.insert(hash); - }) - } - } - - // The dictionary encoding make senses only if the number of unique values is - // small. - let enable_dict = column_hashes.len() < max_unique_values; - column_hashes.clear(); - self.column_encodings - .insert(col_schema.name.clone(), ColumnEncoding { enable_dict }); - } - - Ok(()) - } - - fn decide_column_encodings_by_data_type(&mut self) { - for col_schema in self.meta_data.schema.columns().iter() { - if !Self::is_dictionary_type(col_schema.data_type) { - self.column_encodings.insert( - col_schema.name.clone(), - ColumnEncoding { enable_dict: false }, - ); - } - } - } - - #[inline] - fn is_dictionary_type(data_type: DatumKind) -> bool { - // Only do dictionary encoding for string or bytes column. 
- matches!(data_type, DatumKind::String | DatumKind::Varbinary) - } -} - -#[cfg(test)] -mod tests { - - use std::{sync::Arc, task::Poll}; - - use bytes_ext::Bytes; - use common_types::{ - projected_schema::{ProjectedSchema, RowProjectorBuilder}, - tests::{build_row, build_row_for_dictionary, build_schema, build_schema_with_dictionary}, - time::{TimeRange, Timestamp}, - }; - use futures::stream; - use object_store::local_file; - use runtime::{self, Runtime}; - use table_engine::predicate::Predicate; - use tempfile::tempdir; - - use super::*; - use crate::{ - row_iter::tests::build_fetched_record_batch_with_key, - sst::{ - factory::{ - Factory, FactoryImpl, ReadFrequency, ScanOptions, SstReadOptions, SstWriteOptions, - }, - parquet::AsyncParquetReader, - reader::{tests::check_stream, SstReader}, - }, - table_options::{self, StorageFormatHint}, - }; - - // TODO(xikai): add test for reverse reader - - #[test] - fn test_parquet_build_and_read() { - test_util::init_log_for_test(); - - let runtime = Arc::new(runtime::Builder::default().enable_all().build().unwrap()); - parquet_write_and_then_read_back(runtime.clone(), 2, vec![2, 2, 2, 2, 2, 2, 2, 2, 2, 2]); - parquet_write_and_then_read_back(runtime.clone(), 3, vec![3, 3, 3, 3, 3, 3, 2]); - parquet_write_and_then_read_back(runtime.clone(), 4, vec![4, 4, 4, 4, 4]); - parquet_write_and_then_read_back(runtime, 5, vec![5, 5, 5, 5]); - } - - fn parquet_write_and_then_read_back( - runtime: Arc, - num_rows_per_row_group: usize, - expected_num_rows: Vec, - ) { - runtime.block_on(async { - let sst_factory = FactoryImpl; - let sst_write_options = SstWriteOptions { - storage_format_hint: StorageFormatHint::Auto, - num_rows_per_row_group, - compression: table_options::Compression::Uncompressed, - max_buffer_size: 0, - column_stats: Default::default(), - }; - - let root = tempdir().unwrap().as_ref().to_string_lossy().to_string(); - let store: ObjectStoreRef = Arc::new(local_file::try_new_with_default(root).unwrap()); - let store_picker: ObjectStorePickerRef = Arc::new(store); - let sst_file_path = Path::from("data.par"); - - let schema = build_schema_with_dictionary(); - let reader_projected_schema = ProjectedSchema::no_projection(schema.clone()); - let mut sst_meta = MetaData { - min_key: Bytes::from_static(b"100"), - max_key: Bytes::from_static(b"200"), - time_range: TimeRange::new_unchecked(Timestamp::new(1), Timestamp::new(2)), - max_sequence: 200, - schema: schema.clone(), - }; - - let mut counter = 5; - let record_batch_stream = Box::new(stream::poll_fn(move |_| -> Poll> { - if counter == 0 { - return Poll::Ready(None); - } - counter -= 1; - - let ts = 100 + counter; - let rows = vec![ - build_row_for_dictionary( - b"a", - ts, - 10.0, - "v4", - 1000, - 1_000_000, - Some("tagv1"), - "tagv2", - ), - build_row_for_dictionary( - b"b", - ts, - 10.0, - "v4", - 1000, - 1_000_000, - Some("tagv2"), - "tagv4", - ), - build_row_for_dictionary(b"c", ts, 10.0, "v4", 1000, 1_000_000, None, "tagv2"), - build_row_for_dictionary( - b"d", - ts, - 10.0, - "v4", - 1000, - 1_000_000, - Some("tagv3"), - "tagv2", - ), - ]; - let batch = build_fetched_record_batch_with_key(schema.clone(), rows); - Poll::Ready(Some(Ok(batch))) - })); - - let mut writer = sst_factory - .create_writer( - &sst_write_options, - &sst_file_path, - &store_picker, - Level::MAX, - ) - .await - .unwrap(); - let sst_info = writer - .write( - RequestId::next_id(), - &sst_meta, - Box::new(record_batch_stream), - ) - .await - .unwrap(); - - assert_eq!(20, sst_info.row_num); - - let scan_options = 
ScanOptions::default(); - // read sst back to test - let row_projector_builder = RowProjectorBuilder::new( - reader_projected_schema.to_record_schema(), - reader_projected_schema.table_schema().clone(), - None, - ); - let sst_read_options = SstReadOptions { - maybe_table_level_metrics: None, - frequency: ReadFrequency::Frequent, - num_rows_per_row_group: 5, - predicate: Arc::new(Predicate::empty()), - meta_cache: None, - scan_options, - runtime: runtime.clone(), - row_projector_builder, - }; - - let mut reader: Box = { - let mut reader = AsyncParquetReader::new( - &sst_file_path, - &sst_read_options, - None, - &store_picker, - None, - ); - let mut sst_meta_readback = reader - .meta_data() - .await - .unwrap() - .as_parquet() - .unwrap() - .as_ref() - .clone(); - // sst filter is built insider sst writer, so overwrite to default for - // comparison. - sst_meta_readback.parquet_filter = Default::default(); - sst_meta_readback.column_values = None; - // time_range is built insider sst writer, so overwrite it for - // comparison. - sst_meta.time_range = sst_info.time_range; - assert_eq!( - sst_meta.time_range, - TimeRange::new_unchecked(100.into(), 105.into()) - ); - assert_eq!(&sst_meta_readback, &ParquetMetaData::from(&sst_meta)); - assert_eq!( - expected_num_rows, - reader - .row_groups() - .await - .iter() - .map(|g| g.num_rows()) - .collect::>() - ); - - Box::new(reader) - }; - - let mut stream = reader.read().await.unwrap(); - let mut expect_rows = vec![]; - for counter in &[4, 3, 2, 1, 0] { - expect_rows.push(build_row_for_dictionary( - b"a", - 100 + counter, - 10.0, - "v4", - 1000, - 1_000_000, - Some("tagv1"), - "tagv2", - )); - expect_rows.push(build_row_for_dictionary( - b"b", - 100 + counter, - 10.0, - "v4", - 1000, - 1_000_000, - Some("tagv2"), - "tagv4", - )); - expect_rows.push(build_row_for_dictionary( - b"c", - 100 + counter, - 10.0, - "v4", - 1000, - 1_000_000, - None, - "tagv2", - )); - expect_rows.push(build_row_for_dictionary( - b"d", - 100 + counter, - 10.0, - "v4", - 1000, - 1_000_000, - Some("tagv3"), - "tagv2", - )); - } - check_stream(&mut stream, expect_rows).await; - }); - } - - #[tokio::test] - async fn test_fetch_row_group() { - // rows per group: 10 - let testcases = vec![ - // input, expected - (10, vec![], vec![]), - (10, vec![10, 10], vec![10, 10]), - (10, vec![10, 10, 1], vec![10, 10, 1]), - (10, vec![10, 10, 21], vec![10, 10, 10, 10, 1]), - (10, vec![5, 6, 10], vec![10, 10, 1]), - (10, vec![5, 4, 4, 30], vec![10, 10, 10, 10, 3]), - (10, vec![20, 7, 23, 20], vec![10, 10, 10, 10, 10, 10, 10]), - (10, vec![21], vec![10, 10, 1]), - (10, vec![2, 2, 2, 2, 2], vec![10]), - (4, vec![3, 3, 3, 3, 3], vec![4, 4, 4, 3]), - (5, vec![3, 3, 3, 3, 3], vec![5, 5, 5]), - ]; - - for (num_rows_per_group, input, expected) in testcases { - check_num_rows_of_row_group(num_rows_per_group, input, expected).await; - } - } - - async fn check_num_rows_of_row_group( - num_rows_per_row_group: usize, - input_num_rows: Vec, - expected_num_rows: Vec, - ) { - test_util::init_log_for_test(); - let schema = build_schema(); - let mut poll_cnt = 0; - let schema_clone = schema.clone(); - let record_batch_stream = Box::new(stream::poll_fn(move |_ctx| -> Poll> { - if poll_cnt == input_num_rows.len() { - return Poll::Ready(None); - } - - let rows = (0..input_num_rows[poll_cnt]) - .map(|_| build_row(b"a", 100, 10.0, "v4", 1000, 1_000_000)) - .collect::>(); - - let batch = build_fetched_record_batch_with_key(schema_clone.clone(), rows); - poll_cnt += 1; - - Poll::Ready(Some(Ok(batch))) - })); - - let 
write_options = WriteOptions { - num_rows_per_row_group, - max_buffer_size: 0, - compression: Compression::UNCOMPRESSED, - sst_level: Level::default(), - column_encodings: Default::default(), - }; - let meta_data = MetaData { - min_key: Default::default(), - max_key: Default::default(), - time_range: Default::default(), - max_sequence: 1, - schema, - }; - let mut group_writer = RecordBatchGroupWriter::new( - RequestId::next_id(), - record_batch_stream, - &meta_data, - write_options, - ); - - let mut prev_record_batch = None; - for expect_num_row in expected_num_rows { - let batch = group_writer - .fetch_next_row_group(&mut prev_record_batch) - .await - .unwrap(); - - let actual_num_row: usize = batch.iter().map(|b| b.num_rows()).sum(); - assert_eq!(expect_num_row, actual_num_row); - } - } - - fn check_sample_column_encoding( - mut sampler: ColumnEncodingSampler<'_>, - expect_enable_dicts: Vec>, - ) { - sampler.sample().unwrap(); - for (col_idx, col_schema) in sampler.meta_data.schema.columns().iter().enumerate() { - let expect_enable_dict = - expect_enable_dicts[col_idx].map(|v| ColumnEncoding { enable_dict: v }); - let column_encoding = sampler.column_encodings.get(&col_schema.name).cloned(); - assert_eq!( - expect_enable_dict, column_encoding, - "column:{}", - col_schema.name - ); - } - } - - #[test] - fn test_column_encoding_option_sample() { - let schema = build_schema(); - let raw_rows = vec![ - (b"a", 100, 10.0, "v4", 1000, 1_000_000), - (b"a", 100, 10.0, "v4", 1000, 1_000_000), - (b"a", 100, 10.0, "v5", 1000, 1_000_000), - (b"a", 100, 10.0, "v5", 1000, 1_000_000), - (b"a", 100, 10.0, "v6", 1000, 1_000_000), - (b"a", 100, 10.0, "v6", 1000, 1_000_000), - (b"a", 100, 10.0, "v8", 1000, 1_000_000), - (b"a", 100, 10.0, "v8", 1000, 1_000_000), - (b"a", 100, 10.0, "v9", 1000, 1_000_000), - (b"a", 100, 10.0, "v9", 1000, 1_000_000), - ]; - let rows: Vec<_> = raw_rows - .into_iter() - .map(|v| build_row(v.0, v.1, v.2, v.3, v.4, v.5)) - .collect(); - let record_batch_with_key0 = - build_fetched_record_batch_with_key(schema.clone(), rows.clone()); - let record_batch_with_key1 = build_fetched_record_batch_with_key(schema.clone(), rows); - let meta_data = MetaData { - min_key: Bytes::from_static(b""), - max_key: Bytes::from_static(b""), - time_range: TimeRange::new_unchecked(Timestamp::new(1), Timestamp::new(2)), - max_sequence: 200, - schema, - }; - let record_batches_with_key = vec![record_batch_with_key0, record_batch_with_key1]; - - let mut column_encodings = HashMap::new(); - let sampler = ColumnEncodingSampler { - sample_row_groups: &record_batches_with_key, - meta_data: &meta_data, - min_num_sample_rows: 10, - max_unique_value_ratio: 0.6, - column_encodings: &mut column_encodings, - }; - let expect_enable_dicts = vec![ - Some(true), - Some(false), - Some(false), - Some(true), - Some(false), - Some(false), - ]; - check_sample_column_encoding(sampler, expect_enable_dicts); - - column_encodings.clear(); - let sampler = ColumnEncodingSampler { - sample_row_groups: &record_batches_with_key, - meta_data: &meta_data, - min_num_sample_rows: 10, - max_unique_value_ratio: 0.2, - column_encodings: &mut column_encodings, - }; - let expect_enable_dicts = vec![ - Some(true), - Some(false), - Some(false), - Some(false), - Some(false), - Some(false), - ]; - check_sample_column_encoding(sampler, expect_enable_dicts); - - column_encodings.clear(); - let sampler = ColumnEncodingSampler { - sample_row_groups: &record_batches_with_key, - meta_data: &meta_data, - min_num_sample_rows: 30, - 
max_unique_value_ratio: 0.2, - column_encodings: &mut column_encodings, - }; - let expect_enable_dicts = vec![ - None, - Some(false), - Some(false), - None, - Some(false), - Some(false), - ]; - check_sample_column_encoding(sampler, expect_enable_dicts); - - column_encodings.clear(); - // `field1` is double type, it will still be changed to false even if it is set - // as true. - // `field2` is string type, it will be kept as the pre-set. - column_encodings.insert("field1".to_string(), ColumnEncoding { enable_dict: true }); - column_encodings.insert("field2".to_string(), ColumnEncoding { enable_dict: true }); - let sampler = ColumnEncodingSampler { - sample_row_groups: &record_batches_with_key, - meta_data: &meta_data, - min_num_sample_rows: 10, - max_unique_value_ratio: 0.2, - column_encodings: &mut column_encodings, - }; - let expect_enable_dicts = vec![ - Some(true), - Some(false), - Some(false), - Some(true), - Some(false), - Some(false), - ]; - check_sample_column_encoding(sampler, expect_enable_dicts); - } -} diff --git a/src/analytic_engine/src/sst/reader.rs b/src/analytic_engine/src/sst/reader.rs deleted file mode 100644 index 68f22fa0d0..0000000000 --- a/src/analytic_engine/src/sst/reader.rs +++ /dev/null @@ -1,136 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Sst reader trait definition. 
- -use async_trait::async_trait; -use common_types::record_batch::FetchedRecordBatch; - -use crate::{prefetchable_stream::PrefetchableStream, sst::meta_data::SstMetaData}; - -pub mod error { - use generic_error::GenericError; - use macros::define_result; - use snafu::{Backtrace, Snafu}; - - #[derive(Debug, Snafu)] - #[snafu(visibility(pub))] - pub enum Error { - #[snafu(display("Try to read again, path:{path}.\nBacktrace:\n{backtrace}"))] - ReadAgain { backtrace: Backtrace, path: String }, - - #[snafu(display("Fail to read persisted file, path:{path}, err:{source}"))] - ReadPersist { path: String, source: GenericError }, - - #[snafu(display("Failed to decode record batch, err:{source}"))] - DecodeRecordBatch { source: GenericError }, - - #[snafu(display( - "Failed to decode sst meta data, file_path:{file_path}, err:{source}.\nBacktrace:\n{backtrace:?}", - ))] - FetchAndDecodeSstMeta { - file_path: String, - source: parquet::errors::ParquetError, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to decode page indexes for meta data, file_path:{file_path}, err:{source}.\nBacktrace:\n{backtrace:?}", - ))] - DecodePageIndexes { - file_path: String, - source: parquet::errors::ParquetError, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to decode sst meta data, err:{source}"))] - DecodeSstMeta { source: GenericError }, - - #[snafu(display("Sst meta data is not found.\nBacktrace:\n{backtrace}"))] - SstMetaNotFound { backtrace: Backtrace }, - - #[snafu(display("Fail to projection, err:{source}"))] - Projection { source: GenericError }, - - #[snafu(display("Sst meta data is empty.\nBacktrace:\n{backtrace}"))] - EmptySstMeta { backtrace: Backtrace }, - - #[snafu(display("Invalid schema, err:{source}"))] - InvalidSchema { source: common_types::schema::Error }, - - #[snafu(display("Meet a datafusion error, err:{source}\nBacktrace:\n{backtrace}"))] - DataFusionError { - source: datafusion::error::DataFusionError, - backtrace: Backtrace, - }, - - #[snafu(display("Meet a object store error, err:{source}\nBacktrace:\n{backtrace}"))] - ObjectStoreError { - source: object_store::ObjectStoreError, - backtrace: Backtrace, - }, - - #[snafu(display("Meet a parquet error, err:{source}\nBacktrace:\n{backtrace}"))] - ParquetError { - source: parquet::errors::ParquetError, - backtrace: Backtrace, - }, - - #[snafu(display("Other kind of error:{source}"))] - Other { source: GenericError }, - - #[snafu(display("Other kind of error, msg:{msg}.\nBacktrace:\n{backtrace}"))] - OtherNoCause { msg: String, backtrace: Backtrace }, - } - - define_result!(Error); -} - -pub use error::*; - -#[async_trait] -pub trait SstReader { - async fn meta_data(&mut self) -> Result; - - async fn read( - &mut self, - ) -> Result>>>; -} - -#[cfg(test)] -pub mod tests { - use common_types::row::Row; - - use super::*; - use crate::prefetchable_stream::PrefetchableStream; - - pub async fn check_stream(stream: &mut S, expected_rows: Vec) - where - S: PrefetchableStream> + Unpin, - { - let mut visited_rows = 0; - while let Some(batch) = stream.fetch_next().await { - let batch = batch.unwrap(); - for row_idx in 0..batch.num_rows() { - assert_eq!(batch.clone_row_at(row_idx), expected_rows[visited_rows]); - visited_rows += 1; - } - } - - assert_eq!(visited_rows, expected_rows.len()); - } -} diff --git a/src/analytic_engine/src/sst/writer.rs b/src/analytic_engine/src/sst/writer.rs deleted file mode 100644 index 577f499332..0000000000 --- a/src/analytic_engine/src/sst/writer.rs +++ /dev/null @@ -1,272 +0,0 @@ -// Licensed to the 
Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Sst writer trait definition - -use std::cmp; - -use async_trait::async_trait; -use bytes_ext::Bytes; -use common_types::{ - record_batch::FetchedRecordBatch, request_id::RequestId, schema::Schema, time::TimeRange, - SequenceNumber, -}; -use futures::Stream; -use generic_error::{BoxError, GenericError}; -use snafu::{OptionExt, ResultExt}; - -use crate::table_options::StorageFormat; - -pub mod error { - use common_types::datum::DatumKind; - use generic_error::GenericError; - use macros::define_result; - use snafu::{Backtrace, Snafu}; - - #[derive(Debug, Snafu)] - #[snafu(visibility(pub))] - pub enum Error { - #[snafu(display( - "Failed to perform storage operation, err:{}.\nBacktrace:\n{}", - source, - backtrace - ))] - Storage { - source: object_store::ObjectStoreError, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to encode meta data, err:{}", source))] - EncodeMetaData { source: GenericError }, - - #[snafu(display("Failed to encode pb data, err:{}", source))] - EncodePbData { - source: crate::sst::parquet::encoding::Error, - }, - - #[snafu(display("IO failed, file:{file}, source:{source}.\nbacktrace:\n{backtrace}",))] - Io { - file: String, - source: std::io::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to encode record batch into sst, err:{}.\nBacktrace:\n{}", - source, - backtrace - ))] - EncodeRecordBatch { - source: GenericError, - backtrace: Backtrace, - }, - - #[snafu(display( - "Expect column to be timestamp, actual:{datum_kind}.\nBacktrace:\n{backtrace}" - ))] - ExpectTimestampColumn { - datum_kind: DatumKind, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to build parquet filter, err:{}", source))] - BuildParquetFilter { source: GenericError }, - - #[snafu(display("Failed to build parquet filter msg:{msg}.\nBacktrace:\n{backtrace}"))] - BuildParquetFilterNoCause { msg: String, backtrace: Backtrace }, - - #[snafu(display("Failed to poll record batch, err:{}", source))] - PollRecordBatch { source: GenericError }, - - #[snafu(display("Failed to read data, err:{}", source))] - ReadData { source: GenericError }, - - #[snafu(display("Other kind of error, msg:{}.\nBacktrace:\n{}", msg, backtrace))] - OtherNoCause { msg: String, backtrace: Backtrace }, - - #[snafu(display("Empty time range.\nBacktrace:\n{}", backtrace))] - EmptyTimeRange { backtrace: Backtrace }, - - #[snafu(display("Empty schema.\nBacktrace:\n{}", backtrace))] - EmptySchema { backtrace: Backtrace }, - - #[snafu(display("Failed to convert time range, err:{}", source))] - ConvertTimeRange { source: GenericError }, - - #[snafu(display("Failed to convert sst info, err:{}", source))] - ConvertSstInfo { source: GenericError }, - - #[snafu(display("Failed to convert schema, err:{}", source))] - 
ConvertSchema { source: GenericError }, - } - - define_result!(Error); -} - -pub use error::*; - -pub type RecordBatchStreamItem = std::result::Result; -// TODO(yingwen): SstReader also has a RecordBatchStream, can we use same type? -pub type RecordBatchStream = Box + Send + Unpin>; - -#[derive(Debug, Clone)] -pub struct SstInfo { - pub file_size: usize, - pub row_num: usize, - pub storage_format: StorageFormat, - pub meta_path: String, - /// Real time range, not aligned to segment. - pub time_range: TimeRange, -} - -impl TryFrom for SstInfo { - type Error = Error; - - fn try_from(value: horaedbproto::compaction_service::SstInfo) -> Result { - let storage_format = value - .storage_format - .try_into() - .box_err() - .context(ConvertSstInfo)?; - let time_range = value - .time_range - .context(EmptyTimeRange)? - .try_into() - .box_err() - .context(ConvertTimeRange)?; - - Ok(Self { - file_size: value.file_size as usize, - row_num: value.row_num as usize, - storage_format, - meta_path: value.meta_path, - time_range, - }) - } -} - -impl From for horaedbproto::compaction_service::SstInfo { - fn from(value: SstInfo) -> Self { - Self { - file_size: value.file_size as u64, - row_num: value.row_num as u64, - storage_format: value.storage_format.into(), - meta_path: value.meta_path, - time_range: Some(value.time_range.into()), - } - } -} - -#[derive(Debug, Clone)] -pub struct MetaData { - /// Min key of the sst. - pub min_key: Bytes, - /// Max key of the sst. - pub max_key: Bytes, - /// Time Range of the sst. - pub time_range: TimeRange, - /// Max sequence number in the sst. - pub max_sequence: SequenceNumber, - /// The schema of the sst. - pub schema: Schema, -} - -impl TryFrom for MetaData { - type Error = Error; - - fn try_from(meta: horaedbproto::compaction_service::MetaData) -> Result { - let time_range = meta - .time_range - .context(EmptyTimeRange)? - .try_into() - .box_err() - .context(ConvertTimeRange)?; - let schema = meta - .schema - .context(EmptySchema)? - .try_into() - .box_err() - .context(ConvertSchema)?; - - Ok(Self { - min_key: Bytes::from(meta.min_key), - max_key: Bytes::from(meta.max_key), - time_range, - max_sequence: meta.max_sequence, - schema, - }) - } -} - -impl From for horaedbproto::compaction_service::MetaData { - fn from(meta: MetaData) -> Self { - Self { - min_key: meta.min_key.to_vec(), - max_key: meta.max_key.to_vec(), - max_sequence: meta.max_sequence, - time_range: Some(meta.time_range.into()), - schema: Some((&meta.schema).into()), - } - } -} - -/// The writer for sst. -/// -/// The caller provides a stream of [RecordBatch] and the writer takes -/// responsibilities for persisting the records. -#[async_trait] -pub trait SstWriter { - async fn write( - &mut self, - request_id: RequestId, - meta: &MetaData, - record_stream: RecordBatchStream, - ) -> Result; -} - -impl MetaData { - /// Merge multiple meta datas into the one. - /// - /// Panic if the metas is empty. 
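A std-only sketch of the merge rule implemented below, with placeholder types standing in for the crate's `Bytes`/`TimeRange`/`Schema` (the `SimpleMeta` struct is an assumption for illustration and schema handling is omitted): the merged meta keeps the smallest min key, the largest max key, the union of the time ranges and the highest sequence number.

#[derive(Debug)]
struct SimpleMeta {
    min_key: Vec<u8>,
    max_key: Vec<u8>,
    // (inclusive start, exclusive end)
    time_range: (i64, i64),
    max_sequence: u64,
}

fn merge(metas: impl IntoIterator<Item = SimpleMeta>) -> SimpleMeta {
    metas
        .into_iter()
        .reduce(|a, b| SimpleMeta {
            min_key: a.min_key.min(b.min_key),
            max_key: a.max_key.max(b.max_key),
            time_range: (
                a.time_range.0.min(b.time_range.0),
                a.time_range.1.max(b.time_range.1),
            ),
            max_sequence: a.max_sequence.max(b.max_sequence),
        })
        .expect("metas must not be empty")
}

fn main() {
    let merged = merge([
        SimpleMeta { min_key: b"100".to_vec(), max_key: b"200".to_vec(), time_range: (1, 5), max_sequence: 7 },
        SimpleMeta { min_key: b"050".to_vec(), max_key: b"150".to_vec(), time_range: (3, 9), max_sequence: 9 },
    ]);
    assert_eq!(merged.min_key, b"050".to_vec());
    assert_eq!(merged.max_key, b"200".to_vec());
    assert_eq!(merged.time_range, (1, 9));
    assert_eq!(merged.max_sequence, 9);
}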
- pub fn merge(mut metas: I, schema: Schema) -> Self - where - I: Iterator, - { - let first_meta = metas.next().unwrap(); - let mut min_key = first_meta.min_key; - let mut max_key = first_meta.max_key; - let mut time_range_start = first_meta.time_range.inclusive_start(); - let mut time_range_end = first_meta.time_range.exclusive_end(); - let mut max_sequence = first_meta.max_sequence; - - for file in metas { - min_key = cmp::min(file.min_key, min_key); - max_key = cmp::max(file.max_key, max_key); - time_range_start = cmp::min(file.time_range.inclusive_start(), time_range_start); - time_range_end = cmp::max(file.time_range.exclusive_end(), time_range_end); - max_sequence = cmp::max(file.max_sequence, max_sequence); - } - - MetaData { - min_key, - max_key, - time_range: TimeRange::new(time_range_start, time_range_end).unwrap(), - max_sequence, - schema, - } - } -} diff --git a/src/analytic_engine/src/table/data.rs b/src/analytic_engine/src/table/data.rs deleted file mode 100644 index 3f01362319..0000000000 --- a/src/analytic_engine/src/table/data.rs +++ /dev/null @@ -1,1184 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Table data - -use std::{ - collections::HashMap, - convert::TryInto, - fmt, - fmt::Formatter, - num::NonZeroUsize, - sync::{ - atomic::{AtomicU32, AtomicU64, AtomicUsize, Ordering}, - Arc, Mutex, - }, - time::Duration, -}; - -use arc_swap::ArcSwap; -use arena::CollectorRef; -use common_types::{ - self, - schema::{Schema, Version}, - table::ShardId, - time::{TimeRange, Timestamp}, - SequenceNumber, -}; -use generic_error::{GenericError, GenericResult}; -use id_allocator::IdAllocator; -use logger::{debug, info}; -use macros::define_result; -use object_store::Path; -use snafu::{ensure, Backtrace, OptionExt, ResultExt, Snafu}; -use table_engine::table::{SchemaId, TableId}; -use time_ext::ReadableDuration; - -use crate::{ - instance::serial_executor::TableOpSerialExecutor, - manifest::{ - meta_edit::{AddTableMeta, MetaEdit, MetaEditRequest, MetaUpdate, VersionEditMeta}, - ManifestRef, - }, - memtable::{ - columnar::factory::ColumnarMemTableFactory, - factory::{FactoryRef as MemTableFactoryRef, Options as MemTableOptions}, - layered::factory::LayeredMemtableFactory, - skiplist::factory::SkiplistMemTableFactory, - MemtableType, - }, - space::SpaceId, - sst::{file::FilePurger, manager::FileId}, - table::{ - metrics::{Metrics, MetricsContext}, - sst_util, - version::{MemTableForWrite, MemTableState, SamplingMemTable, TableVersion}, - }, - table_options::UpdateMode, - MetricsOptions, TableOptions, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to create memtable, err:{}", source))] - CreateMemTable { source: crate::memtable::Error }, - - #[snafu(display( - "Failed to find or create memtable, timestamp overflow, timestamp:{:?}, duration:{:?}.\nBacktrace:\n{}", - timestamp, - duration, - backtrace, - ))] - TimestampOverflow { - timestamp: Timestamp, - duration: Duration, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to find memtable for write, err:{}", source))] - FindMemTable { - source: crate::table::version::Error, - }, - - #[snafu(display("Failed to alloc file id, err:{}", source))] - AllocFileId { source: GenericError }, - - #[snafu(display("Found invalid table opts, msg:{msg}.\nBacktrace:\n{backtrace}"))] - InvalidTableOpts { msg: String, backtrace: Backtrace }, -} - -define_result!(Error); - -pub type MemTableId = u64; - -pub const DEFAULT_ALLOC_STEP: u64 = 100; - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct TableShardInfo { - pub shard_id: ShardId, -} - -impl TableShardInfo { - pub fn new(shard_id: ShardId) -> Self { - Self { shard_id } - } -} - -/// `atomic_enum` macro will expand method like -/// ```text -/// compare_exchange(..) -> Result -/// ``` -/// The result type is conflict with outer -/// Result, so add this hack -// TODO: fix this in atomic_enum crate. -mod hack { - use atomic_enum::atomic_enum; - - #[atomic_enum] - #[derive(PartialEq)] - pub enum TableStatus { - Ok = 0, - Closed, - /// No write/alter are allowed after table is dropped. 
- Dropped, - } -} - -use self::hack::{AtomicTableStatus, TableStatus}; - -pub struct TableDesc { - pub id: TableId, - pub shard_id: ShardId, - pub space_id: SpaceId, - pub schema_id: SchemaId, - pub schema_name: String, - pub catalog_name: String, - pub name: String, - pub schema: Schema, -} - -pub struct TableConfig { - pub preflush_write_buffer_size_ratio: f32, - pub manifest_snapshot_every_n_updates: NonZeroUsize, - pub metrics_opt: MetricsOptions, - pub enable_primary_key_sampling: bool, - pub try_compat_old_layered_memtable_opts: bool, -} - -#[derive(Debug, Clone)] -pub struct TableCatalogInfo { - pub schema_id: SchemaId, - pub schema_name: String, - pub catalog_name: String, -} - -impl TableCatalogInfo { - pub fn new(schema_id: SchemaId, schema_name: String, catalog_name: String) -> Self { - Self { - schema_id, - schema_name, - catalog_name, - } - } -} - -/// Data of a table -pub struct TableData { - /// Id of this table - pub id: TableId, - /// Name of this table - pub name: String, - /// Schema of this table - schema: Mutex, - pub table_catalog_info: TableCatalogInfo, - /// Space id of this table - pub space_id: SpaceId, - - /// Mutable memtable memory size limitation - mutable_limit: AtomicU32, - /// Mutable memtable memory usage ratio of the write buffer size. - mutable_limit_write_buffer_ratio: f32, - - /// Options of this table - /// - /// Most modification to `opts` can be done by replacing the old options - /// with a new one. However, altering the segment duration should be done - /// carefully to avoid the reader seeing inconsistent segment duration - /// and memtables/ssts during query/compaction/flush . - opts: ArcSwap, - /// MemTable factory of this table - memtable_factory: MemTableFactoryRef, - /// Space memtable memory usage collector - mem_usage_collector: CollectorRef, - - /// Current table version - current_version: TableVersion, - /// Last sequence visible to the reads - /// - /// Write to last_sequence should be guarded by a mutex and only done by - /// single writer, but reads are allowed to be done concurrently without - /// mutex protected - last_sequence: AtomicU64, - - /// Auto incremented id to track memtable, reset on engine open - /// - /// Allocating memtable id should be guarded by write lock - last_memtable_id: AtomicU64, - - /// Allocating file id - allocator: IdAllocator, - - /// Last flush time - /// - /// Not persist, used to determine if this table should flush. 
- last_flush_time_ms: AtomicU64, - - /// Table Status - status: AtomicTableStatus, - - /// Manifest updates after last snapshot - manifest_updates: AtomicUsize, - - /// Every n manifest updates to trigger a snapshot - manifest_snapshot_every_n_updates: NonZeroUsize, - - /// Whether enable primary key sampling - enable_primary_key_sampling: bool, - - /// Whether enable layered memtable - pub enable_layered_memtable: bool, - - /// Metrics of this table - pub metrics: Metrics, - - /// Shard info of the table - pub shard_info: TableShardInfo, - - /// The table operation serial_exec - pub serial_exec: tokio::sync::Mutex, -} - -impl fmt::Debug for TableData { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - f.debug_struct("TableData") - .field("id", &self.id) - .field("name", &self.name) - .field("space", &self.space_id) - .field("mutable_limit", &self.mutable_limit) - .field("opts", &self.opts) - .field("last_sequence", &self.last_sequence) - .field("last_memtable_id", &self.last_memtable_id) - .field("status", &self.status.load(Ordering::Relaxed)) - .field("shard_info", &self.shard_info) - .finish() - } -} - -impl Drop for TableData { - fn drop(&mut self) { - debug!("TableData is dropped, id:{}, name:{}", self.id, self.name); - } -} - -#[inline] -fn compute_mutable_limit( - write_buffer_size: u32, - mutable_limit_write_buffer_size_ratio: f32, -) -> u32 { - assert!((0.0..=1.0).contains(&mutable_limit_write_buffer_size_ratio)); - - let limit = write_buffer_size as f32 * mutable_limit_write_buffer_size_ratio; - // This is safe because the limit won't be larger than the write_buffer_size. - limit as u32 -} - -pub struct MemSizeOptions { - pub collector: CollectorRef, - pub size_sampling_interval: ReadableDuration, -} - -impl TableData { - /// Create a new TableData - /// - /// This function should only be called when a new table is creating and - /// there is no existing data of the table - pub fn new( - desc: TableDesc, - opts: TableOptions, - config: TableConfig, - purger: &FilePurger, - mem_size_options: MemSizeOptions, - ) -> Result { - // TODO: Validate TableOptions, such as bucket_duration >= - // segment_duration and bucket_duration is aligned to segment_duration - - let TableDesc { - space_id, - schema_id, - schema_name, - catalog_name, - shard_id, - id, - name, - schema, - } = desc; - - let TableConfig { - preflush_write_buffer_size_ratio, - manifest_snapshot_every_n_updates, - metrics_opt, - enable_primary_key_sampling, - .. 
- } = config; - - let memtable_factory: MemTableFactoryRef = match opts.memtable_type { - MemtableType::SkipList => Arc::new(SkiplistMemTableFactory), - MemtableType::Column => Arc::new(ColumnarMemTableFactory), - }; - - let enable_layered_memtable = opts.layered_memtable_opts.enable; - let memtable_factory = if enable_layered_memtable { - let mutable_segment_switch_threshold = opts - .layered_memtable_opts - .mutable_segment_switch_threshold - .0 as usize; - - ensure!( - mutable_segment_switch_threshold > 0, - InvalidTableOpts { - msg: "layered memtable is enabled but mutable_switch_threshold is 0", - } - ); - - Arc::new(LayeredMemtableFactory::new( - memtable_factory, - mutable_segment_switch_threshold, - )) - } else { - memtable_factory - }; - - let purge_queue = purger.create_purge_queue(space_id, id); - let current_version = - TableVersion::new(mem_size_options.size_sampling_interval, purge_queue); - let metrics_ctx = MetricsContext::new(&name, shard_id, metrics_opt); - let metrics = Metrics::new(metrics_ctx); - let mutable_limit = AtomicU32::new(compute_mutable_limit( - opts.write_buffer_size, - preflush_write_buffer_size_ratio, - )); - - Ok(Self { - id, - name, - schema: Mutex::new(schema), - table_catalog_info: TableCatalogInfo { - schema_id, - schema_name, - catalog_name, - }, - space_id, - mutable_limit, - mutable_limit_write_buffer_ratio: preflush_write_buffer_size_ratio, - opts: ArcSwap::new(Arc::new(opts)), - memtable_factory, - mem_usage_collector: mem_size_options.collector, - current_version, - last_sequence: AtomicU64::new(0), - last_memtable_id: AtomicU64::new(0), - allocator: IdAllocator::new(0, 0, DEFAULT_ALLOC_STEP), - last_flush_time_ms: AtomicU64::new(0), - status: TableStatus::Ok.into(), - metrics, - shard_info: TableShardInfo::new(shard_id), - serial_exec: tokio::sync::Mutex::new(TableOpSerialExecutor::new(id)), - manifest_updates: AtomicUsize::new(0), - manifest_snapshot_every_n_updates, - enable_primary_key_sampling, - enable_layered_memtable, - }) - } - - /// Recover table from add table meta - /// - /// This wont recover sequence number, which will be set after wal replayed - pub fn recover_from_add( - add_meta: AddTableMeta, - purger: &FilePurger, - shard_id: ShardId, - config: TableConfig, - mem_size_options: MemSizeOptions, - allocator: IdAllocator, - table_catalog_info: TableCatalogInfo, - ) -> Result { - let TableConfig { - preflush_write_buffer_size_ratio, - manifest_snapshot_every_n_updates, - metrics_opt, - enable_primary_key_sampling, - try_compat_old_layered_memtable_opts, - } = config; - - let memtable_factory: MemTableFactoryRef = match add_meta.opts.memtable_type { - MemtableType::SkipList => Arc::new(SkiplistMemTableFactory), - MemtableType::Column => Arc::new(ColumnarMemTableFactory), - }; - // Maybe wrap it by `LayeredMemtable`. 
- let enable_layered_memtable = add_meta.opts.layered_memtable_opts.enable; - let memtable_factory = if enable_layered_memtable { - let mutable_segment_switch_threshold = add_meta - .opts - .layered_memtable_opts - .mutable_segment_switch_threshold - .0 as usize; - - if mutable_segment_switch_threshold > 0 { - ensure!( - add_meta.opts.update_mode != UpdateMode::Overwrite, - InvalidTableOpts { - msg: "layered memtable is enabled but update mode is Overwrite", - } - ); - - Arc::new(LayeredMemtableFactory::new( - memtable_factory, - mutable_segment_switch_threshold, - )) as _ - } else if try_compat_old_layered_memtable_opts { - // Maybe some old layered memtable opts controlling the on/off of this feature - // by checking `mutable_segment_switch_threshold`(`0`:disable, `>0`:enable) - // were persisted. - // If `try_compat_old_layered_memtable_opts` is true, we will try to follow the - // old behavior. - memtable_factory as _ - } else { - return InvalidTableOpts { - msg: "layered memtable is enabled but mutable_switch_threshold is 0", - } - .fail(); - } - } else { - memtable_factory as _ - }; - - let purge_queue = purger.create_purge_queue(add_meta.space_id, add_meta.table_id); - let current_version = - TableVersion::new(mem_size_options.size_sampling_interval, purge_queue); - let metrics_ctx = MetricsContext::new(&add_meta.table_name, shard_id, metrics_opt); - let metrics = Metrics::new(metrics_ctx); - let mutable_limit = AtomicU32::new(compute_mutable_limit( - add_meta.opts.write_buffer_size, - preflush_write_buffer_size_ratio, - )); - - Ok(Self { - id: add_meta.table_id, - name: add_meta.table_name, - schema: Mutex::new(add_meta.schema), - table_catalog_info, - space_id: add_meta.space_id, - mutable_limit, - mutable_limit_write_buffer_ratio: preflush_write_buffer_size_ratio, - opts: ArcSwap::new(Arc::new(add_meta.opts)), - memtable_factory, - mem_usage_collector: mem_size_options.collector, - current_version, - last_sequence: AtomicU64::new(0), - last_memtable_id: AtomicU64::new(0), - allocator, - last_flush_time_ms: AtomicU64::new(0), - status: TableStatus::Ok.into(), - metrics, - shard_info: TableShardInfo::new(shard_id), - serial_exec: tokio::sync::Mutex::new(TableOpSerialExecutor::new(add_meta.table_id)), - manifest_updates: AtomicUsize::new(0), - manifest_snapshot_every_n_updates, - enable_primary_key_sampling, - enable_layered_memtable, - }) - } - - /// Get current schema of the table. - pub fn schema(&self) -> Schema { - self.schema.lock().unwrap().clone() - } - - /// Set current schema of the table. - pub fn set_schema(&self, schema: Schema) { - *self.schema.lock().unwrap() = schema; - } - - /// Get current version of schema. 
- pub fn schema_version(&self) -> Version { - self.schema.lock().unwrap().version() - } - - /// Get current table version - #[inline] - pub fn current_version(&self) -> &TableVersion { - &self.current_version - } - - /// Get last sequence number - #[inline] - pub fn last_sequence(&self) -> SequenceNumber { - self.last_sequence.load(Ordering::Acquire) - } - - /// Set last sequence number - #[inline] - pub fn set_last_sequence(&self, seq: SequenceNumber) { - self.last_sequence.store(seq, Ordering::Release); - } - - #[inline] - pub fn next_sequence(&self) -> SequenceNumber { - self.last_sequence.fetch_add(1, Ordering::Relaxed) + 1 - } - - /// Get last flush time - #[inline] - pub fn last_flush_time(&self) -> u64 { - self.last_flush_time_ms.load(Ordering::Relaxed) - } - - /// Set last flush time - #[inline] - pub fn set_last_flush_time(&self, time: u64) { - self.last_flush_time_ms.store(time, Ordering::Release); - } - - #[inline] - pub fn table_options(&self) -> Arc { - self.opts.load().clone() - } - - /// Update table options. - #[inline] - pub fn set_table_options(&self, opts: TableOptions) { - let mutable_limit = compute_mutable_limit( - opts.write_buffer_size, - self.mutable_limit_write_buffer_ratio, - ); - self.mutable_limit.store(mutable_limit, Ordering::Relaxed); - self.opts.store(Arc::new(opts)) - } - - #[inline] - pub fn is_dropped(&self) -> bool { - self.status.load(Ordering::SeqCst) == TableStatus::Dropped - } - - /// Set the table is dropped and forbid any writes/alter on this table. - #[inline] - pub fn set_dropped(&self) { - self.status.store(TableStatus::Dropped, Ordering::SeqCst) - } - - #[inline] - pub fn set_closed(&self) { - self.status.store(TableStatus::Closed, Ordering::SeqCst) - } - - #[inline] - pub fn allow_compaction(&self) -> bool { - match self.status.load(Ordering::SeqCst) { - TableStatus::Ok => true, - TableStatus::Closed | TableStatus::Dropped => false, - } - } - - /// Returns total memtable memory usage in bytes. - #[inline] - pub fn memtable_memory_usage(&self) -> usize { - self.current_version.total_memory_usage() - } - - /// Returns mutable memtable memory usage in bytes. - #[inline] - pub fn mutable_memory_usage(&self) -> usize { - self.current_version.mutable_memory_usage() - } - - /// Find memtable for given timestamp to insert, create if not exists - /// - /// If the memtable schema is outdated, switch all memtables and create the - /// needed mutable memtable by current schema. The returned memtable is - /// guaranteed to have same schema of current table - pub fn find_or_create_mutable( - &self, - timestamp: Timestamp, - table_schema: &Schema, - ) -> Result { - let last_sequence = self.last_sequence(); - - if let Some(mem) = self - .current_version - .memtable_for_write(timestamp, table_schema.version()) - .context(FindMemTable)? - { - return Ok(mem); - } - - // Mutable memtable for this timestamp not found, need to create a new one. - let table_options = self.table_options(); - let memtable_opts = MemTableOptions { - schema: table_schema.clone(), - arena_block_size: table_options.arena_block_size, - creation_sequence: last_sequence, - collector: self.mem_usage_collector.clone(), - }; - let mem = self - .memtable_factory - .create_memtable(memtable_opts) - .context(CreateMemTable)?; - - // Currently only do sample when segment duration is none. - // TODO: add a new flag to represent this since we may need to sample other - // info, such as primary keys. 
- match table_options.segment_duration() { - Some(segment_duration) => { - let time_range = TimeRange::bucket_of(timestamp, segment_duration).context( - TimestampOverflow { - timestamp, - duration: segment_duration, - }, - )?; - let mem_state = MemTableState { - mem, - aligned_time_range: time_range, - id: self.alloc_memtable_id(), - }; - - // Insert memtable into mutable memtables of current version. - self.current_version.insert_mutable(mem_state.clone()); - - Ok(MemTableForWrite::Normal(mem_state)) - } - None => { - let mut sampling_mem = SamplingMemTable::new(mem, self.alloc_memtable_id()); - if self.enable_primary_key_sampling && table_options.support_sample_pk() { - sampling_mem.set_pk_sampler(table_schema); - } - - debug!( - "create sampling mem table:{}, schema:{:#?}", - sampling_mem.id, table_schema - ); - // Set sampling memtables of current version. - self.current_version.set_sampling(sampling_mem.clone()); - - Ok(MemTableForWrite::Sampling(sampling_mem)) - } - } - } - - /// Returns true if the memory usage of this table reaches flush threshold - pub fn should_flush_table(&self, in_flush: bool) -> bool { - // Fallback to usize::MAX if Failed to convert arena_block_size into - // usize (overflow) - let max_write_buffer_size = self - .table_options() - .write_buffer_size - .try_into() - .unwrap_or(usize::MAX); - let mutable_limit = self - .mutable_limit - .load(Ordering::Relaxed) - .try_into() - .unwrap_or(usize::MAX); - - let mutable_usage = self.current_version.mutable_memory_usage(); - let total_usage = self.current_version.total_memory_usage(); - // Inspired by https://github.com/facebook/rocksdb/blob/main/include/rocksdb/write_buffer_manager.h#L94 - if mutable_usage > mutable_limit && !in_flush { - info!( - "TableData should flush by mutable limit, table:{}, table_id:{}, mutable_usage:{}, mutable_limit: {}, total_usage:{}, max_write_buffer_size:{}", - self.name, self.id, mutable_usage, mutable_limit, total_usage, max_write_buffer_size - ); - return true; - } - - // If the memory exceeds the buffer size, we trigger more aggressive - // flush. But if already more than half memory is being flushed, - // triggering more flush may not help. We will hold it instead. - let should_flush = - total_usage >= max_write_buffer_size && mutable_usage >= max_write_buffer_size / 2; - - debug!( - "Check should flush, table:{}, table_id:{}, mutable_usage:{}, mutable_limit: {}, total_usage:{}, max_write_buffer_size:{}", - self.name, self.id, mutable_usage, mutable_limit, total_usage, max_write_buffer_size - ); - - if should_flush { - info!( - "TableData should flush by total usage, table:{}, table_id:{}, mutable_usage:{}, mutable_limit: {}, total_usage:{}, max_write_buffer_size:{}", - self.name, self.id, mutable_usage, mutable_limit, total_usage, max_write_buffer_size - ); - } - - should_flush - } - - /// Use allocator to alloc a file id for a new file. - pub async fn alloc_file_id(&self, manifest: &ManifestRef) -> Result { - // Persist next max file id to manifest. 
- let persist_max_file_id = move |next_max_file_id| async move { - self.persist_max_file_id(manifest, next_max_file_id).await - }; - - self.allocator - .alloc_id(persist_max_file_id) - .await - .context(AllocFileId) - } - - async fn persist_max_file_id( - &self, - manifest: &ManifestRef, - next_max_file_id: FileId, - ) -> GenericResult<()> { - let manifest_update = VersionEditMeta { - space_id: self.space_id, - table_id: self.id, - flushed_sequence: 0, - files_to_add: vec![], - files_to_delete: vec![], - mems_to_remove: vec![], - max_file_id: next_max_file_id, - }; - let edit_req = { - let meta_update = MetaUpdate::VersionEdit(manifest_update); - MetaEditRequest { - shard_info: self.shard_info, - meta_edit: MetaEdit::Update(meta_update), - table_catalog_info: self.table_catalog_info.clone(), - } - }; - // table version's max file id will be update when apply this meta update. - manifest.apply_edit(edit_req).await?; - Ok(()) - } - - /// Get sst file path in the object storage - pub fn sst_file_path(&self, file_id: FileId) -> Path { - sst_util::new_sst_file_path(self.space_id, self.id, file_id) - } - - pub fn compaction_task_key(&self, file_id: FileId) -> String { - format!("{}-{file_id}", self.id) - } - - /// Allocate next memtable id - fn alloc_memtable_id(&self) -> MemTableId { - let last = self.last_memtable_id.fetch_add(1, Ordering::Relaxed); - last + 1 - } - - /// Returns last memtable id - pub fn last_memtable_id(&self) -> MemTableId { - self.last_memtable_id.load(Ordering::Relaxed) - } - - pub fn dedup(&self) -> bool { - self.table_options().need_dedup() - } - - pub fn is_expired(&self, timestamp: Timestamp) -> bool { - self.table_options().is_expired(timestamp) - } - - pub fn table_location(&self) -> TableLocation { - TableLocation { - id: self.id.as_u64(), - shard_info: self.shard_info, - } - } - - pub fn increase_manifest_updates(&self, updates_num: usize) { - self.manifest_updates - .fetch_add(updates_num, Ordering::Relaxed); - } - - pub fn should_do_manifest_snapshot(&self) -> bool { - let updates = self.manifest_updates.load(Ordering::Relaxed); - updates >= self.manifest_snapshot_every_n_updates.get() - } - - pub fn reset_manifest_updates(&self) { - self.manifest_updates.store(0, Ordering::Relaxed); - } -} - -#[derive(Debug, Clone, Copy)] -pub struct TableLocation { - pub id: common_types::table::TableId, - pub shard_info: TableShardInfo, -} - -/// Table data reference -pub type TableDataRef = Arc; - -/// Manages TableDataRef -#[derive(Debug, Default)] -pub struct TableDataSet { - /// Name to table data - table_datas: HashMap, - /// Id to table data - id_to_tables: HashMap, -} - -impl TableDataSet { - /// Create an empty TableDataSet - pub fn new() -> Self { - Self::default() - } - - /// Insert if absent, if successfully inserted, return true and return - /// false if the data already exists - pub fn insert_if_absent(&mut self, table_data_ref: TableDataRef) -> bool { - let table_name = &table_data_ref.name; - if self.table_datas.contains_key(table_name) { - let exist_table = self.table_datas.get(table_name).unwrap(); - logger::error!( - "found duplicated table_name:{}, exist_table_id:{}, exist_table_shard_id:{}, inserted_table_id:{}, inserted_table_shard_id:{}", - table_name, - exist_table.id, - exist_table.shard_info.shard_id, - table_data_ref.id, - table_data_ref.shard_info.shard_id, - ); - return false; - } - self.table_datas - .insert(table_name.to_string(), table_data_ref.clone()); - self.id_to_tables.insert(table_data_ref.id, table_data_ref); - true - } - - /// Find table 
by table name - pub fn find_table(&self, table_name: &str) -> Option { - self.table_datas.get(table_name).cloned() - } - - /// Find table by table id - pub fn find_table_by_id(&self, table_id: TableId) -> Option { - self.id_to_tables.get(&table_id).cloned() - } - - /// Remove table by table name - pub fn remove_table(&mut self, table_name: &str) -> Option { - let table = self.table_datas.remove(table_name)?; - self.id_to_tables.remove(&table.id); - Some(table) - } - - /// Returns the total table num in this set - pub fn table_num(&self) -> usize { - self.table_datas.len() - } - - pub fn find_maximum_memory_usage_table(&self) -> Option { - // TODO: Possible performance issue here when there are too many tables. - self.table_datas - .values() - .max_by_key(|t| t.memtable_memory_usage()) - .cloned() - } - - pub fn total_memory_usage(&self) -> usize { - if self.table_datas.is_empty() { - return 0; - } - // TODO: Possible performance issue here when there are too many tables. - self.table_datas - .values() - .map(|t| t.memtable_memory_usage()) - .sum() - } - - pub fn find_maximum_mutable_memory_usage_table(&self) -> Option { - // TODO: Possible performance issue here when there are too many tables. - self.table_datas - .values() - .max_by_key(|t| t.mutable_memory_usage()) - .cloned() - } - - /// List all tables to `tables` - pub fn list_all_tables(&self, tables: &mut Vec) { - for table_data in self.table_datas.values().cloned() { - tables.push(table_data); - } - } -} - -#[cfg(test)] -pub mod tests { - use std::sync::Arc; - - use arena::NoopCollector; - use common_types::{datum::DatumKind, table::DEFAULT_SHARD_ID}; - use table_engine::{ - engine::{CreateTableParams, CreateTableRequest, TableState}, - table::SchemaId, - }; - use time_ext::ReadableDuration; - - use super::*; - use crate::{ - memtable::{factory::Factory, MemTableRef}, - sst::file::tests::FilePurgerMocker, - table_options, - tests::table, - }; - - const DEFAULT_SPACE_ID: SpaceId = 1; - const DEFAULT_SCHEMA_ID: SchemaId = SchemaId::from_u32(2); - - pub fn default_schema() -> Schema { - table::create_schema_builder( - &[("key", DatumKind::Timestamp)], - &[("value", DatumKind::Double)], - ) - .build() - .unwrap() - } - - #[derive(Default)] - pub struct MemTableMocker; - - impl MemTableMocker { - pub fn build(&self) -> MemTableRef { - let memtable_opts = MemTableOptions { - schema: default_schema(), - arena_block_size: 1024 * 1024, - creation_sequence: 1000, - collector: Arc::new(NoopCollector), - }; - - let factory = SkiplistMemTableFactory; - factory.create_memtable(memtable_opts).unwrap() - } - - pub fn build_columnar(&self) -> MemTableRef { - let memtable_opts = MemTableOptions { - schema: default_schema(), - arena_block_size: 1024 * 1024, - creation_sequence: 1000, - collector: Arc::new(NoopCollector), - }; - - let factory = ColumnarMemTableFactory; - factory.create_memtable(memtable_opts).unwrap() - } - } - - #[must_use] - pub struct TableDataMocker { - table_id: TableId, - table_name: String, - shard_id: ShardId, - manifest_snapshot_every_n_updates: NonZeroUsize, - } - - impl TableDataMocker { - pub fn table_id(mut self, table_id: TableId) -> Self { - self.table_id = table_id; - self - } - - pub fn table_name(mut self, table_name: String) -> Self { - self.table_name = table_name; - self - } - - pub fn shard_id(mut self, shard_id: ShardId) -> Self { - self.shard_id = shard_id; - self - } - - pub fn manifest_snapshot_every_n_updates( - mut self, - manifest_snapshot_every_n_updates: NonZeroUsize, - ) -> Self { - 
self.manifest_snapshot_every_n_updates = manifest_snapshot_every_n_updates; - self - } - - pub fn build(self) -> TableData { - let space_id = DEFAULT_SPACE_ID; - let schema_id = DEFAULT_SCHEMA_ID; - let table_schema = default_schema(); - let params = CreateTableParams { - catalog_name: "test_catalog".to_string(), - schema_name: "public".to_string(), - table_name: self.table_name, - table_schema, - engine: table_engine::ANALYTIC_ENGINE_TYPE.to_string(), - table_options: HashMap::new(), - partition_info: None, - }; - let create_request = CreateTableRequest { - params, - schema_id: SchemaId::from_u32(DEFAULT_SPACE_ID), - table_id: self.table_id, - state: TableState::Stable, - shard_id: self.shard_id, - }; - - let table_opts = TableOptions::default(); - let purger = FilePurgerMocker::mock(); - let collector = Arc::new(NoopCollector); - - let mem_size_options = MemSizeOptions { - collector, - size_sampling_interval: Default::default(), - }; - - TableData::new( - TableDesc { - id: create_request.table_id, - shard_id: create_request.shard_id, - space_id, - schema_id, - catalog_name: "test_catalog".to_string(), - schema_name: "public".to_string(), - name: create_request.params.table_name, - schema: create_request.params.table_schema, - }, - table_opts, - TableConfig { - preflush_write_buffer_size_ratio: 0.75, - manifest_snapshot_every_n_updates: self.manifest_snapshot_every_n_updates, - metrics_opt: MetricsOptions::default(), - enable_primary_key_sampling: false, - try_compat_old_layered_memtable_opts: false, - }, - &purger, - mem_size_options, - ) - .unwrap() - } - } - - impl Default for TableDataMocker { - fn default() -> Self { - Self { - table_id: table::new_table_id(2, 1), - table_name: "mocked_table".to_string(), - shard_id: DEFAULT_SHARD_ID, - manifest_snapshot_every_n_updates: NonZeroUsize::new(usize::MAX).unwrap(), - } - } - } - - #[test] - fn test_new_table_data() { - let table_id = table::new_table_id(100, 30); - let table_name = "new_table".to_string(); - let shard_id = 42; - let table_data = TableDataMocker::default() - .table_id(table_id) - .table_name(table_name.clone()) - .shard_id(shard_id) - .build(); - - assert_eq!(table_id, table_data.id); - assert_eq!(table_name, table_data.name); - assert_eq!(TableShardInfo::new(shard_id), table_data.shard_info); - assert_eq!(0, table_data.last_sequence()); - assert!(!table_data.is_dropped()); - assert_eq!(0, table_data.last_memtable_id()); - assert!(table_data.dedup()); - } - - #[test] - fn test_find_or_create_mutable() { - let table_data = TableDataMocker::default().build(); - let schema = table_data.schema(); - - // Create sampling memtable. - let zero_ts = Timestamp::new(0); - let mutable = table_data.find_or_create_mutable(zero_ts, &schema).unwrap(); - assert!(mutable.accept_timestamp(zero_ts)); - let sampling_mem = mutable.as_sampling(); - let sampling_id = sampling_mem.id; - assert_eq!(1, sampling_id); - - // Test memtable is reused. - let now_ts = Timestamp::now(); - let mutable = table_data.find_or_create_mutable(now_ts, &schema).unwrap(); - assert!(mutable.accept_timestamp(now_ts)); - let sampling_mem = mutable.as_sampling(); - // Use same sampling memtable. - assert_eq!(sampling_id, sampling_mem.id); - - let current_version = table_data.current_version(); - // Set segment duration manually. - let mut table_opts = (*table_data.table_options()).clone(); - table_opts.segment_duration = - Some(ReadableDuration(table_options::DEFAULT_SEGMENT_DURATION)); - table_data.set_table_options(table_opts); - // Freeze sampling memtable. 
- current_version.freeze_sampling_memtable(); - - // A new mutable memtable should be created. - let mutable = table_data.find_or_create_mutable(now_ts, &schema).unwrap(); - assert!(mutable.accept_timestamp(now_ts)); - let mem_state = mutable.as_normal(); - assert_eq!(2, mem_state.id); - let time_range = - TimeRange::bucket_of(now_ts, table_options::DEFAULT_SEGMENT_DURATION).unwrap(); - assert_eq!(time_range, mem_state.aligned_time_range); - } - - #[test] - fn test_compute_mutable_limit() { - // Build the cases for compute_mutable_limit. - let cases = vec![ - (80, 0.8, 64), - (80, 0.5, 40), - (80, 0.1, 8), - (80, 0.0, 0), - (80, 1.0, 80), - (0, 0.8, 0), - (0, 0.5, 0), - (0, 0.1, 0), - (0, 0.0, 0), - (0, 1.0, 0), - ]; - - for (write_buffer_size, ratio, expected) in cases { - let limit = compute_mutable_limit(write_buffer_size, ratio); - assert_eq!(expected, limit); - } - } - - #[should_panic] - #[test] - fn test_compute_mutable_limit_panic() { - compute_mutable_limit(80, 1.1); - compute_mutable_limit(80, -0.1); - } - - #[test] - fn test_manifest_snapshot_trigger() { - // When snapshot_every_n_updates is not zero. - let table_data = TableDataMocker::default() - .manifest_snapshot_every_n_updates(NonZeroUsize::new(5).unwrap()) - .build(); - - check_manifest_snapshot_trigger(&table_data); - // Reset and check again. - table_data.reset_manifest_updates(); - check_manifest_snapshot_trigger(&table_data); - } - - fn check_manifest_snapshot_trigger(table_data: &TableData) { - // When no updates yet, result should be false. - assert!(!table_data.should_do_manifest_snapshot()); - - // Eq case. - table_data.increase_manifest_updates(5); - assert!(table_data.should_do_manifest_snapshot()); - - // Greater case. - table_data.increase_manifest_updates(5); - assert!(table_data.should_do_manifest_snapshot()); - } -} diff --git a/src/analytic_engine/src/table/metrics.rs b/src/analytic_engine/src/table/metrics.rs deleted file mode 100644 index d1aa0c57b0..0000000000 --- a/src/analytic_engine/src/table/metrics.rs +++ /dev/null @@ -1,481 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Metrics of table. 
- -use std::{ - sync::{ - atomic::{AtomicU64, Ordering}, - Arc, - }, - time::Duration, -}; - -use common_types::table::ShardId; -use lazy_static::lazy_static; -use prometheus::{ - exponential_buckets, - local::{LocalHistogram, LocalHistogramTimer}, - register_histogram, register_histogram_vec, register_int_counter, register_int_counter_vec, - Histogram, HistogramTimer, HistogramVec, IntCounter, IntCounterVec, -}; -use table_engine::{partition::maybe_extract_partitioned_table_name, table::TableStats}; - -use crate::{sst::metrics::MaybeTableLevelMetrics as SstMaybeTableLevelMetrics, MetricsOptions}; - -const KB: f64 = 1024.0; -const DEFAULT_METRICS_KEY: &str = "total"; - -lazy_static! { - // Counters: - static ref TABLE_WRITE_REQUEST_COUNTER: IntCounter = register_int_counter!( - "table_write_request_counter", - "Write request counter of table" - ) - .unwrap(); - - pub static ref TABLE_WRITE_BYTES_COUNTER: IntCounterVec = register_int_counter_vec!( - "table_write_bytes_counter", - "Write bytes counter of table", - &["shard_id", "table"] - ) - .unwrap(); - - static ref TABLE_WRITE_FIELDS_COUNTER: IntCounter = register_int_counter!( - "table_write_fields_counter", - "Fields counter of table write" - ) - .unwrap(); - - static ref TABLE_READ_REQUEST_COUNTER: IntCounter = register_int_counter!( - "table_read_request_counter", - "Read request counter of table" - ) - .unwrap(); - // End of counters. - - // Histograms: - static ref TABLE_WRITE_BATCH_HISTOGRAM: Histogram = register_histogram!( - "table_write_batch_size", - "Histogram of write batch size", - vec![10.0, 50.0, 100.0, 500.0, 1000.0, 5000.0] - ) - .unwrap(); - - // Buckets: 0, 0.002, .., 0.002 * 4^9 - static ref TABLE_FLUSH_DURATION_HISTOGRAM: Histogram = register_histogram!( - "table_flush_duration", - "Histogram for flush duration of the table in seconds", - exponential_buckets(0.002, 4.0, 10).unwrap() - ).unwrap(); - - // Buckets: 0, 1, .., 2^7 - static ref TABLE_FLUSH_SST_NUM_HISTOGRAM: Histogram = register_histogram!( - "table_flush_sst_num", - "Histogram for number of ssts flushed by the table", - exponential_buckets(1.0, 2.0, 8).unwrap() - ).unwrap(); - - // Buckets: 0, 1, ..., 4^11 (4GB) - static ref TABLE_FLUSH_SST_SIZE_HISTOGRAM: Histogram = register_histogram!( - "table_flush_sst_size", - "Histogram for size of ssts flushed by the table in KB", - exponential_buckets(1.0, 4.0, 12).unwrap() - ).unwrap(); - - // Buckets: 0, 0.02, .., 0.02 * 4^9 - static ref TABLE_COMPACTION_DURATION_HISTOGRAM: Histogram = register_histogram!( - "table_compaction_duration", - "Histogram for compaction duration of the table in seconds", - exponential_buckets(0.02, 4.0, 10).unwrap() - ).unwrap(); - - // Buckets: 0, 1, .., 2^7 - static ref TABLE_COMPACTION_SST_NUM_HISTOGRAM: Histogram = register_histogram!( - "table_compaction_sst_num", - "Histogram for number of ssts compacted by the table", - exponential_buckets(1.0, 2.0, 8).unwrap() - ).unwrap(); - - // Buckets: 0, 1, ..., 4^11 (4GB) - static ref TABLE_COMPACTION_SST_SIZE_HISTOGRAM: HistogramVec = register_histogram_vec!( - "table_compaction_sst_size", - "Histogram for size of ssts compacted by the table in KB", - &["type"], - exponential_buckets(1.0, 4.0, 12).unwrap() - ).unwrap(); - - // Buckets: 0, 1, ..., 10^12(1 billion) - static ref TABLE_COMPACTION_SST_ROW_NUM_HISTOGRAM: HistogramVec = register_histogram_vec!( - "table_compaction_sst_row_num", - "Histogram for row num of ssts compacted by the table", - &["type"], - exponential_buckets(1.0, 10.0, 13).unwrap() - ).unwrap(); - - // Buckets: 0, 
0.01, .., 0.01 * 2^12 - static ref TABLE_WRITE_DURATION_HISTOGRAM: HistogramVec = register_histogram_vec!( - "table_write_duration", - "Histogram for write stall duration of the table in seconds", - &["type"], - exponential_buckets(0.01, 2.0, 13).unwrap() - ).unwrap(); - - static ref QUERY_TIME_RANGE: HistogramVec = register_histogram_vec!( - "query_time_range", - "Histogram for query time range((15m,30m,...,7d)", - &["table"], - exponential_buckets(900.0, 2.0, 10).unwrap() - ) - .unwrap(); - - static ref DURATION_SINCE_QUERY_START_TIME: HistogramVec = register_histogram_vec!( - "duration_since_query_start_time", - "Histogram for duration since query start time(15m,30m,...,7d)", - &["table"], - exponential_buckets(900.0, 2.0, 10).unwrap() - ) - .unwrap(); - - // End of histograms. -} - -#[derive(Default)] -struct AtomicTableStats { - num_write: AtomicU64, - num_read: AtomicU64, - num_flush: AtomicU64, -} - -impl From<&AtomicTableStats> for TableStats { - fn from(stats: &AtomicTableStats) -> Self { - Self { - num_write: stats.num_write.load(Ordering::Relaxed), - num_read: stats.num_read.load(Ordering::Relaxed), - num_flush: stats.num_flush.load(Ordering::Relaxed), - } - } -} - -/// Table metrics. -/// -/// Now the registered labels won't remove from the metrics vec to avoid panic -/// on concurrent removal. -pub struct Metrics { - /// The table name used for metric label - maybe_table_name: String, - /// The label for shard id - shard_id_label: String, - /// Stats of a single table. - stats: Arc, - - compaction_input_sst_size_histogram: Histogram, - compaction_output_sst_size_histogram: Histogram, - compaction_input_sst_row_num_histogram: Histogram, - compaction_output_sst_row_num_histogram: Histogram, - - table_write_stall_duration: Histogram, - table_write_encode_duration: Histogram, - table_write_wal_duration: Histogram, - table_write_memtable_duration: Histogram, - table_write_preprocess_duration: Histogram, - table_write_space_flush_wait_duration: Histogram, - table_write_instance_flush_wait_duration: Histogram, - table_write_flush_wait_duration: Histogram, - table_write_execute_duration: Histogram, - table_write_queue_waiter_duration: Histogram, - table_write_queue_writer_duration: Histogram, - table_write_total_duration: Histogram, - table_write_bytes_counter: IntCounter, -} - -pub struct MaybeTableLevelMetrics { - // TODO: maybe `query_time_range` and `duration_since_query_query_start_time` - // should place on a higher level such `TableEngine`. - // I do so originally, but I found no reasonable place to keep related contexts... - pub query_time_range: Histogram, - pub duration_since_query_query_start_time: Histogram, - - pub sst_metrics: Arc, -} - -impl MaybeTableLevelMetrics { - pub fn new(maybe_table_name: &str, shard_id_label: &str) -> Self { - let sst_metrics = Arc::new(SstMaybeTableLevelMetrics::new( - maybe_table_name, - shard_id_label, - )); - - Self { - query_time_range: QUERY_TIME_RANGE.with_label_values(&[maybe_table_name]), - duration_since_query_query_start_time: DURATION_SINCE_QUERY_START_TIME - .with_label_values(&[maybe_table_name]), - sst_metrics, - } - } -} - -pub struct MetricsContext<'a> { - /// If enable table level metrics, it should be a table name, - /// Otherwise it should be `DEFAULT_METRICS_KEY`. 
- table_name: &'a str, - shard_id: ShardId, - metric_opt: MetricsOptions, - maybe_partitioned_table_name: Option, -} - -impl<'a> MetricsContext<'a> { - pub fn new(table_name: &'a str, shard_id: ShardId, metric_opt: MetricsOptions) -> Self { - Self { - table_name, - shard_id, - metric_opt, - maybe_partitioned_table_name: None, - } - } - - fn maybe_table_name(&mut self) -> &str { - if !self.metric_opt.enable_table_level_metrics { - DEFAULT_METRICS_KEY - } else { - let maybe_partition_table = maybe_extract_partitioned_table_name(self.table_name); - match maybe_partition_table { - Some(partitioned) => { - self.maybe_partitioned_table_name = Some(partitioned); - self.maybe_partitioned_table_name.as_ref().unwrap() - } - None => self.table_name, - } - } - } -} - -impl Metrics { - pub fn new(mut metric_ctx: MetricsContext) -> Self { - let shard_id_label = metric_ctx.shard_id.to_string(); - let maybe_table_name = metric_ctx.maybe_table_name().to_string(); - let table_write_bytes_counter = - TABLE_WRITE_BYTES_COUNTER.with_label_values(&[&shard_id_label, &maybe_table_name]); - Self { - maybe_table_name, - shard_id_label, - stats: Arc::new(AtomicTableStats::default()), - compaction_input_sst_size_histogram: TABLE_COMPACTION_SST_SIZE_HISTOGRAM - .with_label_values(&["input"]), - compaction_output_sst_size_histogram: TABLE_COMPACTION_SST_SIZE_HISTOGRAM - .with_label_values(&["output"]), - compaction_input_sst_row_num_histogram: TABLE_COMPACTION_SST_ROW_NUM_HISTOGRAM - .with_label_values(&["input"]), - compaction_output_sst_row_num_histogram: TABLE_COMPACTION_SST_ROW_NUM_HISTOGRAM - .with_label_values(&["output"]), - - table_write_stall_duration: TABLE_WRITE_DURATION_HISTOGRAM - .with_label_values(&["stall"]), - table_write_encode_duration: TABLE_WRITE_DURATION_HISTOGRAM - .with_label_values(&["encode"]), - table_write_wal_duration: TABLE_WRITE_DURATION_HISTOGRAM.with_label_values(&["wal"]), - table_write_memtable_duration: TABLE_WRITE_DURATION_HISTOGRAM - .with_label_values(&["memtable"]), - table_write_preprocess_duration: TABLE_WRITE_DURATION_HISTOGRAM - .with_label_values(&["preprocess"]), - table_write_space_flush_wait_duration: TABLE_WRITE_DURATION_HISTOGRAM - .with_label_values(&["wait_space_flush"]), - table_write_instance_flush_wait_duration: TABLE_WRITE_DURATION_HISTOGRAM - .with_label_values(&["wait_instance_flush"]), - table_write_flush_wait_duration: TABLE_WRITE_DURATION_HISTOGRAM - .with_label_values(&["wait_flush"]), - table_write_execute_duration: TABLE_WRITE_DURATION_HISTOGRAM - .with_label_values(&["execute"]), - table_write_queue_waiter_duration: TABLE_WRITE_DURATION_HISTOGRAM - .with_label_values(&["queue_waiter"]), - table_write_queue_writer_duration: TABLE_WRITE_DURATION_HISTOGRAM - .with_label_values(&["queue_writer"]), - table_write_total_duration: TABLE_WRITE_DURATION_HISTOGRAM - .with_label_values(&["total"]), - table_write_bytes_counter, - } - } - - /// Generate a table-level metric observer. 
- #[inline] - pub fn maybe_table_level_metrics(&self) -> Arc { - Arc::new(MaybeTableLevelMetrics::new( - &self.maybe_table_name, - &self.shard_id_label, - )) - } - - #[inline] - pub fn table_stats(&self) -> TableStats { - TableStats::from(&*self.stats) - } - - #[inline] - pub fn on_write_request_begin(&self) { - self.stats.num_write.fetch_add(1, Ordering::Relaxed); - TABLE_WRITE_REQUEST_COUNTER.inc(); - } - - #[inline] - pub fn on_write_request_done(&self, num_rows: usize, num_columns: usize, num_bytes: usize) { - TABLE_WRITE_BATCH_HISTOGRAM.observe(num_rows as f64); - TABLE_WRITE_FIELDS_COUNTER.inc_by((num_columns * num_rows) as u64); - self.table_write_bytes_counter.inc_by(num_bytes as u64); - } - - #[inline] - pub fn on_read_request_begin(&self) { - self.stats.num_read.fetch_add(1, Ordering::Relaxed); - TABLE_READ_REQUEST_COUNTER.inc(); - } - - #[inline] - pub fn on_write_stall(&self, duration: Duration) { - self.table_write_stall_duration - .observe(duration.as_secs_f64()); - } - - #[inline] - pub fn start_table_total_timer(&self) -> HistogramTimer { - self.table_write_total_duration.start_timer() - } - - #[inline] - pub fn start_table_write_execute_timer(&self) -> HistogramTimer { - self.table_write_execute_duration.start_timer() - } - - #[inline] - pub fn start_table_write_encode_timer(&self) -> HistogramTimer { - self.table_write_encode_duration.start_timer() - } - - #[inline] - pub fn start_table_write_queue_waiter_timer(&self) -> HistogramTimer { - self.table_write_queue_waiter_duration.start_timer() - } - - #[inline] - pub fn start_table_write_queue_writer_timer(&self) -> HistogramTimer { - self.table_write_queue_writer_duration.start_timer() - } - - #[inline] - pub fn start_table_write_memtable_timer(&self) -> HistogramTimer { - self.table_write_memtable_duration.start_timer() - } - - #[inline] - pub fn start_table_write_wal_timer(&self) -> HistogramTimer { - self.table_write_wal_duration.start_timer() - } - - #[inline] - pub fn start_table_write_preprocess_timer(&self) -> HistogramTimer { - self.table_write_preprocess_duration.start_timer() - } - - #[inline] - pub fn start_table_write_space_flush_wait_timer(&self) -> HistogramTimer { - self.table_write_space_flush_wait_duration.start_timer() - } - - #[inline] - pub fn start_table_write_instance_flush_wait_timer(&self) -> HistogramTimer { - self.table_write_instance_flush_wait_duration.start_timer() - } - - #[inline] - pub fn start_table_write_flush_wait_timer(&self) -> HistogramTimer { - self.table_write_flush_wait_duration.start_timer() - } - - #[inline] - pub fn start_compaction_timer(&self) -> HistogramTimer { - TABLE_COMPACTION_DURATION_HISTOGRAM.start_timer() - } - - #[inline] - pub fn compaction_observe_duration(&self, duration: Duration) { - TABLE_COMPACTION_DURATION_HISTOGRAM.observe(duration.as_secs_f64()); - } - - #[inline] - pub fn compaction_observe_sst_num(&self, sst_num: usize) { - TABLE_COMPACTION_SST_NUM_HISTOGRAM.observe(sst_num as f64); - } - - #[inline] - pub fn compaction_observe_input_sst_size(&self, sst_size: u64) { - // Convert bytes to KB. - self.compaction_input_sst_size_histogram - .observe(sst_size as f64 / KB); - } - - #[inline] - pub fn compaction_observe_output_sst_size(&self, sst_size: u64) { - // Convert bytes to KB. 
- self.compaction_output_sst_size_histogram - .observe(sst_size as f64 / KB); - } - - #[inline] - pub fn compaction_observe_input_sst_row_num(&self, sst_row_num: u64) { - self.compaction_input_sst_row_num_histogram - .observe(sst_row_num as f64); - } - - #[inline] - pub fn compaction_observe_output_sst_row_num(&self, sst_row_num: u64) { - self.compaction_output_sst_row_num_histogram - .observe(sst_row_num as f64); - } - - #[inline] - pub fn local_flush_metrics(&self) -> LocalFlushMetrics { - LocalFlushMetrics { - stats: self.stats.clone(), - flush_duration_histogram: TABLE_FLUSH_DURATION_HISTOGRAM.local(), - flush_sst_num_histogram: TABLE_FLUSH_SST_NUM_HISTOGRAM.local(), - flush_sst_size_histogram: TABLE_FLUSH_SST_SIZE_HISTOGRAM.local(), - } - } -} - -pub struct LocalFlushMetrics { - stats: Arc, - - flush_duration_histogram: LocalHistogram, - flush_sst_num_histogram: LocalHistogram, - flush_sst_size_histogram: LocalHistogram, -} - -impl LocalFlushMetrics { - pub fn start_flush_timer(&self) -> LocalHistogramTimer { - self.stats.num_flush.fetch_add(1, Ordering::Relaxed); - self.flush_duration_histogram.start_timer() - } - - pub fn observe_sst_num(&self, sst_num: usize) { - self.flush_sst_num_histogram.observe(sst_num as f64); - } - - pub fn observe_sst_size(&self, sst_size: u64) { - // Convert bytes to KB. - self.flush_sst_size_histogram.observe(sst_size as f64 / KB); - } -} diff --git a/src/analytic_engine/src/table/mod.rs b/src/analytic_engine/src/table/mod.rs deleted file mode 100644 index 7bd73553be..0000000000 --- a/src/analytic_engine/src/table/mod.rs +++ /dev/null @@ -1,758 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Table implementation - -use std::{ - collections::HashMap, - fmt, - sync::{Arc, Mutex}, -}; - -use async_trait::async_trait; -use common_types::{ - row::{Row, RowGroup}, - schema::Schema, - time::TimeRange, -}; -use datafusion::{common::Column, logical_expr::Expr}; -use future_ext::CancellationSafeFuture; -use futures::TryStreamExt; -use generic_error::BoxError; -use logger::{error, warn}; -use snafu::{ensure, OptionExt, ResultExt}; -use table_engine::{ - partition::PartitionInfo, - predicate::PredicateBuilder, - stream::{PartitionedStreams, SendableRecordBatchStream}, - table::{ - AlterOptions, AlterSchema, AlterSchemaRequest, Compact, Flush, FlushRequest, Get, - GetInvalidPrimaryKey, GetNullPrimaryKey, GetRequest, MergeWrite, ReadOptions, ReadRequest, - Result, Scan, Table, TableId, TableStats, TooManyPendingWrites, WaitForPendingWrites, - Write, WriteRequest, - }, - ANALYTIC_ENGINE_TYPE, -}; -use tokio::sync::oneshot::{self, Receiver, Sender}; -use trace_metric::MetricsCollector; - -use self::data::TableDataRef; -use crate::{ - instance::{alter::Alterer, write::Writer, InstanceRef}, - space::{SpaceAndTable, SpaceRef}, -}; - -pub mod data; -pub mod metrics; -pub mod sst_util; -pub mod version; -pub mod version_edit; - -const GET_METRICS_COLLECTOR_NAME: &str = "get"; -// Additional 1/10 of the pending writes capacity is reserved for new pending -// writes. -const ADDITIONAL_PENDING_WRITE_CAP_RATIO: usize = 10; - -struct WriteRequests { - pub space: SpaceRef, - pub table_data: TableDataRef, - pub instance: InstanceRef, - pub pending_writes: Arc>, -} - -impl WriteRequests { - pub fn new( - instance: InstanceRef, - space: SpaceRef, - table_data: TableDataRef, - pending_writes: Arc>, - ) -> Self { - Self { - space, - instance, - table_data, - pending_writes, - } - } -} - -/// Table trait implementation -pub struct TableImpl { - space: SpaceRef, - /// Instance - instance: InstanceRef, - /// Engine type - engine_type: String, - - /// Holds a strong reference to prevent the underlying table from being - /// dropped when this handle exist. - table_data: TableDataRef, - - /// Buffer for written rows. - pending_writes: Arc>, -} - -impl TableImpl { - pub fn new(instance: InstanceRef, space_table: SpaceAndTable) -> Self { - let pending_writes = Mutex::new(PendingWriteQueue::new(instance.max_rows_in_write_queue)); - let table_data = space_table.table_data().clone(); - let space = space_table.space().clone(); - Self { - space, - instance, - engine_type: ANALYTIC_ENGINE_TYPE.to_string(), - table_data, - pending_writes: Arc::new(pending_writes), - } - } -} - -impl fmt::Debug for TableImpl { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("TableImpl") - .field("space_id", &self.space.id) - .field("table_id", &self.table_data.id) - .finish() - } -} - -/// The queue for buffering pending write requests. -struct PendingWriteQueue { - max_rows: usize, - pending_writes: PendingWrites, -} - -/// The underlying queue for buffering pending write requests. -#[derive(Default)] -struct PendingWrites { - writes: Vec, - notifiers: Vec>>, - num_rows: usize, -} - -impl PendingWrites { - pub fn with_capacity(cap: usize) -> Self { - Self { - writes: Vec::with_capacity(cap), - notifiers: Vec::with_capacity(cap), - num_rows: 0, - } - } - - /// Try to push the request into the pending queue. - /// - /// This push will be rejected if the schema is different. 
- fn try_push(&mut self, request: WriteRequest) -> QueueResult { - if !self.is_same_schema(request.row_group.schema()) { - return QueueResult::Reject(request); - } - - // For the first pending writes, don't provide the receiver because it should do - // the write for the pending writes and no need to wait for the notification. - let res = if self.is_empty() { - QueueResult::First - } else { - let (tx, rx) = oneshot::channel(); - self.notifiers.push(tx); - QueueResult::Waiter(rx) - }; - - self.num_rows += request.row_group.num_rows(); - self.writes.push(request); - - res - } - - /// Check if the schema of the request is the same as the schema of the - /// pending write requests. - /// - /// Return true if the pending write requests is empty. - fn is_same_schema(&self, schema: &Schema) -> bool { - if self.is_empty() { - return true; - } - - let request = &self.writes[0]; - schema.version() == request.row_group.schema().version() - } - - #[inline] - fn is_empty(&self) -> bool { - self.writes.is_empty() - } -} - -/// The result when trying to push write request to the queue. -enum QueueResult { - /// This request is rejected because the queue is full or the schema is - /// different. - #[allow(dead_code)] - Reject(WriteRequest), - /// This request is the first one in the queue. - First, - /// This request is pushed into the queue and the caller should wait for the - /// finish notification. - Waiter(Receiver>), -} - -impl PendingWriteQueue { - fn new(max_rows: usize) -> Self { - Self { - max_rows, - pending_writes: PendingWrites::default(), - } - } - - /// Try to push the request into the queue. - /// - /// If the queue is full or the schema is different, return the request - /// back. Otherwise, return a receiver to let the caller wait for the write - /// result. - fn try_push(&mut self, request: WriteRequest) -> QueueResult { - if self.is_full() { - return QueueResult::Reject(request); - } - - self.pending_writes.try_push(request) - } - - #[inline] - fn is_full(&self) -> bool { - self.pending_writes.num_rows >= self.max_rows - } - - /// Clear the pending writes and reset the number of rows. - fn take_pending_writes(&mut self) -> PendingWrites { - let curr_num_reqs = self.pending_writes.writes.len(); - let new_cap = curr_num_reqs / ADDITIONAL_PENDING_WRITE_CAP_RATIO + curr_num_reqs; - let new_pending_writes = PendingWrites::with_capacity(new_cap); - std::mem::replace(&mut self.pending_writes, new_pending_writes) - } -} - -/// Merge the pending write requests into a same one. -/// -/// The schema of all the pending write requests should be the same. -/// REQUIRES: the `pending_writes` is required non-empty. -fn merge_pending_write_requests( - mut pending_writes: Vec, - num_pending_rows: usize, -) -> WriteRequest { - assert!(!pending_writes.is_empty()); - - let mut last_req = pending_writes.pop().unwrap(); - let total_rows = { - let mut rows = Vec::with_capacity(num_pending_rows); - for mut pending_req in pending_writes { - let mut pending_rows = pending_req.row_group.take_rows(); - rows.append(&mut pending_rows); - } - let mut last_rows = last_req.row_group.take_rows(); - rows.append(&mut last_rows); - rows - }; - - let schema = last_req.row_group.into_schema(); - let row_group = RowGroup::new_unchecked(schema, total_rows); - WriteRequest { row_group } -} - -impl TableImpl { - /// Perform table write with pending queue. - /// - /// The writes will be put into the pending queue first. 
And the writer who - /// submits the first request to the queue is responsible for merging and - /// writing all the writes in the queue. - /// - /// NOTE: The write request will be rejected if the queue is full. - async fn write_with_pending_queue(&self, request: WriteRequest) -> Result { - let num_rows = request.row_group.num_rows(); - - // Failed to acquire the serial_exec, put the request into the - // pending queue. - let queue_res = { - let mut pending_queue = self.pending_writes.lock().unwrap(); - pending_queue.try_push(request) - }; - - match queue_res { - QueueResult::First => { - let _timer = self - .table_data - .metrics - .start_table_write_queue_writer_timer(); - - // This is the first request in the queue, and we should - // take responsibilities for merging and writing the - // requests in the queue. - let write_requests = WriteRequests::new( - self.instance.clone(), - self.space.clone(), - self.table_data.clone(), - self.pending_writes.clone(), - ); - - match CancellationSafeFuture::new( - Self::write_requests(write_requests), - "pending_queue_writer", - self.instance.write_runtime().clone(), - ) - .await - { - Ok(_) => Ok(num_rows), - Err(e) => Err(e), - } - } - QueueResult::Waiter(rx) => { - // The request is successfully pushed into the queue, and just wait for the - // write result. - let _timer = self - .table_data - .metrics - .start_table_write_queue_waiter_timer(); - - // We have ever observed that `rx` is closed in production but it is impossible - // in theory(especially after warping actual write by - // `CancellationSafeFuture`). So we also warp `rx` by - // `CancellationSafeFuture` for not just retrying but better observing. - match CancellationSafeFuture::new( - rx, - "pending_queue_waiter", - self.instance.write_runtime().clone(), - ) - .await - { - Ok(res) => { - res.box_err().context(Write { table: self.name() })?; - Ok(num_rows) - } - Err(_) => WaitForPendingWrites { table: self.name() }.fail(), - } - } - QueueResult::Reject(_) => { - // The queue is full, return error. - error!( - "Pending_writes queue is full, max_rows_in_queue:{}, table:{}", - self.instance.max_rows_in_write_queue, - self.name(), - ); - TooManyPendingWrites { table: self.name() }.fail() - } - } - } - - async fn write_requests(write_requests: WriteRequests) -> Result<()> { - let mut serial_exec = write_requests.table_data.serial_exec.lock().await; - // The `serial_exec` is acquired, let's merge the pending requests and write - // them all. - let pending_writes = { - let mut pending_queue = write_requests.pending_writes.lock().unwrap(); - pending_queue.take_pending_writes() - }; - assert!( - !pending_writes.is_empty(), - "The pending writes should contain at least the one just pushed." - ); - let merged_write_request = - merge_pending_write_requests(pending_writes.writes, pending_writes.num_rows); - - let mut writer = Writer::new( - write_requests.instance, - write_requests.space, - write_requests.table_data.clone(), - &mut serial_exec, - ); - let write_res = writer - .write(merged_write_request) - .await - .box_err() - .context(Write { - table: write_requests.table_data.name.clone(), - }); - - // There is no waiter for pending writes, return the write result. - let notifiers = pending_writes.notifiers; - if notifiers.is_empty() { - return Ok(()); - } - - // Notify the waiters for the pending writes. 
- match write_res { - Ok(_) => { - for notifier in notifiers { - if notifier.send(Ok(())).is_err() { - warn!( - "Failed to notify the ok result of pending writes, table:{}", - write_requests.table_data.name - ); - } - } - Ok(()) - } - Err(e) => { - let err_msg = format!("Failed to do merge write, err:{e}"); - for notifier in notifiers { - let err = MergeWrite { msg: &err_msg }.fail(); - if notifier.send(err).is_err() { - warn!( - "Failed to notify the error result of pending writes, table:{}", - write_requests.table_data.name - ); - } - } - Err(e) - } - } - } - - #[inline] - fn should_queue_write_request(&self, request: &WriteRequest) -> bool { - request.row_group.num_rows() < self.instance.max_rows_in_write_queue - } -} - -pub fn support_pushdown(schema: &Schema, need_dedup: bool, col_names: &[String]) -> bool { - if !need_dedup { - return true; - } - - // When table need dedup, only unique keys columns support pushdown - // See https://github.com/apache/incubator-horaedb/issues/605 - col_names - .iter() - .all(|col_name| schema.is_unique_column(col_name.as_str())) -} - -#[async_trait] -impl Table for TableImpl { - fn name(&self) -> &str { - &self.table_data.name - } - - fn id(&self) -> TableId { - self.table_data.id - } - - fn schema(&self) -> Schema { - self.table_data.schema() - } - - fn options(&self) -> HashMap { - self.table_data.table_options().to_raw_map() - } - - fn partition_info(&self) -> Option { - None - } - - fn engine_type(&self) -> &str { - &self.engine_type - } - - fn stats(&self) -> TableStats { - self.table_data.metrics.table_stats() - } - - fn support_pushdown(&self, read_schema: &Schema, col_names: &[String]) -> bool { - let need_dedup = self.table_data.table_options().need_dedup(); - - support_pushdown(read_schema, need_dedup, col_names) - } - - async fn write(&self, request: WriteRequest) -> Result { - let _timer = self.table_data.metrics.start_table_total_timer(); - - if self.should_queue_write_request(&request) { - return self.write_with_pending_queue(request).await; - } - - let mut serial_exec = self.table_data.serial_exec.lock().await; - let mut writer = Writer::new( - self.instance.clone(), - self.space.clone(), - self.table_data.clone(), - &mut serial_exec, - ); - writer - .write(request) - .await - .box_err() - .context(Write { table: self.name() }) - } - - async fn read(&self, mut request: ReadRequest) -> Result { - request.opts.read_parallelism = 1; - let mut streams = self - .instance - .partitioned_read_from_table(&self.table_data, request) - .await - .box_err() - .context(Scan { table: self.name() })?; - - assert_eq!(streams.streams.len(), 1); - let stream = streams.streams.pop().unwrap(); - - Ok(stream) - } - - async fn get(&self, request: GetRequest) -> Result> { - let schema = request.projected_schema.to_record_schema_with_key(); - let primary_key_columns = &schema.key_columns()[..]; - ensure!( - primary_key_columns.len() == request.primary_key.len(), - GetInvalidPrimaryKey { - schema: schema.clone(), - primary_key_columns, - } - ); - - let mut primary_key_exprs: Vec = Vec::with_capacity(request.primary_key.len()); - for (primary_key_value, column_schema) in - request.primary_key.iter().zip(primary_key_columns.iter()) - { - let v = primary_key_value - .as_scalar_value() - .with_context(|| GetNullPrimaryKey { - schema: schema.clone(), - primary_key_columns, - })?; - primary_key_exprs.push( - Expr::Column(Column::from_qualified_name(&column_schema.name)).eq(Expr::Literal(v)), - ); - } - - let predicate = PredicateBuilder::default() - 
.set_time_range(TimeRange::min_to_max()) - .add_pushdown_exprs(&primary_key_exprs) - .build(); - - let read_request = ReadRequest { - request_id: request.request_id, - opts: ReadOptions::default(), - projected_schema: request.projected_schema, - predicate, - metrics_collector: MetricsCollector::new(GET_METRICS_COLLECTOR_NAME.to_string()), - // TODO: pass priority from request - priority: Default::default(), - }; - let mut batch_stream = self - .read(read_request) - .await - .box_err() - .context(Scan { table: self.name() })?; - - let mut result_columns = Vec::with_capacity(schema.num_columns()); - - while let Some(batch) = batch_stream - .try_next() - .await - .box_err() - .context(Get { table: self.name() })? - { - let row_num = batch.num_rows(); - if row_num == 0 { - return Ok(None); - } - for row_idx in 0..row_num { - for col_idx in 0..batch.num_columns() { - let col = batch.column(col_idx); - result_columns.push(col.datum(row_idx)); - } - - let mut result_columns_k = vec![]; - for col_idx in schema.primary_key_idx() { - result_columns_k.push(result_columns[*col_idx].clone()); - } - if request.primary_key == result_columns_k { - return Ok(Some(Row::from_datums(result_columns))); - } - result_columns.clear(); - } - } - - Ok(None) - } - - async fn partitioned_read(&self, request: ReadRequest) -> Result { - let streams = self - .instance - .partitioned_read_from_table(&self.table_data, request) - .await - .box_err() - .context(Scan { table: self.name() })?; - - Ok(streams) - } - - async fn alter_schema(&self, request: AlterSchemaRequest) -> Result { - let mut serial_exec = self.table_data.serial_exec.lock().await; - let mut alterer = Alterer::new( - self.table_data.clone(), - &mut serial_exec, - self.instance.clone(), - ) - .await; - - alterer - .alter_schema_of_table(request) - .await - .box_err() - .context(AlterSchema { table: self.name() })?; - Ok(0) - } - - async fn alter_options(&self, options: HashMap) -> Result { - let mut serial_exec = self.table_data.serial_exec.lock().await; - let alterer = Alterer::new( - self.table_data.clone(), - &mut serial_exec, - self.instance.clone(), - ) - .await; - - alterer - .alter_options_of_table(options) - .await - .box_err() - .context(AlterOptions { table: self.name() })?; - Ok(0) - } - - async fn flush(&self, request: FlushRequest) -> Result<()> { - self.instance - .manual_flush_table(&self.table_data, request) - .await - .box_err() - .context(Flush { table: self.name() }) - } - - async fn compact(&self) -> Result<()> { - self.instance - .manual_compact_table(&self.table_data) - .await - .box_err() - .context(Compact { table: self.name() })?; - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use common_types::{schema::Version, time::Timestamp}; - - use super::*; - use crate::tests::{row_util, table::FixedSchemaTable}; - - fn build_test_write_request( - seed: i64, - num_rows: usize, - schema_version: Version, - ) -> WriteRequest { - let schema = FixedSchemaTable::default_schema_builder() - .version(schema_version) - .primary_key_indexes(vec![0, 1]) - .build() - .unwrap(); - let mut schema_rows = Vec::with_capacity(num_rows); - for i in 0..num_rows { - let row = ( - "key1", - Timestamp::new(seed + i as i64 * 7), - "tag1-1", - 11.0, - 110.0, - "tag2-1", - ); - schema_rows.push(row); - } - let rows = row_util::new_rows_6(&schema_rows); - let row_group = RowGroup::try_new(schema, rows).unwrap(); - WriteRequest { row_group } - } - - #[test] - fn test_queue_write_requests() { - let mut queue = PendingWriteQueue::new(100); - let req0 = 
build_test_write_request(0, 99, 0); - let res0 = queue.try_push(req0); - assert!(!queue.is_full()); - assert!(matches!(res0, QueueResult::First)); - - let req1 = build_test_write_request(10, 10, 0); - let res1 = queue.try_push(req1); - assert!(queue.is_full()); - assert!(matches!(res1, QueueResult::Waiter(_))); - - let req2 = build_test_write_request(20, 17, 0); - let res2 = queue.try_push(req2); - assert!(queue.is_full()); - assert!(matches!(res2, QueueResult::Reject(_))); - if let QueueResult::Reject(req) = res2 { - assert_eq!(req.row_group.num_rows(), 17); - } - - // Reset the queue, and check the result. - let pending_writes = queue.take_pending_writes(); - assert_eq!(pending_writes.num_rows, 99 + 10); - assert_eq!(pending_writes.writes.len(), 2); - // Only one waiter. - assert_eq!(pending_writes.notifiers.len(), 1); - - assert!(queue.pending_writes.is_empty()); - } - - #[test] - fn test_queue_write_requests_with_different_schema() { - let mut queue = PendingWriteQueue::new(100); - let req0 = build_test_write_request(0, 10, 0); - let res0 = queue.try_push(req0); - assert!(matches!(res0, QueueResult::First)); - - let req1 = build_test_write_request(1, 10, 1); - let res1 = queue.try_push(req1); - assert!(matches!(res1, QueueResult::Reject(_))); - } - - #[test] - fn test_merge_pending_write_requests() { - let mut queue = PendingWriteQueue::new(100); - let mut total_requests = Vec::with_capacity(3); - let req0 = build_test_write_request(0, 40, 0); - total_requests.push(req0.clone()); - queue.try_push(req0); - - let req1 = build_test_write_request(10, 40, 0); - total_requests.push(req1.clone()); - queue.try_push(req1); - - let req2 = build_test_write_request(10, 40, 0); - total_requests.push(req2.clone()); - queue.try_push(req2); - - let pending_writes = queue.take_pending_writes(); - let mut merged_request = - merge_pending_write_requests(pending_writes.writes, pending_writes.num_rows); - - let merged_rows = merged_request.row_group.take_rows(); - let original_rows = total_requests - .iter_mut() - .flat_map(|req| req.row_group.take_rows()) - .collect::>(); - - assert_eq!(merged_rows, original_rows); - } -} diff --git a/src/analytic_engine/src/table/sst_util.rs b/src/analytic_engine/src/table/sst_util.rs deleted file mode 100644 index bafb19c4ff..0000000000 --- a/src/analytic_engine/src/table/sst_util.rs +++ /dev/null @@ -1,47 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! utilities for sst. - -use std::iter::FromIterator; - -use object_store::Path; -use table_engine::table::TableId; - -use crate::{space::SpaceId, sst::manager::FileId}; - -const SST_FILE_SUFFIX: &str = "sst"; -const SST_CUSTOM_METADATA_FILE_SUFFIX: &str = "metadata"; - -#[inline] -/// Generate the sst file name. 
-pub fn sst_file_name(id: FileId) -> String { - format!("{id}.{SST_FILE_SUFFIX}") -} - -pub fn new_sst_file_path(space_id: SpaceId, table_id: TableId, file_id: FileId) -> Path { - Path::from_iter([ - space_id.to_string(), - table_id.to_string(), - sst_file_name(file_id), - ]) -} - -/// Convert sst_file_path into custom metadata path -pub fn new_metadata_path(sst_file_path: &str) -> String { - format!("{sst_file_path}.{SST_CUSTOM_METADATA_FILE_SUFFIX}") -} diff --git a/src/analytic_engine/src/table/version.rs b/src/analytic_engine/src/table/version.rs deleted file mode 100644 index 14c247dfd1..0000000000 --- a/src/analytic_engine/src/table/version.rs +++ /dev/null @@ -1,1237 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Table version - -use std::{ - cmp, - collections::{BTreeMap, HashMap}, - fmt, - ops::Bound, - sync::{Arc, RwLock}, - time::Duration, -}; - -use common_types::{ - row::Row, - schema::{self, Schema}, - time::{TimeRange, Timestamp}, - SequenceNumber, -}; -use macros::define_result; -use sampling_cache::SamplingCachedUsize; -use snafu::{ensure, Backtrace, ResultExt, Snafu}; -use time_ext::ReadableDuration; - -use crate::{ - compaction::{ - picker::{self, CompactionPickerRef, PickerContext}, - CompactionTask, ExpiredFiles, - }, - memtable::{self, key::KeySequence, MemTableRef, PutContext}, - sampler::{DefaultSampler, PrimaryKeySampler, SamplerRef, MAX_SUGGEST_PRIMARY_KEY_NUM}, - sst::{ - file::{FileHandle, FilePurgeQueue, SST_LEVEL_NUM}, - manager::{FileId, LevelsController}, - }, - table::{ - data::{MemTableId, DEFAULT_ALLOC_STEP}, - version_edit::{AddFile, VersionEdit}, - }, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display( - "Schema mismatch, memtable_version:{}, given:{}.\nBacktrace:\n{}", - memtable_version, - given, - backtrace - ))] - SchemaMismatch { - memtable_version: schema::Version, - given: schema::Version, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to put memtable, err:{}", source))] - PutMemTable { source: crate::memtable::Error }, - - #[snafu(display("Failed to collect timestamp, err:{}", source))] - CollectTimestamp { source: crate::sampler::Error }, -} - -define_result!(Error); - -/// Memtable for sampling timestamp. -#[derive(Clone)] -pub struct SamplingMemTable { - pub mem: MemTableRef, - pub id: MemTableId, - /// If freezed is true, the sampling is finished and no more data should be - /// inserted into this memtable. Otherwise, the memtable is active and all - /// data should ONLY write to this memtable instead of mutable memtable. 
- pub freezed: bool, - pub sampler: SamplerRef, - pub pk_sampler: Option, -} - -impl SamplingMemTable { - pub fn new(memtable: MemTableRef, id: MemTableId) -> Self { - SamplingMemTable { - mem: memtable, - id, - freezed: false, - sampler: Arc::new(DefaultSampler::default()), - pk_sampler: None, - } - } - - // TODO: add a builder for SamplingMemTable - pub fn set_pk_sampler(&mut self, schema: &Schema) { - self.pk_sampler = Some(PrimaryKeySampler::new( - schema, - // Make this configurable - MAX_SUGGEST_PRIMARY_KEY_NUM, - )); - } - - pub fn last_sequence(&self) -> SequenceNumber { - self.mem.last_sequence() - } - - fn memory_usage(&self) -> usize { - self.mem.approximate_memory_usage() - } - - /// Suggest segment duration, if there is no sampled timestamp, returns - /// default segment duration. - fn suggest_segment_duration(&self) -> Duration { - self.sampler.suggest_duration() - } - - fn suggest_primary_key(&self) -> Option> { - self.pk_sampler.as_ref().and_then(|sampler| { - let new_pk_idx = sampler.suggest(); - let old_pk_idx = self.mem.schema().primary_key_indexes(); - // If new suggested idx is the same with old, return None to avoid unnecessary - // meta update. - if new_pk_idx == old_pk_idx { - None - } else { - Some(new_pk_idx) - } - }) - } -} - -impl fmt::Debug for SamplingMemTable { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("SamplingMemTable") - .field("id", &self.id) - .field("freezed", &self.freezed) - .finish() - } -} - -/// Memtable with additional meta data -#[derive(Clone)] -pub struct MemTableState { - /// The mutable memtable - pub mem: MemTableRef, - /// The `time_range` is estimated via the time range of the first row group - /// write to this memtable and is aligned to segment size - pub aligned_time_range: TimeRange, - /// Id of the memtable, newer memtable has greater id - pub id: MemTableId, -} - -impl MemTableState { - #[inline] - pub fn last_sequence(&self) -> SequenceNumber { - self.mem.last_sequence() - } - - pub fn real_time_range(&self) -> TimeRange { - self.mem.time_range().unwrap_or(self.aligned_time_range) - } -} - -impl fmt::Debug for MemTableState { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("MemTableState") - .field("aligned_time_range", &self.aligned_time_range) - .field("real_time_range", &self.real_time_range()) - .field("id", &self.id) - .field("mem", &self.mem.approximate_memory_usage()) - .field("metrics", &self.mem.metrics()) - .field("last_sequence", &self.mem.last_sequence()) - .finish() - } -} - -// TODO(yingwen): Replace by Either. -#[derive(Clone)] -pub enum MemTableForWrite { - Sampling(SamplingMemTable), - Normal(MemTableState), -} - -impl MemTableForWrite { - #[inline] - pub fn set_last_sequence(&self, seq: SequenceNumber) -> memtable::Result<()> { - self.memtable().set_last_sequence(seq) - } - - #[inline] - pub fn accept_timestamp(&self, timestamp: Timestamp) -> bool { - match self { - MemTableForWrite::Sampling(_) => true, - MemTableForWrite::Normal(v) => v.aligned_time_range.contains(timestamp), - } - } - - #[inline] - pub fn put( - &self, - ctx: &mut PutContext, - sequence: KeySequence, - row: &Row, - schema: &Schema, - timestamp: Timestamp, - ) -> Result<()> { - match self { - MemTableForWrite::Sampling(v) => { - v.mem.put(ctx, sequence, row, schema).context(PutMemTable)?; - - // Collect the timestamp of this row. 
- v.sampler.collect(timestamp).context(CollectTimestamp)?; - - if let Some(sampler) = &v.pk_sampler { - sampler.collect(row); - } - - Ok(()) - } - MemTableForWrite::Normal(v) => { - v.mem.put(ctx, sequence, row, schema).context(PutMemTable) - } - } - } - - #[inline] - fn memtable(&self) -> &MemTableRef { - match self { - MemTableForWrite::Sampling(v) => &v.mem, - MemTableForWrite::Normal(v) => &v.mem, - } - } - - #[cfg(test)] - pub fn as_sampling(&self) -> &SamplingMemTable { - match self { - MemTableForWrite::Sampling(v) => v, - MemTableForWrite::Normal(_) => panic!(), - } - } - - #[cfg(test)] - pub fn as_normal(&self) -> &MemTableState { - match self { - MemTableForWrite::Sampling(_) => panic!(), - MemTableForWrite::Normal(v) => v, - } - } -} - -#[derive(Debug, Default)] -pub struct FlushableMemTables { - pub sampling_mem: Option, - pub memtables: MemTableVec, -} - -impl FlushableMemTables { - #[inline] - pub fn is_empty(&self) -> bool { - self.sampling_mem.is_none() && self.memtables.is_empty() - } - - pub fn ids(&self) -> Vec { - let mut memtable_ids = Vec::with_capacity(self.memtables.len() + 1); - if let Some(v) = &self.sampling_mem { - memtable_ids.push(v.id); - } - for mem in &self.memtables { - memtable_ids.push(mem.id); - } - - memtable_ids - } - - pub fn len(&self) -> usize { - self.sampling_mem.as_ref().map_or(0, |_| 1) + self.memtables.len() - } -} - -/// Vec to store memtables -pub type MemTableVec = Vec; - -/// MemTableView holds all memtables of the table -#[derive(Debug)] -struct MemTableView { - /// The memtable for sampling timestamp to suggest segment duration. - /// - /// This memtable is special and may contains data in different segment, so - /// can not be moved into immutable memtable set. - sampling_mem: Option, - /// Mutable memtables arranged by its time range. - mutables: MutableMemTableSet, - /// Immutable memtables set, lookup by memtable id is fast. - immutables: ImmutableMemTableSet, -} - -impl MemTableView { - fn new() -> Self { - Self { - sampling_mem: None, - mutables: MutableMemTableSet::new(), - immutables: ImmutableMemTableSet(BTreeMap::new()), - } - } - - /// Get the memory usage of mutable memtables. - fn mutable_memory_usage(&self) -> usize { - self.mutables.memory_usage() - + self - .sampling_mem - .as_ref() - .map(|v| v.memory_usage()) - .unwrap_or(0) - } - - /// Get the total memory usage of mutable and immutable memtables. - fn total_memory_usage(&self) -> usize { - let mutable_usage = self.mutable_memory_usage(); - let immutable_usage = self.immutables.memory_usage(); - - mutable_usage + immutable_usage - } - - /// Instead of replace the old memtable by a new memtable, we just move the - /// old memtable to immutable memtables and left mutable memtables - /// empty. New mutable memtable will be constructed via put request. - fn switch_memtables(&mut self) -> Option { - self.mutables.move_to_inmem(&mut self.immutables) - } - - /// Sample the segment duration. - /// - /// If the sampling memtable is still active, return the suggested segment - /// duration or move all mutable memtables into immutable memtables if - /// the sampling memtable is freezed and returns None. - fn suggest_duration(&mut self) -> Option { - if let Some(v) = &mut self.sampling_mem { - if !v.freezed { - // Other memtable should be empty during sampling phase. - assert!(self.mutables.is_empty()); - assert!(self.immutables.is_empty()); - - // The sampling memtable is still active, we need to compute the - // segment duration and then freeze the memtable. 
- let segment_duration = v.suggest_segment_duration(); - - // But we cannot freeze the sampling memtable now, because the - // segment duration may not yet been persisted. - return Some(segment_duration); - } - } - - None - } - - fn suggest_primary_key(&mut self) -> Option> { - if let Some(v) = &mut self.sampling_mem { - if !v.freezed { - // Other memtable should be empty during sampling phase. - assert!(self.mutables.is_empty()); - assert!(self.immutables.is_empty()); - - return v.suggest_primary_key(); - } - } - - None - } - - fn freeze_sampling_memtable(&mut self) -> Option { - if let Some(v) = &mut self.sampling_mem { - v.freezed = true; - return Some(v.mem.last_sequence()); - } - None - } - - /// Returns memtables need to be flushed. Only sampling memtable and - /// immutables will be considered. And only memtables which `last_sequence` - /// less or equal to the given [SequenceNumber] will be picked. - /// - /// This method assumes that one sequence number will not exist in multiple - /// memtables. - fn pick_memtables_to_flush(&self, last_sequence: SequenceNumber) -> FlushableMemTables { - let mut mems = FlushableMemTables::default(); - - if let Some(v) = &self.sampling_mem { - if v.last_sequence() <= last_sequence { - mems.sampling_mem = Some(v.clone()); - } - } - - for mem in self.immutables.0.values() { - if mem.last_sequence() <= last_sequence { - mems.memtables.push(mem.clone()); - } - } - - mems - } - - /// Remove memtable from immutables or sampling memtable. - #[inline] - fn remove_immutable_or_sampling(&mut self, id: MemTableId) { - if let Some(v) = &self.sampling_mem { - if v.id == id { - self.sampling_mem = None; - return; - } - } - - self.immutables.0.remove(&id); - } - - /// Collect memtables intersect with `time_range` - fn memtables_for_read( - &self, - time_range: TimeRange, - mems: &mut MemTableVec, - sampling_mem: &mut Option, - ) { - self.mutables.memtables_for_read(time_range, mems); - - self.immutables.memtables_for_read(time_range, mems); - - *sampling_mem = self.sampling_mem.clone(); - } -} - -/// Mutable memtables -/// -/// All mutable memtables ordered by their end time (exclusive), their time -/// range may overlaps if `alter segment duration` is supported -/// -/// We choose end time so we can use BTreeMap::range to find the first range -/// that may contains a given timestamp (end >= timestamp) -#[derive(Debug)] -struct MutableMemTableSet(BTreeMap); - -impl MutableMemTableSet { - fn new() -> Self { - Self(BTreeMap::new()) - } - - /// Get memtable by timestamp for write - fn memtable_for_write(&self, timestamp: Timestamp) -> Option<&MemTableState> { - // Find the first memtable whose end time (exclusive) > timestamp - if let Some((_, memtable)) = self - .0 - .range((Bound::Excluded(timestamp), Bound::Unbounded)) - .next() - { - if memtable.aligned_time_range.contains(timestamp) { - return Some(memtable); - } - } - - None - } - - /// Insert memtable, the caller should guarantee the key of memtable is not - /// present. - fn insert(&mut self, memtable: MemTableState) -> Option { - // Use end time of time range as key - let end = memtable.aligned_time_range.exclusive_end(); - self.0.insert(end, memtable) - } - - fn memory_usage(&self) -> usize { - self.0 - .values() - .map(|m| m.mem.approximate_memory_usage()) - .sum() - } - - /// Move all mutable memtables to immutable memtables. 
- fn move_to_inmem(&mut self, immem: &mut ImmutableMemTableSet) -> Option { - let last_seq = self - .0 - .values() - .map(|m| { - let last_sequence = m.mem.last_sequence(); - immem.0.insert(m.id, m.clone()); - - last_sequence - }) - .max(); - - self.0.clear(); - last_seq - } - - fn memtables_for_read(&self, time_range: TimeRange, mems: &mut MemTableVec) { - // Seek to first memtable whose end time (exclusive) > time_range.start - let inclusive_start = time_range.inclusive_start(); - let iter = self - .0 - .range((Bound::Excluded(inclusive_start), Bound::Unbounded)); - for (_end_ts, mem_state) in iter { - // We need to iterate all candidate memtables as their start time is unspecific - let memtable_time_range = mem_state.real_time_range(); - if memtable_time_range.intersect_with(time_range) { - mems.push(mem_state.clone()); - } - } - } - - fn is_empty(&self) -> bool { - self.0.is_empty() - } -} - -/// Immutable memtables set -/// -/// MemTables are ordered by memtable id, so lookup by memtable id is fast -#[derive(Debug)] -struct ImmutableMemTableSet(BTreeMap); - -impl ImmutableMemTableSet { - /// Memory used by all immutable memtables - fn memory_usage(&self) -> usize { - self.0 - .values() - .map(|m| m.mem.approximate_memory_usage()) - .sum() - } - - fn memtables_for_read(&self, time_range: TimeRange, mems: &mut MemTableVec) { - for mem_state in self.0.values() { - let memtable_time_range = mem_state.real_time_range(); - if memtable_time_range.intersect_with(time_range) { - mems.push(mem_state.clone()); - } - } - } - - fn is_empty(&self) -> bool { - self.0.is_empty() - } -} - -pub type LeveledFiles = Vec>; - -/// Memtable/sst to read for given time range. -pub struct ReadView { - pub sampling_mem: Option, - pub memtables: MemTableVec, - /// Ssts to read in each level. - /// - /// The `ReadView` MUST ensure the length of `leveled_ssts` >= MAX_LEVEL. - pub leveled_ssts: LeveledFiles, -} - -impl Default for ReadView { - fn default() -> Self { - Self { - sampling_mem: None, - memtables: Vec::new(), - leveled_ssts: vec![Vec::new(); SST_LEVEL_NUM], - } - } -} - -impl ReadView { - pub fn contains_sampling(&self) -> bool { - self.sampling_mem.is_some() - } -} - -/// Data of TableVersion -struct TableVersionInner { - /// All memtables - memtable_view: MemTableView, - /// All ssts - levels_controller: LevelsController, - - /// The earliest sequence number of the entries already flushed (inclusive). - /// All log entry with sequence <= `flushed_sequence` can be deleted - flushed_sequence: SequenceNumber, - /// Max id of the sst file. - /// - /// The id is allocated by step, so there are some still unused ids smaller - /// than the max one. And this field is only a mem state for Manifest, - /// it can only be updated during recover or by Manifest. - max_file_id: FileId, -} - -impl TableVersionInner { - fn memtable_for_write(&self, timestamp: Timestamp) -> Option { - if let Some(mem) = self.memtable_view.sampling_mem.clone() { - if !mem.freezed { - // If sampling memtable is not freezed. - return Some(MemTableForWrite::Sampling(mem)); - } - } - - self.memtable_view - .mutables - .memtable_for_write(timestamp) - .cloned() - .map(MemTableForWrite::Normal) - } -} - -// TODO(yingwen): How to support snapshot? -/// Table version -/// -/// Holds memtables and sst meta data of a table -/// -/// Switching memtable, memtable to level 0 file, addition/deletion to files -/// should be done atomically. 
-pub struct TableVersion { - inner: RwLock, - - cached_mem_size: SamplingCachedUsize, -} - -impl TableVersion { - /// Create an empty table version - pub fn new(mem_usage_sampling_interval: ReadableDuration, purge_queue: FilePurgeQueue) -> Self { - Self { - inner: RwLock::new(TableVersionInner { - memtable_view: MemTableView::new(), - levels_controller: LevelsController::new(purge_queue), - flushed_sequence: 0, - max_file_id: 0, - }), - - cached_mem_size: SamplingCachedUsize::new(mem_usage_sampling_interval.as_millis()), - } - } - - /// See [MemTableView::mutable_memory_usage] - pub fn mutable_memory_usage(&self) -> usize { - self.inner - .read() - .unwrap() - .memtable_view - .mutable_memory_usage() - } - - /// See [MemTableView::total_memory_usage] - pub fn total_memory_usage(&self) -> usize { - let fetch_total_memory_usage = || -> std::result::Result { - let size = self - .inner - .read() - .unwrap() - .memtable_view - .total_memory_usage(); - - Ok(size) - }; - - self.cached_mem_size.read(fetch_total_memory_usage).unwrap() - } - - /// Return the suggested segment duration if sampling memtable is still - /// active. - pub fn suggest_duration(&self) -> Option { - self.inner.write().unwrap().memtable_view.suggest_duration() - } - - pub fn suggest_primary_key(&self) -> Option> { - self.inner - .write() - .unwrap() - .memtable_view - .suggest_primary_key() - } - - /// Switch all mutable memtables - /// - /// Returns the maximum `SequenceNumber` in the mutable memtables needs to - /// be freezed. - pub fn switch_memtables(&self) -> Option { - self.inner.write().unwrap().memtable_view.switch_memtables() - } - - /// Stop timestamp sampling and freezed the sampling memtable. - /// - /// REQUIRE: Do in write worker - pub fn freeze_sampling_memtable(&self) -> Option { - self.inner - .write() - .unwrap() - .memtable_view - .freeze_sampling_memtable() - } - - /// See [MemTableView::pick_memtables_to_flush] - pub fn pick_memtables_to_flush(&self, last_sequence: SequenceNumber) -> FlushableMemTables { - self.inner - .read() - .unwrap() - .memtable_view - .pick_memtables_to_flush(last_sequence) - } - - /// Get memtable by timestamp for write. - /// - /// The returned schema is guaranteed to have schema with same version as - /// `schema_version`. Return None if the schema of existing memtable has - /// different schema. - pub fn memtable_for_write( - &self, - timestamp: Timestamp, - schema_version: schema::Version, - ) -> Result> { - // Find memtable by timestamp - let memtable = self.inner.read().unwrap().memtable_for_write(timestamp); - let mutable = match memtable { - Some(v) => v, - None => return Ok(None), - }; - - // We consider the schemas are same if they have the same version. - ensure!( - mutable.memtable().schema().version() == schema_version, - SchemaMismatch { - memtable_version: mutable.memtable().schema().version(), - given: schema_version, - } - ); - - Ok(Some(mutable)) - } - - /// Insert memtable into mutable memtable set. - pub fn insert_mutable(&self, mem_state: MemTableState) { - let mut inner = self.inner.write().unwrap(); - let old_memtable = inner.memtable_view.mutables.insert(mem_state.clone()); - assert!( - old_memtable.is_none(), - "Find a duplicate memtable, new_memtable:{:?}, old_memtable:{:?}, memtable_view:{:#?}", - mem_state, - old_memtable, - inner.memtable_view - ); - } - - /// Set sampling memtable. - /// - /// Panic if the sampling memtable of this version is not None. 
- pub fn set_sampling(&self, sampling_mem: SamplingMemTable) { - let mut inner = self.inner.write().unwrap(); - assert!(inner.memtable_view.sampling_mem.is_none()); - inner.memtable_view.sampling_mem = Some(sampling_mem); - } - - /// Atomically apply the edit to the version. - pub fn apply_edit(&self, edit: VersionEdit) { - let mut inner = self.inner.write().unwrap(); - - // TODO(yingwen): else, log warning - inner.flushed_sequence = cmp::max(inner.flushed_sequence, edit.flushed_sequence); - - inner.max_file_id = cmp::max(inner.max_file_id, edit.max_file_id); - - // Add sst files to level first. - for add_file in edit.files_to_add { - inner - .levels_controller - .add_sst_to_level(add_file.level, add_file.file); - } - - // Remove ssts from level. - for delete_file in edit.files_to_delete { - inner - .levels_controller - .remove_ssts_from_level(delete_file.level, &[delete_file.file_id]); - } - - // Remove immutable memtables. - for mem_id in edit.mems_to_remove { - inner.memtable_view.remove_immutable_or_sampling(mem_id); - } - } - - /// Atomically apply the meta to the version, useful in recover. - pub fn apply_meta(&self, meta: TableVersionMeta) { - let mut inner = self.inner.write().unwrap(); - - inner.flushed_sequence = cmp::max(inner.flushed_sequence, meta.flushed_sequence); - - inner.max_file_id = cmp::max(inner.max_file_id, meta.max_file_id); - - for add_file in meta.files.into_values() { - inner - .levels_controller - .add_sst_to_level(add_file.level, add_file.file); - } - } - - pub fn pick_read_view(&self, time_range: TimeRange) -> ReadView { - let mut sampling_mem = None; - let mut memtables = MemTableVec::new(); - let mut leveled_ssts = vec![Vec::new(); SST_LEVEL_NUM]; - - { - // Pick memtables for read. - let inner = self.inner.read().unwrap(); - - inner - .memtable_view - .memtables_for_read(time_range, &mut memtables, &mut sampling_mem); - - // Pick ssts for read. - inner - .levels_controller - .pick_ssts(time_range, |level, ssts| { - leveled_ssts[level.as_usize()].extend_from_slice(ssts) - }); - } - - ReadView { - sampling_mem, - memtables, - leveled_ssts, - } - } - - /// Pick ssts for compaction using given `picker`. 
- pub fn pick_for_compaction( - &self, - picker_ctx: PickerContext, - picker: &CompactionPickerRef, - ) -> picker::Result { - let mut inner = self.inner.write().unwrap(); - - picker.pick_compaction(picker_ctx, &mut inner.levels_controller) - } - - pub fn has_expired_sst(&self, expire_time: Option) -> bool { - let inner = self.inner.read().unwrap(); - - inner.levels_controller.has_expired_sst(expire_time) - } - - pub fn expired_ssts(&self, expire_time: Option) -> Vec { - let inner = self.inner.read().unwrap(); - - inner.levels_controller.expired_ssts(expire_time) - } - - pub fn flushed_sequence(&self) -> SequenceNumber { - let inner = self.inner.read().unwrap(); - - inner.flushed_sequence - } - - pub fn snapshot(&self) -> TableVersionSnapshot { - let inner = self.inner.read().unwrap(); - let controller = &inner.levels_controller; - let files = controller - .levels() - .flat_map(|level| { - let ssts = controller.iter_ssts_at_level(level); - ssts.map(move |file| { - let add_file = AddFile { - level, - file: file.meta(), - }; - (file.id(), add_file) - }) - }) - .collect(); - - TableVersionSnapshot { - flushed_sequence: inner.flushed_sequence, - files, - max_file_id: inner.max_file_id, - } - } -} - -pub struct TableVersionSnapshot { - pub flushed_sequence: SequenceNumber, - pub files: HashMap, - pub max_file_id: FileId, -} - -/// During recovery, we apply all version edit to [TableVersionMeta] first, then -/// apply the version meta to the table, so we can avoid adding removed ssts to -/// the version. -#[derive(Clone, Debug, Default, PartialEq, Eq)] -pub struct TableVersionMeta { - pub flushed_sequence: SequenceNumber, - pub files: HashMap, - pub max_file_id: FileId, -} - -impl TableVersionMeta { - pub fn apply_edit(&mut self, edit: VersionEdit) { - self.flushed_sequence = cmp::max(self.flushed_sequence, edit.flushed_sequence); - - for add_file in edit.files_to_add { - self.max_file_id = cmp::max(self.max_file_id, add_file.file.id); - - self.files.insert(add_file.file.id, add_file); - } - - self.max_file_id = cmp::max(self.max_file_id, edit.max_file_id); - - // aligned max file id. - self.max_file_id = - (self.max_file_id + DEFAULT_ALLOC_STEP - 1) / DEFAULT_ALLOC_STEP * DEFAULT_ALLOC_STEP; - - for delete_file in edit.files_to_delete { - self.files.remove(&delete_file.file_id); - } - } - - /// Returns the max file id in the files to add. 
- pub fn max_file_id_to_add(&self) -> FileId { - self.max_file_id - } - - pub fn ordered_files(&self) -> Vec { - let mut files_vec: Vec<_> = self.files.values().cloned().collect(); - files_vec.sort_unstable_by_key(|file| file.file.id); - - files_vec - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{ - sst::file::tests::FilePurgerMocker, - table::{data::tests::MemTableMocker, version_edit::tests::AddFileMocker}, - table_options, - tests::table, - }; - - fn new_table_version() -> TableVersion { - let purger = FilePurgerMocker::mock(); - let queue = purger.create_purge_queue(1, table::new_table_id(2, 2)); - TableVersion::new(ReadableDuration::millis(0), queue) - } - - #[test] - fn test_empty_table_version() { - let version = new_table_version(); - - let ts = Timestamp::now(); - assert!(!version.has_expired_sst(None)); - assert!(!version.has_expired_sst(Some(ts))); - - assert_eq!(0, version.mutable_memory_usage()); - assert_eq!(0, version.total_memory_usage()); - - { - let inner = version.inner.read().unwrap(); - let memtable_view = &inner.memtable_view; - assert!(memtable_view.sampling_mem.is_none()); - assert!(memtable_view.mutables.is_empty()); - assert!(memtable_view.immutables.is_empty()); - } - - let last_sequence = 1000; - let flushable_mems = version.pick_memtables_to_flush(last_sequence); - assert!(flushable_mems.is_empty()); - - let read_view = version.pick_read_view(TimeRange::min_to_max()); - assert!(!read_view.contains_sampling()); - - assert!(read_view.sampling_mem.is_none()); - assert!(read_view.memtables.is_empty()); - for ssts in read_view.leveled_ssts { - assert!(ssts.is_empty()); - } - - let now = Timestamp::now(); - let mutable = version.memtable_for_write(now, 1).unwrap(); - assert!(mutable.is_none()); - - // Nothing to switch. - assert!(version.suggest_duration().is_none()); - assert!(version.switch_memtables().is_none()); - } - - fn check_flushable_mem_with_sampling( - flushable_mems: &FlushableMemTables, - memtable_id: MemTableId, - ) { - assert!(!flushable_mems.is_empty()); - assert_eq!( - memtable_id, - flushable_mems.sampling_mem.as_ref().unwrap().id - ); - assert!(flushable_mems.memtables.is_empty()); - } - - #[test] - fn test_table_version_sampling() { - let memtable = MemTableMocker.build(); - test_table_version_sampling_with_memtable(memtable); - let memtable = MemTableMocker.build_columnar(); - test_table_version_sampling_with_memtable(memtable); - } - - fn test_table_version_sampling_with_memtable(memtable: MemTableRef) { - let version = new_table_version(); - - let schema = memtable.schema().clone(); - - let memtable_id = 1; - let sampling_mem = SamplingMemTable::new(memtable, memtable_id); - - version.set_sampling(sampling_mem); - - // Should write to sampling memtable. - let now = Timestamp::now(); - let mutable = version - .memtable_for_write(now, schema.version()) - .unwrap() - .unwrap(); - let actual_memtable = mutable.as_sampling(); - assert_eq!(memtable_id, actual_memtable.id); - - let mutable = version - .memtable_for_write(Timestamp::new(1234), schema.version()) - .unwrap() - .unwrap(); - let actual_memtable = mutable.as_sampling(); - assert_eq!(memtable_id, actual_memtable.id); - - // Sampling memtable should always be read. 
- let read_view = version.pick_read_view(TimeRange::new(0.into(), 123.into()).unwrap()); - assert!(read_view.contains_sampling()); - assert_eq!(memtable_id, read_view.sampling_mem.unwrap().id); - - let last_sequence = 1000; - let flushable_mems = version.pick_memtables_to_flush(last_sequence); - check_flushable_mem_with_sampling(&flushable_mems, memtable_id); - } - - #[test] - fn test_table_version_sampling_switch() { - let memtable = MemTableMocker.build(); - test_table_version_sampling_switch_with_memtable(memtable); - let memtable = MemTableMocker.build_columnar(); - test_table_version_sampling_switch_with_memtable(memtable); - } - - fn test_table_version_sampling_switch_with_memtable(memtable: MemTableRef) { - let version = new_table_version(); - - let schema = memtable.schema().clone(); - - let memtable_id = 1; - let last_sequence = 1000; - let sampling_mem = SamplingMemTable::new(memtable, memtable_id); - - version.set_sampling(sampling_mem); - - let duration = version.suggest_duration().unwrap(); - assert_eq!(table_options::DEFAULT_SEGMENT_DURATION, duration); - assert!(version.switch_memtables().is_none()); - - // Flushable memtables only contains sampling memtable. - let flushable_mems = version.pick_memtables_to_flush(last_sequence); - check_flushable_mem_with_sampling(&flushable_mems, memtable_id); - - // Write to memtable after switch and before freezed. - let now = Timestamp::now(); - let mutable = version - .memtable_for_write(now, schema.version()) - .unwrap() - .unwrap(); - // Still write to sampling memtable. - let actual_memtable = mutable.as_sampling(); - assert_eq!(memtable_id, actual_memtable.id); - - // Switch still return duration before freezed. - let duration = version.suggest_duration().unwrap(); - assert_eq!(table_options::DEFAULT_SEGMENT_DURATION, duration); - assert!(version.switch_memtables().is_none()); - - version.switch_memtables(); - // Flushable memtables only contains sampling memtable before sampling - // memtable is freezed. - let flushable_mems = version.pick_memtables_to_flush(last_sequence); - check_flushable_mem_with_sampling(&flushable_mems, memtable_id); - } - - // TODO: test columnar memtable - #[test] - fn test_table_version_sampling_freeze() { - let version = new_table_version(); - - let memtable = MemTableMocker.build(); - let schema = memtable.schema().clone(); - - let memtable_id1 = 1; - let last_sequence = 1000; - let sampling_mem = SamplingMemTable::new(memtable, memtable_id1); - - version.set_sampling(sampling_mem); - assert_eq!( - table_options::DEFAULT_SEGMENT_DURATION, - version.suggest_duration().unwrap() - ); - assert!(version.switch_memtables().is_none()); - // Freeze the sampling memtable. - version.freeze_sampling_memtable(); - - // No memtable after switch and freezed. - let now = Timestamp::now(); - assert!(version - .memtable_for_write(now, schema.version()) - .unwrap() - .is_none()); - - // Still flushable after freezed. - let flushable_mems = version.pick_memtables_to_flush(last_sequence); - assert!(flushable_mems.sampling_mem.unwrap().freezed); - - let aligned_time_range = - TimeRange::bucket_of(now, table_options::DEFAULT_SEGMENT_DURATION).unwrap(); - - // Sampling memtable still readable after freezed. 
- let read_view = version.pick_read_view(aligned_time_range); - assert!(read_view.contains_sampling()); - assert_eq!(memtable_id1, read_view.sampling_mem.as_ref().unwrap().id); - assert!(read_view.sampling_mem.unwrap().freezed); - - let memtable = MemTableMocker.build(); - let memtable_id2 = 2; - let mem_state = MemTableState { - mem: memtable, - aligned_time_range, - id: memtable_id2, - }; - // Insert a mutable memtable. - version.insert_mutable(mem_state); - - // Write to mutable memtable. - let mutable = version - .memtable_for_write(now, schema.version()) - .unwrap() - .unwrap(); - let mutable = mutable.as_normal(); - assert_eq!(aligned_time_range, mutable.aligned_time_range); - assert_eq!(memtable_id2, mutable.id); - - // Need to read sampling memtable and mutable memtable. - let read_view = version.pick_read_view(aligned_time_range); - assert_eq!(memtable_id1, read_view.sampling_mem.as_ref().unwrap().id); - assert_eq!(1, read_view.memtables.len()); - assert_eq!(memtable_id2, read_view.memtables[0].id); - - // Switch mutable memtable. - assert!(version.suggest_duration().is_none()); - assert!(version.switch_memtables().is_some()); - // No memtable after switch. - let now = Timestamp::now(); - assert!(version - .memtable_for_write(now, schema.version()) - .unwrap() - .is_none()); - - // Two memtables to flush. - let flushable_mems = version.pick_memtables_to_flush(last_sequence); - assert!(flushable_mems.sampling_mem.unwrap().freezed); - assert_eq!(1, flushable_mems.memtables.len()); - assert_eq!(memtable_id2, flushable_mems.memtables[0].id); - } - - // TODO: test columnar memtable - #[test] - fn test_table_version_sampling_apply_edit() { - let version = new_table_version(); - - let memtable = MemTableMocker.build(); - - let memtable_id1 = 1; - let sampling_mem = SamplingMemTable::new(memtable, memtable_id1); - - // Prepare sampling memtable. - version.set_sampling(sampling_mem); - version.freeze_sampling_memtable(); - - let now = Timestamp::now(); - let aligned_time_range = - TimeRange::bucket_of(now, table_options::DEFAULT_SEGMENT_DURATION).unwrap(); - - // Prepare mutable memtable. - let memtable = MemTableMocker.build(); - let memtable_id2 = 2; - let mem_state = MemTableState { - mem: memtable, - aligned_time_range, - id: memtable_id2, - }; - // Insert a mutable memtable. - version.insert_mutable(mem_state); - - // Switch memtable. - assert!(version.suggest_duration().is_none()); - assert!(version.switch_memtables().is_some()); - let max_sequence = 120; - let file_id = 13; - let add_file = AddFileMocker::new(file_id) - .time_range(aligned_time_range) - .max_seq(max_sequence) - .build(); - let edit = VersionEdit { - flushed_sequence: max_sequence, - mems_to_remove: vec![memtable_id1, memtable_id2], - files_to_add: vec![add_file], - files_to_delete: vec![], - max_file_id: 0, - }; - version.apply_edit(edit); - - // Only pick ssts after flushed. 
- let read_view = version.pick_read_view(aligned_time_range); - assert!(!read_view.contains_sampling()); - assert!(read_view.sampling_mem.is_none()); - assert!(read_view.memtables.is_empty()); - assert_eq!(1, read_view.leveled_ssts[0].len()); - assert_eq!(file_id, read_view.leveled_ssts[0][0].id()); - } -} diff --git a/src/analytic_engine/src/table/version_edit.rs b/src/analytic_engine/src/table/version_edit.rs deleted file mode 100644 index f918d79526..0000000000 --- a/src/analytic_engine/src/table/version_edit.rs +++ /dev/null @@ -1,208 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Version edits - -use std::convert::TryFrom; - -use common_types::{time::TimeRange, SequenceNumber}; -use horaedbproto::manifest as manifest_pb; -use macros::define_result; -use snafu::{Backtrace, OptionExt, ResultExt, Snafu}; - -use crate::{ - sst::{ - file::{FileMeta, Level}, - manager::FileId, - }, - table::data::MemTableId, - table_options::StorageFormat, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Invalid level:{}, err:{}.\nBacktrace:\n{}", level, source, backtrace))] - InvalidLevel { - level: u32, - source: std::num::TryFromIntError, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to convert time range, err:{}", source))] - ConvertTimeRange { source: common_types::time::Error }, - - #[snafu(display("Fail to convert table schema, err:{}", source))] - ConvertTableSchema { source: common_types::schema::Error }, - - #[snafu(display("Fail to convert storage format, err:{}", source))] - ConvertStorageFormat { source: crate::table_options::Error }, - - #[snafu(display("Time range is not found.\nBacktrace:\n{}", backtrace))] - TimeRangeNotFound { backtrace: Backtrace }, - - #[snafu(display("Table schema is not found.\nBacktrace:\n{}", backtrace))] - TableSchemaNotFound { backtrace: Backtrace }, -} - -define_result!(Error); - -/// Meta data of a new file. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct AddFile { - /// The level of the file intended to add. - pub level: Level, - /// Meta data of the file to add. 
- pub file: FileMeta, -} - -impl From for manifest_pb::AddFileMeta { - /// Convert into protobuf struct - fn from(v: AddFile) -> manifest_pb::AddFileMeta { - manifest_pb::AddFileMeta { - level: v.level.as_u32(), - file_id: v.file.id, - time_range: Some(v.file.time_range.into()), - max_seq: v.file.max_seq, - size: v.file.size, - row_num: v.file.row_num, - storage_format: manifest_pb::StorageFormat::from(v.file.storage_format) as i32, - associated_files: v.file.associated_files, - } - } -} - -impl TryFrom for AddFile { - type Error = Error; - - fn try_from(src: manifest_pb::AddFileMeta) -> Result { - let storage_format = src.storage_format(); - let time_range = { - let time_range = src.time_range.context(TimeRangeNotFound)?; - TimeRange::try_from(time_range).context(ConvertTimeRange)? - }; - - let target = Self { - level: (src.level as u16).into(), - file: FileMeta { - id: src.file_id, - size: src.size, - row_num: src.row_num, - time_range, - max_seq: src.max_seq, - storage_format: StorageFormat::try_from(storage_format) - .context(ConvertStorageFormat)?, - associated_files: src.associated_files, - }, - }; - - Ok(target) - } -} - -/// Meta data of the file to delete. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct DeleteFile { - /// The level of the file intended to delete. - pub level: Level, - /// Id of the file to delete. - pub file_id: FileId, -} - -impl From for manifest_pb::DeleteFileMeta { - fn from(v: DeleteFile) -> Self { - manifest_pb::DeleteFileMeta { - level: v.level.as_u32(), - file_id: v.file_id, - } - } -} - -impl TryFrom for DeleteFile { - type Error = Error; - - fn try_from(src: manifest_pb::DeleteFileMeta) -> Result { - let level = (src.level as u16).into(); - - Ok(Self { - level, - file_id: src.file_id, - }) - } -} - -/// Edit to the [TableVersion], which should be done atomically -#[derive(Debug)] -pub struct VersionEdit { - /// The last sequence already flushed. This field is not guaranteed to be - /// set if the version edit is created by a non-flush operation (such as - /// compaction). - pub flushed_sequence: SequenceNumber, - /// Id of memtables to remove from immutable memtable lists. - pub mems_to_remove: Vec, - /// Sst files to add. - pub files_to_add: Vec, - /// Sst files to delete. - pub files_to_delete: Vec, - pub max_file_id: FileId, -} - -#[cfg(test)] -pub mod tests { - use super::*; - - #[must_use] - pub struct AddFileMocker { - file_id: FileId, - time_range: TimeRange, - max_seq: SequenceNumber, - } - - impl AddFileMocker { - pub fn new(file_id: FileId) -> Self { - Self { - file_id, - time_range: TimeRange::empty(), - max_seq: 0, - } - } - - pub fn time_range(mut self, time_range: TimeRange) -> Self { - self.time_range = time_range; - self - } - - pub fn max_seq(mut self, max_seq: SequenceNumber) -> Self { - self.max_seq = max_seq; - self - } - - pub fn build(&self) -> AddFile { - AddFile { - level: Level::MIN, - file: FileMeta { - id: self.file_id, - size: 0, - row_num: 0, - time_range: self.time_range, - max_seq: self.max_seq, - storage_format: StorageFormat::default(), - associated_files: Vec::new(), - }, - } - } - } -} diff --git a/src/analytic_engine/src/table_meta_set_impl.rs b/src/analytic_engine/src/table_meta_set_impl.rs deleted file mode 100644 index 23f3f7a3f0..0000000000 --- a/src/analytic_engine/src/table_meta_set_impl.rs +++ /dev/null @@ -1,383 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Table data set impl based on spaces - -use std::{fmt, num::NonZeroUsize, sync::Arc}; - -use anyhow::Context; -use id_allocator::IdAllocator; -use logger::debug; -use table_engine::table::TableId; - -use crate::{ - manifest::{ - details::TableMetaSet, - meta_edit::{ - self, AddTableMeta, AlterOptionsMeta, AlterSchemaMeta, DropTableMeta, MetaEditRequest, - MetaUpdate, VersionEditMeta, - }, - meta_snapshot::MetaSnapshot, - }, - space::{SpaceId, SpaceRef, SpacesRef}, - sst::file::FilePurgerRef, - table::{ - data::{ - MemSizeOptions, TableCatalogInfo, TableConfig, TableData, TableDataRef, TableDesc, - TableShardInfo, DEFAULT_ALLOC_STEP, - }, - version::{TableVersionMeta, TableVersionSnapshot}, - version_edit::VersionEdit, - }, - MetricsOptions, -}; - -#[derive(Clone)] -pub(crate) struct TableMetaSetImpl { - pub(crate) spaces: SpacesRef, - pub(crate) file_purger: FilePurgerRef, - // TODO: maybe not suitable to place this parameter here? - pub(crate) preflush_write_buffer_size_ratio: f32, - pub(crate) manifest_snapshot_every_n_updates: NonZeroUsize, - pub(crate) enable_primary_key_sampling: bool, - pub(crate) try_compat_old_layered_memtable_opts: bool, - pub(crate) metrics_opt: MetricsOptions, -} - -impl fmt::Debug for TableMetaSetImpl { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str("spaces table snapshot provider") - } -} - -enum TableKey<'a> { - Name(&'a str), - Id(TableId), -} - -impl TableMetaSetImpl { - fn find_table_and_apply_edit( - &self, - space_id: SpaceId, - table_key: TableKey<'_>, - apply_edit: F, - ) -> crate::manifest::Result - where - F: FnOnce(SpaceRef, TableDataRef) -> crate::manifest::Result<()>, - { - let spaces = self.spaces.read().unwrap(); - let space = spaces - .get_by_id(space_id) - .ok_or_else(|| anyhow::anyhow!("space not found, space_id:{space_id}"))?; - - let table_data = match table_key { - TableKey::Name(name) => space.find_table(name).ok_or_else(|| { - anyhow::anyhow!("table not found, space_id:{space_id}, table_name:{name}") - })?, - TableKey::Id(id) => space.find_table_by_id(id).ok_or_else(|| { - anyhow::anyhow!("table not found, space_id:{space_id}, table_id:{id}") - })?, - }; - - apply_edit(space.clone(), table_data.clone())?; - - Ok(table_data) - } - - fn apply_update( - &self, - meta_update: MetaUpdate, - shard_info: TableShardInfo, - table_catalog_info: TableCatalogInfo, - ) -> crate::manifest::Result { - match meta_update { - MetaUpdate::AddTable(AddTableMeta { - space_id, - table_id, - table_name, - schema, - opts, - }) => { - let spaces = self.spaces.read().unwrap(); - let space = spaces - .get_by_id(space_id) - .ok_or_else(|| anyhow::anyhow!("space not found, space_id:{space_id}"))?; - - let mem_size_options = MemSizeOptions { - collector: space.mem_usage_collector.clone(), - size_sampling_interval: 
space.mem_usage_sampling_interval, - }; - - let table_data = Arc::new( - TableData::new( - TableDesc { - space_id: space.id, - schema_id: table_catalog_info.schema_id, - schema_name: table_catalog_info.schema_name, - catalog_name: table_catalog_info.catalog_name, - id: table_id, - name: table_name, - schema, - shard_id: shard_info.shard_id, - }, - opts, - TableConfig { - preflush_write_buffer_size_ratio: self.preflush_write_buffer_size_ratio, - manifest_snapshot_every_n_updates: self - .manifest_snapshot_every_n_updates, - metrics_opt: self.metrics_opt.clone(), - enable_primary_key_sampling: self.enable_primary_key_sampling, - try_compat_old_layered_memtable_opts: self - .try_compat_old_layered_memtable_opts, - }, - &self.file_purger, - mem_size_options, - ) - .context(format!( - "failed to new table data, space_id:{}, table_id:{}", - space.id, table_id - ))?, - ); - - space.insert_table(table_data.clone()); - - Ok(table_data) - } - MetaUpdate::DropTable(DropTableMeta { - space_id, - table_name, - .. - }) => { - let table_name = &table_name; - let drop_table = move |space: SpaceRef, table_data: TableDataRef| { - // Set the table dropped after finishing flushing and storing drop table meta - // information. - table_data.set_dropped(); - - // Clear the memory status after updating manifest and clearing wal so that - // the drop is retryable if fails to update and clear. - space.remove_table(&table_data.name); - - Ok(()) - }; - - self.find_table_and_apply_edit(space_id, TableKey::Name(table_name), drop_table) - } - MetaUpdate::VersionEdit(VersionEditMeta { - space_id, - table_id, - flushed_sequence, - files_to_add, - files_to_delete, - mems_to_remove, - max_file_id, - }) => { - let version_edit = move |_space: SpaceRef, table_data: TableDataRef| { - let edit = VersionEdit { - flushed_sequence, - mems_to_remove, - files_to_add, - files_to_delete, - max_file_id, - }; - table_data.current_version().apply_edit(edit); - - Ok(()) - }; - - self.find_table_and_apply_edit(space_id, TableKey::Id(table_id), version_edit) - } - MetaUpdate::AlterSchema(AlterSchemaMeta { - space_id, - table_id, - schema, - .. 
- }) => { - let alter_schema = move |_space: SpaceRef, table_data: TableDataRef| { - table_data.set_schema(schema); - - Ok(()) - }; - - self.find_table_and_apply_edit(space_id, TableKey::Id(table_id), alter_schema) - } - MetaUpdate::AlterOptions(AlterOptionsMeta { - space_id, - table_id, - options, - }) => { - let alter_option = move |_space: SpaceRef, table_data: TableDataRef| { - table_data.set_table_options(options); - - Ok(()) - }; - - self.find_table_and_apply_edit(space_id, TableKey::Id(table_id), alter_option) - } - } - } - - fn apply_snapshot( - &self, - meta_snapshot: MetaSnapshot, - shard_info: TableShardInfo, - table_catalog_info: TableCatalogInfo, - ) -> crate::manifest::Result { - debug!("TableMetaSet apply snapshot, snapshot :{:?}", meta_snapshot); - - let MetaSnapshot { - table_meta, - version_meta, - } = meta_snapshot; - - let space_id = table_meta.space_id; - let spaces = self.spaces.read().unwrap(); - let space = spaces - .get_by_id(space_id) - .ok_or_else(|| anyhow::anyhow!("space not found, space_id:{space_id}"))?; - - // Apply max file id to the allocator - let allocator = match version_meta.clone() { - Some(version_meta) => { - let max_file_id = version_meta.max_file_id_to_add(); - IdAllocator::new(max_file_id, max_file_id, DEFAULT_ALLOC_STEP) - } - None => IdAllocator::new(0, 0, DEFAULT_ALLOC_STEP), - }; - - let table_name = table_meta.table_name.clone(); - let mem_size_options = MemSizeOptions { - collector: space.mem_usage_collector.clone(), - size_sampling_interval: space.mem_usage_sampling_interval, - }; - let table_data = Arc::new( - TableData::recover_from_add( - table_meta, - &self.file_purger, - shard_info.shard_id, - TableConfig { - preflush_write_buffer_size_ratio: self.preflush_write_buffer_size_ratio, - manifest_snapshot_every_n_updates: self.manifest_snapshot_every_n_updates, - metrics_opt: self.metrics_opt.clone(), - enable_primary_key_sampling: self.enable_primary_key_sampling, - try_compat_old_layered_memtable_opts: self.try_compat_old_layered_memtable_opts, - }, - mem_size_options, - allocator, - table_catalog_info, - ) - .context(format!( - "failed to new table_data, space_id:{}, table_name:{}", - space.id, table_name - ))?, - ); - - // Apply version meta to the table. - if let Some(version_meta) = version_meta { - debug!( - "TableMetaSet apply version meta, version meta:{:?}", - version_meta - ); - - table_data.current_version().apply_meta(version_meta); - } - - debug!( - "TableMetaSet success to apply snapshot, table_id:{}, table_name:{}", - table_data.id, table_data.name - ); - - space.insert_table(table_data.clone()); - - Ok(table_data) - } -} - -impl TableMetaSet for TableMetaSetImpl { - fn get_table_snapshot( - &self, - space_id: SpaceId, - table_id: TableId, - ) -> crate::manifest::Result> { - let table_data = { - let spaces = self.spaces.read().unwrap(); - spaces - .get_by_id(space_id) - .ok_or_else(|| { - anyhow::anyhow!("space not exist, space_id:{space_id}, table_id:{table_id}") - })? - .find_table_by_id(table_id) - .ok_or_else(|| { - anyhow::anyhow!( - "table data not exist, space_id:{space_id}, table_id:{table_id}" - ) - })? - }; - - // When table has been dropped, we should return None. 
- let table_manifest_data_opt = if !table_data.is_dropped() { - let table_meta = AddTableMeta { - space_id, - table_id, - table_name: table_data.name.to_string(), - schema: table_data.schema(), - opts: table_data.table_options().as_ref().clone(), - }; - - let version_snapshot = table_data.current_version().snapshot(); - let TableVersionSnapshot { - flushed_sequence, - files, - max_file_id, - } = version_snapshot; - let version_meta = TableVersionMeta { - flushed_sequence, - files, - max_file_id, - }; - - Some(MetaSnapshot { - table_meta, - version_meta: Some(version_meta), - }) - } else { - None - }; - - Ok(table_manifest_data_opt) - } - - fn apply_edit_to_table( - &self, - request: crate::manifest::meta_edit::MetaEditRequest, - ) -> crate::manifest::Result { - let MetaEditRequest { - shard_info, - meta_edit, - table_catalog_info, - } = request; - - match meta_edit { - meta_edit::MetaEdit::Update(update) => { - self.apply_update(update, shard_info, table_catalog_info) - } - meta_edit::MetaEdit::Snapshot(manifest_data) => { - self.apply_snapshot(manifest_data, shard_info, table_catalog_info) - } - } - } -} diff --git a/src/analytic_engine/src/table_options.rs b/src/analytic_engine/src/table_options.rs deleted file mode 100644 index 0ecabb9512..0000000000 --- a/src/analytic_engine/src/table_options.rs +++ /dev/null @@ -1,921 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Constants for table options. 
- -use std::{collections::HashMap, str::FromStr, string::ToString, time::Duration}; - -use common_types::{ - time::Timestamp, ARENA_BLOCK_SIZE, COMPACTION_STRATEGY, COMPRESSION, ENABLE_TTL, - LAYERED_ENABLE, LAYERED_MUTABLE_SWITCH_THRESHOLD, MEMTABLE_TYPE, NUM_ROWS_PER_ROW_GROUP, - OPTION_KEY_ENABLE_TTL, SEGMENT_DURATION, STORAGE_FORMAT, TTL, UPDATE_MODE, WRITE_BUFFER_SIZE, -}; -use datafusion::parquet::basic::Compression as ParquetCompression; -use horaedbproto::manifest as manifest_pb; -use macros::define_result; -use serde::{Deserialize, Serialize}; -use size_ext::ReadableSize; -use snafu::{Backtrace, GenerateBacktrace, OptionExt, ResultExt, Snafu}; -use time_ext::{parse_duration, DurationExt, ReadableDuration, TimeUnit}; - -use crate::{ - compaction::{ - self, CompactionStrategy, SizeTieredCompactionOptions, TimeWindowCompactionOptions, - }, - memtable::{LayeredMemtableOptions, MemtableType}, -}; - -const UPDATE_MODE_OVERWRITE: &str = "OVERWRITE"; -const UPDATE_MODE_APPEND: &str = "APPEND"; -const COMPRESSION_UNCOMPRESSED: &str = "UNCOMPRESSED"; -const COMPRESSION_LZ4: &str = "LZ4"; -const COMPRESSION_SNAPPY: &str = "SNAPPY"; -const COMPRESSION_ZSTD: &str = "ZSTD"; -const STORAGE_FORMAT_AUTO: &str = "AUTO"; -const STORAGE_FORMAT_COLUMNAR: &str = "COLUMNAR"; - -/// Default bucket duration (1d) -const BUCKET_DURATION_1D: Duration = Duration::from_secs(24 * 60 * 60); -/// Default duration of a segment (2h). -pub const DEFAULT_SEGMENT_DURATION: Duration = Duration::from_secs(60 * 60 * 2); -/// Default arena block size (2M). -const DEFAULT_ARENA_BLOCK_SIZE: u32 = 2 * 1024 * 1024; -/// Default write buffer size (32M). -const DEFAULT_WRITE_BUFFER_SIZE: u32 = 32 * 1024 * 1024; -/// Default ttl of table (7d). -const DEFAULT_TTL: Duration = Duration::from_secs(7 * 24 * 60 * 60); -/// Default row number of a row group. -const DEFAULT_NUM_ROW_PER_ROW_GROUP: usize = 8192; - -/// Max arena block size (2G) -const MAX_ARENA_BLOCK_SIZE: u32 = 2 * 1024 * 1024 * 1024; -/// Min arena block size (1K) -const MIN_ARENA_BLOCK_SIZE: u32 = 1024; -const MIN_NUM_ROWS_PER_ROW_GROUP: usize = 100; -const MAX_NUM_ROWS_PER_ROW_GROUP: usize = 10_000_000; - -#[derive(Debug, Snafu)] -#[allow(clippy::enum_variant_names)] -pub enum Error { - #[snafu(display("Failed to parse duration, err:{}.\nBacktrace:\n{}", source, backtrace))] - ParseDuration { - source: time_ext::Error, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to parse size, err:{}.\nBacktrace:\n{}", err, backtrace))] - ParseSize { err: String, backtrace: Backtrace }, - - #[snafu(display("Failed to parse compaction strategy: {}, err: {}", value, source))] - ParseStrategy { - value: String, - source: crate::compaction::Error, - }, - #[snafu(display("Failed to parse int, err:{}.\nBacktrace:\n{}", source, backtrace))] - ParseInt { - source: std::num::ParseIntError, - backtrace: Backtrace, - }, - #[snafu(display("Failed to parse bool, err:{}.\nBacktrace:\n{}", source, backtrace))] - ParseBool { - source: std::str::ParseBoolError, - backtrace: Backtrace, - }, - #[snafu(display( - "Failed to parse update mode, raw str:{}.\nBacktrace:\n{}", - s, - backtrace - ))] - ParseUpdateMode { s: String, backtrace: Backtrace }, - - #[snafu(display( - "Failed to parse compression, name:{}.\nBacktrace:\n{}", - name, - backtrace - ))] - ParseCompressionName { name: String, backtrace: Backtrace }, - - #[snafu(display( - "Unknown storage format. 
value:{:?}.\nBacktrace:\n{}", - value, - backtrace - ))] - UnknownStorageFormat { value: String, backtrace: Backtrace }, - - #[snafu(display( - "Unknown storage format. value:{:?}.\nBacktrace:\n{}", - value, - backtrace - ))] - UnknownStorageFormatType { value: i32, backtrace: Backtrace }, - - #[snafu(display( - "Unknown storage format hint. value:{:?}.\nBacktrace:\n{}", - value, - backtrace - ))] - UnknownStorageFormatHint { value: String, backtrace: Backtrace }, - - #[snafu(display( - "Unknown compression type. value:{:?}.\nBacktrace:\n{}", - value, - backtrace - ))] - UnknownCompressionType { value: i32, backtrace: Backtrace }, - - #[snafu(display("Storage format hint is missing.\nBacktrace:\n{}", backtrace))] - MissingStorageFormatHint { backtrace: Backtrace }, - - #[snafu(display( - "Hybrid format is deprecated, and cannot be used any more.\nBacktrace:\n{}", - backtrace - ))] - HybridDeprecated { backtrace: Backtrace }, - - #[snafu(display( - "Failed to parse layered memtable options, msg:{msg}.\nBacktrace:\n{backtrace}", - ))] - ParseLayeredMemtableOptions { msg: String, backtrace: Backtrace }, - - #[snafu(display("Layered memtable options is missing.\nBacktrace:\n{backtrace}",))] - MissingLayeredMemtableOptions { backtrace: Backtrace }, -} - -define_result!(Error); - -#[derive(Debug, Clone, Deserialize, Eq, PartialEq, Serialize)] -pub enum UpdateMode { - Overwrite, - Append, -} - -impl UpdateMode { - pub fn parse_from(s: &str) -> Result { - if s.eq_ignore_ascii_case(UPDATE_MODE_OVERWRITE) { - Ok(UpdateMode::Overwrite) - } else if s.eq_ignore_ascii_case(UPDATE_MODE_APPEND) { - Ok(UpdateMode::Append) - } else { - ParseUpdateMode { s }.fail() - } - } -} - -impl ToString for UpdateMode { - fn to_string(&self) -> String { - match self { - UpdateMode::Append => UPDATE_MODE_APPEND.to_string(), - UpdateMode::Overwrite => UPDATE_MODE_OVERWRITE.to_string(), - } - } -} - -#[derive(Debug, Clone, Copy, Deserialize, Eq, PartialEq, Serialize)] -pub enum Compression { - Uncompressed, - Lz4, - Snappy, - Zstd, -} - -impl Compression { - pub fn parse_from(name: &str) -> Result { - if name.eq_ignore_ascii_case(COMPRESSION_UNCOMPRESSED) { - Ok(Compression::Uncompressed) - } else if name.eq_ignore_ascii_case(COMPRESSION_LZ4) { - Ok(Compression::Lz4) - } else if name.eq_ignore_ascii_case(COMPRESSION_SNAPPY) { - Ok(Compression::Snappy) - } else if name.eq_ignore_ascii_case(COMPRESSION_ZSTD) { - Ok(Compression::Zstd) - } else { - ParseCompressionName { name }.fail() - } - } -} - -impl ToString for Compression { - fn to_string(&self) -> String { - match self { - Compression::Uncompressed => COMPRESSION_UNCOMPRESSED.to_string(), - Compression::Lz4 => COMPRESSION_LZ4.to_string(), - Compression::Snappy => COMPRESSION_SNAPPY.to_string(), - Compression::Zstd => COMPRESSION_ZSTD.to_string(), - } - } -} - -impl From for manifest_pb::Compression { - fn from(compression: Compression) -> Self { - match compression { - Compression::Uncompressed => manifest_pb::Compression::Uncompressed, - Compression::Lz4 => manifest_pb::Compression::Lz4, - Compression::Snappy => manifest_pb::Compression::Snappy, - Compression::Zstd => manifest_pb::Compression::Zstd, - } - } -} - -impl From for Compression { - fn from(compression: manifest_pb::Compression) -> Self { - match compression { - manifest_pb::Compression::Uncompressed => Compression::Uncompressed, - manifest_pb::Compression::Lz4 => Compression::Lz4, - manifest_pb::Compression::Snappy => Compression::Snappy, - manifest_pb::Compression::Zstd => Compression::Zstd, - } - } -} - 
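The deleted option parsers (`UpdateMode::parse_from`, `Compression::parse_from`) all follow the same shape: case-insensitive matching of a user-supplied string against a fixed set of names, with an error that carries the unrecognized input. A minimal std-only sketch of that pattern, using illustrative names and a plain `String` error in place of the snafu errors above:

// Illustrative sketch only (std library): case-insensitive parsing of an option
// value into an enum, with a descriptive error for unknown inputs.
#[derive(Debug, PartialEq)]
enum UpdateMode {
    Overwrite,
    Append,
}

fn parse_update_mode(s: &str) -> Result<UpdateMode, String> {
    if s.eq_ignore_ascii_case("OVERWRITE") {
        Ok(UpdateMode::Overwrite)
    } else if s.eq_ignore_ascii_case("APPEND") {
        Ok(UpdateMode::Append)
    } else {
        // The real code returns a snafu error with a backtrace; a String is enough here.
        Err(format!("unknown update mode: {s}"))
    }
}

fn main() {
    assert_eq!(parse_update_mode("append"), Ok(UpdateMode::Append));
    assert!(parse_update_mode("upsert").is_err());
}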
-impl TryFrom for Compression { - type Error = Error; - - fn try_from(compression: i32) -> Result { - let compression = match compression { - 0 => Compression::Uncompressed, - 1 => Compression::Lz4, - 2 => Compression::Snappy, - 3 => Compression::Zstd, - _ => return UnknownCompressionType { value: compression }.fail(), - }; - - Ok(compression) - } -} - -impl From for i32 { - fn from(value: Compression) -> Self { - match value { - Compression::Uncompressed => 0, - Compression::Lz4 => 1, - Compression::Snappy => 2, - Compression::Zstd => 3, - } - } -} - -impl From for ParquetCompression { - fn from(compression: Compression) -> Self { - match compression { - Compression::Uncompressed => ParquetCompression::UNCOMPRESSED, - Compression::Lz4 => ParquetCompression::LZ4, - Compression::Snappy => ParquetCompression::SNAPPY, - Compression::Zstd => ParquetCompression::ZSTD(Default::default()), - } - } -} - -/// A hint for building sst. -#[derive(Clone, Copy, Debug, Default, Deserialize, PartialEq, Eq, Serialize)] -pub enum StorageFormatHint { - /// Which storage format is chosen to encode one sst depends on the data - /// pattern. - #[default] - Auto, - Specific(StorageFormat), -} - -/// StorageFormat specify how records are saved in persistent storage -#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq, Serialize)] -pub enum StorageFormat { - /// Traditional columnar format, every column is saved in one exact one - /// column, for example: - /// - /// ```plaintext - /// | Timestamp | Device ID | Status Code | Tag 1 | Tag 2 | - /// | --------- |---------- | ----------- | ----- | ----- | - /// | 12:01 | A | 0 | v1 | v1 | - /// | 12:01 | B | 0 | v2 | v2 | - /// | 12:02 | A | 0 | v1 | v1 | - /// | 12:02 | B | 1 | v2 | v2 | - /// | 12:03 | A | 0 | v1 | v1 | - /// | 12:03 | B | 0 | v2 | v2 | - /// | ..... | | | | | - /// ``` - Columnar, -} - -impl From for manifest_pb::StorageFormatHint { - fn from(hint: StorageFormatHint) -> Self { - match hint { - StorageFormatHint::Auto => Self { - hint: Some(manifest_pb::storage_format_hint::Hint::Auto(0)), - }, - StorageFormatHint::Specific(format) => { - let format = manifest_pb::StorageFormat::from(format); - Self { - hint: Some(manifest_pb::storage_format_hint::Hint::Specific( - format as i32, - )), - } - } - } - } -} - -impl TryFrom for StorageFormatHint { - type Error = Error; - - fn try_from(hint: manifest_pb::StorageFormatHint) -> Result { - let format_hint = match hint.hint.context(MissingStorageFormatHint)? { - manifest_pb::storage_format_hint::Hint::Auto(_) => StorageFormatHint::Auto, - manifest_pb::storage_format_hint::Hint::Specific(format) => { - let storage_format = manifest_pb::StorageFormat::from_i32(format) - .context(UnknownStorageFormatType { value: format })?; - StorageFormatHint::Specific(storage_format.try_into()?) 
- } - }; - - Ok(format_hint) - } -} - -impl ToString for StorageFormatHint { - fn to_string(&self) -> String { - match self { - Self::Auto => STORAGE_FORMAT_AUTO.to_string(), - Self::Specific(format) => format.to_string(), - } - } -} - -impl TryFrom<&str> for StorageFormatHint { - type Error = Error; - - fn try_from(value: &str) -> Result { - let format = match value.to_uppercase().as_str() { - STORAGE_FORMAT_COLUMNAR => Self::Specific(StorageFormat::Columnar), - STORAGE_FORMAT_AUTO => Self::Auto, - _ => return UnknownStorageFormatHint { value }.fail(), - }; - Ok(format) - } -} - -impl From for manifest_pb::StorageFormat { - fn from(format: StorageFormat) -> Self { - match format { - StorageFormat::Columnar => Self::Columnar, - } - } -} - -impl From for i32 { - fn from(value: StorageFormat) -> Self { - match value { - StorageFormat::Columnar => 0, - } - } -} - -impl TryFrom for StorageFormat { - type Error = Error; - - fn try_from(format: manifest_pb::StorageFormat) -> Result { - match format { - manifest_pb::StorageFormat::Columnar => Ok(Self::Columnar), - manifest_pb::StorageFormat::Hybrid => HybridDeprecated {}.fail(), - } - } -} - -impl TryFrom<&str> for StorageFormat { - type Error = Error; - - fn try_from(value: &str) -> Result { - let format = match value.to_uppercase().as_str() { - STORAGE_FORMAT_COLUMNAR => Self::Columnar, - _ => return UnknownStorageFormat { value }.fail(), - }; - Ok(format) - } -} - -impl TryFrom for StorageFormat { - type Error = Error; - - fn try_from(value: i32) -> Result { - let format = match value { - 0 => Self::Columnar, - _ => return UnknownStorageFormatType { value }.fail(), - }; - Ok(format) - } -} - -impl ToString for StorageFormat { - fn to_string(&self) -> String { - match self { - Self::Columnar => STORAGE_FORMAT_COLUMNAR, - } - .to_string() - } -} - -impl Default for StorageFormat { - fn default() -> Self { - Self::Columnar - } -} - -/// Options for a table. -#[derive(Debug, Clone, Deserialize, PartialEq, Serialize)] -#[serde(default)] -pub struct TableOptions { - // The following options are immutable once table was created. - /// Segment duration of the table. - /// - /// `None` means the table is doing the segment duration sampling and - /// the actual duration is still unknown. - pub segment_duration: Option, - /// Table update mode, now support Overwrite(Default) and Append - pub update_mode: UpdateMode, - /// Hint for storage format. - pub storage_format_hint: StorageFormatHint, - - // The following options can be altered. - /// Enable ttl - pub enable_ttl: bool, - /// Time-to-live of the data. - pub ttl: ReadableDuration, - /// Arena block size of memtable. - pub arena_block_size: u32, - /// Write buffer size of memtable. - pub write_buffer_size: u32, - /// Compaction strategy of the table. - pub compaction_strategy: CompactionStrategy, - /// Row number in a row group. 
- pub num_rows_per_row_group: usize, - /// Table Compression - pub compression: Compression, - - /// Memtable type - pub memtable_type: MemtableType, - /// Layered memtable options - pub layered_memtable_opts: LayeredMemtableOptions, -} - -impl TableOptions { - pub fn from_map(map: &HashMap, is_create: bool) -> Result { - let opt = Self::default(); - merge_table_options(map, &opt, is_create) - } - - #[inline] - pub fn segment_duration(&self) -> Option { - self.segment_duration.map(|v| v.0) - } - - #[inline] - pub fn ttl(&self) -> Option { - if self.enable_ttl { - Some(self.ttl) - } else { - None - } - } - - // for show create table - pub fn to_raw_map(&self) -> HashMap { - let mut m = [ - ( - SEGMENT_DURATION.to_string(), - self.segment_duration - .map(|v| v.to_string()) - .unwrap_or_default(), - ), - (UPDATE_MODE.to_string(), self.update_mode.to_string()), - (ENABLE_TTL.to_string(), self.enable_ttl.to_string()), - (TTL.to_string(), format!("{}", self.ttl)), - ( - ARENA_BLOCK_SIZE.to_string(), - format!("{}", self.arena_block_size), - ), - ( - WRITE_BUFFER_SIZE.to_string(), - format!("{}", self.write_buffer_size), - ), - ( - NUM_ROWS_PER_ROW_GROUP.to_string(), - format!("{}", self.num_rows_per_row_group), - ), - (COMPRESSION.to_string(), self.compression.to_string()), - ( - STORAGE_FORMAT.to_string(), - self.storage_format_hint.to_string(), - ), - (MEMTABLE_TYPE.to_string(), self.memtable_type.to_string()), - ( - LAYERED_ENABLE.to_string(), - self.layered_memtable_opts.enable.to_string(), - ), - ( - LAYERED_MUTABLE_SWITCH_THRESHOLD.to_string(), - self.layered_memtable_opts - .mutable_segment_switch_threshold - .0 - .to_string(), - ), - ] - .into_iter() - .collect(); - self.compaction_strategy.fill_raw_map(&mut m); - - m - } - - /// Check if the options are valid. - /// If invalid, Some(reason) will be returned. - /// If valid, None will be returned - pub fn check_validity(&self) -> Option { - if self.layered_memtable_opts.enable - && self - .layered_memtable_opts - .mutable_segment_switch_threshold - .0 - == 0 - { - return Some(format!( - "layered memtable is enabled but mutable_switch_threshold is 0, layered_memtable_opts:{:?}", - self.layered_memtable_opts, - )); - } - - // layered memtable is not support in overwrite mode - if self.need_dedup() && self.layered_memtable_opts.enable { - return Some(format!( - "layered memtable is enabled for table needing dedup, layered_memtable_opts:{:?}, update_mode:{:?}", - self.layered_memtable_opts, self.update_mode, - )); - } - - None - } - - /// Sanitize options silently. - pub fn sanitize(&mut self) { - let one_day_secs = BUCKET_DURATION_1D.as_secs(); - - if let Some(segment_duration) = self.segment_duration { - let mut segment_duration_secs = segment_duration.as_secs(); - if segment_duration_secs == 0 { - segment_duration_secs = DEFAULT_SEGMENT_DURATION.as_secs() - }; - self.segment_duration = Some(ReadableDuration::secs(segment_duration_secs)); - } - - let ttl_secs = self.ttl.as_secs(); - // Ttl must align to day. 
- let ttl_secs = ttl_secs / one_day_secs * one_day_secs; - self.ttl = ReadableDuration::secs(ttl_secs); - - if self.arena_block_size < MIN_ARENA_BLOCK_SIZE { - self.arena_block_size = MIN_ARENA_BLOCK_SIZE; - } - - if self.arena_block_size > MAX_ARENA_BLOCK_SIZE { - self.arena_block_size = MAX_ARENA_BLOCK_SIZE; - } - - if self.num_rows_per_row_group < MIN_NUM_ROWS_PER_ROW_GROUP { - self.num_rows_per_row_group = MIN_NUM_ROWS_PER_ROW_GROUP; - } - - if self.num_rows_per_row_group > MAX_NUM_ROWS_PER_ROW_GROUP { - self.num_rows_per_row_group = MAX_NUM_ROWS_PER_ROW_GROUP; - } - } - - pub fn need_dedup(&self) -> bool { - match self.update_mode { - UpdateMode::Overwrite => true, - UpdateMode::Append => false, - } - } - - // Only support sample primary key for APPEND. - pub fn support_sample_pk(&self) -> bool { - match self.update_mode { - UpdateMode::Overwrite => false, - UpdateMode::Append => true, - } - } - - pub fn is_expired(&self, timestamp: Timestamp) -> bool { - self.enable_ttl && timestamp.is_expired(Timestamp::expire_time(self.ttl.0)) - } -} - -impl From for manifest_pb::CompactionOptions { - fn from(opts: SizeTieredCompactionOptions) -> Self { - manifest_pb::CompactionOptions { - bucket_low: opts.bucket_low, - bucket_high: opts.bucket_high, - min_sstable_size: opts.min_sstable_size.0 as u32, - min_threshold: opts.min_threshold as u32, - max_threshold: opts.max_threshold as u32, - // FIXME: Is it ok to use the default timestamp resolution here? - timestamp_resolution: manifest_pb::TimeUnit::Nanoseconds as i32, - } - } -} - -impl From for SizeTieredCompactionOptions { - fn from(opts: manifest_pb::CompactionOptions) -> Self { - Self { - bucket_low: opts.bucket_low, - bucket_high: opts.bucket_high, - min_sstable_size: ReadableSize(opts.min_sstable_size as u64), - min_threshold: opts.min_threshold as usize, - max_threshold: opts.max_threshold as usize, - max_input_sstable_size: compaction::get_max_input_sstable_size(), - } - } -} - -impl From for manifest_pb::CompactionOptions { - fn from(v: TimeWindowCompactionOptions) -> Self { - manifest_pb::CompactionOptions { - bucket_low: v.size_tiered.bucket_low, - bucket_high: v.size_tiered.bucket_high, - min_sstable_size: v.size_tiered.min_sstable_size.0 as u32, - min_threshold: v.size_tiered.min_threshold as u32, - max_threshold: v.size_tiered.max_threshold as u32, - timestamp_resolution: manifest_pb::TimeUnit::from(v.timestamp_resolution) as i32, - } - } -} - -impl From for TimeWindowCompactionOptions { - fn from(opts: manifest_pb::CompactionOptions) -> Self { - let size_tiered: SizeTieredCompactionOptions = opts.clone().into(); - - Self { - size_tiered, - timestamp_resolution: TimeUnit::from(opts.timestamp_resolution()), - } - } -} - -impl From for manifest_pb::TableOptions { - fn from(opts: TableOptions) -> Self { - let segment_duration = opts - .segment_duration - .map(|v| v.0.as_millis_u64()) - .unwrap_or(0); - let sampling_segment_duration = opts.segment_duration.is_none(); - - let (compaction_strategy, compaction_options) = match opts.compaction_strategy { - CompactionStrategy::Default => (manifest_pb::CompactionStrategy::Default, None), - CompactionStrategy::SizeTiered(v) => ( - manifest_pb::CompactionStrategy::SizeTiered, - Some(manifest_pb::CompactionOptions::from(v)), - ), - CompactionStrategy::TimeWindow(v) => ( - manifest_pb::CompactionStrategy::TimeWindow, - Some(manifest_pb::CompactionOptions::from(v)), - ), - }; - - let layered_memtable_opts = opts.layered_memtable_opts.into(); - - manifest_pb::TableOptions { - segment_duration, - 
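The `sanitize` method above never rejects a table: out-of-range values are clamped and the TTL is rounded down to whole days. A small std-only sketch of those two adjustments (bounds copied from the deleted constants, function names illustrative):

// Sketch of the sanitization idea (std-only): TTL is rounded down to whole days with
// integer division, and size-style options are clamped into [min, max] instead of
// being rejected.
use std::time::Duration;

const ONE_DAY_SECS: u64 = 24 * 60 * 60;
const MIN_ARENA_BLOCK_SIZE: u32 = 1024;                    // 1 KiB, as in the deleted code
const MAX_ARENA_BLOCK_SIZE: u32 = 2 * 1024 * 1024 * 1024;  // 2 GiB

fn align_ttl_to_day(ttl: Duration) -> Duration {
    // e.g. 36 hours -> 24 hours: the fractional day is dropped.
    Duration::from_secs(ttl.as_secs() / ONE_DAY_SECS * ONE_DAY_SECS)
}

fn clamp_arena_block_size(size: u32) -> u32 {
    size.clamp(MIN_ARENA_BLOCK_SIZE, MAX_ARENA_BLOCK_SIZE)
}

fn main() {
    assert_eq!(align_ttl_to_day(Duration::from_secs(36 * 60 * 60)).as_secs(), ONE_DAY_SECS);
    assert_eq!(clamp_arena_block_size(100), MIN_ARENA_BLOCK_SIZE);
}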
enable_ttl: opts.enable_ttl, - ttl: opts.ttl.0.as_millis_u64(), - arena_block_size: opts.arena_block_size, - num_rows_per_row_group: opts.num_rows_per_row_group as u64, - compaction_strategy: compaction_strategy as i32, - compaction_options, - update_mode: manifest_pb::UpdateMode::from(opts.update_mode) as i32, - write_buffer_size: opts.write_buffer_size, - compression: manifest_pb::Compression::from(opts.compression) as i32, - sampling_segment_duration, - storage_format_hint: Some(manifest_pb::StorageFormatHint::from( - opts.storage_format_hint, - )), - layered_memtable_options: Some(layered_memtable_opts), - // TODO: persist `memtable_type` in PB. - } - } -} - -impl From for manifest_pb::UpdateMode { - fn from(v: UpdateMode) -> Self { - match v { - UpdateMode::Overwrite => manifest_pb::UpdateMode::Overwrite, - UpdateMode::Append => manifest_pb::UpdateMode::Append, - } - } -} - -impl From for UpdateMode { - fn from(v: manifest_pb::UpdateMode) -> Self { - match v { - manifest_pb::UpdateMode::Overwrite => UpdateMode::Overwrite, - manifest_pb::UpdateMode::Append => UpdateMode::Append, - } - } -} - -impl TryFrom for TableOptions { - type Error = Error; - - fn try_from(opts: manifest_pb::TableOptions) -> Result { - let compression = opts.compression(); - let update_mode = opts.update_mode(); - - let compaction_strategy = match opts.compaction_strategy() { - manifest_pb::CompactionStrategy::Default => CompactionStrategy::default(), - manifest_pb::CompactionStrategy::SizeTiered => { - let opts = opts - .compaction_options - .map(SizeTieredCompactionOptions::from) - .unwrap_or_default(); - CompactionStrategy::SizeTiered(opts) - } - manifest_pb::CompactionStrategy::TimeWindow => { - let opts = opts - .compaction_options - .map(TimeWindowCompactionOptions::from) - .unwrap_or_default(); - CompactionStrategy::TimeWindow(opts) - } - }; - - let segment_duration = if opts.sampling_segment_duration { - None - } else if opts.segment_duration == 0 { - // If segment duration is still zero. If the data had been used by an elder - // version release that not yet support sampling, the - // `sampling_segment_duration` flag would be truncated after - // manifest snapshot, but left segment duration zero. - Some(DEFAULT_SEGMENT_DURATION.into()) - } else { - Some(Duration::from_millis(opts.segment_duration).into()) - }; - - let storage_format_hint = opts.storage_format_hint.context(MissingStorageFormatHint)?; - // For compatible with old `table_options`, `layered_memtable_options` is - // allowed to be `None`, and when found `None`, we disable `layered_memtable`. 
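Recovering the segment duration from persisted options has three cases, as the comment above describes: the table is still sampling, the value was persisted as zero by an old release without sampling support, or a real duration was stored. A std-only sketch, with a stand-in struct rather than the real protobuf message:

// Illustrative recovery of the persisted segment duration (std-only; `PersistedOptions`
// is a stand-in, not the protobuf type).
use std::time::Duration;

const DEFAULT_SEGMENT_DURATION: Duration = Duration::from_secs(2 * 60 * 60);

struct PersistedOptions {
    sampling_segment_duration: bool,
    segment_duration_ms: u64,
}

fn recover_segment_duration(opts: &PersistedOptions) -> Option<Duration> {
    if opts.sampling_segment_duration {
        None // still sampling, duration not decided yet
    } else if opts.segment_duration_ms == 0 {
        Some(DEFAULT_SEGMENT_DURATION) // written by an old release without sampling support
    } else {
        Some(Duration::from_millis(opts.segment_duration_ms))
    }
}

fn main() {
    let old = PersistedOptions { sampling_segment_duration: false, segment_duration_ms: 0 };
    assert_eq!(recover_segment_duration(&old), Some(DEFAULT_SEGMENT_DURATION));
}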
- let layered_memtable_opts = match opts.layered_memtable_options { - Some(v) => v.into(), - None => LayeredMemtableOptions { - mutable_segment_switch_threshold: ReadableSize(0), - enable: false, - }, - }; - - let table_opts = Self { - segment_duration, - enable_ttl: opts.enable_ttl, - ttl: Duration::from_millis(opts.ttl).into(), - arena_block_size: opts.arena_block_size, - compaction_strategy, - num_rows_per_row_group: opts.num_rows_per_row_group as usize, - update_mode: UpdateMode::from(update_mode), - write_buffer_size: opts.write_buffer_size, - compression: Compression::from(compression), - storage_format_hint: StorageFormatHint::try_from(storage_format_hint)?, - memtable_type: MemtableType::SkipList, - layered_memtable_opts, - }; - - Ok(table_opts) - } -} - -impl Default for TableOptions { - fn default() -> Self { - Self { - segment_duration: None, - enable_ttl: true, - ttl: DEFAULT_TTL.into(), - arena_block_size: DEFAULT_ARENA_BLOCK_SIZE, - compaction_strategy: CompactionStrategy::default(), - num_rows_per_row_group: DEFAULT_NUM_ROW_PER_ROW_GROUP, - update_mode: UpdateMode::Overwrite, - write_buffer_size: DEFAULT_WRITE_BUFFER_SIZE, - compression: Compression::Zstd, - storage_format_hint: StorageFormatHint::default(), - memtable_type: MemtableType::SkipList, - layered_memtable_opts: LayeredMemtableOptions::default(), - } - } -} - -pub fn merge_table_options_for_create( - options: &HashMap, - table_opts: &TableOptions, -) -> Result { - merge_table_options(options, table_opts, true) -} - -pub fn merge_table_options_for_alter( - options: &HashMap, - table_opts: &TableOptions, -) -> Result { - merge_table_options(options, table_opts, false) -} - -/// The options will override the old options. -fn merge_table_options( - options: &HashMap, - base_table_opts: &TableOptions, - is_create: bool, -) -> Result { - let mut base_table_opts = base_table_opts.clone(); - if is_create { - if let Some(v) = options.get(SEGMENT_DURATION) { - if v.is_empty() { - base_table_opts.segment_duration = None; - } else { - base_table_opts.segment_duration = Some(parse_duration(v).context(ParseDuration)?); - } - } - if let Some(v) = options.get(UPDATE_MODE) { - base_table_opts.update_mode = UpdateMode::parse_from(v)?; - } - } - - if let Some(v) = options.get(TTL) { - base_table_opts.ttl = parse_duration(v).context(ParseDuration)?; - } - if let Some(v) = options.get(OPTION_KEY_ENABLE_TTL) { - base_table_opts.enable_ttl = v.parse::().context(ParseBool)?; - } - if let Some(v) = options.get(ARENA_BLOCK_SIZE) { - let size = parse_size(v)?; - base_table_opts.arena_block_size = size.0 as u32; - } - if let Some(v) = options.get(WRITE_BUFFER_SIZE) { - let size = parse_size(v)?; - base_table_opts.write_buffer_size = size.0 as u32; - } - if let Some(v) = options.get(COMPACTION_STRATEGY) { - base_table_opts.compaction_strategy = - CompactionStrategy::parse_from(v, options).context(ParseStrategy { value: v })?; - } - if let Some(v) = options.get(NUM_ROWS_PER_ROW_GROUP) { - base_table_opts.num_rows_per_row_group = v.parse().context(ParseInt)?; - } - if let Some(v) = options.get(COMPRESSION) { - base_table_opts.compression = Compression::parse_from(v)?; - } - if let Some(v) = options.get(STORAGE_FORMAT) { - base_table_opts.storage_format_hint = v.as_str().try_into()?; - } - if let Some(v) = options.get(MEMTABLE_TYPE) { - base_table_opts.memtable_type = MemtableType::parse_from(v); - } - if let Some(v) = options.get(LAYERED_ENABLE) { - let enable = match v.parse::() { - Ok(v) => v, - Err(e) => { - return 
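`merge_table_options` layers a user-supplied string map over the current options and consults creation-time-only keys such as `update_mode` only when `is_create` is true. A trimmed std-only sketch of that merge, with an illustrative options struct and plain `String` errors:

// Trimmed sketch of the merge logic (std-only): keys that are immutable after table
// creation are ignored on ALTER, mutable keys override the base options.
use std::collections::HashMap;

#[derive(Clone)]
struct Opts {
    update_mode: String,
    enable_ttl: bool,
    num_rows_per_row_group: usize,
}

fn merge(map: &HashMap<String, String>, base: &Opts, is_create: bool) -> Result<Opts, String> {
    let mut opts = base.clone();
    if is_create {
        // Immutable once the table exists, so ALTER silently ignores it.
        if let Some(v) = map.get("update_mode") {
            opts.update_mode = v.to_uppercase();
        }
    }
    if let Some(v) = map.get("enable_ttl") {
        opts.enable_ttl = v.parse().map_err(|e| format!("bad enable_ttl: {e}"))?;
    }
    if let Some(v) = map.get("num_rows_per_row_group") {
        opts.num_rows_per_row_group = v.parse().map_err(|e| format!("bad row group size: {e}"))?;
    }
    Ok(opts)
}

fn main() {
    let base = Opts { update_mode: "OVERWRITE".into(), enable_ttl: true, num_rows_per_row_group: 8192 };
    let mut m = HashMap::new();
    m.insert("update_mode".to_string(), "append".to_string());
    // ALTER (is_create = false) leaves the immutable key untouched.
    assert_eq!(merge(&m, &base, false).unwrap().update_mode, "OVERWRITE");
}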
ParseLayeredMemtableOptions { - msg: format!("invalid layered_enable setting, err:{e}"), - } - .fail() - } - }; - base_table_opts.layered_memtable_opts.enable = enable; - } - if let Some(v) = options.get(LAYERED_MUTABLE_SWITCH_THRESHOLD) { - let threshold = match ReadableSize::from_str(v) { - Ok(v) => v, - Err(e) => { - return ParseLayeredMemtableOptions { - msg: format!("invalid layered_mutable_switch_threshold setting, err:{e}"), - } - .fail() - } - }; - - base_table_opts - .layered_memtable_opts - .mutable_segment_switch_threshold = threshold; - } - - Ok(base_table_opts) -} - -fn parse_size(v: &str) -> Result { - v.parse::().map_err(|err| Error::ParseSize { - err, - backtrace: Backtrace::generate(), - }) -} diff --git a/src/analytic_engine/src/tests/alter_test.rs b/src/analytic_engine/src/tests/alter_test.rs deleted file mode 100644 index 670d88ac0c..0000000000 --- a/src/analytic_engine/src/tests/alter_test.rs +++ /dev/null @@ -1,514 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Alter test - -use std::collections::{BTreeMap, HashMap}; - -use common_types::{ - column_schema, - datum::DatumKind, - row::RowGroup, - schema::{self, Schema}, - time::Timestamp, -}; -use logger::info; -use table_engine::table::AlterSchemaRequest; -use wal::manager::WalsOpener; - -use crate::{ - table_options::TableOptions, - tests::{ - row_util, - table::{self, FixedSchemaTable}, - util::{memory_ctxs, rocksdb_ctxs, EngineBuildContext, Null, TestContext, TestEnv}, - }, -}; - -#[test] -fn test_alter_table_add_column_rocks() { - let rocksdb_ctxs = rocksdb_ctxs(); - for ctx in rocksdb_ctxs { - test_alter_table_add_column(ctx); - } -} - -#[ignore = "Enable this test when manifest use another snapshot implementation"] -#[test] -fn test_alter_table_add_column_mem_wal() { - let memory_ctxs = memory_ctxs(); - for ctx in memory_ctxs { - test_alter_table_add_column(ctx); - } -} - -fn test_alter_table_add_column(engine_context: T) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - - env.block_on(async { - test_ctx.open().await; - - let alter_test_table1 = "alter_test_table1"; - let fixed_schema_table = test_ctx.create_fixed_schema_table(alter_test_table1).await; - - let start_ms = test_ctx.start_ms(); - let rows = [ - ( - "key1", - Timestamp::new(start_ms), - "tag1-1", - 11.0, - 110.0, - "tag2-1", - ), - ( - "key2", - Timestamp::new(start_ms), - "tag1-2", - 12.0, - 110.0, - "tag2-2", - ), - ]; - - // Write data to table. 
- let row_group = fixed_schema_table.rows_to_row_group(&rows); - test_ctx.write_to_table(alter_test_table1, row_group).await; - - alter_schema_same_schema_version_case(&test_ctx, alter_test_table1).await; - - alter_schema_old_pre_version_case(&test_ctx, alter_test_table1).await; - - alter_schema_add_column_case(&mut test_ctx, alter_test_table1, start_ms, false).await; - - // Prepare another table for alter. - let alter_test_table2 = "alter_test_table2"; - test_ctx.create_fixed_schema_table(alter_test_table2).await; - let row_group = fixed_schema_table.rows_to_row_group(&rows); - test_ctx.write_to_table(alter_test_table2, row_group).await; - - alter_schema_add_column_case(&mut test_ctx, alter_test_table2, start_ms, true).await; - }); -} - -// Add two columns: -// - add_string -// - add_double -fn add_columns(schema_builder: schema::Builder) -> schema::Builder { - schema_builder - .add_normal_column( - column_schema::Builder::new("add_string".to_string(), DatumKind::String) - .is_nullable(true) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("add_double".to_string(), DatumKind::Double) - .is_nullable(true) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() -} - -async fn alter_schema_same_schema_version_case( - test_ctx: &TestContext, - table_name: &str, -) { - info!("test alter_schema_same_schema_version_case"); - - let mut schema_builder = FixedSchemaTable::default_schema_builder(); - schema_builder = add_columns(schema_builder); - let new_schema = schema_builder - .primary_key_indexes(vec![0, 1]) - .build() - .unwrap(); - - let table = test_ctx.table(table_name); - let old_schema = table.schema(); - - let request = AlterSchemaRequest { - schema: new_schema, - pre_schema_version: old_schema.version(), - }; - - let res = test_ctx.try_alter_schema(table_name, request).await; - assert!(res.is_err()); -} - -async fn alter_schema_old_pre_version_case( - test_ctx: &TestContext, - table_name: &str, -) { - info!("test alter_schema_old_pre_version_case"); - - let mut schema_builder = FixedSchemaTable::default_schema_builder(); - schema_builder = add_columns(schema_builder); - - let table = test_ctx.table(table_name); - let old_schema = table.schema(); - - let new_schema = schema_builder - .version(old_schema.version() + 1) - .primary_key_indexes(old_schema.primary_key_indexes().to_vec()) - .build() - .unwrap(); - - let request = AlterSchemaRequest { - schema: new_schema, - pre_schema_version: old_schema.version() - 1, - }; - - let res = test_ctx.try_alter_schema(table_name, request).await; - assert!(res.is_err()); -} - -async fn alter_schema_add_column_case( - test_ctx: &mut TestContext, - table_name: &str, - start_ms: i64, - flush: bool, -) { - info!( - "test alter_schema_add_column_case, table_name:{}", - table_name - ); - - let mut schema_builder = FixedSchemaTable::default_schema_builder(); - schema_builder = add_columns(schema_builder); - - let old_schema = test_ctx.table(table_name).schema(); - - let new_schema = schema_builder - .version(old_schema.version() + 1) - .primary_key_indexes(old_schema.primary_key_indexes().to_vec()) - .build() - .unwrap(); - - let request = AlterSchemaRequest { - schema: new_schema.clone(), - pre_schema_version: old_schema.version(), - }; - - let affected = test_ctx - .try_alter_schema(table_name, request) - .await - .unwrap(); - assert_eq!(0, affected); - - let rows = [ - ( - "key1", - Timestamp::new(start_ms + 10), - "tag1-1", - 11.0, - 110.0, - "tag2-1", - 
"add1-1", - 210.0, - ), - ( - "key2", - Timestamp::new(start_ms + 10), - "tag1-2", - 12.0, - 110.0, - "tag2-2", - "add1-2", - 220.0, - ), - ]; - let rows_vec = row_util::new_rows_8(&rows); - let row_group = RowGroup::try_new(new_schema.clone(), rows_vec).unwrap(); - - // Write data with new schema. - test_ctx.write_to_table(table_name, row_group).await; - - if flush { - test_ctx.flush_table(table_name).await; - } - - let new_schema_rows = [ - // We need to check null datum, so tuples have different types and we need to - // convert it into row first. - row_util::new_row_8(( - "key1", - Timestamp::new(start_ms), - "tag1-1", - 11.0, - 110.0, - "tag2-1", - Null, - Null, - )), - row_util::new_row_8(( - "key1", - Timestamp::new(start_ms + 10), - "tag1-1", - 11.0, - 110.0, - "tag2-1", - "add1-1", - 210.0, - )), - row_util::new_row_8(( - "key2", - Timestamp::new(start_ms), - "tag1-2", - 12.0, - 110.0, - "tag2-2", - Null, - Null, - )), - row_util::new_row_8(( - "key2", - Timestamp::new(start_ms + 10), - "tag1-2", - 12.0, - 110.0, - "tag2-2", - "add1-2", - 220.0, - )), - ]; - let new_schema_row_group = - RowGroup::try_new(new_schema.clone(), new_schema_rows.to_vec()).unwrap(); - - // Read data using new schema. - check_read_row_group( - test_ctx, - "Test read new schema after add columns", - table_name, - &new_schema, - &new_schema_row_group, - ) - .await; - - let old_schema_rows = [ - ( - "key1", - Timestamp::new(start_ms), - "tag1-1", - 11.0, - 110.0, - "tag2-1", - ), - ( - "key1", - Timestamp::new(start_ms + 10), - "tag1-1", - 11.0, - 110.0, - "tag2-1", - ), - ( - "key2", - Timestamp::new(start_ms), - "tag1-2", - 12.0, - 110.0, - "tag2-2", - ), - ( - "key2", - Timestamp::new(start_ms + 10), - "tag1-2", - 12.0, - 110.0, - "tag2-2", - ), - ]; - let old_schema_rows_vec = row_util::new_rows_6(&old_schema_rows); - let old_schema_row_group = RowGroup::try_new(old_schema.clone(), old_schema_rows_vec).unwrap(); - - // Read data using old schema. - check_read_row_group( - test_ctx, - "Test read old schema after add columns", - table_name, - &old_schema, - &old_schema_row_group, - ) - .await; - - // Reopen db. - test_ctx.reopen_with_tables(&[table_name]).await; - - // Read again after reopen. 
- check_read_row_group( - test_ctx, - "Test read after reopen", - table_name, - &new_schema, - &new_schema_row_group, - ) - .await; -} - -async fn check_read_row_group( - test_ctx: &TestContext, - msg: &str, - table_name: &str, - schema: &Schema, - row_group: &RowGroup, -) { - for read_opts in table::read_opts_list() { - info!("{}, opts:{:?}", msg, read_opts); - - let record_batches = test_ctx - .read_table( - table_name, - table::new_read_all_request(schema.clone(), read_opts), - ) - .await; - - table::assert_batch_eq_to_row_group(&record_batches, row_group); - } -} - -#[test] -fn test_alter_table_options_rocks() { - let rocksdb_ctxs = rocksdb_ctxs(); - for ctx in rocksdb_ctxs { - test_alter_table_options(ctx); - } -} - -#[ignore = "Enable this test when manifest use another snapshot implementation"] -#[test] -fn test_alter_table_options_mem_wal() { - let memory_ctxs = memory_ctxs(); - for ctx in memory_ctxs { - test_alter_table_options(ctx); - } -} - -fn test_alter_table_options(engine_context: T) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - - env.block_on(async { - test_ctx.open().await; - - let alter_test_table1 = "alter_test_table1"; - test_ctx.create_fixed_schema_table(alter_test_table1).await; - - let opts = test_ctx.table(alter_test_table1).options(); - - let default_opts_map = default_options(); - - assert_options_eq(&default_opts_map, &opts); - - alter_immutable_option_case(&test_ctx, alter_test_table1, "segment_duration", "20d").await; - - alter_immutable_option_case(&test_ctx, alter_test_table1, "bucket_duration", "20d").await; - - alter_immutable_option_case(&test_ctx, alter_test_table1, "update_mode", "Append").await; - - alter_mutable_option_case(&mut test_ctx, alter_test_table1, "enable_ttl", "false").await; - alter_mutable_option_case(&mut test_ctx, alter_test_table1, "enable_ttl", "true").await; - - alter_mutable_option_case( - &mut test_ctx, - alter_test_table1, - "arena_block_size", - "10240", - ) - .await; - - alter_mutable_option_case( - &mut test_ctx, - alter_test_table1, - "write_buffer_size", - "1024000", - ) - .await; - - alter_mutable_option_case( - &mut test_ctx, - alter_test_table1, - "num_rows_per_row_group", - "10000", - ) - .await; - }); -} - -async fn alter_immutable_option_case( - test_ctx: &TestContext, - table_name: &str, - opt_key: &str, - opt_value: &str, -) { - let old_opts = test_ctx.table(table_name).options(); - - let mut new_opts = HashMap::new(); - new_opts.insert(opt_key.to_string(), opt_value.to_string()); - - let affected = test_ctx - .try_alter_options(table_name, new_opts) - .await - .unwrap(); - assert_eq!(0, affected); - - let opts_after_alter = test_ctx.table(table_name).options(); - assert_options_eq(&old_opts, &opts_after_alter); -} - -async fn alter_mutable_option_case( - test_ctx: &mut TestContext, - table_name: &str, - opt_key: &str, - opt_value: &str, -) { - let mut expect_opts = test_ctx.table(table_name).options(); - expect_opts.insert(opt_key.to_string(), opt_value.to_string()); - - let mut new_opts = HashMap::new(); - new_opts.insert(opt_key.to_string(), opt_value.to_string()); - - let affected = test_ctx - .try_alter_options(table_name, new_opts) - .await - .unwrap(); - assert_eq!(0, affected); - - let opts_after_alter = test_ctx.table(table_name).options(); - assert_options_eq(&expect_opts, &opts_after_alter); - - // Reopen table. 
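What the add-column test above asserts is that rows written under the old schema are read back through the new, wider schema with NULL in the added columns. A tiny std-only model of that padding (types are illustrative, not the engine's row representation):

// Rows written before a schema change come back padded with NULL (`None` here) for
// columns that did not exist when they were written.
type Row = Vec<Option<String>>;

fn project_to_new_schema(old_row: &[Option<String>], new_width: usize) -> Row {
    let mut row = old_row.to_vec();
    // Columns the old row never had are read as NULL.
    row.resize(new_width, None);
    row
}

fn main() {
    let old_row = vec![Some("key1".to_string()), Some("tag1-1".to_string())];
    let widened = project_to_new_schema(&old_row, 4);
    assert_eq!(widened.len(), 4);
    assert_eq!(widened[2], None);
    assert_eq!(widened[3], None);
}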
- test_ctx.reopen_with_tables(&[table_name]).await; - - let opts_after_alter = test_ctx.table(table_name).options(); - assert_options_eq(&expect_opts, &opts_after_alter); -} - -fn assert_options_eq(left: &HashMap, right: &HashMap) { - let sorted_left: BTreeMap<_, _> = left.iter().collect(); - let sorted_right: BTreeMap<_, _> = right.iter().collect(); - - assert_eq!(sorted_left, sorted_right); -} - -fn default_options() -> HashMap { - let table_opts = TableOptions::default(); - - table_opts.to_raw_map() -} diff --git a/src/analytic_engine/src/tests/compaction_test.rs b/src/analytic_engine/src/tests/compaction_test.rs deleted file mode 100644 index 887d210fb4..0000000000 --- a/src/analytic_engine/src/tests/compaction_test.rs +++ /dev/null @@ -1,118 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Compaction integration tests. - -use common_types::time::Timestamp; -use table_engine::table::FlushRequest; - -use crate::{ - compaction::SizeTieredCompactionOptions, - tests::util::{ - self, EngineBuildContext, MemoryEngineBuildContext, RocksDBEngineBuildContext, TestEnv, - }, -}; - -#[test] -fn test_table_compact_current_segment_rocks() { - let rocksdb_ctx = RocksDBEngineBuildContext::default(); - test_table_compact_current_segment(rocksdb_ctx); -} - -#[test] -fn test_table_compact_current_segment_mem_wal() { - let memory_ctx = MemoryEngineBuildContext::default(); - test_table_compact_current_segment(memory_ctx); -} - -fn test_table_compact_current_segment(engine_context: T) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - - env.block_on(async { - test_ctx.open().await; - - let compact_test_table1 = "compact_test_table1"; - let fixed_schema_table = test_ctx - .create_fixed_schema_table(compact_test_table1) - .await; - let default_opts = SizeTieredCompactionOptions::default(); - - let mut expect_rows = Vec::new(); - - let start_ms = test_ctx.start_ms(); - // Write max_threshold*2 sst to ensure level0->level1, level1->level1 compaction - // will be triggered. - for offset in 0..default_opts.max_threshold as i64 * 2 { - let rows = [ - ( - "key1", - Timestamp::new(start_ms + offset), - "tag1-1", - 11.0, - 110.0, - "tag2-1", - ), - ( - "key2", - Timestamp::new(start_ms + offset), - "tag1-2", - 12.0, - 110.0, - "tag2-2", - ), - ]; - expect_rows.extend_from_slice(&rows); - let row_group = fixed_schema_table.rows_to_row_group(&rows); - - test_ctx - .write_to_table(compact_test_table1, row_group) - .await; - - // Flush table and generate sst. 
- test_ctx - .flush_table_with_request(compact_test_table1, FlushRequest { sync: true }) - .await; - } - - expect_rows.sort_unstable_by_key(|row_tuple| (row_tuple.0, row_tuple.1)); - - util::check_read( - &test_ctx, - &fixed_schema_table, - "Test read after flush", - compact_test_table1, - &expect_rows, - ) - .await; - - test_util::init_log_for_test(); - - // Trigger a compaction. - test_ctx.compact_table(compact_test_table1).await; - - // Check read after compaction. - util::check_read( - &test_ctx, - &fixed_schema_table, - "Test read after compaction", - compact_test_table1, - &expect_rows, - ) - .await; - }); -} diff --git a/src/analytic_engine/src/tests/drop_test.rs b/src/analytic_engine/src/tests/drop_test.rs deleted file mode 100644 index 82b66f4f79..0000000000 --- a/src/analytic_engine/src/tests/drop_test.rs +++ /dev/null @@ -1,356 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Drop table tests - -use std::collections::HashMap; - -use common_types::{column_schema, datum::DatumKind, time::Timestamp}; -use table_engine::table::AlterSchemaRequest; - -use crate::tests::{ - table::FixedSchemaTable, - util::{ - self, memory_ctxs, rocksdb_ctxs, EngineBuildContext, MemoryEngineBuildContext, - RocksDBEngineBuildContext, TestEnv, - }, -}; - -#[test] -fn test_drop_table_once_rocks() { - let rocksdb_ctx = RocksDBEngineBuildContext::default(); - test_drop_table_once(rocksdb_ctx); -} - -#[test] -fn test_drop_table_once_mem_wal() { - let memory_ctx = MemoryEngineBuildContext::default(); - test_drop_table_once(memory_ctx); -} - -fn test_drop_table_once(engine_context: T) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - - env.block_on(async { - test_ctx.open().await; - - let drop_test_table1 = "drop_test_table1"; - let table_id = test_ctx - .create_fixed_schema_table(drop_test_table1) - .await - .table_id(); - - assert!(test_ctx.drop_table(drop_test_table1).await); - - let table_opt = test_ctx - .try_open_table(table_id, drop_test_table1) - .await - .unwrap(); - assert!(table_opt.is_none()); - - test_ctx.reopen().await; - - let table_opt = test_ctx - .try_open_table(table_id, drop_test_table1) - .await - .unwrap(); - assert!(table_opt.is_none()); - }); -} - -#[test] -fn test_drop_table_again_rocks() { - let rocksdb_ctx = RocksDBEngineBuildContext::default(); - test_drop_table_again(rocksdb_ctx); -} - -#[test] -fn test_drop_table_again_mem_wal() { - let memory_ctx = MemoryEngineBuildContext::default(); - test_drop_table_again(memory_ctx); -} - -fn test_drop_table_again(engine_context: T) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - - env.block_on(async { - test_ctx.open().await; - - let drop_test_table1 = "drop_test_table1"; - let 
table_id = test_ctx - .create_fixed_schema_table(drop_test_table1) - .await - .table_id(); - - assert!(test_ctx.drop_table(drop_test_table1).await); - - assert!(!test_ctx.drop_table(drop_test_table1).await); - - let table_opt = test_ctx - .try_open_table(table_id, drop_test_table1) - .await - .unwrap(); - assert!(table_opt.is_none()); - }); -} - -#[test] -fn test_drop_create_table_mixed_rocks() { - let rocksdb_ctx = RocksDBEngineBuildContext::default(); - test_drop_create_table_mixed(rocksdb_ctx); -} - -#[test] -fn test_drop_create_table_mixed_mem_wal() { - let memory_ctx = MemoryEngineBuildContext::default(); - test_drop_create_table_mixed(memory_ctx); -} - -fn test_drop_create_table_mixed(engine_context: T) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - - env.block_on(async { - test_ctx.open().await; - - let drop_test_table1 = "drop_test_table1"; - let table1_id = test_ctx - .create_fixed_schema_table(drop_test_table1) - .await - .table_id(); - - assert!(test_ctx.drop_table(drop_test_table1).await); - - // Create another table after dropped. - let test_table2 = "test_table2"; - let table2_id = test_ctx - .create_fixed_schema_table(test_table2) - .await - .table_id(); - - let table_opt = test_ctx - .try_open_table(table1_id, drop_test_table1) - .await - .unwrap(); - assert!(table_opt.is_none()); - - test_ctx.reopen().await; - - let table_opt = test_ctx - .try_open_table(table1_id, drop_test_table1) - .await - .unwrap(); - assert!(table_opt.is_none()); - // Table 2 is still exists. - assert!(test_ctx - .try_open_table(table2_id, test_table2) - .await - .unwrap() - .is_some()); - }); -} - -fn test_drop_create_same_table_case(flush: bool, engine_context: T) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - - env.block_on(async { - test_ctx.open().await; - - let drop_test_table1 = "drop_test_table1"; - let fixed_schema_table = test_ctx.create_fixed_schema_table(drop_test_table1).await; - - // Write data to table1. - let start_ms = test_ctx.start_ms(); - let rows = [( - "key1", - Timestamp::new(start_ms), - "tag1-1", - 11.0, - 110.0, - "tag2-1", - )]; - let row_group = fixed_schema_table.rows_to_row_group(&rows); - test_ctx.write_to_table(drop_test_table1, row_group).await; - - if flush { - test_ctx.flush_table(drop_test_table1).await; - } - - assert!(test_ctx.drop_table(drop_test_table1).await); - - // Create same table again. - let drop_test_table1 = "drop_test_table1"; - test_ctx.create_fixed_schema_table(drop_test_table1).await; - - // No data exists. - util::check_read( - &test_ctx, - &fixed_schema_table, - "Test read table", - drop_test_table1, - &[], - ) - .await; - - test_ctx.reopen_with_tables(&[drop_test_table1]).await; - - // No data exists. 
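The drop tests above rely on drop being idempotent: the first drop reports success, a second drop of the same name reports that nothing was dropped, and the table can no longer be opened afterwards. A std-only sketch of that contract with a hypothetical in-memory catalog (not an engine type):

// Dropping an existing table reports true, dropping it again reports false, and
// lookups after the drop find nothing.
use std::collections::HashMap;

struct Catalog {
    tables: HashMap<String, u64>, // table name -> table id
}

impl Catalog {
    fn drop_table(&mut self, name: &str) -> bool {
        // `remove` tells us whether the table actually existed.
        self.tables.remove(name).is_some()
    }
}

fn main() {
    let mut catalog = Catalog { tables: HashMap::from([("drop_test_table1".to_string(), 1)]) };
    assert!(catalog.drop_table("drop_test_table1"));  // first drop succeeds
    assert!(!catalog.drop_table("drop_test_table1")); // second drop is a no-op
    assert!(catalog.tables.get("drop_test_table1").is_none());
}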
- util::check_read( - &test_ctx, - &fixed_schema_table, - "Test read table after reopen", - drop_test_table1, - &[], - ) - .await; - }); -} - -#[test] -fn test_drop_create_same_table_rocks() { - let rocksdb_ctxs = rocksdb_ctxs(); - for ctx in rocksdb_ctxs { - test_drop_create_same_table(ctx); - } -} - -#[test] -fn test_drop_create_same_table_mem_wal() { - let memory_ctxs = memory_ctxs(); - for ctx in memory_ctxs { - test_drop_create_same_table(ctx); - } -} - -fn test_drop_create_same_table(engine_context: T) { - test_drop_create_same_table_case::(false, engine_context.clone()); - - test_drop_create_same_table_case::(true, engine_context); -} - -#[test] -fn test_alter_schema_drop_create_rocks() { - let rocksdb_ctxs = rocksdb_ctxs(); - for ctx in rocksdb_ctxs { - test_alter_schema_drop_create(ctx); - } -} - -#[test] -fn test_alter_schema_drop_create_mem_wal() { - let memory_ctxs = memory_ctxs(); - for ctx in memory_ctxs { - test_alter_schema_drop_create(ctx); - } -} - -fn test_alter_schema_drop_create(engine_context: T) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - - env.block_on(async { - test_ctx.open().await; - - let drop_test_table1 = "drop_test_table1"; - test_ctx.create_fixed_schema_table(drop_test_table1).await; - - // Alter schema. - let old_schema = test_ctx.table(drop_test_table1).schema(); - let schema_builder = FixedSchemaTable::default_schema_builder() - .add_normal_column( - column_schema::Builder::new("add_double".to_string(), DatumKind::Double) - .is_nullable(true) - .build() - .unwrap(), - ) - .unwrap(); - - let new_schema = schema_builder - .version(old_schema.version() + 1) - .primary_key_indexes(old_schema.primary_key_indexes().to_vec()) - .build() - .unwrap(); - let request = AlterSchemaRequest { - schema: new_schema.clone(), - pre_schema_version: old_schema.version(), - }; - let affected = test_ctx - .try_alter_schema(drop_test_table1, request) - .await - .unwrap(); - assert_eq!(0, affected); - - // Drop table. - assert!(test_ctx.drop_table(drop_test_table1).await); - - // Create same table again. - let drop_test_table1 = "drop_test_table1"; - test_ctx.create_fixed_schema_table(drop_test_table1).await; - - test_ctx.reopen_with_tables(&[drop_test_table1]).await; - }); -} - -#[test] -fn test_alter_options_drop_create_rocks() { - let rocksdb_ctxs = rocksdb_ctxs(); - for ctx in rocksdb_ctxs { - test_alter_options_drop_create(ctx); - } -} - -#[test] -fn test_alter_options_drop_create_mem_wal() { - let memory_ctxs = memory_ctxs(); - for ctx in memory_ctxs { - test_alter_options_drop_create(ctx); - } -} - -fn test_alter_options_drop_create(engine_context: T) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - - env.block_on(async { - test_ctx.open().await; - - let drop_test_table1 = "drop_test_table1"; - test_ctx.create_fixed_schema_table(drop_test_table1).await; - - // Alter options. - let mut new_opts = HashMap::new(); - new_opts.insert("arena_block_size".to_string(), "10240".to_string()); - - let affected = test_ctx - .try_alter_options(drop_test_table1, new_opts) - .await - .unwrap(); - assert_eq!(0, affected); - - // Drop table. - assert!(test_ctx.drop_table(drop_test_table1).await); - - // Create same table again. 
- let drop_test_table1 = "drop_test_table1"; - test_ctx.create_fixed_schema_table(drop_test_table1).await; - - test_ctx.reopen_with_tables(&[drop_test_table1]).await; - }); -} diff --git a/src/analytic_engine/src/tests/mod.rs b/src/analytic_engine/src/tests/mod.rs deleted file mode 100644 index d674b54e5e..0000000000 --- a/src/analytic_engine/src/tests/mod.rs +++ /dev/null @@ -1,32 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Test suits and integration tests. - -#[cfg(test)] -mod alter_test; -#[cfg(test)] -mod compaction_test; -#[cfg(test)] -mod drop_test; -#[cfg(test)] -mod open_test; -#[cfg(test)] -mod read_write_test; -pub mod row_util; -pub mod table; -pub mod util; diff --git a/src/analytic_engine/src/tests/open_test.rs b/src/analytic_engine/src/tests/open_test.rs deleted file mode 100644 index b2eaa3d764..0000000000 --- a/src/analytic_engine/src/tests/open_test.rs +++ /dev/null @@ -1,46 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Engine open test. - -use crate::tests::util::{ - EngineBuildContext, MemoryEngineBuildContext, RocksDBEngineBuildContext, TestEnv, -}; - -#[test] -fn test_open_engine_rocks() { - let rocksdb_ctx = RocksDBEngineBuildContext::default(); - test_open_engine(rocksdb_ctx); -} - -#[test] -fn test_open_engine_mem_wal() { - let memory_ctx = MemoryEngineBuildContext::default(); - test_open_engine(memory_ctx); -} - -fn test_open_engine(engine_context: T) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - - env.block_on(async { - test_ctx.open().await; - - // Reopen engine. - test_ctx.reopen().await; - }); -} diff --git a/src/analytic_engine/src/tests/read_write_test.rs b/src/analytic_engine/src/tests/read_write_test.rs deleted file mode 100644 index 780ef0a31e..0000000000 --- a/src/analytic_engine/src/tests/read_write_test.rs +++ /dev/null @@ -1,697 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Read write test. - -use std::{thread, time}; - -use common_types::time::Timestamp; -use logger::info; -use wal::manager::WalsOpener; - -use crate::{ - table_options, - tests::util::{self, memory_ctxs, rocksdb_ctxs, EngineBuildContext, TestContext, TestEnv}, -}; - -#[test] -fn test_multi_table_read_write_rocks() { - let rocksdb_ctxs = rocksdb_ctxs(); - for ctx in rocksdb_ctxs { - test_multi_table_read_write(ctx); - } -} - -#[test] -fn test_multi_table_read_write_mem_wal() { - let memory_ctxs = memory_ctxs(); - for ctx in memory_ctxs { - test_multi_table_read_write(ctx); - } -} - -fn test_multi_table_read_write(engine_context: T) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - - env.block_on(async { - test_ctx.open().await; - - let test_table1 = "test_multi_table_read_write1"; - let test_table2 = "test_multi_table_read_write2"; - let test_table3 = "test_multi_table_read_write3"; - - let fixed_schema_table = test_ctx.create_fixed_schema_table(test_table1).await; - let _ = test_ctx.create_fixed_schema_table(test_table2).await; - let _ = test_ctx.create_fixed_schema_table(test_table3).await; - - let start_ms = test_ctx.start_ms(); - let rows = [ - // One bucket. - ( - "key1", - Timestamp::new(start_ms), - "tag1-1", - 11.0, - 110.0, - "tag2-1", - ), - ( - "key2", - Timestamp::new(start_ms), - "tag1-2", - 12.0, - 110.0, - "tag2-2", - ), - ( - "key2", - Timestamp::new(start_ms + 1), - "tag1-3", - 13.0, - 110.0, - "tag2-3", - ), - ( - "key3", - Timestamp::new(start_ms + 2), - "tag1-4", - 13.0, - 110.0, - "tag2-4", - ), - ( - "key4", - Timestamp::new(start_ms + 3), - "tag1-5", - 13.0, - 110.0, - "tag2-5", - ), - // Next bucket. - ( - "key5", - Timestamp::new( - start_ms + 1 + 2 * table_options::DEFAULT_SEGMENT_DURATION.as_millis() as i64, - ), - "tag-5-3", - 33.0, - 310.0, - "tag-5-3", - ), - ]; - - // Write data to table. - let row_group1 = fixed_schema_table.rows_to_row_group(&rows); - let row_group2 = fixed_schema_table.rows_to_row_group(&rows); - let row_group3 = fixed_schema_table.rows_to_row_group(&rows); - test_ctx.write_to_table(test_table1, row_group1).await; - test_ctx.write_to_table(test_table2, row_group2).await; - test_ctx.write_to_table(test_table3, row_group3).await; - - // Read with different opts. - util::check_read( - &test_ctx, - &fixed_schema_table, - "Test read write table1", - test_table1, - &rows, - ) - .await; - - util::check_read( - &test_ctx, - &fixed_schema_table, - "Test read write table2", - test_table2, - &rows, - ) - .await; - - util::check_read( - &test_ctx, - &fixed_schema_table, - "Test read write table3", - test_table3, - &rows, - ) - .await; - - // Reopen db. - test_ctx - .reopen_with_tables(&[test_table1, test_table2, test_table3]) - .await; - - // Read with different opts again. 
- util::check_read( - &test_ctx, - &fixed_schema_table, - "Test read write table1 after reopen", - test_table1, - &rows, - ) - .await; - util::check_read( - &test_ctx, - &fixed_schema_table, - "Test read write table2 after reopen", - test_table2, - &rows, - ) - .await; - util::check_read( - &test_ctx, - &fixed_schema_table, - "Test read write table3 after reopen", - test_table3, - &rows, - ) - .await; - }); -} - -#[test] -fn test_table_write_read_rocks() { - let rocksdb_ctxs = rocksdb_ctxs(); - for ctx in rocksdb_ctxs { - test_table_write_read(ctx); - } -} - -#[test] -fn test_table_write_read_mem_wal() { - let memory_ctxs = memory_ctxs(); - for ctx in memory_ctxs { - test_table_write_read(ctx); - } -} - -fn test_table_write_read(engine_context: T) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - - env.block_on(async { - test_ctx.open().await; - - let test_table1 = "test_table1"; - let fixed_schema_table = test_ctx.create_fixed_schema_table(test_table1).await; - - let start_ms = test_ctx.start_ms(); - let rows: [(&str, Timestamp, &str, f64, f64, &str); 3] = [ - ( - "key1", - Timestamp::new(start_ms), - "tag1-1", - 11.0, - 110.0, - "tag2-1", - ), - ( - "key2", - Timestamp::new(start_ms), - "tag1-2", - 12.0, - 110.0, - "tag2-2", - ), - ( - "key2", - Timestamp::new(start_ms + 1), - "tag1-3", - 13.0, - 110.0, - "tag2-3", - ), - ]; - let row_group = fixed_schema_table.rows_to_row_group(&rows); - - // Write data to table. - test_ctx.write_to_table(test_table1, row_group).await; - - // Read with different opts. - util::check_read( - &test_ctx, - &fixed_schema_table, - "Test read write table", - test_table1, - &rows, - ) - .await; - - // Reopen db. - test_ctx.reopen_with_tables(&[test_table1]).await; - - // Read with different opts again. - util::check_read( - &test_ctx, - &fixed_schema_table, - "Test read write table after reopen", - test_table1, - &rows, - ) - .await; - }); -} - -#[test] -fn test_table_write_get_rocks() { - let rocksdb_ctxs = rocksdb_ctxs(); - for ctx in rocksdb_ctxs { - test_table_write_get(ctx); - } -} - -#[test] -fn test_table_write_get_mem_wal() { - let memory_ctxs = memory_ctxs(); - for ctx in memory_ctxs { - test_table_write_get(ctx); - } -} - -fn test_table_write_get(engine_context: T) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - - env.block_on(async { - test_ctx.open().await; - - let test_table1 = "test_table1"; - let fixed_schema_table = test_ctx.create_fixed_schema_table(test_table1).await; - - let start_ms = test_ctx.start_ms(); - let rows = [ - ( - "key1", - Timestamp::new(start_ms), - "tag1-1", - 11.0, - 110.0, - "tag2-1", - ), - ( - "key2", - Timestamp::new(start_ms), - "tag1-2", - 12.0, - 110.0, - "tag2-2", - ), - ( - "key2", - Timestamp::new(start_ms + 1), - "tag1-3", - 13.0, - 110.0, - "tag2-3", - ), - ]; - let row_group = fixed_schema_table.rows_to_row_group(&rows); - - // Write data to table. - test_ctx.write_to_table(test_table1, row_group).await; - - util::check_get( - &test_ctx, - &fixed_schema_table, - "Try to get row", - test_table1, - &rows, - ) - .await; - - // Reopen db. 
- test_ctx.reopen_with_tables(&[test_table1]).await; - - util::check_get( - &test_ctx, - &fixed_schema_table, - "Try to get row after reopen", - test_table1, - &rows, - ) - .await; - }); -} - -#[test] -fn test_table_write_get_override_rocks() { - let rocksdb_ctxs = rocksdb_ctxs(); - for ctx in rocksdb_ctxs { - test_table_write_get_override(ctx); - } -} - -#[test] -fn test_table_write_get_override_mem_wal() { - let memory_ctxs = memory_ctxs(); - for ctx in memory_ctxs { - test_table_write_get_override(ctx); - } -} - -fn test_table_write_get_override(engine_context: T) { - test_table_write_get_override_case::(FlushPoint::NoFlush, engine_context.clone()); - - test_table_write_get_override_case::(FlushPoint::AfterFirstWrite, engine_context.clone()); - - test_table_write_get_override_case::(FlushPoint::AfterOverwrite, engine_context.clone()); - - test_table_write_get_override_case::(FlushPoint::FirstAndOverwrite, engine_context); -} - -#[derive(Debug)] -enum FlushPoint { - NoFlush, - AfterFirstWrite, - AfterOverwrite, - FirstAndOverwrite, -} - -fn test_table_write_get_override_case( - flush_point: FlushPoint, - engine_context: T, -) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - - env.block_on(async { - info!( - "test_table_write_get_override_case, flush_point:{:?}", - flush_point - ); - - test_ctx.open().await; - - let test_table1 = "test_table1"; - let fixed_schema_table = test_ctx.create_fixed_schema_table(test_table1).await; - - let start_ms = test_ctx.start_ms(); - { - let rows = [ - ( - "key1", - Timestamp::new(start_ms), - "tag1-1", - 11.0, - 110.0, - "tag2-1", - ), - ( - "key2", - Timestamp::new(start_ms), - "tag1-2", - 12.0, - 110.0, - "tag2-2", - ), - ( - "key3", - Timestamp::new(start_ms + 10), - "tag1-3", - 13.0, - 110.0, - "tag2-3", - ), - ( - "key2", - Timestamp::new(start_ms + 1), - "tag1-3", - 13.0, - 110.0, - "tag2-3", - ), - ]; - let row_group = fixed_schema_table.rows_to_row_group(&rows); - - // Write data to table. - test_ctx.write_to_table(test_table1, row_group).await; - } - - if let FlushPoint::AfterFirstWrite | FlushPoint::FirstAndOverwrite = flush_point { - test_ctx.flush_table(test_table1).await; - } - - // Override some rows - { - let rows = [ - ( - "key2", - Timestamp::new(start_ms), - "tag1-2-copy", - 112.0, - 210.0, - "tag2-2-copy", - ), - ( - "key2", - Timestamp::new(start_ms + 1), - "tag1-3-copy", - 113.0, - 210.0, - "tag2-3-copy", - ), - ]; - let row_group = fixed_schema_table.rows_to_row_group(&rows); - - test_ctx.write_to_table(test_table1, row_group).await; - } - - if let FlushPoint::AfterOverwrite | FlushPoint::FirstAndOverwrite = flush_point { - test_ctx.flush_table(test_table1).await; - } - - let expect_rows = [ - ( - "key1", - Timestamp::new(start_ms), - "tag1-1", - 11.0, - 110.0, - "tag2-1", - ), - ( - "key2", - Timestamp::new(start_ms), - "tag1-2-copy", - 112.0, - 210.0, - "tag2-2-copy", - ), - ( - "key2", - Timestamp::new(start_ms + 1), - "tag1-3-copy", - 113.0, - 210.0, - "tag2-3-copy", - ), - ( - "key3", - Timestamp::new(start_ms + 10), - "tag1-3", - 13.0, - 110.0, - "tag2-3", - ), - ]; - util::check_get( - &test_ctx, - &fixed_schema_table, - "Try to get row", - test_table1, - &expect_rows, - ) - .await; - - // Reopen db. 
- test_ctx.reopen_with_tables(&[test_table1]).await; - - util::check_get( - &test_ctx, - &fixed_schema_table, - "Try to get row after reopen", - test_table1, - &expect_rows, - ) - .await; - }); -} - -#[test] -fn test_db_write_buffer_size_rocks() { - let rocksdb_ctxs = rocksdb_ctxs(); - for ctx in rocksdb_ctxs { - // Use different table name to avoid metrics collision. - test_db_write_buffer_size("test_db_write_buffer_size_rocks", ctx); - } -} - -#[test] -fn test_db_write_buffer_size_mem_wal() { - let memory_ctxs = memory_ctxs(); - for ctx in memory_ctxs { - // Use different table name to avoid metrics collision. - test_db_write_buffer_size("test_db_write_buffer_size_mem_wal", ctx); - } -} - -fn test_db_write_buffer_size(table_name: &str, engine_context: T) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - test_ctx.config_mut().db_write_buffer_size = 1; - test_write_buffer_size_overflow(table_name, env, test_ctx); -} - -#[test] -fn test_space_write_buffer_size_rocks() { - let rocksdb_ctxs = rocksdb_ctxs(); - for ctx in rocksdb_ctxs { - // Use different table name to avoid metrics collision. - test_space_write_buffer_size("test_space_write_buffer_size_rocks", ctx); - } -} - -#[test] -fn test_space_write_buffer_size_mem_wal() { - let memory_ctxs = memory_ctxs(); - for ctx in memory_ctxs { - // Use different table name to avoid metrics collision. - test_space_write_buffer_size("test_space_write_buffer_size_mem_wal", ctx); - } -} - -fn test_space_write_buffer_size(table_name: &str, engine_context: T) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - test_ctx.config_mut().space_write_buffer_size = 1; - test_write_buffer_size_overflow(table_name, env, test_ctx); -} - -fn test_write_buffer_size_overflow( - test_table_name: &str, - env: TestEnv, - mut test_ctx: TestContext, -) { - env.block_on(async { - test_ctx.open().await; - - let fixed_schema_table = test_ctx.create_fixed_schema_table(test_table_name).await; - - let table = test_ctx.table(test_table_name); - // Note that table with same name shares same global prometheus metrics. - let old_stats = table.stats(); - - let start_ms = test_ctx.start_ms(); - let rows1 = [ - ( - "key1", - Timestamp::new(start_ms), - "tag1-1", - 11.0, - 110.0, - "tag2-1", - ), - ( - "key2", - Timestamp::new(start_ms), - "tag1-2", - 12.0, - 110.0, - "tag2-2", - ), - ( - "key2", - Timestamp::new(start_ms + 1), - "tag1-3", - 13.0, - 110.0, - "tag2-3", - ), - ]; - let row_group = fixed_schema_table.rows_to_row_group(&rows1); - // Write rows1 to table. - test_ctx.write_to_table(test_table_name, row_group).await; - - let stats = table.stats(); - assert_eq!(old_stats.num_read, stats.num_read); - assert_eq!(old_stats.num_write + 1, stats.num_write); - assert_eq!(old_stats.num_flush, stats.num_flush); - - let rows2 = [ - ( - "key4", - Timestamp::new(start_ms + 2), - "tag1-4", - 11.0, - 110.0, - "tag2-4", - ), - ( - "key5", - Timestamp::new(start_ms + 3), - "tag1-5", - 12.0, - 110.0, - "tag2-5", - ), - ]; - - let row_group = fixed_schema_table.rows_to_row_group(&rows2); - // Write rows2 to table. - test_ctx.write_to_table(test_table_name, row_group).await; - - let mut rows = Vec::new(); - rows.extend_from_slice(&rows1); - rows.extend_from_slice(&rows2); - - // Read with different opts. - util::check_read( - &test_ctx, - &fixed_schema_table, - "Test read write table", - test_table_name, - &rows, - ) - .await; - - // TODO(lee) a better way to wait table flushing finishes. 
- // https://github.com/apache/incubator-horaedb/issues/1241 - thread::sleep(time::Duration::from_millis(10000)); - - let stats = table.stats(); - assert_eq!(old_stats.num_read + 5, stats.num_read); - assert_eq!(old_stats.num_write + 2, stats.num_write); - - // Flush when reaches (db/space) write_buffer size limitation. - assert_eq!(old_stats.num_flush + 1, stats.num_flush); - - drop(table); - // Reopen db. - test_ctx.reopen_with_tables(&[test_table_name]).await; - - // Read with different opts again. - util::check_read( - &test_ctx, - &fixed_schema_table, - "Test read write table after reopen", - test_table_name, - &rows, - ) - .await; - }); -} diff --git a/src/analytic_engine/src/tests/row_util.rs b/src/analytic_engine/src/tests/row_util.rs deleted file mode 100644 index 2abfa0862d..0000000000 --- a/src/analytic_engine/src/tests/row_util.rs +++ /dev/null @@ -1,108 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Row utils - -use common_types::{datum::Datum, row::Row}; - -pub fn new_row_6(data: (C0, C1, C2, C3, C4, C5)) -> Row -where - C0: Into, - C1: Into, - C2: Into, - C3: Into, - C4: Into, - C5: Into, -{ - let cols = vec![ - data.0.into(), - data.1.into(), - data.2.into(), - data.3.into(), - data.4.into(), - data.5.into(), - ]; - - Row::from_datums(cols) -} - -pub fn assert_row_eq_6(data: (C0, C1, C2, C3, C4, C5), row: Row) -where - C0: Into, - C1: Into, - C2: Into, - C3: Into, - C4: Into, - C5: Into, -{ - let expect_row = new_row_6(data); - assert_eq!(expect_row, row); -} - -pub fn new_row_8(data: (C0, C1, C2, C3, C4, C5, C6, C7)) -> Row -where - C0: Into, - C1: Into, - C2: Into, - C3: Into, - C4: Into, - C5: Into, - C6: Into, - C7: Into, -{ - let cols = vec![ - data.0.into(), - data.1.into(), - data.2.into(), - data.3.into(), - data.4.into(), - data.5.into(), - data.6.into(), - data.7.into(), - ]; - - Row::from_datums(cols) -} - -pub fn new_rows_6(data: &[(C0, C1, C2, C3, C4, C5)]) -> Vec -where - C0: Into + Clone, - C1: Into + Clone, - C2: Into + Clone, - C3: Into + Clone, - C4: Into + Clone, - C5: Into + Clone, -{ - data.iter().cloned().map(new_row_6).collect() -} - -#[allow(clippy::type_complexity)] -pub fn new_rows_8( - data: &[(C0, C1, C2, C3, C4, C5, C6, C7)], -) -> Vec -where - C0: Into + Clone, - C1: Into + Clone, - C2: Into + Clone, - C3: Into + Clone, - C4: Into + Clone, - C5: Into + Clone, - C6: Into + Clone, - C7: Into + Clone, -{ - data.iter().cloned().map(new_row_8).collect() -} diff --git a/src/analytic_engine/src/tests/table.rs b/src/analytic_engine/src/tests/table.rs deleted file mode 100644 index a399d4d679..0000000000 --- a/src/analytic_engine/src/tests/table.rs +++ /dev/null @@ -1,362 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Utils to create table. - -use std::{collections::HashMap, sync::Arc}; - -use common_types::{ - column_schema, - datum::{Datum, DatumKind}, - projected_schema::ProjectedSchema, - record_batch::RecordBatch, - request_id::RequestId, - row::{Row, RowGroup}, - schema::{self, Schema}, - table::DEFAULT_SHARD_ID, - time::Timestamp, -}; -use table_engine::{ - self, - engine::{CreateTableParams, CreateTableRequest, TableState}, - predicate::Predicate, - table::{GetRequest, ReadOptions, ReadRequest, SchemaId, TableId, TableSeq}, -}; -use time_ext::ReadableDuration; -use trace_metric::MetricsCollector; - -use crate::{table_options, tests::row_util}; - -pub fn new_table_id(schema_id: u16, table_seq: u32) -> TableId { - TableId::with_seq(SchemaId::from(schema_id), TableSeq::from(table_seq)).unwrap() -} - -pub type RowTuple<'a> = (&'a str, Timestamp, &'a str, f64, f64, &'a str); -pub type RowTupleOpt<'a> = ( - &'a str, - Timestamp, - Option<&'a str>, - Option, - Option, - Option<&'a str>, -); -pub type KeyTuple<'a> = (&'a str, Timestamp); - -pub struct FixedSchemaTable { - create_request: CreateTableRequest, -} - -impl FixedSchemaTable { - pub fn builder() -> Builder { - Builder::default() - } - - fn default_schema() -> Schema { - Self::default_schema_builder().build().unwrap() - } - - pub fn default_schema_builder() -> schema::Builder { - create_schema_builder( - // Key columns - &[("key", DatumKind::String), ("ts", DatumKind::Timestamp)], - // Normal columns - &[ - ("string_tag", DatumKind::String), - ("double_field1", DatumKind::Double), - ("double_field2", DatumKind::Double), - ("string_field2", DatumKind::String), - ], - ) - } - - #[inline] - pub fn table_id(&self) -> TableId { - self.create_request.table_id - } - - #[inline] - pub fn create_request(&self) -> &CreateTableRequest { - &self.create_request - } - - #[inline] - pub fn segment_duration_ms(&self) -> i64 { - table_options::DEFAULT_SEGMENT_DURATION.as_millis() as i64 - } - - // Format of data: (key string, timestamp, string_tag, double_field1, - // double_field2, string_field2) - fn new_row(data: RowTuple) -> Row { - row_util::new_row_6(data) - } - - pub fn rows_to_row_group(&self, data: &[RowTuple]) -> RowGroup { - let rows = data - .iter() - .copied() - .map(FixedSchemaTable::new_row) - .collect(); - - self.new_row_group(rows) - } - - pub fn rows_opt_to_row_group(&self, data: &[RowTupleOpt]) -> RowGroup { - let rows = data - .iter() - .copied() - .map(FixedSchemaTable::new_row_opt) - .collect(); - - self.new_row_group(rows) - } - - fn new_row_group(&self, rows: Vec) -> RowGroup { - RowGroup::try_new(self.create_request.params.table_schema.clone(), rows).unwrap() - } - - fn new_row_opt(data: RowTupleOpt) -> Row { - row_util::new_row_6(data) - } - - pub fn new_read_all_request(&self, opts: ReadOptions) -> ReadRequest { - 
new_read_all_request_with_order(self.create_request.params.table_schema.clone(), opts) - } - - pub fn new_get_request(&self, key: KeyTuple) -> GetRequest { - let primary_key = vec![key.0.into(), key.1.into()]; - - GetRequest { - request_id: RequestId::next_id(), - projected_schema: ProjectedSchema::no_projection( - self.create_request.params.table_schema.clone(), - ), - primary_key, - } - } - - pub fn new_get_request_from_row(&self, data: RowTuple) -> GetRequest { - self.new_get_request((data.0, data.1)) - } - - pub fn assert_batch_eq_to_rows(&self, record_batches: &[RecordBatch], rows: &[RowTuple]) { - let row_group = self.rows_to_row_group(rows); - assert_batch_eq_to_row_group(record_batches, &row_group); - } - - pub fn assert_row_eq(&self, data: RowTuple, row: Row) { - row_util::assert_row_eq_6(data, row); - } -} - -pub fn read_opts_list() -> Vec { - vec![ - ReadOptions::default(), - ReadOptions { - batch_size: 1, - read_parallelism: 1, - deadline: None, - }, - ReadOptions { - batch_size: 1, - read_parallelism: 4, - deadline: None, - }, - ReadOptions { - batch_size: 100, - read_parallelism: 1, - deadline: None, - }, - ReadOptions { - batch_size: 100, - read_parallelism: 4, - deadline: None, - }, - ] -} - -pub fn new_read_all_request_with_order(schema: Schema, opts: ReadOptions) -> ReadRequest { - ReadRequest { - request_id: RequestId::next_id(), - opts, - projected_schema: ProjectedSchema::no_projection(schema), - predicate: Arc::new(Predicate::empty()), - metrics_collector: MetricsCollector::default(), - priority: Default::default(), - } -} - -pub fn new_read_all_request(schema: Schema, opts: ReadOptions) -> ReadRequest { - new_read_all_request_with_order(schema, opts) -} - -pub fn assert_batch_eq_to_row_group(record_batches: &[RecordBatch], row_group: &RowGroup) { - if record_batches.is_empty() { - assert!(row_group.is_empty()); - } - - for record_batch in record_batches { - assert_eq!( - record_batch.schema().columns(), - row_group.schema().columns() - ); - } - - let mut cursor = RecordBatchesCursor::new(record_batches); - for row in row_group.iter() { - for (column_idx, datum) in row.iter().enumerate() { - assert_eq!( - &cursor.datum(column_idx), - datum, - "record_batches:{record_batches:?}, row_group:{row_group:?}" - ); - } - cursor.step(); - } -} - -struct RecordBatchesCursor<'a> { - record_batches: &'a [RecordBatch], - batch_idx: usize, - row_idx_in_batch: usize, -} - -impl<'a> RecordBatchesCursor<'a> { - fn new(record_batches: &[RecordBatch]) -> RecordBatchesCursor { - RecordBatchesCursor { - record_batches, - batch_idx: 0, - row_idx_in_batch: 0, - } - } - - fn step(&mut self) { - if self.batch_idx >= self.record_batches.len() { - return; - } - - self.row_idx_in_batch += 1; - if self.row_idx_in_batch >= self.record_batches[self.batch_idx].num_rows() { - self.batch_idx += 1; - self.row_idx_in_batch = 0; - } - } - - fn datum(&self, column_idx: usize) -> Datum { - let record_batch = &self.record_batches[self.batch_idx]; - let column_in_batch = record_batch.column(column_idx); - column_in_batch.datum(self.row_idx_in_batch) - } -} - -#[must_use] -pub struct Builder { - create_request: CreateTableRequest, -} - -impl Builder { - pub fn schema_id(mut self, schema_id: SchemaId) -> Self { - self.create_request.schema_id = schema_id; - self - } - - pub fn table_name(mut self, table_name: String) -> Self { - self.create_request.params.table_name = table_name; - self - } - - pub fn table_id(mut self, table_id: TableId) -> Self { - self.create_request.table_id = table_id; - self - } - - pub 
fn enable_ttl(mut self, enable_ttl: bool) -> Self { - self.create_request.params.table_options.insert( - common_types::OPTION_KEY_ENABLE_TTL.to_string(), - enable_ttl.to_string(), - ); - self - } - - pub fn ttl(mut self, duration: ReadableDuration) -> Self { - self.create_request - .params - .table_options - .insert(common_types::TTL.to_string(), duration.to_string()); - self - } - - pub fn build_fixed(self) -> FixedSchemaTable { - FixedSchemaTable { - create_request: self.create_request, - } - } -} - -impl Default for Builder { - fn default() -> Self { - let params = CreateTableParams { - catalog_name: "horaedb".to_string(), - schema_name: "public".to_string(), - table_name: "test_table".to_string(), - table_schema: FixedSchemaTable::default_schema(), - partition_info: None, - engine: table_engine::ANALYTIC_ENGINE_TYPE.to_string(), - table_options: HashMap::new(), - }; - - Self { - create_request: CreateTableRequest { - params, - schema_id: SchemaId::from_u32(2), - table_id: new_table_id(2, 1), - state: TableState::Stable, - shard_id: DEFAULT_SHARD_ID, - }, - } - } -} - -// Format of input slice: &[ ( column name, column type ) ] -pub fn create_schema_builder( - key_tuples: &[(&str, DatumKind)], - normal_tuples: &[(&str, DatumKind)], -) -> schema::Builder { - assert!(!key_tuples.is_empty()); - - let mut schema_builder = schema::Builder::with_capacity(key_tuples.len() + normal_tuples.len()) - .auto_increment_column_id(true) - .primary_key_indexes((0..key_tuples.len()).collect()); - - for tuple in key_tuples { - // Key column is not nullable. - let column_schema = column_schema::Builder::new(tuple.0.to_string(), tuple.1) - .is_nullable(false) - .build() - .expect("Should succeed to build key column schema"); - schema_builder = schema_builder.add_key_column(column_schema).unwrap(); - } - - for tuple in normal_tuples { - let column_schema = column_schema::Builder::new(tuple.0.to_string(), tuple.1) - .is_nullable(true) - .build() - .expect("Should succeed to build normal column schema"); - schema_builder = schema_builder.add_normal_column(column_schema).unwrap(); - } - - schema_builder -} diff --git a/src/analytic_engine/src/tests/util.rs b/src/analytic_engine/src/tests/util.rs deleted file mode 100644 index 04bc09f75f..0000000000 --- a/src/analytic_engine/src/tests/util.rs +++ /dev/null @@ -1,745 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Test utils. 
- -use std::{collections::HashMap, future::Future, sync::Arc}; - -use common_types::{ - datum::Datum, - record_batch::RecordBatch, - row::{Row, RowGroup}, - table::{ShardId, DEFAULT_SHARD_ID}, - time::Timestamp, -}; -use futures::stream::StreamExt; -use logger::info; -use object_store::config::{LocalOptions, ObjectStoreOptions, StorageOptions}; -use runtime::PriorityRuntime; -use size_ext::ReadableSize; -use table_engine::{ - engine::{ - CreateTableRequest, DropTableRequest, EngineRuntimes, OpenShardRequest, OpenTableRequest, - Result as EngineResult, TableDef, TableEngineRef, - }, - table::{ - AlterSchemaRequest, FlushRequest, GetRequest, ReadRequest, Result, SchemaId, TableId, - TableRef, WriteRequest, - }, -}; -use tempfile::TempDir; -use time_ext::ReadableDuration; -use wal::{ - config::{Config as WalConfig, StorageConfig}, - manager::{OpenedWals, WalRuntimes, WalsOpener}, - rocksdb_impl::{config::RocksDBStorageConfig, manager::RocksDBWalsOpener}, - table_kv_impl::wal::MemWalsOpener, -}; - -use crate::{ - setup::{EngineBuilder, TableEngineContext}, - tests::table::{self, FixedSchemaTable, RowTuple}, - Config, RecoverMode, -}; - -const DAY_MS: i64 = 24 * 60 * 60 * 1000; - -/// Helper struct to create a null datum. -pub struct Null; - -impl From for Datum { - fn from(_data: Null) -> Datum { - Datum::Null - } -} - -pub async fn check_read( - test_ctx: &TestContext, - fixed_schema_table: &FixedSchemaTable, - msg: &str, - table_name: &str, - rows: &[RowTuple<'_>], -) { - for read_opts in table::read_opts_list() { - info!("{}, opts:{:?}", msg, read_opts); - - let record_batches = test_ctx - .read_table( - table_name, - fixed_schema_table.new_read_all_request(read_opts), - ) - .await; - - fixed_schema_table.assert_batch_eq_to_rows(&record_batches, rows); - } -} - -pub async fn check_get( - test_ctx: &TestContext, - fixed_schema_table: &FixedSchemaTable, - msg: &str, - table_name: &str, - rows: &[RowTuple<'_>], -) { - for row_data in rows { - let request = fixed_schema_table.new_get_request_from_row(*row_data); - - info!("{}, request:{:?}, row_data:{:?}", msg, request, row_data); - - let row = test_ctx.get_from_table(table_name, request).await.unwrap(); - - fixed_schema_table.assert_row_eq(*row_data, row); - } -} - -pub struct TestContext { - config: Config, - wals_opener: T, - runtimes: Arc, - engine: Option, - opened_wals: Option, - schema_id: SchemaId, - last_table_seq: u32, - open_method: OpenTablesMethod, - - name_to_tables: HashMap, -} - -impl TestContext { - pub async fn open(&mut self) { - let opened_wals = if let Some(opened_wals) = self.opened_wals.take() { - opened_wals - } else { - self.wals_opener - .open_wals( - &self.config.wal, - WalRuntimes { - read_runtime: self.runtimes.read_runtime.high().clone(), - write_runtime: self.runtimes.write_runtime.clone(), - default_runtime: self.runtimes.default_runtime.clone(), - }, - ) - .await - .unwrap() - }; - - let engine_builder = EngineBuilder { - config: &self.config, - engine_runtimes: self.runtimes.clone(), - opened_wals: opened_wals.clone(), - meta_client: None, - }; - self.opened_wals = Some(opened_wals); - - let TableEngineContext { table_engine, .. } = engine_builder.build().await.unwrap(); - self.engine = Some(table_engine); - } - - pub async fn reopen(&mut self) { - { - // Close all tables. - self.name_to_tables.clear(); - - // Close engine. 
- let engine = self.engine.take().unwrap(); - engine.close().await.unwrap(); - } - - self.open().await; - } - - pub async fn reopen_with_tables(&mut self, tables: &[&str]) { - let table_infos: Vec<_> = tables - .iter() - .map(|name| { - let table_id = self.name_to_tables.get(*name).unwrap().id(); - (table_id, *name) - }) - .collect(); - { - // Close all tables. - self.name_to_tables.clear(); - - // Close engine. - let engine = self.engine.take().unwrap(); - engine.close().await.unwrap(); - } - - self.open().await; - - match self.open_method { - OpenTablesMethod::WithOpenTable => { - for (id, name) in table_infos { - self.open_table(id, name).await; - } - } - OpenTablesMethod::WithOpenShard => { - self.open_tables_of_shard(table_infos, DEFAULT_SHARD_ID) - .await; - } - } - } - - pub async fn reopen_with_tables_of_shard(&mut self, tables: &[&str], shard_id: ShardId) { - let table_infos: Vec<_> = tables - .iter() - .map(|name| { - let table_id = self.name_to_tables.get(*name).unwrap().id(); - (table_id, *name) - }) - .collect(); - { - // Close all tables. - self.name_to_tables.clear(); - - // Close engine. - let engine = self.engine.take().unwrap(); - engine.close().await.unwrap(); - } - - self.open().await; - - self.open_tables_of_shard(table_infos, shard_id).await - } - - async fn open_tables_of_shard(&mut self, table_infos: Vec<(TableId, &str)>, shard_id: ShardId) { - let table_defs = table_infos - .into_iter() - .map(|table| TableDef { - catalog_name: "horaedb".to_string(), - schema_name: "public".to_string(), - schema_id: self.schema_id, - id: table.0, - name: table.1.to_string(), - }) - .collect(); - - let open_shard_request = OpenShardRequest { - shard_id, - table_defs, - engine: table_engine::ANALYTIC_ENGINE_TYPE.to_string(), - }; - - let tables = self - .engine() - .open_shard(open_shard_request) - .await - .unwrap() - .into_values() - .map(|result| result.unwrap().unwrap()); - - for table in tables { - self.name_to_tables.insert(table.name().to_string(), table); - } - } - - async fn open_table(&mut self, table_id: TableId, table_name: &str) { - let table = self - .engine() - .open_table(OpenTableRequest { - catalog_name: "horaedb".to_string(), - schema_name: "public".to_string(), - schema_id: self.schema_id, - table_name: table_name.to_string(), - table_id, - engine: table_engine::ANALYTIC_ENGINE_TYPE.to_string(), - shard_id: DEFAULT_SHARD_ID, - }) - .await - .unwrap() - .unwrap(); - - self.name_to_tables.insert(table_name.to_string(), table); - } - - pub async fn try_open_table( - &mut self, - table_id: TableId, - table_name: &str, - ) -> EngineResult> { - let table_opt = self - .engine() - .open_table(OpenTableRequest { - catalog_name: "horaedb".to_string(), - schema_name: "public".to_string(), - schema_id: self.schema_id, - table_name: table_name.to_string(), - table_id, - engine: table_engine::ANALYTIC_ENGINE_TYPE.to_string(), - shard_id: DEFAULT_SHARD_ID, - }) - .await?; - - let table = match table_opt { - Some(v) => v, - None => return Ok(None), - }; - - self.name_to_tables - .insert(table_name.to_string(), table.clone()); - - Ok(Some(table)) - } - - pub async fn drop_table(&mut self, table_name: &str) -> bool { - let request = DropTableRequest { - catalog_name: "horaedb".to_string(), - schema_name: "public".to_string(), - schema_id: self.schema_id, - table_name: table_name.to_string(), - engine: table_engine::ANALYTIC_ENGINE_TYPE.to_string(), - }; - - let ret = self.engine().drop_table(request).await.unwrap(); - - self.name_to_tables.remove(table_name); - - ret - } - - /// 3 
days ago. - pub fn start_ms(&self) -> i64 { - Timestamp::now().as_i64() - 3 * DAY_MS - } - - pub async fn create_fixed_schema_table(&mut self, table_name: &str) -> FixedSchemaTable { - let fixed_schema_table = FixedSchemaTable::builder() - .schema_id(self.schema_id) - .table_name(table_name.to_string()) - .table_id(self.next_table_id()) - .ttl("7d".parse::().unwrap()) - .build_fixed(); - - self.create_table(fixed_schema_table.create_request().clone()) - .await; - - fixed_schema_table - } - - async fn create_table(&mut self, create_request: CreateTableRequest) { - let table_name = create_request.params.table_name.clone(); - let table = self.engine().create_table(create_request).await.unwrap(); - - self.name_to_tables.insert(table_name.to_string(), table); - } - - pub async fn write_to_table(&self, table_name: &str, row_group: RowGroup) { - let table = self.table(table_name); - - table.write(WriteRequest { row_group }).await.unwrap(); - } - - pub async fn read_table( - &self, - table_name: &str, - read_request: ReadRequest, - ) -> Vec { - let table = self.table(table_name); - - let mut stream = table.read(read_request).await.unwrap(); - let mut record_batches = Vec::new(); - while let Some(batch) = stream.next().await { - let batch = batch.unwrap(); - - record_batches.push(batch); - } - - record_batches - } - - pub async fn partitioned_read_table( - &self, - table_name: &str, - read_request: ReadRequest, - ) -> Vec { - let table = self.table(table_name); - - let streams = table.partitioned_read(read_request).await.unwrap(); - let mut record_batches = Vec::new(); - - for mut stream in streams.streams { - while let Some(batch) = stream.next().await { - let batch = batch.unwrap(); - - record_batches.push(batch); - } - } - - record_batches - } - - pub async fn get_from_table(&self, table_name: &str, request: GetRequest) -> Option { - let table = self.table(table_name); - - table.get(request).await.unwrap() - } - - pub async fn flush_table(&self, table_name: &str) { - let table = self.table(table_name); - - table.flush(FlushRequest::default()).await.unwrap(); - } - - pub async fn flush_table_with_request(&self, table_name: &str, request: FlushRequest) { - let table = self.table(table_name); - - table.flush(request).await.unwrap(); - } - - pub async fn compact_table(&self, table_name: &str) { - let table = self.table(table_name); - - table.compact().await.unwrap(); - } - - pub async fn try_alter_schema( - &self, - table_name: &str, - request: AlterSchemaRequest, - ) -> Result { - let table = self.table(table_name); - - table.alter_schema(request).await - } - - pub async fn try_alter_options( - &self, - table_name: &str, - opts: HashMap, - ) -> Result { - let table = self.table(table_name); - - table.alter_options(opts).await - } - - pub fn table(&self, table_name: &str) -> TableRef { - self.name_to_tables.get(table_name).cloned().unwrap() - } - - #[inline] - pub fn engine(&self) -> &TableEngineRef { - self.engine.as_ref().unwrap() - } - - fn next_table_id(&mut self) -> TableId { - self.last_table_seq += 1; - table::new_table_id(2, self.last_table_seq) - } -} - -#[derive(Clone, Copy)] -pub enum OpenTablesMethod { - WithOpenTable, - WithOpenShard, -} - -impl TestContext { - pub fn config_mut(&mut self) -> &mut Config { - &mut self.config - } - - pub fn clone_engine(&self) -> TableEngineRef { - self.engine.clone().unwrap() - } -} - -pub struct TestEnv { - _dir: TempDir, - pub config: Config, - pub runtimes: Arc, -} - -impl TestEnv { - pub fn builder() -> Builder { - Builder::default() - } - - pub fn 
new_context( - &self, - build_context: T, - ) -> TestContext { - let config = build_context.config(); - let wals_opener = build_context.wals_opener(); - - TestContext { - config, - wals_opener, - runtimes: self.runtimes.clone(), - engine: None, - opened_wals: None, - schema_id: SchemaId::from_u32(100), - last_table_seq: 1, - name_to_tables: HashMap::new(), - open_method: build_context.open_method(), - } - } - - pub fn block_on(&self, future: F) -> F::Output { - self.runtimes.default_runtime.block_on(future) - } -} - -pub struct Builder { - num_workers: usize, -} - -impl Builder { - pub fn build(self) -> TestEnv { - let dir = tempfile::tempdir().unwrap(); - - let config = Config { - storage: StorageOptions { - mem_cache_capacity: ReadableSize::mb(0), - mem_cache_partition_bits: 0, - disk_cache_dir: "".to_string(), - disk_cache_capacity: ReadableSize::mb(0), - disk_cache_page_size: ReadableSize::mb(0), - disk_cache_partition_bits: 0, - object_store: ObjectStoreOptions::Local(LocalOptions { - data_dir: dir.path().to_str().unwrap().to_string(), - max_retries: 3, - timeout: Default::default(), - }), - }, - wal: WalConfig { - storage: StorageConfig::RocksDB(Box::new(RocksDBStorageConfig { - data_dir: dir.path().to_str().unwrap().to_string(), - ..Default::default() - })), - disable_data: false, - }, - ..Default::default() - }; - - let runtime = Arc::new( - runtime::Builder::default() - .worker_threads(self.num_workers) - .enable_all() - .build() - .unwrap(), - ); - - TestEnv { - _dir: dir, - config, - runtimes: Arc::new(EngineRuntimes { - read_runtime: PriorityRuntime::new(runtime.clone(), runtime.clone()), - write_runtime: runtime.clone(), - meta_runtime: runtime.clone(), - compact_runtime: runtime.clone(), - default_runtime: runtime.clone(), - io_runtime: runtime, - }), - } - } -} - -impl Default for Builder { - fn default() -> Self { - Self { num_workers: 2 } - } -} - -pub trait EngineBuildContext: Clone + Default { - type WalsOpener: WalsOpener; - - fn wals_opener(&self) -> Self::WalsOpener; - fn config(&self) -> Config; - fn open_method(&self) -> OpenTablesMethod; -} - -pub struct RocksDBEngineBuildContext { - config: Config, - open_method: OpenTablesMethod, -} - -impl RocksDBEngineBuildContext { - pub fn new(mode: RecoverMode, open_method: OpenTablesMethod) -> Self { - let mut context = Self::default(); - context.config.recover_mode = mode; - context.open_method = open_method; - - context - } -} - -impl Default for RocksDBEngineBuildContext { - fn default() -> Self { - let dir = tempfile::tempdir().unwrap(); - - let config = Config { - storage: StorageOptions { - mem_cache_capacity: ReadableSize::mb(0), - mem_cache_partition_bits: 0, - disk_cache_dir: "".to_string(), - disk_cache_capacity: ReadableSize::mb(0), - disk_cache_page_size: ReadableSize::mb(0), - disk_cache_partition_bits: 0, - object_store: ObjectStoreOptions::Local(LocalOptions { - data_dir: dir.path().to_str().unwrap().to_string(), - max_retries: 3, - timeout: Default::default(), - }), - }, - wal: WalConfig { - storage: StorageConfig::RocksDB(Box::new(RocksDBStorageConfig { - data_dir: dir.path().to_str().unwrap().to_string(), - ..Default::default() - })), - disable_data: false, - }, - ..Default::default() - }; - - Self { - config, - open_method: OpenTablesMethod::WithOpenTable, - } - } -} - -impl Clone for RocksDBEngineBuildContext { - fn clone(&self) -> Self { - let mut config = self.config.clone(); - - let dir = tempfile::tempdir().unwrap(); - let storage = StorageOptions { - mem_cache_capacity: ReadableSize::mb(0), - 
mem_cache_partition_bits: 0, - disk_cache_dir: "".to_string(), - disk_cache_capacity: ReadableSize::mb(0), - disk_cache_page_size: ReadableSize::mb(0), - disk_cache_partition_bits: 0, - object_store: ObjectStoreOptions::Local(LocalOptions { - data_dir: dir.path().to_str().unwrap().to_string(), - max_retries: 3, - timeout: Default::default(), - }), - }; - - config.storage = storage; - config.wal = WalConfig { - storage: StorageConfig::RocksDB(Box::new(RocksDBStorageConfig { - data_dir: dir.path().to_str().unwrap().to_string(), - ..Default::default() - })), - disable_data: false, - }; - Self { - config, - open_method: self.open_method, - } - } -} - -impl EngineBuildContext for RocksDBEngineBuildContext { - type WalsOpener = RocksDBWalsOpener; - - fn wals_opener(&self) -> Self::WalsOpener { - RocksDBWalsOpener - } - - fn config(&self) -> Config { - self.config.clone() - } - - fn open_method(&self) -> OpenTablesMethod { - self.open_method - } -} - -#[derive(Clone)] -pub struct MemoryEngineBuildContext { - config: Config, - open_method: OpenTablesMethod, -} - -impl MemoryEngineBuildContext { - pub fn new(mode: RecoverMode, open_method: OpenTablesMethod) -> Self { - let mut context = Self::default(); - context.config.recover_mode = mode; - context.open_method = open_method; - - context - } -} - -impl Default for MemoryEngineBuildContext { - fn default() -> Self { - let dir = tempfile::tempdir().unwrap(); - - let config = Config { - storage: StorageOptions { - mem_cache_capacity: ReadableSize::mb(0), - mem_cache_partition_bits: 0, - disk_cache_dir: "".to_string(), - disk_cache_capacity: ReadableSize::mb(0), - disk_cache_page_size: ReadableSize::mb(0), - disk_cache_partition_bits: 0, - object_store: ObjectStoreOptions::Local(LocalOptions { - data_dir: dir.path().to_str().unwrap().to_string(), - max_retries: 3, - timeout: Default::default(), - }), - }, - wal: WalConfig { - storage: StorageConfig::Obkv(Box::default()), - disable_data: false, - }, - ..Default::default() - }; - - Self { - config, - open_method: OpenTablesMethod::WithOpenTable, - } - } -} - -impl EngineBuildContext for MemoryEngineBuildContext { - type WalsOpener = MemWalsOpener; - - fn wals_opener(&self) -> Self::WalsOpener { - MemWalsOpener::default() - } - - fn config(&self) -> Config { - self.config.clone() - } - - fn open_method(&self) -> OpenTablesMethod { - self.open_method - } -} - -pub fn rocksdb_ctxs() -> Vec { - vec![ - RocksDBEngineBuildContext::new(RecoverMode::TableBased, OpenTablesMethod::WithOpenTable), - RocksDBEngineBuildContext::new(RecoverMode::ShardBased, OpenTablesMethod::WithOpenTable), - RocksDBEngineBuildContext::new(RecoverMode::TableBased, OpenTablesMethod::WithOpenShard), - RocksDBEngineBuildContext::new(RecoverMode::ShardBased, OpenTablesMethod::WithOpenShard), - ] -} - -pub fn memory_ctxs() -> Vec { - vec![ - MemoryEngineBuildContext::new(RecoverMode::TableBased, OpenTablesMethod::WithOpenTable), - MemoryEngineBuildContext::new(RecoverMode::ShardBased, OpenTablesMethod::WithOpenTable), - MemoryEngineBuildContext::new(RecoverMode::TableBased, OpenTablesMethod::WithOpenShard), - MemoryEngineBuildContext::new(RecoverMode::ShardBased, OpenTablesMethod::WithOpenShard), - ] -} diff --git a/src/benchmarks/Cargo.toml b/src/benchmarks/Cargo.toml deleted file mode 100644 index 031d072396..0000000000 --- a/src/benchmarks/Cargo.toml +++ /dev/null @@ -1,74 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "benchmarks" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -analytic_engine = { workspace = true } -arena = { workspace = true } -arrow = { workspace = true } -base64 = { workspace = true } -bytes_ext = { workspace = true } -clap = { workspace = true } -common_types = { workspace = true } -env_logger = { workspace = true } -futures = { workspace = true } -generic_error = { workspace = true } -logger = { workspace = true } -macros = { workspace = true } -object_store = { workspace = true } -parquet = { workspace = true } -parquet_ext = { workspace = true } -pprof = { workspace = true, features = ["flamegraph", "criterion"] } -rand = { workspace = true } -runtime = { workspace = true } -serde = { workspace = true } -size_ext = { workspace = true } -snafu = { workspace = true } -table_engine = { workspace = true } -table_kv = { workspace = true } -tempfile = { workspace = true } -time_ext = { workspace = true } -tokio = { workspace = true } -toml_ext = { workspace = true } -trace_metric = { workspace = true } -wal = { workspace = true, features = ["wal-rocksdb", "wal-message-queue", "wal-table-kv"] } -zstd = { workspace = true } - -[dev-dependencies] -criterion = { workspace = true } -tempfile = { workspace = true } - -[[bench]] -name = "bench" -harness = false - -[[bin]] -name = "sst-tools" diff --git a/src/benchmarks/README.md b/src/benchmarks/README.md deleted file mode 100644 index 3a5d2c5c2a..0000000000 --- a/src/benchmarks/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# Benchmarks - -## Test Data -todo - -## Config -A config template can be found in `config/bench.toml`. - -## Run benchmarks -In root directory of `horaedb` (not this directory `horaedb/benchmarks`), run the following command: -```bash -ANALYTIC_BENCH_CONFIG_PATH=/path/to/bench.toml cargo bench -p benchmarks -``` - -Print logs: -```bash -RUST_LOG=info ANALYTIC_BENCH_CONFIG_PATH=/path/to/bench.toml cargo bench -p benchmarks -``` - -Run specific bench: -```bash -ANALYTIC_BENCH_CONFIG_PATH=/path/to/bench.toml cargo bench --bench bench -p benchmarks -- read_parquet -``` - -If you want to enable pprof, add `--profile-time 60`, see [pprof-rs#127](https://github.com/tikv/pprof-rs/issues/127) diff --git a/src/benchmarks/bench.toml b/src/benchmarks/bench.toml deleted file mode 100644 index b76f779f9b..0000000000 --- a/src/benchmarks/bench.toml +++ /dev/null @@ -1,70 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[sst_bench] -store_path = "/tmp/horaedb/1/1" -sst_file_name = "37.sst" -runtime_thread_num = 1 -bench_measurement_time = "30s" -max_projections = 5 -read_batch_row_num = 500 -sst_meta_cache_cap = 1000 -sst_data_cache_cap = 10000 - -[sst_bench.predicate] -# start_time_ms = 0 -start_time_ms = 1632985200000 -# end_time_ms = 0 -end_time_ms = 1632985800000 - -[merge_sst_bench] -store_path = "/tmp/horaedb" -space_id = 1 -table_id = 1 -sst_file_ids = [ 34, 37 ] -runtime_thread_num = 1 -bench_measurement_time = "120s" -max_projections = 5 -read_batch_row_num = 500 -sst_level = 0 - -[merge_sst_bench.predicate] -start_time_ms = 0 -# start_time_ms = 1632985200000 -end_time_ms = 0 -# end_time_ms = 1632985800000 - -[scan_memtable_bench] -store_path = "/tmp/horaedb/1/1" -sst_file_name = "37.sst" -runtime_thread_num = 1 -max_projections = 5 -arena_block_size = "64M" - -[wal_row_bench] -rows_num = 100_0000 -test_num = 3 - -[wal_write_bench] -bench_measurement_time = "60s" -bench_sample_size = 60 -batch_size = 512 -value_size = 1024 - -[replay_bench] -bench_measurement_time = "3s" -bench_sample_size = 10 \ No newline at end of file diff --git a/src/benchmarks/benches/bench.rs b/src/benchmarks/benches/bench.rs deleted file mode 100644 index cb5d76ed99..0000000000 --- a/src/benchmarks/benches/bench.rs +++ /dev/null @@ -1,242 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Benchmarks - -use std::{cell::RefCell, sync::Once}; - -use benchmarks::{ - config::{self, BenchConfig}, - merge_memtable_bench::MergeMemTableBench, - merge_sst_bench::MergeSstBench, - parquet_bench::ParquetBench, - replay_bench::ReplayBench, - scan_memtable_bench::ScanMemTableBench, - sst_bench::SstBench, - wal_write_bench::WalWriteBench, -}; -use criterion::*; -use pprof::criterion::{Output, PProfProfiler}; - -static INIT_LOG: Once = Once::new(); - -pub fn init_bench() -> BenchConfig { - INIT_LOG.call_once(|| { - env_logger::init(); - }); - - config::bench_config_from_env() -} - -fn bench_read_sst_iter(b: &mut Bencher<'_>, bench: &SstBench) { - b.iter(|| { - bench.run_bench(); - }) -} - -fn bench_read_sst(c: &mut Criterion) { - let config = init_bench(); - - let mut group = c.benchmark_group("read_sst"); - group.measurement_time(config.sst_bench.bench_measurement_time.0); - group.sample_size(config.sst_bench.bench_sample_size); - - let mut bench = SstBench::new(config.sst_bench); - - for i in 0..bench.num_benches() { - bench.init_for_bench(i); - - group.bench_with_input( - BenchmarkId::new("read_sst", format!("{}/{}", bench.sst_file_name, i)), - &bench, - bench_read_sst_iter, - ); - } - - group.finish(); -} - -fn bench_merge_sst_iter(b: &mut Bencher<'_>, bench: &MergeSstBench) { - b.iter(|| bench.run_bench()) -} - -fn bench_merge_sst(c: &mut Criterion) { - let config = init_bench(); - - let mut group = c.benchmark_group("merge_sst"); - - group.measurement_time(config.merge_sst_bench.bench_measurement_time.0); - group.sample_size(config.sst_bench.bench_sample_size); - - let sst_file_ids = format!("{:?}", config.merge_sst_bench.sst_file_ids); - let mut bench = MergeSstBench::new(config.merge_sst_bench); - - for i in 0..bench.num_benches() { - bench.init_for_bench(i, true); - group.bench_with_input( - BenchmarkId::new("merge_sst", format!("{sst_file_ids}/{i}/dedup")), - &bench, - bench_merge_sst_iter, - ); - - bench.init_for_bench(i, false); - group.bench_with_input( - BenchmarkId::new("merge_sst", format!("{sst_file_ids}/{i}/no-dedup")), - &bench, - bench_merge_sst_iter, - ); - } - - group.finish(); -} - -fn bench_parquet_iter(b: &mut Bencher<'_>, bench: &ParquetBench) { - b.iter(|| bench.run_bench()) -} - -fn bench_parquet(c: &mut Criterion) { - let config = init_bench(); - - let mut group = c.benchmark_group("read_parquet"); - - group.measurement_time(config.sst_bench.bench_measurement_time.0); - group.sample_size(config.sst_bench.bench_sample_size); - - let mut bench = ParquetBench::new(config.sst_bench); - - for i in 0..bench.num_benches() { - bench.init_for_bench(i); - - group.bench_with_input( - BenchmarkId::new("read_parquet", format!("{}/{}", bench.sst_file_name, i)), - &bench, - bench_parquet_iter, - ); - } - - group.finish(); -} - -fn bench_scan_memtable_iter(b: &mut Bencher<'_>, bench: &ScanMemTableBench) { - b.iter(|| bench.run_bench()) -} - -fn bench_scan_memtable(c: &mut Criterion) { - let config = init_bench(); - - let mut group = c.benchmark_group("scan_memtable"); - - let mut bench = ScanMemTableBench::new(config.scan_memtable_bench); - - for i in 0..bench.num_benches() { - bench.init_for_bench(i); - - group.bench_with_input( - BenchmarkId::new("scan_memtable", i), - &bench, - bench_scan_memtable_iter, - ); - } - - group.finish(); -} - -fn bench_merge_memtable_iter(b: &mut Bencher<'_>, bench: &MergeMemTableBench) { - b.iter(|| bench.run_bench()) -} - -fn bench_merge_memtable(c: &mut Criterion) { - let config = init_bench(); - - let mut group = 
c.benchmark_group("merge_memtable"); - - let sst_file_ids = format!("{:?}", config.merge_memtable_bench.sst_file_ids); - let mut bench = MergeMemTableBench::new(config.merge_memtable_bench); - - for i in 0..bench.num_benches() { - bench.init_for_bench(i, true); - group.bench_with_input( - BenchmarkId::new("merge_memtable", format!("{sst_file_ids}/{i}/dedup")), - &bench, - bench_merge_memtable_iter, - ); - - bench.init_for_bench(i, false); - group.bench_with_input( - BenchmarkId::new("merge_memtable", format!("{sst_file_ids}/{i}/no-dedup")), - &bench, - bench_merge_memtable_iter, - ); - } - - group.finish(); -} - -fn bench_wal_write_iter(b: &mut Bencher<'_>, bench: &WalWriteBench) { - b.iter(|| bench.run_bench()) -} - -fn bench_wal_write(c: &mut Criterion) { - let config = init_bench(); - - let mut group = c.benchmark_group("wal_write"); - - group.measurement_time(config.wal_write_bench.bench_measurement_time.0); - group.sample_size(config.wal_write_bench.bench_sample_size); - - let bench = WalWriteBench::new(config.wal_write_bench); - - group.bench_with_input( - BenchmarkId::new("wal_write", 0), - &bench, - bench_wal_write_iter, - ); - - group.finish(); -} - -fn bench_replay_iter(b: &mut Bencher<'_>, bench: &RefCell) { - let mut bench = bench.borrow_mut(); - b.iter(|| bench.run_bench()) -} - -fn bench_replay(c: &mut Criterion) { - let config = init_bench(); - - let mut group = c.benchmark_group("replay"); - - group.measurement_time(config.replay_bench.bench_measurement_time.0); - group.sample_size(config.replay_bench.bench_sample_size); - - let bench = RefCell::new(ReplayBench::new(config.replay_bench)); - group.bench_with_input(BenchmarkId::new("replay", 0), &bench, bench_replay_iter); - group.finish(); -} - -criterion_group!( - name = benches; - config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None))); - targets = bench_parquet, - bench_read_sst, - bench_merge_sst, - bench_scan_memtable, - bench_merge_memtable, - bench_wal_write, - bench_replay, -); - -criterion_main!(benches); diff --git a/src/benchmarks/config/bench.toml b/src/benchmarks/config/bench.toml deleted file mode 100644 index ad66fdeaa1..0000000000 --- a/src/benchmarks/config/bench.toml +++ /dev/null @@ -1,78 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[sst_bench] -store_path = "/path/to/data/1/1" -sst_file_name = "37.sst" -runtime_thread_num = 1 -bench_measurement_time = "30s" -bench_sample_size = 30 -max_projections = 5 -read_batch_row_num = 8192 -is_async = false -reverse = false - -[sst_bench.predicate] -# start_time_ms = 0 -start_time_ms = 1632985200000 -# end_time_ms = 0 -end_time_ms = 1632985800000 - -[merge_sst_bench] -store_path = "/path/to/data" -space_id = 1 -table_id = 1 -sst_file_ids = [ 34, 37 ] -runtime_thread_num = 1 -bench_measurement_time = "30s" -bench_sample_size = 30 -max_projections = 5 -read_batch_row_num = 500 -sst_level = 0 - -[merge_sst_bench.predicate] -start_time_ms = 0 -# start_time_ms = 1632985200000 -end_time_ms = 0 -# end_time_ms = 1632985800000 - -[scan_memtable_bench] -store_path = "/path/to/data/1/1" -sst_file_name = "37.sst" -runtime_thread_num = 1 -max_projections = 5 -arena_block_size = "64M" - -[merge_memtable_bench] -store_path = "/path/to/data" -space_id = 1 -table_id = 1 -sst_file_ids = [ 37 ] -runtime_thread_num = 1 -max_projections = 5 -arena_block_size = "64M" - -[wal_write_bench] -bench_measurement_time = "60s" -bench_sample_size = 60 -batch_size = 512 -value_size = 1024 - -[replay_bench] -bench_measurement_time = "3s" -bench_sample_size = 10 -batch_size = 10000 diff --git a/src/benchmarks/config/sst.toml b/src/benchmarks/config/sst.toml deleted file mode 100644 index 474edcf978..0000000000 --- a/src/benchmarks/config/sst.toml +++ /dev/null @@ -1,45 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -runtime_thread_num = 4 - -[rebuild_sst] -store_path = "/tmp/horaedb/benchmarks" -input_file_name = "898.sst" -read_batch_row_num = 8192 -output_file_name = "tt_t.sst" -num_rows_per_row_group = 8192 -compression = "SNAPPY" - -[rebuild_sst.predicate] -start_time_ms = 0 -end_time_ms = 0 - -[merge_sst] -store_path = "/tmp/horaedb/benchmarks/2199023255564" -space_id = 1 -table_id = 1 -sst_file_ids = [1, 17, 19, 24, 31, 37, 43, 45, 9, 14, 18, 21, 27, 34, 40, 44, 5] -dedup = true -read_batch_row_num = 16384 -output_store_path = "/tmp/horaedb/data/1/1" -output_file_name = "16384-all.sst" -num_rows_per_row_group = 16384 - -[merge_sst.predicate] -start_time_ms = 0 -end_time_ms = 0 diff --git a/src/benchmarks/src/bin/parquet-reader.rs b/src/benchmarks/src/bin/parquet-reader.rs deleted file mode 100644 index c06eaa82cb..0000000000 --- a/src/benchmarks/src/bin/parquet-reader.rs +++ /dev/null @@ -1,44 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Once; - -use benchmarks::{ - config::{self, BenchConfig}, - parquet_bench::ParquetBench, -}; -use env_logger::Env; - -static INIT_LOG: Once = Once::new(); - -pub fn init_bench() -> BenchConfig { - INIT_LOG.call_once(|| { - env_logger::from_env(Env::default().default_filter_or("info")).init(); - }); - - config::bench_config_from_env() -} - -fn main() { - let config = init_bench(); - let bench = ParquetBench::new(config.sst_bench); - - for _ in 0..10 { - bench.run_bench(); - } - println!("done"); -} diff --git a/src/benchmarks/src/bin/sst-tools.rs b/src/benchmarks/src/bin/sst-tools.rs deleted file mode 100644 index 3256acd9ba..0000000000 --- a/src/benchmarks/src/bin/sst-tools.rs +++ /dev/null @@ -1,84 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
-
-use std::sync::Arc;
-
-use benchmarks::{
-    sst_tools::{self, MergeSstConfig, RebuildSstConfig},
-    util,
-};
-use clap::{Arg, Command};
-use logger::info;
-use serde::Deserialize;
-
-#[derive(Debug, Deserialize)]
-#[serde(default)]
-struct Config {
-    runtime_thread_num: usize,
-    rebuild_sst: Option<RebuildSstConfig>,
-    merge_sst: Option<MergeSstConfig>,
-}
-
-impl Default for Config {
-    fn default() -> Config {
-        Self {
-            runtime_thread_num: 1,
-            rebuild_sst: None,
-            merge_sst: None,
-        }
-    }
-}
-
-fn config_from_path(path: &str) -> Config {
-    let mut toml_buf = String::new();
-    toml_ext::parse_toml_from_path(path, &mut toml_buf).expect("Failed to parse config.")
-}
-
-fn main() {
-    env_logger::init();
-
-    let matches = Command::new("SST Tools")
-        .arg(
-            Arg::new("config")
-                .short('c')
-                .long("config")
-                .required(true)
-                .num_args(1)
-                .help("Set configuration file, eg: \"/path/server.toml\""),
-        )
-        .get_matches();
-
-    let config_path = matches
-        .get_one::<String>("config")
-        .expect("Config file is required.");
-    let config = config_from_path(config_path);
-
-    info!("sst tools start, config:{:?}", config);
-
-    let runtime = Arc::new(util::new_runtime(config.runtime_thread_num));
-
-    let rt = runtime.clone();
-    runtime.block_on(async {
-        if let Some(rebuild_sst) = config.rebuild_sst {
-            sst_tools::rebuild_sst(rebuild_sst, rt.clone()).await;
-        }
-
-        if let Some(merge_sst) = config.merge_sst {
-            sst_tools::merge_sst(merge_sst, rt).await;
-        }
-    });
-}
diff --git a/src/benchmarks/src/config.rs b/src/benchmarks/src/config.rs
deleted file mode 100644
index 493eb7c644..0000000000
--- a/src/benchmarks/src/config.rs
+++ /dev/null
@@ -1,157 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Benchmark configs.
-
-use std::env;
-
-use analytic_engine::{space::SpaceId, sst::manager::FileId};
-use common_types::time::{TimeRange, Timestamp};
-use serde::Deserialize;
-use size_ext::ReadableSize;
-use table_engine::{
-    predicate::{PredicateBuilder, PredicateRef},
-    table::TableId,
-};
-use time_ext::ReadableDuration;
-
-const BENCH_CONFIG_PATH_KEY: &str = "ANALYTIC_BENCH_CONFIG_PATH";
-
-#[derive(Deserialize)]
-pub struct BenchConfig {
-    pub sst_bench: SstBenchConfig,
-    pub merge_sst_bench: MergeSstBenchConfig,
-    pub scan_memtable_bench: ScanMemTableBenchConfig,
-    pub merge_memtable_bench: MergeMemTableBenchConfig,
-    pub wal_write_bench: WalWriteBenchConfig,
-    pub replay_bench: ReplayConfig,
-}
-
-// TODO(yingwen): Maybe we can use lazy static to load config first.
-pub fn bench_config_from_env() -> BenchConfig {
-    let path = match env::var(BENCH_CONFIG_PATH_KEY) {
-        Ok(v) => v,
-        Err(e) => panic!("Env {BENCH_CONFIG_PATH_KEY} is required to run benches, err:{e}."),
-    };
-
-    let mut toml_buf = String::new();
-    toml_ext::parse_toml_from_path(&path, &mut toml_buf).expect("Failed to parse config.")
-}
-
-#[derive(Deserialize)]
-pub struct SstBenchConfig {
-    pub store_path: String,
-    pub sst_file_name: String,
-    pub runtime_thread_num: usize,
-    pub is_async: bool,
-
-    pub bench_measurement_time: ReadableDuration,
-    pub bench_sample_size: usize,
-
-    /// Max number of projection columns.
-    pub max_projections: usize,
-    pub num_rows_per_row_group: usize,
-    pub predicate: BenchPredicate,
-    pub sst_meta_cache_cap: Option,
-    pub sst_data_cache_cap: Option,
-    pub reverse: bool,
-}
-
-#[derive(Deserialize)]
-pub struct MergeSstBenchConfig {
-    pub store_path: String,
-    pub space_id: SpaceId,
-    pub table_id: TableId,
-    pub sst_file_ids: Vec<FileId>,
-    pub runtime_thread_num: usize,
-
-    pub bench_measurement_time: ReadableDuration,
-    pub bench_sample_size: usize,
-
-    /// Max number of projection columns.
-    pub max_projections: usize,
-    pub num_rows_per_row_group: usize,
-    pub predicate: BenchPredicate,
-    pub sst_level: u16,
-}
-
-#[derive(Deserialize)]
-pub struct ScanMemTableBenchConfig {
-    pub store_path: String,
-    pub sst_file_name: String,
-    pub runtime_thread_num: usize,
-
-    /// Max number of projection columns.
-    pub max_projections: usize,
-
-    pub arena_block_size: ReadableSize,
-}
-
-#[derive(Debug, Deserialize)]
-pub struct BenchPredicate {
-    /// Inclusive start time in millis.
-    start_time_ms: i64,
-    /// Exclusive end time in millis.
-    ///
-    /// Set to current time millis if start_time_ms == end_time_ms.
-    end_time_ms: i64,
-}
-
-impl BenchPredicate {
-    pub fn into_predicate(self) -> PredicateRef {
-        let start = Timestamp::new(self.start_time_ms);
-        let end = if self.start_time_ms == self.end_time_ms {
-            Timestamp::now()
-        } else {
-            Timestamp::new(self.end_time_ms)
-        };
-        let time_range = TimeRange::new(start, end).unwrap();
-
-        PredicateBuilder::default()
-            .set_time_range(time_range)
-            .build()
-    }
-}
-
-#[derive(Deserialize)]
-pub struct MergeMemTableBenchConfig {
-    pub store_path: String,
-    pub space_id: SpaceId,
-    pub table_id: TableId,
-    pub sst_file_ids: Vec<FileId>,
-    pub runtime_thread_num: usize,
-
-    /// Max number of projection columns.
-    pub max_projections: usize,
-
-    pub arena_block_size: ReadableSize,
-}
-
-#[derive(Deserialize)]
-pub struct WalWriteBenchConfig {
-    pub bench_measurement_time: ReadableDuration,
-    pub bench_sample_size: usize,
-    pub batch_size: usize,
-    pub value_size: usize,
-}
-
-#[derive(Deserialize)]
-pub struct ReplayConfig {
-    pub bench_measurement_time: ReadableDuration,
-    pub bench_sample_size: usize,
-    pub batch_size: usize,
-}
diff --git a/src/benchmarks/src/lib.rs b/src/benchmarks/src/lib.rs
deleted file mode 100644
index ffc098c549..0000000000
--- a/src/benchmarks/src/lib.rs
+++ /dev/null
@@ -1,34 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Utilities for benchmarks. - -use common_types::SequenceNumber; - -pub mod config; -pub mod merge_memtable_bench; -pub mod merge_sst_bench; -pub mod parquet_bench; -pub mod replay_bench; -pub mod scan_memtable_bench; -pub mod sst_bench; -pub mod sst_tools; -pub mod table; -pub mod util; -pub mod wal_write_bench; - -pub(crate) const INIT_SEQUENCE: SequenceNumber = 1; diff --git a/src/benchmarks/src/merge_memtable_bench.rs b/src/benchmarks/src/merge_memtable_bench.rs deleted file mode 100644 index abf5f8f45a..0000000000 --- a/src/benchmarks/src/merge_memtable_bench.rs +++ /dev/null @@ -1,232 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Merge memtable bench. 
- -use std::{cmp, sync::Arc, time::Instant}; - -use analytic_engine::{ - memtable::{ - factory::{Factory as MemTableFactory, Options}, - skiplist::factory::SkiplistMemTableFactory, - }, - row_iter::{ - dedup::DedupIterator, - merge::{MergeBuilder, MergeConfig}, - FetchedRecordBatchIterator, IterOptions, - }, - space::SpaceId, - sst::{ - factory::{FactoryImpl, FactoryRef as SstFactoryRef, ObjectStorePickerRef, ScanOptions}, - meta_data::cache::MetaCacheRef, - }, - table::{ - sst_util, - version::{MemTableState, MemTableVec}, - }, - ScanType, SstReadOptionsBuilder, -}; -use arena::NoopCollector; -use common_types::{ - projected_schema::ProjectedSchema, request_id::RequestId, schema::Schema, time::TimeRange, -}; -use logger::info; -use object_store::{local_file, ObjectStoreRef}; -use runtime::Runtime; -use table_engine::{predicate::Predicate, table::TableId}; - -use crate::{config::MergeMemTableBenchConfig, util}; - -pub struct MergeMemTableBench { - store: ObjectStoreRef, - memtables: MemTableVec, - max_projections: usize, - schema: Schema, - projected_schema: ProjectedSchema, - runtime: Arc, - space_id: SpaceId, - table_id: TableId, - dedup: bool, - sst_read_options_builder: SstReadOptionsBuilder, - num_rows_per_row_group: usize, -} - -impl MergeMemTableBench { - pub fn new(config: MergeMemTableBenchConfig) -> Self { - assert!(!config.sst_file_ids.is_empty()); - - let store = Arc::new(local_file::try_new_with_default(config.store_path).unwrap()) as _; - - let runtime = Arc::new(util::new_runtime(config.runtime_thread_num)); - let space_id = config.space_id; - let table_id = config.table_id; - - let meta_cache: Option = None; - // Use first sst's schema. - let sst_path = sst_util::new_sst_file_path(space_id, table_id, config.sst_file_ids[0]); - let schema = runtime.block_on(util::schema_from_sst(&store, &sst_path, &meta_cache)); - - let projected_schema = ProjectedSchema::no_projection(schema.clone()); - let max_projections = cmp::min(config.max_projections, schema.num_columns()); - - let mut memtables = Vec::with_capacity(config.sst_file_ids.len()); - for id in &config.sst_file_ids { - let sst_path = sst_util::new_sst_file_path(space_id, table_id, *id); - - let memtable_factory = SkiplistMemTableFactory; - let memtable_opts = Options { - collector: Arc::new(NoopCollector {}), - schema: schema.clone(), - arena_block_size: config.arena_block_size.0 as u32, - creation_sequence: crate::INIT_SEQUENCE, - }; - let memtable = memtable_factory.create_memtable(memtable_opts).unwrap(); - - runtime.block_on(util::load_sst_to_memtable( - &store, - &sst_path, - &schema, - &memtable, - runtime.clone(), - )); - - info!( - "MergeMemTableBench memtable loaded, memory used:{}", - memtable.approximate_memory_usage() - ); - - memtables.push(MemTableState { - mem: memtable, - aligned_time_range: TimeRange::min_to_max(), - id: *id, - }); - } - let sst_read_options_builder = - mock_sst_read_options_builder(projected_schema.clone(), runtime.clone()); - - MergeMemTableBench { - store, - memtables, - max_projections, - schema, - projected_schema, - runtime, - space_id, - table_id, - dedup: true, - sst_read_options_builder, - num_rows_per_row_group: 500, - } - } - - pub fn num_benches(&self) -> usize { - // One test reads all columns and `max_projections` tests read with projection. 
- 1 + self.max_projections - } - - pub fn init_for_bench(&mut self, i: usize, dedup: bool) { - let projected_schema = - util::projected_schema_by_number(&self.schema, i, self.max_projections); - - self.projected_schema = projected_schema; - self.dedup = dedup; - } - - pub fn run_bench(&self) { - let space_id = self.space_id; - let table_id = self.table_id; - let sequence = u64::MAX; - let projected_schema = self.projected_schema.clone(); - let sst_factory: SstFactoryRef = Arc::new(FactoryImpl); - let iter_options = IterOptions { - batch_size: self.num_rows_per_row_group, - }; - - let request_id = RequestId::next_id(); - let store_picker: ObjectStorePickerRef = Arc::new(self.store.clone()); - let mut builder = MergeBuilder::new(MergeConfig { - request_id: request_id.clone(), - metrics_collector: None, - deadline: None, - space_id, - table_id, - sequence, - projected_schema, - predicate: Arc::new(Predicate::empty()), - sst_factory: &sst_factory, - sst_read_options_builder: self.sst_read_options_builder.clone(), - store_picker: &store_picker, - merge_iter_options: iter_options.clone(), - need_dedup: true, - reverse: false, - }); - - builder.mut_memtables().extend_from_slice(&self.memtables); - - self.runtime.block_on(async { - let begin_instant = Instant::now(); - - let mut merge_iter = builder.build().await.unwrap(); - let mut total_rows = 0; - let mut batch_num = 0; - - if self.dedup { - let mut dedup_iter = - DedupIterator::new(request_id.clone(), merge_iter, iter_options); - while let Some(batch) = dedup_iter.next_batch().await.unwrap() { - let num_rows = batch.num_rows(); - total_rows += num_rows; - batch_num += 1; - } - } else { - while let Some(batch) = merge_iter.next_batch().await.unwrap() { - let num_rows = batch.num_rows(); - total_rows += num_rows; - batch_num += 1; - } - } - - info!( - "MergeMemTableBench total rows of sst:{}, total batch num:{}, cost:{:?}", - total_rows, - batch_num, - begin_instant.elapsed(), - ); - }); - } -} - -fn mock_sst_read_options_builder( - _projected_schema: ProjectedSchema, - runtime: Arc, -) -> SstReadOptionsBuilder { - let scan_options = ScanOptions { - background_read_parallelism: 1, - max_record_batches_in_flight: 1024, - num_streams_to_prefetch: 0, - }; - - SstReadOptionsBuilder::new( - ScanType::Query, - scan_options, - None, - 500, - Arc::new(Predicate::empty()), - None, - runtime, - ) -} diff --git a/src/benchmarks/src/merge_sst_bench.rs b/src/benchmarks/src/merge_sst_bench.rs deleted file mode 100644 index 9a949438a9..0000000000 --- a/src/benchmarks/src/merge_sst_bench.rs +++ /dev/null @@ -1,256 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Merge SST bench. 
- -use std::{cmp, sync::Arc, time::Instant}; - -use analytic_engine::{ - row_iter::{ - chain, - chain::ChainConfig, - dedup::DedupIterator, - merge::{MergeBuilder, MergeConfig}, - FetchedRecordBatchIterator, IterOptions, - }, - space::SpaceId, - sst::{ - factory::{FactoryImpl, FactoryRef as SstFactoryRef, ObjectStorePickerRef, ScanOptions}, - file::{FileHandle, FilePurgeQueue, Level, Request}, - meta_data::cache::MetaCacheRef, - }, - table::sst_util, - ScanType, SstReadOptionsBuilder, -}; -use common_types::{projected_schema::ProjectedSchema, request_id::RequestId, schema::Schema}; -use logger::info; -use object_store::{local_file, ObjectStoreRef}; -use runtime::Runtime; -use table_engine::{predicate::Predicate, table::TableId}; -use tokio::sync::mpsc::{self, UnboundedReceiver}; - -use crate::{config::MergeSstBenchConfig, util}; - -pub struct MergeSstBench { - store: ObjectStoreRef, - max_projections: usize, - schema: Schema, - projected_schema: Option, - sst_read_options_builder: SstReadOptionsBuilder, - num_rows_per_row_group: usize, - runtime: Arc, - space_id: SpaceId, - table_id: TableId, - file_handles: Vec, - _receiver: UnboundedReceiver, - dedup: bool, - sst_level: Level, -} - -impl MergeSstBench { - pub fn new(config: MergeSstBenchConfig) -> Self { - assert!(!config.sst_file_ids.is_empty()); - - let store = Arc::new(local_file::try_new_with_default(config.store_path).unwrap()) as _; - - let runtime = Arc::new(util::new_runtime(config.runtime_thread_num)); - let space_id = config.space_id; - let table_id = config.table_id; - - let sst_path = sst_util::new_sst_file_path(space_id, table_id, config.sst_file_ids[0]); - let meta_cache: Option = None; - - let schema = runtime.block_on(util::schema_from_sst(&store, &sst_path, &meta_cache)); - - let predicate = config.predicate.into_predicate(); - let _projected_schema = ProjectedSchema::no_projection(schema.clone()); - let scan_options = ScanOptions { - background_read_parallelism: 1, - max_record_batches_in_flight: 1024, - num_streams_to_prefetch: 0, - }; - - let scan_type = ScanType::Query; - let sst_read_options_builder = SstReadOptionsBuilder::new( - scan_type, - scan_options, - None, - config.num_rows_per_row_group, - predicate, - meta_cache.clone(), - runtime.clone(), - ); - let max_projections = cmp::min(config.max_projections, schema.num_columns()); - - let (tx, rx) = mpsc::unbounded_channel(); - let purge_queue = FilePurgeQueue::new(space_id, table_id, tx); - - let file_handles = runtime.block_on(util::file_handles_from_ssts( - &store, - space_id, - table_id, - &config.sst_file_ids, - purge_queue, - &meta_cache, - )); - - MergeSstBench { - store, - max_projections, - schema, - sst_read_options_builder, - num_rows_per_row_group: config.num_rows_per_row_group, - projected_schema: None, - runtime, - space_id, - table_id, - file_handles, - _receiver: rx, - dedup: true, - sst_level: config.sst_level.into(), - } - } - - pub fn num_benches(&self) -> usize { - // One test reads all columns and `max_projections` tests read with projection. 
- 1 + self.max_projections - } - - pub fn init_for_bench(&mut self, i: usize, dedup: bool) { - let projected_schema = - util::projected_schema_by_number(&self.schema, i, self.max_projections); - - self.projected_schema = Some(projected_schema); - self.dedup = dedup; - } - - fn run_dedup_bench(&self) { - let space_id = self.space_id; - let table_id = self.table_id; - let sequence = u64::MAX; - let projected_schema = self.projected_schema.clone().unwrap(); - let sst_factory: SstFactoryRef = Arc::new(FactoryImpl); - let iter_options = IterOptions { - batch_size: self.num_rows_per_row_group, - }; - - let request_id = RequestId::next_id(); - let store_picker: ObjectStorePickerRef = Arc::new(self.store.clone()); - let mut builder = MergeBuilder::new(MergeConfig { - request_id: request_id.clone(), - metrics_collector: None, - deadline: None, - space_id, - table_id, - sequence, - projected_schema, - predicate: Arc::new(Predicate::empty()), - sst_factory: &sst_factory, - sst_read_options_builder: self.sst_read_options_builder.clone(), - store_picker: &store_picker, - merge_iter_options: iter_options.clone(), - need_dedup: true, - reverse: false, - }); - - builder - // TODO: make level configurable - .mut_ssts_of_level(self.sst_level) - .extend_from_slice(&self.file_handles); - - self.runtime.block_on(async { - let begin_instant = Instant::now(); - - let merge_iter = builder.build().await.unwrap(); - let mut dedup_iter = DedupIterator::new(request_id, merge_iter, iter_options); - let mut total_rows = 0; - let mut batch_num = 0; - - while let Some(batch) = dedup_iter.next_batch().await.unwrap() { - let num_rows = batch.num_rows(); - total_rows += num_rows; - batch_num += 1; - } - - info!( - "\nMergeSstBench total rows of sst: {}, total batch num: {}, cost: {:?}", - total_rows, - batch_num, - begin_instant.elapsed(), - ); - }); - } - - fn run_no_dedup_bench(&self) { - let space_id = self.space_id; - let table_id = self.table_id; - let projected_schema = self.projected_schema.clone().unwrap(); - let sst_factory: SstFactoryRef = Arc::new(FactoryImpl); - - let request_id = RequestId::next_id(); - let store_picker: ObjectStorePickerRef = Arc::new(self.store.clone()); - let builder = chain::Builder::new(ChainConfig { - request_id, - metrics_collector: None, - deadline: None, - space_id, - table_id, - projected_schema, - predicate: Arc::new(Predicate::empty()), - sst_factory: &sst_factory, - sst_read_options_builder: self.sst_read_options_builder.clone(), - store_picker: &store_picker, - num_streams_to_prefetch: 0, - }) - .ssts(vec![self.file_handles.clone()]); - - self.runtime.block_on(async { - let begin_instant = Instant::now(); - - let mut chain_iter = builder.build().await.unwrap(); - let mut total_rows = 0; - let mut batch_num = 0; - - while let Some(batch) = chain_iter.next_batch().await.unwrap() { - let num_rows = batch.num_rows(); - total_rows += num_rows; - batch_num += 1; - } - - info!( - "\nMergeSstBench total rows of sst: {}, total batch num: {}, cost: {:?}", - total_rows, - batch_num, - begin_instant.elapsed(), - ); - }); - } - - pub fn run_bench(&self) { - if self.dedup { - self.run_dedup_bench(); - } else { - self.run_no_dedup_bench(); - } - } -} - -impl Drop for MergeSstBench { - fn drop(&mut self) { - self.file_handles.clear(); - } -} diff --git a/src/benchmarks/src/parquet_bench.rs b/src/benchmarks/src/parquet_bench.rs deleted file mode 100644 index 5bec32bac4..0000000000 --- a/src/benchmarks/src/parquet_bench.rs +++ /dev/null @@ -1,169 +0,0 @@ -// Licensed to the Apache Software 
Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Parquet bench. - -use std::{io::Cursor, sync::Arc, time::Instant}; - -use analytic_engine::sst::meta_data::cache::MetaCacheRef; -use common_types::schema::Schema; -use futures::StreamExt; -use logger::info; -use object_store::{local_file, ObjectStoreRef, Path}; -use parquet::arrow::{ - arrow_reader::ParquetRecordBatchReaderBuilder, ParquetRecordBatchStreamBuilder, -}; -use runtime::Runtime; -use table_engine::predicate::PredicateRef; - -use crate::{config::SstBenchConfig, util}; - -pub struct ParquetBench { - store: ObjectStoreRef, - pub sst_file_name: String, - max_projections: usize, - projection: Vec, - _schema: Schema, - _predicate: PredicateRef, - runtime: Arc, - is_async: bool, - batch_size: usize, -} - -impl ParquetBench { - pub fn new(config: SstBenchConfig) -> Self { - let store = Arc::new(local_file::try_new_with_default(config.store_path).unwrap()) as _; - - let runtime = util::new_runtime(config.runtime_thread_num); - - let sst_path = Path::from(config.sst_file_name.clone()); - let meta_cache: Option = None; - let schema = runtime.block_on(util::schema_from_sst(&store, &sst_path, &meta_cache)); - - ParquetBench { - store, - sst_file_name: config.sst_file_name, - max_projections: config.max_projections, - projection: Vec::new(), - _schema: schema, - _predicate: config.predicate.into_predicate(), - runtime: Arc::new(runtime), - is_async: config.is_async, - batch_size: config.num_rows_per_row_group, - } - } - - pub fn num_benches(&self) -> usize { - // One test reads all columns and `max_projections` tests read with projection. 
- 1 + self.max_projections - } - - pub fn init_for_bench(&mut self, i: usize) { - let projection = if i < self.max_projections { - (0..i + 1).collect() - } else { - Vec::new() - }; - - self.projection = projection; - } - - pub fn run_bench(&self) { - if self.is_async { - return self.run_async_bench(); - } - - self.run_sync_bench() - } - - pub fn run_sync_bench(&self) { - let sst_path = Path::from(self.sst_file_name.clone()); - - self.runtime.block_on(async { - let open_instant = Instant::now(); - let get_result = self.store.get(&sst_path).await.unwrap(); - let bytes = get_result.bytes().await.unwrap(); - let open_cost = open_instant.elapsed(); - - let filter_begin_instant = Instant::now(); - let arrow_reader = ParquetRecordBatchReaderBuilder::try_new(bytes) - .unwrap() - .with_batch_size(self.batch_size) - .build() - .unwrap(); - let filter_cost = filter_begin_instant.elapsed(); - - let iter_begin_instant = Instant::now(); - let mut total_rows = 0; - let mut batch_num = 0; - for record_batch in arrow_reader { - let num_rows = record_batch.unwrap().num_rows(); - total_rows += num_rows; - batch_num += 1; - } - - info!( - "\nParquetBench Sync total rows of sst:{}, total batch num:{}, - open cost:{:?}, filter cost:{:?}, iter cost:{:?}", - total_rows, - batch_num, - open_cost, - filter_cost, - iter_begin_instant.elapsed(), - ); - }); - } - - pub fn run_async_bench(&self) { - let sst_path = Path::from(self.sst_file_name.clone()); - self.runtime.block_on(async { - let open_instant = Instant::now(); - let get_result = self.store.get(&sst_path).await.unwrap(); - let bytes = get_result.bytes().await.unwrap(); - let cursor = Cursor::new(bytes); - let open_cost = open_instant.elapsed(); - - let filter_begin_instant = Instant::now(); - let mut stream = ParquetRecordBatchStreamBuilder::new(cursor) - .await - .unwrap() - .with_batch_size(self.batch_size) - .build() - .unwrap(); - let filter_cost = filter_begin_instant.elapsed(); - - let mut total_rows = 0; - let mut batch_num = 0; - let iter_begin_instant = Instant::now(); - while let Some(record_batch) = stream.next().await { - let num_rows = record_batch.unwrap().num_rows(); - total_rows += num_rows; - batch_num += 1; - } - - info!( - "\nParquetBench Async total rows of sst:{}, total batch num:{}, - open cost:{:?}, filter cost:{:?}, iter cost:{:?}", - total_rows, - batch_num, - open_cost, - filter_cost, - iter_begin_instant.elapsed(), - ); - }); - } -} diff --git a/src/benchmarks/src/replay_bench.rs b/src/benchmarks/src/replay_bench.rs deleted file mode 100644 index bf2c9a8810..0000000000 --- a/src/benchmarks/src/replay_bench.rs +++ /dev/null @@ -1,97 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Replay bench. 
- -use std::sync::Arc; - -use analytic_engine::RecoverMode; -use runtime::Runtime; -use util::{OpenTablesMethod, RocksDBEngineBuildContext, TestContext, TestEnv}; -use wal::rocksdb_impl::manager::RocksDBWalsOpener; - -use crate::{config::ReplayConfig, table::FixedSchemaTable, util}; - -pub struct ReplayBench { - runtime: Arc, - test_ctx: TestContext, - table: FixedSchemaTable, - batch_size: usize, -} - -impl ReplayBench { - pub fn new(config: ReplayConfig) -> Self { - let runtime = util::new_runtime(1); - let engine_context = RocksDBEngineBuildContext::new( - RecoverMode::TableBased, - OpenTablesMethod::WithOpenShard, - ); - let env: TestEnv = TestEnv::builder().build(); - - let (test_ctx, fixed_schema_table) = env.block_on(async { - let mut test_ctx = env.new_context(&engine_context); - test_ctx.open().await; - - let fixed_schema_table = test_ctx - .create_fixed_schema_table("test_replay_table1") - .await; - let _ = test_ctx - .create_fixed_schema_table("test_replay_table2") - .await; - let _ = test_ctx - .create_fixed_schema_table("test_replay_table3") - .await; - - (test_ctx, fixed_schema_table) - }); - - ReplayBench { - runtime: Arc::new(runtime), - test_ctx, - table: fixed_schema_table, - batch_size: config.batch_size, - } - } - - pub fn run_bench(&mut self) { - self.runtime.block_on(async { - self.table.prepare_write_requests(self.batch_size); - let rows = self.table.row_tuples(); - - // Write data to table. - let mut table_names = Vec::new(); - for (table_name, _) in self.test_ctx.name_to_tables().iter() { - let row_group = self.table.rows_to_row_group(&rows); - self.test_ctx - .write_to_table(table_name.as_str(), row_group) - .await; - table_names.push(table_name.clone()); - } - - // Reopen db. - self.test_ctx - .reopen_with_tables( - table_names - .iter() - .map(|s| s.as_str()) - .collect::>() - .as_slice(), - ) - .await; - }); - } -} diff --git a/src/benchmarks/src/scan_memtable_bench.rs b/src/benchmarks/src/scan_memtable_bench.rs deleted file mode 100644 index edd1ad3284..0000000000 --- a/src/benchmarks/src/scan_memtable_bench.rs +++ /dev/null @@ -1,130 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Scan memtable bench. 
- -use std::{collections::Bound, sync::Arc}; - -use analytic_engine::{ - memtable::{ - factory::{Factory as MemTableFactory, Options}, - skiplist::factory::SkiplistMemTableFactory, - MemTableRef, ScanContext, ScanRequest, - }, - sst::meta_data::cache::MetaCacheRef, -}; -use arena::NoopCollector; -use common_types::{ - projected_schema::{ProjectedSchema, RowProjectorBuilder}, - time::TimeRange, -}; -use logger::info; -use object_store::{local_file, Path}; - -use crate::{config::ScanMemTableBenchConfig, util}; - -pub struct ScanMemTableBench { - memtable: MemTableRef, - projected_schema: ProjectedSchema, - max_projections: usize, -} - -impl ScanMemTableBench { - pub fn new(config: ScanMemTableBenchConfig) -> Self { - let store = Arc::new(local_file::try_new_with_default(config.store_path).unwrap()) as _; - - let runtime = Arc::new(util::new_runtime(config.runtime_thread_num)); - let meta_cache: Option = None; - let sst_path = Path::from(config.sst_file_name); - let schema = runtime.block_on(util::schema_from_sst(&store, &sst_path, &meta_cache)); - - let projected_schema = ProjectedSchema::no_projection(schema.clone()); - - let memtable_factory = SkiplistMemTableFactory; - let memtable_opts = Options { - collector: Arc::new(NoopCollector {}), - schema: schema.clone(), - arena_block_size: config.arena_block_size.0 as u32, - creation_sequence: crate::INIT_SEQUENCE, - }; - let memtable = memtable_factory.create_memtable(memtable_opts).unwrap(); - - runtime.block_on(util::load_sst_to_memtable( - &store, - &sst_path, - &schema, - &memtable, - runtime.clone(), - )); - - info!( - "\nScanMemTableBench memtable loaded, memory used: {}", - memtable.approximate_memory_usage() - ); - - Self { - memtable, - projected_schema, - max_projections: config.max_projections, - } - } - - pub fn num_benches(&self) -> usize { - // One test reads all columns and `max_projections` tests read with projection. - 1 + self.max_projections - } - - pub fn init_for_bench(&mut self, i: usize) { - let projected_schema = - util::projected_schema_by_number(self.memtable.schema(), i, self.max_projections); - - self.projected_schema = projected_schema; - } - - pub fn run_bench(&self) { - let scan_ctx = ScanContext::default(); - let fetched_schema = self.projected_schema.to_record_schema(); - let table_schema = self.projected_schema.table_schema(); - let row_projector_builder = - RowProjectorBuilder::new(fetched_schema, table_schema.clone(), None); - let scan_req = ScanRequest { - start_user_key: Bound::Unbounded, - end_user_key: Bound::Unbounded, - sequence: common_types::MAX_SEQUENCE_NUMBER, - need_dedup: true, - reverse: false, - metrics_collector: None, - row_projector_builder, - time_range: TimeRange::min_to_max(), - }; - - let iter = self.memtable.scan(scan_ctx, scan_req).unwrap(); - - let mut total_rows = 0; - let mut batch_num = 0; - for batch in iter { - let num_rows = batch.unwrap().num_rows(); - total_rows += num_rows; - batch_num += 1; - } - - info!( - "\nScanMemTableBench total rows of memtable: {}, total batch num: {}", - total_rows, batch_num, - ); - } -} diff --git a/src/benchmarks/src/sst_bench.rs b/src/benchmarks/src/sst_bench.rs deleted file mode 100644 index 29afdd7f69..0000000000 --- a/src/benchmarks/src/sst_bench.rs +++ /dev/null @@ -1,148 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! SST bench. - -use std::{cmp, sync::Arc, time::Instant}; - -use analytic_engine::{ - sst::{ - factory::{Factory, FactoryImpl, ObjectStorePickerRef, ScanOptions, SstReadHint}, - meta_data::cache::{MetaCache, MetaCacheRef}, - }, - ScanType, SstReadOptionsBuilder, -}; -use common_types::{ - projected_schema::{ProjectedSchema, RowProjectorBuilder}, - schema::Schema, -}; -use logger::info; -use object_store::{local_file, ObjectStoreRef, Path}; -use runtime::Runtime; - -use crate::{config::SstBenchConfig, util}; - -pub struct SstBench { - store: ObjectStoreRef, - pub sst_file_name: String, - max_projections: usize, - schema: Schema, - projected_schema: Option, - sst_read_options_builder: SstReadOptionsBuilder, - runtime: Arc, -} - -impl SstBench { - pub fn new(config: SstBenchConfig) -> Self { - let runtime = Arc::new(util::new_runtime(config.runtime_thread_num)); - - let store = Arc::new(local_file::try_new_with_default(config.store_path).unwrap()) as _; - let sst_path = Path::from(config.sst_file_name.clone()); - let meta_cache: Option = config - .sst_meta_cache_cap - .map(|cap| Arc::new(MetaCache::new(cap))); - let schema = runtime.block_on(util::schema_from_sst(&store, &sst_path, &meta_cache)); - let predicate = config.predicate.into_predicate(); - let projected_schema = ProjectedSchema::no_projection(schema.clone()); - let scan_options = ScanOptions { - background_read_parallelism: 1, - max_record_batches_in_flight: 1024, - num_streams_to_prefetch: 0, - }; - let sst_read_options_builder = SstReadOptionsBuilder::new( - ScanType::Query, - scan_options, - None, - config.num_rows_per_row_group, - predicate, - meta_cache, - runtime.clone(), - ); - let max_projections = cmp::min(config.max_projections, schema.num_columns()); - - SstBench { - store, - sst_file_name: config.sst_file_name, - max_projections, - schema, - projected_schema: Some(projected_schema), - sst_read_options_builder: sst_read_options_builder.clone(), - runtime, - } - } - - pub fn num_benches(&self) -> usize { - // One test reads all columns and `max_projections` tests read with projection. 
- 1 + self.max_projections - } - - pub fn init_for_bench(&mut self, i: usize) { - let projected_schema = - util::projected_schema_by_number(&self.schema, i, self.max_projections); - - self.projected_schema = Some(projected_schema); - } - - pub fn run_bench(&self) { - let sst_path = Path::from(self.sst_file_name.clone()); - - let sst_factory = FactoryImpl; - let store_picker: ObjectStorePickerRef = Arc::new(self.store.clone()); - - let fetched_schema = self.projected_schema.as_ref().unwrap().to_record_schema(); - let table_schema = self - .projected_schema - .as_ref() - .unwrap() - .table_schema() - .clone(); - let row_projector_builder = RowProjectorBuilder::new(fetched_schema, table_schema, None); - let sst_read_options = self - .sst_read_options_builder - .clone() - .build(row_projector_builder); - self.runtime.block_on(async { - let mut sst_reader = sst_factory - .create_reader( - &sst_path, - &sst_read_options, - SstReadHint::default(), - &store_picker, - None, - ) - .await - .unwrap(); - let begin_instant = Instant::now(); - let mut sst_stream = sst_reader.read().await.unwrap(); - - let mut total_rows = 0; - let mut batch_num = 0; - while let Some(batch) = sst_stream.fetch_next().await { - let num_rows = batch.unwrap().num_rows(); - total_rows += num_rows; - batch_num += 1; - } - - info!( - "\nSstBench total rows of sst: {}, total batch num: {}, cost: {:?}", - total_rows, - batch_num, - begin_instant.elapsed(), - ); - }); - } -} diff --git a/src/benchmarks/src/sst_tools.rs b/src/benchmarks/src/sst_tools.rs deleted file mode 100644 index 4e27492943..0000000000 --- a/src/benchmarks/src/sst_tools.rs +++ /dev/null @@ -1,324 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Tools to generate SST. 
- -use std::sync::Arc; - -use analytic_engine::{ - prefetchable_stream::PrefetchableStreamExt, - row_iter::{ - self, - dedup::DedupIterator, - merge::{MergeBuilder, MergeConfig}, - IterOptions, - }, - space::SpaceId, - sst::{ - factory::{ - Factory, FactoryImpl, FactoryRef as SstFactoryRef, ObjectStorePickerRef, ReadFrequency, - ScanOptions, SstReadHint, SstReadOptions, SstWriteOptions, - }, - file::{FilePurgeQueue, Level}, - manager::FileId, - meta_data::SstMetaReader, - writer::{MetaData, RecordBatchStream}, - }, - table::sst_util, - table_options::{Compression, StorageFormatHint}, - ScanType, SstReadOptionsBuilder, -}; -use common_types::{ - projected_schema::{ProjectedSchema, RowProjectorBuilder}, - request_id::RequestId, -}; -use generic_error::BoxError; -use logger::info; -use object_store::{local_file, ObjectStoreRef, Path}; -use runtime::Runtime; -use serde::Deserialize; -use table_engine::{predicate::Predicate, table::TableId}; -use tokio::sync::mpsc; - -use crate::{config::BenchPredicate, util}; - -#[derive(Debug)] -struct SstConfig { - sst_meta: MetaData, - store_path: String, - sst_file_name: String, - num_rows_per_row_group: usize, - compression: Compression, -} - -async fn create_sst_from_stream(config: SstConfig, record_batch_stream: RecordBatchStream) { - let sst_factory = FactoryImpl; - let sst_write_options = SstWriteOptions { - storage_format_hint: StorageFormatHint::Auto, - num_rows_per_row_group: config.num_rows_per_row_group, - compression: config.compression, - max_buffer_size: 1024 * 1024 * 10, - column_stats: Default::default(), - }; - - info!( - "create sst from stream, config:{:?}, sst_write_options:{:?}", - config, sst_write_options - ); - - let store: ObjectStoreRef = - Arc::new(local_file::try_new_with_default(config.store_path).unwrap()); - let store_picker: ObjectStorePickerRef = Arc::new(store); - let sst_file_path = Path::from(config.sst_file_name); - - let mut writer = sst_factory - .create_writer( - &sst_write_options, - &sst_file_path, - &store_picker, - Level::MAX, - ) - .await - .unwrap(); - writer - .write(RequestId::next_id(), &config.sst_meta, record_batch_stream) - .await - .unwrap(); -} - -#[derive(Debug, Deserialize)] -pub struct RebuildSstConfig { - store_path: String, - input_file_name: String, - predicate: BenchPredicate, - - // Output sst config: - output_file_name: String, - num_rows_per_row_group: usize, - compression: Compression, -} - -pub async fn rebuild_sst(config: RebuildSstConfig, runtime: Arc) { - info!("Start rebuild sst, config:{:?}", config); - - let store = Arc::new(local_file::try_new_with_default(config.store_path.clone()).unwrap()) as _; - let input_path = Path::from(config.input_file_name); - - let parquet_metadata = util::parquet_metadata(&store, &input_path).await; - let sst_meta = util::meta_from_sst(&parquet_metadata, &store, &None).await; - - let projected_schema = ProjectedSchema::no_projection(sst_meta.schema.clone()); - let scan_options = ScanOptions { - background_read_parallelism: 1, - max_record_batches_in_flight: 1024, - num_streams_to_prefetch: 2, - }; - - let fetched_schema = projected_schema.to_record_schema(); - let table_schema = projected_schema.table_schema().clone(); - let row_projector_builder = RowProjectorBuilder::new(fetched_schema, table_schema, None); - let sst_read_options = SstReadOptions { - maybe_table_level_metrics: None, - frequency: ReadFrequency::Once, - num_rows_per_row_group: config.num_rows_per_row_group, - predicate: config.predicate.into_predicate(), - meta_cache: None, - scan_options, 
- runtime, - row_projector_builder, - }; - - let record_batch_stream = - sst_to_record_batch_stream(&sst_read_options, &input_path, &store).await; - - let output_sst_config = SstConfig { - sst_meta, - store_path: config.store_path, - sst_file_name: config.output_file_name, - num_rows_per_row_group: config.num_rows_per_row_group, - compression: config.compression, - }; - - create_sst_from_stream(output_sst_config, record_batch_stream).await; - - info!("Start rebuild sst done"); -} - -async fn sst_to_record_batch_stream( - sst_read_options: &SstReadOptions, - input_path: &Path, - store: &ObjectStoreRef, -) -> RecordBatchStream { - let sst_factory = FactoryImpl; - let store_picker: ObjectStorePickerRef = Arc::new(store.clone()); - let mut sst_reader = sst_factory - .create_reader( - input_path, - sst_read_options, - SstReadHint::default(), - &store_picker, - None, - ) - .await - .unwrap(); - - sst_reader - .read() - .await - .unwrap() - .map(|res| res.box_err()) - .into_boxed_stream() -} - -#[derive(Debug, Deserialize)] -pub struct MergeSstConfig { - store_path: String, - space_id: SpaceId, - table_id: TableId, - sst_file_ids: Vec, - dedup: bool, - predicate: BenchPredicate, - - // Output sst config: - output_store_path: String, - output_file_name: String, - num_rows_per_row_group: usize, - compression: Compression, -} - -pub async fn merge_sst(config: MergeSstConfig, runtime: Arc) { - if config.sst_file_ids.is_empty() { - info!("No input files to merge"); - return; - } - - info!("Merge sst begin, config:{:?}", config); - - let space_id = config.space_id; - let table_id = config.table_id; - let store = Arc::new(local_file::try_new_with_default(config.store_path).unwrap()) as _; - - let (tx, _rx) = mpsc::unbounded_channel(); - let purge_queue = FilePurgeQueue::new(space_id, table_id, tx); - - let file_handles = util::file_handles_from_ssts( - &store, - space_id, - table_id, - &config.sst_file_ids, - purge_queue, - &None, - ) - .await; - let max_sequence = file_handles - .iter() - .map(|file| file.max_sequence()) - .max() - .unwrap(); - - let first_sst_path = sst_util::new_sst_file_path(space_id, table_id, config.sst_file_ids[0]); - let schema = util::schema_from_sst(&store, &first_sst_path, &None).await; - let iter_options = IterOptions { - batch_size: config.num_rows_per_row_group, - }; - - let scan_options = ScanOptions { - background_read_parallelism: 1, - max_record_batches_in_flight: 1024, - num_streams_to_prefetch: 0, - }; - - let request_id = RequestId::next_id(); - let sst_factory: SstFactoryRef = Arc::new(FactoryImpl); - let store_picker: ObjectStorePickerRef = Arc::new(store); - let projected_schema = ProjectedSchema::no_projection(schema.clone()); - let sst_read_options_builder = SstReadOptionsBuilder::new( - ScanType::Query, - scan_options, - None, - config.num_rows_per_row_group, - config.predicate.into_predicate(), - None, - runtime.clone(), - ); - let fetched_schema = projected_schema.to_record_schema_with_key(); - let primary_key_indexes = fetched_schema.primary_key_idx().to_vec(); - let fetched_schema = fetched_schema.into_record_schema(); - let table_schema = projected_schema.table_schema().clone(); - let row_projector_builder = - RowProjectorBuilder::new(fetched_schema, table_schema, Some(primary_key_indexes)); - - let iter = { - let space_id = config.space_id; - let table_id = config.table_id; - let sequence = max_sequence + 1; - let request_id = request_id.clone(); - - let mut builder = MergeBuilder::new(MergeConfig { - request_id, - metrics_collector: None, - deadline: 
None, - space_id, - table_id, - sequence, - projected_schema, - predicate: Arc::new(Predicate::empty()), - sst_factory: &sst_factory, - store_picker: &store_picker, - merge_iter_options: iter_options.clone(), - need_dedup: true, - reverse: false, - sst_read_options_builder: sst_read_options_builder.clone(), - }); - builder - .mut_ssts_of_level(Level::MIN) - .extend_from_slice(&file_handles); - - builder.build().await.unwrap() - }; - - let record_batch_stream = if config.dedup { - let iter = DedupIterator::new(request_id.clone(), iter, iter_options); - row_iter::record_batch_with_key_iter_to_stream(iter) - } else { - row_iter::record_batch_with_key_iter_to_stream(iter) - }; - - let sst_read_options = sst_read_options_builder.build(row_projector_builder); - let sst_meta = { - let meta_reader = SstMetaReader { - space_id, - table_id, - factory: sst_factory, - read_opts: sst_read_options, - store_picker: store_picker.clone(), - }; - let sst_metas = meta_reader.fetch_metas(&file_handles).await.unwrap(); - MetaData::merge(sst_metas.into_iter().map(MetaData::from), schema) - }; - let output_sst_config = SstConfig { - sst_meta, - store_path: config.output_store_path, - sst_file_name: config.output_file_name, - num_rows_per_row_group: config.num_rows_per_row_group, - compression: config.compression, - }; - - create_sst_from_stream(output_sst_config, record_batch_stream).await; - - info!("Merge sst done"); -} diff --git a/src/benchmarks/src/table.rs b/src/benchmarks/src/table.rs deleted file mode 100644 index 31df4234f9..0000000000 --- a/src/benchmarks/src/table.rs +++ /dev/null @@ -1,246 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Utils to create table. 
- -use std::collections::HashMap; - -use common_types::{ - column_schema, - datum::{Datum, DatumKind}, - row::{Row, RowGroup}, - schema::{self, Schema}, - table::DEFAULT_SHARD_ID, - time::Timestamp, -}; -use table_engine::{ - self, - engine::{CreateTableParams, CreateTableRequest, TableState}, - table::{SchemaId, TableId, TableSeq}, -}; -use time_ext::ReadableDuration; - -use crate::util::start_ms; - -pub fn new_row_6(data: (C0, C1, C2, C3, C4, C5)) -> Row -where - C0: Into, - C1: Into, - C2: Into, - C3: Into, - C4: Into, - C5: Into, -{ - let cols = vec![ - data.0.into(), - data.1.into(), - data.2.into(), - data.3.into(), - data.4.into(), - data.5.into(), - ]; - - Row::from_datums(cols) -} - -pub type WriteRequestTuple = (String, Timestamp, String, f64, f64, String); -pub type RowTuple<'a> = (&'a str, Timestamp, &'a str, f64, f64, &'a str); - -pub fn new_table_id(schema_id: u16, table_seq: u32) -> TableId { - TableId::with_seq(SchemaId::from(schema_id), TableSeq::from(table_seq)).unwrap() -} - -pub struct RowTupleGenerator {} - -pub struct FixedSchemaTable { - create_request: CreateTableRequest, - write_requests: Vec, -} - -impl FixedSchemaTable { - pub fn builder() -> Builder { - Builder::default() - } - - fn default_schema() -> Schema { - Self::default_schema_builder().build().unwrap() - } - - pub fn default_schema_builder() -> schema::Builder { - create_schema_builder( - // Key columns - &[("key", DatumKind::String), ("ts", DatumKind::Timestamp)], - // Normal columns - &[ - ("string_tag", DatumKind::String), - ("double_field1", DatumKind::Double), - ("double_field2", DatumKind::Double), - ("string_field2", DatumKind::String), - ], - ) - } - - #[inline] - pub fn table_id(&self) -> TableId { - self.create_request.table_id - } - - #[inline] - pub fn create_request(&self) -> &CreateTableRequest { - &self.create_request - } - - fn new_row(data: RowTuple) -> Row { - new_row_6(data) - } - - pub fn rows_to_row_group(&self, data: &[RowTuple]) -> RowGroup { - let rows = data - .iter() - .copied() - .map(FixedSchemaTable::new_row) - .collect(); - - self.new_row_group(rows) - } - - fn new_row_group(&self, rows: Vec) -> RowGroup { - RowGroup::try_new(self.create_request.params.table_schema.clone(), rows).unwrap() - } - - pub fn prepare_write_requests(&mut self, batch_size: usize) { - let start_ms = start_ms(); - self.write_requests.clear(); - (0..batch_size).for_each(|idx| { - self.write_requests.push(( - format!("key_{idx}"), - Timestamp::new(start_ms + idx as i64), - format!("tag1_{idx}"), - 11.0, - 110.0, - format!("tag2_{idx}"), - )) - }); - } - - pub fn row_tuples(&self) -> Vec { - self.write_requests - .iter() - .map(|x| (x.0.as_str(), x.1, x.2.as_str(), x.3, x.4, x.5.as_str())) - .collect() - } -} - -#[must_use] -pub struct Builder { - create_request: CreateTableRequest, -} - -impl Builder { - pub fn schema_id(mut self, schema_id: SchemaId) -> Self { - self.create_request.schema_id = schema_id; - self - } - - pub fn table_name(mut self, table_name: String) -> Self { - self.create_request.params.table_name = table_name; - self - } - - pub fn table_id(mut self, table_id: TableId) -> Self { - self.create_request.table_id = table_id; - self - } - - pub fn enable_ttl(mut self, enable_ttl: bool) -> Self { - self.create_request.params.table_options.insert( - common_types::OPTION_KEY_ENABLE_TTL.to_string(), - enable_ttl.to_string(), - ); - self - } - - pub fn ttl(mut self, duration: ReadableDuration) -> Self { - self.create_request - .params - .table_options - .insert(common_types::TTL.to_string(), 
duration.to_string()); - self - } - - pub fn build_fixed(self) -> FixedSchemaTable { - FixedSchemaTable { - create_request: self.create_request, - write_requests: Vec::new(), - } - } -} - -impl Default for Builder { - fn default() -> Self { - let params = CreateTableParams { - catalog_name: "horaedb".to_string(), - schema_name: "public".to_string(), - table_name: "test_table".to_string(), - table_schema: FixedSchemaTable::default_schema(), - partition_info: None, - engine: table_engine::ANALYTIC_ENGINE_TYPE.to_string(), - table_options: HashMap::new(), - }; - - Self { - create_request: CreateTableRequest { - params, - schema_id: SchemaId::from_u32(2), - table_id: new_table_id(2, 1), - state: TableState::Stable, - shard_id: DEFAULT_SHARD_ID, - }, - } - } -} - -// Format of input slice: &[ ( column name, column type ) ] -pub fn create_schema_builder( - key_tuples: &[(&str, DatumKind)], - normal_tuples: &[(&str, DatumKind)], -) -> schema::Builder { - assert!(!key_tuples.is_empty()); - - let mut schema_builder = schema::Builder::with_capacity(key_tuples.len() + normal_tuples.len()) - .auto_increment_column_id(true) - .primary_key_indexes((0..key_tuples.len()).collect()); - - for tuple in key_tuples { - // Key column is not nullable. - let column_schema = column_schema::Builder::new(tuple.0.to_string(), tuple.1) - .is_nullable(false) - .build() - .expect("Should succeed to build key column schema"); - schema_builder = schema_builder.add_key_column(column_schema).unwrap(); - } - - for tuple in normal_tuples { - let column_schema = column_schema::Builder::new(tuple.0.to_string(), tuple.1) - .is_nullable(true) - .build() - .expect("Should succeed to build normal column schema"); - schema_builder = schema_builder.add_normal_column(column_schema).unwrap(); - } - - schema_builder -} diff --git a/src/benchmarks/src/util.rs b/src/benchmarks/src/util.rs deleted file mode 100644 index 97c8457be8..0000000000 --- a/src/benchmarks/src/util.rs +++ /dev/null @@ -1,670 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Utilities. 
- -use std::{collections::HashMap, future::Future, sync::Arc}; - -use analytic_engine::{ - memtable::{key::KeySequence, MemTableRef, PutContext}, - setup::{EngineBuilder, TableEngineContext}, - space::SpaceId, - sst::{ - factory::{ - Factory, FactoryImpl, ObjectStorePickerRef, ReadFrequency, ScanOptions, SstReadHint, - SstReadOptions, - }, - file::{FileHandle, FileMeta, FilePurgeQueue}, - manager::FileId, - meta_data::cache::{self, MetaCacheRef}, - writer::MetaData, - }, - table::sst_util, - table_options::StorageFormat, - Config, RecoverMode, -}; -use bytes_ext::{BufMut, SafeBufMut}; -use common_types::{ - projected_schema::{ProjectedSchema, RowProjectorBuilder}, - record_batch::RecordBatch, - row::RowGroup, - schema::{IndexInWriterSchema, Schema}, - table::{ShardId, DEFAULT_SHARD_ID}, - time::Timestamp, -}; -use futures::stream::StreamExt; -use macros::define_result; -use object_store::{ - config::{LocalOptions, ObjectStoreOptions, StorageOptions}, - ObjectStoreRef, Path, -}; -use parquet::file::footer; -use runtime::{PriorityRuntime, Runtime}; -use size_ext::ReadableSize; -use snafu::{ResultExt, Snafu}; -use table_engine::{ - engine::{CreateTableRequest, EngineRuntimes, OpenShardRequest, TableDef, TableEngineRef}, - predicate::Predicate, - table::{ReadRequest, SchemaId, TableId, TableRef, WriteRequest}, -}; -use tempfile::TempDir; -use time_ext::ReadableDuration; -use wal::{ - config::{Config as WalConfig, StorageConfig}, - log_batch::Payload, - manager::{OpenedWals, WalRuntimes, WalsOpener}, - rocksdb_impl::{config::RocksDBStorageConfig, manager::RocksDBWalsOpener}, -}; - -use crate::{table, table::FixedSchemaTable}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to writer header, err:{}.", source))] - WriteHeader { source: bytes_ext::Error }, - - #[snafu(display("Failed to writer body, err:{}.", source))] - WriteBody { source: bytes_ext::Error }, -} - -define_result!(Error); - -pub fn new_runtime(thread_num: usize) -> Runtime { - runtime::Builder::default() - .thread_name("engine_bench") - .worker_threads(thread_num) - .enable_all() - .build() - .unwrap() -} - -pub async fn parquet_metadata( - store: &ObjectStoreRef, - sst_path: &Path, -) -> parquet_ext::ParquetMetaData { - let get_result = store.get(sst_path).await.unwrap(); - let chunk_reader = get_result.bytes().await.unwrap(); - footer::parse_metadata(&chunk_reader).unwrap() -} - -pub async fn meta_from_sst( - metadata: &parquet_ext::ParquetMetaData, - store: &ObjectStoreRef, - _meta_cache: &Option, -) -> MetaData { - let md = cache::MetaData::try_new(metadata, false, store.clone()) - .await - .unwrap(); - - MetaData::from(md.custom().clone()) -} - -pub async fn schema_from_sst( - store: &ObjectStoreRef, - sst_path: &Path, - meta_cache: &Option, -) -> Schema { - let parquet_metadata = parquet_metadata(store, sst_path).await; - let sst_meta = meta_from_sst(&parquet_metadata, store, meta_cache).await; - sst_meta.schema -} - -pub fn projected_schema_by_number( - schema: &Schema, - num_columns: usize, - max_projections: usize, -) -> ProjectedSchema { - if num_columns < max_projections { - let projection = (0..num_columns + 1).collect(); - - ProjectedSchema::new(schema.clone(), Some(projection)).unwrap() - } else { - ProjectedSchema::no_projection(schema.clone()) - } -} - -pub async fn load_sst_to_memtable( - store: &ObjectStoreRef, - sst_path: &Path, - schema: &Schema, - memtable: &MemTableRef, - runtime: Arc, -) { - let scan_options = ScanOptions { - background_read_parallelism: 1, - 
max_record_batches_in_flight: 1024, - num_streams_to_prefetch: 0, - }; - let projected_schema = ProjectedSchema::no_projection(schema.clone()); - - let fetched_schema = projected_schema.to_record_schema(); - let table_schema = projected_schema.table_schema().clone(); - let row_projector_builder = RowProjectorBuilder::new(fetched_schema, table_schema, None); - let sst_read_options = SstReadOptions { - maybe_table_level_metrics: None, - frequency: ReadFrequency::Frequent, - num_rows_per_row_group: 8192, - predicate: Arc::new(Predicate::empty()), - meta_cache: None, - scan_options, - runtime, - row_projector_builder, - }; - let sst_factory = FactoryImpl; - let store_picker: ObjectStorePickerRef = Arc::new(store.clone()); - let mut sst_reader = sst_factory - .create_reader( - sst_path, - &sst_read_options, - SstReadHint::default(), - &store_picker, - None, - ) - .await - .unwrap(); - - let mut sst_stream = sst_reader.read().await.unwrap(); - let index_in_writer = IndexInWriterSchema::for_same_schema(schema.num_columns()); - let mut ctx = PutContext::new(index_in_writer); - - let mut sequence = crate::INIT_SEQUENCE; - - while let Some(batch) = sst_stream.fetch_next().await { - let batch = batch.unwrap(); - - for i in 0..batch.num_rows() { - let row = batch.clone_row_at(i); - - let key_seq = KeySequence::new(sequence, i as u32); - - memtable.put(&mut ctx, key_seq, &row, schema).unwrap(); - - sequence += 1; - } - } -} - -pub async fn file_handles_from_ssts( - store: &ObjectStoreRef, - space_id: SpaceId, - table_id: TableId, - sst_file_ids: &[FileId], - purge_queue: FilePurgeQueue, - meta_cache: &Option, -) -> Vec { - let mut file_handles = Vec::with_capacity(sst_file_ids.len()); - - for file_id in sst_file_ids.iter() { - let path = sst_util::new_sst_file_path(space_id, table_id, *file_id); - let parquet_metadata = parquet_metadata(store, &path).await; - let sst_meta = meta_from_sst(&parquet_metadata, store, meta_cache).await; - - let file_meta = FileMeta { - id: *file_id, - size: store.head(&path).await.unwrap().size as u64, - row_num: parquet_metadata.file_metadata().num_rows() as u64, - time_range: sst_meta.time_range, - max_seq: sst_meta.max_sequence, - storage_format: StorageFormat::Columnar, - associated_files: Vec::new(), - }; - - let handle = FileHandle::new(file_meta, purge_queue.clone()); - - file_handles.push(handle); - } - - file_handles -} - -/// Header size in bytes -const HEADER_SIZE: usize = 1; - -/// Wal entry header -#[derive(Clone, Copy)] -enum Header { - Write = 1, -} - -impl Header { - pub fn to_u8(self) -> u8 { - self as u8 - } -} - -fn write_header(header: Header, buf: &mut B) -> Result<()> { - buf.try_put_u8(header.to_u8()).context(WriteHeader) -} - -#[derive(Debug)] -pub struct WritePayload<'a>(pub &'a [u8]); - -impl<'a> Payload for WritePayload<'a> { - type Error = Error; - - fn encode_size(&self) -> usize { - let body_size = self.0.len(); - HEADER_SIZE + body_size - } - - fn encode_to(&self, buf: &mut B) -> Result<()> { - write_header(Header::Write, buf)?; - buf.try_put(self.0).context(WriteBody) - } -} - -impl<'a> From<&'a Vec> for WritePayload<'a> { - fn from(data: &'a Vec) -> Self { - Self(data) - } -} - -const DAY_MS: i64 = 24 * 60 * 60 * 1000; -/// 3 days ago. 
-pub fn start_ms() -> i64 { - Timestamp::now().as_i64() - 3 * DAY_MS -} -#[derive(Clone, Copy, Debug)] -pub enum OpenTablesMethod { - WithOpenTable, - WithOpenShard, -} - -pub struct TestEnv { - _dir: TempDir, - pub config: Config, - pub runtimes: Arc, -} - -pub struct Builder { - num_workers: usize, -} - -impl Builder { - pub fn build(self) -> TestEnv { - let dir = tempfile::tempdir().unwrap(); - - let config = Config { - storage: StorageOptions { - mem_cache_capacity: ReadableSize::mb(0), - mem_cache_partition_bits: 0, - disk_cache_dir: "".to_string(), - disk_cache_capacity: ReadableSize::mb(0), - disk_cache_page_size: ReadableSize::mb(0), - disk_cache_partition_bits: 0, - object_store: ObjectStoreOptions::Local(LocalOptions { - data_dir: dir.path().to_str().unwrap().to_string(), - max_retries: 3, - timeout: Default::default(), - }), - }, - wal: WalConfig { - storage: StorageConfig::RocksDB(Box::new(RocksDBStorageConfig { - data_dir: dir.path().to_str().unwrap().to_string(), - ..Default::default() - })), - disable_data: false, - }, - ..Default::default() - }; - - let runtime = Arc::new( - runtime::Builder::default() - .worker_threads(self.num_workers) - .enable_all() - .build() - .unwrap(), - ); - - TestEnv { - _dir: dir, - config, - runtimes: Arc::new(EngineRuntimes { - read_runtime: PriorityRuntime::new(runtime.clone(), runtime.clone()), - write_runtime: runtime.clone(), - meta_runtime: runtime.clone(), - compact_runtime: runtime.clone(), - default_runtime: runtime.clone(), - io_runtime: runtime, - }), - } - } -} - -impl Default for Builder { - fn default() -> Self { - Self { num_workers: 2 } - } -} - -pub trait EngineBuildContext: Clone + Default { - type WalsOpener: WalsOpener; - - fn wals_opener(&self) -> Self::WalsOpener; - fn config(&self) -> Config; - fn open_method(&self) -> OpenTablesMethod; -} - -pub struct RocksDBEngineBuildContext { - config: Config, - open_method: OpenTablesMethod, -} - -impl RocksDBEngineBuildContext { - pub fn new(mode: RecoverMode, open_method: OpenTablesMethod) -> Self { - let mut context = Self::default(); - context.config.recover_mode = mode; - context.open_method = open_method; - - context - } -} - -impl Default for RocksDBEngineBuildContext { - fn default() -> Self { - let dir = tempfile::tempdir().unwrap(); - - let config = Config { - storage: StorageOptions { - mem_cache_capacity: ReadableSize::mb(0), - mem_cache_partition_bits: 0, - disk_cache_dir: "".to_string(), - disk_cache_capacity: ReadableSize::mb(0), - disk_cache_page_size: ReadableSize::mb(0), - disk_cache_partition_bits: 0, - object_store: ObjectStoreOptions::Local(LocalOptions { - data_dir: dir.path().to_str().unwrap().to_string(), - max_retries: 3, - timeout: Default::default(), - }), - }, - wal: WalConfig { - storage: StorageConfig::RocksDB(Box::new(RocksDBStorageConfig { - data_dir: dir.path().to_str().unwrap().to_string(), - ..Default::default() - })), - disable_data: false, - }, - ..Default::default() - }; - - Self { - config, - open_method: OpenTablesMethod::WithOpenTable, - } - } -} - -impl Clone for RocksDBEngineBuildContext { - fn clone(&self) -> Self { - let mut config = self.config.clone(); - - let dir = tempfile::tempdir().unwrap(); - let storage = StorageOptions { - mem_cache_capacity: ReadableSize::mb(0), - mem_cache_partition_bits: 0, - disk_cache_dir: "".to_string(), - disk_cache_capacity: ReadableSize::mb(0), - disk_cache_page_size: ReadableSize::mb(0), - disk_cache_partition_bits: 0, - object_store: ObjectStoreOptions::Local(LocalOptions { - data_dir: 
dir.path().to_str().unwrap().to_string(), - max_retries: 3, - timeout: Default::default(), - }), - }; - - config.storage = storage; - config.wal = WalConfig { - storage: StorageConfig::RocksDB(Box::new(RocksDBStorageConfig { - data_dir: dir.path().to_str().unwrap().to_string(), - ..Default::default() - })), - disable_data: false, - }; - Self { - config, - open_method: self.open_method, - } - } -} - -impl EngineBuildContext for RocksDBEngineBuildContext { - type WalsOpener = RocksDBWalsOpener; - - fn wals_opener(&self) -> Self::WalsOpener { - RocksDBWalsOpener - } - - fn config(&self) -> Config { - self.config.clone() - } - - fn open_method(&self) -> OpenTablesMethod { - self.open_method - } -} - -pub struct TestContext { - config: Config, - wals_opener: T, - runtimes: Arc, - engine: Option, - opened_wals: Option, - schema_id: SchemaId, - last_table_seq: u32, - - name_to_tables: HashMap, -} - -impl TestEnv { - pub fn builder() -> Builder { - Builder::default() - } - - pub fn new_context( - &self, - build_context: &T, - ) -> TestContext { - let config = build_context.config(); - let wals_opener = build_context.wals_opener(); - - TestContext { - config, - wals_opener, - runtimes: self.runtimes.clone(), - engine: None, - opened_wals: None, - schema_id: SchemaId::from_u32(100), - last_table_seq: 1, - name_to_tables: HashMap::new(), - } - } - - pub fn block_on(&self, future: F) -> F::Output { - self.runtimes.default_runtime.block_on(future) - } -} - -impl TestContext { - pub async fn open(&mut self) { - let opened_wals = if let Some(opened_wals) = self.opened_wals.take() { - opened_wals - } else { - self.wals_opener - .open_wals( - &self.config.wal, - WalRuntimes { - read_runtime: self.runtimes.read_runtime.high().clone(), - write_runtime: self.runtimes.write_runtime.clone(), - default_runtime: self.runtimes.default_runtime.clone(), - }, - ) - .await - .unwrap() - }; - - let engine_builder = EngineBuilder { - config: &self.config, - engine_runtimes: self.runtimes.clone(), - opened_wals: opened_wals.clone(), - meta_client: None, - }; - self.opened_wals = Some(opened_wals); - - let TableEngineContext { table_engine, .. 
} = engine_builder.build().await.unwrap(); - self.engine = Some(table_engine); - } - - pub async fn create_fixed_schema_table(&mut self, table_name: &str) -> FixedSchemaTable { - let fixed_schema_table = FixedSchemaTable::builder() - .schema_id(self.schema_id) - .table_name(table_name.to_string()) - .table_id(self.next_table_id()) - .ttl("7d".parse::().unwrap()) - .build_fixed(); - - self.create_table(fixed_schema_table.create_request().clone()) - .await; - - fixed_schema_table - } - - fn next_table_id(&mut self) -> TableId { - self.last_table_seq += 1; - table::new_table_id(2, self.last_table_seq) - } - - async fn create_table(&mut self, create_request: CreateTableRequest) { - let table_name = create_request.params.table_name.clone(); - let table = self.engine().create_table(create_request).await.unwrap(); - - self.name_to_tables.insert(table_name.to_string(), table); - } - - #[inline] - pub fn engine(&self) -> &TableEngineRef { - self.engine.as_ref().unwrap() - } - - pub async fn write_to_table(&self, table_name: &str, row_group: RowGroup) { - let table = self.table(table_name); - - table.write(WriteRequest { row_group }).await.unwrap(); - } - - pub fn table(&self, table_name: &str) -> TableRef { - self.name_to_tables.get(table_name).cloned().unwrap() - } - - pub async fn read_table( - &self, - table_name: &str, - read_request: ReadRequest, - ) -> Vec { - let table = self.table(table_name); - - let mut stream = table.read(read_request).await.unwrap(); - let mut record_batches = Vec::new(); - while let Some(batch) = stream.next().await { - let batch = batch.unwrap(); - - record_batches.push(batch); - } - - record_batches - } - - pub async fn partitioned_read_table( - &self, - table_name: &str, - read_request: ReadRequest, - ) -> Vec { - let table = self.table(table_name); - - let streams = table.partitioned_read(read_request).await.unwrap(); - let mut record_batches = Vec::new(); - - for mut stream in streams.streams { - while let Some(batch) = stream.next().await { - let batch = batch.unwrap(); - - record_batches.push(batch); - } - } - - record_batches - } - - pub async fn reopen_with_tables(&mut self, tables: &[&str]) { - let table_infos: Vec<_> = tables - .iter() - .map(|name| { - let table_id = self.name_to_tables.get(*name).unwrap().id(); - (table_id, *name) - }) - .collect(); - { - // Close all tables. - self.name_to_tables.clear(); - - // Close engine. 
- let engine = self.engine.take().unwrap(); - engine.close().await.unwrap(); - } - - self.open().await; - - self.open_tables_of_shard(table_infos, DEFAULT_SHARD_ID) - .await; - } - - async fn open_tables_of_shard(&mut self, table_infos: Vec<(TableId, &str)>, shard_id: ShardId) { - let table_defs = table_infos - .into_iter() - .map(|table| TableDef { - catalog_name: "horaedb".to_string(), - schema_name: "public".to_string(), - schema_id: self.schema_id, - id: table.0, - name: table.1.to_string(), - }) - .collect(); - - let open_shard_request = OpenShardRequest { - shard_id, - table_defs, - engine: table_engine::ANALYTIC_ENGINE_TYPE.to_string(), - }; - - let tables = self - .engine() - .open_shard(open_shard_request) - .await - .unwrap() - .into_values() - .map(|result| result.unwrap().unwrap()); - - for table in tables { - self.name_to_tables.insert(table.name().to_string(), table); - } - } - - pub fn name_to_tables(&self) -> &HashMap { - &self.name_to_tables - } -} diff --git a/src/benchmarks/src/wal_write_bench.rs b/src/benchmarks/src/wal_write_bench.rs deleted file mode 100644 index 7f8b46d46c..0000000000 --- a/src/benchmarks/src/wal_write_bench.rs +++ /dev/null @@ -1,104 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! WalManager write bench. 
- -use std::sync::Arc; - -use rand::prelude::*; -use runtime::Runtime; -use table_kv::memory::MemoryImpl; -use wal::{ - kv_encoder::LogBatchEncoder, - manager::{WalLocation, WalManager, WalRuntimes, WriteContext}, - table_kv_impl::{model::NamespaceConfig, wal::WalNamespaceImpl}, -}; - -use crate::{ - config::WalWriteBenchConfig, - util::{self, WritePayload}, -}; - -pub struct WalWriteBench { - batch_size: usize, - value_size: usize, - runtime: Arc, -} - -impl WalWriteBench { - pub fn new(config: WalWriteBenchConfig) -> Self { - let runtime = util::new_runtime(1); - - WalWriteBench { - batch_size: config.batch_size, - value_size: config.value_size, - runtime: Arc::new(runtime), - } - } - - pub fn build_value_vec(&self) -> Vec> { - let value_size = match self.value_size < 128 { - true => 128, - false => self.value_size, - }; - - let mut values = Vec::with_capacity(self.batch_size); - for _ in 0..self.batch_size { - let value = self.random_value(value_size); - values.push(value); - } - - values - } - - pub fn random_value(&self, size: usize) -> Vec { - let mut value = vec![0u8; size - 4]; - let mut rng = rand::thread_rng(); - value.extend_from_slice(rng.next_u32().to_le_bytes().as_slice()); - value - } - - pub fn run_bench(&self) { - self.runtime.block_on(async { - let runtimes = WalRuntimes { - read_runtime: self.runtime.clone(), - write_runtime: self.runtime.clone(), - default_runtime: self.runtime.clone(), - }; - - let wal = WalNamespaceImpl::open( - MemoryImpl::default(), - runtimes.clone(), - "horaedb", - NamespaceConfig::default(), - ) - .await - .expect("should succeed to open WalNamespaceImpl(Memory)"); - - let values = self.build_value_vec(); - let wal_encoder = LogBatchEncoder::create(WalLocation::new(1, 1)); - let payloads = values.iter().map(|v| WritePayload(v)); - let log_batch = wal_encoder - .encode_batch(payloads) - .expect("should succeed to encode payload batch"); - - // Write to wal manager - let write_ctx = WriteContext::default(); - let _ = wal.write(&write_ctx, &log_batch).await.unwrap(); - }); - } -} diff --git a/src/catalog/Cargo.toml b/src/catalog/Cargo.toml deleted file mode 100644 index e4f2f6845b..0000000000 --- a/src/catalog/Cargo.toml +++ /dev/null @@ -1,45 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-
-[package]
-name = "catalog"
-
-[package.license]
-workspace = true
-
-[package.version]
-workspace = true
-
-[package.authors]
-workspace = true
-
-[package.edition]
-workspace = true
-
-[features]
-test = []
-
-[dependencies]
-async-trait = { workspace = true }
-common_types = { workspace = true }
-generic_error = { workspace = true }
-lazy_static = { workspace = true }
-logger = { workspace = true }
-macros = { workspace = true }
-snafu = { workspace = true }
-table_engine = { workspace = true }
-time_ext = { workspace = true }
diff --git a/src/catalog/src/consts.rs b/src/catalog/src/consts.rs
deleted file mode 100644
index bb202f8f4c..0000000000
--- a/src/catalog/src/consts.rs
+++ /dev/null
@@ -1,32 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Catalog constants
-
-use lazy_static::lazy_static;
-
-lazy_static! {
-    /// Default catalog name
-    pub static ref DEFAULT_CATALOG: String =
-        std::env::var("HORAEDB_DEFAULT_CATALOG").unwrap_or_else(|_| "horaedb".to_string());
-}
-/// Default schema name
-pub const DEFAULT_SCHEMA: &str = "public";
-/// Catalog name of the sys catalog
-pub const SYSTEM_CATALOG: &str = "system";
-/// Schema name of the sys catalog
-pub const SYSTEM_CATALOG_SCHEMA: &str = "public";
diff --git a/src/catalog/src/lib.rs b/src/catalog/src/lib.rs
deleted file mode 100644
index 9b1b1d9ac9..0000000000
--- a/src/catalog/src/lib.rs
+++ /dev/null
@@ -1,100 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//!
Common traits and types about catalog (schema) - -pub mod consts; -pub mod manager; -pub mod schema; -pub mod table_operator; -#[cfg(feature = "test")] -pub mod test_util; - -use std::sync::Arc; - -use async_trait::async_trait; -use generic_error::GenericError; -use macros::define_result; -use snafu::{Backtrace, Snafu}; - -use crate::schema::{NameRef, SchemaRef}; - -#[derive(Debug, Snafu)] -#[snafu(visibility = "pub")] -pub enum Error { - #[snafu(display( - "Failed to create schema, catalog:{}, schema:{}, msg:{}.\nBacktrace:\nbacktrace:{}", - catalog, - schema, - msg, - backtrace, - ))] - CreateSchema { - catalog: String, - schema: String, - msg: String, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to create schema, catalog:{}, schema:{}, err:{}", - catalog, - schema, - source - ))] - CreateSchemaWithCause { - catalog: String, - schema: String, - source: GenericError, - }, - - #[snafu(display("Unsupported method, msg:{}.\nBacktrace:\n{}", msg, backtrace))] - UnSupported { msg: String, backtrace: Backtrace }, - - #[snafu(display("Failed to operate table, msg:{:?}, err:{}", msg, source))] - TableOperatorWithCause { - msg: Option, - source: GenericError, - }, - - // Fixme: Temporarily remove the stack information, otherwise you will encounter a - // segmentation fault. - #[snafu(display("Failed to operate table, msg:{:?}.\n", msg))] - TableOperatorNoCause { msg: Option }, -} - -define_result!(Error); - -/// Catalog manage schemas -// TODO(yingwen): Provide a context -// TODO(yingwen): Catalog id? -#[async_trait] -pub trait Catalog { - /// Get the catalog name - fn name(&self) -> NameRef; - - /// Find schema by name - fn schema_by_name(&self, name: NameRef) -> Result>; - - async fn create_schema<'a>(&'a self, name: NameRef<'a>) -> Result<()>; - - /// All schemas - fn all_schemas(&self) -> Result>; -} - -/// A reference counted catalog pointer -pub type CatalogRef = Arc; diff --git a/src/catalog/src/manager.rs b/src/catalog/src/manager.rs deleted file mode 100644 index ff0266e534..0000000000 --- a/src/catalog/src/manager.rs +++ /dev/null @@ -1,58 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Catalog manager - -use std::sync::Arc; - -use macros::define_result; -use snafu::Snafu; - -use crate::{schema::NameRef, CatalogRef}; - -#[derive(Debug, Snafu)] -pub struct Error; - -define_result!(Error); - -/// Catalog manager abstraction -/// -/// Tracks meta data of databases/tables -// TODO(yingwen): Maybe use async trait? -// TODO(yingwen): Provide a context - -pub trait Manager: Send + Sync { - /// Get the default catalog name - /// - /// Default catalog is ensured created because no method to create catalog - /// is provided. 
-    fn default_catalog_name(&self) -> NameRef;
-
-    /// Get the default schema name
-    ///
-    /// Default schema may be not created by the implementation and the caller
-    /// may need to create that by itself.
-    fn default_schema_name(&self) -> NameRef;
-
-    /// Find the catalog by name
-    fn catalog_by_name(&self, name: NameRef) -> Result<Option<CatalogRef>>;
-
-    /// All catalogs
-    fn all_catalogs(&self) -> Result<Vec<CatalogRef>>;
-}
-
-pub type ManagerRef = Arc<dyn Manager>;
diff --git a/src/catalog/src/schema.rs b/src/catalog/src/schema.rs
deleted file mode 100644
index 2769cca560..0000000000
--- a/src/catalog/src/schema.rs
+++ /dev/null
@@ -1,413 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Schema contains one or more tables
-
-use std::sync::Arc;
-
-use async_trait::async_trait;
-use common_types::table::ShardId;
-use generic_error::GenericError;
-use macros::define_result;
-use snafu::{Backtrace, Snafu};
-use table_engine::{
-    engine::{self, CreateTableParams, TableEngineRef, TableState},
-    table::{SchemaId, TableId, TableRef},
-};
-
-// FIXME: `CreateExistTable` can lead to `segmentation fault` if including
-// backtrace.
-#[derive(Debug, Snafu)] -#[snafu(visibility(pub))] -pub enum Error { - #[snafu(display("Unsupported method, msg:{}.\nBacktrace:\n{}", msg, backtrace))] - UnSupported { msg: String, backtrace: Backtrace }, - - #[snafu(display( - "Failed to allocate table id, schema:{}, table:{}, err:{}", - schema, - table, - source - ))] - AllocateTableId { - schema: String, - table: String, - source: GenericError, - }, - - #[snafu(display( - "Failed to invalidate table id, schema:{}, table:{}, table_id:{}, err:{}", - schema, - table_name, - table_id, - source - ))] - InvalidateTableId { - schema: String, - table_name: String, - table_id: TableId, - source: GenericError, - }, - - #[snafu(display( - "Failed to create table, request:{:?}, msg:{}.\nBacktrace:\n{}", - request, - msg, - backtrace - ))] - CreateTable { - request: CreateTableRequest, - msg: String, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to create table, err:{}", source))] - CreateTableWithCause { source: GenericError }, - - #[snafu(display( - "Failed to drop table, request:{:?}, msg:{}.\nBacktrace:\n{}", - request, - msg, - backtrace - ))] - DropTable { - request: DropTableRequest, - msg: String, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to drop table, err:{}", source))] - DropTableWithCause { source: GenericError }, - - #[snafu(display( - "Failed to open table, request:{:?}, msg:{}.\nBacktrace:\n{}", - request, - msg, - backtrace - ))] - OpenTable { - request: OpenTableRequest, - msg: String, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to open table, source:{}", source))] - OpenTableWithCause { source: GenericError }, - - #[snafu(display( - "Failed to close table, request:{:?}, msg:{}.\nBacktrace:\n{}", - request, - msg, - backtrace - ))] - CloseTable { - request: CloseTableRequest, - msg: String, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to close table, source:{}", source))] - CloseTableWithCause { source: GenericError }, - - #[snafu(display("Failed to create table, table already exists, table:{table}."))] - CreateExistTable { table: String }, - - #[snafu(display( - "Failed to create table, cannot persist meta, table:{}, err:{}", - table, - source - ))] - WriteTableMeta { table: String, source: GenericError }, - - #[snafu(display( - "Catalog mismatch, expect:{}, given:{}.\nBacktrace:\n{}", - expect, - given, - backtrace - ))] - CatalogMismatch { - expect: String, - given: String, - backtrace: Backtrace, - }, - - #[snafu(display( - "Schema mismatch, expect:{}, given:{}.\nBacktrace:\n{}", - expect, - given, - backtrace - ))] - SchemaMismatch { - expect: String, - given: String, - backtrace: Backtrace, - }, - - #[snafu(display( - "Different table ids are used for the same table, table_name:{table_name}, expected_table_id:{expected_table_id}, given_table_id:{given_table_id}.\nBacktrace:\n{backtrace}", - ))] - DifferentTableId { - table_name: String, - expected_table_id: TableId, - given_table_id: TableId, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to find table, table:{}.\nBacktrace:\n{}", table, backtrace))] - TableNotFound { table: String, backtrace: Backtrace }, - - #[snafu(display("Failed to alter table, err:{}", source))] - AlterTable { source: GenericError }, - - #[snafu(display( - "Too many table, cannot create table, schema:{}, table:{}.\nBacktrace:\n{}", - schema, - table, - backtrace - ))] - TooManyTable { - schema: String, - table: String, - backtrace: Backtrace, - }, - - #[snafu(display("Table is not ready, err:{}", source))] - TableNotReady { source: 
GenericError }, -} - -define_result!(Error); - -/// A name reference. -pub type NameRef<'a> = &'a str; -// TODO: This name is conflict with [table_engine::schema::SchemaRef]. -pub type SchemaRef = Arc; - -/// Request of creating table. -#[derive(Debug, Clone)] -pub struct CreateTableRequest { - pub params: CreateTableParams, - /// Table id - // TODO: remove this field - pub table_id: Option, - /// Table schema - /// Tells state of the table - pub state: TableState, - /// Shard id of the table - pub shard_id: ShardId, -} - -impl CreateTableRequest { - pub fn into_engine_create_request( - self, - table_id: Option, - schema_id: SchemaId, - ) -> engine::CreateTableRequest { - let table_id = self.table_id.unwrap_or(table_id.unwrap_or(TableId::MIN)); - - engine::CreateTableRequest { - params: self.params, - schema_id, - table_id, - state: self.state, - shard_id: self.shard_id, - } - } -} - -/// Create table options. -#[derive(Clone)] -pub struct CreateOptions { - /// Table engine - // FIXME(yingwen): We have engine type in create request, remove this - pub table_engine: TableEngineRef, - /// Create if not exists, if table already exists, wont return error - // TODO(yingwen): Maybe remove this? - pub create_if_not_exists: bool, -} - -/// Drop table request -#[derive(Debug, Clone)] -pub struct DropTableRequest { - /// Catalog name - pub catalog_name: String, - /// Schema name - pub schema_name: String, - /// Table name - pub table_name: String, - /// Table engine type - pub engine: String, -} - -impl DropTableRequest { - pub fn into_engine_drop_request(self, schema_id: SchemaId) -> engine::DropTableRequest { - engine::DropTableRequest { - catalog_name: self.catalog_name, - schema_name: self.schema_name, - schema_id, - table_name: self.table_name, - engine: self.engine, - } - } -} -/// Drop table options -#[derive(Clone)] -pub struct DropOptions { - /// Table engine - pub table_engine: TableEngineRef, -} - -/// Open table request -#[derive(Debug, Clone)] -pub struct OpenTableRequest { - /// Catalog name - pub catalog_name: String, - /// Schema name - pub schema_name: String, - /// Table name - pub table_name: String, - /// Table id - pub table_id: TableId, - /// Table engine type - pub engine: String, - /// Shard id, shard is the table set about scheduling from nodes - pub shard_id: ShardId, -} - -impl OpenTableRequest { - pub fn into_engine_open_request(self, schema_id: SchemaId) -> engine::OpenTableRequest { - engine::OpenTableRequest { - catalog_name: self.catalog_name, - schema_name: self.schema_name, - schema_id, - table_name: self.table_name, - table_id: self.table_id, - engine: self.engine, - shard_id: self.shard_id, - } - } -} -/// Open table options. -#[derive(Clone)] -pub struct OpenOptions { - /// Table engine - pub table_engine: TableEngineRef, -} - -/// Close table request -#[derive(Clone, Debug)] -pub struct CloseTableRequest { - /// Catalog name - pub catalog_name: String, - /// Schema name - pub schema_name: String, - /// Table name - pub table_name: String, - /// Table id - pub table_id: TableId, - /// Table engine type - pub engine: String, -} - -impl CloseTableRequest { - pub fn into_engine_close_request(self, schema_id: SchemaId) -> engine::CloseTableRequest { - engine::CloseTableRequest { - catalog_name: self.catalog_name, - schema_name: self.schema_name, - schema_id, - table_name: self.table_name, - table_id: self.table_id, - engine: self.engine, - } - } -} - -/// Close table options. 
-#[derive(Clone)] -pub struct CloseOptions { - /// Table engine - pub table_engine: TableEngineRef, -} - -#[derive(Debug, Clone)] -pub struct OpenShardRequest { - /// Shard id - pub shard_id: ShardId, - - /// Table infos - pub table_defs: Vec, - - /// Table engine type - pub engine: String, -} - -#[derive(Clone, Debug)] -pub struct TableDef { - pub catalog_name: String, - pub schema_name: String, - pub id: TableId, - pub name: String, -} - -impl TableDef { - pub fn into_engine_table_def(self, schema_id: SchemaId) -> engine::TableDef { - engine::TableDef { - catalog_name: self.catalog_name, - schema_name: self.schema_name, - schema_id, - id: self.id, - name: self.name, - } - } -} - -pub type CloseShardRequest = OpenShardRequest; - -/// Schema manage tables. -#[async_trait] -pub trait Schema { - /// Get schema name. - fn name(&self) -> NameRef; - - /// Get schema id - fn id(&self) -> SchemaId; - - /// Find table by name. - fn table_by_name(&self, name: NameRef) -> Result>; - - /// TODO: remove this method afterwards. - /// Create table according to `request`. - async fn create_table( - &self, - request: CreateTableRequest, - opts: CreateOptions, - ) -> Result; - - /// TODO: remove this method afterwards. - /// Drop table according to `request`. - /// - /// Returns true if the table is really dropped. - async fn drop_table(&self, request: DropTableRequest, opts: DropOptions) -> Result; - - /// All tables - fn all_tables(&self) -> Result>; - - /// Register the opened table into schema. - fn register_table(&self, table: TableRef); - - /// Unregister table - fn unregister_table(&self, table_name: &str); -} diff --git a/src/catalog/src/table_operator.rs b/src/catalog/src/table_operator.rs deleted file mode 100644 index 6d5e2a61d1..0000000000 --- a/src/catalog/src/table_operator.rs +++ /dev/null @@ -1,291 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::time::Instant; - -use generic_error::BoxError; -use logger::{error, info, warn}; -use snafu::{OptionExt, ResultExt}; -use table_engine::{engine, table::TableRef}; -use time_ext::InstantExt; - -use crate::{ - manager::ManagerRef, - schema::{ - CloseOptions, CloseShardRequest, CloseTableRequest, CreateOptions, CreateTableRequest, - DropOptions, DropTableRequest, OpenOptions, OpenShardRequest, OpenTableRequest, SchemaRef, - }, - Result, TableOperatorNoCause, TableOperatorWithCause, -}; - -/// Table operator -/// -/// Encapsulate all operations about tables, including create/drop, open/close -/// and etc. 
-#[derive(Clone)] -pub struct TableOperator { - catalog_manager: ManagerRef, -} - -impl TableOperator { - pub fn new(catalog_manager: ManagerRef) -> Self { - Self { catalog_manager } - } - - pub async fn open_shard(&self, request: OpenShardRequest, opts: OpenOptions) -> Result<()> { - let instant = Instant::now(); - let table_engine = opts.table_engine; - let shard_id = request.shard_id; - - // Generate open requests. - let mut related_schemas = Vec::with_capacity(request.table_defs.len()); - let mut engine_table_defs = Vec::with_capacity(request.table_defs.len()); - for open_ctx in request.table_defs { - let schema = self.schema_by_name(&open_ctx.catalog_name, &open_ctx.schema_name)?; - let table_id = open_ctx.id; - engine_table_defs.push(open_ctx.into_engine_table_def(schema.id())); - related_schemas.push((table_id, schema)); - } - - // Open tables by table engine. - let engine_open_shard_req = engine::OpenShardRequest { - shard_id, - table_defs: engine_table_defs, - engine: request.engine, - }; - let mut shard_result = table_engine - .open_shard(engine_open_shard_req) - .await - .box_err() - .context(TableOperatorWithCause { msg: None })?; - - // Check and register successful opened table into schema. - let mut success_count = 0_u32; - let mut missing_table_count = 0_u32; - let mut open_table_errs = Vec::new(); - - for (table_id, schema) in related_schemas { - let table_result = shard_result - .remove(&table_id) - .context(TableOperatorNoCause { - msg: Some(format!( - "table not exist, shard_id:{shard_id}, table_id:{table_id}" - )), - })?; - - match table_result { - Ok(Some(table)) => { - schema.register_table(table); - success_count += 1; - } - Ok(None) => { - error!("TableOperator failed to open a missing table, table_id:{table_id}, schema_id:{:?}, shard_id:{shard_id}", schema.id()); - missing_table_count += 1; - } - Err(e) => { - error!("TableOperator failed to open table, table_id:{table_id}, schema_id:{:?}, shard_id:{shard_id}, err:{}", schema.id(), e); - open_table_errs.push(e); - } - } - } - - info!( - "Open shard finish, shard id:{shard_id}, cost:{}ms, success_count:{success_count}, missing_table_count:{missing_table_count}, open_table_errs:{open_table_errs:?}", - instant.saturating_elapsed().as_millis(), - ); - - if missing_table_count == 0 && open_table_errs.is_empty() { - Ok(()) - } else { - let msg = format!( - "Failed to open shard, some tables open failed, shard id:{shard_id}, \ - missing_table_count:{missing_table_count}, \ - open_err_count:{}", - open_table_errs.len() - ); - - TableOperatorNoCause { msg }.fail() - } - } - - pub async fn close_shard(&self, request: CloseShardRequest, opts: CloseOptions) -> Result<()> { - let instant = Instant::now(); - let table_engine = opts.table_engine; - let shard_id = request.shard_id; - - // Generate open requests. - let mut schemas = Vec::with_capacity(request.table_defs.len()); - let mut engine_table_defs = Vec::with_capacity(request.table_defs.len()); - for table_def in request.table_defs { - let schema = self.schema_by_name(&table_def.catalog_name, &table_def.schema_name)?; - engine_table_defs.push(table_def.into_engine_table_def(schema.id())); - schemas.push(schema); - } - - // Close tables by table engine. - // TODO: add the `close_shard` method into table engine. 
- let engine_close_shard_req = engine::CloseShardRequest { - shard_id: request.shard_id, - table_defs: engine_table_defs, - engine: request.engine, - }; - let close_results = table_engine.close_shard(engine_close_shard_req).await; - - // Check and unregister successful closed table from schema. - let mut success_count = 0_u32; - let mut close_table_errs = Vec::new(); - - for (schema, close_result) in schemas.into_iter().zip(close_results.into_iter()) { - match close_result { - Ok(table_name) => { - schema.unregister_table(&table_name); - success_count += 1; - } - Err(e) => close_table_errs.push(e), - } - } - - info!( - "Close shard finished, shard id:{shard_id}, cost:{}ms, success_count:{success_count}, close_table_errs:{close_table_errs:?}", - instant.saturating_elapsed().as_millis(), - ); - - if close_table_errs.is_empty() { - Ok(()) - } else { - TableOperatorNoCause { - msg: format!( - "Failed to close shard, shard id:{shard_id}, success_count:{success_count}, close_err_count:{}", close_table_errs.len(), - ), - } - .fail() - } - } - - pub async fn open_table_on_shard( - &self, - request: OpenTableRequest, - opts: OpenOptions, - ) -> Result<()> { - let table_engine = opts.table_engine; - let schema = self.schema_by_name(&request.catalog_name, &request.schema_name)?; - - let table = table_engine - .open_table(request.clone().into_engine_open_request(schema.id())) - .await - .box_err() - .context(TableOperatorWithCause { - msg: format!("failed to open table on shard, request:{request:?}"), - })? - .context(TableOperatorNoCause { - msg: format!("table engine returns none when opening table, request:{request:?}"), - })?; - schema.register_table(table); - - Ok(()) - } - - pub async fn close_table_on_shard( - &self, - request: CloseTableRequest, - opts: CloseOptions, - ) -> Result<()> { - let table_engine = opts.table_engine; - let schema = self.schema_by_name(&request.catalog_name, &request.schema_name)?; - let table_name = request.table_name.clone(); - - table_engine - .close_table(request.clone().into_engine_close_request(schema.id())) - .await - .box_err() - .context(TableOperatorWithCause { - msg: format!("failed to close table on shard, request:{request:?}"), - })?; - schema.unregister_table(&table_name); - - Ok(()) - } - - pub async fn create_table_on_shard( - &self, - request: CreateTableRequest, - opts: CreateOptions, - ) -> Result { - let schema = - self.schema_by_name(&request.params.catalog_name, &request.params.schema_name)?; - - // TODO: we should create table directly by table engine, and register table - // into schema like opening. - schema - .create_table(request.clone(), opts) - .await - .box_err() - .context(TableOperatorWithCause { - msg: format!("failed to create table on shard, request:{request:?}"), - }) - } - - pub async fn drop_table_on_shard( - &self, - request: DropTableRequest, - opts: DropOptions, - ) -> Result<()> { - let schema = self.schema_by_name(&request.catalog_name, &request.schema_name)?; - - // TODO: we should drop table directly by table engine, and unregister table - // from schema like closing. 
- let has_dropped = schema - .drop_table(request.clone(), opts) - .await - .box_err() - .context(TableOperatorWithCause { - msg: format!("failed to create table on shard, request:{request:?}"), - })?; - - if has_dropped { - warn!( - "Table has been dropped already, table_name:{}", - request.table_name - ); - } - - Ok(()) - } - - fn schema_by_name(&self, catalog_name: &str, schema_name: &str) -> Result { - let catalog = self - .catalog_manager - .catalog_by_name(catalog_name) - .box_err() - .context(TableOperatorWithCause { - msg: format!("failed to find catalog, catalog_name:{catalog_name}"), - })? - .context(TableOperatorNoCause { - msg: format!("catalog not found, catalog_name:{catalog_name}"), - })?; - - catalog - .schema_by_name(schema_name) - .box_err() - .context(TableOperatorWithCause { - msg: format!("failed to find schema, schema_name:{schema_name}"), - })? - .context(TableOperatorNoCause { - msg: format!("schema not found, schema_name:{schema_name}"), - }) - } -} diff --git a/src/catalog/src/test_util.rs b/src/catalog/src/test_util.rs deleted file mode 100644 index 2ba717b048..0000000000 --- a/src/catalog/src/test_util.rs +++ /dev/null @@ -1,172 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{collections::HashMap, sync::Arc}; - -use async_trait::async_trait; -use table_engine::table::{SchemaId, TableRef}; - -use crate::{ - manager::{Manager, ManagerRef}, - schema::{ - CreateOptions, CreateTableRequest, DropOptions, DropTableRequest, NameRef, - Result as SchemaResult, Schema, SchemaRef, - }, - Catalog, CatalogRef, Result, -}; - -/// Mock catalog builder -pub struct MockCatalogManagerBuilder { - catalog: String, - schema: String, - tables: Vec, -} - -impl MockCatalogManagerBuilder { - pub fn new(catalog: String, schema: String, tables: Vec) -> Self { - Self { - catalog, - schema, - tables, - } - } - - pub fn build(self) -> ManagerRef { - let schema = Arc::new(MockSchema { - name: self.schema.clone(), - tables: self - .tables - .into_iter() - .map(|t| (t.name().to_string(), t)) - .collect(), - }); - - let catalog = Arc::new(MockCatalog { - name: self.catalog.clone(), - schemas: HashMap::from([(self.schema.clone(), schema as _)]), - }); - - Arc::new(MockCatalogManager { - catalogs: HashMap::from([(self.catalog.clone(), catalog as _)]), - default_catalog: self.catalog, - default_schema: self.schema, - }) - } -} - -/// Mock catalog manager which only support default catalog and schema -/// -/// You can set the default catalog and schema when initializing. 
-struct MockCatalogManager { - catalogs: HashMap, - default_catalog: String, - default_schema: String, -} - -impl Manager for MockCatalogManager { - fn default_catalog_name(&self) -> crate::schema::NameRef { - &self.default_catalog - } - - fn default_schema_name(&self) -> crate::schema::NameRef { - &self.default_schema - } - - fn catalog_by_name( - &self, - name: crate::schema::NameRef, - ) -> crate::manager::Result> { - Ok(self.catalogs.get(name).cloned()) - } - - fn all_catalogs(&self) -> crate::manager::Result> { - Ok(self.catalogs.clone().into_values().collect()) - } -} - -struct MockCatalog { - name: String, - schemas: HashMap, -} - -#[async_trait::async_trait] -impl Catalog for MockCatalog { - fn name(&self) -> NameRef { - &self.name - } - - fn schema_by_name(&self, name: NameRef) -> Result> { - Ok(self.schemas.get(name).cloned()) - } - - async fn create_schema<'a>(&'a self, _name: NameRef<'a>) -> Result<()> { - unimplemented!() - } - - /// All schemas - fn all_schemas(&self) -> Result> { - Ok(self.schemas.clone().into_values().collect()) - } -} - -struct MockSchema { - name: String, - tables: HashMap, -} - -#[async_trait] -impl Schema for MockSchema { - fn name(&self) -> NameRef { - &self.name - } - - fn id(&self) -> SchemaId { - SchemaId::from_u32(42) - } - - fn table_by_name(&self, name: NameRef) -> SchemaResult> { - Ok(self.tables.get(name).cloned()) - } - - async fn create_table( - &self, - _request: CreateTableRequest, - _opts: CreateOptions, - ) -> SchemaResult { - unimplemented!() - } - - async fn drop_table( - &self, - _request: DropTableRequest, - _opts: DropOptions, - ) -> SchemaResult { - unimplemented!() - } - - fn all_tables(&self) -> SchemaResult> { - Ok(self.tables.clone().into_values().collect()) - } - - fn register_table(&self, _table: TableRef) { - unimplemented!() - } - - fn unregister_table(&self, _table_name: &str) { - unimplemented!() - } -} diff --git a/src/catalog_impls/Cargo.toml b/src/catalog_impls/Cargo.toml deleted file mode 100644 index 3e3cd9fbaa..0000000000 --- a/src/catalog_impls/Cargo.toml +++ /dev/null @@ -1,48 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[package] -name = "catalog_impls" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -async-trait = { workspace = true } -catalog = { workspace = true } -cluster = { workspace = true } -common_types = { workspace = true } -generic_error = { workspace = true } -logger = { workspace = true } -macros = { workspace = true } -meta_client = { workspace = true } -snafu = { workspace = true } -system_catalog = { workspace = true } -table_engine = { workspace = true } -tokio = { workspace = true } - -[dev-dependencies] -analytic_engine = { workspace = true, features = ["test"] } diff --git a/src/catalog_impls/src/cluster_based.rs b/src/catalog_impls/src/cluster_based.rs deleted file mode 100644 index c1208f678c..0000000000 --- a/src/catalog_impls/src/cluster_based.rs +++ /dev/null @@ -1,118 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use async_trait::async_trait; -use catalog::{ - schema, - schema::{ - CreateOptions, CreateTableRequest, DropOptions, DropTableRequest, NameRef, Schema, - SchemaRef, TableNotReady, - }, -}; -use cluster::{ClusterRef, TableStatus}; -use generic_error::BoxError; -use snafu::{ResultExt, Snafu}; -use table_engine::table::{SchemaId, TableRef}; - -#[derive(Debug, Snafu)] -#[snafu(visibility(pub))] -pub enum Error { - #[snafu(display("Invalid table status, status:{:?}", status))] - InvalidTableStatus { status: TableStatus }, -} - -/// A cluster-based implementation for [`schema`]. - -/// Schema with cluster. -/// It binds cluster and schema and will detect the health status of the cluster -/// when calling the schema interface. -pub(crate) struct SchemaWithCluster { - internal: SchemaRef, - - cluster: ClusterRef, -} - -impl SchemaWithCluster { - pub(crate) fn new(internal: SchemaRef, cluster: ClusterRef) -> SchemaWithCluster { - SchemaWithCluster { internal, cluster } - } - - // Get table status, return None when table not found in shard. - fn table_status(&self, table_name: NameRef) -> Option { - self.cluster.get_table_status(self.name(), table_name) - } -} - -#[async_trait] -impl Schema for SchemaWithCluster { - fn name(&self) -> NameRef { - self.internal.name() - } - - fn id(&self) -> SchemaId { - self.internal.id() - } - - fn table_by_name(&self, name: NameRef) -> schema::Result> { - let find_table_result = self.internal.table_by_name(name)?; - - if find_table_result.is_none() { - return match self.table_status(name) { - // Table not found in schema and shard not contains this table. - None => Ok(None), - // Table not found in schema but shard contains this table. - // Check the status of the shard. 
- Some(table_status) => InvalidTableStatus { - status: table_status, - } - .fail() - .box_err() - .with_context(|| TableNotReady {})?, - }; - } - - Ok(find_table_result) - } - - async fn create_table( - &self, - request: CreateTableRequest, - opts: CreateOptions, - ) -> schema::Result { - self.internal.create_table(request, opts).await - } - - async fn drop_table( - &self, - request: DropTableRequest, - opts: DropOptions, - ) -> schema::Result { - self.internal.drop_table(request, opts).await - } - - fn all_tables(&self) -> schema::Result> { - self.internal.all_tables() - } - - fn register_table(&self, table: TableRef) { - self.internal.register_table(table) - } - - fn unregister_table(&self, table_name: &str) { - self.internal.unregister_table(table_name) - } -} diff --git a/src/catalog_impls/src/lib.rs b/src/catalog_impls/src/lib.rs deleted file mode 100644 index 0b8e1b43f4..0000000000 --- a/src/catalog_impls/src/lib.rs +++ /dev/null @@ -1,73 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use catalog::{ - consts::SYSTEM_CATALOG, - manager::{Manager, ManagerRef}, - schema::NameRef, - CatalogRef, -}; -use system_catalog::{tables::Tables, SystemTableAdapter}; - -use crate::system_tables::{SystemTables, SystemTablesBuilder}; - -mod cluster_based; -mod system_tables; -pub mod table_based; -pub mod volatile; - -/// CatalogManagerImpl is a wrapper for system and user tables -#[derive(Clone)] -pub struct CatalogManagerImpl { - system_tables: SystemTables, - user_catalog_manager: ManagerRef, -} - -impl CatalogManagerImpl { - pub fn new(manager: ManagerRef) -> Self { - let mut system_tables_builder = SystemTablesBuilder::new(); - system_tables_builder = system_tables_builder - .insert_table(SystemTableAdapter::new(Tables::new(manager.clone()))); - Self { - system_tables: system_tables_builder.build(), - user_catalog_manager: manager, - } - } -} - -impl Manager for CatalogManagerImpl { - fn default_catalog_name(&self) -> NameRef { - self.user_catalog_manager.default_catalog_name() - } - - fn default_schema_name(&self) -> NameRef { - self.user_catalog_manager.default_schema_name() - } - - fn catalog_by_name(&self, name: NameRef) -> catalog::manager::Result> { - match name { - SYSTEM_CATALOG => Ok(Some(Arc::new(self.system_tables.clone()))), - _ => self.user_catalog_manager.catalog_by_name(name), - } - } - - fn all_catalogs(&self) -> catalog::manager::Result> { - self.user_catalog_manager.all_catalogs() - } -} diff --git a/src/catalog_impls/src/system_tables.rs b/src/catalog_impls/src/system_tables.rs deleted file mode 100644 index a7123a6cdf..0000000000 --- a/src/catalog_impls/src/system_tables.rs +++ /dev/null @@ -1,155 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor 
license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Contains System tables, such as system.public.tables - -use std::{collections::HashMap, sync::Arc}; - -use async_trait::async_trait; -use catalog::{ - consts::{SYSTEM_CATALOG, SYSTEM_CATALOG_SCHEMA}, - schema::{ - CreateOptions, CreateTableRequest, DropOptions, DropTableRequest, NameRef, Schema, - SchemaRef, - }, - Catalog, -}; -use logger::warn; -use system_catalog::SystemTableAdapter; -use table_engine::{ - self, - table::{SchemaId, Table, TableRef}, -}; - -const UNSUPPORTED_MSG: &str = "system tables not supported"; - -pub struct SystemTablesBuilder { - tables: HashMap>, -} - -impl SystemTablesBuilder { - pub fn new() -> Self { - Self { - tables: HashMap::new(), - } - } - - pub fn insert_table(mut self, table: SystemTableAdapter) -> Self { - self.tables - .insert(table.name().to_string(), Arc::new(table)); - self - } - - pub fn build(self) -> SystemTables { - SystemTables::new(self.tables) - } -} - -#[derive(Clone)] -pub struct SystemTables { - tables: Arc>>, -} - -impl SystemTables { - pub fn new(tables: HashMap>) -> Self { - Self { - tables: Arc::new(tables), - } - } -} - -#[async_trait] -impl Schema for SystemTables { - fn name(&self) -> NameRef { - SYSTEM_CATALOG_SCHEMA - } - - fn id(&self) -> SchemaId { - system_catalog::SYSTEM_SCHEMA_ID - } - - fn table_by_name(&self, name: NameRef) -> catalog::schema::Result> { - Ok(self.tables.get(name).map(|v| v.clone() as TableRef)) - } - - async fn create_table( - &self, - _request: CreateTableRequest, - _opts: CreateOptions, - ) -> catalog::schema::Result { - catalog::schema::UnSupported { - msg: UNSUPPORTED_MSG, - } - .fail() - } - - async fn drop_table( - &self, - _request: DropTableRequest, - _opts: DropOptions, - ) -> catalog::schema::Result { - catalog::schema::UnSupported { - msg: UNSUPPORTED_MSG, - } - .fail() - } - - fn all_tables(&self) -> catalog::schema::Result> { - Ok(self - .tables - .iter() - .map(|(_, v)| v.clone() as TableRef) - .collect()) - } - - fn register_table(&self, _table: TableRef) { - warn!("Try to register table in the system tables"); - } - - fn unregister_table(&self, _table_name: &str) { - warn!("Try to unregister table in the system tables"); - } -} - -#[async_trait] -impl Catalog for SystemTables { - fn name(&self) -> NameRef { - SYSTEM_CATALOG - } - - fn schema_by_name(&self, name: NameRef) -> catalog::Result> { - if name == SYSTEM_CATALOG_SCHEMA { - Ok(Some(Arc::new(self.clone()))) - } else { - Ok(None) - } - } - - async fn create_schema<'a>(&'a self, _name: NameRef<'a>) -> catalog::Result<()> { - catalog::UnSupported { - msg: UNSUPPORTED_MSG, - } - .fail() - } - - fn all_schemas(&self) -> catalog::Result> { - catalog::UnSupported { - msg: UNSUPPORTED_MSG, - } - .fail() - } -} diff --git a/src/catalog_impls/src/table_based.rs b/src/catalog_impls/src/table_based.rs deleted file mode 100644 
index 95db0fa851..0000000000 --- a/src/catalog_impls/src/table_based.rs +++ /dev/null @@ -1,1175 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Table based catalog implementation - -use std::{ - collections::HashMap, - sync::{Arc, RwLock}, -}; - -use async_trait::async_trait; -use catalog::{ - self, consts, - manager::{self, Manager}, - schema::{ - self, AllocateTableId, CatalogMismatch, CreateExistTable, CreateOptions, - CreateTableRequest, CreateTableWithCause, DropOptions, DropTableRequest, - DropTableWithCause, NameRef, Schema, SchemaMismatch, SchemaRef, TooManyTable, - WriteTableMeta, - }, - Catalog, CatalogRef, -}; -use generic_error::BoxError; -use logger::{debug, info}; -use macros::define_result; -use snafu::{ensure, Backtrace, OptionExt, ResultExt, Snafu}; -use system_catalog::sys_catalog_table::{ - self, CreateCatalogRequest, CreateSchemaRequest, SysCatalogTable, VisitOptions, - VisitOptionsBuilder, VisitorCatalogNotFound, VisitorInner, VisitorSchemaNotFound, -}; -use table_engine::{ - engine::{TableEngineRef, TableState}, - table::{ - ReadOptions, SchemaId, SchemaIdGenerator, TableId, TableInfo, TableRef, TableSeq, - TableSeqGenerator, - }, -}; -use tokio::sync::Mutex; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to build sys catalog table, err:{}", source))] - BuildSysCatalog { - source: system_catalog::sys_catalog_table::Error, - }, - - #[snafu(display("Failed to visit sys catalog table, err:{}", source))] - VisitSysCatalog { - source: system_catalog::sys_catalog_table::Error, - }, - - #[snafu(display( - "Failed to find table to update, name:{}.\nBacktrace:\n{}", - name, - backtrace - ))] - UpdateTableNotFound { name: String, backtrace: Backtrace }, - - #[snafu(display("Failed to create catalog, catalog:{}, err:{}", catalog, source))] - CreateCatalog { - catalog: String, - source: system_catalog::sys_catalog_table::Error, - }, - - #[snafu(display( - "Failed to create schema, catalog:{}, schema:{}, err:{}", - catalog, - schema, - source - ))] - CreateSchema { - catalog: String, - schema: String, - source: system_catalog::sys_catalog_table::Error, - }, - - #[snafu(display( - "Invalid schema id and table seq, schema_id:{:?}, table_seq:{:?}.\nBacktrace:\n{}", - schema_id, - table_seq, - backtrace, - ))] - InvalidSchemaIdAndTableSeq { - schema_id: SchemaId, - table_seq: TableSeq, - backtrace: Backtrace, - }, -} - -define_result!(Error); - -/// Table based catalog manager -pub struct TableBasedManager { - /// Sys catalog table - catalog_table: Arc, - catalogs: CatalogMap, - /// Global schema id generator, Each schema has a unique schema id. 
- schema_id_generator: Arc, -} - -impl Manager for TableBasedManager { - fn default_catalog_name(&self) -> NameRef { - &consts::DEFAULT_CATALOG - } - - fn default_schema_name(&self) -> NameRef { - consts::DEFAULT_SCHEMA - } - - fn catalog_by_name(&self, name: NameRef) -> manager::Result> { - let catalog = self.catalogs.get(name).cloned().map(|v| v as _); - Ok(catalog) - } - - fn all_catalogs(&self) -> manager::Result> { - Ok(self.catalogs.values().map(|v| v.clone() as _).collect()) - } -} - -impl TableBasedManager { - /// Create and init the TableBasedManager. - // TODO(yingwen): Define all constants in catalog crate. - pub async fn new(backend: TableEngineRef) -> Result { - // Create or open sys_catalog table, will also create a space (catalog + schema) - // for system catalog. - let catalog_table = SysCatalogTable::new(backend) - .await - .context(BuildSysCatalog)?; - - let mut manager = Self { - catalog_table: Arc::new(catalog_table), - catalogs: HashMap::new(), - schema_id_generator: Arc::new(SchemaIdGenerator::default()), - }; - - manager.init().await?; - - Ok(manager) - } - - pub async fn fetch_table_infos(&mut self) -> Result> { - let catalog_table = self.catalog_table.clone(); - - let mut table_infos = Vec::default(); - let visitor_inner = VisitorInnerImpl { - catalog_table: catalog_table.clone(), - catalogs: &mut self.catalogs, - schema_id_generator: self.schema_id_generator.clone(), - table_infos: &mut table_infos, - }; - - let visit_opts = VisitOptionsBuilder::default().visit_table().build(); - - Self::visit_catalog_table_with_options(catalog_table, visitor_inner, visit_opts).await?; - - Ok(table_infos) - } - - /// Load all data from sys catalog table. - async fn init(&mut self) -> Result<()> { - // The system catalog and schema in it is not persisted, so we add it manually. - self.load_system_catalog(); - - // Load all existent catalog/schema from catalog_table - let catalog_table = self.catalog_table.clone(); - - let visitor_inner = VisitorInnerImpl { - catalog_table: self.catalog_table.clone(), - catalogs: &mut self.catalogs, - schema_id_generator: self.schema_id_generator.clone(), - table_infos: &mut Vec::default(), - }; - - let visit_opts = VisitOptionsBuilder::default() - .visit_catalog() - .visit_schema() - .build(); - - Self::visit_catalog_table_with_options(catalog_table, visitor_inner, visit_opts).await?; - - // Create default catalog if it is not exists. - self.maybe_create_default_catalog().await?; - - Ok(()) - } - - async fn visit_catalog_table_with_options( - catalog_table: Arc, - mut visitor_inner: VisitorInnerImpl<'_>, - visit_opts: VisitOptions, - ) -> Result<()> { - let opts = ReadOptions::default(); - - catalog_table - .visit(opts, &mut visitor_inner, visit_opts) - .await - .context(VisitSysCatalog) - } - - fn load_system_catalog(&mut self) { - // Get the `sys_catalog` table and add it to tables. - let table = self.catalog_table.inner_table(); - let mut tables = SchemaTables::default(); - tables.insert(self.catalog_table.table_id(), table); - - // Use schema id of schema `system/public` as last schema id. - let schema_id = system_catalog::SYSTEM_SCHEMA_ID; - self.schema_id_generator.set_last_schema_id(schema_id); - - // Create the default schema in system catalog. 
- let schema = Arc::new(SchemaImpl { - catalog_name: consts::SYSTEM_CATALOG.to_string(), - schema_name: consts::SYSTEM_CATALOG_SCHEMA.to_string(), - schema_id, - tables: RwLock::new(tables), - mutex: Mutex::new(()), - catalog_table: self.catalog_table.clone(), - table_seq_generator: TableSeqGenerator::default(), - }); - // Use table seq of `sys_catalog` table as last table seq. - schema - .table_seq_generator - .set_last_table_seq(system_catalog::MAX_SYSTEM_TABLE_SEQ); - - let mut schemas = HashMap::new(); - schemas.insert(schema.name().to_string(), schema); - - let schema_id_generator = self.schema_id_generator.clone(); - let catalog_table = self.catalog_table.clone(); - // Create the system catalog. - let catalog = Arc::new(CatalogImpl { - name: consts::SYSTEM_CATALOG.to_string(), - schemas: RwLock::new(schemas), - schema_id_generator, - catalog_table, - mutex: Mutex::new(()), - }); - - self.catalogs.insert(catalog.name().to_string(), catalog); - } - - async fn maybe_create_default_catalog(&mut self) -> Result<()> { - // Try to get default catalog, create it if not exists. - let catalog = match self.catalogs.get(consts::DEFAULT_CATALOG.as_str()) { - Some(v) => v.clone(), - None => { - // Only system catalog should exists. - assert_eq!(1, self.catalogs.len()); - - // Default catalog is not exists, create and store it. - self.create_catalog(CreateCatalogRequest { - catalog_name: consts::DEFAULT_CATALOG.to_string(), - }) - .await? - } - }; - - // Create default schema if not exists. - if catalog.find_schema(consts::DEFAULT_SCHEMA).is_none() { - // Allocate schema id. - let schema_id = self - .schema_id_generator - .alloc_schema_id() - .expect("Schema id of default catalog should be valid"); - - self.add_schema_to_catalog( - CreateSchemaRequest { - catalog_name: consts::DEFAULT_CATALOG.to_string(), - schema_name: consts::DEFAULT_SCHEMA.to_string(), - schema_id, - }, - &catalog, - ) - .await?; - } - - Ok(()) - } - - async fn create_catalog(&mut self, request: CreateCatalogRequest) -> Result> { - let catalog_name = request.catalog_name.clone(); - - self.catalog_table - .create_catalog(request) - .await - .context(CreateCatalog { - catalog: &catalog_name, - })?; - - let schema_id_generator = self.schema_id_generator.clone(); - let catalog_table = self.catalog_table.clone(); - let catalog = Arc::new(CatalogImpl { - name: catalog_name.clone(), - schemas: RwLock::new(HashMap::new()), - schema_id_generator, - catalog_table, - mutex: Mutex::new(()), - }); - - self.catalogs.insert(catalog_name, catalog.clone()); - - Ok(catalog) - } - - async fn add_schema_to_catalog( - &mut self, - request: CreateSchemaRequest, - catalog: &CatalogImpl, - ) -> Result> { - let schema_name = request.schema_name.clone(); - let schema_id = request.schema_id; - - self.catalog_table - .create_schema(request) - .await - .context(CreateSchema { - catalog: &catalog.name, - schema: &schema_name, - })?; - - let schema = Arc::new(SchemaImpl::new( - &catalog.name, - &schema_name, - schema_id, - self.catalog_table.clone(), - )); - - catalog.insert_schema_into_memory(schema.clone()); - - Ok(schema) - } -} - -type CatalogMap = HashMap>; - -/// Sys catalog visitor implementation, used to load catalog info -struct VisitorInnerImpl<'a> { - catalog_table: Arc, - catalogs: &'a mut CatalogMap, - schema_id_generator: Arc, - table_infos: &'a mut Vec, -} - -#[async_trait] -impl<'a> VisitorInner for VisitorInnerImpl<'a> { - fn visit_catalog(&mut self, request: CreateCatalogRequest) -> sys_catalog_table::Result<()> { - debug!("Visitor visit 
catalog, request:{:?}", request); - let schema_id_generator = self.schema_id_generator.clone(); - let catalog_table = self.catalog_table.clone(); - - let catalog = CatalogImpl { - name: request.catalog_name.to_string(), - schemas: RwLock::new(HashMap::new()), - schema_id_generator, - catalog_table, - mutex: Mutex::new(()), - }; - - // Register catalog. - self.catalogs - .insert(request.catalog_name, Arc::new(catalog)); - - Ok(()) - } - - fn visit_schema(&mut self, request: CreateSchemaRequest) -> sys_catalog_table::Result<()> { - debug!("Visitor visit schema, request:{:?}", request); - - let catalog = - self.catalogs - .get_mut(&request.catalog_name) - .context(VisitorCatalogNotFound { - catalog: &request.catalog_name, - })?; - - let schema_id = request.schema_id; - let schema = Arc::new(SchemaImpl::new( - &request.catalog_name, - &request.schema_name, - schema_id, - self.catalog_table.clone(), - )); - - // If schema exists, we overwrite it. - catalog.insert_schema_into_memory(schema); - - // Update last schema id. - if self.schema_id_generator.last_schema_id_u32() < schema_id.as_u32() { - self.schema_id_generator.set_last_schema_id(schema_id); - } - - Ok(()) - } - - fn visit_tables(&mut self, table_info: TableInfo) -> sys_catalog_table::Result<()> { - debug!("Visitor visit tables, table_info:{:?}", table_info); - - let catalog = - self.catalogs - .get_mut(&table_info.catalog_name) - .context(VisitorCatalogNotFound { - catalog: &table_info.catalog_name, - })?; - let schema = - catalog - .find_schema(&table_info.schema_name) - .context(VisitorSchemaNotFound { - catalog: &table_info.catalog_name, - schema: &table_info.schema_name, - })?; - - // Update max table sequence of the schema. - let table_id = table_info.table_id; - let table_seq = TableSeq::from(table_id); - if table_seq.as_u64() >= schema.table_seq_generator.last_table_seq_u64() { - schema.table_seq_generator.set_last_table_seq(table_seq); - } - - // Only the stable/altering table can be opened. - if !matches!(table_info.state, TableState::Stable) { - debug!( - "Visitor visit a unstable table, table_info:{:?}", - table_info - ); - return Ok(()); - } - - // Collect table infos for later opening. - self.table_infos.push(table_info); - - Ok(()) - } -} - -type SchemaMap = HashMap>; - -/// Table based catalog -struct CatalogImpl { - /// Catalog name - name: String, - /// Schemas of catalog - // Now the Schema trait does not support create schema, so we use impl type here - schemas: RwLock, - /// Global schema id generator, Each schema has a unique schema id. 
- schema_id_generator: Arc, - /// Sys catalog table - catalog_table: Arc, - /// Mutex - /// - /// Protects: - /// - create schema - /// - persist to default catalog - mutex: Mutex<()>, -} - -impl CatalogImpl { - /// Insert schema - fn insert_schema_into_memory(&self, schema: Arc) { - let mut schemas = self.schemas.write().unwrap(); - schemas.insert(schema.name().to_string(), schema); - } - - fn find_schema(&self, schema_name: &str) -> Option> { - let schemas = self.schemas.read().unwrap(); - schemas.get(schema_name).cloned() - } -} - -// TODO(yingwen): Support add schema (with options to control schema -// persistence) -#[async_trait] -impl Catalog for CatalogImpl { - fn name(&self) -> NameRef { - &self.name - } - - fn schema_by_name(&self, name: NameRef) -> catalog::Result> { - let schemas = self.schemas.read().unwrap(); - let schema = schemas.get(name).cloned().map(|v| v as _); - Ok(schema) - } - - async fn create_schema<'a>(&'a self, name: NameRef<'a>) -> catalog::Result<()> { - // Check schema existence - if self.schema_by_name(name)?.is_some() { - return Ok(()); - } - - // Lock schema and persist schema to default catalog - let _lock = self.mutex.lock().await; - // Check again - if self.schema_by_name(name)?.is_some() { - return Ok(()); - } - - // Allocate schema id. - let schema_id = self - .schema_id_generator - .alloc_schema_id() - .expect("Schema id of default catalog should be valid"); - - let request = CreateSchemaRequest { - catalog_name: self.name.to_string(), - schema_name: name.to_string(), - schema_id, - }; - - let schema_id = request.schema_id; - - self.catalog_table - .create_schema(request) - .await - .box_err() - .context(catalog::CreateSchemaWithCause { - catalog: &self.name, - schema: &name.to_string(), - })?; - - let schema = Arc::new(SchemaImpl::new( - &self.name, - name, - schema_id, - self.catalog_table.clone(), - )); - - self.insert_schema_into_memory(schema); - info!( - "create schema success, catalog:{}, schema:{}", - &self.name, name - ); - Ok(()) - } - - fn all_schemas(&self) -> catalog::Result> { - Ok(self - .schemas - .read() - .unwrap() - .iter() - .map(|(_, v)| v.clone() as _) - .collect()) - } -} - -/// Table based schema -struct SchemaImpl { - /// Catalog name - catalog_name: String, - /// Schema name - schema_name: String, - /// Schema id - schema_id: SchemaId, - /// Tables of schema - tables: RwLock, - /// Mutex - /// - /// Protects: - /// - add/drop/alter table - /// - persist to sys catalog table - mutex: Mutex<()>, - /// Sys catalog table - catalog_table: Arc, - table_seq_generator: TableSeqGenerator, -} - -impl SchemaImpl { - fn new( - catalog_name: &str, - schema_name: &str, - schema_id: SchemaId, - catalog_table: Arc, - ) -> Self { - Self { - catalog_name: catalog_name.to_string(), - schema_name: schema_name.to_string(), - schema_id, - tables: RwLock::new(SchemaTables::default()), - mutex: Mutex::new(()), - catalog_table, - table_seq_generator: TableSeqGenerator::default(), - } - } - - fn validate_schema_info(&self, catalog_name: &str, schema_name: &str) -> schema::Result<()> { - ensure!( - self.catalog_name == catalog_name, - CatalogMismatch { - expect: &self.catalog_name, - given: catalog_name, - } - ); - ensure!( - self.schema_name == schema_name, - SchemaMismatch { - expect: &self.schema_name, - given: schema_name, - } - ); - - Ok(()) - } - - /// Insert table into memory, wont check existence - fn insert_table_into_memory(&self, table_id: TableId, table: TableRef) { - let mut tables = self.tables.write().unwrap(); - tables.insert(table_id, 
table); - } - - /// Remove table in memory, wont check existence - fn remove_table_in_memory(&self, table_name: &str) { - let mut tables = self.tables.write().unwrap(); - tables.remove(table_name); - } - - /// Check table existence in read lock - /// - /// If table exists: - /// - if create_if_not_exists is true, return Ok - /// - if create_if_not_exists is false, return Error - fn check_create_table_read( - &self, - table_name: &str, - create_if_not_exists: bool, - ) -> schema::Result> { - let tables = self.tables.read().unwrap(); - if let Some(table) = tables.tables_by_name.get(table_name) { - // Already exists - if create_if_not_exists { - // Create if not exists is set - return Ok(Some(table.clone())); - } - // Create if not exists is not set, need to return error - return CreateExistTable { table: table_name }.fail(); - } - - Ok(None) - } - - fn find_table_by_name(&self, name: NameRef) -> Option { - self.tables - .read() - .unwrap() - .tables_by_name - .get(name) - .cloned() - } - - async fn alloc_table_id<'a>(&self, name: NameRef<'a>) -> schema::Result { - let table_seq = self - .table_seq_generator - .alloc_table_seq() - .context(TooManyTable { - schema: &self.schema_name, - table: name, - })?; - - TableId::with_seq(self.schema_id, table_seq) - .context(InvalidSchemaIdAndTableSeq { - schema_id: self.schema_id, - table_seq, - }) - .box_err() - .context(AllocateTableId { - schema: &self.schema_name, - table: name, - }) - } -} - -#[derive(Default)] -struct SchemaTables { - tables_by_name: HashMap, - tables_by_id: HashMap, -} - -impl SchemaTables { - fn insert(&mut self, table_id: TableId, table: TableRef) { - self.tables_by_name - .insert(table.name().to_string(), table.clone()); - self.tables_by_id.insert(table_id, table); - } - - fn remove(&mut self, name: NameRef) { - if let Some(table) = self.tables_by_name.remove(name) { - self.tables_by_id.remove(&table.id()); - } - } -} - -#[async_trait] -impl Schema for SchemaImpl { - fn name(&self) -> NameRef { - &self.schema_name - } - - fn id(&self) -> SchemaId { - self.schema_id - } - - fn table_by_name(&self, name: NameRef) -> schema::Result> { - let table = self - .tables - .read() - .unwrap() - .tables_by_name - .get(name) - .cloned(); - Ok(table) - } - - // TODO(yingwen): Do not persist if engine is memory engine. - async fn create_table( - &self, - request: CreateTableRequest, - opts: CreateOptions, - ) -> schema::Result { - info!( - "Table based catalog manager create table, request:{:?}", - request - ); - - self.validate_schema_info(&request.params.catalog_name, &request.params.schema_name)?; - - // TODO(yingwen): Validate table id is unique. - - // Check table existence - if let Some(table) = - self.check_create_table_read(&request.params.table_name, opts.create_if_not_exists)? - { - return Ok(table); - } - - // Lock schema and persist table to sys catalog table - let _lock = self.mutex.lock().await; - // Check again - if let Some(table) = - self.check_create_table_read(&request.params.table_name, opts.create_if_not_exists)? 
- { - return Ok(table); - } - - // Create table - let table_id = self.alloc_table_id(&request.params.table_name).await?; - let request = request.into_engine_create_request(Some(table_id), self.schema_id); - let table_name = request.params.table_name.clone(); - let table = opts - .table_engine - .create_table(request.clone()) - .await - .box_err() - .context(CreateTableWithCause)?; - assert_eq!(table_name, table.name()); - - self.catalog_table - .create_table(request.clone().into()) - .await - .box_err() - .context(WriteTableMeta { - table: &request.params.table_name, - })?; - - { - // Insert into memory - let mut tables = self.tables.write().unwrap(); - tables.insert(request.table_id, table.clone()); - } - - Ok(table) - } - - async fn drop_table( - &self, - mut request: DropTableRequest, - opts: DropOptions, - ) -> schema::Result { - info!( - "Table based catalog manager drop table, request:{:?}", - request - ); - - self.validate_schema_info(&request.catalog_name, &request.schema_name)?; - - if self.find_table_by_name(&request.table_name).is_none() { - return Ok(false); - }; - - let _lock = self.mutex.lock().await; - // double check whether the table to drop exists. - let table = match self.find_table_by_name(&request.table_name) { - Some(v) => v, - None => return Ok(false), - }; - - // Determine the real engine type of the table to drop. - // FIXME(xikai): the engine should not be part of the DropRequest. - request.engine = table.engine_type().to_string(); - let request = request.into_engine_drop_request(self.schema_id); - - // Prepare to drop table info in the sys_catalog. - self.catalog_table - .prepare_drop_table(request.clone()) - .await - .box_err() - .context(WriteTableMeta { - table: &request.table_name, - })?; - - let dropped = opts - .table_engine - .drop_table(request.clone()) - .await - .box_err() - .context(DropTableWithCause)?; - - info!( - "Table engine drop table successfully, request:{:?}, dropped:{}", - request, dropped - ); - - // Update the drop table record into the sys_catalog_table. 
- self.catalog_table - .drop_table(request.clone()) - .await - .box_err() - .context(WriteTableMeta { - table: &request.table_name, - })?; - - { - let mut tables = self.tables.write().unwrap(); - tables.remove(&request.table_name); - }; - - info!( - "Table based catalog manager drop table successfully, request:{:?}", - request - ); - - return Ok(true); - } - - fn all_tables(&self) -> schema::Result> { - Ok(self - .tables - .read() - .unwrap() - .tables_by_name - .values() - .cloned() - .collect()) - } - - fn register_table(&self, table: TableRef) { - self.insert_table_into_memory(table.id(), table); - } - - fn unregister_table(&self, table_name: &str) { - self.remove_table_in_memory(table_name); - } -} - -#[cfg(any(test, feature = "test"))] -mod tests { - use std::{collections::HashMap, sync::Arc}; - - use analytic_engine::tests::util::{EngineBuildContext, RocksDBEngineBuildContext, TestEnv}; - use catalog::{ - consts::DEFAULT_CATALOG, - manager::Manager, - schema::{CreateOptions, CreateTableRequest, DropOptions, DropTableRequest, SchemaRef}, - }; - use common_types::table::DEFAULT_SHARD_ID; - use table_engine::{ - engine::{CreateTableParams, TableEngineRef, TableState}, - memory::MemoryTableEngine, - proxy::TableEngineProxy, - ANALYTIC_ENGINE_TYPE, - }; - - use crate::table_based::TableBasedManager; - - async fn build_catalog_manager(analytic: TableEngineRef) -> TableBasedManager { - // Create catalog manager, use analytic table as backend - TableBasedManager::new(analytic.clone()) - .await - .expect("Failed to create catalog manager") - } - - async fn build_default_schema_with_catalog(catalog_manager: &TableBasedManager) -> SchemaRef { - let catalog_name = catalog_manager.default_catalog_name(); - let schema_name = catalog_manager.default_schema_name(); - let catalog = catalog_manager.catalog_by_name(catalog_name); - assert!(catalog.is_ok()); - assert!(catalog.as_ref().unwrap().is_some()); - catalog - .as_ref() - .unwrap() - .as_ref() - .unwrap() - .schema_by_name(schema_name) - .unwrap() - .unwrap() - } - - async fn build_create_table_req(table_name: &str, schema: SchemaRef) -> CreateTableRequest { - let params = CreateTableParams { - catalog_name: DEFAULT_CATALOG.to_string(), - schema_name: schema.name().to_string(), - table_name: table_name.to_string(), - table_schema: common_types::tests::build_schema(), - engine: ANALYTIC_ENGINE_TYPE.to_string(), - table_options: HashMap::new(), - partition_info: None, - }; - - CreateTableRequest { - params, - table_id: None, - state: TableState::Stable, - shard_id: DEFAULT_SHARD_ID, - } - } - - #[tokio::test] - async fn test_catalog_by_name_schema_by_name_rocks() { - let rocksdb_ctx = RocksDBEngineBuildContext::default(); - test_catalog_by_name_schema_by_name(rocksdb_ctx).await; - } - - async fn test_catalog_by_name_schema_by_name(engine_context: T) - where - T: EngineBuildContext, - { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - test_ctx.open().await; - - let catalog_manager = build_catalog_manager(test_ctx.clone_engine()).await; - let catalog_name = catalog_manager.default_catalog_name(); - let schema_name = catalog_manager.default_schema_name(); - let catalog = catalog_manager.catalog_by_name(catalog_name); - assert!(catalog.is_ok()); - assert!(catalog.as_ref().unwrap().is_some()); - - let schema = catalog - .as_ref() - .unwrap() - .as_ref() - .unwrap() - .schema_by_name(schema_name); - assert!(schema.is_ok()); - assert!(schema.as_ref().unwrap().is_some()); - - let schema_name2 = "test"; - let 
schema = catalog - .as_ref() - .unwrap() - .as_ref() - .unwrap() - .schema_by_name(schema_name2); - assert!(schema.is_ok()); - assert!(schema.as_ref().unwrap().is_none()); - - let catalog_name2 = "test"; - let catalog = catalog_manager.catalog_by_name(catalog_name2); - assert!(catalog.is_ok()); - assert!(catalog.as_ref().unwrap().is_none()); - } - - #[tokio::test] - async fn test_maybe_create_schema_by_name_rocks() { - let rocksdb_ctx = RocksDBEngineBuildContext::default(); - test_maybe_create_schema_by_name(rocksdb_ctx).await; - } - - async fn test_maybe_create_schema_by_name(engine_context: T) - where - T: EngineBuildContext, - { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - test_ctx.open().await; - - let catalog_manager = build_catalog_manager(test_ctx.clone_engine()).await; - let catalog_name = catalog_manager.default_catalog_name(); - let catalog = catalog_manager.catalog_by_name(catalog_name); - assert!(catalog.is_ok()); - assert!(catalog.as_ref().unwrap().is_some()); - - let schema_name = "test"; - let catalog_ref = catalog.as_ref().unwrap().as_ref().unwrap(); - let mut schema = catalog_ref.schema_by_name(schema_name); - assert!(schema.is_ok()); - assert!(schema.as_ref().unwrap().is_none()); - - catalog_ref.create_schema(schema_name).await.unwrap(); - schema = catalog_ref.schema_by_name(schema_name); - assert!(schema.is_ok()); - assert!(schema.as_ref().unwrap().is_some()); - } - - #[tokio::test] - async fn test_create_table_rocks() { - let rocksdb_ctx = RocksDBEngineBuildContext::default(); - test_create_table(rocksdb_ctx).await; - } - - async fn test_create_table(engine_context: T) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - test_ctx.open().await; - - let engine = test_ctx.engine().clone(); - let memory = MemoryTableEngine; - let engine_proxy = Arc::new(TableEngineProxy { - memory, - analytic: engine.clone(), - }); - - let catalog_manager = build_catalog_manager(engine.clone()).await; - let schema = build_default_schema_with_catalog(&catalog_manager).await; - - let table_name = "test"; - let request = build_create_table_req(table_name, schema.clone()).await; - - let opts = CreateOptions { - table_engine: engine_proxy.clone(), - create_if_not_exists: true, - }; - - schema - .create_table(request.clone(), opts.clone()) - .await - .unwrap(); - assert!(schema.table_by_name(table_name).unwrap().is_some()); - - // create again - schema.create_table(request.clone(), opts).await.unwrap(); - assert!(schema.table_by_name(table_name).unwrap().is_some()); - - let opts2 = CreateOptions { - table_engine: engine_proxy, - create_if_not_exists: false, - }; - assert!(schema.create_table(request.clone(), opts2).await.is_err()); - } - - #[tokio::test] - async fn test_drop_table_rocks() { - let rocksdb_ctx = RocksDBEngineBuildContext::default(); - test_drop_table(rocksdb_ctx).await; - } - - async fn test_drop_table(engine_context: T) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - test_ctx.open().await; - - let engine = test_ctx.engine().clone(); - let memory = MemoryTableEngine; - let engine_proxy = Arc::new(TableEngineProxy { - memory, - analytic: engine.clone(), - }); - - let catalog_manager = build_catalog_manager(engine.clone()).await; - let schema = build_default_schema_with_catalog(&catalog_manager).await; - - let table_name = "test"; - let engine_name = "test_engine"; - let drop_table_request = DropTableRequest { - catalog_name: 
DEFAULT_CATALOG.to_string(), - schema_name: schema.name().to_string(), - table_name: table_name.to_string(), - engine: engine_name.to_string(), - }; - let drop_table_opts = DropOptions { - table_engine: engine_proxy.clone(), - }; - - assert!(!schema - .drop_table(drop_table_request.clone(), drop_table_opts.clone()) - .await - .unwrap()); - - let create_table_request = build_create_table_req(table_name, schema.clone()).await; - let create_table_opts = CreateOptions { - table_engine: engine_proxy, - create_if_not_exists: true, - }; - - // create table - { - schema - .create_table(create_table_request.clone(), create_table_opts.clone()) - .await - .unwrap(); - // check table exists - assert!(schema.table_by_name(table_name).unwrap().is_some()); - } - - // drop table - { - assert!(schema - .drop_table(drop_table_request.clone(), drop_table_opts.clone()) - .await - .unwrap()); - // check table not exists - assert!(schema.table_by_name(table_name).unwrap().is_none()); - } - - // create table again - { - schema - .create_table(create_table_request.clone(), create_table_opts.clone()) - .await - .unwrap(); - // check table exists - assert!(schema.table_by_name(table_name).unwrap().is_some()); - } - - // drop table again - { - assert!(schema - .drop_table(drop_table_request.clone(), drop_table_opts.clone()) - .await - .unwrap()); - // check table not exists - assert!(schema.table_by_name(table_name).unwrap().is_none()); - } - - // create two tables - { - let table_name2 = "test2"; - let create_table_request2 = build_create_table_req(table_name2, schema.clone()).await; - schema - .create_table(create_table_request2.clone(), create_table_opts.clone()) - .await - .unwrap(); - // check table exists - assert!(schema.table_by_name(table_name2).unwrap().is_some()); - - schema - .create_table(create_table_request, create_table_opts) - .await - .unwrap(); - // check table exists - assert!(schema.table_by_name(table_name).unwrap().is_some()); - } - - // drop table again - { - assert!(schema - .drop_table(drop_table_request, drop_table_opts) - .await - .unwrap()); - // check table not exists - assert!(schema.table_by_name(table_name).unwrap().is_none()); - } - } -} diff --git a/src/catalog_impls/src/volatile.rs b/src/catalog_impls/src/volatile.rs deleted file mode 100644 index 3b73e06e4f..0000000000 --- a/src/catalog_impls/src/volatile.rs +++ /dev/null @@ -1,430 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! A volatile catalog implementation used for storing information about table -//! and schema in memory. 
- -use std::{ - collections::HashMap, - string::ToString, - sync::{Arc, RwLock}, -}; - -use async_trait::async_trait; -use catalog::{ - self, consts, - manager::{self, Manager}, - schema::{ - self, CatalogMismatch, CreateOptions, CreateTableRequest, CreateTableWithCause, - DifferentTableId, DropOptions, DropTableRequest, DropTableWithCause, NameRef, Schema, - SchemaMismatch, SchemaRef, - }, - Catalog, CatalogRef, CreateSchemaWithCause, -}; -use cluster::{shard_set::ShardSet, ClusterRef}; -use common_types::schema::SchemaName; -use generic_error::BoxError; -use logger::{debug, info}; -use meta_client::{types::AllocSchemaIdRequest, MetaClientRef}; -use snafu::{ensure, OptionExt, ResultExt}; -use table_engine::table::{SchemaId, TableRef}; -use tokio::sync::Mutex; - -use crate::cluster_based::SchemaWithCluster; - -/// ManagerImpl manages multiple volatile catalogs. -pub struct ManagerImpl { - catalogs: HashMap>, - shard_set: ShardSet, - meta_client: MetaClientRef, - cluster: ClusterRef, -} - -impl ManagerImpl { - pub fn new(shard_set: ShardSet, meta_client: MetaClientRef, cluster: ClusterRef) -> Self { - let mut manager = ManagerImpl { - catalogs: HashMap::new(), - shard_set, - meta_client, - cluster, - }; - - manager.maybe_create_default_catalog(); - - manager - } -} - -impl Manager for ManagerImpl { - fn default_catalog_name(&self) -> NameRef { - &consts::DEFAULT_CATALOG - } - - fn default_schema_name(&self) -> NameRef { - consts::DEFAULT_SCHEMA - } - - fn catalog_by_name(&self, name: NameRef) -> manager::Result> { - let catalog = self.catalogs.get(name).map(|v| v.clone() as CatalogRef); - Ok(catalog) - } - - fn all_catalogs(&self) -> manager::Result> { - Ok(self - .catalogs - .values() - .map(|v| v.clone() as CatalogRef) - .collect()) - } -} - -impl ManagerImpl { - fn maybe_create_default_catalog(&mut self) { - // TODO: we should delegate this operation to the [TableManager]. - // Try to get default catalog, create it if not exists. - if self - .catalogs - .get(consts::DEFAULT_CATALOG.as_str()) - .is_none() - { - // Default catalog is not exists, create and store it. - self.create_catalog(consts::DEFAULT_CATALOG.to_string()); - }; - } - - fn create_catalog(&mut self, catalog_name: String) -> Arc { - let catalog = Arc::new(CatalogImpl { - name: catalog_name.clone(), - schemas: RwLock::new(HashMap::new()), - shard_set: self.shard_set.clone(), - meta_client: self.meta_client.clone(), - cluster: self.cluster.clone(), - }); - - self.catalogs.insert(catalog_name, catalog.clone()); - - catalog - } -} - -/// A volatile implementation for [`Catalog`]. -/// -/// The schema and table id are allocated (and maybe stored) by other components -/// so there is no recovering work for all the schemas and tables during -/// initialization. -struct CatalogImpl { - /// Catalog name - name: String, - /// All the schemas belonging to the catalog. 
- schemas: RwLock>, - shard_set: ShardSet, - meta_client: MetaClientRef, - cluster: ClusterRef, -} - -#[async_trait] -impl Catalog for CatalogImpl { - fn name(&self) -> NameRef { - &self.name - } - - fn schema_by_name(&self, name: NameRef) -> catalog::Result> { - let schema = self.schemas.read().unwrap().get(name).cloned(); - Ok(schema) - } - - async fn create_schema<'a>(&'a self, name: NameRef<'a>) -> catalog::Result<()> { - { - let schemas = self.schemas.read().unwrap(); - - if schemas.get(name).is_some() { - return Ok(()); - } - } - - let schema_id = { - let req = AllocSchemaIdRequest { - name: name.to_string(), - }; - let resp = self - .meta_client - .alloc_schema_id(req) - .await - .box_err() - .with_context(|| CreateSchemaWithCause { - catalog: &self.name, - schema: name.to_string(), - })?; - resp.id - }; - - let mut schemas = self.schemas.write().unwrap(); - if schemas.get(name).is_some() { - return Ok(()); - } - - let schema: SchemaRef = Arc::new(SchemaImpl::new( - self.name.to_string(), - name.to_string(), - SchemaId::from_u32(schema_id), - self.shard_set.clone(), - )); - - let cluster_based: SchemaRef = - Arc::new(SchemaWithCluster::new(schema, self.cluster.clone())); - - schemas.insert(name.to_string(), cluster_based); - - info!( - "create schema success, catalog:{}, schema:{}", - &self.name, name - ); - Ok(()) - } - - fn all_schemas(&self) -> catalog::Result> { - Ok(self - .schemas - .read() - .unwrap() - .iter() - .map(|(_, v)| v.clone()) - .collect()) - } -} - -/// A volatile implementation for [`Schema`]. -/// -/// The implementation is actually a delegation for [`cluster::TableManager`]. -struct SchemaImpl { - /// Catalog name - catalog_name: String, - /// Schema name - schema_name: String, - schema_id: SchemaId, - shard_set: ShardSet, - /// Tables of schema - tables: RwLock>, - /// Guard for creating/dropping table - create_table_mutex: Mutex<()>, -} - -impl SchemaImpl { - fn new( - catalog_name: String, - schema_name: String, - schema_id: SchemaId, - shard_set: ShardSet, - ) -> Self { - Self { - catalog_name, - schema_name, - schema_id, - shard_set, - tables: Default::default(), - create_table_mutex: Mutex::new(()), - } - } - - fn get_table( - &self, - catalog_name: &str, - schema_name: &str, - table_name: &str, - ) -> schema::Result> { - ensure!( - self.catalog_name == catalog_name, - CatalogMismatch { - expect: &self.catalog_name, - given: catalog_name, - } - ); - - ensure!( - self.schema_name == schema_name, - SchemaMismatch { - expect: &self.schema_name, - given: schema_name, - } - ); - - let tables = self.tables.read().unwrap(); - debug!( - "Volatile schema impl gets table, table_name:{:?}, all_tables:{:?}", - table_name, self.tables - ); - Ok(tables.get(table_name).cloned()) - } - - fn add_table(&self, table: TableRef) -> Option { - let mut tables = self.tables.write().unwrap(); - tables.insert(table.name().to_string(), table) - } - - fn add_new_table(&self, table: TableRef) { - let old = self.add_table(table); - - assert!(old.is_none()); - } - - fn remove_table(&self, table_name: &str) -> Option { - let mut tables = self.tables.write().unwrap(); - tables.remove(table_name) - } -} - -#[async_trait] -impl Schema for SchemaImpl { - fn name(&self) -> NameRef { - &self.schema_name - } - - fn id(&self) -> SchemaId { - self.schema_id - } - - fn table_by_name(&self, name: NameRef) -> schema::Result> { - let table = self - .get_table(self.catalog_name.as_str(), self.schema_name.as_str(), name) - .unwrap() - .clone(); - Ok(table) - } - - // In memory schema does not support 
persisting table info - async fn create_table( - &self, - request: CreateTableRequest, - opts: CreateOptions, - ) -> schema::Result { - // FIXME: Error should be returned if create_if_not_exist is false. - if let Some(table) = self.get_table( - &request.params.catalog_name, - &request.params.schema_name, - &request.params.table_name, - )? { - if let Some(given_table_id) = request.table_id { - let expected_table_id = table.id(); - ensure!( - expected_table_id == given_table_id, - DifferentTableId { - table_name: table.name().to_string(), - expected_table_id, - given_table_id, - } - ); - } - return Ok(table); - } - - // Prepare to create table. - let _create_table_guard = self.create_table_mutex.lock().await; - - if let Some(table) = self.get_table( - &request.params.catalog_name, - &request.params.schema_name, - &request.params.table_name, - )? { - return Ok(table); - } - - // Do real create table. - // Partition table is not stored in ShardTableManager. - if request.params.partition_info.is_none() { - let _ = self - .shard_set - .get(request.shard_id) - .with_context(|| schema::CreateTable { - request: request.clone(), - msg: "shard not found".to_string(), - })?; - } - let request = request.into_engine_create_request(None, self.schema_id); - - // Table engine is able to handle duplicate table creation. - let table = opts - .table_engine - .create_table(request) - .await - .box_err() - .context(CreateTableWithCause)?; - - self.add_new_table(table.clone()); - - Ok(table) - } - - async fn drop_table( - &self, - request: DropTableRequest, - opts: DropOptions, - ) -> schema::Result { - if self - .get_table( - &request.catalog_name, - &request.schema_name, - &request.table_name, - )? - .is_none() - { - return Ok(false); - }; - - // Prepare to drop table - let _drop_table_guard = self.create_table_mutex.lock().await; - - let table = match self.get_table( - &request.catalog_name, - &request.schema_name, - &request.table_name, - )? { - Some(v) => v, - None => return Ok(false), - }; - - // Drop the table in the engine first. - let request = request.into_engine_drop_request(self.schema_id); - let real_dropped = opts - .table_engine - .drop_table(request) - .await - .box_err() - .context(DropTableWithCause)?; - - // Remove the table from the memory. - self.remove_table(table.name()); - Ok(real_dropped) - } - - fn all_tables(&self) -> schema::Result> { - Ok(self - .tables - .read() - .unwrap() - .iter() - .map(|(_, v)| v.clone()) - .collect()) - } - - fn register_table(&self, table: TableRef) { - self.add_table(table); - } - - fn unregister_table(&self, table_name: &str) { - let _ = self.remove_table(table_name); - } -} diff --git a/src/cluster/Cargo.toml b/src/cluster/Cargo.toml deleted file mode 100644 index e48fd847c1..0000000000 --- a/src/cluster/Cargo.toml +++ /dev/null @@ -1,53 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "cluster" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -async-trait = { workspace = true } -bytes_ext = { workspace = true } -catalog = { workspace = true } -common_types = { workspace = true } -etcd-client = { workspace = true } -future_ext = { workspace = true } -generic_error = { workspace = true } -horaedbproto = { workspace = true } -logger = { workspace = true } -macros = { workspace = true } -meta_client = { workspace = true } -prost = { workspace = true } -runtime = { workspace = true } -serde = { workspace = true } -serde_json = { workspace = true } -snafu = { workspace = true } -table_engine = { workspace = true } -time_ext = { workspace = true } -tokio = { workspace = true } -wal = { workspace = true } diff --git a/src/cluster/src/cluster_impl.rs b/src/cluster/src/cluster_impl.rs deleted file mode 100644 index d79eda0485..0000000000 --- a/src/cluster/src/cluster_impl.rs +++ /dev/null @@ -1,470 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{ - sync::{Arc, Mutex, RwLock}, - time::Duration, -}; - -use async_trait::async_trait; -use common_types::table::ShardId; -use etcd_client::{Certificate, ConnectOptions, Identity, TlsOptions}; -use generic_error::BoxError; -use logger::{error, info, warn}; -use meta_client::{ - types::{ - GetNodesRequest, GetTablesOfShardsRequest, RouteTablesRequest, RouteTablesResponse, - ShardInfo, - }, - MetaClientRef, -}; -use runtime::{JoinHandle, Runtime}; -use snafu::{ensure, OptionExt, ResultExt}; -use tokio::{ - fs, io, - sync::mpsc::{self, Sender}, - time, -}; - -use crate::{ - config::{ClusterConfig, EtcdClientConfig}, - shard_lock_manager::{self, ShardLockManager, ShardLockManagerRef}, - shard_set::{Shard, ShardRef, ShardSet}, - topology::ClusterTopology, - Cluster, ClusterNodesNotFound, ClusterNodesResp, EtcdClientFailureWithCause, - InitEtcdClientConfig, InvalidArguments, MetaClientFailure, NodeType, OpenShard, - OpenShardWithCause, Result, ShardNotFound, TableStatus, -}; - -/// ClusterImpl is an implementation of [`Cluster`] based [`MetaClient`]. -/// -/// Its functions are to: -/// - Handle the some action from the HoraeMeta; -/// - Handle the heartbeat between horaedb-server and HoraeMeta; -/// - Provide the cluster topology. 
-pub struct ClusterImpl { - inner: Arc, - runtime: Arc, - config: ClusterConfig, - heartbeat_handle: Mutex>>, - stop_heartbeat_tx: Mutex>>, - shard_lock_manager: ShardLockManagerRef, -} - -impl ClusterImpl { - pub async fn try_new( - node_name: String, - shard_set: ShardSet, - meta_client: MetaClientRef, - config: ClusterConfig, - runtime: Arc, - ) -> Result { - if let Err(e) = config.etcd_client.validate() { - return InvalidArguments { msg: e }.fail(); - } - - let connect_options = build_etcd_connect_options(&config.etcd_client) - .await - .context(InitEtcdClientConfig)?; - let etcd_client = - etcd_client::Client::connect(&config.etcd_client.server_addrs, Some(connect_options)) - .await - .context(EtcdClientFailureWithCause { - msg: "failed to connect to etcd", - })?; - - let shard_lock_key_prefix = Self::shard_lock_key_prefix( - &config.etcd_client.root_path, - &config.meta_client.cluster_name, - )?; - let shard_lock_mgr_config = shard_lock_manager::Config { - node_name, - lock_key_prefix: shard_lock_key_prefix, - lock_lease_ttl_sec: config.etcd_client.shard_lock_lease_ttl_sec, - lock_lease_check_interval: config.etcd_client.shard_lock_lease_check_interval.0, - enable_fast_reacquire_lock: config.etcd_client.enable_shard_lock_fast_reacquire, - rpc_timeout: config.etcd_client.rpc_timeout(), - runtime: runtime.clone(), - }; - let shard_lock_manager = ShardLockManager::new(shard_lock_mgr_config, etcd_client); - - let inner = Arc::new(Inner::new(shard_set, meta_client)?); - Ok(Self { - inner, - runtime, - config, - heartbeat_handle: Mutex::new(None), - stop_heartbeat_tx: Mutex::new(None), - shard_lock_manager: Arc::new(shard_lock_manager), - }) - } - - fn start_heartbeat_loop(&self) { - let interval = self.heartbeat_interval(); - let error_wait_lease = self.error_wait_lease(); - let inner = self.inner.clone(); - let (tx, mut rx) = mpsc::channel(1); - - let handle = self.runtime.spawn(async move { - loop { - let shard_infos = inner - .shard_set - .all_shards() - .iter() - .map(|shard| shard.shard_info()) - .collect(); - info!("Node heartbeat to meta, shard infos:{:?}", shard_infos); - - let resp = inner.meta_client.send_heartbeat(shard_infos).await; - let wait = match resp { - Ok(()) => interval, - Err(e) => { - error!("Send heartbeat to meta failed, err:{}", e); - error_wait_lease - } - }; - - if time::timeout(wait, rx.recv()).await.is_ok() { - warn!("Receive exit command and exit heartbeat loop"); - break; - } - } - }); - - *self.stop_heartbeat_tx.lock().unwrap() = Some(tx); - *self.heartbeat_handle.lock().unwrap() = Some(handle); - } - - // Register node every 2/3 lease - fn heartbeat_interval(&self) -> Duration { - Duration::from_millis(self.config.meta_client.lease.as_millis() * 2 / 3) - } - - fn error_wait_lease(&self) -> Duration { - self.config.meta_client.lease.0 / 2 - } - - fn shard_lock_key_prefix(root_path: &str, cluster_name: &str) -> Result { - ensure!( - root_path.starts_with('/'), - InvalidArguments { - msg: "root_path is required to start with /", - } - ); - - ensure!( - !cluster_name.is_empty(), - InvalidArguments { - msg: "cluster_name is required non-empty", - } - ); - - const SHARD_LOCK_KEY: &str = "shards"; - Ok(format!("{root_path}/{cluster_name}/{SHARD_LOCK_KEY}")) - } -} - -struct Inner { - shard_set: ShardSet, - meta_client: MetaClientRef, - topology: RwLock, -} - -impl Inner { - fn new(shard_set: ShardSet, meta_client: MetaClientRef) -> Result { - Ok(Self { - shard_set, - meta_client, - topology: Default::default(), - }) - } - - async fn route_tables(&self, req: 
&RouteTablesRequest) -> Result { - // TODO: we should use self.topology to cache the route result to reduce the - // pressure on the HoraeMeta. - let route_resp = self - .meta_client - .route_tables(req.clone()) - .await - .context(MetaClientFailure)?; - - Ok(route_resp) - } - - async fn fetch_nodes(&self) -> Result { - { - let topology = self.topology.read().unwrap(); - let cached_node_topology = topology.nodes(); - if let Some(cached_node_topology) = cached_node_topology { - return Ok(ClusterNodesResp { - cluster_topology_version: cached_node_topology.version, - cluster_nodes: cached_node_topology.nodes, - }); - } - } - - let req = GetNodesRequest::default(); - let resp = self - .meta_client - .get_nodes(req) - .await - .context(MetaClientFailure)?; - - let version = resp.cluster_topology_version; - let nodes = Arc::new(resp.node_shards); - let updated = self - .topology - .write() - .unwrap() - .maybe_update_nodes(nodes.clone(), version); - - let resp = if updated { - ClusterNodesResp { - cluster_topology_version: version, - cluster_nodes: nodes, - } - } else { - let topology = self.topology.read().unwrap(); - // The fetched topology is outdated, and we will use the cache. - let cached_node_topology = - topology.nodes().context(ClusterNodesNotFound { version })?; - ClusterNodesResp { - cluster_topology_version: cached_node_topology.version, - cluster_nodes: cached_node_topology.nodes, - } - }; - - Ok(resp) - } - - async fn open_shard(&self, shard_info: &ShardInfo) -> Result { - if let Some(shard) = self.shard_set.get(shard_info.id) { - let cur_shard_info = shard.shard_info(); - if cur_shard_info.version == shard_info.version { - info!( - "No need to open the exactly same shard again, shard_info:{:?}", - shard_info - ); - return Ok(shard); - } - ensure!( - cur_shard_info.version < shard_info.version, - OpenShard { - shard_id: shard_info.id, - msg: format!("open a shard with a smaller version, curr_shard_info:{cur_shard_info:?}, new_shard_info:{shard_info:?}"), - } - ); - } - - let req = GetTablesOfShardsRequest { - shard_ids: vec![shard_info.id], - }; - - let mut resp = self - .meta_client - .get_tables_of_shards(req) - .await - .box_err() - .with_context(|| OpenShardWithCause { - msg: format!("shard_info:{shard_info:?}"), - })?; - - ensure!( - resp.tables_by_shard.len() == 1, - OpenShard { - shard_id: shard_info.id, - msg: "expect only one shard tables" - } - ); - - let tables_of_shard = resp - .tables_by_shard - .remove(&shard_info.id) - .context(OpenShard { - shard_id: shard_info.id, - msg: "shard tables are missing from the response", - })?; - - let shard_id = tables_of_shard.shard_info.id; - let shard = Arc::new(Shard::new(tables_of_shard)); - - info!("Insert shard to shard_set, id:{shard_id}, shard:{shard:?}"); - if let Some(old_shard) = self.shard_set.insert(shard_id, shard.clone()) { - info!("Remove old shard, id:{shard_id}, old:{old_shard:?}"); - } - - Ok(shard) - } - - fn shard(&self, shard_id: ShardId) -> Option { - self.shard_set.get(shard_id) - } - - /// Get shard by table name. - /// - /// This method is similar to `route_tables`, but it will not send request - /// to meta server, it only load data from local cache. - /// If target table is not found in any shards in this cluster, return None. - /// Otherwise, return the shard where this table is exists. 
- fn get_shard_by_table_name(&self, schema_name: &str, table_name: &str) -> Option { - let shards = self.shard_set.all_shards(); - shards - .into_iter() - .find(|shard| shard.find_table(schema_name, table_name).is_some()) - } - - fn close_shard(&self, shard_id: ShardId) -> Result { - info!("Remove shard from shard_set, id:{shard_id}"); - self.shard_set - .remove(shard_id) - .with_context(|| ShardNotFound { - msg: format!("close non-existent shard, shard_id:{shard_id}"), - }) - } - - fn list_shards(&self) -> Vec { - let shards = self.shard_set.all_shards(); - - shards.iter().map(|shard| shard.shard_info()).collect() - } -} - -#[async_trait] -impl Cluster for ClusterImpl { - async fn start(&self) -> Result<()> { - info!("Cluster is starting with config:{:?}", self.config); - - // start the background loop for sending heartbeat. - self.start_heartbeat_loop(); - - info!("Cluster has started"); - Ok(()) - } - - async fn stop(&self) -> Result<()> { - info!("Cluster is stopping"); - - { - let tx = self.stop_heartbeat_tx.lock().unwrap().take(); - if let Some(tx) = tx { - let _ = tx.send(()).await; - } - } - - { - let handle = self.heartbeat_handle.lock().unwrap().take(); - if let Some(handle) = handle { - let _ = handle.await; - } - } - - info!("Cluster has stopped"); - Ok(()) - } - - fn node_type(&self) -> NodeType { - self.config.node_type.clone() - } - - async fn open_shard(&self, shard_info: &ShardInfo) -> Result { - self.inner.open_shard(shard_info).await - } - - fn shard(&self, shard_id: ShardId) -> Option { - self.inner.shard(shard_id) - } - - fn get_table_status(&self, schema_name: &str, table_name: &str) -> Option { - self.inner - .get_shard_by_table_name(schema_name, table_name) - .map(|shard| TableStatus::from(shard.get_status())) - } - - async fn close_shard(&self, shard_id: ShardId) -> Result { - self.inner.close_shard(shard_id) - } - - fn list_shards(&self) -> Vec { - self.inner.list_shards() - } - - async fn route_tables(&self, req: &RouteTablesRequest) -> Result { - self.inner.route_tables(req).await - } - - async fn fetch_nodes(&self) -> Result { - self.inner.fetch_nodes().await - } - - fn shard_lock_manager(&self) -> ShardLockManagerRef { - self.shard_lock_manager.clone() - } -} - -/// Build the connect options for accessing etcd cluster. 
-async fn build_etcd_connect_options(config: &EtcdClientConfig) -> io::Result { - let connect_options = ConnectOptions::default() - .with_connect_timeout(config.connect_timeout.0) - .with_timeout(config.rpc_timeout()); - - let tls = &config.tls; - if tls.enable { - let server_ca_cert = fs::read(&tls.ca_cert_path).await?; - let client_cert = fs::read(&tls.client_cert_path).await?; - let client_key = fs::read(&tls.client_key_path).await?; - - let ca_cert = Certificate::from_pem(server_ca_cert); - let client_ident = Identity::from_pem(client_cert, client_key); - let mut tls_options = TlsOptions::new() - .ca_certificate(ca_cert) - .identity(client_ident); - - if let Some(domain) = &tls.domain { - tls_options = tls_options.domain_name(domain); - } - - Ok(connect_options.with_tls(tls_options)) - } else { - Ok(connect_options) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_format_shard_lock_key_prefix() { - let cases = vec![ - ( - ("/horaedb", "defaultCluster"), - Some("/horaedb/defaultCluster/shards"), - ), - (("", "defaultCluster"), None), - (("vvv", "defaultCluster"), None), - (("/x", ""), None), - ]; - - for ((root_path, cluster_name), expected) in cases { - let actual = ClusterImpl::shard_lock_key_prefix(root_path, cluster_name); - match expected { - Some(expected) => assert_eq!(actual.unwrap(), expected), - None => assert!(actual.is_err()), - } - } - } -} diff --git a/src/cluster/src/config.rs b/src/cluster/src/config.rs deleted file mode 100644 index d0b1c694b9..0000000000 --- a/src/cluster/src/config.rs +++ /dev/null @@ -1,141 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
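A minimal sketch of how the `build_etcd_connect_options` helper above is meant to be combined with `etcd_client::Client::connect`, assuming an async context and the `EtcdClientConfig` defined below; the function name and endpoint address here are illustrative only:

async fn connect_etcd_sketch() -> std::io::Result<etcd_client::Client> {
    // Illustrative config; the TLS material is only read when `tls.enable` is set.
    let config = EtcdClientConfig {
        server_addrs: vec!["127.0.0.1:2379".to_string()],
        ..Default::default()
    };

    // Build the timeouts (and TLS options, if enabled) from the config.
    let options = build_etcd_connect_options(&config).await?;

    // Hand the prepared options to the etcd client.
    let client = etcd_client::Client::connect(&config.server_addrs, Some(options))
        .await
        .expect("failed to connect to etcd");
    Ok(client)
}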
- -use std::time::Duration; - -use common_types::schema::TIMESTAMP_COLUMN; -use meta_client::meta_impl::MetaClientConfig; -use serde::{Deserialize, Serialize}; -use table_engine::ANALYTIC_ENGINE_TYPE; -use time_ext::ReadableDuration; - -use crate::NodeType; - -#[derive(Debug, Clone, Deserialize, Serialize)] -#[serde(default)] -// TODO: move this to table_engine crates -pub struct SchemaConfig { - pub default_engine_type: String, - pub default_timestamp_column_name: String, -} - -impl Default for SchemaConfig { - fn default() -> Self { - Self { - default_engine_type: ANALYTIC_ENGINE_TYPE.to_string(), - default_timestamp_column_name: TIMESTAMP_COLUMN.to_string(), - } - } -} - -const DEFAULT_ETCD_ROOT_PATH: &str = "/horaedb"; -const MIN_SHARD_LOCK_LEASE_TTL_SEC: u64 = 15; - -#[derive(Clone, Deserialize, Debug, Serialize)] -#[serde(default)] -pub struct TlsConfig { - pub enable: bool, - pub domain: Option, - pub ca_cert_path: String, - pub client_key_path: String, - pub client_cert_path: String, -} - -#[derive(Clone, Deserialize, Debug, Serialize)] -#[serde(default)] -pub struct EtcdClientConfig { - /// The etcd server addresses - pub server_addrs: Vec, - /// Root path in the etcd used by the horaedb server - pub root_path: String, - - /// Timeout to connect to etcd cluster - pub connect_timeout: ReadableDuration, - - /// Tls config to access etcd cluster. - pub tls: TlsConfig, - - /// The lease of the shard lock in seconds. - /// - /// It should be greater than `shard_lock_lease_check_interval`. - /// NOTE: the rpc timeout to the etcd cluster is determined by it. - pub shard_lock_lease_ttl_sec: u64, - /// The interval of checking whether the shard lock lease is expired - pub shard_lock_lease_check_interval: ReadableDuration, - /// The shard lock can be reacquired in a fast way if set. 
- pub enable_shard_lock_fast_reacquire: bool, -} - -impl EtcdClientConfig { - pub fn validate(&self) -> Result<(), String> { - if self.shard_lock_lease_ttl_sec < MIN_SHARD_LOCK_LEASE_TTL_SEC { - return Err(format!( - "shard_lock_lease_ttl_sec should be greater than {MIN_SHARD_LOCK_LEASE_TTL_SEC}" - )); - } - - if self.shard_lock_lease_check_interval.0 - >= Duration::from_secs(self.shard_lock_lease_ttl_sec) - { - return Err(format!( - "shard_lock_lease_check_interval({}) should be less than shard_lock_lease_ttl_sec({}s)", - self.shard_lock_lease_check_interval, self.shard_lock_lease_ttl_sec, - )); - } - - Ok(()) - } - - pub fn rpc_timeout(&self) -> Duration { - Duration::from_secs(self.shard_lock_lease_ttl_sec) / 6 - } -} - -impl Default for EtcdClientConfig { - fn default() -> Self { - Self { - server_addrs: vec!["127.0.0.1:2379".to_string()], - root_path: DEFAULT_ETCD_ROOT_PATH.to_string(), - tls: TlsConfig::default(), - connect_timeout: ReadableDuration::secs(5), - shard_lock_lease_ttl_sec: 30, - shard_lock_lease_check_interval: ReadableDuration::millis(200), - enable_shard_lock_fast_reacquire: false, - } - } -} - -impl Default for TlsConfig { - fn default() -> Self { - Self { - enable: false, - domain: None, - ca_cert_path: "".to_string(), - client_key_path: "".to_string(), - client_cert_path: "".to_string(), - } - } -} - -#[derive(Default, Clone, Deserialize, Debug, Serialize)] -#[serde(default)] -pub struct ClusterConfig { - pub cmd_channel_buffer_size: usize, - pub node_type: NodeType, - pub meta_client: MetaClientConfig, - pub etcd_client: EtcdClientConfig, -} diff --git a/src/cluster/src/lib.rs b/src/cluster/src/lib.rs deleted file mode 100644 index ddda6c4689..0000000000 --- a/src/cluster/src/lib.rs +++ /dev/null @@ -1,223 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Cluster sub-crate includes several functionalities for supporting HoraeDB -//! server to running in the distribute mode. Including: -//! - Request HoraeMeta for reading topology or configuration. -//! - Accept HoraeMeta's commands like open/close shard or create/drop table -//! etc. -//! -//! The core types are [Cluster] trait and its implementation [ClusterImpl]. 
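As a quick, illustrative check of the constraints encoded in `EtcdClientConfig::validate` and `rpc_timeout` above (the numbers come straight from the defaults shown in config.rs; the function name is made up):

fn etcd_config_defaults_sketch() {
    use std::time::Duration;

    let cfg = EtcdClientConfig::default();
    // The 30s lease ttl is above the 15s minimum, and the 200ms check
    // interval is well below the lease ttl, so validation passes.
    assert!(cfg.validate().is_ok());
    // The per-request etcd timeout is derived from the lease ttl: 30s / 6 = 5s.
    assert_eq!(cfg.rpc_timeout(), Duration::from_secs(5));
}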
- -#![feature(trait_alias)] - -use std::sync::Arc; - -use async_trait::async_trait; -use common_types::{cluster::NodeType, schema::SchemaName}; -use generic_error::GenericError; -use macros::define_result; -use meta_client::types::{ - ClusterNodesRef, RouteTablesRequest, RouteTablesResponse, ShardId, ShardInfo, ShardStatus, - ShardVersion, -}; -use shard_lock_manager::ShardLockManagerRef; -use snafu::{Backtrace, Snafu}; - -use crate::shard_set::ShardRef; - -pub mod cluster_impl; -pub mod config; -pub mod shard_lock_manager; -pub mod shard_operation; -pub mod shard_operator; -pub mod shard_set; -#[allow(dead_code)] -pub mod topology; - -#[derive(Debug, Snafu)] -#[snafu(visibility = "pub")] -pub enum Error { - #[snafu(display("Invalid arguments, msg:{msg}.\nBacktrace:{backtrace}"))] - InvalidArguments { msg: String, backtrace: Backtrace }, - - #[snafu(display("Internal error, msg:{msg}, err:{source}"))] - Internal { msg: String, source: GenericError }, - - #[snafu(display("Build meta client failed, err:{source}."))] - BuildMetaClient { source: meta_client::Error }, - - #[snafu(display("Meta client start failed, err:{source}."))] - StartMetaClient { source: meta_client::Error }, - - #[snafu(display("Meta client execute failed, err:{source}."))] - MetaClientFailure { source: meta_client::Error }, - - #[snafu(display("Failed to init etcd client config, err:{source}.\nBacktrace:\n{backtrace}"))] - InitEtcdClientConfig { - source: std::io::Error, - backtrace: Backtrace, - }, - - #[snafu(display("Etcd client failure, msg:{msg}, err:{source}.\nBacktrace:\n{backtrace}"))] - EtcdClientFailureWithCause { - msg: String, - source: etcd_client::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Fail to open shard, shard_id:{shard_id}, msg:{msg}.\nBacktrace:\n{backtrace}", - ))] - OpenShard { - shard_id: ShardId, - msg: String, - backtrace: Backtrace, - }, - - #[snafu(display("Fail to open shard, msg:{msg}, source:{source}."))] - OpenShardWithCause { msg: String, source: GenericError }, - - #[snafu(display("Fail to open shard, msg:{msg}.\nBacktrace:\n{backtrace}"))] - OpenShardNoCause { msg: String, backtrace: Backtrace }, - - #[snafu(display("Fail to close shard, msg:{msg}, source:{source}."))] - CloseShardWithCause { msg: String, source: GenericError }, - - #[snafu(display("Fail to close shard, msg:{msg}.\nBacktrace:\n{backtrace}"))] - CloseShardNoCause { msg: String, backtrace: Backtrace }, - - #[snafu(display("Fail to create table on shard, msg:{msg}, source:{source}."))] - CreateTableWithCause { msg: String, source: GenericError }, - - #[snafu(display("Fail to create table on shard, msg:{msg}.\nBacktrace:\n{backtrace}"))] - CreateTableNoCause { msg: String, backtrace: Backtrace }, - - #[snafu(display("Fail to drop table on shard, msg:{msg}, source:{source}."))] - DropTableWithCause { msg: String, source: GenericError }, - - #[snafu(display("Fail to drop table on shard, msg:{msg}.\nBacktrace:\n{backtrace}"))] - DropTableNoCause { msg: String, backtrace: Backtrace }, - - #[snafu(display("Fail to open table on shard, msg:{msg}, source:{source}."))] - OpenTableWithCause { msg: String, source: GenericError }, - - #[snafu(display("Fail to open table on shard, msg:{msg}.\nBacktrace:\n{backtrace}"))] - OpenTableNoCause { msg: String, backtrace: Backtrace }, - - #[snafu(display("Fail to close table on shard, msg:{msg}, source:{source}."))] - CloseTableWithCause { msg: String, source: GenericError }, - - #[snafu(display("Fail to close table on shard, msg:{msg}.\nBacktrace:\n{backtrace}"))] - 
CloseTableNoCause { msg: String, backtrace: Backtrace }, - - #[snafu(display("Shard not found, msg:{msg}.\nBacktrace:\n{backtrace}"))] - ShardNotFound { msg: String, backtrace: Backtrace }, - - #[snafu(display("Table not found, msg:{msg}.\nBacktrace:\n{backtrace}"))] - TableNotFound { msg: String, backtrace: Backtrace }, - - #[snafu(display("Table already exists, msg:{msg}.\nBacktrace:\n{backtrace}"))] - TableAlreadyExists { msg: String, backtrace: Backtrace }, - - #[snafu(display( - "Schema not found in current node, schema name:{schema_name}.\nBacktrace:\n{backtrace}", - ))] - SchemaNotFound { - schema_name: SchemaName, - backtrace: Backtrace, - }, - - #[snafu(display( - "Shard version mismatch, shard_info:{shard_info:?}, expect version:{expect_version}.\nBacktrace:\n{backtrace}", - ))] - ShardVersionMismatch { - shard_info: ShardInfo, - expect_version: ShardVersion, - backtrace: Backtrace, - }, - - #[snafu(display("Update on a frozen shard, shard_id:{shard_id}.\nBacktrace:\n{backtrace}",))] - UpdateFrozenShard { - shard_id: ShardId, - backtrace: Backtrace, - }, - - #[snafu(display( - "Cluster nodes are not found in the topology, version:{version}.\nBacktrace:\n{backtrace}", - ))] - ClusterNodesNotFound { version: u64, backtrace: Backtrace }, -} - -define_result!(Error); - -#[derive(Debug)] -pub enum TableStatus { - Ready, - Recovering, - Frozen, -} - -impl From for TableStatus { - fn from(value: ShardStatus) -> Self { - match value { - ShardStatus::Init | ShardStatus::Opening => TableStatus::Recovering, - ShardStatus::Ready => TableStatus::Ready, - ShardStatus::Frozen => TableStatus::Frozen, - } - } -} - -pub type ClusterRef = Arc; - -#[derive(Clone, Debug)] -pub struct ClusterNodesResp { - pub cluster_topology_version: u64, - pub cluster_nodes: ClusterNodesRef, -} - -#[async_trait] -pub trait Cluster { - async fn start(&self) -> Result<()>; - async fn stop(&self) -> Result<()>; - - /// Get cluster type. - fn node_type(&self) -> NodeType; - - /// Fetch related information and open shard. - async fn open_shard(&self, shard_info: &ShardInfo) -> Result; - - /// Get shard. - /// - /// If target shard has opened in cluster, return it. Otherwise, return - /// None. - fn shard(&self, shard_id: ShardId) -> Option; - - fn get_table_status(&self, schema_name: &str, table_name: &str) -> Option; - - /// Close shard. - /// - /// Return error if the shard is not found. - async fn close_shard(&self, shard_id: ShardId) -> Result; - - /// list loaded shards in current node. - fn list_shards(&self) -> Vec; - - async fn route_tables(&self, req: &RouteTablesRequest) -> Result; - async fn fetch_nodes(&self) -> Result; - fn shard_lock_manager(&self) -> ShardLockManagerRef; -} diff --git a/src/cluster/src/shard_lock_manager.rs b/src/cluster/src/shard_lock_manager.rs deleted file mode 100644 index aaaa65ae8a..0000000000 --- a/src/cluster/src/shard_lock_manager.rs +++ /dev/null @@ -1,908 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{ - collections::HashMap, - future::Future, - sync::{Arc, Mutex, RwLock}, - time::{Duration, Instant}, -}; - -use bytes_ext::Bytes; -use common_types::table::ShardId; -use etcd_client::{ - Client, Compare, CompareOp, LeaseKeepAliveStream, LeaseKeeper, PutOptions, Txn, TxnOp, -}; -use horaedbproto::meta_event::ShardLockValue; -use logger::{debug, error, info, warn}; -use macros::define_result; -use prost::Message; -use runtime::{JoinHandle, RuntimeRef}; -use snafu::{ensure, Backtrace, ResultExt, Snafu}; -use tokio::sync::{oneshot, RwLock as AsyncRwLock}; - -#[derive(Debug, Snafu)] -#[snafu(visibility = "pub")] -pub enum Error { - #[snafu(display( - "Failed to keep alive, lease_id:{lease_id}, err:{source}.\nBacktrace:\n{backtrace:?}" - ))] - KeepAlive { - lease_id: i64, - source: etcd_client::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to keep alive because of no resp, lease_id:{lease_id}.\nBacktrace:\n{backtrace:?}" - ))] - KeepAliveWithoutResp { lease_id: i64, backtrace: Backtrace }, - - #[snafu(display( - "Failed to keep alive because of expired ttl, ttl_in_resp:{ttl_in_resp}, lease_id:{lease_id}.\nBacktrace:\n{backtrace:?}" - ))] - KeepAliveWithExpiredTTL { - lease_id: i64, - ttl_in_resp: i64, - backtrace: Backtrace, - }, - - #[snafu(display( - "Try to start multiple keepalive procedure in background, lease_id:{lease_id}, err:{source}.\nBacktrace:\n{backtrace:?}" - ))] - ConcurrentKeepalive { - source: tokio::sync::TryLockError, - lease_id: i64, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to get lease, shard_id:{shard_id}, lease_id:{lease_id}, err:{source}.\nBacktrace:\n{backtrace:?}" - ))] - GetLease { - shard_id: ShardId, - lease_id: i64, - source: etcd_client::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to get lock key, shard_id:{shard_id}, err:{source}.\nBacktrace:\n{backtrace:?}" - ))] - GetLockKey { - shard_id: ShardId, - source: etcd_client::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to grant lease, shard_id:{shard_id}, err:{source}.\nBacktrace:\n{backtrace:?}" - ))] - GrantLease { - shard_id: ShardId, - source: etcd_client::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to grant lease, shard_id:{shard_id}, ttl_sec:{ttl_sec}.\nBacktrace:\n{backtrace:?}" - ))] - GrantLeaseWithInvalidTTL { - shard_id: ShardId, - ttl_sec: i64, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to create lock in etcd, shard_id:{shard_id}, err:{source}.\nBacktrace:\n{backtrace:?}"))] - CreateLockInEtcd { - shard_id: ShardId, - source: etcd_client::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to execute txn of create lock, shard_id:{shard_id}.\nBacktrace:\n{backtrace:?}" - ))] - CreateLockTxn { - shard_id: ShardId, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to revoke the lease, lease_id:{lease_id}, shard_id:{shard_id}, err:{source}.\nBacktrace:\n{backtrace:?}"))] - RevokeLease { - lease_id: i64, - shard_id: ShardId, - source: etcd_client::Error, - backtrace: Backtrace, - }, -} - -define_result!(Error); - -pub type 
ShardLockManagerRef = Arc; - -/// Shard lock manager is implemented based on etcd. -/// -/// Only with the lock held, the shard can be operated by this node. -pub struct ShardLockManager { - lock_key_prefix: String, - lock_value: Bytes, - lock_lease_ttl_sec: u64, - lock_lease_check_interval: Duration, - enable_fast_reacquire_lock: bool, - rpc_timeout: Duration, - - etcd_client: Client, - runtime: RuntimeRef, - - // ShardID -> ShardLock - shard_locks: Arc>>, -} - -#[derive(Debug)] -struct LeaseState { - expired_at: Instant, -} - -impl LeaseState { - fn new(expired_at: Instant) -> Self { - Self { expired_at } - } - - /// Get the duration until the lease is expired. - /// - /// The actual duration will be still returned even if the lease is not - /// alive. And None will be returned if the lease is expired. - fn duration_until_expired(&self) -> Option { - let expired_at = self.expired_at; - let now = Instant::now(); - expired_at.checked_duration_since(now) - } - - /// Check whether lease is expired. - fn is_expired(&self) -> bool { - self.expired_at < Instant::now() - } -} - -impl LeaseState { - fn update_expired_at(&mut self, expired_at: Instant) { - self.expired_at = expired_at; - } -} - -#[derive(Debug)] -enum KeepAliveStopReason { - Failure { - err: Error, - stop_rx: oneshot::Receiver<()>, - }, - Exit, -} - -#[derive(Debug)] -enum KeepAliveResult { - Ok, - Failure { - err: Error, - stop_rx: oneshot::Receiver<()>, - }, -} - -/// The lease of the shard lock. -struct Lease { - /// The lease id - id: i64, - /// The time to live of the lease - ttl: Duration, - - state: Arc>, - /// The handle for the keep alive task in background. - /// - /// TODO: shall we wait for it to exit? - keep_alive_handle: Mutex>>, -} - -impl Lease { - fn new(id: i64, ttl: Duration, initial_state: LeaseState) -> Self { - Self { - id, - ttl, - state: Arc::new(RwLock::new(initial_state)), - keep_alive_handle: Mutex::new(None), - } - } - - fn is_expired(&self) -> bool { - let state = self.state.read().unwrap(); - state.is_expired() - } - - fn duration_until_expired(&self) -> Option { - let state = self.state.read().unwrap(); - state.duration_until_expired() - } - - /// Keep alive the lease once, the state will be updated whatever the - /// keepalive result is. - async fn keep_alive_once( - keeper: &mut LeaseKeeper, - stream: &mut LeaseKeepAliveStream, - state: &Arc>, - ) -> Result<()> { - keeper.keep_alive().await.context(KeepAlive { - lease_id: keeper.id(), - })?; - match stream.message().await.context(KeepAlive { - lease_id: keeper.id(), - })? { - Some(resp) => { - // The ttl in the response is in seconds, let's convert it into milliseconds. 
- let ttl_sec = resp.ttl(); - if ttl_sec <= 0 { - error!("Failed to keep lease alive because negative ttl is received, id:{}, ttl:{ttl_sec}s", keeper.id()); - return KeepAliveWithExpiredTTL { - lease_id: keeper.id(), - ttl_in_resp: ttl_sec, - } - .fail(); - } - - let expired_at = Instant::now() + Duration::from_secs(ttl_sec as u64); - state.write().unwrap().update_expired_at(expired_at); - - debug!( - "Succeed to keep lease alive, id:{}, ttl:{ttl_sec}s, expired_at:{expired_at:?}", - keeper.id(), - ); - Ok(()) - } - None => { - error!( - "failed to keep lease alive because of no resp, id:{}", - keeper.id() - ); - KeepAliveWithoutResp { - lease_id: keeper.id(), - } - .fail() - } - } - } - - async fn start_keepalive( - &self, - mut stop_rx: oneshot::Receiver<()>, - notifier: oneshot::Sender, - etcd_client: &mut Client, - runtime: &RuntimeRef, - ) -> KeepAliveResult { - let (mut keeper, mut stream) = match etcd_client - .lease_keep_alive(self.id) - .await - .context(KeepAlive { lease_id: self.id }) - { - Ok((keeper, stream)) => (keeper, stream), - Err(err) => { - return KeepAliveResult::Failure { err, stop_rx }; - } - }; - - // Update the lease state immediately. - if let Err(err) = Self::keep_alive_once(&mut keeper, &mut stream, &self.state).await { - return KeepAliveResult::Failure { err, stop_rx }; - } - - // Send keepalive request every ttl/3. - // FIXME: shall we ensure the interval won't be too small? - let keep_alive_interval = self.ttl / 3; - let lease_id = self.id; - let state = self.state.clone(); - let handle = runtime.spawn(async move { - loop { - if let Err(err) = Self::keep_alive_once(&mut keeper, &mut stream, &state).await { - error!("Failed to keep lease alive, id:{lease_id}, err:{err}"); - let reason = KeepAliveStopReason::Failure { - err, - stop_rx, - }; - if notifier.send(reason).is_err() { - error!("Failed to send keepalive failure, lease_id:{lease_id}"); - } - - return - } - let sleeper = tokio::time::sleep(keep_alive_interval); - tokio::select! { - _ = sleeper => { - debug!("Try to keep the lease alive again, id:{lease_id}"); - }, - _ = &mut stop_rx => { - info!("Stop keeping lease alive, id:{lease_id}"); - if notifier.send(KeepAliveStopReason::Exit).is_err() { - error!("Failed to send keepalive stopping message, lease_id:{lease_id}"); - } - return - } - } - } - }); - - *self.keep_alive_handle.lock().unwrap() = Some(handle); - - KeepAliveResult::Ok - } -} - -/// Lock for a shard. -/// -/// The lock is a temporary key in etcd, which is created with a lease. And the -/// lease is kept alive in background, so the lock will be expired if the lease -/// is expired. -/// -/// Note: the lock will be invalid if the keepalive fails so check the validity -/// before granting the lock. -pub struct ShardLock { - shard_id: ShardId, - /// The temporary key in etcd - key: Bytes, - /// The value of the key in etcd - value: Bytes, - /// The lease of the lock in etcd - ttl_sec: u64, - /// The interval to check whether the lease is expired - lease_check_interval: Duration, - /// The previous lease will be re-used if set - enable_fast_reacquire: bool, - /// The timeout for etcd rpc - rpc_timeout: Duration, - - lease: Option>, - lease_check_handle: Option>, - lease_keepalive_stopper: Option>, -} - -/// The information about an etcd lease. 
-#[derive(Debug, Clone)] -struct LeaseInfo { - id: i64, - expired_at: Instant, -} - -impl ShardLock { - fn new( - shard_id: ShardId, - key_prefix: &str, - value: Bytes, - ttl_sec: u64, - lease_check_interval: Duration, - enable_fast_reacquire: bool, - rpc_timeout: Duration, - ) -> Self { - Self { - shard_id, - key: Self::lock_key(key_prefix, shard_id), - value, - ttl_sec, - lease_check_interval, - enable_fast_reacquire, - rpc_timeout, - - lease: None, - lease_check_handle: None, - lease_keepalive_stopper: None, - } - } - - fn lock_key(key_prefix: &str, shard_id: ShardId) -> Bytes { - // The shard id in the key is padded with at most 20 zeros to make it sortable. - let key = format!("{key_prefix}/{shard_id:0>20}"); - Bytes::from(key) - } - - /// The shard lock is allowed to acquired in a fast way if the lock has been - /// created by this node just before. - /// - /// The slow way has to wait for the previous lock lease expired. And the - /// fast way is to reuse the lease if it is not expired and the lock's value - /// is the same. - async fn maybe_fast_acquire_lock(&self, etcd_client: &mut Client) -> Result> { - if !self.enable_fast_reacquire { - return Ok(None); - } - - let resp = etcd_client - .get(self.key.clone(), None) - .await - .context(GetLockKey { - shard_id: self.shard_id, - })?; - - // Only one or zero key-value will be fetched, and it can continue only if one - // key-value is returned. - if resp.kvs().len() != 1 { - warn!( - "Expect exactly one key value pair, but found {} kv pairs, shard_id:{}", - resp.kvs().len(), - self.shard_id - ); - return Ok(None); - } - let kv = &resp.kvs()[0]; - let lease_id = kv.lease(); - if lease_id == 0 { - // There is no lease attached to the lock key. - return Ok(None); - } - - // FIXME: A better way is to compare the specific field of the decoded values. - if kv.value() != self.value { - warn!( - "Try to acquire a lock held by others, shard_id:{}", - self.shard_id - ); - return Ok(None); - } - - let ttl_sec = etcd_client - .lease_time_to_live(lease_id, None) - .await - .context(GetLease { - shard_id: self.shard_id, - lease_id, - })? - .ttl(); - - if ttl_sec == 0 { - // The lease has expired. - return Ok(None); - } - - let lease_expired_at = Instant::now() + Duration::from_secs(ttl_sec as u64); - Ok(Some(LeaseInfo { - id: lease_id, - expired_at: lease_expired_at, - })) - } - - async fn slow_acquire_lock(&self, etcd_client: &mut Client) -> Result { - // Grant the lease first. - let resp = etcd_client - .lease_grant(self.ttl_sec as i64, None) - .await - .context(GrantLease { - shard_id: self.shard_id, - })?; - ensure!( - resp.ttl() > 0, - GrantLeaseWithInvalidTTL { - shard_id: self.shard_id, - ttl_sec: resp.ttl() - } - ); - - let lease_expired_at = Instant::now() + Duration::from_secs(resp.ttl() as u64); - let lease_id = resp.id(); - self.create_lock_with_lease(lease_id, etcd_client).await?; - - Ok(LeaseInfo { - id: lease_id, - expired_at: lease_expired_at, - }) - } - - /// Grant the shard lock. - /// - /// The `on_lock_expired` callback will be called when the lock is expired, - /// but it won't be triggered if the lock is revoked in purpose. - async fn grant( - &mut self, - on_lock_expired: OnExpired, - etcd_client: &mut Client, - runtime: &RuntimeRef, - ) -> Result - where - OnExpired: FnOnce(ShardId) -> Fut + Send + 'static, - Fut: Future + Send + 'static, - { - if self.is_valid() { - // FIXME: maybe the lock will be expired soon. 
- warn!("The lock is already granted, shard_id:{}", self.shard_id); - return Ok(false); - } - - let lease_info = match self.maybe_fast_acquire_lock(etcd_client).await { - Ok(Some(v)) => { - info!("Shard lock is acquired fast, shard_id:{}", self.shard_id); - v - } - Ok(None) => { - warn!( - "No lock to reuse, try to slow acquire lock, shard_id:{}", - self.shard_id - ); - self.slow_acquire_lock(etcd_client).await? - } - Err(e) => { - warn!( - "Failed to fast acquire lock, try to slow acquire lock, shard_id:{}, err:{e}", - self.shard_id - ); - self.slow_acquire_lock(etcd_client).await? - } - }; - - self.keep_lease_alive( - lease_info.id, - lease_info.expired_at, - on_lock_expired, - etcd_client, - runtime, - ) - .await?; - - Ok(true) - } - - /// Revoke the shard lock. - /// - /// NOTE: the `on_lock_expired` callback set in the `grant` won't be - /// triggered. - async fn revoke(&mut self, etcd_client: &mut Client) -> Result<()> { - self.stop_keepalive().await; - - // Revoke the lease. - if let Some(lease) = self.lease.take() { - etcd_client - .lease_revoke(lease.id) - .await - .context(RevokeLease { - lease_id: lease.id, - shard_id: self.shard_id, - })?; - } - - Ok(()) - } - - /// Check whether the shard lock is valid. - /// - /// The shard lock is valid if the associated lease is not expired. - fn is_valid(&self) -> bool { - self.lease - .as_ref() - .map(|v| !v.is_expired()) - .unwrap_or(false) - } - - async fn stop_keepalive(&mut self) { - info!( - "Wait for background keepalive exit, shard_id:{}", - self.shard_id - ); - - // Stop keeping alive the lease. - if let Some(sender) = self.lease_keepalive_stopper.take() { - if sender.send(()).is_err() { - warn!("Failed to stop keeping lease alive, maybe it has been stopped already so ignore it, hard_id:{}", self.shard_id); - } - } - - // Wait for the lease check worker to stop. - if let Some(handle) = self.lease_check_handle.take() { - handle.abort(); - } - - info!( - "Finish exiting from background keepalive task, shard_id:{}", - self.shard_id - ); - } - - async fn create_lock_with_lease(&self, lease_id: i64, etcd_client: &mut Client) -> Result<()> { - // In etcd, the version is 0 if the key does not exist. - let not_exist = Compare::version(self.key.clone(), CompareOp::Equal, 0); - let create_key = { - let options = PutOptions::new().with_lease(lease_id); - TxnOp::put(self.key.clone(), self.value.clone(), Some(options)) - }; - - let create_if_not_exist = Txn::new().when([not_exist]).and_then([create_key]); - - let resp = etcd_client - .txn(create_if_not_exist) - .await - .context(CreateLockInEtcd { - shard_id: self.shard_id, - })?; - - ensure!( - resp.succeeded(), - CreateLockTxn { - shard_id: self.shard_id - } - ); - - Ok(()) - } - - /// Keep alive the lease. - /// - /// The `on_lock_expired` callback will be called when the lock is expired, - /// but it won't be triggered if the lock is revoked in purpose. - /// The keepalive procedure supports retrying if the failure is caused by - /// rpc error, and it will stop if the lease is expired. - async fn keep_lease_alive( - &mut self, - lease_id: i64, - expired_at: Instant, - on_lock_expired: OnExpired, - etcd_client: &Client, - runtime: &RuntimeRef, - ) -> Result<()> - where - OnExpired: FnOnce(ShardId) -> Fut + Send + 'static, - Fut: Future + Send + 'static, - { - // Try to acquire the lock to ensure there is only one running keepalive - // procedure. 
- let initial_state = LeaseState::new(expired_at); - let lease = Arc::new(Lease::new( - lease_id, - Duration::from_secs(self.ttl_sec), - initial_state, - )); - - let shard_id = self.shard_id; - let rpc_timeout = self.rpc_timeout; - let lock_lease_check_interval = self.lease_check_interval; - let (keepalive_stop_sender, keepalive_stop_rx) = oneshot::channel(); - let mut etcd_client = etcd_client.clone(); - let runtime_for_bg = runtime.clone(); - let lease_for_bg = lease.clone(); - let handle = runtime.spawn(async move { - // This receiver is used to stop the underlying keepalive procedure. - let mut keepalive_stop_rx = keepalive_stop_rx; - // The loop is for retrying the keepalive procedure if the failure results from rpc error. - loop { - if let Some(dur_until_expired) = lease_for_bg.duration_until_expired() { - // Don't start next keep alive immediately, wait for a while to avoid too many requests. - // The wait time is calculated based on the lease ttl and the rpc timeout. And if the time before - // the lease expired is not enough (less than 1.5*rpc timeout), the keepalive will not be scheduled - // and the lock is considered expired. - let keepalive_cost = rpc_timeout + rpc_timeout / 2; - if let Some(wait) = dur_until_expired.checked_sub(keepalive_cost) { - info!( - "Next keepalive will be schedule after {wait:?}, shard_id:{shard_id}, lease_id:{lease_id}, \ - keepalive_cost:{keepalive_cost:?}, duration_until_expired:{dur_until_expired:?}" - ); - tokio::select! { - _ = tokio::time::sleep(wait) => { - info!("Start to keepalive, shard_id:{shard_id}, lease_id:{lease_id}"); - } - _ = &mut keepalive_stop_rx => { - info!("Recv signal to stop keepalive, shard_id:{shard_id}, lease_id:{lease_id}"); - return; - } - } - - } else { - warn!( - "The lease is about to expire, and trigger the on_lock_expire callback, shard_id:{shard_id}, \ - lease_id:{lease_id}" - ); - - on_lock_expired(shard_id).await; - return; - } - } - - let (lease_expire_notifier, mut lease_expire_rx) = oneshot::channel(); - if let KeepAliveResult::Failure { err, stop_rx } = lease_for_bg - .start_keepalive( - keepalive_stop_rx, - lease_expire_notifier, - &mut etcd_client, - &runtime_for_bg, - ) - .await { - error!("Failed to start keepalive and will retry , shard_id:{shard_id}, err:{err}"); - keepalive_stop_rx = stop_rx; - continue; - } - - // Start to keep the lease alive forever. - // If the lease is expired, the whole task will be stopped. However, if there are some failure in the - // keepalive procedure, we will retry. - 'outer: loop { - let timer = tokio::time::sleep(lock_lease_check_interval); - tokio::select! { - _ = timer => { - if lease_for_bg.is_expired() { - warn!("The lease of the shard lock is expired, shard_id:{shard_id}"); - on_lock_expired(shard_id).await; - return - } - } - res = &mut lease_expire_rx => { - match res { - Ok(reason) => match reason { - KeepAliveStopReason::Exit => { - info!("The lease is revoked in purpose, and no need to do anything, shard_id:{shard_id}"); - return; - } - KeepAliveStopReason::Failure { err, stop_rx } => { - error!("Fail to keep lease alive, and will retry, shard_id:{shard_id}, err:{err:?}"); - keepalive_stop_rx = stop_rx; - // Break the outer loop to avoid the macro may introduce a new loop. - break 'outer; - } - } - Err(_) => { - // Unreachable! Because the notifier will always send a value before it is closed. 
- error!("The notifier for lease keeping alive is closed, will trigger callback, shard_id:{shard_id}"); - on_lock_expired(shard_id).await; - return; - } - } - } - } - } - } - }); - - self.lease_check_handle = Some(handle); - self.lease_keepalive_stopper = Some(keepalive_stop_sender); - self.lease = Some(lease); - Ok(()) - } -} - -#[derive(Clone, Debug)] -pub struct Config { - pub node_name: String, - pub lock_key_prefix: String, - pub lock_lease_ttl_sec: u64, - pub lock_lease_check_interval: Duration, - pub enable_fast_reacquire_lock: bool, - pub rpc_timeout: Duration, - pub runtime: RuntimeRef, -} - -impl ShardLockManager { - pub fn new(config: Config, etcd_client: Client) -> ShardLockManager { - let Config { - node_name, - lock_key_prefix, - lock_lease_ttl_sec, - lock_lease_check_interval, - enable_fast_reacquire_lock, - rpc_timeout, - runtime, - } = config; - - let value = Bytes::from(ShardLockValue { node_name }.encode_to_vec()); - - ShardLockManager { - lock_key_prefix, - lock_value: value, - lock_lease_ttl_sec, - lock_lease_check_interval, - rpc_timeout, - enable_fast_reacquire_lock, - etcd_client, - runtime, - shard_locks: Arc::new(AsyncRwLock::new(HashMap::new())), - } - } - - /// Grant lock to the shard. - /// - /// If the lock is already granted, return false. The `on_lock_expired` will - /// be called when the lock lease is expired, but it won't be triggered if - /// the lock is revoked. - pub async fn grant_lock( - &self, - shard_id: u32, - on_lock_expired: OnExpired, - ) -> Result - where - OnExpired: FnOnce(ShardId) -> Fut + Send + 'static, - Fut: Future + Send + 'static, - { - info!("Try to grant lock for shard, shard_id:{shard_id}"); - - let mut shard_locks = self.shard_locks.write().await; - if let Some(shard_lock) = shard_locks.get_mut(&shard_id) { - let mut etcd_client = self.etcd_client.clone(); - warn!("The shard lock was created before, and grant it again now, shard_id:{shard_id}"); - shard_lock - .grant(on_lock_expired, &mut etcd_client, &self.runtime) - .await?; - } else { - info!("Try to grant a new shard lock, shard_id:{shard_id}"); - - let mut shard_lock = ShardLock::new( - shard_id, - &self.lock_key_prefix, - self.lock_value.clone(), - self.lock_lease_ttl_sec, - self.lock_lease_check_interval, - self.enable_fast_reacquire_lock, - self.rpc_timeout, - ); - - let mut etcd_client = self.etcd_client.clone(); - shard_lock - .grant(on_lock_expired, &mut etcd_client, &self.runtime) - .await?; - - shard_locks.insert(shard_id, shard_lock); - } - - info!("Finish granting lock for shard, shard_id:{shard_id}"); - Ok(true) - } - - /// Revoke the shard lock. - /// - /// If the lock is not exist, return false. And the `on_lock_expired` won't - /// be triggered. 
- pub async fn revoke_lock(&self, shard_id: u32) -> Result { - info!("Try to revoke lock for shard, shard_id:{shard_id}"); - - let mut shard_locks = self.shard_locks.write().await; - let shard_lock = shard_locks.remove(&shard_id); - let res = match shard_lock { - Some(mut v) => { - let mut etcd_client = self.etcd_client.clone(); - v.revoke(&mut etcd_client).await?; - - info!("Finish revoking lock for shard, shard_id:{shard_id}"); - Ok(true) - } - None => { - warn!("The lock is not exist, shard_id:{shard_id}"); - Ok(false) - } - }; - - info!("Finish revoke lock for shard, shard_id:{shard_id}"); - res - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_format_shard_lock_key() { - let key_prefix = "/horaedb/defaultCluster"; - let cases = vec![ - (0, "/horaedb/defaultCluster/00000000000000000000"), - (10, "/horaedb/defaultCluster/00000000000000000010"), - (10000, "/horaedb/defaultCluster/00000000000000010000"), - (999999999, "/horaedb/defaultCluster/00000000000999999999"), - ]; - - for (shard_id, expected) in cases { - let key = ShardLock::lock_key(key_prefix, shard_id); - assert_eq!(key, expected); - } - } -} diff --git a/src/cluster/src/shard_operation.rs b/src/cluster/src/shard_operation.rs deleted file mode 100644 index a9b2e0bcdb..0000000000 --- a/src/cluster/src/shard_operation.rs +++ /dev/null @@ -1,53 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Implementation of operations on shards. - -// TODO: Currently, only a specific operation (close wal region) is implemented, -// and it is expected to encapsulate more operations on **Shard** in the future. - -use std::sync::Arc; - -use async_trait::async_trait; -use common_types::table::ShardId; -use generic_error::{BoxError, GenericResult}; -use wal::manager::WalManagerRef; - -#[async_trait] -pub trait WalRegionCloser: std::fmt::Debug + Send + Sync { - async fn close_region(&self, shard_id: ShardId) -> GenericResult<()>; -} - -pub type WalRegionCloserRef = Arc; - -#[derive(Debug)] -pub struct WalCloserAdapter { - pub data_wal: WalManagerRef, - pub manifest_wal: WalManagerRef, -} - -#[async_trait] -impl WalRegionCloser for WalCloserAdapter { - async fn close_region(&self, shard_id: ShardId) -> GenericResult<()> { - let region_id = shard_id as u64; - - self.data_wal.close_region(region_id).await.box_err()?; - self.manifest_wal.close_region(region_id).await.box_err()?; - - Ok(()) - } -} diff --git a/src/cluster/src/shard_operator.rs b/src/cluster/src/shard_operator.rs deleted file mode 100644 index 7718f50cd7..0000000000 --- a/src/cluster/src/shard_operator.rs +++ /dev/null @@ -1,453 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::collections::HashMap; - -use catalog::{ - schema::{ - CloseOptions, CloseTableRequest, CreateOptions, CreateTableRequest, DropOptions, - DropTableRequest, OpenOptions, OpenTableRequest, TableDef, - }, - table_operator::TableOperator, -}; -use common_types::table::ShardVersion; -use generic_error::BoxError; -use logger::info; -use snafu::ResultExt; -use table_engine::{ - engine::{CreateTableParams, TableEngineRef, TableState}, - table::TableId, -}; - -use crate::{ - shard_operation::WalRegionCloserRef, - shard_set::{ShardDataRef, UpdatedTableInfo}, - CloseShardWithCause, CloseTableWithCause, CreateTableWithCause, DropTableWithCause, - OpenShardWithCause, OpenTableWithCause, Result, -}; - -pub struct OpenContext { - pub catalog: String, - pub table_engine: TableEngineRef, - pub table_operator: TableOperator, - pub engine: String, -} - -impl std::fmt::Debug for OpenContext { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("OpenContext") - .field("catalog", &self.catalog) - .field("engine", &self.engine) - .finish() - } -} - -pub struct CloseContext { - pub catalog: String, - pub table_engine: TableEngineRef, - pub table_operator: TableOperator, - pub wal_region_closer: WalRegionCloserRef, - pub engine: String, -} - -impl std::fmt::Debug for CloseContext { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("OpenContext") - .field("catalog", &self.catalog) - .field("engine", &self.engine) - .finish() - } -} - -pub struct CreateTableContext { - pub catalog: String, - pub table_engine: TableEngineRef, - pub table_operator: TableOperator, - pub partition_table_engine: TableEngineRef, - pub updated_table_info: UpdatedTableInfo, - pub table_schema: common_types::schema::Schema, - pub options: HashMap, - pub create_if_not_exist: bool, - pub engine: String, -} - -impl std::fmt::Debug for CreateTableContext { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("CreateTableContext") - .field("catalog", &self.catalog) - .field("updated_table_info", &self.updated_table_info) - .field("table_schema", &self.table_schema) - .field("options", &self.options) - .field("engine", &self.engine) - .field("create_if_not_exist", &self.create_if_not_exist) - .finish() - } -} - -pub struct AlterContext { - pub catalog: String, - pub table_engine: TableEngineRef, - pub table_operator: TableOperator, - pub updated_table_info: UpdatedTableInfo, - pub engine: String, -} - -impl std::fmt::Debug for AlterContext { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("AlterContext") - .field("catalog", &self.catalog) - .field("updated_table_info", &self.updated_table_info) - .field("engine", &self.engine) - .finish() - } -} - -pub type DropTableContext = AlterContext; -pub type 
OpenTableContext = AlterContext; -pub type CloseTableContext = AlterContext; - -pub struct ShardOperator { - pub data: ShardDataRef, -} - -impl ShardOperator { - pub async fn open(&self, ctx: OpenContext) -> Result<()> { - let (shard_info, tables) = { - let data = self.data.read().unwrap(); - let shard_info = data.shard_info.clone(); - let tables = data.tables.clone(); - - (shard_info, tables) - }; - - info!("ShardOperator open sequentially begin, shard_id:{shard_info:?}"); - - let table_defs = tables - .into_iter() - .map(|info| TableDef { - catalog_name: ctx.catalog.clone(), - schema_name: info.schema_name, - id: TableId::from(info.id), - name: info.name, - }) - .collect(); - - let open_shard_request = catalog::schema::OpenShardRequest { - shard_id: shard_info.id, - table_defs, - engine: ctx.engine, - }; - let opts = OpenOptions { - table_engine: ctx.table_engine.clone(), - }; - - ctx.table_operator - .open_shard(open_shard_request, opts) - .await - .box_err() - .with_context(|| OpenShardWithCause { - msg: format!("shard_info:{shard_info:?}"), - })?; - - info!("ShardOperator open sequentially finish, shard_id:{shard_info:?}"); - - Ok(()) - } - - pub async fn close(&self, ctx: CloseContext) -> Result<()> { - let (shard_info, tables) = { - let data = self.data.read().unwrap(); - let shard_info = data.shard_info.clone(); - let tables = data.tables.clone(); - - (shard_info, tables) - }; - info!("ShardOperator close sequentially begin, shard_info:{shard_info:?}"); - - { - let mut data = self.data.write().unwrap(); - data.freeze(); - info!("Shard is frozen before closed, shard_id:{}", shard_info.id); - } - - let table_defs = tables - .into_iter() - .map(|info| TableDef { - catalog_name: ctx.catalog.clone(), - schema_name: info.schema_name, - id: TableId::from(info.id), - name: info.name, - }) - .collect(); - let close_shard_request = catalog::schema::CloseShardRequest { - shard_id: shard_info.id, - table_defs, - engine: ctx.engine, - }; - let opts = CloseOptions { - table_engine: ctx.table_engine, - }; - - ctx.table_operator - .close_shard(close_shard_request, opts) - .await - .box_err() - .with_context(|| CloseShardWithCause { - msg: format!("shard_info:{shard_info:?}"), - })?; - - // Try to close wal region - ctx.wal_region_closer - .close_region(shard_info.id) - .await - .with_context(|| CloseShardWithCause { - msg: format!("shard_info:{shard_info:?}"), - })?; - - info!("ShardOperator close sequentially finish, shard_info:{shard_info:?}"); - - Ok(()) - } - - pub async fn create_table(&self, ctx: CreateTableContext) -> Result { - let shard_info = &ctx.updated_table_info.shard_info; - let table_info = &ctx.updated_table_info.table_info; - - info!( - "ShardOperator create table sequentially begin, shard_id:{}, table:{}, shard_info:{shard_info:?}, table_info:{table_info:?}", - shard_info.id, - table_info.name, - ); - - // Create the table by operator afterwards. - let (table_engine, partition_info) = match table_info.partition_info.clone() { - Some(v) => (ctx.partition_table_engine.clone(), Some(v)), - None => (ctx.table_engine.clone(), None), - }; - - // Build create table request and options. 
- let params = CreateTableParams { - catalog_name: ctx.catalog, - schema_name: table_info.schema_name.clone(), - table_name: table_info.name.clone(), - table_schema: ctx.table_schema, - engine: ctx.engine, - table_options: ctx.options, - partition_info, - }; - let create_table_request = CreateTableRequest { - params, - table_id: Some(TableId::new(table_info.id)), - state: TableState::Stable, - shard_id: shard_info.id, - }; - - let create_opts = CreateOptions { - table_engine, - create_if_not_exists: ctx.create_if_not_exist, - }; - - let _ = ctx - .table_operator - .create_table_on_shard(create_table_request.clone(), create_opts) - .await - .box_err() - .with_context(|| CreateTableWithCause { - msg: format!("shard_info:{shard_info:?}, table_info:{table_info:?}"), - })?; - - info!( - "ShardOperator table is created by operator, shard_id:{}, table:{}", - shard_info.id, table_info.name, - ); - - let latest_version = { - let mut data = self.data.write().unwrap(); - data.try_create_table(ctx.updated_table_info.clone()) - .box_err() - .with_context(|| CreateTableWithCause { - msg: format!("shard_info:{shard_info:?}, table_info:{table_info:?}"), - })? - }; - - info!( - "ShardOperator create table sequentially finish, shard_id:{}, shard_version:{}, table:{}", - shard_info.id, shard_info.version, table_info.name, - ); - - Ok(latest_version) - } - - pub async fn drop_table(&self, ctx: DropTableContext) -> Result { - let shard_info = &ctx.updated_table_info.shard_info; - let table_info = &ctx.updated_table_info.table_info; - - info!( - "ShardOperator drop table sequentially begin, shard_id:{}, table:{}, shard_info:{shard_info:?}, table_info:{table_info:?}", - shard_info.id, - table_info.name, - ); - - // Drop the table by operator afterwards. - let drop_table_request = DropTableRequest { - catalog_name: ctx.catalog, - schema_name: table_info.schema_name.clone(), - table_name: table_info.name.clone(), - engine: ctx.engine, - }; - let drop_opts = DropOptions { - table_engine: ctx.table_engine, - }; - - ctx.table_operator - .drop_table_on_shard(drop_table_request.clone(), drop_opts) - .await - .box_err() - .with_context(|| DropTableWithCause { - msg: format!("shard_info:{shard_info:?}, table_info:{table_info:?}"), - })?; - - info!( - "ShardOperator table is dropped, shard_id:{}, table:{}", - shard_info.id, table_info.name, - ); - - // Update the shard info after the table is dropped. - let latest_version = { - let mut data = self.data.write().unwrap(); - data.try_drop_table(ctx.updated_table_info.clone()) - .box_err() - .with_context(|| DropTableWithCause { - msg: format!("shard_info:{shard_info:?}, table_info:{table_info:?}"), - })? - }; - - info!( - "ShardOperator drop table sequentially finish, latest_version:{latest_version}, shard_id:{}, old_shard_version:{}, table:{}", - shard_info.id, - shard_info.version, - table_info.name, - ); - - Ok(latest_version) - } - - pub async fn open_table(&self, ctx: OpenTableContext) -> Result<()> { - let shard_info = &ctx.updated_table_info.shard_info; - let table_info = &ctx.updated_table_info.table_info; - - info!( - "ShardOperator open table sequentially begin, shard_id:{}, table:{}, shard_info:{shard_info:?}, table_info:{table_info:?}", - shard_info.id, - table_info.name, - ); - - // Open the table by operator. - let open_table_request = OpenTableRequest { - catalog_name: ctx.catalog, - schema_name: table_info.schema_name.clone(), - table_name: table_info.name.clone(), - // FIXME: the engine type should not use the default one. 
- engine: ctx.engine, - shard_id: shard_info.id, - table_id: TableId::new(table_info.id), - }; - let open_opts = OpenOptions { - table_engine: ctx.table_engine, - }; - - ctx.table_operator - .open_table_on_shard(open_table_request.clone(), open_opts) - .await - .box_err() - .with_context(|| OpenTableWithCause { - msg: format!("shard_info:{shard_info:?}, table_info:{table_info:?}"), - })?; - - info!( - "ShardOperator table is opened by operator, shard_id:{}, table:{}", - shard_info.id, table_info.name - ); - - // Update the shard info after the table is opened. - { - let mut data = self.data.write().unwrap(); - data.try_open_table(ctx.updated_table_info.clone()) - .box_err() - .with_context(|| OpenTableWithCause { - msg: format!("shard_info:{shard_info:?}, table_info:{table_info:?}"), - })?; - } - - info!( - "ShardOperator open table sequentially finish, shard_id:{}, table:{}", - shard_info.id, table_info.name - ); - - Ok(()) - } - - pub async fn close_table(&self, ctx: CloseTableContext) -> Result<()> { - let shard_info = &ctx.updated_table_info.shard_info; - let table_info = &ctx.updated_table_info.table_info; - - info!("ShardOperator close table sequentially begin, shard_id:{}, table:{}, shard_info:{shard_info:?}, table_info:{table_info:?}", shard_info.id, table_info.name); - - // Close the table by catalog manager afterwards. - let close_table_request = CloseTableRequest { - catalog_name: ctx.catalog, - schema_name: table_info.schema_name.clone(), - table_name: table_info.name.clone(), - table_id: TableId::new(table_info.id), - // FIXME: the engine type should not use the default one. - engine: ctx.engine, - }; - let close_opts = CloseOptions { - table_engine: ctx.table_engine, - }; - - ctx.table_operator - .close_table_on_shard(close_table_request.clone(), close_opts) - .await - .box_err() - .with_context(|| CloseTableWithCause { - msg: format!("shard_info:{shard_info:?}, table_info:{table_info:?}"), - })?; - - info!( - "ShardOperator table is closed by operator, shard_id:{}, table:{}", - shard_info.id, table_info.name - ); - - // Update the shard info after the table is closed. - { - let mut data = self.data.write().unwrap(); - data.try_close_table(ctx.updated_table_info.clone()) - .box_err() - .with_context(|| CloseTableWithCause { - msg: format!("shard_info:{shard_info:?}, table_info:{table_info:?}"), - })?; - } - - info!( - "ShardOperator close table sequentially finish, shard_id:{}, table:{}", - shard_info.id, table_info.name - ); - - Ok(()) - } -} diff --git a/src/cluster/src/shard_set.rs b/src/cluster/src/shard_set.rs deleted file mode 100644 index a8244aa048..0000000000 --- a/src/cluster/src/shard_set.rs +++ /dev/null @@ -1,367 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
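A minimal usage sketch of the `ShardSet`/`Shard` pair defined below, assuming a `TablesOfShard` value has already been fetched from HoraeMeta; it mirrors what `ClusterImpl::open_shard` does with that response, and the function name is illustrative:

fn register_shard_sketch(shard_set: &ShardSet, tables_of_shard: TablesOfShard) {
    let shard_id = tables_of_shard.shard_info.id;
    let shard = Arc::new(Shard::new(tables_of_shard));

    // Register the shard; an existing entry with the same id is replaced
    // (the real code logs the replaced shard).
    let _old = shard_set.insert(shard_id, shard.clone());

    // The shard can now be looked up by id, and its info listed alongside
    // all other shards opened on this node.
    assert!(shard_set.get(shard_id).is_some());
    let _infos: Vec<ShardInfo> = shard_set
        .all_shards()
        .iter()
        .map(|shard| shard.shard_info())
        .collect();
}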
- -use std::{collections::HashMap, sync::Arc}; - -use common_types::table::ShardVersion; -use generic_error::BoxError; -use meta_client::types::{ShardId, ShardInfo, ShardStatus, TableInfo, TablesOfShard}; -use snafu::{ensure, OptionExt, ResultExt}; - -use crate::{ - shard_operator::{ - CloseContext, CloseTableContext, CreateTableContext, DropTableContext, OpenContext, - OpenTableContext, ShardOperator, - }, - OpenShardNoCause, OpenShardWithCause, Result, ShardVersionMismatch, TableAlreadyExists, - TableNotFound, UpdateFrozenShard, -}; - -/// Shard set -/// -/// Manage all shards opened on current node -#[derive(Debug, Default, Clone)] -pub struct ShardSet { - inner: Arc>>, -} - -impl ShardSet { - // Fetch all the shards, including not opened. - pub fn all_shards(&self) -> Vec { - let inner = self.inner.read().unwrap(); - inner.values().cloned().collect() - } - - // Get the shard by its id. - pub fn get(&self, shard_id: ShardId) -> Option { - let inner = self.inner.read().unwrap(); - inner.get(&shard_id).cloned() - } - - /// Remove the shard. - pub fn remove(&self, shard_id: ShardId) -> Option { - let mut inner = self.inner.write().unwrap(); - inner.remove(&shard_id) - } - - /// Insert the tables of one shard. - pub fn insert(&self, shard_id: ShardId, shard: ShardRef) -> Option { - let mut inner = self.inner.write().unwrap(); - inner.insert(shard_id, shard) - } -} - -/// Shard -/// -/// NOTICE: all write operations on a shard will be performed sequentially. -pub struct Shard { - data: ShardDataRef, - operator: tokio::sync::Mutex, -} - -impl std::fmt::Debug for Shard { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Shard").field("data", &self.data).finish() - } -} - -impl Shard { - pub fn new(tables_of_shard: TablesOfShard) -> Self { - let data = Arc::new(std::sync::RwLock::new(ShardData { - shard_info: tables_of_shard.shard_info, - tables: tables_of_shard.tables, - })); - - let operator = tokio::sync::Mutex::new(ShardOperator { data: data.clone() }); - - Self { data, operator } - } - - pub fn shard_info(&self) -> ShardInfo { - let data = self.data.read().unwrap(); - - data.shard_info.clone() - } - - pub fn find_table(&self, schema_name: &str, table_name: &str) -> Option { - let data = self.data.read().unwrap(); - data.find_table(schema_name, table_name) - } - - pub async fn open(&self, ctx: OpenContext) -> Result<()> { - let operator = self - .operator - .try_lock() - .box_err() - .context(OpenShardWithCause { - msg: "Failed to get shard operator lock", - })?; - - { - let mut data = self.data.write().unwrap(); - if !data.need_open() { - return OpenShardNoCause { - msg: "Shard is already in opening", - } - .fail(); - } - - data.begin_open(); - } - - let ret = operator.open(ctx).await; - - if ret.is_ok() { - let mut data = self.data.write().unwrap(); - data.finish_open(); - } - // If open failed, shard status is unchanged(`Opening`), so it can be - // rescheduled to open again. 
- - ret - } - - pub fn get_status(&self) -> ShardStatus { - let data = self.data.read().unwrap(); - data.shard_info.status.clone() - } - - pub fn is_opened(&self) -> bool { - let data = self.data.read().unwrap(); - data.is_opened() - } - - pub fn is_frozen(&self) -> bool { - let data = self.data.read().unwrap(); - data.is_frozen() - } - - pub async fn close(&self, ctx: CloseContext) -> Result<()> { - let operator = self.operator.lock().await; - operator.close(ctx).await - } - - pub async fn create_table(&self, ctx: CreateTableContext) -> Result { - let operator = self.operator.lock().await; - operator.create_table(ctx).await - } - - pub async fn drop_table(&self, ctx: DropTableContext) -> Result { - let operator = self.operator.lock().await; - operator.drop_table(ctx).await - } - - pub async fn open_table(&self, ctx: OpenTableContext) -> Result<()> { - let operator = self.operator.lock().await; - operator.open_table(ctx).await - } - - pub async fn close_table(&self, ctx: CloseTableContext) -> Result<()> { - let operator = self.operator.lock().await; - operator.close_table(ctx).await - } -} - -pub type ShardRef = Arc; - -#[derive(Debug, Clone)] -pub struct UpdatedTableInfo { - pub shard_info: ShardInfo, - pub table_info: TableInfo, -} - -/// Shard data -#[derive(Debug)] -pub struct ShardData { - /// Shard info - pub shard_info: ShardInfo, - - /// Tables in shard - pub tables: Vec, -} - -impl ShardData { - pub fn find_table(&self, schema_name: &str, table_name: &str) -> Option { - self.tables - .iter() - .find(|table| table.schema_name == schema_name && table.name == table_name) - .cloned() - } - - #[inline] - pub fn freeze(&mut self) { - self.shard_info.status = ShardStatus::Frozen; - } - - #[inline] - pub fn begin_open(&mut self) { - self.shard_info.status = ShardStatus::Opening; - } - - #[inline] - pub fn finish_open(&mut self) { - assert_eq!(self.shard_info.status, ShardStatus::Opening); - - self.shard_info.status = ShardStatus::Ready; - } - - #[inline] - pub fn need_open(&self) -> bool { - !self.is_opened() - } - - #[inline] - pub fn is_opened(&self) -> bool { - self.shard_info.is_opened() - } - - #[inline] - fn is_frozen(&self) -> bool { - matches!(self.shard_info.status, ShardStatus::Frozen) - } - - #[inline] - fn inc_shard_version(&mut self) { - self.shard_info.version += 1; - } - - /// Create the table on the shard, whose version will be incremented. - #[inline] - pub fn try_create_table(&mut self, updated_info: UpdatedTableInfo) -> Result { - self.try_insert_table(updated_info, true) - } - - /// Open the table on the shard, whose version won't change. - #[inline] - pub fn try_open_table(&mut self, updated_info: UpdatedTableInfo) -> Result<()> { - self.try_insert_table(updated_info, false)?; - - Ok(()) - } - - /// Try to insert the table into the shard. - /// - /// The shard version may be incremented and the new version will be - /// returned. 
- fn try_insert_table( - &mut self, - updated_info: UpdatedTableInfo, - inc_version: bool, - ) -> Result { - let UpdatedTableInfo { - shard_info: curr_shard_info, - table_info: new_table, - } = updated_info; - - ensure!( - !self.is_frozen(), - UpdateFrozenShard { - shard_id: curr_shard_info.id, - } - ); - - ensure!( - self.shard_info.version == curr_shard_info.version, - ShardVersionMismatch { - shard_info: self.shard_info.clone(), - expect_version: curr_shard_info.version, - } - ); - - let table = self.tables.iter().find(|v| v.id == new_table.id); - ensure!( - table.is_none(), - TableAlreadyExists { - msg: "the table to insert has already existed", - } - ); - - // Insert the new table into the shard. - self.tables.push(new_table); - - // Update the shard version if necessary. - if inc_version { - self.inc_shard_version(); - } - - Ok(self.shard_info.version) - } - - /// Drop the table from the shard, whose version will be incremented. - #[inline] - pub fn try_drop_table(&mut self, updated_info: UpdatedTableInfo) -> Result { - self.try_remove_table(updated_info, true) - } - - /// Close the table from the shard, whose version won't change. - #[inline] - pub fn try_close_table(&mut self, updated_info: UpdatedTableInfo) -> Result<()> { - self.try_remove_table(updated_info, false)?; - - Ok(()) - } - - /// Try to remove the table from the shard. - /// - /// The shard version may be incremented and the new version will be - /// returned. - fn try_remove_table( - &mut self, - updated_info: UpdatedTableInfo, - inc_version: bool, - ) -> Result { - let UpdatedTableInfo { - shard_info: curr_shard_info, - table_info: new_table, - } = updated_info; - - ensure!( - !self.is_frozen(), - UpdateFrozenShard { - shard_id: curr_shard_info.id, - } - ); - - ensure!( - self.shard_info.version == curr_shard_info.version, - ShardVersionMismatch { - shard_info: self.shard_info.clone(), - expect_version: curr_shard_info.version, - } - ); - - let table_idx = self - .tables - .iter() - .position(|v| v.id == new_table.id) - .with_context(|| TableNotFound { - msg: format!("the table to remove is not found, table:{new_table:?}"), - })?; - - // Remove the table from the shard. - self.tables.swap_remove(table_idx); - - // Update the shard version if necessary. - if inc_version { - self.inc_shard_version(); - } - - Ok(self.shard_info.version) - } -} - -pub type ShardDataRef = Arc>; diff --git a/src/cluster/src/topology.rs b/src/cluster/src/topology.rs deleted file mode 100644 index 5333d8f6b7..0000000000 --- a/src/cluster/src/topology.rs +++ /dev/null @@ -1,210 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
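Before the topology code below, a short illustrative sketch of how the removed `ShardSet` and `Shard` were intended to be driven from the node side; `tables_of_shard` is assumed to come from the meta client's open-shard response and its construction is not shown:

    use std::sync::Arc;

    // Illustrative only: register a shard with the set and look it up again.
    fn register(shard_set: &ShardSet, shard_id: ShardId, tables_of_shard: TablesOfShard) -> ShardRef {
        let shard = Arc::new(Shard::new(tables_of_shard));
        shard_set.insert(shard_id, shard.clone());
        shard
    }

    fn lookup(shard_set: &ShardSet, shard_id: ShardId) -> Option<ShardRef> {
        // Reads go through the inner RwLock; ShardRef is an Arc, so cloning is cheap.
        shard_set.get(shard_id)
    }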
- -use std::collections::HashMap; - -use common_types::{ - schema::{SchemaId, SchemaName}, - table::TableName, -}; -use meta_client::types::{ClusterNodesRef, RouteEntry, RouteTablesResponse}; - -use crate::config::SchemaConfig; - -/// RouteSlot is used to prevent cache penetration, that is to say, the -/// `NotExist` routing result of a table is also kept in the memory. -#[derive(Debug, Clone)] -pub enum RouteSlot { - Exist(RouteEntry), - NotExist, -} - -#[derive(Debug, Default)] -struct SchemaTopology { - id: SchemaId, - config: SchemaConfig, - /// The [RouteSlot] in the `route_slots` only can be `Exist` or `NotExist`. - route_slots: HashMap, -} - -#[derive(Debug, Default)] -pub struct SchemaTopologies { - version: u64, - topologies: HashMap, -} - -#[derive(Clone, Debug, Default)] -pub struct NodeTopology { - pub version: u64, - pub nodes: ClusterNodesRef, -} - -#[derive(Debug, Default)] -pub struct ClusterTopology { - schemas: Option, - nodes: Option, -} - -#[derive(Debug, Default, Clone)] -pub struct RouteTablesResult { - pub version: u64, - pub route_entries: HashMap, - pub missing_tables: Vec, -} - -impl From for RouteTablesResponse { - fn from(result: RouteTablesResult) -> Self { - RouteTablesResponse { - cluster_topology_version: result.version, - entries: result.route_entries, - } - } -} - -impl SchemaTopologies { - fn route_tables(&self, schema_name: &str, tables: &[TableName]) -> RouteTablesResult { - if let Some(schema_topology) = self.topologies.get(schema_name) { - let mut route_entries = HashMap::with_capacity(tables.len()); - let mut missing_tables = vec![]; - - for table in tables { - match schema_topology.route_slots.get(table) { - None => missing_tables.push(table.clone()), - Some(RouteSlot::Exist(route_entry)) => { - route_entries.insert(table.clone(), route_entry.clone()); - } - Some(RouteSlot::NotExist) => (), - }; - } - - return RouteTablesResult { - version: self.version, - route_entries, - missing_tables, - }; - } - - RouteTablesResult { - version: self.version, - route_entries: Default::default(), - missing_tables: tables.to_vec(), - } - } - - /// Update the routing information into the topology if its version is - /// valid. - /// - /// Return false if the version is outdated. - fn maybe_update_tables( - &mut self, - schema_name: &str, - tables: HashMap, - version: u64, - ) -> bool { - if ClusterTopology::is_outdated_version(self.version, version) { - return false; - } - - self.topologies - .entry(schema_name.to_string()) - .or_default() - .update_tables(tables); - - true - } -} - -impl NodeTopology { - fn maybe_update_nodes(&mut self, nodes: ClusterNodesRef, version: u64) -> bool { - if ClusterTopology::is_newer_version(self.version, version) { - self.nodes = nodes; - true - } else { - false - } - } -} - -impl ClusterTopology { - #[inline] - fn is_outdated_version(current_version: u64, check_version: u64) -> bool { - check_version < current_version - } - - #[inline] - fn is_newer_version(current_version: u64, check_version: u64) -> bool { - check_version > current_version - } - - pub fn nodes(&self) -> Option { - self.nodes.clone() - } - - /// Try to update the nodes topology of the cluster. - /// - /// If the provided version is not newer, then the update will be - /// ignored. 
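One detail of the removed `ClusterTopology` worth spelling out: table-route updates are applied as long as the incoming version is not older (equal versions still update), whereas node updates require a strictly newer version. A small free-standing sketch of those two guards, mirroring `is_outdated_version` and `is_newer_version`:

    // Free-standing copies of the two version guards used by the topology code.
    fn is_outdated_version(current: u64, incoming: u64) -> bool {
        // Equal versions are not outdated, so table-route updates still apply.
        incoming < current
    }

    fn is_newer_version(current: u64, incoming: u64) -> bool {
        // Node updates are only taken when the incoming version is strictly newer.
        incoming > current
    }

    fn main() {
        assert!(!is_outdated_version(5, 5)); // same-version table update: accepted
        assert!(!is_newer_version(5, 5));    // same-version node update: ignored
    }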
- pub fn maybe_update_nodes(&mut self, nodes: ClusterNodesRef, version: u64) -> bool { - if self.nodes.is_none() { - let nodes = NodeTopology { version, nodes }; - self.nodes = Some(nodes); - return true; - } - - self.nodes - .as_mut() - .unwrap() - .maybe_update_nodes(nodes, version) - } -} - -impl SchemaTopology { - fn update_tables(&mut self, tables: HashMap) { - for (table_name, slot) in tables { - self.route_slots.insert(table_name, slot); - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_outdated_version() { - // One case is (current_version, check_version, is_outdated) - let cases = [(1, 2, false), (1, 1, false), (1, 0, true)]; - for (current_version, check_version, is_outdated) in cases { - assert_eq!( - is_outdated, - ClusterTopology::is_outdated_version(current_version, check_version) - ); - } - } - - #[test] - fn test_newer_version() { - // One case is (current_version, check_version, is_newer) - let cases = [(1, 2, true), (1, 1, false), (1, 0, false)]; - for (current_version, check_version, is_newer) in cases { - assert_eq!( - is_newer, - ClusterTopology::is_newer_version(current_version, check_version) - ); - } - } -} diff --git a/src/common_types/Cargo.toml b/src/common_types/Cargo.toml deleted file mode 100644 index 0413063b4e..0000000000 --- a/src/common_types/Cargo.toml +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "common_types" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[features] -test = [] - -[dependencies] -# In alphabetical order -arrow = { workspace = true } -arrow_ext = { workspace = true } -bytes_ext = { workspace = true } -chrono = { workspace = true } -datafusion = { workspace = true } -hash_ext = { workspace = true } -horaedbproto = { workspace = true } -macros = { workspace = true } -paste = { workspace = true } -prost = { workspace = true } -rand = { workspace = true } -seahash = "4.1.0" -serde = { workspace = true } -serde_json = { workspace = true } -snafu = { workspace = true } -sqlparser = { workspace = true } -uuid = { workspace = true, features = ["fast-rng"] } diff --git a/src/common_types/src/bitset.rs b/src/common_types/src/bitset.rs deleted file mode 100644 index 53f629a92d..0000000000 --- a/src/common_types/src/bitset.rs +++ /dev/null @@ -1,612 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// Fork from https://github.com/influxdata/influxdb_iox/blob/123ba32fb9d8941f888d79f7608448e0cd722253/arrow_util/src/bitset.rs. - -use std::ops::Range; - -use arrow::buffer::{BooleanBuffer, Buffer}; - -/// An arrow-compatible mutable bitset implementation -/// -/// Note: This currently operates on individual bytes at a time -/// it could be optimised to instead operate on usize blocks -#[derive(Debug, Default, Clone)] -pub struct BitSet { - /// The underlying data - /// - /// Data is stored in the least significant bit of a byte first - buffer: Vec, - - /// The length of this mask in bits - len: usize, -} - -impl BitSet { - /// Creates a new BitSet - pub fn new() -> Self { - Self::default() - } - - /// Creates a new BitSet with `count` unset bits. - pub fn with_size(count: usize) -> Self { - let mut bitset = Self::default(); - bitset.append_unset(count); - bitset - } - - /// Reserve space for `count` further bits - pub fn reserve(&mut self, count: usize) { - let new_buf_len = (self.len + count + 7) >> 3; - self.buffer.reserve(new_buf_len); - } - - /// Appends `count` unset bits - pub fn append_unset(&mut self, count: usize) { - self.len += count; - let new_buf_len = (self.len + 7) >> 3; - self.buffer.resize(new_buf_len, 0); - } - - /// Appends `count` set bits - pub fn append_set(&mut self, count: usize) { - let new_len = self.len + count; - let new_buf_len = (new_len + 7) >> 3; - - let skew = self.len & 7; - if skew != 0 { - *self.buffer.last_mut().unwrap() |= 0xFF << skew; - } - - self.buffer.resize(new_buf_len, 0xFF); - - let rem = new_len & 7; - if rem != 0 { - *self.buffer.last_mut().unwrap() &= (1 << rem) - 1; - } - - self.len = new_len; - } - - /// Truncates the bitset to the provided length - pub fn truncate(&mut self, len: usize) { - let new_buf_len = (len + 7) >> 3; - self.buffer.truncate(new_buf_len); - let overrun = len & 7; - if overrun > 0 { - *self.buffer.last_mut().unwrap() &= (1 << overrun) - 1; - } - self.len = len; - } - - /// Extends this [`BitSet`] by the context of `other` - pub fn extend_from(&mut self, other: &BitSet) { - self.append_bits(other.len, &other.buffer) - } - - /// Extends this [`BitSet`] by `range` elements in `other` - pub fn extend_from_range(&mut self, other: &BitSet, range: Range) { - let count = range.end - range.start; - if count == 0 { - return; - } - - let start_byte = range.start >> 3; - let end_byte = (range.end + 7) >> 3; - let skew = range.start & 7; - - // `append_bits` requires the provided `to_set` to be byte aligned, therefore - // if the range being copied is not byte aligned we must first append - // the leading bits to reach a byte boundary - if skew == 0 { - // No skew can simply append bytes directly - self.append_bits(count, &other.buffer[start_byte..end_byte]) - } else if start_byte + 1 == end_byte { - // Append bits from single byte - self.append_bits(count, &[other.buffer[start_byte] >> skew]) - } else { - // Append trailing bits from first byte 
to reach byte boundary, then append - // bits from the remaining byte-aligned mask - let offset = 8 - skew; - self.append_bits(offset, &[other.buffer[start_byte] >> skew]); - self.append_bits(count - offset, &other.buffer[(start_byte + 1)..end_byte]); - } - } - - /// Appends `count` boolean values from the slice of packed bits - pub fn append_bits(&mut self, count: usize, to_set: &[u8]) { - assert_eq!((count + 7) >> 3, to_set.len()); - - let new_len = self.len + count; - let new_buf_len = (new_len + 7) >> 3; - self.buffer.reserve(new_buf_len - self.buffer.len()); - - let whole_bytes = count >> 3; - let overrun = count & 7; - - let skew = self.len & 7; - if skew == 0 { - self.buffer.extend_from_slice(&to_set[..whole_bytes]); - if overrun > 0 { - let masked = to_set[whole_bytes] & ((1 << overrun) - 1); - self.buffer.push(masked) - } - - self.len = new_len; - debug_assert_eq!(self.buffer.len(), new_buf_len); - return; - } - - for to_set_byte in &to_set[..whole_bytes] { - let low = *to_set_byte << skew; - let high = *to_set_byte >> (8 - skew); - - *self.buffer.last_mut().unwrap() |= low; - self.buffer.push(high); - } - - if overrun > 0 { - let masked = to_set[whole_bytes] & ((1 << overrun) - 1); - let low = masked << skew; - *self.buffer.last_mut().unwrap() |= low; - - if overrun > 8 - skew { - let high = masked >> (8 - skew); - self.buffer.push(high) - } - } - - self.len = new_len; - debug_assert_eq!(self.buffer.len(), new_buf_len); - } - - /// Sets a given bit - pub fn set(&mut self, idx: usize) { - assert!(idx <= self.len); - - let byte_idx = idx >> 3; - let bit_idx = idx & 7; - self.buffer[byte_idx] |= 1 << bit_idx; - } - - /// Returns if the given index is set - pub fn get(&self, idx: usize) -> bool { - assert!(idx <= self.len); - - let byte_idx = idx >> 3; - let bit_idx = idx & 7; - (self.buffer[byte_idx] >> bit_idx) & 1 != 0 - } - - /// Converts this BitSet to a buffer compatible with arrows boolean encoding - pub fn to_arrow(&self) -> BooleanBuffer { - let offset = 0; - BooleanBuffer::new(Buffer::from(&self.buffer), offset, self.len) - } - - /// Returns the number of values stored in the bitset - pub fn len(&self) -> usize { - self.len - } - - /// Returns if this bitset is empty - pub fn is_empty(&self) -> bool { - self.len == 0 - } - - /// Returns the number of bytes used by this bitset - pub fn byte_len(&self) -> usize { - self.buffer.len() - } - - /// Return the raw packed bytes used by this bitset - pub fn bytes(&self) -> &[u8] { - &self.buffer - } - - /// Return `true` if all bits in the [`BitSet`] are currently set. - pub fn is_all_set(&self) -> bool { - // An empty bitmap has no set bits. - if self.len == 0 { - return false; - } - - // Check all the bytes in the bitmap that have all their bits considered - // part of the bit set. - let full_blocks = (self.len / 8).saturating_sub(1); - if !self.buffer.iter().take(full_blocks).all(|&v| v == u8::MAX) { - return false; - } - - // Check the last byte of the bitmap that may only be partially part of - // the bit set, and therefore need masking to check only the relevant - // bits. - let mask = match self.len % 8 { - 1..=8 => !(0xFF << (self.len % 8)), // LSB mask - 0 => 0xFF, - _ => unreachable!(), - }; - *self.buffer.last().unwrap() == mask - } - - /// Return `true` if all bits in the [`BitSet`] are currently unset. 
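The packing order is easy to get backwards, so here is a minimal illustrative snippet (assuming the `BitSet` above is in scope) showing the least-significant-bit-first layout that `append_bits` and `get` agree on:

    // Bit i lives at buffer[i >> 3], bit position (i & 7) inside that byte.
    let mut bits = BitSet::new();
    // Append 4 bits [true, false, true, true]; packed LSB-first that is 0b0000_1101.
    bits.append_bits(4, &[0b0000_1101]);
    assert!(bits.get(0) && !bits.get(1) && bits.get(2) && bits.get(3));
    assert_eq!(bits.bytes(), &[0b0000_1101]);
    assert_eq!(bits.len(), 4);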
- pub fn is_all_unset(&self) -> bool { - self.buffer.iter().all(|&v| v == 0) - } -} - -/// Returns an iterator over set bit positions in increasing order -pub fn iter_set_positions(bytes: &[u8]) -> impl Iterator + '_ { - iter_set_positions_with_offset(bytes, 0) -} - -/// Returns an iterator over set bit positions in increasing order starting -/// at the provided bit offset -pub fn iter_set_positions_with_offset( - bytes: &[u8], - offset: usize, -) -> impl Iterator + '_ { - let mut byte_idx = offset >> 3; - let mut in_progress = bytes.get(byte_idx).cloned().unwrap_or(0); - - let skew = offset & 7; - in_progress &= 0xFF << skew; - - std::iter::from_fn(move || loop { - if in_progress != 0 { - let bit_pos = in_progress.trailing_zeros(); - in_progress ^= 1 << bit_pos; - return Some((byte_idx << 3) + (bit_pos as usize)); - } - byte_idx += 1; - in_progress = *bytes.get(byte_idx)?; - }) -} - -#[cfg(test)] -mod tests { - use arrow::array::BooleanBufferBuilder; - use rand::{prelude::*, rngs::OsRng}; - - use super::*; - - /// Computes a compacted representation of a given bool array - fn compact_bools(bools: &[bool]) -> Vec { - bools - .chunks(8) - .map(|x| { - let mut collect = 0_u8; - for (idx, set) in x.iter().enumerate() { - if *set { - collect |= 1 << idx - } - } - collect - }) - .collect() - } - - fn iter_set_bools(bools: &[bool]) -> impl Iterator + '_ { - bools - .iter() - .enumerate() - .filter(|&(_, y)| *y) - .map(|(x, _)| x) - } - - #[test] - fn test_compact_bools() { - let bools = &[ - false, false, true, true, false, false, true, false, true, false, - ]; - let collected = compact_bools(bools); - let indexes: Vec<_> = iter_set_bools(bools).collect(); - assert_eq!(collected.as_slice(), &[0b01001100, 0b00000001]); - assert_eq!(indexes.as_slice(), &[2, 3, 6, 8]) - } - - #[test] - fn test_bit_mask() { - let mut mask = BitSet::new(); - - mask.append_bits(8, &[0b11111111]); - let d1 = mask.buffer.clone(); - - mask.append_bits(3, &[0b01010010]); - let d2 = mask.buffer.clone(); - - mask.append_bits(5, &[0b00010100]); - let d3 = mask.buffer.clone(); - - mask.append_bits(2, &[0b11110010]); - let d4 = mask.buffer.clone(); - - mask.append_bits(15, &[0b11011010, 0b01010101]); - let d5 = mask.buffer.clone(); - - assert_eq!(d1.as_slice(), &[0b11111111]); - assert_eq!(d2.as_slice(), &[0b11111111, 0b00000010]); - assert_eq!(d3.as_slice(), &[0b11111111, 0b10100010]); - assert_eq!(d4.as_slice(), &[0b11111111, 0b10100010, 0b00000010]); - assert_eq!( - d5.as_slice(), - &[0b11111111, 0b10100010, 0b01101010, 0b01010111, 0b00000001] - ); - - assert!(mask.get(0)); - assert!(!mask.get(8)); - assert!(mask.get(9)); - assert!(mask.get(19)); - } - - fn make_rng() -> StdRng { - let seed = OsRng.next_u64(); - println!("Seed: {seed}"); - StdRng::seed_from_u64(seed) - } - - #[test] - fn test_bit_mask_all_set() { - let mut mask = BitSet::new(); - let mut all_bools = vec![]; - let mut rng = make_rng(); - - for _ in 0..100 { - let mask_length = (rng.next_u32() % 50) as usize; - let bools: Vec<_> = std::iter::repeat(true).take(mask_length).collect(); - - let collected = compact_bools(&bools); - mask.append_bits(mask_length, &collected); - all_bools.extend_from_slice(&bools); - } - - let collected = compact_bools(&all_bools); - assert_eq!(mask.buffer, collected); - - let expected_indexes: Vec<_> = iter_set_bools(&all_bools).collect(); - let actual_indexes: Vec<_> = iter_set_positions(&mask.buffer).collect(); - assert_eq!(expected_indexes, actual_indexes); - } - - #[test] - fn test_bit_mask_fuzz() { - let mut mask = 
BitSet::new(); - let mut all_bools = vec![]; - let mut rng = make_rng(); - - for _ in 0..100 { - let mask_length = (rng.next_u32() % 50) as usize; - let bools: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0)) - .take(mask_length) - .collect(); - - let collected = compact_bools(&bools); - mask.append_bits(mask_length, &collected); - all_bools.extend_from_slice(&bools); - } - - let collected = compact_bools(&all_bools); - assert_eq!(mask.buffer, collected); - - let expected_indexes: Vec<_> = iter_set_bools(&all_bools).collect(); - let actual_indexes: Vec<_> = iter_set_positions(&mask.buffer).collect(); - assert_eq!(expected_indexes, actual_indexes); - - if !all_bools.is_empty() { - for _ in 0..10 { - let offset = rng.next_u32() as usize % all_bools.len(); - - let expected_indexes: Vec<_> = iter_set_bools(&all_bools[offset..]) - .map(|x| x + offset) - .collect(); - - let actual_indexes: Vec<_> = - iter_set_positions_with_offset(&mask.buffer, offset).collect(); - - assert_eq!(expected_indexes, actual_indexes); - } - } - - for index in actual_indexes { - assert!(mask.get(index)); - } - } - - #[test] - fn test_append_fuzz() { - let mut mask = BitSet::new(); - let mut all_bools = vec![]; - let mut rng = make_rng(); - - for _ in 0..100 { - let len = (rng.next_u32() % 32) as usize; - let set = rng.next_u32() & 1 == 0; - - match set { - true => mask.append_set(len), - false => mask.append_unset(len), - } - - all_bools.extend(std::iter::repeat(set).take(len)); - - let collected = compact_bools(&all_bools); - assert_eq!(mask.buffer, collected); - } - } - - #[test] - fn test_truncate_fuzz() { - let mut mask = BitSet::new(); - let mut all_bools = vec![]; - let mut rng = make_rng(); - - for _ in 0..100 { - let mask_length = (rng.next_u32() % 32) as usize; - let bools: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0)) - .take(mask_length) - .collect(); - - let collected = compact_bools(&bools); - mask.append_bits(mask_length, &collected); - all_bools.extend_from_slice(&bools); - - if !all_bools.is_empty() { - let truncate = rng.next_u32() as usize % all_bools.len(); - mask.truncate(truncate); - all_bools.truncate(truncate); - } - - let collected = compact_bools(&all_bools); - assert_eq!(mask.buffer, collected); - } - } - - #[test] - fn test_extend_range_fuzz() { - let mut rng = make_rng(); - let src_len = 32; - let src_bools: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0)) - .take(src_len) - .collect(); - - let mut src_mask = BitSet::new(); - src_mask.append_bits(src_len, &compact_bools(&src_bools)); - - let mut dst_bools = Vec::new(); - let mut dst_mask = BitSet::new(); - - for _ in 0..100 { - let a = rng.next_u32() as usize % src_len; - let b = rng.next_u32() as usize % src_len; - - let start = a.min(b); - let end = a.max(b); - - dst_bools.extend_from_slice(&src_bools[start..end]); - dst_mask.extend_from_range(&src_mask, start..end); - - let collected = compact_bools(&dst_bools); - assert_eq!(dst_mask.buffer, collected); - } - } - - #[test] - fn test_arrow_compat() { - let bools = &[ - false, false, true, true, false, false, true, false, true, false, false, true, - ]; - - let mut builder = BooleanBufferBuilder::new(bools.len()); - builder.append_slice(bools); - let buffer = builder.finish(); - - let collected = compact_bools(bools); - let mut mask = BitSet::new(); - mask.append_bits(bools.len(), &collected); - let mask_buffer = mask.to_arrow(); - - assert_eq!(collected.as_slice(), buffer.values()); - assert_eq!(buffer.values(), mask_buffer.into_inner().as_slice()); 
- } - - #[test] - #[should_panic = "idx <= self.len"] - fn test_bitset_set_get_out_of_bounds() { - let mut v = BitSet::with_size(4); - - // The bitset is of length 4, which is backed by a single byte with 8 - // bits of storage capacity. - // - // Accessing bits past the 4 the bitset "contains" should not succeed. - - v.get(5); - v.set(5); - } - - #[test] - fn test_all_set_unset() { - for i in 1..100 { - let mut v = BitSet::new(); - v.append_set(i); - assert!(v.is_all_set()); - assert!(!v.is_all_unset()); - } - } - - #[test] - fn test_all_set_unset_multi_byte() { - let mut v = BitSet::new(); - - // Bitmap is composed of entirely set bits. - v.append_set(100); - assert!(v.is_all_set()); - assert!(!v.is_all_unset()); - - // Now the bitmap is neither composed of entirely set, nor entirely - // unset bits. - v.append_unset(1); - assert!(!v.is_all_set()); - assert!(!v.is_all_unset()); - - let mut v = BitSet::new(); - - // Bitmap is composed of entirely unset bits. - v.append_unset(100); - assert!(!v.is_all_set()); - assert!(v.is_all_unset()); - - // And once again, it is neither all set, nor all unset. - v.append_set(1); - assert!(!v.is_all_set()); - assert!(!v.is_all_unset()); - } - - #[test] - fn test_all_set_unset_single_byte() { - let mut v = BitSet::new(); - - // Bitmap is composed of entirely set bits. - v.append_set(2); - assert!(v.is_all_set()); - assert!(!v.is_all_unset()); - - // Now the bitmap is neither composed of entirely set, nor entirely - // unset bits. - v.append_unset(1); - assert!(!v.is_all_set()); - assert!(!v.is_all_unset()); - - let mut v = BitSet::new(); - - // Bitmap is composed of entirely unset bits. - v.append_unset(2); - assert!(!v.is_all_set()); - assert!(v.is_all_unset()); - - // And once again, it is neither all set, nor all unset. - v.append_set(1); - assert!(!v.is_all_set()); - assert!(!v.is_all_unset()); - } - - #[test] - fn test_all_set_unset_empty() { - let v = BitSet::new(); - assert!(!v.is_all_set()); - assert!(v.is_all_unset()); - } -} diff --git a/src/common_types/src/cluster.rs b/src/common_types/src/cluster.rs deleted file mode 100644 index ad302023e9..0000000000 --- a/src/common_types/src/cluster.rs +++ /dev/null @@ -1,26 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use serde::{Deserialize, Serialize}; - -/// Type to distinguish different node type in cluster mode. 
-#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Eq)] -pub enum NodeType { - #[default] - HoraeDB, - CompactionServer, -} diff --git a/src/common_types/src/column.rs b/src/common_types/src/column.rs deleted file mode 100644 index af692b6aa4..0000000000 --- a/src/common_types/src/column.rs +++ /dev/null @@ -1,396 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// Fork from https://github.com/influxdata/influxdb_iox/blob/7d878b21bd78cf7d0618804c1ccf8506521703bd/mutable_batch/src/column.rs. - -//! A [`Column`] stores the rows for a given column name - -use std::{fmt::Formatter, mem}; - -use arrow::error::ArrowError; -use bytes_ext::Bytes; -use snafu::{Backtrace, Snafu}; - -use crate::{ - bitset::BitSet, - datum::{Datum, DatumKind}, - string::StringBytes, - time::Timestamp, -}; - -#[derive(Debug, Snafu)] -#[allow(missing_copy_implementations, missing_docs)] -pub enum Error { - #[snafu(display( - "Invalid null mask, expected to be {} bytes but was {}", - expected_bytes, - actual_bytes - ))] - InvalidNullMask { - expected_bytes: usize, - actual_bytes: usize, - }, - - #[snafu(display("Internal MUB error constructing Arrow Array: {}", source))] - CreatingArrowArray { source: ArrowError }, - - #[snafu(display("Data type conflict, msg:{:?}.\nBacktrace:\n{}", msg, backtrace))] - ConflictType { msg: String, backtrace: Backtrace }, - - #[snafu(display("Data type unsupported, kind:{:?}.\nBacktrace:\n{}", kind, backtrace))] - UnsupportedType { - kind: DatumKind, - backtrace: Backtrace, - }, -} - -/// A specialized `Error` for [`Column`] errors -pub type Result = std::result::Result; - -/// Stores the actual data for columns in a chunk along with summary -/// statistics -#[derive(Debug, Clone)] -pub struct Column { - pub(crate) datum_kind: DatumKind, - pub(crate) valid: BitSet, - pub(crate) data: ColumnData, - pub(crate) to_insert: usize, -} - -/// The data for a column -#[derive(Debug, Clone)] -#[allow(missing_docs)] -pub enum ColumnData { - F64(Vec), - F32(Vec), - I64(Vec), - I32(Vec), - I16(Vec), - I8(Vec), - U64(Vec), - U32(Vec), - U16(Vec), - U8(Vec), - String(Vec), - Varbinary(Vec>), - Bool(BitSet), -} - -impl std::fmt::Display for ColumnData { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - Self::F64(col_data) => write!(f, "F64({})", col_data.len()), - Self::F32(col_data) => write!(f, "F32({})", col_data.len()), - Self::I64(col_data) => write!(f, "I64({})", col_data.len()), - Self::I32(col_data) => write!(f, "I32({})", col_data.len()), - Self::I16(col_data) => write!(f, "I16({})", col_data.len()), - Self::I8(col_data) => write!(f, "I8({})", col_data.len()), - Self::U64(col_data) => write!(f, "U64({})", col_data.len()), - Self::U32(col_data) => write!(f, "U32({})", col_data.len()), - 
Self::U16(col_data) => write!(f, "U16({})", col_data.len()), - Self::U8(col_data) => write!(f, "U8({})", col_data.len()), - Self::String(col_data) => write!(f, "StringBytes({})", col_data.len()), - Self::Varbinary(col_data) => write!(f, "Varbinary({})", col_data.len()), - Self::Bool(col_data) => write!(f, "Bool({})", col_data.len()), - } - } -} - -impl Column { - pub fn with_capacity(row_count: usize, datum_kind: DatumKind) -> Result { - let mut valid = BitSet::new(); - valid.append_unset(row_count); - - let data = match datum_kind { - DatumKind::Boolean => { - let mut data = BitSet::new(); - data.append_unset(row_count); - ColumnData::Bool(data) - } - DatumKind::UInt64 => ColumnData::U64(vec![0; row_count]), - DatumKind::UInt32 => ColumnData::U32(vec![0; row_count]), - DatumKind::UInt16 => ColumnData::U16(vec![0; row_count]), - DatumKind::UInt8 => ColumnData::U8(vec![0; row_count]), - DatumKind::Double => ColumnData::F64(vec![0.0; row_count]), - DatumKind::Float => ColumnData::F32(vec![0.0; row_count]), - DatumKind::Int64 | DatumKind::Timestamp => ColumnData::I64(vec![0; row_count]), - DatumKind::Int32 => ColumnData::I32(vec![0; row_count]), - DatumKind::Int16 => ColumnData::I16(vec![0; row_count]), - DatumKind::Int8 => ColumnData::I8(vec![0; row_count]), - DatumKind::String => ColumnData::String(vec![StringBytes::new(); row_count]), - DatumKind::Varbinary => ColumnData::Varbinary(vec![vec![]; row_count]), - kind => { - return UnsupportedType { kind }.fail(); - } - }; - - Ok(Self { - datum_kind, - valid, - data, - to_insert: 0, - }) - } - - pub fn append_column(&mut self, mut column: Column) -> Result<()> { - assert_eq!(self.datum_kind, column.datum_kind); - self.valid.append_set(column.len()); - self.to_insert += column.len(); - match (&mut self.data, &mut column.data) { - (ColumnData::F64(data), ColumnData::F64(ref mut column_data)) => { - data.append(column_data) - } - (ColumnData::F32(data), ColumnData::F32(ref mut column_data)) => { - data.append(column_data) - } - (ColumnData::I64(data), ColumnData::I64(ref mut column_data)) => { - data.append(column_data) - } - (ColumnData::I32(data), ColumnData::I32(ref mut column_data)) => { - data.append(column_data) - } - (ColumnData::I16(data), ColumnData::I16(ref mut column_data)) => { - data.append(column_data) - } - (ColumnData::I8(data), ColumnData::I8(ref mut column_data)) => data.append(column_data), - (ColumnData::U64(data), ColumnData::U64(ref mut column_data)) => { - data.append(column_data) - } - (ColumnData::U32(data), ColumnData::U32(ref mut column_data)) => { - data.append(column_data) - } - (ColumnData::U16(data), ColumnData::U16(ref mut column_data)) => { - data.append(column_data) - } - (ColumnData::U8(data), ColumnData::U8(ref mut column_data)) => data.append(column_data), - (ColumnData::String(data), ColumnData::String(ref mut column_data)) => { - data.append(column_data) - } - (ColumnData::Varbinary(data), ColumnData::Varbinary(ref mut column_data)) => { - data.append(column_data) - } - (ColumnData::Bool(data), ColumnData::Bool(column_data)) => { - data.extend_from(column_data) - } - (expect, given) => { - return ConflictType { - msg: format!("column data type expect:{expect:?}, but given:{given:?}"), - } - .fail() - } - } - - Ok(()) - } - - pub fn append_nulls(&mut self, count: usize) { - self.to_insert += count; - } - - pub fn append_datum_ref(&mut self, value: &Datum) -> Result<()> { - match (&mut self.data, value) { - (ColumnData::F64(data), Datum::Double(v)) => data[self.to_insert] = *v, - (ColumnData::F32(data), 
Datum::Float(v)) => data[self.to_insert] = *v, - (ColumnData::I64(data), Datum::Int64(v)) => data[self.to_insert] = *v, - (ColumnData::I64(data), Datum::Timestamp(v)) => data[self.to_insert] = v.as_i64(), - (ColumnData::I32(data), Datum::Int32(v)) => data[self.to_insert] = *v, - (ColumnData::I16(data), Datum::Int16(v)) => data[self.to_insert] = *v, - (ColumnData::I8(data), Datum::Int8(v)) => data[self.to_insert] = *v, - (ColumnData::U64(data), Datum::UInt64(v)) => data[self.to_insert] = *v, - (ColumnData::U32(data), Datum::UInt32(v)) => data[self.to_insert] = *v, - (ColumnData::U16(data), Datum::UInt16(v)) => data[self.to_insert] = *v, - (ColumnData::U8(data), Datum::UInt8(v)) => data[self.to_insert] = *v, - (ColumnData::String(data), Datum::String(v)) => { - data[self.to_insert] = StringBytes::from(v.as_str()) - } - (ColumnData::Varbinary(data), Datum::Varbinary(v)) => { - data[self.to_insert] = v.to_vec(); - } - (ColumnData::Bool(data), Datum::Boolean(v)) => { - if *v { - data.set(self.to_insert); - } - } - - (column_data, datum) => { - return ConflictType { - msg: format!( - "column data type:{:?} but got datum type:{}", - column_data, - datum.kind() - ), - } - .fail() - } - } - self.valid.set(self.to_insert); - self.to_insert += 1; - Ok(()) - } - - pub fn get_datum(&self, idx: usize) -> Datum { - if !self.valid.get(idx) { - return Datum::Null; - } - match &self.data { - ColumnData::F64(data) => Datum::Double(data[idx]), - ColumnData::F32(data) => Datum::Float(data[idx]), - ColumnData::I64(data) => match self.datum_kind { - DatumKind::Timestamp => Datum::Timestamp(Timestamp::from(data[idx])), - DatumKind::Int64 => Datum::Int64(data[idx]), - _ => unreachable!(), - }, - ColumnData::I32(data) => Datum::Int32(data[idx]), - ColumnData::I16(data) => Datum::Int16(data[idx]), - ColumnData::I8(data) => Datum::Int8(data[idx]), - ColumnData::U64(data) => Datum::UInt64(data[idx]), - ColumnData::U32(data) => Datum::UInt32(data[idx]), - ColumnData::U16(data) => Datum::UInt16(data[idx]), - ColumnData::U8(data) => Datum::UInt8(data[idx]), - ColumnData::String(data) => Datum::String(data[idx].clone()), - ColumnData::Varbinary(data) => Datum::Varbinary(Bytes::from(data[idx].clone())), - ColumnData::Bool(data) => Datum::Boolean(data.get(idx)), - } - } - - /// Returns the [`DatumKind`] of this column - pub fn datum_kind(&self) -> DatumKind { - self.datum_kind - } - - /// Returns the validity bitmask of this column - pub fn valid_mask(&self) -> &BitSet { - &self.valid - } - - /// Returns a reference to this column's data - pub fn data(&self) -> &ColumnData { - &self.data - } - - /// Ensures that the total length of this column is `len` rows, - /// padding it with trailing NULLs if necessary - #[allow(dead_code)] - pub(crate) fn push_nulls_to_len(&mut self, len: usize) { - if self.valid.len() == len { - return; - } - assert!(len > self.valid.len(), "cannot shrink column"); - let delta = len - self.valid.len(); - self.valid.append_unset(delta); - - match &mut self.data { - ColumnData::F64(data) => { - data.resize(len, 0.); - } - ColumnData::F32(data) => { - data.resize(len, 0.); - } - ColumnData::I64(data) => { - data.resize(len, 0); - } - ColumnData::I32(data) => { - data.resize(len, 0); - } - ColumnData::I16(data) => { - data.resize(len, 0); - } - ColumnData::I8(data) => { - data.resize(len, 0); - } - ColumnData::U64(data) => { - data.resize(len, 0); - } - ColumnData::U32(data) => { - data.resize(len, 0); - } - ColumnData::U16(data) => { - data.resize(len, 0); - } - ColumnData::U8(data) => { - 
data.resize(len, 0); - } - ColumnData::Varbinary(data) => { - data.resize(len, vec![]); - } - ColumnData::Bool(data) => { - data.append_unset(delta); - } - ColumnData::String(data) => { - data.resize(len, StringBytes::new()); - } - } - } - - /// Returns the number of rows in this column - pub fn len(&self) -> usize { - self.valid.len() - } - - /// Returns true if this column contains no rows - pub fn is_empty(&self) -> bool { - self.valid.is_empty() - } - - /// The approximate memory size of the data in the column. - /// - /// This includes the size of `self`. - pub fn size(&self) -> usize { - let data_size = match &self.data { - ColumnData::F64(v) => mem::size_of::() * v.capacity(), - ColumnData::F32(v) => mem::size_of::() * v.capacity(), - ColumnData::I64(v) => mem::size_of::() * v.capacity(), - ColumnData::I32(v) => mem::size_of::() * v.capacity(), - ColumnData::I16(v) => mem::size_of::() * v.capacity(), - ColumnData::I8(v) => mem::size_of::() * v.capacity(), - ColumnData::U64(v) => mem::size_of::() * v.capacity(), - ColumnData::U32(v) => mem::size_of::() * v.capacity(), - ColumnData::U16(v) => mem::size_of::() * v.capacity(), - ColumnData::U8(v) => mem::size_of::() * v.capacity(), - ColumnData::Bool(v) => v.byte_len(), - ColumnData::String(v) => { - v.iter().map(|s| s.len()).sum::() - + (v.capacity() - v.len()) * mem::size_of::() - } - ColumnData::Varbinary(v) => { - v.iter().map(|s| s.len()).sum::() + (v.capacity() - v.len()) - } - }; - mem::size_of::() + data_size + self.valid.byte_len() - } - - /// The approximate memory size of the data in the column, not counting for - /// stats or self or whatever extra space has been allocated for the - /// vecs - pub fn size_data(&self) -> usize { - match &self.data { - ColumnData::F64(_) => mem::size_of::() * self.len(), - ColumnData::F32(_) => mem::size_of::() * self.len(), - ColumnData::I64(_) => mem::size_of::() * self.len(), - ColumnData::I32(_) => mem::size_of::() * self.len(), - ColumnData::I16(_) => mem::size_of::() * self.len(), - ColumnData::I8(_) => mem::size_of::() * self.len(), - ColumnData::U64(_) => mem::size_of::() * self.len(), - ColumnData::U32(_) => mem::size_of::() * self.len(), - ColumnData::U16(_) => mem::size_of::() * self.len(), - ColumnData::U8(_) => mem::size_of::() * self.len(), - ColumnData::Bool(_) => mem::size_of::() * self.len(), - ColumnData::String(v) => v.iter().map(|s| s.len()).sum(), - ColumnData::Varbinary(v) => v.iter().map(|s| s.len()).sum(), - } - } -} diff --git a/src/common_types/src/column_block.rs b/src/common_types/src/column_block.rs deleted file mode 100644 index d200e51b1d..0000000000 --- a/src/common_types/src/column_block.rs +++ /dev/null @@ -1,1370 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Column -use std::sync::Arc; - -use arrow::{ - array::{ - Array, ArrayAccessor, ArrayBuilder, ArrayRef, BinaryArray, BinaryBuilder, BooleanArray, - BooleanBuilder, Date32Array as DateArray, Date32Builder as DateBuilder, DictionaryArray, - Float32Array as FloatArray, Float32Builder as FloatBuilder, Float64Array as DoubleArray, - Float64Builder as DoubleBuilder, Int16Array, Int16Builder, Int32Array, Int32Builder, - Int64Array, Int64Builder, Int8Array, Int8Builder, NullArray, StringArray, StringBuilder, - StringDictionaryBuilder, Time64NanosecondArray as TimeArray, - Time64NanosecondBuilder as TimeBuilder, TimestampMillisecondArray, - TimestampMillisecondBuilder, UInt16Array, UInt16Builder, UInt32Array, UInt32Builder, - UInt64Array, UInt64Builder, UInt8Array, UInt8Builder, - }, - datatypes::{DataType, Int32Type, TimeUnit}, - error::ArrowError, -}; -use bytes_ext::Bytes; -use datafusion::physical_plan::{expressions::cast_column, ColumnarValue}; -use paste::paste; -use snafu::{Backtrace, OptionExt, ResultExt, Snafu}; - -use crate::{ - datum::{Datum, DatumKind, DatumView}, - string::StringBytes, - time::{TimeRange, Timestamp}, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display( - "Invalid array type, datum_kind:{:?}, data_type:{:?}.\nBacktrace:\n{}", - datum_kind, - data_type, - backtrace - ))] - InvalidArrayType { - datum_kind: DatumKind, - data_type: DataType, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to append value, err:{}.\nBacktrace:\n{}", source, backtrace))] - Append { - source: ArrowError, - backtrace: Backtrace, - }, - - #[snafu(display( - "Data type conflict, expect:{:?}, given:{:?}.\nBacktrace:\n{}", - expect, - given, - backtrace - ))] - ConflictType { - expect: DatumKind, - given: DatumKind, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to convert arrow data type, data_type:{}.\nBacktrace:\n{}", - data_type, - backtrace - ))] - UnsupportedArray { - data_type: DataType, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to cast nanosecond to millisecond, data_type:{}. err:{}", - data_type, - source, - ))] - CastTimestamp { - data_type: DataType, - source: datafusion::error::DataFusionError, - }, - - #[snafu(display("Operation not yet implemented."))] - NotImplemented, -} - -pub type Result = std::result::Result; - -#[derive(Debug, Clone)] -pub struct NullColumn(NullArray); - -impl NullColumn { - fn new_null(rows: usize) -> Self { - Self(NullArray::new(rows)) - } - - /// Only the first datum of NullColumn is considered not duplicated. - #[inline] - pub fn dedup(&self, selected: &mut [bool]) { - if !self.0.is_empty() { - selected[0] = true; - } - } -} - -// TODO(yingwen): Builder for columns. - -macro_rules! define_numeric_column { - ($($Kind: ident), *) => { - $(paste! 
{ - #[derive(Debug, Clone)] - pub struct [<$Kind Column>]([<$Kind Array>]); - - #[inline] - fn [](array: &[<$Kind Array>], index: usize) -> Datum { - let value = array.value(index); - Datum::$Kind(value) - } - - #[inline] - fn [](array: &[<$Kind Array>], index: usize) -> DatumView { - let value = array.value(index); - DatumView::$Kind(value) - } - })* - } -} - -define_numeric_column!( - Float, Double, UInt64, UInt32, UInt16, UInt8, Int64, Int32, Int16, Int8, Boolean -); - -#[derive(Debug, Clone)] -pub struct TimestampColumn(TimestampMillisecondArray); - -impl TimestampColumn { - pub fn time_range(&self) -> Option { - if self.0.is_empty() { - return None; - } - - let first_value = self.0.value(0); - let (mut min, mut max) = (first_value, first_value); - for i in 1..self.0.len() { - let current = self.0.value(i); - if current < min { - min = current; - } else if current > max { - max = current; - } - } - - TimeRange::new(min.into(), (max + 1).into()) - } -} - -#[derive(Debug, Clone)] -pub struct VarbinaryColumn(BinaryArray); - -#[derive(Debug, Clone)] -pub struct StringColumn(StringArray); - -/// dictionary encode type is difference from other types, need implement -/// without macro -#[derive(Debug, Clone)] -pub struct StringDictionaryColumn(DictionaryArray); - -#[derive(Debug, Clone)] -pub struct DateColumn(DateArray); - -#[derive(Debug, Clone)] -pub struct TimeColumn(TimeArray); - -#[inline] -fn get_null_datum_view(_array: &NullArray, _index: usize) -> DatumView { - DatumView::Null -} - -#[inline] -fn get_timestamp_datum_view(array: &TimestampMillisecondArray, index: usize) -> DatumView { - let value = array.value(index); - DatumView::Timestamp(Timestamp::new(value)) -} - -#[inline] -fn get_varbinary_datum_view(array: &BinaryArray, index: usize) -> DatumView { - let value = array.value(index); - DatumView::Varbinary(value) -} - -#[inline] -fn get_string_datum_view(array: &StringArray, index: usize) -> DatumView { - let value = array.value(index); - DatumView::String(value) -} - -#[inline] -fn get_date_datum_view(array: &DateArray, index: usize) -> DatumView { - let value = array.value(index); - DatumView::Date(value) -} - -#[inline] -fn get_time_datum_view(array: &TimeArray, index: usize) -> DatumView { - let value = array.value(index); - DatumView::Time(value) -} - -#[inline] -fn get_null_datum(_array: &NullArray, _index: usize) -> Datum { - Datum::Null -} - -#[inline] -fn get_timestamp_datum(array: &TimestampMillisecondArray, index: usize) -> Datum { - let value = array.value(index); - Datum::Timestamp(Timestamp::new(value)) -} - -// TODO(yingwen): Avoid clone of data. -// Require a clone. -#[inline] -fn get_varbinary_datum(array: &BinaryArray, index: usize) -> Datum { - let value = array.value(index); - Datum::Varbinary(Bytes::copy_from_slice(value)) -} - -// TODO(yingwen): Avoid clone of data. -// Require a clone. -#[inline] -fn get_string_datum(array: &StringArray, index: usize) -> Datum { - let value = array.value(index); - Datum::String(StringBytes::copy_from_str(value)) -} - -#[inline] -fn get_date_datum(array: &DateArray, index: usize) -> Datum { - let value = array.value(index); - Datum::Date(value) -} - -#[inline] -fn get_time_datum(array: &TimeArray, index: usize) -> Datum { - let value = array.value(index); - Datum::Time(value) -} - -macro_rules! impl_column { - ($Column: ident, $get_datum: expr, $get_datum_view: expr) => { - impl $Column { - /// Get datum by index. - pub fn datum_opt(&self, index: usize) -> Option { - // Do bound check. 
- if index >= self.0.len() { - return None; - } - - Some(self.datum(index)) - } - - pub fn datum_view_opt(&self, index: usize) -> Option { - if index >= self.0.len() { - return None; - } - - Some(self.datum_view(index)) - } - - pub fn datum_view(&self, index: usize) -> DatumView { - // If this datum is null. - if self.0.is_null(index) { - return DatumView::Null; - } - - $get_datum_view(&self.0, index) - } - - pub fn datum(&self, index: usize) -> Datum { - // If this datum is null. - if self.0.is_null(index) { - return Datum::Null; - } - - $get_datum(&self.0, index) - } - - #[inline] - pub fn num_rows(&self) -> usize { - self.0.len() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.num_rows() == 0 - } - } - }; -} - -impl_column!(NullColumn, get_null_datum, get_null_datum_view); -impl_column!( - TimestampColumn, - get_timestamp_datum, - get_timestamp_datum_view -); -impl_column!( - VarbinaryColumn, - get_varbinary_datum, - get_varbinary_datum_view -); -impl_column!(StringColumn, get_string_datum, get_string_datum_view); - -impl StringDictionaryColumn { - /// Get datum by index - pub fn datum_opt(&self, index: usize) -> Option { - if index >= self.0.len() { - return None; - } - Some(self.datum(index)) - } - - pub fn datum_view_opt(&self, index: usize) -> Option { - if index >= self.0.len() { - return None; - } - Some(self.datum_view(index)) - } - - pub fn datum_view(&self, index: usize) -> DatumView { - if self.0.is_null(index) { - return DatumView::Null; - } - // TODO(tanruixiang): Is this the efficient way? - DatumView::String(self.0.downcast_dict::().unwrap().value(index)) - } - - pub fn datum(&self, index: usize) -> Datum { - if self.0.is_null(index) { - return Datum::Null; - } - // TODO(tanruixiang): Is this the efficient way? - Datum::String( - self.0 - .downcast_dict::() - .unwrap() - .value(index) - .into(), - ) - } - - #[inline] - pub fn num_rows(&self) -> usize { - self.0.len() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.num_rows() == 0 - } -} - -macro_rules! impl_dedup { - ($Column: ident) => { - impl $Column { - /// If datum i is not equal to previous datum i - 1, mark `selected[i]` to - /// true. - /// - /// The first datum is marked to true. - /// - /// The size of selected must equal to the size of this column and - /// initialized to false. - #[allow(clippy::float_cmp)] - pub fn dedup(&self, selected: &mut [bool]) { - if self.0.is_empty() { - return; - } - - selected[0] = true; - for i in 1..self.0.len() { - let current = self.0.value(i); - let prev = self.0.value(i - 1); - - if current != prev { - selected[i] = true; - } - } - } - } - }; -} - -impl_dedup!(TimestampColumn); -impl_dedup!(VarbinaryColumn); -impl_dedup!(StringColumn); - -impl StringDictionaryColumn { - pub fn dedup(&self, selected: &mut [bool]) { - if self.0.is_empty() { - return; - } - selected[0] = true; - for (i, v) in selected.iter_mut().enumerate().take(self.0.len()).skip(1) { - let current = self.0.key(i); - let prev = self.0.key(i - 1); - if current != prev { - *v = true; - } - } - } -} - -macro_rules! impl_new_null { - ($Column: ident, $Builder: ident) => { - impl $Column { - /// Create a column that all values are null. - fn new_null(num_rows: usize) -> Self { - let mut builder = $Builder::with_capacity(num_rows); - for _ in 0..num_rows { - builder.append_null(); - } - let array = builder.finish(); - - Self(array) - } - } - }; -} - -impl_new_null!(TimestampColumn, TimestampMillisecondBuilder); - -macro_rules! 
impl_from_array_and_slice { - ($Column: ident, $ArrayType: ident) => { - impl From<$ArrayType> for $Column { - fn from(array: $ArrayType) -> Self { - Self(array) - } - } - - impl From<&$ArrayType> for $Column { - fn from(array_ref: &$ArrayType) -> Self { - // We need to clone the [arrow::array::ArrayData], which clones - // the underlying vector of [arrow::buffer::Buffer] and Bitmap (also - // holds a Buffer), thus require some allocation. However, the Buffer is - // managed by Arc, so cloning the buffer is not too expensive. - let array_data = array_ref.into_data(); - let array = $ArrayType::from(array_data); - - Self(array) - } - } - - impl $Column { - fn to_arrow_array(&self) -> $ArrayType { - // Clone the array data. - let array_data = self.0.clone().into_data(); - $ArrayType::from(array_data) - } - - /// Returns a zero-copy slice of this array with the indicated offset and - /// length. - /// - /// Panics if offset with length is greater than column length. - fn slice(&self, offset: usize, length: usize) -> Self { - let array_slice = self.0.slice(offset, length); - // Clone the slice data. - let array_data = array_slice.into_data(); - let array = $ArrayType::from(array_data); - - Self(array) - } - } - }; -} - -impl_from_array_and_slice!(NullColumn, NullArray); -impl_from_array_and_slice!(TimestampColumn, TimestampMillisecondArray); -impl_from_array_and_slice!(VarbinaryColumn, BinaryArray); -impl_from_array_and_slice!(StringColumn, StringArray); - -impl From> for StringDictionaryColumn { - fn from(array: DictionaryArray) -> Self { - Self(array) - } -} - -impl From<&DictionaryArray> for StringDictionaryColumn { - fn from(array_ref: &DictionaryArray) -> Self { - let array_data = array_ref.into_data(); - let array = DictionaryArray::::from(array_data); - Self(array) - } -} - -impl StringDictionaryColumn { - fn to_arrow_array(&self) -> DictionaryArray { - let array_data = self.0.clone().into_data(); - DictionaryArray::::from(array_data) - } - - fn slice(&self, offset: usize, length: usize) -> Self { - let array_slice = self.0.slice(offset, length); - let array_data = array_slice.into_data(); - let array = DictionaryArray::::from(array_data); - Self(array) - } -} - -macro_rules! impl_iter { - ($Column: ident, $Value: ident) => { - impl $Column { - /// Iter column values. - pub fn iter(&self) -> impl Iterator> + '_ { - self.0.iter() - } - } - }; -} - -macro_rules! impl_iter_map { - ($Column: ident, $Value: ident) => { - impl $Column { - /// Iter column values. - pub fn iter(&self) -> impl Iterator> + '_ { - self.0.iter().map(|v| v.map($Value::from)) - } - } - }; -} - -impl_iter_map!(TimestampColumn, Timestamp); - -impl VarbinaryColumn { - fn new_null(num_rows: usize) -> Self { - let mut builder = BinaryBuilder::with_capacity(num_rows, 0usize); - for _ in 0..num_rows { - builder.append_null(); - } - let array = builder.finish(); - - Self(array) - } -} - -impl StringColumn { - /// Create a column that all values are null. - fn new_null(num_rows: usize) -> Self { - let mut builder = StringBuilder::with_capacity(num_rows, 0usize); - for _ in 0..num_rows { - builder.append_null(); - } - let array = builder.finish(); - - Self(array) - } -} - -impl StringDictionaryColumn { - /// Create a column that all values are null. - fn new_null(num_rows: usize) -> Self { - let mut builder = StringDictionaryBuilder::::new(); - for _ in 0..num_rows { - builder.append_null(); - } - let array = builder.finish(); - - Self(array) - } -} - -macro_rules! 
impl_numeric_column { - ($(($Kind: ident, $type: ty)), *) => { - $( - paste! { - impl_column!([<$Kind Column>], [], []); - impl_from_array_and_slice!([<$Kind Column>], [<$Kind Array>]); - impl_new_null!([<$Kind Column>], [<$Kind Builder>]); - impl_iter!([<$Kind Column>], $type); - impl_dedup!([<$Kind Column>]); - } - )* - } -} - -impl_numeric_column!( - (Double, f64), - (Float, f32), - (UInt64, u64), - (UInt32, u32), - (UInt16, u16), - (UInt8, u8), - (Int64, i64), - (Int32, i32), - (Int16, i16), - (Int8, i8), - (Boolean, bool), - (Date, i32), - (Time, i64) -); - -macro_rules! impl_numeric_value { - ($Column: ident, $Value: ident) => { - impl $Column { - /// Get value at index. - pub fn value(&self, index: usize) -> Option<$Value> { - if self.0.is_valid(index) { - unsafe { Some(self.0.value_unchecked(index)) } - } else { - None - } - } - } - }; -} - -macro_rules! batch_impl_numeric_value { - ($(($Kind: ident, $type: ty)), *) => { - $( - paste! { - impl_numeric_value!([<$Kind Column>], $type); - } - )* - } -} - -batch_impl_numeric_value!( - (Timestamp, i64), - (Double, f64), - (Float, f32), - (UInt64, u64), - (UInt32, u32), - (UInt16, u16), - (UInt8, u8), - (Int64, i64), - (Int32, i32), - (Int16, i16), - (Int8, i8), - (Boolean, bool), - (Date, i32), - (Time, i64) -); - -impl VarbinaryColumn { - pub fn iter(&self) -> impl Iterator> + '_ { - self.0.iter() - } - - pub fn value(&self, index: usize) -> Option<&[u8]> { - if self.0.is_valid(index) { - unsafe { Some(self.0.value_unchecked(index)) } - } else { - None - } - } -} - -impl StringColumn { - pub fn iter(&self) -> impl Iterator> + '_ { - self.0.iter() - } - - pub fn value(&self, index: usize) -> Option<&str> { - if self.0.is_valid(index) { - unsafe { Some(self.0.value_unchecked(index)) } - } else { - None - } - } -} - -macro_rules! impl_column_block { - ($($Kind: ident), *) => { - impl ColumnBlock { - pub fn datum_kind(&self) -> DatumKind { - match self { - ColumnBlock::StringDictionary(_) => DatumKind::String, - $(ColumnBlock::$Kind(_) => DatumKind::$Kind,)* - } - } - - pub fn datum_opt(&self, index: usize) -> Option { - match self { - ColumnBlock::StringDictionary(col) => col.datum_opt(index), - $(ColumnBlock::$Kind(col) => col.datum_opt(index),)* - } - } - - pub fn datum_view_opt(&self, index: usize) -> Option { - match self { - ColumnBlock::StringDictionary(col) => col.datum_view_opt(index), - $(ColumnBlock::$Kind(col) => col.datum_view_opt(index),)* - } - } - - /// Panic if index is out fo bound. - pub fn datum_view(&self, index: usize) -> DatumView { - match self { - ColumnBlock::StringDictionary(col) => col.datum_view(index), - $(ColumnBlock::$Kind(col) => col.datum_view(index),)* - } - } - - /// Panic if index is out fo bound. - pub fn datum(&self, index: usize) -> Datum { - match self { - ColumnBlock::StringDictionary(col) => col.datum(index), - $(ColumnBlock::$Kind(col) => col.datum(index),)* - } - } - - pub fn num_rows(&self) -> usize { - match self { - ColumnBlock::StringDictionary(col) => col.num_rows(), - $(ColumnBlock::$Kind(col) => col.num_rows(),)* - } - } - - pub fn to_arrow_array_ref(&self) -> ArrayRef { - match self { - ColumnBlock::StringDictionary(col) => Arc::new(col.to_arrow_array()), - $(ColumnBlock::$Kind(col) => Arc::new(col.to_arrow_array()),)* - } - } - - /// If datum i is not equal to previous datum i - 1, mark `selected[i]` to true. - /// - /// The first datum is not marked to true. 
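The dedup contract is easiest to see with concrete values. An illustrative snippet against a sorted `Int64Column` (note that the per-column implementations above do select the first row, since they set `selected[0] = true`):

    use arrow::array::Int64Array;

    // Illustrative only: dedup over a sorted column [1, 1, 2, 2, 3].
    // `selected` must be sized to the column length and initialized to false.
    let column = Int64Column::from(Int64Array::from(vec![1_i64, 1, 2, 2, 3]));
    let mut selected = vec![false; 5];
    column.dedup(&mut selected);
    // The first row is selected; later rows only where the value changes.
    assert_eq!(selected, vec![true, false, true, false, true]);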
- pub fn dedup(&self, selected: &mut [bool]) { - match self { - ColumnBlock::StringDictionary(col) => col.dedup(selected), - $(ColumnBlock::$Kind(col) => col.dedup(selected),)* - } - } - - /// Returns a zero-copy slice of this array with the indicated offset and length. - /// - /// Panics if offset with length is greater than column length. - #[must_use] - pub fn slice(&self, offset: usize, length: usize) -> Self { - match self { - ColumnBlock::StringDictionary(col) => ColumnBlock::StringDictionary(col.slice(offset, length)), - $(ColumnBlock::$Kind(col) => ColumnBlock::$Kind(col.slice(offset, length)),)* - } - } - } - - $(paste! { - impl From<[<$Kind Column>]> for ColumnBlock { - fn from(column: [<$Kind Column>]) -> Self { - Self::$Kind(column) - } - } - })* - - impl From for ColumnBlock { - fn from(column: StringDictionaryColumn) -> Self { - Self::StringDictionary(column) - } - } - }; -} - -impl_column_block!( - Null, Timestamp, Double, Float, Varbinary, String, UInt64, UInt32, UInt16, UInt8, Int64, Int32, - Int16, Int8, Boolean, Date, Time -); - -// TODO(yingwen): We can add a unsafe function that don't do bound check. - -macro_rules! define_column_block { - ($($Kind: ident), *) => { - paste! { - #[derive(Debug, Clone)] - pub enum ColumnBlock { - Null(NullColumn), - StringDictionary(StringDictionaryColumn), - String(StringColumn), - $( - $Kind([<$Kind Column>]), - )* - } - - impl ColumnBlock { - pub fn try_from_arrow_array_ref(datum_kind: &DatumKind, array: &ArrayRef) -> Result { - let is_dictionary : bool = if let DataType::Dictionary(..) = array.data_type() { - true - } else { - false - }; - let column = match datum_kind { - DatumKind::Null => ColumnBlock::Null(NullColumn::new_null(array.len())), - DatumKind::String => { - if is_dictionary { - let cast_column = cast_array(datum_kind, array)?; - ColumnBlock::StringDictionary(StringDictionaryColumn::from(cast_column)) - - } else { - let cast_column = cast_array(datum_kind, array)?; - ColumnBlock::String(StringColumn::from(cast_column)) - } - }, - $( - DatumKind::$Kind => { - let mills_array; - let cast_column = match array.data_type() { - DataType::Timestamp(TimeUnit::Nanosecond, None) => { - mills_array = cast_nanosecond_to_mills(array)?; - cast_array(datum_kind, &mills_array)? - }, - _ => { - cast_array(datum_kind, array)? - } - }; - - ColumnBlock::$Kind([<$Kind Column>]::from(cast_column)) - } - )* - }; - Ok(column) - } - - pub fn new_null_with_type(kind: &DatumKind, rows: usize, is_dictionary: bool) -> Result { - let block = match kind { - DatumKind::Null => ColumnBlock::Null(NullColumn::new_null(rows)), - DatumKind::String => { - if is_dictionary { - ColumnBlock::StringDictionary(StringDictionaryColumn::new_null(rows)) - }else { - ColumnBlock::String(StringColumn::new_null(rows)) - } - }, - $( - DatumKind::$Kind => ColumnBlock::$Kind([<$Kind Column>]::new_null(rows)), - )* - }; - - Ok(block) - } - } - } - } -} - -// Define column blocks, Null is defined explicitly in macro. 
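try_from_arrow_array_ref above chooses the dictionary variant by inspecting the Arrow DataType before downcasting. A small sketch of that check, assuming Int32 dictionary keys as in the patch; the helper name is illustrative:

use std::sync::Arc;
use arrow::array::{Array, ArrayRef, DictionaryArray, StringArray};
use arrow::datatypes::{DataType, Int32Type};

fn is_dictionary(array: &ArrayRef) -> bool {
    matches!(array.data_type(), DataType::Dictionary(..))
}

fn main() {
    let plain: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "a"]));
    let dict: ArrayRef = Arc::new(
        vec!["a", "b", "a"]
            .into_iter()
            .collect::<DictionaryArray<Int32Type>>(),
    );

    assert!(!is_dictionary(&plain));
    assert!(is_dictionary(&dict));

    // Downcast to the concrete dictionary type only after the check succeeded.
    let typed = dict
        .as_any()
        .downcast_ref::<DictionaryArray<Int32Type>>()
        .expect("data type was checked above");
    assert_eq!(typed.len(), 3);
}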
-define_column_block!( - Timestamp, Double, Float, Varbinary, UInt64, UInt32, UInt16, UInt8, Int64, Int32, Int16, Int8, - Boolean, Date, Time -); - -impl ColumnBlock { - pub fn try_cast_arrow_array_ref(array: &ArrayRef) -> Result { - let datum_kind = - DatumKind::from_data_type(array.data_type()).with_context(|| UnsupportedArray { - data_type: array.data_type().clone(), - })?; - Self::try_from_arrow_array_ref(&datum_kind, array) - } - - pub fn new_null(rows: usize) -> Self { - Self::Null(NullColumn::new_null(rows)) - } - - pub fn as_timestamp(&self) -> Option<&TimestampColumn> { - match self { - ColumnBlock::Timestamp(c) => Some(c), - _ => None, - } - } -} - -// TODO: This is a temp workaround to support nanoseconds, a better way -// is to support nanoseconds natively. -// This is also required for influxql. -pub fn cast_nanosecond_to_mills(array: &ArrayRef) -> Result> { - let column = ColumnarValue::Array(array.clone()); - let mills_column = cast_column( - &column, - &DataType::Timestamp(TimeUnit::Millisecond, None), - // It will use the default option internally when found None. - None, - ) - .with_context(|| CastTimestamp { - data_type: DataType::Timestamp(TimeUnit::Millisecond, None), - })?; - - match mills_column { - ColumnarValue::Array(array) => Ok(array), - _ => Err(Error::NotImplemented), - } -} - -fn cast_array<'a, T: 'static>(datum_kind: &DatumKind, array: &'a ArrayRef) -> Result<&'a T> { - array - .as_any() - .downcast_ref::() - .with_context(|| InvalidArrayType { - datum_kind: *datum_kind, - data_type: array.data_type().clone(), - }) -} - -macro_rules! append_datum { - ($Kind: ident, $builder: ident, $DatumType: ident, $datum: ident) => { - match $datum { - $DatumType::Null => Ok($builder.append_null()), - $DatumType::$Kind(v) => Ok($builder.append_value(v)), - _ => ConflictType { - expect: DatumKind::$Kind, - given: $datum.kind(), - } - .fail(), - } - }; -} - -macro_rules! append_datum_into { - ($Kind: ident, $builder: ident, $DatumType: ident, $datum: ident) => { - match $datum { - $DatumType::Null => Ok($builder.append_null()), - $DatumType::$Kind(v) => Ok($builder.append_value(v.into())), - _ => ConflictType { - expect: DatumKind::$Kind, - given: $datum.kind(), - } - .fail(), - } - }; -} - -macro_rules! append_block { - ($Kind: ident, $builder: ident, $BlockType: ident, $block: ident, $start: ident, $len: ident) => { - match $block { - $BlockType::Null(v) => { - let end = std::cmp::min($start + $len, v.num_rows()); - for _ in $start..end { - $builder.append_null(); - } - Ok(()) - } - $BlockType::$Kind(v) => { - // There is no convenient api to copy a range of data from array to builder, so - // we still need to clone value one by one using a for loop. - let end = std::cmp::min($start + $len, v.num_rows()); - for i in $start..end { - let value_opt = v.value(i); - match value_opt { - Some(value) => { - $builder.append_value(value); - } - None => { - $builder.append_null(); - } - } - } - Ok(()) - } - _ => ConflictType { - expect: DatumKind::$Kind, - given: $block.datum_kind(), - } - .fail(), - } - }; -} - -macro_rules! define_column_block_builder { - ($(($Kind: ident, $Builder: ident)), *) => { - paste! 
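cast_nanosecond_to_mills above routes through DataFusion's cast_column; a sketch of the same nanosecond-to-millisecond conversion using arrow's cast kernel directly, which truncates when dividing by 1_000_000:

use std::sync::Arc;
use arrow::array::{Array, ArrayRef, TimestampMillisecondArray, TimestampNanosecondArray};
use arrow::compute::cast;
use arrow::datatypes::{DataType, TimeUnit};

fn main() {
    let ns: ArrayRef = Arc::new(TimestampNanosecondArray::from(vec![1_000_000i64, 2_500_000]));

    let ms = cast(&ns, &DataType::Timestamp(TimeUnit::Millisecond, None)).unwrap();
    let ms = ms
        .as_any()
        .downcast_ref::<TimestampMillisecondArray>()
        .unwrap();

    assert_eq!(ms.value(0), 1); // 1_000_000 ns == 1 ms
    assert_eq!(ms.value(1), 2); // 2_500_000 ns truncates to 2 ms
}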
{ - pub enum ColumnBlockBuilder { - Null { rows: usize }, - Timestamp(TimestampMillisecondBuilder), - Varbinary(BinaryBuilder), - String(StringBuilder), - Date(DateBuilder), - Time(TimeBuilder), - Dictionary(StringDictionaryBuilder::), - $( - $Kind($Builder), - )* - } - - impl ColumnBlockBuilder { - /// Create by data type with initial capacity - pub fn with_capacity(data_type: &DatumKind, item_capacity: usize, is_dictionary : bool) -> Self { - match data_type { - DatumKind::Null => Self::Null { rows: 0 }, - DatumKind::Timestamp => Self::Timestamp(TimestampMillisecondBuilder::with_capacity(item_capacity)), - // The data_capacity is set as 1024, because the item is variable-size type. - DatumKind::Varbinary => Self::Varbinary(BinaryBuilder::with_capacity(item_capacity, 1024)), - DatumKind::String =>{ - if is_dictionary { - Self::Dictionary(StringDictionaryBuilder::::new()) - }else { - Self::String(StringBuilder::with_capacity(item_capacity, 1024)) - } - } - DatumKind::Date => Self::Date(DateBuilder::with_capacity(item_capacity)), - DatumKind::Time => Self::Time(TimeBuilder::with_capacity(item_capacity)), - $( - DatumKind::$Kind => Self::$Kind($Builder::with_capacity(item_capacity)), - )* - } - } - - /// Append the datum into the builder, the datum should have same the data - /// type of builder - pub fn append(&mut self, datum: Datum) -> Result<()> { - let given = datum.kind(); - match self { - Self::Null { rows } => match datum { - Datum::Null => { - *rows += 1; - Ok(()) - } - _ => ConflictType { - expect: DatumKind::Null, - given, - } - .fail(), - }, - Self::Timestamp(builder) => append_datum_into!(Timestamp, builder, Datum, datum), - Self::Varbinary(builder) => append_datum!(Varbinary, builder, Datum, datum), - Self::String(builder) => append_datum!(String, builder, Datum, datum), - Self::Date(builder) => append_datum!(Date, builder, Datum, datum), - Self::Time(builder) => append_datum!(Time, builder, Datum, datum), - Self::Dictionary(builder) => { - match datum { - Datum::Null => Ok(builder.append_null()), - Datum::String(v) => Ok(builder.append_value(v)), - _ => ConflictType { - expect: DatumKind::String, - given: datum.kind(), - } - .fail() - } - }, - $( - Self::$Kind(builder) => append_datum!($Kind, builder, Datum, datum), - )* - } - } - - /// Append the [DatumView] into the builder, the datum view should have same the data - /// type of builder - pub fn append_view(&mut self, datum: DatumView<'_>) -> Result<()> { - let given = datum.kind(); - match self { - Self::Null { rows } => match datum { - DatumView::Null => { - *rows += 1; - Ok(()) - } - _ => ConflictType { - expect: DatumKind::Null, - given, - } - .fail(), - }, - Self::Timestamp(builder) => append_datum_into!(Timestamp, builder, DatumView, datum), - Self::Varbinary(builder) => append_datum!(Varbinary, builder, DatumView, datum), - Self::String(builder) => append_datum!(String, builder, DatumView, datum), - Self::Date(builder) => append_datum!(Date, builder, DatumView, datum), - Self::Time(builder) => append_datum!(Time, builder, DatumView, datum), - Self::Dictionary(builder) => { - match datum { - DatumView::Null => Ok(builder.append_null()), - DatumView::String(v) => Ok(builder.append_value(v)), - _ => ConflictType { - expect: DatumKind::String, - given: datum.kind(), - } - .fail() - } - }, - $( - Self::$Kind(builder) => append_datum!($Kind, builder, DatumView, datum), - )* - } - } - - /// Append rows in [start..start + len) from `block` to the builder. - /// - /// Returns rows actually appended. 
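The Dictionary branch of append/append_view above only ever calls append_value or append_null on a StringDictionaryBuilder, and repeated strings collapse onto a single dictionary entry. A minimal sketch of that behavior:

use arrow::array::{Array, StringDictionaryBuilder};
use arrow::datatypes::Int32Type;

fn main() {
    let mut builder = StringDictionaryBuilder::<Int32Type>::new();
    builder.append_value("tag_a");
    builder.append_null();
    builder.append_value("tag_a"); // reuses the existing dictionary key

    let array = builder.finish();
    assert_eq!(array.len(), 3);
    assert!(array.is_null(1));
    // Both non-null rows point at a single entry in the dictionary values.
    assert_eq!(array.values().len(), 1);
}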
- pub fn append_block_range(&mut self, block: &ColumnBlock, start: usize, len: usize) -> Result<()> { - match self { - Self::Null { rows } => { - if start + len >= block.num_rows() { - *rows += block.num_rows() - start; - } else { - *rows += len; - } - Ok(()) - }, - Self::Timestamp(builder) => append_block!(Timestamp, builder, ColumnBlock, block, start, len), - Self::Varbinary(builder) => append_block!(Varbinary, builder, ColumnBlock, block, start, len), - Self::String(builder) => append_block!(String, builder, ColumnBlock, block, start, len), - Self::Date(builder) => append_block!(Date, builder, ColumnBlock, block, start, len), - Self::Time(builder) => append_block!(Time, builder, ColumnBlock, block, start, len), - Self::Dictionary(builder) => { - match block { - ColumnBlock::Null(v) => { - let end = std::cmp::min(start + len, v.num_rows()); - for _ in start..end { - builder.append_null(); - } - Ok(()) - } - ColumnBlock::StringDictionary(v) => { - let end = std::cmp::min(start + len, v.num_rows()); - for i in start..end { - if v.0.is_null(i) { - builder.append_null(); - } else { - let value = v.datum(i); - builder.append_value(value.as_str().unwrap()); - } - } - Ok(()) - } - _ => ConflictType { - expect: DatumKind::String, - given: block.datum_kind(), - } - .fail(), - } - }, - $( - Self::$Kind(builder) => append_block!($Kind, builder, ColumnBlock, block, start, len), - )* - } - } - - pub fn len(&self) -> usize { - match &self { - Self::Null { rows } => *rows, - Self::Timestamp(builder) => builder.len(), - Self::Varbinary(builder) => builder.len(), - Self::String(builder) => builder.len(), - Self::Date(builder) => builder.len(), - Self::Time(builder) => builder.len(), - Self::Dictionary(builder) => builder.len(), - $( - Self::$Kind(builder) => builder.len(), - )* - } - } - - // Build and reset the builder. - pub fn build(&mut self) -> ColumnBlock { - match self { - Self::Null { rows } => { - let block = ColumnBlock::new_null(*rows); - *rows = 0; - block - } - Self::Timestamp(builder) => TimestampColumn::from(builder.finish()).into(), - Self::Varbinary(builder) => VarbinaryColumn::from(builder.finish()).into(), - Self::String(builder) => StringColumn::from(builder.finish()).into(), - Self::Date(builder) => DateColumn::from(builder.finish()).into(), - Self::Time(builder) => TimeColumn::from(builder.finish()).into(), - Self::Dictionary(builder) => { - StringDictionaryColumn::from(builder.finish()).into() - }, - $( - Self::$Kind(builder) => [<$Kind Column>]::from(builder.finish()).into(), - )* - } - } - } - } - } -} - -// Define column block builders, Null and Timestamp are defined explicitly in -// macro. -define_column_block_builder!( - (Double, DoubleBuilder), - (Float, FloatBuilder), - (UInt64, UInt64Builder), - (UInt32, UInt32Builder), - (UInt16, UInt16Builder), - (UInt8, UInt8Builder), - (Int64, Int64Builder), - (Int32, Int32Builder), - (Int16, Int16Builder), - (Int8, Int8Builder), - (Boolean, BooleanBuilder) -); - -impl ColumnBlockBuilder { - /// Create by data type - pub fn new(data_type: &DatumKind, is_dictionry: bool) -> Self { - Self::with_capacity(data_type, 0, is_dictionry) - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Clear the builder by calling `build()` and drop the built result. 
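As the comment inside append_block! notes, there is no convenient bulk copy-range API from an Arrow array into a builder, so values are appended one by one while preserving nulls. A trimmed sketch of that loop for a single concrete type; the helper name is illustrative:

use arrow::array::{Array, Int64Array, Int64Builder};

fn append_range(builder: &mut Int64Builder, block: &Int64Array, start: usize, len: usize) {
    let end = std::cmp::min(start + len, block.len());
    for i in start..end {
        if block.is_valid(i) {
            builder.append_value(block.value(i));
        } else {
            builder.append_null();
        }
    }
}

fn main() {
    let block = Int64Array::from(vec![Some(1), None, Some(3), Some(4)]);
    let mut builder = Int64Builder::new();
    append_range(&mut builder, &block, 1, 2);

    let out = builder.finish();
    assert_eq!(out.len(), 2);
    assert!(out.is_null(0)); // the null at original index 1 is preserved
    assert_eq!(out.value(1), 3);
}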
- pub fn clear(&mut self) { - let _ = self.build(); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::tests::{ - build_row_for_dictionary, build_rows, build_schema, build_schema_with_dictionary, - }; - - #[test] - fn test_column_block_builder() { - let schema = build_schema(); - let rows = build_rows(); - // DatumKind::Varbinary - let column = schema.column(0); - let mut builder = ColumnBlockBuilder::with_capacity(&column.data_type, 2, false); - - // append - builder.append(rows[0][0].clone()).unwrap(); - let ret = builder.append(rows[0][1].clone()); - assert!(ret.is_err()); - - // append_view - builder.append_view(rows[1][0].as_view()).unwrap(); - let ret = builder.append_view(rows[0][1].as_view()); - assert!(ret.is_err()); - - let column_block = builder.build(); - assert_eq!(column_block.num_rows(), 2); - let mut builder = ColumnBlockBuilder::with_capacity(&column.data_type, 2, false); - - // append_block_range - builder.append_block_range(&column_block, 0, 1).unwrap(); - builder.append_block_range(&column_block, 1, 1).unwrap(); - - let column_block = builder.build(); - assert_eq!(column_block.num_rows(), 2); - assert_eq!( - column_block.datum(0), - Datum::Varbinary(Bytes::copy_from_slice(b"binary key")) - ); - assert_eq!( - column_block.datum(1), - Datum::Varbinary(Bytes::copy_from_slice(b"binary key1")) - ); - } - - #[test] - fn test_column_block_string_dictionary_builder() { - let schema = build_schema_with_dictionary(); - let rows = [ - build_row_for_dictionary( - b"a", - 1, - 10.0, - "v4", - 1000, - 1_000_000, - Some("tag1_1"), - "tag2_1", - ), - build_row_for_dictionary( - b"b", - 2, - 10.0, - "v4", - 1000, - 1_000_000, - Some("tag1_2"), - "tag2_2", - ), - build_row_for_dictionary( - b"c", - 3, - 10.0, - "v4", - 1000, - 1_000_000, - Some("tag1_3"), - "tag2_3", - ), - build_row_for_dictionary( - b"d", - 4, - 10.0, - "v4", - 1000, - 1_000_000, - Some("tag1_1"), - "tag2_4", - ), - build_row_for_dictionary( - b"e", - 5, - 10.0, - "v4", - 1000, - 1_000_000, - Some("tag1_3"), - "tag2_4", - ), - build_row_for_dictionary(b"f", 6, 10.0, "v4", 1000, 1_000_000, None, "tag2_4"), - ]; - // DatumKind::String , is_dictionary = true - let column = schema.column(6); - let mut builder = - ColumnBlockBuilder::with_capacity(&column.data_type, 0, column.is_dictionary); - // append - (0..rows.len()).for_each(|i| builder.append(rows[i][6].clone()).unwrap()); - - let ret = builder.append(rows[0][0].clone()); - assert!(ret.is_err()); - - // append_view - builder.append_view(rows[5][6].as_view()).unwrap(); - let ret = builder.append_view(rows[1][0].as_view()); - - assert!(ret.is_err()); - - let column_block = builder.build(); - assert_eq!(column_block.num_rows(), 7); - let mut builder = - ColumnBlockBuilder::with_capacity(&column.data_type, 2, column.is_dictionary); - - // append_block_range - (0..rows.len()).for_each(|i| builder.append_block_range(&column_block, i, 1).unwrap()); - - let column_block = builder.build(); - assert_eq!(column_block.num_rows(), 6); - assert_eq!( - column_block.datum(0), - Datum::String(StringBytes::from("tag1_1")) - ); - assert_eq!( - column_block.datum(1), - Datum::String(StringBytes::from("tag1_2")) - ); - assert_eq!( - column_block.datum(2), - Datum::String(StringBytes::from("tag1_3")) - ); - assert_eq!( - column_block.datum(3), - Datum::String(StringBytes::from("tag1_1")) - ); - assert_eq!( - column_block.datum(4), - Datum::String(StringBytes::from("tag1_3")) - ); - assert_eq!(column_block.datum(5), Datum::Null); - } -} diff --git 
a/src/common_types/src/column_schema.rs b/src/common_types/src/column_schema.rs deleted file mode 100644 index a8822c5722..0000000000 --- a/src/common_types/src/column_schema.rs +++ /dev/null @@ -1,690 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Schema of column - -use std::{collections::HashMap, convert::TryFrom, str::FromStr, sync::Arc}; - -use arrow::datatypes::{DataType, Field}; -use horaedbproto::{remote_engine::ColumnDesc, schema as schema_pb}; -use snafu::{ensure, Backtrace, OptionExt, ResultExt, Snafu}; -use sqlparser::ast::Expr; - -use crate::datum::DatumKind; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display( - "Unsupported arrow data type, type:{}.\nBacktrace:\n{}", - data_type, - backtrace - ))] - UnsupportedDataType { - data_type: DataType, - backtrace: Backtrace, - }, - - #[snafu(display("Invalid tag type:{}.\nBacktrace:\n{}", data_type, backtrace))] - InvalidTagType { - data_type: DataType, - backtrace: Backtrace, - }, - - #[snafu(display("Invalid dictionary type:{}.\nBacktrace:\n{}", data_type, backtrace))] - InvalidDictionaryType { - data_type: DataType, - backtrace: Backtrace, - }, - - #[snafu(display( - "Arrow field meta data is missing, field name:{}.\nBacktrace:\n{}", - field_name, - backtrace - ))] - ArrowFieldMetaDataMissing { - field_name: String, - backtrace: Backtrace, - }, - - #[snafu(display( - "Arrow field meta key is not found, key:{:?}.\nBacktrace:\n{}", - key, - backtrace - ))] - ArrowFieldMetaKeyNotFound { - key: ArrowFieldMetaKey, - backtrace: Backtrace, - }, - - #[snafu(display( - "Arrow field meta value is invalid, key:{:?}, raw_value:{}, err:{}.\nBacktrace:\n{}", - key, - raw_value, - source, - backtrace - ))] - InvalidArrowFieldMetaValue { - key: ArrowFieldMetaKey, - raw_value: String, - source: Box, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to decode default value, encoded_val:{:?}, err:{}.\nBacktrace:\n{}", - encoded_val, - source, - backtrace - ))] - DecodeDefaultValue { - encoded_val: Vec, - source: serde_json::error::Error, - backtrace: Backtrace, - }, -} - -pub type Result = std::result::Result; - -/// Error of compatibility check -#[derive(Debug, Snafu)] -pub enum CompatError { - #[snafu(display( - "Incompatible data type of column, name:{}, expect:{:?}, given:{:?}.\nBacktrace:\n{}", - name, - expect, - given, - backtrace, - ))] - IncompatDataType { - name: String, - expect: DatumKind, - given: DatumKind, - backtrace: Backtrace, - }, - - #[snafu(display("Column is not nullable, name:{}.\nBacktrace:\n{}", name, backtrace))] - NotNullable { name: String, backtrace: Backtrace }, -} - -/// Id of column -pub type ColumnId = u32; - -/// A ColumnId used to indicate that the column id is uninitialized -pub const COLUMN_ID_UNINIT: ColumnId = 0; - -/// Read 
operation of a column -#[derive(Debug)] -pub enum ReadOp { - /// Use the column exactly - Exact, - /// Fill the column by null - FillNull, -} - -/// Meta data of the arrow field. -#[derive(Clone, Debug, Default, PartialEq)] -struct ArrowFieldMeta { - id: u32, - is_tag: bool, - comment: String, - is_dictionary: bool, -} - -#[derive(Copy, Clone, Debug)] -pub enum ArrowFieldMetaKey { - Id, - IsTag, - IsDictionary, - Comment, -} - -impl ArrowFieldMetaKey { - fn as_str(&self) -> &str { - match self { - ArrowFieldMetaKey::Id => "field::id", - ArrowFieldMetaKey::IsTag => "field::is_tag", - ArrowFieldMetaKey::Comment => "field::comment", - ArrowFieldMetaKey::IsDictionary => "field::is_dictionary", - } - } - - // Only id,is_tag,comment are required meta keys, other keys should be optional - // to keep backward compatible. - fn is_required(&self) -> bool { - matches!(self, Self::Id | Self::IsTag | Self::Comment) - } -} - -impl ToString for ArrowFieldMetaKey { - fn to_string(&self) -> String { - self.as_str().to_string() - } -} - -/// Schema of column -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ColumnSchema { - /// Id of column - pub id: ColumnId, - /// Column name - pub name: String, - /// Data type of the column - pub data_type: DatumKind, - /// Is nullable - pub is_nullable: bool, - /// Is tag, tag is just a hint for a column, there is no restriction that a - /// tag column must be a part of primary key - pub is_tag: bool, - // Whether to use dictionary types for encoding column - pub is_dictionary: bool, - /// Comment of the column - pub comment: String, - /// Column name in response - pub escaped_name: String, - /// Default value expr - pub default_value: Option, -} - -impl ColumnSchema { - /// Check whether a type is valid tag type. - pub fn is_valid_tag_type(typ: DatumKind) -> bool { - match typ { - DatumKind::Null => false, - DatumKind::Timestamp => true, - DatumKind::Double => false, - DatumKind::Float => false, - DatumKind::Varbinary => true, - DatumKind::String => true, - DatumKind::UInt64 => true, - DatumKind::UInt32 => true, - DatumKind::UInt16 => true, - DatumKind::UInt8 => true, - DatumKind::Int64 => true, - DatumKind::Int32 => true, - DatumKind::Int16 => true, - DatumKind::Int8 => true, - DatumKind::Boolean => true, - DatumKind::Date => true, - DatumKind::Time => true, - } - } - - /// Check whether a type is valid dictionary type. - pub fn is_valid_dictionary_type(typ: DatumKind) -> bool { - matches!(typ, DatumKind::String) - } - - /// Convert `self` to [`arrow::datatypes::Field`] - pub fn to_arrow_field(&self) -> Field { - From::from(self) - } - - /// Returns Ok if column with `writer_schema` can write to column with the - /// same schema as `self`. 
- pub fn compatible_for_write( - &self, - writer_schema: &ColumnSchema, - ) -> std::result::Result<(), CompatError> { - ensure!( - self.data_type == writer_schema.data_type, - IncompatDataType { - name: &self.name, - expect: writer_schema.data_type, - given: self.data_type, - } - ); - - // This column is not nullable but writer is nullable - ensure!( - self.is_nullable || !writer_schema.is_nullable, - NotNullable { name: &self.name } - ); - - Ok(()) - } - - /// Returns `Ok` if the source schema can read by this schema, now we won't - /// validate data type of column - pub fn compatible_for_read( - &self, - source_schema: &ColumnSchema, - ) -> std::result::Result { - if self.is_nullable { - // Column is nullable - if self.id == source_schema.id { - // Same column - Ok(ReadOp::Exact) - } else { - // Not the same column, maybe dropped, fill by null. - Ok(ReadOp::FillNull) - } - } else { - // Column is not null. We consider the old column was dropped if they have - // different column id and also try to fill by null, so we - // also check column id. - ensure!( - self.id == source_schema.id && !source_schema.is_nullable, - NotNullable { - name: &source_schema.name, - } - ); - - Ok(ReadOp::Exact) - } - } - - /// Check whether the given `desc` is correct with self. - pub fn is_correct_desc(&self, desc: &ColumnDesc) -> bool { - if self.id != desc.id { - return false; - } - - let desc_datum_kind = DatumKind::from(desc.typ()); - desc_datum_kind == self.data_type - } -} - -impl From<&ColumnSchema> for ColumnDesc { - fn from(column_schema: &ColumnSchema) -> Self { - Self { - id: column_schema.id, - typ: schema_pb::DataType::from(column_schema.data_type).into(), - } - } -} - -impl TryFrom for ColumnSchema { - type Error = Error; - - fn try_from(column_schema: schema_pb::ColumnSchema) -> Result { - let escaped_name = column_schema.name.escape_debug().to_string(); - let data_type = column_schema.data_type(); - let default_value = column_schema - .default_value - .map(|v| match v { - schema_pb::column_schema::DefaultValue::SerdeJson(encoded_val) => { - serde_json::from_slice::(&encoded_val) - .context(DecodeDefaultValue { encoded_val }) - } - }) - .transpose()?; - - Ok(Self { - id: column_schema.id, - name: column_schema.name, - data_type: DatumKind::from(data_type), - is_nullable: column_schema.is_nullable, - is_tag: column_schema.is_tag, - is_dictionary: column_schema.is_dictionary, - comment: column_schema.comment, - escaped_name, - default_value, - }) - } -} - -impl TryFrom<&Arc> for ColumnSchema { - type Error = Error; - - fn try_from(field: &Arc) -> Result { - let ArrowFieldMeta { - id, - is_tag, - is_dictionary, - comment, - } = decode_arrow_field_meta_data(field.metadata())?; - Ok(Self { - id, - name: field.name().clone(), - data_type: DatumKind::from_data_type(field.data_type()).context( - UnsupportedDataType { - data_type: field.data_type().clone(), - }, - )?, - is_nullable: field.is_nullable(), - is_tag, - is_dictionary, - comment, - escaped_name: field.name().escape_debug().to_string(), - default_value: None, - }) - } -} - -impl From<&ColumnSchema> for Field { - fn from(col_schema: &ColumnSchema) -> Self { - let metadata = encode_arrow_field_meta_data(col_schema); - // If the column should use dictionary, create correspond dictionary type. 
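The nullability half of compatible_for_write reduces to one boolean rule: a nullable writer may only target a nullable column. Reduced to plain booleans (types and error plumbing omitted, helper name illustrative):

fn write_nullability_ok(column_nullable: bool, writer_nullable: bool) -> bool {
    column_nullable || !writer_nullable
}

fn main() {
    assert!(write_nullability_ok(true, true));
    assert!(write_nullability_ok(true, false));
    assert!(write_nullability_ok(false, false));
    // A nullable writer cannot write into a non-nullable column.
    assert!(!write_nullability_ok(false, true));
}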
- let mut field = if col_schema.is_dictionary { - Field::new_dict( - &col_schema.name, - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), - col_schema.is_nullable, - col_schema.id.into(), - // TODO(tanruixiang): how to use dict_is_ordered - false, - ) - } else { - Field::new( - &col_schema.name, - col_schema.data_type.into(), - col_schema.is_nullable, - ) - }; - - field.set_metadata(metadata); - - field - } -} - -fn parse_arrow_field_meta_value( - meta: &HashMap, - key: ArrowFieldMetaKey, -) -> Result -where - T: FromStr, - T::Err: std::error::Error + Send + Sync + 'static, -{ - let raw_value = match meta.get(key.as_str()) { - None => { - if key.is_required() { - return ArrowFieldMetaKeyNotFound { key }.fail(); - } else { - return Ok(T::default()); - } - } - Some(v) => v, - }; - - T::from_str(raw_value.as_str()) - .map_err(|e| Box::new(e) as _) - .context(InvalidArrowFieldMetaValue { key, raw_value }) -} - -fn decode_arrow_field_meta_data(meta: &HashMap) -> Result { - if meta.is_empty() { - Ok(ArrowFieldMeta::default()) - } else { - Ok(ArrowFieldMeta { - id: parse_arrow_field_meta_value(meta, ArrowFieldMetaKey::Id)?, - is_tag: parse_arrow_field_meta_value(meta, ArrowFieldMetaKey::IsTag)?, - comment: parse_arrow_field_meta_value(meta, ArrowFieldMetaKey::Comment)?, - is_dictionary: parse_arrow_field_meta_value(meta, ArrowFieldMetaKey::IsDictionary)?, - }) - } -} - -fn encode_arrow_field_meta_data(col_schema: &ColumnSchema) -> HashMap { - let mut meta = HashMap::new(); - - meta.insert(ArrowFieldMetaKey::Id.to_string(), col_schema.id.to_string()); - meta.insert( - ArrowFieldMetaKey::IsTag.to_string(), - col_schema.is_tag.to_string(), - ); - meta.insert( - ArrowFieldMetaKey::IsDictionary.to_string(), - col_schema.is_dictionary.to_string(), - ); - meta.insert( - ArrowFieldMetaKey::Comment.to_string(), - col_schema.comment.clone(), - ); - - meta -} - -/// ColumnSchema builder -#[must_use] -pub struct Builder { - id: ColumnId, - name: String, - data_type: DatumKind, - is_nullable: bool, - is_tag: bool, - is_dictionary: bool, - comment: String, - default_value: Option, -} - -impl Builder { - /// Create a new builder - pub fn new(name: String, data_type: DatumKind) -> Self { - Self { - id: COLUMN_ID_UNINIT, - name, - data_type, - is_nullable: false, - is_tag: false, - is_dictionary: false, - comment: String::new(), - default_value: None, - } - } - - pub fn id(mut self, id: ColumnId) -> Self { - self.id = id; - self - } - - /// Set this column is nullable, default is false (not nullable). - pub fn is_nullable(mut self, is_nullable: bool) -> Self { - self.is_nullable = is_nullable; - self - } - - /// Set this column is tag, default is false (not a tag). - pub fn is_tag(mut self, is_tag: bool) -> Self { - self.is_tag = is_tag; - self - } - - /// Set this column is dictionary, default is false (not a dictionary). 
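encode_arrow_field_meta_data / decode_arrow_field_meta_data above persist column attributes as string key-value pairs under the "field::*" keys on the Arrow Field. A sketch of that round trip, assuming an arrow version where Field metadata is a HashMap<String, String> and Field::with_metadata is available:

use std::collections::HashMap;
use arrow::datatypes::{DataType, Field};

fn main() {
    let mut meta = HashMap::new();
    meta.insert("field::id".to_string(), "18".to_string());
    meta.insert("field::is_tag".to_string(), "true".to_string());

    let field = Field::new("host", DataType::Utf8, true).with_metadata(meta);

    // Decoding is just string parsing keyed by the "field::*" names.
    let id: u32 = field.metadata()["field::id"].parse().unwrap();
    let is_tag: bool = field.metadata()["field::is_tag"].parse().unwrap();
    assert_eq!(id, 18);
    assert!(is_tag);
}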
- pub fn is_dictionary(mut self, is_dictionary: bool) -> Self { - self.is_dictionary = is_dictionary; - self - } - - pub fn comment(mut self, comment: String) -> Self { - self.comment = comment; - self - } - - pub fn default_value(mut self, default_value: Option) -> Self { - self.default_value = default_value; - self - } - - pub fn validate(&self) -> Result<()> { - if self.is_tag { - ensure!( - ColumnSchema::is_valid_tag_type(self.data_type), - InvalidTagType { - data_type: self.data_type - } - ); - } - - if self.is_dictionary { - ensure!( - ColumnSchema::is_valid_dictionary_type(self.data_type), - InvalidDictionaryType { - data_type: self.data_type - } - ); - } - - Ok(()) - } - - pub fn build(self) -> Result { - self.validate()?; - let escaped_name = self.name.escape_debug().to_string(); - Ok(ColumnSchema { - id: self.id, - name: self.name, - data_type: self.data_type, - is_nullable: self.is_nullable, - is_tag: self.is_tag, - is_dictionary: self.is_dictionary, - comment: self.comment, - escaped_name, - default_value: self.default_value, - }) - } -} - -impl From for schema_pb::ColumnSchema { - fn from(src: ColumnSchema) -> Self { - let default_value = src.default_value.map(|v| { - // FIXME: Maybe we should throw this error rather than panic here. - let encoded_value = serde_json::to_vec(&v).unwrap(); - schema_pb::column_schema::DefaultValue::SerdeJson(encoded_value) - }); - - schema_pb::ColumnSchema { - name: src.name, - data_type: schema_pb::DataType::from(src.data_type) as i32, - is_nullable: src.is_nullable, - id: src.id, - is_tag: src.is_tag, - is_dictionary: src.is_dictionary, - comment: src.comment, - default_value, - } - } -} - -#[cfg(test)] -mod tests { - use macros::hash_map; - use sqlparser::ast::Value; - - use super::*; - - /// Create a column schema for test, each field is filled with non-default - /// value - fn new_test_column_schema() -> ColumnSchema { - Builder::new("test_column_schema".to_string(), DatumKind::String) - .id(18) - .is_nullable(true) - .is_tag(true) - .is_dictionary(true) - .comment("Comment of this column".to_string()) - .default_value(Some(Expr::Value(Value::Boolean(true)))) - .build() - .expect("should succeed to build column schema") - } - - #[test] - fn test_builder() { - let lhs = new_test_column_schema(); - let rhs = ColumnSchema { - id: 18, - name: "test_column_schema".to_string(), - data_type: DatumKind::String, - is_nullable: true, - is_tag: true, - is_dictionary: true, - comment: "Comment of this column".to_string(), - escaped_name: "test_column_schema".escape_debug().to_string(), - default_value: Some(Expr::Value(Value::Boolean(true))), - }; - - assert_eq!(&lhs, &rhs); - } - - #[test] - fn test_pb_convert() { - let column_schema = new_test_column_schema(); - let pb_schema = schema_pb::ColumnSchema::from(column_schema.clone()); - // Check pb specific fields - assert!(pb_schema.is_tag); - assert!(pb_schema.is_dictionary); - assert!(pb_schema.is_nullable); - - let schema_from_pb = ColumnSchema::try_from(pb_schema).unwrap(); - assert_eq!(&schema_from_pb, &column_schema); - } - - #[test] - fn test_valid_tag_type() { - let invalid_tag_types = [DatumKind::Null, DatumKind::Float, DatumKind::Double]; - - for v in &DatumKind::VALUES { - assert_eq!( - ColumnSchema::is_valid_tag_type(*v), - !invalid_tag_types.contains(v) - ); - } - } - - #[test] - fn test_valid_dictionary_type() { - let valid_dictionary_types = [DatumKind::String]; - - for v in &DatumKind::VALUES { - assert_eq!( - ColumnSchema::is_valid_dictionary_type(*v), - 
valid_dictionary_types.contains(v) - ); - } - } - - #[test] - fn test_decode_arrow_field_meta_data() { - let testcases = [ - ( - hash_map! { - "field::id".to_string() => "1".to_string(), - "field::is_tag".to_string() => "true".to_string(), - "field::comment".to_string() => "".to_string() - }, - ArrowFieldMeta { - id: 1, - is_tag: true, - comment: "".to_string(), - is_dictionary: false, - }, - ), - ( - hash_map! { - "field::id".to_string() => "1".to_string(), - "field::is_tag".to_string() => "false".to_string(), - "field::comment".to_string() => "abc".to_string(), - "field::is_dictionary".to_string() => "true".to_string() - }, - ArrowFieldMeta { - id: 1, - is_tag: false, - comment: "abc".to_string(), - is_dictionary: true, - }, - ), - ]; - - for (meta, expected) in &testcases { - assert_eq!(expected, &decode_arrow_field_meta_data(meta).unwrap()) - } - - let meta = hash_map! { - "field::id".to_string() => "1".to_string() - }; - assert!(decode_arrow_field_meta_data(&meta).is_err()); - } -} diff --git a/src/common_types/src/datum.rs b/src/common_types/src/datum.rs deleted file mode 100644 index 9b22439a22..0000000000 --- a/src/common_types/src/datum.rs +++ /dev/null @@ -1,1834 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Datum holds different kind of data - -use std::{convert::TryFrom, fmt, str}; - -use arrow::{ - datatypes::{DataType, TimeUnit}, - temporal_conversions::{EPOCH_DAYS_FROM_CE, NANOSECONDS}, -}; -use bytes_ext::Bytes; -use chrono::{Datelike, Local, NaiveDate, NaiveTime, TimeZone, Timelike}; -use datafusion::scalar::ScalarValue; -use hash_ext::hash64; -use horaedbproto::schema::DataType as DataTypePb; -use serde::ser::{Serialize, Serializer}; -use snafu::{Backtrace, OptionExt, ResultExt, Snafu}; -use sqlparser::ast::{DataType as SqlDataType, Value}; - -use crate::{hex, string::StringBytes, time::Timestamp}; - -const DATE_FORMAT: &str = "%Y-%m-%d"; -const TIME_FORMAT: &str = "%H:%M:%S%.3f"; -const NULL_VALUE_FOR_HASH: u128 = u128::MAX; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Unsupported SQL data type, type:{sql_type}.\nBacktrace:\n{backtrace}"))] - UnsupportedDataType { - sql_type: SqlDataType, - backtrace: Backtrace, - }, - - #[snafu(display("Invalid double or float, err:{source}.\nBacktrace:\n{backtrace}"))] - InvalidDouble { - source: std::num::ParseFloatError, - backtrace: Backtrace, - }, - - #[snafu(display( - "Invalid insert value, kind:{kind}, value:{value:?}.\nBacktrace:\n{backtrace}" - ))] - InvalidValueType { - kind: DatumKind, - value: Value, - backtrace: Backtrace, - }, - - #[snafu(display("Invalid timestamp, err:{source}.\nBacktrace:\n{backtrace}"))] - InvalidTimestamp { - source: std::num::ParseIntError, - backtrace: Backtrace, - }, - - #[snafu(display("Invalid date, err:{source}.\nBacktrace:\n{backtrace}"))] - InvalidDate { - source: chrono::ParseError, - backtrace: Backtrace, - }, - - #[snafu(display("Invalid time, err:{source}.\nBacktrace:\n{backtrace}"))] - InvalidTimeCause { - source: chrono::ParseError, - backtrace: Backtrace, - }, - - #[snafu(display("Invalid time, err:{source}.\nBacktrace:\n{backtrace}"))] - InvalidTimeHourFormat { - source: std::num::ParseIntError, - backtrace: Backtrace, - }, - - #[snafu(display("Invalid time, err:{msg}.\nBacktrace:\n{backtrace}"))] - InvalidTimeNoCause { msg: String, backtrace: Backtrace }, - - #[snafu(display("Invalid integer, err:{source}.\nBacktrace:\n{backtrace}"))] - InvalidInt { - source: std::num::ParseIntError, - backtrace: Backtrace, - }, - - #[snafu(display("Invalid datum byte, byte:{value}.\nBacktrace:\n{backtrace}"))] - InvalidDatumByte { value: u8, backtrace: Backtrace }, - - #[snafu(display("Invalid hex value, hex_val:{hex_val}.\nBacktrace:\n{backtrace}"))] - InvalidHexValue { - hex_val: String, - backtrace: Backtrace, - }, -} - -pub type Result = std::result::Result; - -// Float wrapper over f32/f64. Just because we cannot build std::hash::Hash for -// floats directly we have to do it through type wrapper -// Fork from datafusion: -// https://github.com/apache/arrow-datafusion/blob/1a0542acbc01e5243471ae0fc3586c2f1f40013b/datafusion/common/src/scalar.rs#L1493 -struct Fl(T); - -macro_rules! hash_float_value { - ($(($t:ty, $i:ty)),+) => { - $(impl std::hash::Hash for Fl<$t> { - #[inline] - fn hash(&self, state: &mut H) { - state.write(&<$i>::from_ne_bytes(self.0.to_ne_bytes()).to_ne_bytes()) - } - })+ - }; -} - -hash_float_value!((f64, u64), (f32, u32)); - -// FIXME(yingwen): How to handle timezone? 
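The Fl<T> wrapper and hash_float_value! above exist because f32/f64 do not implement Hash; hashing goes through the native-endian bit pattern instead. A standalone sketch for f64 only, using the standard library hasher:

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

struct Fl(f64);

impl Hash for Fl {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Hash the raw 8-byte bit pattern, since floats themselves are not Hash.
        state.write(&u64::from_ne_bytes(self.0.to_ne_bytes()).to_ne_bytes());
    }
}

fn hash_one(v: f64) -> u64 {
    let mut hasher = DefaultHasher::new();
    Fl(v).hash(&mut hasher);
    hasher.finish()
}

fn main() {
    assert_eq!(hash_one(1.5), hash_one(1.5));
    assert_ne!(hash_one(1.5), hash_one(2.5)); // distinct bit patterns hash differently here
}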
- -/// Data type of datum -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum DatumKind { - Null = 0, - Timestamp, - Double, - Float, - Varbinary, - String, - UInt64, - UInt32, - UInt16, - UInt8, - Int64, - Int32, - Int16, - Int8, - Boolean, - Date, - Time, -} - -impl DatumKind { - pub const VALUES: [Self; 17] = [ - Self::Null, - Self::Timestamp, - Self::Double, - Self::Float, - Self::Varbinary, - Self::String, - Self::UInt64, - Self::UInt32, - Self::UInt16, - Self::UInt8, - Self::Int64, - Self::Int32, - Self::Int16, - Self::Int8, - Self::Boolean, - Self::Date, - Self::Time, - ]; - - /// Return true if this is DatumKind::Timestamp - pub fn is_timestamp(&self) -> bool { - matches!(self, DatumKind::Timestamp) - } - - pub fn is_f64_castable(&self) -> bool { - matches!( - self, - Self::Double - | Self::Float - | Self::UInt64 - | Self::UInt32 - | Self::UInt16 - | Self::UInt8 - | Self::Int64 - | Self::Int32 - | Self::Int16 - | Self::Int8 - ) - } - - /// Can column of this datum kind used as key column - pub fn is_key_kind(&self) -> bool { - matches!( - self, - DatumKind::Timestamp - | DatumKind::Varbinary - | DatumKind::String - | DatumKind::UInt64 - | DatumKind::UInt32 - | DatumKind::UInt16 - | DatumKind::UInt8 - | DatumKind::Int64 - | DatumKind::Int32 - | DatumKind::Int16 - | DatumKind::Int8 - | DatumKind::Boolean - | DatumKind::Date - | DatumKind::Time - ) - } - - /// Can column of this datum kind used as dictionary encode column - pub fn is_dictionary_kind(&self) -> bool { - matches!(self, DatumKind::String) - } - - pub fn unsign_kind(&self) -> Option { - match self { - Self::Int64 | Self::UInt64 => Some(Self::UInt64), - Self::Int32 | Self::UInt32 => Some(Self::UInt32), - Self::Int16 | Self::UInt16 => Some(Self::UInt16), - Self::Int8 | Self::UInt8 => Some(Self::UInt8), - _ => None, - } - } - - /// Get name of this kind. - fn as_str(&self) -> &str { - match self { - DatumKind::Null => "null", - DatumKind::Timestamp => "timestamp", - DatumKind::Double => "double", - DatumKind::Float => "float", - DatumKind::Varbinary => "varbinary", - DatumKind::String => "string", - DatumKind::UInt64 => "uint64", - DatumKind::UInt32 => "uint32", - DatumKind::UInt16 => "uint16", - DatumKind::UInt8 => "uint8", - DatumKind::Int64 => "bigint", - DatumKind::Int32 => "int", - DatumKind::Int16 => "smallint", - DatumKind::Int8 => "tinyint", - DatumKind::Boolean => "boolean", - DatumKind::Date => "date", - DatumKind::Time => "time", - } - } - - /// Convert into a byte. 
- #[inline] - pub fn into_u8(self) -> u8 { - self as u8 - } - - /// Return None for variable-length type - pub fn size(&self) -> Option { - let size = match self { - DatumKind::Null => 0, - DatumKind::Timestamp => 8, - DatumKind::Double => 8, - DatumKind::Float => 4, - DatumKind::Varbinary => return None, - DatumKind::String => return None, - DatumKind::UInt64 => 8, - DatumKind::UInt32 => 4, - DatumKind::UInt16 => 2, - DatumKind::UInt8 => 1, - DatumKind::Int64 => 8, - DatumKind::Int32 => 4, - DatumKind::Int16 => 2, - DatumKind::Int8 => 1, - DatumKind::Boolean => 1, - DatumKind::Date => 4, - DatumKind::Time => 8, - }; - Some(size) - } -} - -impl TryFrom<&SqlDataType> for DatumKind { - type Error = Error; - - fn try_from(sql_type: &SqlDataType) -> Result { - match sql_type { - // TODO(yingwen): Consider timezone - SqlDataType::Timestamp(_, _) => Ok(Self::Timestamp), - SqlDataType::Real | SqlDataType::Float(_) => Ok(Self::Float), - SqlDataType::Double => Ok(Self::Double), - SqlDataType::Boolean => Ok(Self::Boolean), - SqlDataType::BigInt(_) => Ok(Self::Int64), - SqlDataType::Int64 => Ok(Self::Int64), - SqlDataType::Int(_) => Ok(Self::Int32), - SqlDataType::Int8(_) => Ok(Self::Int8), - SqlDataType::SmallInt(_) => Ok(Self::Int16), - SqlDataType::String(_) => Ok(Self::String), - SqlDataType::Varbinary(_) => Ok(Self::Varbinary), - SqlDataType::Date => Ok(Self::Date), - SqlDataType::Time(_, _) => Ok(Self::Time), - SqlDataType::Custom(objects, _) if objects.0.len() == 1 => { - match objects.0[0].value.as_str() { - "UINT64" | "uint64" => Ok(Self::UInt64), - "UINT32" | "uint32" => Ok(Self::UInt32), - "UINT16" | "uint16" => Ok(Self::UInt16), - "UINT8" | "uint8" => Ok(Self::UInt8), - "INT64" | "int64" => Ok(Self::Int64), - "INT32" | "int32" => Ok(Self::Int32), - "INT16" | "int16" => Ok(Self::Int16), - "TINYINT" | "INT8" | "tinyint" | "int8" => Ok(Self::Int8), - _ => UnsupportedDataType { - sql_type: sql_type.clone(), - } - .fail(), - } - } - - // Unlike datafusion, Decimal is not supported now - _ => UnsupportedDataType { - sql_type: sql_type.clone(), - } - .fail(), - } - } -} - -impl TryFrom for DatumKind { - type Error = Error; - - fn try_from(v: u8) -> Result { - match v { - v if DatumKind::Null.into_u8() == v => Ok(DatumKind::Null), - v if DatumKind::Timestamp.into_u8() == v => Ok(DatumKind::Timestamp), - v if DatumKind::Double.into_u8() == v => Ok(DatumKind::Double), - v if DatumKind::Float.into_u8() == v => Ok(DatumKind::Float), - v if DatumKind::Varbinary.into_u8() == v => Ok(DatumKind::Varbinary), - v if DatumKind::String.into_u8() == v => Ok(DatumKind::String), - v if DatumKind::UInt64.into_u8() == v => Ok(DatumKind::UInt64), - v if DatumKind::UInt32.into_u8() == v => Ok(DatumKind::UInt32), - v if DatumKind::UInt16.into_u8() == v => Ok(DatumKind::UInt16), - v if DatumKind::UInt8.into_u8() == v => Ok(DatumKind::UInt8), - v if DatumKind::Int64.into_u8() == v => Ok(DatumKind::Int64), - v if DatumKind::Int32.into_u8() == v => Ok(DatumKind::Int32), - v if DatumKind::Int16.into_u8() == v => Ok(DatumKind::Int16), - v if DatumKind::Int8.into_u8() == v => Ok(DatumKind::Int8), - v if DatumKind::Boolean.into_u8() == v => Ok(DatumKind::Boolean), - v if DatumKind::Date.into_u8() == v => Ok(DatumKind::Date), - v if DatumKind::Time.into_u8() == v => Ok(DatumKind::Time), - _ => InvalidDatumByte { value: v }.fail(), - } - } -} - -impl From for DataTypePb { - fn from(kind: DatumKind) -> Self { - match kind { - DatumKind::Null => Self::Null, - DatumKind::Timestamp => Self::Timestamp, - DatumKind::Double => 
Self::Double, - DatumKind::Float => Self::Float, - DatumKind::Varbinary => Self::Varbinary, - DatumKind::String => Self::String, - DatumKind::UInt64 => Self::Uint64, - DatumKind::UInt32 => Self::Uint32, - DatumKind::UInt16 => Self::Uint16, - DatumKind::UInt8 => Self::Uint8, - DatumKind::Int64 => Self::Int64, - DatumKind::Int32 => Self::Int32, - DatumKind::Int16 => Self::Int16, - DatumKind::Int8 => Self::Int8, - DatumKind::Boolean => Self::Bool, - DatumKind::Date => Self::Date, - DatumKind::Time => Self::Time, - } - } -} - -impl From for DatumKind { - fn from(data_type: DataTypePb) -> Self { - match data_type { - DataTypePb::Null => DatumKind::Null, - DataTypePb::Timestamp => DatumKind::Timestamp, - DataTypePb::Double => DatumKind::Double, - DataTypePb::Float => DatumKind::Float, - DataTypePb::Varbinary => DatumKind::Varbinary, - DataTypePb::String => DatumKind::String, - DataTypePb::Uint64 => DatumKind::UInt64, - DataTypePb::Uint32 => DatumKind::UInt32, - DataTypePb::Uint16 => DatumKind::UInt16, - DataTypePb::Uint8 => DatumKind::UInt8, - DataTypePb::Int64 => DatumKind::Int64, - DataTypePb::Int32 => DatumKind::Int32, - DataTypePb::Int16 => DatumKind::Int16, - DataTypePb::Int8 => DatumKind::Int8, - DataTypePb::Bool => DatumKind::Boolean, - DataTypePb::Date => DatumKind::Date, - DataTypePb::Time => DatumKind::Time, - } - } -} - -impl fmt::Display for DatumKind { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -// FIXME(yingwen): Validate the length of string and varbinary. -/// A data box holds different kind of data -#[derive(Debug, Clone, PartialEq, PartialOrd)] -pub enum Datum { - Null, - /// Millisecond precision - /// - /// Map to arrow::datatypes::DataType::Timestamp(TimeUnit::Millisecond, - /// None) - Timestamp(Timestamp), - /// Map to arrow::datatypes::DataType::Float64 - Double(f64), - /// Map to arrow::datatypes::DataType::Float32 - Float(f32), - /// Map to arrow::datatypes::DateType::Binary - /// - /// No more than 2G (size of i32) - Varbinary(Bytes), - /// Map to arrow::datatypes::DataType::String - /// - /// No more than 2G (size of i32) - String(StringBytes), - /// Map to arrow::datatypes::DataType::UInt64 - UInt64(u64), - UInt32(u32), - UInt16(u16), - UInt8(u8), - Int64(i64), - Int32(i32), - Int16(i16), - Int8(i8), - Boolean(bool), - /// Date represents the elapsed days since UNIX epoch. - /// It is mapped to [`arrow::datatypes::DataType::Date32`]. - /// The supported date range is '-9999-01-01' to '9999-12-31'. - Date(i32), - /// Time represents the elapsed nanoseconds since midnight. - /// It is mapped to [`arrow::datatypes::DataType::Time64`]. - /// The supported time range is '-838:59:59.000000' to '838:59:59.000000'. 
- Time(i64), -} - -impl Datum { - /// Creates an empty datum by given datum kind - pub fn empty(kind: &DatumKind) -> Self { - match kind { - DatumKind::Null => Self::Null, - DatumKind::Timestamp => Self::Timestamp(Timestamp::new(0)), - DatumKind::Double => Self::Double(0.0), - DatumKind::Float => Self::Float(0.0), - DatumKind::Varbinary => Self::Varbinary(Bytes::new()), - DatumKind::String => Self::String(StringBytes::new()), - DatumKind::UInt64 => Self::UInt64(0), - DatumKind::UInt32 => Self::UInt32(0), - DatumKind::UInt16 => Self::UInt16(0), - DatumKind::UInt8 => Self::UInt8(0), - DatumKind::Int64 => Self::Int64(0), - DatumKind::Int32 => Self::Int32(0), - DatumKind::Int16 => Self::Int16(0), - DatumKind::Int8 => Self::Int8(0), - DatumKind::Boolean => Self::Boolean(false), - DatumKind::Date => Self::Date(0), - DatumKind::Time => Self::Time(0), - } - } - - /// Return the kind of datum - pub fn kind(&self) -> DatumKind { - match self { - Datum::Null => DatumKind::Null, - Datum::Timestamp(_) => DatumKind::Timestamp, - Datum::Double(_) => DatumKind::Double, - Datum::Float(_) => DatumKind::Float, - Datum::Varbinary(_) => DatumKind::Varbinary, - Datum::String(_) => DatumKind::String, - Datum::UInt64(_) => DatumKind::UInt64, - Datum::UInt32(_) => DatumKind::UInt32, - Datum::UInt16(_) => DatumKind::UInt16, - Datum::UInt8(_) => DatumKind::UInt8, - Datum::Int64(_) => DatumKind::Int64, - Datum::Int32(_) => DatumKind::Int32, - Datum::Int16(_) => DatumKind::Int16, - Datum::Int8(_) => DatumKind::Int8, - Datum::Boolean(_) => DatumKind::Boolean, - Datum::Date(_) => DatumKind::Date, - Datum::Time(_) => DatumKind::Time, - } - } - - // TODO: handle error - pub fn convert_to_uint64(&self) -> u64 { - match self { - Datum::Null => 0, - Datum::Timestamp(v) => v.as_i64() as u64, - Datum::Double(v) => *v as u64, - Datum::Float(v) => *v as u64, - Datum::Varbinary(v) => hash64(&v[..]), - Datum::String(v) => hash64(v.as_bytes()), - Datum::UInt64(v) => *v, - Datum::UInt32(v) => *v as u64, - Datum::UInt16(v) => *v as u64, - Datum::UInt8(v) => *v as u64, - Datum::Int64(v) => *v as u64, - Datum::Int32(v) => *v as u64, - Datum::Int16(v) => *v as u64, - Datum::Int8(v) => *v as u64, - Datum::Boolean(v) => *v as u64, - Datum::Date(v) => *v as u64, - Datum::Time(v) => *v as u64, - } - } - - pub fn is_null(&self) -> bool { - matches!(self, Datum::Null) - } - - /// Cast datum to timestamp. - pub fn as_timestamp(&self) -> Option { - match self { - Datum::Time(v) => Some(Timestamp::new(*v)), - Datum::Timestamp(v) => Some(*v), - _ => None, - } - } - - /// Cast datum to &str. - pub fn as_str(&self) -> Option<&str> { - match self { - Datum::String(v) => Some(v), - _ => None, - } - } - - /// Cast datum to uint64. - pub fn as_u64(&self) -> Option { - match self { - Datum::UInt64(v) => Some(*v), - Datum::UInt32(v) => Some(*v as u64), - Datum::UInt16(v) => Some(*v as u64), - Datum::UInt8(v) => Some(*v as u64), - Datum::Int64(v) => Some(*v as u64), - Datum::Int32(v) => Some(*v as u64), - Datum::Int16(v) => Some(*v as u64), - Datum::Int8(v) => Some(*v as u64), - Datum::Boolean(v) => Some(*v as u64), - _ => None, - } - } - - /// Cast datum to int64. 
- pub fn as_i64(&self) -> Option { - match self { - Datum::UInt64(v) => Some(*v as i64), - Datum::UInt32(v) => Some(*v as i64), - Datum::UInt16(v) => Some(*v as i64), - Datum::UInt8(v) => Some(*v as i64), - Datum::Int64(v) => Some(*v), - Datum::Int32(v) => Some(*v as i64), - Datum::Int16(v) => Some(*v as i64), - Datum::Int8(v) => Some(*v as i64), - Datum::Boolean(v) => Some(*v as i64), - Datum::Date(v) => Some(*v as i64), - Datum::Time(v) => Some(*v), - _ => None, - } - } - - /// Cast datum to Bytes. - pub fn as_varbinary(&self) -> Option<&Bytes> { - match self { - Datum::Varbinary(v) => Some(v), - _ => None, - } - } - - pub fn as_f32(&self) -> Option { - match self { - Datum::Float(v) => Some(*v), - _ => None, - } - } - - pub fn as_f64(&self) -> Option { - match self { - Datum::Double(v) => Some(*v), - Datum::Float(v) => Some(*v as f64), - Datum::UInt64(v) => Some(*v as f64), - Datum::UInt32(v) => Some(*v as f64), - Datum::UInt16(v) => Some(*v as f64), - Datum::UInt8(v) => Some(*v as f64), - Datum::Int64(v) => Some(*v as f64), - Datum::Int32(v) => Some(*v as f64), - Datum::Date(v) => Some(*v as f64), - Datum::Time(v) => Some(*v as f64), - Datum::Int16(v) => Some(*v as f64), - Datum::Int8(v) => Some(*v as f64), - Datum::Boolean(_) - | Datum::Null - | Datum::Timestamp(_) - | Datum::Varbinary(_) - | Datum::String(_) => None, - } - } - - pub fn do_with_bytes(&self, mut f: F) - where - F: FnMut(&[u8]), - { - match self { - Datum::Double(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - Datum::Float(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - Datum::UInt64(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - Datum::UInt32(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - Datum::UInt16(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - Datum::UInt8(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - Datum::Int64(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - Datum::Int32(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - Datum::Int16(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - Datum::Int8(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - Datum::Boolean(v) => { - if *v { - f(&[1]) - } else { - f(&[0]) - } - } - Datum::Null => f(&[0]), - Datum::Timestamp(v) => { - let arr = v.as_i64().to_le_bytes(); - f(arr.as_slice()) - } - Datum::Varbinary(v) => f(v.as_ref()), - Datum::String(v) => f(v.as_bytes()), - Datum::Date(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - Datum::Time(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - } - } - - pub fn to_bytes(&self) -> Vec { - match self { - Datum::Double(v) => v.to_le_bytes().to_vec(), - Datum::Float(v) => v.to_le_bytes().to_vec(), - Datum::UInt64(v) => v.to_le_bytes().to_vec(), - Datum::UInt32(v) => v.to_le_bytes().to_vec(), - Datum::UInt16(v) => v.to_le_bytes().to_vec(), - Datum::UInt8(v) => v.to_le_bytes().to_vec(), - Datum::Int64(v) => v.to_le_bytes().to_vec(), - Datum::Int32(v) => v.to_le_bytes().to_vec(), - Datum::Int16(v) => v.to_le_bytes().to_vec(), - Datum::Int8(v) => v.to_le_bytes().to_vec(), - Datum::Boolean(v) => { - if *v { - vec![1] - } else { - vec![0] - } - } - Datum::Null => vec![0], - Datum::Timestamp(ts) => ts.as_i64().to_le_bytes().to_vec(), - Datum::Varbinary(b) => b.to_vec(), - Datum::String(string) => string.as_bytes().to_vec(), - Datum::Date(v) => v.to_le_bytes().to_vec(), - Datum::Time(v) => v.to_le_bytes().to_vec(), - } - } - - /// Generate a negative datum if possible. 
- /// - /// It will return `None` if: - /// - The data type has no negative value. - /// - The negative value overflows. - pub fn to_negative(self) -> Option { - match self { - Datum::Null => None, - Datum::Timestamp(_) => None, - Datum::Double(v) => Some(Datum::Double(-v)), - Datum::Float(v) => Some(Datum::Float(-v)), - Datum::Varbinary(_) => None, - Datum::String(_) => None, - Datum::UInt64(_) => None, - Datum::UInt32(_) => None, - Datum::UInt16(_) => None, - Datum::UInt8(_) => None, - Datum::Int64(v) => 0i64.checked_sub(v).map(Datum::Int64), - Datum::Int32(v) => 0i32.checked_sub(v).map(Datum::Int32), - Datum::Int16(v) => 0i16.checked_sub(v).map(Datum::Int16), - Datum::Int8(v) => 0i8.checked_sub(v).map(Datum::Int8), - Datum::Boolean(v) => Some(Datum::Boolean(!v)), - Datum::Date(_) => None, - Datum::Time(_) => None, - } - } - - pub fn display_string(&self) -> String { - match self { - Datum::Null => "null".to_string(), - Datum::Timestamp(v) => Local.timestamp_millis_opt(v.as_i64()).unwrap().to_rfc3339(), - Datum::Double(v) => v.to_string(), - Datum::Float(v) => v.to_string(), - Datum::Varbinary(v) => format!("{v:?}"), - Datum::String(v) => v.to_string(), - Datum::UInt64(v) => v.to_string(), - Datum::UInt32(v) => v.to_string(), - Datum::UInt16(v) => v.to_string(), - Datum::UInt8(v) => v.to_string(), - Datum::Int64(v) => v.to_string(), - Datum::Int32(v) => v.to_string(), - Datum::Int16(v) => v.to_string(), - Datum::Int8(v) => v.to_string(), - Datum::Boolean(v) => v.to_string(), - // Display the Date(32 bits) as String. - // Date(v) represent the days from Unix epoch(1970-01-01), - // so it is necessary to add `EPOCH_DAYS_FROM_CE` to generate - // `NaiveDate`. - Datum::Date(v) => NaiveDate::from_num_days_from_ce_opt((*v) + EPOCH_DAYS_FROM_CE) - .unwrap() - .to_string(), - - Datum::Time(v) => Datum::format_datum_time(v), - } - } - - pub fn try_from_sql_value(kind: &DatumKind, value: Value) -> Result { - match (kind, value) { - (DatumKind::Null, Value::Null) => Ok(Datum::Null), - (DatumKind::Timestamp, Value::Number(n, _long)) => { - let n = n.parse::().context(InvalidTimestamp)?; - Ok(Datum::Timestamp(Timestamp::new(n))) - } - (DatumKind::Date, Value::SingleQuotedString(s)) => { - let date = Self::parse_datum_date_from_str(&s)?; - Ok(date) - } - (DatumKind::Time, Value::SingleQuotedString(s)) => { - let datum_time = Self::parse_datum_time_from_str(&s)?; - Ok(datum_time) - } - (DatumKind::Double, Value::Number(n, _long)) => { - let n = n.parse::().context(InvalidDouble)?; - Ok(Datum::Double(n)) - } - (DatumKind::Float, Value::Number(n, _long)) => { - let n = n.parse::().context(InvalidDouble)?; - Ok(Datum::Float(n)) - } - // TODO(yingwen): Support hex string. 
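to_negative above negates signed integers with checked_sub from zero, so the single overflowing case (the minimum value) yields None instead of panicking. For i8, for example (helper name illustrative):

fn negate_i8(v: i8) -> Option<i8> {
    // 0 - i8::MIN would overflow (+128 does not fit in i8), hence checked_sub.
    0i8.checked_sub(v)
}

fn main() {
    assert_eq!(negate_i8(5), Some(-5));
    assert_eq!(negate_i8(i8::MIN), None);
}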
- (DatumKind::Varbinary, Value::SingleQuotedString(s)) => { - Ok(Datum::Varbinary(Bytes::from(s))) - } - (DatumKind::String, Value::SingleQuotedString(s)) => { - Ok(Datum::String(StringBytes::from(s))) - } - (DatumKind::Varbinary, Value::DoubleQuotedString(s)) => { - Ok(Datum::Varbinary(Bytes::from(s))) - } - (DatumKind::Varbinary, Value::HexStringLiteral(s)) => { - let bytes = hex::try_decode(&s).context(InvalidHexValue { hex_val: s })?; - Ok(Datum::Varbinary(Bytes::from(bytes))) - } - (DatumKind::String, Value::DoubleQuotedString(s)) => { - Ok(Datum::String(StringBytes::from(s))) - } - (DatumKind::UInt64, Value::Number(n, _long)) => { - let n = n.parse::().context(InvalidInt)?; - Ok(Datum::UInt64(n)) - } - (DatumKind::UInt32, Value::Number(n, _long)) => { - let n = n.parse::().context(InvalidInt)?; - Ok(Datum::UInt32(n)) - } - (DatumKind::UInt16, Value::Number(n, _long)) => { - let n = n.parse::().context(InvalidInt)?; - Ok(Datum::UInt16(n)) - } - (DatumKind::UInt8, Value::Number(n, _long)) => { - let n = n.parse::().context(InvalidInt)?; - Ok(Datum::UInt8(n)) - } - (DatumKind::Int64, Value::Number(n, _long)) => { - let n = n.parse::().context(InvalidInt)?; - Ok(Datum::Int64(n)) - } - (DatumKind::Int32, Value::Number(n, _long)) => { - let n = n.parse::().context(InvalidInt)?; - Ok(Datum::Int32(n)) - } - (DatumKind::Int16, Value::Number(n, _long)) => { - let n = n.parse::().context(InvalidInt)?; - Ok(Datum::Int16(n)) - } - (DatumKind::Int8, Value::Number(n, _long)) => { - let n = n.parse::().context(InvalidInt)?; - Ok(Datum::Int8(n)) - } - (DatumKind::Boolean, Value::Boolean(b)) => Ok(Datum::Boolean(b)), - (_, value) => InvalidValueType { kind: *kind, value }.fail(), - } - } - - /// format the `Datum::Time`(64 bits) as String. - /// Time represent the nanoseconds from midnight, - /// so it is necessary to split `v` into seconds and nanoseconds to - /// generate `NaiveTime`. - pub fn format_datum_time(v: &i64) -> String { - let abs_nanos = (*v).abs(); - let hours = abs_nanos / 3600 / NANOSECONDS; - let time = NaiveTime::from_num_seconds_from_midnight_opt( - (abs_nanos / NANOSECONDS - hours * 3600) as u32, - (abs_nanos % NANOSECONDS) as u32, - ) - .unwrap(); - let minute_sec = &(time.to_string())[3..]; - if *v < 0 { - format!("-{hours:02}:{minute_sec}") - } else { - format!("{hours:02}:{minute_sec}") - } - } - - /// format the `Datum::Date`(32 bits) as String. - fn format_datum_date(v: &i32) -> String { - NaiveDate::from_num_days_from_ce_opt((*v) + EPOCH_DAYS_FROM_CE) - .unwrap() - .format(DATE_FORMAT) - .to_string() - } - - fn parse_datum_time_from_str(s: &str) -> Result { - // `NaiveTime` contains two parts: `num_seconds_from_midnight` - // and `nanoseconds`, it is necessary to - // calculate them into number of nanoseconds from midnight. 
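format_datum_time and parse_datum_time_from_str above convert between "nanoseconds since midnight" and chrono's NaiveTime by splitting whole seconds from the sub-second remainder. A simplified sketch that ignores the negative and beyond-24h cases the real code handles (helper name illustrative):

use chrono::{NaiveTime, Timelike};

const NANOSECONDS: i64 = 1_000_000_000;

fn nanos_to_time(nanos: i64) -> NaiveTime {
    let secs = (nanos / NANOSECONDS) as u32;
    let frac = (nanos % NANOSECONDS) as u32;
    NaiveTime::from_num_seconds_from_midnight_opt(secs, frac).expect("in-range time")
}

fn main() {
    // 1h 1m 1s and 500ms after midnight.
    let t = nanos_to_time((3600 + 60 + 1) * NANOSECONDS + 500_000_000);
    assert_eq!(
        (t.hour(), t.minute(), t.second(), t.nanosecond()),
        (1, 1, 1, 500_000_000)
    );
}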
- if let Some(index) = s.find(':') { - let hours: i64 = (s[..index]).parse().context(InvalidTimeHourFormat)?; - let replace = format!("00:{}", &s[index + 1..]); - let time = - NaiveTime::parse_from_str(&replace, TIME_FORMAT).context(InvalidTimeCause)?; - let sec = hours.abs() * 3600 + (time.num_seconds_from_midnight() as i64); - let nanos = time.nanosecond() as i64 + sec * NANOSECONDS; - let nanos = if hours < 0 { -nanos } else { nanos }; - Ok(Datum::Time(nanos)) - } else { - InvalidTimeNoCause { - msg: "Invalid time format".to_string(), - } - .fail() - } - } - - fn parse_datum_date_from_str(s: &str) -> Result { - // `NaiveDate::num_days_from_ce()` returns the elapsed time - // since 0001-01-01 in days, it is necessary to - // subtract `EPOCH_DAYS_FROM_CE` to generate `Datum::Date` - let date = chrono::NaiveDate::parse_from_str(s, DATE_FORMAT).context(InvalidDate)?; - let days = date.num_days_from_ce() - EPOCH_DAYS_FROM_CE; - Ok(Datum::Date(days)) - } - - pub fn is_fixed_sized(&self) -> bool { - match self { - Datum::Null - | Datum::Timestamp(_) - | Datum::Double(_) - | Datum::Float(_) - | Datum::UInt64(_) - | Datum::UInt32(_) - | Datum::UInt16(_) - | Datum::UInt8(_) - | Datum::Int64(_) - | Datum::Int32(_) - | Datum::Int16(_) - | Datum::Int8(_) - | Datum::Boolean(_) - | Datum::Date(_) - | Datum::Time(_) => true, - Datum::Varbinary(_) | Datum::String(_) => false, - } - } - - pub fn size(&self) -> usize { - match self { - Datum::Null => 1, - Datum::Timestamp(_) => 8, - Datum::Double(_) => 8, - Datum::Float(_) => 4, - Datum::Varbinary(v) => v.len(), - Datum::String(v) => v.len(), - Datum::UInt64(_) => 8, - Datum::UInt32(_) => 4, - Datum::UInt16(_) => 2, - Datum::UInt8(_) => 1, - Datum::Int64(_) => 8, - Datum::Int32(_) => 4, - Datum::Int16(_) => 2, - Datum::Int8(_) => 1, - Datum::Boolean(_) => 1, - Datum::Date(_) => 4, - Datum::Time(_) => 8, - } - } - - pub fn as_view(&self) -> DatumView { - match self { - Datum::Null => DatumView::Null, - Datum::Timestamp(v) => DatumView::Timestamp(*v), - Datum::Double(v) => DatumView::Double(*v), - Datum::Float(v) => DatumView::Float(*v), - Datum::Varbinary(v) => DatumView::Varbinary(v), - Datum::String(v) => DatumView::String(v), - Datum::UInt64(v) => DatumView::UInt64(*v), - Datum::UInt32(v) => DatumView::UInt32(*v), - Datum::UInt16(v) => DatumView::UInt16(*v), - Datum::UInt8(v) => DatumView::UInt8(*v), - Datum::Int64(v) => DatumView::Int64(*v), - Datum::Int32(v) => DatumView::Int32(*v), - Datum::Date(v) => DatumView::Date(*v), - Datum::Time(v) => DatumView::Time(*v), - Datum::Int16(v) => DatumView::Int16(*v), - Datum::Int8(v) => DatumView::Int8(*v), - Datum::Boolean(v) => DatumView::Boolean(*v), - } - } -} - -macro_rules! 
impl_from { - ($Kind: ident, $FromType: ident) => { - impl From<$FromType> for Datum { - fn from(value: $FromType) -> Self { - Self::$Kind(value) - } - } - - impl From> for Datum { - fn from(value_opt: Option<$FromType>) -> Self { - match value_opt { - Some(value) => Self::$Kind(value), - None => Self::Null, - } - } - } - }; -} - -impl_from!(Timestamp, Timestamp); -impl_from!(Double, f64); -impl_from!(Float, f32); -impl_from!(Varbinary, Bytes); -impl_from!(String, StringBytes); -impl_from!(UInt64, u64); -impl_from!(UInt32, u32); -impl_from!(UInt16, u16); -impl_from!(UInt8, u8); -impl_from!(Int64, i64); -impl_from!(Int32, i32); -impl_from!(Int16, i16); -impl_from!(Int8, i8); -impl_from!(Boolean, bool); - -impl From<&str> for Datum { - fn from(value: &str) -> Datum { - Datum::String(StringBytes::copy_from_str(value)) - } -} - -impl From> for Datum { - fn from(value_opt: Option<&str>) -> Datum { - match value_opt { - Some(value) => Datum::String(StringBytes::copy_from_str(value)), - None => Datum::Null, - } - } -} - -impl From<&[u8]> for Datum { - fn from(value: &[u8]) -> Datum { - Datum::Varbinary(Bytes::copy_from_slice(value)) - } -} - -impl From> for Datum { - fn from(value_opt: Option<&[u8]>) -> Datum { - match value_opt { - Some(value) => Datum::Varbinary(Bytes::copy_from_slice(value)), - None => Datum::Null, - } - } -} - -/// impl serde serialize for Datum -impl Serialize for Datum { - fn serialize(&self, serializer: S) -> std::result::Result - where - S: Serializer, - { - match self { - Datum::Null => serializer.serialize_none(), - Datum::Timestamp(v) => serializer.serialize_i64(v.as_i64()), - Datum::Double(v) => serializer.serialize_f64(*v), - Datum::Float(v) => serializer.serialize_f32(*v), - Datum::Varbinary(v) => serializer.serialize_bytes(v), - Datum::String(v) => serializer.serialize_str(v), - Datum::UInt64(v) => serializer.serialize_u64(*v), - Datum::UInt32(v) => serializer.serialize_u32(*v), - Datum::UInt16(v) => serializer.serialize_u16(*v), - Datum::UInt8(v) => serializer.serialize_u8(*v), - Datum::Int64(v) => serializer.serialize_i64(*v), - Datum::Int32(v) => serializer.serialize_i32(*v), - Datum::Int16(v) => serializer.serialize_i16(*v), - Datum::Int8(v) => serializer.serialize_i8(*v), - Datum::Boolean(v) => serializer.serialize_bool(*v), - Datum::Date(v) => serializer.serialize_str(Self::format_datum_date(v).as_ref()), - Datum::Time(v) => serializer.serialize_str(Datum::format_datum_time(v).as_ref()), - } - } -} - -/// A view to a datum. -/// -/// Holds copy of integer like datum and reference of string like datum. 
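As an editorial aside (not in the original patch), the blanket `From` conversions generated by `impl_from!` and the serde `Serialize` impl above support sketches like the following; it assumes `serde_json` is available as a dev-dependency of this crate.

#[cfg(test)]
mod from_and_serialize_sketch {
    use super::*;

    #[test]
    fn conversions_and_json() {
        // From<T> and From<Option<T>> map values and None onto Datum variants.
        assert_eq!(Datum::from(42u64), Datum::UInt64(42));
        assert_eq!(Datum::from(None::<i64>), Datum::Null);
        assert_eq!(
            Datum::from("abc"),
            Datum::String(StringBytes::copy_from_str("abc"))
        );
        // Null serializes as JSON null, integers as bare numbers.
        assert_eq!(serde_json::to_string(&Datum::Int32(7)).unwrap(), "7");
        assert_eq!(serde_json::to_string(&Datum::Null).unwrap(), "null");
    }
}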
-#[derive(Clone, Debug, PartialEq, PartialOrd)] -pub enum DatumView<'a> { - Null, - Timestamp(Timestamp), - Double(f64), - Float(f32), - Varbinary(&'a [u8]), - String(&'a str), - UInt64(u64), - UInt32(u32), - UInt16(u16), - UInt8(u8), - Int64(i64), - Int32(i32), - Int16(i16), - Int8(i8), - Boolean(bool), - Date(i32), - Time(i64), -} - -impl<'a> DatumView<'a> { - #[inline] - pub fn is_null(&self) -> bool { - matches!(self, DatumView::Null) - } - - /// Return the kind of datum - pub fn kind(&self) -> DatumKind { - match self { - DatumView::Null => DatumKind::Null, - DatumView::Timestamp(_) => DatumKind::Timestamp, - DatumView::Double(_) => DatumKind::Double, - DatumView::Float(_) => DatumKind::Float, - DatumView::Varbinary(_) => DatumKind::Varbinary, - DatumView::String(_) => DatumKind::String, - DatumView::UInt64(_) => DatumKind::UInt64, - DatumView::UInt32(_) => DatumKind::UInt32, - DatumView::UInt16(_) => DatumKind::UInt16, - DatumView::UInt8(_) => DatumKind::UInt8, - DatumView::Int64(_) => DatumKind::Int64, - DatumView::Int32(_) => DatumKind::Int32, - DatumView::Int16(_) => DatumKind::Int16, - DatumView::Int8(_) => DatumKind::Int8, - DatumView::Boolean(_) => DatumKind::Boolean, - DatumView::Date(_) => DatumKind::Date, - DatumView::Time(_) => DatumKind::Time, - } - } - - pub fn do_with_bytes(&self, mut f: F) - where - F: FnMut(&[u8]), - { - match self { - DatumView::Double(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - DatumView::Float(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - DatumView::UInt64(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - DatumView::UInt32(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - DatumView::UInt16(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - DatumView::UInt8(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - DatumView::Int64(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - DatumView::Int32(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - DatumView::Int16(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - DatumView::Int8(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - DatumView::Boolean(v) => { - if *v { - f(&[1]) - } else { - f(&[0]) - } - } - DatumView::Null => f(&[0]), - DatumView::Timestamp(v) => { - let arr = v.as_i64().to_le_bytes(); - f(arr.as_slice()) - } - DatumView::Varbinary(v) => f(v), - DatumView::String(v) => f(v.as_bytes()), - DatumView::Date(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - DatumView::Time(v) => { - let arr = v.to_le_bytes(); - f(arr.as_slice()) - } - } - } - - pub fn to_datum(&self) -> Datum { - match self { - DatumView::Null => Datum::Null, - DatumView::Timestamp(v) => Datum::Timestamp(*v), - DatumView::Double(v) => Datum::Double(*v), - DatumView::Float(v) => Datum::Float(*v), - DatumView::Varbinary(v) => Datum::Varbinary(Bytes::from(v.to_vec())), - DatumView::String(v) => Datum::String(StringBytes::copy_from_str(v)), - DatumView::UInt64(v) => Datum::UInt64(*v), - DatumView::UInt32(v) => Datum::UInt32(*v), - DatumView::UInt16(v) => Datum::UInt16(*v), - DatumView::UInt8(v) => Datum::UInt8(*v), - DatumView::Int64(v) => Datum::Int64(*v), - DatumView::Int32(v) => Datum::Int32(*v), - DatumView::Int16(v) => Datum::Int16(*v), - DatumView::Int8(v) => Datum::Int8(*v), - DatumView::Boolean(v) => Datum::Boolean(*v), - DatumView::Date(v) => Datum::Date(*v), - DatumView::Time(v) => Datum::Time(*v), - } - } - - pub fn as_date_i32(&self) -> Option { - match self { - 
DatumView::Date(v) => Some(*v), - _ => None, - } - } - - pub fn as_i8(&self) -> Option { - match self { - DatumView::Int8(v) => Some(*v), - _ => None, - } - } - - pub fn as_i16(&self) -> Option { - match self { - DatumView::Int16(v) => Some(*v), - _ => None, - } - } - - pub fn as_i32(&self) -> Option { - match self { - DatumView::Int32(v) => Some(*v), - _ => None, - } - } - - pub fn as_i64(&self) -> Option { - match self { - DatumView::Int64(v) => Some(*v), - _ => None, - } - } - - pub fn as_u8(&self) -> Option { - match self { - DatumView::UInt8(v) => Some(*v), - _ => None, - } - } - - pub fn as_u16(&self) -> Option { - match self { - DatumView::UInt16(v) => Some(*v), - _ => None, - } - } - - pub fn as_u32(&self) -> Option { - match self { - DatumView::UInt32(v) => Some(*v), - _ => None, - } - } - - pub fn as_u64(&self) -> Option { - match self { - DatumView::UInt64(v) => Some(*v), - _ => None, - } - } - - pub fn as_bool(&self) -> Option { - match self { - DatumView::Boolean(v) => Some(*v), - _ => None, - } - } - - pub fn as_timestamp(&self) -> Option { - match self { - DatumView::Timestamp(v) => Some(*v), - DatumView::Time(v) => Some(Timestamp::new(*v)), - _ => None, - } - } - - pub fn as_f64(&self) -> Option { - match self { - DatumView::Double(v) => Some(*v), - _ => None, - } - } - - pub fn as_f32(&self) -> Option { - match self { - DatumView::Float(v) => Some(*v), - _ => None, - } - } - - pub fn into_str(self) -> Option<&'a str> { - match self { - DatumView::String(v) => Some(v), - _ => None, - } - } - - pub fn into_bytes(self) -> Option<&'a [u8]> { - match self { - DatumView::Varbinary(v) => Some(v), - _ => None, - } - } -} - -impl<'a> std::hash::Hash for DatumView<'a> { - fn hash(&self, state: &mut H) { - match self { - DatumView::Null => NULL_VALUE_FOR_HASH.hash(state), - DatumView::Timestamp(v) => v.hash(state), - DatumView::Double(v) => Fl(*v).hash(state), - DatumView::Float(v) => Fl(*v).hash(state), - DatumView::Varbinary(v) => v.hash(state), - DatumView::String(v) => v.hash(state), - DatumView::UInt64(v) => v.hash(state), - DatumView::UInt32(v) => v.hash(state), - DatumView::UInt16(v) => v.hash(state), - DatumView::UInt8(v) => v.hash(state), - DatumView::Int64(v) => v.hash(state), - DatumView::Int32(v) => v.hash(state), - DatumView::Int16(v) => v.hash(state), - DatumView::Int8(v) => v.hash(state), - DatumView::Boolean(v) => v.hash(state), - DatumView::Date(v) => v.hash(state), - DatumView::Time(v) => v.hash(state), - } - } -} - -impl DatumKind { - /// Create DatumKind from [arrow::datatypes::DataType], if - /// the type is not supported, returns None - pub fn from_data_type(data_type: &DataType) -> Option { - match data_type { - DataType::Null => Some(Self::Null), - DataType::Timestamp(TimeUnit::Millisecond, None) => Some(Self::Timestamp), - DataType::Timestamp(TimeUnit::Nanosecond, None) => Some(Self::Timestamp), - DataType::Float64 => Some(Self::Double), - DataType::Float32 => Some(Self::Float), - DataType::Binary => Some(Self::Varbinary), - DataType::Utf8 => Some(Self::String), - DataType::UInt64 => Some(Self::UInt64), - DataType::UInt32 => Some(Self::UInt32), - DataType::UInt16 => Some(Self::UInt16), - DataType::UInt8 => Some(Self::UInt8), - DataType::Int64 => Some(Self::Int64), - DataType::Int32 => Some(Self::Int32), - DataType::Int16 => Some(Self::Int16), - DataType::Int8 => Some(Self::Int8), - DataType::Boolean => Some(Self::Boolean), - DataType::Date32 => Some(Self::Date), - DataType::Time64(TimeUnit::Nanosecond) => Some(Self::Time), - DataType::Dictionary(_, _) => 
Some(Self::String), - DataType::Float16 - | DataType::LargeUtf8 - | DataType::LargeBinary - | DataType::FixedSizeBinary(_) - | DataType::Struct(_) - | DataType::Union(_, _) - | DataType::List(_) - | DataType::LargeList(_) - | DataType::FixedSizeList(_, _) - | DataType::Time32(_) - | DataType::Time64(_) - | DataType::Timestamp(_, _) - | DataType::Date64 - | DataType::Interval(_) - | DataType::Duration(_) - | DataType::Decimal128(_, _) - | DataType::Decimal256(_, _) - | DataType::RunEndEncoded(_, _) - | DataType::Map(_, _) => None, - } - } - - pub fn to_arrow_data_type(&self) -> DataType { - match self { - DatumKind::Null => DataType::Null, - DatumKind::Timestamp => DataType::Timestamp(TimeUnit::Millisecond, None), - DatumKind::Double => DataType::Float64, - DatumKind::Float => DataType::Float32, - DatumKind::Varbinary => DataType::Binary, - DatumKind::String => DataType::Utf8, - DatumKind::UInt64 => DataType::UInt64, - DatumKind::UInt32 => DataType::UInt32, - DatumKind::UInt16 => DataType::UInt16, - DatumKind::UInt8 => DataType::UInt8, - DatumKind::Int64 => DataType::Int64, - DatumKind::Int32 => DataType::Int32, - DatumKind::Int16 => DataType::Int16, - DatumKind::Int8 => DataType::Int8, - DatumKind::Boolean => DataType::Boolean, - DatumKind::Date => DataType::Date32, - DatumKind::Time => DataType::Time64(TimeUnit::Nanosecond), - } - } -} - -impl Datum { - pub fn as_scalar_value(&self) -> Option { - match self { - Datum::Null => None, - Datum::Timestamp(v) => { - Some(ScalarValue::TimestampMillisecond(Some((*v).as_i64()), None)) - } - Datum::Double(v) => Some(ScalarValue::Float64(Some(*v))), - Datum::Float(v) => Some(ScalarValue::Float32(Some(*v))), - Datum::Varbinary(v) => Some(ScalarValue::Binary(Some(v.to_vec()))), - Datum::String(v) => Some(ScalarValue::Utf8(Some(v.to_string()))), - Datum::UInt64(v) => Some(ScalarValue::UInt64(Some(*v))), - Datum::UInt32(v) => Some(ScalarValue::UInt32(Some(*v))), - Datum::UInt16(v) => Some(ScalarValue::UInt16(Some(*v))), - Datum::UInt8(v) => Some(ScalarValue::UInt8(Some(*v))), - Datum::Int64(v) => Some(ScalarValue::Int64(Some(*v))), - Datum::Int32(v) => Some(ScalarValue::Int32(Some(*v))), - Datum::Int16(v) => Some(ScalarValue::Int16(Some(*v))), - Datum::Int8(v) => Some(ScalarValue::Int8(Some(*v))), - Datum::Boolean(v) => Some(ScalarValue::Boolean(Some(*v))), - Datum::Date(v) => Some(ScalarValue::Date32(Some(*v))), - Datum::Time(v) => Some(ScalarValue::Time64Nanosecond(Some(*v))), - } - } - - pub fn from_scalar_value(val: &ScalarValue) -> Option { - match val { - ScalarValue::Boolean(v) => v.map(Datum::Boolean), - ScalarValue::Float32(v) => v.map(Datum::Float), - ScalarValue::Float64(v) => v.map(Datum::Double), - ScalarValue::Int8(v) => v.map(Datum::Int8), - ScalarValue::Int16(v) => v.map(Datum::Int16), - ScalarValue::Int32(v) => v.map(Datum::Int32), - ScalarValue::Int64(v) => v.map(Datum::Int64), - ScalarValue::UInt8(v) => v.map(Datum::UInt8), - ScalarValue::UInt16(v) => v.map(Datum::UInt16), - ScalarValue::UInt32(v) => v.map(Datum::UInt32), - ScalarValue::UInt64(v) => v.map(Datum::UInt64), - ScalarValue::Utf8(v) | ScalarValue::LargeUtf8(v) => v - .as_ref() - .map(|v| Datum::String(StringBytes::copy_from_str(v.as_str()))), - ScalarValue::Binary(v) - | ScalarValue::FixedSizeBinary(_, v) - | ScalarValue::LargeBinary(v) => v - .as_ref() - .map(|v| Datum::Varbinary(Bytes::copy_from_slice(v.as_slice()))), - ScalarValue::TimestampMillisecond(v, _) => { - v.map(|v| Datum::Timestamp(Timestamp::new(v))) - } - ScalarValue::Date32(v) => v.map(Datum::Date), - 
ScalarValue::Time64Nanosecond(v) => v.map(Datum::Time), - ScalarValue::Dictionary(_, literal) => Datum::from_scalar_value(literal), - ScalarValue::List(_) - | ScalarValue::Date64(_) - | ScalarValue::Time32Second(_) - | ScalarValue::Time32Millisecond(_) - | ScalarValue::Time64Microsecond(_) - | ScalarValue::TimestampSecond(_, _) - | ScalarValue::TimestampMicrosecond(_, _) - | ScalarValue::TimestampNanosecond(_, _) - | ScalarValue::IntervalYearMonth(_) - | ScalarValue::IntervalDayTime(_) - | ScalarValue::Struct(_, _) - | ScalarValue::Decimal128(_, _, _) - | ScalarValue::Null - | ScalarValue::IntervalMonthDayNano(_) - | ScalarValue::FixedSizeList(_) - | ScalarValue::DurationSecond(_) - | ScalarValue::DurationMillisecond(_) - | ScalarValue::DurationMicrosecond(_) - | ScalarValue::Decimal256(_, _, _) - | ScalarValue::LargeList(_) - | ScalarValue::DurationNanosecond(_) => None, - } - } -} - -impl<'a> DatumView<'a> { - pub fn from_scalar_value(val: &'a ScalarValue) -> Option { - match val { - ScalarValue::Boolean(v) => v.map(DatumView::Boolean), - ScalarValue::Float32(v) => v.map(DatumView::Float), - ScalarValue::Float64(v) => v.map(DatumView::Double), - ScalarValue::Int8(v) => v.map(DatumView::Int8), - ScalarValue::Int16(v) => v.map(DatumView::Int16), - ScalarValue::Int32(v) => v.map(DatumView::Int32), - ScalarValue::Int64(v) => v.map(DatumView::Int64), - ScalarValue::UInt8(v) => v.map(DatumView::UInt8), - ScalarValue::UInt16(v) => v.map(DatumView::UInt16), - ScalarValue::UInt32(v) => v.map(DatumView::UInt32), - ScalarValue::UInt64(v) => v.map(DatumView::UInt64), - ScalarValue::Date32(v) => v.map(DatumView::Date), - ScalarValue::Time64Nanosecond(v) => v.map(DatumView::Time), - ScalarValue::Utf8(v) | ScalarValue::LargeUtf8(v) => { - v.as_ref().map(|v| DatumView::String(v.as_str())) - } - ScalarValue::Binary(v) - | ScalarValue::FixedSizeBinary(_, v) - | ScalarValue::LargeBinary(v) => v.as_ref().map(|v| DatumView::Varbinary(v.as_slice())), - ScalarValue::TimestampMillisecond(v, _) => { - v.map(|v| DatumView::Timestamp(Timestamp::new(v))) - } - ScalarValue::Dictionary(_, literal) => DatumView::from_scalar_value(literal), - ScalarValue::List(_) - | ScalarValue::Date64(_) - | ScalarValue::Time32Second(_) - | ScalarValue::Time32Millisecond(_) - | ScalarValue::Time64Microsecond(_) - | ScalarValue::TimestampSecond(_, _) - | ScalarValue::TimestampMicrosecond(_, _) - | ScalarValue::TimestampNanosecond(_, _) - | ScalarValue::IntervalYearMonth(_) - | ScalarValue::IntervalDayTime(_) - | ScalarValue::Struct(_, _) - | ScalarValue::Decimal128(_, _, _) - | ScalarValue::Null - | ScalarValue::IntervalMonthDayNano(_) - | ScalarValue::FixedSizeList(_) - | ScalarValue::DurationSecond(_) - | ScalarValue::DurationMillisecond(_) - | ScalarValue::DurationMicrosecond(_) - | ScalarValue::Decimal256(_, _, _) - | ScalarValue::LargeList(_) - | ScalarValue::DurationNanosecond(_) => None, - } - } -} - -impl From for DataType { - fn from(kind: DatumKind) -> Self { - match kind { - DatumKind::Null => DataType::Null, - DatumKind::Timestamp => DataType::Timestamp(TimeUnit::Millisecond, None), - DatumKind::Double => DataType::Float64, - DatumKind::Float => DataType::Float32, - DatumKind::Varbinary => DataType::Binary, - DatumKind::String => DataType::Utf8, - DatumKind::UInt64 => DataType::UInt64, - DatumKind::UInt32 => DataType::UInt32, - DatumKind::UInt16 => DataType::UInt16, - DatumKind::UInt8 => DataType::UInt8, - DatumKind::Int64 => DataType::Int64, - DatumKind::Int32 => DataType::Int32, - DatumKind::Int16 => DataType::Int16, - 
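An editorial sketch of the `Datum` / DataFusion `ScalarValue` conversions defined just above (illustrative only; `ScalarValue` and `Timestamp` are assumed to be the imports used elsewhere in this file).

#[cfg(test)]
mod scalar_value_sketch {
    use super::*;

    #[test]
    fn datum_scalar_value_round_trip() {
        let d = Datum::Timestamp(Timestamp::new(1_600_000_000_000));
        let sv = d.as_scalar_value().unwrap();
        assert_eq!(
            sv,
            ScalarValue::TimestampMillisecond(Some(1_600_000_000_000), None)
        );
        assert_eq!(Datum::from_scalar_value(&sv), Some(d));
        // Null has no scalar-value counterpart in this mapping.
        assert!(Datum::Null.as_scalar_value().is_none());
    }
}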
DatumKind::Int8 => DataType::Int8, - DatumKind::Boolean => DataType::Boolean, - DatumKind::Date => DataType::Date32, - DatumKind::Time => DataType::Time64(TimeUnit::Nanosecond), - } - } -} - -#[cfg(test)] -mod tests { - use std::{ - collections::hash_map::DefaultHasher, - hash::{Hash, Hasher}, - }; - - use super::*; - - #[test] - fn test_is_key_kind() { - assert!(!DatumKind::Null.is_key_kind()); - assert!(DatumKind::Timestamp.is_key_kind()); - assert!(!DatumKind::Double.is_key_kind()); - assert!(!DatumKind::Float.is_key_kind()); - assert!(DatumKind::Varbinary.is_key_kind()); - assert!(DatumKind::String.is_key_kind()); - assert!(DatumKind::UInt64.is_key_kind()); - assert!(DatumKind::UInt32.is_key_kind()); - assert!(DatumKind::UInt16.is_key_kind()); - assert!(DatumKind::UInt8.is_key_kind()); - assert!(DatumKind::Int64.is_key_kind()); - assert!(DatumKind::Int32.is_key_kind()); - assert!(DatumKind::Int16.is_key_kind()); - assert!(DatumKind::Int8.is_key_kind()); - assert!(DatumKind::Boolean.is_key_kind()); - assert!(DatumKind::Date.is_key_kind()); - assert!(DatumKind::Time.is_key_kind()); - } - - #[test] - fn test_unsign_kind() { - assert_eq!(DatumKind::UInt64.unsign_kind(), Some(DatumKind::UInt64)); - assert_eq!(DatumKind::Int64.unsign_kind(), Some(DatumKind::UInt64)); - assert_eq!(DatumKind::UInt32.unsign_kind(), Some(DatumKind::UInt32)); - assert_eq!(DatumKind::Int32.unsign_kind(), Some(DatumKind::UInt32)); - assert_eq!(DatumKind::UInt16.unsign_kind(), Some(DatumKind::UInt16)); - assert_eq!(DatumKind::Int16.unsign_kind(), Some(DatumKind::UInt16)); - assert_eq!(DatumKind::UInt8.unsign_kind(), Some(DatumKind::UInt8)); - assert_eq!(DatumKind::Int8.unsign_kind(), Some(DatumKind::UInt8)); - - assert!(DatumKind::Null.unsign_kind().is_none()); - assert!(DatumKind::Timestamp.unsign_kind().is_none()); - assert!(DatumKind::String.unsign_kind().is_none()); - assert!(DatumKind::Boolean.unsign_kind().is_none()); - assert!(DatumKind::Varbinary.unsign_kind().is_none()); - assert!(DatumKind::Double.unsign_kind().is_none()); - assert!(DatumKind::Float.unsign_kind().is_none()); - } - - #[test] - fn test_into_u8() { - assert_eq!(0, DatumKind::Null.into_u8()); - assert_eq!(1, DatumKind::Timestamp.into_u8()); - assert_eq!(2, DatumKind::Double.into_u8()); - assert_eq!(3, DatumKind::Float.into_u8()); - assert_eq!(4, DatumKind::Varbinary.into_u8()); - assert_eq!(5, DatumKind::String.into_u8()); - assert_eq!(6, DatumKind::UInt64.into_u8()); - assert_eq!(7, DatumKind::UInt32.into_u8()); - assert_eq!(8, DatumKind::UInt16.into_u8()); - assert_eq!(9, DatumKind::UInt8.into_u8()); - assert_eq!(10, DatumKind::Int64.into_u8()); - assert_eq!(11, DatumKind::Int32.into_u8()); - assert_eq!(12, DatumKind::Int16.into_u8()); - assert_eq!(13, DatumKind::Int8.into_u8()); - assert_eq!(14, DatumKind::Boolean.into_u8()); - assert_eq!(15, DatumKind::Date.into_u8()); - assert_eq!(16, DatumKind::Time.into_u8()); - } - - #[test] - fn test_to_negative_value() { - let cases = [ - (Datum::Null, None), - (Datum::Timestamp(Timestamp::ZERO), None), - (Datum::Date(10), None), - (Datum::Time(10), None), - (Datum::Double(1.0), Some(Datum::Double(-1.0))), - (Datum::Float(1.0), Some(Datum::Float(-1.0))), - (Datum::Varbinary(Bytes::new()), None), - (Datum::String(StringBytes::new()), None), - (Datum::UInt64(10), None), - (Datum::UInt32(10), None), - (Datum::UInt16(10), None), - (Datum::UInt8(10), None), - (Datum::Int64(10), Some(Datum::Int64(-10))), - (Datum::Int32(10), Some(Datum::Int32(-10))), - (Datum::Int16(10), Some(Datum::Int16(-10))), - 
(Datum::Int8(10), Some(Datum::Int8(-10))), - ]; - - for (source, negative) in cases { - assert_eq!(negative, source.to_negative()); - } - } - - #[test] - fn test_to_overflow_negative_value() { - let cases = [ - Datum::Int64(i64::MIN), - Datum::Int32(i32::MIN), - Datum::Int16(i16::MIN), - Datum::Int8(i8::MIN), - ]; - - for source in cases { - assert!(source.to_negative().is_none()); - } - } - - #[test] - fn test_parse_datum_date() { - let cases = ["-9999-01-01", "9999-12-21", "2000-01-01", "1000-02-28"]; - - for case in cases { - let datum = Datum::parse_datum_date_from_str(case).unwrap(); - assert_eq!( - case.to_string(), - Datum::format_datum_date(&(datum.as_i64().unwrap() as i32)) - ); - } - } - - #[test] - fn test_parse_datum_date_error_cases() { - let err_cases = [ - "ab-01-01", - "01-ab-01", - "-9999-234-ab", - "100099-123-01", - "1990-01-123", - "1999", - "", - "1999--00--00", - "1999-0", - "1999-01-01-01", - ]; - - for source in err_cases { - assert!(Datum::parse_datum_date_from_str(source).is_err()); - } - } - - #[test] - fn test_parse_datum_time() { - // '-838:59:59.000000' to '838:59:59.000000' - let cases = [ - "-838:59:59.123", - "830:59:59.567", - "-23:59:59.999", - "23:59:59.999", - "00:59:59.567", - "10:10:10.234", - ]; - - for case in cases { - let datum = Datum::parse_datum_time_from_str(case).unwrap(); - assert_eq!( - case.to_string(), - Datum::format_datum_time(&datum.as_i64().unwrap()) - ); - } - } - - #[test] - fn test_parse_datum_time_error_cases() { - let err_cases = [ - "-ab:12:59.000", - "00:ab:59.000", - "-12:234:59.000", - "00:23:900.000", - "-00:59:59.abc", - "00", - "", - "00:00:00:00", - "12:", - ":", - ]; - - for source in err_cases { - assert!(Datum::parse_datum_time_from_str(source).is_err()); - } - } - - #[test] - fn test_convert_from_sql_value() { - let cases = vec![ - ( - Value::Boolean(false), - DatumKind::Boolean, - true, - Some(Datum::Boolean(false)), - ), - ( - Value::Number("100.1".to_string(), false), - DatumKind::Float, - true, - Some(Datum::Float(100.1)), - ), - ( - Value::SingleQuotedString("string_literal".to_string()), - DatumKind::String, - true, - Some(Datum::String(StringBytes::from_static("string_literal"))), - ), - ( - Value::HexStringLiteral("c70a0b".to_string()), - DatumKind::Varbinary, - true, - Some(Datum::Varbinary(Bytes::from(vec![199, 10, 11]))), - ), - ( - Value::EscapedStringLiteral("string_literal".to_string()), - DatumKind::String, - false, - None, - ), - ]; - - for (input, kind, succeed, expect) in cases { - let res = Datum::try_from_sql_value(&kind, input); - if succeed { - assert_eq!(res.unwrap(), expect.unwrap()); - } else { - assert!(res.is_err()); - } - } - } - - fn get_hash(v: &V) -> u64 { - let mut hasher = DefaultHasher::new(); - v.hash(&mut hasher); - hasher.finish() - } - - macro_rules! 
assert_datum_view_hash { - ($v:expr, $Kind: ident) => { - let expected = get_hash(&DatumView::$Kind($v)); - let actual = get_hash(&$v); - assert_eq!(expected, actual); - }; - } - - #[test] - fn test_hash() { - assert_datum_view_hash!(Timestamp::new(42), Timestamp); - assert_datum_view_hash!(42_i32, Date); - assert_datum_view_hash!(424_i64, Time); - assert_datum_view_hash!(b"abcde", Varbinary); - assert_datum_view_hash!("12345", String); - assert_datum_view_hash!(42424242_u64, UInt64); - assert_datum_view_hash!(424242_u32, UInt32); - assert_datum_view_hash!(4242_u16, UInt16); - assert_datum_view_hash!(42_u8, UInt8); - assert_datum_view_hash!(-42424242_i64, Int64); - assert_datum_view_hash!(-42424242_i32, Int32); - assert_datum_view_hash!(-4242_i16, Int16); - assert_datum_view_hash!(-42_i8, Int8); - assert_datum_view_hash!(true, Boolean); - - // Null case. - let null_expected = get_hash(&NULL_VALUE_FOR_HASH); - let null_actual = get_hash(&DatumView::Null); - assert_eq!(null_expected, null_actual); - - // Float case. - let float_expected = get_hash(&Fl(42.0_f32)); - let float_actual = get_hash(&DatumView::Float(42.0)); - assert_eq!(float_expected, float_actual); - - // Double case. - let double_expected = get_hash(&Fl(-42.0_f64)); - let double_actual = get_hash(&DatumView::Double(-42.0)); - assert_eq!(double_expected, double_actual); - } -} diff --git a/src/common_types/src/hex.rs b/src/common_types/src/hex.rs deleted file mode 100644 index f2b4739103..0000000000 --- a/src/common_types/src/hex.rs +++ /dev/null @@ -1,76 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -/// Try to decode bytes from hex literal string. -/// -/// None will be returned if the input literal is hex-invalid. -pub fn try_decode(s: &str) -> Option> { - let hex_bytes = s.as_bytes(); - - let mut decoded_bytes = Vec::with_capacity((hex_bytes.len() + 1) / 2); - - let start_idx = hex_bytes.len() % 2; - if start_idx > 0 { - // The first byte is formed of only one char. - decoded_bytes.push(try_decode_hex_char(hex_bytes[0])?); - } - - for i in (start_idx..hex_bytes.len()).step_by(2) { - let high = try_decode_hex_char(hex_bytes[i])?; - let low = try_decode_hex_char(hex_bytes[i + 1])?; - decoded_bytes.push(high << 4 | low); - } - - Some(decoded_bytes) -} - -/// Try to decode a byte from a hex char. -/// -/// None will be returned if the input char is hex-invalid. 
-const fn try_decode_hex_char(c: u8) -> Option { - match c { - b'A'..=b'F' => Some(c - b'A' + 10), - b'a'..=b'f' => Some(c - b'a' + 10), - b'0'..=b'9' => Some(c - b'0'), - _ => None, - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_decode_hex_literal() { - let cases = [ - ("", Some(vec![])), - ("FF00", Some(vec![255, 0])), - ("a00a", Some(vec![160, 10])), - ("FF0", Some(vec![15, 240])), - ("f", Some(vec![15])), - ("FF0X", None), - ("X0", None), - ("XX", None), - ("x", None), - ]; - - for (input, expect) in cases { - let output = try_decode(input); - assert_eq!(output, expect); - } - } -} diff --git a/src/common_types/src/lib.rs b/src/common_types/src/lib.rs deleted file mode 100644 index 334bd42f91..0000000000 --- a/src/common_types/src/lib.rs +++ /dev/null @@ -1,61 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Contains common types - -pub mod bitset; -pub mod cluster; -pub mod column; -pub mod column_block; -pub mod column_schema; -pub mod datum; -pub(crate) mod hex; -pub mod projected_schema; -pub mod record_batch; -pub mod request_id; -pub mod row; -pub mod schema; -pub mod string; -pub mod table; -pub mod time; - -/// Sequence number -pub type SequenceNumber = u64; -/// Maximum sequence number, all sequence number should less than this. -pub const MAX_SEQUENCE_NUMBER: u64 = u64::MAX; -/// Minimum sequence number, all sequence number should greater than this, so -/// sequence number should starts from 1. -pub const MIN_SEQUENCE_NUMBER: u64 = 0; - -/// Enable ttl key -pub const OPTION_KEY_ENABLE_TTL: &str = "enable_ttl"; -pub const SEGMENT_DURATION: &str = "segment_duration"; -pub const ENABLE_TTL: &str = OPTION_KEY_ENABLE_TTL; -pub const TTL: &str = "ttl"; -pub const ARENA_BLOCK_SIZE: &str = "arena_block_size"; -pub const WRITE_BUFFER_SIZE: &str = "write_buffer_size"; -pub const COMPACTION_STRATEGY: &str = "compaction_strategy"; -pub const NUM_ROWS_PER_ROW_GROUP: &str = "num_rows_per_row_group"; -pub const UPDATE_MODE: &str = "update_mode"; -pub const COMPRESSION: &str = "compression"; -pub const STORAGE_FORMAT: &str = "storage_format"; -pub const MEMTABLE_TYPE: &str = "memtable_type"; -pub const LAYERED_MUTABLE_SWITCH_THRESHOLD: &str = "layered_mutable_switch_threshold"; -pub const LAYERED_ENABLE: &str = "layered_enable"; - -#[cfg(any(test, feature = "test"))] -pub mod tests; diff --git a/src/common_types/src/projected_schema.rs b/src/common_types/src/projected_schema.rs deleted file mode 100644 index 1eff7dc424..0000000000 --- a/src/common_types/src/projected_schema.rs +++ /dev/null @@ -1,475 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Projected schema - -use std::{fmt, sync::Arc}; - -use snafu::{ensure, Backtrace, OptionExt, ResultExt, Snafu}; - -use crate::{ - column_schema::{ColumnSchema, ReadOp}, - datum::{Datum, DatumKind}, - row::Row, - schema::{ArrowSchemaRef, RecordSchema, RecordSchemaWithKey, Schema}, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display( - "Invalid projection index, index:{}.\nBacktrace:\n{}", - index, - backtrace - ))] - InvalidProjectionIndex { index: usize, backtrace: Backtrace }, - - #[snafu(display("Incompatible column schema for read, err:{}", source))] - IncompatReadColumn { - source: crate::column_schema::CompatError, - }, - - #[snafu(display("Failed to build projected schema, err:{}", source))] - BuildProjectedSchema { source: crate::schema::Error }, - - #[snafu(display( - "Missing not null column for read, name:{}.\nBacktrace:\n{}", - name, - backtrace - ))] - MissingReadColumn { name: String, backtrace: Backtrace }, - - #[snafu(display("Empty table schema.\nBacktrace:\n{}", backtrace))] - EmptyTableSchema { backtrace: Backtrace }, - - #[snafu(display("Failed to covert table schema, err:{}", source))] - ConvertTableSchema { - source: Box, - }, -} - -pub type Result = std::result::Result; - -#[derive(Debug, Clone)] -pub struct RowProjector { - /// The schema for data fetched - /// It is derived from table schema and some columns may not exist in data - /// source. - target_record_schema: RecordSchema, - - /// Primary key indexes in `fetched_schema`. - /// It will be `None` if update mode of table is `append`, - /// and will be `Some` if the mode is `overwrite`. - primary_key_indexes: Option>, - - /// Schema in data source - /// It is possible to be different with the table - /// schema caused by table schema altering. - source_schema: Schema, - - /// The Vec stores the column index in data source, and `None` means this - /// column is not in source but required by reader, and need to filled - /// by null. The length of Vec is the same as the number of columns - /// reader intended to read. - source_projection_indexes: Vec>, - - /// Used to reorder columns in arrow record batch fetched from sst to the - /// needed projection order. - /// Actually, It stores the record column indexes in - /// projected order similar as `source_projection_indexes`. - /// - /// Why we need it? - /// Because in current rust parquet impl, we can just define which columns - /// we wanted to fetch without their order. - /// - /// For example: - /// wanted columns in order: 2,1,3 - /// actual fetched columns: 1,2,3 - /// - /// However, projection is not only wanted columns but with wanted order, so - /// we need this remapping to reorder the fetched record. 
- /// - /// For example: - /// source columns in sst: 0,1,2,3,4 - /// target projection columns: 2,1,3 - /// - /// the actual columns in fetched record: 1,2,3 - /// relative columns indexes in fetched record: 0,1,2 - /// - /// finally, the remapping to the relative indexes: 1,0,2 - target_record_projection_remapping: Vec>, -} - -impl RowProjector { - pub fn new( - fetched_schema: &RecordSchema, - primary_key_indexes: Option>, - table_schema: &Schema, - source_schema: &Schema, - ) -> Result { - // Get `fetched_source_column_indexes`. - let mut fetched_source_column_indexes = Vec::with_capacity(fetched_schema.num_columns()); - let mut projected_source_indexes = Vec::with_capacity(fetched_schema.num_columns()); - for column_schema in fetched_schema.columns() { - Self::try_project_column( - column_schema, - table_schema, - source_schema, - &mut fetched_source_column_indexes, - &mut projected_source_indexes, - )?; - } - - // Get `fetched_projected_source_column_indexes` from - // `fetched_source_column_indexes`. - projected_source_indexes.sort_unstable(); - let fetched_projected_source_column_indexes = fetched_source_column_indexes - .iter() - .map(|source_idx_opt| { - source_idx_opt.map(|src_idx| { - // Safe to unwrap, index exists in `fetched_source_column_indexes` is ensured - // to exist in `projected_source_indexes`. - projected_source_indexes - .iter() - .position(|proj_idx| src_idx == *proj_idx) - .unwrap() - }) - }) - .collect(); - - Ok(RowProjector { - target_record_schema: fetched_schema.clone(), - primary_key_indexes, - source_schema: source_schema.clone(), - source_projection_indexes: fetched_source_column_indexes, - target_record_projection_remapping: fetched_projected_source_column_indexes, - }) - } - - fn try_project_column( - column: &ColumnSchema, - table_schema: &Schema, - source_schema: &Schema, - fetched_source_column_indexes: &mut Vec>, - projected_source_indexes: &mut Vec, - ) -> Result<()> { - match source_schema.index_of(&column.name) { - Some(source_idx) => { - // Column is in source - if table_schema.version() == source_schema.version() { - // Same version, just use that column in source - fetched_source_column_indexes.push(Some(source_idx)); - projected_source_indexes.push(source_idx); - } else { - // Different version, need to check column schema - let source_column = source_schema.column(source_idx); - // TODO(yingwen): Data type is not checked here because we do not support alter - // data type now. - match column - .compatible_for_read(source_column) - .context(IncompatReadColumn)? - { - ReadOp::Exact => { - fetched_source_column_indexes.push(Some(source_idx)); - projected_source_indexes.push(source_idx); - } - ReadOp::FillNull => { - fetched_source_column_indexes.push(None); - } - } - } - } - None => { - // Column is not in source - ensure!(column.is_nullable, MissingReadColumn { name: &column.name }); - // Column is nullable, fill this column by null - fetched_source_column_indexes.push(None); - } - } - - Ok(()) - } - - pub fn source_schema(&self) -> &Schema { - &self.source_schema - } - - pub fn fetched_schema(&self) -> &RecordSchema { - &self.target_record_schema - } - - /// The projected indexes of existed columns in the source schema. - pub fn existed_source_projection(&self) -> Vec { - self.source_projection_indexes - .iter() - .filter_map(|index| *index) - .collect() - } - - /// The projected indexes of all columns(existed and not exist) in the - /// source schema. 
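To make the remapping comment above concrete, here is a hypothetical standalone sketch of the index computation performed inside `RowProjector::new` (an editorial addition; `remap` is not a function in the original file).

// Mirrors the sort + position lookup done in `RowProjector::new`.
fn remap(target_projection: &[usize]) -> Vec<usize> {
    let mut fetched = target_projection.to_vec();
    fetched.sort_unstable();
    target_projection
        .iter()
        .map(|src| fetched.iter().position(|f| f == src).unwrap())
        .collect()
}

// remap(&[2, 1, 3]) == vec![1, 0, 2], matching the example in the comment above.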
- pub fn fetched_source_column_indexes(&self) -> &[Option] { - &self.source_projection_indexes - } - - /// The projected indexes of all columns(existed and not exist) in the - /// projected source schema. - pub fn target_record_projection_remapping(&self) -> &[Option] { - &self.target_record_projection_remapping - } - - pub fn primary_key_indexes(&self) -> Option<&[usize]> { - self.primary_key_indexes.as_deref() - } - - /// Project the row. - /// - /// REQUIRE: The schema of row is the same as source schema. - pub fn project_row(&self, row: &Row, mut datums_buffer: Vec) -> Row { - assert_eq!(self.source_schema.num_columns(), row.num_columns()); - - datums_buffer.reserve(self.target_record_schema.num_columns()); - - for p in &self.source_projection_indexes { - let datum = match p { - Some(index_in_source) => row[*index_in_source].clone(), - None => Datum::Null, - }; - - datums_buffer.push(datum); - } - - Row::from_datums(datums_buffer) - } - - /// Returns a datum kind selected - /// using an index into the source schema columns. - pub fn datum_kind(&self, index: usize) -> &DatumKind { - assert!(index < self.source_schema.num_columns()); - - &self.source_schema.column(index).data_type - } -} - -#[derive(Debug, Clone)] -pub struct RowProjectorBuilder { - fetched_schema: RecordSchema, - table_schema: Schema, - primary_key_indexes: Option>, -} - -impl RowProjectorBuilder { - pub fn new( - fetched_schema: RecordSchema, - table_schema: Schema, - primary_key_indexes: Option>, - ) -> Self { - Self { - fetched_schema, - table_schema, - primary_key_indexes, - } - } - - pub fn build(&self, source_schema: &Schema) -> Result { - RowProjector::new( - &self.fetched_schema, - self.primary_key_indexes.clone(), - &self.table_schema, - source_schema, - ) - } -} - -#[derive(Clone)] -pub struct ProjectedSchema(Arc); - -impl fmt::Debug for ProjectedSchema { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("ProjectedSchema") - .field("original_schema", &self.0.table_schema) - .field("projection", &self.0.projection) - .finish() - } -} - -impl ProjectedSchema { - pub fn no_projection(schema: Schema) -> Self { - let inner = ProjectedSchemaInner::no_projection(schema); - Self(Arc::new(inner)) - } - - pub fn new(table_schema: Schema, projection: Option>) -> Result { - let inner = ProjectedSchemaInner::new(table_schema, projection)?; - Ok(Self(Arc::new(inner))) - } - - pub fn is_all_projection(&self) -> bool { - self.0.is_all_projection() - } - - pub fn projection(&self) -> Option> { - self.0.projection() - } - - // Returns the record schema after projection with key. - pub fn to_record_schema_with_key(&self) -> RecordSchemaWithKey { - self.0.record_schema_with_key.clone() - } - - pub fn as_record_schema_with_key(&self) -> &RecordSchemaWithKey { - &self.0.record_schema_with_key - } - - // Returns the record schema after projection. - pub fn to_record_schema(&self) -> RecordSchema { - self.0.target_record_schema.clone() - } - - /// Returns the arrow schema after projection. 
- pub fn to_projected_arrow_schema(&self) -> ArrowSchemaRef { - self.0.target_record_schema.to_arrow_schema_ref() - } - - pub fn table_schema(&self) -> &Schema { - &self.0.table_schema - } - - pub fn target_column_schema(&self, i: usize) -> &ColumnSchema { - self.0.target_record_schema.column(i) - } -} - -impl From for horaedbproto::schema::ProjectedSchema { - fn from(request: ProjectedSchema) -> Self { - let table_schema_pb = (&request.0.table_schema).into(); - let projection_pb = request.0.projection.as_ref().map(|project| { - let project = project - .iter() - .map(|one_project| *one_project as u64) - .collect::>(); - horaedbproto::schema::Projection { idx: project } - }); - - Self { - table_schema: Some(table_schema_pb), - projection: projection_pb, - } - } -} - -impl TryFrom for ProjectedSchema { - type Error = Error; - - fn try_from( - pb: horaedbproto::schema::ProjectedSchema, - ) -> std::result::Result { - let schema: Schema = pb - .table_schema - .context(EmptyTableSchema)? - .try_into() - .map_err(|e| Box::new(e) as _) - .context(ConvertTableSchema)?; - let projection = pb - .projection - .map(|v| v.idx.into_iter().map(|id| id as usize).collect()); - - ProjectedSchema::new(schema, projection) - } -} - -/// Schema with projection informations -struct ProjectedSchemaInner { - /// The table schema used to generate plan, possible to differ from - /// schema in ssts/memtable. - table_schema: Schema, - /// Index of the projected columns in `self.schema`, `None` if - /// all columns are needed. - projection: Option>, - - /// The fetched record schema from `self.schema` with key columns after - /// projection. - record_schema_with_key: RecordSchemaWithKey, - /// The fetched record schema from `self.schema` after projection. - target_record_schema: RecordSchema, -} - -impl ProjectedSchemaInner { - fn no_projection(table_schema: Schema) -> Self { - let record_schema_with_key = table_schema.to_record_schema_with_key(); - let target_record_schema = table_schema.to_record_schema(); - - Self { - table_schema, - projection: None, - record_schema_with_key, - target_record_schema, - } - } - - fn new(table_schema: Schema, projection: Option>) -> Result { - if let Some(p) = &projection { - // Projection is provided, validate the projection is valid. This is necessary - // to avoid panic when creating RecordSchema and - // RecordSchemaWithKey. - if let Some(max_idx) = p.iter().max() { - ensure!( - *max_idx < table_schema.num_columns(), - InvalidProjectionIndex { index: *max_idx } - ); - } - - let record_schema_with_key = table_schema.project_record_schema_with_key(p); - let target_record_schema = table_schema.project_record_schema(p); - - Ok(Self { - table_schema, - projection, - record_schema_with_key, - target_record_schema, - }) - } else { - Ok(Self::no_projection(table_schema)) - } - } - - /// Selecting all the columns is the all projection. 
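An editorial sketch of how `ProjectedSchema` distinguishes full from partial projections (illustrative only; it reuses the `build_schema` test helper referenced by this crate's own tests).

#[cfg(test)]
mod projection_sketch {
    use crate::{projected_schema::ProjectedSchema, tests::build_schema};

    #[test]
    fn all_vs_partial_projection() {
        let schema = build_schema();
        // No projection means every column is selected.
        assert!(ProjectedSchema::no_projection(schema.clone()).is_all_projection());
        // An explicit column subset is not the all projection.
        let partial = ProjectedSchema::new(schema, Some(vec![0, 1])).unwrap();
        assert!(!partial.is_all_projection());
    }
}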
- fn is_all_projection(&self) -> bool { - self.projection.is_none() - } - - fn projection(&self) -> Option> { - self.projection.clone() - } -} - -#[cfg(test)] -mod tests { - use crate::{projected_schema::ProjectedSchema, tests::build_schema}; - - #[test] - fn test_projected_schema() { - let schema = build_schema(); - assert!(schema.num_columns() > 1); - let projection: Vec = (0..schema.num_columns() - 1).collect(); - let projected_schema = ProjectedSchema::new(schema.clone(), Some(projection)).unwrap(); - assert_eq!( - projected_schema.0.record_schema_with_key.num_columns(), - schema.num_columns() - 1 - ); - assert!(!projected_schema.is_all_projection()); - } -} diff --git a/src/common_types/src/record_batch.rs b/src/common_types/src/record_batch.rs deleted file mode 100644 index 0278aa7095..0000000000 --- a/src/common_types/src/record_batch.rs +++ /dev/null @@ -1,829 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Record batch - -use std::{cmp, convert::TryFrom, mem, sync::Arc}; - -use arrow::{ - array::BooleanArray, - compute, - datatypes::{DataType, Field, Schema, SchemaRef as ArrowSchemaRef, TimeUnit}, - error::ArrowError, - record_batch::{RecordBatch as ArrowRecordBatch, RecordBatchOptions}, -}; -use arrow_ext::operation; -use snafu::{ensure, Backtrace, OptionExt, ResultExt, Snafu}; - -use crate::{ - column_block::{cast_nanosecond_to_mills, ColumnBlock, ColumnBlockBuilder}, - datum::DatumKind, - projected_schema::ProjectedSchema, - row::{ - contiguous::{ContiguousRow, ProjectedContiguousRow}, - Row, RowViewOnBatch, - }, - schema::{RecordSchema, RecordSchemaWithKey}, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Invalid schema len to build RecordBatch.\nBacktrace:\n{}", backtrace))] - SchemaLen { backtrace: Backtrace }, - - #[snafu(display("Failed to create column block, err:{}", source))] - CreateColumnBlock { source: crate::column_block::Error }, - - #[snafu(display( - "Failed to create arrow record batch, err:{}.\nBacktrace:\n{}", - source, - backtrace - ))] - CreateArrow { - source: ArrowError, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to iterate datum, err:{}", source))] - IterateDatum { source: crate::row::Error }, - - #[snafu(display("Failed to append datum, err:{}", source))] - AppendDatum { source: crate::column_block::Error }, - - #[snafu(display( - "Column not in schema with key, column_name:{}.\nBacktrace:\n{}", - name, - backtrace - ))] - ColumnNotInSchemaWithKey { name: String, backtrace: Backtrace }, - - #[snafu(display("Failed to convert arrow schema, err:{}", source))] - ConvertArrowSchema { source: crate::schema::Error }, - - #[snafu(display("Mismatch record schema to build RecordBatch, column_name:{}, schema_type:{:?}, column_type:{:?}.\nBacktrace:\n{}", column_name, 
schema_type, column_type, backtrace))] - MismatchRecordSchema { - column_name: String, - schema_type: DatumKind, - column_type: DatumKind, - backtrace: Backtrace, - }, - - #[snafu(display( - "Projection is out of the index, source_projection:{:?}, arrow_schema:{}.\nBacktrace:\n{}", - source_projection, - arrow_schema, - backtrace - ))] - OutOfIndexProjection { - source_projection: Vec>, - arrow_schema: ArrowSchemaRef, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to reverse record batch data, err:{:?}.\nBacktrace:\n{}", - source, - backtrace - ))] - ReverseRecordBatchData { - source: Box, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to select record batch data, err:{:?}.\nBacktrace:\n{}", - source, - backtrace - ))] - SelectRecordBatchData { - source: Box, - backtrace: Backtrace, - }, -} - -pub type Result = std::result::Result; - -#[derive(Debug, Clone)] -pub struct RecordBatchData { - arrow_record_batch: ArrowRecordBatch, - column_blocks: Vec, -} - -impl RecordBatchData { - fn new( - arrow_schema: ArrowSchemaRef, - column_blocks: Vec, - options: RecordBatchOptions, - ) -> Result { - let arrays = column_blocks - .iter() - .map(|column| column.to_arrow_array_ref()) - .collect::>(); - let arrow_record_batch = - ArrowRecordBatch::try_new_with_options(arrow_schema, arrays, &options) - .context(CreateArrow)?; - - Ok(RecordBatchData { - arrow_record_batch, - column_blocks, - }) - } - - fn num_rows(&self) -> usize { - self.arrow_record_batch.num_rows() - } - - fn take_column_block(&mut self, index: usize) -> ColumnBlock { - let num_rows = self.num_rows(); - mem::replace( - &mut self.column_blocks[index], - ColumnBlock::new_null(num_rows), - ) - } - - /// Returns a zero-copy slice of this array with the indicated offset and - /// length. - /// - /// Panics if offset with length is greater than column length. - fn slice(&self, offset: usize, length: usize) -> Self { - let column_blocks = self - .column_blocks - .iter() - .map(|col| col.slice(offset, length)) - .collect(); - - Self { - arrow_record_batch: self.arrow_record_batch.slice(offset, length), - column_blocks, - } - } -} - -fn build_column_blocks_from_arrow_record_batch( - arrow_record_batch: &ArrowRecordBatch, - record_schema: &RecordSchema, -) -> Result> { - let mut column_blocks = Vec::with_capacity(arrow_record_batch.num_columns()); - for (column_schema, array) in record_schema - .columns() - .iter() - .zip(arrow_record_batch.columns()) - { - let column = ColumnBlock::try_from_arrow_array_ref(&column_schema.data_type, array) - .context(CreateColumnBlock)?; - column_blocks.push(column); - } - - Ok(column_blocks) -} - -impl TryFrom for RecordBatchData { - type Error = Error; - - fn try_from(arrow_record_batch: ArrowRecordBatch) -> Result { - let record_schema = - RecordSchema::try_from(arrow_record_batch.schema()).context(ConvertArrowSchema)?; - let column_blocks = - build_column_blocks_from_arrow_record_batch(&arrow_record_batch, &record_schema)?; - Ok(Self { - arrow_record_batch, - column_blocks, - }) - } -} - -// TODO(yingwen): The schema in RecordBatch should be much simple because it may -// lack some information. 
-#[derive(Debug, Clone)] -pub struct RecordBatch { - schema: RecordSchema, - data: RecordBatchData, -} - -impl RecordBatch { - pub fn new_empty(schema: RecordSchema) -> Self { - let arrow_schema = schema.to_arrow_schema_ref(); - let arrow_record_batch = ArrowRecordBatch::new_empty(arrow_schema); - - Self { - schema, - data: RecordBatchData { - arrow_record_batch, - column_blocks: Vec::new(), - }, - } - } - - pub fn new( - schema: RecordSchema, - column_blocks: Vec, - num_rows: usize, - ) -> Result { - ensure!(schema.num_columns() == column_blocks.len(), SchemaLen); - let options = RecordBatchOptions::new().with_row_count(Some(num_rows)); - // Validate schema and column_blocks. - for (column_schema, column_block) in schema.columns().iter().zip(column_blocks.iter()) { - ensure!( - column_schema.data_type == column_block.datum_kind(), - MismatchRecordSchema { - column_name: &column_schema.name, - schema_type: column_schema.data_type, - column_type: column_block.datum_kind(), - } - ); - } - - let arrow_schema = schema.to_arrow_schema_ref(); - let data = RecordBatchData::new(arrow_schema, column_blocks, options)?; - - Ok(Self { schema, data }) - } - - pub fn schema(&self) -> &RecordSchema { - &self.schema - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.num_rows() == 0 - } - - // REQUIRE: index is valid - #[inline] - pub fn column(&self, index: usize) -> &ColumnBlock { - &self.data.column_blocks[index] - } - - #[inline] - pub fn num_columns(&self) -> usize { - self.schema.num_columns() - } - - #[inline] - pub fn num_rows(&self) -> usize { - self.data.num_rows() - } - - #[inline] - pub fn as_arrow_record_batch(&self) -> &ArrowRecordBatch { - &self.data.arrow_record_batch - } - - #[inline] - pub fn into_arrow_record_batch(self) -> ArrowRecordBatch { - self.data.arrow_record_batch - } - - #[inline] - pub fn into_record_batch_data(self) -> RecordBatchData { - self.data - } -} - -impl TryFrom for RecordBatch { - type Error = Error; - - fn try_from(arrow_record_batch: ArrowRecordBatch) -> Result { - let record_schema = - RecordSchema::try_from(arrow_record_batch.schema()).context(ConvertArrowSchema)?; - - let column_blocks = - build_column_blocks_from_arrow_record_batch(&arrow_record_batch, &record_schema)?; - - let arrow_record_batch = cast_arrow_record_batch(arrow_record_batch)?; - Ok(Self { - schema: record_schema, - data: RecordBatchData { - arrow_record_batch, - column_blocks, - }, - }) - } -} - -fn cast_arrow_record_batch(source: ArrowRecordBatch) -> Result { - let row_count = source.num_columns(); - if row_count == 0 { - return Ok(source); - } - let columns = source.columns(); - let mut casted_columns = Vec::with_capacity(columns.len()); - for column in columns { - let column = match column.data_type() { - DataType::Timestamp(TimeUnit::Nanosecond, None) => { - cast_nanosecond_to_mills(column).context(AppendDatum)? 
- } - _ => column.clone(), - }; - casted_columns.push(column); - } - - let schema = source.schema(); - let fields = schema.all_fields(); - let mills_fileds = fields - .iter() - .map(|field| { - let mut f = match field.data_type() { - DataType::Timestamp(TimeUnit::Nanosecond, None) => Field::new( - field.name(), - DataType::Timestamp(TimeUnit::Millisecond, None), - field.is_nullable(), - ), - _ => { - let (dict_id, dict_is_ordered) = { - match field.data_type() { - DataType::Dictionary(_, _) => { - (field.dict_id().unwrap(), field.dict_is_ordered().unwrap()) - } - _ => (0, false), - } - }; - Field::new_dict( - field.name(), - field.data_type().clone(), - field.is_nullable(), - dict_id, - dict_is_ordered, - ) - } - }; - f.set_metadata(field.metadata().clone()); - f - }) - .collect::>(); - let mills_schema = Schema { - fields: mills_fileds.into(), - metadata: schema.metadata().clone(), - }; - let result = - ArrowRecordBatch::try_new(Arc::new(mills_schema), casted_columns).context(CreateArrow)?; - Ok(result) -} - -#[derive(Debug)] -pub struct FetchedRecordBatch { - schema: RecordSchema, - // TODO: remove it later, `FetchedRecordBatch` is unnecessary to know anything about primary - // keys. - primary_key_indexes: Option>, - data: RecordBatchData, -} - -impl FetchedRecordBatch { - pub fn try_new( - fetched_schema: RecordSchema, - primary_key_indexes: Option>, - column_indexes: &[Option], - arrow_record_batch: ArrowRecordBatch, - ) -> Result { - let mut column_blocks = Vec::with_capacity(fetched_schema.num_columns()); - let num_rows = arrow_record_batch.num_rows(); - let num_columns = arrow_record_batch.num_columns(); - let options = RecordBatchOptions::new().with_row_count(Some(num_rows)); - for (col_idx_opt, col_schema) in column_indexes.iter().zip(fetched_schema.columns()) { - match col_idx_opt { - Some(col_idx) => { - ensure!( - *col_idx < num_columns, - OutOfIndexProjection { - source_projection: column_indexes, - arrow_schema: arrow_record_batch.schema() - } - ); - - let array = arrow_record_batch.column(*col_idx); - let column_block = - ColumnBlock::try_from_arrow_array_ref(&col_schema.data_type, array) - .context(CreateColumnBlock)?; - - column_blocks.push(column_block); - } - None => { - // Need to push row with specific type. - let null_block = ColumnBlock::new_null_with_type( - &col_schema.data_type, - num_rows, - col_schema.is_dictionary, - ) - .context(CreateColumnBlock)?; - column_blocks.push(null_block); - } - } - } - - let data = - RecordBatchData::new(fetched_schema.to_arrow_schema_ref(), column_blocks, options)?; - - Ok(FetchedRecordBatch { - schema: fetched_schema, - primary_key_indexes, - data, - }) - } - - pub fn new_from_parts( - schema: RecordSchema, - primary_key_indexes: Option>, - data: RecordBatchData, - ) -> Self { - Self { - schema, - primary_key_indexes, - data, - } - } - - pub fn num_rows(&self) -> usize { - self.data.num_rows() - } - - pub fn num_columns(&self) -> usize { - self.data.arrow_record_batch.num_columns() - } - - pub fn columns(&self) -> &[ColumnBlock] { - &self.data.column_blocks - } - - pub fn clone_row_at(&self, index: usize) -> Row { - let datums = self - .data - .column_blocks - .iter() - .map(|column_block| column_block.datum(index)) - .collect(); - - Row::from_datums(datums) - } - - /// Project the [FetchedRecordBatch] into a [RecordBatch] according to - /// [ProjectedSchema]. - // TODO: how do we ensure `ProjectedSchema` passed here is same as the source - // `ProjectedSchema` of `RecordSchema` here? 
- pub fn try_project(mut self, projected_schema: &ProjectedSchema) -> Result { - // Get the schema after projection. - let record_schema = projected_schema.to_record_schema(); - let mut column_blocks = Vec::with_capacity(record_schema.num_columns()); - let num_rows = self.data.num_rows(); - let options = RecordBatchOptions::new().with_row_count(Some(num_rows)); - - for column_schema in record_schema.columns() { - let column_index = - self.schema - .index_of(&column_schema.name) - .context(ColumnNotInSchemaWithKey { - name: &column_schema.name, - })?; - - // Take the column block out. - let column_block = self.data.take_column_block(column_index); - column_blocks.push(column_block); - } - - let data = - RecordBatchData::new(record_schema.to_arrow_schema_ref(), column_blocks, options)?; - Ok(RecordBatch { - schema: record_schema, - data, - }) - } - - pub fn into_record_batch(self) -> RecordBatch { - RecordBatch { - schema: self.schema, - data: self.data, - } - } - - pub fn as_arrow_record_batch(&self) -> &ArrowRecordBatch { - &self.data.arrow_record_batch - } - - pub fn into_arrow_record_batch(self) -> ArrowRecordBatch { - self.data.arrow_record_batch - } - - pub fn schema_with_key(&self) -> Option { - self.primary_key_indexes - .clone() - .map(|idxs| RecordSchemaWithKey::new(self.schema.clone(), idxs)) - } - - #[inline] - pub fn schema(&self) -> &RecordSchema { - &self.schema - } - - #[inline] - pub fn primary_key_indexes(&self) -> Option<&[usize]> { - self.primary_key_indexes.as_deref() - } - - #[inline] - pub fn column(&self, index: usize) -> &ColumnBlock { - &self.data.column_blocks[index] - } - - /// Reverse the rows in the data. - /// - /// The data retains intact if failed. - pub fn reverse_data(&mut self) -> Result<()> { - let reversed_record_batch = operation::reverse_record_batch(&self.data.arrow_record_batch) - .map_err(|e| Box::new(e) as _) - .context(ReverseRecordBatchData)?; - - self.data = RecordBatchData::try_from(reversed_record_batch) - .map_err(|e| Box::new(e) as _) - .context(ReverseRecordBatchData)?; - - Ok(()) - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.num_rows() == 0 - } - - /// Returns a zero-copy slice of this array with the indicated offset and - /// length. - /// - /// Panics if offset with length is greater than column length. - #[must_use] - pub fn slice(&self, offset: usize, length: usize) -> Self { - Self { - schema: self.schema.clone(), - primary_key_indexes: self.primary_key_indexes.clone(), - data: self.data.slice(offset, length), - } - } - - /// Select the rows according to the `filter_array`. 
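`select_data`, documented above and defined just below, delegates the actual row filtering to Arrow's `filter_record_batch` kernel. A minimal standalone sketch of that kernel, assuming a recent `arrow` crate (the single `v` column and its values are invented for illustration):

```rust
use std::sync::Arc;

use arrow::array::{ArrayRef, BooleanArray, Int32Array};
use arrow::compute::filter_record_batch;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;

fn main() -> Result<(), arrow::error::ArrowError> {
    // A one-column batch; the column name and values are arbitrary.
    let schema = Arc::new(Schema::new(vec![Field::new("v", DataType::Int32, false)]));
    let column = Arc::new(Int32Array::from(vec![1, 2, 3, 4])) as ArrayRef;
    let batch = RecordBatch::try_new(schema, vec![column])?;

    // The boolean mask must have exactly one entry per row.
    let mask = BooleanArray::from(vec![true, false, true, false]);
    let filtered = filter_record_batch(&batch, &mask)?;
    assert_eq!(filtered.num_rows(), 2);
    Ok(())
}
```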
- pub fn select_data(&mut self, filter_array: &BooleanArray) -> Result<()> { - assert_eq!(self.num_rows(), filter_array.len()); - let selected_record_batch = - compute::filter_record_batch(&self.data.arrow_record_batch, filter_array) - .map_err(|e| Box::new(e) as _) - .context(SelectRecordBatchData)?; - - self.data = RecordBatchData::try_from(selected_record_batch) - .map_err(|e| Box::new(e) as _) - .context(SelectRecordBatchData)?; - - Ok(()) - } -} - -pub struct FetchedRecordBatchBuilder { - fetched_schema: RecordSchema, - primary_key_indexes: Option>, - builders: Vec, - num_rows: usize, -} - -impl FetchedRecordBatchBuilder { - pub fn new(fetched_schema: RecordSchema, primary_key_indexes: Option>) -> Self { - let builders = fetched_schema - .columns() - .iter() - .map(|column_schema| { - ColumnBlockBuilder::with_capacity( - &column_schema.data_type, - 0, - column_schema.is_dictionary, - ) - }) - .collect(); - Self { - fetched_schema, - primary_key_indexes, - builders, - num_rows: 0, - } - } - - pub fn with_capacity( - record_schema: RecordSchema, - primary_key_indexes: Option>, - capacity: usize, - ) -> Self { - let builders = record_schema - .columns() - .iter() - .map(|column_schema| { - ColumnBlockBuilder::with_capacity( - &column_schema.data_type, - capacity, - column_schema.is_dictionary, - ) - }) - .collect(); - Self { - fetched_schema: record_schema, - primary_key_indexes, - builders, - num_rows: 0, - } - } - - /// Append row into builder. - /// - /// REQUIRE: The row and the builder must have the same schema. - pub fn append_row(&mut self, row: Row) -> Result<()> { - for (builder, datum) in self.builders.iter_mut().zip(row) { - builder.append(datum).context(AppendDatum)?; - } - - Ok(()) - } - - /// Append projected contiguous row into builder. - /// - /// REQUIRE: - /// - The schema of `row` is the same as the source schema of the - /// `projector`. - /// - The projected schema (with key) is the same as the schema of the - /// builder. - pub fn append_projected_contiguous_row( - &mut self, - row: &ProjectedContiguousRow, - ) -> Result<()> { - assert_eq!(row.num_datum_views(), self.builders.len()); - - for (index, builder) in self.builders.iter_mut().enumerate() { - let datum_view = row.datum_view_at(index); - builder.append_view(datum_view).context(AppendDatum)?; - } - - Ok(()) - } - - /// Append the row from the [RowView] to the builder. - /// - /// REQUIRE: The `row_view` and the builder must have the same schema. - pub fn append_row_view(&mut self, row_view: &RowViewOnBatch) -> Result<()> { - for (builder, datum_view) in self.builders.iter_mut().zip(row_view.iter_columns()) { - let datum_view = datum_view.context(IterateDatum)?; - builder.append_view(datum_view).context(AppendDatum)?; - } - - Ok(()) - } - - /// When the record batch contains no column, its row num may not be 0, so - /// we need to inc row num explicitly in this case. - /// See: https://github.com/apache/arrow-datafusion/pull/7920 - pub fn inc_row_num(&mut self, n: usize) { - self.num_rows += n; - } - - /// Append `len` from `start` (inclusive) to this builder. - /// - /// REQUIRE: - /// - The `record_batch` and the builder must have the same schema. 
- pub fn append_batch_range( - &mut self, - record_batch: &FetchedRecordBatch, - start: usize, - len: usize, - ) -> Result { - let num_rows = record_batch.num_rows(); - if start >= num_rows { - return Ok(0); - } - - let added = cmp::min(num_rows - start, len); - - for (builder, column) in self.builders.iter_mut().zip(record_batch.columns().iter()) { - builder - .append_block_range(column, start, added) - .context(AppendDatum)?; - } - - Ok(added) - } - - /// The number of the appended rows. - pub fn len(&self) -> usize { - self.builders - .first() - .map(|builder| builder.len()) - .unwrap_or(self.num_rows) - } - - /// Returns true if the builder is empty. - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Reset the builders for reuse. - pub fn clear(&mut self) { - for builder in &mut self.builders { - builder.clear(); - } - } - - /// Build [FetchedRecordBatch] and reset the builder. - pub fn build(&mut self) -> Result { - let column_blocks: Vec<_> = self - .builders - .iter_mut() - .map(|builder| builder.build()) - .collect(); - let arrow_schema = self.fetched_schema.to_arrow_schema_ref(); - let num_rows = column_blocks - .first() - .map(|block| block.num_rows()) - .unwrap_or(self.num_rows); - let options = RecordBatchOptions::new().with_row_count(Some(num_rows)); - - Ok(FetchedRecordBatch { - schema: self.fetched_schema.clone(), - primary_key_indexes: self.primary_key_indexes.clone(), - data: RecordBatchData::new(arrow_schema, column_blocks, options)?, - }) - } -} - -#[cfg(test)] -mod tests { - use crate::{ - record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, - row::RowViewOnBatch, - tests::{ - build_fetched_record_batch_by_rows, build_projected_schema, build_rows, - check_record_batch_with_key_with_rows, - }, - }; - - fn build_fetched_record_batch() -> FetchedRecordBatch { - let rows = build_rows(); - build_fetched_record_batch_by_rows(rows) - } - - fn check_record_batch_with_key( - record_batch_with_key: FetchedRecordBatch, - row_num: usize, - column_num: usize, - ) -> bool { - let rows = build_rows(); - check_record_batch_with_key_with_rows(&record_batch_with_key, row_num, column_num, rows) - } - - #[test] - fn test_append_projected_contiguous_row() { - let record_batch_with_key = build_fetched_record_batch(); - assert_eq!(record_batch_with_key.num_rows(), 5); - assert_eq!(record_batch_with_key.num_columns(), 5); - - check_record_batch_with_key(record_batch_with_key, 5, 5); - } - - #[test] - fn test_append_row_view() { - let projected_schema = build_projected_schema(); - let fetched_record_batch = build_fetched_record_batch(); - let mut builder = - FetchedRecordBatchBuilder::with_capacity(projected_schema.to_record_schema(), None, 2); - let view = RowViewOnBatch { - record_batch: &fetched_record_batch, - row_idx: 1, - }; - builder.append_row_view(&view).unwrap(); - let record_batch_with_key = builder.build().unwrap(); - assert_eq!(record_batch_with_key.num_rows(), 1); - assert_eq!(record_batch_with_key.num_columns(), 5); - - check_record_batch_with_key(record_batch_with_key, 1, 5); - } - - #[test] - fn test_append_batch_range() { - let projected_schema = build_projected_schema(); - let record_batch_with_key = build_fetched_record_batch(); - - let mut builder = - FetchedRecordBatchBuilder::with_capacity(projected_schema.to_record_schema(), None, 2); - builder - .append_batch_range(&record_batch_with_key, 0, 2) - .unwrap(); - let record_batch_with_key = builder.build().unwrap(); - assert_eq!(record_batch_with_key.num_rows(), 2); - 
assert_eq!(record_batch_with_key.num_columns(), 5); - - check_record_batch_with_key(record_batch_with_key, 2, 5); - } -} diff --git a/src/common_types/src/request_id.rs b/src/common_types/src/request_id.rs deleted file mode 100644 index 300fb20eb4..0000000000 --- a/src/common_types/src/request_id.rs +++ /dev/null @@ -1,73 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Request id. - -use std::fmt; - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct RequestId(String); - -impl RequestId { - /// Acquire next request id. - pub fn next_id() -> Self { - let id = uuid::Uuid::new_v4().to_string(); - Self(id) - } - - #[inline] - pub fn as_str(&self) -> &str { - &self.0 - } -} - -impl fmt::Display for RequestId { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.0) - } -} - -impl From for RequestId { - fn from(id: String) -> Self { - Self(id) - } -} - -impl From for String { - fn from(value: RequestId) -> Self { - value.0 - } -} - -impl From<&str> for RequestId { - fn from(id: &str) -> Self { - Self(id.to_string()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_request_id() { - let id = RequestId::next_id(); - let id2 = RequestId::next_id(); - - assert_ne!(id, id2); - } -} diff --git a/src/common_types/src/row/bitset.rs b/src/common_types/src/row/bitset.rs deleted file mode 100644 index 36974fdbc4..0000000000 --- a/src/common_types/src/row/bitset.rs +++ /dev/null @@ -1,270 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Simple BitSet implementation. - -#![allow(dead_code)] - -const BIT_MASK: [u8; 8] = [1, 2, 4, 8, 16, 32, 64, 128]; -const UNSET_BIT_MASK: [u8; 8] = [ - 255 - 1, - 255 - 2, - 255 - 4, - 255 - 8, - 255 - 16, - 255 - 32, - 255 - 64, - 255 - 128, -]; - -/// A bit set representing at most 8 bits with a underlying u8. -pub struct OneByteBitSet(pub u8); - -impl OneByteBitSet { - /// Create from a given boolean slice. 
- /// - /// The values in the `bits` whose index is greater than 8 will be ignored. - pub fn from_slice(bits: &[bool]) -> Self { - let mut v = 0u8; - for (idx, set) in bits.iter().take(8).map(|v| *v as u8).enumerate() { - let (_, bit_idx) = RoBitSet::compute_byte_bit_index(idx); - v |= set << bit_idx - } - - Self(v) - } -} - -/// A basic implementation supporting read/write. -#[derive(Debug, Default, Clone)] -pub struct BitSet { - /// The bits are stored as bytes in the least significant bit order. - buffer: Vec, - /// The number of real bits in the `buffer` - num_bits: usize, -} - -impl BitSet { - /// Initialize a unset [`BitSet`]. - pub fn new(num_bits: usize) -> Self { - Self { - buffer: vec![0; Self::num_bytes(num_bits)], - num_bits, - } - } - - /// Initialize a [`BitSet`] with all bits set. - pub fn all_set(num_bits: usize) -> Self { - Self { - buffer: vec![0xFF; Self::num_bytes(num_bits)], - num_bits, - } - } - - #[inline] - pub fn num_bits(&self) -> usize { - self.num_bits - } - - #[inline] - pub fn num_bytes(num_bits: usize) -> usize { - (num_bits + 7) >> 3 - } - - /// Initialize directly from a buffer. - /// - /// None will be returned if the buffer's length is not enough to cover the - /// bits of `num_bits`. - pub fn try_from_raw(buffer: Vec, num_bits: usize) -> Option { - if buffer.len() < Self::num_bytes(num_bits) { - None - } else { - Some(Self { buffer, num_bits }) - } - } - - /// Set the bit at the `index`. - /// - /// Return false if the index is outside the range. - pub fn set(&mut self, index: usize) -> bool { - if index >= self.num_bits { - return false; - } - let (byte_index, bit_index) = RoBitSet::compute_byte_bit_index(index); - self.buffer[byte_index] |= BIT_MASK[bit_index]; - true - } - - /// Set the bit at the `index`. - /// - /// Return false if the index is outside the range. - pub fn unset(&mut self, index: usize) -> bool { - if index >= self.num_bits { - return false; - } - let (byte_index, bit_index) = RoBitSet::compute_byte_bit_index(index); - self.buffer[byte_index] &= UNSET_BIT_MASK[bit_index]; - true - } - - /// Tells whether the bit at the `index` is set. - pub fn is_set(&self, index: usize) -> Option { - let ro = RoBitSet { - buffer: &self.buffer, - num_bits: self.num_bits, - }; - ro.is_set(index) - } - - /// Tells whether the bit at the `index` is unset. - pub fn is_unset(&self, index: usize) -> Option { - let ro = RoBitSet { - buffer: &self.buffer, - num_bits: self.num_bits, - }; - ro.is_unset(index) - } - - #[inline] - pub fn as_bytes(&self) -> &[u8] { - &self.buffer - } - - pub fn into_bytes(self) -> Vec { - self.buffer - } -} - -/// A readonly version of [BitSet], only supports read. -pub struct RoBitSet<'a> { - /// The bits are stored as bytes in the least significant bit order. - buffer: &'a [u8], - /// The number of real bits in the `buffer` - num_bits: usize, -} - -impl<'a> RoBitSet<'a> { - pub fn try_new(buffer: &'a [u8], num_bits: usize) -> Option { - if buffer.len() < BitSet::num_bytes(num_bits) { - None - } else { - Some(Self { buffer, num_bits }) - } - } - - /// Tells whether the bit at the `index` is set. - pub fn is_set(&self, index: usize) -> Option { - if index >= self.num_bits { - return None; - } - let (byte_index, bit_index) = Self::compute_byte_bit_index(index); - let set = (self.buffer[byte_index] & (1 << bit_index)) != 0; - Some(set) - } - - /// Tells whether the bit at the `index` is set. 
- #[inline] - pub fn is_unset(&self, index: usize) -> Option { - self.is_set(index).map(|v| !v) - } - - #[inline] - fn compute_byte_bit_index(index: usize) -> (usize, usize) { - (index >> 3, index & 7) - } -} - -#[cfg(test)] -mod tests { - use std::assert_eq; - - use super::BitSet; - use crate::row::bitset::OneByteBitSet; - - #[test] - fn test_set_op() { - let mut bit_set = BitSet::new(50); - - assert!(bit_set.set(1)); - assert!(bit_set.is_set(1).unwrap()); - - assert!(bit_set.set(20)); - assert!(bit_set.is_set(20).unwrap()); - assert!(bit_set.set(49)); - assert!(bit_set.is_set(49).unwrap()); - assert!(bit_set.unset(49)); - assert!(bit_set.is_unset(49).unwrap()); - - assert!(!bit_set.set(100)); - assert!(bit_set.is_set(100).is_none()); - - assert_eq!( - bit_set.into_bytes(), - vec![ - 0b00000010, 0b00000000, 0b00010000, 0b00000000, 0b00000000, 0b00000000, 0b00000000 - ] - ); - } - - #[test] - fn test_unset() { - let mut bit_set = BitSet::all_set(50); - - assert!(bit_set.unset(1)); - assert!(bit_set.is_unset(1).unwrap()); - - assert!(bit_set.unset(20)); - assert!(bit_set.is_unset(20).unwrap()); - assert!(bit_set.unset(49)); - assert!(bit_set.is_unset(49).unwrap()); - - assert!(!bit_set.unset(100)); - assert!(bit_set.is_unset(100).is_none()); - - assert_eq!( - bit_set.into_bytes(), - vec![ - 0b11111101, 0b11111111, 0b11101111, 0b11111111, 0b11111111, 0b11111111, 0b11111101 - ] - ); - } - - #[test] - fn test_try_from_raw() { - let raw_bytes: Vec = vec![0b11111111, 0b11110000, 0b00001111, 0b00001100, 0b00001001]; - assert!(BitSet::try_from_raw(raw_bytes.clone(), 50).is_none()); - assert!(BitSet::try_from_raw(raw_bytes.clone(), 40).is_some()); - assert!(BitSet::try_from_raw(raw_bytes, 1).is_some()); - } - - #[test] - fn test_one_byte() { - let bits = [false, false, false, false, false, false]; - assert_eq!(0, OneByteBitSet::from_slice(&bits).0); - - let bits = [true, false, false, false, false, false]; - assert_eq!(1, OneByteBitSet::from_slice(&bits).0); - - let bits = [false, false, false, true, false, false, true, true]; - assert_eq!(128 + 64 + 8, OneByteBitSet::from_slice(&bits).0); - - let bits = [ - false, false, false, false, false, false, true, true, true, true, - ]; - assert_eq!(128 + 64, OneByteBitSet::from_slice(&bits).0); - } -} diff --git a/src/common_types/src/row/contiguous.rs b/src/common_types/src/row/contiguous.rs deleted file mode 100644 index b6d11766b1..0000000000 --- a/src/common_types/src/row/contiguous.rs +++ /dev/null @@ -1,832 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Contiguous row. 
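Before the contiguous-row encoding that follows, a note on the `row/bitset.rs` module removed just above: a bit at position `i` lives in byte `i >> 3` at bit `i & 7`, least-significant bit first, and `n` bits occupy `(n + 7) >> 3` bytes. A standalone sketch of that arithmetic (an illustration, not the crate's code):

```rust
/// Minimal illustration of the LSB-first bitset layout:
/// byte index = i >> 3, bit index = i & 7.
fn set_bit(buf: &mut [u8], i: usize) {
    buf[i >> 3] |= 1 << (i & 7);
}

fn unset_bit(buf: &mut [u8], i: usize) {
    buf[i >> 3] &= !(1 << (i & 7));
}

fn is_set(buf: &[u8], i: usize) -> bool {
    (buf[i >> 3] & (1 << (i & 7))) != 0
}

fn main() {
    // 50 bits need (50 + 7) >> 3 = 7 bytes, matching BitSet::num_bytes.
    let mut buf = vec![0u8; (50 + 7) >> 3];
    set_bit(&mut buf, 1);
    set_bit(&mut buf, 20);
    assert!(is_set(&buf, 20));
    unset_bit(&mut buf, 20);
    assert!(!is_set(&buf, 20));
    // Byte 0 now holds 0b00000010, as in the removed test_set_op test.
    assert_eq!(buf[0], 0b0000_0010);
}
```

The contiguous-row writer below starts from an all-set bitset and unsets bits for null or missing columns, which is why `BitSet::all_set` exists alongside `BitSet::new`.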
- -use std::{ - convert::TryInto, - debug_assert_eq, fmt, mem, - ops::{Deref, DerefMut}, - str, -}; - -use prost::encoding::{decode_varint, encode_varint, encoded_len_varint}; -use snafu::{ensure, Backtrace, Snafu}; - -use crate::{ - datum::{Datum, DatumKind, DatumView}, - projected_schema::RowProjector, - row::{ - bitset::{BitSet, RoBitSet}, - Row, - }, - schema::{IndexInWriterSchema, Schema}, - time::Timestamp, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display( - "String is too long to encode into row (max is {MAX_STRING_LEN}), len:{len}.\nBacktrace:\n{backtrace}", - ))] - StringTooLong { len: usize, backtrace: Backtrace }, - - #[snafu(display( - "Row is too long to encode(max is {MAX_ROW_LEN}), len:{len}.\nBacktrace:\n{backtrace}", - ))] - RowTooLong { len: usize, backtrace: Backtrace }, - - #[snafu(display("Number of null columns is missing.\nBacktrace:\n{backtrace}"))] - NumNullColsMissing { backtrace: Backtrace }, - - #[snafu(display("The raw bytes of bit set is invalid, expect_len:{expect_len}, give_len:{given_len}.\nBacktrace:\n{backtrace}"))] - InvalidBitSetBytes { - expect_len: usize, - given_len: usize, - backtrace: Backtrace, - }, -} - -pub type Result = std::result::Result; - -/// Offset used in row's encoding -type Offset = u32; - -/// Max allowed string length of datum to store in a contiguous row (16 MB). -const MAX_STRING_LEN: usize = 1024 * 1024 * 16; -/// Max allowed length of total bytes in a contiguous row (1 GB). -const MAX_ROW_LEN: usize = 1024 * 1024 * 1024; - -/// Row encoded in a contiguous buffer. -pub trait ContiguousRow { - /// Returns the number of datums. - fn num_datum_views(&self) -> usize; - - /// Returns [DatumView] of column in given index. - /// - /// Panic if index or buffer is out of bound. - fn datum_view_at(&self, index: usize, datum_kind: &DatumKind) -> DatumView; -} - -/// Here is the layout of the encoded continuous row: -/// ```plaintext -/// +------------------+-----------------+-------------------------+-------------------------+ -/// | num_bits(u32) | nulls_bit_set | datum encoding block... | var-len payload block | -/// +------------------+-----------------+-------------------------+-------------------------+ -/// ``` -/// The first block is the number of bits of the `nulls_bit_set`, which is used -/// to rebuild the bit set. The `nulls_bit_set` is used to record which columns -/// are null. With the bitset, any null column won't be encoded in the following -/// datum encoding block. -/// -/// And if `num_bits` is equal to zero, it will still take 4B while the -/// `nulls_bit_set` block will be ignored. -/// -/// As for the datum encoding block, most type shares the similar pattern: -/// ```plaintext -/// +----------------+ -/// | payload/offset | -/// +----------------+ -/// ``` -/// If the type has a fixed size, here will be the data payload. -/// Otherwise, a offset in the var-len payload block pointing the real payload. 
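To make the layout above concrete, the following sketch computes the encoded length of a hypothetical three-column row `(timestamp: i64, value: f64, name: "abc")` with no null columns, following the sizing rules used by the writer further below (the column set is invented for illustration):

```rust
fn main() {
    // num_bits header: always 4 bytes (a u32). It is 0 when the row has no
    // nulls, in which case the nulls bit set itself is omitted.
    let num_bits_header = std::mem::size_of::<u32>();

    // Fixed-size datum block: i64 timestamp + f64 value + a u32 offset slot
    // for the string column (variable-length types store an offset here).
    let datum_block = 8 + 8 + 4;

    // Var-len payload block: varint-encoded length (1 byte for len <= 127)
    // followed by the UTF-8 bytes of "abc".
    let name = "abc";
    let var_len_block = 1 + name.len();

    let encoded_len = num_bits_header + datum_block + var_len_block;
    assert_eq!(encoded_len, 4 + 20 + 4);
    println!("encoded row length: {encoded_len} bytes");
}
```

Because every fixed-size column occupies a known slot in the datum block, a reader can jump straight to a column's bytes from the schema's byte offsets without scanning the row.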
-struct Encoding; - -impl Encoding { - const fn size_of_offset() -> usize { - mem::size_of::() - } - - const fn size_of_num_bits() -> usize { - mem::size_of::() - } -} - -pub enum ContiguousRowReader<'a, T> { - NoNulls(ContiguousRowReaderNoNulls<'a, T>), - WithNulls(ContiguousRowReaderWithNulls<'a, T>), -} - -pub struct ContiguousRowReaderNoNulls<'a, T> { - inner: &'a T, - byte_offsets: &'a [usize], - datum_offset: usize, -} - -pub struct ContiguousRowReaderWithNulls<'a, T> { - buf: &'a T, - byte_offsets: Vec, - datum_offset: usize, -} - -impl<'a, T: Deref> ContiguousRowReader<'a, T> { - pub fn try_new(buf: &'a T, schema: &'a Schema) -> Result { - let byte_offsets = schema.byte_offsets(); - ensure!( - buf.len() >= Encoding::size_of_num_bits(), - NumNullColsMissing - ); - let num_bits = - u32::from_ne_bytes(buf[0..Encoding::size_of_num_bits()].try_into().unwrap()) as usize; - if num_bits > 0 { - ContiguousRowReaderWithNulls::try_new(buf, schema, num_bits).map(Self::WithNulls) - } else { - let reader = ContiguousRowReaderNoNulls { - inner: buf, - byte_offsets, - datum_offset: Encoding::size_of_num_bits(), - }; - Ok(Self::NoNulls(reader)) - } - } -} - -impl<'a, T: Deref> ContiguousRow for ContiguousRowReader<'a, T> { - fn num_datum_views(&self) -> usize { - match self { - Self::NoNulls(v) => v.num_datum_views(), - Self::WithNulls(v) => v.num_datum_views(), - } - } - - fn datum_view_at(&self, index: usize, datum_kind: &DatumKind) -> DatumView { - match self { - Self::NoNulls(v) => v.datum_view_at(index, datum_kind), - Self::WithNulls(v) => v.datum_view_at(index, datum_kind), - } - } -} - -impl<'a, T: Deref> ContiguousRowReaderWithNulls<'a, T> { - fn try_new(buf: &'a T, schema: &'a Schema, num_bits: usize) -> Result { - assert!(num_bits > 0); - - let bit_set_size = BitSet::num_bytes(num_bits); - let bit_set_buf = &buf[Encoding::size_of_num_bits()..]; - ensure!( - bit_set_buf.len() >= bit_set_size, - InvalidBitSetBytes { - expect_len: bit_set_size, - given_len: bit_set_buf.len() - } - ); - - let nulls_bit_set = RoBitSet::try_new(&bit_set_buf[..bit_set_size], num_bits).unwrap(); - - let mut fixed_byte_offsets = Vec::with_capacity(schema.num_columns()); - let mut acc_null_bytes = 0; - for (index, expect_offset) in schema.byte_offsets().iter().enumerate() { - match nulls_bit_set.is_set(index) { - Some(true) => fixed_byte_offsets.push((*expect_offset - acc_null_bytes) as isize), - Some(false) => { - fixed_byte_offsets.push(-1); - acc_null_bytes += byte_size_of_datum(&schema.column(index).data_type); - } - None => fixed_byte_offsets.push(-1), - } - } - - Ok(Self { - buf, - byte_offsets: fixed_byte_offsets, - datum_offset: Encoding::size_of_num_bits() + bit_set_size, - }) - } -} - -impl<'a, T: Deref> ContiguousRow for ContiguousRowReaderWithNulls<'a, T> { - fn num_datum_views(&self) -> usize { - self.byte_offsets.len() - } - - fn datum_view_at(&self, index: usize, datum_kind: &DatumKind) -> DatumView<'a> { - let offset = self.byte_offsets[index]; - if offset < 0 { - DatumView::Null - } else { - let datum_offset = self.datum_offset + offset as usize; - let datum_buf = &self.buf[datum_offset..]; - datum_view_at(datum_buf, self.buf, datum_kind) - } - } -} - -impl<'a, T: Deref> ContiguousRow for ContiguousRowReaderNoNulls<'a, T> { - fn num_datum_views(&self) -> usize { - self.byte_offsets.len() - } - - fn datum_view_at(&self, index: usize, datum_kind: &DatumKind) -> DatumView<'a> { - let offset = self.byte_offsets[index]; - let datum_buf = &self.inner[self.datum_offset + offset..]; - 
datum_view_at(datum_buf, self.inner, datum_kind) - } -} - -fn datum_view_at<'a>( - datum_buf: &'a [u8], - string_buf: &'a [u8], - datum_kind: &DatumKind, -) -> DatumView<'a> { - must_read_view(datum_kind, datum_buf, string_buf) -} - -/// Contiguous row with projection information. -/// -/// The caller must ensure the source schema of projector is the same as the -/// schema of source row. -pub struct ProjectedContiguousRow<'a, T> { - source_row: T, - ctx: &'a RowProjector, -} - -impl<'a, T: ContiguousRow> ProjectedContiguousRow<'a, T> { - pub fn new(source_row: T, ctx: &'a RowProjector) -> Self { - Self { source_row, ctx } - } - - pub fn num_datum_views(&self) -> usize { - self.ctx.fetched_source_column_indexes().len() - } - - pub fn datum_view_at(&self, index: usize) -> DatumView { - let p = self.ctx.fetched_source_column_indexes()[index]; - - match p { - Some(index_in_source) => { - let datum_kind = self.ctx.datum_kind(index_in_source); - self.source_row.datum_view_at(index_in_source, datum_kind) - } - None => DatumView::Null, - } - } -} - -impl<'a, T: ContiguousRow> fmt::Debug for ProjectedContiguousRow<'a, T> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut list = f.debug_list(); - for i in 0..self.num_datum_views() { - let view = self.datum_view_at(i); - list.entry(&view); - } - list.finish() - } -} - -/// In memory buffer to hold data of a contiguous row. -pub trait RowBuffer: DerefMut { - /// Clear and resize the buffer size to `new_len` with given `value`. - fn reset(&mut self, new_len: usize, value: u8); - - /// Append slice into the buffer, resize the buffer automatically. - fn append_slice(&mut self, src: &[u8]); -} - -/// A writer to build a contiguous row. -pub struct ContiguousRowWriter<'a, T> { - inner: &'a mut T, - /// The schema the row group need to be encoded into, the schema - /// of the row need to be write compatible for the table schema. - table_schema: &'a Schema, - /// The index mapping from table schema to column in the - /// schema of row group. - index_in_writer: &'a IndexInWriterSchema, -} - -// TODO(yingwen): Try to replace usage of row by contiguous row. -impl<'a, T: RowBuffer + 'a> ContiguousRowWriter<'a, T> { - pub fn new( - inner: &'a mut T, - table_schema: &'a Schema, - index_in_writer: &'a IndexInWriterSchema, - ) -> Self { - Self { - inner, - table_schema, - index_in_writer, - } - } - - fn write_datum( - inner: &mut T, - datum: &Datum, - offset: &mut usize, - next_string_offset: &mut usize, - ) -> Result<()> { - match datum { - // Already filled by null, nothing to do. - Datum::Null => {} - Datum::Timestamp(v) => { - let value_buf = v.as_i64().to_ne_bytes(); - Self::write_slice_to_offset(inner, offset, &value_buf); - } - Datum::Double(v) => { - let value_buf = v.to_ne_bytes(); - Self::write_slice_to_offset(inner, offset, &value_buf); - } - Datum::Float(v) => { - let value_buf = v.to_ne_bytes(); - Self::write_slice_to_offset(inner, offset, &value_buf); - } - Datum::Varbinary(v) => { - ensure!( - *next_string_offset <= MAX_ROW_LEN, - StringTooLong { - len: *next_string_offset - } - ); - // Encode the string offset as a u32. - let value_buf = (*next_string_offset as u32).to_ne_bytes(); - Self::write_slice_to_offset(inner, offset, &value_buf); - - // Encode length of string as a varint. 
- ensure!(v.len() <= MAX_STRING_LEN, StringTooLong { len: v.len() }); - let mut buf = [0; 4]; - let value_buf = Self::encode_varint(v.len() as u32, &mut buf); - Self::write_slice_to_offset(inner, next_string_offset, value_buf); - Self::write_slice_to_offset(inner, next_string_offset, v); - } - Datum::String(v) => { - ensure!( - *next_string_offset <= MAX_ROW_LEN, - StringTooLong { - len: *next_string_offset - } - ); - // Encode the string offset as a u32. - let value_buf = (*next_string_offset as u32).to_ne_bytes(); - Self::write_slice_to_offset(inner, offset, &value_buf); - - // Encode length of string as a varint. - ensure!(v.len() <= MAX_STRING_LEN, StringTooLong { len: v.len() }); - let mut buf = [0; 4]; - let value_buf = Self::encode_varint(v.len() as u32, &mut buf); - Self::write_slice_to_offset(inner, next_string_offset, value_buf); - Self::write_slice_to_offset(inner, next_string_offset, v.as_bytes()); - } - Datum::UInt64(v) => { - let value_buf = v.to_ne_bytes(); - Self::write_slice_to_offset(inner, offset, &value_buf); - } - Datum::UInt32(v) => { - let value_buf = v.to_ne_bytes(); - Self::write_slice_to_offset(inner, offset, &value_buf); - } - Datum::UInt16(v) => { - let value_buf = v.to_ne_bytes(); - Self::write_slice_to_offset(inner, offset, &value_buf); - } - Datum::UInt8(v) => { - Self::write_slice_to_offset(inner, offset, &[*v]); - } - Datum::Int64(v) => { - let value_buf = v.to_ne_bytes(); - Self::write_slice_to_offset(inner, offset, &value_buf); - } - Datum::Int32(v) => { - let value_buf = v.to_ne_bytes(); - Self::write_slice_to_offset(inner, offset, &value_buf); - } - Datum::Int16(v) => { - let value_buf = v.to_ne_bytes(); - Self::write_slice_to_offset(inner, offset, &value_buf); - } - Datum::Int8(v) => { - Self::write_slice_to_offset(inner, offset, &[*v as u8]); - } - Datum::Boolean(v) => { - Self::write_slice_to_offset(inner, offset, &[*v as u8]); - } - Datum::Date(v) => { - let value_buf = v.to_ne_bytes(); - Self::write_slice_to_offset(inner, offset, &value_buf); - } - Datum::Time(v) => { - let value_buf = v.to_ne_bytes(); - Self::write_slice_to_offset(inner, offset, &value_buf); - } - } - - Ok(()) - } - - /// Write a row to the buffer, the buffer will be reset first. - pub fn write_row(&mut self, row: &Row) -> Result<()> { - let mut num_null_cols = 0; - for index_in_table in 0..self.table_schema.num_columns() { - if let Some(writer_index) = self.index_in_writer.column_index_in_writer(index_in_table) - { - let datum = &row[writer_index]; - if datum.is_null() { - num_null_cols += 1; - } - } else { - num_null_cols += 1; - } - } - - if num_null_cols > 0 { - self.write_row_with_nulls(row) - } else { - self.write_row_without_nulls(row) - } - } - - fn write_row_with_nulls(&mut self, row: &Row) -> Result<()> { - let mut encoded_len = 0; - let mut num_bytes_of_variable_col = 0; - for index_in_table in 0..self.table_schema.num_columns() { - if let Some(writer_index) = self.index_in_writer.column_index_in_writer(index_in_table) - { - let datum = &row[writer_index]; - // No need to store null column. - if !datum.is_null() { - encoded_len += byte_size_of_datum(&datum.kind()); - } - - if !datum.is_fixed_sized() { - // For the datum content and the length of it - let len = datum.size(); - let size = len + encoded_len_varint(len as u64); - num_bytes_of_variable_col += size; - encoded_len += size; - } - } else { - // No need to store null column. 
- } - } - - let num_bits = self.table_schema.num_columns(); - // Assume most columns are not null, so use a bitset with all bit set at first. - let mut nulls_bit_set = BitSet::all_set(num_bits); - // The flag for the BitSet, denoting the number of the columns. - encoded_len += Encoding::size_of_num_bits() + nulls_bit_set.as_bytes().len(); - - // Pre-allocate the memory. - self.inner.reset(encoded_len, 0); - let mut next_string_offset = encoded_len - num_bytes_of_variable_col; - let mut datum_offset = Encoding::size_of_num_bits() + nulls_bit_set.as_bytes().len(); - for index_in_table in 0..self.table_schema.num_columns() { - if let Some(writer_index) = self.index_in_writer.column_index_in_writer(index_in_table) - { - let datum = &row[writer_index]; - // Write datum bytes to the buffer. - Self::write_datum( - self.inner, - datum, - &mut datum_offset, - &mut next_string_offset, - )?; - - if datum.is_null() { - nulls_bit_set.unset(index_in_table); - } - } else { - // This column should be treated as null. - nulls_bit_set.unset(index_in_table); - } - } - - // Storing the number of null columns as u32 is enough. - Self::write_slice_to_offset(self.inner, &mut 0, &(num_bits as u32).to_ne_bytes()); - Self::write_slice_to_offset( - self.inner, - &mut Encoding::size_of_num_bits(), - nulls_bit_set.as_bytes(), - ); - - debug_assert_eq!(datum_offset, encoded_len - num_bytes_of_variable_col); - debug_assert_eq!(next_string_offset, encoded_len); - - Ok(()) - } - - fn write_row_without_nulls(&mut self, row: &Row) -> Result<()> { - let datum_buffer_len = - self.table_schema.string_buffer_offset() + Encoding::size_of_num_bits(); - let mut encoded_len = datum_buffer_len; - for index_in_table in 0..self.table_schema.num_columns() { - if let Some(writer_index) = self.index_in_writer.column_index_in_writer(index_in_table) - { - let datum = &row[writer_index]; - if !datum.is_fixed_sized() { - // For the datum content and the length of it - let len = datum.size(); - encoded_len += encoded_len_varint(len as u64) + len; - } - } else { - unreachable!("The column is ensured to be non-null"); - } - } - - // Pre-allocate memory for row. - self.inner.reset(encoded_len, DatumKind::Null.into_u8()); - - // Offset to next string in string buffer. - let mut next_string_offset = datum_buffer_len; - let mut datum_offset = Encoding::size_of_num_bits(); - for index_in_table in 0..self.table_schema.num_columns() { - if let Some(writer_index) = self.index_in_writer.column_index_in_writer(index_in_table) - { - let datum = &row[writer_index]; - // Write datum bytes to the buffer. - Self::write_datum( - self.inner, - datum, - &mut datum_offset, - &mut next_string_offset, - )?; - } else { - unreachable!("The column is ensured to be non-null"); - } - } - - debug_assert_eq!(datum_offset, datum_buffer_len); - debug_assert_eq!(next_string_offset, encoded_len); - Ok(()) - } - - #[inline] - fn write_slice_to_offset(inner: &mut T, offset: &mut usize, value_buf: &[u8]) { - let dst = &mut inner[*offset..*offset + value_buf.len()]; - dst.copy_from_slice(value_buf); - *offset += value_buf.len(); - } - - fn encode_varint(value: u32, buf: &mut [u8; 4]) -> &[u8] { - let value = value as u64; - let mut temp = &mut buf[..]; - encode_varint(value, &mut temp); - &buf[..encoded_len_varint(value)] - } -} - -/// The byte size to encode the datum of this kind in memory. -/// -/// Returns the datum size for header. For integer types, the datum -/// size is the memory size of the integer type. 
For string types, the -/// datum size is the memory size to hold the offset. -pub(crate) fn byte_size_of_datum(kind: &DatumKind) -> usize { - match kind { - DatumKind::Null => 1, - DatumKind::Timestamp => mem::size_of::(), - DatumKind::Double => mem::size_of::(), - DatumKind::Float => mem::size_of::(), - // The size of offset. - DatumKind::Varbinary | DatumKind::String => Encoding::size_of_offset(), - DatumKind::UInt64 => mem::size_of::(), - DatumKind::UInt32 => mem::size_of::(), - DatumKind::UInt16 => mem::size_of::(), - DatumKind::UInt8 => mem::size_of::(), - DatumKind::Int64 => mem::size_of::(), - DatumKind::Int32 => mem::size_of::(), - DatumKind::Int16 => mem::size_of::(), - DatumKind::Int8 => mem::size_of::(), - DatumKind::Boolean => mem::size_of::(), - DatumKind::Date => mem::size_of::(), - DatumKind::Time => mem::size_of::(), - } -} - -/// Read datum view from given datum buf, and may reference the string in -/// `string_buf`. -/// -/// Panic if out of bound. -/// -/// ## Safety -/// The string in buffer must be valid utf8. -fn must_read_view<'a>( - datum_kind: &DatumKind, - datum_buf: &'a [u8], - string_buf: &'a [u8], -) -> DatumView<'a> { - match datum_kind { - DatumKind::Null => DatumView::Null, - DatumKind::Timestamp => { - let value_buf = datum_buf[..mem::size_of::()].try_into().unwrap(); - let ts = Timestamp::new(i64::from_ne_bytes(value_buf)); - DatumView::Timestamp(ts) - } - DatumKind::Double => { - let value_buf = datum_buf[..mem::size_of::()].try_into().unwrap(); - let v = f64::from_ne_bytes(value_buf); - DatumView::Double(v) - } - DatumKind::Float => { - let value_buf = datum_buf[..mem::size_of::()].try_into().unwrap(); - let v = f32::from_ne_bytes(value_buf); - DatumView::Float(v) - } - DatumKind::Varbinary => { - let bytes = must_read_bytes(datum_buf, string_buf); - DatumView::Varbinary(bytes) - } - DatumKind::String => { - let bytes = must_read_bytes(datum_buf, string_buf); - let v = unsafe { str::from_utf8_unchecked(bytes) }; - DatumView::String(v) - } - DatumKind::UInt64 => { - let value_buf = datum_buf[..mem::size_of::()].try_into().unwrap(); - let v = u64::from_ne_bytes(value_buf); - DatumView::UInt64(v) - } - DatumKind::UInt32 => { - let value_buf = datum_buf[..mem::size_of::()].try_into().unwrap(); - let v = u32::from_ne_bytes(value_buf); - DatumView::UInt32(v) - } - DatumKind::UInt16 => { - let value_buf = datum_buf[..mem::size_of::()].try_into().unwrap(); - let v = u16::from_ne_bytes(value_buf); - DatumView::UInt16(v) - } - DatumKind::UInt8 => DatumView::UInt8(datum_buf[0]), - DatumKind::Int64 => { - let value_buf = datum_buf[..mem::size_of::()].try_into().unwrap(); - let v = i64::from_ne_bytes(value_buf); - DatumView::Int64(v) - } - DatumKind::Int32 => { - let value_buf = datum_buf[..mem::size_of::()].try_into().unwrap(); - let v = i32::from_ne_bytes(value_buf); - DatumView::Int32(v) - } - DatumKind::Int16 => { - let value_buf = datum_buf[..mem::size_of::()].try_into().unwrap(); - let v = i16::from_ne_bytes(value_buf); - DatumView::Int16(v) - } - DatumKind::Int8 => DatumView::Int8(datum_buf[0] as i8), - DatumKind::Boolean => DatumView::Boolean(datum_buf[0] != 0), - DatumKind::Date => { - let value_buf = datum_buf[..mem::size_of::()].try_into().unwrap(); - let v = i32::from_ne_bytes(value_buf); - DatumView::Date(v) - } - DatumKind::Time => { - let value_buf = datum_buf[..mem::size_of::()].try_into().unwrap(); - let v = i64::from_ne_bytes(value_buf); - DatumView::Time(v) - } - } -} - -fn must_read_bytes<'a>(datum_buf: &'a [u8], string_buf: &'a [u8]) -> &'a [u8] 
{ - // Read offset of string in string buf. - let value_buf = datum_buf[..mem::size_of::()].try_into().unwrap(); - let offset = Offset::from_ne_bytes(value_buf) as usize; - let mut string_buf = &string_buf[offset..]; - - // Read len of the string. - let string_len = match decode_varint(&mut string_buf) { - Ok(len) => len as usize, - Err(e) => panic!("failed to decode string length, string buffer:{string_buf:?}, err:{e}"), - }; - - // Read string. - &string_buf[..string_len] -} - -impl RowBuffer for Vec { - fn reset(&mut self, new_len: usize, value: u8) { - self.clear(); - - self.resize(new_len, value); - } - - fn append_slice(&mut self, src: &[u8]) { - self.extend_from_slice(src); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{ - projected_schema::ProjectedSchema, - tests::{build_rows, build_schema}, - }; - - #[test] - fn test_contiguous_read_write() { - let schema = build_schema(); - let rows = build_rows(); - let index_in_writer = IndexInWriterSchema::for_same_schema(schema.num_columns()); - let datum_kinds = schema - .columns() - .iter() - .map(|column| &column.data_type) - .collect::>(); - - let mut buf = Vec::new(); - for row in rows { - let mut writer = ContiguousRowWriter::new(&mut buf, &schema, &index_in_writer); - - writer.write_row(&row).unwrap(); - - let reader = ContiguousRowReader::try_new(&buf, &schema).unwrap(); - - let range: Vec<_> = (0..reader.num_datum_views()).collect(); - for i in range { - let datum = &row[i]; - let view = reader.datum_view_at(i, datum_kinds[i]); - - assert_eq!(datum.as_view(), view); - } - } - } - - #[test] - fn test_contiguous_read_write_with_different_write_schema() { - let schema = build_schema(); - let rows = build_rows(); - let index_in_writer = { - let mut index_schema = IndexInWriterSchema::default(); - index_schema.reserve_columns(schema.num_columns()); - // Make the final column is None. 
- for i in 0..schema.num_columns() { - let col_idx = (i != schema.num_columns() - 1).then_some(i); - index_schema.push_column(col_idx); - } - index_schema - }; - - let datum_kinds = schema - .columns() - .iter() - .map(|column| &column.data_type) - .collect::>(); - - let mut buf = Vec::new(); - for row in rows { - let mut writer = ContiguousRowWriter::new(&mut buf, &schema, &index_in_writer); - - writer.write_row(&row).unwrap(); - - let reader = ContiguousRowReader::try_new(&buf, &schema).unwrap(); - - let final_col_idx = reader.num_datum_views() - 1; - let range: Vec<_> = (0..=final_col_idx).collect(); - for i in range { - let datum = &row[i]; - let view = reader.datum_view_at(i, datum_kinds[i]); - if i == final_col_idx { - assert!(matches!(view, DatumView::Null)); - } else { - assert_eq!(datum.as_view(), view); - } - } - } - } - - #[test] - fn test_project_contiguous_read_write() { - let schema = build_schema(); - assert!(schema.num_columns() > 1); - let projection: Vec = (0..schema.num_columns() - 1).collect(); - let projected_schema = - ProjectedSchema::new(schema.clone(), Some(projection.clone())).unwrap(); - let ctx = RowProjector::new( - &projected_schema.to_record_schema(), - None, - projected_schema.table_schema(), - &schema, - ) - .unwrap(); - let rows = build_rows(); - let index_in_writer = IndexInWriterSchema::for_same_schema(schema.num_columns()); - - let mut buf = Vec::new(); - for row in rows { - let mut writer = ContiguousRowWriter::new(&mut buf, &schema, &index_in_writer); - - writer.write_row(&row).unwrap(); - - let source_row = ContiguousRowReader::try_new(&buf, &schema).unwrap(); - let projected_row = ProjectedContiguousRow::new(source_row, &ctx); - - let range = projection.clone(); - for i in range { - let datum = &row[i]; - let view = projected_row.datum_view_at(i); - - assert_eq!(datum.as_view(), view); - } - } - } -} diff --git a/src/common_types/src/row/mod.rs b/src/common_types/src/row/mod.rs deleted file mode 100644 index 045a5c80dc..0000000000 --- a/src/common_types/src/row/mod.rs +++ /dev/null @@ -1,650 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Row type - -use std::{ - collections::HashMap, - ops::{Index, IndexMut}, -}; - -use snafu::{ensure, Backtrace, OptionExt, Snafu}; - -use crate::{ - column_schema::{ColumnId, ColumnSchema}, - datum::{Datum, DatumKind, DatumView}, - record_batch::FetchedRecordBatch, - schema::{RecordSchemaWithKey, Schema}, - time::Timestamp, -}; - -pub mod bitset; -pub mod contiguous; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display( - "Column out of bound, len:{}, given:{}.\nBacktrace:\n{}", - len, - given, - backtrace - ))] - ColumnOutOfBound { - len: usize, - given: usize, - backtrace: Backtrace, - }, - - #[snafu(display( - "Invalid row num, expect:{}, given:{}.\nBacktrace:\n{}", - expect, - given, - backtrace - ))] - InvalidRowNum { - expect: usize, - given: usize, - backtrace: Backtrace, - }, - - #[snafu(display( - "Invalid column num, expect:{}, given:{}.\nBacktrace:\n{}", - expect, - given, - backtrace - ))] - InvalidColumnNum { - expect: usize, - given: usize, - backtrace: Backtrace, - }, - - #[snafu(display("Column cannot be null, name:{}.\nBacktrace:\n{}", column, backtrace))] - NullColumn { - column: String, - backtrace: Backtrace, - }, - - #[snafu(display( - "Column type mismatch, name:{}, expect:{:?}, given:{:?}.\nBacktrace:\n{}", - column, - expect, - given, - backtrace - ))] - TypeMismatch { - column: String, - expect: DatumKind, - given: DatumKind, - backtrace: Backtrace, - }, - - #[snafu(display("Missing columns to build row.\nBacktrace:\n{}", backtrace))] - MissingColumns { backtrace: Backtrace }, - - #[snafu(display("Convert column failed, column:{}, err:{}", column, source))] - ConvertColumn { - column: String, - source: crate::datum::Error, - }, - - #[snafu(display("Column in the schema is not found, column_name:{}", column,))] - ColumnNameNotFound { column: String }, - - #[snafu(display( - "Column in the schema is not found, column_name:{}.\nBacktrace:\n{}", - column, - backtrace - ))] - ColumnNotFoundInSchema { - column: String, - backtrace: Backtrace, - }, - - #[snafu(display( - "Duplicate column id is found, column_id:{column_id}.\nBacktrace:\n{backtrace}", - ))] - DuplicateColumnId { - column_id: ColumnId, - backtrace: Backtrace, - }, -} - -// Do not depend on test_util crates -pub type Result = std::result::Result; - -// TODO(yingwen): -// - Memory pooling (or Arena) and statistics -// - Custom Debug format -// - Add a type RowWithSchema so we can ensure the row always matches the schema -// - Maybe add a type RowOperation like kudu - -/// Row contains multiple columns, each column is represented by a datum -/// The internal representation of row is not specific -#[derive(Debug, Clone, PartialEq)] -pub struct Row { - cols: Vec, -} - -impl Row { - /// Convert vec of Datum into Row - #[inline] - pub fn from_datums(cols: Vec) -> Self { - Self { cols } - } - - /// Returns the column num - #[inline] - pub fn num_columns(&self) -> usize { - self.cols.len() - } - - /// Iterate all datums - #[inline] - pub fn iter(&self) -> IterDatum { - IterDatum { - iter: self.cols.iter(), - } - } - - /// Get the timestamp column - #[inline] - pub fn timestamp(&self, schema: &Schema) -> Option { - let timestamp_index = schema.timestamp_index(); - - self.cols[timestamp_index].as_timestamp() - } - - #[inline] - pub fn size(&self) -> usize { - self.cols.iter().map(|col| col.size()).sum() - } -} - -#[derive(Debug)] -pub struct IterDatum<'a> { - iter: std::slice::Iter<'a, Datum>, -} - -impl<'a> Iterator for IterDatum<'a> { - type Item = &'a Datum; - - fn next(&mut self) -> Option { - 
self.iter.next() - } -} - -impl Index for Row { - type Output = Datum; - - fn index(&self, index: usize) -> &Self::Output { - &self.cols[index] - } -} - -impl IndexMut for Row { - fn index_mut(&mut self, index: usize) -> &mut Self::Output { - &mut self.cols[index] - } -} - -impl<'a> IntoIterator for &'a Row { - type IntoIter = std::slice::Iter<'a, Datum>; - type Item = &'a Datum; - - fn into_iter(self) -> Self::IntoIter { - self.cols.iter() - } -} - -impl IntoIterator for Row { - type IntoIter = std::vec::IntoIter; - type Item = Datum; - - fn into_iter(self) -> Self::IntoIter { - self.cols.into_iter() - } -} - -/// Check whether the schema of the row equals to given `schema` -pub fn check_row_schema(row: &Row, schema: &Schema) -> Result<()> { - ensure!( - schema.num_columns() == row.num_columns(), - InvalidColumnNum { - expect: schema.num_columns(), - given: row.num_columns(), - } - ); - - for (index, datum) in row.iter().enumerate() { - let column = schema.column(index); - check_datum_type(datum, column)?; - } - - Ok(()) -} - -// TODO(yingwen): For multiple rows that share the same schema, no need to store -// Datum for each row element, we can store the whole row as a binary and -// provide more efficient way to convert rows into columns -/// RowGroup -/// -/// The min/max timestamp of an empty RowGroup is 0. -/// -/// Rows in the RowGroup have the same schema. The internal representation of -/// rows is not specific. -#[derive(Clone, Debug)] -pub struct RowGroup { - /// Schema of the row group, all rows in the row group should have same - /// schema - schema: Schema, - /// Rows in the row group - rows: Vec, -} - -impl RowGroup { - /// Create [RowGroup] without any check. - /// - /// The caller should ensure all the rows share the same schema as the - /// provided one. - #[inline] - pub fn new_unchecked(schema: Schema, rows: Vec) -> Self { - Self { schema, rows } - } - - /// Check and create row group. - /// - /// [None] will be thrown if the rows have different schema from the - /// provided one. 
- #[inline] - pub fn try_new(schema: Schema, rows: Vec) -> Result { - rows.iter() - .try_for_each(|row| check_row_schema(row, &schema))?; - - Ok(Self { schema, rows }) - } - - /// Returns true if the row group is empty - #[inline] - pub fn is_empty(&self) -> bool { - self.rows.is_empty() - } - - /// Returns number of rows in the row group - #[inline] - pub fn num_rows(&self) -> usize { - self.rows.len() - } - - /// Returns the idx-th row in the row group - #[inline] - pub fn get_row(&self, idx: usize) -> Option<&Row> { - self.rows.get(idx) - } - - /// Returns the idx-th mutable row in the row group - #[inline] - pub fn get_row_mut(&mut self, idx: usize) -> Option<&mut Row> { - self.rows.get_mut(idx) - } - - /// Iter all datum of the column - /// - /// Will panic if col_index is out of bound - pub fn iter_column(&self, col_index: usize) -> IterCol { - IterCol { - rows: &self.rows, - row_index: 0, - col_index, - } - } - - /// The schema of the row group - #[inline] - pub fn schema(&self) -> &Schema { - &self.schema - } - - #[inline] - pub fn take_rows(&mut self) -> Vec { - std::mem::take(&mut self.rows) - } - - #[inline] - pub fn into_schema(self) -> Schema { - self.schema - } - - /// Iter the row group by rows - // TODO(yingwen): Add a iter_with_schema - pub fn iter(&self) -> IterRow { - IterRow { - iter: self.rows.iter(), - } - } -} - -impl<'a> IntoIterator for &'a RowGroup { - type IntoIter = std::slice::Iter<'a, Row>; - type Item = &'a Row; - - fn into_iter(self) -> Self::IntoIter { - self.rows.iter() - } -} - -impl IntoIterator for RowGroup { - type IntoIter = std::vec::IntoIter; - type Item = Row; - - fn into_iter(self) -> Self::IntoIter { - self.rows.into_iter() - } -} - -#[derive(Debug)] -pub struct IterRow<'a> { - iter: std::slice::Iter<'a, Row>, -} - -impl<'a> Iterator for IterRow<'a> { - type Item = &'a Row; - - fn next(&mut self) -> Option { - self.iter.next() - } -} - -#[derive(Clone, Debug)] -pub struct IterCol<'a> { - rows: &'a Vec, - row_index: usize, - col_index: usize, -} - -impl<'a> Iterator for IterCol<'a> { - type Item = &'a Datum; - - fn next(&mut self) -> Option { - if self.rows.is_empty() { - return None; - } - - if self.row_index >= self.rows.len() { - return None; - } - - let row = &self.rows[self.row_index]; - self.row_index += 1; - - Some(&row[self.col_index]) - } - - fn size_hint(&self) -> (usize, Option) { - let remaining = self.rows.len() - self.row_index; - (remaining, Some(remaining)) - } -} - -/// Build the [`RowGroup`] from the columns. -pub struct RowGroupBuilderFromColumn { - schema: Schema, - cols: HashMap>, -} - -impl RowGroupBuilderFromColumn { - pub fn with_capacity(schema: Schema, num_cols: usize) -> Self { - Self { - schema, - cols: HashMap::with_capacity(num_cols), - } - } - - /// The newly-added column should have the same elements as the - /// previously-added column's. 
- pub fn try_add_column(&mut self, col_id: ColumnId, col: Vec) -> Result<()> { - if let Some(num_rows) = self.num_rows() { - ensure!( - num_rows == col.len(), - InvalidRowNum { - expect: num_rows, - given: col.len(), - } - ); - } - - let old = self.cols.insert(col_id, col); - ensure!(old.is_none(), DuplicateColumnId { column_id: col_id }); - - Ok(()) - } - - pub fn build(mut self) -> RowGroup { - let num_rows = self.num_rows(); - if Some(0) == num_rows { - return RowGroup { - schema: self.schema, - rows: vec![], - }; - }; - - let num_rows = num_rows.unwrap(); - let num_cols = self.schema.num_columns(); - let mut rows = Vec::with_capacity(num_rows); - - // Pre-allocate the memory for column data in every row. - for _ in 0..num_rows { - let row = Vec::with_capacity(num_cols); - rows.push(row); - } - - let mut add_column_to_row = |row_idx: usize, datum: Datum| { - rows[row_idx].push(datum); - }; - - for col_schema in self.schema.columns() { - let col_id = col_schema.id; - let datums = self.cols.remove(&col_id); - - match datums { - Some(v) => { - for (row_idx, datum) in v.into_iter().enumerate() { - add_column_to_row(row_idx, datum); - } - } - None => { - for row_idx in 0..num_rows { - add_column_to_row(row_idx, Datum::Null); - } - } - } - } - - RowGroup { - schema: self.schema, - rows: rows.into_iter().map(Row::from_datums).collect::>(), - } - } - - #[inline] - fn num_rows(&self) -> Option { - self.cols.iter().next().map(|(_, v)| v.len()) - } -} - -/// Check whether the datum kind matches the column schema -pub fn check_datum_type(datum: &Datum, column_schema: &ColumnSchema) -> Result<()> { - // Check null datum - if let Datum::Null = datum { - ensure!( - column_schema.is_nullable, - NullColumn { - column: &column_schema.name, - } - ); - } else { - ensure!( - datum.kind() == column_schema.data_type, - TypeMismatch { - column: &column_schema.name, - expect: column_schema.data_type, - given: datum.kind(), - } - ); - } - - Ok(()) -} - -/// Row builder for the row group -#[derive(Debug)] -pub struct RowBuilder<'a> { - schema: &'a Schema, - cols: Vec, -} - -impl<'a> RowBuilder<'a> { - pub fn new(schema: &'a Schema) -> RowBuilder<'a> { - Self { - schema, - cols: Vec::with_capacity(schema.num_columns()), - } - } - - /// Append a datum into the row - pub fn append_datum(mut self, datum: Datum) -> Result { - self.check_datum(&datum)?; - - self.cols.push(datum); - - Ok(self) - } - - /// Check whether the datum is valid - fn check_datum(&self, datum: &Datum) -> Result<()> { - let index = self.cols.len(); - ensure!( - index < self.schema.num_columns(), - ColumnOutOfBound { - len: self.schema.num_columns(), - given: index, - } - ); - - let column = self.schema.column(index); - check_datum_type(datum, column) - } - - /// Finish building this row and append this row into the row group - pub fn finish(self) -> Result { - ensure!(self.cols.len() == self.schema.num_columns(), MissingColumns); - - Ok(Row { cols: self.cols }) - } -} - -pub trait RowView { - fn try_get_column_by_name(&self, column_name: &str) -> Result>; - - fn column_by_idx(&self, column_idx: usize) -> Datum; -} - -// TODO(yingwen): Add a method to get row view on FetchedRecordBatch. -/// A row view on the [FetchedRecordBatch]. -/// -/// `row_idx < record_batch.num_rows()` is ensured. 
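Stepping back to `RowGroupBuilderFromColumn` above: it pivots per-column vectors into rows in schema order, filling any column absent from the input with `Datum::Null` and rejecting columns whose lengths disagree. A standalone sketch of that pivot, with plain strings standing in for `Datum` (all names here are illustrative):

```rust
use std::collections::HashMap;

/// Pivot per-column vectors (keyed by column id) into rows, in the order
/// given by `schema_col_ids`; columns missing from the input become "NULL".
fn columns_to_rows(
    schema_col_ids: &[u32],
    mut cols: HashMap<u32, Vec<String>>,
    num_rows: usize,
) -> Vec<Vec<String>> {
    let mut rows = vec![Vec::with_capacity(schema_col_ids.len()); num_rows];
    for col_id in schema_col_ids {
        match cols.remove(col_id) {
            Some(values) => {
                assert_eq!(values.len(), num_rows, "all columns must have the same length");
                for (row, v) in rows.iter_mut().zip(values) {
                    row.push(v);
                }
            }
            None => {
                for row in rows.iter_mut() {
                    row.push("NULL".to_string());
                }
            }
        }
    }
    rows
}

fn main() {
    let cols = HashMap::from([(1, vec!["a".to_string(), "b".to_string()])]);
    let rows = columns_to_rows(&[1, 2], cols, 2);
    assert_eq!(
        rows,
        vec![
            vec!["a".to_string(), "NULL".to_string()],
            vec!["b".to_string(), "NULL".to_string()],
        ]
    );
}
```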
-#[derive(Debug)] -pub struct RowViewOnBatch<'a> { - pub record_batch: &'a FetchedRecordBatch, - pub row_idx: usize, -} - -impl<'a> RowViewOnBatch<'a> { - pub fn iter_columns(&self) -> RowViewOnBatchColumnIter { - RowViewOnBatchColumnIter { - next_column_idx: 0, - row_idx: self.row_idx, - record_batch: self.record_batch, - } - } -} - -pub struct RowViewOnBatchColumnIter<'a> { - next_column_idx: usize, - row_idx: usize, - record_batch: &'a FetchedRecordBatch, -} - -impl<'a> RowView for RowViewOnBatch<'a> { - fn try_get_column_by_name(&self, column_name: &str) -> Result> { - let column_idx = - self.record_batch - .schema() - .index_of(column_name) - .context(ColumnNameNotFound { - column: column_name, - })?; - Ok(Some(self.column_by_idx(column_idx))) - } - - #[inline] - fn column_by_idx(&self, column_idx: usize) -> Datum { - let column = self.record_batch.column(column_idx); - column.datum(self.row_idx) - } -} - -impl<'a> Iterator for RowViewOnBatchColumnIter<'a> { - type Item = Result>; - - fn next(&mut self) -> Option { - if self.next_column_idx >= self.record_batch.num_columns() { - return None; - } - - let curr_column_idx = self.next_column_idx; - let column = self.record_batch.column(curr_column_idx); - let datum_view = column.datum_view_opt(self.row_idx).map(Ok); - - self.next_column_idx += 1; - - datum_view - } -} - -#[derive(Debug, Clone)] -pub struct RowWithMeta<'a> { - pub row: &'a Row, - pub schema: &'a RecordSchemaWithKey, -} - -impl<'a> RowView for RowWithMeta<'a> { - fn try_get_column_by_name(&self, column_name: &str) -> Result> { - let idx = self - .schema - .index_of(column_name) - .context(ColumnNotFoundInSchema { - column: column_name, - })?; - Ok(Some(self.column_by_idx(idx))) - } - - #[inline] - fn column_by_idx(&self, column_idx: usize) -> Datum { - self.row.cols[column_idx].clone() - } -} diff --git a/src/common_types/src/schema.rs b/src/common_types/src/schema.rs deleted file mode 100644 index 1f74ed74d3..0000000000 --- a/src/common_types/src/schema.rs +++ /dev/null @@ -1,1852 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Schema of table - -use std::{ - cmp::{self, Ordering}, - collections::{HashMap, HashSet}, - convert::TryFrom, - fmt, - num::ParseIntError, - str::FromStr, - sync::Arc, -}; - -// Just re-use arrow's types -// TODO(yingwen): No need to support all schema that arrow supports, we can -// use a new type pattern to wrap Schema/SchemaRef and not allow to use -// the data type we not supported -pub use arrow::datatypes::{DataType, Field, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef}; -use horaedbproto::schema as schema_pb; -use prost::Message; -use snafu::{ensure, Backtrace, OptionExt, ResultExt, Snafu}; - -use crate::{ - column_schema::{self, ColumnId, ColumnSchema}, - datum::DatumKind, - row::{contiguous, RowView}, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display( - "Projection too long, max:{}, given:{}.\nBacktrace:\n{}", - max, - given, - backtrace - ))] - ProjectionTooLong { - max: usize, - given: usize, - backtrace: Backtrace, - }, - - #[snafu(display( - "Invalid projection index, max:{}, given:{}.\nBacktrace:\n{}", - max, - given, - backtrace - ))] - InvalidProjectionIndex { - max: usize, - given: usize, - backtrace: Backtrace, - }, - - #[snafu(display("Projection must have timestamp column.\nBacktrace:\n{}", backtrace))] - ProjectionMissTimestamp { backtrace: Backtrace }, - - #[snafu(display( - "Column name already exists, name:{}.\nBacktrace:\n{}", - name, - backtrace - ))] - ColumnNameExists { name: String, backtrace: Backtrace }, - - #[snafu(display( - "Column id already exists, name:{}, id:{}.\nBacktrace:\n{}", - name, - id, - backtrace - ))] - ColumnIdExists { - name: String, - id: ColumnId, - backtrace: Backtrace, - }, - - #[snafu(display( - "Column id is missing in schema, id:{}.\nBacktrace:\n{}", - id, - backtrace - ))] - ColumnIdMissing { id: ColumnId, backtrace: Backtrace }, - - #[snafu(display("Primary key indexes cannot be empty.\nBacktrace:\n{}", backtrace))] - EmptyPirmaryKeyIndexes { backtrace: Backtrace }, - - #[snafu(display( - "Unsupported key column type, name:{}, type:{:?}.\nBacktrace:\n{}", - name, - kind, - backtrace - ))] - KeyColumnType { - name: String, - kind: DatumKind, - backtrace: Backtrace, - }, - - #[snafu(display( - "Timestamp key column already exists, timestamp_column:{}, given:{}.\nBacktrace:\n{}", - timestamp_column, - given_column, - backtrace - ))] - TimestampKeyExists { - timestamp_column: String, - given_column: String, - backtrace: Backtrace, - }, - - #[snafu(display("Timestamp not in primary key.\nBacktrace:\n{}", backtrace))] - TimestampNotInPrimaryKey { backtrace: Backtrace }, - - #[snafu(display( - "Invalid arrow field, field_name:{}, arrow_schema:{:?}, err:{}", - field_name, - arrow_schema, - source - ))] - InvalidArrowField { - field_name: String, - arrow_schema: ArrowSchemaRef, - source: crate::column_schema::Error, - }, - - #[snafu(display( - "Primary key with tsid should only contains tsid and timestamp key.\nBacktrace:\n{}", - backtrace - ))] - InvalidPrimaryKeyWithTsid { backtrace: Backtrace }, - - #[snafu(display( - "Invalid arrow schema key, key:{:?}, raw_value:{}, err:{:?}.\nBacktrace:\n{}", - key, - raw_value, - source, - backtrace - ))] - InvalidArrowSchemaMetaValue { - key: ArrowSchemaMetaKey, - raw_value: String, - source: Box, - backtrace: Backtrace, - }, - - #[snafu(display( - "Arrow schema meta key not found, key:{:?}.\nBacktrace:\n{}", - key, - backtrace - ))] - ArrowSchemaMetaKeyNotFound { - key: ArrowSchemaMetaKey, - backtrace: Backtrace, - }, - - #[snafu(display("Arrow schema meta key not 
found.\nerr:\n{}", source))] - ColumnSchemaDeserializeFailed { source: crate::column_schema::Error }, - - #[snafu(display("Failed to encode schema by protobuf, err:{}", source))] - EncodeSchemaToPb { source: prost::EncodeError }, - - #[snafu(display("Encoded schema content is empty.\nBacktrace:\n{}", backtrace))] - EmptyEncodedSchema { backtrace: Backtrace }, - - #[snafu(display( - "Invalid schema encoding version, version:{}.\nBacktrace:\n{}", - version, - backtrace - ))] - InvalidSchemaEncodingVersion { version: u8, backtrace: Backtrace }, - - #[snafu(display( - "Failed to decode schema from protobuf bytes, buf:{:?}, err:{}", - buf, - source, - ))] - DecodeSchemaFromPb { - buf: Vec, - source: prost::DecodeError, - }, - - #[snafu(display( - "Failed to decode index, input:{}, err:{}\nBacktrace:\n{}", - input, - source, - backtrace - ))] - DecodeIndex { - input: String, - source: ParseIntError, - backtrace: Backtrace, - }, - - #[snafu(display( - "Primary key not found in schema, id:{id}, schema:{schema:?}\nBacktrace:\n{backtrace}", - ))] - PrimaryKeyIdNotFound { - id: u32, - schema: schema_pb::TableSchema, - backtrace: Backtrace, - }, -} - -pub type CatalogName = String; -pub type SchemaId = u32; -pub type SchemaName = String; -pub type Result = std::result::Result; - -// TODO: make these constants configurable -pub const TSID_COLUMN: &str = "tsid"; -pub const TIMESTAMP_COLUMN: &str = "timestamp"; - -const DEFAULT_SCHEMA_VERSION: Version = 1; -const DEFAULT_SCHEMA_ENCODING_VERSION: u8 = 0; - -#[derive(Debug, Snafu)] -pub enum CompatError { - #[snafu(display("Incompatible column schema for write, err:{}", source))] - IncompatWriteColumn { - source: crate::column_schema::CompatError, - }, - - #[snafu(display("Missing column, name:{}", name))] - MissingWriteColumn { name: String }, - - #[snafu(display("Columns to write not found in table, names:{:?}", names))] - WriteMoreColumn { names: Vec }, -} - -/// Meta data of the arrow schema -#[derive(Default)] -pub struct ArrowSchemaMeta { - primary_key_indexes: Indexes, - timestamp_index: usize, - version: u32, -} - -#[derive(Debug, Default, PartialEq)] -struct Indexes(Vec); - -impl ToString for Indexes { - fn to_string(&self) -> String { - self.0 - .iter() - .map(|n| n.to_string()) - .collect::>() - .join(",") - } -} - -impl FromStr for Indexes { - type Err = Error; - - fn from_str(encoded_index: &str) -> Result { - if encoded_index.is_empty() { - return Ok(Indexes(Vec::new())); - } - - let parsed_indexes = encoded_index - .split(',') - .map(|s| { - s.parse::().with_context(|| DecodeIndex { - input: encoded_index.to_string(), - }) - }) - .collect::>>()?; - - Ok(Indexes(parsed_indexes)) - } -} - -impl ArrowSchemaMeta { - fn parse_arrow_schema_meta_value( - meta: &HashMap, - key: ArrowSchemaMetaKey, - ) -> Result - where - T: FromStr, - T::Err: std::error::Error + Send + Sync + 'static, - { - let raw_value = meta - .get(key.as_str()) - .context(ArrowSchemaMetaKeyNotFound { key })?; - T::from_str(raw_value.as_str()) - .map_err(|e| Box::new(e) as _) - .context(InvalidArrowSchemaMetaValue { key, raw_value }) - } -} - -/// Parse the necessary meta information from the arrow schema's meta data. 
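The three `ArrowSchemaMetaKey` entries above are stored as plain strings in the arrow schema metadata and parsed back by `ArrowSchemaMeta::try_from`; primary key indexes use the comma-separated `Indexes` encoding. A hypothetical illustration of that metadata map for a schema whose primary key is columns 0 and 1, with the timestamp at index 1 and version 1:

```rust
use std::collections::HashMap;

fn example_schema_meta() -> HashMap<String, String> {
    [
        // Comma-separated column indexes, i.e. the `Indexes` string encoding.
        ("schema::primary_key_indexes".to_string(), "0,1".to_string()),
        ("schema::timestamp_index".to_string(), "1".to_string()),
        ("schema::version".to_string(), "1".to_string()),
    ]
    .into_iter()
    .collect()
}
```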
-impl TryFrom<&HashMap> for ArrowSchemaMeta { - type Error = Error; - - fn try_from(meta: &HashMap) -> Result { - Ok(ArrowSchemaMeta { - primary_key_indexes: Self::parse_arrow_schema_meta_value( - meta, - ArrowSchemaMetaKey::PrimaryKeyIndexes, - )?, - timestamp_index: Self::parse_arrow_schema_meta_value( - meta, - ArrowSchemaMetaKey::TimestampIndex, - )?, - version: Self::parse_arrow_schema_meta_value(meta, ArrowSchemaMetaKey::Version)?, - }) - } -} - -#[derive(Copy, Clone, Debug)] -pub enum ArrowSchemaMetaKey { - PrimaryKeyIndexes, - TimestampIndex, - Version, -} - -impl ArrowSchemaMetaKey { - fn as_str(&self) -> &str { - match self { - Self::PrimaryKeyIndexes => "schema::primary_key_indexes", - Self::TimestampIndex => "schema::timestamp_index", - Self::Version => "schema::version", - } - } -} - -impl ToString for ArrowSchemaMetaKey { - fn to_string(&self) -> String { - self.as_str().to_string() - } -} - -/// Schema version -pub type Version = u32; - -/// Mapping column index in table schema to column index in writer schema -#[derive(Clone, Default)] -pub struct IndexInWriterSchema(Vec>); - -impl IndexInWriterSchema { - /// Create a index mapping for same schema with `num_columns` columns. - pub fn for_same_schema(num_columns: usize) -> Self { - let indexes = (0..num_columns).map(Some).collect(); - Self(indexes) - } - - /// Returns the column index in writer schema of the column with index - /// `index_in_table` in the table schema where the writer prepared to - /// write to. - /// - /// If the column is not in writer schema, returns None, which means that - /// this column should be filled by null. - /// - /// Panic if the index_in_table is out of bound - #[inline] - pub fn column_index_in_writer(&self, index_in_table: usize) -> Option { - self.0[index_in_table] - } - - /// Reserve the capacity for the additional columns. - #[inline] - pub fn reserve_columns(&mut self, additional: usize) { - self.0.reserve(additional); - } - - /// Push a new column index. - #[inline] - pub fn push_column(&mut self, column_index: Option) { - self.0.push(column_index) - } -} - -// TODO(yingwen): No need to compare all elements in ColumnSchemas, Schema, -// RecordSchema, custom PartialEq for them. - -/// Data of column schemas -#[derive(PartialEq)] -pub(crate) struct ColumnSchemas { - /// Column schemas - columns: Vec, - /// Column name to index of that column schema in `columns`, the index is - /// guaranteed to be valid - name_to_index: HashMap, - /// Byte offsets of each column in contiguous row. - byte_offsets: Vec, - /// String buffer offset in contiguous row. 
- string_buffer_offset: usize, -} - -impl ColumnSchemas { - fn new(columns: Vec) -> Self { - let name_to_index = columns - .iter() - .enumerate() - .map(|(idx, c)| (c.name.to_string(), idx)) - .collect(); - - let mut current_offset = 0; - let mut byte_offsets = Vec::with_capacity(columns.len()); - for column_schema in &columns { - byte_offsets.push(current_offset); - current_offset += contiguous::byte_size_of_datum(&column_schema.data_type); - } - - Self { - columns, - name_to_index, - byte_offsets, - string_buffer_offset: current_offset, - } - } -} - -impl ColumnSchemas { - pub fn num_columns(&self) -> usize { - self.columns().len() - } - - pub fn columns(&self) -> &[ColumnSchema] { - &self.columns - } - - pub fn column(&self, i: usize) -> &ColumnSchema { - &self.columns[i] - } - - pub fn index_of(&self, name: &str) -> Option { - self.name_to_index.get(name).copied() - } -} - -impl fmt::Debug for ColumnSchemas { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("ColumnSchemas") - // name_to_index is ignored. - .field("columns", &self.columns) - .finish() - } -} - -/// Schema of [crate::record_batch::RecordBatch] -/// -/// Should be cheap to clone. -/// -/// Note: Only `name`, `data_type`, `is_nullable` is valid after converting from -/// arrow's schema, the additional fields like `id`/`is_tag`/`comment` is always -/// unset. Now we only convert arrow's schema into our record before we output -/// the final query result, where the additional fields is never used. -#[derive(Debug, Clone, PartialEq)] -pub struct RecordSchema { - arrow_schema: ArrowSchemaRef, - column_schemas: Arc, -} - -impl RecordSchema { - fn from_column_schemas(column_schemas: ColumnSchemas, arrow_schema: &ArrowSchemaRef) -> Self { - // Convert to arrow fields. - let fields = column_schemas - .columns - .iter() - .map(|col| col.to_arrow_field()) - .collect::>(); - // Build arrow schema. 
- let arrow_schema = Arc::new(ArrowSchema::new_with_metadata( - fields, - arrow_schema.metadata().to_owned(), - )); - - Self { - arrow_schema, - column_schemas: Arc::new(column_schemas), - } - } - - pub fn num_columns(&self) -> usize { - self.column_schemas.num_columns() - } - - pub fn columns(&self) -> &[ColumnSchema] { - self.column_schemas.columns() - } - - pub fn index_of(&self, name: &str) -> Option { - self.column_schemas.index_of(name) - } - - pub fn column(&self, i: usize) -> &ColumnSchema { - self.column_schemas.column(i) - } - - pub fn column_by_name(&self, name: &str) -> Option<&ColumnSchema> { - let index = self.column_schemas.index_of(name)?; - Some(self.column_schemas.column(index)) - } - - pub fn to_arrow_schema_ref(&self) -> ArrowSchemaRef { - self.arrow_schema.clone() - } -} - -impl TryFrom for RecordSchema { - type Error = Error; - - fn try_from(arrow_schema: ArrowSchemaRef) -> Result { - let fields = arrow_schema.fields(); - let mut columns = Vec::with_capacity(fields.len()); - - for field in fields { - let column_schema = - ColumnSchema::try_from(field).with_context(|| InvalidArrowField { - arrow_schema: arrow_schema.clone(), - field_name: field.name(), - })?; - columns.push(column_schema); - } - - let column_schemas = ColumnSchemas::new(columns); - - Ok(Self::from_column_schemas(column_schemas, &arrow_schema)) - } -} - -#[derive(Debug, Clone, PartialEq)] -pub struct RecordSchemaWithKey { - record_schema: RecordSchema, - primary_key_indexes: Vec, -} - -impl RecordSchemaWithKey { - pub fn new(record_schema: RecordSchema, primary_key_indexes: Vec) -> Self { - Self { - record_schema, - primary_key_indexes, - } - } - - pub fn num_columns(&self) -> usize { - self.record_schema.num_columns() - } - - pub fn compare_row(&self, lhs: &LR, rhs: &RR) -> Ordering { - compare_row(&self.primary_key_indexes, lhs, rhs) - } - - pub fn primary_key_idx(&self) -> &[usize] { - &self.primary_key_indexes - } - - pub fn is_primary_key_index(&self, idx: usize) -> bool { - self.primary_key_indexes.contains(&idx) - } - - pub fn index_of(&self, name: &str) -> Option { - self.record_schema.index_of(name) - } - - pub fn columns(&self) -> &[ColumnSchema] { - self.record_schema.columns() - } - - /// Returns an immutable reference of the key column vector. - pub fn key_columns(&self) -> Vec { - self.columns() - .iter() - .enumerate() - .filter_map(|(idx, col)| { - if self.primary_key_indexes.contains(&idx) { - Some(col.clone()) - } else { - None - } - }) - .collect::>() - } - - pub fn to_record_schema(&self) -> RecordSchema { - self.record_schema.clone() - } - - pub fn into_record_schema(self) -> RecordSchema { - self.record_schema - } - - pub fn to_arrow_schema_ref(&self) -> ArrowSchemaRef { - self.record_schema.to_arrow_schema_ref() - } -} - -/// Compare the two rows. -/// -/// REQUIRES: the two rows must have the same number of key columns as -/// `num_key_columns`. -pub fn compare_row( - primary_key_idx: &[usize], - lhs: &LR, - rhs: &RR, -) -> Ordering { - for column_idx in primary_key_idx { - // caller should ensure the row view is valid. - // TODO(xikai): unwrap may not a good way to handle the error. - let left_datum = lhs.column_by_idx(*column_idx); - let right_datum = rhs.column_by_idx(*column_idx); - // the two datums must be of the same kind type. - match left_datum.partial_cmp(&right_datum).unwrap() { - Ordering::Equal => continue, - v @ Ordering::Less | v @ Ordering::Greater => return v, - } - } - - Ordering::Equal -} - -// TODO(yingwen): Maybe rename to TableSchema. 
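A short sketch of the primary-key ordering that `compare_row` implements, assuming a `schema` built as in the earlier sketch (varbinary key, timestamp key, double field). Only the key columns take part in the comparison, so rows that differ only in field values compare as `Equal`.

```rust
use std::cmp::Ordering;

use bytes_ext::Bytes;
use common_types::{
    datum::Datum,
    row::{Row, RowWithMeta},
    schema::Schema,
    time::Timestamp,
};

fn keys_only_ordering(schema: &Schema) {
    let schema_with_key = schema.to_record_schema_with_key();

    let row1 = Row::from_datums(vec![
        Datum::Varbinary(Bytes::from_static(b"host-1")),
        Datum::Timestamp(Timestamp::new(1_000)),
        Datum::Double(10.0),
    ]);
    let row2 = Row::from_datums(vec![
        Datum::Varbinary(Bytes::from_static(b"host-1")),
        Datum::Timestamp(Timestamp::new(1_000)),
        Datum::Double(99.0),
    ]);

    let lhs = RowWithMeta { row: &row1, schema: &schema_with_key };
    let rhs = RowWithMeta { row: &row2, schema: &schema_with_key };

    // Field values differ, but the primary key columns are equal.
    assert_eq!(Ordering::Equal, schema.compare_row(&lhs, &rhs));
}
```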
-/// Schema of a table -/// -/// - Should be immutable -/// - Each schema must have a timestamp column -/// - Should be immutable and cheap to clone, though passing by reference is -/// preferred -/// - The prefix of columns makes up the primary key (similar to kudu's schema) -/// - The Schema should built by builder -#[derive(Clone, PartialEq)] -pub struct Schema { - /// The underlying arrow schema, data type of fields must be supported by - /// datum - arrow_schema: ArrowSchemaRef, - /// The primary key index list in columns - primary_key_indexes: Vec, - /// Index of timestamp key column - // TODO(yingwen): Maybe we can remove the restriction that timestamp column must exists in - // schema (mainly for projected schema) - timestamp_index: usize, - /// Index of tsid key column - tsid_index: Option, - /// Column schemas, only holds arc pointer so the Schema can be cloned - /// without much overhead. - column_schemas: Arc, - /// Version of the schema, schemas with same version should be identical. - version: Version, -} - -impl fmt::Debug for Schema { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Schema") - // arrow_schema is ignored. - .field("timestamp_index", &self.timestamp_index) - .field("tsid_index", &self.tsid_index) - .field("column_schemas", &self.column_schemas) - .field("version", &self.version) - .field("primary_key_indexes", &self.primary_key_indexes) - .finish() - } -} - -impl TryFrom for Schema { - type Error = Error; - - fn try_from(arrow_schema: ArrowSchemaRef) -> Result { - Builder::build_from_arrow_schema(arrow_schema) - } -} - -impl TryFrom for Schema { - type Error = Error; - - fn try_from(record_schema: RecordSchema) -> Result { - Builder::build_from_arrow_schema(record_schema.to_arrow_schema_ref()) - } -} - -impl Schema { - /// Returns an immutable reference of the vector of [ColumnSchema]. - pub fn columns(&self) -> &[ColumnSchema] { - self.column_schemas.columns() - } - - /// Returns an immutable reference of the key column vector. - pub fn key_columns(&self) -> Vec { - self.primary_key_indexes - .iter() - .map(|i| self.column(*i).clone()) - .collect() - } - - /// Returns an immutable reference of the normal column vector. - pub fn normal_columns(&self) -> Vec { - self.column_schemas - .columns() - .iter() - .enumerate() - .filter_map(|(idx, col)| { - if self.primary_key_indexes.contains(&idx) { - None - } else { - Some(col.clone()) - } - }) - .collect() - } - - /// Returns index of the tsid column. - pub fn index_of_tsid(&self) -> Option { - self.tsid_index - } - - /// Returns tsid column index and immutable reference of tsid column - pub fn tsid_column(&self) -> Option<&ColumnSchema> { - if let Some(idx) = self.index_of_tsid() { - Some(&self.column_schemas.columns[idx]) - } else { - None - } - } - - /// Returns total number of columns - pub fn num_columns(&self) -> usize { - self.column_schemas.num_columns() - } - - /// Returns true if idx is primary key index - pub fn is_primary_key_index(&self, idx: &usize) -> bool { - self.primary_key_indexes.contains(idx) - } - - /// Returns an immutable reference of a specific [ColumnSchema] selected by - /// name. - pub fn column_with_name(&self, name: &str) -> Option<&ColumnSchema> { - let index = self.column_schemas.name_to_index.get(name)?; - Some(&self.column_schemas.columns[*index]) - } - - /// Returns an immutable reference of a specific [ColumnSchema] selected - /// using an offset within the internal vector. 
- /// - /// Panic if i is out of bound - pub fn column(&self, i: usize) -> &ColumnSchema { - self.column_schemas.column(i) - } - - /// Return the ref to [arrow::datatypes::SchemaRef] - pub fn as_arrow_schema_ref(&self) -> &ArrowSchemaRef { - &self.arrow_schema - } - - /// Return the cloned [arrow::datatypes::SchemaRef] - pub fn to_arrow_schema_ref(&self) -> ArrowSchemaRef { - self.arrow_schema.clone() - } - - /// Into [arrow::datatypes::SchemaRef] - pub fn into_arrow_schema_ref(self) -> ArrowSchemaRef { - self.arrow_schema - } - - /// Find the index of the column with the given name. - pub fn index_of(&self, name: &str) -> Option { - self.column_schemas.index_of(name) - } - - pub fn primary_key_indexes(&self) -> &[usize] { - &self.primary_key_indexes - } - - pub fn reset_primary_key_indexes(&mut self, indexes: Vec) { - self.primary_key_indexes = indexes - } - - /// Return the number of columns index in primary key - pub fn num_primary_key_columns(&self) -> usize { - self.primary_key_indexes.len() - } - - /// Get the name of the timestamp column - #[inline] - pub fn timestamp_name(&self) -> &str { - &self.column(self.timestamp_index()).name - } - - /// Get the index of the timestamp column - #[inline] - pub fn timestamp_index(&self) -> usize { - self.timestamp_index - } - - /// Whether i-nth column is tag column - pub fn is_tag_column(&self, i: usize) -> bool { - self.column(i).is_tag - } - - /// Get the version of this schema - #[inline] - pub fn version(&self) -> Version { - self.version - } - - /// Compare the two rows. - /// - /// REQUIRES: the two rows must have the key columns defined by the schema. - pub fn compare_row(&self, lhs: &R, rhs: &R) -> Ordering { - compare_row(&self.primary_key_indexes, lhs, rhs) - } - - /// Returns `Ok` if rows with `writer_schema` can write to table with the - /// same schema as `self`. - pub fn compatible_for_write( - &self, - writer_schema: &Schema, - index_in_writer: &mut IndexInWriterSchema, - ) -> std::result::Result<(), CompatError> { - index_in_writer.reserve_columns(self.num_columns()); - - let mut num_col_in_writer = 0; - for column in self.columns() { - // Find column in schema of writer. - match writer_schema.index_of(&column.name) { - Some(writer_index) => { - let writer_column = writer_schema.column(writer_index); - - // Column is found in writer - num_col_in_writer += 1; - - // Column with same name, but not compatible - column - .compatible_for_write(writer_column) - .context(IncompatWriteColumn)?; - - // Column is compatible, push index mapping - index_in_writer.push_column(Some(writer_index)); - } - None => { - // Column is not found in writer, then the column should be nullable. 
- ensure!( - column.is_nullable, - MissingWriteColumn { name: &column.name } - ); - - // Column is nullable, push index mapping - index_in_writer.push_column(None); - } - } - } - // All columns of this schema have been checked - - // If the writer have columns not in this schema, then we consider it - // incompatible - ensure!( - num_col_in_writer == writer_schema.num_columns(), - WriteMoreColumn { - names: writer_schema - .columns() - .iter() - .filter_map(|c| if self.column_with_name(&c.name).is_none() { - Some(c.name.clone()) - } else { - None - }) - .collect::>(), - } - ); - - Ok(()) - } - - pub fn to_record_schema(&self) -> RecordSchema { - RecordSchema { - arrow_schema: self.arrow_schema.clone(), - column_schemas: self.column_schemas.clone(), - } - } - - pub fn to_record_schema_with_key(&self) -> RecordSchemaWithKey { - RecordSchemaWithKey { - record_schema: self.to_record_schema(), - primary_key_indexes: self.primary_key_indexes.clone(), - } - } - - pub fn is_unique_column(&self, col_name: &str) -> bool { - // primary key is obvious unique. - let is_primary_key = self - .primary_key_indexes() - .iter() - .map(|key_idx| self.column(*key_idx).name.as_str()) - .any(|primary_key| primary_key == col_name); - - if is_primary_key { - return true; - } - - if self.tsid_column().is_none() { - return false; - } - - // When tsid exists, it means tag column is also unique. - self.columns() - .iter() - .any(|column| column.is_tag && column.name == col_name) - } - - /// Panic if projection is invalid. - pub(crate) fn project_record_schema_with_key( - &self, - projection: &[usize], - ) -> RecordSchemaWithKey { - let mut primary_key_indexes = Vec::with_capacity(self.num_primary_key_columns()); - let mut columns = Vec::with_capacity(self.num_primary_key_columns()); - for (idx, col) in self.columns().iter().enumerate() { - if self.is_primary_key_index(&idx) { - primary_key_indexes.push(columns.len()); - columns.push(col.clone()); - } else if projection.contains(&idx) { - columns.push(col.clone()); - } - } - - let record_schema = - RecordSchema::from_column_schemas(ColumnSchemas::new(columns), &self.arrow_schema); - - RecordSchemaWithKey { - record_schema, - primary_key_indexes, - } - } - - /// Panic if projection is invalid. - pub(crate) fn project_record_schema(&self, projection: &[usize]) -> RecordSchema { - let mut columns = Vec::with_capacity(projection.len()); - - // Collect all columns needed by the projection. - for p in projection { - let column_schema = &self.columns()[*p]; - // Insert the index in projected schema of the column - columns.push(column_schema.clone()); - } - - RecordSchema::from_column_schemas(ColumnSchemas::new(columns), &self.arrow_schema) - } - - /// Returns byte offsets in contiguous row. - #[inline] - pub fn byte_offsets(&self) -> &[usize] { - &self.column_schemas.byte_offsets - } - - /// Returns byte offset in contiguous row of given column. - /// - /// Panic if out of bound. - #[inline] - pub fn byte_offset(&self, index: usize) -> usize { - self.column_schemas.byte_offsets[index] - } - - /// Returns string buffer offset in contiguous row. 
- #[inline] - pub fn string_buffer_offset(&self) -> usize { - self.column_schemas.string_buffer_offset - } -} - -impl TryFrom for Schema { - type Error = Error; - - fn try_from(schema: schema_pb::TableSchema) -> Result { - let mut builder = Builder::with_capacity(schema.columns.len()).version(schema.version); - let primary_key_ids = schema.primary_key_ids; - - let primary_key_indexes = primary_key_ids - .iter() - .cloned() - .map(|id| { - let col_idx = schema - .columns - .iter() - .enumerate() - .find_map(|(idx, col)| if col.id == id { Some(idx) } else { None }) - .context(ColumnIdMissing { id })?; - - Ok(col_idx) - }) - .collect::>>()?; - builder = builder.primary_key_indexes(primary_key_indexes); - - for column_schema_pb in schema.columns { - let column = - ColumnSchema::try_from(column_schema_pb).context(ColumnSchemaDeserializeFailed)?; - if primary_key_ids.contains(&column.id) { - builder = builder.add_key_column(column)?; - } else { - builder = builder.add_normal_column(column)?; - } - } - - builder.build() - } -} - -impl From<&Schema> for schema_pb::TableSchema { - fn from(schema: &Schema) -> Self { - let columns: Vec<_> = schema - .columns() - .iter() - .map(|v| schema_pb::ColumnSchema::from(v.clone())) - .collect(); - - let timestamp_id = schema.column(schema.timestamp_index()).id; - let primary_key_ids = schema - .primary_key_indexes() - .iter() - .map(|i| schema.column(*i).id) - .collect(); - - schema_pb::TableSchema { - timestamp_id, - version: schema.version, - columns, - primary_key_ids, - } - } -} - -/// Schema builder -#[must_use] -pub struct Builder { - columns: Vec, - /// The indexes of primary key columns - primary_key_indexes: Vec, - /// Timestamp column index - timestamp_index: Option, - column_names: HashSet, - column_ids: HashSet, - /// Version of the schema - version: Version, - /// Auto increment the column id if the id of the input ColumnSchema is - /// [crate::column_schema::COLUMN_ID_UNINIT]. - auto_increment_column_id: bool, - max_column_id: ColumnId, -} - -impl Default for Builder { - fn default() -> Self { - Self::new() - } -} - -impl Builder { - /// Create a new builder - pub fn new() -> Self { - Self::with_capacity(0) - } - - /// Create a builder with given capacity - pub fn with_capacity(capacity: usize) -> Self { - Self { - columns: Vec::with_capacity(capacity), - primary_key_indexes: Vec::new(), - timestamp_index: None, - column_names: HashSet::with_capacity(capacity), - column_ids: HashSet::with_capacity(capacity), - version: DEFAULT_SCHEMA_VERSION, - auto_increment_column_id: false, - max_column_id: column_schema::COLUMN_ID_UNINIT, - } - } - - /// Add a key column - pub fn add_key_column(mut self, mut column: ColumnSchema) -> Result { - self.may_alloc_column_id(&mut column); - self.validate_column(&column, true)?; - - // FIXME(xikai): it seems not reasonable to decide the timestamp column in this - // way. 
- let is_timestamp = DatumKind::Timestamp == column.data_type; - if is_timestamp { - ensure!( - self.timestamp_index.is_none(), - TimestampKeyExists { - timestamp_column: &self.columns[self.timestamp_index.unwrap()].name, - given_column: column.name, - } - ); - self.timestamp_index = Some(self.columns.len()); - } - - self.insert_new_column(column); - - Ok(self) - } - - /// Add a normal (non key) column - pub fn add_normal_column(mut self, mut column: ColumnSchema) -> Result { - self.may_alloc_column_id(&mut column); - self.validate_column(&column, false)?; - - self.insert_new_column(column); - - Ok(self) - } - - /// Set primary key indexes of the schema - pub fn primary_key_indexes(mut self, indexes: Vec) -> Self { - self.primary_key_indexes = indexes; - self - } - - /// Set version of the schema - pub fn version(mut self, version: Version) -> Self { - self.version = version; - self - } - - /// When auto increment is true, assign the column schema an auto - /// incremented id if its id is [crate::column_schema::COLUMN_ID_UNINIT]. - /// - /// Default is false - pub fn auto_increment_column_id(mut self, auto_increment: bool) -> Self { - self.auto_increment_column_id = auto_increment; - self - } - - fn may_alloc_column_id(&mut self, column: &mut ColumnSchema) { - // Assign this column an id - if self.auto_increment_column_id && column.id == column_schema::COLUMN_ID_UNINIT { - column.id = self.max_column_id + 1; - } - - self.max_column_id = cmp::max(self.max_column_id, column.id); - } - - // TODO(yingwen): Do we need to support null data type? - fn validate_column(&self, column: &ColumnSchema, is_key: bool) -> Result<()> { - ensure!( - !self.column_names.contains(&column.name), - ColumnNameExists { name: &column.name } - ); - - // Check datum kind if this is a key column - if is_key { - ensure!( - column.data_type.is_key_kind(), - KeyColumnType { - name: &column.name, - kind: column.data_type, - } - ); - } - - ensure!( - !self.column_ids.contains(&column.id), - ColumnIdExists { - name: &column.name, - id: column.id, - } - ); - - Ok(()) - } - - fn insert_new_column(&mut self, column: ColumnSchema) { - self.column_names.insert(column.name.clone()); - self.column_ids.insert(column.id); - - self.columns.push(column); - } - - fn build_from_arrow_schema(arrow_schema: ArrowSchemaRef) -> Result { - let fields = arrow_schema.fields(); - let mut columns = Vec::with_capacity(fields.len()); - - for field in fields { - let column_schema = - ColumnSchema::try_from(field).with_context(|| InvalidArrowField { - arrow_schema: arrow_schema.clone(), - field_name: field.name(), - })?; - columns.push(column_schema); - } - - let ArrowSchemaMeta { - primary_key_indexes, - timestamp_index, - version, - } = Self::parse_arrow_schema_meta_or_default(arrow_schema.metadata())?; - let tsid_index = Self::find_tsid_index(&columns); - - let column_schemas = Arc::new(ColumnSchemas::new(columns)); - - Ok(Schema { - arrow_schema, - primary_key_indexes: primary_key_indexes.0, - timestamp_index, - tsid_index, - column_schemas, - version, - }) - } - - /// Parse the necessary meta information from the arrow schema's meta data. - fn parse_arrow_schema_meta_or_default( - meta: &HashMap, - ) -> Result { - match ArrowSchemaMeta::try_from(meta) { - Ok(v) => Ok(v), - Err(Error::ArrowSchemaMetaKeyNotFound { .. }) => Ok(ArrowSchemaMeta::default()), - Err(e) => Err(e), - } - } - - /// Build arrow schema meta data. - /// - /// Requires: the timestamp index is not None. 
- fn build_arrow_schema_meta( - primary_key_indexes: Vec, - timestamp_index: usize, - version: u32, - ) -> HashMap { - [ - ( - ArrowSchemaMetaKey::PrimaryKeyIndexes.to_string(), - // TODO: change primary_key_indexes to `Indexes` type - Indexes(primary_key_indexes).to_string(), - ), - ( - ArrowSchemaMetaKey::TimestampIndex.to_string(), - timestamp_index.to_string(), - ), - (ArrowSchemaMetaKey::Version.to_string(), version.to_string()), - ] - .into_iter() - .collect() - } - - fn find_tsid_index(columns: &[ColumnSchema]) -> Option { - columns.iter().enumerate().find_map(|(idx, col_schema)| { - if col_schema.name == TSID_COLUMN { - Some(idx) - } else { - None - } - }) - } - - /// Build the schema - pub fn build(self) -> Result { - let timestamp_index = self.timestamp_index.context(TimestampNotInPrimaryKey)?; - - // Timestamp key column is exists, so key columns should not be zero - ensure!( - !self.primary_key_indexes.is_empty(), - EmptyPirmaryKeyIndexes {} - ); - - let tsid_index = Self::find_tsid_index(&self.columns); - let fields = self - .columns - .iter() - .map(|c| c.to_arrow_field()) - .collect::>(); - let meta = Self::build_arrow_schema_meta( - self.primary_key_indexes.clone(), - timestamp_index, - self.version, - ); - - Ok(Schema { - arrow_schema: Arc::new(ArrowSchema::new_with_metadata(fields, meta)), - primary_key_indexes: self.primary_key_indexes, - timestamp_index, - tsid_index, - column_schemas: Arc::new(ColumnSchemas::new(self.columns)), - version: self.version, - }) - } -} - -/// Encoder for schema with version control. -#[derive(Clone, Debug)] -pub struct SchemaEncoder { - version: u8, -} - -impl Default for SchemaEncoder { - fn default() -> Self { - Self::new(DEFAULT_SCHEMA_ENCODING_VERSION) - } -} - -impl SchemaEncoder { - fn new(version: u8) -> Self { - Self { version } - } - - pub fn encode(&self, schema: &Schema) -> Result> { - let pb_schema = schema_pb::TableSchema::from(schema); - let mut buf = Vec::with_capacity(1 + pb_schema.encoded_len()); - buf.push(self.version); - - pb_schema.encode(&mut buf).context(EncodeSchemaToPb)?; - - Ok(buf) - } - - pub fn decode(&self, buf: &[u8]) -> Result { - ensure!(!buf.is_empty(), EmptyEncodedSchema); - - self.ensure_version(buf[0])?; - - let pb_schema = - schema_pb::TableSchema::decode(&buf[1..]).context(DecodeSchemaFromPb { buf })?; - Schema::try_from(pb_schema) - } - - fn ensure_version(&self, version: u8) -> Result<()> { - ensure!( - self.version == version, - InvalidSchemaEncodingVersion { version } - ); - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use bytes_ext::Bytes; - - use super::*; - use crate::{ - datum::Datum, - row::{Row, RowWithMeta}, - time::Timestamp, - }; - - fn build_test_schema() -> Schema { - Builder::new() - .auto_increment_column_id(true) - .add_key_column( - column_schema::Builder::new("key1".to_string(), DatumKind::Varbinary) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_key_column( - column_schema::Builder::new("timestamp".to_string(), DatumKind::Timestamp) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("field1".to_string(), DatumKind::Double) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("field2".to_string(), DatumKind::Double) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .primary_key_indexes(vec![0, 1]) - .build() - .unwrap() - } - - #[test] - fn test_schema_encoding() { - let 
schema = build_test_schema(); - let encoder = SchemaEncoder::default(); - let encoded_schema = encoder - .encode(&schema) - .expect("Should succeed in encoding schema"); - - let decoded_schema = encoder - .decode(&encoded_schema) - .expect("Should succeed in decoding schema"); - - assert_eq!(schema, decoded_schema); - } - - #[test] - fn test_schema() { - let schema = build_test_schema(); - - // Length related test - assert_eq!(4, schema.columns().len()); - assert_eq!(4, schema.num_columns()); - assert_eq!(2, schema.primary_key_indexes.len()); - assert_eq!(1, schema.timestamp_index()); - - // Test key columns - assert_eq!(2, schema.key_columns().len()); - assert_eq!("key1", &schema.key_columns()[0].name); - assert_eq!("timestamp", &schema.key_columns()[1].name); - - // Test normal columns - assert_eq!(2, schema.normal_columns().len()); - assert_eq!("field1", &schema.normal_columns()[0].name); - assert_eq!("field2", &schema.normal_columns()[1].name); - - // Test column_with_name() - let field1 = schema.column_with_name("field1").unwrap(); - assert_eq!(3, field1.id); - assert_eq!("field1", field1.name); - assert!(schema.column_with_name("not exists").is_none()); - - // Test column() - assert_eq!(field1, schema.column(2)); - - // Test arrow schema - let arrow_schema = schema.as_arrow_schema_ref(); - let key1 = arrow_schema.field(0); - assert_eq!("key1", key1.name()); - let field2 = arrow_schema.field(3); - assert_eq!("field2", field2.name()); - - // Test index_of() - assert_eq!(1, schema.index_of("timestamp").unwrap()); - assert!(schema.index_of("not exists").is_none()); - - // Test pb convert - let schema_pb = schema_pb::TableSchema::from(&schema); - let schema_from_pb = Schema::try_from(schema_pb).unwrap(); - assert_eq!(schema, schema_from_pb); - } - - #[test] - fn test_build_unordered() { - let schema = Builder::new() - .auto_increment_column_id(true) - .add_normal_column( - column_schema::Builder::new("field1".to_string(), DatumKind::Double) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_key_column( - column_schema::Builder::new("key1".to_string(), DatumKind::Timestamp) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_key_column( - column_schema::Builder::new("key2".to_string(), DatumKind::Varbinary) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("field2".to_string(), DatumKind::Double) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .primary_key_indexes(vec![1, 2]) - .build() - .unwrap(); - - let columns = schema.columns(); - assert_eq!(1, columns[0].id); - assert_eq!("field1", columns[0].name); - assert_eq!(2, columns[1].id); - assert_eq!("key1", columns[1].name); - assert_eq!(3, columns[2].id); - assert_eq!("key2", columns[2].name); - assert_eq!(4, columns[3].id); - assert_eq!("field2", columns[3].name); - } - - #[test] - fn test_name_exists() { - let builder = Builder::new() - .auto_increment_column_id(true) - .add_normal_column( - column_schema::Builder::new("field1".to_string(), DatumKind::Double) - .build() - .expect("should succeed build column schema"), - ) - .unwrap(); - assert!(builder - .add_normal_column( - column_schema::Builder::new("field1".to_string(), DatumKind::Double) - .build() - .expect("should succeed build column schema"), - ) - .is_err()); - } - - #[test] - fn test_id_exists() { - let builder = Builder::new() - .add_normal_column( - column_schema::Builder::new("field1".to_string(), 
DatumKind::Double) - .id(1) - .build() - .expect("should succeed build column schema"), - ) - .unwrap(); - assert!(builder - .add_normal_column( - column_schema::Builder::new("field2".to_string(), DatumKind::Double) - .id(1) - .build() - .expect("should succeed build column schema") - ) - .is_err()); - } - - #[test] - fn test_key_column_type() { - assert!(Builder::new() - .add_key_column( - column_schema::Builder::new("key".to_string(), DatumKind::Double) - .id(1) - .build() - .expect("should succeed build column schema") - ) - .is_err()); - } - - #[test] - fn test_timestamp_key_exists() { - let builder = Builder::new() - .auto_increment_column_id(true) - .add_key_column( - column_schema::Builder::new("key1".to_string(), DatumKind::Timestamp) - .build() - .expect("should succeed build column schema"), - ) - .unwrap(); - assert!(builder - .add_key_column( - column_schema::Builder::new("key2".to_string(), DatumKind::Timestamp) - .build() - .expect("should succeed build column schema") - ) - .is_err()); - } - - #[test] - fn test_multiple_timestamp() { - Builder::new() - .auto_increment_column_id(true) - .add_key_column( - column_schema::Builder::new("key1".to_string(), DatumKind::Timestamp) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("field1".to_string(), DatumKind::Timestamp) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .primary_key_indexes(vec![0]) - .build() - .unwrap(); - } - - #[test] - fn test_missing_timestamp_key() { - let builder = Builder::new() - .auto_increment_column_id(true) - .add_key_column( - column_schema::Builder::new("key1".to_string(), DatumKind::Varbinary) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("field1".to_string(), DatumKind::Double) - .build() - .expect("should succeed build column schema"), - ) - .unwrap(); - assert!(builder.primary_key_indexes(vec![0]).build().is_err()); - } - - #[test] - fn test_max_column_id() { - let builder = Builder::new() - .add_key_column( - column_schema::Builder::new("key1".to_string(), DatumKind::Varbinary) - .id(2) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("field1".to_string(), DatumKind::Timestamp) - .id(5) - .build() - .expect("should succeed build column schema"), - ) - .unwrap(); - - let schema = builder - .auto_increment_column_id(true) - .add_key_column( - column_schema::Builder::new("key2".to_string(), DatumKind::Timestamp) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("field2".to_string(), DatumKind::Timestamp) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .primary_key_indexes(vec![0]) - .build() - .unwrap(); - - let columns = schema.columns(); - // Check key1 - assert_eq!("key1", &columns[0].name); - assert_eq!(2, columns[0].id); - // Check field1 - assert_eq!("field1", &columns[1].name); - assert_eq!(5, columns[1].id); - // Check key2 - assert_eq!("key2", &columns[2].name); - assert_eq!(6, columns[2].id); - // Check field2 - assert_eq!("field2", &columns[3].name); - assert_eq!(7, columns[3].id); - } - - fn assert_row_compare(ordering: Ordering, schema: &Schema, row1: &Row, row2: &Row) { - let schema_with_key = schema.to_record_schema_with_key(); - let lhs = RowWithMeta { - row: row1, - schema: &schema_with_key, - }; - let rhs = 
RowWithMeta { - row: row2, - schema: &schema_with_key, - }; - assert_eq!(ordering, schema.compare_row(&lhs, &rhs)); - } - - #[test] - fn test_compare_row() { - let schema = Builder::new() - .auto_increment_column_id(true) - .add_key_column( - column_schema::Builder::new("key1".to_string(), DatumKind::Varbinary) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_key_column( - column_schema::Builder::new("key2".to_string(), DatumKind::Timestamp) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("field1".to_string(), DatumKind::Double) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .primary_key_indexes(vec![0, 1]) - .build() - .unwrap(); - - // Test equal - { - let row1 = Row::from_datums(vec![ - Datum::Varbinary(Bytes::from_static(b"key1")), - Datum::Timestamp(Timestamp::new(1005)), - Datum::Double(12.5), - ]); - let row2 = Row::from_datums(vec![ - Datum::Varbinary(Bytes::from_static(b"key1")), - Datum::Timestamp(Timestamp::new(1005)), - Datum::Double(15.5), - ]); - - assert_row_compare(Ordering::Equal, &schema, &row1, &row2); - } - - // Test first key column less - { - let row1 = Row::from_datums(vec![ - Datum::Varbinary(Bytes::from_static(b"key2")), - Datum::Timestamp(Timestamp::new(1005)), - Datum::Double(17.5), - ]); - let row2 = Row::from_datums(vec![ - Datum::Varbinary(Bytes::from_static(b"key5")), - Datum::Timestamp(Timestamp::new(1005)), - Datum::Double(17.5), - ]); - - assert_row_compare(Ordering::Less, &schema, &row1, &row2); - } - - // Test second key column less - { - let row1 = Row::from_datums(vec![ - Datum::Varbinary(Bytes::from_static(b"key2")), - Datum::Timestamp(Timestamp::new(1002)), - Datum::Double(17.5), - ]); - let row2 = Row::from_datums(vec![ - Datum::Varbinary(Bytes::from_static(b"key2")), - Datum::Timestamp(Timestamp::new(1005)), - Datum::Double(17.5), - ]); - - assert_row_compare(Ordering::Less, &schema, &row1, &row2); - } - - // Test first key column greater - { - let row1 = Row::from_datums(vec![ - Datum::Varbinary(Bytes::from_static(b"key7")), - Datum::Timestamp(Timestamp::new(1005)), - Datum::Double(17.5), - ]); - let row2 = Row::from_datums(vec![ - Datum::Varbinary(Bytes::from_static(b"key5")), - Datum::Timestamp(Timestamp::new(1005)), - Datum::Double(17.5), - ]); - - assert_row_compare(Ordering::Greater, &schema, &row1, &row2); - } - - // Test second key column greater - { - let row1 = Row::from_datums(vec![ - Datum::Varbinary(Bytes::from_static(b"key2")), - Datum::Timestamp(Timestamp::new(1007)), - Datum::Double(17.5), - ]); - let row2 = Row::from_datums(vec![ - Datum::Varbinary(Bytes::from_static(b"key2")), - Datum::Timestamp(Timestamp::new(1005)), - Datum::Double(17.5), - ]); - - assert_row_compare(Ordering::Greater, &schema, &row1, &row2); - } - } - - #[test] - fn test_build_from_arrow_schema() { - let schema = Builder::new() - .auto_increment_column_id(true) - .add_key_column( - column_schema::Builder::new(TSID_COLUMN.to_string(), DatumKind::UInt64) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_key_column( - column_schema::Builder::new("timestamp".to_string(), DatumKind::Timestamp) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("value".to_string(), DatumKind::Double) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .primary_key_indexes(vec![0, 1]) - .build() - 
.expect("should succeed to build schema"); - - let arrow_schema = schema.clone().into_arrow_schema_ref(); - let new_schema = Builder::build_from_arrow_schema(arrow_schema) - .expect("should succeed to build new schema"); - - assert_eq!(schema, new_schema); - } - - #[test] - fn test_indexes_encode_and_decode() { - let idx = Indexes(vec![1, 2, 3]); - assert_eq!("1,2,3", idx.to_string()); - assert_eq!(idx, Indexes::from_str("1,2,3").unwrap()); - - let idx = Indexes(vec![]); - assert_eq!("", idx.to_string()); - assert_eq!(idx, Indexes::from_str("").unwrap()); - } - - #[test] - fn test_recovery_schema_from_pb() { - let columns = [ - ("tsid", schema_pb::DataType::Double, 1, false, false, false), - ("ts", schema_pb::DataType::Timestamp, 2, false, false, false), - ("t1", schema_pb::DataType::String, 3, false, false, false), - ("t2", schema_pb::DataType::String, 4, false, false, false), - ("t3", schema_pb::DataType::String, 5, false, false, false), - ]; - let columns = columns - .into_iter() - .map(|column_schema| schema_pb::ColumnSchema { - name: column_schema.0.to_string(), - data_type: column_schema.1 as i32, - id: column_schema.2, - is_nullable: column_schema.3, - is_tag: column_schema.4, - is_dictionary: column_schema.5, - comment: "".to_string(), - default_value: None, - }) - .collect(); - let pb_schema = schema_pb::TableSchema { - columns, - version: 123, - timestamp_id: 1, - primary_key_ids: vec![5, 4, 2], - }; - - let schema = Schema::try_from(pb_schema).unwrap(); - assert_eq!(schema.primary_key_indexes, vec![4, 3, 1]); - assert_eq!(schema.timestamp_index, 1); - assert_eq!(schema.tsid_index, Some(0)); - assert_eq!(schema.version, 123); - assert_eq!( - vec!["tsid", "ts", "t1", "t2", "t3"], - schema - .column_schemas - .columns - .iter() - .map(|col| col.name.as_str()) - .collect::>(), - ); - } -} diff --git a/src/common_types/src/string.rs b/src/common_types/src/string.rs deleted file mode 100644 index cea10148c4..0000000000 --- a/src/common_types/src/string.rs +++ /dev/null @@ -1,121 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Bytes that can safely cast to str/string. - -use std::{convert::TryFrom, fmt, ops, str}; - -use bytes_ext::Bytes; -use snafu::{Backtrace, ResultExt, Snafu}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Bytes are not valid utf8, err:{}.\nBacktrace:\n{}", source, backtrace))] - FromBytes { - source: std::str::Utf8Error, - backtrace: Backtrace, - }, -} - -pub type Result = std::result::Result; - -/// String using [crate::bytes::Bytes] as storage so it can be cast into `Bytes` -/// and clone like `Bytes`. 
-#[derive(Debug, Clone, PartialEq, Eq, PartialOrd)] -pub struct StringBytes(Bytes); - -impl StringBytes { - pub fn new() -> StringBytes { - StringBytes(Bytes::new()) - } - - pub const fn from_static(src: &'static str) -> StringBytes { - StringBytes(Bytes::from_static(src.as_bytes())) - } - - pub fn copy_from_str(src: &str) -> StringBytes { - StringBytes(Bytes::copy_from_slice(src.as_bytes())) - } - - /// Create a [StringBytes] from a valid utf bytes. - /// - /// # Safety - /// The caller must ensure `bytes` is valid utf string. - pub unsafe fn from_bytes_unchecked(bytes: Bytes) -> StringBytes { - StringBytes(bytes) - } - - #[inline] - pub fn as_bytes(&self) -> &[u8] { - &self.0 - } - - #[inline] - pub fn as_str(&self) -> &str { - unsafe { str::from_utf8_unchecked(self.as_bytes()) } - } -} - -impl Default for StringBytes { - fn default() -> Self { - Self::new() - } -} - -impl ops::Deref for StringBytes { - type Target = str; - - #[inline] - fn deref(&self) -> &str { - self.as_str() - } -} - -impl AsRef for StringBytes { - #[inline] - fn as_ref(&self) -> &str { - self.as_str() - } -} - -impl fmt::Display for StringBytes { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str(self.as_str()) - } -} - -impl TryFrom for StringBytes { - type Error = Error; - - fn try_from(bytes: Bytes) -> Result { - str::from_utf8(&bytes).context(FromBytes)?; - - Ok(StringBytes(bytes)) - } -} - -impl From for StringBytes { - fn from(src: String) -> Self { - Self(Bytes::from(src)) - } -} - -impl From<&str> for StringBytes { - fn from(src: &str) -> Self { - Self::copy_from_str(src) - } -} diff --git a/src/common_types/src/table.rs b/src/common_types/src/table.rs deleted file mode 100644 index 00bcce030e..0000000000 --- a/src/common_types/src/table.rs +++ /dev/null @@ -1,23 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -pub type TableId = u64; -pub type TableName = String; -pub type ShardId = u32; -pub type ShardVersion = u64; -pub const DEFAULT_SHARD_ID: u32 = 0; -pub const DEFAULT_SHARD_VERSION: u64 = 0; diff --git a/src/common_types/src/tests.rs b/src/common_types/src/tests.rs deleted file mode 100644 index 2887206827..0000000000 --- a/src/common_types/src/tests.rs +++ /dev/null @@ -1,448 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use bytes_ext::Bytes; -use sqlparser::ast::{BinaryOperator, Expr, Value}; - -use crate::{ - column_schema, - datum::{Datum, DatumKind}, - projected_schema::{ProjectedSchema, RowProjector}, - record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, - row::{ - contiguous::{ContiguousRowReader, ContiguousRowWriter, ProjectedContiguousRow}, - Row, - }, - schema, - schema::{IndexInWriterSchema, Schema, TSID_COLUMN}, - string::StringBytes, - time::Timestamp, -}; - -fn base_schema_builder() -> schema::Builder { - schema::Builder::new() - .auto_increment_column_id(true) - .add_key_column( - column_schema::Builder::new("key1".to_string(), DatumKind::Varbinary) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_key_column( - column_schema::Builder::new("key2".to_string(), DatumKind::Timestamp) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("field1".to_string(), DatumKind::Double) - .is_nullable(true) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("field2".to_string(), DatumKind::String) - .is_nullable(true) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("field3".to_string(), DatumKind::Date) - .is_nullable(true) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("field4".to_string(), DatumKind::Time) - .is_nullable(true) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .primary_key_indexes(vec![0, 1]) -} - -fn default_value_schema_builder() -> schema::Builder { - schema::Builder::new() - .auto_increment_column_id(true) - .primary_key_indexes(vec![0, 1]) - .add_key_column( - column_schema::Builder::new("key1".to_string(), DatumKind::Varbinary) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_key_column( - column_schema::Builder::new("key2".to_string(), DatumKind::Timestamp) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - // The data type of column and its default value will not be the same in most time. - // So we need check if the type coercion is legal and do type coercion when legal. - // In he following, the data type of column is `Int64`, and the type of default value - // expr is `Int64`. So we use this column to cover the test, which has the same type. - column_schema::Builder::new("field1".to_string(), DatumKind::Int64) - .default_value(Some(Expr::Value(Value::Number("10".to_string(), false)))) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - // The data type of column is `UInt32`, and the type of default value expr is `Int64`. - // So we use this column to cover the test, which has different type. 
- column_schema::Builder::new("field2".to_string(), DatumKind::UInt32) - .default_value(Some(Expr::Value(Value::Number("20".to_string(), false)))) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("field3".to_string(), DatumKind::UInt32) - .default_value(Some(Expr::BinaryOp { - left: Box::new(Expr::Value(Value::Number("1".to_string(), false))), - op: BinaryOperator::Plus, - right: Box::new(Expr::Value(Value::Number("2".to_string(), false))), - })) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("field4".to_string(), DatumKind::UInt32) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("field5".to_string(), DatumKind::UInt32) - .default_value(Some(Expr::BinaryOp { - left: Box::new(Expr::Identifier("field4".into())), - op: BinaryOperator::Plus, - right: Box::new(Expr::Value(Value::Number("2".to_string(), false))), - })) - .build() - .expect("should succeed build column schema"), - ) - .unwrap() -} - -/// Build a schema for testing, which contains 6 columns: -/// - key1(varbinary) -/// - key2(timestamp) -/// - field1(double) -/// - field2(string) -/// - field3(Time) -/// - field4(Date) -pub fn build_schema() -> Schema { - base_schema_builder().build().unwrap() -} - -/// Build a schema for testing: -/// key1(varbinary), key2(timestamp), -/// field1(int64, default 10), -/// field2(uint32, default 20), -/// field3(uint32, default 1 + 2) -/// field4(uint32), -/// field5(uint32, default field4 + 2) -pub fn build_default_value_schema() -> Schema { - default_value_schema_builder().build().unwrap() -} - -/// Build a schema for testing: -/// (key1(varbinary), key2(timestamp), field1(double), field2(string), -/// field3(date), field4(time)) tag1(string dictionary), tag2(string dictionary) -pub fn build_schema_with_dictionary() -> Schema { - let builder = base_schema_builder() - .add_normal_column( - column_schema::Builder::new("tag1".to_string(), DatumKind::String) - .is_tag(true) - .is_dictionary(true) - .is_nullable(true) - .build() - .unwrap(), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("tag2".to_string(), DatumKind::String) - .is_tag(true) - .is_dictionary(true) - .build() - .unwrap(), - ) - .unwrap(); - - builder.build().unwrap() -} - -/// Build a schema for testing: -/// (tsid(uint64), key2(timestamp), tag1(string), tag2(string), value(int8), -/// field2(float)) -pub fn build_schema_for_cpu() -> Schema { - let builder = schema::Builder::new() - .auto_increment_column_id(true) - .add_key_column( - column_schema::Builder::new(TSID_COLUMN.to_string(), DatumKind::UInt64) - .build() - .unwrap(), - ) - .unwrap() - .add_key_column( - column_schema::Builder::new("time".to_string(), DatumKind::Timestamp) - .build() - .unwrap(), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("tag1".to_string(), DatumKind::String) - .is_tag(true) - .build() - .unwrap(), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("tag2".to_string(), DatumKind::String) - .is_tag(true) - .build() - .unwrap(), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("value".to_string(), DatumKind::Int8) - .build() - .unwrap(), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("field2".to_string(), DatumKind::Float) - .build() - .unwrap(), - ) - .unwrap(); - - builder.primary_key_indexes(vec![0, 1]).build().unwrap() 
-} - -/// Build a schema for testing: -/// (tsid(uint64), key2(timestamp), tag1(string), tag2(string), value(double), -pub fn build_schema_for_metric() -> Schema { - let builder = schema::Builder::new() - .auto_increment_column_id(true) - .add_key_column( - column_schema::Builder::new(TSID_COLUMN.to_string(), DatumKind::UInt64) - .build() - .unwrap(), - ) - .unwrap() - .add_key_column( - column_schema::Builder::new("timestamp".to_string(), DatumKind::Timestamp) - .build() - .unwrap(), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("tag1".to_string(), DatumKind::String) - .is_tag(true) - .build() - .unwrap(), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("tag2".to_string(), DatumKind::String) - .is_tag(true) - .build() - .unwrap(), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("value".to_string(), DatumKind::Double) - .build() - .unwrap(), - ) - .unwrap(); - - builder.primary_key_indexes(vec![0, 1]).build().unwrap() -} - -#[allow(clippy::too_many_arguments)] -pub fn build_row_for_dictionary( - key1: &[u8], - key2: i64, - field1: f64, - field2: &str, - field3: i32, - field4: i64, - tag1: Option<&str>, - tag2: &str, -) -> Row { - let datums = vec![ - Datum::Varbinary(Bytes::copy_from_slice(key1)), - Datum::Timestamp(Timestamp::new(key2)), - Datum::Double(field1), - Datum::String(StringBytes::from(field2)), - Datum::Date(field3), - Datum::Time(field4), - tag1.map(|v| Datum::String(StringBytes::from(v))) - .unwrap_or(Datum::Null), - Datum::String(StringBytes::from(tag2)), - ]; - - Row::from_datums(datums) -} - -pub fn build_row_for_cpu( - tsid: u64, - ts: i64, - tag1: &str, - tag2: &str, - value: i8, - field2: f32, -) -> Row { - let datums = vec![ - Datum::UInt64(tsid), - Datum::Timestamp(Timestamp::new(ts)), - Datum::String(StringBytes::from(tag1)), - Datum::String(StringBytes::from(tag2)), - Datum::Int8(value), - Datum::Float(field2), - ]; - - Row::from_datums(datums) -} - -pub fn build_projected_schema() -> ProjectedSchema { - let schema = build_schema(); - assert!(schema.num_columns() > 1); - let projection: Vec = (0..schema.num_columns() - 1).collect(); - ProjectedSchema::new(schema, Some(projection)).unwrap() -} - -pub fn build_row( - key1: &[u8], - key2: i64, - field1: f64, - field2: &str, - field3: i32, - field4: i64, -) -> Row { - let datums = vec![ - Datum::Varbinary(Bytes::copy_from_slice(key1)), - Datum::Timestamp(Timestamp::new(key2)), - Datum::Double(field1), - Datum::String(StringBytes::from(field2)), - Datum::Date(field3), - Datum::Time(field4), - ]; - - Row::from_datums(datums) -} - -pub fn build_row_opt( - key1: &[u8], - key2: i64, - field1: Option, - field2: Option<&str>, - field3: Option, - field4: Option, -) -> Row { - let datums = vec![ - Datum::Varbinary(Bytes::copy_from_slice(key1)), - Datum::Timestamp(Timestamp::new(key2)), - field1.map(Datum::Double).unwrap_or(Datum::Null), - field2 - .map(|v| Datum::String(StringBytes::from(v))) - .unwrap_or(Datum::Null), - field3.map(Datum::Date).unwrap_or(Datum::Null), - field4.map(Datum::Time).unwrap_or(Datum::Null), - ]; - - Row::from_datums(datums) -} - -pub fn build_rows() -> Vec { - vec![ - build_row(b"binary key", 1000000, 10.0, "string value", 0, 0), - build_row( - b"binary key1", - 1000001, - 11.0, - "string value 1", - 1000, - 1000000, - ), - build_row_opt( - b"binary key2", - 1000002, - None, - Some("string value 2"), - Some(1000), - Some(1000000), - ), - build_row_opt(b"binary key3", 1000003, Some(13.0), None, Some(1000), None), - build_row_opt(b"binary 
key4", 1000004, None, None, None, Some(1000000)), - ] -} - -pub fn build_fetched_record_batch_by_rows(rows: Vec) -> FetchedRecordBatch { - let schema = build_schema(); - assert!(schema.num_columns() > 1); - let projection: Vec = (0..schema.num_columns() - 1).collect(); - let projected_schema = ProjectedSchema::new(schema.clone(), Some(projection)).unwrap(); - let row_projector = - RowProjector::new(&projected_schema.to_record_schema(), None, &schema, &schema).unwrap(); - - let mut builder = - FetchedRecordBatchBuilder::with_capacity(row_projector.fetched_schema().clone(), None, 2); - let index_in_writer = IndexInWriterSchema::for_same_schema(schema.num_columns()); - - let mut buf = Vec::new(); - for row in rows { - let mut writer = ContiguousRowWriter::new(&mut buf, &schema, &index_in_writer); - - writer.write_row(&row).unwrap(); - - let source_row = ContiguousRowReader::try_new(&buf, &schema).unwrap(); - let projected_row = ProjectedContiguousRow::new(source_row, &row_projector); - builder - .append_projected_contiguous_row(&projected_row) - .unwrap(); - } - builder.build().unwrap() -} - -pub fn check_record_batch_with_key_with_rows( - record_batch_with_key: &FetchedRecordBatch, - row_num: usize, - column_num: usize, - rows: Vec, -) -> bool { - for (i, row) in rows.iter().enumerate().take(row_num) { - for j in 0..column_num { - let datum = &row[j]; - let datum2 = record_batch_with_key.column(j).datum(i); - - if *datum != datum2 { - return false; - } - } - } - true -} diff --git a/src/common_types/src/time.rs b/src/common_types/src/time.rs deleted file mode 100644 index 3a06d74753..0000000000 --- a/src/common_types/src/time.rs +++ /dev/null @@ -1,411 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Time types - -// TODO(yingwen): Support timezone - -use std::{ - convert::{TryFrom, TryInto}, - time::{self, Duration, SystemTime}, -}; - -use datafusion::{ - prelude::{col, lit, Expr}, - scalar::ScalarValue, -}; -use horaedbproto::time_range; -use snafu::{Backtrace, OptionExt, Snafu}; - -/// Error of time module. -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Invalid time range, start:{}, end:{}", start, end))] - InvalidTimeRange { - start: i64, - end: i64, - backtrace: Backtrace, - }, -} - -/// Unix timestamp type in millis -// Use i64 so we can store timestamp before 1970-01-01 -#[derive(Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd, Hash)] -pub struct Timestamp(i64); - -impl Timestamp { - pub const MAX: Timestamp = Timestamp(i64::MAX); - pub const MIN: Timestamp = Timestamp(i64::MIN); - pub const ZERO: Timestamp = Timestamp(0); - - pub const fn new(ts: i64) -> Self { - Self(ts) - } - - /// Return current (non-negative) unix timestamp in millis. 
- pub fn now() -> Self { - SystemTime::now() - .duration_since(time::UNIX_EPOCH) - .map(|duration| { - duration - .as_millis() - .try_into() - .map(Timestamp) - .unwrap_or(Timestamp::MAX) - }) - .unwrap_or(Timestamp::ZERO) - } - - /// Returns the earliest expired timestamp. - #[inline] - pub fn expire_time(ttl: Duration) -> Timestamp { - Timestamp::now().sub_duration_or_min(ttl) - } - - #[inline] - pub fn as_i64(&self) -> i64 { - self.0 - } - - /// Truncate the value of this timestamp by given duration, return that - /// value and keeps current timestamp unchanged. - /// - /// This function won't do overflow check. - #[must_use] - pub fn truncate_by(&self, duration: Duration) -> Self { - let duration_millis = duration.as_millis() as i64; - Timestamp::new(self.0 / duration_millis * duration_millis) - } - - /// Floor the timestamp by the `duration_ms` (in millisecond) and return a - /// new Timestamp instance or None if overflow occurred. - /// - /// The `duration_ms` must be positive - #[inline] - pub fn checked_floor_by_i64(&self, duration_ms: i64) -> Option { - assert!(duration_ms > 0); - let normalized_ts = if self.0 >= 0 { - // self / duration_ms * duration_ms - self.0 - } else { - // (self - (duration_ms - 1)) / duration_ms * duration_ms - self.0.checked_sub(duration_ms - 1)? - }; - - normalized_ts - .checked_div(duration_ms) - .and_then(|v| v.checked_mul(duration_ms)) - .map(Timestamp) - } - - /// Returns the result of this `timestamp + offset_ms`, or None if overflow - /// occurred. - /// - /// The `offset_ms` is in millis resolution - pub fn checked_add_i64(&self, offset_ms: i64) -> Option { - self.0.checked_add(offset_ms).map(Timestamp) - } - - pub fn checked_add(&self, other: Self) -> Option { - self.0.checked_add(other.0).map(Timestamp) - } - - pub fn checked_sub(&self, other: Self) -> Option { - self.0.checked_sub(other.0).map(Timestamp) - } - - /// Returns the result of this `timestamp` - `duration`, or None if overflow - /// occurred. - pub fn checked_sub_duration(&self, duration: Duration) -> Option { - let duration_millis = duration.as_millis().try_into().ok()?; - self.0.checked_sub(duration_millis).map(Timestamp) - } - - /// Return true if the time is expired - pub fn is_expired(&self, expired_time: Timestamp) -> bool { - *self < expired_time - } - - /// Returns the result of this `timestamp` - `duration`, or MIN if overflow - /// occurred. - #[must_use] - pub fn sub_duration_or_min(&self, duration: Duration) -> Timestamp { - self.checked_sub_duration(duration) - .unwrap_or(Timestamp::MIN) - } - - pub fn min(&self, rhs: Timestamp) -> Timestamp { - self.0.min(rhs.0).into() - } - - pub fn max(&self, rhs: Timestamp) -> Timestamp { - self.0.max(rhs.0).into() - } -} - -impl From for i64 { - fn from(timestamp: Timestamp) -> Self { - timestamp.0 - } -} - -impl From for Timestamp { - fn from(ts: i64) -> Self { - Self::new(ts) - } -} - -impl From<&i64> for Timestamp { - fn from(ts: &i64) -> Self { - Self::new(*ts) - } -} - -/// Unix timestamp range in millis -/// -/// The start time is inclusive and the end time is exclusive: [start, end). -/// The range is empty if start equals end. 
-#[derive(Default, Debug, Clone, Copy, Hash, PartialEq, Eq)] -pub struct TimeRange { - /// The start timestamp (inclusive) - inclusive_start: Timestamp, - /// The end timestamp (exclusive) - exclusive_end: Timestamp, -} - -impl TimeRange { - /// Create a new time range, returns None if the start/end is invalid - pub fn new(inclusive_start: Timestamp, exclusive_end: Timestamp) -> Option { - if inclusive_start <= exclusive_end { - Some(Self { - inclusive_start, - exclusive_end, - }) - } else { - None - } - } - - /// Create a new time range, panic if the start/end is invalid. - pub fn new_unchecked(inclusive_start: Timestamp, exclusive_end: Timestamp) -> Self { - Self::new(inclusive_start, exclusive_end).unwrap() - } - - #[cfg(any(test, feature = "test"))] - pub fn new_unchecked_for_test(inclusive_start: i64, exclusive_end: i64) -> Self { - Self::new( - Timestamp::new(inclusive_start), - Timestamp::new(exclusive_end), - ) - .unwrap() - } - - /// Create a time range only including the single timestamp. - pub fn from_timestamp(t: Timestamp) -> Self { - // FIXME(xikai): now the time range can not express the `exclusive_end` as - // infinite. - let end = t.checked_add_i64(1).unwrap_or(t); - Self::new(t, end).unwrap() - } - - /// Create a new time range of [0, max) - pub fn min_to_max() -> Self { - Self { - inclusive_start: Timestamp::MIN, - exclusive_end: Timestamp::MAX, - } - } - - /// Create a empty time range. - pub fn empty() -> Self { - Self { - inclusive_start: Timestamp::ZERO, - exclusive_end: Timestamp::ZERO, - } - } - - /// The inclusive start timestamp - #[inline] - pub fn inclusive_start(&self) -> Timestamp { - self.inclusive_start - } - - /// The exclusive end timestamp - #[inline] - pub fn exclusive_end(&self) -> Timestamp { - self.exclusive_end - } - - /// Return the reference to the exclusive end timestamp. - #[inline] - pub fn exclusive_end_ref(&self) -> &Timestamp { - &self.exclusive_end - } - - /// Returns true if the time range contains the given `ts` - #[inline] - pub fn contains(&self, ts: Timestamp) -> bool { - self.inclusive_start <= ts && ts < self.exclusive_end - } - - /// Returns a time bucket with fixed bucket size that the timestamp belongs - /// to. Returns None if overflow occurred, the bucket_duration is greater - /// than [i64::MAX] or not positive. - pub fn bucket_of(timestamp: Timestamp, bucket_duration: Duration) -> Option { - let bucket_duration_ms: i64 = bucket_duration.as_millis().try_into().ok()?; - if bucket_duration_ms <= 0 { - return None; - } - - let inclusive_start = timestamp.checked_floor_by_i64(bucket_duration_ms)?; - // end = start + bucket_duration - let exclusive_end = inclusive_start.checked_add_i64(bucket_duration_ms)?; - - Some(Self { - inclusive_start, - exclusive_end, - }) - } - - /// Returns true if this time range intersect with `other` - pub fn intersect_with(&self, other: TimeRange) -> bool { - !self.not_intersecting(other) - } - - /// Return true if the time range is expired (`exclusive_end_time` < - /// `expire_time`). 
- pub fn is_expired(&self, expire_time: Option) -> bool { - expire_time.is_some() && self.exclusive_end() <= expire_time.unwrap() - } - - #[inline] - fn not_intersecting(&self, other: TimeRange) -> bool { - other.exclusive_end <= self.inclusive_start || other.inclusive_start >= self.exclusive_end - } - - pub fn intersected_range(&self, other: TimeRange) -> Option { - TimeRange::new( - self.inclusive_start.max(other.inclusive_start), - self.exclusive_end.min(other.exclusive_end), - ) - } - - pub fn merge_range(&self, other: TimeRange) -> TimeRange { - TimeRange { - inclusive_start: self.inclusive_start.min(other.inclusive_start), - exclusive_end: self.exclusive_end.max(other.exclusive_end), - } - } -} - -impl From for time_range::TimeRange { - fn from(src: TimeRange) -> Self { - time_range::TimeRange { - start: src.inclusive_start.as_i64(), - end: src.exclusive_end.as_i64(), - } - } -} - -impl TryFrom for TimeRange { - type Error = Error; - - fn try_from(src: time_range::TimeRange) -> Result { - Self::new(Timestamp::new(src.start), Timestamp::new(src.end)).context(InvalidTimeRange { - start: src.start, - end: src.end, - }) - } -} - -impl TimeRange { - /// Creates expression like: - /// start <= time && time < end - pub fn to_df_expr(&self, column_name: impl AsRef) -> Expr { - let ts_start = ScalarValue::TimestampMillisecond(Some(self.inclusive_start.as_i64()), None); - let ts_end = ScalarValue::TimestampMillisecond(Some(self.exclusive_end.as_i64()), None); - let column_name = column_name.as_ref(); - let ts_low = col(column_name).gt_eq(lit(ts_start)); - let ts_high = col(column_name).lt(lit(ts_end)); - - ts_low.and(ts_high) - } -} - -#[cfg(test)] -mod test { - use std::time::Duration; - - use crate::time::{TimeRange, Timestamp}; - - #[test] - fn test_timestamp() { - // 1637723901000: 2021-11-24 11:18:21 - let timestamp = Timestamp::new(1637723901000); - // 1d - let ttl = Duration::from_secs(24 * 3600); - assert_eq!( - timestamp.sub_duration_or_min(ttl), - Timestamp::new(1637637501000) - ); - assert_eq!(timestamp.truncate_by(ttl), Timestamp::new(1637712000000)); - assert_eq!( - timestamp.checked_floor_by_i64(2000), - Some(Timestamp::new(1637723900000)) - ); - assert_eq!( - timestamp.checked_add_i64(2000), - Some(Timestamp::new(1637723903000)) - ); - assert_eq!( - timestamp.checked_sub_duration(ttl), - Some(Timestamp::new(1637637501000)) - ); - } - - #[test] - fn test_time_range() { - // [100,200) - let time_range = TimeRange::new_unchecked_for_test(100, 200); - assert!(time_range.contains(Timestamp::new(150))); - assert!(time_range.contains(Timestamp::new(100))); - assert!(!time_range.contains(Timestamp::new(200))); - - assert!(!time_range.is_expired(Some(Timestamp::new(50)))); - assert!(time_range.is_expired(Some(Timestamp::new(200)))); - - assert_eq!( - TimeRange::bucket_of(Timestamp::new(100), Duration::from_millis(2)), - Some(TimeRange::new_unchecked_for_test(100, 102)) - ); - - let time_range2 = TimeRange::new_unchecked_for_test(200, 300); - assert!(!time_range.intersect_with(time_range2)); - let time_range3 = TimeRange::new_unchecked_for_test(50, 200); - assert!(time_range.intersect_with(time_range3)); - - assert!(time_range.not_intersecting(time_range2)); - assert!(!time_range.not_intersecting(time_range3)); - } - - #[test] - fn test_bucket_of_negative_timestamp() { - let ts = Timestamp::new(-126316800000); - let range = TimeRange::bucket_of(ts, Duration::from_millis(25920000000)).unwrap(); - assert!(range.contains(ts), "range:{range:?}"); - } -} diff --git 
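As a side note on the time.rs module removed above: its bucketing logic floors timestamps toward negative infinity, so pre-1970 (negative) values land in the correct bucket, which is exactly what test_bucket_of_negative_timestamp checks. A minimal standalone sketch of that flooring rule follows; the name floor_to_bucket is illustrative and not part of the removed API.

    /// Floor `ts_ms` to a multiple of `bucket_ms` (which must be positive),
    /// rounding toward negative infinity; returns None on overflow.
    /// Mirrors the arithmetic of the removed Timestamp::checked_floor_by_i64.
    fn floor_to_bucket(ts_ms: i64, bucket_ms: i64) -> Option<i64> {
        assert!(bucket_ms > 0);
        let shifted = if ts_ms >= 0 {
            ts_ms
        } else {
            // Shift negative values down so that truncating division still floors.
            ts_ms.checked_sub(bucket_ms - 1)?
        };
        shifted
            .checked_div(bucket_ms)
            .and_then(|v| v.checked_mul(bucket_ms))
    }

For example, floor_to_bucket(-126_316_800_000, 25_920_000_000) yields -129_600_000_000, so the bucket [-129_600_000_000, -103_680_000_000) still contains the original timestamp, matching the expectation in the test above.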
a/src/components/alloc_tracker/Cargo.toml b/src/components/alloc_tracker/Cargo.toml deleted file mode 100644 index 714f9e10ef..0000000000 --- a/src/components/alloc_tracker/Cargo.toml +++ /dev/null @@ -1,31 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "alloc_tracker" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true diff --git a/src/components/alloc_tracker/src/lib.rs b/src/components/alloc_tracker/src/lib.rs deleted file mode 100644 index 9cf7f03682..0000000000 --- a/src/components/alloc_tracker/src/lib.rs +++ /dev/null @@ -1,174 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Alloc tracker - -use std::sync::atomic::{AtomicUsize, Ordering}; - -/// Collect memory usage from tracker, useful for extending the tracker -pub trait Collector { - /// Called when `bytes` bytes memory is allocated and tracked by the tracker - fn on_allocate(&self, bytes: usize); - - /// Called when `bytes` bytes memory is freed and tracked by the tracker - fn on_free(&self, bytes: usize); -} - -/// A tracker to track memory in used -// TODO(yingwen): Impl a thread local or local tracker that are not thread safe, -// and collect statistics into the thread safe one for better performance -pub struct Tracker { - collector: T, - bytes_allocated: AtomicUsize, -} - -impl Tracker { - pub fn new(collector: T) -> Self { - Self { - collector, - bytes_allocated: AtomicUsize::new(0), - } - } - - /// Increase consumption of this tracker by bytes - pub fn consume(&self, bytes: usize) { - self.bytes_allocated.fetch_add(bytes, Ordering::Relaxed); - self.collector.on_allocate(bytes); - } - - /// Decrease consumption of this tracker by bytes - /// - /// The caller should guarantee the released bytes wont larger than bytes - /// already consumed - pub fn release(&self, bytes: usize) { - self.bytes_allocated.fetch_sub(bytes, Ordering::Relaxed); - self.collector.on_free(bytes); - } - - /// Bytes allocated - pub fn bytes_allocated(&self) -> usize { - self.bytes_allocated.load(Ordering::Relaxed) - } -} - -impl Drop for Tracker { - fn drop(&mut self) { - let bytes = *self.bytes_allocated.get_mut(); - self.collector.on_free(bytes); - } -} - -/// The noop collector does nothing on alloc and free -struct NoopCollector; - -impl Collector for NoopCollector { - fn on_allocate(&self, _bytes: usize) {} - - fn on_free(&self, _bytes: usize) {} -} - -/// A simple tracker hides the collector api -pub struct SimpleTracker(Tracker); - -impl Default for SimpleTracker { - fn default() -> Self { - Self(Tracker::new(NoopCollector)) - } -} - -impl SimpleTracker { - /// Increase consumption of this tracker by bytes - #[inline] - pub fn consume(&self, bytes: usize) { - self.0.consume(bytes); - } - - /// Decrease consumption of this tracker by bytes - /// - /// The caller should guarantee the released bytes wont larger than bytes - /// already consumed - #[inline] - pub fn release(&self, bytes: usize) { - self.0.release(bytes); - } - - /// Bytes allocated - pub fn bytes_allocated(&self) -> usize { - self.0.bytes_allocated() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_simple_tracker() { - let tracker = SimpleTracker::default(); - tracker.consume(256); - assert_eq!(256, tracker.bytes_allocated()); - - tracker.release(100); - assert_eq!(156, tracker.bytes_allocated()); - } - - #[test] - fn test_collector() { - use std::sync::atomic::AtomicBool; - - struct MockCollector { - allocated: AtomicBool, - freed: AtomicBool, - } - - impl MockCollector { - fn new() -> Self { - Self { - allocated: AtomicBool::new(false), - freed: AtomicBool::new(false), - } - } - } - - impl Drop for MockCollector { - fn drop(&mut self) { - assert!(*self.allocated.get_mut()); - assert!(*self.freed.get_mut()); - } - } - - impl Collector for MockCollector { - fn on_allocate(&self, bytes: usize) { - assert_eq!(800, bytes); - self.allocated.store(true, Ordering::Relaxed); - } - - fn on_free(&self, bytes: usize) { - if self.freed.load(Ordering::Relaxed) { - assert_eq!(440, bytes); - } else { - assert_eq!(360, bytes); - } - self.freed.store(true, Ordering::Relaxed); - } - } - - let tracker = Tracker::new(MockCollector::new()); - 
tracker.consume(800); - tracker.release(360); - } -} diff --git a/src/components/arena/Cargo.toml b/src/components/arena/Cargo.toml deleted file mode 100644 index 93a2874b4a..0000000000 --- a/src/components/arena/Cargo.toml +++ /dev/null @@ -1,33 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "arena" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] diff --git a/src/components/arena/src/arena_trait.rs b/src/components/arena/src/arena_trait.rs deleted file mode 100644 index b3b95a10dc..0000000000 --- a/src/components/arena/src/arena_trait.rs +++ /dev/null @@ -1,88 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{alloc::Layout, ptr::NonNull, sync::Arc}; - -/// Memory Arena trait. -/// -/// The trait itself provides and enforces no guarantee about alignment. It's -/// implementation's responsibility to cover. -/// -/// All memory-relevant methods (`alloc()` etc.) are not "unsafe". Compare with -/// "deallocate" which is not included in this trait, allocating is more safer -/// and not likely to run into UB. However in fact, playing with raw pointer is -/// always dangerous and needs to be careful for both who implements and uses -/// this trait. -pub trait Arena { - type Stats; - - // required methods - - /// Try to allocate required memory described by layout. Return a pointer of - /// allocated space in success, while `None` if failed. - fn try_alloc(&self, layout: Layout) -> Option>; - - /// Get arena's statistics. - fn stats(&self) -> Self::Stats; - - /// Get arena's block size. - fn block_size(&self) -> usize; - - // provided methods - - /// Allocate required memory. Panic if failed. - fn alloc(&self, layout: Layout) -> NonNull { - self.try_alloc(layout).unwrap() - } -} - -/// Basic statistics of arena. Offers [bytes_allocated] -/// and [bytes_used]. 
-#[derive(Debug, Clone, Copy)] -pub struct BasicStats { - pub(crate) bytes_allocated: usize, - pub(crate) bytes_used: usize, -} - -impl BasicStats { - /// Total bytes allocated from system. - #[inline] - pub fn bytes_allocated(&self) -> usize { - self.bytes_allocated - } - - /// Total bytes allocated to user. - #[inline] - pub fn bytes_used(&self) -> usize { - self.bytes_used - } -} - -/// Collect memory usage from Arena. -pub trait Collector { - /// Called when `bytes` bytes memory is allocated in arena. - fn on_alloc(&self, bytes: usize); - - /// Called when `bytes` bytes memory is used in arena. - fn on_used(&self, bytes: usize); - - /// Called when `allocated` bytes memory is released, and `used` bytes in - /// it. - fn on_free(&self, used: usize, allocated: usize); -} - -pub type CollectorRef = Arc; diff --git a/src/components/arena/src/lib.rs b/src/components/arena/src/lib.rs deleted file mode 100644 index d6ddc04960..0000000000 --- a/src/components/arena/src/lib.rs +++ /dev/null @@ -1,24 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! `Arena` Trait and implementations. - -mod arena_trait; -mod mono_inc; - -pub use arena_trait::{Arena, BasicStats, Collector, CollectorRef}; -pub use mono_inc::{MonoIncArena, NoopCollector}; diff --git a/src/components/arena/src/mono_inc.rs b/src/components/arena/src/mono_inc.rs deleted file mode 100644 index 6d98aeeb59..0000000000 --- a/src/components/arena/src/mono_inc.rs +++ /dev/null @@ -1,367 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{ - alloc::{alloc, dealloc, Layout}, - ptr::NonNull, - sync::{Arc, RwLock}, -}; - -use crate::arena_trait::{Arena, BasicStats, Collector, CollectorRef}; - -/// The noop collector does nothing on alloc and free -pub struct NoopCollector; - -impl Collector for NoopCollector { - fn on_alloc(&self, _bytes: usize) {} - - fn on_used(&self, _bytes: usize) {} - - fn on_free(&self, _used: usize, _allocated: usize) {} -} - -const DEFAULT_ALIGN: usize = 8; - -/// A thread-safe arena. 
All allocated memory is aligned to 8. Organizes its -/// allocated memory as blocks. -#[derive(Clone)] -pub struct MonoIncArena { - core: Arc>, - block_size: usize, -} - -impl MonoIncArena { - pub fn new(block_size: usize) -> Self { - Self { - core: Arc::new(RwLock::new(ArenaCore::new( - block_size, - Arc::new(NoopCollector {}), - ))), - block_size, - } - } - - pub fn with_collector(block_size: usize, collector: CollectorRef) -> Self { - Self { - core: Arc::new(RwLock::new(ArenaCore::new(block_size, collector))), - block_size, - } - } -} - -impl Arena for MonoIncArena { - type Stats = BasicStats; - - fn try_alloc(&self, layout: Layout) -> Option> { - Some(self.core.write().unwrap().alloc(layout)) - } - - fn stats(&self) -> Self::Stats { - self.core.read().unwrap().stats - } - - fn alloc(&self, layout: Layout) -> NonNull { - self.core.write().unwrap().alloc(layout) - } - - fn block_size(&self) -> usize { - self.block_size - } -} - -struct ArenaCore { - collector: CollectorRef, - regular_layout: Layout, - regular_blocks: Vec, - special_blocks: Vec, - stats: BasicStats, -} - -impl ArenaCore { - /// # Safety - /// Required property is tested in debug assertions. - fn new(regular_block_size: usize, collector: CollectorRef) -> Self { - debug_assert_ne!(DEFAULT_ALIGN, 0); - debug_assert_eq!(DEFAULT_ALIGN & (DEFAULT_ALIGN - 1), 0); - // TODO(yingwen): Avoid panic. - let regular_layout = Layout::from_size_align(regular_block_size, DEFAULT_ALIGN).unwrap(); - let regular_blocks = vec![Block::new(regular_layout)]; - let special_blocks = vec![]; - let bytes = regular_layout.size(); - collector.on_alloc(bytes); - - Self { - collector, - regular_layout, - regular_blocks, - special_blocks, - stats: BasicStats { - bytes_allocated: bytes, - bytes_used: 0, - }, - } - } - - /// Input layout will be aligned. - fn alloc(&mut self, layout: Layout) -> NonNull { - let layout = layout - .align_to(self.regular_layout.align()) - .unwrap() - .pad_to_align(); - let bytes = layout.size(); - // TODO(Ruihang): determine threshold - if layout.size() > self.regular_layout.size() { - self.stats.bytes_used += bytes; - self.collector.on_used(bytes); - Self::add_new_block( - layout, - &mut self.special_blocks, - &mut self.stats, - &self.collector, - ); - let block = self.special_blocks.last().unwrap(); - return block.data; - } - - self.stats.bytes_used += bytes; - self.collector.on_used(bytes); - if let Some(ptr) = self.try_alloc(layout) { - ptr - } else { - Self::add_new_block( - self.regular_layout, - &mut self.regular_blocks, - &mut self.stats, - &self.collector, - ); - self.try_alloc(layout).unwrap() - } - } - - /// # Safety - /// `regular_blocks` vector is guaranteed to contains at least one element. - fn try_alloc(&mut self, layout: Layout) -> Option> { - self.regular_blocks.last_mut().unwrap().alloc(layout) - } - - fn add_new_block( - layout: Layout, - container: &mut Vec, - stats: &mut BasicStats, - collector: &CollectorRef, - ) { - let new_block = Block::new(layout); - container.push(new_block); - // Update allocated stats once a new block has been allocated from the system. - stats.bytes_allocated += layout.size(); - collector.on_alloc(layout.size()); - } -} - -impl Drop for ArenaCore { - fn drop(&mut self) { - self.collector - .on_free(self.stats.bytes_used, self.stats.bytes_allocated); - } -} - -struct Block { - data: NonNull, - len: usize, - layout: Layout, -} - -impl Block { - /// Create a new block. Return the pointer of this new block. - /// - /// # Safety - /// See [std::alloc::alloc]. 
The allocated memory will be deallocated in - /// drop(). - fn new(layout: Layout) -> Block { - let data = unsafe { alloc(layout) }; - - Self { - data: NonNull::new(data).unwrap(), - len: 0, - layout, - } - } - - /// # Safety - /// ## ptr:add() - /// The added offset is checked before. - /// ## NonNull::new_unchecked() - /// `ptr` is added from a NonNull. - fn alloc(&mut self, layout: Layout) -> Option> { - let size = layout.size(); - - if self.len + size <= self.layout.size() { - let ptr = unsafe { self.data.as_ptr().add(self.len) }; - self.len += size; - unsafe { Some(NonNull::new_unchecked(ptr)) } - } else { - None - } - } -} - -impl Drop for Block { - /// Reclaim space pointed by `data`. - fn drop(&mut self) { - unsafe { dealloc(self.data.as_ptr(), self.layout) } - } -} - -unsafe impl Send for Block {} -unsafe impl Sync for Block {} - -#[cfg(test)] -mod test { - use std::{ - mem, - sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, - }, - }; - - use super::*; - - /// # Safety: - /// Caller should check the input buf has enough space. - fn consume_buf_as_u64_slice(buf: NonNull, n: usize) { - unsafe { - let mut buf = buf.as_ptr() as *mut u64; - for i in 0..n { - *buf = i as u64; - buf = buf.add(1); - } - } - } - - #[test] - fn test_stats() { - let arena = MonoIncArena::new(1024 * 1024); - - // Size is 80 - let layout_slice = Layout::new::<[u64; 10]>().align_to(8).unwrap(); - for _ in 0..20 { - arena.alloc(layout_slice); - } - - assert_eq!(1024 * 1024, arena.stats().bytes_allocated()); - assert_eq!(1600, arena.stats().bytes_used()); - } - - struct MockCollector { - allocated: AtomicUsize, - used: AtomicUsize, - } - - impl Collector for MockCollector { - fn on_alloc(&self, bytes: usize) { - self.allocated.fetch_add(bytes, Ordering::Relaxed); - } - - fn on_used(&self, bytes: usize) { - self.used.fetch_add(bytes, Ordering::Relaxed); - } - - fn on_free(&self, _used: usize, _allocated: usize) {} - } - - #[test] - fn test_collector() { - let collector = Arc::new(MockCollector { - allocated: AtomicUsize::new(0), - used: AtomicUsize::new(0), - }); - - let arena = MonoIncArena::with_collector(1024 * 1024, collector.clone()); - - // Size is 80 - let layout_slice = Layout::new::<[u64; 10]>().align_to(8).unwrap(); - for _ in 0..20 { - arena.alloc(layout_slice); - } - - assert_eq!(1024 * 1024, collector.allocated.load(Ordering::Relaxed)); - assert_eq!(1600, collector.used.load(Ordering::Relaxed)); - } - - #[test] - fn alloc_small_slice() { - let arena = MonoIncArena::new(128); - - let layout_slice = Layout::new::<[u64; 10]>().align_to(8).unwrap(); - for _ in 0..20 { - let buf = arena.alloc(layout_slice); - consume_buf_as_u64_slice(buf, 10); - } - - assert_eq!(2560, arena.stats().bytes_allocated()); - assert_eq!(1600, arena.stats().bytes_used()); - } - - #[test] - fn alloc_huge_slice() { - let arena = MonoIncArena::new(128); - - let layout_slice = Layout::new::<[u64; 20]>().align_to(8).unwrap(); - for _ in 0..20 { - let buf = arena.alloc(layout_slice); - consume_buf_as_u64_slice(buf, 20); - } - - assert_eq!(3328, arena.stats().bytes_allocated()); - assert_eq!(3200, arena.stats().bytes_used()); - } - - #[test] - fn alloc_various_slice() { - let arena = MonoIncArena::new(1024); - const SIZES: [usize; 12] = [10, 200, 30, 1024, 512, 77, 89, 1, 3, 29, 16, 480]; - let total_used: usize = SIZES.iter().map(|v| v * 8).sum(); - - for size in &SIZES { - let layout_slice = Layout::from_size_align(mem::size_of::() * *size, 8).unwrap(); - let buf = arena.alloc(layout_slice); - consume_buf_as_u64_slice(buf, *size); - 
} - - assert_eq!(20800, arena.stats().bytes_allocated()); - assert_eq!(total_used, arena.stats().bytes_used()); - } - - #[test] - fn unaligned_alloc_request() { - let arena = MonoIncArena::new(1024); - - let regular_req_layout = Layout::from_size_align(mem::size_of::(), 2).unwrap(); - for _ in 0..10 { - let buf = arena.alloc(regular_req_layout).as_ptr() as usize; - assert_eq!(0, buf % DEFAULT_ALIGN); - } - - // 2003 is a prime number and 2004 % 8 != 0 - let special_req_layout = Layout::from_size_align(2003, 2).unwrap(); - for _ in 0..10 { - let buf = arena.alloc(special_req_layout).as_ptr() as usize; - assert_eq!(0, buf % DEFAULT_ALIGN); - } - } -} diff --git a/src/components/arrow_ext/Cargo.toml b/src/components/arrow_ext/Cargo.toml deleted file mode 100644 index 2003fbe971..0000000000 --- a/src/components/arrow_ext/Cargo.toml +++ /dev/null @@ -1,37 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "arrow_ext" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -arrow = { workspace = true } -serde = { workspace = true, features = ["derive"] } -snafu = { workspace = true } -zstd = { workspace = true } diff --git a/src/components/arrow_ext/src/ipc.rs b/src/components/arrow_ext/src/ipc.rs deleted file mode 100644 index 839bc4a0ad..0000000000 --- a/src/components/arrow_ext/src/ipc.rs +++ /dev/null @@ -1,438 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Utilities for `RecordBatch` serialization using Arrow IPC - -use std::{borrow::Cow, io::Cursor, sync::Arc}; - -use arrow::{ - datatypes::{DataType, Field, Schema, SchemaRef}, - ipc::{reader::StreamReader, writer::StreamWriter}, - record_batch::RecordBatch, -}; -use serde::{Deserialize, Serialize}; -use snafu::{Backtrace, ResultExt, Snafu}; - -#[derive(Snafu, Debug)] -#[snafu(visibility(pub))] -pub enum Error { - #[snafu(display("Arrow error, err:{}.\nBacktrace:\n{}", source, backtrace))] - ArrowError { - source: arrow::error::ArrowError, - backtrace: Backtrace, - }, - - #[snafu(display("Zstd decode error, err:{}.\nBacktrace:\n{}", source, backtrace))] - ZstdError { - source: std::io::Error, - backtrace: Backtrace, - }, -} - -type Result = std::result::Result; - -const DEFAULT_COMPRESS_MIN_LENGTH: usize = 80 * 1024; - -#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Deserialize, Serialize)] -pub enum CompressionMethod { - #[default] - None, - Zstd, -} - -// https://facebook.github.io/zstd/zstd_manual.html -// The lower the level, the faster the speed (at the cost of compression). -const ZSTD_LEVEL: i32 = 3; - -#[derive(Default)] -/// Encoder that can encode a batch of record batches with specific compression -/// options. -pub struct RecordBatchesEncoder { - stream_writer: Option>>, - num_rows: usize, - /// Whether the writer has more than one dict fields, we need to do schema - /// convert. - cached_converted_schema: Option, - compress_opts: CompressOptions, -} - -#[derive(Debug, Clone, Copy, Deserialize, Serialize)] -pub struct CompressOptions { - /// The minimum length of the payload to be compressed. - pub compress_min_length: usize, - pub method: CompressionMethod, -} - -impl Default for CompressOptions { - fn default() -> Self { - Self { - compress_min_length: DEFAULT_COMPRESS_MIN_LENGTH, - method: CompressionMethod::Zstd, - } - } -} - -#[derive(Clone, Default, Debug)] -pub struct CompressOutput { - pub method: CompressionMethod, - pub payload: Vec, -} - -impl CompressOutput { - #[inline] - pub fn no_compression(payload: Vec) -> Self { - Self { - method: CompressionMethod::None, - payload, - } - } -} - -impl CompressOptions { - pub fn maybe_compress(&self, input: Vec) -> Result { - if input.len() < self.compress_min_length { - return Ok(CompressOutput::no_compression(input)); - } - - match self.method { - CompressionMethod::None => Ok(CompressOutput::no_compression(input)), - CompressionMethod::Zstd => { - let payload = zstd::bulk::compress(&input, ZSTD_LEVEL).context(ZstdError)?; - Ok(CompressOutput { - method: CompressionMethod::Zstd, - payload, - }) - } - } - } -} - -impl RecordBatchesEncoder { - pub fn new(compress_opts: CompressOptions) -> Self { - Self { - stream_writer: None, - num_rows: 0, - cached_converted_schema: None, - compress_opts, - } - } - - /// Get the number of rows that have been encoded. - pub fn num_rows(&self) -> usize { - self.num_rows - } - - /// When schema more than one dict fields, it will return a new owned - /// schema, otherwise it just return the origin schema. 
- /// - /// Workaround for https://github.com/apache/arrow-datafusion/issues/6784 - fn convert_schema(schema: &SchemaRef) -> Cow { - let dict_field_num: usize = schema - .fields() - .iter() - .map(|f| { - if let DataType::Dictionary(_, _) = f.data_type() { - 1 - } else { - 0 - } - }) - .sum(); - if dict_field_num <= 1 { - return Cow::Borrowed(schema); - } - - let new_fields = schema - .fields() - .iter() - .enumerate() - .map(|(i, f)| { - if let DataType::Dictionary(_, _) = f.data_type() { - let dict_id = i as i64; - Arc::new(Field::new_dict( - f.name(), - f.data_type().clone(), - f.is_nullable(), - dict_id, - f.dict_is_ordered().unwrap_or(false), - )) - } else { - f.clone() - } - }) - .collect::>(); - - let schema_ref = Arc::new(Schema::new_with_metadata( - new_fields, - schema.metadata.clone(), - )); - - Cow::Owned(schema_ref) - } - - /// Append one batch into the encoder for encoding. - pub fn write(&mut self, batch: &RecordBatch) -> Result<()> { - let stream_writer = if let Some(v) = &mut self.stream_writer { - v - } else { - let mem_size = batch - .columns() - .iter() - .map(|col| col.get_buffer_memory_size()) - .sum(); - let buffer: Vec = Vec::with_capacity(mem_size); - let schema = batch.schema(); - let schema = Self::convert_schema(&schema); - let stream_writer = StreamWriter::try_new(buffer, &schema).context(ArrowError)?; - if schema.is_owned() { - self.cached_converted_schema = Some(schema.into_owned()); - } - self.stream_writer = Some(stream_writer); - self.stream_writer.as_mut().unwrap() - }; - - if let Some(schema) = &self.cached_converted_schema { - let batch = RecordBatch::try_new(schema.clone(), batch.columns().to_vec()) - .context(ArrowError)?; - stream_writer.write(&batch).context(ArrowError)?; - } else { - stream_writer.write(batch).context(ArrowError)?; - } - self.num_rows += batch.num_rows(); - Ok(()) - } - - /// Finish encoding and generate the final encoded bytes, which may be - /// compressed. - pub fn finish(mut self) -> Result { - let stream_writer = match self.stream_writer.take() { - None => return Ok(CompressOutput::no_compression(Vec::new())), - Some(v) => v, - }; - - let encoded_bytes = stream_writer.into_inner().context(ArrowError)?; - self.compress_opts.maybe_compress(encoded_bytes) - } -} - -/// Encode one record batch with given compression. -pub fn encode_record_batch( - batch: &RecordBatch, - compress_opts: CompressOptions, -) -> Result { - let mut encoder = RecordBatchesEncoder::new(compress_opts); - encoder.write(batch)?; - encoder.finish() -} - -/// Decode multiple record batches from the encoded bytes. -pub fn decode_record_batches( - bytes: Vec, - compression: CompressionMethod, -) -> Result> { - if bytes.is_empty() { - return Ok(Vec::new()); - } - - let bytes = match compression { - CompressionMethod::None => bytes, - CompressionMethod::Zstd => { - zstd::stream::decode_all(Cursor::new(bytes)).context(ZstdError)? 
- } - }; - - let stream_reader = StreamReader::try_new(Cursor::new(bytes), None).context(ArrowError)?; - stream_reader - .collect::, _>>() - .context(ArrowError) -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use arrow::{ - array::{Int32Array, StringArray, StringDictionaryBuilder}, - datatypes::{DataType, Field, Int32Type, Schema}, - }; - - use super::*; - - fn create_dictionary_record_batch() -> RecordBatch { - let col1 = Field::new_dict( - "dic1", - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), - false, - 1, - false, - ); - let col2 = Field::new_dict( - "dic2", - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), - true, - 0, - false, - ); - let schema = Schema::new(vec![col1, col2]); - let mut builder = StringDictionaryBuilder::::new(); - builder.append_value("d1"); - builder.append_value("d2"); - let dic1 = builder.finish(); - let mut builder = StringDictionaryBuilder::::new(); - builder.append_null(); - builder.append_value("d3"); - let dic2 = builder.finish(); - - RecordBatch::try_new(Arc::new(schema), vec![Arc::new(dic1), Arc::new(dic2)]).unwrap() - } - - fn create_batch(seed: usize, rows: usize) -> RecordBatch { - let schema = Schema::new(vec![ - Field::new("a", DataType::Int32, false), - Field::new("b", DataType::Utf8, false), - Field::new( - "c", - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), - false, - ), - Field::new( - "d", - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), - false, - ), - ]); - - let a = Int32Array::from_iter_values(0..rows as i32); - let b = StringArray::from_iter_values((0..rows).map(|i| (i + seed).to_string())); - let c = { - let mut b = StringDictionaryBuilder::::new(); - for i in 0..rows { - b.append_value(((i + seed) % 10).to_string()); - } - b.finish() - }; - let d = { - let mut b = StringDictionaryBuilder::::new(); - for i in 0..rows { - b.append_value(((i + seed) % 20).to_string()); - } - b.finish() - }; - RecordBatch::try_new( - Arc::new(schema), - vec![Arc::new(a), Arc::new(b), Arc::new(c), Arc::new(d)], - ) - .unwrap() - } - - fn ensure_encoding_and_decoding( - input: &RecordBatch, - compress_opts: CompressOptions, - expect_compress_method: CompressionMethod, - ) { - let output = encode_record_batch(input, compress_opts).unwrap(); - assert_eq!(output.method, expect_compress_method); - let decoded_batches = decode_record_batches(output.payload, output.method).unwrap(); - assert_eq!(decoded_batches.len(), 1); - assert_eq!(input, &decoded_batches[0]); - } - - #[test] - fn test_ipc_encode_decode() { - let batch = create_batch(0, 1024); - for compression in [CompressionMethod::None, CompressionMethod::Zstd] { - let compress_opts = CompressOptions { - compress_min_length: 0, - method: compression, - }; - ensure_encoding_and_decoding(&batch, compress_opts, compression); - } - } - - #[test] - fn test_ipc_encode_decode_with_dicitonary_encode() { - let batch = create_dictionary_record_batch(); - for compression in [CompressionMethod::None, CompressionMethod::Zstd] { - let compress_opts = CompressOptions { - compress_min_length: 0, - method: compression, - }; - ensure_encoding_and_decoding(&batch, compress_opts, compression); - } - } - - #[test] - fn test_encode_multiple_record_batches() { - let num_batches = 1000; - let mut batches = Vec::with_capacity(num_batches); - for i in 0..num_batches { - batches.push(create_batch(i, 1024)); - } - - let compress_opts = CompressOptions { - compress_min_length: 0, - method: CompressionMethod::Zstd, - 
}; - let mut encoder = RecordBatchesEncoder::new(compress_opts); - for batch in &batches { - encoder.write(batch).unwrap(); - } - let output = encoder.finish().unwrap(); - assert_eq!(output.method, CompressionMethod::Zstd); - let decoded_batches = - decode_record_batches(output.payload, CompressionMethod::Zstd).unwrap(); - assert_eq!(decoded_batches, batches); - } - - #[test] - fn test_compression_decision() { - let batch = create_batch(0, 1024); - - { - // Encode the record batch with a large `compress_min_length`, so the output - // should not be compressed. - let compress_opts = CompressOptions { - compress_min_length: 1024 * 1024 * 1024, - method: CompressionMethod::Zstd, - }; - ensure_encoding_and_decoding(&batch, compress_opts, CompressionMethod::None); - } - - { - // Encode the record batch with a small `compress_min_length`, so the output - // should be compressed. - let compress_opts = CompressOptions { - compress_min_length: 10, - method: CompressionMethod::Zstd, - }; - ensure_encoding_and_decoding(&batch, compress_opts, CompressionMethod::Zstd); - } - } - - // Test that encoding without any record batch should not panic. - #[test] - fn test_encode_no_record_batch() { - let compress_opts = CompressOptions { - compress_min_length: 0, - method: CompressionMethod::Zstd, - }; - let encoder = RecordBatchesEncoder::new(compress_opts); - let output = encoder.finish().unwrap(); - assert_eq!(output.method, CompressionMethod::None); - assert!(output.payload.is_empty()); - } -} diff --git a/src/components/arrow_ext/src/lib.rs b/src/components/arrow_ext/src/lib.rs deleted file mode 100644 index 80a5d2ef8f..0000000000 --- a/src/components/arrow_ext/src/lib.rs +++ /dev/null @@ -1,20 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#![feature(cow_is_borrowed)] -pub mod ipc; -pub mod operation; diff --git a/src/components/arrow_ext/src/operation.rs b/src/components/arrow_ext/src/operation.rs deleted file mode 100644 index a6088c4592..0000000000 --- a/src/components/arrow_ext/src/operation.rs +++ /dev/null @@ -1,96 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -//! utilities for manipulating arrow/parquet/datafusion data structures. - -use std::convert::TryFrom; - -use arrow::{ - array::UInt32Array, - compute, - error::{ArrowError, Result}, - record_batch::RecordBatch, -}; - -/// Reverse the data in the [`RecordBatch`] by read and copy from the source -/// `batch`. -pub fn reverse_record_batch(batch: &RecordBatch) -> Result { - let reversed_columns = { - let num_rows = u32::try_from(batch.num_rows()).map_err(|e| { - ArrowError::InvalidArgumentError(format!( - "too many rows in a batch, convert usize to u32 failed, num_rows:{}, err:{}", - batch.num_rows(), - e - )) - })?; - // TODO(xikai): avoid this memory allocation. - let indices = UInt32Array::from_iter_values((0..num_rows).rev()); - - let mut cols = Vec::with_capacity(batch.num_columns()); - for orig_col_data in batch.columns() { - let new_col_data = compute::take(orig_col_data.as_ref(), &indices, None)?; - cols.push(new_col_data); - } - - cols - }; - - RecordBatch::try_new(batch.schema(), reversed_columns) -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use arrow::{ - array::Int32Array, - datatypes::{DataType, Field, Schema}, - }; - - use super::*; - - #[test] - fn test_reverse_record_batch() { - let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])); - let (ids, reverse_ids) = { - let mut source = vec![1, 2, 3, 4, 5]; - let arr = Int32Array::from(source.clone()); - source.reverse(); - let reversed_arr = Int32Array::from(source); - (arr, reversed_arr) - }; - - let batch = - RecordBatch::try_new(schema.clone(), vec![Arc::new(ids)]).expect("build record batch"); - let expect_reversed_batch = - RecordBatch::try_new(schema, vec![Arc::new(reverse_ids)]).expect("build record batch"); - let reversed_batch = reverse_record_batch(&batch).expect("reverse record batch"); - - assert_eq!(expect_reversed_batch, reversed_batch); - } - - #[test] - fn test_reverse_empty_record_batch() { - let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])); - let arr = Int32Array::from(Vec::::new()); - - let batch = RecordBatch::try_new(schema, vec![Arc::new(arr)]).expect("build record batch"); - let reversed_batch = reverse_record_batch(&batch).expect("reverse record batch"); - - assert_eq!(batch, reversed_batch); - } -} diff --git a/src/components/bytes_ext/Cargo.toml b/src/components/bytes_ext/Cargo.toml deleted file mode 100644 index 24ae1150fb..0000000000 --- a/src/components/bytes_ext/Cargo.toml +++ /dev/null @@ -1,35 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
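One design note on the arrow_ext IPC helpers removed above: an encoded batch is only zstd-compressed when its payload reaches compress_min_length, and the chosen method travels with the bytes so the decoder knows whether to decompress before handing the buffer to the Arrow IPC StreamReader. A rough sketch of that policy is below, using an illustrative maybe_compress signature rather than the removed CompressOptions API.

    use std::io;

    /// Compress `input` with zstd (level 3) only when it is at least
    /// `compress_min_length` bytes long; otherwise return it untouched.
    /// The returned tag tells the decoder which path was taken.
    fn maybe_compress(
        input: Vec<u8>,
        compress_min_length: usize,
    ) -> io::Result<(&'static str, Vec<u8>)> {
        if input.len() < compress_min_length {
            return Ok(("none", input));
        }
        let compressed = zstd::bulk::compress(&input, 3)?;
        Ok(("zstd", compressed))
    }

Keeping small payloads uncompressed avoids paying zstd overhead where it cannot help, which is why the removed tests set compress_min_length either to 0 or to a very large value in order to force each branch.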
- -[package] -name = "bytes_ext" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -bytes = { workspace = true } -snafu = { workspace = true } diff --git a/src/components/bytes_ext/src/lib.rs b/src/components/bytes_ext/src/lib.rs deleted file mode 100644 index a982ad1e0b..0000000000 --- a/src/components/bytes_ext/src/lib.rs +++ /dev/null @@ -1,311 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Provides utilities for byte arrays. -//! -//! Use Bytes instead of Vec. Currently just re-export bytes crate. - -// Should not use bytes crate outside of this mod so we can replace the actual -// implementations if needed. -pub use bytes::{Buf, BufMut, Bytes, BytesMut}; -use snafu::{ensure, Backtrace, Snafu}; - -/// Error of MemBuf/MemBufMut. -/// -/// We do not use `std::io::Error` because it is too large. -#[derive(Debug, Snafu)] -#[snafu(visibility(pub))] -pub enum Error { - #[snafu(display("Failed to fill whole buffer.\nBacktrace:\n{}", backtrace))] - UnexpectedEof { backtrace: Backtrace }, - - #[snafu(display("Failed to write whole buffer.\nBacktrace:\n{}", backtrace))] - WouldOverflow { backtrace: Backtrace }, -} - -pub type Result = std::result::Result; - -/// Now is just an alias to `Vec`, prefer to use this alias instead of -/// `Vec` -pub type ByteVec = Vec; - -/// Read bytes from a buffer. -/// -/// Unlike [`bytes::Buf`], failed read operations will throw error rather than -/// panic. -pub trait SafeBuf { - /// Copy bytes from self into dst. - /// - /// The cursor is advanced by the number of bytes copied. - /// - /// Returns error if self does not have enough remaining bytes to fill dst. - fn try_copy_to_slice(&mut self, dst: &mut [u8]) -> Result<()>; - - /// Advance the internal cursor of the Buf - /// - /// Returns error if the `cnt > self.remaining()`. Note the `remaining` - /// method is provided by [`bytes::Buf`]. 
- fn try_advance(&mut self, cnt: usize) -> Result<()>; - - /// Gets an unsigned 8 bit integer from self and advance current position - /// - /// Returns error if the capacity is not enough - fn try_get_u8(&mut self) -> Result { - let mut buf = [0; 1]; - self.try_copy_to_slice(&mut buf)?; - Ok(buf[0]) - } - - /// Gets an unsigned 32 bit integer from self in big-endian byte order and - /// advance current position - /// - /// Returns error if the capacity is not enough - fn try_get_u32(&mut self) -> Result { - let mut buf = [0; 4]; - self.try_copy_to_slice(&mut buf)?; - Ok(u32::from_be_bytes(buf)) - } - - /// Gets an unsigned 64 bit integer from self in big-endian byte order and - /// advance current position - /// - /// Returns error if the capacity is not enough - fn try_get_u64(&mut self) -> Result { - let mut buf = [0; 8]; - self.try_copy_to_slice(&mut buf)?; - Ok(u64::from_be_bytes(buf)) - } - - fn try_get_f64(&mut self) -> Result { - let mut buf = [0; 8]; - self.try_copy_to_slice(&mut buf)?; - Ok(f64::from_be_bytes(buf)) - } - - fn try_get_f32(&mut self) -> Result { - let mut buf = [0; 4]; - self.try_copy_to_slice(&mut buf)?; - Ok(f32::from_be_bytes(buf)) - } -} - -/// Write bytes to a buffer. -/// -/// Unlike [`bytes::BufMut`], failed write operations will throw error rather -/// than panic. -pub trait SafeBufMut { - /// Write bytes into self from src, advance the buffer position - /// - /// Returns error if the capacity is not enough - fn try_put(&mut self, src: &[u8]) -> Result<()>; - - /// Write an unsigned 8 bit integer to self, advance the buffer position - /// - /// Returns error if the capacity is not enough - fn try_put_u8(&mut self, n: u8) -> Result<()> { - let src = [n]; - self.try_put(&src) - } - - /// Writes an unsigned 32 bit integer to self in the big-endian byte order, - /// advance the buffer position - /// - /// Returns error if the capacity is not enough - fn try_put_u32(&mut self, n: u32) -> Result<()> { - self.try_put(&n.to_be_bytes()) - } - - /// Writes an unsigned 64 bit integer to self in the big-endian byte order, - /// advance the buffer position - /// - /// Returns error if the capacity is not enough - fn try_put_u64(&mut self, n: u64) -> Result<()> { - self.try_put(&n.to_be_bytes()) - } - - /// Writes an float 64 to self in the big-endian byte order, - /// advance the buffer position - /// - /// Returns error if the capacity is not enough - fn try_put_f64(&mut self, n: f64) -> Result<()> { - self.try_put(&n.to_be_bytes()) - } - - /// Writes an float 32 to self in the big-endian byte order, - /// advance the buffer position - /// - /// Returns error if the capacity is not enough - fn try_put_f32(&mut self, n: f32) -> Result<()> { - self.try_put(&n.to_be_bytes()) - } -} - -impl SafeBufMut for T -where - T: BufMut, -{ - fn try_put(&mut self, src: &[u8]) -> Result<()> { - ensure!(self.remaining_mut() >= src.len(), WouldOverflow); - self.put(src); - - Ok(()) - } -} - -impl SafeBuf for T -where - T: Buf, -{ - fn try_advance(&mut self, cnt: usize) -> Result<()> { - ensure!(self.remaining() >= cnt, UnexpectedEof); - self.advance(cnt); - - Ok(()) - } - - fn try_copy_to_slice(&mut self, dst: &mut [u8]) -> Result<()> { - ensure!(self.remaining() >= dst.len(), UnexpectedEof); - self.copy_to_slice(dst); - - Ok(()) - } -} - -/// The wrapper on the [`BufMut`] for implementing [`std::io::Write`]. 
-pub struct WriterOnBufMut<'a, B: BufMut> { - pub buf: &'a mut B, -} - -impl<'a, B> std::io::Write for WriterOnBufMut<'a, B> -where - B: BufMut, -{ - fn write(&mut self, buf: &[u8]) -> std::io::Result { - self.buf.put_slice(buf); - - Ok(buf.len()) - } - - fn flush(&mut self) -> std::io::Result<()> { - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_bytes_mut_mem_buf() { - let hello = b"hello"; - let mut buffer = BytesMut::new(); - buffer.try_put_u8(8).unwrap(); - buffer.try_put_u64(u64::MAX - 5).unwrap(); - buffer.try_put(hello).unwrap(); - - assert_eq!(&buffer, buffer.chunk()); - assert_eq!(8, buffer.try_get_u8().unwrap()); - assert_eq!(u64::MAX - 5, buffer.try_get_u64().unwrap()); - let mut dst = [0; 5]; - buffer.try_copy_to_slice(&mut dst).unwrap(); - assert_eq!(hello, &dst); - - assert!(buffer.chunk().is_empty()); - } - - #[test] - fn test_bytes_mut_empty() { - let mut buffer = BytesMut::new(); - assert!(buffer.chunk().is_empty()); - assert!(matches!( - buffer.try_get_u8(), - Err(Error::UnexpectedEof { .. }) - )); - assert!(matches!( - buffer.try_get_u64(), - Err(Error::UnexpectedEof { .. }) - )); - } - - #[test] - fn test_bytes_mem_buf() { - let mut buffer = Bytes::from_static(b"hello world"); - assert_eq!(b"hello world", buffer.chunk()); - - let mut dst = [0; 5]; - buffer.try_copy_to_slice(&mut dst).unwrap(); - assert_eq!(b"hello", &dst); - - assert_eq!(b" world", buffer.chunk()); - buffer.advance(1); - assert_eq!(b"world", buffer.chunk()); - - let mut dst = [0; 50]; - assert!(matches!( - buffer.try_copy_to_slice(&mut dst), - Err(Error::UnexpectedEof { .. }) - )); - } - - #[test] - fn test_slice_mem_buf() { - let hello = b"hello world"; - let mut buf = &hello[..]; - - assert_eq!(hello, buf.chunk()); - let mut dst = [0; 6]; - buf.try_copy_to_slice(&mut dst).unwrap(); - assert_eq!(b"hello ", &dst); - assert_eq!(b"world", buf.chunk()); - - buf.advance(1); - assert_eq!(b"orld", buf.chunk()); - } - - #[test] - fn test_slice_mem_buf_mut() { - let mut dst = [b'x'; 11]; - { - let mut buf = &mut dst[..]; - - buf.try_put(b"abcde").unwrap(); - assert_eq!(b"abcdexxxxxx", &dst); - } - - { - let mut buf = &mut dst[..]; - - buf.try_put(b"hello").unwrap(); - buf.try_put(b" world").unwrap(); - assert_eq!(b"hello world", &dst); - } - - let mut dst = [0; 3]; - let mut buf = &mut dst[..]; - assert!(matches!( - buf.try_put(b"a long long long slice"), - Err(Error::WouldOverflow { .. }) - )); - } - - #[test] - fn test_vec_mem_buf_mut() { - let mut buf = Vec::new(); - buf.try_put(b"hello").unwrap(); - assert_eq!(b"hello", &buf[..]); - } -} diff --git a/src/components/codec/Cargo.toml b/src/components/codec/Cargo.toml deleted file mode 100644 index 51066acd82..0000000000 --- a/src/components/codec/Cargo.toml +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "codec" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -# In alphabetical order -bytes_ext = { workspace = true } -common_types = { workspace = true, features = ["test"] } -lz4_flex = { workspace = true } -macros = { workspace = true } -snafu = { workspace = true } diff --git a/src/components/codec/src/columnar/bool.rs b/src/components/codec/src/columnar/bool.rs deleted file mode 100644 index 87d9926a39..0000000000 --- a/src/components/codec/src/columnar/bool.rs +++ /dev/null @@ -1,248 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use bytes_ext::{Buf, BufMut}; -use common_types::row::bitset::{BitSet, OneByteBitSet, RoBitSet}; -use snafu::{ensure, OptionExt}; - -use super::{ - DecodeContext, InvalidBooleanValue, InvalidCompression, Result, ValuesDecoder, - ValuesDecoderImpl, ValuesEncoder, ValuesEncoderImpl, -}; -use crate::columnar::{InvalidBitSetBuf, InvalidVersion, NotEnoughBytes}; - -/// The layout for the boolean columnar encoding: -/// ```plaintext -/// +-------------+-----------------+------------+-----------------+ -/// | version(u8) | num_values(u32) | data_block | compression(u8) | -/// +-------------+-----------------+------------+-----------------+ -/// ``` -/// Notes: -/// - If the data_block is too long, it will be compressed as bit set. -/// - The `num_values` field is optional, and it is only needed when compression -/// is enabled. -struct Encoding; - -/// The compression for [`Encoding`]. -/// -/// It is not allowed to be modified and only allowed to be appended with a new -/// variant. -#[derive(Clone, Copy, Default)] -#[repr(C)] -enum Compression { - #[default] - None = 0, - BitSet = 1, -} - -impl Encoding { - const COMPRESSION_SIZE: usize = 1; - /// The overhead for compression is 4B, so it is not good to always enable - /// the compression. 
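To make the bit-set compression described in the boolean layout comment above concrete, here is a minimal standalone sketch of packing booleans one bit per value and unpacking them again. It is plain Rust, not the crate's `Encoding` or `OneByteBitSet`; the helper names and the LSB-first bit order are assumptions made only for illustration.

```rust
/// Pack booleans into a bit set, one bit per value (LSB-first within a byte).
/// Illustrative sketch only, not the crate's `OneByteBitSet`.
fn pack_bits(values: &[bool]) -> Vec<u8> {
    let mut out = vec![0u8; (values.len() + 7) / 8];
    for (i, &v) in values.iter().enumerate() {
        if v {
            out[i / 8] |= 1 << (i % 8);
        }
    }
    out
}

/// Unpack `num_values` booleans from a bit set produced by `pack_bits`.
fn unpack_bits(bytes: &[u8], num_values: usize) -> Vec<bool> {
    (0..num_values)
        .map(|i| bytes[i / 8] & (1 << (i % 8)) != 0)
        .collect()
}

fn main() {
    let values = vec![true, false, true, true, false, false, true, false, true];
    let packed = pack_bits(&values);
    // Nine booleans fit into two bytes instead of nine.
    assert_eq!(packed.len(), 2);
    assert_eq!(unpack_bits(&packed, values.len()), values);
}
```

This also shows why `num_values` must be stored once compression is enabled: the trailing byte may carry padding bits that do not correspond to real values.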
- const COMPRESS_THRESHOLD: usize = 10; - const NUM_VALUES_SIZE: usize = 4; - const VERSION: u8 = 0; - const VERSION_SIZE: usize = 1; - - fn need_compress(num_values: usize) -> bool { - num_values > Self::COMPRESS_THRESHOLD - } - - fn decode_compression(flag: u8) -> Result { - let compression = match flag { - 0 => Compression::None, - 1 => Compression::BitSet, - _ => InvalidCompression { flag }.fail()?, - }; - - Ok(compression) - } - - fn encode(&self, buf: &mut B, values: I) -> Result<()> - where - B: BufMut, - I: Iterator + Clone, - { - buf.put_u8(Self::VERSION); - - let num_values = values.clone().count(); - if Self::need_compress(num_values) { - Self::encode_with_compression(buf, num_values, values) - } else { - Self::encode_without_compression(buf, values) - } - } - - fn estimated_encoded_size(&self, values: I) -> usize - where - I: Iterator, - { - let num_values = values.count(); - if Self::need_compress(num_values) { - BitSet::num_bytes(num_values) - + Self::COMPRESSION_SIZE - + Self::NUM_VALUES_SIZE - + Self::VERSION_SIZE - } else { - num_values + Self::VERSION_SIZE + Self::COMPRESSION_SIZE - } - } - - fn decode(&self, buf: &B, f: F) -> Result<()> - where - B: Buf, - F: FnMut(bool) -> Result<()>, - { - let buf = buf.chunk(); - ensure!( - buf.len() > Self::VERSION_SIZE + Self::COMPRESSION_SIZE, - NotEnoughBytes { len: buf.len() } - ); - - // Decode the version. - let version = buf[0]; - ensure!(version == Self::VERSION, InvalidVersion { version }); - - // Decode the compression. - let compression_index = buf.len() - 1; - match Self::decode_compression(buf[compression_index])? { - Compression::None => Self::decode_without_compression(buf, f)?, - Compression::BitSet => Self::decode_with_compression(buf, f)?, - } - - Ok(()) - } - - fn encode_without_compression(buf: &mut B, values: I) -> Result<()> - where - B: BufMut, - I: Iterator, - { - for v in values { - buf.put_u8(v as u8); - } - - buf.put_u8(Compression::None as u8); - - Ok(()) - } - - fn decode_without_compression(buf: &[u8], mut f: F) -> Result<()> - where - F: FnMut(bool) -> Result<()>, - { - let data_block_start = Self::VERSION_SIZE; - let data_block_end = buf.len() - Self::COMPRESSION_SIZE; - let data_block = &buf[data_block_start..data_block_end]; - for v in data_block { - match *v { - 0 => f(false), - 1 => f(true), - _ => InvalidBooleanValue { value: *v }.fail(), - }? - } - - Ok(()) - } - - fn encode_with_compression(buf: &mut B, num_values: usize, values: I) -> Result<()> - where - B: BufMut, - I: Iterator, - { - buf.put_u32(num_values as u32); - - let mut one_byte_bits = [false; 8]; - let mut offset = 0; - for v in values { - one_byte_bits[offset] = v; - offset += 1; - if offset == 8 { - let bit_set = OneByteBitSet::from_slice(&one_byte_bits); - buf.put_u8(bit_set.0); - - // Reset the offset and the bits buf. - offset = 0; - one_byte_bits = [false; 8]; - } - } - - // Put the remaining bits. 
- if offset > 0 { - let bit_set = OneByteBitSet::from_slice(&one_byte_bits); - buf.put_u8(bit_set.0); - } - - buf.put_u8(Compression::BitSet as u8); - Ok(()) - } - - fn decode_with_compression(buf: &[u8], mut f: F) -> Result<()> - where - F: FnMut(bool) -> Result<()>, - { - let expected_len = Self::VERSION_SIZE + Self::NUM_VALUES_SIZE + Self::COMPRESSION_SIZE; - ensure!(buf.len() >= expected_len, NotEnoughBytes { len: buf.len() }); - - let bit_set_start = Self::VERSION_SIZE + Self::NUM_VALUES_SIZE; - let num_values = { - let mut num_buf = &buf[Self::VERSION_SIZE..bit_set_start]; - num_buf.get_u32() as usize - }; - - let bit_set_end = buf.len() - Self::COMPRESSION_SIZE; - let bit_set_buf = &buf[bit_set_start..bit_set_end]; - let bit_set = RoBitSet::try_new(bit_set_buf, num_values).context(InvalidBitSetBuf)?; - - for i in 0..num_values { - if bit_set.is_set(i).context(InvalidBitSetBuf)? { - f(true) - } else { - f(false) - }? - } - - Ok(()) - } -} - -impl ValuesEncoder for ValuesEncoderImpl { - fn encode(&self, buf: &mut B, values: I) -> Result<()> - where - B: BufMut, - I: Iterator + Clone, - { - Encoding.encode(buf, values) - } - - fn estimated_encoded_size(&self, values: I) -> usize - where - I: Iterator, - { - Encoding.estimated_encoded_size(values) - } -} - -impl ValuesDecoder for ValuesDecoderImpl { - fn decode(&self, _ctx: DecodeContext<'_>, buf: &mut B, f: F) -> Result<()> - where - B: Buf, - F: FnMut(bool) -> Result<()>, - { - Encoding.decode(buf, f) - } -} diff --git a/src/components/codec/src/columnar/bytes.rs b/src/components/codec/src/columnar/bytes.rs deleted file mode 100644 index b77b601dba..0000000000 --- a/src/components/codec/src/columnar/bytes.rs +++ /dev/null @@ -1,265 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::io::{Read, Write}; - -use bytes_ext::{Buf, BufMut, Bytes, WriterOnBufMut}; -use lz4_flex::frame::{FrameDecoder as Lz4Decoder, FrameEncoder as Lz4Encoder}; -use snafu::{ensure, ResultExt}; - -use crate::{ - columnar::{ - Compress, DecodeContext, Decompress, InvalidCompression, InvalidVersion, NotEnoughBytes, - Result, ValuesDecoder, ValuesDecoderImpl, ValuesEncoder, ValuesEncoderImpl, Varint, - }, - varint, -}; - -/// The layout for the string/bytes: -/// ```plaintext -/// +-------------+--------------+------------+-----------------------+-----------------+ -/// | version(u8) | length_block | data_block | length_block_len(u32) | compression(u8) | -/// +-------------+--------------+------------+-----------------------+-----------------+ -/// ``` -/// -/// Currently, the `compression` has two optional values: -/// - 0: No compression over the data block -/// - 1: the data block will be compressed if it is too long -/// -/// And the lengths in the `length block` are encoded in varint. 
-/// And the reason to put `length_block_len` and `compression` at the footer is -/// to avoid one more loop when encoding. -struct Encoding; - -impl Encoding { - const COMPRESSION_SIZE: usize = 1; - const LENGTH_BLOCK_LEN_SIZE: usize = 4; - const VERSION: u8 = 0; - const VERSION_SIZE: usize = 1; - - fn decide_compression(data_block_len: usize, threshold: usize) -> Compression { - if data_block_len > threshold { - Compression::Lz4 - } else { - Compression::None - } - } - - fn decode_compression(&self, v: u8) -> Result { - let compression = match v { - 0 => Compression::None, - 1 => Compression::Lz4, - _ => InvalidCompression { flag: v }.fail()?, - }; - - Ok(compression) - } - - fn encode<'a, B, I>( - &self, - buf: &mut B, - values: I, - data_block_compress_threshold: usize, - ) -> Result<()> - where - B: BufMut, - I: Iterator + Clone, - { - // Encode the `version`. - buf.put_u8(Self::VERSION); - - // Encode the `length_block`. - let mut data_block_len = 0; - let mut length_block_len = 0; - for v in values.clone() { - data_block_len += v.len(); - let sz = varint::encode_uvarint(buf, v.len() as u64).context(Varint)?; - length_block_len += sz; - } - assert!(length_block_len < u32::MAX as usize); - - // Encode the `data_block`. - let compression = Self::decide_compression(data_block_len, data_block_compress_threshold); - match compression { - Compression::None => { - for v in values { - buf.put_slice(v); - } - } - Compression::Lz4 => self - .encode_with_compression(buf, values) - .context(Compress)?, - } - - // Encode the `data_block` offset. - buf.put_u32(length_block_len as u32); - buf.put_u8(compression as u8); - - Ok(()) - } - - fn estimated_encoded_size<'a, I>(&self, values: I) -> usize - where - I: Iterator, - { - let mut total_bytes = - Self::VERSION_SIZE + Self::LENGTH_BLOCK_LEN_SIZE + Self::COMPRESSION_SIZE; - - for v in values { - // The length of `v` should be ensured to be smaller than [u32::MAX], that is to - // say, at most 5 bytes will be used when do varint encoding over a u32 number. - total_bytes += 5 + v.len(); - } - total_bytes - } - - /// The layout can be referred to the docs of [`Encoding`]. - fn decode(&self, ctx: DecodeContext<'_>, buf: &B, f: F) -> Result<()> - where - B: Buf, - F: FnMut(Bytes) -> Result<()>, - { - let chunk = buf.chunk(); - let footer_len = Self::LENGTH_BLOCK_LEN_SIZE + Self::COMPRESSION_SIZE; - ensure!( - chunk.len() > footer_len + Self::VERSION_SIZE, - NotEnoughBytes { - len: footer_len + Self::VERSION_SIZE - } - ); - - // Read and check the version. - let version = chunk[0]; - ensure!(version == Self::VERSION, InvalidVersion { version }); - - // Read and decode the compression flag. - let compression_offset = chunk.len() - Self::COMPRESSION_SIZE; - let compression = self.decode_compression(chunk[compression_offset])?; - - // Extract the `length_block` and `data_block` for decoding. - let length_block_len_offset = chunk.len() - footer_len; - let length_block_end = { - let mut len_buf = &chunk[length_block_len_offset..compression_offset]; - len_buf.get_u32() as usize + Self::VERSION_SIZE - }; - let mut length_block = &chunk[Self::VERSION_SIZE..length_block_end]; - let data_block = &chunk[length_block_end..length_block_len_offset]; - - match compression { - Compression::None => self.decode_without_compression(&mut length_block, data_block, f), - Compression::Lz4 => self.decode_with_compression(length_block, data_block, ctx.buf, f), - } - } - - /// Encode the values into the `buf`, and the compress the encoded payload. 
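As a standalone illustration of the `length_block` / `data_block` split described in the layout comment above (not the crate's code), the sketch below stores fixed u32 lengths instead of varints, writes the length-block size up front instead of in the footer, and skips the version byte and the LZ4 branch; all names are invented for the example.

```rust
/// Encode a batch of byte strings as a length block followed by a data block.
fn encode_columnar(values: &[&[u8]]) -> Vec<u8> {
    let mut length_block = Vec::new();
    let mut data_block = Vec::new();
    for v in values {
        // The real encoder stores each length as a varint; a fixed u32 keeps
        // this sketch simple.
        length_block.extend_from_slice(&(v.len() as u32).to_be_bytes());
        data_block.extend_from_slice(v);
    }
    // Prefix with the length-block size so the decoder knows where the data
    // block starts (the real layout keeps this in the footer instead).
    let mut out = Vec::new();
    out.extend_from_slice(&(length_block.len() as u32).to_be_bytes());
    out.extend_from_slice(&length_block);
    out.extend_from_slice(&data_block);
    out
}

fn decode_columnar(buf: &[u8]) -> Vec<Vec<u8>> {
    let length_block_len = u32::from_be_bytes(buf[0..4].try_into().unwrap()) as usize;
    let (length_block, data_block) = buf[4..].split_at(length_block_len);
    let mut values = Vec::new();
    let mut offset = 0;
    for chunk in length_block.chunks_exact(4) {
        let len = u32::from_be_bytes(chunk.try_into().unwrap()) as usize;
        values.push(data_block[offset..offset + len].to_vec());
        offset += len;
    }
    values
}

fn main() {
    let values: Vec<&[u8]> = vec![b"hello", b"", b"columnar"];
    let encoded = encode_columnar(&values);
    let decoded = decode_columnar(&encoded);
    assert_eq!(decoded, values.iter().map(|v| v.to_vec()).collect::<Vec<_>>());
}
```

Placing `length_block_len` and `compression` in the footer, as the real encoder does, lets it stream both blocks in a single pass over the values instead of needing a second loop.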
- fn encode_with_compression<'a, B, I>(&self, buf: &mut B, values: I) -> std::io::Result<()> - where - B: BufMut, - I: Iterator, - { - let writer = WriterOnBufMut { buf }; - let mut enc = Lz4Encoder::new(writer); - for v in values { - enc.write_all(v)?; - } - enc.finish()?; - - Ok(()) - } - - /// Decode the uncompressed data block. - fn decode_without_compression( - &self, - length_block_buf: &mut B, - data_block_buf: &[u8], - mut f: F, - ) -> Result<()> - where - B: Buf, - F: FnMut(Bytes) -> Result<()>, - { - let mut offset = 0; - while length_block_buf.remaining() > 0 { - let length = varint::decode_uvarint(length_block_buf).context(Varint)? as usize; - let b = Bytes::copy_from_slice(&data_block_buf[offset..offset + length]); - f(b)?; - offset += length; - } - - Ok(()) - } - - /// Decode the compressed data block. - fn decode_with_compression( - &self, - mut length_block_buf: &[u8], - compressed_data_block_buf: &[u8], - reused_buf: &mut Vec, - f: F, - ) -> Result<()> - where - F: FnMut(Bytes) -> Result<()>, - { - let mut decoder = Lz4Decoder::new(compressed_data_block_buf); - decoder.read_to_end(reused_buf).context(Decompress)?; - self.decode_without_compression(&mut length_block_buf, &reused_buf[..], f) - } -} - -/// The compression for [`Encoding`]. -/// -/// It is not allowed to be modified and only allowed to be appended with a new -/// variant. -#[derive(Clone, Copy, Default)] -#[repr(C)] -enum Compression { - #[default] - None = 0, - Lz4 = 1, -} - -impl<'a> ValuesEncoder<&'a [u8]> for ValuesEncoderImpl { - /// The layout can be referred to the docs of [`Encoding`]. - fn encode(&self, buf: &mut B, values: I) -> Result<()> - where - B: BufMut, - I: Iterator + Clone, - { - let encoding = Encoding; - encoding.encode(buf, values, self.bytes_compress_threshold) - } - - fn estimated_encoded_size(&self, values: I) -> usize - where - I: Iterator, - { - let encoding = Encoding; - encoding.estimated_encoded_size(values) - } -} - -impl ValuesDecoder for ValuesDecoderImpl { - /// The layout can be referred to the docs of [`Encoding`]. - fn decode(&self, ctx: DecodeContext<'_>, buf: &mut B, f: F) -> Result<()> - where - B: Buf, - F: FnMut(Bytes) -> Result<()>, - { - let encoding = Encoding; - encoding.decode(ctx, buf, f) - } -} diff --git a/src/components/codec/src/columnar/mod.rs b/src/components/codec/src/columnar/mod.rs deleted file mode 100644 index bc20c217b3..0000000000 --- a/src/components/codec/src/columnar/mod.rs +++ /dev/null @@ -1,840 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Datum encoding in columnar way. -//! -//! Notice: The encoded results may be used in persisting, so the compatibility -//! must be taken considerations into. 
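Because the encoded results may be persisted, the columnar encodings in this module typically lead with a version byte and refuse to decode anything they do not recognize. A minimal sketch of that guard, standalone and with invented names:

```rust
const VERSION: u8 = 0;

#[derive(Debug)]
enum DecodeError {
    UnsupportedVersion(u8),
    Truncated,
}

/// Check the leading version byte before decoding the payload, so a newer
/// on-disk format is rejected explicitly instead of being misread.
fn check_version(buf: &[u8]) -> Result<&[u8], DecodeError> {
    match buf.split_first() {
        Some((&v, rest)) if v == VERSION => Ok(rest),
        Some((&v, _)) => Err(DecodeError::UnsupportedVersion(v)),
        None => Err(DecodeError::Truncated),
    }
}

fn main() {
    assert!(check_version(&[0, 1, 2, 3]).is_ok());
    assert!(matches!(
        check_version(&[9, 1, 2, 3]),
        Err(DecodeError::UnsupportedVersion(9))
    ));
}
```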
- -use bytes_ext::{Buf, BufMut, Bytes}; -use common_types::{ - column_schema::ColumnId, - datum::{Datum, DatumKind, DatumView}, - row::bitset::{BitSet, RoBitSet}, - string::StringBytes, - time::Timestamp, -}; -use macros::define_result; -use snafu::{self, ensure, Backtrace, OptionExt, ResultExt, Snafu}; - -use crate::varint; - -mod bool; -mod bytes; -mod number; -mod timestamp; - -#[derive(Debug, Snafu)] -#[snafu(visibility(pub(crate)))] -pub enum Error { - #[snafu(display("Invalid version:{version}.\nBacktrace:\n{backtrace}"))] - InvalidVersion { version: u8, backtrace: Backtrace }, - - #[snafu(display("Invalid compression flag:{flag}.\nBacktrace:\n{backtrace}"))] - InvalidCompression { flag: u8, backtrace: Backtrace }, - - #[snafu(display("Invalid boolean value:{value}.\nBacktrace:\n{backtrace}"))] - InvalidBooleanValue { value: u8, backtrace: Backtrace }, - - #[snafu(display("Invalid datum kind, err:{source}"))] - InvalidDatumKind { source: common_types::datum::Error }, - - #[snafu(display("No enough bytes to compose the bit set.\nBacktrace:\n{backtrace}"))] - InvalidBitSetBuf { backtrace: Backtrace }, - - #[snafu(display( - "Datums is not enough, expect:{expect}, found:{found}.\nBacktrace:\n{backtrace}" - ))] - NotEnoughDatums { - expect: usize, - found: usize, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to varint, err:{source}"))] - Varint { source: varint::Error }, - - #[snafu(display("Failed to do compression, err:{source}.\nBacktrace:\n{backtrace}"))] - Compress { - source: std::io::Error, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to decompress, err:{source}.\nBacktrace:\n{backtrace}"))] - Decompress { - source: std::io::Error, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to do compact encoding, err:{source}"))] - CompactEncode { source: crate::compact::Error }, - - #[snafu(display("Too long bytes, length:{num_bytes}.\nBacktrace:\n{backtrace}"))] - TooLongBytes { - num_bytes: usize, - backtrace: Backtrace, - }, - - #[snafu(display("Bytes is not enough, length:{len}.\nBacktrace:\n{backtrace}"))] - NotEnoughBytes { len: usize, backtrace: Backtrace }, - - #[snafu(display("Number operation overflowed, msg:{msg}.\nBacktrace:\n{backtrace}"))] - Overflow { msg: String, backtrace: Backtrace }, -} - -define_result!(Error); - -/// The trait bound on the encoders for different types. -trait ValuesEncoder { - /// Encode a batch of values into the `buf`. - /// - /// As the `estimated_encoded_size` method is provided, the `buf` should be - /// pre-allocate. - fn encode(&self, buf: &mut B, values: I) -> Result<()> - where - B: BufMut, - I: Iterator + Clone; - - /// The estimated size for memory pre-allocated. - fn estimated_encoded_size(&self, values: I) -> usize - where - I: Iterator, - { - let (lower, higher) = values.size_hint(); - let num = lower.max(higher.unwrap_or_default()); - num * std::mem::size_of::() - } -} - -/// The decode context for decoding column. -pub struct DecodeContext<'a> { - /// Buffer for reuse during decoding. - pub buf: &'a mut Vec, -} - -/// The trait bound on the decoders for different types. -trait ValuesDecoder { - fn decode(&self, ctx: DecodeContext<'_>, buf: &mut B, f: F) -> Result<()> - where - B: Buf, - F: FnMut(T) -> Result<()>; -} - -#[derive(Debug, Default)] -/// The implementation for [`ValuesEncoder`]. -struct ValuesEncoderImpl { - bytes_compress_threshold: usize, -} - -/// The implementation for [`ValuesDecoder`]. 
-struct ValuesDecoderImpl; - -#[derive(Clone, Debug)] -pub struct ColumnarEncoder { - column_id: ColumnId, - bytes_compress_threshold: usize, -} - -/// A hint helps column encoding. -pub struct EncodeHint { - pub num_nulls: Option, - pub num_datums: Option, - pub datum_kind: DatumKind, -} - -impl EncodeHint { - fn compute_num_nulls<'a, I>(&mut self, datums: &I) -> usize - where - I: Iterator> + Clone, - { - if let Some(v) = self.num_nulls { - v - } else { - let num_nulls = datums.clone().filter(|v| v.is_null()).count(); - self.num_nulls = Some(num_nulls); - num_nulls - } - } - - fn compute_num_datums<'a, I>(&mut self, datums: &I) -> usize - where - I: Iterator> + Clone, - { - if let Some(v) = self.num_datums { - v - } else { - let num_datums = datums.clone().count(); - self.num_datums = Some(num_datums); - num_datums - } - } -} - -impl ColumnarEncoder { - const VERSION: u8 = 0; - - pub fn new(column_id: ColumnId, bytes_compress_threshold: usize) -> Self { - Self { - column_id, - bytes_compress_threshold, - } - } - - /// The header includes `version`, `datum_kind`, `column_id`, `num_datums` - /// and `num_nulls`. - /// - /// Refer to the [encode](ColumnarEncoder::encode) method. - #[inline] - const fn header_size() -> usize { - 1 + 1 + 4 + 4 + 4 - } - - /// The layout of the final serialized bytes: - /// ```plaintext - /// +-------------+----------------+-----------------+-----------------+----------------+---------------+---------------------+ - /// | version(u8) | datum_kind(u8) | column_id(u32) | num_datums(u32) | num_nulls(u32) | nulls_bit_set | non-null data block | - /// +-------------+----------------+-----------------+-----------------+----------------+---------------+---------------------+ - /// ``` - /// Note: - /// 1. The `num_nulls`, `nulls_bit_set` and `non-null data block` will not - /// exist if the kind of datum is null; - /// 2. The `nulls_bit_set` will not exist if the `num_nulls` is zero; - /// 3. The `nulls_bit_set` and `non-null data block` will not exist if the - /// `num_nulls` equals the `num_datums`; - pub fn encode<'a, I, B>(&self, buf: &mut B, datums: I, hint: &mut EncodeHint) -> Result<()> - where - I: Iterator> + Clone, - B: BufMut, - { - buf.put_u8(Self::VERSION); - buf.put_u8(hint.datum_kind.into_u8()); - buf.put_u32(self.column_id); - let num_datums = hint.compute_num_datums(&datums); - assert!(num_datums < u32::MAX as usize); - buf.put_u32(num_datums as u32); - - // For null datum, there is no more data to put. 
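The null handling in the layout comment above can be pictured with a standalone sketch: a bit set records which slots hold a value, and only the non-null values are materialized in the data block. The sketch uses plain `Option<u32>` instead of `Datum`, invented helper names, and no header fields; the real `BitSet` may use a different bit order.

```rust
/// Encode optional values as (num_values, null bitmap, non-null values only),
/// mirroring the `nulls_bit_set` / non-null data block idea.
fn encode_nullable(values: &[Option<u32>]) -> (usize, Vec<u8>, Vec<u32>) {
    let mut bitmap = vec![0u8; (values.len() + 7) / 8];
    let mut non_null = Vec::new();
    for (i, v) in values.iter().enumerate() {
        if let Some(x) = v {
            bitmap[i / 8] |= 1 << (i % 8); // bit set => value present
            non_null.push(*x);
        }
    }
    (values.len(), bitmap, non_null)
}

fn decode_nullable(num: usize, bitmap: &[u8], non_null: &[u32]) -> Vec<Option<u32>> {
    let mut next = 0;
    (0..num)
        .map(|i| {
            if bitmap[i / 8] & (1 << (i % 8)) != 0 {
                let v = non_null[next];
                next += 1;
                Some(v)
            } else {
                None
            }
        })
        .collect()
}

fn main() {
    let values = vec![Some(7), None, Some(42), None, None, Some(1)];
    let (num, bitmap, non_null) = encode_nullable(&values);
    assert_eq!(non_null.len(), 3); // only non-null values are materialized
    assert_eq!(decode_nullable(num, &bitmap, &non_null), values);
}
```

Storing only the non-null block is also why the all-null and no-null special cases above are worthwhile: both can skip the bit set entirely.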
- if matches!(hint.datum_kind, DatumKind::Null) { - return Ok(()); - } - - let num_nulls = hint.compute_num_nulls(&datums); - assert!(num_nulls < u32::MAX as usize); - - buf.put_u32(num_nulls as u32); - if num_nulls > 0 { - let mut bit_set = BitSet::all_set(num_datums); - for (idx, value) in datums.clone().enumerate() { - if value.is_null() { - bit_set.unset(idx); - } - } - - buf.put_slice(bit_set.as_bytes()); - } - - self.encode_datums(buf, datums, hint.datum_kind) - } - - pub fn estimated_encoded_size<'a, I>(&self, datums: I, hint: &mut EncodeHint) -> usize - where - I: Iterator> + Clone, - { - let bit_set_size = if matches!(hint.datum_kind, DatumKind::Null) { - 0 - } else { - let num_datums = hint.compute_num_datums(&datums); - BitSet::num_bytes(num_datums) - }; - - let enc = ValuesEncoderImpl::default(); - let data_size = match hint.datum_kind { - DatumKind::Null => 0, - DatumKind::Timestamp => { - enc.estimated_encoded_size(datums.clone().filter_map(|v| v.as_timestamp())) - } - DatumKind::Double => { - enc.estimated_encoded_size(datums.clone().filter_map(|v| v.as_f64())) - } - DatumKind::Float => { - enc.estimated_encoded_size(datums.clone().filter_map(|v| v.as_f32())) - } - DatumKind::Varbinary => { - enc.estimated_encoded_size(datums.clone().filter_map(|v| v.into_bytes())) - } - DatumKind::String => enc.estimated_encoded_size( - datums - .clone() - .filter_map(|v| v.into_str().map(|v| v.as_bytes())), - ), - DatumKind::UInt64 => { - enc.estimated_encoded_size(datums.clone().filter_map(|v| v.as_u64())) - } - DatumKind::UInt32 => { - enc.estimated_encoded_size(datums.clone().filter_map(|v| v.as_u32())) - } - DatumKind::UInt16 => { - enc.estimated_encoded_size(datums.clone().filter_map(|v| v.as_u16())) - } - DatumKind::UInt8 => { - enc.estimated_encoded_size(datums.clone().filter_map(|v| v.as_u8())) - } - DatumKind::Int64 => { - enc.estimated_encoded_size(datums.clone().filter_map(|v| v.as_i64())) - } - DatumKind::Int32 => { - enc.estimated_encoded_size(datums.clone().filter_map(|v| v.as_i32())) - } - DatumKind::Int16 => { - enc.estimated_encoded_size(datums.clone().filter_map(|v| v.as_i16())) - } - DatumKind::Int8 => enc.estimated_encoded_size(datums.clone().filter_map(|v| v.as_i8())), - DatumKind::Boolean => { - enc.estimated_encoded_size(datums.clone().filter_map(|v| v.as_bool())) - } - DatumKind::Date => { - enc.estimated_encoded_size(datums.clone().filter_map(|v| v.as_date_i32())) - } - DatumKind::Time => { - enc.estimated_encoded_size(datums.clone().filter_map(|v| v.as_timestamp())) - } - }; - - Self::header_size() + bit_set_size + data_size - } - - fn encode_datums<'a, I, B>(&self, buf: &mut B, datums: I, datum_kind: DatumKind) -> Result<()> - where - I: Iterator> + Clone, - B: BufMut, - { - let enc = ValuesEncoderImpl { - bytes_compress_threshold: self.bytes_compress_threshold, - }; - match datum_kind { - DatumKind::Null => Ok(()), - DatumKind::Timestamp => enc.encode(buf, datums.filter_map(|v| v.as_timestamp())), - DatumKind::Double => enc.encode(buf, datums.filter_map(|v| v.as_f64())), - DatumKind::Float => enc.encode(buf, datums.filter_map(|v| v.as_f32())), - DatumKind::Varbinary => enc.encode(buf, datums.filter_map(|v| v.into_bytes())), - DatumKind::String => enc.encode( - buf, - datums.filter_map(|v| v.into_str().map(|v| v.as_bytes())), - ), - DatumKind::UInt64 => enc.encode(buf, datums.filter_map(|v| v.as_u64())), - DatumKind::UInt32 => enc.encode(buf, datums.filter_map(|v| v.as_u32())), - DatumKind::UInt16 => enc.encode(buf, datums.filter_map(|v| v.as_u16())), - 
DatumKind::UInt8 => enc.encode(buf, datums.filter_map(|v| v.as_u8())), - DatumKind::Int64 => enc.encode(buf, datums.filter_map(|v| v.as_i64())), - DatumKind::Int32 => enc.encode(buf, datums.filter_map(|v| v.as_i32())), - DatumKind::Int16 => enc.encode(buf, datums.filter_map(|v| v.as_i16())), - DatumKind::Int8 => enc.encode(buf, datums.filter_map(|v| v.as_i8())), - DatumKind::Boolean => enc.encode(buf, datums.filter_map(|v| v.as_bool())), - DatumKind::Date => enc.encode(buf, datums.filter_map(|v| v.as_date_i32())), - DatumKind::Time => enc.encode(buf, datums.filter_map(|v| v.as_timestamp())), - } - } -} - -/// The decoder for [`ColumnarEncoder`]. -#[derive(Debug, Clone)] -pub struct ColumnarDecoder; - -#[derive(Debug, Clone)] -pub struct DecodeResult { - pub column_id: ColumnId, - pub datums: Vec, -} - -impl ColumnarDecoder { - pub fn decode(&self, ctx: DecodeContext<'_>, buf: &mut B) -> Result { - let version = buf.get_u8(); - ensure!( - version == ColumnarEncoder::VERSION, - InvalidVersion { version } - ); - - let datum_kind = DatumKind::try_from(buf.get_u8()).context(InvalidDatumKind)?; - let column_id = buf.get_u32(); - let num_datums = buf.get_u32() as usize; - - if matches!(datum_kind, DatumKind::Null) { - return Ok(DecodeResult { - column_id, - datums: vec![Datum::Null; num_datums], - }); - } - - let num_nulls = buf.get_u32() as usize; - let datums = if num_nulls == num_datums { - vec![Datum::Null; num_datums] - } else if num_nulls > 0 { - Self::decode_with_nulls(ctx, buf, num_datums, datum_kind)? - } else { - Self::decode_without_nulls(ctx, buf, num_datums, datum_kind)? - }; - - Ok(DecodeResult { column_id, datums }) - } -} - -impl ColumnarDecoder { - fn decode_with_nulls( - ctx: DecodeContext<'_>, - buf: &B, - num_datums: usize, - datum_kind: DatumKind, - ) -> Result> { - let chunk = buf.chunk(); - let bit_set = RoBitSet::try_new(chunk, num_datums).context(InvalidBitSetBuf)?; - - let mut datums = Vec::with_capacity(num_datums); - let with_datum = |datum: Datum| { - let idx = datums.len(); - let null = bit_set.is_unset(idx).context(InvalidBitSetBuf)?; - if null { - datums.push(Datum::Null); - } - datums.push(datum); - - Ok(()) - }; - - let mut data_block = &chunk[BitSet::num_bytes(num_datums)..]; - Self::decode_datums(ctx, &mut data_block, datum_kind, with_datum)?; - - Ok(datums) - } - - fn decode_without_nulls( - ctx: DecodeContext<'_>, - buf: &mut B, - num_datums: usize, - datum_kind: DatumKind, - ) -> Result> { - let mut datums = Vec::with_capacity(num_datums); - let with_datum = |datum: Datum| { - datums.push(datum); - Ok(()) - }; - Self::decode_datums(ctx, buf, datum_kind, with_datum)?; - Ok(datums) - } - - fn decode_datums( - ctx: DecodeContext<'_>, - buf: &mut B, - datum_kind: DatumKind, - mut f: F, - ) -> Result<()> - where - B: Buf, - F: FnMut(Datum) -> Result<()>, - { - match datum_kind { - DatumKind::Null => Ok(()), - DatumKind::Timestamp => { - let with_timestamp = |v: Timestamp| f(Datum::from(v)); - ValuesDecoderImpl.decode(ctx, buf, with_timestamp) - } - DatumKind::Double => { - let with_float = |v: f64| f(Datum::from(v)); - ValuesDecoderImpl.decode(ctx, buf, with_float) - } - DatumKind::Float => { - let with_float = |v: f32| f(Datum::from(v)); - ValuesDecoderImpl.decode(ctx, buf, with_float) - } - DatumKind::Varbinary => { - let with_bytes = |v: Bytes| f(Datum::from(v)); - ValuesDecoderImpl.decode(ctx, buf, with_bytes) - } - DatumKind::String => { - let with_str = |value| { - let datum = unsafe { Datum::from(StringBytes::from_bytes_unchecked(value)) }; - f(datum) - }; 
- ValuesDecoderImpl.decode(ctx, buf, with_str) - } - DatumKind::UInt64 => { - let with_u64 = |value: u64| { - let datum = Datum::from(value); - f(datum) - }; - ValuesDecoderImpl.decode(ctx, buf, with_u64) - } - DatumKind::UInt32 => { - let with_u32 = |value: u32| { - let datum = Datum::from(value); - f(datum) - }; - ValuesDecoderImpl.decode(ctx, buf, with_u32) - } - DatumKind::UInt16 => { - let with_u16 = |value: u16| { - let datum = Datum::from(value); - f(datum) - }; - ValuesDecoderImpl.decode(ctx, buf, with_u16) - } - DatumKind::UInt8 => { - let with_u8 = |value: u8| { - let datum = Datum::from(value); - f(datum) - }; - ValuesDecoderImpl.decode(ctx, buf, with_u8) - } - DatumKind::Int64 => { - let with_i64 = |value: i64| { - let datum = Datum::from(value); - f(datum) - }; - ValuesDecoderImpl.decode(ctx, buf, with_i64) - } - DatumKind::Int32 => { - let with_i32 = |v: i32| f(Datum::from(v)); - ValuesDecoderImpl.decode(ctx, buf, with_i32) - } - DatumKind::Int16 => { - let with_i16 = |value: i16| { - let datum = Datum::from(value); - f(datum) - }; - ValuesDecoderImpl.decode(ctx, buf, with_i16) - } - DatumKind::Int8 => { - let with_i8 = |value: i8| { - let datum = Datum::from(value); - f(datum) - }; - ValuesDecoderImpl.decode(ctx, buf, with_i8) - } - DatumKind::Boolean => { - let with_bool = |v: bool| f(Datum::from(v)); - ValuesDecoderImpl.decode(ctx, buf, with_bool) - } - DatumKind::Date => { - let with_i32 = |value: i32| { - let datum = Datum::Date(value); - f(datum) - }; - ValuesDecoderImpl.decode(ctx, buf, with_i32) - } - DatumKind::Time => { - let with_timestamp = |v: Timestamp| f(Datum::Time(v.as_i64())); - ValuesDecoderImpl.decode(ctx, buf, with_timestamp) - } - } - } -} -#[cfg(test)] -mod tests { - use super::*; - - fn check_encode_end_decode(column_id: ColumnId, datums: Vec, datum_kind: DatumKind) { - let encoder = ColumnarEncoder::new(column_id, 256); - let views = datums.iter().map(|v| v.as_view()); - let mut hint = EncodeHint { - num_nulls: None, - num_datums: None, - datum_kind, - }; - - let buf_len = encoder.estimated_encoded_size(views.clone(), &mut hint); - let mut buf = Vec::with_capacity(buf_len); - encoder.encode(&mut buf, views, &mut hint).unwrap(); - - // Ensure no growth over the capacity. 
- assert!(buf.capacity() <= buf_len); - - let mut reused_buf = Vec::new(); - let ctx = DecodeContext { - buf: &mut reused_buf, - }; - let decoder = ColumnarDecoder; - let DecodeResult { - column_id: decoded_column_id, - datums: decoded_datums, - } = decoder.decode(ctx, &mut buf.as_slice()).unwrap(); - assert_eq!(column_id, decoded_column_id); - assert_eq!(datums, decoded_datums); - } - - #[test] - fn test_small_int() { - let datums = [10u32, 1u32, 2u32, 81u32, 82u32]; - - check_encode_end_decode( - 10, - datums.iter().map(|v| Datum::from(*v)).collect(), - DatumKind::UInt32, - ); - - check_encode_end_decode( - 10, - datums.iter().map(|v| Datum::from(*v as i32)).collect(), - DatumKind::Int32, - ); - - check_encode_end_decode( - 10, - datums.iter().map(|v| Datum::from(*v as u16)).collect(), - DatumKind::UInt16, - ); - - check_encode_end_decode( - 10, - datums.iter().map(|v| Datum::from(*v as i16)).collect(), - DatumKind::Int16, - ); - - check_encode_end_decode( - 10, - datums.iter().map(|v| Datum::from(*v as i8)).collect(), - DatumKind::Int8, - ); - - check_encode_end_decode( - 10, - datums.iter().map(|v| Datum::from(*v as u8)).collect(), - DatumKind::UInt8, - ); - } - - #[test] - fn test_with_empty_datums() { - check_encode_end_decode(1, vec![], DatumKind::Null); - check_encode_end_decode(1, vec![], DatumKind::Timestamp); - check_encode_end_decode(1, vec![], DatumKind::Double); - check_encode_end_decode(1, vec![], DatumKind::Float); - check_encode_end_decode(1, vec![], DatumKind::Varbinary); - check_encode_end_decode(1, vec![], DatumKind::String); - check_encode_end_decode(1, vec![], DatumKind::UInt64); - check_encode_end_decode(1, vec![], DatumKind::UInt32); - check_encode_end_decode(1, vec![], DatumKind::UInt8); - check_encode_end_decode(1, vec![], DatumKind::Int64); - check_encode_end_decode(1, vec![], DatumKind::Int32); - check_encode_end_decode(1, vec![], DatumKind::Int16); - check_encode_end_decode(1, vec![], DatumKind::Int8); - check_encode_end_decode(1, vec![], DatumKind::Boolean); - check_encode_end_decode(1, vec![], DatumKind::Date); - check_encode_end_decode(1, vec![], DatumKind::Time); - } - - #[test] - fn test_i32_with_null() { - let datums = vec![ - Datum::from(10i32), - Datum::from(1i32), - Datum::Null, - Datum::from(18i32), - Datum::from(38i32), - Datum::from(48i32), - Datum::Null, - Datum::from(81i32), - Datum::from(82i32), - ]; - - check_encode_end_decode(10, datums, DatumKind::Int32); - } - - #[test] - fn test_all_nulls() { - let datums = vec![ - Datum::Null, - Datum::Null, - Datum::Null, - Datum::Null, - Datum::Null, - Datum::Null, - ]; - - check_encode_end_decode(10, datums, DatumKind::Int32); - } - - #[test] - fn test_null() { - let datums = vec![ - Datum::Null, - Datum::Null, - Datum::Null, - Datum::Null, - Datum::Null, - Datum::Null, - ]; - - check_encode_end_decode(10, datums, DatumKind::Null); - } - - #[test] - fn test_float() { - let datums = vec![Datum::from(10.0f32), Datum::from(9.0f32)]; - check_encode_end_decode(10, datums, DatumKind::Float); - - let datums = vec![Datum::from(10.0f64), Datum::from(1.0f64)]; - check_encode_end_decode(10, datums, DatumKind::Double); - } - - #[test] - fn test_i64() { - let datums = vec![ - Datum::from(10i64), - Datum::from(1i64), - Datum::from(2i64), - Datum::from(18i64), - Datum::from(38i64), - Datum::from(48i64), - Datum::from(-80i64), - Datum::from(-81i64), - Datum::from(-82i64), - ]; - - check_encode_end_decode(10, datums, DatumKind::Int64); - } - - #[test] - fn test_u64() { - let datums = vec![ - Datum::from(10u64), - 
Datum::from(1u64), - Datum::from(2u64), - Datum::from(18u64), - Datum::from(38u64), - Datum::from(48u64), - Datum::from(80u64), - Datum::from(81u64), - Datum::from(82u64), - ]; - - check_encode_end_decode(10, datums, DatumKind::UInt64); - } - - #[test] - fn test_timestamp() { - let datums = vec![ - Datum::from(Timestamp::new(-10)), - Datum::from(Timestamp::new(10)), - Datum::from(Timestamp::new(1024)), - Datum::from(Timestamp::new(1024)), - Datum::from(Timestamp::new(1025)), - ]; - - check_encode_end_decode(10, datums, DatumKind::Timestamp); - } - - #[test] - fn test_time() { - let datums = vec![ - Datum::Time(-10), - Datum::Time(10), - Datum::Time(1024), - Datum::Time(1024), - Datum::Time(1025), - ]; - - check_encode_end_decode(10, datums, DatumKind::Time); - } - - #[test] - fn test_overflow_timestamp() { - let datums = [ - Datum::from(Timestamp::new(i64::MIN)), - Datum::from(Timestamp::new(10)), - Datum::from(Timestamp::new(1024)), - Datum::from(Timestamp::new(1024)), - Datum::from(Timestamp::new(1025)), - ]; - - let encoder = ColumnarEncoder::new(0, 256); - let views = datums.iter().map(|v| v.as_view()); - let mut hint = EncodeHint { - num_nulls: None, - num_datums: None, - datum_kind: DatumKind::Timestamp, - }; - - let mut buf = Vec::new(); - let enc_res = encoder.encode(&mut buf, views, &mut hint); - assert!(enc_res.is_err()); - } - - #[test] - fn test_string() { - let datums = vec![ - Datum::from("vvvv"), - Datum::from("xxxx"), - Datum::from("8"), - Datum::from("9999"), - Datum::from(""), - ]; - - check_encode_end_decode(10, datums, DatumKind::String); - } - - #[test] - fn test_boolean() { - let datums = vec![ - Datum::from(false), - Datum::from(false), - Datum::from(true), - Datum::Null, - Datum::from(false), - ]; - - check_encode_end_decode(10, datums.clone(), DatumKind::Boolean); - - let mut massive_datums = Vec::with_capacity(10 * datums.len()); - for _ in 0..10 { - massive_datums.append(&mut datums.clone()); - } - - check_encode_end_decode(10, massive_datums, DatumKind::Boolean); - } - - #[test] - fn test_massive_string() { - let sample_datums = vec![ - Datum::from("vvvv"), - Datum::from("xxxx"), - Datum::from("8"), - Datum::from("9999"), - ]; - let mut datums = Vec::with_capacity(sample_datums.len() * 100); - for _ in 0..100 { - datums.append(&mut sample_datums.clone()); - } - - check_encode_end_decode(10, datums, DatumKind::String); - } - - #[test] - fn test_large_string() { - let large_string_bytes = [ - vec![b'a'; 500], - vec![b'x'; 5000], - vec![b'x'; 5], - vec![], - vec![b' '; 15000], - ]; - let datums = large_string_bytes - .iter() - .map(|v| Datum::from(&*String::from_utf8_lossy(&v[..]))) - .collect(); - - check_encode_end_decode(10, datums, DatumKind::String); - } -} diff --git a/src/components/codec/src/columnar/number.rs b/src/components/codec/src/columnar/number.rs deleted file mode 100644 index 3ff27cd577..0000000000 --- a/src/components/codec/src/columnar/number.rs +++ /dev/null @@ -1,159 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use bytes_ext::{Buf, BufMut}; -use snafu::{ensure, ResultExt}; - -use crate::{ - columnar::{ - DecodeContext, InvalidVersion, Result, ValuesDecoder, ValuesDecoderImpl, ValuesEncoder, - ValuesEncoderImpl, Varint, - }, - varint, -}; - -/// The max number of the bytes used to store a varint encoding u64/i64. -const MAX_NUM_BYTES_OF_64VARINT: usize = 10; -const VERSION: u8 = 0; -const VERSION_SIZE: usize = 1; - -macro_rules! impl_number_encoding { - ($num_type: ty, $write_method: ident, $read_method: ident) => { - impl ValuesEncoder<$num_type> for ValuesEncoderImpl { - fn encode(&self, buf: &mut B, values: I) -> Result<()> - where - B: BufMut, - I: Iterator, - { - for v in values { - buf.$write_method(v); - } - - Ok(()) - } - } - - impl ValuesDecoder<$num_type> for ValuesDecoderImpl { - fn decode(&self, _ctx: DecodeContext<'_>, buf: &mut B, mut f: F) -> Result<()> - where - B: Buf, - F: FnMut($num_type) -> Result<()>, - { - while buf.remaining() > 0 { - let v = buf.$read_method(); - f(v)?; - } - - Ok(()) - } - } - }; -} - -impl_number_encoding!(i8, put_i8, get_i8); -impl_number_encoding!(u8, put_u8, get_u8); -impl_number_encoding!(u16, put_u16, get_u16); -impl_number_encoding!(i16, put_i16, get_i16); -impl_number_encoding!(u32, put_u32, get_u32); -impl_number_encoding!(i32, put_i32, get_i32); -impl_number_encoding!(f32, put_f32, get_f32); -impl_number_encoding!(f64, put_f64, get_f64); - -impl ValuesEncoder for ValuesEncoderImpl { - fn encode(&self, buf: &mut B, values: I) -> Result<()> - where - B: BufMut, - I: Iterator, - { - buf.put_u8(VERSION); - for v in values { - varint::encode_varint(buf, v).context(Varint)?; - } - - Ok(()) - } - - fn estimated_encoded_size(&self, values: I) -> usize - where - I: Iterator, - { - let (lower, higher) = values.size_hint(); - let num = lower.max(higher.unwrap_or_default()); - num * MAX_NUM_BYTES_OF_64VARINT + VERSION_SIZE - } -} - -impl ValuesDecoder for ValuesDecoderImpl { - fn decode(&self, _ctx: DecodeContext<'_>, buf: &mut B, mut f: F) -> Result<()> - where - B: Buf, - F: FnMut(i64) -> Result<()>, - { - let version = buf.get_u8(); - ensure!(version == VERSION, InvalidVersion { version }); - - while buf.remaining() > 0 { - let v = varint::decode_varint(buf).context(Varint)?; - f(v)?; - } - - Ok(()) - } -} - -impl ValuesEncoder for ValuesEncoderImpl { - fn encode(&self, buf: &mut B, values: I) -> Result<()> - where - B: BufMut, - I: Iterator, - { - buf.put_u8(VERSION); - - for v in values { - varint::encode_uvarint(buf, v).context(Varint)?; - } - - Ok(()) - } - - fn estimated_encoded_size(&self, values: I) -> usize - where - I: Iterator, - { - let (lower, higher) = values.size_hint(); - let num = lower.max(higher.unwrap_or_default()); - num * MAX_NUM_BYTES_OF_64VARINT + VERSION_SIZE - } -} - -impl ValuesDecoder for ValuesDecoderImpl { - fn decode(&self, _ctx: DecodeContext<'_>, buf: &mut B, mut f: F) -> Result<()> - where - B: Buf, - F: FnMut(u64) -> Result<()>, - { - let version = buf.get_u8(); - ensure!(version == VERSION, InvalidVersion { version }); - - while buf.remaining() > 0 { - let v = 
varint::decode_uvarint(buf).context(Varint)?; - f(v)?; - } - - Ok(()) - } -} diff --git a/src/components/codec/src/columnar/timestamp.rs b/src/components/codec/src/columnar/timestamp.rs deleted file mode 100644 index e848be1815..0000000000 --- a/src/components/codec/src/columnar/timestamp.rs +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use common_types::time::Timestamp; -use snafu::{ensure, OptionExt, ResultExt}; - -use crate::{ - columnar::{ - DecodeContext, InvalidVersion, Overflow, Result, ValuesDecoder, ValuesDecoderImpl, - ValuesEncoder, ValuesEncoderImpl, Varint, - }, - consts::MAX_VARINT_BYTES, - varint, -}; - -/// The layout for the timestamp values: -/// ```plaintext -/// +-------------+----------------------+--------+ -/// | version(u8) | first_timestamp(i64) | deltas | -/// +-------------+----------------------+--------+ -/// ``` -/// -/// This encoding assume the timestamps are have very small differences between -/// each other, so we just store the deltas from the first timestamp in varint. 
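A standalone sketch of the delta scheme described above: the first timestamp is written in full and every later timestamp is stored as a varint delta from it. The zig-zag mapping and LEB128-style varint used here are assumptions for illustration; the crate's actual wire format is defined in its `varint` module, and the version byte is omitted.

```rust
/// Zig-zag map a signed delta to an unsigned integer so that small negative
/// deltas also stay small.
fn zigzag(v: i64) -> u64 {
    ((v << 1) ^ (v >> 63)) as u64
}

fn unzigzag(v: u64) -> i64 {
    ((v >> 1) as i64) ^ -((v & 1) as i64)
}

/// LEB128-style unsigned varint: 7 bits per byte, high bit = "more bytes".
fn put_uvarint(buf: &mut Vec<u8>, mut v: u64) {
    while v >= 0x80 {
        buf.push((v as u8) | 0x80);
        v >>= 7;
    }
    buf.push(v as u8);
}

/// Returns the decoded value and the number of bytes consumed.
fn get_uvarint(buf: &[u8]) -> (u64, usize) {
    let (mut v, mut shift, mut used) = (0u64, 0u32, 0usize);
    for &b in buf {
        used += 1;
        v |= u64::from(b & 0x7f) << shift;
        if b & 0x80 == 0 {
            break;
        }
        shift += 7;
    }
    (v, used)
}

/// Encode timestamps as: first timestamp (8 bytes, big-endian) + varint deltas.
fn encode_timestamps(ts: &[i64]) -> Vec<u8> {
    let mut buf = Vec::new();
    let Some((&first, rest)) = ts.split_first() else { return buf };
    buf.extend_from_slice(&first.to_be_bytes());
    for &t in rest {
        put_uvarint(&mut buf, zigzag(t - first));
    }
    buf
}

fn decode_timestamps(buf: &[u8]) -> Vec<i64> {
    if buf.is_empty() {
        return Vec::new();
    }
    let first = i64::from_be_bytes(buf[0..8].try_into().unwrap());
    let mut out = vec![first];
    let mut pos = 8;
    while pos < buf.len() {
        let (raw, used) = get_uvarint(&buf[pos..]);
        out.push(first + unzigzag(raw));
        pos += used;
    }
    out
}

fn main() {
    let ts = vec![1_700_000_000_000i64, 1_700_000_000_015, 1_700_000_000_030];
    let encoded = encode_timestamps(&ts);
    // The two deltas (15 and 30) each fit in one varint byte instead of eight.
    assert_eq!(encoded.len(), 8 + 1 + 1);
    assert_eq!(decode_timestamps(&encoded), ts);
}
```

With millisecond timestamps that sit close together, each delta usually needs only one or two bytes rather than eight, which is the point of the layout.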
-struct Encoding; - -impl Encoding { - const VERSION: u8 = 0; - const VERSION_SIZE: usize = 1; -} - -impl ValuesEncoder for ValuesEncoderImpl { - fn encode(&self, buf: &mut B, mut values: I) -> Result<()> - where - B: bytes_ext::BufMut, - I: Iterator + Clone, - { - buf.put_u8(Encoding::VERSION); - - let first_ts = match values.next() { - Some(v) => v.as_i64(), - None => return Ok(()), - }; - - buf.put_i64(first_ts); - - for value in values { - let ts = value.as_i64(); - let delta = ts.checked_sub(first_ts).with_context(|| Overflow { - msg: format!("first timestamp:{ts}, current timestamp:{first_ts}"), - })?; - varint::encode_varint(buf, delta).context(Varint)?; - } - - Ok(()) - } - - fn estimated_encoded_size(&self, values: I) -> usize - where - I: Iterator, - { - let (lower, higher) = values.size_hint(); - let num = lower.max(higher.unwrap_or_default()); - num * MAX_VARINT_BYTES + Encoding::VERSION_SIZE - } -} - -impl ValuesDecoder for ValuesDecoderImpl { - fn decode(&self, _ctx: DecodeContext<'_>, buf: &mut B, mut f: F) -> Result<()> - where - B: bytes_ext::Buf, - F: FnMut(Timestamp) -> Result<()>, - { - let version = buf.get_u8(); - - ensure!(version == Encoding::VERSION, InvalidVersion { version }); - - if buf.remaining() == 0 { - return Ok(()); - } - - let first_ts = buf.get_i64(); - f(Timestamp::new(first_ts))?; - - while buf.remaining() > 0 { - let delta = varint::decode_varint(buf).context(Varint)?; - let ts = first_ts.checked_add(delta).with_context(|| Overflow { - msg: format!("first timestamp:{first_ts}, delta:{delta}"), - })?; - f(Timestamp::new(ts))?; - } - - Ok(()) - } -} diff --git a/src/components/codec/src/compact/bytes.rs b/src/components/codec/src/compact/bytes.rs deleted file mode 100644 index 79b8fa3a2d..0000000000 --- a/src/components/codec/src/compact/bytes.rs +++ /dev/null @@ -1,143 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Bytes format - -use std::convert::TryFrom; - -use bytes_ext::{Buf, BufMut, Bytes, BytesMut, SafeBuf, SafeBufMut}; -use snafu::{ensure, ResultExt}; - -use crate::{ - compact::{ - DecodeEmptyValue, DecodeValue, DecodeVarint, EncodeValue, EncodeVarint, Error, - MemCompactDecoder, MemCompactEncoder, Result, SkipDecodedValue, TryIntoUsize, - }, - consts, varint, DecodeTo, Encoder, -}; - -impl Encoder<[u8]> for MemCompactEncoder { - type Error = Error; - - // EncodeCompactBytes joins bytes with its length into a byte slice. It is more - // efficient in both space and time compare to EncodeBytes. Note that the - // encoded result is not memcomparable. 
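The note above that the result is not memcomparable can be shown with a tiny sketch: once a length prefix is written before the raw bytes, comparing the encoded buffers byte-wise no longer matches the order of the original values. A single length byte stands in for the varint here, purely for illustration.

```rust
/// Simplified length-prefixed encoding: one length byte followed by the raw
/// bytes (the real encoder uses a varint length, but the effect is the same).
fn encode_compact(value: &[u8]) -> Vec<u8> {
    assert!(value.len() < 256);
    let mut out = vec![value.len() as u8];
    out.extend_from_slice(value);
    out
}

fn main() {
    let (a, b) = (b"ab".as_slice(), b"z".as_slice());
    // Raw byte order: "ab" sorts before "z".
    assert!(a < b);
    // Encoded order flips because the leading length byte is compared first:
    // [2, b'a', b'b'] vs [1, b'z'].
    assert!(encode_compact(a) > encode_compact(b));
}
```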
- fn encode(&self, buf: &mut B, value: &[u8]) -> Result<()> { - varint::encode_varint(buf, value.len() as i64).context(EncodeVarint)?; - buf.try_put(value).context(EncodeValue)?; - Ok(()) - } - - fn estimate_encoded_size(&self, value: &[u8]) -> usize { - consts::MAX_VARINT_BYTES + value.len() - } -} - -impl Encoder for MemCompactEncoder { - type Error = Error; - - fn encode(&self, buf: &mut B, value: &Bytes) -> Result<()> { - self.encode(buf, &value[..]) - } - - fn estimate_encoded_size(&self, value: &Bytes) -> usize { - self.estimate_encoded_size(&value[..]) - } -} - -impl DecodeTo for MemCompactDecoder { - type Error = Error; - - fn decode_to(&self, buf: &mut B, value: &mut BytesMut) -> Result<()> { - let v = usize::try_from(varint::decode_varint(buf).context(DecodeVarint)?) - .context(TryIntoUsize)?; - ensure!(buf.remaining() >= v, DecodeEmptyValue); - value.try_put(&buf.chunk()[..v]).context(DecodeValue)?; - buf.try_advance(v).context(SkipDecodedValue)?; - Ok(()) - } -} - -#[cfg(test)] -mod test { - use super::*; - - struct BytesTest { - data: Bytes, - estimate_encoded_size: usize, - } - - #[test] - fn test_compact_bytes_codec() { - let data = vec![ - BytesTest { - data: Bytes::from_static(b""), - estimate_encoded_size: 10, - }, - BytesTest { - data: Bytes::from_static(b"hello1"), - estimate_encoded_size: 16, - }, - BytesTest { - data: Bytes::from_static(b"hello2"), - estimate_encoded_size: 16, - }, - BytesTest { - data: Bytes::from_static(b"hello3"), - estimate_encoded_size: 16, - }, - BytesTest { - data: Bytes::from_static(&[0x00, 0x01]), - estimate_encoded_size: 12, - }, - BytesTest { - data: Bytes::from_static(&[0xff, 0xff]), - estimate_encoded_size: 12, - }, - BytesTest { - data: Bytes::from_static(&[0x01, 0x00]), - estimate_encoded_size: 12, - }, - BytesTest { - data: Bytes::from_static(b"abc"), - estimate_encoded_size: 13, - }, - BytesTest { - data: Bytes::from_static(b"hello world"), - estimate_encoded_size: 21, - }, - ]; - - let encoder = MemCompactEncoder; - let mut buf = vec![]; - for x in &data { - encoder.encode(&mut buf, &x.data).unwrap(); - assert_eq!( - x.estimate_encoded_size, - encoder.estimate_encoded_size(&x.data) - ); - } - - let decoder = MemCompactDecoder; - let mut buf = &buf[..]; - for x in &data { - let mut d = BytesMut::new(); - decoder.decode_to(&mut buf, &mut d).unwrap(); - assert_eq!(d, x.data); - } - } -} diff --git a/src/components/codec/src/compact/datum.rs b/src/components/codec/src/compact/datum.rs deleted file mode 100644 index be3c179ffd..0000000000 --- a/src/components/codec/src/compact/datum.rs +++ /dev/null @@ -1,294 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Datum compact codec - -use bytes_ext::{Buf, BufMut, BytesMut, SafeBufMut}; -use common_types::{datum::Datum, string::StringBytes, time::Timestamp}; -use snafu::ResultExt; - -use crate::{ - compact::{EncodeKey, Error, MemCompactDecoder, MemCompactEncoder, Result}, - consts, DecodeTo, Encoder, -}; - -// For float points, we use same encoding as mem comparable encoder -impl Encoder for MemCompactEncoder { - type Error = Error; - - fn encode(&self, buf: &mut B, value: &Datum) -> Result<()> { - match value { - Datum::Null => buf.try_put_u8(consts::NULL_FLAG).context(EncodeKey), - Datum::Timestamp(ts) => { - buf.try_put_u8(consts::VARINT_FLAG).context(EncodeKey)?; - self.encode(buf, &ts.as_i64()) - } - Datum::Double(v) => { - buf.try_put_u8(consts::FLOAT_FLAG).context(EncodeKey)?; - self.encode(buf, v) - } - Datum::Float(v) => { - buf.try_put_u8(consts::FLOAT_FLAG).context(EncodeKey)?; - self.encode(buf, v) - } - Datum::Varbinary(v) => { - buf.try_put_u8(consts::COMPACT_BYTES_FLAG) - .context(EncodeKey)?; - self.encode(buf, v) - } - // For string, just encode/decode like bytes. - Datum::String(v) => { - buf.try_put_u8(consts::COMPACT_BYTES_FLAG) - .context(EncodeKey)?; - self.encode(buf, v.as_bytes()) - } - Datum::UInt64(v) => { - buf.try_put_u8(consts::UVARINT_FLAG).context(EncodeKey)?; - self.encode(buf, v) - } - Datum::UInt32(v) => { - buf.try_put_u8(consts::UVARINT_FLAG).context(EncodeKey)?; - self.encode(buf, &(u64::from(*v))) - } - Datum::UInt16(v) => { - buf.try_put_u8(consts::UVARINT_FLAG).context(EncodeKey)?; - self.encode(buf, &(u64::from(*v))) - } - Datum::UInt8(v) => { - buf.try_put_u8(consts::UVARINT_FLAG).context(EncodeKey)?; - self.encode(buf, &(u64::from(*v))) - } - Datum::Int64(v) => { - buf.try_put_u8(consts::VARINT_FLAG).context(EncodeKey)?; - self.encode(buf, v) - } - Datum::Int32(v) => { - buf.try_put_u8(consts::VARINT_FLAG).context(EncodeKey)?; - self.encode(buf, &(i64::from(*v))) - } - Datum::Int16(v) => { - buf.try_put_u8(consts::VARINT_FLAG).context(EncodeKey)?; - self.encode(buf, &(i64::from(*v))) - } - Datum::Int8(v) => { - buf.try_put_u8(consts::VARINT_FLAG).context(EncodeKey)?; - self.encode(buf, &(i64::from(*v))) - } - Datum::Boolean(v) => { - buf.try_put_u8(consts::UVARINT_FLAG).context(EncodeKey)?; - self.encode(buf, &(u64::from(*v))) - } - Datum::Date(v) => { - buf.try_put_u8(consts::VARINT_FLAG).context(EncodeKey)?; - self.encode(buf, &(i64::from(*v))) - } - Datum::Time(v) => { - buf.try_put_u8(consts::VARINT_FLAG).context(EncodeKey)?; - self.encode(buf, v) - } - } - } - - fn estimate_encoded_size(&self, value: &Datum) -> usize { - match value { - // Null takes 1 byte - Datum::Null => 1, - Datum::Timestamp(ts) => self.estimate_encoded_size(&ts.as_i64()), - Datum::Double(v) => self.estimate_encoded_size(v), - Datum::Float(v) => self.estimate_encoded_size(v), - Datum::Varbinary(v) => self.estimate_encoded_size(v), - Datum::String(v) => self.estimate_encoded_size(v.as_bytes()), - Datum::UInt64(v) => self.estimate_encoded_size(v), - Datum::UInt32(v) => self.estimate_encoded_size(&(u64::from(*v))), - Datum::UInt16(v) => self.estimate_encoded_size(&(u64::from(*v))), - Datum::UInt8(v) => self.estimate_encoded_size(&(u64::from(*v))), - Datum::Int64(v) => self.estimate_encoded_size(v), - Datum::Int32(v) => self.estimate_encoded_size(&(i64::from(*v))), - Datum::Int16(v) => self.estimate_encoded_size(&(i64::from(*v))), - Datum::Int8(v) => self.estimate_encoded_size(&(i64::from(*v))), - Datum::Boolean(v) => self.estimate_encoded_size(&(u64::from(*v))), - Datum::Date(v) => 
self.estimate_encoded_size(&(i64::from(*v))), - Datum::Time(v) => self.estimate_encoded_size(v), - } - } -} - -macro_rules! decode_var_u64_into { - ($self: ident, $v: ident, $actual: ident, $buf: ident, $type: ty) => {{ - Self::ensure_flag(consts::UVARINT_FLAG, $actual)?; - let mut data = 0u64; - $self.decode_to($buf, &mut data)?; - *$v = data as $type; - }}; -} - -macro_rules! decode_var_u64_into_bool { - ($self: ident, $v: ident, $actual: ident, $buf: ident) => {{ - Self::ensure_flag(consts::UVARINT_FLAG, $actual)?; - let mut data = 0u64; - $self.decode_to($buf, &mut data)?; - *$v = data != 0; - }}; -} - -macro_rules! decode_var_i64_into { - ($self: ident, $v: ident, $actual: ident, $buf: ident, $type: ty) => {{ - Self::ensure_flag(consts::VARINT_FLAG, $actual)?; - let mut data = 0i64; - $self.decode_to($buf, &mut data)?; - *$v = data as $type; - }}; -} - -impl DecodeTo for MemCompactDecoder { - type Error = Error; - - /// REQUIRE: The datum type should match the type in buf - /// - /// For string datum, the utf8 check will be skipped. - fn decode_to(&self, buf: &mut B, value: &mut Datum) -> Result<()> { - let actual = match self.maybe_read_null(buf)? { - Some(v) => v, - None => { - *value = Datum::Null; - return Ok(()); - } - }; - - match value { - Datum::Null => { - Self::ensure_flag(consts::NULL_FLAG, actual)?; - } - Datum::Timestamp(ts) => { - Self::ensure_flag(consts::VARINT_FLAG, actual)?; - let mut data = 0; - self.decode_to(buf, &mut data)?; - *ts = Timestamp::new(data); - } - Datum::Double(v) => { - Self::ensure_flag(consts::FLOAT_FLAG, actual)?; - self.decode_to(buf, v)?; - } - Datum::Float(v) => { - Self::ensure_flag(consts::FLOAT_FLAG, actual)?; - self.decode_to(buf, v)?; - } - Datum::Varbinary(v) => { - Self::ensure_flag(consts::COMPACT_BYTES_FLAG, actual)?; - let mut data = BytesMut::new(); - self.decode_to(buf, &mut data)?; - *v = data.freeze(); - } - Datum::String(v) => { - Self::ensure_flag(consts::COMPACT_BYTES_FLAG, actual)?; - let mut data = BytesMut::new(); - self.decode_to(buf, &mut data)?; - // For string datum, we won't validate whether the bytes is a valid utf string - // during decoding to improve decode performance. The encoder - // should already done the utf8 check. - unsafe { - *v = StringBytes::from_bytes_unchecked(data.freeze()); - } - } - Datum::UInt64(v) => { - Self::ensure_flag(consts::UVARINT_FLAG, actual)?; - self.decode_to(buf, v)?; - } - Datum::UInt32(v) => decode_var_u64_into!(self, v, actual, buf, u32), - Datum::UInt16(v) => decode_var_u64_into!(self, v, actual, buf, u16), - Datum::UInt8(v) => decode_var_u64_into!(self, v, actual, buf, u8), - Datum::Int64(v) => { - Self::ensure_flag(consts::VARINT_FLAG, actual)?; - self.decode_to(buf, v)?; - } - Datum::Int32(v) => decode_var_i64_into!(self, v, actual, buf, i32), - Datum::Int16(v) => decode_var_i64_into!(self, v, actual, buf, i16), - Datum::Int8(v) => decode_var_i64_into!(self, v, actual, buf, i8), - Datum::Boolean(v) => decode_var_u64_into_bool!(self, v, actual, buf), - Datum::Date(v) => decode_var_i64_into!(self, v, actual, buf, i32), - Datum::Time(v) => { - Self::ensure_flag(consts::VARINT_FLAG, actual)?; - self.decode_to(buf, v)?; - } - } - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use bytes_ext::Bytes; - - use super::*; - - // TODO(yingwen): Test nullable. 
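// Illustrative, standalone sketch (not part of this patch; the names and the
// cut-down enum are hypothetical): the "one flag byte + payload" layout used by
// the compact Datum codec above, shown for the Null and UInt64 cases only. The
// flag values follow codec::consts (NULL_FLAG = 0, UVARINT_FLAG = 9).
enum SketchDatum {
    Null,
    UInt64(u64),
}

fn encode_sketch_datum(buf: &mut Vec<u8>, datum: &SketchDatum) {
    match datum {
        // Null is only the flag byte, which is why its estimated size is 1.
        SketchDatum::Null => buf.push(0),
        // Unsigned integers are widened to u64 and written as a LEB128 uvarint
        // after the flag byte.
        SketchDatum::UInt64(v) => {
            buf.push(9);
            let mut x = *v;
            while x >= 0x80 {
                buf.push((x as u8) | 0x80);
                x >>= 7;
            }
            buf.push(x as u8);
        }
    }
}

#[test]
fn sketch_datum_layout() {
    let mut buf = Vec::new();
    encode_sketch_datum(&mut buf, &SketchDatum::Null);
    encode_sketch_datum(&mut buf, &SketchDatum::UInt64(300));
    // [NULL_FLAG] then [UVARINT_FLAG, 0xAC, 0x02], since 300 = 0b10_0101100.
    assert_eq!(buf, vec![0, 9, 0xAC, 0x02]);
}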
- #[test] - fn test_datum_codec() { - let data = vec![ - // (datum to encode, estimate_encoded_size) - (Datum::Null, 1), - (Datum::Timestamp(Timestamp::new(12345)), 10), - (Datum::Double(10.5), 8), - (Datum::Float(1.99), 4), - (Datum::Varbinary(Bytes::from_static(b"hello world")), 21), - (Datum::String(StringBytes::from_static("hello world")), 21), - (Datum::UInt64(12345), 10), - (Datum::UInt32(1000), 10), - (Datum::UInt16(65000), 10), - (Datum::UInt8(150), 10), - (Datum::Int64(-100209), 10), - (Datum::Int32(-10020), 10), - (Datum::Int16(32500), 10), - (Datum::Int8(-120), 10), - (Datum::Boolean(true), 10), - (Datum::Boolean(false), 10), - (Datum::Date(1000), 10), - (Datum::Time(1_000_000_000), 10), - ]; - let mut decoded = vec![ - Datum::Null, - Datum::Timestamp(Timestamp::new(0)), - Datum::Double(0.0), - Datum::Float(0.0), - Datum::Varbinary(Bytes::new()), - Datum::String(StringBytes::new()), - Datum::UInt64(0), - Datum::UInt32(0), - Datum::UInt16(0), - Datum::UInt8(0), - Datum::Int64(0), - Datum::Int32(0), - Datum::Int16(0), - Datum::Int8(0), - Datum::Boolean(false), - Datum::Boolean(false), - Datum::Date(0), - Datum::Time(0), - ]; - let encoder = MemCompactEncoder; - let decoder = MemCompactDecoder; - for (index, x) in data.iter().enumerate() { - let mut buf = vec![]; - encoder.encode(&mut buf, &x.0).unwrap(); - assert_eq!(x.1, encoder.estimate_encoded_size(&x.0)); - decoder - .decode_to(&mut buf.as_slice(), &mut decoded[index]) - .unwrap(); - assert_eq!(decoded[index], data[index].0); - } - } -} diff --git a/src/components/codec/src/compact/float.rs b/src/components/codec/src/compact/float.rs deleted file mode 100644 index cbc8a100d8..0000000000 --- a/src/components/codec/src/compact/float.rs +++ /dev/null @@ -1,116 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::mem; - -use bytes_ext::{SafeBuf, SafeBufMut}; -use snafu::ResultExt; - -use crate::{ - compact::{DecodeValue, EncodeValue, Error, MemCompactDecoder, MemCompactEncoder, Result}, - DecodeTo, Encoder, -}; - -impl Encoder for MemCompactEncoder { - type Error = Error; - - fn encode(&self, buf: &mut B, value: &f64) -> Result<()> { - buf.try_put_f64(*value).context(EncodeValue)?; - Ok(()) - } - - fn estimate_encoded_size(&self, _value: &f64) -> usize { - mem::size_of::() - } -} - -impl DecodeTo for MemCompactDecoder { - type Error = Error; - - fn decode_to(&self, buf: &mut B, value: &mut f64) -> Result<()> { - *value = buf.try_get_f64().context(DecodeValue)?; - Ok(()) - } -} - -impl Encoder for MemCompactEncoder { - type Error = Error; - - fn encode(&self, buf: &mut B, value: &f32) -> Result<()> { - buf.try_put_f32(*value).context(EncodeValue)?; - Ok(()) - } - - fn estimate_encoded_size(&self, _value: &f32) -> usize { - mem::size_of::() - } -} - -impl DecodeTo for MemCompactDecoder { - type Error = Error; - - fn decode_to(&self, buf: &mut B, value: &mut f32) -> Result<()> { - *value = buf.try_get_f32().context(DecodeValue)?; - Ok(()) - } -} - -#[cfg(test)] -mod test { - use super::*; - - struct TestF64 { - data: f64, - estimate_encoded_size: usize, - } - - #[test] - fn test_compact_f64_codec() { - let data = vec![ - TestF64 { - data: 162132470.5, - estimate_encoded_size: 8, - }, - TestF64 { - data: f64::MIN, - estimate_encoded_size: 8, - }, - TestF64 { - data: f64::MAX, - estimate_encoded_size: 8, - }, - ]; - - let encoder = MemCompactEncoder; - let mut buf = vec![]; - for x in &data { - encoder.encode(&mut buf, &x.data).unwrap(); - assert_eq!( - x.estimate_encoded_size, - encoder.estimate_encoded_size(&x.data) - ); - } - - let decoder = MemCompactDecoder; - let mut buf = &buf[..]; - for x in &data { - let mut d = 0.0; - decoder.decode_to(&mut buf, &mut d).unwrap(); - assert!((d - x.data).abs() < f64::EPSILON); - } - } -} diff --git a/src/components/codec/src/compact/mod.rs b/src/components/codec/src/compact/mod.rs deleted file mode 100644 index d0d9403868..0000000000 --- a/src/components/codec/src/compact/mod.rs +++ /dev/null @@ -1,111 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Mem compact format codec - -// Implementation reference: -// https://github.com/pingcap/tidb/blob/bd011d3c9567c506d8d4343ade03edf77fcd5b56/util/codec/codec.go -mod bytes; -mod datum; -mod float; -mod number; - -use bytes_ext::SafeBuf; -use macros::define_result; -use snafu::{ensure, Backtrace, ResultExt, Snafu}; - -use crate::consts; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to encode flag, err:{}", source))] - EncodeKey { source: bytes_ext::Error }, - - #[snafu(display("Failed to encode value, err:{}", source))] - EncodeValue { source: bytes_ext::Error }, - - #[snafu(display("Failed to encode varint, err:{}", source))] - EncodeVarint { source: crate::varint::Error }, - - #[snafu(display("Failed to decode varint, err:{}", source))] - DecodeVarint { source: crate::varint::Error }, - - #[snafu(display("Failed to decode key, err:{}", source))] - DecodeKey { source: bytes_ext::Error }, - - #[snafu(display("Insufficient bytes to decode value.\nBacktrace:\n{}", backtrace))] - DecodeEmptyValue { backtrace: Backtrace }, - - #[snafu(display( - "Invalid flag, expect:{}, actual:{}.\nBacktrace:\n{}", - expect, - actual, - backtrace - ))] - InvalidKeyFlag { - expect: u8, - actual: u8, - backtrace: Backtrace, - }, - - #[snafu(display("Insufficient bytes to decode value, err:{}", source))] - DecodeValue { source: bytes_ext::Error }, - - #[snafu(display("Failed to skip decoded value, err:{}", source))] - SkipDecodedValue { source: bytes_ext::Error }, - - #[snafu(display("Try into usize error:{}.\nBacktrace:\n{}", source, backtrace))] - TryIntoUsize { - source: std::num::TryFromIntError, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to decode string, err:{}", source))] - DecodeString { source: common_types::string::Error }, - - #[snafu(display("Datum cannot be null.\nBacktrace:\n{}", backtrace))] - NullDatum { backtrace: Backtrace }, -} - -define_result!(Error); - -/// Mem compact encoder -pub struct MemCompactEncoder; - -/// Mem compact decoder -pub struct MemCompactDecoder; - -impl MemCompactDecoder { - /// Returns None if we need to return null datum, otherwise return the flag. - fn maybe_read_null(&self, buf: &mut B) -> Result> { - let actual = buf.try_get_u8().context(DecodeKey)?; - // If actual flag is null, need to check whether this datum is nullable. - if actual == consts::NULL_FLAG { - // The decoder need to return null datum. - return Ok(None); - } - - Ok(Some(actual)) - } - - #[inline] - fn ensure_flag(expect: u8, actual: u8) -> Result<()> { - // Actual flag is not null. - ensure!(expect == actual, InvalidKeyFlag { expect, actual }); - Ok(()) - } -} diff --git a/src/components/codec/src/compact/number.rs b/src/components/codec/src/compact/number.rs deleted file mode 100644 index 720c078da8..0000000000 --- a/src/components/codec/src/compact/number.rs +++ /dev/null @@ -1,175 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Number format - -use bytes_ext::{Buf, SafeBufMut}; -use snafu::ResultExt; - -use crate::{ - compact::{DecodeVarint, EncodeVarint, Error, MemCompactDecoder, MemCompactEncoder, Result}, - consts, varint, DecodeTo, Encoder, -}; - -impl Encoder for MemCompactEncoder { - type Error = Error; - - fn encode(&self, buf: &mut B, value: &i64) -> Result<()> { - varint::encode_varint(buf, *value).context(EncodeVarint)?; - Ok(()) - } - - fn estimate_encoded_size(&self, _value: &i64) -> usize { - consts::MAX_VARINT_BYTES - } -} - -impl DecodeTo for MemCompactDecoder { - type Error = Error; - - fn decode_to(&self, buf: &mut B, value: &mut i64) -> Result<()> { - *value = varint::decode_varint(buf).context(DecodeVarint)?; - Ok(()) - } -} - -impl Encoder for MemCompactEncoder { - type Error = Error; - - fn encode(&self, buf: &mut B, value: &u64) -> Result<()> { - varint::encode_uvarint(buf, *value).context(EncodeVarint)?; - Ok(()) - } - - fn estimate_encoded_size(&self, _value: &u64) -> usize { - consts::MAX_UVARINT_BYTES - } -} - -impl DecodeTo for MemCompactDecoder { - type Error = Error; - - fn decode_to(&self, buf: &mut B, value: &mut u64) -> Result<()> { - *value = varint::decode_uvarint(buf).context(DecodeVarint)?; - Ok(()) - } -} - -#[cfg(test)] -mod test { - use super::*; - - struct TestI64 { - data: i64, - estimate_encoded_size: usize, - } - #[test] - fn test_compact_i64_codec() { - let data = vec![ - TestI64 { - data: 1621324705, - estimate_encoded_size: 10, - }, - TestI64 { - data: 1621324705000, - estimate_encoded_size: 10, - }, - TestI64 { - data: 1521324705, - estimate_encoded_size: 10, - }, - TestI64 { - data: 1621324705123, - estimate_encoded_size: 10, - }, - TestI64 { - data: i64::MIN, - estimate_encoded_size: 10, - }, - TestI64 { - data: i64::MIN + 1, - estimate_encoded_size: 10, - }, - TestI64 { - data: 0, - estimate_encoded_size: 10, - }, - TestI64 { - data: i64::MAX, - estimate_encoded_size: 10, - }, - TestI64 { - data: (1 << 47) - 1, - estimate_encoded_size: 10, - }, - TestI64 { - data: -1 << 47, - estimate_encoded_size: 10, - }, - TestI64 { - data: (1 << 23) - 1, - estimate_encoded_size: 10, - }, - TestI64 { - data: -1 << 23, - estimate_encoded_size: 10, - }, - TestI64 { - data: (1 << 33) - 1, - estimate_encoded_size: 10, - }, - TestI64 { - data: -1 << 33, - estimate_encoded_size: 10, - }, - TestI64 { - data: (1 << 55) - 1, - estimate_encoded_size: 10, - }, - TestI64 { - data: -1 << 55, - estimate_encoded_size: 10, - }, - TestI64 { - data: 1, - estimate_encoded_size: 10, - }, - TestI64 { - data: -1, - estimate_encoded_size: 10, - }, - ]; - - let encoder = MemCompactEncoder; - let mut buf = vec![]; - for x in &data { - encoder.encode(&mut buf, &x.data).unwrap(); - assert_eq!( - x.estimate_encoded_size, - encoder.estimate_encoded_size(&x.data) - ); - } - - let decoder = MemCompactDecoder; - let mut buf = &buf[..]; - for x in &data { - let mut d = -1; - decoder.decode_to(&mut buf, &mut d).unwrap(); - assert_eq!(d, x.data); - } - } -} diff --git a/src/components/codec/src/consts.rs b/src/components/codec/src/consts.rs deleted file mode 100644 index 
68457a471f..0000000000 --- a/src/components/codec/src/consts.rs +++ /dev/null @@ -1,36 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Common constants used in codec - -// First byte in the encoded value which specifies the encoding type. -// TODO(yingwen): Replace flags by datum kind. (Incompatible with old format). -pub const NULL_FLAG: u8 = 0; -pub const BYTES_FLAG: u8 = 1; -pub const COMPACT_BYTES_FLAG: u8 = 2; -pub const INT_FLAG: u8 = 3; -pub const UINT_FLAG: u8 = 4; -pub const FLOAT_FLAG: u8 = 5; -pub const VARINT_FLAG: u8 = 8; -pub const UVARINT_FLAG: u8 = 9; - -/// Max bytes varint can use -pub const MAX_VARINT_BYTES: usize = 10; -/// Max bytes uvarint can be use -pub const MAX_UVARINT_BYTES: usize = 10; -/// Sign mask for u64/i64 conversion -pub const SIGN_MASK: u64 = 0x8000000000000000; diff --git a/src/components/codec/src/lib.rs b/src/components/codec/src/lib.rs deleted file mode 100644 index 423711d47f..0000000000 --- a/src/components/codec/src/lib.rs +++ /dev/null @@ -1,58 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Data encoding - -// TODO(yingwen): Buf use generic type to avoid cost of vtable call per -// encode/decode - -pub mod columnar; -pub mod compact; -mod consts; -pub mod memcomparable; -pub mod row; -mod varint; - -use bytes_ext::{Buf, BufMut}; - -// encoder/decoder -/// Data encode abstraction -pub trait Encoder { - type Error; - - /// Encode value into buf - fn encode(&self, buf: &mut B, value: &T) -> Result<(), Self::Error>; - - /// Estimate the value size after encoded - fn estimate_encoded_size(&self, value: &T) -> usize; -} - -/// Data decode to target -pub trait DecodeTo { - type Error; - - /// Decode from `buf` to `value` - fn decode_to(&self, buf: &mut B, value: &mut T) -> Result<(), Self::Error>; -} - -/// Data decode abstraction -pub trait Decoder { - type Error; - - /// Decode `value` from `buf` - fn decode(&self, buf: &mut B) -> Result; -} diff --git a/src/components/codec/src/memcomparable/bytes.rs b/src/components/codec/src/memcomparable/bytes.rs deleted file mode 100644 index 3cca1ceec2..0000000000 --- a/src/components/codec/src/memcomparable/bytes.rs +++ /dev/null @@ -1,294 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Bytes format - -use bytes_ext::{Buf, BufMut, Bytes, BytesMut, SafeBuf, SafeBufMut}; -use snafu::{ensure, ResultExt}; - -use crate::{ - memcomparable::{ - DecodeValueGroup, DecodeValueMarker, DecodeValuePadding, EncodeValue, Error, MemComparable, - Result, SkipPadding, - }, - DecodeTo, Encoder, -}; - -const ENC_GROUP_SIZE: usize = 8; -const ENC_MARKER: u8 = 0xFF; -const ENC_PAD: u8 = 0x0; -const PADS: [u8; ENC_GROUP_SIZE] = [0; ENC_GROUP_SIZE]; - -impl Encoder<[u8]> for MemComparable { - type Error = Error; - - // encode Bytes guarantees the encoded value is in ascending order for - // comparison, encoding with the following rule: - // [group1][marker1]...[groupN][markerN] - // group is 8 bytes slice which is padding with 0. 
- // marker is `0xFF - padding 0 count` - // For example: - // - // ``` - // [] -> [0, 0, 0, 0, 0, 0, 0, 0, 247] - // [1, 2, 3] -> [1, 2, 3, 0, 0, 0, 0, 0, 250] - // [1, 2, 3, 0] -> [1, 2, 3, 0, 0, 0, 0, 0, 251] - // [1, 2, 3, 4, 5, 6, 7, 8] -> [1, 2, 3, 4, 5, 6, 7, 8, 255, 0, 0, 0, 0, 0, 0, 0, 0, 247] - // ``` - // - // Refer: https://github.com/facebook/mysql-5.6/wiki/MyRocks-record-format#memcomparable-format - fn encode(&self, buf: &mut B, value: &[u8]) -> Result<()> { - let value_len = value.len(); - for idx in (0..=value_len).step_by(ENC_GROUP_SIZE) { - let remain = value_len - idx; - let mut pad_count = 0; - if remain >= ENC_GROUP_SIZE { - buf.try_put(&value[idx..idx + ENC_GROUP_SIZE]) - .context(EncodeValue)?; - } else { - pad_count = ENC_GROUP_SIZE - remain; - buf.try_put(&value[idx..]).context(EncodeValue)?; - buf.try_put(&PADS[..pad_count]).context(EncodeValue)?; - } - let marker = ENC_MARKER - pad_count as u8; - buf.try_put_u8(marker).context(EncodeValue)?; - } - Ok(()) - } - - // Allocate more space to avoid unnecessary slice growing. - // Assume that the byte slice size is about `(len(data) / encGroupSize + 1) * - // (encGroupSize + 1)` bytes, that is `(len(data) / 8 + 1) * 9` in our - // implement. - fn estimate_encoded_size(&self, value: &[u8]) -> usize { - (value.len() / ENC_GROUP_SIZE + 1) * (ENC_GROUP_SIZE + 1) - } -} - -impl Encoder for MemComparable { - type Error = Error; - - fn encode(&self, buf: &mut B, value: &Bytes) -> Result<()> { - self.encode(buf, &value[..]) - } - - fn estimate_encoded_size(&self, value: &Bytes) -> usize { - self.estimate_encoded_size(&value[..]) - } -} - -impl DecodeTo for MemComparable { - type Error = Error; - - // decode Bytes which is encoded by encode Bytes before, - // returns the leftover bytes and decoded value if no error. - fn decode_to(&self, buf: &mut B, value: &mut BytesMut) -> Result<()> { - loop { - let b = buf.chunk(); - ensure!(b.len() > ENC_GROUP_SIZE, DecodeValueGroup); - - let group_bytes = &b[..ENC_GROUP_SIZE + 1]; - let group = &group_bytes[..ENC_GROUP_SIZE]; - let marker = group_bytes[ENC_GROUP_SIZE]; - let pad_count = usize::from(ENC_MARKER - marker); - ensure!( - pad_count <= ENC_GROUP_SIZE, - DecodeValueMarker { group_bytes } - ); - - let real_group_size = ENC_GROUP_SIZE - pad_count; - value - .try_put(&group[..real_group_size]) - .context(EncodeValue)?; - - if pad_count != 0 { - // Check validity of padding bytes. - for v in &group[real_group_size..] 
{ - ensure!(*v == ENC_PAD, DecodeValuePadding { group_bytes }); - } - buf.try_advance(ENC_GROUP_SIZE + 1).context(SkipPadding)?; - - break; - } - buf.try_advance(ENC_GROUP_SIZE + 1).context(SkipPadding)?; - } - Ok(()) - } -} - -#[cfg(test)] -mod test { - use core::cmp::Ordering; - - use super::*; - - struct BytesTest { - data: Bytes, - estimate_encoded_size: usize, - } - - #[test] - fn test_bytes_codec() { - let data = vec![ - BytesTest { - data: Bytes::from_static(b""), - estimate_encoded_size: 9, - }, - BytesTest { - data: Bytes::from_static(b"hello1"), - estimate_encoded_size: 9, - }, - BytesTest { - data: Bytes::from_static(b"hello2"), - estimate_encoded_size: 9, - }, - BytesTest { - data: Bytes::from_static(b"hello3"), - estimate_encoded_size: 9, - }, - BytesTest { - data: Bytes::from_static(&[0x00, 0x01]), - estimate_encoded_size: 9, - }, - BytesTest { - data: Bytes::from_static(&[0xff, 0xff]), - estimate_encoded_size: 9, - }, - BytesTest { - data: Bytes::from_static(&[0x01, 0x00]), - estimate_encoded_size: 9, - }, - BytesTest { - data: Bytes::from_static(b"abc"), - estimate_encoded_size: 9, - }, - BytesTest { - data: Bytes::from_static(b"hello world"), - estimate_encoded_size: 18, - }, - ]; - - let c = MemComparable; - let mut buf = vec![]; - for x in &data { - c.encode(&mut buf, &x.data).unwrap(); - assert_eq!(x.estimate_encoded_size, c.estimate_encoded_size(&x.data)); - } - - let mut buf = &buf[..]; - for x in &data { - let mut d = BytesMut::new(); - c.decode_to(&mut buf, &mut d).unwrap(); - assert_eq!(d, x.data); - } - } - - struct TbBytes { - arg1: Bytes, - arg2: Bytes, - ret: Ordering, - } - - #[test] - fn test_bytes_order() { - let data = vec![ - TbBytes { - arg1: Bytes::new(), - arg2: Bytes::from_static(&[0x00]), - ret: Ordering::Less, - }, - TbBytes { - arg1: Bytes::from_static(&[0x00]), - arg2: Bytes::from_static(&[0x00]), - ret: Ordering::Equal, - }, - TbBytes { - arg1: Bytes::from_static(&[0xFF]), - arg2: Bytes::from_static(&[0x00]), - ret: Ordering::Greater, - }, - TbBytes { - arg1: Bytes::from_static(&[0xFF]), - arg2: Bytes::from_static(&[0xFF, 0x00]), - ret: Ordering::Less, - }, - TbBytes { - arg1: Bytes::from_static(b"a"), - arg2: Bytes::from_static(b"b"), - ret: Ordering::Less, - }, - TbBytes { - arg1: Bytes::from_static(b"a"), - arg2: Bytes::from_static(&[0x00]), - ret: Ordering::Greater, - }, - TbBytes { - arg1: Bytes::from_static(&[0x00]), - arg2: Bytes::from_static(&[0x01]), - ret: Ordering::Less, - }, - TbBytes { - arg1: Bytes::from_static(&[0x00, 0x01]), - arg2: Bytes::from_static(&[0x00, 0x00]), - ret: Ordering::Greater, - }, - TbBytes { - arg1: Bytes::from_static(&[0x00, 0x00, 0x00]), - arg2: Bytes::from_static(&[0x00, 0x00]), - ret: Ordering::Greater, - }, - TbBytes { - arg1: Bytes::from_static(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]), - arg2: Bytes::from_static(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]), - ret: Ordering::Less, - }, - TbBytes { - arg1: Bytes::from_static(&[0x01, 0x02, 0x03, 0x00]), - arg2: Bytes::from_static(&[0x01, 0x02, 0x03]), - ret: Ordering::Greater, - }, - TbBytes { - arg1: Bytes::from_static(&[0x01, 0x03, 0x03, 0x04]), - arg2: Bytes::from_static(&[0x01, 0x03, 0x03, 0x05]), - ret: Ordering::Less, - }, - TbBytes { - arg1: Bytes::from_static(&[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07]), - arg2: Bytes::from_static(&[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08]), - ret: Ordering::Less, - }, - TbBytes { - arg1: Bytes::from_static(&[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09]), - arg2: 
Bytes::from_static(&[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08]), - ret: Ordering::Greater, - }, - TbBytes { - arg1: Bytes::from_static(&[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00]), - arg2: Bytes::from_static(&[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08]), - ret: Ordering::Greater, - }, - ]; - let c = MemComparable; - for x in &data { - let mut buf1 = vec![]; - let mut buf2 = vec![]; - c.encode(&mut buf1, &x.arg1).unwrap(); - c.encode(&mut buf2, &x.arg2).unwrap(); - assert_eq!(x.ret, buf1.as_slice().cmp(buf2.as_slice())); - } - } -} diff --git a/src/components/codec/src/memcomparable/datum.rs b/src/components/codec/src/memcomparable/datum.rs deleted file mode 100644 index c05f7449ec..0000000000 --- a/src/components/codec/src/memcomparable/datum.rs +++ /dev/null @@ -1,332 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Datum comparable codec - -use std::i64; - -use bytes_ext::{Buf, BufMut, BytesMut, SafeBufMut}; -use common_types::{ - datum::{Datum, DatumKind}, - string::StringBytes, - time::Timestamp, -}; -use snafu::ResultExt; - -use crate::{ - consts, - memcomparable::{EncodeKey, Error, MemComparable, Result, UnsupportedKind}, - DecodeTo, Encoder, -}; - -// TODO(yingwen): Consider collate for string. -impl Encoder for MemComparable { - type Error = Error; - - fn encode(&self, buf: &mut B, value: &Datum) -> Result<()> { - match value { - Datum::Null => buf.try_put_u8(consts::NULL_FLAG).context(EncodeKey), - Datum::Timestamp(ts) => { - buf.try_put_u8(consts::INT_FLAG).context(EncodeKey)?; - self.encode(buf, &ts.as_i64()) - } - Datum::Varbinary(v) => { - buf.try_put_u8(consts::BYTES_FLAG).context(EncodeKey)?; - self.encode(buf, v) - } - // For string, we just use same encoding method as bytes now. 
- Datum::String(v) => { - buf.try_put_u8(consts::BYTES_FLAG).context(EncodeKey)?; - self.encode(buf, v.as_bytes()) - } - Datum::UInt64(v) => { - buf.try_put_u8(consts::UINT_FLAG).context(EncodeKey)?; - self.encode(buf, v) - } - Datum::UInt32(v) => { - buf.try_put_u8(consts::UINT_FLAG).context(EncodeKey)?; - self.encode(buf, &(u64::from(*v))) - } - Datum::UInt16(v) => { - buf.try_put_u8(consts::UINT_FLAG).context(EncodeKey)?; - self.encode(buf, &(u64::from(*v))) - } - Datum::UInt8(v) => { - buf.try_put_u8(consts::UINT_FLAG).context(EncodeKey)?; - self.encode(buf, &(u64::from(*v))) - } - Datum::Int64(v) => { - buf.try_put_u8(consts::INT_FLAG).context(EncodeKey)?; - self.encode(buf, v) - } - Datum::Int32(v) => { - buf.try_put_u8(consts::INT_FLAG).context(EncodeKey)?; - self.encode(buf, &(i64::from(*v))) - } - Datum::Int16(v) => { - buf.try_put_u8(consts::INT_FLAG).context(EncodeKey)?; - self.encode(buf, &(i64::from(*v))) - } - Datum::Int8(v) => { - buf.try_put_u8(consts::INT_FLAG).context(EncodeKey)?; - self.encode(buf, &(i64::from(*v))) - } - Datum::Boolean(v) => { - buf.try_put_u8(consts::UINT_FLAG).context(EncodeKey)?; - self.encode(buf, &(u64::from(*v))) - } - Datum::Date(v) => { - buf.try_put_u8(consts::INT_FLAG).context(EncodeKey)?; - self.encode(buf, &(i64::from(*v))) - } - Datum::Time(v) => { - buf.try_put_u8(consts::INT_FLAG).context(EncodeKey)?; - self.encode(buf, v) - } - Datum::Double(_) => UnsupportedKind { - kind: DatumKind::Double, - } - .fail(), - Datum::Float(_) => UnsupportedKind { - kind: DatumKind::Float, - } - .fail(), - } - } - - fn estimate_encoded_size(&self, value: &Datum) -> usize { - match value { - // Null takes 1 byte - Datum::Null => 1, - Datum::Timestamp(ts) => self.estimate_encoded_size(&ts.as_i64()), - Datum::Varbinary(v) => self.estimate_encoded_size(v), - Datum::String(v) => self.estimate_encoded_size(v.as_bytes()), - Datum::UInt64(v) => self.estimate_encoded_size(v), - Datum::UInt32(v) => self.estimate_encoded_size(&(u64::from(*v))), - Datum::UInt16(v) => self.estimate_encoded_size(&(u64::from(*v))), - Datum::UInt8(v) => self.estimate_encoded_size(&(u64::from(*v))), - Datum::Int64(v) => self.estimate_encoded_size(v), - Datum::Int32(v) => self.estimate_encoded_size(&(i64::from(*v))), - Datum::Date(v) => self.estimate_encoded_size(&(i64::from(*v))), - Datum::Time(v) => self.estimate_encoded_size(v), - Datum::Int16(v) => self.estimate_encoded_size(&(i64::from(*v))), - Datum::Int8(v) => self.estimate_encoded_size(&(i64::from(*v))), - Datum::Boolean(v) => self.estimate_encoded_size(&(u64::from(*v))), - // Unsupported kind, but we return 1 - Datum::Double(_) | Datum::Float(_) => 1, - } - } -} - -macro_rules! decode_u64_into { - ($self: ident, $v: ident, $buf: ident, $type: ty) => {{ - Self::ensure_flag($buf, consts::UINT_FLAG)?; - let mut data = 0u64; - $self.decode_to($buf, &mut data)?; - *$v = data as $type; - }}; -} - -macro_rules! decode_u64_into_bool { - ($self: ident, $v: ident, $buf: ident) => {{ - Self::ensure_flag($buf, consts::UINT_FLAG)?; - let mut data = 0u64; - $self.decode_to($buf, &mut data)?; - *$v = data != 0; - }}; -} - -macro_rules! decode_i64_into { - ($self: ident, $v: ident, $buf: ident, $type: ty) => {{ - Self::ensure_flag($buf, consts::INT_FLAG)?; - let mut data = 0i64; - $self.decode_to($buf, &mut data)?; - *$v = data as $type; - }}; -} - -impl DecodeTo for MemComparable { - type Error = Error; - - /// REQUIRE: The datum type should match the type in buf - /// - /// For string datum, the utf8 check will be skipped. 
- fn decode_to(&self, buf: &mut B, value: &mut Datum) -> Result<()> { - match value { - Datum::Null => { - Self::ensure_flag(buf, consts::NULL_FLAG)?; - } - Datum::Timestamp(ts) => { - Self::ensure_flag(buf, consts::INT_FLAG)?; - let mut data = 0; - self.decode_to(buf, &mut data)?; - *ts = Timestamp::new(data); - } - Datum::Varbinary(v) => { - Self::ensure_flag(buf, consts::BYTES_FLAG)?; - let mut data = BytesMut::new(); - self.decode_to(buf, &mut data)?; - *v = data.freeze(); - } - Datum::String(v) => { - Self::ensure_flag(buf, consts::BYTES_FLAG)?; - let mut data = BytesMut::new(); - self.decode_to(buf, &mut data)?; - // For string datum, we won't validate whether the bytes is a valid utf string - // during decoding to improve decode performance. The encoder - // should already done the utf8 check. - unsafe { - *v = StringBytes::from_bytes_unchecked(data.freeze()); - } - } - Datum::UInt64(v) => { - Self::ensure_flag(buf, consts::UINT_FLAG)?; - self.decode_to(buf, v)?; - } - Datum::UInt32(v) => decode_u64_into!(self, v, buf, u32), - Datum::UInt16(v) => decode_u64_into!(self, v, buf, u16), - Datum::UInt8(v) => decode_u64_into!(self, v, buf, u8), - Datum::Int64(v) => { - Self::ensure_flag(buf, consts::INT_FLAG)?; - self.decode_to(buf, v)?; - } - Datum::Int32(v) => decode_i64_into!(self, v, buf, i32), - Datum::Date(v) => decode_i64_into!(self, v, buf, i32), - Datum::Time(v) => { - Self::ensure_flag(buf, consts::INT_FLAG)?; - self.decode_to(buf, v)?; - } - Datum::Int16(v) => decode_i64_into!(self, v, buf, i16), - Datum::Int8(v) => decode_i64_into!(self, v, buf, i8), - Datum::Boolean(v) => decode_u64_into_bool!(self, v, buf), - Datum::Double(_) => { - return UnsupportedKind { - kind: DatumKind::Double, - } - .fail(); - } - Datum::Float(_) => { - return UnsupportedKind { - kind: DatumKind::Float, - } - .fail(); - } - } - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use core::cmp::Ordering; - - use bytes_ext::Bytes; - - use super::*; - - #[test] - fn test_datum_codec() { - let data = vec![ - // (datum to encode, estimate_encoded_size) - (Datum::Null, 1), - (Datum::Timestamp(Timestamp::new(12345)), 9), - (Datum::Varbinary(Bytes::from_static(b"hello world")), 18), - (Datum::String(StringBytes::from_static("hello world")), 18), - (Datum::UInt64(100209), 9), - (Datum::UInt32(10020), 9), - (Datum::UInt16(65000), 9), - (Datum::UInt8(150), 9), - (Datum::Int64(-100209), 9), - (Datum::Int32(-10020), 9), - (Datum::Int16(32500), 9), - (Datum::Int8(-120), 9), - (Datum::Boolean(true), 9), - (Datum::Boolean(false), 9), - (Datum::Date(1000), 9), - (Datum::Time(100000000), 9), - ]; - let mut decoded = vec![ - Datum::Null, - Datum::Timestamp(Timestamp::new(0)), - Datum::Varbinary(Bytes::new()), - Datum::String(StringBytes::new()), - Datum::UInt64(0), - Datum::UInt32(0), - Datum::UInt16(0), - Datum::UInt8(0), - Datum::Int64(0), - Datum::Int32(0), - Datum::Int16(0), - Datum::Int8(0), - Datum::Boolean(false), - Datum::Boolean(false), - Datum::Date(0), - Datum::Time(0), - ]; - let c = MemComparable; - for (index, x) in data.iter().enumerate() { - let mut buf = vec![]; - c.encode(&mut buf, &x.0).unwrap(); - assert_eq!(x.1, c.estimate_encoded_size(&x.0)); - c.decode_to(&mut buf.as_slice(), &mut decoded[index]) - .unwrap(); - assert_eq!(decoded[index], data[index].0); - } - } - - #[test] - fn test_datum_order() { - let data = vec![ - // (arg1, arg2, cmp order of arg1 and arg2) - (Datum::Null, Datum::Null, Ordering::Equal), - ( - Datum::Timestamp(Timestamp::new(12345)), - Datum::Timestamp(Timestamp::new(123456)), - 
Ordering::Less, - ), - ( - Datum::Varbinary(Bytes::from_static(&[ - 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - ])), - Datum::Varbinary(Bytes::from_static(&[ - 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, - ])), - Ordering::Less, - ), - ( - Datum::String(StringBytes::from_static("abce123")), - Datum::String(StringBytes::from_static("abce1234")), - Ordering::Less, - ), - (Datum::UInt64(888), Datum::UInt64(889), Ordering::Less), - (Datum::Date(1000), Datum::Date(2000), Ordering::Less), - ( - Datum::Time(2000000000), - Datum::Time(1000000000), - Ordering::Greater, - ), - ]; - let c = MemComparable; - for x in &data { - let mut buf1 = vec![]; - let mut buf2 = vec![]; - c.encode(&mut buf1, &x.0).unwrap(); - c.encode(&mut buf2, &x.1).unwrap(); - assert_eq!(x.2, buf1.as_slice().cmp(buf2.as_slice())); - } - } -} diff --git a/src/components/codec/src/memcomparable/mod.rs b/src/components/codec/src/memcomparable/mod.rs deleted file mode 100644 index d5f8111de1..0000000000 --- a/src/components/codec/src/memcomparable/mod.rs +++ /dev/null @@ -1,115 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Mem comparable format codec - -// Implementation reference: -// https://github.com/pingcap/tidb/blob/bd011d3c9567c506d8d4343ade03edf77fcd5b56/util/codec/codec.go - -mod bytes; -mod datum; -mod number; - -use bytes_ext::{BytesMut, SafeBuf}; -use common_types::datum::DatumKind; -use macros::define_result; -use snafu::{ensure, Backtrace, ResultExt, Snafu}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to encode flag, err:{}", source))] - EncodeKey { source: bytes_ext::Error }, - - #[snafu(display("Failed to encode value, err:{}", source))] - EncodeValue { source: bytes_ext::Error }, - - #[snafu(display("Failed to decode key, err:{}", source))] - DecodeKey { source: bytes_ext::Error }, - - #[snafu(display( - "Invalid flag, expect:{}, actual:{}.\nBacktrace:\n{}", - expect, - actual, - backtrace - ))] - InvalidKeyFlag { - expect: u8, - actual: u8, - backtrace: Backtrace, - }, - - #[snafu(display( - "Unsupported datum kind to compare in mem, kind :{}.\nBacktrace:\n{}", - kind, - backtrace - ))] - UnsupportedKind { - kind: DatumKind, - backtrace: Backtrace, - }, - - #[snafu(display("Insufficient bytes to decode value, err:{}", source))] - DecodeValue { source: bytes_ext::Error }, - - #[snafu(display("Insufficient bytes to decode value group.\nBacktrace:\n{}", backtrace))] - DecodeValueGroup { backtrace: Backtrace }, - - #[snafu(display( - "Invalid marker byte, group bytes: {:?}.\nBacktrace:\n{}", - group_bytes, - backtrace - ))] - DecodeValueMarker { - group_bytes: BytesMut, - backtrace: Backtrace, - }, - - #[snafu(display( - "Invalid padding byte, group bytes: {:?}.\nBacktrace:\n{}", - group_bytes, - backtrace - ))] - DecodeValuePadding { - group_bytes: BytesMut, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to skip padding bytes, err:{}.", source))] - SkipPadding { source: bytes_ext::Error }, - - #[snafu(display("Failed to decode string, err:{}", source))] - DecodeString { source: common_types::string::Error }, -} - -define_result!(Error); - -/// Mem comparable codec -pub struct MemComparable; - -impl MemComparable { - fn ensure_flag(buf: &mut B, flag: u8) -> Result<()> { - let actual = buf.try_get_u8().context(DecodeKey)?; - ensure!( - flag == actual, - InvalidKeyFlag { - expect: flag, - actual - } - ); - Ok(()) - } -} diff --git a/src/components/codec/src/memcomparable/number.rs b/src/components/codec/src/memcomparable/number.rs deleted file mode 100644 index 7679c9e0ec..0000000000 --- a/src/components/codec/src/memcomparable/number.rs +++ /dev/null @@ -1,348 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Number format - -use bytes_ext::{SafeBuf, SafeBufMut}; -use snafu::ResultExt; - -use crate::{ - consts, - memcomparable::{DecodeValue, EncodeValue, Error, MemComparable, Result}, - DecodeTo, Encoder, -}; - -impl Encoder for MemComparable { - type Error = Error; - - fn encode(&self, buf: &mut B, value: &i64) -> Result<()> { - buf.try_put_u64(encode_int_to_cmp_uint(*value)) - .context(EncodeValue)?; - Ok(()) - } - - fn estimate_encoded_size(&self, _value: &i64) -> usize { - // flag + u64 - 9 - } -} - -impl DecodeTo for MemComparable { - type Error = Error; - - fn decode_to(&self, buf: &mut B, value: &mut i64) -> Result<()> { - *value = decode_cmp_uint_to_int(buf.try_get_u64().context(DecodeValue)?); - Ok(()) - } -} - -// encode_int_to_cmp_uint make int v to comparable uint type -fn encode_int_to_cmp_uint(v: i64) -> u64 { - (v as u64) ^ consts::SIGN_MASK -} - -// decode_cmp_uint_to_int decodes the u that encoded by encode_int_to_cmp_uint -fn decode_cmp_uint_to_int(u: u64) -> i64 { - (u ^ consts::SIGN_MASK) as i64 -} - -impl Encoder for MemComparable { - type Error = Error; - - fn encode(&self, buf: &mut B, value: &u64) -> Result<()> { - buf.try_put_u64(*value).context(EncodeValue)?; - Ok(()) - } - - fn estimate_encoded_size(&self, _value: &u64) -> usize { - // flag + u64 - 9 - } -} - -impl DecodeTo for MemComparable { - type Error = Error; - - fn decode_to(&self, buf: &mut B, value: &mut u64) -> Result<()> { - *value = buf.try_get_u64().context(DecodeValue)?; - Ok(()) - } -} - -#[cfg(test)] -mod test { - use core::cmp::Ordering; - - use super::*; - - struct TestI64 { - data: i64, - estimate_encoded_size: usize, - } - - impl TestI64 { - fn new(data: i64) -> Self { - Self { - data, - estimate_encoded_size: 9, - } - } - } - - #[test] - fn test_i64_codec() { - let data = vec![ - TestI64::new(1621324705), - TestI64::new(1621324705000), - TestI64::new(1521324705), - TestI64::new(1621324705123), - TestI64::new(i64::MIN), - TestI64::new(i64::MIN + 1), - TestI64::new(0), - TestI64::new(i64::MAX), - TestI64::new((1 << 47) - 1), - TestI64::new(-1 << 47), - TestI64::new((1 << 23) - 1), - TestI64::new(-1 << 23), - TestI64::new((1 << 33) - 1), - TestI64::new(-1 << 33), - TestI64::new((1 << 55) - 1), - TestI64::new(-1 << 55), - TestI64::new(1), - TestI64::new(-1), - ]; - let c = MemComparable; - let mut buf = vec![]; - for x in &data { - c.encode(&mut buf, &x.data).unwrap(); - assert_eq!(x.estimate_encoded_size, c.estimate_encoded_size(&x.data)); - } - - let mut buf = &buf[..]; - for x in &data { - let mut d = -1; - c.decode_to(&mut buf, &mut d).unwrap(); - assert_eq!(d, x.data); - } - } - - struct TestU64 { - data: u64, - estimate_encoded_size: usize, - } - - impl TestU64 { - fn new(data: u64) -> Self { - Self { - data, - estimate_encoded_size: 9, - } - } - } - - #[test] - fn test_u64_codec() { - let data = vec![ - TestU64::new(0), - TestU64::new(u64::from(u8::MAX)), - TestU64::new(u64::from(u16::MAX)), - TestU64::new(u64::from(u32::MAX)), - TestU64::new(u64::MAX), - TestU64::new((1 << 24) - 1), - TestU64::new((1 << 48) - 1), - TestU64::new((1 << 56) - 1), - TestU64::new(1), - TestU64::new(i8::MAX as u64), - TestU64::new(i16::MAX as u64), - TestU64::new(i32::MAX as u64), - TestU64::new(i64::MAX as u64), - ]; - let c = MemComparable; - let mut buf = vec![]; - for x in &data { - c.encode(&mut buf, &x.data).unwrap(); - assert_eq!(x.estimate_encoded_size, c.estimate_encoded_size(&x.data)); - } - - let mut buf = &buf[..]; - for x in &data { - let mut d = 0; - c.decode_to(&mut buf, &mut d).unwrap(); - 
assert_eq!(d, x.data); - } - } - - struct TblI64 { - arg1: i64, - arg2: i64, - ret: Ordering, - } - - #[test] - fn test_i64_order() { - let data = vec![ - TblI64 { - arg1: -1, - arg2: 1, - ret: Ordering::Less, - }, - TblI64 { - arg1: i64::MAX, - arg2: i64::MIN, - ret: Ordering::Greater, - }, - TblI64 { - arg1: i64::MAX, - arg2: i32::MAX as i64, - ret: Ordering::Greater, - }, - TblI64 { - arg1: i32::MIN as i64, - arg2: i16::MAX as i64, - ret: Ordering::Less, - }, - TblI64 { - arg1: i64::MIN, - arg2: i8::MAX as i64, - ret: Ordering::Less, - }, - TblI64 { - arg1: 0, - arg2: i8::MAX as i64, - ret: Ordering::Less, - }, - TblI64 { - arg1: i8::MIN as i64, - arg2: 0, - ret: Ordering::Less, - }, - TblI64 { - arg1: i16::MIN as i64, - arg2: i16::MAX as i64, - ret: Ordering::Less, - }, - TblI64 { - arg1: 1, - arg2: -1, - ret: Ordering::Greater, - }, - TblI64 { - arg1: 1, - arg2: 0, - ret: Ordering::Greater, - }, - TblI64 { - arg1: -1, - arg2: 0, - ret: Ordering::Less, - }, - TblI64 { - arg1: 0, - arg2: 0, - ret: Ordering::Equal, - }, - TblI64 { - arg1: i16::MAX as i64, - arg2: i16::MAX as i64, - ret: Ordering::Equal, - }, - ]; - let c = MemComparable; - for x in &data { - let mut buf1 = vec![]; - let mut buf2 = vec![]; - c.encode(&mut buf1, &x.arg1).unwrap(); - c.encode(&mut buf2, &x.arg2).unwrap(); - assert_eq!(x.ret, buf1.as_slice().cmp(buf2.as_slice())); - } - } - - struct TblU64 { - arg1: u64, - arg2: u64, - ret: Ordering, - } - - #[test] - fn test_u64_order() { - let data = vec![ - TblU64 { - arg1: 0, - arg2: 0, - ret: Ordering::Equal, - }, - TblU64 { - arg1: 1, - arg2: 0, - ret: Ordering::Greater, - }, - TblU64 { - arg1: 0, - arg2: 1, - ret: Ordering::Less, - }, - TblU64 { - arg1: i8::MAX as u64, - arg2: i16::MAX as u64, - ret: Ordering::Less, - }, - TblU64 { - arg1: u32::MAX as u64, - arg2: i32::MAX as u64, - ret: Ordering::Greater, - }, - TblU64 { - arg1: u8::MAX as u64, - arg2: i8::MAX as u64, - ret: Ordering::Greater, - }, - TblU64 { - arg1: u16::MAX as u64, - arg2: i32::MAX as u64, - ret: Ordering::Less, - }, - TblU64 { - arg1: u64::MAX, - arg2: i64::MAX as u64, - ret: Ordering::Greater, - }, - TblU64 { - arg1: i64::MAX as u64, - arg2: u32::MAX as u64, - ret: Ordering::Greater, - }, - TblU64 { - arg1: u64::MAX, - arg2: 0, - ret: Ordering::Greater, - }, - TblU64 { - arg1: 0, - arg2: u64::MAX, - ret: Ordering::Less, - }, - ]; - let c = MemComparable; - for x in &data { - let mut buf1 = vec![]; - let mut buf2 = vec![]; - c.encode(&mut buf1, &x.arg1).unwrap(); - c.encode(&mut buf2, &x.arg2).unwrap(); - assert_eq!(x.ret, buf1.as_slice().cmp(buf2.as_slice())); - } - } -} diff --git a/src/components/codec/src/row/mod.rs b/src/components/codec/src/row/mod.rs deleted file mode 100644 index 3f3483732d..0000000000 --- a/src/components/codec/src/row/mod.rs +++ /dev/null @@ -1,247 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Row encoding utils -//! -//! Notice: The encoding method is used both in wal and memtable. Be careful for -//! data compatibility - -use std::convert::TryFrom; - -use bytes_ext::{Buf, BufMut, ByteVec, BytesMut}; -use common_types::{ - datum::Datum, - row::{Row, RowGroup}, - schema::{IndexInWriterSchema, Schema}, -}; -use macros::define_result; -use snafu::{ResultExt, Snafu}; - -use crate::{ - compact::{MemCompactDecoder, MemCompactEncoder}, - DecodeTo, Decoder, Encoder, -}; - -#[derive(Debug, Snafu)] -#[snafu(visibility(pub(crate)))] -pub enum Error { - #[snafu(display("Failed to encode row datum, err:{}", source))] - EncodeRowDatum { source: crate::compact::Error }, - - #[snafu(display("Failed to decode row datum, err:{}", source))] - DecodeRowDatum { source: crate::compact::Error }, -} - -define_result!(Error); - -/// Compact row encoder for wal. -struct WalRowEncoder<'a> { - /// Schema of table - table_schema: &'a Schema, - /// Index of table column in writer - index_in_writer: &'a IndexInWriterSchema, -} - -impl<'a> Encoder for WalRowEncoder<'a> { - type Error = Error; - - fn encode(&self, buf: &mut B, value: &Row) -> Result<()> { - let encoder = MemCompactEncoder; - for index_in_table in 0..self.table_schema.num_columns() { - match self.index_in_writer.column_index_in_writer(index_in_table) { - Some(writer_index) => { - // Column in writer - encoder - .encode(buf, &value[writer_index]) - .context(EncodeRowDatum)?; - } - None => { - // Column not in writer - encoder.encode(buf, &Datum::Null).context(EncodeRowDatum)?; - } - } - } - - Ok(()) - } - - fn estimate_encoded_size(&self, value: &Row) -> usize { - let encoder = MemCompactEncoder; - let mut total_len = 0; - for index_in_table in 0..self.table_schema.num_columns() { - match self.index_in_writer.column_index_in_writer(index_in_table) { - Some(writer_index) => { - // Column in writer - total_len += encoder.estimate_encoded_size(&value[writer_index]); - } - None => { - // Column not in writer - total_len += encoder.estimate_encoded_size(&Datum::Null); - } - } - } - - total_len - } -} - -/// Compact row decoder for wal, supports projection. -#[derive(Debug)] -pub struct WalRowDecoder<'a> { - /// Schema of row to decode - schema: &'a Schema, -} - -impl<'a> WalRowDecoder<'a> { - /// Create a decoder with given `schema`, the caller should ensure the - /// schema matches the row to be decoded. - pub fn new(schema: &'a Schema) -> Self { - Self { schema } - } -} - -impl<'a> Decoder for WalRowDecoder<'a> { - type Error = Error; - - fn decode(&self, buf: &mut B) -> Result { - let num_columns = self.schema.num_columns(); - let mut datums = Vec::with_capacity(num_columns); - - for idx in 0..num_columns { - let column_schema = &self.schema.column(idx); - let datum_kind = &column_schema.data_type; - let decoder = MemCompactDecoder; - - // Decode each column - let mut datum = Datum::empty(datum_kind); - decoder.decode_to(buf, &mut datum).context(DecodeRowDatum)?; - - datums.push(datum); - } - - Ok(Row::from_datums(datums)) - } -} - -/// Encode the row group in the format that can write to wal. -/// -/// Arguments -/// - row_group: The rows to be encoded and wrote to. -/// - table_schema: The schema the row group need to be encoded into, the schema -/// of the row group need to be write compatible for the table schema. -/// - index_in_writer: The index mapping from table schema to column in the -/// schema of row group. 
-/// - encoded_rows: The Vec to store bytes of each encoded row. -pub fn encode_row_group_for_wal( - row_group: &RowGroup, - table_schema: &Schema, - index_in_writer: &IndexInWriterSchema, - encoded_rows: &mut Vec, -) -> Result<()> { - let row_encoder = WalRowEncoder { - table_schema, - index_in_writer, - }; - - // Use estimated size of first row to avoid compute all - let row_estimated_size = match row_group.get_row(0) { - Some(first_row) => row_encoder.estimate_encoded_size(first_row), - // The row group is empty - None => return Ok(()), - }; - - encoded_rows.reserve(row_group.num_rows()); - - // Each row is constructed in writer schema, we need to encode it in - // `table_schema` - for row in row_group { - let mut buf = Vec::with_capacity(row_estimated_size); - row_encoder.encode(&mut buf, row)?; - - encoded_rows.push(buf); - } - - Ok(()) -} - -/// Return the next prefix key -/// -/// Assume there are keys like: -/// -/// ```text -/// rowkey1 -/// rowkey1_column1 -/// rowkey1_column2 -/// rowKey2 -/// ``` -/// -/// If we seek 'rowkey1' Next, we will get 'rowkey1_column1'. -/// If we seek 'rowkey1' PrefixNext, we will get 'rowkey2'. -/// -/// Ported from -/// -/// REQUIRE: The key should be memory comparable -// TODO(yingwen): Maybe add scratch param -// TODO(yingwen): Move to another mod -pub fn key_prefix_next(key: &[u8]) -> BytesMut { - let mut buf = BytesMut::from(key); - // isize should be enough to represent the key len - let mut idx = isize::try_from(key.len() - 1).unwrap(); - while idx >= 0 { - let i = idx as usize; - buf[i] += 1; - if buf[i] != 0 { - break; - } - - idx -= 1; - } - if idx == -1 { - buf.copy_from_slice(key); - buf.put_u8(0); - } - - buf -} - -#[cfg(test)] -mod test { - use common_types::schema::IndexInWriterSchema; - - use crate::{ - row::{WalRowDecoder, WalRowEncoder}, - Decoder, Encoder, - }; - - #[test] - fn test_wal_encode_decode() { - let schema = common_types::tests::build_schema(); - let rows = common_types::tests::build_rows(); - let index_in_writer = IndexInWriterSchema::for_same_schema(schema.num_columns()); - let wal_encoder = WalRowEncoder { - table_schema: &schema, - index_in_writer: &index_in_writer, - }; - let wal_decoder = WalRowDecoder::new(&schema); - for row in rows { - let mut buf = Vec::new(); - wal_encoder.encode(&mut buf, &row).unwrap(); - let row_decoded = wal_decoder.decode(&mut buf.as_slice()).unwrap(); - assert_eq!(row_decoded, row); - } - } -} diff --git a/src/components/codec/src/varint.rs b/src/components/codec/src/varint.rs deleted file mode 100644 index 35e786bb87..0000000000 --- a/src/components/codec/src/varint.rs +++ /dev/null @@ -1,226 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Varint for codec whose test is covered by compact/number.rs -use bytes_ext::{Buf, SafeBuf, SafeBufMut}; -use macros::define_result; -use snafu::{Backtrace, ResultExt, Snafu}; - -#[derive(Debug, Snafu)] -#[snafu(visibility(pub(crate)))] -pub enum Error { - #[snafu(display("Failed to encode varint, err:{}", source))] - EncodeVarint { source: bytes_ext::Error }, - - #[snafu(display("Insufficient bytes to decode value.\nBacktrace:\n{}", backtrace))] - DecodeEmptyValue { backtrace: Backtrace }, - - #[snafu(display("Insufficient bytes to decode value, err:{}", source))] - DecodeValue { source: bytes_ext::Error }, - - #[snafu(display("Value larger than 64 bits (overflow).\nBacktrace:\n{}", backtrace))] - UvarintOverflow { backtrace: Backtrace }, -} - -define_result!(Error); - -// from https://golang.org/src/encoding/binary/varint.go?s=2506:2545#L68 -// PutVarint encodes an int64 into buf and returns the number of bytes written. -// If the buffer is too small, PutVarint will panic. -// -// ```go -// func PutVarint(buf []byte, x int64) int { -// ux := uint64(x) << 1 -// if x < 0 { -// ux = ^ux -// } -// return PutUvarint(buf, ux) -// } -// ``` -pub fn encode_varint(buf: &mut B, value: i64) -> Result { - let mut x = (value as u64) << 1; - if value < 0 { - x = !x; - } - encode_uvarint(buf, x) -} - -// from https://golang.org/src/encoding/binary/varint.go?s=1611:1652#L31 -// -// ```go -// func PutUvarint(buf []byte, x uint64) int { -// i := 0 -// for x >= 0x80 { -// buf[i] = byte(x) | 0x80 -// x >>= 7 -// i++ -// } -// buf[i] = byte(x) -// return i + 1 -// } -// ``` -pub fn encode_uvarint(buf: &mut B, mut x: u64) -> Result { - let mut num_bytes = 0; - while x >= 0x80 { - buf.try_put_u8(x as u8 | 0x80).context(EncodeVarint)?; - x >>= 7; - num_bytes += 1; - } - buf.try_put_u8(x as u8).context(EncodeVarint)?; - Ok(num_bytes + 1) -} - -// from https://golang.org/src/encoding/binary/varint.go?s=2955:2991#L84 -// Varint decodes an int64 from buf and returns that value and the -// number of bytes read (> 0). If an error occurred, the value is 0 -// and the number of bytes n is <= 0 with the following meaning: -// -// n == 0: buf too small -// n < 0: value larger than 64 bits (overflow) -// and -n is the number of bytes read -// -// ```go -// func Varint(buf []byte) (int64, int) { -// ux, n := Uvarint(buf) // ok to continue in presence of error -// x := int64(ux >> 1) -// if ux&1 != 0 { -// x = ^x -// } -// return x, n -// } -// ``` -pub fn decode_varint(buf: &mut B) -> Result { - let ux = decode_uvarint(buf)?; - let mut x = (ux >> 1) as i64; - if ux & 1 != 0 { - x = !x; - } - Ok(x) -} - -// from https://golang.org/src/encoding/binary/varint.go?s=2070:2108#L50 -// Uvarint decodes a uint64 from buf and returns that value and the -// number of bytes read (> 0). 
If an error occurred, the value is 0 -// and the number of bytes n is <= 0 meaning: -// -// n == 0: buf too small -// n < 0: value larger than 64 bits (overflow) -// and -n is the number of bytes read -// -// ```go -// func Uvarint(buf []byte) (uint64, int) { -// var x uint64 -// var s uint -// for i, b := range buf { -// if b < 0x80 { -// if i > 9 || i == 9 && b > 1 { -// return 0, -(i + 1) // overflow -// } -// return x | uint64(b)<(buf: &mut B) -> Result { - let mut x: u64 = 0; - let mut s: usize = 0; - let len = buf.remaining(); - for i in 0..len { - let b = buf.try_get_u8().context(DecodeValue)?; - if b < 0x80 { - if i > 9 || i == 9 && b > 1 { - return UvarintOverflow.fail(); // overflow - } - return Ok(x | u64::from(b) << s); - } - x |= u64::from(b & 0x7f) << s; - s += 7; - } - DecodeEmptyValue.fail() -} - -#[cfg(test)] -mod tests { - use bytes_ext::BytesMut; - - use super::*; - - #[test] - fn test_encode_decode_varint() { - let nums: Vec<(i64, usize)> = vec![ - (i64::MIN, 10), - (-1000000000000000, 8), - (-100000000000, 6), - (-1000000000, 5), - (-100000, 3), - (-65535, 3), - (-1000, 2), - (-125, 2), - (-32, 1), - (0, 1), - (64, 2), - (125, 2), - (1000, 2), - (65535, 3), - (10000, 3), - (1000000000, 5), - (100000000000, 6), - (10000000000000, 7), - (1000000000000000, 8), - (i64::MAX, 10), - ]; - - for (i, size) in nums { - let mut buf = BytesMut::with_capacity(8); - assert!(encode_varint(&mut buf, i).is_ok()); - assert_eq!(size, buf.len()); - let d = decode_varint(&mut buf); - assert!(d.is_ok()); - assert_eq!(i, d.unwrap()); - } - } - - #[test] - fn test_encode_decode_uvarint() { - let nums: Vec<(u64, usize)> = vec![ - (0, 1), - (64, 1), - (125, 1), - (1000, 2), - (65535, 3), - (10000, 2), - (1000000000, 5), - (100000000000, 6), - (10000000000000, 7), - (1000000000000000, 8), - (u64::MAX, 10), - ]; - - for (i, size) in nums { - let mut buf = BytesMut::with_capacity(8); - assert!(encode_uvarint(&mut buf, i).is_ok()); - assert_eq!(size, buf.len()); - let d = decode_uvarint(&mut buf); - assert!(d.is_ok()); - assert_eq!(i, d.unwrap()); - } - } -} diff --git a/src/components/future_ext/Cargo.toml b/src/components/future_ext/Cargo.toml deleted file mode 100644 index 1bc72d52a8..0000000000 --- a/src/components/future_ext/Cargo.toml +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
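
The deleted `varint.rs` above ports Go's `PutVarint`/`PutUvarint` (zig-zag mapping plus base-128 continuation bytes). As a reference, here is a minimal standalone sketch of the same scheme over plain byte slices, assuming only the standard library; the `bytes_ext` buffer traits and snafu errors of the original are deliberately omitted and all names are illustrative.

```rust
/// Zig-zag map an i64 into a u64 so small negative numbers stay short.
fn zigzag(value: i64) -> u64 {
    let ux = (value as u64) << 1;
    if value < 0 { !ux } else { ux }
}

/// Append a u64 as a base-128 varint; returns the number of bytes written.
fn encode_uvarint(buf: &mut Vec<u8>, mut x: u64) -> usize {
    let mut n = 0;
    while x >= 0x80 {
        buf.push((x as u8) | 0x80); // set the continuation bit
        x >>= 7;
        n += 1;
    }
    buf.push(x as u8);
    n + 1
}

fn encode_varint(buf: &mut Vec<u8>, value: i64) -> usize {
    encode_uvarint(buf, zigzag(value))
}

/// Decode a u64 varint from the front of `buf`; returns (value, bytes_read).
fn decode_uvarint(buf: &[u8]) -> Option<(u64, usize)> {
    let mut x = 0u64;
    let mut s = 0u32;
    for (i, &b) in buf.iter().enumerate() {
        if b < 0x80 {
            if i > 9 || (i == 9 && b > 1) {
                return None; // overflow: more than 64 bits
            }
            return Some((x | (u64::from(b) << s), i + 1));
        }
        x |= u64::from(b & 0x7f) << s;
        s += 7;
    }
    None // buffer exhausted before the terminating byte
}

fn decode_varint(buf: &[u8]) -> Option<(i64, usize)> {
    let (ux, n) = decode_uvarint(buf)?;
    let x = (ux >> 1) as i64;
    Some((if ux & 1 != 0 { !x } else { x }, n))
}

fn main() {
    let mut buf = Vec::new();
    encode_varint(&mut buf, -300);
    assert_eq!(decode_varint(&buf), Some((-300, buf.len())));
}
```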
- -[package] -name = "future_ext" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -futures = { workspace = true } -lazy_static = { workspace = true } -prometheus = { workspace = true } -rand = { workspace = true } -runtime = { workspace = true } -tokio = { workspace = true, features = ["time"] } diff --git a/src/components/future_ext/src/cancel.rs b/src/components/future_ext/src/cancel.rs deleted file mode 100644 index ab223b0466..0000000000 --- a/src/components/future_ext/src/cancel.rs +++ /dev/null @@ -1,190 +0,0 @@ -//! A future wrapper to ensure the wrapped future must be polled. -//! -//! This implementation is forked from: https://github.com/influxdata/influxdb_iox/blob/885767aa0a6010de592bde9992945b01389eb994/cache_system/src/cancellation_safe_future.rs -//! Here is the copyright and license disclaimer: -//! Copyright (c) 2020 InfluxData. Licensed under Apache-2.0. - -use std::{ - future::Future, - pin::Pin, - task::{Context, Poll}, -}; - -use futures::future::BoxFuture; -use lazy_static::lazy_static; -use prometheus::{register_int_counter_vec, IntCounterVec}; -use runtime::RuntimeRef; - -lazy_static! { - static ref FUTURE_CANCEL_COUNTER: IntCounterVec = register_int_counter_vec!( - "future_cancel_counter", - "Counter of future cancel", - &["token"] - ) - .unwrap(); -} - -/// Wrapper around a future that cannot be cancelled. -/// -/// When the future is dropped/cancelled, we'll spawn a tokio task to _rescue_ -/// it. -pub struct CancellationSafeFuture -where - F: Future + Send + 'static, - F::Output: Send, - T: AsRef + 'static + Send + Unpin, -{ - /// Token for metrics - token: T, - - /// Mark if the inner future finished. If not, we must spawn a helper task - /// on drop. - done: bool, - - /// Inner future. - /// - /// Wrapped in an `Option` so we can extract it during drop. Inside that - /// option however we also need a pinned box because once this wrapper - /// is polled, it will be pinned in memory -- even during drop. Now the - /// inner future does not necessarily implement `Unpin`, so we need a - /// heap allocation to pin it in memory even when we move it out of this - /// option. - inner: Option>, - - /// The runtime to execute the dropped future. - runtime: RuntimeRef, -} - -impl Drop for CancellationSafeFuture -where - F: Future + Send + 'static, - F::Output: Send, - T: AsRef + 'static + Send + Unpin, -{ - fn drop(&mut self) { - if !self.done { - FUTURE_CANCEL_COUNTER - .with_label_values(&[self.token.as_ref()]) - .inc(); - - let inner = self.inner.take().unwrap(); - let handle = self.runtime.spawn(inner); - drop(handle); - } - } -} - -impl CancellationSafeFuture -where - F: Future + Send, - F::Output: Send, - T: AsRef + 'static + Send + Unpin, -{ - /// Create new future that is protected from cancellation. - /// - /// If [`CancellationSafeFuture`] is cancelled (i.e. dropped) and there is - /// still some external receiver of the state left, than we will drive - /// the payload (`f`) to completion. Otherwise `f` will be cancelled. 
- pub fn new(fut: F, token: T, runtime: RuntimeRef) -> Self { - Self { - token, - done: false, - inner: Some(Box::pin(fut)), - runtime, - } - } -} - -impl Future for CancellationSafeFuture -where - F: Future + Send, - F::Output: Send, - T: AsRef + 'static + Send + Unpin, -{ - type Output = F::Output; - - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - assert!(!self.done, "Polling future that already returned"); - - match self.inner.as_mut().unwrap().as_mut().poll(cx) { - Poll::Ready(res) => { - self.done = true; - Poll::Ready(res) - } - Poll::Pending => Poll::Pending, - } - } -} - -#[cfg(test)] -mod tests { - use std::{ - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, - }, - time::Duration, - }; - - use runtime::Builder; - use tokio::sync::Barrier; - - use super::*; - - fn rt() -> RuntimeRef { - let rt = Builder::default() - .worker_threads(2) - .thread_name("test_spawn_join") - .enable_all() - .build(); - assert!(rt.is_ok()); - Arc::new(rt.unwrap()) - } - - #[test] - fn test_happy_path() { - let runtime = rt(); - let runtime_clone = runtime.clone(); - runtime.block_on(async move { - let done = Arc::new(AtomicBool::new(false)); - let done_captured = Arc::clone(&done); - - let fut = CancellationSafeFuture::new( - async move { - done_captured.store(true, Ordering::SeqCst); - }, - "test", - runtime_clone, - ); - - fut.await; - - assert!(done.load(Ordering::SeqCst)); - }) - } - - #[test] - fn test_cancel_future() { - let runtime = rt(); - let runtime_clone = runtime.clone(); - - runtime.block_on(async move { - let done = Arc::new(Barrier::new(2)); - let done_captured = Arc::clone(&done); - - let fut = CancellationSafeFuture::new( - async move { - done_captured.wait().await; - }, - "test", - runtime_clone, - ); - - drop(fut); - - tokio::time::timeout(Duration::from_secs(5), done.wait()) - .await - .unwrap(); - }); - } -} diff --git a/src/components/future_ext/src/lib.rs b/src/components/future_ext/src/lib.rs deleted file mode 100644 index ab8d71a2bf..0000000000 --- a/src/components/future_ext/src/lib.rs +++ /dev/null @@ -1,24 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Future extensions. - -mod cancel; -mod retry; - -pub use cancel::CancellationSafeFuture; -pub use retry::{retry_async, BackoffConfig, RetryConfig}; diff --git a/src/components/future_ext/src/retry.rs b/src/components/future_ext/src/retry.rs deleted file mode 100644 index ea646b6318..0000000000 --- a/src/components/future_ext/src/retry.rs +++ /dev/null @@ -1,223 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Util function to retry future. - -use std::time::Duration; - -use futures::Future; -use rand::prelude::*; - -pub struct RetryConfig { - pub max_retries: usize, - pub backoff: BackoffConfig, -} - -impl Default for RetryConfig { - fn default() -> Self { - Self { - max_retries: 3, - backoff: Default::default(), - } - } -} - -// This backoff implementation is ported from -// https://github.com/apache/arrow-rs/blob/dfb642809e93c2c1b8343692f4e4b3080000f988/object_store/src/client/backoff.rs#L26 -pub struct BackoffConfig { - /// The initial backoff duration - pub init_backoff: Duration, - /// The maximum backoff duration - pub max_backoff: Duration, - /// The base of the exponential to use - pub base: f64, -} - -impl Default for BackoffConfig { - fn default() -> Self { - Self { - init_backoff: Duration::from_millis(100), - max_backoff: Duration::from_secs(15), - base: 2., - } - } -} - -pub struct Backoff { - init_backoff: f64, - next_backoff_secs: f64, - max_backoff_secs: f64, - base: f64, - rng: Option>, -} - -impl Backoff { - /// Create a new [`Backoff`] from the provided [`BackoffConfig`] - pub fn new(config: &BackoffConfig) -> Self { - Self::new_with_rng(config, None) - } - - /// Creates a new `Backoff` with the optional `rng` - /// - /// Used [`rand::thread_rng()`] if no rng provided - pub fn new_with_rng( - config: &BackoffConfig, - rng: Option>, - ) -> Self { - let init_backoff = config.init_backoff.as_secs_f64(); - Self { - init_backoff, - next_backoff_secs: init_backoff, - max_backoff_secs: config.max_backoff.as_secs_f64(), - base: config.base, - rng, - } - } - - /// Returns the next backoff duration to wait for - pub fn next(&mut self) -> Duration { - let range = self.init_backoff..(self.next_backoff_secs * self.base); - - let rand_backoff = match self.rng.as_mut() { - Some(rng) => rng.gen_range(range), - None => thread_rng().gen_range(range), - }; - - let next_backoff = self.max_backoff_secs.min(rand_backoff); - Duration::from_secs_f64(std::mem::replace(&mut self.next_backoff_secs, next_backoff)) - } -} - -pub async fn retry_async(f: F, config: &RetryConfig) -> Fut::Output -where - F: Fn() -> Fut, - Fut: Future>, -{ - let mut backoff = Backoff::new(&config.backoff); - for _ in 0..config.max_retries { - let result = f().await; - - if result.is_ok() { - return result; - } - tokio::time::sleep(backoff.next()).await; - } - - f().await -} - -#[cfg(test)] -mod tests { - use std::sync::atomic::{AtomicU8, Ordering}; - - use rand::rngs::mock::StepRng; - - use super::*; - - #[tokio::test] - async fn test_retry_async() { - let config = RetryConfig { - max_retries: 3, - backoff: Default::default(), - }; - - // always fails - { - let runs = AtomicU8::new(0); - let f = || { - runs.fetch_add(1, Ordering::Relaxed); - futures::future::err::(1) - }; - - let ret = retry_async(f, &config).await; - assert!(ret.is_err()); - assert_eq!(4, runs.load(Ordering::Relaxed)); - } - - // succeed directly - { - let runs = 
AtomicU8::new(0); - let f = || { - runs.fetch_add(1, Ordering::Relaxed); - futures::future::ok::(1) - }; - - let ret = retry_async(f, &config).await; - assert_eq!(1, ret.unwrap()); - assert_eq!(1, runs.load(Ordering::Relaxed)); - } - - // fail 2 times, then succeed - { - let runs = AtomicU8::new(0); - let f = || { - if runs.fetch_add(1, Ordering::Relaxed) < 2 { - return futures::future::err::<_, i32>(1); - } - - futures::future::ok::<_, i32>(2) - }; - - let ret = retry_async(f, &config).await; - assert_eq!(2, ret.unwrap()); - assert_eq!(3, runs.load(Ordering::Relaxed)); - } - } - - #[test] - fn test_backoff() { - let init_backoff_secs = 1.0; - let max_backoff_secs = 500.0; - let base = 3.0; - - let config = BackoffConfig { - init_backoff: Duration::from_secs_f64(init_backoff_secs), - max_backoff: Duration::from_secs_f64(max_backoff_secs), - base, - }; - - let assert_fuzzy_eq = |a: f64, b: f64| assert!((b - a).abs() < 0.0001, "{a} != {b}"); - - // Create a static rng that takes the minimum of the range - let rng = Box::new(StepRng::new(0, 0)); - let mut backoff = Backoff::new_with_rng(&config, Some(rng)); - - for _ in 0..20 { - assert_eq!(backoff.next().as_secs_f64(), init_backoff_secs); - } - - // Create a static rng that takes the maximum of the range - let rng = Box::new(StepRng::new(u64::MAX, 0)); - let mut backoff = Backoff::new_with_rng(&config, Some(rng)); - - for i in 0..20 { - let value = (base.powi(i) * init_backoff_secs).min(max_backoff_secs); - assert_fuzzy_eq(backoff.next().as_secs_f64(), value); - } - - // Create a static rng that takes the mid point of the range - let rng = Box::new(StepRng::new(u64::MAX / 2, 0)); - let mut backoff = Backoff::new_with_rng(&config, Some(rng)); - - let mut value = init_backoff_secs; - for _ in 0..20 { - assert_fuzzy_eq(backoff.next().as_secs_f64(), value); - value = - (init_backoff_secs + (value * base - init_backoff_secs) / 2.).min(max_backoff_secs); - } - } -} diff --git a/src/components/generic_error/Cargo.toml b/src/components/generic_error/Cargo.toml deleted file mode 100644 index 6eb5eb4086..0000000000 --- a/src/components/generic_error/Cargo.toml +++ /dev/null @@ -1,31 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "generic_error" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true diff --git a/src/components/generic_error/src/lib.rs b/src/components/generic_error/src/lib.rs deleted file mode 100644 index abb75fc1b2..0000000000 --- a/src/components/generic_error/src/lib.rs +++ /dev/null @@ -1,34 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -pub type GenericError = Box; -pub type GenericResult = std::result::Result; - -pub trait BoxError { - type Item; - - fn box_err(self) -> Result; -} - -impl BoxError for Result { - type Item = T; - - #[inline(always)] - fn box_err(self) -> Result { - self.map_err(|e| Box::new(e) as _) - } -} diff --git a/src/components/hash_ext/Cargo.toml b/src/components/hash_ext/Cargo.toml deleted file mode 100644 index c200cc06fe..0000000000 --- a/src/components/hash_ext/Cargo.toml +++ /dev/null @@ -1,37 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "hash_ext" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -ahash = { version = "0.8.2", default-features = false, features = ["runtime-rng"] } -byteorder = "1.2" -murmur3 = "0.4.1" -seahash = "4.1.0" diff --git a/src/components/hash_ext/src/lib.rs b/src/components/hash_ext/src/lib.rs deleted file mode 100644 index 3ab698429f..0000000000 --- a/src/components/hash_ext/src/lib.rs +++ /dev/null @@ -1,100 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
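
The deleted `generic_error` crate above is the usual type-erased error pattern. A minimal std-only sketch of the same idea follows; `box_err` mirrors the original helper name, while `parse_port` is a made-up caller for illustration.

```rust
use std::error::Error;

// Type-erased error shared across layers, as in the deleted crate.
pub type GenericError = Box<dyn Error + Send + Sync>;
pub type GenericResult<T> = Result<T, GenericError>;

// Extension trait that boxes any concrete error into `GenericError`.
pub trait BoxError {
    type Item;
    fn box_err(self) -> GenericResult<Self::Item>;
}

impl<T, E: Error + Send + Sync + 'static> BoxError for Result<T, E> {
    type Item = T;
    #[inline]
    fn box_err(self) -> GenericResult<T> {
        self.map_err(|e| Box::new(e) as GenericError)
    }
}

fn parse_port(s: &str) -> GenericResult<u16> {
    // The concrete `ParseIntError` is erased into the generic error type here.
    s.parse::<u16>().box_err()
}

fn main() {
    assert_eq!(parse_port("8080").unwrap(), 8080);
    assert!(parse_port("not-a-port").is_err());
}
```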
- -/// Which Hash to use: -/// - Memory: aHash -/// - Disk: SeaHash -/// https://github.com/CeresDB/hash-benchmark-rs -use std::{hash::BuildHasher, io::Read}; - -pub use ahash; -use byteorder::{ByteOrder, LittleEndian}; -use murmur3::murmur3_x64_128; -use seahash::SeaHasher; - -#[derive(Debug)] -pub struct SeaHasherBuilder; - -impl BuildHasher for SeaHasherBuilder { - type Hasher = SeaHasher; - - fn build_hasher(&self) -> Self::Hasher { - SeaHasher::new() - } -} - -pub fn hash64(mut source: R) -> u64 { - let mut out = [0; 16]; - murmur3_x64_128(&mut source, 0, &mut out); - // in most cases we run on little endian target - LittleEndian::read_u64(&out[0..8]) -} - -pub fn build_fixed_seed_ahasher_builder() -> ahash::RandomState { - ahash::RandomState::with_seeds(0, 0, 0, 0) -} - -#[cfg(test)] -mod test { - use std::{collections::hash_map::DefaultHasher, hash::Hasher}; - - use super::*; - - #[test] - fn test_murmur_hash() { - assert_eq!(hash64(&(vec![])[..]), 0); - - for (key, code) in [ - (b"cse_engine_hash_mod_test_bytes1", 6401327391689448380), - (b"cse_engine_hash_mod_test_bytes2", 10824100215277000151), - ] { - assert_eq!(code, hash64(key.as_slice())); - } - } - - #[test] - fn test_sea_hash() { - let mut hasher = SeaHasher::new(); - hasher.write(&[]); - assert_eq!(14492805990617963705, hasher.finish()); - - for (key, code) in [ - (b"cse_engine_hash_mod_test_bytes1", 16301057587465450460), - (b"cse_engine_hash_mod_test_bytes2", 10270658030298139083), - ] { - let mut hasher = SeaHasher::new(); - hasher.write(key); - assert_eq!(code, hasher.finish()); - } - } - - #[test] - fn test_default_hash() { - let mut hasher = DefaultHasher::new(); - hasher.write(&[]); - assert_eq!(15130871412783076140, hasher.finish()); - - for (key, code) in [ - (b"cse_engine_hash_mod_test_bytes1", 8669533354716427219), - (b"cse_engine_hash_mod_test_bytes2", 6496951441253214618), - ] { - let mut hasher = DefaultHasher::new(); - hasher.write(key); - assert_eq!(code, hasher.finish()); - } - } -} diff --git a/src/components/id_allocator/Cargo.toml b/src/components/id_allocator/Cargo.toml deleted file mode 100644 index e22a9ef65b..0000000000 --- a/src/components/id_allocator/Cargo.toml +++ /dev/null @@ -1,36 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
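
The deleted `hash_ext` crate above mostly exists to plug deterministic hashers (SeaHash, fixed-seed aHash) into standard collections through `BuildHasher`. Below is a std-only sketch of that plumbing, assuming a toy FNV-1a hasher as a stand-in; none of these names come from the codebase.

```rust
use std::collections::HashMap;
use std::hash::{BuildHasher, Hasher};

/// Toy deterministic hasher (FNV-1a); stands in for SeaHash or fixed-seed aHash.
struct Fnv1a(u64);

impl Hasher for Fnv1a {
    fn finish(&self) -> u64 {
        self.0
    }
    fn write(&mut self, bytes: &[u8]) {
        for &b in bytes {
            self.0 ^= u64::from(b);
            self.0 = self.0.wrapping_mul(0x100_0000_01b3);
        }
    }
}

/// Builder with no random state, so hashes are stable across runs,
/// which is the property wanted for anything that ends up on disk.
#[derive(Default)]
struct Fnv1aBuilder;

impl BuildHasher for Fnv1aBuilder {
    type Hasher = Fnv1a;
    fn build_hasher(&self) -> Fnv1a {
        Fnv1a(0xcbf2_9ce4_8422_2325)
    }
}

fn main() {
    let mut map: HashMap<&str, u32, Fnv1aBuilder> = HashMap::with_hasher(Fnv1aBuilder);
    map.insert("shard-0", 1);
    assert_eq!(map.get("shard-0"), Some(&1));
}
```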
- -[package] -name = "id_allocator" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -# In alphabetical order -generic_error = { workspace = true } -tokio = { workspace = true } diff --git a/src/components/id_allocator/src/lib.rs b/src/components/id_allocator/src/lib.rs deleted file mode 100644 index 438d60a0ba..0000000000 --- a/src/components/id_allocator/src/lib.rs +++ /dev/null @@ -1,121 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::future::Future; - -use generic_error::GenericResult; -use tokio::sync::RwLock; - -struct Inner { - last_id: u64, - max_id: u64, - alloc_step: u64, -} - -impl Inner { - /// New a allocator. - pub fn new(last_id: u64, max_id: u64, alloc_step: u64) -> Self { - assert!(alloc_step > 0); - Self { - last_id, - max_id, - alloc_step, - } - } - - /// Alloc id. - pub async fn alloc_id(&mut self, persist_next_max_id: F) -> GenericResult - where - F: FnOnce(u64) -> T, - T: Future>, - { - if self.last_id < self.max_id { - self.last_id += 1; - return Ok(self.last_id); - } - - // Update new max id. - let next_max_id = self.last_id + self.alloc_step; - - // persist new max id. - persist_next_max_id(next_max_id).await?; - - // Update memory. - self.max_id = next_max_id; - - self.last_id += 1; - Ok(self.last_id) - } -} - -pub struct IdAllocator { - inner: RwLock, -} - -impl IdAllocator { - /// New a id allocator. - pub fn new(last_id: u64, max_id: u64, alloc_step: u64) -> Self { - Self { - inner: RwLock::new(Inner::new(last_id, max_id, alloc_step)), - } - } - - /// Alloc id. 
- pub async fn alloc_id(&self, persist_next_max_id: F) -> GenericResult - where - F: FnOnce(u64) -> T, - T: Future>, - { - self.inner.write().await.alloc_id(persist_next_max_id).await - } -} - -#[cfg(test)] - -mod test { - use tokio::runtime::Runtime; - - use super::*; - - #[test] - fn test_alloc_id() { - let rt = Runtime::new().unwrap(); - let allocator = IdAllocator::new(0, 0, 100); - - rt.block_on(async move { - let persist_max_file_id = move |next_max_file_id| async move { - assert_eq!(next_max_file_id, 100); - Ok(()) - }; - - for i in 1..=100 { - let res = allocator.alloc_id(persist_max_file_id).await.unwrap(); - assert_eq!(res, i); - } - - let persist_max_file_id = move |next_max_file_id| async move { - assert_eq!(next_max_file_id, 200); - Ok(()) - }; - - for i in 101..=200 { - let res = allocator.alloc_id(persist_max_file_id).await.unwrap(); - assert_eq!(res, i); - } - }); - } -} diff --git a/src/components/logger/Cargo.toml b/src/components/logger/Cargo.toml deleted file mode 100644 index f33e2a5374..0000000000 --- a/src/components/logger/Cargo.toml +++ /dev/null @@ -1,45 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "logger" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[dependencies.slog-global] -version = "0.1" -git = "https://github.com/tikv/slog-global.git" -rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" - -[package.edition] -workspace = true - -[dependencies] -chrono = { workspace = true } -log = "0.4" -runtime = { workspace = true } -serde = { workspace = true } -slog = { workspace = true } -slog-async = "2.6" -slog-term = "2.8" diff --git a/src/components/logger/src/lib.rs b/src/components/logger/src/lib.rs deleted file mode 100644 index 53eb7c0d3b..0000000000 --- a/src/components/logger/src/lib.rs +++ /dev/null @@ -1,614 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
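
The deleted `id_allocator` above amortizes persistence by reserving `alloc_step` ids per flush of the new upper bound. A synchronous, std-only sketch of that idea follows; the tokio `RwLock` and `GenericResult` plumbing of the original are omitted and the closure body is illustrative.

```rust
/// Allocates monotonically increasing ids, persisting a new upper bound
/// only once every `alloc_step` allocations.
struct IdAllocator {
    last_id: u64,
    max_id: u64,
    alloc_step: u64,
}

impl IdAllocator {
    fn new(last_id: u64, max_id: u64, alloc_step: u64) -> Self {
        assert!(alloc_step > 0);
        Self { last_id, max_id, alloc_step }
    }

    /// `persist` is called with the new upper bound before any id beyond
    /// the current reservation is handed out, so a restart can resume
    /// safely from the persisted value.
    fn alloc_id<E>(&mut self, persist: impl FnOnce(u64) -> Result<(), E>) -> Result<u64, E> {
        if self.last_id >= self.max_id {
            let next_max_id = self.last_id + self.alloc_step;
            persist(next_max_id)?;
            self.max_id = next_max_id;
        }
        self.last_id += 1;
        Ok(self.last_id)
    }
}

fn main() {
    let mut persisted = 0u64;
    let mut alloc = IdAllocator::new(0, 0, 100);
    for expect in 1..=150u64 {
        let id = alloc
            .alloc_id(|next_max| -> Result<(), ()> {
                persisted = next_max; // the real code writes this to durable storage
                Ok(())
            })
            .unwrap();
        assert_eq!(id, expect);
    }
    assert_eq!(persisted, 200); // two reservations of 100 ids each
}
```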
- -use std::{ - fmt, - fs::{File, OpenOptions}, - io, - str::FromStr, - sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, Mutex, - }, - time::{Duration, Instant}, -}; - -pub use log::{ - debug as log_debug, error as log_error, info as log_info, max_level, trace as log_trace, - warn as log_warn, SetLoggerError, -}; -use runtime::Priority; -use serde::{Deserialize, Serialize}; -pub use slog::Level; -use slog::{slog_o, Drain, Key, OwnedKVList, Record, KV}; -use slog_async::{Async, OverflowStrategy}; -use slog_term::{Decorator, PlainDecorator, RecordDecorator, TermDecorator}; - -const ASYNC_CHAN_SIZE: usize = 102400; -const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S%.3f"; -pub const SLOW_QUERY_TAG: &str = "slow"; -pub const DEFAULT_TAG: &str = ""; - -// Thanks to tikv -// https://github.com/tikv/tikv/blob/eaeb39a2c85684de08c48cf4b9426b3faf4defe6/components/tikv_util/src/logger/mod.rs - -pub fn convert_slog_level_to_log_level(lv: Level) -> log::Level { - match lv { - Level::Critical | Level::Error => log::Level::Error, - Level::Warning => log::Level::Warn, - Level::Debug => log::Level::Debug, - Level::Trace => log::Level::Trace, - Level::Info => log::Level::Info, - } -} - -pub fn convert_log_level_to_slog_level(lv: log::Level) -> Level { - match lv { - log::Level::Error => Level::Error, - log::Level::Warn => Level::Warning, - log::Level::Debug => Level::Debug, - log::Level::Trace => Level::Trace, - log::Level::Info => Level::Info, - } -} - -// The `to_string()` function of `slog::Level` produces values like `erro` and -// `trce` instead of the full words. This produces the full word. -fn get_string_by_level(lv: Level) -> &'static str { - match lv { - Level::Critical => "critical", - Level::Error => "error", - Level::Warning => "warn", - Level::Debug => "debug", - Level::Trace => "trace", - Level::Info => "info", - } -} - -pub fn term_drainer() -> LogFormat { - let decorator = TermDecorator::new().stdout().build(); - LogFormat::new(decorator) -} - -pub fn file_drainer(path: &Option) -> Option>> { - match path { - Some(path) => { - let file = OpenOptions::new() - .create(true) - .append(true) - .open(path) - .unwrap(); - let decorator = PlainDecorator::new(file); - Some(LogFormat::new(decorator)) - } - None => None, - } -} - -/// Dispatcher for logs -pub struct LogDispatcher { - normal: N, - slow: Option, -} - -impl LogDispatcher { - pub fn new(normal: N, slow: Option) -> Self { - Self { normal, slow } - } -} - -impl Drain for LogDispatcher -where - N: Drain, - S: Drain, -{ - type Err = io::Error; - type Ok = (); - - fn log(&self, record: &Record, values: &OwnedKVList) -> Result { - let tag = record.tag(); - if tag == DEFAULT_TAG { - self.normal.log(record, values) - } else if self.slow.is_some() && tag == SLOW_QUERY_TAG { - self.slow.as_ref().unwrap().log(record, values) - } else { - // For crates outside horaedb - self.normal.log(record, values) - } - } -} - -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(default)] -/// The config for logger. -pub struct Config { - pub level: String, - pub enable_async: bool, - pub async_channel_len: i32, - pub slow_query_path: Option, - pub failed_query_path: Option, -} - -impl Default for Config { - fn default() -> Self { - Self { - level: "info".to_string(), - enable_async: true, - async_channel_len: 102400, - slow_query_path: None, - failed_query_path: None, - } - } -} - -/// Initialize the logger, configured by the [Config]. 
-pub fn init_log(config: &Config) -> Result { - let level = match Level::from_str(&config.level) { - Ok(v) => v, - Err(e) => { - panic!( - "Parse log level failed, level: {}, err: {:?}", - &config.level, e - ); - } - }; - - let normal_drain = term_drainer(); - let slow_drain = file_drainer(&config.slow_query_path); - let drain = LogDispatcher::new(normal_drain, slow_drain); - - // Use async and init stdlog - init_log_from_drain( - drain, - level, - config.enable_async, - config.async_channel_len, - true, - ) -} - -pub fn init_log_from_drain( - drain: D, - level: Level, - use_async: bool, - async_log_channel_len: i32, - init_stdlog: bool, -) -> Result -where - D: Drain + Send + 'static, - ::Err: std::fmt::Display, -{ - let runtime_level = RuntimeLevel::new(level); - // TODO(yingwen): Consider print the error instead of just ignoring it? - let root_logger = if use_async { - let drain = if async_log_channel_len <= 0 { - Async::new(drain.ignore_res()) - .chan_size(ASYNC_CHAN_SIZE) - .overflow_strategy(OverflowStrategy::Block) - .build() - } else { - Async::new(drain.ignore_res()) - .chan_size(async_log_channel_len as usize) - .build() - }; - let drain = RuntimeLevelFilter::new(drain, runtime_level.clone()); - slog::Logger::root(drain.ignore_res(), slog_o!()) - } else { - let drain = RuntimeLevelFilter::new(Mutex::new(drain), runtime_level.clone()); - slog::Logger::root(drain.ignore_res(), slog_o!()) - }; - - slog_global::set_global(root_logger); - if init_stdlog { - slog_global::redirect_std_log(Some(level))?; - } - - Ok(runtime_level) -} - -// e.g. -// ```text -// 2020-01-20 13:00:14.998 INFO [src/engine/rocksdb/rocks_kv.rs:394] RocksKV::open_with_op start, name:autogen -// ``` -pub struct LogFormat -where - D: Decorator, -{ - decorator: D, -} - -impl LogFormat -where - D: Decorator, -{ - fn new(decorator: D) -> Self { - Self { decorator } - } -} - -impl Drain for LogFormat -where - D: Decorator, -{ - type Err = io::Error; - type Ok = (); - - fn log(&self, record: &Record, values: &OwnedKVList) -> Result { - self.decorator.with_record(record, values, |decorator| { - write_log_header(decorator, record)?; - write_log_msg(decorator, record)?; - write_log_fields(decorator, record, values)?; - - decorator.start_whitespace()?; - writeln!(decorator)?; - - decorator.flush()?; - - Ok(()) - }) - } -} - -#[derive(Clone)] -pub struct RuntimeLevel { - level: Arc, - default_level: Level, -} - -impl RuntimeLevel { - fn new(default_level: Level) -> Self { - Self { - level: Arc::new(AtomicUsize::new(default_level.as_usize())), - default_level, - } - } - - #[inline] - pub fn current_level(&self) -> Level { - Level::from_usize(self.level.load(Ordering::Relaxed)).unwrap_or(self.default_level) - } - - pub fn set_level(&self, level: Level) { - self.level.store(level.as_usize(), Ordering::Relaxed); - // Log level of std log is not changed unless we call `log::set_max_level` - log::set_max_level(convert_slog_level_to_log_level(level).to_level_filter()); - - // We should not print things about logger use the logger... 
- println!( - "RuntimeLevel::set_level log level changed to {}", - get_string_by_level(level) - ); - } - - #[inline] - pub fn reset(&self) { - self.set_level(self.default_level); - } - - #[inline] - pub fn default_level(&self) -> Level { - self.default_level - } - - #[inline] - pub fn current_level_str(&self) -> &str { - get_string_by_level(self.current_level()) - } - - pub fn set_level_by_str(&self, level_str: &str) -> Result<(), String> { - Level::from_str(level_str) - .map_err(|_| format!("Invalid level {level_str}")) - .and_then(|level| match level { - Level::Trace | Level::Debug | Level::Info => Ok(level), - _ => Err("Only allow to change log level to ".to_owned()), - }) - .map(|level| self.set_level(level)) - } -} - -struct RuntimeLevelFilter { - drain: D, - runtime_level: RuntimeLevel, -} - -impl RuntimeLevelFilter { - fn new(drain: D, runtime_level: RuntimeLevel) -> Self { - Self { - drain, - runtime_level, - } - } -} - -impl Drain for RuntimeLevelFilter -where - D: Drain, -{ - type Err = D::Err; - type Ok = Option; - - fn log(&self, record: &Record, values: &OwnedKVList) -> Result { - let current_level = self.runtime_level.current_level(); - - if record.level().is_at_least(current_level) { - Ok(Some(self.drain.log(record, values)?)) - } else { - Ok(None) - } - } -} - -fn write_log_header(decorator: &mut dyn RecordDecorator, record: &Record<'_>) -> io::Result<()> { - decorator.start_timestamp()?; - write!( - decorator, - "{}", - chrono::Local::now().format(TIMESTAMP_FORMAT) - )?; - - decorator.start_whitespace()?; - write!(decorator, " ")?; - - decorator.start_level()?; - write!(decorator, "{}", record.level().as_short_str())?; - - decorator.start_whitespace()?; - write!(decorator, " ")?; - - // Writes source file info. - decorator.start_msg()?; // There is no `start_file()` or `start_line()`. 
- write!(decorator, "[{}:{}]", record.file(), record.line())?; - - Ok(()) -} - -fn write_log_msg(decorator: &mut dyn RecordDecorator, record: &Record<'_>) -> io::Result<()> { - decorator.start_whitespace()?; - write!(decorator, " ")?; - - decorator.start_msg()?; - write!(decorator, "{}", record.msg())?; - - Ok(()) -} - -fn write_log_fields( - decorator: &mut dyn RecordDecorator, - record: &Record<'_>, - values: &OwnedKVList, -) -> io::Result<()> { - let mut serializer = Serializer::new(decorator); - - record.kv().serialize(record, &mut serializer)?; - - values.serialize(record, &mut serializer)?; - - serializer.finish()?; - - Ok(()) -} - -struct Serializer<'a> { - decorator: &'a mut dyn RecordDecorator, -} - -impl<'a> Serializer<'a> { - fn new(decorator: &'a mut dyn RecordDecorator) -> Self { - Serializer { decorator } - } - - fn write_whitespace(&mut self) -> io::Result<()> { - self.decorator.start_whitespace()?; - write!(self.decorator, " ")?; - Ok(()) - } - - fn finish(self) -> io::Result<()> { - Ok(()) - } -} - -impl<'a> Drop for Serializer<'a> { - fn drop(&mut self) {} -} - -impl<'a> slog::Serializer for Serializer<'a> { - fn emit_none(&mut self, key: Key) -> slog::Result { - self.emit_arguments(key, &format_args!("None")) - } - - fn emit_arguments(&mut self, key: Key, val: &fmt::Arguments<'_>) -> slog::Result { - self.write_whitespace()?; - - // Write key - write!(self.decorator, "[")?; - self.decorator.start_key()?; - write!(self.decorator, "{key}")?; - - // Write separator - self.decorator.start_separator()?; - write!(self.decorator, ":")?; - - // Write value - self.decorator.start_value()?; - write!(self.decorator, "{val}")?; - self.decorator.reset()?; - write!(self.decorator, "]")?; - - Ok(()) - } -} - -pub fn init_test_logger() { - // level - let level = Level::Info; - - // drain - let term_drain = term_drainer(); - let drain = LogDispatcher::new(term_drain, Option::>>::None); - - // Use async and init stdlog - let _ = init_log_from_drain(drain, level, false, 12400, true); -} - -/// Timer for collecting slow query -#[derive(Debug)] -pub struct SlowTimer<'a> { - request_id: &'a str, - sql: &'a str, - slow_threshold: Duration, - start_time: Instant, - priority: Option, -} - -impl<'a> Drop for SlowTimer<'a> { - fn drop(&mut self) { - let cost = self.elapsed(); - if cost > self.slow_threshold { - slow_query!( - "Normal query elapsed:{:?}, id:{}, priority:{:?}, query:{}", - cost, - self.request_id, - self.priority, - self.sql, - ); - } - } -} - -impl<'a> SlowTimer<'a> { - pub fn new(request_id: &'a str, sql: &'a str, threshold: Duration) -> SlowTimer<'a> { - SlowTimer { - request_id, - sql, - slow_threshold: threshold, - start_time: Instant::now(), - priority: None, - } - } - - pub fn elapsed(&self) -> Duration { - self.start_time.elapsed() - } - - pub fn start_time(&self) -> Instant { - self.start_time - } - - pub fn priority(&mut self, priority: Priority) { - self.priority = Some(priority); - } -} - -#[macro_export(local_inner_macros)] -macro_rules! error { - (target: $target:expr, $($arg:tt)+) => {{ - log_error!(target: $target, $($arg)+); - }}; - - ($($arg:tt)+) => {{ - log_error!(target: logger::DEFAULT_TAG, $($arg)+); - }} -} - -#[macro_export(local_inner_macros)] -macro_rules! warn { - (target: $target:expr, $($arg:tt)+) => {{ - log_warn!(target: $target, $($arg)+); - }}; - - ($($arg:tt)+) => {{ - log_warn!(target: logger::DEFAULT_TAG, $($arg)+); - }} -} - -#[macro_export(local_inner_macros)] -macro_rules! 
info { - (target: $target:expr, $($arg:tt)+) => {{ - log_info!(target: $target, $($arg)+); - }}; - - ($($arg:tt)+) => {{ - log_info!(target: logger::DEFAULT_TAG, $($arg)+); - }} -} - -#[macro_export(local_inner_macros)] -macro_rules! debug { - (target: $target:expr, $($arg:tt)+) => {{ - log_debug!(target: $target, $($arg)+); - }}; - - ($($arg:tt)+) => {{ - log_debug!(target: logger::DEFAULT_TAG, $($arg)+); - }} -} - -#[macro_export(local_inner_macros)] -macro_rules! trace { - (target: $target:expr, $($arg:tt)+) => {{ - log_trace!(target: $target, $($arg)+); - }}; - - ($($arg:tt)+) => {{ - log_trace!(target: logger::DEFAULT_TAG, $($arg)+); - }} -} - -#[macro_export(local_inner_macros)] -macro_rules! slow_query { - ($($args:tt)*) => {{ - info!(target: $crate::SLOW_QUERY_TAG, $($args)*); - }} -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_runtime_level() { - let runtime_level = RuntimeLevel::new(Level::Info); - - assert_eq!(runtime_level.current_level(), Level::Info); - assert_eq!(runtime_level.default_level(), Level::Info); - - runtime_level.set_level(Level::Debug); - assert_eq!(runtime_level.current_level(), Level::Debug); - assert_eq!(runtime_level.default_level(), Level::Info); - - runtime_level.reset(); - assert_eq!(runtime_level.current_level(), Level::Info); - assert_eq!(runtime_level.current_level_str(), "info"); - - runtime_level.set_level_by_str("trace").unwrap(); - assert_eq!(runtime_level.current_level(), Level::Trace); - runtime_level.set_level_by_str("debug").unwrap(); - assert_eq!(runtime_level.current_level(), Level::Debug); - runtime_level.set_level_by_str("info").unwrap(); - assert_eq!(runtime_level.current_level(), Level::Info); - - assert!(runtime_level.set_level_by_str("warn").is_err()); - assert_eq!(runtime_level.current_level(), Level::Info); - assert!(runtime_level.set_level_by_str("warning").is_err()); - assert!(runtime_level.set_level_by_str("critical").is_err()); - assert!(runtime_level.set_level_by_str("error").is_err()); - assert!(runtime_level.set_level_by_str("no such level").is_err()); - - assert_eq!(runtime_level.current_level(), Level::Info); - } -} diff --git a/src/components/macros/Cargo.toml b/src/components/macros/Cargo.toml deleted file mode 100644 index 9645bcb0cc..0000000000 --- a/src/components/macros/Cargo.toml +++ /dev/null @@ -1,31 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
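
The deleted logger's `SlowTimer` above relies on `Drop` to report a query once it outlives its threshold. A std-only sketch of that drop-guard pattern follows, assuming a plain `println!` in place of the slog slow-query drain; names are illustrative.

```rust
use std::time::{Duration, Instant};

/// Guard that reports the elapsed time of a query when dropped,
/// but only if it exceeded the slow threshold.
struct SlowTimer<'a> {
    request_id: &'a str,
    sql: &'a str,
    slow_threshold: Duration,
    start_time: Instant,
}

impl<'a> SlowTimer<'a> {
    fn new(request_id: &'a str, sql: &'a str, slow_threshold: Duration) -> Self {
        Self { request_id, sql, slow_threshold, start_time: Instant::now() }
    }
}

impl Drop for SlowTimer<'_> {
    fn drop(&mut self) {
        let cost = self.start_time.elapsed();
        if cost > self.slow_threshold {
            // The real code routes this to a dedicated slow-query log file.
            println!("slow query, cost:{cost:?}, id:{}, sql:{}", self.request_id, self.sql);
        }
    }
}

fn main() {
    let _timer = SlowTimer::new("req-1", "SELECT 1", Duration::from_millis(1));
    std::thread::sleep(Duration::from_millis(5));
    // `_timer` is dropped here and the query is reported as slow.
}
```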
- -[package] -name = "macros" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true diff --git a/src/components/macros/src/lib.rs b/src/components/macros/src/lib.rs deleted file mode 100644 index c627f15199..0000000000 --- a/src/components/macros/src/lib.rs +++ /dev/null @@ -1,84 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Contains all needed macros - -/// Define result for given Error type -#[macro_export] -macro_rules! define_result { - ($t:ty) => { - pub type Result = std::result::Result; - }; -} - -#[macro_export] -macro_rules! hash_map( - { $($key:expr => $value:expr),+ } => { - { - let mut m = ::std::collections::HashMap::new(); - $( - m.insert($key, $value); - )+ - m - } - }; -); - -/// Util for working with anyhow + thiserror -/// Works like anyhow's [ensure](https://docs.rs/anyhow/latest/anyhow/macro.ensure.html) -/// But return `Return` -#[macro_export] -macro_rules! ensure { - ($cond:expr, $msg:literal) => { - if !$cond { - return Err(anyhow::anyhow!($msg).into()); - } - }; - ($cond:expr, $err:expr) => { - if !$cond { - return Err($err.into()); - } - }; - ($cond:expr, $fmt:expr, $($arg:tt)*) => { - if !$cond { - return Err(anyhow::anyhow!($fmt, $($arg)*).into()); - } - }; -} - -#[cfg(test)] -mod tests { - #[test] - fn test_define_result() { - define_result!(i32); - - fn return_i32_error() -> Result<()> { - Err(18) - } - - assert_eq!(Err(18), return_i32_error()); - } - - #[test] - fn test_hash_map() { - let m = hash_map! { 1 => "hello", 2 => "world" }; - - assert_eq!(2, m.len()); - assert_eq!("hello", *m.get(&1).unwrap()); - assert_eq!("world", *m.get(&2).unwrap()); - } -} diff --git a/src/components/message_queue/Cargo.toml b/src/components/message_queue/Cargo.toml deleted file mode 100644 index 759936c214..0000000000 --- a/src/components/message_queue/Cargo.toml +++ /dev/null @@ -1,48 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
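
The deleted `macros` crate above is small enough to sketch inline. The following std-only sketch shows the `define_result!` / `ensure!` shape; unlike the original, `ensure!` here produces a plain `String` error rather than routing through `anyhow`, and `check_shard` is a made-up caller.

```rust
/// Same shape as the deleted `define_result!`: a module-local `Result`
/// alias fixed to one error type.
macro_rules! define_result {
    ($t:ty) => {
        pub type Result<T> = std::result::Result<T, $t>;
    };
}

/// Early-return variant of `assert!`, like the deleted `ensure!`,
/// but producing a `String` error to stay dependency-free.
macro_rules! ensure {
    ($cond:expr, $($msg:tt)+) => {
        if !$cond {
            return Err(format!($($msg)+).into());
        }
    };
}

define_result!(String);

fn check_shard(shard_id: u32, num_shards: u32) -> Result<()> {
    ensure!(shard_id < num_shards, "shard {} out of range {}", shard_id, num_shards);
    Ok(())
}

fn main() {
    assert!(check_shard(1, 4).is_ok());
    assert_eq!(check_shard(9, 4).unwrap_err(), "shard 9 out of range 4");
}
```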
- -[package] -name = "message_queue" - -[package.license] -workspace = true - -[package.edition] -workspace = true - -[package.version] -workspace = true - -[dependencies.rskafka] -git = "https://github.com/Rachelint/rskafka.git" -rev = "f0fd8e278d8164cb0cfca5a80476361fc308ecc3" -default-features = false -features = ["compression-gzip", "compression-lz4", "compression-snappy"] - -[dependencies] -async-trait = { workspace = true } -chrono = { workspace = true } -futures = { workspace = true } -logger = { workspace = true } -macros = { workspace = true } -serde = { workspace = true } -snafu = { workspace = true } -time_ext = { workspace = true } -tokio = { workspace = true } - -[dev-dependencies] -uuid = { version = "1.3", features = ["v4"] } diff --git a/src/components/message_queue/src/kafka/config.rs b/src/components/message_queue/src/kafka/config.rs deleted file mode 100644 index 55824c9299..0000000000 --- a/src/components/message_queue/src/kafka/config.rs +++ /dev/null @@ -1,119 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Kafka implementation's config - -use serde::{Deserialize, Serialize}; -use time_ext::ReadableDuration; - -/// Generic client config that is used for consumers, producers as well as admin -/// operations (like "create topic"). -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -#[serde(default)] -pub struct Config { - pub client: ClientConfig, - pub topic_management: TopicManagementConfig, - pub consumer: ConsumerConfig, - pub retry_interval_factor: f64, - pub init_retry_interval: ReadableDuration, - pub max_retry_interval: ReadableDuration, - pub max_retry: usize, - // TODO: may need some config options for producer, - // but it seems nothing needed now. -} - -impl Default for Config { - fn default() -> Self { - Self { - client: Default::default(), - topic_management: Default::default(), - consumer: Default::default(), - retry_interval_factor: 2.0, - init_retry_interval: ReadableDuration::secs(1), - max_retry_interval: ReadableDuration::secs(10), - max_retry: 10, - } - } -} - -#[derive(Clone, Default, Debug, PartialEq, Eq, Serialize, Deserialize)] -#[serde(default)] -pub struct ClientConfig { - /// The endpoint of boost broker, must be set and will panic if found it - /// None. - pub boost_brokers: Option>, - - /// Maximum message size in bytes. - /// - /// Defaults to `None` (rskafka default). - pub max_message_size: Option, - - /// Optional SOCKS5 proxy to use for connecting to the brokers. - /// - /// Defaults to `None`. - pub socks5_proxy: Option, -} - -/// Config for topic creation. -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -#[serde(default)] -pub struct TopicManagementConfig { - /// Replication factor. - /// - /// Extracted from `replication_factor` option. 
Defaults to `1`. - pub create_replication_factor: i16, - - /// The maximum amount of time to wait while creating topic. - /// - /// Defaults to `5_000`. - pub create_max_wait_ms: i32, - - /// The maximum amount of time to wait while deleting records in topic. - /// - /// Defaults to `5_000`. - pub delete_max_wait_ms: i32, -} - -impl Default for TopicManagementConfig { - fn default() -> Self { - Self { - create_replication_factor: 1, - create_max_wait_ms: 5000, - delete_max_wait_ms: 5000, - } - } -} - -/// Config for consumers. -#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] -#[serde(default)] -pub struct ConsumerConfig { - /// The maximum amount of time to wait for data before returning. - /// - /// Defaults to `None` (rskafka default). - pub max_wait_ms: Option, - - /// The maximum amount of data for the consumer to fetch in a single batch. - /// - /// Defaults to `None` (rskafka default). - pub min_batch_size: Option, - - /// Will wait for at least `min_batch_size` bytes of data. - /// - /// Defaults to `None` (rskafka default). - pub max_batch_size: Option, -} diff --git a/src/components/message_queue/src/kafka/kafka_impl.rs b/src/components/message_queue/src/kafka/kafka_impl.rs deleted file mode 100644 index 61baded463..0000000000 --- a/src/components/message_queue/src/kafka/kafka_impl.rs +++ /dev/null @@ -1,467 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Kafka implementation's detail - -use std::{ - collections::HashMap, - fmt::{Debug, Display}, - sync::Arc, -}; - -use async_trait::async_trait; -use futures::StreamExt; -use logger::info; -use macros::define_result; -use rskafka::{ - client::{ - consumer::{StartOffset as KafkaStartOffset, StreamConsumer, StreamConsumerBuilder}, - controller::ControllerClient, - error::{Error as RskafkaError, ProtocolError}, - partition::{Compression, OffsetAt, PartitionClient, UnknownTopicHandling}, - Client, ClientBuilder, - }, - record::{Record, RecordAndOffset}, - BackoffConfig, -}; -use snafu::{Backtrace, ResultExt, Snafu}; -use tokio::sync::RwLock; - -use crate::{ - kafka::config::{Config, ConsumerConfig}, - ConsumeIterator, Message, MessageAndOffset, MessageQueue, Offset, OffsetType, StartOffset, -}; - -/// The topic (with just one partition) client for Kafka -// -/// `Arc` is needed to ensure its lifetime because in future's gc process, -/// it may has removed from pool but be still in use. 
-type TopicClientRef = Arc; -const PARTITION_NUM: i32 = 1; -const DEFAULT_PARTITION: i32 = 0; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to init kafka client, err:{}", source))] - Init { source: RskafkaError }, - - #[snafu(display("Failed to list topics in kafka, err:{}", source))] - ListTopics { source: RskafkaError }, - - #[snafu(display("Failed to create topic in kafka:{}, err:{}", topic_name, source))] - CreateTopic { - topic_name: String, - source: RskafkaError, - }, - - #[snafu(display( - "Failed to fetch offset(type:{}) from kafka topic:{}, err:{}", - offset_type, - topic_name, - source - ))] - FetchOffset { - topic_name: String, - source: RskafkaError, - offset_type: OffsetType, - }, - - #[snafu(display("Failed to produce to kafka topic:{}, err:{}", topic_name, source))] - Produce { - topic_name: String, - source: RskafkaError, - }, - - #[snafu(display( - "Failed to consume in topic:{} when:{}, source:{}", - topic_name, - when, - source - ))] - Consume { - topic_name: String, - source: RskafkaError, - when: ConsumeWhen, - }, - - #[snafu(display( - "Failed to produce to kafka topic:{}, offset:{}, err:{}", - topic_name, - offset, - source - ))] - DeleteUpTo { - topic_name: String, - offset: i64, - source: RskafkaError, - }, - - #[snafu(display("Unknown error occurred, msg:[{}], backtrace:{}", msg, backtrace))] - Unknown { msg: String, backtrace: Backtrace }, -} - -define_result!(Error); - -#[derive(Debug)] -pub enum ConsumeWhen { - Start, - PollStream, -} - -impl Display for ConsumeWhen { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - ConsumeWhen::Start => f.write_str("start"), - ConsumeWhen::PollStream => f.write_str("poll_stream"), - } - } -} - -#[derive(Clone)] -pub struct KafkaImpl(Arc); - -impl KafkaImpl { - pub async fn new(config: Config) -> Result { - let inner = KafkaImplInner::new(config).await?; - Ok(Self(Arc::new(inner))) - } -} - -struct KafkaImplInner { - config: Config, - client: Client, - controller_client: ControllerClient, - // TODO: maybe gc is needed for `partition_client_pool`. - topic_client_pool: RwLock>, -} - -impl KafkaImplInner { - async fn new(config: Config) -> Result { - info!("Kafka init, config:{:?}", config); - - if config.client.boost_brokers.is_none() { - panic!("The boost broker must be set"); - } - - let backoff_config = BackoffConfig { - init_backoff: config.init_retry_interval.0, - max_backoff: config.max_retry_interval.0, - base: config.retry_interval_factor, - max_retry: config.max_retry, - }; - let mut client_builder = ClientBuilder::new(config.client.boost_brokers.clone().unwrap()) - .backoff_config(backoff_config); - if let Some(max_message_size) = config.client.max_message_size { - client_builder = client_builder.max_message_size(max_message_size); - } - - let client = client_builder.build().await.context(Init)?; - - let controller_client = client.controller_client().context(Init)?; - - Ok(Self { - config, - client, - controller_client, - topic_client_pool: RwLock::new(HashMap::new()), - }) - } - - async fn get_or_create_topic_client( - &self, - topic_name: &str, - ) -> std::result::Result { - { - let topic_client_pool = self.topic_client_pool.read().await; - // If found, just return it - if let Some(client) = topic_client_pool.get(topic_name) { - return Ok(client.clone()); - } - } - - // Otherwise, we should make a double-check first, - // and if still not found(other thread may has inserted it), - // we should create it. 
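-        // (Read lock first for the cheap hit path; the re-check under the
-        // write lock closes the race where two tasks miss at the same time
-        // and would otherwise both create a partition client.)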
- let mut topic_client_pool = self.topic_client_pool.write().await; - if let Some(client) = topic_client_pool.get(topic_name) { - Ok(client.clone()) - } else { - let client = Arc::new( - self.client - .partition_client(topic_name, DEFAULT_PARTITION, UnknownTopicHandling::Retry) - .await?, - ); - topic_client_pool.insert(topic_name.to_string(), client.clone()); - Ok(client) - } - } -} - -#[async_trait] -impl MessageQueue for KafkaImpl { - type ConsumeIterator = KafkaConsumeIterator; - type Error = Error; - - async fn create_topic_if_not_exist(&self, topic_name: &str) -> Result<()> { - // Check in partition_client_pool first, maybe has exist. - { - let topic_client_pool = self.0.topic_client_pool.read().await; - - if topic_client_pool.contains_key(topic_name) { - info!( - "Topic:{} has exist in kafka and connection to the topic is still alive", - topic_name - ); - return Ok(()); - } - } - - // Create topic in Kafka. - let topic_management_config = &self.0.config.topic_management; - info!("Try to create topic, name:{}.", topic_name); - let result = self - .0 - .controller_client - .create_topic( - topic_name, - PARTITION_NUM, - topic_management_config.create_replication_factor, - topic_management_config.create_max_wait_ms, - ) - .await; - - info!( - "Create topic finish, name:{}, result:{:?}", - topic_name, result - ); - match result { - // Race condition between check and creation action, that's OK. - Ok(_) - | Err(RskafkaError::ServerError { - protocol_error: ProtocolError::TopicAlreadyExists, - .. - }) => Ok(()), - - Err(e) => Err(e).context(CreateTopic { - topic_name: topic_name.to_string(), - }), - } - } - - async fn produce(&self, topic_name: &str, messages: Vec) -> Result> { - let topic_client = self - .0 - .get_or_create_topic_client(topic_name) - .await - .context(Produce { - topic_name: topic_name.to_string(), - })?; - - let records: Vec = messages.into_iter().map(|m| m.into()).collect(); - Ok(topic_client - .produce(records, Compression::default()) - .await - .context(Produce { - topic_name: topic_name.to_string(), - })?) - } - - async fn fetch_offset(&self, topic_name: &str, offset_type: OffsetType) -> Result { - let topic_client = self - .0 - .get_or_create_topic_client(topic_name) - .await - .context(FetchOffset { - topic_name: topic_name.to_string(), - offset_type, - })?; - - topic_client - .get_offset(offset_type.into()) - .await - .context(FetchOffset { - topic_name: topic_name.to_string(), - offset_type, - }) - } - - // FIXME: consume a empty topic may be hanged forever... 
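-    // (The unit tests guard against this by wrapping `next_message` in
-    // `tokio::time::timeout`, see `test_consume_empty_topic` in
-    // `tests/cases.rs`.)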
- async fn consume( - &self, - topic_name: &str, - start_offset: StartOffset, - ) -> Result { - info!("Consume data in kafka topic:{}", topic_name); - - let topic_client = self - .0 - .get_or_create_topic_client(topic_name) - .await - .context(Consume { - topic_name: topic_name.to_string(), - when: ConsumeWhen::Start, - })?; - Ok(KafkaConsumeIterator::new( - topic_name, - self.0.config.consumer.clone(), - topic_client, - start_offset, - )) - } - - async fn delete_to(&self, topic_name: &str, offset: Offset) -> Result<()> { - let topic_client = self - .0 - .get_or_create_topic_client(topic_name) - .await - .context(DeleteUpTo { - topic_name: topic_name.to_string(), - offset, - })?; - - topic_client - .delete_records(offset, self.0.config.topic_management.delete_max_wait_ms) - .await - .context(DeleteUpTo { - topic_name: topic_name.to_string(), - offset, - })?; - - Ok(()) - } -} - -#[derive(Debug)] -pub struct KafkaConsumeIterator { - topic_name: String, - stream_consumer: StreamConsumer, -} - -impl KafkaConsumeIterator { - pub fn new( - topic_name: &str, - config: ConsumerConfig, - topic_client: TopicClientRef, - start_offset: StartOffset, - ) -> Self { - info!("Init consumer of topic:{}, config:{:?}", topic_name, config); - - // If not empty, make consuming stream. - let stream_consumer = { - let mut stream_builder = StreamConsumerBuilder::new(topic_client, start_offset.into()); - - if let Some(max_wait_ms) = config.max_wait_ms { - stream_builder = stream_builder.with_max_wait_ms(max_wait_ms) - } - - if let Some(min_batch_size) = config.min_batch_size { - stream_builder = stream_builder.with_min_batch_size(min_batch_size) - } - - if let Some(max_batch_size) = config.max_batch_size { - stream_builder = stream_builder.with_min_batch_size(max_batch_size) - } - - stream_builder.build() - }; - - KafkaConsumeIterator { - topic_name: topic_name.to_string(), - stream_consumer, - } - } -} - -#[async_trait] -impl ConsumeIterator for KafkaConsumeIterator { - type Error = Error; - - async fn next_message(&mut self) -> Result<(MessageAndOffset, Offset)> { - // Return message and offset from buffer. 
- match self.stream_consumer.next().await { - Some(Ok((record, high_watermark))) => Ok((record.into(), high_watermark)), - - Some(Err(e)) => Err(e).context(Consume { - topic_name: self.topic_name.clone(), - when: ConsumeWhen::PollStream, - }), - - None => Unknown { - msg: format!( - "consuming stream return None due to unknown cause, topic:{}", - self.topic_name - ), - } - .fail(), - } - } -} - -impl From for Record { - fn from(message: Message) -> Self { - Self { - key: message.key, - value: message.value, - headers: message.headers, - timestamp: message.timestamp, - } - } -} - -impl From for MessageAndOffset { - fn from(record_and_offset: RecordAndOffset) -> Self { - let message = Message { - key: record_and_offset.record.key, - value: record_and_offset.record.value, - headers: record_and_offset.record.headers, - timestamp: record_and_offset.record.timestamp, - }; - - Self { - message, - offset: record_and_offset.offset, - } - } -} - -impl From for KafkaStartOffset { - fn from(start_offset: StartOffset) -> Self { - match start_offset { - StartOffset::Earliest => KafkaStartOffset::Earliest, - StartOffset::Latest => KafkaStartOffset::Latest, - StartOffset::At(offset) => KafkaStartOffset::At(offset), - } - } -} - -impl From for OffsetAt { - fn from(offset_type: OffsetType) -> Self { - match offset_type { - OffsetType::EarliestOffset => OffsetAt::Earliest, - OffsetType::HighWaterMark => OffsetAt::Latest, - } - } -} - -impl Debug for KafkaImpl { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("KafkaImpl") - .field("config", &self.0.config) - .field("client", &"rskafka".to_string()) - .finish() - } -} diff --git a/src/components/message_queue/src/kafka/mod.rs b/src/components/message_queue/src/kafka/mod.rs deleted file mode 100644 index c92a7594c5..0000000000 --- a/src/components/message_queue/src/kafka/mod.rs +++ /dev/null @@ -1,21 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Message queue component's Kafka implementation - -pub mod config; -pub mod kafka_impl; diff --git a/src/components/message_queue/src/lib.rs b/src/components/message_queue/src/lib.rs deleted file mode 100644 index cb496d814e..0000000000 --- a/src/components/message_queue/src/lib.rs +++ /dev/null @@ -1,121 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Message queue component - -pub mod kafka; -#[cfg(any(test, feature = "test"))] -pub mod tests; - -use std::{ - collections::BTreeMap, - fmt::{Debug, Display}, - result::Result, -}; - -use async_trait::async_trait; -use chrono::{DateTime, Utc}; - -pub type Offset = i64; - -/// Message queue interface supporting the methods needed in wal module. -#[async_trait] -pub trait MessageQueue: Clone + Debug + Send + Sync + 'static { - type Error: std::error::Error + Send + Sync + 'static; - type ConsumeIterator: ConsumeIterator + Send; - - async fn create_topic_if_not_exist(&self, topic_name: &str) -> Result<(), Self::Error>; - - async fn fetch_offset( - &self, - topic_name: &str, - offset_type: OffsetType, - ) -> Result; - - async fn produce( - &self, - topic_name: &str, - messages: Vec, - ) -> Result, Self::Error>; - - async fn consume( - &self, - topic_name: &str, - start_offset: StartOffset, - ) -> Result; - - async fn delete_to(&self, topic_name: &str, offset: Offset) -> Result<(), Self::Error>; - // TODO: should design a stream consume method for slave node to fetch wals. -} - -/// High-level record. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Message { - pub key: Option>, - pub value: Option>, - pub headers: BTreeMap>, - pub timestamp: DateTime, -} - -/// Record that has offset information attached. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct MessageAndOffset { - pub message: Message, - pub offset: Offset, -} - -#[async_trait] -pub trait ConsumeIterator: Debug + Send + 'static { - type Error: std::error::Error + Send + Sync + 'static; - - async fn next_message(&mut self) -> Result<(MessageAndOffset, Offset), Self::Error>; -} - -/// At which position shall the stream start. -#[derive(Debug, Clone, Copy)] -pub enum StartOffset { - /// At the earliest known offset. - /// - /// This might be larger than 0 if some records were already deleted due to - /// a retention policy or delete operations. - Earliest, - - /// At the latest known offset. - /// - /// This is helpful if you only want to process new data. - Latest, - - /// At a specific offset. - /// - /// Note that specifying an offset that is unknown will result in the error. - At(Offset), -} - -#[derive(Debug, Clone, Copy)] -pub enum OffsetType { - EarliestOffset, - HighWaterMark, -} - -impl Display for OffsetType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - OffsetType::EarliestOffset => f.write_str("earliest_offset"), - OffsetType::HighWaterMark => f.write_str("high_watermark"), - } - } -} diff --git a/src/components/message_queue/src/tests/cases.rs b/src/components/message_queue/src/tests/cases.rs deleted file mode 100644 index a4da97f1a0..0000000000 --- a/src/components/message_queue/src/tests/cases.rs +++ /dev/null @@ -1,258 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Test cases for message queue - -use std::{ - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, - }, - time::Duration, -}; - -use tokio::time::timeout; - -use crate::{ - kafka::{config::Config, kafka_impl::KafkaImpl}, - tests::util::{generate_test_data, random_topic_name}, - ConsumeIterator, Message, MessageQueue, OffsetType, StartOffset, -}; - -#[tokio::test(flavor = "multi_thread", worker_threads = 4)] -#[ignore = "It can just run with a Kafka cluster"] -async fn test_kafka() { - let mut config = Config::default(); - config.client.boost_brokers = Some(vec!["127.0.0.1:9011".to_string()]); - let kafka_impl = Arc::new(KafkaImpl::new(config).await.unwrap()); - - run_message_queue_test(kafka_impl).await; -} - -async fn test_create_topic(message_queue: &T) { - assert!(message_queue - .create_topic_if_not_exist(random_topic_name().as_str()) - .await - .is_ok()); - // Topic has already existed is ok. - assert!(message_queue - .create_topic_if_not_exist(random_topic_name().as_str()) - .await - .is_ok()); -} - -async fn run_message_queue_test(message_queue: Arc) { - test_create_topic(message_queue.as_ref()).await; - - test_simple_produce_consume(message_queue.as_ref()).await; - - test_delete(message_queue.as_ref()).await; - - test_consume_empty_topic(message_queue.as_ref()).await; - - test_consume_fetch_offset(message_queue.as_ref()).await; - - test_multiple_consumer_on_same_topic(message_queue.clone()).await; -} - -async fn test_simple_produce_consume(message_queue: &T) { - let topic_name = random_topic_name(); - assert!(message_queue - .create_topic_if_not_exist(topic_name.as_str()) - .await - .is_ok()); - - // Call produce to push messages at first, then call consume to pull back and - // compare. - let test_messages = generate_test_data(10); - assert!(message_queue - .produce(&topic_name, test_messages.clone()) - .await - .is_ok()); - consume_all_and_compare(message_queue, &topic_name, 0, &test_messages).await; -} - -async fn test_delete(message_queue: &T) { - let topic_name = random_topic_name(); - assert!(message_queue - .create_topic_if_not_exist(topic_name.as_str()) - .await - .is_ok()); - - // Test consume and produce. - let test_messages = generate_test_data(10); - assert!(message_queue - .produce(&topic_name, test_messages.clone()) - .await - .is_ok()); - consume_all_and_compare(message_queue, &topic_name, 0, &test_messages).await; - - // Test consume after deleting. 
- assert!(message_queue.delete_to(&topic_name, 3).await.is_ok()); - consume_all_and_compare(message_queue, &topic_name, 3, &test_messages).await; -} - -async fn consume_all_and_compare( - message_queue: &T, - topic_name: &str, - start_offset: i64, - test_messages: &[Message], -) { - let iter = message_queue - .consume(topic_name, StartOffset::Earliest) - .await; - let high_watermark = message_queue - .fetch_offset(topic_name, OffsetType::HighWaterMark) - .await - .unwrap(); - assert!(iter.is_ok()); - let mut iter = iter.unwrap(); - let mut offset = start_offset; - let mut cnt = 0; - - loop { - let res = iter.next_message().await; - assert!(res.is_ok()); - let (message_and_offset, _) = res.unwrap(); - assert_eq!(message_and_offset.offset, offset); - assert_eq!(message_and_offset.message, test_messages[offset as usize]); - - offset += 1; - cnt += 1; - - if message_and_offset.offset + 1 == high_watermark { - break; - } - } - assert_eq!(cnt, test_messages.len() as i64 - start_offset); -} - -async fn test_consume_empty_topic(message_queue: &T) { - let topic_name = random_topic_name(); - assert!(message_queue - .create_topic_if_not_exist(topic_name.as_str()) - .await - .is_ok()); - - // FIXME: consume a empty topic may be hanged forever... - let mut iter = message_queue - .consume(&topic_name, StartOffset::Earliest) - .await - .unwrap(); - assert!(timeout(Duration::from_millis(1000), iter.next_message()) - .await - .is_err()); -} - -async fn test_consume_fetch_offset(message_queue: &T) { - let topic_name = random_topic_name(); - assert!(message_queue - .create_topic_if_not_exist(topic_name.as_str()) - .await - .is_ok()); - - // At the beginning, the topic's partition is empty, earliest offset and high - // watermark should be zero. - let earliest_offset = message_queue - .fetch_offset(&topic_name, OffsetType::EarliestOffset) - .await - .unwrap(); - let high_watermark = message_queue - .fetch_offset(&topic_name, OffsetType::HighWaterMark) - .await - .unwrap(); - assert_eq!(earliest_offset, 0); - assert_eq!(high_watermark, 0); - - // We produce so messages into it, earliest is still zero, but high watermark - // will equal to the amount of messages. - let test_messages = generate_test_data(10); - assert!(message_queue - .produce(&topic_name, test_messages.clone()) - .await - .is_ok()); - let earliest_offset = message_queue - .fetch_offset(&topic_name, OffsetType::EarliestOffset) - .await - .unwrap(); - let high_watermark = message_queue - .fetch_offset(&topic_name, OffsetType::HighWaterMark) - .await - .unwrap(); - assert_eq!(earliest_offset, 0); - assert_eq!(high_watermark, 10); - - // We delete some messages later, and the earliest offset will become the offset - // which is deleted to. - assert!(message_queue.delete_to(&topic_name, 3).await.is_ok()); - let earliest_offset = message_queue - .fetch_offset(&topic_name, OffsetType::EarliestOffset) - .await - .unwrap(); - let high_watermark = message_queue - .fetch_offset(&topic_name, OffsetType::HighWaterMark) - .await - .unwrap(); - assert_eq!(earliest_offset, 3); - assert_eq!(high_watermark, 10); -} - -async fn test_multiple_consumer_on_same_topic(message_queue: Arc) { - let topic_name = random_topic_name(); - assert!(message_queue - .create_topic_if_not_exist(topic_name.as_str()) - .await - .is_ok()); - - // Call produce to push messages at first, then call consume in two tasks to - // pull back and compare. 
- let test_messages = generate_test_data(10); - assert!(message_queue - .produce(&topic_name, test_messages.clone()) - .await - .is_ok()); - - let is_start = Arc::new(AtomicBool::new(false)); - - let message_queue_clone = message_queue.clone(); - let topic_name_clone = topic_name.clone(); - let test_messages_clone = test_messages.clone(); - let is_start_clone = is_start.clone(); - let handle1 = tokio::spawn(async move { - while !is_start_clone.load(Ordering::SeqCst) {} - - consume_all_and_compare( - message_queue_clone.as_ref(), - &topic_name_clone, - 0, - &test_messages_clone, - ) - .await; - }); - - let is_start_clone = is_start.clone(); - let handle2 = tokio::spawn(async move { - while !is_start_clone.load(Ordering::SeqCst) {} - - consume_all_and_compare(message_queue.as_ref(), &topic_name, 0, &test_messages).await; - }); - - // Let them start and join the handles. - is_start.store(true, Ordering::SeqCst); - let _ = handle1.await; - let _ = handle2.await; -} diff --git a/src/components/message_queue/src/tests/mod.rs b/src/components/message_queue/src/tests/mod.rs deleted file mode 100644 index 1b829a12f3..0000000000 --- a/src/components/message_queue/src/tests/mod.rs +++ /dev/null @@ -1,22 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Tests for message queue - -#[cfg_attr(feature = "test", allow(dead_code, unused_imports))] -mod cases; -pub mod util; diff --git a/src/components/message_queue/src/tests/util.rs b/src/components/message_queue/src/tests/util.rs deleted file mode 100644 index ebdc911a35..0000000000 --- a/src/components/message_queue/src/tests/util.rs +++ /dev/null @@ -1,51 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Test util for message queue - -use std::collections::BTreeMap; - -use chrono::{DateTime, Duration, TimeZone, Utc}; - -use crate::Message; - -pub fn generate_test_data(cnt: usize) -> Vec { - let mut messages = Vec::with_capacity(cnt); - let base_ts = Utc.timestamp_millis_opt(1337).unwrap(); - for i in 0..cnt { - let key = format!("test_key_{i}"); - let val = format!("test_val_{i}"); - let timestamp = base_ts + Duration::milliseconds(i as i64); - - messages.push(message(key.as_bytes(), val.as_bytes(), timestamp)); - } - - messages -} - -fn message(key: &[u8], value: &[u8], timestamp: DateTime) -> Message { - Message { - key: Some(key.to_vec()), - value: Some(value.to_vec()), - headers: BTreeMap::new(), - timestamp, - } -} - -pub fn random_topic_name() -> String { - format!("test_topic_{}", uuid::Uuid::new_v4()) -} diff --git a/src/components/metric_ext/Cargo.toml b/src/components/metric_ext/Cargo.toml deleted file mode 100644 index bc41d8cf81..0000000000 --- a/src/components/metric_ext/Cargo.toml +++ /dev/null @@ -1,38 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "metric_ext" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -crossbeam-utils = "0.8.7" -# In alphabetical order -serde = { workspace = true } -serde_json = { workspace = true } -time_ext = { workspace = true } diff --git a/src/components/metric_ext/src/lib.rs b/src/components/metric_ext/src/lib.rs deleted file mode 100644 index 195bd2b72d..0000000000 --- a/src/components/metric_ext/src/lib.rs +++ /dev/null @@ -1,280 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -/// Copied from https://github.com/sunng87/metriki/blob/master/metriki-core/src/metrics/meter.rs -/// But supports 1 hour and 2 hour rate. 
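-// The moving averages below are standard EWMAs: on every 5s tick,
-//
-//     rate' = rate + alpha * (instant_rate - rate)
-//     alpha = 1 - exp(-interval_secs / (60 * minutes))
-//
-// so the 15-minute average uses alpha(5, 15) = 1 - exp(-5/900) ≈ 0.0055, and
-// the 1-hour/2-hour averages simply pass minutes = 60 and 120 respectively.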
-use std::sync::atomic::{AtomicU64, Ordering}; -use std::time::{Duration, Instant, SystemTime}; - -use crossbeam_utils::atomic::AtomicCell; -#[cfg(feature = "ser")] -use serde::ser::SerializeMap; -#[cfg(feature = "ser")] -use serde::{Serialize, Serializer}; - -/// Meters are used to calculate rate of an event. -#[derive(Debug)] -pub struct Meter { - moving_averages: ExponentiallyWeightedMovingAverages, - count: AtomicU64, - start_time: SystemTime, -} - -impl Default for Meter { - fn default() -> Self { - Self::new() - } -} - -impl Meter { - pub fn new() -> Meter { - Meter { - moving_averages: ExponentiallyWeightedMovingAverages::new(), - count: AtomicU64::from(0), - start_time: SystemTime::now(), - } - } - - pub fn mark(&self) { - self.mark_n(1) - } - - pub fn mark_n(&self, n: u64) { - self.count.fetch_add(n, Ordering::Relaxed); - self.moving_averages.tick_if_needed(); - self.moving_averages.update(n); - } - - pub fn h1_rate(&self) -> f64 { - self.moving_averages.tick_if_needed(); - self.moving_averages.h1_rate() - } - - pub fn h2_rate(&self) -> f64 { - self.moving_averages.tick_if_needed(); - self.moving_averages.h2_rate() - } - - pub fn m15_rate(&self) -> f64 { - self.moving_averages.tick_if_needed(); - self.moving_averages.m15_rate() - } - - pub fn count(&self) -> u64 { - self.count.load(Ordering::Relaxed) - } - - pub fn mean_rate(&self) -> f64 { - let count = self.count(); - if count > 0 { - if let Ok(elapsed) = SystemTime::now() - .duration_since(self.start_time) - .map(|d| d.as_secs() as f64) - { - count as f64 / elapsed - } else { - 0f64 - } - } else { - 0f64 - } - } -} - -#[derive(Debug)] -struct ExponentiallyWeightedMovingAverage { - alpha: f64, - interval_nanos: u64, - - uncounted: AtomicCell, - rate: AtomicCell>, -} - -impl ExponentiallyWeightedMovingAverage { - fn new(alpha: f64, interval_secs: u64) -> ExponentiallyWeightedMovingAverage { - ExponentiallyWeightedMovingAverage { - alpha, - interval_nanos: time_ext::secs_to_nanos(interval_secs), - - uncounted: AtomicCell::new(0), - rate: AtomicCell::new(None), - } - } - - fn update(&self, n: u64) { - self.uncounted.fetch_add(n); - } - - fn tick(&self) { - let count = self.uncounted.swap(0); - let instant_rate = count as f64 / self.interval_nanos as f64; - - if let Some(prev_rate) = self.rate.load() { - let new_rate = prev_rate + (self.alpha * (instant_rate - prev_rate)); - self.rate.store(Some(new_rate)); - } else { - self.rate.store(Some(instant_rate)); - } - } - - fn get_rate(&self) -> f64 { - if let Some(rate) = self.rate.load() { - rate * time_ext::secs_to_nanos(1) as f64 - } else { - 0f64 - } - } -} - -#[derive(Debug)] -struct ExponentiallyWeightedMovingAverages { - h1: ExponentiallyWeightedMovingAverage, - h2: ExponentiallyWeightedMovingAverage, - m15: ExponentiallyWeightedMovingAverage, - - last_tick: AtomicCell, -} - -#[inline] -fn alpha(interval_secs: u64, minutes: u64) -> f64 { - 1.0 - (-(interval_secs as f64) / 60.0 / minutes as f64).exp() -} - -const DEFAULT_INTERVAL_SECS: u64 = 5; -const DEFAULT_INTERVAL_MILLIS: u64 = DEFAULT_INTERVAL_SECS * 1000; - -impl ExponentiallyWeightedMovingAverages { - fn new() -> ExponentiallyWeightedMovingAverages { - ExponentiallyWeightedMovingAverages { - h1: ExponentiallyWeightedMovingAverage::new( - alpha(DEFAULT_INTERVAL_SECS, 60), - DEFAULT_INTERVAL_SECS, - ), - - h2: ExponentiallyWeightedMovingAverage::new( - alpha(DEFAULT_INTERVAL_SECS, 120), - DEFAULT_INTERVAL_SECS, - ), - - m15: ExponentiallyWeightedMovingAverage::new( - alpha(DEFAULT_INTERVAL_SECS, 15), - DEFAULT_INTERVAL_SECS, - 
), - - last_tick: AtomicCell::new(Instant::now()), - } - } - - fn update(&self, n: u64) { - self.h1.update(n); - self.h2.update(n); - self.m15.update(n); - } - - fn tick_if_needed(&self) { - let previous_tick = self.last_tick.load(); - let current_tick = Instant::now(); - - let tick_age = (current_tick - previous_tick).as_millis() as u64; - - if tick_age > DEFAULT_INTERVAL_MILLIS { - let latest_tick = - current_tick - Duration::from_millis(tick_age % DEFAULT_INTERVAL_MILLIS); - if self - .last_tick - .compare_exchange(previous_tick, latest_tick) - .is_ok() - { - let required_ticks = tick_age / DEFAULT_INTERVAL_MILLIS; - for _ in 0..required_ticks { - self.h1.tick(); - self.h2.tick(); - self.m15.tick(); - } - } - } - } - - fn h1_rate(&self) -> f64 { - self.h1.get_rate() - } - - fn h2_rate(&self) -> f64 { - self.h2.get_rate() - } - - fn m15_rate(&self) -> f64 { - self.m15.get_rate() - } -} - -#[cfg(feature = "ser")] -impl Serialize for Meter { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let mut map = serializer.serialize_map(Some(4))?; - - map.serialize_entry("count", &self.count())?; - map.serialize_entry("h1_rate", &self.h1_rate())?; - map.serialize_entry("h2_rate", &self.h2_rate())?; - map.serialize_entry("m15_rate", &self.m15_rate())?; - - map.end() - } -} - -#[cfg(test)] -mod tests { - use std::{thread, time}; - - use super::*; - - macro_rules! assert_float_eq { - ($left:expr, $right:expr) => {{ - match (&$left, &$right) { - (left_val, right_val) => { - let diff = (left_val - right_val).abs(); - - if diff > f64::EPSILON { - panic!( - "assertion failed: `(left == right)`\n left: `{:?}`,\n right: `{:?}`", - &*left_val, &*right_val - ) - } - } - } - }}; - } - - #[test] - fn test_meter() { - let m = Meter::new(); - - for _ in 0..10 { - m.mark(); - } - - thread::sleep(time::Duration::from_millis(DEFAULT_INTERVAL_MILLIS + 10)); - - assert_eq!(10, m.count()); - assert_float_eq!(2.0, m.m15_rate()); - assert_float_eq!(2.0, m.h1_rate()); - assert_float_eq!(2.0, m.h2_rate()); - } -} diff --git a/src/components/notifier/Cargo.toml b/src/components/notifier/Cargo.toml deleted file mode 100644 index e09d1f900c..0000000000 --- a/src/components/notifier/Cargo.toml +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[package] -name = "notifier" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -tokio = { workspace = true } diff --git a/src/components/notifier/src/lib.rs b/src/components/notifier/src/lib.rs deleted file mode 100644 index fa73889daa..0000000000 --- a/src/components/notifier/src/lib.rs +++ /dev/null @@ -1,18 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -pub mod notifier; diff --git a/src/components/notifier/src/notifier.rs b/src/components/notifier/src/notifier.rs deleted file mode 100644 index 57d872211f..0000000000 --- a/src/components/notifier/src/notifier.rs +++ /dev/null @@ -1,126 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{collections::HashMap, hash::Hash, sync::RwLock}; - -#[derive(Debug)] -struct Notifiers { - notifiers: RwLock>, -} - -impl Notifiers { - pub fn new(notifier: T) -> Self { - let notifiers = vec![notifier]; - Self { - notifiers: RwLock::new(notifiers), - } - } - - pub fn add_notifier(&self, notifier: T) { - self.notifiers.write().unwrap().push(notifier); - } -} - -#[derive(Debug)] -pub struct RequestNotifiers -where - K: PartialEq + Eq + Hash, -{ - notifiers_by_key: RwLock>>, -} - -impl Default for RequestNotifiers -where - K: PartialEq + Eq + Hash, -{ - fn default() -> Self { - Self { - notifiers_by_key: RwLock::new(HashMap::new()), - } - } -} - -impl RequestNotifiers -where - K: PartialEq + Eq + Hash, -{ - /// Insert a notifier for the given key. - pub fn insert_notifier(&self, key: K, notifier: T) -> RequestResult { - // First try to read the notifiers, if the key exists, add the notifier to the - // notifiers. - let notifiers_by_key = self.notifiers_by_key.read().unwrap(); - if let Some(notifiers) = notifiers_by_key.get(&key) { - notifiers.add_notifier(notifier); - return RequestResult::Wait; - } - drop(notifiers_by_key); - - // If the key does not exist, try to write the notifiers. 
- let mut notifiers_by_key = self.notifiers_by_key.write().unwrap(); - // double check, if the key exists, add the notifier to the notifiers. - if let Some(notifiers) = notifiers_by_key.get(&key) { - notifiers.add_notifier(notifier); - return RequestResult::Wait; - } - - // the key is not existed, insert the key and the notifier. - notifiers_by_key.insert(key, Notifiers::new(notifier)); - RequestResult::First - } - - /// Take the notifiers for the given key, and remove the key from the map. - pub fn take_notifiers(&self, key: &K) -> Option> { - self.notifiers_by_key - .write() - .unwrap() - .remove(key) - .map(|notifiers| notifiers.notifiers.into_inner().unwrap()) - } -} - -pub enum RequestResult { - // The first request for this key, need to handle this request. - First, - // There are other requests for this key, just wait for the result. - Wait, -} - -pub struct ExecutionGuard { - f: F, - cancelled: bool, -} - -impl ExecutionGuard { - pub fn new(f: F) -> Self { - Self { - f, - cancelled: false, - } - } - - pub fn cancel(&mut self) { - self.cancelled = true; - } -} - -impl Drop for ExecutionGuard { - fn drop(&mut self) { - if !self.cancelled { - (self.f)() - } - } -} diff --git a/src/components/object_store/Cargo.toml b/src/components/object_store/Cargo.toml deleted file mode 100644 index 926e85f935..0000000000 --- a/src/components/object_store/Cargo.toml +++ /dev/null @@ -1,73 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[package] -name = "object_store" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -async-trait = { workspace = true } -bytes = { workspace = true } -chrono = { workspace = true } -clru = { workspace = true } -crc = "3.0.0" -futures = { workspace = true } -generic_error = { workspace = true } -hash_ext = { workspace = true } -horaedbproto = { workspace = true } -lazy_static = { workspace = true } -logger = { workspace = true } -lru = { workspace = true } -macros = { workspace = true } -notifier = { workspace = true } -object_store_opendal = "0.46.0" -opendal = { version = "0.49.0", features = [ - "services-oss", - "services-s3", - "services-fs", -] } -partitioned_lock = { workspace = true } -prometheus = { workspace = true } -prometheus-static-metric = { workspace = true } -prost = { workspace = true } -rand = { workspace = true } -reqwest = { workspace = true } -runtime = { workspace = true } -serde = { workspace = true } -serde_json = { workspace = true } -size_ext = { workspace = true } -snafu = { workspace = true } -table_kv = { workspace = true } -time_ext = { workspace = true } -tokio = { workspace = true } -twox-hash = "1.6" -upstream = { package = "object_store", version = "0.10.1" } -uuid = { version = "1.3.3", features = ["v4"] } - -[dev-dependencies] -tempfile = { workspace = true } diff --git a/src/components/object_store/src/aliyun.rs b/src/components/object_store/src/aliyun.rs deleted file mode 100644 index c6b9394fe1..0000000000 --- a/src/components/object_store/src/aliyun.rs +++ /dev/null @@ -1,81 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use object_store_opendal::OpendalStore; -use opendal::{ - layers::{RetryLayer, TimeoutLayer}, - raw::HttpClient, - services::Oss, - Operator, Result, -}; - -use crate::config::AliyunOptions; - -fn normalize_endpoint(endpoint: &str) -> String { - if endpoint.starts_with("http") { - endpoint.to_string() - } else { - format!("http://{}", endpoint) - } -} - -pub fn try_new(aliyun_opts: &AliyunOptions) -> Result { - let http_builder = reqwest::ClientBuilder::new() - .pool_max_idle_per_host(aliyun_opts.http.pool_max_idle_per_host) - .http2_keep_alive_timeout(aliyun_opts.http.keep_alive_timeout.0) - .http2_keep_alive_while_idle(true) - .http2_keep_alive_interval(aliyun_opts.http.keep_alive_interval.0) - .timeout(aliyun_opts.http.timeout.0); - let http_client = HttpClient::build(http_builder)?; - let endpoint = normalize_endpoint(&aliyun_opts.endpoint); - - let builder = Oss::default() - .access_key_id(&aliyun_opts.key_id) - .access_key_secret(&aliyun_opts.key_secret) - .endpoint(&endpoint) - .bucket(&aliyun_opts.bucket) - .http_client(http_client); - let op = Operator::new(builder)? - .layer( - TimeoutLayer::new() - .with_timeout(aliyun_opts.timeout.timeout.0) - .with_io_timeout(aliyun_opts.timeout.io_timeout.0), - ) - .layer(RetryLayer::new().with_max_times(aliyun_opts.max_retries)) - .finish(); - - Ok(OpendalStore::new(op)) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_normalize_endpoint() { - let testcase = [ - ("https://oss.aliyun.com", "https://oss.aliyun.com"), - ("http://oss.aliyun.com", "http://oss.aliyun.com"), - ("no-scheme.com", "http://no-scheme.com"), - ]; - - for (endpoint, expected) in testcase { - let actual = normalize_endpoint(endpoint); - assert_eq!(expected, actual); - } - } -} diff --git a/src/components/object_store/src/config.rs b/src/components/object_store/src/config.rs deleted file mode 100644 index 072b9159f9..0000000000 --- a/src/components/object_store/src/config.rs +++ /dev/null @@ -1,157 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
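-
-// A note on the sizing constraint documented on `StorageOptions` below: with
-// the default 2 MiB page size and 4 partition bits, `disk_cache_capacity`
-// must be a multiple of 2 MiB * 2^4 = 32 MiB, so for example 1 GiB qualifies
-// while 1000 MiB does not.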
- -use std::time::Duration; - -use serde::{Deserialize, Serialize}; -use size_ext::ReadableSize; -use time_ext::ReadableDuration; - -#[derive(Debug, Clone, Deserialize, Serialize)] -#[serde(default)] -/// Options for storage backend -pub struct StorageOptions { - // 0 means disable mem cache - pub mem_cache_capacity: ReadableSize, - pub mem_cache_partition_bits: usize, - // 0 means disable disk cache - // Note: disk_cache_capacity % (disk_cache_page_size * (1 << disk_cache_partition_bits)) should - // be 0 - pub disk_cache_capacity: ReadableSize, - pub disk_cache_page_size: ReadableSize, - pub disk_cache_partition_bits: usize, - pub disk_cache_dir: String, - pub object_store: ObjectStoreOptions, -} - -impl Default for StorageOptions { - fn default() -> Self { - let root_path = "/tmp/horaedb".to_string(); - - StorageOptions { - mem_cache_capacity: ReadableSize::mb(512), - mem_cache_partition_bits: 6, - disk_cache_dir: root_path.clone(), - disk_cache_capacity: ReadableSize::gb(0), - disk_cache_page_size: ReadableSize::mb(2), - disk_cache_partition_bits: 4, - object_store: ObjectStoreOptions::Local(LocalOptions::new_with_default(root_path)), - } - } -} - -#[derive(Debug, Clone, Deserialize, Serialize)] -#[serde(tag = "type")] -#[allow(clippy::large_enum_variant)] -pub enum ObjectStoreOptions { - Local(LocalOptions), - Aliyun(AliyunOptions), - S3(S3Options), -} - -#[derive(Debug, Clone, Deserialize, Serialize)] -pub struct LocalOptions { - pub data_dir: String, - #[serde(default = "default_max_retries")] - pub max_retries: usize, - #[serde(default)] - pub timeout: TimeoutOptions, -} - -impl LocalOptions { - pub fn new_with_default(data_dir: String) -> Self { - Self { - data_dir, - max_retries: default_max_retries(), - timeout: Default::default(), - } - } -} - -#[derive(Debug, Clone, Deserialize, Serialize)] -pub struct AliyunOptions { - pub key_id: String, - pub key_secret: String, - pub endpoint: String, - pub bucket: String, - pub prefix: String, - #[serde(default = "default_max_retries")] - pub max_retries: usize, - #[serde(default)] - pub http: HttpOptions, - #[serde(default)] - pub timeout: TimeoutOptions, -} - -#[derive(Debug, Clone, Deserialize, Serialize)] -pub struct S3Options { - pub region: String, - pub key_id: String, - pub key_secret: String, - pub endpoint: String, - pub bucket: String, - pub prefix: String, - #[serde(default = "default_max_retries")] - pub max_retries: usize, - #[serde(default)] - pub http: HttpOptions, - #[serde(default)] - pub timeout: TimeoutOptions, -} - -#[derive(Debug, Clone, Deserialize, Serialize)] -pub struct HttpOptions { - pub pool_max_idle_per_host: usize, - pub timeout: ReadableDuration, - pub keep_alive_timeout: ReadableDuration, - pub keep_alive_interval: ReadableDuration, -} - -impl Default for HttpOptions { - fn default() -> Self { - Self { - pool_max_idle_per_host: 1024, - timeout: ReadableDuration::from(Duration::from_secs(60)), - keep_alive_timeout: ReadableDuration::from(Duration::from_secs(60)), - keep_alive_interval: ReadableDuration::from(Duration::from_secs(2)), - } - } -} - -#[derive(Debug, Clone, Deserialize, Serialize)] -pub struct TimeoutOptions { - // Non IO Operation like stat and delete, they operate on a single file, we control them by - // setting timeout. - pub timeout: ReadableDuration, - // IO Operation like read and write, they operate on data directly, we control them by setting - // io_timeout. 
- pub io_timeout: ReadableDuration, -} - -impl Default for TimeoutOptions { - fn default() -> Self { - Self { - timeout: ReadableDuration::from(Duration::from_secs(10)), - io_timeout: ReadableDuration::from(Duration::from_secs(10)), - } - } -} - -#[inline] -fn default_max_retries() -> usize { - 3 -} diff --git a/src/components/object_store/src/disk_cache.rs b/src/components/object_store/src/disk_cache.rs deleted file mode 100644 index 13a32f01e0..0000000000 --- a/src/components/object_store/src/disk_cache.rs +++ /dev/null @@ -1,1558 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! An ObjectStore implementation with disk as cache. -//! The disk cache is a read-through caching, with page as its minimal cache -//! unit. -//! -//! Page is used for reasons below: -//! - reduce file size in case of there are too many request with small range. - -use std::{fmt::Display, ops::Range, result::Result as StdResult, sync::Arc}; - -use async_trait::async_trait; -use bytes::{Bytes, BytesMut}; -use chrono::{DateTime, Utc}; -use crc::{Crc, CRC_32_ISCSI}; -use futures::stream::BoxStream; -use hash_ext::SeaHasherBuilder; -use logger::{debug, warn}; -use lru::LruCache; -use notifier::notifier::{ExecutionGuard, RequestNotifiers}; -use partitioned_lock::PartitionedMutex; -use runtime::RuntimeRef; -use serde::{Deserialize, Serialize}; -use snafu::{ensure, Backtrace, ResultExt, Snafu}; -use time_ext; -use tokio::{ - fs::{self, File, OpenOptions}, - io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt}, - sync::oneshot::{self, error::RecvError, Receiver}, -}; -use upstream::{ - path::Path, Error as ObjectStoreError, GetOptions, GetResult, ListResult, MultipartUpload, - ObjectMeta, ObjectStore, PutMultipartOpts, PutOptions, PutPayload, PutResult, Result, -}; - -use crate::metrics::{ - DISK_CACHE_DEDUP_COUNT, OBJECT_STORE_DISK_CACHE_HIT, OBJECT_STORE_DISK_CACHE_MISS, -}; - -const FILE_SIZE_CACHE_CAP: usize = 1 << 18; -const FILE_SIZE_CACHE_PARTITION_BITS: usize = 8; -pub const CASTAGNOLI: Crc = Crc::::new(&CRC_32_ISCSI); - -#[derive(Debug, Snafu)] -enum Error { - #[snafu(display("IO failed, file:{file}, source:{source}.\nbacktrace:\n{backtrace}",))] - Io { - file: String, - source: std::io::Error, - backtrace: Backtrace, - }, - - #[snafu(display("Access is out of range, range:{range:?}, file_size:{file_size}, last_modified:{last_modified:?}, file:{file}.\nbacktrace:\n{backtrace}"))] - OutOfRange { - range: Range, - file_size: usize, - file: String, - last_modified: DateTime, - backtrace: Backtrace, - }, - - #[snafu(display( - "Partial write, expect bytes:{expect}, written:{written}.\nbacktrace:\n{backtrace}", - ))] - PartialWrite { - expect: usize, - written: usize, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to deserialize manifest, 
source:{source}.\nbacktrace:\n{backtrace}"))] - DeserializeManifest { - source: serde_json::Error, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to serialize manifest, source:{source}.\nbacktrace:\n{backtrace}"))] - SerializeManifest { - source: serde_json::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to receive bytes from channel, source:{source}.\nbacktrace:\n{backtrace}" - ))] - ReceiveBytesFromChannel { - backtrace: Backtrace, - source: RecvError, - }, - - #[snafu(display("Fetch data failed, error.\nbacktrace:\n{backtrace}"))] - FetchDataFromObjectStore { backtrace: Backtrace }, - - #[snafu(display("Wait notifier failed, message:{message}."))] - WaitNotifier { message: String }, - - #[snafu(display("Invalid manifest page size, old:{old}, new:{new}."))] - InvalidManifest { old: usize, new: usize }, - - #[snafu(display( - "Failed to persist cache, file:{file}, source:{source}.\nbacktrace:\n{backtrace}", - ))] - PersistCache { - file: String, - source: tokio::io::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to decode cache pb value, file:{file}, source:{source}.\nbacktrace:\n{backtrace}", - ))] - DecodeCache { - file: String, - source: prost::DecodeError, - backtrace: Backtrace, - }, - - #[snafu(display("disk cache cap must large than 0",))] - InvalidCapacity, -} - -impl From for ObjectStoreError { - fn from(source: Error) -> Self { - Self::Generic { - store: "DiskCacheStore", - source: Box::new(source), - } - } -} - -/// The result of read bytes of a page file. -#[derive(Debug)] -enum ReadBytesResult { - Integrate(Vec), - /// The page file is corrupted. - Corrupted { - file_size: usize, - }, - /// The read range exceeds the file size. - OutOfRange, -} - -/// The manifest for describing the meta of the disk cache. -#[derive(Debug, Serialize, Deserialize)] -struct Manifest { - create_at: String, - page_size: usize, - version: usize, -} - -impl Manifest { - const CURRENT_VERSION: usize = 2; - const FILE_NAME: &'static str = "manifest.json"; - - #[inline] - fn is_valid(&self, version: usize, page_size: usize) -> bool { - self.page_size == page_size && self.version == version - } -} - -/// The writer of the page file in the disk cache. -/// -/// Following the payload, a footer [`PageFileEncoder::MAGIC_FOOTER`] is -/// appended. 
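-///
-/// A page holding `n` payload bytes therefore occupies `n + 8` bytes on disk;
-/// `encoded_size` below returns exactly that, and `DiskCache::read_bytes`
-/// compares it against the actual file size to detect corrupted pages.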
-struct PageFileWriter { - output: String, - tmp_file: String, - need_clean_tmpfile: bool, -} - -impl Drop for PageFileWriter { - fn drop(&mut self) { - if self.need_clean_tmpfile { - if let Err(e) = std::fs::remove_file(&self.tmp_file) { - warn!( - "Disk cache remove page tmp file failed, file:{}, err:{e}", - &self.tmp_file - ); - } - } - } -} - -impl PageFileWriter { - const MAGIC_FOOTER: [u8; 8] = [0, 0, 0, 0, b'c', b'e', b'r', b'e']; - - fn new(output: String) -> Self { - let tmp_file = Self::tmp_file(&output); - - Self { - output, - tmp_file, - need_clean_tmpfile: true, - } - } - - fn tmp_file(input: &str) -> String { - format!("{}.tmp", input) - } - - async fn write_inner(&self, bytes: Bytes) -> Result<()> { - let tmp_file = &self.tmp_file; - let mut writer = File::create(tmp_file) - .await - .context(Io { file: tmp_file })?; - writer - .write_all(&bytes) - .await - .context(Io { file: tmp_file })?; - - writer - .write_all(&Self::MAGIC_FOOTER) - .await - .context(Io { file: tmp_file })?; - - writer.flush().await.context(Io { file: tmp_file })?; - - tokio::fs::rename(tmp_file, &self.output) - .await - .context(Io { file: &self.output })?; - - Ok(()) - } - - // When write bytes to file, the cache lock is released, so when one thread is - // reading, another thread may update it, so we write to tmp file first, - // then rename to expected filename to avoid other threads see partial - // content. - async fn write_and_flush(mut self, bytes: Bytes) -> Result<()> { - let write_result = self.write_inner(bytes).await; - if write_result.is_ok() { - self.need_clean_tmpfile = false; - } - - write_result - } - - #[inline] - fn encoded_size(payload_len: usize) -> usize { - payload_len + Self::MAGIC_FOOTER.len() - } -} - -/// The mapping is PageFileName -> PageMeta. -type PageMetaCache = LruCache; - -#[derive(Clone, Debug)] -struct PageMeta { - file_size: usize, - // TODO: Introduce the CRC for integration check. -} - -#[derive(Debug, Clone)] -struct DiskCache { - root_dir: String, - meta_cache: Arc>, -} - -#[derive(Debug, Clone)] -struct FileMeta { - last_modified: DateTime, - size: usize, -} - -impl From for FileMeta { - fn from(v: ObjectMeta) -> Self { - FileMeta { - last_modified: v.last_modified, - size: v.size, - } - } -} - -impl DiskCache { - fn try_new(root_dir: String, cap: usize, partition_bits: usize) -> Result { - let init_lru = |partition_num: usize| -> Result<_> { - let cap_per_part = cap / partition_num; - ensure!(cap_per_part != 0, InvalidCapacity); - Ok(LruCache::new(cap_per_part)) - }; - - Ok(Self { - root_dir, - meta_cache: Arc::new(PartitionedMutex::try_new_with_bit_len( - init_lru, - partition_bits, - SeaHasherBuilder {}, - )?), - }) - } - - fn insert_page_meta(&self, filename: String, page_meta: PageMeta) -> Option { - let mut cache = self.meta_cache.lock(&filename); - debug!( - "Update the meta cache, file:{filename}, len:{}, cap_per_part:{}", - cache.cap(), - cache.len() - ); - - cache - .push(filename, page_meta) - .map(|(filename, _)| filename) - } - - async fn insert_data(&self, filename: String, value: Bytes) { - let page_meta = { - let file_size = PageFileWriter::encoded_size(value.len()); - PageMeta { file_size } - }; - let evicted_file = self.insert_page_meta(filename.clone(), page_meta); - - let do_persist = || async { - if let Err(e) = self.persist_bytes(&filename, value).await { - warn!("Failed to persist cache, file:{filename}, err:{e}"); - } - }; - - if let Some(evicted_file) = evicted_file { - if evicted_file == filename { - // No need to do persist and removal. 
- return; - } - - // Persist the new bytes. - do_persist().await; - - // Remove the evicted file. - debug!("Evicted file:{evicted_file} is to be removed"); - self.remove_file_by_name(&evicted_file).await; - } else { - do_persist().await; - } - } - - /// Get the bytes from the disk cache. - /// - /// If the bytes is invalid (its size is different from the recorded one), - /// remove it and return None. - async fn get_data(&self, filename: &str, range: &Range) -> Option { - let file_size = { - let mut cache = self.meta_cache.lock(&filename); - match cache.get(filename) { - Some(page_meta) => page_meta.file_size, - None => return None, - } - }; - - match self.read_bytes(filename, range, file_size).await { - Ok(ReadBytesResult::Integrate(v)) => Some(v.into()), - Ok(ReadBytesResult::Corrupted { - file_size: real_file_size, - }) => { - warn!( - "File:{filename} is corrupted, expect:{file_size}, got:{real_file_size}, and it will be removed", - ); - - { - let mut cache = self.meta_cache.lock(&filename); - cache.pop(filename); - } - - self.remove_file_by_name(filename).await; - - None - } - Ok(ReadBytesResult::OutOfRange) => { - warn!( - "File:{filename} is not enough to read, range:{range:?}, file_size:{file_size}, and it will be removed", - ); - - { - let mut cache = self.meta_cache.lock(&filename); - cache.pop(filename); - } - - self.remove_file_by_name(filename).await; - - None - } - Err(e) => { - warn!("Failed to read file:{filename} from the disk cache, err:{e}"); - None - } - } - } - - async fn remove_file_by_name(&self, filename: &str) { - debug!("Try to remove file:{filename}"); - - let file_path = std::path::Path::new(&self.root_dir) - .join(filename) - .into_os_string() - .into_string() - .unwrap(); - - if let Err(e) = tokio::fs::remove_file(&file_path).await { - warn!("Failed to remove file:{file_path}, err:{e}"); - } - } - - async fn persist_bytes(&self, filename: &str, payload: Bytes) -> Result<()> { - let dest_filepath = std::path::Path::new(&self.root_dir) - .join(filename) - .into_os_string() - .into_string() - .unwrap(); - - let writer = PageFileWriter::new(dest_filepath); - writer.write_and_flush(payload).await?; - - Ok(()) - } - - /// Read the bytes from the cached file. - /// - /// If the file size is different from the `expect_file_size`, it'll be - /// thought as corrupted file. 
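The atomic write and the size-based integrity check used above can be summarized in a small standalone sketch. This is not the deleted module itself; `write_page_atomically` and `is_size_valid` are illustrative names, and the only assumption is the tmp-file-then-rename flow and the footer constant shown in the code above: the payload plus the magic footer is written to a `.tmp` file, flushed, then renamed into place so a concurrent reader never observes a partially written page, and on read a page whose on-disk size differs from payload length plus footer length is treated as corrupted.

use tokio::{fs, io::AsyncWriteExt};

const MAGIC_FOOTER: [u8; 8] = [0, 0, 0, 0, b'c', b'e', b'r', b'e'];

// Write the page to `<dest>.tmp`, append the footer, flush, then rename.
// The rename is what makes the update atomic from a reader's point of view.
async fn write_page_atomically(dest: &str, payload: &[u8]) -> std::io::Result<()> {
    let tmp = format!("{dest}.tmp");
    let mut file = fs::File::create(&tmp).await?;
    file.write_all(payload).await?;
    file.write_all(&MAGIC_FOOTER).await?;
    file.flush().await?;
    fs::rename(&tmp, dest).await?;
    Ok(())
}

// The on-read integrity check is purely size based (there is no CRC yet,
// per the TODO on `PageMeta`): a cached page is considered valid only if
// its length equals payload length + footer length.
fn is_size_valid(on_disk_len: usize, payload_len: usize) -> bool {
    on_disk_len == payload_len + MAGIC_FOOTER.len()
}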
- async fn read_bytes( - &self, - filename: &str, - range: &Range, - expect_file_size: usize, - ) -> std::io::Result { - if PageFileWriter::encoded_size(range.len()) > expect_file_size { - return Ok(ReadBytesResult::OutOfRange); - } - - let file_path = std::path::Path::new(&self.root_dir) - .join(filename) - .into_os_string() - .into_string() - .unwrap(); - - let mut f = File::open(&file_path).await?; - let file_size = f.metadata().await?.len() as usize; - if expect_file_size != file_size { - return Ok(ReadBytesResult::Corrupted { file_size }); - } - - f.seek(std::io::SeekFrom::Start(range.start as u64)).await?; - let mut buf = vec![0; range.len()]; - let n = f.read_exact(&mut buf).await?; - if n != range.len() { - return Ok(ReadBytesResult::OutOfRange); - } - - Ok(ReadBytesResult::Integrate(buf)) - } -} - -impl Display for DiskCache { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("DiskCache") - .field("path", &self.root_dir) - .field("cache", &self.meta_cache) - .finish() - } -} - -#[derive(Debug, Clone)] -struct Paging { - page_size: usize, -} - -#[derive(Debug, Clone)] -struct PageRangeResult { - aligned_start: usize, - num_pages: usize, -} - -impl Paging { - fn page_range(&self, range: &Range) -> PageRangeResult { - // inclusive start - let aligned_start = range.start / self.page_size * self.page_size; - // exclusive end - let aligned_end = (range.end + self.page_size - 1) / self.page_size * self.page_size; - let num_pages = (aligned_end - aligned_start) / self.page_size; - - PageRangeResult { - aligned_start, - num_pages, - } - } -} - -/// There will be two kinds of file in this cache: -/// 1. manifest.json, which contains metadata, like -/// ```json -/// { -/// "create_at": "2022-12-01T08:51:15.167795+00:00", -/// "page_size": 1048576, -/// "version": 1 -/// } -/// ``` -/// 2. ${sst-path}-${range.start}-${range.end}, which contains bytes of given -/// range, start/end are aligned to page_size. 
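To make the paging and file naming concrete, the following standalone sketch (illustrative function names, plain &str instead of Path) shows how a requested byte range is mapped to page-aligned ranges and to the ${sst-path}-${range.start}-${range.end} cache file names described above; the rounding matches `Paging::page_range` and the truncation of the last page matches the `min(file_size)` handling in `get_range`.

use std::ops::Range;

fn aligned_page_ranges(range: &Range<usize>, page_size: usize, file_size: usize) -> Vec<Range<usize>> {
    // Inclusive start, rounded down to a page boundary.
    let aligned_start = range.start / page_size * page_size;
    // Exclusive end, rounded up to a page boundary.
    let aligned_end = (range.end + page_size - 1) / page_size * page_size;
    (aligned_start..aligned_end)
        .step_by(page_size)
        // The last page of a file may be shorter than `page_size`.
        .map(|start| start..(start + page_size).min(file_size))
        .collect()
}

fn page_cache_name(location: &str, range: &Range<usize>) -> String {
    // `${sst-path}-${range.start}-${range.end}`, with `/` replaced so the
    // name is a flat file directly under the cache dir.
    format!("{}-{}-{}", location.replace('/', "-"), range.start, range.end)
}

fn main() {
    // range = [3, 33) with page_size = 16 covers the aligned pages
    // [0, 16), [16, 32), [32, 48); with file_size = 40 the last page is
    // truncated to [32, 40).
    let pages = aligned_page_ranges(&(3..33), 16, 40);
    assert_eq!(pages, vec![0..16, 16..32, 32..40]);
    assert_eq!(page_cache_name("data/1.sst", &(0..16)), "data-1.sst-0-16");
}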
-#[derive(Debug)] -pub struct DiskCacheStore { - cache: DiskCache, - cap: usize, - page_size: usize, - meta_cache: PartitionedMutex, SeaHasherBuilder>, - underlying_store: Arc, - request_notifiers: Arc>>>, - runtime: RuntimeRef, -} - -impl DiskCacheStore { - pub async fn try_new( - cache_dir: String, - cap: usize, - page_size: usize, - underlying_store: Arc, - partition_bits: usize, - runtime: RuntimeRef, - ) -> Result { - let page_num = cap / page_size; - ensure!(page_num != 0, InvalidCapacity); - - let manifest = Self::create_manifest_if_not_exists(&cache_dir, page_size).await?; - if !manifest.is_valid(Manifest::CURRENT_VERSION, page_size) { - Self::reset_cache(&cache_dir, page_size).await?; - } - - let cache = DiskCache::try_new(cache_dir.clone(), page_num, partition_bits)?; - Self::recover_cache(&cache_dir, &cache).await?; - - let init_size_lru = |partition_num| -> Result<_> { - let cap_per_part = FILE_SIZE_CACHE_CAP / partition_num; - assert!(cap_per_part > 0); - Ok(LruCache::new(cap_per_part)) - }; - let meta_cache = PartitionedMutex::try_new_with_bit_len( - init_size_lru, - FILE_SIZE_CACHE_PARTITION_BITS, - SeaHasherBuilder, - )?; - - let request_notifiers = Arc::new(RequestNotifiers::default()); - - Ok(Self { - cache, - cap, - page_size, - meta_cache, - underlying_store, - request_notifiers, - runtime, - }) - } - - async fn reset_cache(cache_dir_path: &str, page_size: usize) -> Result<()> { - warn!("The manifest is outdated, the object store cache will be cleared"); - - fs::remove_dir_all(cache_dir_path).await.context(Io { - file: cache_dir_path, - })?; - - Self::create_manifest_if_not_exists(cache_dir_path, page_size).await?; - - Ok(()) - } - - async fn create_manifest_if_not_exists( - cache_dir_path: &str, - page_size: usize, - ) -> Result { - // TODO: introduce the manifest lock to avoid multiple process to modify the - // cache file data. - - let mut file = OpenOptions::new() - .write(true) - .create(true) - .read(true) - .truncate(false) - .open(std::path::Path::new(cache_dir_path).join(Manifest::FILE_NAME)) - .await - .context(Io { - file: Manifest::FILE_NAME, - })?; - - let metadata = file.metadata().await.context(Io { - file: Manifest::FILE_NAME, - })?; - - // Initialize the manifest if it doesn't exist. - if metadata.len() == 0 { - let manifest = Manifest { - page_size, - create_at: time_ext::current_as_rfc3339(), - version: Manifest::CURRENT_VERSION, - }; - - let buf = serde_json::to_vec_pretty(&manifest).context(SerializeManifest)?; - file.write_all(&buf).await.context(Io { - file: Manifest::FILE_NAME, - })?; - - return Ok(manifest); - } - - let mut buf = Vec::new(); - file.read_to_end(&mut buf).await.context(Io { - file: Manifest::FILE_NAME, - })?; - - // TODO: Maybe we should clear all the cache when the manifest is corrupted. - let manifest: Manifest = serde_json::from_slice(&buf).context(DeserializeManifest)?; - ensure!( - manifest.page_size == page_size, - InvalidManifest { - old: manifest.page_size, - new: page_size - } - ); - - Ok(manifest) - } - - async fn recover_cache(cache_dir_path: &str, cache: &DiskCache) -> Result<()> { - let mut cache_dir = tokio::fs::read_dir(cache_dir_path).await.context(Io { - file: cache_dir_path, - })?; - - while let Some(entry) = cache_dir.next_entry().await.with_context(|| Io { - file: format!("a file in the cache_dir:{cache_dir_path}"), - })? { - let file_name = entry.file_name().into_string().unwrap(); - if file_name == Manifest::FILE_NAME { - // Skip the manifest file. 
- continue; - } - - let file_size = match entry.metadata().await { - Ok(metadata) => metadata.len() as usize, - Err(e) => { - warn!("Failed to get the size of file:{file_name}, and it will be skipped for recover, err:{e}"); - // TODO: Maybe we should remove this file. - continue; - } - }; - - debug!("Disk cache recover_cache, filename:{file_name}, size:{file_size}"); - let page_meta = PageMeta { file_size }; - cache.insert_page_meta(file_name, page_meta); - } - - Ok(()) - } - - /// Generate the filename of a page file. - fn page_cache_name(location: &Path, range: &Range) -> String { - format!( - "{}-{}-{}", - location.as_ref().replace('/', "-"), - range.start, - range.end - ) - } - - async fn deduped_fetch_data( - &self, - location: &Path, - aligned_ranges: impl IntoIterator>, - ) -> Result>>> { - let aligned_ranges = aligned_ranges.into_iter(); - let (size, _) = aligned_ranges.size_hint(); - let mut rxs = Vec::with_capacity(size); - let mut need_fetch_block = Vec::new(); - let mut need_fetch_block_cache_key = Vec::new(); - - for aligned_range in aligned_ranges { - let (tx, rx) = oneshot::channel(); - let cache_key = Self::page_cache_name(location, &aligned_range); - - if let notifier::notifier::RequestResult::First = self - .request_notifiers - .insert_notifier(cache_key.to_owned(), tx) - { - need_fetch_block.push(aligned_range); - need_fetch_block_cache_key.push(cache_key); - } else { - DISK_CACHE_DEDUP_COUNT.inc(); - } - - rxs.push(rx); - } - - if need_fetch_block.is_empty() { - // All ranges are not first request, return directly. - return Ok(rxs); - } - - let fetched_bytes = { - // This guard will ensure notifiers got released when futures get cancelled - // during `get_ranges`. - let mut guard = ExecutionGuard::new(|| { - for cache_key in &need_fetch_block_cache_key { - let _ = self.request_notifiers.take_notifiers(cache_key); - } - }); - - let bytes = self - .underlying_store - .get_ranges(location, &need_fetch_block) - .await; - - guard.cancel(); - bytes - }; - - // Take all cache_key's notifiers out from request_notifiers immediately. - let notifiers_vec: Vec<_> = need_fetch_block_cache_key - .iter() - .map(|cache_key| self.request_notifiers.take_notifiers(cache_key).unwrap()) - .collect(); - - let fetched_bytes = match fetched_bytes { - Err(err) => { - for notifiers in notifiers_vec { - for notifier in notifiers { - if let Err(e) = notifier.send( - WaitNotifier { - message: err.to_string(), - } - .fail(), - ) { - warn!("Failed to send notifier error result, err:{e:?}."); - } - } - } - - return Err(err); - } - Ok(v) => v, - }; - - for ((bytes, notifiers), cache_key) in fetched_bytes - .into_iter() - .zip(notifiers_vec.into_iter()) - .zip(need_fetch_block_cache_key.into_iter()) - { - { - let cache = self.cache.clone(); - let bytes = bytes.clone(); - let handle = self - .runtime - .spawn(async move { cache.insert_data(cache_key, bytes).await }); - // In test, wait the handle to finish, otherwise the test may fail. - if cfg!(test) { - let _ = handle.await; - } - } - for notifier in notifiers { - if notifier.send(Ok(bytes.clone())).is_err() { - // The error contains sent bytes, which maybe very large, - // so we don't log error. - warn!("Failed to send notifier success result"); - } - } - } - - Ok(rxs) - } - - /// Fetch the data from the underlying store and then cache it. 
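The request deduplication performed by `deduped_fetch_data` reduces to the following standalone sketch. It is a simplified stand-in, not the real `RequestNotifiers` API: the first caller registered for a cache key fetches from the underlying store, every later caller only parks a oneshot sender and waits for the broadcast.

use std::{collections::HashMap, sync::Mutex};

use bytes::Bytes;
use tokio::sync::oneshot;

#[derive(Default)]
struct Dedup {
    // cache key -> waiters parked behind the in-flight fetch.
    waiters: Mutex<HashMap<String, Vec<oneshot::Sender<Bytes>>>>,
}

impl Dedup {
    /// Returns `None` if the caller is the first requester (and must do the
    /// fetch itself), otherwise a receiver that resolves once the first
    /// requester broadcasts the result.
    fn register(&self, key: &str) -> Option<oneshot::Receiver<Bytes>> {
        let mut waiters = self.waiters.lock().unwrap();
        match waiters.get_mut(key) {
            Some(list) => {
                let (tx, rx) = oneshot::channel();
                list.push(tx);
                Some(rx)
            }
            None => {
                waiters.insert(key.to_string(), Vec::new());
                None
            }
        }
    }

    /// Called by the first requester after the fetch: wake every waiter.
    fn broadcast(&self, key: &str, bytes: Bytes) {
        let list = self.waiters.lock().unwrap().remove(key).unwrap_or_default();
        for tx in list {
            // A dropped receiver just means that waiter gave up; ignore it.
            let _ = tx.send(bytes.clone());
        }
    }
}

The deleted implementation additionally wraps the fetch in an ExecutionGuard so that parked waiters are released if the fetching future is cancelled, and it reports error results to waiters as well; those details are omitted from this sketch.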
- async fn fetch_and_cache_data( - &self, - location: &Path, - aligned_range: &Range, - ) -> Result { - let mut rxs = self - .deduped_fetch_data(location, [aligned_range.clone()]) - .await?; - - assert_eq!(rxs.len(), 1); - - let rx = rxs.remove(0); - let bytes = rx.await.context(ReceiveBytesFromChannel)??; - Ok(bytes) - } - - /// Fetch the file meta from the cache or the underlying store. - async fn fetch_file_meta(&self, location: &Path) -> Result { - { - let mut cache = self.meta_cache.lock(location); - if let Some(file_meta) = cache.get(location) { - return Ok(file_meta.clone()); - } - } - // The file meta is miss from the cache, let's fetch it from the - // underlying store. - - let meta = self.underlying_store.head(location).await?; - let file_meta = FileMeta::from(meta); - { - let mut cache = self.meta_cache.lock(location); - cache.push(location.clone(), file_meta.clone()); - } - Ok(file_meta) - } -} - -impl Display for DiskCacheStore { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("DiskCacheStore") - .field("page_size", &self.page_size) - .field("cap", &self.cap) - .field("cache", &self.cache) - .finish() - } -} - -#[async_trait] -impl ObjectStore for DiskCacheStore { - async fn put(&self, location: &Path, payload: PutPayload) -> Result { - self.underlying_store.put(location, payload).await - } - - async fn put_opts( - &self, - location: &Path, - payload: PutPayload, - opts: PutOptions, - ) -> Result { - self.underlying_store - .put_opts(location, payload, opts) - .await - } - - async fn put_multipart(&self, location: &Path) -> Result> { - self.underlying_store.put_multipart(location).await - } - - async fn put_multipart_opts( - &self, - location: &Path, - opts: PutMultipartOpts, - ) -> Result> { - self.underlying_store - .put_multipart_opts(location, opts) - .await - } - - async fn get(&self, location: &Path) -> Result { - // In sst module, we only use get_range, fetched a whole file is not used, and - // it is not good for disk cache. - self.underlying_store.get(location).await - } - - async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { - self.underlying_store.get_opts(location, options).await - } - - async fn get_range(&self, location: &Path, range: Range) -> Result { - let file_meta = self.fetch_file_meta(location).await?; - ensure!( - file_meta.size >= range.end, - OutOfRange { - range, - file_size: file_meta.size, - last_modified: file_meta.last_modified, - file: location.to_string() - } - ); - - let PageRangeResult { - aligned_start, - num_pages, - } = { - let paging = Paging { - page_size: self.page_size, - }; - paging.page_range(&range) - }; - assert!(num_pages > 0); - - // Fast path for only one page involved. - if num_pages == 1 { - let aligned_end = (aligned_start + self.page_size).min(file_meta.size); - let aligned_range = aligned_start..aligned_end; - let filename = Self::page_cache_name(location, &aligned_range); - let range_in_file = (range.start - aligned_start)..(range.end - aligned_start); - if let Some(bytes) = self.cache.get_data(&filename, &range_in_file).await { - OBJECT_STORE_DISK_CACHE_HIT.inc(); - return Ok(bytes); - } - - // This page is missing from the disk cache, let's fetch it from the - // underlying store and insert it to the disk cache. - OBJECT_STORE_DISK_CACHE_MISS.inc(); - let aligned_bytes = self.fetch_and_cache_data(location, &aligned_range).await?; - - // Allocate a new buffer instead of the `aligned_bytes` to avoid memory - // overhead. 
- let mut bytes_buf = BytesMut::with_capacity(range.len()); - bytes_buf.extend_from_slice( - &aligned_bytes[(range.start - aligned_start)..(range.end - aligned_start)], - ); - return Ok(bytes_buf.freeze()); - } - - // The queried range involves multiple ranges. - // Here is an example to explain the paged bytes, saying range = [3, 33), - // page_size = 16, then aligned ranges will be [0, 16), [16, 32), [32, - // 48), and we need to combine those ranged bytes to get final result bytes. - let mut paged_bytes: Vec> = vec![None; num_pages]; - let mut num_missing_pages = 0; - { - let mut page_start = aligned_start; - let mut page_idx = 0; - while page_idx < num_pages { - let page_end = (page_start + self.page_size).min(file_meta.size); - let range_in_file = { - let real_start = page_start.max(range.start); - let real_end = page_end.min(range.end); - (real_start - page_start)..(real_end - page_start) - }; - let filename = Self::page_cache_name(location, &(page_start..page_end)); - if let Some(bytes) = self.cache.get_data(&filename, &range_in_file).await { - paged_bytes[page_idx] = Some(bytes); - } else { - num_missing_pages += 1; - } - - page_start += self.page_size; - page_idx += 1; - } - } - let num_hitting_pages = num_pages - num_missing_pages; - OBJECT_STORE_DISK_CACHE_HIT.inc_by(num_hitting_pages as u64); - OBJECT_STORE_DISK_CACHE_MISS.inc_by(num_missing_pages as u64); - - let concat_paged_bytes = |paged_bytes: Vec>| { - // Concat the paged bytes. - let mut byte_buf = BytesMut::with_capacity(range.len()); - for bytes in paged_bytes { - byte_buf.extend(bytes); - } - Ok(byte_buf.freeze()) - }; - - if num_missing_pages == 0 { - return concat_paged_bytes(paged_bytes); - } - - // Fetch all the missing pages from the underlying store. - let mut missing_ranges = Vec::with_capacity(num_missing_pages); - let mut missing_range_idx = Vec::with_capacity(num_missing_pages); - for (idx, cache_miss) in paged_bytes.iter().map(|v| v.is_none()).enumerate() { - if cache_miss { - let missing_range_start = aligned_start + idx * self.page_size; - let missing_range_end = (missing_range_start + self.page_size).min(file_meta.size); - missing_ranges.push(missing_range_start..missing_range_end); - missing_range_idx.push(idx); - } - } - - let mut missing_ranged_bytes = Vec::with_capacity(missing_ranges.len()); - let rxs = self - .deduped_fetch_data(location, missing_ranges.clone()) - .await?; - for rx in rxs { - let bytes = rx.await.context(ReceiveBytesFromChannel)??; - missing_ranged_bytes.push(bytes); - } - - assert_eq!(missing_ranged_bytes.len(), missing_ranges.len()); - - for ((missing_range, missing_range_idx), bytes) in missing_ranges - .into_iter() - .zip(missing_range_idx.into_iter()) - .zip(missing_ranged_bytes.into_iter()) - { - let offset = missing_range.start; - let truncated_range = (missing_range.start.max(range.start) - offset) - ..(missing_range.end.min(range.end) - offset); - - paged_bytes[missing_range_idx] = Some(bytes.slice(truncated_range)); - } - - return concat_paged_bytes(paged_bytes); - } - - async fn head(&self, location: &Path) -> Result { - let file_meta = self.fetch_file_meta(location).await?; - Ok(ObjectMeta { - location: location.clone(), - last_modified: file_meta.last_modified, - size: file_meta.size, - e_tag: None, - version: None, - }) - } - - async fn delete(&self, location: &Path) -> Result<()> { - self.underlying_store.delete(location).await - } - - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { - self.underlying_store.list(prefix) - } - - async fn 
list_with_delimiter(&self, prefix: Option<&Path>) -> Result { - self.underlying_store.list_with_delimiter(prefix).await - } - - async fn copy(&self, from: &Path, to: &Path) -> Result<()> { - self.underlying_store.copy(from, to).await - } - - async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { - self.underlying_store.copy_if_not_exists(from, to).await - } -} - -#[cfg(test)] -mod test { - use runtime::{Builder, RuntimeRef}; - use tempfile::{tempdir, TempDir}; - - use super::*; - use crate::{local_file, test_util::MemoryStore}; - - struct StoreWithCacheDir { - inner: DiskCacheStore, - cache_dir: TempDir, - } - - async fn prepare_store( - page_size: usize, - cap: usize, - partition_bits: usize, - runtime: RuntimeRef, - ) -> StoreWithCacheDir { - let local_store = Arc::new(MemoryStore::default()); - - let cache_dir = tempdir().unwrap(); - let store = DiskCacheStore::try_new( - cache_dir.as_ref().to_string_lossy().to_string(), - cap, - page_size, - local_store, - partition_bits, - runtime, - ) - .await - .unwrap(); - - StoreWithCacheDir { - inner: store, - cache_dir, - } - } - - fn test_file_exists(cache_dir: &TempDir, location: &Path, range: &Range) -> bool { - cache_dir - .path() - .join(DiskCacheStore::page_cache_name(location, range)) - .exists() - } - - #[test] - fn test_disk_cache_out_of_range() { - let rt = Arc::new(Builder::default().build().unwrap()); - rt.block_on(async { - let page_size = 16; - // 51 byte - let data = b"a b c d e f g h i j k l m n o p q r s t u v w x y z"; - let location = Path::from("out_of_range_test.sst"); - let store = prepare_store(page_size, 32, 0, rt.clone()).await; - let buf = Bytes::from_static(data); - store.inner.put(&location, buf.into()).await.unwrap(); - - // Read one page out of range. - let res = store.inner.get_range(&location, 48..54).await; - assert!(res.is_err()); - - // Read multiple pages out of range. 
- let res = store.inner.get_range(&location, 24..54).await; - assert!(res.is_err()); - }); - } - - #[test] - fn test_disk_cache_store_get_range() { - let rt = Arc::new(Builder::default().build().unwrap()); - rt.block_on(async { - let page_size = 16; - // 51 byte - let data = b"a b c d e f g h i j k l m n o p q r s t u v w x y z"; - let location = Path::from("1.sst"); - let store = prepare_store(page_size, 1024, 0, rt.clone()).await; - - let mut buf = BytesMut::with_capacity(data.len() * 4); - // extend 4 times, then location will contain 200 bytes - for _ in 0..4 { - buf.extend_from_slice(data); - } - store.inner.put(&location, buf.freeze().into()).await.unwrap(); - - let testcases = vec![ - (0..6, "a b c "), - (0..16, "a b c d e f g h "), - // len of aligned ranges will be 2 - (0..17, "a b c d e f g h i"), - (16..17, "i"), - // len of aligned ranges will be 6 - (16..100, "i j k l m n o p q r s t u v w x y za b c d e f g h i j k l m n o p q r s t u v w x y"), - ]; - - for (input, expected) in testcases { - assert_eq!( - store.inner.get_range(&location, input).await.unwrap(), - Bytes::copy_from_slice(expected.as_bytes()) - ); - } - - // remove cached values, then get again - { - for range in [0..16, 16..32, 32..48, 48..64, 64..80, 80..96, 96..112] { - let data_cache = store - .inner - .cache - .meta_cache - .lock(&DiskCacheStore::page_cache_name(&location, &range).as_str()); - assert!(data_cache - .contains(DiskCacheStore::page_cache_name(&location, &range).as_str())); - assert!(test_file_exists(&store.cache_dir, &location, &range)); - } - - for range in [16..32, 48..64, 80..96] { - let mut data_cache = store - .inner - .cache - .meta_cache - .lock(&DiskCacheStore::page_cache_name(&location, &range).as_str()); - assert!(data_cache - .pop(&DiskCacheStore::page_cache_name(&location, &range)) - .is_some()); - } - } - - assert_eq!( - store.inner.get_range(&location, 16..100).await.unwrap(), - Bytes::copy_from_slice( - b"i j k l m n o p q r s t u v w x y za b c d e f g h i j k l m n o p q r s t u v w x y" - ) - ); - - }); - } - - #[test] - #[ignore = "https://github.com/apache/horaedb/issues/1215"] - fn test_disk_cache_multi_thread_fetch_same_block() { - let rt = Arc::new(Builder::default().build().unwrap()); - rt.block_on(async { - let page_size = 16; - // 51 byte - let data = b"a b c d e f g h i j k l m n o p q r s t u v w x y z"; - let location = Path::from("1.sst"); - let store = Arc::new(prepare_store(page_size, 32, 0,rt.clone()).await); - - let mut buf = BytesMut::with_capacity(data.len() * 4); - // extend 4 times, then location will contain 200 bytes - for _ in 0..4 { - buf.extend_from_slice(data); - } - store.inner.put(&location, buf.freeze().into()).await.unwrap(); - - let testcases = [ - (0..6, "a b c "), - (0..16, "a b c d e f g h "), - (0..17, "a b c d e f g h i"), - (16..17, "i"), - (16..100, "i j k l m n o p q r s t u v w x y za b c d e f g h i j k l m n o p q r s t u v w x y"), - ]; - let testcases = testcases - .iter() - .cycle() - .take(testcases.len() * 100) - .cloned() - .collect::>(); - - let mut tasks = Vec::with_capacity(testcases.len()); - for (input, _) in &testcases { - let store = store.clone(); - let location = location.clone(); - let input = input.clone(); - - tasks.push(tokio::spawn(async move { - store.inner.get_range(&location, input).await.unwrap() - })); - } - - let actual = futures::future::join_all(tasks).await; - for (actual, (_, expected)) in actual.into_iter().zip(testcases.into_iter()) { - assert_eq!(actual.unwrap(), Bytes::from(expected)) - } - }); - } - - 
#[test] - fn test_disk_cache_remove_cache_file() { - let rt = Arc::new(Builder::default().build().unwrap()); - rt.block_on(async { - let page_size = 16; - // 51 byte - let data = b"a b c d e f g h i j k l m n o p q r s t u v w x y z"; - let location = Path::from("remove_cache_file.sst"); - let store = prepare_store(page_size, 32, 0, rt.clone()).await; - let mut buf = BytesMut::with_capacity(data.len() * 4); - // extend 4 times, then location will contain 200 bytes, but cache cap is 32 - for _ in 0..4 { - buf.extend_from_slice(data); - } - store - .inner - .put(&location, buf.freeze().into()) - .await - .unwrap(); - - let _ = store.inner.get_range(&location, 0..16).await.unwrap(); - let _ = store.inner.get_range(&location, 16..32).await.unwrap(); - // cache is full now - assert!(test_file_exists(&store.cache_dir, &location, &(0..16))); - assert!(test_file_exists(&store.cache_dir, &location, &(16..32))); - - // insert new cache, evict oldest entry - let _ = store.inner.get_range(&location, 32..48).await.unwrap(); - assert!(!test_file_exists(&store.cache_dir, &location, &(0..16))); - assert!(test_file_exists(&store.cache_dir, &location, &(32..48))); - - // insert new cache, evict oldest entry - let _ = store.inner.get_range(&location, 48..64).await.unwrap(); - assert!(!test_file_exists(&store.cache_dir, &location, &(16..32))); - assert!(test_file_exists(&store.cache_dir, &location, &(48..64))); - }); - } - - #[test] - fn test_disk_cache_remove_cache_file_two_partition() { - let rt = Arc::new(Builder::default().build().unwrap()); - rt.block_on(async { - let page_size = 16; - // 51 byte - let data = b"a b c d e f g h i j k l m n o p q r s t u v w x y z"; - let location = Path::from("remove_cache_file_two_partition.sst"); - // partition_cap: 64 / 16 / 2 = 2 - let store = prepare_store(page_size, 64, 1, rt.clone()).await; - let mut buf = BytesMut::with_capacity(data.len() * 8); - // extend 8 times - for _ in 0..8 { - buf.extend_from_slice(data); - } - store - .inner - .put(&location, buf.freeze().into()) - .await - .unwrap(); - // use seahash - // 0..16: partition 1 - // 16..32 partition 1 - // 32..48 partition 0 - // 48..64 partition 1 - // 64..80 partition 1 - // 80..96 partition 0 - // 96..112 partition 0 - // 112..128 partition 0 - // 128..144 partition 0 - let _ = store.inner.get_range(&location, 0..16).await.unwrap(); - let _ = store.inner.get_range(&location, 16..32).await.unwrap(); - // partition 1 cache is full now - assert!(test_file_exists(&store.cache_dir, &location, &(0..16))); - assert!(test_file_exists(&store.cache_dir, &location, &(16..32))); - - let _ = store.inner.get_range(&location, 32..48).await.unwrap(); - let _ = store.inner.get_range(&location, 80..96).await.unwrap(); - // partition 0 cache is full now - - assert!(test_file_exists(&store.cache_dir, &location, &(32..48))); - assert!(test_file_exists(&store.cache_dir, &location, &(80..96))); - - // insert new entry into partition 0, evict partition 0's oldest entry - let _ = store.inner.get_range(&location, 96..112).await.unwrap(); - assert!(!test_file_exists(&store.cache_dir, &location, &(32..48))); - assert!(test_file_exists(&store.cache_dir, &location, &(80..96))); - - assert!(test_file_exists(&store.cache_dir, &location, &(0..16))); - assert!(test_file_exists(&store.cache_dir, &location, &(16..32))); - - // insert new entry into partition 0, evict partition 0's oldest entry - let _ = store.inner.get_range(&location, 128..144).await.unwrap(); - assert!(!test_file_exists(&store.cache_dir, &location, &(80..96))); - 
assert!(test_file_exists(&store.cache_dir, &location, &(96..112))); - assert!(test_file_exists(&store.cache_dir, &location, &(128..144))); - - assert!(test_file_exists(&store.cache_dir, &location, &(0..16))); - assert!(test_file_exists(&store.cache_dir, &location, &(16..32))); - - // insert new entry into partition 1, evict partition 1's oldest entry - let _ = store.inner.get_range(&location, 64..80).await.unwrap(); - assert!(!test_file_exists(&store.cache_dir, &location, &(0..16))); - assert!(test_file_exists(&store.cache_dir, &location, &(16..32))); - assert!(test_file_exists(&store.cache_dir, &location, &(64..80))); - - assert!(test_file_exists(&store.cache_dir, &location, &(96..112))); - assert!(test_file_exists(&store.cache_dir, &location, &(128..144))); - }); - } - - #[test] - fn test_disk_cache_manifest() { - let rt = Arc::new(Builder::default().build().unwrap()); - rt.block_on(async { - let cache_dir = tempdir().unwrap(); - let cache_root_dir = cache_dir.as_ref().to_string_lossy().to_string(); - let page_size = 8; - let first_create_time = { - let _store = { - let local_path = tempdir().unwrap().as_ref().to_string_lossy().to_string(); - let local_store = - Arc::new(local_file::try_new_with_default(local_path).unwrap()); - - DiskCacheStore::try_new( - cache_root_dir.clone(), - 160, - 8, - local_store, - 0, - rt.clone(), - ) - .await - .unwrap() - }; - let manifest = - DiskCacheStore::create_manifest_if_not_exists(&cache_root_dir, page_size) - .await - .unwrap(); - - assert_eq!(manifest.page_size, 8); - assert_eq!(manifest.version, Manifest::CURRENT_VERSION); - manifest.create_at - }; - - // open again - { - let _store = { - let local_path = tempdir().unwrap().as_ref().to_string_lossy().to_string(); - let local_store = - Arc::new(local_file::try_new_with_default(local_path).unwrap()); - DiskCacheStore::try_new( - cache_root_dir.clone(), - 160, - 8, - local_store, - 0, - rt.clone(), - ) - .await - .unwrap() - }; - - let manifest = - DiskCacheStore::create_manifest_if_not_exists(&cache_root_dir, page_size) - .await - .unwrap(); - assert_eq!(manifest.create_at, first_create_time); - assert_eq!(manifest.page_size, 8); - assert_eq!(manifest.version, Manifest::CURRENT_VERSION); - } - - // open again, but with different page_size - { - let local_path = tempdir().unwrap().as_ref().to_string_lossy().to_string(); - let local_store = Arc::new(local_file::try_new_with_default(local_path).unwrap()); - let store = DiskCacheStore::try_new( - cache_dir.as_ref().to_string_lossy().to_string(), - 160, - page_size * 2, - local_store, - 0, - rt.clone(), - ) - .await; - - assert!(store.is_err()) - } - }); - } - - #[test] - fn test_disk_cache_recovery() { - let rt = Arc::new(Builder::default().enable_all().build().unwrap()); - rt.block_on(async { - let cache_dir = tempdir().unwrap(); - let cache_root_dir = cache_dir.as_ref().to_string_lossy().to_string(); - let page_size = 16; - let location = Path::from("recovery.sst"); - { - let store = { - let local_path = tempdir().unwrap().as_ref().to_string_lossy().to_string(); - let local_store = - Arc::new(local_file::try_new_with_default(local_path).unwrap()); - DiskCacheStore::try_new( - cache_root_dir.clone(), - 10240, - page_size, - local_store, - 0, - rt.clone(), - ) - .await - .unwrap() - }; - let data = b"abcd"; - let mut buf = BytesMut::with_capacity(data.len() * 1024); - for _ in 0..1024 { - buf.extend_from_slice(data); - } - let buf = buf.freeze(); - store.put(&location, buf.clone().into()).await.unwrap(); - let read_range = 16..100; - let bytes = store - 
.get_range(&location, read_range.clone()) - .await - .unwrap(); - assert_eq!(bytes.len(), read_range.len()); - assert_eq!(bytes[..], buf[read_range]) - }; - - // recover - { - let store = { - let local_path = tempdir().unwrap().as_ref().to_string_lossy().to_string(); - let local_store = - Arc::new(local_file::try_new_with_default(local_path).unwrap()); - DiskCacheStore::try_new( - cache_root_dir.clone(), - 160, - page_size, - local_store, - 0, - rt.clone(), - ) - .await - .unwrap() - }; - for range in [16..32, 32..48, 48..64, 64..80, 80..96, 96..112] { - let filename = DiskCacheStore::page_cache_name(&location, &range); - let cache = store.cache.meta_cache.lock(&filename); - assert!(cache.contains(&filename)); - assert!(test_file_exists(&cache_dir, &location, &range)); - } - }; - }); - } - - #[test] - fn test_disk_cache_bytes_crc() { - let testcases = vec![("abc", 910901175), ("hello horaedb", 4015368565)]; - - for (input, expect) in testcases { - let actual = CASTAGNOLI.checksum(input.as_bytes()); - assert_eq!(actual, expect); - } - } - - #[test] - fn test_corrupt_disk_cache() { - let rt = Arc::new(Builder::default().build().unwrap()); - rt.block_on(async { - for page_size in [1, 2, 4, 8, 16, 32, 64, 128] { - corrupt_disk_cache(page_size, rt.clone()).await; - } - }); - } - - async fn corrupt_disk_cache(page_size: usize, rt: RuntimeRef) { - let StoreWithCacheDir { - inner: store, - cache_dir, - } = prepare_store(page_size, 1024, 0, rt).await; - let test_file_name = "corrupted_disk_cache_file"; - let test_file_path = Path::from(test_file_name); - let test_file_bytes = Bytes::from("corrupted_disk_cache_file_data"); - - // Put data into store and get it to let the cache load the data. - store - .put(&test_file_path, test_file_bytes.clone().into()) - .await - .unwrap(); - - // The data should be in the cache. - let got_bytes = store - .get_range(&test_file_path, 0..test_file_bytes.len()) - .await - .unwrap(); - assert_eq!(got_bytes, test_file_bytes); - - // Corrupt files in the cache dir. - let mut cache_read_dir = tokio::fs::read_dir(cache_dir.as_ref()).await.unwrap(); - while let Some(entry) = cache_read_dir.next_entry().await.unwrap() { - let path_buf = entry.path(); - let path = path_buf.to_str().unwrap(); - if path.contains(test_file_name) { - let mut file = tokio::fs::OpenOptions::new() - .write(true) - .truncate(true) - .open(path) - .await - .unwrap(); - // TODO: currently the data integrity is checked based on the file size, so here - // we give a bytes with designed length to make the check failed. - file.write_all(b"corrupt").await.unwrap(); - } - } - - // The data should be removed from the cache. - let got_bytes = store - .get_range(&test_file_path, 0..test_file_bytes.len()) - .await - .unwrap(); - assert_eq!(got_bytes, test_file_bytes); - // The cache should be updated. 
- let mut cache_read_dir = tokio::fs::read_dir(cache_dir.as_ref()).await.unwrap(); - while let Some(entry) = cache_read_dir.next_entry().await.unwrap() { - let path_buf = entry.path(); - let path = path_buf.to_str().unwrap(); - if path.contains(test_file_name) { - let mut file = tokio::fs::OpenOptions::new() - .read(true) - .open(path) - .await - .unwrap(); - let mut buffer = Vec::new(); - file.read_to_end(&mut buffer).await.unwrap(); - assert_ne!(buffer, b"corrupted"); - } - } - } -} diff --git a/src/components/object_store/src/lib.rs b/src/components/object_store/src/lib.rs deleted file mode 100644 index 4627dbae77..0000000000 --- a/src/components/object_store/src/lib.rs +++ /dev/null @@ -1,40 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Re-export of [object_store] crate. - -use std::sync::Arc; - -pub use opendal::Error as OpenDalError; -pub use upstream::{ - path::Path, Error as ObjectStoreError, GetResult, ListResult, ObjectMeta, ObjectStore, - PutPayloadMut, -}; - -pub mod aliyun; -pub mod config; -pub mod disk_cache; -pub mod local_file; -pub mod mem_cache; -pub mod metrics; -pub mod multi_part; -pub mod prefix; -pub mod s3; -#[cfg(test)] -pub mod test_util; - -pub type ObjectStoreRef = Arc; diff --git a/src/components/object_store/src/local_file.rs b/src/components/object_store/src/local_file.rs deleted file mode 100644 index 4070b00489..0000000000 --- a/src/components/object_store/src/local_file.rs +++ /dev/null @@ -1,44 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use object_store_opendal::OpendalStore; -use opendal::{ - layers::{RetryLayer, TimeoutLayer}, - services::Fs, - Operator, Result, -}; - -use crate::config::LocalOptions; - -pub fn try_new(local_opts: &LocalOptions) -> Result { - let builder = Fs::default().root(&local_opts.data_dir); - let op = Operator::new(builder)? 
- .layer( - TimeoutLayer::new() - .with_timeout(local_opts.timeout.timeout.0) - .with_io_timeout(local_opts.timeout.io_timeout.0), - ) - .layer(RetryLayer::new().with_max_times(local_opts.max_retries)) - .finish(); - - Ok(OpendalStore::new(op)) -} - -pub fn try_new_with_default(data_dir: String) -> Result { - let local_opts = LocalOptions::new_with_default(data_dir); - try_new(&local_opts) -} diff --git a/src/components/object_store/src/mem_cache.rs b/src/components/object_store/src/mem_cache.rs deleted file mode 100644 index 9e40fb8e5c..0000000000 --- a/src/components/object_store/src/mem_cache.rs +++ /dev/null @@ -1,415 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! An implementation of ObjectStore, which support -//! 1. Cache based on memory, and support evict based on memory usage -//! 2. Builtin Partition to reduce lock contention - -use std::{ - fmt::{self, Display}, - num::NonZeroUsize, - ops::Range, - sync::Arc, -}; - -use async_trait::async_trait; -use bytes::Bytes; -use clru::{CLruCache, CLruCacheConfig, WeightScale}; -use futures::stream::BoxStream; -use hash_ext::{ahash::RandomState, build_fixed_seed_ahasher_builder}; -use macros::define_result; -use partitioned_lock::PartitionedMutex; -use snafu::{OptionExt, Snafu}; -use upstream::{ - path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, - PutMultipartOpts, PutOptions, PutPayload, PutResult, Result as ObjectStoreResult, -}; - -use crate::{ - metrics::{OBJECT_STORE_MEMORY_CACHE_HIT, OBJECT_STORE_MEMORY_CACHE_MISS}, - ObjectStoreRef, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("mem cache cap must large than 0",))] - InvalidCapacity, -} - -define_result!(Error); - -struct CustomScale; - -impl WeightScale for CustomScale { - fn weight(&self, _key: &String, value: &Bytes) -> usize { - value.len() - } -} - -pub struct MemCache { - /// Max memory this store can use - mem_cap: NonZeroUsize, - inner: PartitionedMutex, RandomState>, -} - -pub type MemCacheRef = Arc; - -impl MemCache { - pub fn try_new(partition_bits: usize, mem_cap: NonZeroUsize) -> Result { - let init_lru = |partition_num: usize| -> Result<_> { - let cap_per_part = - NonZeroUsize::new(mem_cap.get() / partition_num).context(InvalidCapacity)?; - Ok(CLruCache::with_config( - CLruCacheConfig::new(cap_per_part) - .with_hasher(build_fixed_seed_ahasher_builder()) - .with_scale(CustomScale), - )) - }; - - let inner = PartitionedMutex::try_new_with_bit_len( - init_lru, - partition_bits, - build_fixed_seed_ahasher_builder(), - )?; - - Ok(Self { mem_cap, inner }) - } - - fn get(&self, key: &str) -> Option { - self.inner.lock(&key).get(key).cloned() - } - - fn peek(&self, key: &str) -> Option { - self.inner.lock(&key).peek(key).cloned() - } - - fn 
insert(&self, key: String, value: Bytes) { - // don't care error now. - _ = self.inner.lock(&key).put_with_weight(key, value); - } - - /// Give a description of the cache state. - - #[cfg(test)] - fn keys(&self, part: &CLruCache) -> Vec { - part.iter().map(|(key, _)| key).cloned().collect::>() - } - - #[cfg(test)] - fn state_desc(&self) -> String { - self.inner - .get_all_partition() - .iter() - .map(|part| self.keys(&part.lock().unwrap()).join(",")) - .enumerate() - .map(|(part_no, keys)| format!("{part_no}: [{keys}]")) - .collect::>() - .join("\n") - } -} - -impl Display for MemCache { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("MemCache") - .field("mem_cap", &self.mem_cap) - .field("partitions", &self.inner.get_all_partition().len()) - .finish() - } -} - -/// Assembled with [`MemCache`], the [`MemCacheStore`] can cache the loaded data -/// from the `underlying_store` to avoid unnecessary data loading. -/// -/// With the `read_only_cache` field, caller can control whether to do caching -/// for the loaded data. BTW, all the accesses are forced to the order: -/// `cache` -> `underlying_store`. -pub struct MemCacheStore { - cache: MemCacheRef, - underlying_store: ObjectStoreRef, - readonly_cache: bool, -} - -impl MemCacheStore { - /// Create a default [`MemCacheStore`]. - pub fn new(cache: MemCacheRef, underlying_store: ObjectStoreRef) -> Self { - Self { - cache, - underlying_store, - readonly_cache: false, - } - } - - /// Create a [`MemCacheStore`] with a readonly cache. - pub fn new_with_readonly_cache(cache: MemCacheRef, underlying_store: ObjectStoreRef) -> Self { - Self { - cache, - underlying_store, - readonly_cache: true, - } - } - - fn cache_key(location: &Path, range: &Range) -> String { - format!("{}-{}-{}", location, range.start, range.end) - } - - async fn get_range_with_rw_cache( - &self, - location: &Path, - range: Range, - ) -> ObjectStoreResult { - // TODO(chenxiang): What if there are some overlapping range in cache? - // A request with range [5, 10) can also use [0, 20) cache - let cache_key = Self::cache_key(location, &range); - if let Some(bytes) = self.cache.get(&cache_key) { - OBJECT_STORE_MEMORY_CACHE_HIT.inc(); - return Ok(bytes); - } - - OBJECT_STORE_MEMORY_CACHE_MISS.inc(); - // TODO(chenxiang): What if two threads reach here? It's better to - // pend one thread, and only let one to fetch data from underlying store. - let bytes = self.underlying_store.get_range(location, range).await?; - self.cache.insert(cache_key, bytes.clone()); - - Ok(bytes) - } - - async fn get_range_with_ro_cache( - &self, - location: &Path, - range: Range, - ) -> ObjectStoreResult { - let cache_key = Self::cache_key(location, &range); - if let Some(bytes) = self.cache.peek(&cache_key) { - return Ok(bytes); - } - - // TODO(chenxiang): What if two threads reach here? It's better to - // pend one thread, and only let one to fetch data from underlying store. 
- self.underlying_store.get_range(location, range).await - } -} - -impl Display for MemCacheStore { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.cache.fmt(f) - } -} - -impl fmt::Debug for MemCacheStore { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("MemCacheStore").finish() - } -} - -#[async_trait] -impl ObjectStore for MemCacheStore { - async fn put(&self, location: &Path, payload: PutPayload) -> ObjectStoreResult { - self.underlying_store.put(location, payload).await - } - - async fn put_opts( - &self, - location: &Path, - payload: PutPayload, - opts: PutOptions, - ) -> ObjectStoreResult { - self.underlying_store - .put_opts(location, payload, opts) - .await - } - - async fn put_multipart(&self, location: &Path) -> ObjectStoreResult> { - self.underlying_store.put_multipart(location).await - } - - async fn put_multipart_opts( - &self, - location: &Path, - opts: PutMultipartOpts, - ) -> ObjectStoreResult> { - self.underlying_store - .put_multipart_opts(location, opts) - .await - } - - // TODO(chenxiang): don't cache whole path for reasons below - // 1. cache key don't support overlapping - // 2. In sst module, we only use get_range, get is not used - async fn get(&self, location: &Path) -> ObjectStoreResult { - self.underlying_store.get(location).await - } - - async fn get_opts(&self, location: &Path, options: GetOptions) -> ObjectStoreResult { - self.underlying_store.get_opts(location, options).await - } - - async fn get_range(&self, location: &Path, range: Range) -> ObjectStoreResult { - if self.readonly_cache { - self.get_range_with_ro_cache(location, range).await - } else { - self.get_range_with_rw_cache(location, range).await - } - } - - async fn head(&self, location: &Path) -> ObjectStoreResult { - self.underlying_store.head(location).await - } - - async fn delete(&self, location: &Path) -> ObjectStoreResult<()> { - self.underlying_store.delete(location).await - } - - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, ObjectStoreResult> { - self.underlying_store.list(prefix) - } - - async fn list_with_delimiter(&self, prefix: Option<&Path>) -> ObjectStoreResult { - self.underlying_store.list_with_delimiter(prefix).await - } - - async fn copy(&self, from: &Path, to: &Path) -> ObjectStoreResult<()> { - self.underlying_store.copy(from, to).await - } - - async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> ObjectStoreResult<()> { - self.underlying_store.copy_if_not_exists(from, to).await - } -} - -#[cfg(test)] -mod test { - use tempfile::tempdir; - - use super::*; - use crate::local_file; - - fn prepare_store(bits: usize, mem_cap: usize) -> MemCacheStore { - let local_path = tempdir().unwrap().as_ref().to_string_lossy().to_string(); - let local_store = Arc::new(local_file::try_new_with_default(local_path).unwrap()); - - let mem_cache = - Arc::new(MemCache::try_new(bits, NonZeroUsize::new(mem_cap).unwrap()).unwrap()); - MemCacheStore::new(mem_cache, local_store) - } - - #[tokio::test] - async fn test_mem_cache_evict() { - // single partition - let store = prepare_store(0, 13); - - // write date - let location = Path::from("1.sst"); - store - .put(&location, Bytes::from_static(&[1; 1024]).into()) - .await - .unwrap(); - - // get bytes from [0, 5), insert to cache - let range0_5 = 0..5; - _ = store.get_range(&location, range0_5.clone()).await.unwrap(); - assert!(store - .cache - .get(&MemCacheStore::cache_key(&location, &range0_5)) - .is_some()); - - // get bytes from [5, 10), insert to cache - let range5_10 = 
5..10; - _ = store.get_range(&location, range5_10.clone()).await.unwrap(); - assert!(store - .cache - .get(&MemCacheStore::cache_key(&location, &range0_5)) - .is_some()); - assert!(store - .cache - .get(&MemCacheStore::cache_key(&location, &range5_10)) - .is_some()); - - // get bytes from [10, 15), insert to cache - // cache is full, evict [0, 5) - let range10_15 = 10..15; - _ = store - .get_range(&location, range10_15.clone()) - .await - .unwrap(); - assert!(store - .cache - .get(&MemCacheStore::cache_key(&location, &range0_5)) - .is_none()); - assert!(store - .cache - .get(&MemCacheStore::cache_key(&location, &range5_10)) - .is_some()); - assert!(store - .cache - .get(&MemCacheStore::cache_key(&location, &range10_15)) - .is_some()); - } - - #[tokio::test] - async fn test_mem_cache_partition() { - // 4 partitions - let store = prepare_store(2, 100); - let location = Path::from("partition.sst"); - store - .put(&location, Bytes::from_static(&[1; 1024]).into()) - .await - .unwrap(); - - let range0_5 = 0..5; - let range100_105 = 100..105; - _ = store.get_range(&location, range0_5.clone()).await.unwrap(); - _ = store - .get_range(&location, range100_105.clone()) - .await - .unwrap(); - - assert_eq!( - r#"0: [partition.sst-0-5] -1: [] -2: [partition.sst-100-105] -3: []"#, - store.cache.as_ref().state_desc() - ); - - assert!(store - .cache - .get(&MemCacheStore::cache_key(&location, &range0_5)) - .is_some()); - assert!(store - .cache - .get(&MemCacheStore::cache_key(&location, &range100_105)) - .is_some()); - } - - #[test] - fn test_mem_cache_capacity() { - // 4 partitions - let store = prepare_store(2, 100); - assert_eq!( - "25,25,25,25", - store - .cache - .inner - .get_all_partition() - .iter() - .map(|p| p.lock().unwrap().capacity().to_string()) - .collect::>() - .join(",") - ); - } -} diff --git a/src/components/object_store/src/metrics.rs b/src/components/object_store/src/metrics.rs deleted file mode 100644 index 2847d2bf9e..0000000000 --- a/src/components/object_store/src/metrics.rs +++ /dev/null @@ -1,429 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{fmt::Display, ops::Range, sync::Arc, thread, time::Instant}; - -use async_trait::async_trait; -use bytes::Bytes; -use futures::stream::BoxStream; -use lazy_static::lazy_static; -use logger::trace; -use prometheus::{ - exponential_buckets, register_histogram_vec, register_int_counter, HistogramVec, IntCounter, -}; -use prometheus_static_metric::make_static_metric; -use runtime::Runtime; -use upstream::{ - path::Path, Error as StoreError, GetOptions, GetResult, ListResult, MultipartUpload, - ObjectMeta, ObjectStore, PutMultipartOpts, PutOptions, PutPayload, PutResult, Result, -}; - -use crate::ObjectStoreRef; - -make_static_metric! 
{ - pub struct ObjectStoreDurationHistogram: Histogram { - "op" => { - put, - put_opts, - put_multipart, - put_multipart_opts, - abort_multipart, - get, - get_opts, - get_range, - get_ranges, - head, - delete, - list, - list_with_delimiter, - copy, - rename, - copy_if_not_exists, - rename_if_not_exists - }, - } - - pub struct ObjectStoreThroughputHistogram: Histogram { - "op" => { - put, - put_opts, - get_range, - get_ranges, - }, - } -} - -lazy_static! { - static ref OBJECT_STORE_DURATION_HISTOGRAM_VEC: HistogramVec = register_histogram_vec!( - "object_store_latency", - "latency of object store's operation", - &["op"], - exponential_buckets(0.0005, 2.0, 20).unwrap() - ) - .unwrap(); - static ref OBJECT_STORE_THROUGHPUT_HISTOGRAM_VEC: HistogramVec = register_histogram_vec!( - "object_store_throughput", - "throughput of object store's operation", - &["op"], - // The max bound value is 64 * 2^24 = 1GB - exponential_buckets(64.0, 4.0, 12).unwrap() - ) - .unwrap(); - pub static ref OBJECT_STORE_MEMORY_CACHE_HIT: IntCounter = register_int_counter!( - "object_store_memory_cache_hit", - "object store memory cache hit" - ) - .unwrap(); - pub static ref OBJECT_STORE_MEMORY_CACHE_MISS: IntCounter = register_int_counter!( - "object_store_memory_cache_miss", - "object store memory cache miss" - ) - .unwrap(); - pub static ref OBJECT_STORE_DISK_CACHE_HIT: IntCounter = register_int_counter!( - "object_store_disk_cache_hit", - "object store disk cache hit" - ) - .unwrap(); - pub static ref OBJECT_STORE_DISK_CACHE_MISS: IntCounter = register_int_counter!( - "object_store_disk_cache_miss", - "object store disk cache miss" - ) - .unwrap(); -} - -lazy_static! { - pub static ref DISK_CACHE_DEDUP_COUNT: IntCounter = register_int_counter!( - "disk_cache_dedup_counter", - "Dedup disk cache fetch request counts" - ) - .unwrap(); -} - -lazy_static! { - pub static ref OBJECT_STORE_DURATION_HISTOGRAM: ObjectStoreDurationHistogram = - ObjectStoreDurationHistogram::from(&OBJECT_STORE_DURATION_HISTOGRAM_VEC); - pub static ref OBJECT_STORE_THROUGHPUT_HISTOGRAM: ObjectStoreThroughputHistogram = - ObjectStoreThroughputHistogram::from(&OBJECT_STORE_THROUGHPUT_HISTOGRAM_VEC); -} - -pub const METRICS: &str = "METRICS"; -/// A object store wrapper for collecting statistics about the underlying store. -#[derive(Debug)] -pub struct StoreWithMetrics { - store: ObjectStoreRef, - /// Use a separate runtime to execute object store methods; - /// Prevent computationally intensive tasks from occupying the runtime for a - /// long time and causing an increase in access time. - runtime: Arc, -} - -impl StoreWithMetrics { - pub fn new(store: ObjectStoreRef, runtime: Arc) -> Self { - Self { store, runtime } - } -} - -impl Display for StoreWithMetrics { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Store with metrics, underlying store:{}", self.store) - } -} - -#[async_trait] -impl ObjectStore for StoreWithMetrics { - async fn put(&self, location: &Path, payload: PutPayload) -> Result { - let _timer = OBJECT_STORE_DURATION_HISTOGRAM.put.start_timer(); - OBJECT_STORE_THROUGHPUT_HISTOGRAM - .put - .observe(payload.content_length() as f64); - - let loc = location.clone(); - let store = self.store.clone(); - self.runtime - .spawn(async move { store.put(&loc, payload).await }) - .await - .map_err(|source| StoreError::Generic { - store: METRICS, - source: Box::new(source), - })? 
- } - - async fn put_opts( - &self, - location: &Path, - payload: PutPayload, - opts: PutOptions, - ) -> Result { - let _timer = OBJECT_STORE_DURATION_HISTOGRAM.put_opts.start_timer(); - OBJECT_STORE_THROUGHPUT_HISTOGRAM - .put_opts - .observe(payload.content_length() as f64); - - let loc = location.clone(); - let store = self.store.clone(); - self.runtime - .spawn(async move { store.put_opts(&loc, payload, opts).await }) - .await - .map_err(|source| StoreError::Generic { - store: METRICS, - source: Box::new(source), - })? - } - - async fn put_multipart(&self, location: &Path) -> Result> { - let _timer = OBJECT_STORE_DURATION_HISTOGRAM.put_multipart.start_timer(); - - let instant = Instant::now(); - let loc = location.clone(); - let store = self.store.clone(); - let res = self - .runtime - .spawn(async move { store.put_multipart(&loc).await }) - .await - .map_err(|source| StoreError::Generic { - store: METRICS, - source: Box::new(source), - })?; - - trace!( - "Object store with metrics put_multipart cost:{}ms, location:{}, thread:{}-{:?}", - instant.elapsed().as_millis(), - location, - thread::current().name().unwrap_or("noname").to_string(), - thread::current().id() - ); - res - } - - async fn put_multipart_opts( - &self, - location: &Path, - opts: PutMultipartOpts, - ) -> Result> { - let _timer = OBJECT_STORE_DURATION_HISTOGRAM - .put_multipart_opts - .start_timer(); - - let instant = Instant::now(); - let loc = location.clone(); - let store = self.store.clone(); - let res = self - .runtime - .spawn(async move { store.put_multipart_opts(&loc, opts).await }) - .await - .map_err(|source| StoreError::Generic { - store: METRICS, - source: Box::new(source), - })?; - - trace!( - "Object store with metrics put_multipart_opts cost:{}ms, location:{}, thread:{}-{:?}", - instant.elapsed().as_millis(), - location, - thread::current().name().unwrap_or("noname").to_string(), - thread::current().id() - ); - res - } - - async fn get(&self, location: &Path) -> Result { - let _timer = OBJECT_STORE_DURATION_HISTOGRAM.get.start_timer(); - let store = self.store.clone(); - let loc = location.clone(); - self.runtime - .spawn(async move { store.get(&loc).await }) - .await - .map_err(|source| StoreError::Generic { - store: METRICS, - source: Box::new(source), - })? - } - - async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { - let _timer = OBJECT_STORE_DURATION_HISTOGRAM.get_opts.start_timer(); - let store = self.store.clone(); - let loc = location.clone(); - self.runtime - .spawn(async move { store.get_opts(&loc, options).await }) - .await - .map_err(|source| StoreError::Generic { - store: METRICS, - source: Box::new(source), - })? 
- } - - async fn get_range(&self, location: &Path, range: Range) -> Result { - let _timer = OBJECT_STORE_DURATION_HISTOGRAM.get_range.start_timer(); - - let instant = Instant::now(); - let store = self.store.clone(); - let loc = location.clone(); - let result = self - .runtime - .spawn(async move { store.get_range(&loc, range.clone()).await }) - .await - .map_err(|source| StoreError::Generic { - store: METRICS, - source: Box::new(source), - })??; - trace!( - "Object store with metrics get_range cost:{}ms, location:{}, thread:{}-{:?}", - instant.elapsed().as_millis(), - location, - thread::current().name().unwrap_or("noname").to_string(), - thread::current().id() - ); - - OBJECT_STORE_THROUGHPUT_HISTOGRAM - .get_range - .observe(result.len() as f64); - Ok(result) - } - - async fn get_ranges(&self, location: &Path, ranges: &[Range]) -> Result> { - let _timer = OBJECT_STORE_DURATION_HISTOGRAM.get_ranges.start_timer(); - let store = self.store.clone(); - let loc = location.clone(); - let ranges = ranges.to_vec(); - let result = self - .runtime - .spawn(async move { store.get_ranges(&loc, &ranges).await }) - .await - .map_err(|e| StoreError::Generic { - store: METRICS, - source: Box::new(e), - })??; - let len: usize = result.iter().map(|v| v.len()).sum(); - OBJECT_STORE_THROUGHPUT_HISTOGRAM - .get_ranges - .observe(len as f64); - - Ok(result) - } - - async fn head(&self, location: &Path) -> Result { - let _timer = OBJECT_STORE_DURATION_HISTOGRAM.head.start_timer(); - - let instant = Instant::now(); - let store = self.store.clone(); - let loc = location.clone(); - let response = self - .runtime - .spawn(async move { store.head(&loc).await }) - .await - .map_err(|source| StoreError::Generic { - store: METRICS, - source: Box::new(source), - })?; - - trace!( - "Object store with metrics head cost:{}ms, location:{}", - instant.elapsed().as_millis(), - location - ); - response - } - - async fn delete(&self, location: &Path) -> Result<()> { - let _timer = OBJECT_STORE_DURATION_HISTOGRAM.delete.start_timer(); - let store = self.store.clone(); - let loc = location.clone(); - self.runtime - .spawn(async move { store.delete(&loc).await }) - .await - .map_err(|source| StoreError::Generic { - store: METRICS, - source: Box::new(source), - })? - } - - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { - let _timer = OBJECT_STORE_DURATION_HISTOGRAM.list.start_timer(); - self.store.list(prefix) - } - - async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result { - let _timer = OBJECT_STORE_DURATION_HISTOGRAM - .list_with_delimiter - .start_timer(); - self.store.list_with_delimiter(prefix).await - } - - async fn copy(&self, from: &Path, to: &Path) -> Result<()> { - let _timer = OBJECT_STORE_DURATION_HISTOGRAM.copy.start_timer(); - - let store = self.store.clone(); - let from = from.clone(); - let to = to.clone(); - self.runtime - .spawn(async move { store.copy(&from, &to).await }) - .await - .map_err(|source| StoreError::Generic { - store: METRICS, - source: Box::new(source), - })? - } - - async fn rename(&self, from: &Path, to: &Path) -> Result<()> { - let _timer = OBJECT_STORE_DURATION_HISTOGRAM.rename.start_timer(); - - let store = self.store.clone(); - let from = from.clone(); - let to = to.clone(); - self.runtime - .spawn(async move { store.rename(&from, &to).await }) - .await - .map_err(|source| StoreError::Generic { - store: METRICS, - source: Box::new(source), - })? 
- } - - async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { - let _timer = OBJECT_STORE_DURATION_HISTOGRAM - .copy_if_not_exists - .start_timer(); - - let store = self.store.clone(); - let from = from.clone(); - let to = to.clone(); - self.runtime - .spawn(async move { store.copy_if_not_exists(&from, &to).await }) - .await - .map_err(|source| StoreError::Generic { - store: METRICS, - source: Box::new(source), - })? - } - - async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { - let _timer = OBJECT_STORE_DURATION_HISTOGRAM - .rename_if_not_exists - .start_timer(); - - let store = self.store.clone(); - let from = from.clone(); - let to = to.clone(); - self.runtime - .spawn(async move { store.rename_if_not_exists(&from, &to).await }) - .await - .map_err(|source| StoreError::Generic { - store: METRICS, - source: Box::new(source), - })? - } -} diff --git a/src/components/object_store/src/multi_part.rs b/src/components/object_store/src/multi_part.rs deleted file mode 100644 index fb5b9dd9fb..0000000000 --- a/src/components/object_store/src/multi_part.rs +++ /dev/null @@ -1,224 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{ - io::Error as IoError, - pin::Pin, - sync::Arc, - task::{Context, Poll}, -}; - -use bytes::Bytes; -use futures::{future::BoxFuture, ready, Future, FutureExt}; -use tokio::{io::AsyncWrite, sync::Mutex, task::JoinSet}; -pub use upstream::PutPayloadMut; -use upstream::{path::Path, Error, MultipartUpload, PutPayload, PutResult}; - -use crate::ObjectStoreRef; - -// TODO: remove Mutex and make ConcurrentMultipartUpload thread-safe -pub type MultiUploadRef = Arc>; - -const CHUNK_SIZE: usize = 5 * 1024 * 1024; -const MAX_CONCURRENCY: usize = 10; - -#[derive(Debug)] -pub struct ConcurrentMultipartUpload { - upload: Box, - - buffer: PutPayloadMut, - - chunk_size: usize, - - tasks: JoinSet>, -} - -impl ConcurrentMultipartUpload { - pub fn new(upload: Box, chunk_size: usize) -> Self { - Self { - upload, - chunk_size, - buffer: PutPayloadMut::new(), - tasks: Default::default(), - } - } - - pub fn poll_tasks( - &mut self, - cx: &mut Context<'_>, - max_concurrency: usize, - ) -> Poll> { - while !self.tasks.is_empty() && self.tasks.len() >= max_concurrency { - ready!(self.tasks.poll_join_next(cx)).unwrap()?? 
- } - Poll::Ready(Ok(())) - } - - fn put_part(&mut self, part: PutPayload) { - self.tasks.spawn(self.upload.put_part(part)); - } - - pub fn put(&mut self, mut bytes: Bytes) { - while !bytes.is_empty() { - let remaining = self.chunk_size - self.buffer.content_length(); - if bytes.len() < remaining { - self.buffer.push(bytes); - return; - } - self.buffer.push(bytes.split_to(remaining)); - let buffer = std::mem::take(&mut self.buffer); - self.put_part(buffer.into()) - } - } - - pub fn write(&mut self, mut buf: &[u8]) { - while !buf.is_empty() { - let remaining = self.chunk_size - self.buffer.content_length(); - let to_read = buf.len().min(remaining); - self.buffer.extend_from_slice(&buf[..to_read]); - if to_read == remaining { - let buffer = std::mem::take(&mut self.buffer); - self.put_part(buffer.into()) - } - buf = &buf[to_read..] - } - } - - pub async fn flush(&mut self, max_concurrency: usize) -> Result<(), Error> { - futures::future::poll_fn(|cx| self.poll_tasks(cx, max_concurrency)).await - } - - pub async fn finish(&mut self) -> Result { - if !self.buffer.is_empty() { - let part = std::mem::take(&mut self.buffer); - self.put_part(part.into()) - } - - self.flush(0).await?; - self.upload.complete().await - } - - pub async fn abort(&mut self) -> Result<(), Error> { - self.tasks.shutdown().await; - self.upload.abort().await - } -} - -pub struct MultiUploadWriter { - pub multi_upload: MultiUploadRef, - upload_task: Option>>, - flush_task: Option>>, - completion_task: Option>>, -} - -impl<'a> MultiUploadWriter { - pub async fn new(object_store: &'a ObjectStoreRef, location: &'a Path) -> Result { - let upload_writer = object_store.put_multipart(location).await?; - - let multi_upload = Arc::new(Mutex::new(ConcurrentMultipartUpload::new( - upload_writer, - CHUNK_SIZE, - ))); - - let multi_upload = Self { - multi_upload, - upload_task: None, - flush_task: None, - completion_task: None, - }; - - Ok(multi_upload) - } - - pub fn aborter(&self) -> MultiUploadRef { - self.multi_upload.clone() - } -} - -impl AsyncWrite for MultiUploadWriter { - fn poll_write( - mut self: Pin<&mut Self>, - cx: &mut std::task::Context<'_>, - buf: &[u8], - ) -> Poll> { - let multi_upload = self.multi_upload.clone(); - let buf = buf.to_owned(); - - let upload_task = self.upload_task.insert( - async move { - multi_upload - .lock() - .await - .flush(MAX_CONCURRENCY) - .await - .map_err(IoError::other)?; - - multi_upload.lock().await.write(&buf); - Ok(buf.len()) - } - .boxed(), - ); - - Pin::new(upload_task).poll(cx) - } - - fn poll_flush( - mut self: Pin<&mut Self>, - cx: &mut std::task::Context<'_>, - ) -> Poll> { - let multi_upload = self.multi_upload.clone(); - - let flush_task = self.flush_task.insert( - async move { - multi_upload - .lock() - .await - .flush(0) - .await - .map_err(IoError::other)?; - - Ok(()) - } - .boxed(), - ); - - Pin::new(flush_task).poll(cx) - } - - fn poll_shutdown( - mut self: Pin<&mut Self>, - cx: &mut std::task::Context<'_>, - ) -> Poll> { - let multi_upload = self.multi_upload.clone(); - - let completion_task = self.completion_task.get_or_insert_with(|| { - async move { - multi_upload - .lock() - .await - .finish() - .await - .map_err(IoError::other)?; - - Ok(()) - } - .boxed() - }); - - Pin::new(completion_task).poll(cx) - } -} diff --git a/src/components/object_store/src/prefix.rs b/src/components/object_store/src/prefix.rs deleted file mode 100644 index 24233eebf9..0000000000 --- a/src/components/object_store/src/prefix.rs +++ /dev/null @@ -1,444 +0,0 @@ -// Licensed to the Apache Software 
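The buffering rule in `ConcurrentMultipartUpload::put`/`write` above reduces to: fill an internal buffer up to `chunk_size` (5 MiB here), flush a complete part whenever the buffer fills, and keep the remainder buffered until `finish` uploads it as the last part. A pure-std sketch of that rule (illustrative; `split_into_parts` is a hypothetical helper, and the real parts are uploaded concurrently through the `JoinSet` above rather than collected):

// Split incoming data into full-size parts plus one trailing partial part.
fn split_into_parts(mut data: &[u8], chunk_size: usize) -> Vec<Vec<u8>> {
    let mut parts = Vec::new();
    while data.len() >= chunk_size {
        parts.push(data[..chunk_size].to_vec());
        data = &data[chunk_size..];
    }
    // The tail stays buffered until `finish()` uploads it as the final part.
    if !data.is_empty() {
        parts.push(data.to_vec());
    }
    parts
}

fn main() {
    let parts = split_into_parts(&[0u8; 12], 5);
    assert_eq!(parts.iter().map(Vec::len).collect::<Vec<_>>(), vec![5, 5, 2]);
}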
Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{fmt::Display, ops::Range}; - -use async_trait::async_trait; -use bytes::Bytes; -use futures::{stream::BoxStream, StreamExt}; -use upstream::{ - path::{self, Path, DELIMITER}, - Error, GetOptions, GetResult, GetResultPayload, ListResult, MultipartUpload, ObjectMeta, - ObjectStore, PutMultipartOpts, PutOptions, PutPayload, PutResult, Result, -}; - -use crate::ObjectStoreRef; - -#[derive(Debug)] -struct ErrorWithMsg { - msg: String, -} - -impl std::error::Error for ErrorWithMsg {} - -impl Display for ErrorWithMsg { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "StoreWithPrefix error, msg:{}", self.msg,) - } -} - -/// Wrap a real store and hijack all operations by adding the specific prefix to -/// the target location. -#[derive(Debug)] -pub struct StoreWithPrefix { - store: ObjectStoreRef, - prefix: Path, -} - -impl Display for StoreWithPrefix { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "Store with prefix, underlying store:{}, prefix path:{:?}", - self.store, self.prefix, - ) - } -} - -impl StoreWithPrefix { - pub fn new(prefix: String, store: ObjectStoreRef) -> Result { - let prefix = Path::parse(prefix)?; - Ok(Self { store, prefix }) - } - - fn add_prefix_to_loc(&self, loc: &Path) -> Path { - if self.prefix.as_ref().is_empty() { - return loc.clone(); - } - - let splitted_prefix = self.prefix.as_ref().split(DELIMITER); - let splitted_loc = loc.as_ref().split(DELIMITER); - Path::from_iter(splitted_prefix.chain(splitted_loc)) - } - - fn remove_prefix_from_loc(&self, loc: &Path) -> Result { - if self.prefix.as_ref().is_empty() { - return Ok(loc.clone()); - } - - let raw_prefix = self.prefix.as_ref(); - let raw_loc = loc.as_ref(); - match raw_loc.strip_prefix(raw_prefix) { - Some(v) => Path::parse(v).map_err(|e| Error::InvalidPath { source: e }), - None => Err(Error::InvalidPath { - source: path::Error::PrefixMismatch { - path: raw_loc.to_string(), - prefix: raw_prefix.to_string(), - }, - }), - } - } -} - -#[async_trait] -impl ObjectStore for StoreWithPrefix { - async fn put(&self, location: &Path, payload: PutPayload) -> Result { - let new_loc = self.add_prefix_to_loc(location); - self.store.put(&new_loc, payload).await - } - - async fn put_opts( - &self, - location: &Path, - payload: PutPayload, - opts: PutOptions, - ) -> Result { - let new_loc = self.add_prefix_to_loc(location); - self.store.put_opts(&new_loc, payload, opts).await - } - - async fn put_multipart(&self, location: &Path) -> Result> { - let new_loc = self.add_prefix_to_loc(location); - self.store.put_multipart(&new_loc).await - } - - async fn put_multipart_opts( - &self, - location: &Path, - opts: PutMultipartOpts, - ) -> Result> { - let new_loc = 
self.add_prefix_to_loc(location); - self.store.put_multipart_opts(&new_loc, opts).await - } - - async fn get(&self, location: &Path) -> Result { - let new_loc = self.add_prefix_to_loc(location); - let res = self.store.get(&new_loc).await?; - if let GetResultPayload::File(_, _) = &res.payload { - let err = ErrorWithMsg { - msg: "StoreWithPrefix doesn't support object store based on local file system" - .to_string(), - }; - return Err(Error::NotSupported { - source: Box::new(err), - }); - } - - Ok(res) - } - - async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { - let new_loc = self.add_prefix_to_loc(location); - let res = self.store.get_opts(&new_loc, options).await?; - if let GetResultPayload::File(_, _) = &res.payload { - let err = ErrorWithMsg { - msg: "StoreWithPrefix doesn't support object store based on local file system" - .to_string(), - }; - return Err(Error::NotSupported { - source: Box::new(err), - }); - } - - Ok(res) - } - - async fn get_range(&self, location: &Path, range: Range) -> Result { - let new_loc = self.add_prefix_to_loc(location); - self.store.get_range(&new_loc, range).await - } - - async fn get_ranges(&self, location: &Path, ranges: &[Range]) -> Result> { - let new_loc = self.add_prefix_to_loc(location); - self.store.get_ranges(&new_loc, ranges).await - } - - /// Return the metadata for the specified location - async fn head(&self, location: &Path) -> Result { - let new_loc = self.add_prefix_to_loc(location); - let mut meta = self.store.head(&new_loc).await?; - meta.location = self.remove_prefix_from_loc(&meta.location)?; - Ok(meta) - } - - /// Delete the object at the specified location. - async fn delete(&self, location: &Path) -> Result<()> { - let new_loc = self.add_prefix_to_loc(location); - self.store.delete(&new_loc).await - } - - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { - let objects = if let Some(loc) = prefix { - let new_loc = self.add_prefix_to_loc(loc); - self.store.list(Some(&new_loc)) - } else { - self.store.list(Some(&self.prefix)) - }; - - let new_objects = objects.map(|mut obj| { - if let Ok(v) = &mut obj { - v.location = self.remove_prefix_from_loc(&v.location)?; - } - - obj - }); - new_objects.boxed() - } - - async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result { - let mut list_res = if let Some(loc) = prefix { - let new_loc = self.add_prefix_to_loc(loc); - self.store.list_with_delimiter(Some(&new_loc)).await? - } else { - self.store.list_with_delimiter(Some(&self.prefix)).await? 
- }; - - for dir in &mut list_res.common_prefixes { - *dir = self.remove_prefix_from_loc(dir)?; - } - - for object in &mut list_res.objects { - object.location = self.remove_prefix_from_loc(&object.location)?; - } - - Ok(list_res) - } - - async fn copy(&self, from: &Path, to: &Path) -> Result<()> { - let new_from = self.add_prefix_to_loc(from); - let new_to = self.add_prefix_to_loc(to); - self.store.copy(&new_from, &new_to).await - } - - async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { - let new_from = self.add_prefix_to_loc(from); - let new_to = self.add_prefix_to_loc(to); - self.store.copy(&new_from, &new_to).await - } -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use chrono::{DateTime, Utc}; - use futures::{stream, stream::StreamExt}; - use tempfile::tempdir; - - use super::*; - use crate::local_file; - - #[derive(Debug, Clone)] - struct PathPrefixChecker { - prefix: String, - } - - impl PathPrefixChecker { - fn check(&self, location: &Path) { - assert!(location.as_ref().starts_with(&self.prefix)); - } - } - - // Simple mock object store, only used for test. - #[derive(Debug, Clone)] - struct MockObjectStore { - file_num: usize, - content: Bytes, - prefix_checker: PathPrefixChecker, - } - - impl Display for MockObjectStore { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "MockObjectStore") - } - } - - #[async_trait] - impl ObjectStore for MockObjectStore { - async fn put(&self, location: &Path, _payload: PutPayload) -> Result { - self.prefix_checker.check(location); - Ok(PutResult { - e_tag: None, - version: None, - }) - } - - async fn put_opts( - &self, - _location: &Path, - _payload: PutPayload, - _opts: PutOptions, - ) -> Result { - Err(Error::NotImplemented) - } - - async fn put_multipart_opts( - &self, - _location: &Path, - _opts: PutMultipartOpts, - ) -> Result> { - Err(Error::NotImplemented) - } - - async fn get(&self, location: &Path) -> Result { - self.prefix_checker.check(location); - Err(Error::NotImplemented) - } - - async fn get_opts(&self, _location: &Path, _options: GetOptions) -> Result { - Err(Error::NotImplemented) - } - - async fn get_range(&self, location: &Path, _range: Range) -> Result { - self.prefix_checker.check(location); - Ok(self.content.clone()) - } - - async fn head(&self, location: &Path) -> Result { - self.prefix_checker.check(location); - - Ok(ObjectMeta { - location: location.clone(), - last_modified: DateTime::::default(), - size: 0, - e_tag: None, - version: None, - }) - } - - async fn delete(&self, location: &Path) -> Result<()> { - self.prefix_checker.check(location); - - Err(Error::NotImplemented) - } - - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { - if let Some(loc) = prefix { - self.prefix_checker.check(loc); - } - let prefix = prefix.map(|v| v.to_string()).unwrap_or_default(); - let mut objects = Vec::with_capacity(self.file_num); - for file_idx in 0..self.file_num { - let raw_filepath = format!("{prefix}/{file_idx}"); - let filepath = Path::from(raw_filepath); - let object = ObjectMeta { - location: filepath, - last_modified: DateTime::::default(), - size: 0, - e_tag: None, - version: None, - }; - objects.push(Ok(object)); - } - - stream::iter(objects).boxed() - } - - async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result { - if let Some(loc) = prefix { - self.prefix_checker.check(loc); - } - Err(Error::NotImplemented) - } - - async fn copy(&self, from: &Path, to: &Path) -> Result<()> { - self.prefix_checker.check(from); - 
self.prefix_checker.check(to); - Err(Error::NotImplemented) - } - - async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { - self.prefix_checker.check(from); - self.prefix_checker.check(to); - Err(Error::NotImplemented) - } - } - - #[tokio::test] - async fn test_with_mock_store() { - let test_prefix = "test"; - let mock_store = MockObjectStore { - file_num: 3, - content: Bytes::from_static(b"00000"), - prefix_checker: PathPrefixChecker { - prefix: test_prefix.to_string(), - }, - }; - let mock_store = Arc::new(mock_store); - let prefix_store = StoreWithPrefix::new(test_prefix.to_string(), mock_store).unwrap(); - - let test_filepath = Path::from("0/100"); - - // Ignore the result and let the `prefix_checker` in the `MockObjectStore` to do - // the assertion. - let _ = prefix_store - .put(&test_filepath, Bytes::from_static(b"1111").into()) - .await; - - let _ = prefix_store.get(&test_filepath).await; - let _ = prefix_store.get_range(&test_filepath, 0..1).await; - let _ = prefix_store.get_ranges(&test_filepath, &[0..2; 1]).await; - - let meta = prefix_store.head(&test_filepath).await.unwrap(); - assert!(!meta.location.as_ref().starts_with(test_prefix)); - - let _ = prefix_store.delete(&test_filepath).await; - - for meta in prefix_store - .list(Some(&test_filepath)) - .collect::>() - .await - { - let meta = meta.unwrap(); - assert!(!meta.location.as_ref().starts_with(test_prefix)); - } - - let _ = prefix_store.copy(&test_filepath, &test_filepath).await; - let _ = prefix_store - .copy_if_not_exists(&test_filepath, &test_filepath) - .await; - } - - #[test] - fn test_prefix() { - let cases = vec![ - ("", "100/101.sst", "100/101.sst"), - ("0", "100/101.sst", "0/100/101.sst"), - ("0/1", "100/101.sst", "0/1/100/101.sst"), - ("/0/1", "100/101.sst", "0/1/100/101.sst"), - ("/0/1/", "100/101.sst", "0/1/100/101.sst"), - ]; - - let local_path = tempdir().unwrap().as_ref().to_string_lossy().to_string(); - let local_store = Arc::new(local_file::try_new_with_default(local_path).unwrap()); - for (prefix, filename, expect_loc) in cases.clone() { - let prefix_store = - StoreWithPrefix::new(prefix.to_string(), local_store.clone()).unwrap(); - let real_loc = prefix_store.add_prefix_to_loc(&Path::from(filename)); - assert_eq!(expect_loc, real_loc.as_ref(), "prefix:{prefix}"); - } - - for (prefix, expect_filename, loc) in cases { - let prefix_store = - StoreWithPrefix::new(prefix.to_string(), local_store.clone()).unwrap(); - let real_filename = prefix_store - .remove_prefix_from_loc(&Path::from(loc)) - .unwrap(); - assert_eq!(expect_filename, real_filename.as_ref(), "prefix:{prefix}"); - } - } -} diff --git a/src/components/object_store/src/s3.rs b/src/components/object_store/src/s3.rs deleted file mode 100644 index 2b81521f81..0000000000 --- a/src/components/object_store/src/s3.rs +++ /dev/null @@ -1,54 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
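The path handling in `StoreWithPrefix` above is symmetric: writes prepend the configured prefix to the caller's location, while reads and listings strip it back off so callers never observe the prefix. A dependency-free sketch using plain '/'-delimited strings (illustrative; the deleted code works on `object_store::path::Path`, which also normalizes leading and trailing delimiters):

// Hypothetical helpers mirroring add_prefix_to_loc / remove_prefix_from_loc.
fn add_prefix(prefix: &str, loc: &str) -> String {
    if prefix.is_empty() {
        return loc.to_string();
    }
    format!("{}/{}", prefix.trim_matches('/'), loc.trim_start_matches('/'))
}

fn remove_prefix(prefix: &str, loc: &str) -> Option<String> {
    loc.strip_prefix(prefix.trim_matches('/'))
        .map(|rest| rest.trim_start_matches('/').to_string())
}

fn main() {
    assert_eq!(add_prefix("0/1", "100/101.sst"), "0/1/100/101.sst");
    assert_eq!(
        remove_prefix("0/1", "0/1/100/101.sst").as_deref(),
        Some("100/101.sst")
    );
    // A mismatched prefix is surfaced as an error (here: None).
    assert_eq!(remove_prefix("0/2", "0/1/100/101.sst"), None);
}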
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use object_store_opendal::OpendalStore; -use opendal::{ - layers::{RetryLayer, TimeoutLayer}, - raw::HttpClient, - services::S3, - Operator, Result, -}; - -use crate::config::S3Options; - -pub fn try_new(s3_option: &S3Options) -> Result { - let http_builder = reqwest::ClientBuilder::new() - .pool_max_idle_per_host(s3_option.http.pool_max_idle_per_host) - .http2_keep_alive_timeout(s3_option.http.keep_alive_timeout.0) - .http2_keep_alive_while_idle(true) - .http2_keep_alive_interval(s3_option.http.keep_alive_interval.0) - .timeout(s3_option.http.timeout.0); - let http_client = HttpClient::build(http_builder)?; - - let builder = S3::default() - .region(&s3_option.region) - .access_key_id(&s3_option.key_id) - .secret_access_key(&s3_option.key_secret) - .endpoint(&s3_option.endpoint) - .bucket(&s3_option.bucket) - .http_client(http_client); - let op = Operator::new(builder)? - .layer( - TimeoutLayer::new() - .with_timeout(s3_option.timeout.timeout.0) - .with_io_timeout(s3_option.timeout.io_timeout.0), - ) - .layer(RetryLayer::new().with_max_times(s3_option.max_retries)) - .finish(); - - Ok(OpendalStore::new(op)) -} diff --git a/src/components/object_store/src/test_util.rs b/src/components/object_store/src/test_util.rs deleted file mode 100644 index ca643e4e93..0000000000 --- a/src/components/object_store/src/test_util.rs +++ /dev/null @@ -1,207 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{collections::HashMap, fmt::Display, ops::Range, sync::RwLock}; - -use async_trait::async_trait; -use bytes::Bytes; -use futures::stream::{self, BoxStream}; -use upstream::{ - path::Path, GetOptions, GetResult, GetResultPayload, ListResult, MultipartUpload, ObjectMeta, - ObjectStore, PutMultipartOpts, PutOptions, PutPayload, PutResult, Result, -}; - -#[derive(Debug)] -struct StoreError { - path: Path, - msg: String, -} - -impl Display for StoreError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("StoreError") - .field("path", &self.path) - .field("msg", &self.msg) - .finish() - } -} - -impl std::error::Error for StoreError {} - -/// A memory based object store implementation, mainly used for testing. 
-#[derive(Debug, Default)] -pub struct MemoryStore { - files: RwLock>, - get_range_counts: RwLock>, -} - -impl Display for MemoryStore { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("MemoryStore") - .field("counts", &self.get_counts()) - .finish() - } -} - -impl MemoryStore { - pub fn get_counts(&self) -> HashMap { - let counts = self.get_range_counts.read().unwrap(); - counts.clone().into_iter().collect() - } -} - -#[async_trait] -impl ObjectStore for MemoryStore { - async fn put(&self, location: &Path, payload: PutPayload) -> Result { - let mut files = self.files.write().unwrap(); - files.insert(location.clone(), Bytes::from(payload)); - Ok(PutResult { - e_tag: None, - version: None, - }) - } - - async fn get(&self, location: &Path) -> Result { - let files = self.files.read().unwrap(); - if let Some(bs) = files.get(location) { - let bs = bs.clone(); - let size = bs.len(); - let payload = GetResultPayload::Stream(Box::pin(stream::once(async move { Ok(bs) }))); - Ok(GetResult { - payload, - meta: ObjectMeta { - location: location.clone(), - last_modified: Default::default(), - size, - e_tag: None, - version: None, - }, - range: Default::default(), - attributes: Default::default(), - }) - } else { - let source = Box::new(StoreError { - msg: "not found".to_string(), - path: location.clone(), - }); - Err(upstream::Error::Generic { - store: "get", - source, - }) - } - } - - async fn get_range(&self, location: &Path, range: Range) -> Result { - { - let mut counts = self.get_range_counts.write().unwrap(); - counts - .entry(location.clone()) - .and_modify(|c| *c += 1) - .or_insert(1); - } - - let files = self.files.read().unwrap(); - if let Some(bs) = files.get(location) { - Ok(bs.slice(range)) - } else { - let source = Box::new(StoreError { - msg: "not found".to_string(), - path: location.clone(), - }); - Err(upstream::Error::Generic { - store: "get_range", - source, - }) - } - } - - async fn head(&self, location: &Path) -> Result { - let files = self.files.read().unwrap(); - - if let Some(bs) = files.get(location) { - Ok(ObjectMeta { - location: location.clone(), - size: bs.len(), - e_tag: None, - last_modified: Default::default(), - version: None, - }) - } else { - let source = Box::new(StoreError { - msg: "not found".to_string(), - path: location.clone(), - }); - Err(upstream::Error::Generic { - store: "head", - source, - }) - } - } - - async fn put_multipart(&self, _location: &Path) -> Result> { - unimplemented!() - } - - async fn delete(&self, _location: &Path) -> Result<()> { - unimplemented!() - } - - fn list(&self, _prefix: Option<&Path>) -> BoxStream<'_, Result> { - unimplemented!() - } - - async fn list_with_delimiter(&self, _prefix: Option<&Path>) -> Result { - unimplemented!() - } - - async fn copy(&self, _from: &Path, _to: &Path) -> Result<()> { - unimplemented!() - } - - async fn rename(&self, _from: &Path, _to: &Path) -> Result<()> { - unimplemented!() - } - - async fn copy_if_not_exists(&self, _from: &Path, _to: &Path) -> Result<()> { - unimplemented!() - } - - async fn rename_if_not_exists(&self, _from: &Path, _to: &Path) -> Result<()> { - unimplemented!() - } - - async fn put_opts( - &self, - _location: &Path, - _payload: PutPayload, - _opts: PutOptions, - ) -> Result { - unimplemented!() - } - - async fn put_multipart_opts( - &self, - _location: &Path, - _opts: PutMultipartOpts, - ) -> Result> { - unimplemented!() - } - - async fn get_opts(&self, _location: &Path, _options: GetOptions) -> Result { - unimplemented!() - } -} diff --git 
a/src/components/panic_ext/Cargo.toml b/src/components/panic_ext/Cargo.toml deleted file mode 100644 index 0792959ee3..0000000000 --- a/src/components/panic_ext/Cargo.toml +++ /dev/null @@ -1,46 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "panic_ext" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[dependencies.slog-global] -version = "0.1" -git = "https://github.com/tikv/slog-global.git" -rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" - -[package.edition] -workspace = true - -[dependencies] -backtrace = "0.3.9" -libc = "0.2" -logger = { workspace = true } - -[dev-dependencies] -gag = "1.0" -nix = "0.22" -slog = { workspace = true } diff --git a/src/components/panic_ext/src/lib.rs b/src/components/panic_ext/src/lib.rs deleted file mode 100644 index c9fa8bc1a7..0000000000 --- a/src/components/panic_ext/src/lib.rs +++ /dev/null @@ -1,175 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::thread; - -use logger::error; - -/// fork from https://github.com/tikv/tikv/blob/83d173a2c0058246631f0e71de74238ccff670fd/components/tikv_util/src/lib.rs#L429 -/// Exit the whole process when panic. -pub fn set_panic_hook(panic_abort: bool) { - use std::{panic, process}; - - // HACK! New a backtrace ahead for caching necessary elf sections of this - // tikv-server, in case it can not open more files during panicking - // which leads to no stack info (0x5648bdfe4ff2 - ). - // - // Crate backtrace caches debug info in a static variable `STATE`, - // and the `STATE` lives forever once it has been created. - // See more: https://github.com/alexcrichton/backtrace-rs/blob/\ - // 597ad44b131132f17ed76bf94ac489274dd16c7f/\ - // src/symbolize/libbacktrace.rs#L126-L159 - // Caching is slow, spawn it in another thread to speed up. 
- thread::Builder::new() - .name("backtrace-loader".to_owned()) - .spawn(backtrace::Backtrace::new) - .unwrap(); - - panic::set_hook(Box::new(move |info: &panic::PanicInfo<'_>| { - let msg = match info.payload().downcast_ref::<&'static str>() { - Some(s) => *s, - None => match info.payload().downcast_ref::() { - Some(s) => &s[..], - None => "Box", - }, - }; - - let thread = thread::current(); - let name = thread.name().unwrap_or(""); - let loc = info - .location() - .map(|l| format!("{}:{}", l.file(), l.line())); - let bt = backtrace::Backtrace::new(); - error!( - "thread '{}' panicked '{}' at {:?}\n{:?}", - name, - msg, - loc.unwrap_or_else(|| "".to_owned()), - bt - ); - - // There might be remaining logs in the async logger. - // To collect remaining logs and also collect future logs, replace the old one - // with a terminal logger. - // When the old global async logger is replaced, the old async guard will be - // taken and dropped. In the drop() the async guard, it waits for the - // finish of the remaining logs in the async logger. - if let Some(level) = logger::max_level().to_level() { - let drainer = logger::term_drainer(); - let _ = logger::init_log_from_drain( - drainer, - logger::convert_log_level_to_slog_level(level), - false, // Use sync logger to avoid an unnecessary log thread. - 0, - false, // It is initialized already. - ); - } - - if panic_abort { - process::abort(); - } else { - unsafe { - // Calling process::exit would trigger global static to destroy, like C++ - // static variables of RocksDB, which may cause other threads encounter - // pure virtual method call. So calling libc::_exit() instead to skip the - // cleanup process. - libc::_exit(1); - } - } - })) -} - -#[cfg(test)] -mod tests { - use std::{io::Read, time::Duration}; - - use nix::{ - sys::wait::{wait, WaitStatus}, - unistd::{fork, ForkResult}, - }; - use slog::{self, Drain, Level, OwnedKVList, Record}; - - use crate::set_panic_hook; - - /// Create a child process and wait to get its exit code. - fn run_and_wait_child_process(child: impl Fn()) -> Result { - match unsafe { fork() } { - Ok(ForkResult::Parent { .. }) => match wait().unwrap() { - WaitStatus::Exited(_, status) => Ok(status), - v => Err(format!("{v:?}")), - }, - Ok(ForkResult::Child) => { - child(); - std::process::exit(0); - } - Err(e) => Err(format!("Fork failed: {e}")), - } - } - - #[ignore = "This test will fail on github ubuntu runner"] - #[test] - fn test_panic_hook() { - use gag::BufferRedirect; - - struct DelayDrain(D); - - impl Drain for DelayDrain - where - D: Drain, - ::Err: std::fmt::Display, - { - type Err = ::Err; - type Ok = ::Ok; - - fn log( - &self, - record: &Record<'_>, - values: &OwnedKVList, - ) -> Result { - std::thread::sleep(Duration::from_millis(100)); - self.0.log(record, values) - } - } - - let mut stderr = BufferRedirect::stderr().unwrap(); - let status = run_and_wait_child_process(|| { - set_panic_hook(false); - let drainer = logger::term_drainer(); - let _ = logger::init_log_from_drain( - drainer, - Level::Debug, - true, // use async drainer - 0, - true, // init std log - ); - - let _ = std::thread::spawn(|| { - // let the global logger is held by the other thread, so the - // drop() of the async drain is not called in time. 
- let _guard = slog_global::borrow_global(); - std::thread::sleep(Duration::from_secs(1)); - }); - panic!("test"); - }) - .unwrap(); - - assert_eq!(status, 1); - let mut panic = String::new(); - stderr.read_to_string(&mut panic).unwrap(); - assert!(!panic.is_empty()); - } -} diff --git a/src/components/parquet-testing b/src/components/parquet-testing deleted file mode 160000 index a11fc8f148..0000000000 --- a/src/components/parquet-testing +++ /dev/null @@ -1 +0,0 @@ -Subproject commit a11fc8f148f8a7a89d9281cc0da3eb9d56095fbf diff --git a/src/components/parquet_ext/Cargo.toml b/src/components/parquet_ext/Cargo.toml deleted file mode 100644 index 1d57cb4b02..0000000000 --- a/src/components/parquet_ext/Cargo.toml +++ /dev/null @@ -1,44 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "parquet_ext" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -arrow = { workspace = true } -arrow_ext = { workspace = true } -async-trait = { workspace = true } -bytes = { workspace = true } -datafusion = { workspace = true } -futures = { workspace = true } -generic_error = { workspace = true } -logger = { workspace = true } -object_store = { workspace = true } -parquet = { workspace = true } -tokio = { workspace = true } diff --git a/src/components/parquet_ext/src/lib.rs b/src/components/parquet_ext/src/lib.rs deleted file mode 100644 index 3167834911..0000000000 --- a/src/components/parquet_ext/src/lib.rs +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
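The hook removed above follows the standard `std::panic::set_hook` pattern: extract the panic payload and location, log them together with a freshly captured backtrace, then either abort or `_exit` so no destructors run. A minimal std-only sketch of the same idea (illustrative; logging setup, backtrace caching, and the `libc::_exit` escape hatch are omitted):

use std::panic;

// Minimal illustration: report the panic message and location, then decide how to terminate.
fn install_simple_panic_hook(abort: bool) {
    panic::set_hook(Box::new(move |info| {
        // Extract the payload as &str or String, mirroring the deleted code.
        let msg = if let Some(s) = info.payload().downcast_ref::<&str>() {
            *s
        } else if let Some(s) = info.payload().downcast_ref::<String>() {
            s.as_str()
        } else {
            "Box<Any>"
        };
        let loc = info
            .location()
            .map(|l| format!("{}:{}", l.file(), l.line()))
            .unwrap_or_else(|| "<unknown>".to_string());
        eprintln!("thread panicked '{msg}' at {loc}");
        if abort {
            std::process::abort();
        }
    }));
}

fn main() {
    install_simple_panic_hook(false);
    let _ = std::thread::spawn(|| panic!("demo")).join();
}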
- -pub mod meta_data; -pub mod prune; -pub mod reader; -#[cfg(test)] -pub mod tests; - -use std::sync::Arc; - -pub use parquet::file::metadata::ParquetMetaData; -pub type ParquetMetaDataRef = Arc; diff --git a/src/components/parquet_ext/src/meta_data.rs b/src/components/parquet_ext/src/meta_data.rs deleted file mode 100644 index ad18a36cb7..0000000000 --- a/src/components/parquet_ext/src/meta_data.rs +++ /dev/null @@ -1,104 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{ops::Range, sync::Arc}; - -use async_trait::async_trait; -use bytes::Bytes; -use datafusion::parquet::arrow::ParquetRecordBatchStreamBuilder; -use generic_error::GenericResult; -use parquet::{ - arrow::arrow_reader::ArrowReaderOptions, - errors::{ParquetError, Result}, - file::{footer, metadata::ParquetMetaData}, -}; - -use crate::reader::{NoopMetricsObserver, ObjectStoreReader}; - -#[async_trait] -pub trait ChunkReader: Sync + Send { - async fn get_bytes(&self, range: Range) -> GenericResult; -} - -/// Fetch and parse [`ParquetMetadata`] from the file reader. -/// -/// Referring to: https://github.com/apache/arrow-datafusion/blob/ac2e5d15e5452e83c835d793a95335e87bf35569/datafusion/core/src/datasource/file_format/parquet.rs#L390-L449 -pub async fn fetch_parquet_metadata( - file_size: usize, - file_reader: &dyn ChunkReader, -) -> Result<(ParquetMetaData, usize)> { - const FOOTER_LEN: usize = 8; - - if file_size < FOOTER_LEN { - let err_msg = format!("file size of {file_size} is less than footer"); - return Err(ParquetError::General(err_msg)); - } - - let footer_start = file_size - FOOTER_LEN; - - let footer_bytes = file_reader - .get_bytes(footer_start..file_size) - .await - .map_err(|e| { - let err_msg = format!("failed to get footer bytes, err:{e}"); - ParquetError::General(err_msg) - })?; - - assert_eq!(footer_bytes.len(), FOOTER_LEN); - let mut footer = [0; FOOTER_LEN]; - footer.copy_from_slice(&footer_bytes); - - let metadata_len = footer::decode_footer(&footer)?; - - if file_size < metadata_len + FOOTER_LEN { - let err_msg = format!( - "file size of {} is smaller than footer + metadata {}", - file_size, - metadata_len + FOOTER_LEN - ); - return Err(ParquetError::General(err_msg)); - } - - let metadata_start = file_size - metadata_len - FOOTER_LEN; - let metadata_bytes = file_reader - .get_bytes(metadata_start..footer_start) - .await - .map_err(|e| { - let err_msg = format!("failed to get metadata bytes, err:{e}"); - ParquetError::General(err_msg) - })?; - - footer::decode_metadata(&metadata_bytes).map(|v| (v, metadata_len)) -} - -/// Build page indexes for meta data -/// -/// TODO: Currently there is no method to build page indexes for meta data in -/// `parquet`, maybe we can write a issue in `arrow-rs` . 
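The range arithmetic behind `fetch_parquet_metadata` above can be shown without any Parquet decoding. A Parquet file ends with an 8-byte footer (a 4-byte little-endian metadata length followed by the "PAR1" magic), and the metadata sits immediately before that footer; the deleted code delegates the actual decoding to the `parquet` crate. A sketch of just the range computation (illustrative; `metadata_range` is a hypothetical helper):

fn metadata_range(file_size: usize, metadata_len: usize) -> Option<std::ops::Range<usize>> {
    const FOOTER_LEN: usize = 8;
    // Reject files too small to hold footer + metadata, as the deleted code does.
    if file_size < FOOTER_LEN || file_size < metadata_len + FOOTER_LEN {
        return None;
    }
    let footer_start = file_size - FOOTER_LEN;
    Some(footer_start - metadata_len..footer_start)
}

fn main() {
    // A 1 KiB file whose footer reports 100 bytes of metadata.
    assert_eq!(metadata_range(1024, 100), Some(916..1016));
    // Truncated file: the reported metadata cannot fit.
    assert_eq!(metadata_range(64, 100), None);
}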
-pub async fn meta_with_page_indexes( - object_store_reader: ObjectStoreReader, -) -> Result> { - let read_options = ArrowReaderOptions::new().with_page_index(true); - let builder = - ParquetRecordBatchStreamBuilder::new_with_options(object_store_reader, read_options) - .await - .map_err(|e| { - let err_msg = format!("failed to build page indexes in metadata, err:{e}"); - ParquetError::General(err_msg) - })?; - Ok(builder.metadata().clone()) -} diff --git a/src/components/parquet_ext/src/prune/equal.rs b/src/components/parquet_ext/src/prune/equal.rs deleted file mode 100644 index c66436d5b4..0000000000 --- a/src/components/parquet_ext/src/prune/equal.rs +++ /dev/null @@ -1,503 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use arrow::datatypes::SchemaRef; -use datafusion::{ - common::Column, - logical_expr::{expr::InList, Expr, Operator}, - scalar::ScalarValue, -}; - -const MAX_ELEMS_IN_LIST_FOR_FILTER: usize = 100; - -/// A position used to describe the location of a column in the row groups. -#[derive(Debug, Clone, Copy)] -pub struct ColumnPosition { - pub row_group_idx: usize, - pub column_idx: usize, -} - -/// Filter the row groups according to the `exprs`. -/// -/// The return value is the filtered row group indexes. And the `is_equal` -/// closure receive three parameters: -/// - The position of the column in the row groups; -/// - The value of the column used to determine equality; -/// - Whether this compare is negated; -/// And it should return the result of this comparison, and None denotes -/// unknown. -pub fn prune_row_groups( - schema: SchemaRef, - exprs: &[Expr], - num_row_groups: usize, - is_equal: E, -) -> Vec -where - E: Fn(ColumnPosition, &ScalarValue, bool) -> Option, -{ - let mut should_reads = vec![true; num_row_groups]; - for expr in exprs { - let pruner = EqPruner::new(expr); - for (row_group_idx, should_read) in should_reads.iter_mut().enumerate() { - if !*should_read { - continue; - } - - let f = |column: &Column, val: &ScalarValue, negated: bool| -> bool { - match schema.column_with_name(&column.name) { - Some((column_idx, _)) => { - let pos = ColumnPosition { - row_group_idx, - column_idx, - }; - // Just set the result is true to ensure not to miss any possible row group - // if the caller has no idea of the compare result. - is_equal(pos, val, negated).unwrap_or(true) - } - _ => true, - } - }; - - *should_read = pruner.prune(&f); - } - } - - should_reads - .iter() - .enumerate() - .filter_map(|(row_group_idx, should_read)| { - if *should_read { - Some(row_group_idx) - } else { - None - } - }) - .collect() -} - -/// A pruner based on (not)equal predicates, including in-list predicate. -#[derive(Debug, Clone)] -pub struct EqPruner { - /// Normalized expression for pruning. 
- normalized_expr: NormalizedExpr, -} - -impl EqPruner { - pub fn new(predicate_expr: &Expr) -> Self { - Self { - normalized_expr: normalize_predicate_expression(predicate_expr), - } - } - - /// Use the prune function provided by caller to finish pruning. - /// - /// The prune function receives three parameters: - /// - the column to compare; - /// - the value of the column used to determine equality; - /// - Whether this compare is negated; - pub fn prune(&self, f: &F) -> bool - where - F: Fn(&Column, &ScalarValue, bool) -> bool, - { - self.normalized_expr.compute(f) - } -} - -/// The normalized expression based on [`datafusion::logical_expr::Expr`]. -/// -/// It only includes these kinds of `And`, `Or`, `Eq`, `NotEq` and `True`. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -enum NormalizedExpr { - And { - left: Box, - right: Box, - }, - Or { - left: Box, - right: Box, - }, - Eq { - column: Column, - value: ScalarValue, - }, - NotEq { - column: Column, - value: ScalarValue, - }, - True, - False, -} - -impl NormalizedExpr { - fn boxed(self) -> Box { - Box::new(self) - } - - fn compute(&self, f: &F) -> bool - where - F: Fn(&Column, &ScalarValue, bool) -> bool, - { - match self { - NormalizedExpr::And { left, right } => left.compute(f) && right.compute(f), - NormalizedExpr::Or { left, right } => left.compute(f) || right.compute(f), - NormalizedExpr::Eq { column, value } => f(column, value, false), - NormalizedExpr::NotEq { column, value } => f(column, value, true), - NormalizedExpr::True => true, - NormalizedExpr::False => false, - } - } -} - -fn normalize_predicate_expression(expr: &Expr) -> NormalizedExpr { - // Returned for unsupported expressions, which are converted to TRUE. - let unhandled = NormalizedExpr::True; - - match expr { - Expr::BinaryExpr(datafusion::logical_expr::BinaryExpr { left, op, right }) => match op { - Operator::And => { - let left = normalize_predicate_expression(left); - let right = normalize_predicate_expression(right); - NormalizedExpr::And { - left: left.boxed(), - right: right.boxed(), - } - } - Operator::Or => { - let left = normalize_predicate_expression(left); - let right = normalize_predicate_expression(right); - NormalizedExpr::Or { - left: left.boxed(), - right: right.boxed(), - } - } - Operator::Eq => normalize_equal_expr(left, right, true), - Operator::NotEq => normalize_equal_expr(left, right, false), - _ => unhandled, - }, - Expr::InList(InList { - expr, - list, - negated, - }) if list.len() < MAX_ELEMS_IN_LIST_FOR_FILTER => { - if list.is_empty() { - if *negated { - // "not in empty list" is always true - NormalizedExpr::True - } else { - // "in empty list" is always false - NormalizedExpr::False - } - } else { - let eq_fun = if *negated { Expr::not_eq } else { Expr::eq }; - let re_fun = if *negated { Expr::and } else { Expr::or }; - let transformed_expr = list - .iter() - .map(|e| eq_fun(*expr.clone(), e.clone())) - .reduce(re_fun) - .unwrap(); - normalize_predicate_expression(&transformed_expr) - } - } - _ => unhandled, - } -} - -/// Normalize the equal expr as: `column = value` or `column != value`. -/// -/// Return [`NormalizedExpr::True`] if it can't be normalized. 
-fn normalize_equal_expr(left: &Expr, right: &Expr, is_equal: bool) -> NormalizedExpr { - let (column, value) = match (left, right) { - (Expr::Column(col), Expr::Literal(val)) => (col, val), - (Expr::Literal(val), Expr::Column(col)) => (col, val), - _ => return NormalizedExpr::True, - }; - let (column, value) = (column.clone(), value.clone()); - if is_equal { - NormalizedExpr::Eq { column, value } - } else { - NormalizedExpr::NotEq { column, value } - } -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use arrow::datatypes::{DataType, Field, Schema}; - - use super::*; - - fn make_column_expr(name: &str) -> Expr { - Expr::Column(make_column(name)) - } - - fn make_literal_expr(val: i32) -> Expr { - Expr::Literal(make_scalar_value(val)) - } - - fn make_column(name: &str) -> Column { - Column { - relation: None, - name: name.to_string(), - } - } - - fn make_scalar_value(val: i32) -> ScalarValue { - ScalarValue::from(val) - } - - fn make_normalized_eq_expr(column: &str, val: i32) -> Box { - NormalizedExpr::Eq { - column: make_column(column), - value: make_scalar_value(val), - } - .boxed() - } - - fn make_normalized_not_eq_expr(column: &str, val: i32) -> Box { - NormalizedExpr::NotEq { - column: make_column(column), - value: make_scalar_value(val), - } - .boxed() - } - - fn check_normalize(expr: &Expr, expect_expr: &NormalizedExpr) { - let normalized_expr = normalize_predicate_expression(expr); - assert_eq!(&normalized_expr, expect_expr); - } - - #[test] - fn test_normalize_and() { - let expr = Expr::and( - Expr::eq(make_column_expr("c0"), make_literal_expr(0)), - Expr::not_eq(make_column_expr("c1"), make_literal_expr(0)), - ); - let expect_expr = NormalizedExpr::And { - left: make_normalized_eq_expr("c0", 0), - right: make_normalized_not_eq_expr("c1", 0), - }; - - check_normalize(&expr, &expect_expr); - } - - #[test] - fn test_normalize_or() { - let expr = Expr::or( - Expr::eq(make_column_expr("c0"), make_literal_expr(0)), - Expr::not_eq(make_column_expr("c1"), make_literal_expr(0)), - ); - let expect_expr = NormalizedExpr::Or { - left: make_normalized_eq_expr("c0", 0), - right: make_normalized_not_eq_expr("c1", 0), - }; - - check_normalize(&expr, &expect_expr); - } - - #[test] - fn test_normalize_inlist() { - let equal_list_expr = Expr::in_list( - make_column_expr("c0"), - vec![make_literal_expr(0), make_literal_expr(1)], - false, - ); - - let expect_equal_expr = NormalizedExpr::Or { - left: make_normalized_eq_expr("c0", 0), - right: make_normalized_eq_expr("c0", 1), - }; - check_normalize(&equal_list_expr, &expect_equal_expr); - - let not_equal_list_expr = Expr::in_list( - make_column_expr("c0"), - vec![make_literal_expr(0), make_literal_expr(1)], - true, - ); - - let expect_not_equal_expr = NormalizedExpr::And { - left: make_normalized_not_eq_expr("c0", 0), - right: make_normalized_not_eq_expr("c0", 1), - }; - check_normalize(¬_equal_list_expr, &expect_not_equal_expr); - } - - #[test] - fn test_normalize_in_empty_list() { - let empty_list_expr = Expr::in_list(make_column_expr("c0"), vec![], false); - check_normalize(&empty_list_expr, &NormalizedExpr::False); - - let negated_empty_list_expr = Expr::in_list(make_column_expr("c0"), vec![], true); - check_normalize(&negated_empty_list_expr, &NormalizedExpr::True); - } - - #[test] - fn test_normalize_complex() { - // (c0 in [0, 1]) or ((c1 != 0 or c2 = 1 ) and not c3)) - let expr = Expr::or( - Expr::in_list( - make_column_expr("c0"), - vec![make_literal_expr(0), make_literal_expr(1)], - false, - ), - Expr::and( - Expr::or( - 
Expr::not_eq(make_literal_expr(0), make_column_expr("c1")), - Expr::eq(make_literal_expr(1), make_column_expr("c2")), - ), - !make_column_expr("c3"), - ), - ); - - // (c0 = 0 or c0 = 1) or ((c1 != 0 or c2 = 1) and true) - let expect_expr = NormalizedExpr::Or { - left: NormalizedExpr::Or { - left: make_normalized_eq_expr("c0", 0), - right: make_normalized_eq_expr("c0", 1), - } - .boxed(), - right: NormalizedExpr::And { - left: NormalizedExpr::Or { - left: make_normalized_not_eq_expr("c1", 0), - right: make_normalized_eq_expr("c2", 1), - } - .boxed(), - right: NormalizedExpr::True.boxed(), - } - .boxed(), - }; - - check_normalize(&expr, &expect_expr) - } - - #[test] - fn test_normalize_unhandled() { - let lt_expr = Expr::gt(make_column_expr("c0"), make_literal_expr(0)); - let empty_list_expr = Expr::in_list(make_column_expr("c0"), vec![], true); - let not_expr = !make_column_expr("c0"); - - let unhandled_exprs = vec![lt_expr, empty_list_expr, not_expr]; - let expect_expr = NormalizedExpr::True; - for expr in &unhandled_exprs { - check_normalize(expr, &expect_expr); - } - } - - #[test] - fn test_prune() { - let f = |column: &Column, val: &ScalarValue, negated: bool| -> bool { - let val = match val { - ScalarValue::Int32(v) => v.unwrap(), - _ => panic!("Unexpected value type"), - }; - - let res = match column.name.as_str() { - "c0" => val == 0, - "c1" => val == 1, - "c2" => val == 2, - _ => panic!("Unexpected column"), - }; - if negated { - !res - } else { - res - } - }; - - // (c0 in [0, 1]) or ((c1 != 0 or c2 = 1 ) and not c3)) - let true_expr = Expr::or( - Expr::in_list( - make_column_expr("c0"), - vec![make_literal_expr(0), make_literal_expr(1)], - false, - ), - Expr::and( - Expr::or( - Expr::not_eq(make_literal_expr(0), make_column_expr("c1")), - Expr::eq(make_literal_expr(1), make_column_expr("c2")), - ), - !make_column_expr("c3"), - ), - ); - assert!(EqPruner::new(&true_expr).prune(&f)); - - // (c0 in [2, 3]) or (c1 != 0 and c2 = 1) - let false_expr = Expr::or( - Expr::in_list( - make_column_expr("c0"), - vec![make_literal_expr(2), make_literal_expr(3)], - false, - ), - Expr::and( - Expr::not_eq(make_literal_expr(0), make_column_expr("c1")), - Expr::eq(make_literal_expr(1), make_column_expr("c2")), - ), - ); - assert!(!EqPruner::new(&false_expr).prune(&f)); - } - - #[test] - fn test_filter_row_groups() { - // Provide three row groups (one row in one row group). 
- // | c0 | c1 | c2 | - // | 0 | 1 | 2 | - // | 1 | 2 | 3 | - // | 2 | 3 | 4 | - let row_groups = [vec![0, 1, 2], vec![1, 2, 3], vec![2, 3, 4]]; - let is_equal = |pos: ColumnPosition, val: &ScalarValue, negated: bool| -> Option { - let expect_val = row_groups[pos.row_group_idx][pos.column_idx]; - let val = if let ScalarValue::Int32(v) = val { - v.expect("Unexpected value") - } else { - panic!("Unexpected value type") - }; - - if negated { - Some(expect_val != val) - } else { - Some(expect_val == val) - } - }; - - // (c0 in [1, 3]) or c1 not in [1, 2] - let predicate1 = Expr::or( - Expr::in_list( - make_column_expr("c0"), - vec![make_literal_expr(1), make_literal_expr(3)], - false, - ), - Expr::in_list( - make_column_expr("c1"), - vec![make_literal_expr(1), make_literal_expr(2)], - true, - ), - ); - - // c2 != 2 - let predicate2 = Expr::not_eq(make_literal_expr(2), make_column_expr("c2")); - - let schema = Schema::new(vec![ - Field::new("c0", DataType::Int32, false), - Field::new("c1", DataType::Int32, false), - Field::new("c2", DataType::Int32, false), - ]); - let target_row_groups = - prune_row_groups(Arc::new(schema), &vec![predicate1, predicate2], 3, is_equal); - - assert_eq!(vec![1, 2], target_row_groups) - } -} diff --git a/src/components/parquet_ext/src/prune/min_max.rs b/src/components/parquet_ext/src/prune/min_max.rs deleted file mode 100644 index 25e774b2d8..0000000000 --- a/src/components/parquet_ext/src/prune/min_max.rs +++ /dev/null @@ -1,337 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use arrow::{array::ArrayRef, datatypes::Schema as ArrowSchema}; -use datafusion::{ - common::ToDFSchema, - error::Result as DataFusionResult, - physical_expr::{create_physical_expr, execution_props::ExecutionProps}, - physical_optimizer::pruning::{PruningPredicate, PruningStatistics}, - physical_plan::PhysicalExpr, - prelude::{Column, Expr}, - scalar::ScalarValue, -}; -use logger::{error, trace}; -use parquet::file::{metadata::RowGroupMetaData, statistics::Statistics as ParquetStatistics}; - -/// Filters row groups according to the predicate function, and returns the -/// indexes of the filtered row groups. -pub fn prune_row_groups( - schema: Arc, - exprs: &[Expr], - row_groups: &[RowGroupMetaData], -) -> Vec { - let mut target_row_groups = Vec::with_capacity(row_groups.len()); - let should_reads = filter_row_groups_inner(schema, exprs, row_groups); - for (i, should_read) in should_reads.iter().enumerate() { - if *should_read { - target_row_groups.push(i); - } - } - - target_row_groups -} - -/// Determine whether a row group should be read according to the meta data -/// in the `row_groups`. 
-/// -/// The boolean value in the returned vector denotes the corresponding row -/// group in the `row_groups` whether should be read. -fn filter_row_groups_inner( - schema: Arc, - exprs: &[Expr], - row_groups: &[RowGroupMetaData], -) -> Vec { - let mut results = vec![true; row_groups.len()]; - let execution_props = ExecutionProps::new(); - for expr in exprs { - match logical2physical(expr, &schema, &execution_props) - .and_then(|physical_expr| PruningPredicate::try_new(physical_expr, schema.clone())) - { - Ok(pruning_predicate) => { - trace!("pruning_predicate is:{:?}", pruning_predicate); - - if let Ok(values) = build_row_group_predicate(&pruning_predicate, row_groups) { - for (curr_val, result_val) in values.into_iter().zip(results.iter_mut()) { - *result_val = curr_val && *result_val - } - }; - // if fail to build, just ignore this filter so that all the - // row groups should be read for this - // filter. - } - Err(e) => { - // for any error just ignore it and that is to say, for this filter all the row - // groups should be read. - error!("fail to build pruning predicate, err:{}", e); - } - } - } - - results -} - -fn logical2physical( - expr: &Expr, - schema: &ArrowSchema, - execution_props: &ExecutionProps, -) -> DataFusionResult> { - schema - .clone() - .to_dfschema() - .and_then(|df_schema| create_physical_expr(expr, &df_schema, schema, execution_props)) -} - -fn build_row_group_predicate( - predicate_builder: &PruningPredicate, - row_group_metadata: &[RowGroupMetaData], -) -> datafusion::common::Result> { - let parquet_schema = predicate_builder.schema().as_ref(); - - let pruning_stats = RowGroupPruningStatistics { - row_group_metadata, - parquet_schema, - }; - - predicate_builder.prune(&pruning_stats) -} - -/// port from datafusion. -/// Extract the min/max statistics from a `ParquetStatistics` object -macro_rules! get_statistic { - ($column_statistics:expr, $func:ident, $bytes_func:ident) => {{ - if !$column_statistics.has_min_max_set() { - return None; - } - match $column_statistics { - ParquetStatistics::Boolean(s) => Some(ScalarValue::Boolean(Some(*s.$func()))), - ParquetStatistics::Int32(s) => Some(ScalarValue::Int32(Some(*s.$func()))), - ParquetStatistics::Int64(s) => Some(ScalarValue::Int64(Some(*s.$func()))), - // 96 bit ints not supported - ParquetStatistics::Int96(_) => None, - ParquetStatistics::Float(s) => Some(ScalarValue::Float32(Some(*s.$func()))), - ParquetStatistics::Double(s) => Some(ScalarValue::Float64(Some(*s.$func()))), - ParquetStatistics::ByteArray(s) => { - let s = std::str::from_utf8(s.$bytes_func()) - .map(|s| s.to_string()) - .ok(); - Some(ScalarValue::Utf8(s)) - } - // type not supported yet - ParquetStatistics::FixedLenByteArray(_) => None, - } - }}; -} - -/// port from datafusion. -// Extract the min or max value calling `func` or `bytes_func` on the -// ParquetStatistics as appropriate -macro_rules! 
get_min_max_values { - ($self:expr, $column:expr, $func:ident, $bytes_func:ident) => {{ - let (column_index, field) = - if let Some((v, f)) = $self.parquet_schema.column_with_name(&$column.name) { - (v, f) - } else { - // Named column was not present - return None; - }; - - let data_type = field.data_type(); - let null_scalar: ScalarValue = if let Ok(v) = data_type.try_into() { - v - } else { - // DataFusion doesn't have support for ScalarValues of the column type - return None; - }; - - let scalar_values: Vec = $self - .row_group_metadata - .iter() - .flat_map(|meta| meta.column(column_index).statistics()) - .map(|stats| get_statistic!(stats, $func, $bytes_func)) - .map(|maybe_scalar| { - // column either did't have statistics at all or didn't have min/max values - maybe_scalar.unwrap_or_else(|| null_scalar.clone()) - }) - .collect(); - - // ignore errors converting to arrays (e.g. different types) - ScalarValue::iter_to_array(scalar_values).ok() - }}; -} - -/// Wraps parquet statistics in a way -/// that implements [`PruningStatistics`] -struct RowGroupPruningStatistics<'a> { - row_group_metadata: &'a [RowGroupMetaData], - parquet_schema: &'a ArrowSchema, -} - -impl<'a> PruningStatistics for RowGroupPruningStatistics<'a> { - fn min_values(&self, column: &Column) -> Option { - get_min_max_values!(self, column, min, min_bytes) - } - - fn max_values(&self, column: &Column) -> Option { - get_min_max_values!(self, column, max, max_bytes) - } - - fn num_containers(&self) -> usize { - self.row_group_metadata.len() - } - - // TODO: support this. - fn null_counts(&self, _column: &Column) -> Option { - None - } -} - -#[cfg(test)] -mod test { - - use arrow::datatypes::{DataType as ArrowDataType, Field as ArrowField}; - use datafusion::logical_expr::{expr_fn::col, lit}; - use parquet::{ - basic::Type, - file::{metadata::ColumnChunkMetaData, statistics::Statistics}, - schema::types::{SchemaDescPtr, SchemaDescriptor, Type as SchemaType}, - }; - - use super::*; - - fn convert_data_type(data_type: &ArrowDataType) -> Type { - match data_type { - ArrowDataType::Boolean => Type::BOOLEAN, - ArrowDataType::Int32 => Type::INT32, - ArrowDataType::Int64 => Type::INT64, - ArrowDataType::Utf8 => Type::BYTE_ARRAY, - _ => unimplemented!(), - } - } - - fn prepare_arrow_schema(fields: Vec<(&str, ArrowDataType)>) -> Arc { - let fields = fields - .into_iter() - .map(|(name, data_type)| ArrowField::new(name, data_type, false)) - .collect::>(); - Arc::new(ArrowSchema::new(fields)) - } - - fn prepare_parquet_schema_descr(schema: &ArrowSchema) -> SchemaDescPtr { - let fields = schema - .fields() - .iter() - .map(|field| { - Arc::new( - SchemaType::primitive_type_builder( - field.name(), - convert_data_type(field.data_type()), - ) - .build() - .unwrap(), - ) - }) - .collect(); - let schema = SchemaType::group_type_builder("schema") - .with_fields(fields) - .build() - .unwrap(); - - Arc::new(SchemaDescriptor::new(Arc::new(schema))) - } - - fn prepare_metadata(schema: &ArrowSchema, statistics: Statistics) -> RowGroupMetaData { - let schema_descr = prepare_parquet_schema_descr(schema); - let column_metadata = schema_descr - .columns() - .iter() - .cloned() - .map(|col_descr| { - ColumnChunkMetaData::builder(col_descr) - .set_statistics(statistics.clone()) - .build() - .unwrap() - }) - .collect(); - RowGroupMetaData::builder(schema_descr) - .set_column_metadata(column_metadata) - .build() - .unwrap() - } - - fn int32_stat(min: i32, max: i32) -> Statistics { - Statistics::int32(Some(min), Some(max), None, 0, false) - } - - fn 
string_stat(min: &str, max: &str) -> Statistics { - Statistics::byte_array(Some(min.into()), Some(max.into()), None, 0, false) - } - - #[test] - fn test_row_group_filter() { - let testcases = vec![ - // (expr, min, max, schema, expected) - ( - col("a").eq(lit(5i64)), // a == 5 - int32_stat(10, 20), - vec![("a", ArrowDataType::Int64)], - vec![], - ), - ( - col("a").eq(lit(14i64)), // a == 14 - int32_stat(10, 20), - vec![("a", ArrowDataType::Int64)], - vec![0], - ), - ( - col("a").lt(col("b")), // a < b - int32_stat(10, 20), - vec![("a", ArrowDataType::Int32), ("b", ArrowDataType::Int32)], - // nothing actually gets calculated. - vec![0], - ), - ( - col("a").in_list(vec![lit(17i64), lit(100i64)], false), // a in (17, 100) - int32_stat(101, 200), - vec![("a", ArrowDataType::Int64)], - vec![], - ), - ( - col("hostname").eq(lit("host-1794")), // hostname == host-1794 - string_stat("host-18000", "host-20000"), - vec![("hostname", ArrowDataType::Utf8)], - vec![], - ), - ( - col("hostname").eq(lit("host-1794")), // hostname == host-1794 - string_stat("host-1000", "host-20000"), - vec![("hostname", ArrowDataType::Utf8)], - vec![0], - ), - ]; - - for (expr, stat, schema, expected) in testcases { - let schema = prepare_arrow_schema(schema); - let metadata = prepare_metadata(&schema, stat); - - let actual = prune_row_groups(schema, &[expr], &[metadata]); - assert_eq!(actual, expected); - } - } -} diff --git a/src/components/parquet_ext/src/prune/mod.rs b/src/components/parquet_ext/src/prune/mod.rs deleted file mode 100644 index 44536d35f4..0000000000 --- a/src/components/parquet_ext/src/prune/mod.rs +++ /dev/null @@ -1,19 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -pub mod equal; -pub mod min_max; diff --git a/src/components/parquet_ext/src/reader.rs b/src/components/parquet_ext/src/reader.rs deleted file mode 100644 index 61f11b21f0..0000000000 --- a/src/components/parquet_ext/src/reader.rs +++ /dev/null @@ -1,138 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::{ - ops::Range, - sync::Arc, - time::{Duration, Instant}, -}; - -use bytes::Bytes; -use futures::{ - future::{BoxFuture, FutureExt}, - TryFutureExt, -}; -use object_store::{ObjectStoreRef, Path}; -use parquet::{arrow::async_reader::AsyncFileReader, file::metadata::ParquetMetaData}; - -/// The observer for metrics of [ObjectStoreReader]. -pub trait MetricsObserver: Send { - fn elapsed(&self, path: &Path, elapsed: Duration); - fn num_bytes_fetched(&self, path: &Path, num_bytes: usize); -} - -#[derive(Debug, Clone)] -pub struct NoopMetricsObserver; - -impl MetricsObserver for NoopMetricsObserver { - fn elapsed(&self, _: &Path, _: Duration) {} - - fn num_bytes_fetched(&self, _: &Path, _: usize) {} -} - -/// The implementation based on `ObjectStore` for [`AsyncFileReader`]. -#[derive(Clone)] -pub struct ObjectStoreReader { - storage: ObjectStoreRef, - path: Path, - meta_data: Arc, - begin: Instant, - metrics: T, -} - -impl ObjectStoreReader { - pub fn new(storage: ObjectStoreRef, path: Path, meta_data: Arc) -> Self { - Self::with_metrics(storage, path, meta_data, NoopMetricsObserver) - } -} - -impl ObjectStoreReader { - pub fn with_metrics( - storage: ObjectStoreRef, - path: Path, - meta_data: Arc, - metrics: T, - ) -> Self { - Self { - storage, - path, - meta_data, - begin: Instant::now(), - metrics, - } - } -} - -impl Drop for ObjectStoreReader { - fn drop(&mut self) { - self.metrics.elapsed(&self.path, self.begin.elapsed()) - } -} - -impl AsyncFileReader for ObjectStoreReader { - fn get_bytes(&mut self, range: Range) -> BoxFuture<'_, parquet::errors::Result> { - async move { - let get_res = self - .storage - .get_range(&self.path, range) - .map_err(|e| { - parquet::errors::ParquetError::General(format!( - "Failed to fetch range from object store, err:{e}" - )) - }) - .await; - - if let Ok(bytes) = &get_res { - self.metrics.num_bytes_fetched(&self.path, bytes.len()); - } - - get_res - } - .boxed() - } - - fn get_byte_ranges( - &mut self, - ranges: Vec>, - ) -> BoxFuture<'_, parquet::errors::Result>> { - async move { - let get_res = self - .storage - .get_ranges(&self.path, &ranges) - .map_err(|e| { - parquet::errors::ParquetError::General(format!( - "Failed to fetch ranges from object store, err:{e}" - )) - }) - .await; - - if let Ok(bytes) = &get_res { - let num_bytes: usize = bytes.iter().map(|v| v.len()).sum(); - self.metrics.num_bytes_fetched(&self.path, num_bytes); - } - - get_res - } - .boxed() - } - - fn get_metadata( - &mut self, - ) -> BoxFuture<'_, parquet::errors::Result>> { - Box::pin(async move { Ok(self.meta_data.clone()) }) - } -} diff --git a/src/components/parquet_ext/src/tests.rs b/src/components/parquet_ext/src/tests.rs deleted file mode 100644 index 20ca480f7b..0000000000 --- a/src/components/parquet_ext/src/tests.rs +++ /dev/null @@ -1,131 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{env, error::Error, fs, path::PathBuf, str::FromStr}; - -use arrow::{array::*, datatypes::DataType, record_batch::RecordBatch}; -use parquet::record::{Field, Row}; - -fn get_data_dir( - udf_env: &str, - submodule_data: &str, -) -> std::result::Result> { - // Try user defined env. - if let Ok(dir) = env::var(udf_env) { - let trimmed = dir.trim().to_string(); - if !trimmed.is_empty() { - let pb = PathBuf::from(trimmed); - if pb.is_dir() { - return Ok(pb); - } else { - return Err(format!( - "the data dir `{}` defined by env {} not found", - pb.display(), - udf_env - ) - .into()); - } - } - } - - // The env is undefined or its value is trimmed to empty, let's try default dir. - - // env "CARGO_MANIFEST_DIR" is "the directory containing the manifest of your - // package", set by `cargo run` or `cargo test`, see: - // https://doc.rust-lang.org/cargo/reference/environment-variables.html - let dir = env!("CARGO_MANIFEST_DIR"); - - let pb = PathBuf::from(dir).join(submodule_data); - if pb.is_dir() { - Ok(pb) - } else { - Err(format!( - "env `{}` is undefined or has empty value, and the pre-defined data dir `{}` not found\n\ - HINT: try running `git submodule update --init`", - udf_env, - pb.display(), - ).into()) - } -} - -fn parquet_test_data() -> String { - match get_data_dir("PARQUET_TEST_DATA", "../parquet-testing/data") { - Ok(pb) => pb.display().to_string(), - Err(err) => panic!("failed to get parquet data dir: {err}"), - } -} - -/// Returns path to the test parquet file in 'data' directory -fn get_test_path(file_name: &str) -> PathBuf { - let mut pathbuf = PathBuf::from_str(&parquet_test_data()).unwrap(); - pathbuf.push(file_name); - pathbuf -} - -/// Returns file handle for a test parquet file from 'data' directory -pub fn get_test_file(file_name: &str) -> fs::File { - let path = get_test_path(file_name); - fs::File::open(path.as_path()).unwrap_or_else(|err| { - panic!( - "Test file {} could not be opened, did you do `git submodule update`?: {}", - path.display(), - err - ) - }) -} - -struct RowViewOfRecordBatch<'a> { - record_batch: &'a RecordBatch, - row_idx: usize, -} - -impl<'a> RowViewOfRecordBatch<'a> { - fn check_row(&self, expect_row: &Row) { - for (col_idx, (_, field)) in expect_row.get_column_iter().enumerate() { - let array_ref = self.record_batch.column(col_idx); - - match array_ref.data_type() { - DataType::Binary => { - let array = array_ref.as_any().downcast_ref::().unwrap(); - let v = array.value(self.row_idx); - - if let Field::Bytes(field_value) = field { - assert_eq!(v, field_value.data()); - } else { - panic!("different value type"); - } - } - _ => unimplemented!("not support {:?}", array_ref.data_type()), - } - } - } -} - -pub fn check_rows_and_record_batches(rows: &[Row], record_batches: &[RecordBatch]) { - let mut row_idx = 0; - for record_batch in record_batches { - for row_idx_in_batch in 0..record_batch.num_rows() { - let expect_row = &rows[row_idx]; - let row_view = RowViewOfRecordBatch { - record_batch, - row_idx: row_idx_in_batch, - }; - row_view.check_row(expect_row); - row_idx += 1; - } - } -} diff --git a/src/components/partitioned_lock/Cargo.toml b/src/components/partitioned_lock/Cargo.toml deleted file mode 100644 index ac14e56450..0000000000 --- a/src/components/partitioned_lock/Cargo.toml +++ /dev/null @@ -1,37 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "partitioned_lock" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -tokio = { workspace = true } - -[dev-dependencies] -hash_ext = { workspace = true } diff --git a/src/components/partitioned_lock/src/lib.rs b/src/components/partitioned_lock/src/lib.rs deleted file mode 100644 index 22273b9709..0000000000 --- a/src/components/partitioned_lock/src/lib.rs +++ /dev/null @@ -1,426 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Partitioned locks - -use std::{ - hash::{BuildHasher, Hash}, - sync::{Mutex, MutexGuard, RwLock, RwLockReadGuard, RwLockWriteGuard}, -}; - -/// Simple partitioned `RwLock` -pub struct PartitionedRwLock -where - B: BuildHasher, -{ - partitions: Vec>, - partition_mask: usize, - hash_builder: B, -} - -impl PartitionedRwLock -where - B: BuildHasher, -{ - /// New cache with capacity set to `2^bit_len` - pub fn try_new_with_bit_len( - init_fn: F, - partition_bit_len: usize, - hash_builder: B, - ) -> Result - where - F: Fn(usize) -> Result, - { - let partition_num = 1 << partition_bit_len; - PartitionedRwLock::try_new(init_fn, partition_num, hash_builder) - } - - /// New cache with capacity round to `suggest_cap`'s power of 2 - pub fn try_new_with_suggest_cap( - init_fn: F, - suggest_cap: usize, - hash_builder: B, - ) -> Result - where - F: Fn(usize) -> Result, - { - let partition_num = suggest_cap.next_power_of_two(); - PartitionedRwLock::try_new(init_fn, partition_num, hash_builder) - } - - pub fn read(&self, key: &K) -> RwLockReadGuard<'_, T> { - let rwlock = self.get_partition(key); - - rwlock.read().unwrap() - } - - pub fn write(&self, key: &K) -> RwLockWriteGuard<'_, T> { - let rwlock = self.get_partition(key); - - rwlock.write().unwrap() - } - - fn get_partition(&self, key: &K) -> &RwLock { - &self.partitions[(self.hash_builder.hash_one(key) as usize) & self.partition_mask] - } - - #[inline] - fn try_new(init_fn: F, partition_num: usize, hash_builder: B) -> Result - where - F: Fn(usize) -> Result, - { - let partitions = (0..partition_num) - .map(|_| init_fn(partition_num).map(RwLock::new)) - .collect::>, E>>()?; - - Ok(Self { - partitions, - partition_mask: partition_num - 1, - hash_builder, - }) - } - - #[cfg(test)] - fn get_partition_by_index(&self, index: usize) -> &RwLock { - &self.partitions[index] - } -} - -/// Simple partitioned `Mutex` -#[derive(Debug)] -pub struct PartitionedMutex -where - B: BuildHasher, -{ - partitions: Vec>, - partition_mask: usize, - hash_builder: B, -} - -impl PartitionedMutex -where - B: BuildHasher, -{ - /// New cache with capacity set to `2^bit_len` - pub fn try_new_with_bit_len( - init_fn: F, - partition_bit_len: usize, - hash_builder: B, - ) -> Result - where - F: Fn(usize) -> Result, - { - let partition_num = 1 << partition_bit_len; - PartitionedMutex::try_new(init_fn, partition_num, hash_builder) - } - - /// New cache with capacity round to `suggest_cap`'s power of 2 - pub fn try_new_with_suggest_cap( - init_fn: F, - suggest_cap: usize, - hash_builder: B, - ) -> Result - where - F: Fn(usize) -> Result, - { - let partition_num = suggest_cap.next_power_of_two(); - PartitionedMutex::try_new(init_fn, partition_num, hash_builder) - } - - pub fn lock(&self, key: &K) -> MutexGuard<'_, T> { - let mutex = self.get_partition(key); - - mutex.lock().unwrap() - } - - fn get_partition(&self, key: &K) -> &Mutex { - &self.partitions[(self.hash_builder.hash_one(key) as usize) & self.partition_mask] - } - - #[inline] - fn try_new(init_fn: F, partition_num: usize, hash_builder: B) -> Result - where - F: Fn(usize) -> Result, - { - let partitions = (0..partition_num) - .map(|_| init_fn(partition_num).map(Mutex::new)) - .collect::>, E>>()?; - - Ok(Self { - partitions, - partition_mask: partition_num - 1, - hash_builder, - }) - } - - #[cfg(test)] - fn get_partition_by_index(&self, index: usize) -> &Mutex { - &self.partitions[index] - } - - /// This function should be marked with `#[cfg(test)]`, but there is [an issue](https://github.com/rust-lang/cargo/issues/8379) in cargo, 
so public this function now. - pub fn get_all_partition(&self) -> &Vec> { - &self.partitions - } -} - -#[derive(Debug)] -pub struct PartitionedMutexAsync -where - B: BuildHasher, -{ - partitions: Vec>, - partition_mask: usize, - hash_builder: B, -} - -impl PartitionedMutexAsync -where - B: BuildHasher, -{ - /// New cache with capacity set to `2^bit_len` - pub fn try_new_with_bit_len( - init_fn: F, - partition_bit_len: usize, - hash_builder: B, - ) -> Result - where - F: Fn(usize) -> Result, - { - let partition_num = 1 << partition_bit_len; - PartitionedMutexAsync::try_new(init_fn, partition_num, hash_builder) - } - - /// New cache with capacity round to `suggest_cap`'s power of 2 - pub fn try_new_with_suggest_cap( - init_fn: F, - suggest_cap: usize, - hash_builder: B, - ) -> Result - where - F: Fn(usize) -> Result, - { - let partition_num = suggest_cap.next_power_of_two(); - PartitionedMutexAsync::try_new(init_fn, partition_num, hash_builder) - } - - pub async fn lock(&self, key: &K) -> tokio::sync::MutexGuard<'_, T> { - let mutex = self.get_partition(key); - - mutex.lock().await - } - - #[inline] - fn try_new(init_fn: F, partition_num: usize, hash_builder: B) -> Result - where - F: Fn(usize) -> Result, - { - let partitions = (0..partition_num) - .map(|_| init_fn(partition_num).map(tokio::sync::Mutex::new)) - .collect::>, E>>()?; - - Ok(Self { - partitions, - partition_mask: partition_num - 1, - hash_builder, - }) - } - - fn get_partition(&self, key: &K) -> &tokio::sync::Mutex { - &self.partitions[(self.hash_builder.hash_one(key) as usize) & self.partition_mask] - } - - #[cfg(test)] - async fn get_partition_by_index(&self, index: usize) -> &tokio::sync::Mutex { - &self.partitions[index] - } -} - -#[cfg(test)] -mod tests { - use std::collections::HashMap; - - // TODO: remove this importing. 
- use hash_ext::{build_fixed_seed_ahasher_builder, SeaHasherBuilder}; - - use super::*; - - #[test] - fn test_new_equivalence() { - let init_42 = |_: usize| Ok::<_, ()>(42); - - let test_rwlock_42_bit_len = - PartitionedRwLock::try_new_with_bit_len(init_42, 4, build_fixed_seed_ahasher_builder()) - .unwrap(); - let test_rwlock_42_suggest_cap = PartitionedRwLock::try_new_with_suggest_cap( - init_42, - 13, - build_fixed_seed_ahasher_builder(), - ) - .unwrap(); - - let test_mutex_42_bit_len = - PartitionedMutex::try_new_with_bit_len(init_42, 4, build_fixed_seed_ahasher_builder()) - .unwrap(); - let test_mutex_42_suggest_cap = PartitionedMutex::try_new_with_suggest_cap( - init_42, - 16, - build_fixed_seed_ahasher_builder(), - ) - .unwrap(); - - let test_mutex_async_42_bit_len = PartitionedMutexAsync::try_new_with_bit_len( - init_42, - 4, - build_fixed_seed_ahasher_builder(), - ) - .unwrap(); - let test_mutex_async_42_suggest_cap = PartitionedMutexAsync::try_new_with_suggest_cap( - init_42, - 13, - build_fixed_seed_ahasher_builder(), - ) - .unwrap(); - - assert_eq!( - test_rwlock_42_bit_len.partition_mask, - test_rwlock_42_suggest_cap.partition_mask - ); - assert_eq!( - test_mutex_42_bit_len.partition_mask, - test_mutex_42_suggest_cap.partition_mask - ); - assert_eq!( - test_mutex_async_42_bit_len.partition_mask, - test_mutex_async_42_suggest_cap.partition_mask - ); - } - - #[test] - fn test_partitioned_rwlock() { - let init_hmap = |_: usize| Ok::<_, ()>(HashMap::new()); - let test_locked_map = PartitionedRwLock::try_new_with_bit_len( - init_hmap, - 4, - build_fixed_seed_ahasher_builder(), - ) - .unwrap(); - let test_key = "test_key".to_string(); - let test_value = "test_value".to_string(); - - { - let mut map = test_locked_map.write(&test_key); - map.insert(test_key.clone(), test_value.clone()); - } - - { - let map = test_locked_map.read(&test_key); - assert_eq!(map.get(&test_key).unwrap(), &test_value); - } - } - - #[test] - fn test_partitioned_mutex() { - let init_hmap = |_: usize| Ok::<_, ()>(HashMap::new()); - let test_locked_map = PartitionedMutex::try_new_with_bit_len( - init_hmap, - 4, - build_fixed_seed_ahasher_builder(), - ) - .unwrap(); - let test_key = "test_key".to_string(); - let test_value = "test_value".to_string(); - - { - let mut map = test_locked_map.lock(&test_key); - map.insert(test_key.clone(), test_value.clone()); - } - - { - let map = test_locked_map.lock(&test_key); - assert_eq!(map.get(&test_key).unwrap(), &test_value); - } - } - - #[tokio::test] - async fn test_partitioned_mutex_async() { - let init_hmap = |_: usize| Ok::<_, ()>(HashMap::new()); - let test_locked_map = - PartitionedMutexAsync::try_new_with_bit_len(init_hmap, 4, SeaHasherBuilder).unwrap(); - let test_key = "test_key".to_string(); - let test_value = "test_value".to_string(); - - { - let mut map = test_locked_map.lock(&test_key).await; - map.insert(test_key.clone(), test_value.clone()); - } - - { - let map = test_locked_map.lock(&test_key).await; - assert_eq!(map.get(&test_key).unwrap(), &test_value); - } - } - - #[test] - fn test_partitioned_mutex_vis_different_partition() { - let init_vec = |_: usize| Ok::<_, ()>(Vec::::new()); - let test_locked_map = - PartitionedMutex::try_new_with_bit_len(init_vec, 4, build_fixed_seed_ahasher_builder()) - .unwrap(); - let mutex_first = test_locked_map.get_partition_by_index(0); - - let mut _tmp_data = mutex_first.lock().unwrap(); - assert!(mutex_first.try_lock().is_err()); - - let mutex_second = test_locked_map.get_partition_by_index(1); - 
assert!(mutex_second.try_lock().is_ok()); - assert!(mutex_first.try_lock().is_err()); - } - - #[test] - fn test_partitioned_rwmutex_vis_different_partition() { - let init_vec = |_: usize| Ok::<_, ()>(Vec::::new()); - let test_locked_map = PartitionedRwLock::try_new_with_bit_len( - init_vec, - 4, - build_fixed_seed_ahasher_builder(), - ) - .unwrap(); - let mutex_first = test_locked_map.get_partition_by_index(0); - let mut _tmp = mutex_first.write().unwrap(); - assert!(mutex_first.try_write().is_err()); - - let mutex_second_try_lock = test_locked_map.get_partition_by_index(1); - assert!(mutex_second_try_lock.try_write().is_ok()); - assert!(mutex_first.try_write().is_err()); - } - - #[tokio::test] - async fn test_partitioned_mutex_async_vis_different_partition() { - let init_vec = |_: usize| Ok::<_, ()>(Vec::::new()); - let test_locked_map = - PartitionedMutexAsync::try_new_with_bit_len(init_vec, 4, SeaHasherBuilder).unwrap(); - let mutex_first = test_locked_map.get_partition_by_index(0).await; - - let mut _tmp_data = mutex_first.lock().await; - assert!(mutex_first.try_lock().is_err()); - - let mutex_second = test_locked_map.get_partition_by_index(1).await; - assert!(mutex_second.try_lock().is_ok()); - assert!(mutex_first.try_lock().is_err()); - } -} diff --git a/src/components/profile/Cargo.toml b/src/components/profile/Cargo.toml deleted file mode 100644 index 1e6f9f44ee..0000000000 --- a/src/components/profile/Cargo.toml +++ /dev/null @@ -1,41 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "profile" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[dependencies.jemalloc-sys] -version = "0.3.2" -features = ["stats", "profiling", "unprefixed_malloc_on_supported_platforms"] - -[package.edition] -workspace = true - -[dependencies] -jemalloc-ctl = "0.3.2" -jemallocator = "0.3.2" -logger = { workspace = true } -pprof = { workspace = true, features = ["flamegraph"] } diff --git a/src/components/profile/src/lib.rs b/src/components/profile/src/lib.rs deleted file mode 100644 index a6e7edf79f..0000000000 --- a/src/components/profile/src/lib.rs +++ /dev/null @@ -1,186 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Profiler for running application. - -use std::{ - fmt::Formatter, - fs::{File, OpenOptions}, - io, - io::Read, - sync::{Mutex, MutexGuard}, - thread, time, - time::Duration, -}; - -use jemalloc_ctl::{Access, AsName}; -use logger::{error, info}; - -#[derive(Debug)] -pub enum Error { - Internal { msg: String }, - IO(io::Error), - Jemalloc(jemalloc_ctl::Error), -} - -impl std::fmt::Display for Error { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "Profile Error: {self:?}") - } -} - -impl std::error::Error for Error {} - -pub type Result = std::result::Result; - -#[global_allocator] -static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; - -const PROF_ACTIVE: &[u8] = b"prof.active\0"; -const PROF_DUMP: &[u8] = b"prof.dump\0"; -const PROFILE_HEAP_OUTPUT_FILE_OS_PATH: &[u8] = b"/tmp/profile_heap.out\0"; -const PROFILE_HEAP_OUTPUT_FILE_PATH: &str = "/tmp/profile_heap.out"; -const PROFILE_CPU_OUTPUT_FILE_PATH: &str = "/tmp/flamegraph_cpu.svg"; - -fn set_prof_active(active: bool) -> Result<()> { - let name = PROF_ACTIVE.name(); - name.write(active).map_err(Error::Jemalloc) -} - -fn dump_profile() -> Result<()> { - let name = PROF_DUMP.name(); - name.write(PROFILE_HEAP_OUTPUT_FILE_OS_PATH) - .map_err(Error::Jemalloc) -} - -#[allow(dead_code)] -struct ProfLockGuard<'a>(MutexGuard<'a, ()>); - -/// ProfLockGuard hold the profile lock and take responsibilities for -/// (de)activating heap profiling. NOTE: Keeping heap profiling on may cause -/// some extra runtime cost so we choose to activating it dynamically. -impl<'a> ProfLockGuard<'a> { - pub fn new(guard: MutexGuard<'a, ()>) -> Result { - set_prof_active(true)?; - Ok(Self(guard)) - } -} - -impl<'a> Drop for ProfLockGuard<'a> { - fn drop(&mut self) { - if let Err(e) = set_prof_active(false) { - error!("Fail to deactivate profiling, err:{}", e); - } - } -} - -pub struct Profiler { - heap_prof_lock: Mutex<()>, -} - -impl Default for Profiler { - fn default() -> Self { - Self::new() - } -} - -impl Profiler { - pub fn new() -> Self { - Self { - heap_prof_lock: Mutex::new(()), - } - } - - // dump_heap_prof collects heap profiling data in `seconds`. - // TODO(xikai): limit the profiling duration - pub fn dump_heap_prof(&self, seconds: u64) -> Result> { - // concurrent profiling is disabled. - let lock_guard = self - .heap_prof_lock - .try_lock() - .map_err(|e| Error::Internal { - msg: format!("failed to acquire heap_prof_lock, err:{e}"), - })?; - info!( - "Profiler::dump_heap_prof start heap profiling {} seconds", - seconds - ); - - let _guard = ProfLockGuard::new(lock_guard)?; - - // wait for seconds for collect the profiling data - thread::sleep(time::Duration::from_secs(seconds)); - - // clearing the profile output file before dumping profile results. - let _ = OpenOptions::new() - .create(true) - .write(true) - .truncate(true) - .open(PROFILE_HEAP_OUTPUT_FILE_PATH) - .map_err(|e| { - error!("Failed to open prof data file, err:{}", e); - Error::IO(e) - })?; - - // dump the profile results to profile output file. 
- dump_profile().map_err(|e| { - error!( - "Failed to dump prof to {}, err:{}", - PROFILE_HEAP_OUTPUT_FILE_PATH, e - ); - e - })?; - - // read the profile results into buffer - let mut f = File::open(PROFILE_HEAP_OUTPUT_FILE_PATH).map_err(|e| { - error!("Failed to open prof data file, err:{}", e); - Error::IO(e) - })?; - - let mut buffer = Vec::new(); - f.read_to_end(&mut buffer).map_err(|e| { - error!("Failed to read prof data file, err:{}", e); - Error::IO(e) - })?; - - Ok(buffer) - } - - pub fn dump_cpu_prof(&self, seconds: u64) -> Result<()> { - let guard = pprof::ProfilerGuardBuilder::default() - .frequency(100) - .blocklist(&["libc", "libgcc", "pthread", "vdso"]) - .build() - .map_err(|e| Error::Internal { - msg: format!("Profiler guard, err:{e}"), - })?; - - thread::sleep(Duration::from_secs(seconds)); - - let report = guard.report().build().map_err(|e| Error::Internal { - msg: format!("Report build, err:{e}"), - })?; - let file = File::create(PROFILE_CPU_OUTPUT_FILE_PATH).map_err(|e| { - error!("Failed to create cpu profile svg file, err:{}", e); - Error::IO(e) - })?; - report.flamegraph(file).map_err(|e| Error::Internal { - msg: format!("Flamegraph output, err:{e}"), - })?; - Ok(()) - } -} diff --git a/src/components/runtime/Cargo.toml b/src/components/runtime/Cargo.toml deleted file mode 100644 index 398cac4a68..0000000000 --- a/src/components/runtime/Cargo.toml +++ /dev/null @@ -1,42 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "runtime" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -lazy_static = { workspace = true } -macros = { workspace = true } -pin-project-lite = { workspace = true } -prometheus = { workspace = true } -snafu = { workspace = true } -tokio = { workspace = true } - -[dev-dependencies] -tokio-test = "0.4.2" diff --git a/src/components/runtime/src/lib.rs b/src/components/runtime/src/lib.rs deleted file mode 100644 index 726f0fde14..0000000000 --- a/src/components/runtime/src/lib.rs +++ /dev/null @@ -1,327 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! A multi-threaded runtime that supports running Futures -use std::{ - future::Future, - pin::Pin, - sync::Arc, - task::{Context, Poll}, -}; - -use macros::define_result; -use metrics::Metrics; -use pin_project_lite::pin_project; -use snafu::{Backtrace, GenerateBacktrace, ResultExt, Snafu}; -use tokio::{ - runtime::{Builder as RuntimeBuilder, Runtime as TokioRuntime}, - task::{JoinError, JoinHandle as TokioJoinHandle}, -}; - -mod metrics; -mod priority_runtime; - -pub use priority_runtime::{Priority, PriorityRuntime}; - -#[derive(Debug, Snafu)] -#[snafu(visibility(pub))] -pub enum Error { - #[snafu(display( - "Runtime Failed to build runtime, err:{}.\nBacktrace:\n{}", - source, - backtrace - ))] - BuildRuntime { - source: std::io::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Runtime Failed to join task, err:{}.\nBacktrace:\n{}", - source, - backtrace - ))] - JoinTask { - source: JoinError, - backtrace: Backtrace, - }, -} - -define_result!(Error); - -pub type RuntimeRef = Arc; - -/// A runtime to run future tasks -#[derive(Debug)] -pub struct Runtime { - rt: TokioRuntime, - metrics: Arc, -} - -impl Runtime { - /// Spawn a future and execute it in this thread pool - /// - /// Similar to tokio::runtime::Runtime::spawn() - pub fn spawn(&self, future: F) -> JoinHandle - where - F: Future + Send + 'static, - F::Output: Send + 'static, - { - JoinHandle { - inner: self.rt.spawn(future), - } - } - - /// Run the provided function on an executor dedicated to blocking - /// operations. - pub fn spawn_blocking(&self, func: F) -> JoinHandle - where - F: FnOnce() -> R + Send + 'static, - R: Send + 'static, - { - JoinHandle { - inner: self.rt.spawn_blocking(func), - } - } - - /// Run a future to complete, this is the runtime's entry point - pub fn block_on(&self, future: F) -> F::Output { - self.rt.block_on(future) - } - - /// Returns the runtime stats - pub fn stats(&self) -> RuntimeStats { - RuntimeStats { - alive_thread_num: self.metrics.thread_alive_gauge.get(), - idle_thread_num: self.metrics.thread_idle_gauge.get(), - } - } -} - -pin_project! { - #[derive(Debug)] - pub struct JoinHandle { - #[pin] - inner: TokioJoinHandle, - } -} - -impl JoinHandle { - pub fn abort(&self) { - self.inner.abort(); - } -} - -impl Future for JoinHandle { - type Output = Result; - - fn poll(self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll { - let this = self.project(); - this.inner.poll(ctx).map_err(|source| Error::JoinTask { - source, - backtrace: Backtrace::generate(), - }) - } -} - -/// Helper that aborts the given join handles on drop. -/// -/// Useful to kill background tasks when the consumer is dropped. 
-#[derive(Debug)] -pub struct AbortOnDropMany(pub Vec>); - -impl Drop for AbortOnDropMany { - fn drop(&mut self) { - for join_handle in &self.0 { - join_handle.inner.abort(); - } - } -} - -/// Runtime statistics -pub struct RuntimeStats { - pub alive_thread_num: i64, - pub idle_thread_num: i64, -} - -pub struct Builder { - thread_name: String, - builder: RuntimeBuilder, -} - -impl Default for Builder { - fn default() -> Self { - Self { - thread_name: "runtime-worker".to_string(), - builder: RuntimeBuilder::new_multi_thread(), - } - } -} - -fn with_metrics(metrics: &Arc, f: F) -> impl Fn() -where - F: Fn(&Arc) + 'static, -{ - let m = metrics.clone(); - move || { - f(&m); - } -} - -impl Builder { - /// Sets the number of worker threads the Runtime will use. - /// - /// This can be any number above 0 - pub fn worker_threads(&mut self, val: usize) -> &mut Self { - self.builder.worker_threads(val); - self - } - - /// Sets the size of the stack allocated to the worker threads the Runtime - /// will use. - /// - /// This can be any number above 0. - pub fn stack_size(&mut self, val: usize) -> &mut Self { - self.builder.thread_stack_size(val); - self - } - - /// Sets name of threads spawned by the Runtime thread pool - pub fn thread_name(&mut self, val: impl Into) -> &mut Self { - self.thread_name = val.into(); - self - } - - /// Enable all feature of the underlying runtime - pub fn enable_all(&mut self) -> &mut Self { - self.builder.enable_all(); - self - } - - pub fn build(&mut self) -> Result { - let metrics = Arc::new(Metrics::new(&self.thread_name)); - - let rt = self - .builder - .thread_name(self.thread_name.clone()) - .on_thread_start(with_metrics(&metrics, |m| { - m.on_thread_start(); - })) - .on_thread_stop(with_metrics(&metrics, |m| { - m.on_thread_stop(); - })) - .on_thread_park(with_metrics(&metrics, |m| { - m.on_thread_park(); - })) - .on_thread_unpark(with_metrics(&metrics, |m| { - m.on_thread_unpark(); - })) - .build() - .context(BuildRuntime)?; - - Ok(Runtime { rt, metrics }) - } -} - -#[cfg(test)] -mod tests { - use std::{sync::Arc, thread, time::Duration}; - - use tokio::sync::oneshot; - use tokio_test::assert_ok; - - use super::*; - - fn rt() -> Arc { - let rt = Builder::default() - .worker_threads(2) - .thread_name("test_spawn_join") - .enable_all() - .build(); - assert!(rt.is_ok()); - Arc::new(rt.unwrap()) - } - - #[test] - fn test_stats() { - let rt = Builder::default() - .worker_threads(5) - .thread_name("test_stats") - .enable_all() - .build(); - assert!(rt.is_ok()); - let rt = Arc::new(rt.unwrap()); - // wait threads created - thread::sleep(Duration::from_millis(50)); - - let s = rt.stats(); - assert_eq!(5, s.alive_thread_num); - assert_eq!(5, s.idle_thread_num); - - rt.spawn(async { - thread::sleep(Duration::from_millis(50)); - }); - - thread::sleep(Duration::from_millis(10)); - let s = rt.stats(); - assert_eq!(5, s.alive_thread_num); - assert_eq!(4, s.idle_thread_num); - } - - #[test] - fn block_on_async() { - let rt = rt(); - - let out = rt.block_on(async { - let (tx, rx) = oneshot::channel(); - - thread::spawn(move || { - thread::sleep(Duration::from_millis(50)); - tx.send("ZOMG").unwrap(); - }); - - assert_ok!(rx.await) - }); - - assert_eq!(out, "ZOMG"); - } - - #[test] - fn spawn_from_blocking() { - let rt = rt(); - let rt1 = rt.clone(); - let out = rt.block_on(async move { - let rt2 = rt1.clone(); - let inner = assert_ok!( - rt1.spawn_blocking(move || { rt2.spawn(async move { "hello" }) }) - .await - ); - - assert_ok!(inner.await) - }); - - assert_eq!(out, "hello") 
- } - - #[test] - fn test_spawn_join() { - let rt = rt(); - let handle = rt.spawn(async { 1 + 1 }); - - assert_eq!(2, rt.block_on(handle).unwrap()); - } -} diff --git a/src/components/runtime/src/metrics.rs b/src/components/runtime/src/metrics.rs deleted file mode 100644 index 23a130642b..0000000000 --- a/src/components/runtime/src/metrics.rs +++ /dev/null @@ -1,72 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use lazy_static::lazy_static; -use prometheus::{register_int_gauge_vec, IntGauge, IntGaugeVec}; - -lazy_static! { - // Gauges: - static ref RUNTIME_THREAD_ALIVE_GAUGE: IntGaugeVec = register_int_gauge_vec!( - "runtime_thread_alive_gauge", - "alive thread number for runtime", - &["name"] - ) - .unwrap(); - static ref RUNTIME_THREAD_IDLE_GAUGE: IntGaugeVec = register_int_gauge_vec!( - "runtime_thread_idle_gauge", - "idle thread number for runtime", - &["name"] - ) - .unwrap(); -} - -/// Runtime metrics. -#[derive(Debug)] -pub struct Metrics { - // Gauges: - pub thread_alive_gauge: IntGauge, - pub thread_idle_gauge: IntGauge, -} - -impl Metrics { - pub fn new(name: &str) -> Self { - Self { - thread_alive_gauge: RUNTIME_THREAD_ALIVE_GAUGE.with_label_values(&[name]), - thread_idle_gauge: RUNTIME_THREAD_IDLE_GAUGE.with_label_values(&[name]), - } - } - - #[inline] - pub fn on_thread_start(&self) { - self.thread_alive_gauge.inc(); - } - - #[inline] - pub fn on_thread_stop(&self) { - self.thread_alive_gauge.dec(); - } - - #[inline] - pub fn on_thread_park(&self) { - self.thread_idle_gauge.inc(); - } - - #[inline] - pub fn on_thread_unpark(&self) { - self.thread_idle_gauge.dec(); - } -} diff --git a/src/components/runtime/src/priority_runtime.rs b/src/components/runtime/src/priority_runtime.rs deleted file mode 100644 index 922b80ea5a..0000000000 --- a/src/components/runtime/src/priority_runtime.rs +++ /dev/null @@ -1,101 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::future::Future; - -use crate::{JoinHandle, RuntimeRef}; - -// TODO: maybe we could move this to common_types crate. -#[derive(Copy, Clone, Debug, Default)] -#[repr(u8)] -pub enum Priority { - #[default] - High = 0, - Low = 1, -} - -impl Priority { - pub fn as_u8(&self) -> u8 { - *self as u8 - } - - pub fn as_str(&self) -> &str { - match self { - Self::High => "high", - Self::Low => "low", - } - } -} - -impl TryFrom for Priority { - type Error = String; - - fn try_from(value: u8) -> Result { - match value { - 0 => Ok(Priority::High), - 1 => Ok(Priority::Low), - _ => Err(format!("Unknown priority, value:{value}")), - } - } -} - -#[derive(Clone, Debug)] -pub struct PriorityRuntime { - low: RuntimeRef, - high: RuntimeRef, -} - -impl PriorityRuntime { - pub fn new(low: RuntimeRef, high: RuntimeRef) -> Self { - Self { low, high } - } - - pub fn low(&self) -> &RuntimeRef { - &self.low - } - - pub fn high(&self) -> &RuntimeRef { - &self.high - } - - pub fn choose_runtime(&self, priority: &Priority) -> &RuntimeRef { - match priority { - Priority::Low => &self.low, - Priority::High => &self.high, - } - } - - // By default we spawn the future to the higher priority runtime. - pub fn spawn(&self, future: F) -> JoinHandle - where - F: Future + Send + 'static, - F::Output: Send + 'static, - { - self.high.spawn(future) - } - - pub fn spawn_with_priority(&self, future: F, priority: Priority) -> JoinHandle - where - F: Future + Send + 'static, - F::Output: Send + 'static, - { - match priority { - Priority::Low => self.low.spawn(future), - Priority::High => self.high.spawn(future), - } - } -} diff --git a/src/components/sampling_cache/Cargo.toml b/src/components/sampling_cache/Cargo.toml deleted file mode 100644 index 8792a3b2f1..0000000000 --- a/src/components/sampling_cache/Cargo.toml +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "sampling_cache" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -chrono = { workspace = true } diff --git a/src/components/sampling_cache/src/lib.rs b/src/components/sampling_cache/src/lib.rs deleted file mode 100644 index 814e29f41d..0000000000 --- a/src/components/sampling_cache/src/lib.rs +++ /dev/null @@ -1,126 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::atomic::{AtomicI64, AtomicUsize, Ordering}; - -use chrono::prelude::*; - -// Cache the provided value and update it in a sampling rate. -pub struct SamplingCachedUsize { - interval_ms: i64, - - last_updated_at: AtomicI64, - cached_val: AtomicUsize, -} - -impl SamplingCachedUsize { - pub fn new(interval_ms: u64) -> Self { - Self { - interval_ms: interval_ms as i64, - last_updated_at: AtomicI64::default(), - cached_val: AtomicUsize::default(), - } - } - - /// Read the cached value and update it by the `val_source` if necessary. - /// - /// The returned error only results from the `val_source`. - pub fn read(&self, val_source: F) -> std::result::Result - where - F: FnOnce() -> std::result::Result, - { - // Fast path for no sampling. - if self.interval_ms == 0 { - return val_source(); - } - - let now_ms = Utc::now().timestamp_millis(); - let last_updated_at = self.last_updated_at.load(Ordering::Relaxed); - let deadline_ms = last_updated_at + self.interval_ms; - - assert!(deadline_ms >= 0); - if now_ms >= deadline_ms { - let new_value = val_source()?; - self.last_updated_at.store(now_ms, Ordering::Relaxed); - self.cached_val.store(new_value, Ordering::Relaxed); - Ok(new_value) - } else { - Ok(self.cached_val.load(Ordering::Relaxed)) - } - } -} - -#[cfg(test)] -mod tests { - use std::{sync::Mutex, time::Duration}; - - use super::*; - - #[derive(Default)] - struct ValueSource { - val: Mutex, - } - - impl ValueSource { - fn get(&self) -> std::result::Result { - Ok(*self.val.lock().unwrap()) - } - - fn inc(&self) { - let mut val = self.val.lock().unwrap(); - *val += 1; - } - } - - #[test] - fn test_always_update() { - let updater = SamplingCachedUsize::new(0); - let val_source = ValueSource::default(); - - let v = updater.read(|| val_source.get()).unwrap(); - assert_eq!(v, 0); - val_source.inc(); - let v = updater.read(|| val_source.get()).unwrap(); - assert_eq!(v, 1); - } - - #[test] - fn test_normal_update() { - let interval_ms = 100u64; - let interval = Duration::from_millis(interval_ms); - let updater = SamplingCachedUsize::new(interval_ms); - let val_source = ValueSource::default(); - - let v = updater.read(|| val_source.get()).unwrap(); - assert_eq!(v, 0); - val_source.inc(); - let v = updater.read(|| val_source.get()).unwrap(); - assert_eq!(v, 0); - - std::thread::sleep(interval / 2); - let v = updater.read(|| val_source.get()).unwrap(); - assert_eq!(v, 0); - - std::thread::sleep(interval / 3); - let v = updater.read(|| val_source.get()).unwrap(); - assert_eq!(v, 0); - - std::thread::sleep(interval / 2); - let v = updater.read(|| val_source.get()).unwrap(); - assert_eq!(v, 1); - } -} diff --git a/src/components/size_ext/Cargo.toml b/src/components/size_ext/Cargo.toml deleted file mode 100644 index 8f329adca3..0000000000 --- a/src/components/size_ext/Cargo.toml +++ /dev/null @@ -1,37 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "size_ext" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -serde = { workspace = true } - -[dev-dependencies] -toml = { workspace = true } diff --git a/src/components/size_ext/src/lib.rs b/src/components/size_ext/src/lib.rs deleted file mode 100644 index e35f113593..0000000000 --- a/src/components/size_ext/src/lib.rs +++ /dev/null @@ -1,297 +0,0 @@ -// Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. - -//! Configure utils - -// This module is forked from tikv and remove unnecessary code. -// https://github.com/tikv/tikv/blob/HEAD/src/util/config.rs -use std::{ - fmt::{self, Write}, - ops::{Div, Mul}, - str::{self, FromStr}, -}; - -use serde::{ - de::{self, Unexpected, Visitor}, - Deserialize, Deserializer, Serialize, Serializer, -}; - -const UNIT: u64 = 1; - -const BINARY_DATA_MAGNITUDE: u64 = 1024; -pub const B: u64 = UNIT; -pub const KIB: u64 = UNIT * BINARY_DATA_MAGNITUDE; -pub const MIB: u64 = KIB * BINARY_DATA_MAGNITUDE; -pub const GIB: u64 = MIB * BINARY_DATA_MAGNITUDE; -pub const TIB: u64 = GIB * BINARY_DATA_MAGNITUDE; -pub const PIB: u64 = TIB * BINARY_DATA_MAGNITUDE; - -#[derive(Clone, Debug, Copy, PartialEq, Eq, PartialOrd)] -pub struct ReadableSize(pub u64); - -impl ReadableSize { - pub const fn kb(count: u64) -> ReadableSize { - ReadableSize(count * KIB) - } - - pub const fn mb(count: u64) -> ReadableSize { - ReadableSize(count * MIB) - } - - pub const fn gb(count: u64) -> ReadableSize { - ReadableSize(count * GIB) - } - - pub const fn as_mb(self) -> u64 { - self.0 / MIB - } - - pub const fn as_byte(self) -> u64 { - self.0 - } -} - -impl Div for ReadableSize { - type Output = ReadableSize; - - fn div(self, rhs: u64) -> ReadableSize { - ReadableSize(self.0 / rhs) - } -} - -impl Div for ReadableSize { - type Output = u64; - - fn div(self, rhs: ReadableSize) -> u64 { - self.0 / rhs.0 - } -} - -impl Mul for ReadableSize { - type Output = ReadableSize; - - fn mul(self, rhs: u64) -> ReadableSize { - ReadableSize(self.0 * rhs) - } -} - -impl Serialize for ReadableSize { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let size = self.0; - let mut buffer = String::new(); - if size == 0 { - write!(buffer, "{size}KiB").unwrap(); - } else if size % PIB == 0 { - write!(buffer, "{}PiB", size / PIB).unwrap(); - } else if size % TIB == 0 { - write!(buffer, "{}TiB", size / TIB).unwrap(); - } else if size % GIB == 0 { - write!(buffer, "{}GiB", size / GIB).unwrap(); - } else if size % MIB == 0 { - write!(buffer, "{}MiB", size / MIB).unwrap(); - } else if size % KIB == 0 { - write!(buffer, "{}KiB", size / KIB).unwrap(); - } else { - return serializer.serialize_u64(size); - } - serializer.serialize_str(&buffer) - } -} - -impl FromStr for ReadableSize { - type Err = String; - - // This method parses value in binary unit. 
- fn from_str(s: &str) -> Result { - let size_str = s.trim(); - if size_str.is_empty() { - return Err(format!("{s:?} is not a valid size.")); - } - - if !size_str.is_ascii() { - return Err(format!("ASCII string is expected, but got {s:?}")); - } - - // size: digits and '.' as decimal separator - let size_len = size_str - .to_string() - .chars() - .take_while(|c| char::is_ascii_digit(c) || ['.', 'e', 'E', '-', '+'].contains(c)) - .count(); - - // unit: alphabetic characters - let (size, unit) = size_str.split_at(size_len); - - let unit = match unit.trim() { - "K" | "KB" | "KiB" => KIB, - "M" | "MB" | "MiB" => MIB, - "G" | "GB" | "GiB" => GIB, - "T" | "TB" | "TiB" => TIB, - "P" | "PB" | "PiB" => PIB, - "B" | "" => UNIT, - _ => { - return Err(format!( - "only B, KB, KiB, MB, MiB, GB, GiB, TB, TiB, PB, and PiB are supported: {s:?}" - )); - } - }; - - match size.parse::() { - Ok(n) => Ok(ReadableSize((n * unit as f64) as u64)), - Err(_) => Err(format!("invalid size string: {s:?}")), - } - } -} - -impl<'de> Deserialize<'de> for ReadableSize { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - struct SizeVisitor; - - impl<'de> Visitor<'de> for SizeVisitor { - type Value = ReadableSize; - - fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - formatter.write_str("valid size") - } - - fn visit_i64(self, size: i64) -> Result - where - E: de::Error, - { - if size >= 0 { - self.visit_u64(size as u64) - } else { - Err(E::invalid_value(Unexpected::Signed(size), &self)) - } - } - - fn visit_u64(self, size: u64) -> Result - where - E: de::Error, - { - Ok(ReadableSize(size)) - } - - fn visit_str(self, size_str: &str) -> Result - where - E: de::Error, - { - size_str.parse().map_err(E::custom) - } - } - - deserializer.deserialize_any(SizeVisitor) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_readable_size() { - let s = ReadableSize::kb(2); - assert_eq!(s.0, 2048); - assert_eq!(s.as_mb(), 0); - let s = ReadableSize::mb(2); - assert_eq!(s.0, 2 * 1024 * 1024); - assert_eq!(s.as_mb(), 2); - let s = ReadableSize::gb(2); - assert_eq!(s.0, 2 * 1024 * 1024 * 1024); - assert_eq!(s.as_mb(), 2048); - - assert_eq!((ReadableSize::mb(2) / 2).0, MIB); - assert_eq!((ReadableSize::mb(1) / 2).0, 512 * KIB); - assert_eq!(ReadableSize::mb(2) / ReadableSize::kb(1), 2048); - } - - #[test] - fn test_parse_readable_size() { - #[derive(Serialize, Deserialize)] - struct SizeHolder { - s: ReadableSize, - } - - let legal_cases = vec![ - (0, "0KiB"), - (2 * KIB, "2KiB"), - (4 * MIB, "4MiB"), - (5 * GIB, "5GiB"), - (7 * TIB, "7TiB"), - (11 * PIB, "11PiB"), - ]; - for (size, exp) in legal_cases { - let c = SizeHolder { - s: ReadableSize(size), - }; - let res_str = toml::to_string(&c).unwrap(); - let exp_str = format!("s = {exp:?}\n"); - assert_eq!(res_str, exp_str); - let res_size: SizeHolder = toml::from_str(&exp_str).unwrap(); - assert_eq!(res_size.s.0, size); - } - - let c = SizeHolder { - s: ReadableSize(512), - }; - let res_str = toml::to_string(&c).unwrap(); - assert_eq!(res_str, "s = 512\n"); - let res_size: SizeHolder = toml::from_str(&res_str).unwrap(); - assert_eq!(res_size.s.0, c.s.0); - - let decode_cases = vec![ - (" 0.5 PB", PIB / 2), - ("0.5 TB", TIB / 2), - ("0.5GB ", GIB / 2), - ("0.5MB", MIB / 2), - ("0.5KB", KIB / 2), - ("0.5P", PIB / 2), - ("0.5T", TIB / 2), - ("0.5G", GIB / 2), - ("0.5M", MIB / 2), - ("0.5K", KIB / 2), - ("23", 23), - ("1", 1), - ("1024B", KIB), - // units with binary prefixes - (" 0.5 PiB", PIB / 2), - ("1PiB", PIB), - 
("0.5 TiB", TIB / 2), - ("2 TiB", TIB * 2), - ("0.5GiB ", GIB / 2), - ("787GiB ", GIB * 787), - ("0.5MiB", MIB / 2), - ("3MiB", MIB * 3), - ("0.5KiB", KIB / 2), - ("1 KiB", KIB), - // scientific notation - ("0.5e6 B", B * 500000), - ("0.5E6 B", B * 500000), - ("1e6B", B * 1000000), - ("8E6B", B * 8000000), - ("8e7", B * 80000000), - ("1e-1MB", MIB / 10), - ("1e+1MB", MIB * 10), - ("0e+10MB", 0), - ]; - for (src, exp) in decode_cases { - let src = format!("s = {src:?}"); - let res: SizeHolder = toml::from_str(&src).unwrap(); - assert_eq!(res.s.0, exp); - } - - let illegal_cases = vec![ - "0.5kb", "0.5kB", "0.5Kb", "0.5k", "0.5g", "b", "gb", "1b", "B", "1K24B", " 5_KB", - "4B7", "5M_", - ]; - for src in illegal_cases { - let src_str = format!("s = {src:?}"); - assert!(toml::from_str::(&src_str).is_err(), "{}", src); - } - } -} diff --git a/src/components/skiplist/Cargo.toml b/src/components/skiplist/Cargo.toml deleted file mode 100644 index 5953c15144..0000000000 --- a/src/components/skiplist/Cargo.toml +++ /dev/null @@ -1,25 +0,0 @@ -[package] -name = "skiplist" -authors = ["Jay Lee ", "HoraeDB Authors"] - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -arena = { workspace = true } -bytes = { workspace = true } -rand = { workspace = true } - -[dev-dependencies] -criterion = { workspace = true } -yatp = { git = "https://github.com/tikv/yatp.git", rev = "793be4d789d4bd15292fe4d06e38063b4ec9d48e" } - -[[bench]] -name = "bench" -harness = false diff --git a/src/components/skiplist/benches/bench.rs b/src/components/skiplist/benches/bench.rs deleted file mode 100644 index a3b5f392a3..0000000000 --- a/src/components/skiplist/benches/bench.rs +++ /dev/null @@ -1,179 +0,0 @@ -use std::{ - collections::*, - sync::{atomic::*, *}, - thread, -}; - -use arena::MonoIncArena; -use bytes::*; -use criterion::*; -use rand::prelude::*; -use skiplist::*; - -// #[cfg(not(target_env = "msvc"))] -// use tikv_jemallocator::Jemalloc; - -// #[cfg(not(target_env = "msvc"))] -// #[global_allocator] -// static GLOBAL: Jemalloc = Jemalloc; - -fn skiplist_round( - l: &Skiplist, - case: &(Bytes, bool), - exp: &Bytes, -) { - if case.1 { - if let Some(v) = l.get(&case.0) { - assert_eq!(v, exp); - } - } else { - l.put(&case.0, exp); - } -} - -fn append_ts(key: &mut BytesMut, ts: u64) { - key.put_u64(ts); -} - -fn random_key(rng: &mut ThreadRng) -> Bytes { - let mut key = BytesMut::with_capacity(16); - unsafe { - rng.fill_bytes(&mut *(&mut key.chunk_mut()[..8] as *mut _ as *mut [u8])); - key.advance_mut(8); - } - append_ts(&mut key, 0); - key.freeze() -} - -fn bench_read_write_skiplist_frac(b: &mut Bencher<'_>, frac: &usize) { - let frac = *frac; - let value = Bytes::from_static(b"00123"); - let comp = FixedLengthSuffixComparator::new(8); - let arena = MonoIncArena::new(1 << 10); - let list = Skiplist::with_arena(comp, arena); - let l = list.clone(); - let stop = Arc::new(AtomicBool::new(false)); - let s = stop.clone(); - let v = value.clone(); - let handle = thread::spawn(move || { - let mut rng = rand::thread_rng(); - while !s.load(Ordering::SeqCst) { - let key = random_key(&mut rng); - let case = (key, frac > rng.gen_range(0..11)); - skiplist_round(&l, &case, &v); - } - }); - let mut rng = rand::thread_rng(); - b.iter_batched_ref( - || (random_key(&mut rng), frac > rng.gen_range(0..11)), - |case| skiplist_round(&list, case, &value), - BatchSize::SmallInput, - ); - stop.store(true, Ordering::SeqCst); - handle.join().unwrap(); -} - -fn 
bench_read_write_skiplist(c: &mut Criterion) { - let mut group = c.benchmark_group("skiplist_read_write"); - for i in 0..=10 { - group.bench_with_input( - BenchmarkId::from_parameter(i), - &i, - bench_read_write_skiplist_frac, - ); - } - group.finish(); -} - -fn map_round(m: &Mutex>, case: &(Bytes, bool), exp: &Bytes) { - if case.1 { - let rm = m.lock().unwrap(); - let value = rm.get(&case.0); - if let Some(v) = value { - assert_eq!(v, exp); - } - } else { - let mut rm = m.lock().unwrap(); - rm.insert(case.0.clone(), exp.clone()); - } -} - -fn bench_read_write_map_frac(b: &mut Bencher<'_>, frac: &usize) { - let frac = *frac; - let value = Bytes::from_static(b"00123"); - let map = Arc::new(Mutex::new(HashMap::with_capacity(512 << 10))); - let map_in_thread = map.clone(); - let stop = Arc::new(AtomicBool::new(false)); - let thread_stop = stop.clone(); - - let v = value.clone(); - let handle = thread::spawn(move || { - let mut rng = rand::thread_rng(); - while !thread_stop.load(Ordering::SeqCst) { - let f = rng.gen_range(0..11); - let case = (random_key(&mut rng), f < frac); - map_round(&map_in_thread, &case, &v); - } - }); - let mut rng = rand::thread_rng(); - b.iter_batched_ref( - || { - let f = rng.gen_range(0..11); - (random_key(&mut rng), f < frac) - }, - |case| map_round(&map, case, &value), - BatchSize::SmallInput, - ); - stop.store(true, Ordering::SeqCst); - handle.join().unwrap(); -} - -fn bench_read_write_map(c: &mut Criterion) { - let mut group = c.benchmark_group("map_read_write"); - for i in 0..=10 { - group.bench_with_input( - BenchmarkId::from_parameter(i), - &i, - bench_read_write_map_frac, - ); - } - group.finish(); -} - -fn bench_write_skiplist(c: &mut Criterion) { - let comp = FixedLengthSuffixComparator::new(8); - let arena = MonoIncArena::new(1 << 10); - let list = Skiplist::with_arena(comp, arena); - let value = Bytes::from_static(b"00123"); - let l = list.clone(); - let stop = Arc::new(AtomicBool::new(false)); - let s = stop.clone(); - let v = value.clone(); - let handle = thread::spawn(move || { - let mut rng = rand::thread_rng(); - while !s.load(Ordering::SeqCst) { - let case = (random_key(&mut rng), false); - skiplist_round(&l, &case, &v); - } - }); - let mut rng = rand::thread_rng(); - c.bench_function("skiplist_write", |b| { - b.iter_batched( - || random_key(&mut rng), - |key| { - list.put(&key, &value); - }, - BatchSize::SmallInput, - ) - }); - stop.store(true, Ordering::SeqCst); - handle.join().unwrap(); -} - -criterion_group!( - benches, - bench_read_write_skiplist, - bench_read_write_map, - bench_write_skiplist -); -criterion_main!(benches); diff --git a/src/components/skiplist/src/key.rs b/src/components/skiplist/src/key.rs deleted file mode 100644 index cda4ded0ce..0000000000 --- a/src/components/skiplist/src/key.rs +++ /dev/null @@ -1,68 +0,0 @@ -use std::cmp::Ordering; - -use bytes::Bytes; - -pub trait KeyComparator: Clone { - fn compare_key(&self, lhs: &[u8], rhs: &[u8]) -> Ordering; - fn same_key(&self, lhs: &[u8], rhs: &[u8]) -> bool; -} - -#[derive(Debug, Clone)] -pub struct BytewiseComparator; - -impl KeyComparator for BytewiseComparator { - #[inline] - fn compare_key(&self, lhs: &[u8], rhs: &[u8]) -> Ordering { - lhs.cmp(rhs) - } - - #[inline] - fn same_key(&self, lhs: &[u8], rhs: &[u8]) -> bool { - lhs == rhs - } -} - -#[derive(Default, Debug, Clone, Copy)] -pub struct FixedLengthSuffixComparator { - len: usize, -} - -impl FixedLengthSuffixComparator { - pub const fn new(len: usize) -> FixedLengthSuffixComparator { - FixedLengthSuffixComparator { len 
} - } -} - -impl KeyComparator for FixedLengthSuffixComparator { - #[inline] - fn compare_key(&self, lhs: &[u8], rhs: &[u8]) -> Ordering { - if lhs.len() < self.len { - panic!( - "cannot compare with suffix {}: {:?}", - self.len, - Bytes::copy_from_slice(lhs) - ); - } - if rhs.len() < self.len { - panic!( - "cannot compare with suffix {}: {:?}", - self.len, - Bytes::copy_from_slice(rhs) - ); - } - let (l_p, l_s) = lhs.split_at(lhs.len() - self.len); - let (r_p, r_s) = rhs.split_at(rhs.len() - self.len); - let res = l_p.cmp(r_p); - match res { - Ordering::Greater | Ordering::Less => res, - Ordering::Equal => l_s.cmp(r_s), - } - } - - #[inline] - fn same_key(&self, lhs: &[u8], rhs: &[u8]) -> bool { - let (l_p, _) = lhs.split_at(lhs.len() - self.len); - let (r_p, _) = rhs.split_at(rhs.len() - self.len); - l_p == r_p - } -} diff --git a/src/components/skiplist/src/lib.rs b/src/components/skiplist/src/lib.rs deleted file mode 100644 index 0bd650b782..0000000000 --- a/src/components/skiplist/src/lib.rs +++ /dev/null @@ -1,19 +0,0 @@ -//! Forked from -//! -//! Differences: -//! 1. Inline key and value in Node, so all memory of skiplist is allocated from -//! arena. Drawback: we have to copy the content of key/value -//! 2. Tower stores pointer to Node instead of offset, so we can use other arena -//! implementation -//! 3. Use [ArenaSlice] to replace Bytes -//! 4. impl Send/Sync for the iterator - -mod key; -mod list; -mod slice; - -const MAX_HEIGHT: usize = 20; - -pub use key::{BytewiseComparator, FixedLengthSuffixComparator, KeyComparator}; -pub use list::{IterRef, Skiplist, MAX_KEY_SIZE}; -pub use slice::ArenaSlice; diff --git a/src/components/skiplist/src/list.rs b/src/components/skiplist/src/list.rs deleted file mode 100644 index 4e4114c16f..0000000000 --- a/src/components/skiplist/src/list.rs +++ /dev/null @@ -1,702 +0,0 @@ -use std::{ - alloc::Layout, - convert::TryInto, - mem, ptr, - ptr::NonNull, - slice, - sync::{ - atomic::{AtomicPtr, AtomicUsize, Ordering}, - Arc, - }, -}; - -use arena::{Arena, BasicStats}; -use rand::Rng; - -use crate::{slice::ArenaSlice, KeyComparator, MAX_HEIGHT}; - -const HEIGHT_INCREASE: u32 = u32::MAX / 3; - -type KeySize = u16; -type ValueSize = u32; - -pub const MAX_KEY_SIZE: u16 = u16::MAX; - -/// The layout of Node -/// 1. height: usize -/// 2. tower: AtomicPtr x (height + 1) -/// 3. key_size: KeySize -/// 4. key: u8 x key_size -/// 5. value_size: ValueSize -/// 6. value: ValueSize -// Uses C layout to make sure tower is at the bottom -#[derive(Debug)] -#[repr(C)] -pub struct Node { - /// Height of node, different from badger, The valid range of tower is [0, - /// height] - height: usize, - /// The node tower - /// - /// Only [0, height] parts is utilized to store node pointer, the key and - /// value block are start from tower[height + 1] - tower: [AtomicPtr; MAX_HEIGHT], -} - -impl Node { - /// Allocate a new node from the arena, and copy the content of key/value - /// into the node - /// # Safety - /// - from_size_align_unchecked: align is got from [mem::align_of]. - /// # Notice - /// This will only allocate the *exact* amount of memory needed within the - /// given height. - fn alloc(arena: &A, key: &[u8], value: &[u8], height: usize) -> *mut Node - where - A: Arena, - { - // Calculate node size to alloc - let size = mem::size_of::(); - // Not all values in Node::tower will be utilized. 
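The layout comment above means a node only pays for `height + 1` tower slots, with the key and value bytes (each preceded by their length) packed right after the tower. A standalone sketch of that size arithmetic, not the crate's code, assuming a typical 64-bit target:

```rust
use std::mem;
use std::sync::atomic::AtomicPtr;

const MAX_HEIGHT: usize = 20;

// Mirrors the allocation formula: start from the full struct (height field plus
// MAX_HEIGHT tower slots), subtract the unused slots, then add the key/value
// payload with its u16/u32 length prefixes.
fn node_alloc_size(key_len: usize, value_len: usize, height: usize) -> usize {
    let full = mem::size_of::<usize>() + MAX_HEIGHT * mem::size_of::<AtomicPtr<()>>();
    let not_used = (MAX_HEIGHT - height - 1) * mem::size_of::<AtomicPtr<()>>();
    let kv = mem::size_of::<u16>() + key_len + mem::size_of::<u32>() + value_len;
    full - not_used + kv
}

fn main() {
    // height = 0 keeps a single tower slot:
    // 8 (height) + 8 (tower[0]) + 2 + 3 (key) + 4 + 5 (value) = 30 bytes.
    assert_eq!(node_alloc_size(3, 5, 0), 30);
}
```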
- let not_used = (MAX_HEIGHT - height - 1) * mem::size_of::>(); - // Space to store key/value: (key size) + key + (value size) + value - let kv_used = - mem::size_of::() + key.len() + mem::size_of::() + value.len(); - // UB in fact: the `not_used` size is able to be access in a "safe" way. - // It is guaranteed by the user to not use those memory. - let alloc_size = size - not_used + kv_used; - let layout = - unsafe { Layout::from_size_align_unchecked(alloc_size, mem::align_of::()) }; - let node_ptr = arena.alloc(layout).as_ptr() as *mut Node; - unsafe { - let node = &mut *node_ptr; - node.height = height; - ptr::write_bytes(node.tower.as_mut_ptr(), 0, height + 1); - Self::init_key_value(node, key, value); - - node_ptr - } - } - - /// Fetch next node ptr in given height - fn next_ptr(&self, height: usize) -> *mut Node { - self.tower[height].load(Ordering::SeqCst) - } - - /// Get key - /// - /// REQUIRE: This Node is created via `Node::alloc()` - unsafe fn key(&self) -> &[u8] { - let (key_block, key_size) = self.load_key_size(); - - slice::from_raw_parts(key_block, key_size as usize) - } - - /// Get value - /// - /// REQUIRE: This Node is created via `Node::alloc()` - unsafe fn value(&self) -> &[u8] { - let (key_block, key_size) = self.load_key_size(); - let (value_block, value_size) = self.load_value_size(key_block, key_size); - - slice::from_raw_parts(value_block, value_size as usize) - } - - /// Set key and value parts of Node during creating Node - /// - /// Will copy the content of key and value to the Node - /// - /// REQUIRE: This Node is created via Arena and node.tower and node.height - /// is already set to correct value - /// Panic: The size of key/value must less than max value of - /// KeySize/ValueSize (u16/u32), otherwise this function will panic - unsafe fn init_key_value(node: &mut Node, key: &[u8], value: &[u8]) { - let key_block = node.tower.as_mut_ptr().add(node.height + 1) as *mut u8; - let key_size: KeySize = key.len().try_into().unwrap(); - let key_size_bytes = key_size.to_ne_bytes(); - - ptr::copy_nonoverlapping( - key_size_bytes.as_ptr(), - key_block, - mem::size_of::(), - ); - let key_block = key_block.add(mem::size_of::()); - ptr::copy_nonoverlapping(key.as_ptr(), key_block, key.len()); - - let value_block = key_block.add(key.len()); - let value_size: ValueSize = value.len().try_into().unwrap(); - let value_size_bytes = value_size.to_ne_bytes(); - - ptr::copy_nonoverlapping( - value_size_bytes.as_ptr(), - value_block, - mem::size_of::(), - ); - let value_block = value_block.add(mem::size_of::()); - ptr::copy_nonoverlapping(value.as_ptr(), value_block, value.len()); - } - - /// Load key pointer and size of key - /// - /// REQUIRE: This Node is created via `Node::alloc()` - unsafe fn load_key_size(&self) -> (*const u8, KeySize) { - let tower = self.tower.as_ptr(); - // Move to key block - let key_block = tower.add(self.height + 1) as *const u8; - // Load key size from key block - let key_size = u16::from_ne_bytes(*(key_block as *const [u8; mem::size_of::()])); - // Move key block to the start of key - let key_block = key_block.add(mem::size_of::()); - - (key_block, key_size) - } - - /// Load value pointer and size of value - /// - /// Given key_block and key_size returned from `load_key_size()`, loads - /// value pointer and value size - /// - /// REQUIRE: This Node is created via `Node::alloc()` - unsafe fn load_value_size( - &self, - key_block: *const u8, - key_size: KeySize, - ) -> (*const u8, ValueSize) { - // Move to value block - let value_block = 
key_block.add(key_size as usize); - // Load value size from value block - let value_size = - u32::from_ne_bytes(*(value_block as *const [u8; mem::size_of::()])); - // Move value block to the start of value - let value_block = value_block.add(mem::size_of::()); - - (value_block, value_size) - } - - /// Get key with arena - /// - /// REQUIRE: This Node is created via `Node::alloc()` - unsafe fn key_with_arena(&self, arena: A) -> ArenaSlice - where - A: Arena, - { - let (key_block, key_size) = self.load_key_size(); - - ArenaSlice::from_raw_parts(arena, key_block, key_size as usize) - } - - /// Get value with arena - /// - /// REQUIRE: This Node is created via `Node::alloc()` - unsafe fn value_with_arena(&self, arena: A) -> ArenaSlice - where - A: Arena, - { - let (key_block, key_size) = self.load_key_size(); - let (value_block, value_size) = self.load_value_size(key_block, key_size); - - ArenaSlice::from_raw_parts(arena, value_block, value_size as usize) - } -} - -struct SkiplistCore { - height: AtomicUsize, - head: NonNull, - arena: A, -} - -/// FIXME(yingwen): Modify the skiplist to support arena that supports growth, -/// otherwise it is hard to avoid memory usage not out of the arena capacity -#[derive(Clone)] -pub struct Skiplist { - core: Arc>, - c: C, -} - -impl + Clone> Skiplist { - pub fn with_arena(c: C, arena: A) -> Skiplist { - let head = Node::alloc(&arena, &[], &[], MAX_HEIGHT - 1); - let head = unsafe { NonNull::new_unchecked(head) }; - Skiplist { - core: Arc::new(SkiplistCore { - height: AtomicUsize::new(0), - head, - arena, - }), - c, - } - } - - fn random_height(&self) -> usize { - let mut rng = rand::thread_rng(); - for h in 0..(MAX_HEIGHT - 1) { - if !rng.gen_ratio(HEIGHT_INCREASE, u32::MAX) { - return h; - } - } - MAX_HEIGHT - 1 - } - - fn height(&self) -> usize { - self.core.height.load(Ordering::SeqCst) - } - - pub fn arena_block_size(&self) -> usize { - self.core.arena.block_size() - } -} - -impl + Clone> Skiplist { - /// Finds the node near to key. - /// - /// If less=true, it finds rightmost node such that node.key < key (if - /// allow_equal=false) or node.key <= key (if allow_equal=true). - /// If less=false, it finds leftmost node such that node.key > key (if - /// allowEqual=false) or node.key >= key (if allow_equal=true). - /// Returns the node found. - unsafe fn find_near(&self, key: &[u8], less: bool, allow_equal: bool) -> *const Node { - let mut cursor: *const Node = self.core.head.as_ptr(); - let mut level = self.height(); - loop { - // Assume cursor.key < key - let next_ptr = (*cursor).next_ptr(level); - if next_ptr.is_null() { - // cursor.key < key < END OF LIST - if level > 0 { - // Can descend further to iterate closer to the end - level -= 1; - continue; - } - // 1. Level=0. Cannot descend further. Let's return something that makes sense - // 2. Try to return cursor. Make sure it is not a head node - if !less || cursor == self.core.head.as_ptr() { - return ptr::null(); - } - return cursor; - } - - let next = &*next_ptr; - let res = self.c.compare_key(key, next.key()); - if res == std::cmp::Ordering::Greater { - // cursor.key < next.key < key. We can continue to move right - cursor = next_ptr; - continue; - } - if res == std::cmp::Ordering::Equal { - // cursor.key < key == next.key - if allow_equal { - return next; - } - if !less { - // We want >, so go to base level to grab the next bigger node - return next.next_ptr(0); - } - // We want <. If not base level, we should go closer in the next level. 
- if level > 0 { - level -= 1; - continue; - } - // On base level. Return cursor - if cursor == self.core.head.as_ptr() { - return ptr::null(); - } - return cursor; - } - // cursor.key < key < next.key - if level > 0 { - level -= 1; - continue; - } - // At base level. Need to return something - if !less { - return next; - } - // Try to return cursor. Make sure it is not a head node - if cursor == self.core.head.as_ptr() { - return ptr::null(); - } - return cursor; - } - } - - /// Returns (out_before, out_after) with out_before.key <= key <= - /// out_after.key - /// - /// The input `before` tells us where to start looking - /// If we found a node with the same key, then we return out_before = - /// out_after. Otherwise, out_before.key < key < out_after.key - unsafe fn find_splice_for_level( - &self, - key: &[u8], - mut before: *mut Node, - level: usize, - ) -> (*mut Node, *mut Node) { - loop { - // Assume before.key < key - let next_ptr = (*before).next_ptr(level); - if next_ptr.is_null() { - return (before, ptr::null_mut()); - } - let next_node = &*next_ptr; - match self.c.compare_key(key, next_node.key()) { - // Equality case - std::cmp::Ordering::Equal => return (next_ptr, next_ptr), - // before.key < key < next.key. We are done for this level - std::cmp::Ordering::Less => return (before, next_ptr), - // Keep moving right on this level - _ => before = next_ptr, - } - } - } - - /// Put the key-value into the skiplist if the key does not exists. - /// - /// The content of key and value will be copied into the list. Returns true - /// if the node is inserted, otherwise return false (key is duplicated) - /// - /// Panic: The skiplist will panic if the allocated memory - /// out of the capacity - pub fn put(&self, key: &[u8], value: &[u8]) -> bool { - let mut list_height = self.height(); - let mut prev = [ptr::null_mut(); MAX_HEIGHT + 1]; - let mut next = [ptr::null_mut(); MAX_HEIGHT + 1]; - prev[list_height + 1] = self.core.head.as_ptr(); - // Recompute splice levels - for i in (0..=list_height).rev() { - // Use higher level to speed up for current level - let (p, n) = unsafe { self.find_splice_for_level(key, prev[i + 1], i) }; - prev[i] = p; - next[i] = n; - if p == n { - // Key already exists - return false; - } - } - - // Create a new node - let height = self.random_height(); - let node_ptr = Node::alloc(&self.core.arena, key, value, height); - - // Try to increase skiplist height via CAS - while height > list_height { - match self.core.height.compare_exchange_weak( - list_height, - height, - Ordering::SeqCst, - Ordering::SeqCst, - ) { - // Successfully increased skiplist height - Ok(_) => break, - Err(h) => list_height = h, - } - } - - // We always insert from the base level and up. After you add a node in base - // level, we cannot create a node in the level above because it would - // have discovered the node in the base level - let x: &mut Node = unsafe { &mut *node_ptr }; - for i in 0..=height { - loop { - if prev[i].is_null() { - // This cannot happen in base level - assert!(i > 1); - // We haven't computed prev, next for this level because height exceeds old - // list_height. For these levels, we expect the lists to be - // sparse, so we can just search from head. - let (p, n) = - unsafe { self.find_splice_for_level(x.key(), self.core.head.as_ptr(), i) }; - prev[i] = p; - next[i] = n; - // Someone adds the exact same key before we are able to do so. This can only - // happen on the base level. But we know we are not on the - // base level. 
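The splice recomputation and per-level CAS in `put` (continued below) are what let concurrent writers race on the same key and still agree on a single winner. A sketch of that guarantee, assuming the in-repo `skiplist` and `arena` crates and the 8-byte comparator suffix used in the crate's tests:

```rust
use std::{sync::mpsc, thread};

use arena::MonoIncArena;
use skiplist::{FixedLengthSuffixComparator, Skiplist};

fn main() {
    let comp = FixedLengthSuffixComparator::new(8);
    let list = Skiplist::with_arena(comp, MonoIncArena::new(1 << 10));
    // 8-byte key prefix plus the 8-byte suffix the comparator expects.
    let key = b"same-key\x00\x00\x00\x00\x00\x00\x00\x00".to_vec();

    let (tx, rx) = mpsc::channel();
    for _ in 0..2 {
        let (list, key, tx) = (list.clone(), key.clone(), tx.clone());
        // Each writer reports whether its insert won the race.
        thread::spawn(move || tx.send(list.put(&key, b"value")).unwrap());
    }

    let wins = rx.recv().unwrap() as u32 + rx.recv().unwrap() as u32;
    // Exactly one put inserts; the other observes the duplicate and returns false.
    assert_eq!(wins, 1);
}
```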
- assert_ne!(p, n); - } - x.tower[i].store(next[i], Ordering::SeqCst); - match unsafe { &*prev[i] }.tower[i].compare_exchange( - next[i], - node_ptr, - Ordering::SeqCst, - Ordering::SeqCst, - ) { - // Managed to insert x between prev[i] and next[i]. Go to the next level. - Ok(_) => break, - Err(_) => { - // CAS failed. We need to recompute prev and next. - // It is unlikely to be helpful to try to use a different level as we redo - // the search, because it is unlikely that lots of - // nodes are inserted between prev[i] and next[i]. - let (p, n) = unsafe { self.find_splice_for_level(x.key(), prev[i], i) }; - if p == n { - assert_eq!(i, 0); - return false; - } - prev[i] = p; - next[i] = n; - } - } - } - } - true - } - - /// Returns if the skiplist is empty - pub fn is_empty(&self) -> bool { - let node = self.core.head.as_ptr(); - let next_ptr = unsafe { (*node).next_ptr(0) }; - next_ptr.is_null() - } - - /// Returns len of the skiplist - pub fn len(&self) -> usize { - let mut node = self.core.head.as_ptr(); - let mut count = 0; - loop { - let next_ptr = unsafe { (*node).next_ptr(0) }; - if !next_ptr.is_null() { - count += 1; - node = next_ptr; - continue; - } - return count; - } - } - - /// Returns the last element. If head (empty list), we return null. All the - /// find functions will NEVER return the head nodes - fn find_last(&self) -> *const Node { - let mut node = self.core.head.as_ptr(); - let mut level = self.height(); - loop { - let next_ptr = unsafe { (*node).next_ptr(level) }; - if !next_ptr.is_null() { - node = next_ptr; - continue; - } - // next is null - if level == 0 { - if node == self.core.head.as_ptr() { - return ptr::null(); - } - return node; - } - level -= 1; - } - } - - /// Gets the value associated with the key. It returns a valid value if it - /// finds equal or earlier version of the same key. - pub fn get(&self, key: &[u8]) -> Option<&[u8]> { - if let Some((_, value)) = self.get_with_key(key) { - Some(value) - } else { - None - } - } - - /// Gets the key and value associated with the key. It returns a valid value - /// if it finds equal or earlier version of the same key. - pub fn get_with_key(&self, key: &[u8]) -> Option<(&[u8], &[u8])> { - // Find greater or equal - let node = unsafe { self.find_near(key, false, true) }; - if node.is_null() { - return None; - } - if self.c.same_key(unsafe { (*node).key() }, key) { - return Some(unsafe { ((*node).key(), (*node).value()) }); - } - None - } - - /// Returns a skiplist iterator - pub fn iter_ref(&self) -> IterRef<&Skiplist, C, A> { - IterRef { - list: self, - cursor: ptr::null(), - _key_cmp: std::marker::PhantomData, - _arena: std::marker::PhantomData, - } - } - - /// Returns a skiplist iterator - pub fn iter(&self) -> IterRef, C, A> { - IterRef { - list: self.clone(), - cursor: ptr::null(), - _key_cmp: std::marker::PhantomData, - _arena: std::marker::PhantomData, - } - } - - /// Consider the total bytes allocated by the arena (not the bytes used). 
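Putting the pieces above together (`with_arena`, `put`, `get`, `len`, `iter_ref`), a single-threaded usage sketch, again assuming the in-repo `skiplist` and `arena` crates and keys that end in the comparator's 8-byte suffix:

```rust
use arena::MonoIncArena;
use skiplist::{FixedLengthSuffixComparator, Skiplist};

fn main() {
    let comp = FixedLengthSuffixComparator::new(8);
    let list = Skiplist::with_arena(comp, MonoIncArena::new(1 << 10));

    // Keys end in an 8-byte zero "timestamp" suffix, which the comparator
    // ignores for key identity, mirroring the crate's own tests.
    for i in 0..3u32 {
        let key = format!("key-{i:04}00000000");
        list.put(key.as_bytes(), format!("v{i}").as_bytes());
    }

    assert_eq!(list.len(), 3);
    assert_eq!(list.get(b"key-000100000000"), Some(&b"v1"[..]));

    // Entries come back in comparator order.
    let mut iter = list.iter_ref();
    iter.seek_to_first();
    while iter.valid() {
        println!("{:?} => {:?}", iter.key(), iter.value());
        iter.next();
    }
}
```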
- pub fn mem_size(&self) -> u32 { - self.core.arena.stats().bytes_allocated() as u32 - } -} - -impl + Clone> AsRef> for Skiplist { - fn as_ref(&self) -> &Skiplist { - self - } -} - -unsafe impl + Clone + Send> Send for Skiplist {} -unsafe impl + Clone + Sync> Sync for Skiplist {} - -pub struct IterRef -where - T: AsRef>, - A: Arena + Clone, -{ - list: T, - cursor: *const Node, - _key_cmp: std::marker::PhantomData, - _arena: std::marker::PhantomData, -} - -impl>, C: KeyComparator, A: Arena + Clone> - IterRef -{ - pub fn valid(&self) -> bool { - !self.cursor.is_null() - } - - pub fn key(&self) -> &[u8] { - assert!(self.valid()); - unsafe { (*self.cursor).key() } - } - - pub fn value(&self) -> &[u8] { - assert!(self.valid()); - unsafe { (*self.cursor).value() } - } - - pub fn next(&mut self) { - assert!(self.valid()); - unsafe { - self.cursor = (*self.cursor).next_ptr(0); - } - } - - pub fn prev(&mut self) { - assert!(self.valid()); - unsafe { - self.cursor = self.list.as_ref().find_near(self.key(), true, false); - } - } - - pub fn seek(&mut self, target: &[u8]) { - unsafe { - self.cursor = self.list.as_ref().find_near(target, false, true); - } - } - - pub fn seek_for_prev(&mut self, target: &[u8]) { - unsafe { - self.cursor = self.list.as_ref().find_near(target, true, true); - } - } - - pub fn seek_to_first(&mut self) { - unsafe { - self.cursor = (*self.list.as_ref().core.head.as_ptr()).next_ptr(0); - } - } - - pub fn seek_to_last(&mut self) { - self.cursor = self.list.as_ref().find_last(); - } - - pub fn key_with_arena(&self) -> ArenaSlice { - assert!(self.valid()); - unsafe { (*self.cursor).key_with_arena(self.list.as_ref().core.arena.clone()) } - } - - pub fn value_with_arena(&self) -> ArenaSlice { - assert!(self.valid()); - unsafe { (*self.cursor).value_with_arena(self.list.as_ref().core.arena.clone()) } - } -} - -unsafe impl>, C: Send, A: Arena + Clone + Send> Send - for IterRef -{ -} -unsafe impl>, C: Sync, A: Arena + Clone + Sync> Sync - for IterRef -{ -} - -#[cfg(test)] -mod tests { - use arena::MonoIncArena; - use bytes::Bytes; - - use super::*; - use crate::FixedLengthSuffixComparator; - - #[test] - fn test_node_alloc() { - let arena = MonoIncArena::new(1 << 10); - let key = b"key of node"; - let value = b"value of node"; - let node_ptr = Node::alloc(&arena, key, value, 5); - unsafe { - let node = &*node_ptr; - assert_eq!(5, node.height); - for i in 0..=node.height { - assert!(node.tower[i].load(Ordering::SeqCst).is_null()); - } - assert_eq!(key, node.key()); - assert_eq!(value, node.value()); - } - } - - #[test] - fn test_find_near() { - let comp = FixedLengthSuffixComparator::new(8); - let arena = MonoIncArena::new(1 << 10); - let list = Skiplist::with_arena(comp, arena); - for i in 0..1000 { - let key = Bytes::from(format!("{:05}{:08}", i * 10 + 5, 0)); - let value = Bytes::from(format!("{i:05}")); - list.put(&key, &value); - } - let mut cases = vec![ - ("00001", false, false, Some("00005")), - ("00001", false, true, Some("00005")), - ("00001", true, false, None), - ("00001", true, true, None), - ("00005", false, false, Some("00015")), - ("00005", false, true, Some("00005")), - ("00005", true, false, None), - ("00005", true, true, Some("00005")), - ("05555", false, false, Some("05565")), - ("05555", false, true, Some("05555")), - ("05555", true, false, Some("05545")), - ("05555", true, true, Some("05555")), - ("05558", false, false, Some("05565")), - ("05558", false, true, Some("05565")), - ("05558", true, false, Some("05555")), - ("05558", true, true, Some("05555")), - 
("09995", false, false, None), - ("09995", false, true, Some("09995")), - ("09995", true, false, Some("09985")), - ("09995", true, true, Some("09995")), - ("59995", false, false, None), - ("59995", false, true, None), - ("59995", true, false, Some("09995")), - ("59995", true, true, Some("09995")), - ]; - for (i, (key, less, allow_equal, exp)) in cases.drain(..).enumerate() { - let seek_key = Bytes::from(format!("{}{:08}", key, 0)); - let res = unsafe { list.find_near(&seek_key, less, allow_equal) }; - if exp.is_none() { - assert!(res.is_null(), "{}", i); - continue; - } - let e = format!("{}{:08}", exp.unwrap(), 0); - assert_eq!(unsafe { (*res).key() }, e.as_bytes(), "{i}"); - } - } -} diff --git a/src/components/skiplist/src/slice.rs b/src/components/skiplist/src/slice.rs deleted file mode 100644 index 01b82d3788..0000000000 --- a/src/components/skiplist/src/slice.rs +++ /dev/null @@ -1,89 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Slice with arena - -use std::{fmt, ops::Deref, slice}; - -use arena::{Arena, BasicStats}; - -/// Arena slice -/// -/// A slice allocated from the arena, it will holds the reference to the arena -/// so it is safe to clone and deref the slice -#[derive(Clone)] -pub struct ArenaSlice> { - /// Arena the slice memory allocated from. - _arena: A, - /// The slice pointer. - slice_ptr: *const u8, - /// The slice len. - slice_len: usize, -} - -impl> ArenaSlice { - /// Create a [ArenaSlice] - /// - /// See the documentation of [`slice::from_raw_parts`] for slice safety - /// requirements. 
- pub(crate) unsafe fn from_raw_parts(_arena: A, slice_ptr: *const u8, slice_len: usize) -> Self { - Self { - _arena, - slice_ptr, - slice_len, - } - } -} - -unsafe impl + Send> Send for ArenaSlice {} -unsafe impl + Sync> Sync for ArenaSlice {} - -impl> Deref for ArenaSlice { - type Target = [u8]; - - fn deref(&self) -> &[u8] { - unsafe { slice::from_raw_parts(self.slice_ptr, self.slice_len) } - } -} - -impl> fmt::Debug for ArenaSlice { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_list().entries(self.iter()).finish() - } -} - -#[cfg(test)] -mod tests { - use std::{alloc::Layout, mem, ptr}; - - use arena::MonoIncArena; - - use super::*; - - #[test] - fn test_arena_slice() { - let hello = b"hello"; - let arena = MonoIncArena::new(1 << 10); - let slice = unsafe { - let data_ptr = arena - .alloc(Layout::from_size_align(hello.len(), mem::align_of_val(hello)).unwrap()); - ptr::copy_nonoverlapping(hello.as_ptr(), data_ptr.as_ptr(), hello.len()); - ArenaSlice::from_raw_parts(arena, data_ptr.as_ptr(), hello.len()) - }; - assert_eq!(hello, &slice[..]); - } -} diff --git a/src/components/skiplist/tests/tests.rs b/src/components/skiplist/tests/tests.rs deleted file mode 100644 index f152acc615..0000000000 --- a/src/components/skiplist/tests/tests.rs +++ /dev/null @@ -1,259 +0,0 @@ -use std::{ - str, - sync::{atomic::*, *}, - thread::yield_now, - time::Duration, -}; - -use arena::MonoIncArena; -use bytes::*; -use skiplist::*; -use yatp::task::callback::Handle; - -fn new_value(v: usize) -> Bytes { - Bytes::from(format!("{v:05}")) -} - -fn key_with_ts(key: &str, ts: u64) -> Bytes { - Bytes::from(format!("{key}{ts:08}")) -} - -#[test] -fn test_empty() { - let key = key_with_ts("aaa", 0); - let comp = FixedLengthSuffixComparator::new(8); - let arena = MonoIncArena::new(1 << 10); - let list = Skiplist::with_arena(comp, arena); - let v = list.get(&key); - assert!(v.is_none()); - - let mut iter = list.iter_ref(); - assert!(!iter.valid()); - iter.seek_to_first(); - assert!(!iter.valid()); - iter.seek_to_last(); - assert!(!iter.valid()); - iter.seek(&key); - assert!(!iter.valid()); - assert!(list.is_empty()); -} - -#[test] -fn test_basic() { - let comp = FixedLengthSuffixComparator::new(8); - let arena = MonoIncArena::new(1 << 10); - let list = Skiplist::with_arena(comp, arena); - let table = vec![ - ("key1", new_value(42)), - ("key2", new_value(52)), - ("key3", new_value(62)), - ("key5", Bytes::from(format!("{:0102400}", 1))), - ("key4", new_value(72)), - ]; - - for (key, value) in &table { - list.put(&key_with_ts(key, 0), value); - } - - assert_eq!(list.get(&key_with_ts("key", 0)), None); - assert_eq!(list.len(), 5); - assert!(!list.is_empty()); - for (key, value) in &table { - let get_key = key_with_ts(key, 0); - assert_eq!(list.get(&get_key), Some(&value[..]), "{key}"); - } -} - -fn test_concurrent_basic(n: usize, value_len: usize) { - let pool = yatp::Builder::new("concurrent_basic").build_callback_pool(); - let comp = FixedLengthSuffixComparator::new(8); - let arena = MonoIncArena::new(1 << 10); - let list = Skiplist::with_arena(comp, arena); - let kvs: Vec<_> = (0..n) - .map(|i| { - ( - key_with_ts(format!("{i:05}").as_str(), 0), - Bytes::from(format!("{i:0value_len$}")), - ) - }) - .collect(); - let (tx, rx) = mpsc::channel(); - for (k, v) in kvs.clone() { - let tx = tx.clone(); - let list = list.clone(); - pool.spawn(move |_: &mut Handle<'_>| { - list.put(&k, &v); - tx.send(()).unwrap(); - }) - } - for _ in 0..n { - rx.recv_timeout(Duration::from_secs(3)).unwrap(); - } - for 
(k, v) in kvs { - let tx = tx.clone(); - let list = list.clone(); - pool.spawn(move |_: &mut Handle<'_>| { - let val = list.get(&k); - assert_eq!(val, Some(&v[..]), "{k:?}"); - tx.send(()).unwrap(); - }); - } - for _ in 0..n { - rx.recv_timeout(Duration::from_secs(3)).unwrap(); - } - assert_eq!(list.len(), n); -} - -#[test] -fn test_concurrent_basic_small_value() { - test_concurrent_basic(1000, 5); -} - -#[test] -fn test_concurrent_basic_big_value() { - test_concurrent_basic(100, 1048576); -} - -#[test] -fn test_one_key() { - let n = 10000; - let write_pool = yatp::Builder::new("one_key_write").build_callback_pool(); - let read_pool = yatp::Builder::new("one_key_read").build_callback_pool(); - let comp = FixedLengthSuffixComparator::new(8); - let arena = MonoIncArena::new(1 << 10); - let list = Skiplist::with_arena(comp, arena); - let key = key_with_ts("thekey", 0); - let (tx, rx) = mpsc::channel(); - list.put(&key, &new_value(0)); - for i in 0..n { - let tx = tx.clone(); - let list = list.clone(); - let key = key.clone(); - let value = new_value(i); - write_pool.spawn(move |_: &mut Handle<'_>| { - list.put(&key, &value); - tx.send("w").unwrap(); - yield_now(); - }) - } - let mark = Arc::new(AtomicBool::new(false)); - for _ in 0..n { - let tx = tx.clone(); - let list = list.clone(); - let mark = mark.clone(); - let key = key.clone(); - read_pool.spawn(move |_: &mut Handle<'_>| { - let val = list.get(&key); - if val.is_none() { - return; - } - let s = unsafe { str::from_utf8_unchecked(val.unwrap()) }; - let val: usize = s.parse().unwrap(); - assert!(val < n); - mark.store(true, Ordering::SeqCst); - tx.send("r").unwrap(); - yield_now(); - }); - } - let mut r = 0; - let mut w = 0; - for _ in 0..(n * 2) { - match rx.recv_timeout(Duration::from_secs(3)) { - Ok("w") => w += 1, - Ok("r") => r += 1, - Err(err) => panic!("timeout on receiving r{r} w{w} msg {err:?}"), - _ => panic!("unexpected value"), - } - } - assert_eq!(list.len(), 1); - assert!(mark.load(Ordering::SeqCst)); -} - -#[test] -fn test_iterator_next() { - let n = 100; - let comp = FixedLengthSuffixComparator::new(8); - let arena = MonoIncArena::new(1 << 10); - let list = Skiplist::with_arena(comp, arena); - let mut iter_ref = list.iter_ref(); - assert!(!iter_ref.valid()); - iter_ref.seek_to_first(); - assert!(!iter_ref.valid()); - for i in (0..n).rev() { - let key = key_with_ts(format!("{i:05}").as_str(), 0); - list.put(&key, &new_value(i)); - } - iter_ref.seek_to_first(); - for i in 0..n { - assert!(iter_ref.valid()); - let v = iter_ref.value(); - assert_eq!(v, &new_value(i)); - iter_ref.next(); - } - assert!(!iter_ref.valid()); -} - -#[test] -fn test_iterator_prev() { - let n = 100; - let comp = FixedLengthSuffixComparator::new(8); - let arena = MonoIncArena::new(1 << 10); - let list = Skiplist::with_arena(comp, arena); - let mut iter_ref = list.iter_ref(); - assert!(!iter_ref.valid()); - iter_ref.seek_to_last(); - assert!(!iter_ref.valid()); - for i in (0..n).rev() { - let key = key_with_ts(format!("{i:05}").as_str(), 0); - list.put(&key, &new_value(i)); - } - iter_ref.seek_to_last(); - for i in (0..n).rev() { - assert!(iter_ref.valid()); - let v = iter_ref.value(); - assert_eq!(v, &new_value(i)); - iter_ref.prev(); - } - assert!(!iter_ref.valid()); -} - -#[test] -fn test_iterator_seek() { - let n = 100; - let comp = FixedLengthSuffixComparator::new(8); - let arena = MonoIncArena::new(1 << 10); - let list = Skiplist::with_arena(comp, arena); - let mut iter_ref = list.iter_ref(); - assert!(!iter_ref.valid()); - 
iter_ref.seek_to_first(); - assert!(!iter_ref.valid()); - for i in (0..n).rev() { - let v = i * 10 + 1000; - let key = key_with_ts(format!("{v:05}").as_str(), 0); - list.put(&key, &new_value(v)); - } - iter_ref.seek_to_first(); - assert!(iter_ref.valid()); - assert_eq!(iter_ref.value(), b"01000" as &[u8]); - - let cases = vec![ - ("00000", Some(b"01000"), None), - ("01000", Some(b"01000"), Some(b"01000")), - ("01005", Some(b"01010"), Some(b"01000")), - ("01010", Some(b"01010"), Some(b"01010")), - ("99999", None, Some(b"01990")), - ]; - for (key, seek_expect, for_prev_expect) in cases { - let key = key_with_ts(key, 0); - iter_ref.seek(&key); - assert_eq!(iter_ref.valid(), seek_expect.is_some()); - if let Some(v) = seek_expect { - assert_eq!(iter_ref.value(), &v[..]); - } - iter_ref.seek_for_prev(&key); - assert_eq!(iter_ref.valid(), for_prev_expect.is_some()); - if let Some(v) = for_prev_expect { - assert_eq!(iter_ref.value(), &v[..]); - } - } -} diff --git a/src/components/system_stats/Cargo.toml b/src/components/system_stats/Cargo.toml deleted file mode 100644 index b4ab611210..0000000000 --- a/src/components/system_stats/Cargo.toml +++ /dev/null @@ -1,33 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "system_stats" -authors = ["HoraeDB Authors"] - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -sysinfo = { version = "0.30", default-features = false } -tokio = { workspace = true } diff --git a/src/components/system_stats/src/lib.rs b/src/components/system_stats/src/lib.rs deleted file mode 100644 index c06c2317d8..0000000000 --- a/src/components/system_stats/src/lib.rs +++ /dev/null @@ -1,157 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// Helps to collect and report statistics about the system. - -use std::{sync::Mutex, time::Duration}; - -pub use sysinfo::LoadAvg; -use sysinfo::{Cpu, CpuRefreshKind, MemoryRefreshKind, RefreshKind, System}; - -/// The stats about the system. 
-#[derive(Debug)] -pub struct SystemStats { - /// The valid range is [0.0, total_cpu]. - pub cpu_usage: f32, - /// The total cpu - pub num_cpus: u32, - /// The memory is counted in byte. - pub used_memory: u64, - /// The memory is counted in byte. - pub total_memory: u64, - pub load_avg: LoadAvg, -} - -/// Collect the stats of the system for reporting. -/// -/// One background thread will be spawned to run stats collection. -pub struct SystemStatsCollector { - total_memory: u64, - system: Mutex, -} - -pub type ErrorMessage = String; - -impl SystemStatsCollector { - /// Create an new collector for the system stats. - pub fn try_new() -> Result { - if !sysinfo::IS_SUPPORTED_SYSTEM { - return Err("Unsupported system to collect system metrics".to_string()); - } - - let system = System::new_with_specifics(Self::make_mem_refresh_kind()); - Ok(Self { - total_memory: system.total_memory(), - system: Mutex::new(system), - }) - } - - /// Collect the system stats for `observe_dur`. - /// - /// The [`sysinfo::MINIMUM_CPU_UPDATE_INTERVAL`] will be used if - /// `observe_dur` is smaller. - pub async fn collect_and_report(&self, observe_dur: Duration) -> SystemStats { - { - let mut system = self.system.lock().unwrap(); - system.refresh_specifics(Self::make_cpu_refresh_kind()); - } - - let wait_dur = sysinfo::MINIMUM_CPU_UPDATE_INTERVAL.max(observe_dur); - tokio::time::sleep(wait_dur).await; - - let mut system = self.system.lock().unwrap(); - system.refresh_specifics(Self::make_cpu_and_mem_refresh_kind()); - - SystemStats { - cpu_usage: self.compute_cpu_usage(system.cpus()), - num_cpus: system.cpus().len() as u32, - used_memory: system.used_memory(), - total_memory: self.total_memory, - load_avg: System::load_average(), - } - } - - // Refresh and compute the latest cpu usage. 
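`collect_and_report` above refreshes the CPU counters, waits at least sysinfo's minimum CPU update interval, then refreshes again, so callers simply await it with their preferred observation window. A usage sketch, assuming the `system_stats` crate above together with tokio's `rt` and `macros` features:

```rust
use std::time::Duration;

use system_stats::SystemStatsCollector;

#[tokio::main]
async fn main() -> Result<(), String> {
    let collector = SystemStatsCollector::try_new()?;
    // Sample CPU usage over roughly half a second; memory and load average
    // are read in the same refresh.
    let stats = collector
        .collect_and_report(Duration::from_millis(500))
        .await;

    println!(
        "cpu: {:.2} of {} cores, mem: {}/{} bytes, load1: {}",
        stats.cpu_usage, stats.num_cpus, stats.used_memory, stats.total_memory, stats.load_avg.one
    );
    Ok(())
}
```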
- fn compute_cpu_usage(&self, cpus: &[Cpu]) -> f32 { - let mut total_cpu_usage = 0.0; - let valid_cpus = cpus.iter().filter(|v| !v.cpu_usage().is_nan()); - for cpu in valid_cpus { - total_cpu_usage += cpu.cpu_usage(); - } - - total_cpu_usage / 100.0 - } - - #[inline] - fn make_mem_refresh_kind() -> RefreshKind { - let mem_refresh_kind = MemoryRefreshKind::new().with_ram(); - RefreshKind::new().with_memory(mem_refresh_kind) - } - - #[inline] - fn make_cpu_refresh_kind() -> RefreshKind { - let cpu_refresh_kind = CpuRefreshKind::new().with_cpu_usage(); - RefreshKind::new().with_cpu(cpu_refresh_kind) - } - - #[inline] - fn make_cpu_and_mem_refresh_kind() -> RefreshKind { - let cpu_refresh_kind = CpuRefreshKind::new().with_cpu_usage(); - let mem_refresh_kind = MemoryRefreshKind::new().with_ram(); - RefreshKind::new() - .with_cpu(cpu_refresh_kind) - .with_memory(mem_refresh_kind) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn check_system_stats(stats: &SystemStats) { - assert!(stats.total_memory > 0); - assert!(stats.used_memory > 0); - assert!(stats.used_memory < stats.total_memory); - assert!(stats.cpu_usage >= 0.0); - assert!(stats.cpu_usage <= stats.num_cpus as f32); - assert!(stats.load_avg.one >= 0.0); - assert!(stats.load_avg.five >= 0.0); - assert!(stats.load_avg.fifteen >= 0.0); - } - - #[tokio::test] - async fn test_collect_system_stats() { - let collector = SystemStatsCollector::try_new().unwrap(); - let stats = collector - .collect_and_report(Duration::from_millis(500)) - .await; - check_system_stats(&stats); - - let mut all_cpu_usages = Vec::with_capacity(5); - for _ in 0..5 { - let new_stats = collector - .collect_and_report(Duration::from_millis(200)) - .await; - check_system_stats(&new_stats); - all_cpu_usages.push(new_stats); - } - - assert!(all_cpu_usages - .into_iter() - .all(|v| v.num_cpus == stats.num_cpus && v.total_memory == stats.total_memory)); - } -} diff --git a/src/components/table_kv/Cargo.toml b/src/components/table_kv/Cargo.toml deleted file mode 100644 index 6f503ad407..0000000000 --- a/src/components/table_kv/Cargo.toml +++ /dev/null @@ -1,44 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[package] -name = "table_kv" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -lazy_static = { workspace = true } -logger = { workspace = true } -macros = { workspace = true } -obkv-table-client-rs = { git = "https://github.com/oceanbase/obkv-table-client-rs.git", rev = "81cee5d55a2423686dee07163f1ec60f9e28272c" } -prometheus = { workspace = true } -serde = { workspace = true } -snafu = { workspace = true } -time_ext = { workspace = true } - -[dev-dependencies] -rand = { workspace = true } diff --git a/src/components/table_kv/src/config.rs b/src/components/table_kv/src/config.rs deleted file mode 100644 index 40d57dcd2b..0000000000 --- a/src/components/table_kv/src/config.rs +++ /dev/null @@ -1,238 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Config of table kv. - -use serde::{Deserialize, Serialize}; -use time_ext::ReadableDuration; - -// TODO: use test conf to control which environments to test. -const TEST_FULL_USER_NAME: &str = "user_name"; -const TEST_URL: &str = "url"; -const TEST_PASSWORD: &str = "passwd"; -const TEST_SYS_USER_NAME: &str = ""; -const TEST_SYS_PASSWORD: &str = ""; - -/// Config of obkv. -#[derive(Clone, Serialize, Deserialize, Debug)] -#[serde(default)] -pub struct ObkvConfig { - pub full_user_name: String, - pub param_url: String, - pub password: String, - pub check_batch_result_num: bool, - pub enable_purge_recyclebin: bool, - pub max_create_table_retries: usize, - pub create_table_retry_interval: ReadableDuration, - pub client: ClientConfig, -} - -impl Default for ObkvConfig { - fn default() -> Self { - Self { - full_user_name: String::new(), - param_url: String::new(), - password: String::new(), - check_batch_result_num: true, - enable_purge_recyclebin: false, - max_create_table_retries: 2, - create_table_retry_interval: ReadableDuration::secs(5), - client: ClientConfig::default(), - } - } -} - -impl ObkvConfig { - pub fn valid(&self) -> bool { - !self.full_user_name.is_empty() && !self.param_url.is_empty() - } - - /// Create a test-only obkv config. - pub fn for_test() -> Self { - let mut config = ObkvConfig { - full_user_name: TEST_FULL_USER_NAME.to_string(), - param_url: TEST_URL.to_string(), - password: TEST_PASSWORD.to_string(), - ..Default::default() - }; - config.client.metadata_mysql_conn_pool_min_size = 1; - config.client.metadata_mysql_conn_pool_max_size = 1; - config.client.sys_user_name = TEST_SYS_USER_NAME.to_string(); - config.client.sys_password = TEST_SYS_PASSWORD.to_string(); - - config - } -} - -/// Obkv server log level. 
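Because `ObkvConfig` above is marked `#[serde(default)]`, a deployment only has to spell out the connection fields and everything else falls back to `ObkvConfig::default()`. A sketch with a hypothetical config-server URL and credentials, assuming `toml` is available alongside the crate's `serde` dependency:

```rust
use table_kv::config::ObkvConfig;

fn main() {
    // Only the connection settings are provided; the retry and client settings
    // keep their defaults. The URL and credentials here are placeholders.
    let src = r#"
        full_user_name = "user@tenant#cluster"
        param_url = "http://127.0.0.1:8080/services"
        password = "secret"
    "#;

    let config: ObkvConfig = toml::from_str(src).unwrap();
    assert!(config.valid());
    assert!(config.check_batch_result_num);
    assert_eq!(config.max_create_table_retries, 2);
}
```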
-#[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq, Debug)] -#[serde(rename_all = "lowercase")] -#[derive(Default)] -pub enum ObLogLevel { - #[default] - None = 7, - Error = 0, - Warn = 2, - Info = 3, - Trace = 4, - Debug = 5, -} - -impl From for ObLogLevel { - fn from(level: u16) -> Self { - match level { - level if level == ObLogLevel::None as u16 => ObLogLevel::None, - level if level == ObLogLevel::Error as u16 => ObLogLevel::Error, - level if level == ObLogLevel::Warn as u16 => ObLogLevel::Warn, - level if level == ObLogLevel::Info as u16 => ObLogLevel::Info, - level if level == ObLogLevel::Trace as u16 => ObLogLevel::Trace, - level if level == ObLogLevel::Debug as u16 => ObLogLevel::Debug, - _ => ObLogLevel::None, - } - } -} - -/// Config of obkv client. -#[derive(Clone, Serialize, Deserialize, Debug)] -#[serde(default)] -pub struct ClientConfig { - pub sys_user_name: String, - pub sys_password: String, - pub metadata_refresh_interval: ReadableDuration, - pub ocp_model_cache_file: String, - pub rslist_acquire_timeout: ReadableDuration, - pub rslist_acquire_try_times: usize, - pub rslist_acquire_retry_interval: ReadableDuration, - pub table_entry_acquire_connect_timeout: ReadableDuration, - pub table_entry_acquire_read_timeout: ReadableDuration, - pub table_entry_refresh_interval_base: ReadableDuration, - pub table_entry_refresh_interval_ceiling: ReadableDuration, - pub table_entry_refresh_try_times: usize, - pub table_entry_refresh_try_interval: ReadableDuration, - pub table_entry_refresh_continuous_failure_ceiling: usize, - pub server_address_priority_timeout: ReadableDuration, - pub runtime_continuous_failure_ceiling: usize, - pub rpc_connect_timeout: ReadableDuration, - pub rpc_read_timeout: ReadableDuration, - pub rpc_operation_timeout: ReadableDuration, - pub rpc_login_timeout: ReadableDuration, - pub rpc_retry_limit: usize, - pub rpc_retry_interval: ReadableDuration, - pub refresh_workers_num: usize, - pub max_conns_per_server: usize, - pub min_idle_conns_per_server: usize, - pub conn_init_thread_num: usize, - pub metadata_mysql_conn_pool_max_size: usize, - pub metadata_mysql_conn_pool_min_size: usize, - pub table_batch_op_thread_num: usize, - pub query_concurrency_limit: Option, - pub log_level_flag: ObLogLevel, -} - -impl From for ClientConfig { - fn from(client_config: obkv::ClientConfig) -> Self { - Self { - sys_user_name: client_config.sys_user_name, - sys_password: client_config.sys_password, - metadata_refresh_interval: client_config.metadata_refresh_interval.into(), - ocp_model_cache_file: client_config.ocp_model_cache_file, - rslist_acquire_timeout: client_config.rslist_acquire_timeout.into(), - rslist_acquire_try_times: client_config.rslist_acquire_try_times, - rslist_acquire_retry_interval: client_config.rslist_acquire_retry_interval.into(), - table_entry_acquire_connect_timeout: client_config - .table_entry_acquire_connect_timeout - .into(), - table_entry_acquire_read_timeout: client_config.table_entry_acquire_read_timeout.into(), - table_entry_refresh_interval_base: client_config - .table_entry_refresh_interval_base - .into(), - table_entry_refresh_interval_ceiling: client_config - .table_entry_refresh_interval_ceiling - .into(), - table_entry_refresh_try_times: client_config.table_entry_refresh_try_times, - table_entry_refresh_try_interval: client_config.table_entry_refresh_try_interval.into(), - table_entry_refresh_continuous_failure_ceiling: client_config - .table_entry_refresh_continuous_failure_ceiling, - server_address_priority_timeout: 
client_config.server_address_priority_timeout.into(), - runtime_continuous_failure_ceiling: client_config.runtime_continuous_failure_ceiling, - rpc_connect_timeout: client_config.rpc_connect_timeout.into(), - rpc_read_timeout: client_config.rpc_read_timeout.into(), - rpc_operation_timeout: client_config.rpc_operation_timeout.into(), - rpc_login_timeout: client_config.rpc_login_timeout.into(), - rpc_retry_limit: client_config.rpc_retry_limit, - rpc_retry_interval: client_config.rpc_retry_interval.into(), - refresh_workers_num: client_config.refresh_workers_num, - max_conns_per_server: client_config.max_conns_per_server, - min_idle_conns_per_server: client_config.min_idle_conns_per_server, - conn_init_thread_num: client_config.conn_init_thread_num, - metadata_mysql_conn_pool_max_size: client_config.metadata_mysql_conn_pool_max_size, - metadata_mysql_conn_pool_min_size: client_config.metadata_mysql_conn_pool_min_size, - table_batch_op_thread_num: client_config.table_batch_op_thread_num, - query_concurrency_limit: client_config.query_concurrency_limit, - log_level_flag: client_config.log_level_flag.into(), - } - } -} - -impl Default for ClientConfig { - fn default() -> Self { - let client_config = obkv::ClientConfig::default(); - - Self::from(client_config) - } -} - -impl From for obkv::ClientConfig { - fn from(config: ClientConfig) -> obkv::ClientConfig { - obkv::ClientConfig { - sys_user_name: config.sys_user_name, - sys_password: config.sys_password, - metadata_refresh_interval: config.metadata_refresh_interval.into(), - ocp_model_cache_file: config.ocp_model_cache_file, - rslist_acquire_timeout: config.rslist_acquire_timeout.into(), - rslist_acquire_try_times: config.rslist_acquire_try_times, - rslist_acquire_retry_interval: config.rslist_acquire_retry_interval.into(), - table_entry_acquire_connect_timeout: config.table_entry_acquire_connect_timeout.into(), - table_entry_acquire_read_timeout: config.table_entry_acquire_read_timeout.into(), - table_entry_refresh_interval_base: config.table_entry_refresh_interval_base.into(), - table_entry_refresh_interval_ceiling: config - .table_entry_refresh_interval_ceiling - .into(), - table_entry_refresh_try_times: config.table_entry_refresh_try_times, - table_entry_refresh_try_interval: config.table_entry_refresh_try_interval.into(), - table_entry_refresh_continuous_failure_ceiling: config - .table_entry_refresh_continuous_failure_ceiling, - server_address_priority_timeout: config.server_address_priority_timeout.into(), - runtime_continuous_failure_ceiling: config.runtime_continuous_failure_ceiling, - rpc_connect_timeout: config.rpc_connect_timeout.into(), - rpc_read_timeout: config.rpc_read_timeout.into(), - rpc_operation_timeout: config.rpc_operation_timeout.into(), - rpc_login_timeout: config.rpc_login_timeout.into(), - rpc_retry_limit: config.rpc_retry_limit, - rpc_retry_interval: config.rpc_retry_interval.into(), - refresh_workers_num: config.refresh_workers_num, - max_conns_per_server: config.max_conns_per_server, - min_idle_conns_per_server: config.min_idle_conns_per_server, - conn_init_thread_num: config.conn_init_thread_num, - metadata_mysql_conn_pool_max_size: config.metadata_mysql_conn_pool_max_size, - metadata_mysql_conn_pool_min_size: config.metadata_mysql_conn_pool_min_size, - table_batch_op_thread_num: config.table_batch_op_thread_num, - query_concurrency_limit: config.query_concurrency_limit, - log_level_flag: config.log_level_flag as u16, - } - } -} diff --git a/src/components/table_kv/src/lib.rs b/src/components/table_kv/src/lib.rs 
deleted file mode 100644 index 8b80d56724..0000000000 --- a/src/components/table_kv/src/lib.rs +++ /dev/null @@ -1,221 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Remote key-value api based on table. - -use std::{fmt, time::Duration}; - -pub mod config; -pub mod memory; -mod metrics; -pub mod obkv; -#[cfg(test)] -mod tests; - -/// Context during write. -#[derive(Default)] -pub struct WriteContext {} - -/// Write operations. -pub trait WriteBatch: Default { - /// Create a new batch with given `capacity`. - fn with_capacity(capacity: usize) -> Self; - - /// Insert given key-value pair, write should returns error if - /// given key already exists. - fn insert(&mut self, key: &[u8], value: &[u8]); - - /// Insert given key-value pair or update it if value of given - /// key already exists. - fn insert_or_update(&mut self, key: &[u8], value: &[u8]); - - /// Delete value with given key. - fn delete(&mut self, key: &[u8]); -} - -/// Key to seek. -#[derive(Debug, Clone, PartialEq)] -pub struct SeekKey(Vec); - -impl From<&[u8]> for SeekKey { - fn from(key: &[u8]) -> Self { - Self(key.to_vec()) - } -} - -impl SeekKey { - pub fn to_bytes(&self) -> Vec { - self.0.clone() - } - - pub fn as_bytes(&self) -> &[u8] { - &self.0 - } -} - -/// Boundary of key to seek. -#[derive(Debug, Clone, PartialEq)] -pub enum KeyBoundary { - /// Included key boudary. - Included(SeekKey), - /// Excluded key boudary. - Excluded(SeekKey), - /// Include min key, only used as start key boundary. - MinIncluded, - /// Include max key, only used as end key boundary. - MaxIncluded, -} - -impl KeyBoundary { - pub fn included(key: &[u8]) -> Self { - Self::Included(key.into()) - } - - pub fn excluded(key: &[u8]) -> Self { - Self::Excluded(key.into()) - } - - pub fn min_included() -> Self { - Self::MinIncluded - } - - pub fn max_included() -> Self { - Self::MaxIncluded - } -} - -/// Context during scan. -#[derive(Debug, Clone)] -pub struct ScanContext { - /// Timeout for a single scan operation of the scan iteator. Note that the - /// scan iterator continuouslly send scan request to remote server to - /// fetch data of next key range, and this timeout is applied to every - /// send request, instead of the whole iteration. So user can hold this - /// iterator more longer than the `timeout`. - pub timeout: Duration, - /// Batch size of a single scan operation. - pub batch_size: i32, -} - -impl ScanContext { - /// Default scan batch size. - pub const DEFAULT_BATCH_SIZE: i32 = 100; -} - -impl Default for ScanContext { - fn default() -> Self { - Self { - timeout: Duration::from_secs(5), - batch_size: Self::DEFAULT_BATCH_SIZE, - } - } -} - -/// Scan request. -#[derive(Debug, Clone, PartialEq)] -pub struct ScanRequest { - /// Start bound. 
- pub start: KeyBoundary, - /// End bound. - pub end: KeyBoundary, - /// Scan in reverse order if `reverse` is set to true. - pub reverse: bool, -} - -/// Iterator to the scan result. -pub trait ScanIter: fmt::Debug { - type Error: std::error::Error + Send + Sync + 'static; - - /// Returns true if the iterator is valid. - fn valid(&self) -> bool; - - /// Advance the iterator. - /// - /// Panic if the iterator is invalid. - fn next(&mut self) -> Result; - - /// Get current key. - /// - /// Panic if iter is invalid. - fn key(&self) -> &[u8]; - - /// Get current value. - /// - /// Panic if iter is invalid. - fn value(&self) -> &[u8]; -} - -/// Error of TableKv. -pub trait TableError: std::error::Error { - /// Is it primary key duplicate error. - fn is_primary_key_duplicate(&self) -> bool; -} - -/// Kv service provided by a relational database. -pub trait TableKv: Clone + Send + Sync + fmt::Debug + 'static { - type Error: TableError + Send + Sync + 'static; - type WriteBatch: WriteBatch + Send; - type ScanIter: ScanIter + Send; - - /// Returns true if table with `table_name` already exists. - fn table_exists(&self, table_name: &str) -> Result; - - /// Create table with given `table_name` if it is not exist. - fn create_table(&self, table_name: &str) -> Result<(), Self::Error>; - - /// Drop table with given `table_name`. - fn drop_table(&self, table_name: &str) -> Result<(), Self::Error>; - - /// Truncate table with given `table_name`. - fn truncate_table(&self, table_name: &str) -> Result<(), Self::Error>; - - /// Write data in `write_batch` to table with `table_name`. - fn write( - &self, - ctx: WriteContext, - table_name: &str, - write_batch: Self::WriteBatch, - ) -> Result<(), Self::Error>; - - /// Scan data in given `table_name`, returns a [ScanIter]. - fn scan( - &self, - ctx: ScanContext, - table_name: &str, - request: ScanRequest, - ) -> Result; - - /// Get value by key from table with `table_name`. - fn get(&self, table_name: &str, key: &[u8]) -> Result>, Self::Error>; - - /// Get a batch of value by keys from table with `table_name` - fn get_batch( - &self, - table_name: &str, - keys: Vec<&[u8]>, - ) -> Result>>, Self::Error>; - - /// Delete data by key from table with `table_name`. - fn delete(&self, table_name: &str, key: &[u8]) -> Result<(), Self::Error>; - - /// Delete a batch of data by key list from table with `table_name`. - fn delete_batch( - &self, - table_name: &str, - keys: Vec>, - ) -> std::result::Result<(), Self::Error>; -} diff --git a/src/components/table_kv/src/memory.rs b/src/components/table_kv/src/memory.rs deleted file mode 100644 index 47d9109f25..0000000000 --- a/src/components/table_kv/src/memory.rs +++ /dev/null @@ -1,355 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Memory table kv, mainly for test. - -use std::{ - collections::{BTreeMap, HashMap}, - ops::Bound, - sync::{Arc, Mutex}, -}; - -use macros::define_result; -use snafu::{Backtrace, OptionExt, Snafu}; - -use crate::{ - KeyBoundary, ScanContext, ScanIter, ScanRequest, TableError, TableKv, WriteBatch, WriteContext, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Table not found, table:{}.\nBacktrace:\n{}", table_name, backtrace))] - TableNotFound { - table_name: String, - backtrace: Backtrace, - }, - - #[snafu(display("Primary key duplicate.\nBacktrace:\n{}", backtrace))] - PrimaryKeyDuplicate { backtrace: Backtrace }, -} - -define_result!(Error); - -impl TableError for Error { - fn is_primary_key_duplicate(&self) -> bool { - matches!(self, Self::PrimaryKeyDuplicate { .. }) - } -} - -enum WriteOp { - /// Insert (key, value). - Insert(Vec, Vec), - /// Insert or update (key, value). - InsertOrUpdate(Vec, Vec), - /// Delete key. - Delete(Vec), -} - -#[derive(Default)] -pub struct MemoryWriteBatch(Vec); - -impl WriteBatch for MemoryWriteBatch { - fn with_capacity(capacity: usize) -> Self { - Self(Vec::with_capacity(capacity)) - } - - fn insert(&mut self, key: &[u8], value: &[u8]) { - let op = WriteOp::Insert(key.to_vec(), value.to_vec()); - - self.0.push(op); - } - - fn insert_or_update(&mut self, key: &[u8], value: &[u8]) { - let op = WriteOp::InsertOrUpdate(key.to_vec(), value.to_vec()); - - self.0.push(op); - } - - fn delete(&mut self, key: &[u8]) { - let op = WriteOp::Delete(key.to_vec()); - - self.0.push(op); - } -} - -type KeyValue = (Vec, Vec); - -#[derive(Debug)] -pub struct MemoryScanIter { - /// All key values from the iterator. - key_values: Vec, - /// Current key/value offset. - offset: usize, -} - -impl MemoryScanIter { - fn new(key_values: Vec) -> Self { - Self { - key_values, - offset: 0, - } - } -} - -impl ScanIter for MemoryScanIter { - type Error = Error; - - fn valid(&self) -> bool { - self.offset < self.key_values.len() - } - - fn next(&mut self) -> Result { - assert!(self.valid()); - self.offset += 1; - Ok(self.valid()) - } - - fn key(&self) -> &[u8] { - assert!(self.valid()); - &self.key_values[self.offset].0 - } - - fn value(&self) -> &[u8] { - assert!(self.valid()); - &self.key_values[self.offset].1 - } -} - -type BytesMap = BTreeMap, Vec>; - -#[derive(Default, Debug)] -struct Table { - data: Mutex, -} - -impl Table { - fn truncate(&self) { - self.data.lock().unwrap().clear(); - } - - fn insert(&self, key: Vec, value: Vec) -> Result<()> { - let mut data = self.data.lock().unwrap(); - if data.contains_key(&key) { - return PrimaryKeyDuplicate.fail(); - } - - data.insert(key, value); - - Ok(()) - } - - fn insert_or_update(&self, key: Vec, value: Vec) -> Result<()> { - let mut data = self.data.lock().unwrap(); - data.insert(key, value); - - Ok(()) - } - - fn delete(&self, key: &[u8]) -> Result<()> { - let mut data = self.data.lock().unwrap(); - data.remove(key); - - Ok(()) - } - - fn scan(&self, request: ScanRequest) -> Vec { - let range = match to_range(request.start, request.end) { - Some(v) => v, - None => return Vec::new(), - }; - - let data = self.data.lock().unwrap(); - if request.reverse { - data.range(range) - .rev() - .map(|(k, v)| (k.to_vec(), v.to_vec())) - .collect() - } else { - data.range(range) - .map(|(k, v)| (k.to_vec(), v.to_vec())) - .collect() - } - } - - fn get(&self, key: &[u8]) -> Option> { - let data = self.data.lock().unwrap(); - data.get(key).cloned() - } -} - -type TableRef = Arc; -type TableMap = HashMap; - -#[derive(Clone, Default, 
Debug)] -pub struct MemoryImpl { - tables: Arc>, -} - -impl MemoryImpl { - fn find_table(&self, table_name: &str) -> Option { - self.tables.lock().unwrap().get(table_name).cloned() - } -} - -impl TableKv for MemoryImpl { - type Error = Error; - type ScanIter = MemoryScanIter; - type WriteBatch = MemoryWriteBatch; - - fn table_exists(&self, table_name: &str) -> Result { - let exists = self.tables.lock().unwrap().contains_key(table_name); - Ok(exists) - } - - fn create_table(&self, table_name: &str) -> Result<()> { - let mut tables = self.tables.lock().unwrap(); - if tables.contains_key(table_name) { - return Ok(()); - } - - let table = Arc::new(Table::default()); - tables.insert(table_name.to_string(), table); - - Ok(()) - } - - fn drop_table(&self, table_name: &str) -> Result<()> { - let mut tables = self.tables.lock().unwrap(); - tables.remove(table_name); - - Ok(()) - } - - fn truncate_table(&self, table_name: &str) -> Result<()> { - let table_opt = self.find_table(table_name); - if let Some(table) = table_opt { - table.truncate(); - } - - Ok(()) - } - - fn write( - &self, - _ctx: WriteContext, - table_name: &str, - write_batch: MemoryWriteBatch, - ) -> Result<()> { - let table = self - .find_table(table_name) - .context(TableNotFound { table_name })?; - - for op in write_batch.0 { - match op { - WriteOp::Insert(k, v) => { - table.insert(k, v)?; - } - WriteOp::InsertOrUpdate(k, v) => { - table.insert_or_update(k, v)?; - } - WriteOp::Delete(k) => { - table.delete(&k)?; - } - } - } - - Ok(()) - } - - fn scan( - &self, - _ctx: ScanContext, - table_name: &str, - request: ScanRequest, - ) -> Result { - let table = self - .find_table(table_name) - .context(TableNotFound { table_name })?; - - let key_values = table.scan(request); - - Ok(MemoryScanIter::new(key_values)) - } - - fn get(&self, table_name: &str, key: &[u8]) -> Result>> { - let table = self - .find_table(table_name) - .context(TableNotFound { table_name })?; - - Ok(table.get(key)) - } - - fn get_batch( - &self, - table_name: &str, - keys: Vec<&[u8]>, - ) -> std::result::Result>>, Self::Error> { - let table = self - .find_table(table_name) - .context(TableNotFound { table_name })?; - - let mut result = Vec::with_capacity(keys.len()); - for key in keys { - result.push(table.get(key)); - } - - Ok(result) - } - - fn delete(&self, table_name: &str, key: &[u8]) -> std::result::Result<(), Self::Error> { - let table = self - .find_table(table_name) - .context(TableNotFound { table_name })?; - table.delete(key)?; - Ok(()) - } - - fn delete_batch( - &self, - table_name: &str, - keys: Vec>, - ) -> std::result::Result<(), Self::Error> { - let table = self - .find_table(table_name) - .context(TableNotFound { table_name })?; - for key in keys { - table.delete(&key)?; - } - - Ok(()) - } -} - -type Range = (Bound>, Bound>); - -fn to_range(start: KeyBoundary, end: KeyBoundary) -> Option { - let start_bound = match start { - KeyBoundary::Included(k) => Bound::Included(k.to_bytes()), - KeyBoundary::Excluded(k) => Bound::Excluded(k.to_bytes()), - KeyBoundary::MinIncluded => Bound::Unbounded, - KeyBoundary::MaxIncluded => return None, - }; - - let end_bound = match end { - KeyBoundary::Included(k) => Bound::Included(k.to_bytes()), - KeyBoundary::Excluded(k) => Bound::Excluded(k.to_bytes()), - KeyBoundary::MinIncluded => return None, - KeyBoundary::MaxIncluded => Bound::Unbounded, - }; - - Some((start_bound, end_bound)) -} diff --git a/src/components/table_kv/src/metrics.rs b/src/components/table_kv/src/metrics.rs deleted file mode 100644 index 
17132c52bd..0000000000 --- a/src/components/table_kv/src/metrics.rs +++ /dev/null @@ -1,30 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use lazy_static::lazy_static; -use prometheus::{exponential_buckets, register_histogram_vec, HistogramVec}; - -lazy_static! { - // Buckets: 0.001, .., 0.001 * 2^15 = 32.7s - pub static ref OBKV_OP_DURATION_HISTOGRAM: HistogramVec = register_histogram_vec!( - "obkv_op_duration", - "Histogram for duration of different obkv operations", - &["type"], - exponential_buckets(0.001, 2.0, 15).unwrap() - ) - .unwrap(); -} diff --git a/src/components/table_kv/src/obkv.rs b/src/components/table_kv/src/obkv.rs deleted file mode 100644 index 30a534f245..0000000000 --- a/src/components/table_kv/src/obkv.rs +++ /dev/null @@ -1,853 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Obkv implementation. 
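The `OBKV_OP_DURATION_HISTOGRAM` removed above is consumed in `obkv.rs` as `OBKV_OP_DURATION_HISTOGRAM.with_label_values(&["write"]).start_timer()`, relying on the timer recording the elapsed time when it is dropped at the end of the operation. A small runnable sketch of that pattern, assuming the same `prometheus` and `lazy_static` crates as the deleted file (the metric name and label values here are illustrative):

    use lazy_static::lazy_static;
    use prometheus::{exponential_buckets, register_histogram_vec, Encoder, HistogramVec, TextEncoder};

    lazy_static! {
        // Same bucket layout as the deleted metric: exponential buckets starting at 1ms.
        static ref OP_DURATION: HistogramVec = register_histogram_vec!(
            "demo_op_duration",
            "Histogram for duration of demo operations",
            &["type"],
            exponential_buckets(0.001, 2.0, 15).unwrap()
        )
        .unwrap();
    }

    fn timed_write() {
        // The timer observes the elapsed seconds into the "write" series when dropped.
        let _timer = OP_DURATION.with_label_values(&["write"]).start_timer();
        std::thread::sleep(std::time::Duration::from_millis(2));
    }

    fn main() {
        timed_write();

        // Render the default registry to text to show the recorded observation.
        let mut buf = Vec::new();
        TextEncoder::new().encode(&prometheus::gather(), &mut buf).unwrap();
        println!("{}", String::from_utf8(buf).unwrap());
    }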
- -use std::{collections::HashMap, error::Error as StdError, fmt}; - -use logger::{error, info}; -use macros::define_result; -use obkv::{ - payloads::ObTableBatchOperation, Builder, ObTableClient, QueryResultSet, RunningMode, Table, - TableOpResult, TableQuery, Value, -}; -use snafu::{ensure, Backtrace, OptionExt, ResultExt, Snafu}; - -use crate::{ - config::ObkvConfig, metrics::OBKV_OP_DURATION_HISTOGRAM, KeyBoundary, ScanContext, ScanIter, - ScanRequest, SeekKey, TableError, TableKv, WriteBatch, WriteContext, -}; - -#[cfg(test)] -mod tests; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Invalid obkv config.\nBacktrace:\n{}", backtrace))] - InvalidConfig { backtrace: Backtrace }, - - #[snafu(display( - "Failed to build obkv client, user:{}, err:{}.\nBacktrace:\n{}", - full_user_name, - source, - backtrace - ))] - BuildClient { - full_user_name: String, - source: obkv::error::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to init obkv client, user:{}, err:{}.\nBacktrace:\n{}", - full_user_name, - source, - backtrace - ))] - InitClient { - full_user_name: String, - source: obkv::error::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to execute sql, err:{}, sql:{}.\nBacktrace:\n{}", - source, - sql, - backtrace - ))] - ExecuteSql { - sql: String, - source: obkv::error::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to check table existence, table:{}, err:{}.\nBacktrace:\n{}", - table_name, - source, - backtrace - ))] - CheckTable { - table_name: String, - source: obkv::error::Error, - backtrace: Backtrace, - }, - - #[snafu(display("Table not created, table:{}.\nBacktrace:\n{}", table_name, backtrace))] - TableNotCreated { - table_name: String, - backtrace: Backtrace, - }, - - #[snafu(display("Table not dropped, table:{}.\nBacktrace:\n{}", table_name, backtrace))] - TableNotDropped { - table_name: String, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to write to table, table:{}, err:{}.\nBacktrace:\n{}", - table_name, - source, - backtrace - ))] - WriteTable { - table_name: String, - source: obkv::error::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to iter result set, table:{}, err:{}.\nBacktrace:\n{}", - table_name, - source, - backtrace - ))] - IterResultSet { - table_name: String, - source: obkv::error::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Column not found in result, table:{}, column:{}.\nBacktrace:\n{}", - table_name, - column_name, - backtrace - ))] - MissColumn { - table_name: String, - column_name: String, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to execute query, table:{}, err:{}.\nBacktrace:\n{}", - table_name, - source, - backtrace - ))] - ExecuteQuery { - table_name: String, - source: obkv::error::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to truncate table, table:{}, err:{}.\nBacktrace:\n{}", - table_name, - source, - backtrace - ))] - TruncateTable { - table_name: String, - source: obkv::error::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to get value from table, table:{}, err:{}.\nBacktrace:\n{}", - table_name, - source, - backtrace - ))] - GetValue { - table_name: String, - source: obkv::error::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to get batch value from table, table:{table_name}, err:{source}.\nBacktrace:\n{backtrace}" - ))] - GetBatchValue { - table_name: String, - source: obkv::error::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - 
"Unexpected batch result found, table:{table_name}.\nBacktrace:\n{backtrace}" - ))] - UnexpectedBatchResult { - table_name: String, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to delete data from table, table:{}, err:{}.\nBacktrace:\n{}", - table_name, - source, - backtrace - ))] - DeleteData { - table_name: String, - source: obkv::error::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Invalid result num, table:{}, expect:{}, actual:{}.\nBacktrace:\n{}", - table_name, - expect, - actual, - backtrace - ))] - UnexpectedResultNum { - table_name: String, - expect: usize, - actual: usize, - backtrace: Backtrace, - }, -} - -define_result!(Error); - -impl Error { - fn obkv_result_code(&self) -> Option { - if let Some(obkv::error::Error::Common(obkv::error::CommonErrCode::ObException(code), _)) = - self.source() - .and_then(|s| s.downcast_ref::()) - { - Some(*code) - } else { - None - } - } -} - -impl TableError for Error { - fn is_primary_key_duplicate(&self) -> bool { - self.obkv_result_code().map_or(false, |code| { - code == obkv::ResultCodes::OB_ERR_PRIMARY_KEY_DUPLICATE - }) - } -} - -const KEY_COLUMN_NAME: &str = "k"; -const VALUE_COLUMN_NAME: &str = "v"; -const KEY_COLUMN_LEN: usize = 2048; -const VALUE_COLUMN_TYPE: &str = "LONGBLOB"; - -#[inline] -fn bytes_to_values(bs: &[u8]) -> Vec { - vec![Value::from(bs)] -} - -/// Batch operations to write to obkv. -pub struct ObkvWriteBatch { - batch_op: ObTableBatchOperation, - op_num: usize, -} - -impl WriteBatch for ObkvWriteBatch { - fn with_capacity(capacity: usize) -> Self { - Self { - batch_op: ObTableBatchOperation::with_ops_num_raw(capacity), - op_num: 0, - } - } - - fn insert(&mut self, key: &[u8], value: &[u8]) { - self.batch_op.insert( - bytes_to_values(key), - vec![VALUE_COLUMN_NAME.to_string()], - bytes_to_values(value), - ); - self.op_num += 1; - } - - fn insert_or_update(&mut self, key: &[u8], value: &[u8]) { - self.batch_op.insert_or_update( - bytes_to_values(key), - vec![VALUE_COLUMN_NAME.to_string()], - bytes_to_values(value), - ); - self.op_num += 1; - } - - fn delete(&mut self, key: &[u8]) { - self.batch_op.delete(bytes_to_values(key)); - self.op_num += 1; - } -} - -impl Default for ObkvWriteBatch { - fn default() -> ObkvWriteBatch { - Self { - batch_op: ObTableBatchOperation::raw(), - op_num: 0, - } - } -} - -impl From<&SeekKey> for Value { - fn from(key: &SeekKey) -> Value { - Value::from(&key.0) - } -} - -// Returns (key, equals). -fn to_scan_range(bound: &KeyBoundary) -> (Vec, bool) { - match bound { - KeyBoundary::Included(v) => (vec![v.into()], true), - KeyBoundary::Excluded(v) => (vec![v.into()], false), - KeyBoundary::MinIncluded => (vec![Value::get_min()], true), - KeyBoundary::MaxIncluded => (vec![Value::get_max()], true), - } -} - -/// Table kv implementation based on obkv. -#[derive(Clone)] -pub struct ObkvImpl { - client: ObTableClient, - - // The following are configs, if there are too many configs, maybe we should put them - // on heap to avoid the `ObkvImpl` struct allocating too much stack size. 
- enable_purge_recyclebin: bool, - check_batch_result_num: bool, - max_create_table_retries: usize, -} - -impl fmt::Debug for ObkvImpl { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("ObkvImpl") - .field("client", &"ObTableClient".to_string()) - .field("enable_purge_recyclebin", &self.enable_purge_recyclebin) - .field("check_batch_result_num", &self.check_batch_result_num) - .field("max_create_table_retries", &self.max_create_table_retries) - .finish() - } -} - -impl ObkvImpl { - /// Create a new obkv client instance with given `config`. - pub fn new(config: ObkvConfig) -> Result { - ensure!(config.valid(), InvalidConfig); - - info!( - "Try to create obkv client, param_url:{}, full_user_name:{}", - config.param_url, config.full_user_name - ); - - let client = Builder::new() - .full_user_name(&config.full_user_name) - .param_url(&config.param_url) - .running_mode(RunningMode::Normal) - .password(&config.password) - .config(config.client.clone().into()) - .build() - .context(BuildClient { - full_user_name: &config.full_user_name, - })?; - - client.init().context(InitClient { - full_user_name: &config.full_user_name, - })?; - - info!( - "Obkv client created, param_url:{}, full_user_name:{}", - config.param_url, config.full_user_name - ); - - Ok(Self { - client, - enable_purge_recyclebin: config.enable_purge_recyclebin, - check_batch_result_num: config.check_batch_result_num, - max_create_table_retries: config.max_create_table_retries, - }) - } - - fn try_create_kv_table(&self, table_name: &str) -> Result<()> { - let create_sql = format_create_table_sql(table_name); - - info!( - "Try to create table, table_name:{}, sql:{}", - table_name, create_sql - ); - - self.client - .execute_sql(&create_sql) - .context(ExecuteSql { sql: &create_sql })?; - - // Table is not exist after created. - let created = self - .client - .check_table_exists(table_name) - .context(CheckTable { table_name })?; - - ensure!(created, TableNotCreated { table_name }); - - info!("Obkv table created, table_name:{}", table_name); - - Ok(()) - } - - fn try_drop_kv_table(&self, table_name: &str) -> Result<()> { - let drop_sql = format_drop_table_sql(table_name, self.enable_purge_recyclebin); - - info!( - "Try to drop table, table_name:{}, sql:{}", - table_name, drop_sql - ); - - let exists = self - .client - .check_table_exists(table_name) - .context(CheckTable { table_name })?; - // TODO: currently `check_table_exists` will return false when pick active - // server failed, which is to say, we may leak table in this case. 
- if !exists { - info!("Drop non-exist table, table_name:{}", table_name); - return Ok(()); - } - - self.client - .execute_sql(&drop_sql) - .context(ExecuteSql { sql: &drop_sql })?; - - let exists = self - .client - .check_table_exists(table_name) - .context(CheckTable { table_name })?; - - ensure!(!exists, TableNotDropped { table_name }); - info!("Drop table success, table_name:{}", table_name); - - Ok(()) - } - - fn check_write_batch_op_results( - &self, - table_name: &str, - results: &[TableOpResult], - expect_num: usize, - ) -> Result<()> { - ensure!( - !self.check_batch_result_num || results.len() == expect_num, - UnexpectedResultNum { - table_name, - expect: expect_num, - actual: results.len(), - } - ); - - Ok(()) - } -} - -impl TableKv for ObkvImpl { - type Error = Error; - type ScanIter = ObkvScanIter; - type WriteBatch = ObkvWriteBatch; - - fn table_exists(&self, table_name: &str) -> Result { - self.client - .check_table_exists(table_name) - .context(CheckTable { table_name }) - } - - fn create_table(&self, table_name: &str) -> Result<()> { - let mut retry = 0; - loop { - match self.try_create_kv_table(table_name) { - Ok(()) => { - info!( - "Obkv table created, table_name:{}, retry:{}", - table_name, retry - ); - - return Ok(()); - } - Err(e) => { - error!( - "Failed to create table, table_name:{}, retry:{}, err:{}", - table_name, retry, e - ); - - retry += 1; - if retry > self.max_create_table_retries { - return Err(e); - } - } - } - } - } - - fn drop_table(&self, table_name: &str) -> Result<()> { - // Drop table won't retry on failure now. - self.try_drop_kv_table(table_name).map_err(|e| { - error!("Failed to drop table, table_name:{}, err:{}", table_name, e); - e - }) - } - - fn truncate_table(&self, table_name: &str) -> Result<()> { - info!("Try to truncate table, table_name:{}", table_name); - - self.client - .truncate_table(table_name) - .context(TruncateTable { table_name }) - } - - fn write( - &self, - _ctx: WriteContext, - table_name: &str, - write_batch: ObkvWriteBatch, - ) -> Result<()> { - let _timer = OBKV_OP_DURATION_HISTOGRAM - .with_label_values(&["write"]) - .start_timer(); - - let results = self - .client - .execute_batch(table_name, write_batch.batch_op) - .context(WriteTable { table_name })?; - - self.check_write_batch_op_results(table_name, &results, write_batch.op_num)?; - - Ok(()) - } - - fn scan( - &self, - ctx: ScanContext, - table_name: &str, - request: ScanRequest, - ) -> Result { - let iter = ObkvScanIter::new(self.client.clone(), ctx, table_name.to_string(), request)?; - - Ok(iter) - } - - fn get(&self, table_name: &str, key: &[u8]) -> Result>> { - let _timer = OBKV_OP_DURATION_HISTOGRAM - .with_label_values(&["get"]) - .start_timer(); - - let mut values = self - .client - .get( - table_name, - bytes_to_values(key), - vec![VALUE_COLUMN_NAME.to_string()], - ) - .context(GetValue { table_name })?; - - Ok(values.remove(VALUE_COLUMN_NAME).map(Value::as_bytes)) - } - - fn get_batch(&self, table_name: &str, keys: Vec<&[u8]>) -> Result>>> { - let _timer = OBKV_OP_DURATION_HISTOGRAM - .with_label_values(&["get_batch"]) - .start_timer(); - - let mut batch_ops = ObTableBatchOperation::with_ops_num_raw(keys.len()); - let mut batch_res = Vec::with_capacity(keys.len()); - - for key in keys { - batch_ops.get(bytes_to_values(key), vec![VALUE_COLUMN_NAME.to_string()]); - } - - let result = self - .client - .execute_batch(table_name, batch_ops) - .context(GetBatchValue { table_name })?; - - for table_ops_result in result { - match table_ops_result { - 
TableOpResult::RetrieveRows(mut values) => { - batch_res.push(values.remove(VALUE_COLUMN_NAME).map(Value::as_bytes)) - } - TableOpResult::AffectedRows(_) => UnexpectedBatchResult { table_name }.fail()?, - } - } - Ok(batch_res) - } - - fn delete(&self, table_name: &str, key: &[u8]) -> std::result::Result<(), Self::Error> { - let _timer = OBKV_OP_DURATION_HISTOGRAM - .with_label_values(&["delete"]) - .start_timer(); - - self.client - .delete(table_name, bytes_to_values(key)) - .context(DeleteData { table_name })?; - - Ok(()) - } - - fn delete_batch( - &self, - table_name: &str, - keys: Vec>, - ) -> std::result::Result<(), Self::Error> { - let _timer = OBKV_OP_DURATION_HISTOGRAM - .with_label_values(&["delete_batch"]) - .start_timer(); - - let mut batch_ops = ObTableBatchOperation::with_ops_num_raw(keys.len()); - for key in keys { - batch_ops.delete(bytes_to_values(&key)); - } - - self.client - .execute_batch(table_name, batch_ops) - .context(WriteTable { table_name })?; - - Ok(()) - } -} - -pub struct ObkvScanIter { - client: ObTableClient, - ctx: ScanContext, - table_name: String, - request: ScanRequest, - - /// Current result set. - result_set: Option, - current_key: Vec, - current_value: Vec, - result_set_fetched_num: i32, - /// The iterator has been exhausted. - eof: bool, -} - -impl fmt::Debug for ObkvScanIter { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("ObkvScanIter") - .field("client", &"ObTableClient".to_string()) - .field("ctx", &self.ctx) - .field("table_name", &self.table_name) - .field("request", &self.request) - .field("result_set", &self.result_set) - .field("current_key", &self.current_key) - .field("current_value", &self.current_value) - .field("result_set_fetched_num", &self.result_set_fetched_num) - .field("eof", &self.eof) - .finish() - } -} - -impl ObkvScanIter { - /// Create a new iterator for scan. - fn new( - client: ObTableClient, - ctx: ScanContext, - table_name: String, - request: ScanRequest, - ) -> Result { - assert!(ctx.batch_size > 0); - - let mut iter = ObkvScanIter { - client, - ctx, - table_name, - request, - - result_set: None, - current_key: Vec::new(), - current_value: Vec::new(), - result_set_fetched_num: 0, - eof: false, - }; - - iter.init()?; - - Ok(iter) - } - - /// Init the iterator. - fn init(&mut self) -> Result<()> { - let start = to_scan_range(&self.request.start); - let end = to_scan_range(&self.request.end); - let query = self - .new_query() - .add_scan_range(start.0, start.1, end.0, end.1); - - let result_set = query.execute().context(ExecuteQuery { - table_name: &self.table_name, - })?; - self.result_set = Some(result_set); - - if !self.step_result_set()? { - // Nothing fetched from first result set, mark eof to true. - self.eof = true; - } - - Ok(()) - } - - /// Fetch next key-value pair from the `result_set`, store them to - /// `current_key`, `current_value`, then return Ok(true). If no entry - /// was fetched, reset `result_set` to None and returns Ok(false). 
- fn step_result_set(&mut self) -> Result { - if let Some(result_set) = &mut self.result_set { - if let Some(row) = result_set.next() { - self.result_set_fetched_num += 1; - - let row = row.context(IterResultSet { - table_name: &self.table_name, - })?; - - let (key, value) = row_to_key_value(&self.table_name, row)?; - self.current_key = key; - self.current_value = value; - - return Ok(true); - } else { - self.result_set = None; - } - } - - Ok(false) - } - - /// Scan next key range, init `result_set` and store first key-value from - /// `result_set` to `current_key`, `current_value`. If next key range - /// has no data, set `eof` to true. - fn scan_next_key_range(&mut self) -> Result<()> { - assert!(self.result_set.is_none()); - - if self.result_set_fetched_num < self.ctx.batch_size { - // We have reached eof. - self.eof = true; - return Ok(()); - } - - let current_key = bytes_to_values(&self.current_key); - let result_set = if self.request.reverse { - let start = to_scan_range(&self.request.start); - self.new_query() - .add_scan_range(start.0, start.1, current_key, false) - .execute() - .context(ExecuteQuery { - table_name: &self.table_name, - })? - } else { - let end = to_scan_range(&self.request.end); - self.new_query() - .add_scan_range(current_key, false, end.0, end.1) - .execute() - .context(ExecuteQuery { - table_name: &self.table_name, - })? - }; - self.result_set = Some(result_set); - self.result_set_fetched_num = 0; - - if !self.step_result_set()? { - // No data in result set of next key range. - self.eof = true; - } - - Ok(()) - } - - fn new_query(&self) -> impl TableQuery { - self.client - .query(&self.table_name) - .batch_size(self.ctx.batch_size) - // NOTE: keep the limit same as the batch size so as to avoid stream query session kept - // on ObServer. - .limit(None, self.ctx.batch_size) - .primary_index() - .select(vec![ - KEY_COLUMN_NAME.to_string(), - VALUE_COLUMN_NAME.to_string(), - ]) - .operation_timeout(self.ctx.timeout) - // Scan order takes `forward` as input, reverse means NOT forward. - .scan_order(!self.request.reverse) - } -} - -impl ScanIter for ObkvScanIter { - type Error = Error; - - fn valid(&self) -> bool { - !self.eof - } - - fn next(&mut self) -> Result { - assert!(self.valid()); - - let _timer = OBKV_OP_DURATION_HISTOGRAM - .with_label_values(&["scan_next"]) - .start_timer(); - - // Try to fetch next key-value from current result set. - if self.step_result_set()? { - return Ok(true); - } - - // Need to scan next key range. 
- self.scan_next_key_range()?; - - Ok(self.valid()) - } - - fn key(&self) -> &[u8] { - assert!(self.valid()); - - &self.current_key - } - - fn value(&self) -> &[u8] { - assert!(self.valid()); - - &self.current_value - } -} - -fn row_to_key_value( - table_name: &str, - mut row: HashMap, -) -> Result<(Vec, Vec)> { - let key = row.remove(KEY_COLUMN_NAME).context(MissColumn { - table_name, - column_name: KEY_COLUMN_NAME, - })?; - - let value = row.remove(VALUE_COLUMN_NAME).context(MissColumn { - table_name, - column_name: VALUE_COLUMN_NAME, - })?; - - Ok((key.as_bytes(), value.as_bytes())) -} - -fn format_create_table_sql(table_name: &str) -> String { - format!( - "CREATE TABLE IF NOT EXISTS {table_name}( - {KEY_COLUMN_NAME} VARBINARY({KEY_COLUMN_LEN}), - {VALUE_COLUMN_NAME} {VALUE_COLUMN_TYPE} NOT NULL, - PRIMARY KEY({KEY_COLUMN_NAME}) - );" - ) -} - -fn format_drop_table_sql(table_name: &str, purge_recyclebin: bool) -> String { - if purge_recyclebin { - format!("DROP TABLE IF EXISTS {table_name}; PURGE RECYCLEBIN;") - } else { - format!("DROP TABLE IF EXISTS {table_name};") - } -} diff --git a/src/components/table_kv/src/obkv/tests.rs b/src/components/table_kv/src/obkv/tests.rs deleted file mode 100644 index 96b88bf9f2..0000000000 --- a/src/components/table_kv/src/obkv/tests.rs +++ /dev/null @@ -1,553 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// Tests. 
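The deleted `ObkvScanIter` above pages through a large key range with keyset pagination: every query is capped at `batch_size` rows, and when a result set comes back full, the next query is issued with an exclusive bound just after the last key seen (`add_scan_range(current_key, false, ..)`); a short result set means the range is exhausted. A self-contained sketch of that strategy, using a std `BTreeMap` in place of an OBKV table (the names here are illustrative):

    use std::collections::BTreeMap;
    use std::ops::Bound;

    /// Fetch at most `batch_size` pairs starting from `start`, mimicking one
    /// "result set" of the deleted ObkvScanIter.
    fn fetch_batch(
        data: &BTreeMap<Vec<u8>, Vec<u8>>,
        start: Bound<Vec<u8>>,
        batch_size: usize,
    ) -> Vec<(Vec<u8>, Vec<u8>)> {
        data.range((start, Bound::Unbounded))
            .take(batch_size)
            .map(|(k, v)| (k.clone(), v.clone()))
            .collect()
    }

    fn main() {
        let data: BTreeMap<Vec<u8>, Vec<u8>> = (0u8..10).map(|i| (vec![i], vec![i])).collect();

        let batch_size = 3;
        let mut start = Bound::Unbounded;
        let mut all = Vec::new();

        loop {
            let batch = fetch_batch(&data, start.clone(), batch_size);
            let fetched = batch.len();
            all.extend(batch);

            // Same stop condition as scan_next_key_range: a short batch means we hit the end.
            if fetched < batch_size {
                break;
            }
            // Otherwise continue strictly after the last key we saw (exclusive lower bound),
            // which is what the next add_scan_range call does against OBKV.
            start = Bound::Excluded(all.last().unwrap().0.clone());
        }

        assert_eq!(all.len(), 10);
    }

Keeping the per-query limit equal to the batch size is what the deleted `new_query` comment refers to: it avoids leaving a streaming query session open on the ObServer between batches.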
- -use std::{collections::HashSet, time::Duration}; - -use logger::{error, info}; -use rand::prelude::*; - -use super::*; - -const MAX_TABLE_ID: u32 = 30; - -struct ObkvTester { - obkv: ObkvImpl, - tables: HashSet, -} - -impl ObkvTester { - fn new_config() -> ObkvConfig { - ObkvConfig::for_test() - } - - fn create_table(&mut self, table_name: &str) { - self.obkv.create_table(table_name).unwrap(); - - self.tables.insert(table_name.to_string()); - } - - fn insert_batch(&self, table_name: &str, pairs: &[(&[u8], &[u8])]) { - self.try_insert_batch(table_name, pairs).unwrap(); - } - - fn try_insert_batch(&self, table_name: &str, pairs: &[(&[u8], &[u8])]) -> Result<()> { - let mut batch = ObkvWriteBatch::with_capacity(pairs.len()); - for pair in pairs { - batch.insert(pair.0, pair.1); - } - - self.obkv.write(WriteContext::default(), table_name, batch) - } - - fn insert_or_update_batch(&self, table_name: &str, pairs: &[(&[u8], &[u8])]) { - let mut batch = ObkvWriteBatch::with_capacity(pairs.len()); - for pair in pairs { - batch.insert_or_update(pair.0, pair.1); - } - - self.obkv - .write(WriteContext::default(), table_name, batch) - .unwrap(); - } - - fn delete_batch(&self, table_name: &str, keys: &[&[u8]]) { - let mut batch = ObkvWriteBatch::with_capacity(keys.len()); - for key in keys { - batch.delete(key); - } - - self.obkv - .write(WriteContext::default(), table_name, batch) - .unwrap(); - } - - fn scan( - &self, - ctx: ScanContext, - table_name: &str, - scan_req: ScanRequest, - ) -> Vec<(Vec, Vec)> { - let mut iter = self.obkv.scan(ctx, table_name, scan_req).unwrap(); - - let mut pairs = Vec::new(); - while iter.valid() { - let pair = (iter.key().to_vec(), iter.value().to_vec()); - pairs.push(pair); - - iter.next().unwrap(); - } - - pairs - } - - fn get(&self, table_name: &str, key: &[u8]) -> Option> { - self.obkv.get(table_name, key).unwrap() - } - - fn truncate_table(&self, table_name: &str) { - info!("truncate table, table_name:{}", table_name); - - self.obkv.truncate_table(table_name).unwrap(); - - // It seems that truncate of obkv isn't taking effect immediately. 
- std::thread::sleep(Duration::from_secs(1)); - } -} - -impl Default for ObkvTester { - fn default() -> Self { - let config = Self::new_config(); - - let obkv = ObkvImpl::new(config).unwrap(); - - Self { - obkv, - tables: HashSet::new(), - } - } -} - -impl Drop for ObkvTester { - fn drop(&mut self) { - for table in &self.tables { - info!("Obkv tester truncate table, table_name:{}", table); - - if let Err(e) = self.obkv.truncate_table(table) { - error!( - "Obkv tester failed to truncate table, table_name:{}, err:{}", - table, e - ); - } - - if let Err(e) = self.obkv.drop_table(table) { - error!( - "Obkv tester failed to drop table, table_name:{}, err:{}", - table, e - ); - } - } - } -} - -fn random_table_name(prefix: &str) -> String { - let mut rng = thread_rng(); - let v: u32 = rng.gen_range(0..MAX_TABLE_ID); - - format!("{prefix}_{v}") -} - -fn new_scan_ctx(batch_size: i32) -> ScanContext { - ScanContext { - batch_size, - ..Default::default() - } -} - -fn all_scan_ctxs() -> Vec { - vec![ - new_scan_ctx(1), - new_scan_ctx(10), - new_scan_ctx(50), - ScanContext::default(), - ] -} - -fn check_scan_result(expect: &[(&[u8], &[u8])], result: &[(Vec, Vec)]) { - assert_eq!(expect.len(), result.len()); - - for (pair1, pair2) in expect.iter().zip(result) { - assert_eq!(pair1.0, pair2.0); - assert_eq!(pair1.1, pair2.1); - } -} - -#[test] -#[ignore] -fn test_obkv() { - let mut tester = ObkvTester::default(); - - let table_name = random_table_name("horaedb"); - tester.create_table(&table_name); - tester.truncate_table(&table_name); - - info!("test obkv, table_name:{}", table_name); - - test_simple_write_read(&tester, &table_name); - - test_update(&tester, &table_name); - - test_insert_duplicate(&tester, &table_name); - - test_partial_scan(&tester, &table_name); - - test_prefix_scan(&tester, &table_name); - - test_delete(&tester, &table_name); - - test_min_max_scan(&tester, &table_name); - - test_reverse_scan(&tester, &table_name); -} - -// This test does a full scan, need to truncate table. -fn test_simple_write_read(tester: &ObkvTester, table_name: &str) { - tester.truncate_table(table_name); - - let mut data: [(&[u8], &[u8]); 3] = [ - (b"simple:a1", b"value a1"), - (b"simple:b2", b"value b2"), - (b"simple:a2", b"value a2"), - ]; - - // Test get empty. - for pair in data { - assert!(tester.get(table_name, pair.0).is_none()); - } - - // Test scan empty. 
- let scan_req = ScanRequest { - start: KeyBoundary::min_included(), - end: KeyBoundary::max_included(), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - assert!(result.is_empty()); - } - - tester.insert_batch(table_name, &data); - - for pair in data { - let v = tester.get(table_name, pair.0).unwrap(); - assert_eq!(pair.1, v); - } - - let scan_req = ScanRequest { - start: KeyBoundary::min_included(), - end: KeyBoundary::max_included(), - reverse: false, - }; - data.sort_unstable_by_key(|v| v.0); - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data, &result); - } -} - -fn test_update(tester: &ObkvTester, table_name: &str) { - let data: [(&[u8], &[u8]); 2] = [(b"update:a1", b"value a1"), (b"update:b1", b"value b1")]; - - tester.insert_or_update_batch(table_name, &data); - - for pair in data { - let v = tester.get(table_name, pair.0).unwrap(); - assert_eq!(pair.1, v); - } - - let data: [(&[u8], &[u8]); 2] = [ - (b"update:b1", b"update value b1"), - (b"update:c1", b"update value c1"), - ]; - - tester.insert_or_update_batch(table_name, &data); - - for pair in data { - let v = tester.get(table_name, pair.0).unwrap(); - assert_eq!(pair.1, v); - } -} - -fn test_insert_duplicate(tester: &ObkvTester, table_name: &str) { - let data: [(&[u8], &[u8]); 1] = [(b"duplicate:a1", b"value a1")]; - - tester.insert_batch(table_name, &data); - - let ret = tester.try_insert_batch(table_name, &data); - check_duplicate_primary_key(ret, table_name); -} - -fn check_duplicate_primary_key(ret: Result<()>, expect_table_name: &str) { - if let Err(err) = ret { - assert!(err.is_primary_key_duplicate()); - if let Error::WriteTable { - table_name, - source: _, - backtrace: _, - } = err - { - assert_eq!(expect_table_name, table_name); - } else { - panic!("Unexpected insert error, err:{err:?}"); - } - } else { - panic!("Unexpected insert result, ret:{ret:?}"); - } -} - -fn test_delete(tester: &ObkvTester, table_name: &str) { - let data: [(&[u8], &[u8]); 4] = [ - (b"delete:a1", b"value a1"), - (b"delete:b1", b"value b1"), - (b"delete:b2", b"value b2"), - (b"delete:c1", b"value c1"), - ]; - - tester.insert_batch(table_name, &data); - - for pair in data { - let v = tester.get(table_name, pair.0).unwrap(); - assert_eq!(pair.1, v); - } - - tester.delete_batch(table_name, &[b"b1", b"b2"]); - - assert_eq!( - b"value a1", - tester.get(table_name, b"delete:a1").unwrap().as_slice() - ); - assert!(tester.get(table_name, b"b1").is_none()); - assert!(tester.get(table_name, b"b2").is_none()); - assert_eq!( - b"value c1", - tester.get(table_name, b"delete:c1").unwrap().as_slice() - ); -} - -// This test scan to min/max, need to truncate table. 
-fn test_min_max_scan(tester: &ObkvTester, table_name: &str) { - tester.truncate_table(table_name); - - let data: [(&[u8], &[u8]); 5] = [ - (b"minmax:a1", b"value a1"), - (b"minmax:b1", b"value b1"), - (b"minmax:c1", b"value c1"), - (b"minmax:d1", b"value d1"), - (b"minmax:e1", b"value e1"), - ]; - - tester.insert_batch(table_name, &data); - - let scan_req = ScanRequest { - start: KeyBoundary::min_included(), - end: KeyBoundary::max_included(), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data, &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::min_included(), - end: KeyBoundary::min_included(), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&[], &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::included(b"minmax"), - end: KeyBoundary::min_included(), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&[], &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::max_included(), - end: KeyBoundary::max_included(), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&[], &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::max_included(), - end: KeyBoundary::included(b"minmax:d"), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&[], &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::max_included(), - end: KeyBoundary::min_included(), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&[], &result); - } -} - -// This test does a full scan, need to truncate table. 
-fn test_reverse_scan(tester: &ObkvTester, table_name: &str) { - tester.truncate_table(table_name); - - let data: [(&[u8], &[u8]); 5] = [ - (b"reverse:e1", b"value e1"), - (b"reverse:d1", b"value d1"), - (b"reverse:c1", b"value c1"), - (b"reverse:b1", b"value b1"), - (b"reverse:a1", b"value a1"), - ]; - - tester.insert_batch(table_name, &data); - - let scan_req = ScanRequest { - start: KeyBoundary::min_included(), - end: KeyBoundary::max_included(), - reverse: true, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data, &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::min_included(), - end: KeyBoundary::excluded(b"reverse:d1"), - reverse: true, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data[2..], &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::included(b"reverse:b1"), - end: KeyBoundary::max_included(), - reverse: true, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data[..4], &result); - } -} - -fn test_partial_scan(tester: &ObkvTester, table_name: &str) { - let data: [(&[u8], &[u8]); 7] = [ - (b"partial:a1", b"value a1"), - (b"partial:b1", b"value b1"), - (b"partial:c1", b"value c1"), - (b"partial:d1", b"value d1"), - (b"partial:e1", b"value e1"), - (b"partial:f1", b"value f1"), - (b"partial:g1", b"value g1"), - ]; - - tester.insert_batch(table_name, &data); - - let scan_req = ScanRequest { - start: KeyBoundary::included(data[1].0), - end: KeyBoundary::included(data[5].0), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data[1..=5], &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::excluded(data[1].0), - end: KeyBoundary::included(data[5].0), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data[2..=5], &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::included(data[1].0), - end: KeyBoundary::excluded(data[5].0), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data[1..5], &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::excluded(data[1].0), - end: KeyBoundary::excluded(data[5].0), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data[2..5], &result); - } -} - -fn test_prefix_scan(tester: &ObkvTester, table_name: &str) { - let data: [(&[u8], &[u8]); 6] = [ - (b"prefix:a1", b"value a1"), - (b"prefix:b1", b"value b1"), - (b"prefix:b2", b"value b2"), - (b"prefix:b3", b"value b3"), - (b"prefix:b4", b"value b4"), - (b"prefix:c1", b"value c1"), - ]; - - tester.insert_batch(table_name, &data); - - let scan_req = ScanRequest { - start: KeyBoundary::included(b"prefix:b"), - end: KeyBoundary::included(b"prefix:z"), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data[1..], &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::included(b"prefix:b"), - end: KeyBoundary::excluded(b"prefix:b4"), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - 
check_scan_result(&data[1..4], &result); - } -} diff --git a/src/components/table_kv/src/tests.rs b/src/components/table_kv/src/tests.rs deleted file mode 100644 index a2e5b69c53..0000000000 --- a/src/components/table_kv/src/tests.rs +++ /dev/null @@ -1,564 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// Tests. - -use std::{collections::HashSet, time::Duration}; - -use logger::{error, info}; -use rand::prelude::*; - -use super::*; -use crate::{config::ObkvConfig, memory::MemoryImpl, obkv::ObkvImpl}; - -const MAX_TABLE_ID: u32 = 30; - -struct TableKvTester { - table_kv: T, - tables: HashSet, -} - -impl TableKvTester { - fn new(table_kv: T) -> Self { - TableKvTester { - table_kv, - tables: HashSet::new(), - } - } - - fn create_table(&mut self, table_name: &str) { - self.table_kv.create_table(table_name).unwrap(); - - self.tables.insert(table_name.to_string()); - } - - fn insert_batch(&self, table_name: &str, pairs: &[(&[u8], &[u8])]) { - self.try_insert_batch(table_name, pairs).unwrap(); - } - - fn try_insert_batch( - &self, - table_name: &str, - pairs: &[(&[u8], &[u8])], - ) -> std::result::Result<(), T::Error> { - let mut batch = T::WriteBatch::with_capacity(pairs.len()); - for pair in pairs { - batch.insert(pair.0, pair.1); - } - - self.table_kv - .write(WriteContext::default(), table_name, batch) - } - - fn insert_or_update_batch(&self, table_name: &str, pairs: &[(&[u8], &[u8])]) { - let mut batch = T::WriteBatch::with_capacity(pairs.len()); - for pair in pairs { - batch.insert_or_update(pair.0, pair.1); - } - - self.table_kv - .write(WriteContext::default(), table_name, batch) - .unwrap(); - } - - fn delete_batch(&self, table_name: &str, keys: &[&[u8]]) { - let mut batch = T::WriteBatch::with_capacity(keys.len()); - for key in keys { - batch.delete(key); - } - - self.table_kv - .write(WriteContext::default(), table_name, batch) - .unwrap(); - } - - fn scan( - &self, - ctx: ScanContext, - table_name: &str, - scan_req: ScanRequest, - ) -> Vec<(Vec, Vec)> { - let mut iter = self.table_kv.scan(ctx, table_name, scan_req).unwrap(); - - let mut pairs = Vec::new(); - while iter.valid() { - let pair = (iter.key().to_vec(), iter.value().to_vec()); - pairs.push(pair); - - iter.next().unwrap(); - } - - pairs - } - - fn get(&self, table_name: &str, key: &[u8]) -> Option> { - self.table_kv.get(table_name, key).unwrap() - } - - fn truncate_table(&self, table_name: &str) { - info!("truncate table, table_name:{}", table_name); - - self.table_kv.truncate_table(table_name).unwrap(); - - // It seems that truncate of obkv isn't taking effect immediately. 
- std::thread::sleep(Duration::from_secs(1)); - } -} - -impl Drop for TableKvTester { - fn drop(&mut self) { - for table in &self.tables { - info!("tester truncate table, table_name:{}", table); - - if let Err(e) = self.table_kv.truncate_table(table) { - error!( - "tester failed to truncate table, table_name:{}, err:{}", - table, e - ); - } - - if let Err(e) = self.table_kv.drop_table(table) { - error!( - "tester failed to drop table, table_name:{}, err:{}", - table, e - ); - } - } - } -} - -fn new_obkv_config() -> ObkvConfig { - ObkvConfig::for_test() -} - -fn new_obkv_tester() -> TableKvTester { - let config = new_obkv_config(); - - let table_kv = ObkvImpl::new(config).unwrap(); - - TableKvTester::new(table_kv) -} - -fn new_memory_tester() -> TableKvTester { - let table_kv = MemoryImpl::default(); - - TableKvTester::new(table_kv) -} - -fn random_table_name(prefix: &str) -> String { - let mut rng = thread_rng(); - let v: u32 = rng.gen_range(0..MAX_TABLE_ID); - - format!("{prefix}_{v}") -} - -fn new_scan_ctx(batch_size: i32) -> ScanContext { - ScanContext { - batch_size, - ..Default::default() - } -} - -fn all_scan_ctxs() -> Vec { - vec![ - new_scan_ctx(1), - new_scan_ctx(10), - new_scan_ctx(50), - ScanContext::default(), - ] -} - -fn check_scan_result(expect: &[(&[u8], &[u8])], result: &[(Vec, Vec)]) { - assert_eq!(expect.len(), result.len()); - - for (pair1, pair2) in expect.iter().zip(result) { - assert_eq!(pair1.0, pair2.0); - assert_eq!(pair1.1, pair2.1); - } -} - -#[test] -#[ignore] -fn test_obkv() { - let tester = new_obkv_tester(); - - run_table_kv_test(tester); -} - -#[test] -fn test_memory() { - let tester = new_memory_tester(); - - run_table_kv_test(tester); -} - -fn run_table_kv_test(mut tester: TableKvTester) { - let table_name = random_table_name("horaedb"); - tester.create_table(&table_name); - tester.truncate_table(&table_name); - - info!("test table kv, table_name:{}", table_name); - - test_simple_write_read(&tester, &table_name); - - test_update(&tester, &table_name); - - test_insert_duplicate(&tester, &table_name); - - test_partial_scan(&tester, &table_name); - - test_prefix_scan(&tester, &table_name); - - test_delete(&tester, &table_name); - - test_min_max_scan(&tester, &table_name); - - test_reverse_scan(&tester, &table_name); -} - -// This test does a full scan, need to truncate table. -fn test_simple_write_read(tester: &TableKvTester, table_name: &str) { - tester.truncate_table(table_name); - - let mut data: [(&[u8], &[u8]); 3] = [ - (b"simple:a1", b"value a1"), - (b"simple:b2", b"value b2"), - (b"simple:a2", b"value a2"), - ]; - - // Test get empty. - for pair in data { - assert!(tester.get(table_name, pair.0).is_none()); - } - - // Test scan empty. 
- let scan_req = ScanRequest { - start: KeyBoundary::min_included(), - end: KeyBoundary::max_included(), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - assert!(result.is_empty()); - } - - tester.insert_batch(table_name, &data); - - for pair in data { - let v = tester.get(table_name, pair.0).unwrap(); - assert_eq!(pair.1, v); - } - - let scan_req = ScanRequest { - start: KeyBoundary::min_included(), - end: KeyBoundary::max_included(), - reverse: false, - }; - data.sort_unstable_by_key(|v| v.0); - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data, &result); - } -} - -fn test_update(tester: &TableKvTester, table_name: &str) { - let data: [(&[u8], &[u8]); 2] = [(b"update:a1", b"value a1"), (b"update:b1", b"value b1")]; - - tester.insert_or_update_batch(table_name, &data); - - for pair in data { - let v = tester.get(table_name, pair.0).unwrap(); - assert_eq!(pair.1, v); - } - - let data: [(&[u8], &[u8]); 2] = [ - (b"update:b1", b"update value b1"), - (b"update:c1", b"update value c1"), - ]; - - tester.insert_or_update_batch(table_name, &data); - - for pair in data { - let v = tester.get(table_name, pair.0).unwrap(); - assert_eq!(pair.1, v); - } -} - -fn test_insert_duplicate(tester: &TableKvTester, table_name: &str) { - let data: [(&[u8], &[u8]); 1] = [(b"duplicate:a1", b"value a1")]; - - tester.insert_batch(table_name, &data); - - let ret = tester.try_insert_batch(table_name, &data); - if let Err(err) = ret { - assert!(err.is_primary_key_duplicate()); - } else { - panic!("Unexpected insert result, ret:{ret:?}"); - } -} - -fn test_delete(tester: &TableKvTester, table_name: &str) { - let data: [(&[u8], &[u8]); 4] = [ - (b"delete:a1", b"value a1"), - (b"delete:b1", b"value b1"), - (b"delete:b2", b"value b2"), - (b"delete:c1", b"value c1"), - ]; - - tester.insert_batch(table_name, &data); - - for pair in data { - let v = tester.get(table_name, pair.0).unwrap(); - assert_eq!(pair.1, v); - } - - tester.delete_batch(table_name, &[b"b1", b"b2"]); - - assert_eq!( - b"value a1", - tester.get(table_name, b"delete:a1").unwrap().as_slice() - ); - assert!(tester.get(table_name, b"b1").is_none()); - assert!(tester.get(table_name, b"b2").is_none()); - assert_eq!( - b"value c1", - tester.get(table_name, b"delete:c1").unwrap().as_slice() - ); -} - -// This test scan to min/max, need to truncate table. 
-fn test_min_max_scan(tester: &TableKvTester, table_name: &str) { - tester.truncate_table(table_name); - - let data: [(&[u8], &[u8]); 5] = [ - (b"minmax:a1", b"value a1"), - (b"minmax:b1", b"value b1"), - (b"minmax:c1", b"value c1"), - (b"minmax:d1", b"value d1"), - (b"minmax:e1", b"value e1"), - ]; - - tester.insert_batch(table_name, &data); - - let scan_req = ScanRequest { - start: KeyBoundary::min_included(), - end: KeyBoundary::max_included(), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data, &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::min_included(), - end: KeyBoundary::min_included(), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&[], &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::included(b"minmax"), - end: KeyBoundary::min_included(), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&[], &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::max_included(), - end: KeyBoundary::max_included(), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&[], &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::max_included(), - end: KeyBoundary::included(b"minmax:d"), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&[], &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::max_included(), - end: KeyBoundary::min_included(), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&[], &result); - } -} - -// This test does a full scan, need to truncate table. 
-fn test_reverse_scan(tester: &TableKvTester, table_name: &str) { - tester.truncate_table(table_name); - - let data: [(&[u8], &[u8]); 5] = [ - (b"reverse:e1", b"value e1"), - (b"reverse:d1", b"value d1"), - (b"reverse:c1", b"value c1"), - (b"reverse:b1", b"value b1"), - (b"reverse:a1", b"value a1"), - ]; - - tester.insert_batch(table_name, &data); - - let scan_req = ScanRequest { - start: KeyBoundary::min_included(), - end: KeyBoundary::max_included(), - reverse: true, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data, &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::min_included(), - end: KeyBoundary::excluded(b"reverse:d1"), - reverse: true, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data[2..], &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::included(b"reverse:b1"), - end: KeyBoundary::max_included(), - reverse: true, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data[..4], &result); - } -} - -fn test_partial_scan(tester: &TableKvTester, table_name: &str) { - let data: [(&[u8], &[u8]); 7] = [ - (b"partial:a1", b"value a1"), - (b"partial:b1", b"value b1"), - (b"partial:c1", b"value c1"), - (b"partial:d1", b"value d1"), - (b"partial:e1", b"value e1"), - (b"partial:f1", b"value f1"), - (b"partial:g1", b"value g1"), - ]; - - tester.insert_batch(table_name, &data); - - let scan_req = ScanRequest { - start: KeyBoundary::included(data[1].0), - end: KeyBoundary::included(data[5].0), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data[1..=5], &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::excluded(data[1].0), - end: KeyBoundary::included(data[5].0), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data[2..=5], &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::included(data[1].0), - end: KeyBoundary::excluded(data[5].0), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data[1..5], &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::excluded(data[1].0), - end: KeyBoundary::excluded(data[5].0), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data[2..5], &result); - } -} - -fn test_prefix_scan(tester: &TableKvTester, table_name: &str) { - let data: [(&[u8], &[u8]); 6] = [ - (b"prefix:a1", b"value a1"), - (b"prefix:b1", b"value b1"), - (b"prefix:b2", b"value b2"), - (b"prefix:b3", b"value b3"), - (b"prefix:b4", b"value b4"), - (b"prefix:c1", b"value c1"), - ]; - - tester.insert_batch(table_name, &data); - - let scan_req = ScanRequest { - start: KeyBoundary::included(b"prefix:b"), - end: KeyBoundary::included(b"prefix:z"), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, scan_req.clone()); - - check_scan_result(&data[1..], &result); - } - - let scan_req = ScanRequest { - start: KeyBoundary::included(b"prefix:b"), - end: KeyBoundary::excluded(b"prefix:b4"), - reverse: false, - }; - for ctx in all_scan_ctxs() { - let result = tester.scan(ctx, table_name, 
scan_req.clone()); - - check_scan_result(&data[1..4], &result); - } -} diff --git a/src/components/test_util/Cargo.toml b/src/components/test_util/Cargo.toml deleted file mode 100644 index a9a8b4c43d..0000000000 --- a/src/components/test_util/Cargo.toml +++ /dev/null @@ -1,38 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "test_util" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -# In alphabetical order -arrow = { workspace = true } -chrono = { workspace = true } -common_types = { workspace = true, features = ["test"] } -env_logger = { workspace = true } diff --git a/src/components/test_util/src/lib.rs b/src/components/test_util/src/lib.rs deleted file mode 100644 index 9dcd91e252..0000000000 --- a/src/components/test_util/src/lib.rs +++ /dev/null @@ -1,61 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Common utils shared by the whole project - -use std::{io::Write, sync::Once}; - -use arrow::util::pretty; -use common_types::record_batch::RecordBatch; - -static INIT_LOG: Once = Once::new(); - -pub fn init_log_for_test() { - INIT_LOG.call_once(|| { - env_logger::Builder::from_default_env() - .format(|buf, record| { - writeln!( - buf, - "{} {} [{}:{}] {}", - chrono::Local::now().format("%Y-%m-%dT%H:%M:%S.%3f"), - buf.default_styled_level(record.level()), - record.file().unwrap_or("unknown"), - record.line().unwrap_or(0), - record.args() - ) - }) - .init(); - }); -} - -/// A helper function to assert record batch. 
-pub fn assert_record_batches_eq(expected: &[&str], record_batches: Vec<RecordBatch>) {
-    let arrow_record_batch = record_batches
-        .into_iter()
-        .map(|record| record.into_arrow_record_batch())
-        .collect::<Vec<_>>();
-
-    let expected_lines: Vec<String> = expected.iter().map(|&s| s.into()).collect();
-    let formatted = pretty::pretty_format_batches(arrow_record_batch.as_slice())
-        .unwrap()
-        .to_string();
-    let actual_lines: Vec<&str> = formatted.trim().lines().collect();
-    assert_eq!(
-        expected_lines, actual_lines,
-        "\n\nexpected:\n\n{expected_lines:#?}\nactual:\n\n{actual_lines:#?}\n\n"
-    );
-}
diff --git a/src/components/time_ext/Cargo.toml b/src/components/time_ext/Cargo.toml
deleted file mode 100644
index 928418afcc..0000000000
--- a/src/components/time_ext/Cargo.toml
+++ /dev/null
@@ -1,44 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[package]
-name = "time_ext"
-
-[package.license]
-workspace = true
-
-[package.version]
-workspace = true
-
-[package.authors]
-workspace = true
-
-[package.edition]
-workspace = true
-
-[dependencies]
-# In alphabetical order
-chrono = { workspace = true }
-common_types = { workspace = true }
-horaedbproto = { workspace = true }
-macros = { workspace = true }
-serde = { workspace = true }
-serde_json = { workspace = true }
-snafu = { workspace = true }
-
-[dev-dependencies]
-toml = { workspace = true }
diff --git a/src/components/time_ext/src/lib.rs b/src/components/time_ext/src/lib.rs
deleted file mode 100644
index 1c2d11ee4e..0000000000
--- a/src/components/time_ext/src/lib.rs
+++ /dev/null
@@ -1,550 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Time utilities
-
-// TODO(yingwen): Move to common_types ?
- -use std::{ - convert::TryInto, - fmt::{self, Write}, - ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Sub, SubAssign}, - str::FromStr, - time::{Duration, Instant, UNIX_EPOCH}, -}; - -use chrono::{DateTime, Utc}; -use common_types::time::Timestamp; -use horaedbproto::manifest as manifest_pb; -use macros::define_result; -use serde::{ - de::{self, Visitor}, - Deserialize, Deserializer, Serialize, Serializer, -}; -use snafu::{Backtrace, GenerateBacktrace, Snafu}; - -#[derive(Debug, Snafu)] -#[allow(clippy::enum_variant_names)] -pub enum Error { - #[snafu(display("Failed to parse duration, err:{}.\nBacktrace:\n{}", err, backtrace))] - ParseDuration { err: String, backtrace: Backtrace }, -} - -define_result!(Error); - -const TIME_MAGNITUDE_1: u64 = 1000; -const TIME_MAGNITUDE_2: u64 = 60; -const TIME_MAGNITUDE_3: u64 = 24; -const UNIT: u64 = 1; -const MS: u64 = UNIT; -const SECOND: u64 = MS * TIME_MAGNITUDE_1; -const MINUTE: u64 = SECOND * TIME_MAGNITUDE_2; -const HOUR: u64 = MINUTE * TIME_MAGNITUDE_2; -const DAY: u64 = HOUR * TIME_MAGNITUDE_3; - -/// Convert Duration to milliseconds. -/// -/// Panic if overflow. Mainly used by `ReadableDuration`. -#[inline] -fn duration_to_ms(d: Duration) -> u64 { - let nanos = u64::from(d.subsec_nanos()); - // Most of case, we can't have so large Duration, so here just panic if overflow - // now. - d.as_secs() * 1_000 + (nanos / 1_000_000) -} - -#[derive(Clone, Debug, Copy, PartialEq, Eq, PartialOrd, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum TimeUnit { - Nanoseconds, - Microseconds, - Milliseconds, - Seconds, - Minutes, - Hours, - Days, -} - -impl From for manifest_pb::TimeUnit { - fn from(unit: TimeUnit) -> Self { - match unit { - TimeUnit::Nanoseconds => manifest_pb::TimeUnit::Nanoseconds, - TimeUnit::Microseconds => manifest_pb::TimeUnit::Microseconds, - TimeUnit::Milliseconds => manifest_pb::TimeUnit::Milliseconds, - TimeUnit::Seconds => manifest_pb::TimeUnit::Seconds, - TimeUnit::Minutes => manifest_pb::TimeUnit::Minutes, - TimeUnit::Hours => manifest_pb::TimeUnit::Hours, - TimeUnit::Days => manifest_pb::TimeUnit::Days, - } - } -} - -impl From for TimeUnit { - fn from(unit: manifest_pb::TimeUnit) -> Self { - match unit { - manifest_pb::TimeUnit::Nanoseconds => TimeUnit::Nanoseconds, - manifest_pb::TimeUnit::Microseconds => TimeUnit::Microseconds, - manifest_pb::TimeUnit::Milliseconds => TimeUnit::Milliseconds, - manifest_pb::TimeUnit::Seconds => TimeUnit::Seconds, - manifest_pb::TimeUnit::Minutes => TimeUnit::Minutes, - manifest_pb::TimeUnit::Hours => TimeUnit::Hours, - manifest_pb::TimeUnit::Days => TimeUnit::Days, - } - } -} - -impl FromStr for TimeUnit { - type Err = String; - - fn from_str(tu_str: &str) -> std::result::Result { - let tu_str = tu_str.trim(); - if !tu_str.is_ascii() { - return Err(format!("unexpected ascii string: {tu_str}")); - } - - match tu_str.to_lowercase().as_str() { - "nanoseconds" => Ok(TimeUnit::Nanoseconds), - "microseconds" => Ok(TimeUnit::Microseconds), - "milliseconds" => Ok(TimeUnit::Milliseconds), - "seconds" => Ok(TimeUnit::Seconds), - "minutes" => Ok(TimeUnit::Minutes), - "hours" => Ok(TimeUnit::Hours), - "days" => Ok(TimeUnit::Days), - _ => Err(format!("unexpected TimeUnit: {tu_str}")), - } - } -} - -impl fmt::Display for TimeUnit { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let s = match self { - TimeUnit::Nanoseconds => "nanoseconds", - TimeUnit::Microseconds => "microseconds", - TimeUnit::Milliseconds => "milliseconds", - TimeUnit::Seconds => "seconds", - 
TimeUnit::Minutes => "minutes", - TimeUnit::Hours => "hours", - TimeUnit::Days => "days", - }; - write!(f, "{s}") - } -} - -pub trait DurationExt { - /// Convert into u64. - /// - /// Returns u64::MAX if overflow - fn as_millis_u64(&self) -> u64; -} - -impl DurationExt for Duration { - #[inline] - fn as_millis_u64(&self) -> u64 { - match self.as_millis().try_into() { - Ok(v) => v, - Err(_) => u64::MAX, - } - } -} - -pub trait InstantExt { - fn saturating_elapsed(&self) -> Duration; -} - -impl InstantExt for Instant { - fn saturating_elapsed(&self) -> Duration { - Instant::now().saturating_duration_since(*self) - } -} - -#[inline] -pub fn secs_to_nanos(s: u64) -> u64 { - s * 1_000_000_000 -} - -#[inline] -pub fn current_time_millis() -> u64 { - Utc::now().timestamp_millis() as u64 -} - -#[inline] -pub fn current_as_rfc3339() -> String { - Utc::now().to_rfc3339() -} - -#[inline] -pub fn format_as_ymdhms(unix_timestamp: i64) -> String { - let dt = DateTime::::from(UNIX_EPOCH + Duration::from_millis(unix_timestamp as u64)); - dt.format("%Y-%m-%d %H:%M:%S").to_string() -} - -pub fn try_to_millis(ts: i64) -> Option { - // https://help.aliyun.com/document_detail/60683.html - if (4294968..=4294967295).contains(&ts) { - return Some(Timestamp::new(ts * 1000)); - } - if (4294967296..=9999999999999).contains(&ts) { - return Some(Timestamp::new(ts)); - } - None -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Ord, PartialOrd, Default)] -pub struct ReadableDuration(pub Duration); - -impl Add for ReadableDuration { - type Output = ReadableDuration; - - fn add(self, rhs: ReadableDuration) -> ReadableDuration { - Self(self.0 + rhs.0) - } -} - -impl AddAssign for ReadableDuration { - fn add_assign(&mut self, rhs: ReadableDuration) { - *self = *self + rhs; - } -} - -impl Sub for ReadableDuration { - type Output = ReadableDuration; - - fn sub(self, rhs: ReadableDuration) -> ReadableDuration { - Self(self.0 - rhs.0) - } -} - -impl SubAssign for ReadableDuration { - fn sub_assign(&mut self, rhs: ReadableDuration) { - *self = *self - rhs; - } -} - -impl Mul for ReadableDuration { - type Output = ReadableDuration; - - fn mul(self, rhs: u32) -> Self::Output { - Self(self.0 * rhs) - } -} - -impl MulAssign for ReadableDuration { - fn mul_assign(&mut self, rhs: u32) { - *self = *self * rhs; - } -} - -impl Div for ReadableDuration { - type Output = ReadableDuration; - - fn div(self, rhs: u32) -> ReadableDuration { - Self(self.0 / rhs) - } -} - -impl DivAssign for ReadableDuration { - fn div_assign(&mut self, rhs: u32) { - *self = *self / rhs; - } -} - -impl From for Duration { - fn from(readable: ReadableDuration) -> Duration { - readable.0 - } -} - -// yingwen: Support From. 
-impl From for ReadableDuration { - fn from(t: Duration) -> ReadableDuration { - ReadableDuration(t) - } -} - -impl FromStr for ReadableDuration { - type Err = String; - - fn from_str(dur_str: &str) -> std::result::Result { - let dur_str = dur_str.trim(); - if !dur_str.is_ascii() { - return Err(format!("unexpected ascii string: {dur_str}")); - } - let err_msg = "valid duration, only d, h, m, s, ms are supported.".to_owned(); - let mut left = dur_str.as_bytes(); - let mut last_unit = DAY + 1; - let mut dur = 0f64; - while let Some(idx) = left.iter().position(|c| b"dhms".contains(c)) { - let (first, second) = left.split_at(idx); - let unit = if second.starts_with(b"ms") { - left = &left[idx + 2..]; - MS - } else { - let u = match second[0] { - b'd' => DAY, - b'h' => HOUR, - b'm' => MINUTE, - b's' => SECOND, - _ => return Err(err_msg), - }; - left = &left[idx + 1..]; - u - }; - if unit >= last_unit { - return Err("d, h, m, s, ms should occur in given order.".to_owned()); - } - // do we need to check 12h360m? - let number_str = unsafe { std::str::from_utf8_unchecked(first) }; - dur += match number_str.trim().parse::() { - Ok(n) => n * unit as f64, - Err(_) => return Err(err_msg), - }; - last_unit = unit; - } - if !left.is_empty() { - return Err(err_msg); - } - if dur.is_sign_negative() { - return Err("duration should be positive.".to_owned()); - } - let secs = dur as u64 / SECOND; - let millis = (dur as u64 % SECOND) as u32 * 1_000_000; - Ok(ReadableDuration(Duration::new(secs, millis))) - } -} - -impl ReadableDuration { - pub const fn secs(secs: u64) -> ReadableDuration { - ReadableDuration(Duration::from_secs(secs)) - } - - pub const fn millis(millis: u64) -> ReadableDuration { - ReadableDuration(Duration::from_millis(millis)) - } - - pub const fn minutes(minutes: u64) -> ReadableDuration { - ReadableDuration::secs(minutes * 60) - } - - pub const fn hours(hours: u64) -> ReadableDuration { - ReadableDuration::minutes(hours * 60) - } - - pub const fn days(days: u64) -> ReadableDuration { - ReadableDuration::hours(days * 24) - } - - pub fn as_secs(&self) -> u64 { - self.0.as_secs() - } - - pub fn as_millis(&self) -> u64 { - duration_to_ms(self.0) - } - - pub fn is_zero(&self) -> bool { - self.0.as_nanos() == 0 - } -} - -impl fmt::Display for ReadableDuration { - #[inline] - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut dur = duration_to_ms(self.0); - let mut written = false; - if dur >= DAY { - written = true; - write!(f, "{}d", dur / DAY)?; - dur %= DAY; - } - if dur >= HOUR { - written = true; - write!(f, "{}h", dur / HOUR)?; - dur %= HOUR; - } - if dur >= MINUTE { - written = true; - write!(f, "{}m", dur / MINUTE)?; - dur %= MINUTE; - } - if dur >= SECOND { - written = true; - write!(f, "{}s", dur / SECOND)?; - dur %= SECOND; - } - if dur > 0 { - written = true; - write!(f, "{dur}ms")?; - } - if !written { - write!(f, "0s")?; - } - Ok(()) - } -} - -impl Serialize for ReadableDuration { - fn serialize(&self, serializer: S) -> std::result::Result - where - S: Serializer, - { - let mut buffer = String::new(); - write!(buffer, "{self}").unwrap(); - serializer.serialize_str(&buffer) - } -} - -impl<'de> Deserialize<'de> for ReadableDuration { - fn deserialize(deserializer: D) -> std::result::Result - where - D: Deserializer<'de>, - { - struct DurVisitor; - - impl<'de> Visitor<'de> for DurVisitor { - type Value = ReadableDuration; - - fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - formatter.write_str("valid duration") - } - - fn visit_str(self, 
dur_str: &str) -> std::result::Result - where - E: de::Error, - { - dur_str.parse().map_err(E::custom) - } - } - - deserializer.deserialize_str(DurVisitor) - } -} - -pub fn parse_duration(v: &str) -> Result { - v.parse::() - .map_err(|err| Error::ParseDuration { - err, - backtrace: Backtrace::generate(), - }) -} - -#[cfg(test)] -mod tests { - use std::thread; - - use super::*; - #[test] - fn test_as_mills_u64() { - let d = Duration::from_millis(100); - assert_eq!(100, d.as_millis_u64()); - - let d = Duration::from_secs(100); - assert_eq!(100000, d.as_millis_u64()); - } - - #[test] - fn test_saturating_elapsed() { - let ins = Instant::now(); - let one_hundred_mills = Duration::from_millis(100); - let error = 10; - thread::sleep(one_hundred_mills); - assert!(ins.saturating_elapsed().as_millis_u64() - 100 < error); - thread::sleep(one_hundred_mills); - assert!(ins.saturating_elapsed().as_millis_u64() - 200 < 2 * error); - } - - #[test] - fn test_duration_construction() { - let mut dur = ReadableDuration::secs(1); - assert_eq!(dur.0, Duration::new(1, 0)); - assert_eq!(dur.as_secs(), 1); - assert_eq!(dur.as_millis(), 1000); - dur = ReadableDuration::millis(1001); - assert_eq!(dur.0, Duration::new(1, 1_000_000)); - assert_eq!(dur.as_secs(), 1); - assert_eq!(dur.as_millis(), 1001); - dur = ReadableDuration::minutes(2); - assert_eq!(dur.0, Duration::new(2 * 60, 0)); - assert_eq!(dur.as_secs(), 120); - assert_eq!(dur.as_millis(), 120000); - dur = ReadableDuration::hours(2); - assert_eq!(dur.0, Duration::new(2 * 3600, 0)); - assert_eq!(dur.as_secs(), 7200); - assert_eq!(dur.as_millis(), 7200000); - } - - #[test] - fn test_parse_readable_duration() { - #[derive(Serialize, Deserialize)] - struct DurHolder { - d: ReadableDuration, - } - - let legal_cases = vec![ - (0, 0, "0s"), - (0, 1, "1ms"), - (2, 0, "2s"), - (24 * 3600, 0, "1d"), - (2 * 24 * 3600, 10, "2d10ms"), - (4 * 60, 0, "4m"), - (5 * 3600, 0, "5h"), - (3600 + 2 * 60, 0, "1h2m"), - (5 * 24 * 3600 + 3600 + 2 * 60, 0, "5d1h2m"), - (3600 + 2, 5, "1h2s5ms"), - (3 * 24 * 3600 + 7 * 3600 + 2, 5, "3d7h2s5ms"), - ]; - for (secs, ms, exp) in legal_cases { - let d = DurHolder { - d: ReadableDuration(Duration::new(secs, ms * 1_000_000)), - }; - let res_str = toml::to_string(&d).unwrap(); - let exp_str = format!("d = {exp:?}\n"); - assert_eq!(res_str, exp_str); - let res_dur: DurHolder = toml::from_str(&exp_str).unwrap(); - assert_eq!(res_dur.d.0, d.d.0); - } - - let decode_cases = vec![(" 0.5 h2m ", 3600 / 2 + 2 * 60, 0)]; - for (src, secs, ms) in decode_cases { - let src = format!("d = {src:?}"); - let res: DurHolder = toml::from_str(&src).unwrap(); - assert_eq!(res.d.0, Duration::new(secs, ms * 1_000_000)); - } - - let illegal_cases = vec!["1H", "1M", "1S", "1MS", "1h1h", "h"]; - for src in illegal_cases { - let src_str = format!("d = {src:?}"); - assert!(toml::from_str::(&src_str).is_err(), "{}", src); - } - assert!(toml::from_str::("d = 23").is_err()); - } - - #[test] - fn test_parse_timeunit() { - let s = "milliseconds"; - assert_eq!(TimeUnit::Milliseconds, s.parse::().unwrap()); - let s = "seconds"; - assert_eq!(TimeUnit::Seconds, s.parse::().unwrap()); - let s = "minutes"; - assert_eq!(TimeUnit::Minutes, s.parse::().unwrap()); - let s = "hours"; - assert_eq!(TimeUnit::Hours, s.parse::().unwrap()); - let s = "days"; - assert_eq!(TimeUnit::Days, s.parse::().unwrap()); - let s = "microseconds"; - assert_eq!(TimeUnit::Microseconds, s.parse::().unwrap()); - } -} diff --git a/src/components/timed_task/Cargo.toml b/src/components/timed_task/Cargo.toml 
deleted file mode 100644 index b1f90e0d3d..0000000000 --- a/src/components/timed_task/Cargo.toml +++ /dev/null @@ -1,36 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "timed_task" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -logger = { workspace = true } -runtime = { workspace = true } -tokio = { workspace = true } diff --git a/src/components/timed_task/src/lib.rs b/src/components/timed_task/src/lib.rs deleted file mode 100644 index b988ec4e91..0000000000 --- a/src/components/timed_task/src/lib.rs +++ /dev/null @@ -1,164 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Timed background tasks. - -use std::{future::Future, time::Duration}; - -use logger::info; -use runtime::{self, JoinHandle, Runtime}; -use tokio::{ - sync::{ - mpsc::{self, UnboundedReceiver, UnboundedSender}, - Mutex, - }, - time, -}; - -/// A task to run periodically. -pub struct TimedTask { - name: String, - period: Duration, - builder: B, -} - -impl TimedTask -where - B: Fn() -> Fut + Send + Sync + 'static, - Fut: Future + Send, -{ - pub fn start_timed_task( - name: String, - runtime: &Runtime, - period: Duration, - builder: B, - ) -> TaskHandle { - let (tx, rx) = mpsc::unbounded_channel(); - let task = TimedTask { - name, - period, - builder, - }; - - let handle = runtime.spawn(async move { - task.run(rx).await; - }); - TaskHandle { - handle: Mutex::new(Some(handle)), - sender: tx, - } - } - - async fn run(&self, mut rx: UnboundedReceiver<()>) { - info!("TimedTask started, name:{}", self.name); - - loop { - // TODO(yingwen): Maybe add a random offset to the peroid. - match time::timeout(self.period, rx.recv()).await { - Ok(_) => { - info!("TimedTask stopped, name:{}", self.name); - - return; - } - Err(_) => { - let future = (self.builder)(); - future.await; - } - } - } - } -} - -/// Handle to the timed task. 
-/// -/// The task will exit asynchronously after this handle is dropped. -pub struct TaskHandle { - handle: Mutex>>, - sender: UnboundedSender<()>, -} - -impl TaskHandle { - /// Explicit stop the task and wait util the task exits. - pub async fn stop_task(&self) -> std::result::Result<(), runtime::Error> { - self.notify_exit(); - - let handle = self.handle.lock().await.take(); - if let Some(h) = handle { - h.await?; - } - - Ok(()) - } - - fn notify_exit(&self) { - if self.sender.send(()).is_err() { - info!("The sender of task is disconnected"); - } - } -} - -impl Drop for TaskHandle { - fn drop(&mut self) { - self.notify_exit(); - } -} - -#[cfg(test)] -mod tests { - use std::sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, - }; - - use super::*; - use crate::runtime::Builder; - - #[test] - fn test_timed_task() { - let period = Duration::from_millis(100); - let runtime = Arc::new( - Builder::default() - .worker_threads(1) - .enable_all() - .build() - .unwrap(), - ); - let tick_count = Arc::new(AtomicUsize::new(0)); - let expect_ticks = 5; - - let rt = runtime.clone(); - rt.block_on(async { - let tc = tick_count.clone(); - let timed_builder = move || { - let count = tc.clone(); - async move { - count.fetch_add(1, Ordering::Relaxed); - } - }; - - let name = "test-timed".to_string(); - let handle = TimedTask::start_timed_task(name, &runtime, period, timed_builder); - - // Sleep more times, ensure the builder is called enough times. - time::sleep(period * (expect_ticks as u32 + 2)).await; - - handle.stop_task().await.unwrap(); - }); - - assert!(tick_count.load(Ordering::Relaxed) > expect_ticks); - } -} diff --git a/src/components/toml_ext/Cargo.toml b/src/components/toml_ext/Cargo.toml deleted file mode 100644 index bca2ee890f..0000000000 --- a/src/components/toml_ext/Cargo.toml +++ /dev/null @@ -1,40 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "toml_ext" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -macros = { workspace = true } -serde = { workspace = true } -snafu = { workspace = true } -toml = { workspace = true } - -[dev-dependencies] -tempfile = { workspace = true } diff --git a/src/components/toml_ext/src/lib.rs b/src/components/toml_ext/src/lib.rs deleted file mode 100644 index a411e7f24f..0000000000 --- a/src/components/toml_ext/src/lib.rs +++ /dev/null @@ -1,120 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Toml config utilities. - -use std::{fs::File, io::Read}; - -use macros::define_result; -use serde::de; -use snafu::{Backtrace, ResultExt, Snafu}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display( - "Failed to open file, path:{}, err:{}.\nBacktrace:\n{}", - path, - source, - backtrace - ))] - OpenFile { - path: String, - source: std::io::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to read toml, path:{}, err:{}.\nBacktrace:\n{}", - path, - source, - backtrace - ))] - ReadToml { - path: String, - source: std::io::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to parse toml, path:{}, err:{}.\nBacktrace:\n{}", - path, - source, - backtrace - ))] - ParseToml { - path: String, - source: toml::de::Error, - backtrace: Backtrace, - }, -} - -define_result!(Error); - -/// Read toml file from given `path` to `toml_buf`, then parsed it to `T` and -/// return. -pub fn parse_toml_from_path(path: &str, toml_buf: &mut String) -> Result -where - T: de::DeserializeOwned, -{ - let mut file = File::open(path).context(OpenFile { path })?; - file.read_to_string(toml_buf).context(ReadToml { path })?; - - toml::from_str(toml_buf).context(ParseToml { path }) -} - -#[cfg(test)] -mod tests { - use std::io::Write; - - use serde::Deserialize; - use tempfile::tempdir; - - use super::*; - - #[test] - fn test_parse_toml_from_path() { - let dir = tempdir().unwrap(); - let file_path = dir.path().join("test.toml"); - let path = file_path.to_str().unwrap(); - - let mut f = File::create(path).expect("Failed to create test config file"); - f.write_all(b"host=\"localhost\"\nport=1081") - .expect("Failed to write test config"); - - f.sync_all().expect("Failed to sync test config"); - - #[derive(Clone, Debug, Deserialize)] - struct TestConfig { - host: String, - port: u16, - } - let mut config = TestConfig { - host: "".to_string(), - port: 0, - }; - - assert_eq!("", config.host); - assert_eq!(0, config.port); - - let mut toml_str = String::new(); - - config = parse_toml_from_path(path, &mut toml_str).unwrap(); - - assert_eq!("localhost", config.host); - assert_eq!(1081, config.port); - } -} diff --git a/src/components/trace_metric/Cargo.toml b/src/components/trace_metric/Cargo.toml deleted file mode 100644 index 5a1ce0385f..0000000000 --- a/src/components/trace_metric/Cargo.toml +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "trace_metric" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -trace_metric_derive = { workspace = true } diff --git a/src/components/trace_metric/src/collector.rs b/src/components/trace_metric/src/collector.rs deleted file mode 100644 index 60a4f6271e..0000000000 --- a/src/components/trace_metric/src/collector.rs +++ /dev/null @@ -1,197 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{ - collections::BTreeMap, - sync::{Arc, Mutex}, -}; - -use crate::metric::{Metric, MetricAggregator}; - -/// A collector for metrics of a single read request. -/// -/// It can be cloned and shared among threads. -#[derive(Clone, Debug, Default)] -pub struct MetricsCollector { - name: String, - metrics: Arc>>, - children: Arc>>, -} - -impl MetricsCollector { - /// Create a new collector with the given name. - pub fn new(name: String) -> Self { - Self { - name, - metrics: Arc::new(Mutex::new(vec![])), - children: Arc::new(Mutex::new(vec![])), - } - } - - /// Collect a metric. - pub fn collect(&self, metric: Metric) { - let mut metrics = self.metrics.lock().unwrap(); - metrics.push(metric); - } - - /// Span a child collector with a given name. - pub fn span(&self, name: String) -> MetricsCollector { - let mut children = self.children.lock().unwrap(); - let child = Self::new(name); - children.push(child.clone()); - child - } - - #[inline] - pub fn name(&self) -> &str { - &self.name - } - - /// Calls a closure on each top-level metrics of this collector. - pub fn for_each_metric(&self, f: &mut impl FnMut(&Metric)) { - let metrics = self.metrics.lock().unwrap(); - - let mut metrics_by_name = BTreeMap::new(); - for metric in metrics.iter() { - metrics_by_name - .entry(metric.name()) - .or_insert_with(Vec::new) - .push(metric); - } - - for metrics in metrics_by_name.values() { - if metrics.is_empty() { - continue; - } - - if let Some(op) = metrics[0].aggregator() { - match op { - MetricAggregator::Sum => { - let mut first = metrics[0].clone(); - for m in &metrics[1..] { - first.sum(m); - } - // only apply fn to first metric. - f(&first); - } - } - } else { - for metric in metrics { - f(metric); - } - } - } - } - - /// Visit all the collectors including itself and its children. 
- pub fn visit(&self, visitor: &mut impl CollectorVisitor) { - self.visit_with_level(0, visitor); - } - - /// Visit all the collectors including itself and its children. - fn visit_with_level(&self, level: usize, visitor: &mut impl CollectorVisitor) { - visitor.visit(level, self); - // Clone the children to avoid holding the lock, which may cause deadlocks - // because the lock order is not guaranteed. - let children = self.children.lock().unwrap().clone(); - for child in children { - child.visit_with_level(level + 1, visitor); - } - } -} - -pub trait CollectorVisitor { - fn visit(&mut self, level: usize, collector: &MetricsCollector); -} - -#[derive(Default)] -pub struct FormatCollectorVisitor { - buffer: String, -} - -impl FormatCollectorVisitor { - pub fn into_string(self) -> String { - self.buffer - } - - fn indent(level: usize) -> String { - " ".repeat(level * 4) - } - - fn append_line(&mut self, indent: &str, line: &str) { - self.buffer.push_str(&format!("{indent}{line}\n")); - } -} - -impl CollectorVisitor for FormatCollectorVisitor { - fn visit(&mut self, level: usize, collector: &MetricsCollector) { - let collector_indent = Self::indent(level); - self.append_line(&collector_indent, &format!("{}:", collector.name())); - let metric_indent = Self::indent(level + 1); - collector.for_each_metric(&mut |metric| { - self.append_line(&metric_indent, &format!("{metric:?}")); - }); - } -} - -#[cfg(test)] -mod tests { - use std::time::Duration; - - use super::*; - - #[test] - fn test_metrics_collector() { - let collector = MetricsCollector::new("root".to_string()); - collector.collect(Metric::number("counter".to_string(), 1, None)); - collector.collect(Metric::duration( - "elapsed".to_string(), - Duration::from_millis(100), - None, - )); - let child_1_0 = collector.span("child_1_0".to_string()); - child_1_0.collect(Metric::boolean("boolean".to_string(), false, None)); - - let child_2_0 = child_1_0.span("child_2_0".to_string()); - child_2_0.collect(Metric::number("counter".to_string(), 1, None)); - child_2_0.collect(Metric::duration( - "elapsed".to_string(), - Duration::from_millis(100), - None, - )); - - let child_1_1 = collector.span("child_1_1".to_string()); - child_1_1.collect(Metric::boolean("boolean".to_string(), false, None)); - let _child_1_2 = collector.span("child_1_2".to_string()); - - let mut visitor = FormatCollectorVisitor::default(); - collector.visit(&mut visitor); - let expect_output = r#"root: - counter=1 - elapsed=100ms - child_1_0: - boolean=false - child_2_0: - counter=1 - elapsed=100ms - child_1_1: - boolean=false - child_1_2: -"#; - assert_eq!(expect_output, &visitor.into_string()); - } -} diff --git a/src/components/trace_metric/src/lib.rs b/src/components/trace_metric/src/lib.rs deleted file mode 100644 index d49f4b94fa..0000000000 --- a/src/components/trace_metric/src/lib.rs +++ /dev/null @@ -1,23 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-pub mod collector;
-pub mod metric;
-
-pub use collector::MetricsCollector;
-pub use metric::Metric;
-pub use trace_metric_derive::TraceMetricWhenDrop;
diff --git a/src/components/trace_metric/src/metric.rs b/src/components/trace_metric/src/metric.rs
deleted file mode 100644
index 83946d649d..0000000000
--- a/src/components/trace_metric/src/metric.rs
+++ /dev/null
@@ -1,114 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{fmt, time::Duration};
-
-#[derive(Clone)]
-pub enum MetricAggregator {
-    Sum,
-}
-
-#[derive(Clone)]
-pub struct MetricValue<T> {
-    pub name: String,
-    pub val: T,
-    pub aggregator: Option<MetricAggregator>,
-}
-
-#[derive(Clone)]
-pub enum Metric {
-    Boolean(MetricValue<bool>),
-    Number(MetricValue<usize>),
-    Duration(MetricValue<Duration>),
-}
-
-impl Metric {
-    #[inline]
-    pub fn number(name: String, val: usize, aggregator: Option<MetricAggregator>) -> Self {
-        Metric::Number(MetricValue {
-            name,
-            val,
-            aggregator,
-        })
-    }
-
-    #[inline]
-    pub fn duration(name: String, val: Duration, aggregator: Option<MetricAggregator>) -> Self {
-        Metric::Duration(MetricValue {
-            name,
-            val,
-            aggregator,
-        })
-    }
-
-    #[inline]
-    pub fn boolean(name: String, val: bool, aggregator: Option<MetricAggregator>) -> Self {
-        Metric::Boolean(MetricValue {
-            name,
-            val,
-            aggregator,
-        })
-    }
-
-    #[inline]
-    pub fn name(&self) -> &str {
-        match self {
-            Self::Boolean(v) => &v.name,
-            Self::Number(v) => &v.name,
-            Self::Duration(v) => &v.name,
-        }
-    }
-
-    #[inline]
-    pub fn aggregator(&self) -> &Option<MetricAggregator> {
-        match self {
-            Self::Boolean(v) => &v.aggregator,
-            Self::Number(v) => &v.aggregator,
-            Self::Duration(v) => &v.aggregator,
-        }
-    }
-
-    // Sum metric values together when metrics are same type,
-    // Panic if their types are different.
- #[inline] - pub fn sum(&mut self, rhs: &Self) { - match (self, rhs) { - (Self::Boolean(lhs), Self::Boolean(rhs)) => lhs.val |= rhs.val, - (Self::Number(lhs), Self::Number(rhs)) => lhs.val += rhs.val, - (Self::Duration(lhs), Self::Duration(rhs)) => lhs.val += rhs.val, - (lhs, rhs) => { - panic!("Only same type metric could be applied, lhs:{lhs:?}, rhs:{rhs:?}") - } - } - } -} - -impl fmt::Debug for MetricValue { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}={:?}", self.name, self.val) - } -} - -impl fmt::Debug for Metric { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Metric::Boolean(v) => write!(f, "{}={:?}", v.name, v.val), - Metric::Number(v) => write!(f, "{}={:?}", v.name, v.val), - Metric::Duration(v) => write!(f, "{}={:?}", v.name, v.val), - } - } -} diff --git a/src/components/trace_metric_derive/Cargo.toml b/src/components/trace_metric_derive/Cargo.toml deleted file mode 100644 index 98b759199d..0000000000 --- a/src/components/trace_metric_derive/Cargo.toml +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "trace_metric_derive" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[lib] -proc-macro = true - -[dependencies] -proc-macro2 = "1.0" -quote = "1.0" -syn = { version = "1.0", features = ["full"] } diff --git a/src/components/trace_metric_derive/src/builder.rs b/src/components/trace_metric_derive/src/builder.rs deleted file mode 100644 index 1fd593c1ef..0000000000 --- a/src/components/trace_metric_derive/src/builder.rs +++ /dev/null @@ -1,252 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use proc_macro::TokenStream; -use proc_macro2::Span; -use quote::{quote, ToTokens, TokenStreamExt}; -use syn::{DeriveInput, Field, Generics, Ident}; - -const COLLECTOR_FIELD_TOKENS: &str = "(collector)"; -const NUMBER_FIELD_TOKENS: &str = "number"; -const DURATION_FIELD_TOKENS: &str = "duration"; -const BOOLEAN_FIELD_TOKENS: &str = "boolean"; - -#[derive(Debug, Clone)] -enum MetricAggregator { - Sum, -} - -impl ToTokens for MetricAggregator { - fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) { - tokens.append(Ident::new(&format!("{self:?}"), Span::call_site())); - } -} - -#[derive(Debug)] -enum MetricType { - Number, - Duration, - Boolean, -} - -impl ToTokens for MetricType { - fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) { - tokens.append(Ident::new( - &format!("{self:?}").to_lowercase(), - Span::call_site(), - )); - } -} - -struct MetricMetadata { - typ: MetricType, - aggregator: Option, -} - -impl MetricMetadata { - fn parse_aggregator(s: &str) -> Option { - match s.to_lowercase().as_str() { - "sum" => Some(MetricAggregator::Sum), - _ => None, - } - } - - fn parse_type(s: &str) -> Option { - if s == NUMBER_FIELD_TOKENS { - Some(MetricType::Number) - } else if s == DURATION_FIELD_TOKENS { - Some(MetricType::Duration) - } else if s == BOOLEAN_FIELD_TOKENS { - Some(MetricType::Boolean) - } else { - None - } - } - - fn try_from_tokens(tokens: &proc_macro2::TokenStream) -> Option { - for tree in tokens.clone().into_iter() { - if let proc_macro2::TokenTree::Group(group) = tree { - let trees = group.stream().into_iter().collect::>(); - match trees.len() { - // #[metric(number)] - 1 => { - return Self::parse_type(&trees[0].to_string()).map(|typ| Self { - typ, - aggregator: None, - }) - } - // #[metric(number, add)] - 3 => { - let typ = Self::parse_type(&trees[0].to_string())?; - let aggregator = Self::parse_aggregator(&trees[2].to_string())?; - return Some(Self { - typ, - aggregator: Some(aggregator), - }); - } - _ => return None, - } - } - } - - None - } -} - -struct MetricField { - metric_metadata: MetricMetadata, - field_name: Ident, -} - -impl MetricField { - fn try_from_field(field: Field) -> Option { - for attr in field.attrs.iter() { - if !attr.path.is_ident("metric") { - continue; - } - - let field_name = field.ident.expect("Metric field must have a name"); - let metric_metadata = - MetricMetadata::try_from_tokens(&attr.tokens).expect("Unknown metric type"); - return Some(Self { - metric_metadata, - field_name, - }); - } - - None - } -} - -struct CollectorField { - field_name: Ident, - optional: bool, -} - -impl CollectorField { - fn try_from_field(field: Field) -> Option { - let is_collector_field = field.attrs.iter().any(|attr| { - attr.path.is_ident("metric") - && attr.tokens.to_string().as_str() == COLLECTOR_FIELD_TOKENS - }); - - if !is_collector_field { - None - } else { - let ident = field.ident.expect("Collector field must be named"); - let type_tokens = field.ty.into_token_stream().to_string(); - Some(Self { - field_name: ident, - optional: type_tokens.starts_with("Option"), - }) - } - } -} - -pub struct Builder { - struct_name: Ident, - metric_fields: Vec, - collector_field: CollectorField, - generics: Generics, -} - -impl Builder { - pub fn parse_from_ast(ast: DeriveInput) -> Self { - let struct_name = ast.ident; - let (metric_fields, collector_field) = match ast.data { - syn::Data::Struct(syn::DataStruct { - fields: syn::Fields::Named(syn::FieldsNamed { named, .. }), - .. 
- }) => { - let mut metric_fields = Vec::new(); - let mut collector_field = None; - for field in named { - if let Some(collector) = CollectorField::try_from_field(field.clone()) { - collector_field = Some(collector); - } else if let Some(metric_field) = MetricField::try_from_field(field) { - metric_fields.push(metric_field); - } - } - ( - metric_fields, - collector_field.expect("TraceMetricWhenDrop must have a collector field"), - ) - } - _ => panic!("TraceMetricWhenDrop only supports struct with named fields"), - }; - - Self { - struct_name, - metric_fields, - collector_field, - generics: ast.generics, - } - } - - pub fn build(&self) -> TokenStream { - let mut collect_statements = Vec::with_capacity(self.metric_fields.len()); - for metric_field in self.metric_fields.iter() { - let field_name = &metric_field.field_name; - let metadata = &metric_field.metric_metadata; - let aggregator = &metadata.aggregator; - let metric_type = &metadata.typ; - let metric = if let Some(aggregator) = aggregator { - quote! { ::trace_metric::Metric::#metric_type(stringify!(#field_name).to_string(), - self.#field_name, - Some(::trace_metric::metric::MetricAggregator::#aggregator)) - } - } else { - quote! { ::trace_metric::Metric::#metric_type(stringify!(#field_name).to_string(), - self.#field_name, - None) - } - }; - - let statement = quote! { - collector.collect(#metric); - }; - collect_statements.push(statement); - } - - let where_clause = &self.generics.where_clause; - let generics = &self.generics; - let struct_name = &self.struct_name; - let collector_field_name = &self.collector_field.field_name; - let stream = if self.collector_field.optional { - quote! { - impl #generics ::core::ops::Drop for #struct_name #generics #where_clause { - fn drop(&mut self) { - if let Some(collector) = &self.#collector_field_name { - #(#collect_statements)* - } - } - } - } - } else { - quote! { - impl #generics ::core::ops::Drop for #struct_name #generics #where_clause { - fn drop(&mut self) { - let collector = &self.#collector_field_name; - #(#collect_statements)* - } - } - } - }; - - stream.into() - } -} diff --git a/src/components/trace_metric_derive/src/lib.rs b/src/components/trace_metric_derive/src/lib.rs deleted file mode 100644 index cef4937f18..0000000000 --- a/src/components/trace_metric_derive/src/lib.rs +++ /dev/null @@ -1,29 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use proc_macro::TokenStream; -use syn::{parse_macro_input, DeriveInput}; - -mod builder; - -use builder::Builder; - -#[proc_macro_derive(TraceMetricWhenDrop, attributes(metric))] -pub fn derive(input: TokenStream) -> TokenStream { - let ast = parse_macro_input!(input as DeriveInput); - Builder::parse_from_ast(ast).build() -} diff --git a/src/components/trace_metric_derive_tests/Cargo.toml b/src/components/trace_metric_derive_tests/Cargo.toml deleted file mode 100644 index 02008e7201..0000000000 --- a/src/components/trace_metric_derive_tests/Cargo.toml +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "trace_metric_derive_tests" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -trace_metric = { workspace = true } diff --git a/src/components/trace_metric_derive_tests/src/lib.rs b/src/components/trace_metric_derive_tests/src/lib.rs deleted file mode 100644 index 9c0778bbbf..0000000000 --- a/src/components/trace_metric_derive_tests/src/lib.rs +++ /dev/null @@ -1,74 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::time::Duration; - -use trace_metric::{MetricsCollector, TraceMetricWhenDrop}; - -#[derive(Debug, Clone, TraceMetricWhenDrop)] -pub struct ExampleMetrics { - #[metric(number, sum)] - pub counter: usize, - #[metric(duration)] - pub elapsed: Duration, - #[metric(boolean)] - pub boolean: bool, - pub foo: String, - - #[metric(collector)] - pub collector: MetricsCollector, -} - -#[cfg(test)] -mod test { - use trace_metric::collector::FormatCollectorVisitor; - - use super::*; - - #[test] - fn basic() { - let collector = MetricsCollector::new("test".to_string()); - { - let _ = ExampleMetrics { - counter: 1, - elapsed: Duration::from_secs(1), - boolean: true, - foo: "foor".to_owned(), - collector: collector.clone(), - }; - let _ = ExampleMetrics { - counter: 10, - elapsed: Duration::from_secs(2), - boolean: false, - foo: "bar".to_owned(), - collector: collector.clone(), - }; - } - let mut formatter = FormatCollectorVisitor::default(); - collector.visit(&mut formatter); - let actual = formatter.into_string(); - - let expected = r#"test: - boolean=true - boolean=false - counter=11 - elapsed=1s - elapsed=2s -"#; - assert_eq!(expected, actual); - } -} diff --git a/src/components/tracing_util/Cargo.toml b/src/components/tracing_util/Cargo.toml deleted file mode 100644 index 4992e5424a..0000000000 --- a/src/components/tracing_util/Cargo.toml +++ /dev/null @@ -1,21 +0,0 @@ -[package] -name = "tracing_util" -authors = ["Databend Authors ", "HoraeDB Authors"] -publish = false - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -console-subscriber = "0.1.9" -lazy_static = { workspace = true } -serde = { workspace = true } -tracing = "0.1.37" -tracing-appender = "0.2.2" -tracing-subscriber = "0.3.17" diff --git a/src/components/tracing_util/src/lib.rs b/src/components/tracing_util/src/lib.rs deleted file mode 100644 index 2fcad4b5a3..0000000000 --- a/src/components/tracing_util/src/lib.rs +++ /dev/null @@ -1,7 +0,0 @@ -// Copyright 2020 Datafuse Labs. -// fork from:https://github.com/datafuselabs/databend/tree/master/common/tracing - -mod logging; - -pub use logging::{init_default_tracing, init_default_ut_tracing, init_tracing_with_file, Config}; -pub use tracing_appender; diff --git a/src/components/tracing_util/src/logging.rs b/src/components/tracing_util/src/logging.rs deleted file mode 100644 index 9fd477f951..0000000000 --- a/src/components/tracing_util/src/logging.rs +++ /dev/null @@ -1,178 +0,0 @@ -// Copyright 2020 Datafuse Labs. - -use std::{ - fs::OpenOptions, - path::Path, - sync::{Arc, Mutex, Once}, -}; - -use fmt::format::FmtSpan; -use lazy_static::lazy_static; -use serde::{Deserialize, Serialize}; -use tracing::Subscriber; -use tracing_appender::{ - non_blocking::WorkerGuard, - rolling::{RollingFileAppender, Rotation}, -}; -use tracing_subscriber::{ - fmt, - fmt::{time::Uptime, Layer}, - prelude::*, - registry::Registry, - EnvFilter, -}; - -/// Write logs to stdout. -pub fn init_default_tracing() { - static START: Once = Once::new(); - - START.call_once(|| { - init_tracing_stdout(); - }); -} - -/// Init tracing for unittest. -/// Write logs to file `unittest`. 
-pub fn init_default_ut_tracing() { - static START: Once = Once::new(); - - START.call_once(|| { - let mut g = GLOBAL_UT_LOG_GUARD.as_ref().lock().unwrap(); - let (work_guard, sub) = init_file_subscriber("unittest", "_logs"); - tracing::subscriber::set_global_default(sub) - .expect("error setting global tracing subscriber"); - - tracing::info!("init default ut tracing"); - *g = Some(work_guard); - }); -} - -lazy_static! { - static ref GLOBAL_UT_LOG_GUARD: Arc>> = Arc::new(Mutex::new(None)); -} - -fn init_tracing_stdout() { - let fmt_layer = Layer::default() - .with_thread_ids(true) - .with_thread_names(false) - .with_ansi(false) - .with_span_events(fmt::format::FmtSpan::FULL); - - let subscriber = Registry::default() - .with(EnvFilter::from_default_env()) - .with(fmt_layer); - - tracing::subscriber::set_global_default(subscriber) - .expect("error setting global tracing subscriber"); -} - -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(default)] -/// The configurations for tracing. -pub struct Config { - /// The prefix of tracing log files. - pub prefix: String, - /// The directory of tracing log files. - pub dir: String, - /// The level of tracing. - pub level: String, - /// Console config. - pub console: Option, -} - -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct ConsoleConfig { - pub port: u16, -} - -impl Default for Config { - fn default() -> Self { - Self { - prefix: String::from("tracing"), - dir: String::from("/tmp/horaedb"), - level: String::from("info"), - console: None, - } - } -} - -/// Write logs to file and rotation. -pub fn init_tracing_with_file(config: &Config, node_addr: &str, rotation: Rotation) -> WorkerGuard { - let file_appender = RollingFileAppender::new(rotation, &config.dir, &config.prefix); - let (file_writer, file_guard) = tracing_appender::non_blocking(file_appender); - let f_layer = Layer::new() - .with_timer(Uptime::default()) - .with_writer(file_writer) - .with_thread_ids(true) - .with_thread_names(true) - .with_ansi(false) - .with_span_events(FmtSpan::ENTER | FmtSpan::CLOSE); - - let subscriber = Registry::default().with(f_layer); - match &config.console { - Some(console) => { - let console_addr = format!("{}:{}", node_addr, console.port); - let console_addr: std::net::SocketAddr = console_addr - .parse() - .unwrap_or_else(|_| panic!("invalid tokio console addr:{console_addr}")); - let directives = format!("tokio=trace,runtime=trace,{}", config.level); - - // It is part of initializing logger, so just print it to stdout. - println!("Tokio console server tries to listen on {console_addr}..."); - let subscriber = subscriber.with(EnvFilter::new(directives)).with( - console_subscriber::ConsoleLayer::builder() - .server_addr(console_addr) - .spawn(), - ); - tracing::subscriber::set_global_default(subscriber) - .expect("error setting global tracing subscriber"); - } - None => { - let subscriber = subscriber.with(EnvFilter::new(&config.level)); - tracing::subscriber::set_global_default(subscriber) - .expect("error setting global tracing subscriber"); - } - }; - - file_guard -} - -/// Create a file based tracing/logging subscriber. -/// A guard must be held during using the logging. 
-fn init_file_subscriber(app_name: &str, dir: &str) -> (WorkerGuard, impl Subscriber) { - let path_str = dir.to_string() + "/" + app_name; - let path: &Path = path_str.as_ref(); - - // open log file - - let mut open_options = OpenOptions::new(); - open_options.append(true).create(true); - - let mut open_res = open_options.open(path); - if open_res.is_err() { - if let Some(parent) = path.parent() { - std::fs::create_dir_all(parent).unwrap(); - open_res = open_options.open(path); - } - } - - let f = open_res.unwrap(); - - // build subscriber - - let (writer, writer_guard) = tracing_appender::non_blocking(f); - - let f_layer = Layer::new() - .with_timer(Uptime::default()) - .with_writer(writer) - .with_thread_ids(true) - .with_thread_names(false) - .with_ansi(false) - .with_span_events(FmtSpan::ENTER | FmtSpan::CLOSE); - - let subscriber = Registry::default() - .with(EnvFilter::from_default_env()) - .with(f_layer); - - (writer_guard, subscriber) -} diff --git a/src/df_engine_extensions/Cargo.toml b/src/df_engine_extensions/Cargo.toml deleted file mode 100644 index ec00b7a5d7..0000000000 --- a/src/df_engine_extensions/Cargo.toml +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "df_engine_extensions" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -arrow = { workspace = true } -async-recursion = "1.0.4" -async-trait = { workspace = true } -catalog = { workspace = true, features = ["test"] } -common_types = { workspace = true, features = ["test"] } -datafusion = { workspace = true } -datafusion-proto = { workspace = true } -futures = { workspace = true } -generic_error = { workspace = true } -horaedbproto = { workspace = true } -lazy_static = { workspace = true } -prometheus = { workspace = true } -prost = { workspace = true } -runtime = { workspace = true } -snafu = { workspace = true } -table_engine = { workspace = true } -trace_metric = { workspace = true } - -[dev-dependencies] -insta = { version = "1.31.0" } -tokio = { workspace = true } diff --git a/src/df_engine_extensions/src/codec.rs b/src/df_engine_extensions/src/codec.rs deleted file mode 100644 index b94a831c58..0000000000 --- a/src/df_engine_extensions/src/codec.rs +++ /dev/null @@ -1,113 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{fmt, sync::Arc}; - -use datafusion::{ - error::{DataFusionError, Result as DfResult}, - execution::FunctionRegistry, - physical_plan::ExecutionPlan, -}; -use datafusion_proto::physical_plan::PhysicalExtensionCodec; -use horaedbproto::remote_engine::{extension_node::TypedExtension, ExtensionNode}; -use prost::Message; - -use crate::dist_sql_query::codec::DistSqlQueryCodec; - -/// Codec for specific extension physical plan -pub trait TypedPhysicalExtensionCodec: fmt::Debug + Sync + Send + 'static { - fn try_decode( - &self, - typed_extension: &TypedExtension, - inputs: &[Arc], - registry: &dyn FunctionRegistry, - ) -> Option>>; - - fn try_encode(&self, node: Arc) -> Option>; -} - -/// HoraeDB datafusion `PhysicalExtensionCodec` -/// Each extension physical plan will define its `TypedPhysicalExtensionCodec`, -/// and register into here. -#[derive(Debug)] -pub struct PhysicalExtensionCodecImpl { - typed_codecs: Vec>, -} - -impl PhysicalExtensionCodecImpl { - pub fn new() -> Self { - let typed_codecs = vec![Box::new(DistSqlQueryCodec) as _]; - - Self { typed_codecs } - } -} - -impl PhysicalExtensionCodec for PhysicalExtensionCodecImpl { - fn try_decode( - &self, - buf: &[u8], - inputs: &[Arc], - registry: &dyn datafusion::execution::FunctionRegistry, - ) -> DfResult> { - let extension_node = ExtensionNode::decode(buf).map_err(|e| { - DataFusionError::Internal(format!("failed to decode extension physical plan, err{e}")) - })?; - - let typed_extension = extension_node - .typed_extension - .ok_or(DataFusionError::Internal( - "typed extension not found".to_string(), - ))?; - - for typed_codec in &self.typed_codecs { - if let Some(result) = typed_codec.try_decode(&typed_extension, inputs, registry) { - return result; - } - } - - Err(DataFusionError::Internal( - "unimplemented extension physical plan".to_string(), - )) - } - - fn try_encode(&self, node: Arc, buf: &mut Vec) -> DfResult<()> { - for typed_codec in &self.typed_codecs { - if let Some(result) = typed_codec.try_encode(node.clone()) { - let typed_extension = result?; - let extension_node = ExtensionNode { - typed_extension: Some(typed_extension), - }; - - return extension_node.encode(buf).map_err(|e| { - DataFusionError::Internal(format!( - "failed to encode extension physical plan, err{e}" - )) - }); - } - } - - Err(DataFusionError::Internal( - "unimplemented extension physical plan".to_string(), - )) - } -} - -impl Default for PhysicalExtensionCodecImpl { - fn default() -> Self { - Self::new() - } -} diff --git a/src/df_engine_extensions/src/dist_sql_query/codec.rs b/src/df_engine_extensions/src/dist_sql_query/codec.rs deleted file mode 100644 index 556af4f761..0000000000 --- a/src/df_engine_extensions/src/dist_sql_query/codec.rs +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use datafusion::{ - error::{DataFusionError, Result as DfResult}, - execution::FunctionRegistry, - physical_plan::ExecutionPlan, -}; -use horaedbproto::remote_engine::{extension_node::TypedExtension, DistSqlQueryExtensionNode}; - -use crate::{ - codec::TypedPhysicalExtensionCodec, dist_sql_query::physical_plan::UnresolvedSubTableScan, -}; - -#[derive(Debug)] -pub struct DistSqlQueryCodec; - -impl TypedPhysicalExtensionCodec for DistSqlQueryCodec { - // It is possible to have more extension physical plans, so `if let` is - // necessary. - #[allow(irrefutable_let_patterns)] - fn try_decode( - &self, - typed_extension: &TypedExtension, - _inputs: &[Arc], - _registry: &dyn FunctionRegistry, - ) -> Option>> { - if let TypedExtension::DistSqlQuery(extension) = typed_extension { - match extension.unresolved_sub_scan.clone() { - Some(plan_pb) => { - Some(UnresolvedSubTableScan::try_from(plan_pb).map(|plan| Arc::new(plan) as _)) - } - None => Some(Err(DataFusionError::Internal( - "actual node not found in dist query extension plan".to_string(), - ))), - } - } else { - None - } - } - - fn try_encode(&self, node: Arc) -> Option> { - let plan_pb_res: DfResult = - if let Some(plan) = node.as_any().downcast_ref::() { - plan.clone().try_into().map(|pb| DistSqlQueryExtensionNode { - unresolved_sub_scan: Some(pb), - }) - } else { - return None; - }; - - Some(plan_pb_res.map(TypedExtension::DistSqlQuery)) - } -} - -#[cfg(test)] -mod test { - use datafusion::{physical_plan::displayable, prelude::SessionContext}; - use datafusion_proto::bytes::{ - physical_plan_from_bytes_with_extension_codec, physical_plan_to_bytes_with_extension_codec, - }; - - use crate::{codec::PhysicalExtensionCodecImpl, dist_sql_query::test_util::TestContext}; - - #[test] - fn test_sub_table_scan_codec() { - let test_ctx = TestContext::default(); - let sub_table_plan = test_ctx.build_basic_sub_table_plan(); - let extension_codec = PhysicalExtensionCodecImpl::default(); - let session_ctx = SessionContext::default(); - - // Encode and decode again - let encoded_plan = - physical_plan_to_bytes_with_extension_codec(sub_table_plan.clone(), &extension_codec) - .unwrap(); - let re_decoded_plan = physical_plan_from_bytes_with_extension_codec( - &encoded_plan, - &session_ctx, - &extension_codec, - ) - .unwrap(); - - // Compare. - let expected = displayable(sub_table_plan.as_ref()) - .indent(true) - .to_string(); - let re_decoded = displayable(re_decoded_plan.as_ref()) - .indent(true) - .to_string(); - - assert_eq!(expected, re_decoded); - } -} diff --git a/src/df_engine_extensions/src/dist_sql_query/mod.rs b/src/df_engine_extensions/src/dist_sql_query/mod.rs deleted file mode 100644 index f41d8792b6..0000000000 --- a/src/df_engine_extensions/src/dist_sql_query/mod.rs +++ /dev/null @@ -1,185 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{ - fmt, - sync::{Arc, Mutex}, -}; - -use async_trait::async_trait; -use common_types::projected_schema::ProjectedSchema; -use datafusion::{ - error::{DataFusionError, Result as DfResult}, - execution::TaskContext, - physical_plan::{ExecutionPlan, SendableRecordBatchStream}, -}; -use futures::future::BoxFuture; -use generic_error::BoxError; -use runtime::Priority; -use table_engine::{predicate::PredicateRef, remote::model::TableIdentifier, table::TableRef}; - -pub mod codec; -pub mod physical_plan; -pub mod resolver; -#[cfg(test)] -pub mod test_util; - -/// Remote datafusion physical plan executor -pub trait RemotePhysicalPlanExecutor: fmt::Debug + Send + Sync + 'static { - fn execute( - &self, - task_context: RemoteTaskContext, - table: TableIdentifier, - plan: Arc, - ) -> DfResult>>; -} - -pub type RemotePhysicalPlanExecutorRef = Arc; - -/// Executable scan's builder -/// -/// It is not suitable to restrict the detailed implementation of executable -/// scan, so we define a builder here which return the general `ExecutionPlan`. -#[async_trait] -pub trait ExecutableScanBuilder: fmt::Debug + Send + Sync + 'static { - async fn build( - &self, - table: TableRef, - ctx: TableScanContext, - priority: Priority, - ) -> DfResult>; -} - -type ExecutableScanBuilderRef = Box; - -pub struct RemoteTaskContext { - pub task_ctx: Arc, - pub remote_metrics: Arc>>, - pub is_analyze: bool, -} - -impl RemoteTaskContext { - pub fn new( - task_ctx: Arc, - remote_metrics: Arc>>, - is_analyze: bool, - ) -> Self { - Self { - task_ctx, - remote_metrics, - is_analyze, - } - } -} - -#[derive(Clone)] -pub struct TableScanContext { - pub batch_size: usize, - - /// Suggested read parallelism, the actual returned stream should equal to - /// `read_parallelism`. - pub read_parallelism: usize, - - /// The schema and projection for read, the output data should match this - /// schema. - pub projected_schema: ProjectedSchema, - - /// Predicate of the query. 
- pub predicate: PredicateRef, -} - -impl TableScanContext { - pub fn new( - batch_size: usize, - read_parallelism: usize, - projected_schema: ProjectedSchema, - predicate: PredicateRef, - ) -> Self { - Self { - batch_size, - read_parallelism, - projected_schema, - predicate, - } - } -} - -impl TryFrom for horaedbproto::remote_engine::TableScanContext { - type Error = datafusion::error::DataFusionError; - - fn try_from(value: TableScanContext) -> DfResult { - let pb_projected_schema = value.projected_schema.into(); - - let pb_predicate = value - .predicate - .as_ref() - .try_into() - .box_err() - .map_err(DataFusionError::External)?; - - Ok(Self { - batch_size: value.batch_size as u64, - read_parallelism: value.read_parallelism as u64, - projected_schema: Some(pb_projected_schema), - predicate: Some(pb_predicate), - }) - } -} - -impl TryFrom for TableScanContext { - type Error = datafusion::error::DataFusionError; - - fn try_from(value: horaedbproto::remote_engine::TableScanContext) -> DfResult { - let projected_schema = value - .projected_schema - .ok_or(DataFusionError::Internal( - "projected schema not found".to_string(), - ))? - .try_into() - .map_err(|e| { - DataFusionError::Internal(format!("failed to decode projected schema, err:{}", e)) - })?; - - let predicate = value - .predicate - .ok_or(DataFusionError::Internal("predicate not found".to_string()))? - .try_into() - .map_err(|e| { - DataFusionError::Internal(format!("failed to decode predicate, err:{}", e)) - })?; - - Ok(Self { - batch_size: value.batch_size as usize, - read_parallelism: value.read_parallelism as usize, - projected_schema, - predicate: Arc::new(predicate), - }) - } -} - -impl fmt::Debug for TableScanContext { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let projection = self.projected_schema.projection(); - - f.debug_struct("TableScanContext") - .field("read_parallelism", &self.read_parallelism) - .field("batch_size", &self.batch_size) - .field("projection", &projection) - .field("predicate", &self.predicate) - .finish() - } -} diff --git a/src/df_engine_extensions/src/dist_sql_query/physical_plan.rs b/src/df_engine_extensions/src/dist_sql_query/physical_plan.rs deleted file mode 100644 index 55692f258b..0000000000 --- a/src/df_engine_extensions/src/dist_sql_query/physical_plan.rs +++ /dev/null @@ -1,769 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
dist sql query physical plans - -use std::{ - any::Any, - fmt, - pin::Pin, - sync::{Arc, Mutex}, - task::{Context, Poll}, - time::{Duration, Instant}, -}; - -use arrow::{datatypes::SchemaRef as ArrowSchemaRef, record_batch::RecordBatch}; -use datafusion::{ - error::{DataFusionError, Result as DfResult}, - execution::TaskContext, - physical_expr::PhysicalSortExpr, - physical_plan::{ - aggregates::{AggregateExec, AggregateMode}, - coalesce_batches::CoalesceBatchesExec, - coalesce_partitions::CoalescePartitionsExec, - displayable, - expressions::{ApproxPercentileCont, ApproxPercentileContWithWeight}, - filter::FilterExec, - metrics::{Count, MetricValue, MetricsSet}, - projection::ProjectionExec, - repartition::RepartitionExec, - DisplayAs, DisplayFormatType, ExecutionPlan, Metric, Partitioning, RecordBatchStream, - SendableRecordBatchStream as DfSendableRecordBatchStream, Statistics, - }, -}; -use futures::{future::BoxFuture, FutureExt, Stream, StreamExt}; -use runtime::Priority; -use table_engine::{predicate::Predicate, remote::model::TableIdentifier, table::ReadRequest}; -use trace_metric::{collector::FormatCollectorVisitor, MetricsCollector, TraceMetricWhenDrop}; - -use crate::dist_sql_query::{RemotePhysicalPlanExecutor, RemoteTaskContext, TableScanContext}; - -/// Placeholder of partitioned table's scan plan -/// It is inexecutable actually and just for carrying the necessary information -/// of building remote execution plans for sub tables. -// TODO: can we skip this and generate `ResolvedPartitionedScan` directly? -#[derive(Debug)] -pub struct UnresolvedPartitionedScan { - pub sub_tables: Vec, - pub table_scan_ctx: TableScanContext, - pub metrics_collector: MetricsCollector, - pub priority: Priority, - pub predicates: Option>, -} - -impl UnresolvedPartitionedScan { - pub fn new( - table_name: &str, - sub_tables: Vec, - read_request: ReadRequest, - predicates: Option>, - ) -> Self { - let metrics_collector = MetricsCollector::new(table_name.to_string()); - let table_scan_ctx = TableScanContext { - batch_size: read_request.opts.batch_size, - read_parallelism: read_request.opts.read_parallelism, - projected_schema: read_request.projected_schema, - predicate: read_request.predicate, - }; - - Self { - sub_tables, - table_scan_ctx, - metrics_collector, - priority: read_request.priority, - predicates, - } - } -} - -impl ExecutionPlan for UnresolvedPartitionedScan { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> ArrowSchemaRef { - self.table_scan_ctx - .projected_schema - .to_projected_arrow_schema() - } - - fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(self.sub_tables.len()) - } - - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - None - } - - fn children(&self) -> Vec> { - vec![] - } - - fn with_new_children( - self: Arc, - _children: Vec>, - ) -> DfResult> { - Err(DataFusionError::Internal( - "UnresolvedPartitionedScan should not have children".to_string(), - )) - } - - fn execute( - &self, - _partition: usize, - _context: Arc, - ) -> DfResult { - Err(DataFusionError::Internal( - "UnresolvedPartitionedScan can not be executed".to_string(), - )) - } - - fn statistics( - &self, - ) -> Result { - Ok(Statistics::new_unknown(&self.schema())) - } -} - -impl DisplayAs for UnresolvedPartitionedScan { - fn fmt_as(&self, _t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "UnresolvedPartitionedScan: sub_tables={:?}, table_scan_ctx:{:?}, partition_count={}", - self.sub_tables, - self.table_scan_ctx, - 
self.output_partitioning().partition_count(), - ) - } -} - -/// The executable scan plan of the partitioned table -/// It includes remote execution plans of sub tables, and will send them to -/// related nodes to execute. -#[derive(Debug)] -pub(crate) struct ResolvedPartitionedScan { - pub remote_exec_ctx: Arc, - pub pushdown_continue: bool, - pub metrics_collector: MetricsCollector, - pub is_analyze: bool, -} - -impl ResolvedPartitionedScan { - pub fn new( - remote_executor: Arc, - sub_table_plan_ctxs: Vec, - metrics_collector: MetricsCollector, - is_analyze: bool, - ) -> Self { - let remote_exec_ctx = Arc::new(RemoteExecContext { - executor: remote_executor, - plan_ctxs: sub_table_plan_ctxs, - }); - - Self::new_with_details(remote_exec_ctx, true, metrics_collector, is_analyze) - } - - pub fn new_with_details( - remote_exec_ctx: Arc, - pushdown_continue: bool, - metrics_collector: MetricsCollector, - is_analyze: bool, - ) -> Self { - Self { - remote_exec_ctx, - pushdown_continue, - metrics_collector, - is_analyze, - } - } - - pub fn pushdown_finished(&self) -> Arc { - Arc::new(Self { - remote_exec_ctx: self.remote_exec_ctx.clone(), - pushdown_continue: false, - metrics_collector: self.metrics_collector.clone(), - is_analyze: self.is_analyze, - }) - } - - pub fn try_to_push_down_more( - &self, - cur_node: Arc, - ) -> DfResult> { - // Can not push more... - if !self.pushdown_continue { - return cur_node.with_new_children(vec![self.pushdown_finished()]); - } - - // Push down more, and when occur the terminated push down able node, we need to - // set `can_push_down_more` false. - let pushdown_status = Self::maybe_a_pushdown_node(cur_node.clone()); - let (node, can_push_down_more) = match pushdown_status { - PushDownEvent::Continue(node) => (node, true), - PushDownEvent::Terminated(node) => (node, false), - PushDownEvent::Unable => { - let partitioned_scan = self.pushdown_finished(); - return cur_node.with_new_children(vec![partitioned_scan]); - } - }; - - let new_plan_ctxs = self - .remote_exec_ctx - .plan_ctxs - .iter() - .map(|plan_ctx| { - node.clone() - .with_new_children(vec![plan_ctx.plan.clone()]) - .map(|extended_plan| SubTablePlanContext { - table: plan_ctx.table.clone(), - plan: extended_plan, - metrics_collector: plan_ctx.metrics_collector.clone(), - remote_metrics: plan_ctx.remote_metrics.clone(), - }) - }) - .collect::>>()?; - - let remote_exec_ctx = Arc::new(RemoteExecContext { - executor: self.remote_exec_ctx.executor.clone(), - plan_ctxs: new_plan_ctxs, - }); - let plan = ResolvedPartitionedScan::new_with_details( - remote_exec_ctx, - can_push_down_more, - self.metrics_collector.clone(), - self.is_analyze, - ); - - Ok(Arc::new(plan)) - } - - #[inline] - pub fn maybe_a_pushdown_node(plan: Arc) -> PushDownEvent { - PushDownEvent::new(plan) - } - - /// `ResolvedPartitionedScan` can be executable after satisfying followings: - /// + The pushdown searching process is finished. 
- #[inline] - fn is_executable(&self) -> bool { - !self.pushdown_continue - } -} - -#[derive(Debug)] -pub struct RemoteExecContext { - executor: Arc, - plan_ctxs: Vec, -} - -#[derive(Debug)] -pub(crate) struct SubTablePlanContext { - table: TableIdentifier, - plan: Arc, - metrics_collector: MetricsCollector, - remote_metrics: Arc>>, -} - -impl SubTablePlanContext { - pub fn new( - table: TableIdentifier, - plan: Arc, - metrics_collector: MetricsCollector, - ) -> Self { - Self { - table, - plan, - metrics_collector, - remote_metrics: Arc::new(Mutex::new(None)), - } - } -} - -impl ExecutionPlan for ResolvedPartitionedScan { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> ArrowSchemaRef { - self.remote_exec_ctx - .plan_ctxs - .first() - .expect("remote_exec_plans should not be empty") - .plan - .schema() - } - - fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(self.remote_exec_ctx.plan_ctxs.len()) - } - - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - None - } - - fn children(&self) -> Vec> { - // If this is a analyze plan, we should not collect metrics of children - // which have been send to remote, So we just return empty children. - if self.is_analyze { - return vec![]; - } - - self.remote_exec_ctx - .plan_ctxs - .iter() - .map(|plan_ctx| plan_ctx.plan.clone()) - .collect() - } - - fn with_new_children( - self: Arc, - _children: Vec>, - ) -> DfResult> { - Err(DataFusionError::Internal( - "UnresolvedPartitionedScan can't be built directly from new children".to_string(), - )) - } - - fn execute( - &self, - partition: usize, - context: Arc, - ) -> DfResult { - if !self.is_executable() { - return Err(DataFusionError::Internal(format!( - "partitioned scan is still inexecutable, plan:{}", - displayable(self).indent(true) - ))); - } - - let SubTablePlanContext { - table: sub_table, - plan, - metrics_collector, - remote_metrics, - } = &self.remote_exec_ctx.plan_ctxs[partition]; - - let remote_task_ctx = - RemoteTaskContext::new(context, remote_metrics.clone(), self.is_analyze); - - // Send plan for remote execution. 
- let stream_future = self.remote_exec_ctx.executor.execute( - remote_task_ctx, - sub_table.clone(), - plan.clone(), - )?; - let record_stream = - PartitionedScanStream::new(stream_future, plan.schema(), metrics_collector.clone()); - - Ok(Box::pin(record_stream)) - } - - fn statistics( - &self, - ) -> Result { - Ok(Statistics::new_unknown(&self.schema())) - } - - fn metrics(&self) -> Option { - let mut metric_set = MetricsSet::new(); - - let mut format_visitor = FormatCollectorVisitor::default(); - self.metrics_collector.visit(&mut format_visitor); - let mut metrics_desc = format_visitor.into_string(); - - // collect metrics from remote - for sub_table_ctx in &self.remote_exec_ctx.plan_ctxs { - if let Some(remote_metrics) = sub_table_ctx.remote_metrics.lock().unwrap().take() { - metrics_desc.push_str(&format!( - "\n{}:\n{}", - sub_table_ctx.table.table, remote_metrics - )); - } - } - - metric_set.push(Arc::new(Metric::new( - MetricValue::Count { - name: format!("\n{metrics_desc}").into(), - count: Count::new(), - }, - None, - ))); - Some(metric_set) - } -} - -/// Partitioned scan stream -pub(crate) struct PartitionedScanStream { - /// Future to init the stream - stream_future: BoxFuture<'static, DfResult>, - - /// Stream to poll the records - stream_state: StreamState, - - /// Record schema - arrow_record_schema: ArrowSchemaRef, - - /// Last time left due to `Pending` - last_time_left: Option, - - /// Metrics collected for analyze - metrics: Metrics, -} - -impl PartitionedScanStream { - /// Create an empty RecordBatchStream - pub fn new( - stream_future: BoxFuture<'static, DfResult>, - arrow_record_schema: ArrowSchemaRef, - metrics_collector: MetricsCollector, - ) -> Self { - let metrics = Metrics { - metrics_collector, - ..Default::default() - }; - Self { - stream_future, - stream_state: StreamState::Initializing, - arrow_record_schema, - last_time_left: None, - metrics, - } - } -} - -impl RecordBatchStream for PartitionedScanStream { - fn schema(&self) -> ArrowSchemaRef { - self.arrow_record_schema.clone() - } -} - -impl Stream for PartitionedScanStream { - type Item = DfResult; - - fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - let this = self.get_mut(); - let this_time_polled = Instant::now(); - let wait_cost = match this.last_time_left { - Some(last_left) => this_time_polled.saturating_duration_since(last_left), - None => Duration::default(), - }; - this.metrics.wait_duration += wait_cost; - this.metrics.total_duration += wait_cost; - - let poll_result = loop { - let stream_state = &mut this.stream_state; - match stream_state { - StreamState::Initializing => { - let poll_res = this.stream_future.poll_unpin(cx); - match poll_res { - Poll::Ready(Ok(stream)) => { - *stream_state = StreamState::Polling(stream); - } - Poll::Ready(Err(e)) => { - *stream_state = StreamState::InitializeFailed; - break Poll::Ready(Some(Err(e))); - } - Poll::Pending => break Poll::Pending, - } - } - StreamState::InitializeFailed => return Poll::Ready(None), - StreamState::Polling(stream) => break stream.poll_next_unpin(cx), - } - }; - - let this_time_left = Instant::now(); - let poll_cost = this_time_left.saturating_duration_since(this_time_polled); - this.metrics.poll_duration += poll_cost; - this.metrics.total_duration += poll_cost; - this.last_time_left = Some(this_time_left); - - poll_result - } -} - -/// Stream state -/// Before polling record batch from it, we must initializing the record batch -/// stream first. 
The process of state changing is like: -/// -/// ```plaintext -/// ┌────────────┐ -/// │Initializing│ -/// └──────┬─────┘ -/// _________▽_________ ┌──────────────────────────────┐ -/// ╱ ╲ │Polling(we just return the │ -/// ╱ Success to init the ╲___│inner stream's polling result)│ -/// ╲ record batch stream ╱yes└──────────────────────────────┘ -/// ╲___________________╱ -/// │no -/// ┌────────▽───────┐ -/// │InitializeFailed│ -/// └────────────────┘ -/// ``` -pub(crate) enum StreamState { - Initializing, - InitializeFailed, - Polling(DfSendableRecordBatchStream), -} - -impl DisplayAs for ResolvedPartitionedScan { - fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result { - match t { - DisplayFormatType::Default | DisplayFormatType::Verbose => { - write!( - f, - "ResolvedPartitionedScan: pushdown_continue:{}, partition_count:{}", - self.pushdown_continue, - self.remote_exec_ctx.plan_ctxs.len() - ) - } - } - } -} - -/// Placeholder of sub table's scan plan -/// It is inexecutable actually and just for carrying the necessary information -/// of building the executable scan plan. -#[derive(Debug, Clone)] -pub struct UnresolvedSubTableScan { - pub table: TableIdentifier, - pub table_scan_ctx: TableScanContext, -} - -impl ExecutionPlan for UnresolvedSubTableScan { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> ArrowSchemaRef { - self.table_scan_ctx - .projected_schema - .to_projected_arrow_schema() - } - - fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(self.table_scan_ctx.read_parallelism) - } - - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - None - } - - fn children(&self) -> Vec> { - vec![] - } - - fn with_new_children( - self: Arc, - _children: Vec>, - ) -> DfResult> { - Err(DataFusionError::Internal( - "UnresolvedSubTableScan should not have children".to_string(), - )) - } - - fn execute( - &self, - _partition: usize, - _context: Arc, - ) -> DfResult { - Err(DataFusionError::Internal( - "UnresolvedSubTableScan can not be executed".to_string(), - )) - } - - fn statistics( - &self, - ) -> Result { - Ok(Statistics::new_unknown(&self.schema())) - } -} - -impl DisplayAs for UnresolvedSubTableScan { - fn fmt_as(&self, _t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "UnresolvedSubTableScan: table:{:?}, table_scan_ctx:{:?}, partition_count:{}", - self.table, - self.table_scan_ctx, - self.output_partitioning().partition_count(), - ) - } -} - -impl TryFrom for UnresolvedSubTableScan { - type Error = DataFusionError; - - fn try_from( - value: horaedbproto::remote_engine::UnresolvedSubScan, - ) -> Result { - let table = value - .table - .ok_or(DataFusionError::Internal( - "table ident not found".to_string(), - ))? - .into(); - let table_scan_ctx = value - .table_scan_ctx - .ok_or(DataFusionError::Internal( - "table scan context not found".to_string(), - ))? 
- .try_into() - .map_err(|e| { - DataFusionError::Internal(format!("failed to decode table scan context, err:{e}")) - })?; - - Ok(Self { - table, - table_scan_ctx, - }) - } -} - -impl TryFrom for horaedbproto::remote_engine::UnresolvedSubScan { - type Error = DataFusionError; - - fn try_from(value: UnresolvedSubTableScan) -> Result { - let table = value.table.into(); - let table_scan_ctx = value.table_scan_ctx.try_into().map_err(|e| { - DataFusionError::Internal(format!("failed to encode read request, err:{e}")) - })?; - - Ok(Self { - table: Some(table), - table_scan_ctx: Some(table_scan_ctx), - }) - } -} - -/// Pushdown status, including: -/// + Unable, plan node which can't be pushed down to -/// `ResolvedPartitionedScan` node. -/// + Continue, node able to be pushed down to `ResolvedPartitionedScan`, and -/// the newly generated `ResolvedPartitionedScan` can continue to accept -/// more pushdown nodes after. -/// + Terminated, node able to be pushed down to `ResolvedPartitionedScan`, -/// but the newly generated `ResolvedPartitionedScan` can't accept more -/// pushdown nodes after. -pub enum PushDownEvent { - Unable, - Continue(Arc), - Terminated(Arc), -} - -impl PushDownEvent { - // Those aggregate functions can't be pushed down. - // https://github.com/apache/incubator-horaedb/issues/1405 - fn blacklist_expr(expr: &dyn Any) -> bool { - expr.is::() || expr.is::() - } - - pub fn new(plan: Arc) -> Self { - if let Some(aggr) = plan.as_any().downcast_ref::() { - for aggr_expr in aggr.aggr_expr() { - if Self::blacklist_expr(aggr_expr.as_any()) { - return Self::Unable; - } - } - - if *aggr.mode() == AggregateMode::Partial { - Self::Terminated(plan) - } else { - Self::Unable - } - } else if plan.as_any().downcast_ref::().is_some() - || plan.as_any().downcast_ref::().is_some() - || plan.as_any().downcast_ref::().is_some() - || plan - .as_any() - .downcast_ref::() - .is_some() - || plan - .as_any() - .downcast_ref::() - .is_some() - { - Self::Continue(plan) - } else { - Self::Unable - } - } -} -/// Metrics for [ChainIterator]. -#[derive(TraceMetricWhenDrop, Default)] -struct Metrics { - #[metric(duration)] - wait_duration: Duration, - #[metric(duration)] - poll_duration: Duration, - #[metric(duration)] - total_duration: Duration, - #[metric(collector)] - metrics_collector: MetricsCollector, -} - -#[cfg(test)] -mod test { - use datafusion::error::DataFusionError; - use futures::StreamExt; - - use crate::dist_sql_query::{ - physical_plan::PartitionedScanStream, - test_util::{MockPartitionedScanStreamBuilder, PartitionedScanStreamCase}, - }; - - #[tokio::test] - async fn test_stream_poll_success() { - let builder = MockPartitionedScanStreamBuilder::new(PartitionedScanStreamCase::Success); - let mut stream = builder.build(); - let result_opt = stream.next().await; - assert!(result_opt.is_none()); - } - - #[tokio::test] - async fn test_stream_init_failed() { - let builder = - MockPartitionedScanStreamBuilder::new(PartitionedScanStreamCase::InitializeFailed); - let stream = builder.build(); - test_stream_failed_state(stream, "failed to init").await - } - - #[tokio::test] - async fn test_stream_poll_failed() { - let builder = MockPartitionedScanStreamBuilder::new(PartitionedScanStreamCase::PollFailed); - let stream = builder.build(); - test_stream_failed_state(stream, "failed to poll").await - } - - async fn test_stream_failed_state(mut stream: PartitionedScanStream, failed_msg: &str) { - // Error happened, check error message. 
- let result_opt = stream.next().await; - assert!(result_opt.is_some()); - let result = result_opt.unwrap(); - assert!(result.is_err()); - let err = result.unwrap_err(); - match err { - DataFusionError::Internal(msg) => { - assert!(msg.contains(failed_msg)) - } - other => panic!("unexpected error:{other}"), - } - - // Should return `None` in next poll. - let result_opt = stream.next().await; - assert!(result_opt.is_none()); - } -} diff --git a/src/df_engine_extensions/src/dist_sql_query/resolver.rs b/src/df_engine_extensions/src/dist_sql_query/resolver.rs deleted file mode 100644 index 5fd3430a88..0000000000 --- a/src/df_engine_extensions/src/dist_sql_query/resolver.rs +++ /dev/null @@ -1,362 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use async_recursion::async_recursion; -use catalog::manager::ManagerRef as CatalogManagerRef; -use datafusion::{ - error::{DataFusionError, Result as DfResult}, - physical_plan::{analyze::AnalyzeExec, ExecutionPlan}, -}; -use runtime::Priority; -use table_engine::{remote::model::TableIdentifier, table::TableRef}; - -use crate::{ - dist_sql_query::{ - physical_plan::{ - ResolvedPartitionedScan, SubTablePlanContext, UnresolvedPartitionedScan, - UnresolvedSubTableScan, - }, - ExecutableScanBuilderRef, RemotePhysicalPlanExecutorRef, - }, - metrics::PUSH_DOWN_PLAN_COUNTER, -}; - -/// Resolver which makes datafuison dist query related plan executable. -/// -/// The reason we define a `Resolver` rather than `physical optimization rule` -/// is: As I see, physical optimization rule is responsible for optimizing a bad -/// plan to good one, rather than making a inexecutable plan executable. -/// So we define `Resolver` to make it, it may be somthing similar to task -/// generator responsible for generating task for executor to run based on -/// physical plan. -pub struct Resolver { - remote_executor: RemotePhysicalPlanExecutorRef, - catalog_manager: CatalogManagerRef, - scan_builder: ExecutableScanBuilderRef, - priority: Priority, -} - -impl Resolver { - pub fn new( - remote_executor: RemotePhysicalPlanExecutorRef, - catalog_manager: CatalogManagerRef, - scan_builder: ExecutableScanBuilderRef, - priority: Priority, - ) -> Self { - Self { - remote_executor, - catalog_manager, - scan_builder, - priority, - } - } - - /// Resolve partitioned scan, including: - /// - Convert `UnresolvedPartitionedScan`(inexecutable) to - /// `ResolvedPartitionedScan`(executable). - /// - Push nodes(e.g. filter, projection, partial aggregation,...) to - /// `ResolvedPartitionedScan`. 
- /// - /// Example for the process: - /// - Initial plan: - /// - /// ```plaintext - /// Final Aggregation - /// Partial Aggregation - /// Filter - /// UnresolvedPartitionedScan - /// ``` - /// - /// - After converting partitioned scan from unresolved to resolved: - /// - /// ```plaintext - /// Final Aggregation - /// Partial Aggregation - /// Filter - /// ResolvedPartitionedScan - /// UnresolvedSubTableScan (send to remote node) - /// ``` - /// - /// - After pushing down nodes: - /// - /// ```plaintext - /// Final Aggregation - /// ResolvedPartitionedScan - /// Partial Aggregation (send to remote node) - /// Filter (send to remote node) - /// UnresolvedSubTableScan (send to remote node) - /// ``` - pub fn resolve_partitioned_scan( - &self, - plan: Arc, - ) -> DfResult> { - // Check if this plan is `AnalyzeExec`, if it is, we should collect metrics. - let is_analyze = plan.as_any().is::(); - - let resolved_plan = self.resolve_partitioned_scan_internal(plan, is_analyze)?; - PUSH_DOWN_PLAN_COUNTER - .with_label_values(&["remote_scan"]) - .inc(); - - if let Some(plan) = resolved_plan - .as_any() - .downcast_ref::() - { - Ok(plan.pushdown_finished()) - } else { - Ok(resolved_plan) - } - } - - pub fn resolve_partitioned_scan_internal( - &self, - plan: Arc, - is_analyze: bool, - ) -> DfResult> { - // Leave node, let's resolve it and return. - if let Some(unresolved) = plan.as_any().downcast_ref::() { - let metrics_collector = unresolved.metrics_collector.clone(); - let sub_tables = unresolved.sub_tables.clone(); - let remote_plans = sub_tables - .into_iter() - .enumerate() - .map(|(idx, table)| { - let plan = Arc::new(UnresolvedSubTableScan { - table: table.clone(), - table_scan_ctx: if let Some(ref predicates) = unresolved.predicates { - // Since all each partition has different predicate, so we shall build - // seperate ctx regarding each partition - // with different predicate - let mut ctx = unresolved.table_scan_ctx.clone(); - // overwrite old predicate (it's the predidcate before partiton - // calculation) with optimized predicate - ctx.predicate = Arc::new(predicates[idx].clone()); - ctx - } else { - unresolved.table_scan_ctx.clone() - }, - }); - let sub_metrics_collect = metrics_collector.span(table.table.clone()); - - SubTablePlanContext::new(table, plan, sub_metrics_collect) - }) - .collect::>(); - - return Ok(Arc::new(ResolvedPartitionedScan::new( - self.remote_executor.clone(), - remote_plans, - metrics_collector, - is_analyze, - ))); - } - - let children = plan.children().clone(); - // Occur some node isn't table scan but without children? It should return, too. - if children.is_empty() { - return Ok(plan); - } - - // Resolve children if exist. - let mut new_children = Vec::with_capacity(children.len()); - for child in children { - let child = self.resolve_partitioned_scan_internal(child, is_analyze)?; - - new_children.push(child); - } - - Self::maybe_push_down_to_remote_plans(new_children, plan) - } - - fn maybe_push_down_to_remote_plans( - mut new_children: Vec>, - current_node: Arc, - ) -> DfResult> { - // No children, just return. - if new_children.is_empty() { - return Ok(current_node); - } - - // When this node has multiple children, it can't be pushed down to remote. 
- if new_children.len() > 1 { - new_children.iter_mut().for_each(|child| { - if let Some(plan) = child.as_any().downcast_ref::() { - *child = plan.pushdown_finished(); - } - }); - return current_node.with_new_children(new_children); - } - - // Has ensured that this node has just child and it is just - // `ResolvedPartitionedScan`, try to push down it to remote plans in - // `ResolvedPartitionedScan`. - let child = new_children.first().unwrap(); - let partitioned_scan = - if let Some(plan) = child.as_any().downcast_ref::() { - plan - } else { - return current_node.with_new_children(new_children); - }; - - partitioned_scan.try_to_push_down_more(current_node.clone()) - } - - #[async_recursion] - pub async fn resolve_sub_scan( - &self, - plan: Arc, - ) -> DfResult> { - // Leave node, let's resolve it and return. - let build_scan_opt = - if let Some(unresolved) = plan.as_any().downcast_ref::() { - let table = self.find_table(&unresolved.table)?; - let table_scan_ctx = unresolved.table_scan_ctx.clone(); - - Some((table, table_scan_ctx)) - } else { - None - }; - - if let Some((table, table_scan_ctx)) = build_scan_opt { - return self - .scan_builder - .build(table, table_scan_ctx, self.priority) - .await; - } - - let children = plan.children().clone(); - // Occur some node isn't table scan but without children? It should return, too. - if children.is_empty() { - return Ok(plan); - } - - // Resolve children if exist. - let mut new_children = Vec::with_capacity(children.len()); - for child in children { - let child = self.resolve_sub_scan(child).await?; - - new_children.push(child); - } - - plan.with_new_children(new_children) - } - - fn find_table(&self, table_ident: &TableIdentifier) -> DfResult { - let catalog = self - .catalog_manager - .catalog_by_name(&table_ident.catalog) - .map_err(|e| DataFusionError::Internal(format!("failed to find catalog, err:{e}")))? - .ok_or(DataFusionError::Internal("catalog not found".to_string()))?; - - let schema = catalog - .schema_by_name(&table_ident.schema) - .map_err(|e| DataFusionError::Internal(format!("failed to find schema, err:{e}")))? - .ok_or(DataFusionError::Internal("schema not found".to_string()))?; - - schema - .table_by_name(&table_ident.table) - .map_err(|e| DataFusionError::Internal(format!("failed to find table, err:{e}")))? - .ok_or(DataFusionError::Internal("table not found".to_string())) - } -} - -#[cfg(test)] -mod test { - - use datafusion::physical_plan::displayable; - - use crate::dist_sql_query::test_util::TestContext; - - #[test] - fn test_basic_partitioned_scan() { - let ctx = TestContext::new(); - let plan = ctx.build_basic_partitioned_table_plan(); - let resolver = ctx.resolver(); - let new_plan = displayable(resolver.resolve_partitioned_scan(plan).unwrap().as_ref()) - .indent(true) - .to_string(); - insta::assert_snapshot!(new_plan); - } - - #[tokio::test] - async fn test_basic_sub_scan() { - let ctx = TestContext::new(); - let plan = ctx.build_basic_sub_table_plan(); - let resolver = ctx.resolver(); - let new_plan = displayable(resolver.resolve_sub_scan(plan).await.unwrap().as_ref()) - .indent(true) - .to_string(); - insta::assert_snapshot!(new_plan); - } - - #[tokio::test] - async fn test_unprocessed_plan() { - let ctx = TestContext::new(); - let plan = ctx.build_unprocessed_plan(); - let resolver = ctx.resolver(); - - let original_plan_display = displayable(plan.as_ref()).indent(true).to_string(); - - // It should not be processed by `resolve_partitioned_scan`. 
- let new_plan = resolver.resolve_partitioned_scan(plan.clone()).unwrap(); - - let new_plan_display = displayable(new_plan.as_ref()).indent(true).to_string(); - - assert_eq!(original_plan_display, new_plan_display); - - // It should not be processed by `resolve_sub_scan_internal`. - let new_plan = resolver.resolve_sub_scan(plan.clone()).await.unwrap(); - - let new_plan_display = displayable(new_plan.as_ref()).indent(true).to_string(); - - assert_eq!(original_plan_display, new_plan_display); - } - - #[test] - fn test_aggr_push_down() { - let ctx = TestContext::new(); - let plan = ctx.build_aggr_push_down_plan(); - let resolver = ctx.resolver(); - let new_plan = displayable(resolver.resolve_partitioned_scan(plan).unwrap().as_ref()) - .indent(true) - .to_string(); - insta::assert_snapshot!(new_plan); - } - - #[test] - fn test_compounded_aggr_push_down() { - let ctx = TestContext::new(); - let plan = ctx.build_compounded_aggr_push_down_plan(); - let resolver = ctx.resolver(); - let new_plan = displayable(resolver.resolve_partitioned_scan(plan).unwrap().as_ref()) - .indent(true) - .to_string(); - insta::assert_snapshot!(new_plan); - } - - #[test] - fn test_node_with_multiple_partitioned_scan_children() { - let ctx = TestContext::new(); - let plan = ctx.build_union_plan(); - let resolver = ctx.resolver(); - let new_plan = displayable(resolver.resolve_partitioned_scan(plan).unwrap().as_ref()) - .indent(true) - .to_string(); - insta::assert_snapshot!(new_plan); - } -} diff --git a/src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__aggr_push_down.snap b/src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__aggr_push_down.snap deleted file mode 100644 index 1563d71ada..0000000000 --- a/src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__aggr_push_down.snap +++ /dev/null @@ -1,15 +0,0 @@ ---- -source: df_engine_extensions/src/dist_sql_query/resolver.rs -assertion_line: 311 -expression: new_plan ---- -AggregateExec: mode=Final, gby=[tag1@1 as tag1, tag2@2 as tag2], aggr=[COUNT(value), COUNT(field2)] - CoalescePartitionsExec - ResolvedPartitionedScan: pushdown_continue:false, partition_count:3 - AggregateExec: mode=Partial, gby=[tag1@1 as tag1, tag2@2 as tag2], aggr=[COUNT(value), COUNT(field2)] - UnresolvedSubTableScan: table:TableIdentifier { catalog: "test_catalog", schema: "test_schema", table: "__test_1" }, table_scan_ctx:TableScanContext { read_parallelism: 8, batch_size: 10000, projection: Some([1, 2, 3, 4, 5]), predicate: Predicate { exprs:[time < TimestampMillisecond(1691974518000, None) AND tag1 = Utf8("test_tag")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(1691974518000) } } }, partition_count:8 - AggregateExec: mode=Partial, gby=[tag1@1 as tag1, tag2@2 as tag2], aggr=[COUNT(value), COUNT(field2)] - UnresolvedSubTableScan: table:TableIdentifier { catalog: "test_catalog", schema: "test_schema", table: "__test_2" }, table_scan_ctx:TableScanContext { read_parallelism: 8, batch_size: 10000, projection: Some([1, 2, 3, 4, 5]), predicate: Predicate { exprs:[time < TimestampMillisecond(1691974518000, None) AND tag1 = Utf8("test_tag")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(1691974518000) } } }, partition_count:8 - AggregateExec: mode=Partial, gby=[tag1@1 as tag1, tag2@2 as tag2], aggr=[COUNT(value), COUNT(field2)] - 
UnresolvedSubTableScan: table:TableIdentifier { catalog: "test_catalog", schema: "test_schema", table: "__test_3" }, table_scan_ctx:TableScanContext { read_parallelism: 8, batch_size: 10000, projection: Some([1, 2, 3, 4, 5]), predicate: Predicate { exprs:[time < TimestampMillisecond(1691974518000, None) AND tag1 = Utf8("test_tag")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(1691974518000) } } }, partition_count:8 - diff --git a/src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__basic_partitioned_scan.snap b/src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__basic_partitioned_scan.snap deleted file mode 100644 index c0a1264b32..0000000000 --- a/src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__basic_partitioned_scan.snap +++ /dev/null @@ -1,16 +0,0 @@ ---- -source: df_engine_extensions/src/dist_sql_query/resolver.rs -assertion_line: 266 -expression: new_plan ---- -ResolvedPartitionedScan: pushdown_continue:false, partition_count:3 - ProjectionExec: expr=[time@0 as time, tag1@1 as tag1, tag2@2 as tag2, value@3 as value, field2@4 as field2] - FilterExec: time@0 < 1691974518000 AND tag1@1 = test_tag - UnresolvedSubTableScan: table:TableIdentifier { catalog: "test_catalog", schema: "test_schema", table: "__test_1" }, table_scan_ctx:TableScanContext { read_parallelism: 8, batch_size: 10000, projection: Some([1, 2, 3, 4, 5]), predicate: Predicate { exprs:[time < TimestampMillisecond(1691974518000, None) AND tag1 = Utf8("test_tag")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(1691974518000) } } }, partition_count:8 - ProjectionExec: expr=[time@0 as time, tag1@1 as tag1, tag2@2 as tag2, value@3 as value, field2@4 as field2] - FilterExec: time@0 < 1691974518000 AND tag1@1 = test_tag - UnresolvedSubTableScan: table:TableIdentifier { catalog: "test_catalog", schema: "test_schema", table: "__test_2" }, table_scan_ctx:TableScanContext { read_parallelism: 8, batch_size: 10000, projection: Some([1, 2, 3, 4, 5]), predicate: Predicate { exprs:[time < TimestampMillisecond(1691974518000, None) AND tag1 = Utf8("test_tag")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(1691974518000) } } }, partition_count:8 - ProjectionExec: expr=[time@0 as time, tag1@1 as tag1, tag2@2 as tag2, value@3 as value, field2@4 as field2] - FilterExec: time@0 < 1691974518000 AND tag1@1 = test_tag - UnresolvedSubTableScan: table:TableIdentifier { catalog: "test_catalog", schema: "test_schema", table: "__test_3" }, table_scan_ctx:TableScanContext { read_parallelism: 8, batch_size: 10000, projection: Some([1, 2, 3, 4, 5]), predicate: Predicate { exprs:[time < TimestampMillisecond(1691974518000, None) AND tag1 = Utf8("test_tag")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(1691974518000) } } }, partition_count:8 - diff --git a/src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__basic_sub_scan.snap b/src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__basic_sub_scan.snap deleted file mode 100644 index df366b4326..0000000000 --- 
a/src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__basic_sub_scan.snap +++ /dev/null @@ -1,9 +0,0 @@ ---- -source: df_engine_extensions/src/dist_sql_query/resolver.rs -assertion_line: 277 -expression: new_plan ---- -ProjectionExec: expr=[time@0 as time, tag1@1 as tag1, tag2@2 as tag2, value@3 as value, field2@4 as field2] - FilterExec: time@0 < 1691974518000 AND tag1@1 = test_tag - MockScan - diff --git a/src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__compounded_aggr_push_down.snap b/src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__compounded_aggr_push_down.snap deleted file mode 100644 index 00400aa8ac..0000000000 --- a/src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__compounded_aggr_push_down.snap +++ /dev/null @@ -1,21 +0,0 @@ ---- -source: df_engine_extensions/src/dist_sql_query/resolver.rs -assertion_line: 322 -expression: new_plan ---- -AggregateExec: mode=Final, gby=[tag1@1 as tag1, tag2@2 as tag2], aggr=[COUNT(value), COUNT(field2)] - CoalescePartitionsExec - ResolvedPartitionedScan: pushdown_continue:false, partition_count:3 - AggregateExec: mode=Partial, gby=[tag1@1 as tag1, tag2@2 as tag2], aggr=[COUNT(value), COUNT(field2)] - ProjectionExec: expr=[time@0 as time, tag1@1 as tag1, tag2@2 as tag2, value@3 as value, field2@4 as field2] - FilterExec: time@0 < 1691974518000 AND tag1@1 = test_tag - UnresolvedSubTableScan: table:TableIdentifier { catalog: "test_catalog", schema: "test_schema", table: "__test_1" }, table_scan_ctx:TableScanContext { read_parallelism: 8, batch_size: 10000, projection: Some([1, 2, 3, 4, 5]), predicate: Predicate { exprs:[time < TimestampMillisecond(1691974518000, None) AND tag1 = Utf8("test_tag")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(1691974518000) } } }, partition_count:8 - AggregateExec: mode=Partial, gby=[tag1@1 as tag1, tag2@2 as tag2], aggr=[COUNT(value), COUNT(field2)] - ProjectionExec: expr=[time@0 as time, tag1@1 as tag1, tag2@2 as tag2, value@3 as value, field2@4 as field2] - FilterExec: time@0 < 1691974518000 AND tag1@1 = test_tag - UnresolvedSubTableScan: table:TableIdentifier { catalog: "test_catalog", schema: "test_schema", table: "__test_2" }, table_scan_ctx:TableScanContext { read_parallelism: 8, batch_size: 10000, projection: Some([1, 2, 3, 4, 5]), predicate: Predicate { exprs:[time < TimestampMillisecond(1691974518000, None) AND tag1 = Utf8("test_tag")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(1691974518000) } } }, partition_count:8 - AggregateExec: mode=Partial, gby=[tag1@1 as tag1, tag2@2 as tag2], aggr=[COUNT(value), COUNT(field2)] - ProjectionExec: expr=[time@0 as time, tag1@1 as tag1, tag2@2 as tag2, value@3 as value, field2@4 as field2] - FilterExec: time@0 < 1691974518000 AND tag1@1 = test_tag - UnresolvedSubTableScan: table:TableIdentifier { catalog: "test_catalog", schema: "test_schema", table: "__test_3" }, table_scan_ctx:TableScanContext { read_parallelism: 8, batch_size: 10000, projection: Some([1, 2, 3, 4, 5]), predicate: Predicate { exprs:[time < TimestampMillisecond(1691974518000, None) AND tag1 = Utf8("test_tag")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(1691974518000) } } }, partition_count:8 - diff --git 
a/src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__node_with_multiple_partitioned_scan_children.snap b/src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__node_with_multiple_partitioned_scan_children.snap deleted file mode 100644 index 279ee1d7e7..0000000000 --- a/src/df_engine_extensions/src/dist_sql_query/snapshots/df_engine_extensions__dist_sql_query__resolver__test__node_with_multiple_partitioned_scan_children.snap +++ /dev/null @@ -1,27 +0,0 @@ ---- -source: df_engine_extensions/src/dist_sql_query/resolver.rs -assertion_line: 333 -expression: new_plan ---- -UnionExec - ResolvedPartitionedScan: pushdown_continue:false, partition_count:3 - ProjectionExec: expr=[time@0 as time, tag1@1 as tag1, tag2@2 as tag2, value@3 as value, field2@4 as field2] - FilterExec: time@0 < 1691974518000 AND tag1@1 = test_tag - UnresolvedSubTableScan: table:TableIdentifier { catalog: "test_catalog", schema: "test_schema", table: "__test_1" }, table_scan_ctx:TableScanContext { read_parallelism: 8, batch_size: 10000, projection: Some([1, 2, 3, 4, 5]), predicate: Predicate { exprs:[time < TimestampMillisecond(1691974518000, None) AND tag1 = Utf8("test_tag")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(1691974518000) } } }, partition_count:8 - ProjectionExec: expr=[time@0 as time, tag1@1 as tag1, tag2@2 as tag2, value@3 as value, field2@4 as field2] - FilterExec: time@0 < 1691974518000 AND tag1@1 = test_tag - UnresolvedSubTableScan: table:TableIdentifier { catalog: "test_catalog", schema: "test_schema", table: "__test_2" }, table_scan_ctx:TableScanContext { read_parallelism: 8, batch_size: 10000, projection: Some([1, 2, 3, 4, 5]), predicate: Predicate { exprs:[time < TimestampMillisecond(1691974518000, None) AND tag1 = Utf8("test_tag")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(1691974518000) } } }, partition_count:8 - ProjectionExec: expr=[time@0 as time, tag1@1 as tag1, tag2@2 as tag2, value@3 as value, field2@4 as field2] - FilterExec: time@0 < 1691974518000 AND tag1@1 = test_tag - UnresolvedSubTableScan: table:TableIdentifier { catalog: "test_catalog", schema: "test_schema", table: "__test_3" }, table_scan_ctx:TableScanContext { read_parallelism: 8, batch_size: 10000, projection: Some([1, 2, 3, 4, 5]), predicate: Predicate { exprs:[time < TimestampMillisecond(1691974518000, None) AND tag1 = Utf8("test_tag")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(1691974518000) } } }, partition_count:8 - ResolvedPartitionedScan: pushdown_continue:false, partition_count:3 - ProjectionExec: expr=[time@0 as time, tag1@1 as tag1, tag2@2 as tag2, value@3 as value, field2@4 as field2] - FilterExec: time@0 < 1691974518000 AND tag1@1 = test_tag - UnresolvedSubTableScan: table:TableIdentifier { catalog: "test_catalog", schema: "test_schema", table: "__test_new_1" }, table_scan_ctx:TableScanContext { read_parallelism: 8, batch_size: 10000, projection: Some([1, 2, 3, 4, 5]), predicate: Predicate { exprs:[time < TimestampMillisecond(1691974518000, None) AND tag1 = Utf8("test_tag")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(1691974518000) } } }, partition_count:8 - ProjectionExec: expr=[time@0 as time, tag1@1 as tag1, tag2@2 as tag2, value@3 as value, field2@4 as field2] - FilterExec: time@0 < 
1691974518000 AND tag1@1 = test_tag - UnresolvedSubTableScan: table:TableIdentifier { catalog: "test_catalog", schema: "test_schema", table: "__test_new_2" }, table_scan_ctx:TableScanContext { read_parallelism: 8, batch_size: 10000, projection: Some([1, 2, 3, 4, 5]), predicate: Predicate { exprs:[time < TimestampMillisecond(1691974518000, None) AND tag1 = Utf8("test_tag")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(1691974518000) } } }, partition_count:8 - ProjectionExec: expr=[time@0 as time, tag1@1 as tag1, tag2@2 as tag2, value@3 as value, field2@4 as field2] - FilterExec: time@0 < 1691974518000 AND tag1@1 = test_tag - UnresolvedSubTableScan: table:TableIdentifier { catalog: "test_catalog", schema: "test_schema", table: "__test_new_3" }, table_scan_ctx:TableScanContext { read_parallelism: 8, batch_size: 10000, projection: Some([1, 2, 3, 4, 5]), predicate: Predicate { exprs:[time < TimestampMillisecond(1691974518000, None) AND tag1 = Utf8("test_tag")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(1691974518000) } } }, partition_count:8 - diff --git a/src/df_engine_extensions/src/dist_sql_query/test_util.rs b/src/df_engine_extensions/src/dist_sql_query/test_util.rs deleted file mode 100644 index 873c7a2214..0000000000 --- a/src/df_engine_extensions/src/dist_sql_query/test_util.rs +++ /dev/null @@ -1,619 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::{ - pin::Pin, - sync::Arc, - task::{Context, Poll}, -}; - -use arrow::{ - datatypes::{DataType, Schema, SchemaRef}, - record_batch::RecordBatch, -}; -use async_trait::async_trait; -use catalog::{manager::ManagerRef, test_util::MockCatalogManagerBuilder}; -use common_types::{ - projected_schema::ProjectedSchema, request_id::RequestId, tests::build_schema_for_cpu, -}; -use datafusion::{ - error::{DataFusionError, Result as DfResult}, - execution::FunctionRegistry, - logical_expr::{expr_fn, Literal, Operator}, - physical_plan::{ - aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}, - coalesce_partitions::CoalescePartitionsExec, - expressions::{binary, col, lit, Count}, - filter::FilterExec, - projection::ProjectionExec, - union::UnionExec, - AggregateExpr, DisplayAs, EmptyRecordBatchStream, ExecutionPlan, PhysicalExpr, - RecordBatchStream, SendableRecordBatchStream, - }, - scalar::ScalarValue, -}; -use futures::{future::BoxFuture, Stream}; -use runtime::Priority; -use table_engine::{ - memory::MemoryTable, - predicate::PredicateBuilder, - remote::model::TableIdentifier, - table::{ReadOptions, ReadRequest, TableId, TableRef}, - ANALYTIC_ENGINE_TYPE, -}; -use trace_metric::MetricsCollector; - -use crate::dist_sql_query::{ - physical_plan::{PartitionedScanStream, UnresolvedPartitionedScan, UnresolvedSubTableScan}, - resolver::Resolver, - ExecutableScanBuilder, RemotePhysicalPlanExecutor, RemoteTaskContext, TableScanContext, -}; - -// Test context -pub struct TestContext { - request: ReadRequest, - sub_table_groups: Vec>, - physical_filter: Arc, - physical_projection: Vec<(Arc, String)>, - group_by: PhysicalGroupBy, - aggr_exprs: Vec>, - catalog_manager: ManagerRef, -} - -impl Default for TestContext { - fn default() -> Self { - Self::new() - } -} - -impl TestContext { - pub fn new() -> Self { - let test_schema = build_schema_for_cpu(); - let sub_tables_0 = vec![ - "__test_1".to_string(), - "__test_2".to_string(), - "__test_3".to_string(), - ] - .into_iter() - .map(|table| TableIdentifier { - catalog: "test_catalog".to_string(), - schema: "test_schema".to_string(), - table, - }) - .collect::>(); - - let sub_tables_1 = vec![ - "__test_new_1".to_string(), - "__test_new_2".to_string(), - "__test_new_3".to_string(), - ] - .into_iter() - .map(|table| TableIdentifier { - catalog: "test_catalog".to_string(), - schema: "test_schema".to_string(), - table, - }) - .collect::>(); - - let sub_table_groups = vec![sub_tables_0, sub_tables_1]; - - // Logical exprs. - // Projection: [time, tag1, tag2, value, field2] - let projection = vec![1_usize, 2, 3, 4, 5]; - let projected_schema = ProjectedSchema::new(test_schema.clone(), Some(projection)).unwrap(); - // Filter: time < 1691974518000 and tag1 == 'test_tag' - let logical_filters = vec![(expr_fn::col("time").lt(ScalarValue::TimestampMillisecond( - Some(1691974518000), - None, - ) - .lit())) - .and(expr_fn::col("tag1").eq("test_tag".lit()))]; - - // Physical exprs. 
- let arrow_projected_schema = projected_schema.to_projected_arrow_schema(); - // Projection - let physical_projection = vec![ - ( - col("time", &arrow_projected_schema).unwrap(), - "time".to_string(), - ), - ( - col("tag1", &arrow_projected_schema).unwrap(), - "tag1".to_string(), - ), - ( - col("tag2", &arrow_projected_schema).unwrap(), - "tag2".to_string(), - ), - ( - col("value", &arrow_projected_schema).unwrap(), - "value".to_string(), - ), - ( - col("field2", &arrow_projected_schema).unwrap(), - "field2".to_string(), - ), - ]; - - // Filter - let physical_filter1: Arc = binary( - col("time", &arrow_projected_schema).unwrap(), - Operator::Lt, - lit(ScalarValue::TimestampMillisecond(Some(1691974518000), None)), - &arrow_projected_schema, - ) - .unwrap(); - let physical_filter2: Arc = binary( - col("tag1", &arrow_projected_schema).unwrap(), - Operator::Eq, - lit("test_tag"), - &arrow_projected_schema, - ) - .unwrap(); - let physical_filter: Arc = binary( - physical_filter1, - Operator::And, - physical_filter2, - &arrow_projected_schema, - ) - .unwrap(); - - // Aggr and group by - let group_by = PhysicalGroupBy::new_single(vec![ - ( - col("tag1", &arrow_projected_schema).unwrap(), - "tag1".to_string(), - ), - ( - col("tag2", &arrow_projected_schema).unwrap(), - "tag2".to_string(), - ), - ]); - - let aggr_exprs: Vec> = vec![ - Arc::new(Count::new( - col("value", &arrow_projected_schema).unwrap(), - "COUNT(value)".to_string(), - DataType::Int64, - )), - Arc::new(Count::new( - col("field2", &arrow_projected_schema).unwrap(), - "COUNT(field2)".to_string(), - DataType::Int64, - )), - ]; - - // Build the physical plan. - let predicate = PredicateBuilder::default() - .add_pushdown_exprs(&logical_filters) - .extract_time_range(&test_schema, &logical_filters) - .build(); - let read_request = ReadRequest { - request_id: "42".into(), - opts: ReadOptions::default(), - projected_schema, - predicate, - metrics_collector: MetricsCollector::default(), - priority: Default::default(), - }; - - // Build the test catalog - let table = Arc::new(MemoryTable::new( - "__test_1".to_string(), - TableId::from(42), - build_schema_for_cpu(), - ANALYTIC_ENGINE_TYPE.to_string(), - )); - - let catalog_manager_builder = MockCatalogManagerBuilder::new( - "test_catalog".to_string(), - "test_schema".to_string(), - vec![table], - ); - let catalog_manager = catalog_manager_builder.build(); - - Self { - request: read_request, - sub_table_groups, - physical_filter, - physical_projection, - group_by, - aggr_exprs, - catalog_manager, - } - } - - // Return resolver - pub fn resolver(&self) -> Resolver { - Resolver::new( - Arc::new(MockRemotePhysicalPlanExecutor), - self.catalog_manager.clone(), - Box::new(MockScanBuilder), - Priority::High, - ) - } - - // Return test catalog manager - pub fn catalog_manager(&self) -> ManagerRef { - self.catalog_manager.clone() - } - - pub fn build_aggr_plan_with_input( - &self, - input: Arc, - ) -> Arc { - let input_schema = input.schema(); - let partial_aggregate = Arc::new( - AggregateExec::try_new( - AggregateMode::Partial, - self.group_by.clone(), - self.aggr_exprs.clone(), - vec![None], - vec![None], - input, - input_schema.clone(), - ) - .unwrap(), - ); - - // Aggr final - let groups = partial_aggregate.group_expr().expr().to_vec(); - - let merge = Arc::new(CoalescePartitionsExec::new(partial_aggregate)); - - let final_group: Vec<(Arc, String)> = groups - .iter() - .map(|(_expr, name)| Ok((col(name, &input_schema)?, name.clone()))) - .collect::>() - .unwrap(); - - let final_group_by = 
PhysicalGroupBy::new_single(final_group); - - Arc::new( - AggregateExec::try_new( - AggregateMode::Final, - final_group_by, - self.aggr_exprs.clone(), - vec![None], - vec![None], - merge, - input_schema, - ) - .unwrap(), - ) - } - - // Basic plan includes: - // Projection - // Filter - // Scan - pub fn build_basic_partitioned_table_plan(&self) -> Arc { - self.build_basic_partitioned_table_plan_with_sub_tables(self.sub_table_groups[0].clone()) - } - - pub fn build_basic_partitioned_table_plan_with_sub_tables( - &self, - sub_tables: Vec, - ) -> Arc { - let unresolved_scan = Arc::new(UnresolvedPartitionedScan::new( - "test", - sub_tables, - self.request.clone(), - None, - )); - - let filter: Arc = - Arc::new(FilterExec::try_new(self.physical_filter.clone(), unresolved_scan).unwrap()); - - Arc::new(ProjectionExec::try_new(self.physical_projection.clone(), filter).unwrap()) - } - - // Basic plan includes: - // Projection - // Filter - // Scan - pub fn build_basic_sub_table_plan(&self) -> Arc { - let table_scan_ctx = TableScanContext { - batch_size: self.request.opts.batch_size, - read_parallelism: self.request.opts.read_parallelism, - projected_schema: self.request.projected_schema.clone(), - predicate: self.request.predicate.clone(), - }; - - let unresolved_scan = Arc::new(UnresolvedSubTableScan { - table: self.sub_table_groups[0][0].clone(), - table_scan_ctx, - }); - - let filter: Arc = - Arc::new(FilterExec::try_new(self.physical_filter.clone(), unresolved_scan).unwrap()); - - Arc::new(ProjectionExec::try_new(self.physical_projection.clone(), filter).unwrap()) - } - - // Plan that should not be processed by resolver. - pub fn build_unprocessed_plan(&self) -> Arc { - let mock_scan = Arc::new(MockScan { - request: self.request.clone(), - }); - - Arc::new(ProjectionExec::try_new(self.physical_projection.clone(), mock_scan).unwrap()) - } - - // Aggregate push down plan includes: - // Aggr final - // Coalesce partition - // Aggr partial - // Scan - pub fn build_aggr_push_down_plan(&self) -> Arc { - // Scan - let unresolved_scan = Arc::new(UnresolvedPartitionedScan::new( - "test", - self.sub_table_groups[0].clone(), - self.request.clone(), - None, - )); - - self.build_aggr_plan_with_input(unresolved_scan) - } - - // Compunded aggregate push down plan includes: - // Aggr final - // Coalesce partition - // Aggr partial - // Projection - // Filter - // Scan - pub fn build_compounded_aggr_push_down_plan(&self) -> Arc { - let basic_plan = self.build_basic_partitioned_table_plan(); - self.build_aggr_plan_with_input(basic_plan) - } - - // Union plan includes: - // Union - // Scan - // Scan - pub fn build_union_plan(&self) -> Arc { - // UnionExec - let partitioned_scan_0 = self - .build_basic_partitioned_table_plan_with_sub_tables(self.sub_table_groups[0].clone()); - let partitioned_scan_1 = self - .build_basic_partitioned_table_plan_with_sub_tables(self.sub_table_groups[1].clone()); - let union = UnionExec::new(vec![partitioned_scan_0, partitioned_scan_1]); - - Arc::new(union) - } -} - -// Mock function registry -struct MockFunctionRegistry; - -impl FunctionRegistry for MockFunctionRegistry { - fn udfs(&self) -> std::collections::HashSet { - unimplemented!() - } - - fn udf(&self, _name: &str) -> DfResult> { - unimplemented!() - } - - fn udaf(&self, _name: &str) -> DfResult> { - unimplemented!() - } - - fn udwf(&self, _name: &str) -> DfResult> { - unimplemented!() - } -} - -// Mock scan and its builder -#[derive(Debug)] -struct MockScanBuilder; - -#[async_trait] -impl ExecutableScanBuilder for 
MockScanBuilder { - async fn build( - &self, - _table: TableRef, - ctx: TableScanContext, - priority: Priority, - ) -> datafusion::error::Result> { - let request = ReadRequest { - request_id: RequestId::from("test"), - opts: ReadOptions { - batch_size: ctx.batch_size, - read_parallelism: ctx.read_parallelism, - deadline: None, - }, - projected_schema: ctx.projected_schema.clone(), - predicate: ctx.predicate.clone(), - metrics_collector: MetricsCollector::default(), - priority, - }; - - Ok(Arc::new(MockScan { request })) - } -} - -#[derive(Debug)] -struct MockScan { - request: ReadRequest, -} - -impl ExecutionPlan for MockScan { - fn as_any(&self) -> &dyn std::any::Any { - self - } - - fn schema(&self) -> arrow::datatypes::SchemaRef { - self.request.projected_schema.to_projected_arrow_schema() - } - - fn output_partitioning(&self) -> datafusion::physical_plan::Partitioning { - datafusion::physical_plan::Partitioning::UnknownPartitioning( - self.request.opts.read_parallelism, - ) - } - - fn output_ordering(&self) -> Option<&[datafusion::physical_expr::PhysicalSortExpr]> { - None - } - - fn children(&self) -> Vec> { - vec![] - } - - fn with_new_children( - self: Arc, - _children: Vec>, - ) -> datafusion::error::Result> { - unimplemented!() - } - - fn execute( - &self, - _partition: usize, - _context: Arc, - ) -> datafusion::error::Result { - unimplemented!() - } - - fn statistics(&self) -> DfResult { - Ok(datafusion::physical_plan::Statistics::new_unknown( - &self.schema(), - )) - } -} - -impl DisplayAs for MockScan { - fn fmt_as( - &self, - _t: datafusion::physical_plan::DisplayFormatType, - f: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - write!(f, "MockScan") - } -} - -// Mock remote executor -#[derive(Debug, Clone)] -struct MockRemotePhysicalPlanExecutor; - -impl RemotePhysicalPlanExecutor for MockRemotePhysicalPlanExecutor { - fn execute( - &self, - _task_context: RemoteTaskContext, - _table: TableIdentifier, - _plan: Arc, - ) -> DfResult>> { - unimplemented!() - } -} - -/// Used in [PartitionedScanStream]'s testing -pub struct MockPartitionedScanStreamBuilder { - schema: SchemaRef, - case: PartitionedScanStreamCase, -} - -#[derive(Clone, Copy)] -pub enum PartitionedScanStreamCase { - InitializeFailed, - PollFailed, - Success, -} - -impl MockPartitionedScanStreamBuilder { - pub(crate) fn new(case: PartitionedScanStreamCase) -> Self { - let schema = Arc::new(Schema::empty()); - Self { schema, case } - } - - pub(crate) fn build(&self) -> PartitionedScanStream { - let stream_future: BoxFuture<'static, DfResult> = match self.case - { - PartitionedScanStreamCase::InitializeFailed => { - Box::pin( - async move { Err(DataFusionError::Internal("failed to init".to_string())) }, - ) - } - PartitionedScanStreamCase::PollFailed => { - let error_stream = self.build_error_record_stream(); - Box::pin(async move { Ok(error_stream) }) - } - PartitionedScanStreamCase::Success => { - let success_stream = self.build_success_record_stream(); - Box::pin(async move { Ok(success_stream) }) - } - }; - - PartitionedScanStream::new( - stream_future, - self.schema.clone(), - MetricsCollector::default(), - ) - } - - #[inline] - fn build_error_record_stream(&self) -> SendableRecordBatchStream { - Box::pin(ErrorRecordBatchStream::new(self.schema.clone())) - } - - #[inline] - fn build_success_record_stream(&self) -> SendableRecordBatchStream { - Box::pin(EmptyRecordBatchStream::new(self.schema.clone())) - } -} - -/// ErrorRecordBatchStream which will produce error results -pub struct ErrorRecordBatchStream { - 
/// Schema wrapped by Arc - schema: SchemaRef, - - /// Mark the stream is terminated. - done: bool, -} - -impl ErrorRecordBatchStream { - pub fn new(schema: SchemaRef) -> Self { - Self { - schema, - done: false, - } - } -} - -impl RecordBatchStream for ErrorRecordBatchStream { - fn schema(&self) -> SchemaRef { - self.schema.clone() - } -} - -impl Stream for ErrorRecordBatchStream { - type Item = DfResult; - - fn poll_next(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { - if self.done { - return Poll::Ready(None); - } - - self.get_mut().done = true; - Poll::Ready(Some(Err(DataFusionError::Internal( - "failed to poll".to_string(), - )))) - } -} diff --git a/src/df_engine_extensions/src/lib.rs b/src/df_engine_extensions/src/lib.rs deleted file mode 100644 index 902ac75218..0000000000 --- a/src/df_engine_extensions/src/lib.rs +++ /dev/null @@ -1,20 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -pub mod codec; -pub mod dist_sql_query; -pub mod metrics; diff --git a/src/df_engine_extensions/src/metrics.rs b/src/df_engine_extensions/src/metrics.rs deleted file mode 100644 index 97237544e5..0000000000 --- a/src/df_engine_extensions/src/metrics.rs +++ /dev/null @@ -1,28 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use lazy_static::lazy_static; -use prometheus::{register_int_counter_vec, IntCounterVec}; - -lazy_static! { - pub static ref PUSH_DOWN_PLAN_COUNTER: IntCounterVec = register_int_counter_vec!( - "push_down_plan", - "partitioned table push down type", - &["type"] - ) - .unwrap(); -} diff --git a/src/df_operator/Cargo.toml b/src/df_operator/Cargo.toml deleted file mode 100644 index b2df35e118..0000000000 --- a/src/df_operator/Cargo.toml +++ /dev/null @@ -1,44 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "df_operator" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -arrow = { workspace = true } -base64 = { workspace = true } -bincode = "1.3" -chrono = { workspace = true } -common_types = { workspace = true } -datafusion = { workspace = true } -generic_error = { workspace = true } -hyperloglog = { workspace = true, features = ["with_serde"] } -macros = { workspace = true } -smallvec = { workspace = true } -snafu = { workspace = true } diff --git a/src/df_operator/src/aggregate.rs b/src/df_operator/src/aggregate.rs deleted file mode 100644 index 592c2e3561..0000000000 --- a/src/df_operator/src/aggregate.rs +++ /dev/null @@ -1,189 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Aggregate functions. - -use std::{fmt, ops::Deref}; - -use arrow::array::ArrayRef as DfArrayRef; -use common_types::column_block::ColumnBlock; -use datafusion::{ - error::{DataFusionError, Result as DfResult}, - physical_plan::Accumulator as DfAccumulator, - scalar::ScalarValue as DfScalarValue, -}; -use generic_error::GenericError; -use macros::define_result; -use snafu::Snafu; - -use crate::functions::ScalarValue; - -#[derive(Debug, Snafu)] -#[snafu(visibility(pub(crate)))] -pub enum Error { - #[snafu(display("Failed to get state, err:{}", source))] - GetState { source: GenericError }, - - #[snafu(display("Failed to merge state, err:{}", source))] - MergeState { source: GenericError }, -} - -define_result!(Error); - -// TODO: Use `Datum` rather than `ScalarValue`. 
-pub struct State(Vec); - -impl State { - /// Convert to a set of ScalarValues - fn into_state(self) -> Vec { - self.0 - } -} - -impl From for State { - fn from(value: ScalarValue) -> Self { - Self(vec![value.into_df_scalar_value()]) - } -} - -pub struct Input<'a>(&'a [ColumnBlock]); - -impl<'a> Input<'a> { - pub fn num_columns(&self) -> usize { - self.0.len() - } - - pub fn column(&self, col_idx: usize) -> Option<&ColumnBlock> { - self.0.get(col_idx) - } - - pub fn is_empty(&self) -> bool { - self.0.is_empty() - } -} - -pub struct StateRef<'a>(Input<'a>); - -impl<'a> Deref for StateRef<'a> { - type Target = Input<'a>; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -/// An accumulator represents a stateful object that lives throughout the -/// evaluation of multiple rows and generically accumulates values. -/// -/// An accumulator knows how to: -/// * update its state from inputs via `update` -/// * convert its internal state to column blocks -/// * update its state from multiple accumulators' states via `merge` -/// * compute the final value from its internal state via `evaluate` -pub trait Accumulator: Send + Sync + fmt::Debug { - /// Returns the state of the accumulator at the end of the accumulation. - // in the case of an average on which we track `sum` and `n`, this function - // should return a vector of two values, sum and n. - // TODO: Use `Datum` rather than `ScalarValue`. - fn state(&self) -> Result; - - /// updates the accumulator's state from column blocks. - fn update(&mut self, values: Input) -> Result<()>; - - /// updates the accumulator's state from column blocks. - fn merge(&mut self, states: StateRef) -> Result<()>; - - /// returns its value based on its current state. - // TODO: Use `Datum` rather than `ScalarValue`. - fn evaluate(&self) -> Result; -} - -#[derive(Debug)] -pub struct ToDfAccumulator { - accumulator: T, -} - -impl ToDfAccumulator { - pub fn new(accumulator: T) -> Self { - Self { accumulator } - } -} - -impl DfAccumulator for ToDfAccumulator { - fn state(&self) -> DfResult> { - let state = self.accumulator.state().map_err(|e| { - DataFusionError::Execution(format!("Accumulator failed to get state, err:{e}")) - })?; - Ok(state.into_state()) - } - - fn update_batch(&mut self, values: &[DfArrayRef]) -> DfResult<()> { - if values.is_empty() { - return Ok(()); - }; - - let column_blocks = values - .iter() - .map(|array| { - ColumnBlock::try_cast_arrow_array_ref(array).map_err(|e| { - DataFusionError::Execution(format!( - "Accumulator failed to cast arrow array to column block, column, err:{e}" - )) - }) - }) - .collect::>>()?; - - let input = Input(&column_blocks); - self.accumulator.update(input).map_err(|e| { - DataFusionError::Execution(format!("Accumulator failed to update, err:{e}")) - }) - } - - fn merge_batch(&mut self, states: &[DfArrayRef]) -> DfResult<()> { - if states.is_empty() { - return Ok(()); - }; - - let column_blocks = states - .iter() - .map(|array| { - ColumnBlock::try_cast_arrow_array_ref(array).map_err(|e| { - DataFusionError::Execution(format!( - "Accumulator failed to cast arrow array to column block, column, err:{e}" - )) - }) - }) - .collect::>>()?; - - let state_ref = StateRef(Input(&column_blocks)); - self.accumulator.merge(state_ref).map_err(|e| { - DataFusionError::Execution(format!("Accumulator failed to merge, err:{e}")) - }) - } - - fn evaluate(&self) -> DfResult { - let value = self.accumulator.evaluate().map_err(|e| { - DataFusionError::Execution(format!("Accumulator failed to evaluate, err:{e}")) - })?; - - 
Ok(value.into_df_scalar_value()) - } - - fn size(&self) -> usize { - std::mem::size_of_val(self) - } -} diff --git a/src/df_operator/src/functions.rs b/src/df_operator/src/functions.rs deleted file mode 100644 index d0291e4275..0000000000 --- a/src/df_operator/src/functions.rs +++ /dev/null @@ -1,334 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Functions. - -use std::{ - hash::{Hash, Hasher}, - sync::Arc, -}; - -use arrow::datatypes::DataType; -use common_types::{column_block::ColumnBlock, datum::DatumKind}; -use datafusion::{ - error::DataFusionError, - logical_expr::{ - AccumulatorFactoryFunction, ReturnTypeFunction, ScalarFunctionImplementation, - Signature as DfSignature, StateTypeFunction, TypeSignature as DfTypeSignature, Volatility, - }, - physical_plan::ColumnarValue as DfColumnarValue, - scalar::ScalarValue as DfScalarValue, -}; -use generic_error::GenericError; -use macros::define_result; -use smallvec::SmallVec; -use snafu::{ResultExt, Snafu}; - -use crate::aggregate::{Accumulator, ToDfAccumulator}; - -// Most functions have no more than 5 args. -const FUNC_ARG_NUM: usize = 5; - -#[derive(Debug, Snafu)] -#[snafu(visibility(pub(crate)))] -pub enum Error { - #[snafu(display("Failed to convert array to ColumnarValue, err:{}", source))] - InvalidArray { - source: common_types::column_block::Error, - }, - - #[snafu(display("Invalid function arguments, err:{}", source))] - InvalidArguments { source: GenericError }, - - #[snafu(display("Failed to execute function, err:{}", source))] - CallFunction { source: GenericError }, -} - -define_result!(Error); - -/// A dynamically typed, nullable single value. -// TODO(yingwen): Can we use Datum? 
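As a rough illustration of the `Accumulator` trait being removed in `aggregate.rs` above, a minimal row-counting accumulator could look like the sketch below; `RowCount` is a hypothetical type and the merge step is deliberately elided.

use df_operator::aggregate::{Accumulator, Input, Result, State, StateRef};
use df_operator::functions::ScalarValue;

#[derive(Debug, Default)]
struct RowCount {
    count: u64,
}

impl Accumulator for RowCount {
    fn state(&self) -> Result<State> {
        // Partial state: the running count carried as a single scalar value.
        Ok(State::from(ScalarValue::from(self.count)))
    }

    fn update(&mut self, values: Input) -> Result<()> {
        // Count the rows of the first input column, if any.
        if let Some(col) = values.column(0) {
            self.count += col.num_rows() as u64;
        }
        Ok(())
    }

    fn merge(&mut self, states: StateRef) -> Result<()> {
        // A real merge would decode the partial counts carried in `states`;
        // that decoding is elided in this sketch.
        let _ = states.num_columns();
        Ok(())
    }

    fn evaluate(&self) -> Result<ScalarValue> {
        Ok(ScalarValue::from(self.count))
    }
}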
-#[derive(Debug)] -pub struct ScalarValue(DfScalarValue); - -impl ScalarValue { - pub(crate) fn into_df_scalar_value(self) -> DfScalarValue { - self.0 - } - - fn from_df_scalar_value(df_scalar: &DfScalarValue) -> Self { - Self(df_scalar.clone()) - } - - pub fn as_str(&self) -> Option<&str> { - match &self.0 { - DfScalarValue::Utf8(value_opt) => value_opt.as_ref().map(|v| v.as_str()), - _ => None, - } - } -} - -impl From for ScalarValue { - fn from(value: String) -> Self { - Self(DfScalarValue::Utf8(Some(value))) - } -} - -impl From for ScalarValue { - fn from(value: u64) -> Self { - Self(value.into()) - } -} - -pub struct ScalarValueRef<'a>(&'a DfScalarValue); - -impl<'a> ScalarValueRef<'a> { - pub fn as_str(&self) -> Option<&str> { - match self.0 { - DfScalarValue::Utf8(value_opt) | DfScalarValue::LargeUtf8(value_opt) => { - value_opt.as_ref().map(|v| v.as_str()) - } - _ => None, - } - } -} - -impl<'a> From<&'a DfScalarValue> for ScalarValueRef<'a> { - fn from(value: &DfScalarValue) -> ScalarValueRef { - ScalarValueRef(value) - } -} - -impl<'a> Hash for ScalarValueRef<'a> { - fn hash(&self, state: &mut H) { - self.0.hash(state) - } -} - -/// Represent a value of function result. -#[derive(Debug)] -pub enum ColumnarValue { - /// Array of values. - Array(ColumnBlock), - /// A single value. - Scalar(ScalarValue), -} - -impl ColumnarValue { - fn into_df_columnar_value(self) -> DfColumnarValue { - match self { - ColumnarValue::Array(v) => DfColumnarValue::Array(v.to_arrow_array_ref()), - ColumnarValue::Scalar(v) => DfColumnarValue::Scalar(v.into_df_scalar_value()), - } - } - - fn try_from_df_columnar_value(df_value: &DfColumnarValue) -> Result { - let columnar_value = match df_value { - DfColumnarValue::Array(array) => { - let column_block = - ColumnBlock::try_cast_arrow_array_ref(array).context(InvalidArray)?; - ColumnarValue::Array(column_block) - } - DfColumnarValue::Scalar(v) => { - ColumnarValue::Scalar(ScalarValue::from_df_scalar_value(v)) - } - }; - - Ok(columnar_value) - } -} - -/// A function's TypeSignature. -#[derive(Debug)] -pub enum TypeSignature { - /// exact number of arguments of an exact type - Exact(Vec), - /// fixed number of arguments of an arbitrary but equal type out of a list - /// of valid types - // A function of one argument of double is `Uniform(1, vec![DatumKind::Double])` - // A function of one argument of double or uint64 is `Uniform(1, vec![DatumKind::Double, - // DatumKind::UInt64])` - Uniform(usize, Vec), - /// One of a list of signatures - OneOf(Vec), -} - -impl TypeSignature { - pub(crate) fn to_datafusion_signature(&self) -> DfSignature { - DfSignature::new(self.to_datafusion_type_signature(), Volatility::Immutable) - } - - fn to_datafusion_type_signature(&self) -> DfTypeSignature { - match self { - TypeSignature::Exact(kinds) => { - let data_types = kinds.iter().map(|v| DataType::from(*v)).collect(); - DfTypeSignature::Exact(data_types) - } - TypeSignature::Uniform(num, kinds) => { - let data_types = kinds.iter().map(|v| DataType::from(*v)).collect(); - DfTypeSignature::Uniform(*num, data_types) - } - TypeSignature::OneOf(sigs) => { - let df_sigs = sigs - .iter() - .map(|v| v.to_datafusion_type_signature()) - .collect(); - DfTypeSignature::OneOf(df_sigs) - } - } - } -} - -/// A scalar function's return type. 
-#[derive(Debug)] -pub struct ReturnType { - kind: DatumKind, -} - -impl ReturnType { - pub(crate) fn to_datafusion_return_type(&self) -> ReturnTypeFunction { - let data_type = Arc::new(DataType::from(self.kind)); - Arc::new(move |_| Ok(data_type.clone())) - } -} - -pub struct ScalarFunction { - signature: TypeSignature, - return_type: ReturnType, - df_scalar_fn: ScalarFunctionImplementation, -} - -impl ScalarFunction { - pub fn make_by_fn(signature: TypeSignature, return_type: DatumKind, func: F) -> Self - where - F: Fn(&[ColumnarValue]) -> Result + Send + Sync + 'static, - { - let return_type = ReturnType { kind: return_type }; - - // Adapter to map func to Fn(&[DfColumnarValue]) -> Result - let df_adapter = move |df_args: &[DfColumnarValue]| { - // Convert df_args from DfColumnarValue to ColumnarValue. - let mut values: SmallVec<[ColumnarValue; FUNC_ARG_NUM]> = - SmallVec::with_capacity(df_args.len()); - for df_arg in df_args { - let value = ColumnarValue::try_from_df_columnar_value(df_arg).map_err(|e| { - DataFusionError::Internal(format!( - "Failed to convert datafusion columnar value, err:{e}" - )) - })?; - values.push(value); - } - - // Execute our function. - let result_value = func(&values).map_err(|e| { - DataFusionError::Execution(format!("Failed to execute function, err:{e}")) - })?; - - // Convert the result value to DfColumnarValue. - Ok(result_value.into_df_columnar_value()) - }; - - let df_scalar_fn = Arc::new(df_adapter); - - Self { - signature, - return_type, - df_scalar_fn, - } - } - - #[inline] - pub fn signature(&self) -> &TypeSignature { - &self.signature - } - - #[inline] - pub fn return_type(&self) -> &ReturnType { - &self.return_type - } - - #[inline] - pub(crate) fn to_datafusion_function(&self) -> ScalarFunctionImplementation { - self.df_scalar_fn.clone() - } -} - -pub struct AggregateFunction { - type_signature: TypeSignature, - return_type: ReturnType, - df_accumulator: AccumulatorFactoryFunction, - state_type: Vec, -} - -impl AggregateFunction { - pub fn make_by_fn( - type_signature: TypeSignature, - return_type: DatumKind, - state_type: Vec, - accumulator_fn: F, - ) -> Self - where - F: Fn(&DataType) -> Result + Send + Sync + 'static, - A: Accumulator + 'static, - { - // Create accumulator. - let df_adapter = move |data_type: &DataType| { - let accumulator = accumulator_fn(data_type).map_err(|e| { - DataFusionError::Execution(format!("Failed to create accumulator, err:{e}")) - })?; - let accumulator = Box::new(ToDfAccumulator::new(accumulator)); - - Ok(accumulator as _) - }; - let df_accumulator = Arc::new(df_adapter); - - // Create return type. 
- let return_type = ReturnType { kind: return_type }; - - Self { - type_signature, - return_type, - df_accumulator, - state_type, - } - } - - #[inline] - pub fn signature(&self) -> &TypeSignature { - &self.type_signature - } - - #[inline] - pub fn return_type(&self) -> &ReturnType { - &self.return_type - } - - #[inline] - pub(crate) fn to_datafusion_accumulator(&self) -> AccumulatorFactoryFunction { - self.df_accumulator.clone() - } - - pub(crate) fn to_datafusion_state_type(&self) -> StateTypeFunction { - let data_types = Arc::new( - self.state_type - .iter() - .map(|kind| DataType::from(*kind)) - .collect::>(), - ); - Arc::new(move |_| Ok(data_types.clone())) - } -} diff --git a/src/df_operator/src/lib.rs b/src/df_operator/src/lib.rs deleted file mode 100644 index aede7dd0dd..0000000000 --- a/src/df_operator/src/lib.rs +++ /dev/null @@ -1,26 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! UDF support. - -pub mod aggregate; -pub mod functions; -pub mod registry; -pub mod scalar; -pub mod udaf; -pub mod udfs; -pub mod visitor; diff --git a/src/df_operator/src/registry.rs b/src/df_operator/src/registry.rs deleted file mode 100644 index d8b9fa082b..0000000000 --- a/src/df_operator/src/registry.rs +++ /dev/null @@ -1,163 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Function registry. 
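The `make_by_fn` factories above are how the removed built-in UDFs are assembled; as a hedged sketch (the `str_byte_len` function and its semantics are invented for illustration, and only the scalar-argument case is handled), a custom scalar UDF could be put together roughly like this:

use common_types::datum::DatumKind;
use df_operator::functions::{
    ColumnarValue, Result, ScalarFunction, ScalarValue, TypeSignature,
};
use df_operator::scalar::ScalarUdf;

fn example_udf() -> ScalarUdf {
    // Accepts exactly one string argument and returns a uint64.
    let signature = TypeSignature::Exact(vec![DatumKind::String]);

    let func = |args: &[ColumnarValue]| -> Result<ColumnarValue> {
        // Only the scalar-argument case is handled here; a full UDF would also
        // map over ColumnarValue::Array inputs row by row.
        let len = match args.first() {
            Some(ColumnarValue::Scalar(v)) => v.as_str().map(|s| s.len() as u64).unwrap_or(0),
            _ => 0,
        };
        Ok(ColumnarValue::Scalar(ScalarValue::from(len)))
    };

    let scalar_fn = ScalarFunction::make_by_fn(signature, DatumKind::UInt64, func);
    ScalarUdf::create("str_byte_len", scalar_fn)
}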
- -use std::{ - collections::{HashMap, HashSet}, - sync::Arc, -}; - -use datafusion::{ - error::{DataFusionError, Result as DfResult}, - execution::FunctionRegistry as DfFunctionRegistry, - logical_expr::{ - AggregateUDF as DfAggregateUDF, ScalarUDF as DfScalarUDF, WindowUDF as DfWindowUDF, - }, -}; -use macros::define_result; -use snafu::{ensure, Backtrace, Snafu}; - -use crate::{scalar::ScalarUdf, udaf::AggregateUdf, udfs}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Udf already exists, name:{}.\nBacktrace:\n{}", name, backtrace))] - UdfExists { name: String, backtrace: Backtrace }, -} - -define_result!(Error); - -/// A registry knows how to build logical expressions out of user-defined -/// function' names -// TODO: maybe unnecessary to define inner trait rather than using datafusion's? -pub trait FunctionRegistry { - fn register_udf(&mut self, udf: ScalarUdf) -> Result<()>; - - fn register_udaf(&mut self, udaf: AggregateUdf) -> Result<()>; - - fn find_udf(&self, name: &str) -> Result>; - - fn find_udaf(&self, name: &str) -> Result>; - - fn list_udfs(&self) -> Result>; - - // TODO: can we remove restriction about `Send` and `Sync`? - fn to_df_function_registry(self: Arc) -> Arc; -} - -/// Default function registry. -#[derive(Debug, Default)] -pub struct FunctionRegistryImpl { - scalar_functions: HashMap, - aggregate_functions: HashMap, -} - -impl FunctionRegistryImpl { - pub fn new() -> Self { - Self::default() - } - - /// Load all provided udfs. - pub fn load_functions(&mut self) -> Result<()> { - udfs::register_all_udfs(self) - } -} - -impl FunctionRegistry for FunctionRegistryImpl { - fn register_udf(&mut self, udf: ScalarUdf) -> Result<()> { - ensure!( - !self.scalar_functions.contains_key(udf.name()), - UdfExists { name: udf.name() } - ); - - self.scalar_functions.insert(udf.name().to_string(), udf); - - Ok(()) - } - - fn register_udaf(&mut self, udaf: AggregateUdf) -> Result<()> { - ensure!( - !self.aggregate_functions.contains_key(udaf.name()), - UdfExists { name: udaf.name() } - ); - - self.aggregate_functions - .insert(udaf.name().to_string(), udaf); - - Ok(()) - } - - fn find_udf(&self, name: &str) -> Result> { - let udf = self.scalar_functions.get(name).cloned(); - Ok(udf) - } - - fn find_udaf(&self, name: &str) -> Result> { - let udaf = self.aggregate_functions.get(name).cloned(); - Ok(udaf) - } - - fn list_udfs(&self) -> Result> { - Ok(self.scalar_functions.values().cloned().collect()) - } - - fn to_df_function_registry(self: Arc) -> Arc { - Arc::new(DfFunctionRegistryAdapter(self)) - } -} - -struct DfFunctionRegistryAdapter(FunctionRegistryRef); - -impl DfFunctionRegistry for DfFunctionRegistryAdapter { - fn udfs(&self) -> HashSet { - self.0 - .list_udfs() - .expect("failed to list udfs") - .into_iter() - .map(|f| f.name().to_string()) - .collect() - } - - fn udf(&self, name: &str) -> DfResult> { - self.0 - .find_udf(name) - .map_err(|e| DataFusionError::Internal(format!("failed to find udf, err:{e}")))? - .ok_or(DataFusionError::Internal(format!( - "udf not found, name:{name}" - ))) - .map(|f| f.to_datafusion_udf()) - } - - fn udaf(&self, name: &str) -> DfResult> { - self.0 - .find_udaf(name) - .map_err(|e| DataFusionError::Internal(format!("failed to find udaf, err:{e}")))? 
- .ok_or(DataFusionError::Internal(format!( - "udaf not found, name:{name}" - ))) - .map(|f| f.to_datafusion_udaf()) - } - - fn udwf(&self, _name: &str) -> DfResult> { - Err(DataFusionError::Internal( - "no udwfs defined now".to_string(), - )) - } -} - -pub type FunctionRegistryRef = Arc; diff --git a/src/df_operator/src/scalar.rs b/src/df_operator/src/scalar.rs deleted file mode 100644 index 58e8214c1a..0000000000 --- a/src/df_operator/src/scalar.rs +++ /dev/null @@ -1,55 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Scalar udfs. - -use std::sync::Arc; - -use datafusion::physical_plan::udf::ScalarUDF; - -use crate::functions::ScalarFunction; - -/// Logical representation of a UDF. -#[derive(Debug, Clone)] -pub struct ScalarUdf { - /// DataFusion UDF. - df_udf: Arc, -} - -impl ScalarUdf { - #[allow(deprecated)] - pub fn create(name: &str, func: ScalarFunction) -> Self { - let signature = func.signature().to_datafusion_signature(); - let return_type = func.return_type().to_datafusion_return_type(); - let scalar_fn = func.to_datafusion_function(); - - let df_udf = Arc::new(ScalarUDF::new(name, &signature, &return_type, &scalar_fn)); - - Self { df_udf } - } - - #[inline] - pub fn name(&self) -> &str { - self.df_udf.name() - } - - /// Convert into datafusion's udf - #[inline] - pub fn to_datafusion_udf(&self) -> Arc { - self.df_udf.clone() - } -} diff --git a/src/df_operator/src/udaf.rs b/src/df_operator/src/udaf.rs deleted file mode 100644 index 44f3913673..0000000000 --- a/src/df_operator/src/udaf.rs +++ /dev/null @@ -1,61 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! UDAF support. - -use std::sync::Arc; - -use datafusion::physical_plan::udaf::AggregateUDF; - -use crate::functions::AggregateFunction; - -/// Logical representation of a UDAF. -#[derive(Debug, Clone)] -pub struct AggregateUdf { - /// DataFusion UDAF. 
- df_udaf: Arc, -} - -impl AggregateUdf { - #[allow(deprecated)] - pub fn create(name: &str, func: AggregateFunction) -> Self { - let signature = func.signature().to_datafusion_signature(); - let return_type = func.return_type().to_datafusion_return_type(); - let accumulator = func.to_datafusion_accumulator(); - let state_type = func.to_datafusion_state_type(); - - let df_udaf = Arc::new(AggregateUDF::new( - name, - &signature, - &return_type, - &accumulator, - &state_type, - )); - - Self { df_udaf } - } - - #[inline] - pub fn name(&self) -> &str { - self.df_udaf.name() - } - - #[inline] - pub fn to_datafusion_udaf(&self) -> Arc { - self.df_udaf.clone() - } -} diff --git a/src/df_operator/src/udfs/mod.rs b/src/df_operator/src/udfs/mod.rs deleted file mode 100644 index 20624742eb..0000000000 --- a/src/df_operator/src/udfs/mod.rs +++ /dev/null @@ -1,31 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! UDFs - -use crate::registry::{FunctionRegistry, Result}; - -mod thetasketch_distinct; -mod time_bucket; - -pub fn register_all_udfs(registry: &mut dyn FunctionRegistry) -> Result<()> { - // Register all udfs - time_bucket::register_to_registry(registry)?; - thetasketch_distinct::register_to_registry(registry)?; - - Ok(()) -} diff --git a/src/df_operator/src/udfs/thetasketch_distinct.rs b/src/df_operator/src/udfs/thetasketch_distinct.rs deleted file mode 100644 index ff7d59ffa8..0000000000 --- a/src/df_operator/src/udfs/thetasketch_distinct.rs +++ /dev/null @@ -1,202 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! thetasketch_distinct() udaf. 
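Putting the registry and the UDF registration above together, a registry preloaded with the two built-ins removed by this patch could be built along these lines (a sketch; `build_registry` is an invented helper and it assumes `FunctionRegistryImpl` satisfies the `Send + Sync` bound required by `FunctionRegistryRef`):

use std::sync::Arc;

use df_operator::registry::{
    FunctionRegistry, FunctionRegistryImpl, FunctionRegistryRef, Result,
};

fn build_registry() -> Result<FunctionRegistryRef> {
    let mut registry = FunctionRegistryImpl::new();
    // `load_functions` calls `register_all_udfs`, installing the two
    // built-ins removed by this patch.
    registry.load_functions()?;

    assert!(registry.find_udf("time_bucket")?.is_some());
    assert!(registry.find_udaf("thetasketch_distinct")?.is_some());

    Ok(Arc::new(registry))
}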
- -use std::fmt; - -use arrow::datatypes::DataType; -use common_types::datum::DatumKind; -use generic_error::BoxError; -use hyperloglog::HyperLogLog; -use macros::define_result; -use snafu::{ensure, OptionExt, ResultExt, Snafu}; - -use crate::{ - aggregate::{self, Accumulator, GetState, Input, MergeState, State, StateRef}, - functions::{AggregateFunction, ScalarValue, TypeSignature}, - registry::{self, FunctionRegistry}, - udaf::AggregateUdf, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Invalid argument number."))] - InvalidArgNum, - - #[snafu(display("Invalid state len."))] - InvalidStateLen, - - #[snafu(display("Invalid state, state is not string."))] - StateNotString, - - #[snafu(display("Failed to decode base64 of hll, err:{}.", source))] - DecodeBase64 { source: base64::DecodeError }, - - #[snafu(display("Invalid state, failed to decode hll, err:{}.", source))] - DecodeHll { source: bincode::Error }, - - #[snafu(display("Invalid state, failed to encode hll, err:{}.", source))] - EncodeHll { source: bincode::Error }, -} - -define_result!(Error); - -const HLL_ERROR_RATE: f64 = 0.01; -// Hll seed: -const HLL_KEY: u128 = 0; - -pub fn register_to_registry(registry: &mut dyn FunctionRegistry) -> registry::Result<()> { - registry.register_udaf(new_udaf()) -} - -fn new_udaf() -> AggregateUdf { - let aggregate_function = new_function(); - - AggregateUdf::create("thetasketch_distinct", aggregate_function) -} - -pub(crate) fn new_function() -> AggregateFunction { - // Always use the same hasher with same keys. - let hll = HyperLogLog::new_deterministic(HLL_ERROR_RATE, HLL_KEY); - - let accumulator_fn = move |_: &DataType| { - let distinct = HllDistinct { - hll: HyperLogLog::new_from_template(&hll), - }; - - Ok(distinct) - }; - - let type_signature = make_type_signature(); - let state_type = make_state_type(); - - AggregateFunction::make_by_fn( - type_signature, - DatumKind::UInt64, - state_type, - accumulator_fn, - ) -} - -fn make_type_signature() -> TypeSignature { - TypeSignature::Uniform( - 1, - vec![ - DatumKind::Timestamp, - DatumKind::Double, - DatumKind::Varbinary, - DatumKind::String, - DatumKind::UInt64, - ], - ) -} - -fn make_state_type() -> Vec { - vec![DatumKind::String] -} - -/// Distinct counter based on HyperLogLog. -/// -/// The HyperLogLogs must be initialized with same hash seeds (new from same -/// template). -struct HllDistinct { - hll: HyperLogLog, -} - -// binary datatype to scalarvalue. -// TODO: maybe we can remove base64 encoding? -impl HllDistinct { - fn merge_impl(&mut self, states: StateRef) -> Result<()> { - // The states are serialize from hll. - ensure!(states.num_columns() == 1, InvalidStateLen); - let merged_col = states.column(0).unwrap(); - - let num_rows = merged_col.num_rows(); - for row_idx in 0..num_rows { - let datum = merged_col.datum_view(row_idx); - // Try to deserialize the hll. - let hll_string = datum.into_str().context(StateNotString)?; - let hll_bytes = base64::decode(hll_string).context(DecodeBase64)?; - // Try to deserialize the hll. - let hll = bincode::deserialize(&hll_bytes).context(DecodeHll)?; - - // Merge the hll, note that the two hlls must created or serialized from the - // same template hll. - self.hll.merge(&hll); - } - - Ok(()) - } -} - -impl fmt::Debug for HllDistinct { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("HllDistinct") - .field("len", &self.hll.len()) - .finish() - } -} - -impl Accumulator for HllDistinct { - // TODO: maybe we can remove base64 encoding? 
- fn state(&self) -> aggregate::Result { - // Serialize `self.hll` to bytes. - let buf = bincode::serialize(&self.hll).box_err().context(GetState)?; - // HACK: DataFusion does not support creating a scalar from binary, so we need - // to use base64 to convert a binary into string. - let hll_string = base64::encode(buf); - - Ok(State::from(ScalarValue::from(hll_string))) - } - - fn update(&mut self, input: Input) -> aggregate::Result<()> { - if input.is_empty() { - return Ok(()); - } - - // Has found it not empty, so we can unwrap here. - let first_col = input.column(0).unwrap(); - let num_rows = first_col.num_rows(); - if num_rows == 0 { - return Ok(()); - } - - // Loop over the datums in the column blocks, insert them into hll. - let num_cols = input.num_columns(); - for col_idx in 0..num_cols { - let col = input.column(col_idx).unwrap(); - for row_idx in 0..num_rows { - let datum = col.datum_view(row_idx); - // Insert datum into hll. - self.hll.insert(&datum); - } - } - - Ok(()) - } - - fn merge(&mut self, states: StateRef) -> aggregate::Result<()> { - self.merge_impl(states).box_err().context(MergeState) - } - - fn evaluate(&self) -> aggregate::Result { - let count = self.hll.len() as u64; - - Ok(ScalarValue::from(count)) - } -} diff --git a/src/df_operator/src/udfs/time_bucket.rs b/src/df_operator/src/udfs/time_bucket.rs deleted file mode 100644 index 33c9cb7cea..0000000000 --- a/src/df_operator/src/udfs/time_bucket.rs +++ /dev/null @@ -1,342 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! time_bucket UDF. 
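The period argument accepted by this UDF follows an ISO-8601-like notation; as a small sketch usable inside this module (`period_examples` is an invented helper), the `Period` parser defined below behaves roughly as:

fn period_examples() -> Result<()> {
    // "PT..." covers sub-day units (seconds, minutes, hours); "P..." covers
    // day-or-larger units, where only P1W, P1M and P1Y are accepted.
    assert!(matches!(Period::parse("PT30S")?, Period::Second(30)));
    assert!(matches!(Period::parse("PT5M")?, Period::Minute(5)));
    assert!(matches!(Period::parse("P1D")?, Period::Day(1)));
    assert!(matches!(Period::parse("P1W")?, Period::Week));
    Ok(())
}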
- -use std::time::Duration; - -use chrono::{Datelike, FixedOffset, TimeZone}; -use common_types::{ - column_block::{ColumnBlock, ColumnBlockBuilder, TimestampColumn}, - datum::{Datum, DatumKind}, - time::Timestamp, -}; -use generic_error::BoxError; -use macros::define_result; -use snafu::{ensure, OptionExt, ResultExt, Snafu}; - -use crate::{ - functions::{CallFunction, ColumnarValue, InvalidArguments, ScalarFunction, TypeSignature}, - registry::{self, FunctionRegistry}, - scalar::ScalarUdf, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Invalid period, period:{}", period))] - InvalidPeriod { period: String }, - - #[snafu(display("Invalid period number, period:{}, err:{}", period, source))] - InvalidPeriodNumber { - period: String, - source: std::num::ParseIntError, - }, - - #[snafu(display("Invalid argument number."))] - InvalidArgNum, - - #[snafu(display("Invalid arguments, require timestamp column."))] - NotTimestampColumn, - - #[snafu(display("Invalid arguments, require period."))] - NotPeriod, - - #[snafu(display("Period of week only support P1W."))] - UnsupportedWeek, - - #[snafu(display("Period of month only support P1M."))] - UnsupportedMonth, - - #[snafu(display("Period of year only support P1Y."))] - UnsupportedYear, - - #[snafu(display( - "Failed to truncate timestamp, timestamp:{}, period:{:?}", - timestamp, - period - ))] - TruncateTimestamp { timestamp: i64, period: Period }, - - #[snafu(display("Failed to build result column, err:{}", source))] - BuildColumn { - source: common_types::column_block::Error, - }, -} - -define_result!(Error); - -/// Default timezone: +08:00 -const DEFAULT_TIMEZONE_OFFSET_SECS: i32 = 8 * 3600; - -pub fn register_to_registry(registry: &mut dyn FunctionRegistry) -> registry::Result<()> { - registry.register_udf(new_udf()) -} - -fn new_udf() -> ScalarUdf { - // args: - // - timestamp column. - // - period. - // - input timestamp format in PARTITION BY (unsed now). - // - input timezone (ignored now). - // - timestamp output format (ignored now). - let func = |args: &[ColumnarValue]| { - let bucket = TimeBucket::parse_args(args) - .box_err() - .context(InvalidArguments)?; - - let result_column = bucket.call().box_err().context(CallFunction)?; - - Ok(ColumnarValue::Array(result_column)) - }; - - let signature = make_signature(); - let scalar_function = ScalarFunction::make_by_fn(signature, DatumKind::Timestamp, func); - - ScalarUdf::create("time_bucket", scalar_function) -} - -fn make_signature() -> TypeSignature { - let sigs = vec![ - TypeSignature::Exact(vec![DatumKind::Timestamp, DatumKind::String]), - TypeSignature::Exact(vec![ - DatumKind::Timestamp, - DatumKind::String, - DatumKind::String, - ]), - TypeSignature::Exact(vec![ - DatumKind::Timestamp, - DatumKind::String, - DatumKind::String, - DatumKind::String, - ]), - TypeSignature::Exact(vec![ - DatumKind::Timestamp, - DatumKind::String, - DatumKind::String, - DatumKind::String, - DatumKind::String, - ]), - ]; - TypeSignature::OneOf(sigs) -} - -struct TimeBucket<'a> { - column: &'a TimestampColumn, - period: Period, -} - -impl<'a> TimeBucket<'a> { - fn parse_args(args: &[ColumnarValue]) -> Result { - ensure!(args.len() >= 2, InvalidArgNum); - - let column = match &args[0] { - ColumnarValue::Array(block) => block.as_timestamp().context(NotTimestampColumn)?, - _ => return NotTimestampColumn.fail(), - }; - let period = match &args[1] { - ColumnarValue::Scalar(value) => { - let period_str = value.as_str().context(NotPeriod)?; - Period::parse(period_str)? 
- } - _ => return NotPeriod.fail(), - }; - - Ok(TimeBucket { column, period }) - } - - fn call(&self) -> Result { - // TODO(tanruixiang) : mising is_dictionary params - let mut out_column_builder = - ColumnBlockBuilder::with_capacity(&DatumKind::Timestamp, self.column.num_rows(), false); - for ts_opt in self.column.iter() { - match ts_opt { - Some(ts) => { - let truncated = self.period.truncate(ts).context(TruncateTimestamp { - timestamp: ts, - period: self.period, - })?; - out_column_builder - .append(Datum::Timestamp(truncated)) - .context(BuildColumn)?; - } - None => { - out_column_builder - .append(Datum::Null) - .context(BuildColumn)?; - } - } - } - Ok(out_column_builder.build()) - } -} - -/// A time bucket period. -/// -/// e.g. -/// - PT1S -/// - PT1M -/// - PT1H -/// - P1D -/// - P1W -/// - P1M -/// - P1Y -#[derive(Debug, Clone, Copy)] -pub enum Period { - Second(u16), - Minute(u16), - Hour(u16), - Day(u16), - Week, - Month, - Year, -} - -impl Period { - fn parse(period: &str) -> Result { - ensure!(period.len() >= 3, InvalidPeriod { period }); - let is_pt = if period.starts_with("PT") { - true - } else if period.starts_with('P') { - false - } else { - return InvalidPeriod { period }.fail(); - }; - - let back = period.chars().last().context(InvalidPeriod { period })?; - let parsed = if is_pt { - let number = &period[2..period.len() - 1]; - let number = number - .parse::() - .context(InvalidPeriodNumber { period })?; - match back { - 'S' => Period::Second(number), - 'M' => Period::Minute(number), - 'H' => Period::Hour(number), - _ => return InvalidPeriod { period }.fail(), - } - } else { - let number = &period[1..period.len() - 1]; - let number = number - .parse::() - .context(InvalidPeriodNumber { period })?; - match back { - 'D' => Period::Day(number), - 'W' => { - ensure!(number == 1, UnsupportedWeek); - Period::Week - } - 'M' => { - ensure!(number == 1, UnsupportedMonth); - Period::Month - } - 'Y' => { - ensure!(number == 1, UnsupportedYear); - Period::Year - } - _ => return InvalidPeriod { period }.fail(), - } - }; - - Ok(parsed) - } - - fn truncate(&self, ts: Timestamp) -> Option { - const MINUTE_SECONDS: u64 = 60; - const HOUR_SECONDS: u64 = 60 * MINUTE_SECONDS; - - let truncated_ts = match self { - Period::Second(period) => { - let duration = Duration::from_secs(u64::from(*period)); - ts.truncate_by(duration) - } - Period::Minute(period) => { - let duration = Duration::from_secs(u64::from(*period) * MINUTE_SECONDS); - ts.truncate_by(duration) - } - Period::Hour(period) => { - let duration = Duration::from_secs(u64::from(*period) * HOUR_SECONDS); - ts.truncate_by(duration) - } - Period::Day(period) => Self::truncate_day(ts, *period)?, - Period::Week => Self::truncate_week(ts), - Period::Month => Self::truncate_month(ts), - Period::Year => Self::truncate_year(ts), - }; - - Some(truncated_ts) - } - - fn truncate_day(ts: Timestamp, period: u16) -> Option { - let offset = FixedOffset::east_opt(DEFAULT_TIMEZONE_OFFSET_SECS).expect("won't panic"); - // Convert to local time. Won't panic. - let datetime = offset.timestamp_millis_opt(ts.as_i64()).unwrap(); - - // Truncate day. Won't panic. 
- let day = datetime.day(); - let day = day - (day % u32::from(period)); - let truncated_datetime = offset - .with_ymd_and_hms(datetime.year(), datetime.month(), day, 0, 0, 0) - .unwrap(); - let truncated_ts = truncated_datetime.timestamp_millis(); - - Some(Timestamp::new(truncated_ts)) - } - - fn truncate_week(ts: Timestamp) -> Timestamp { - let offset = FixedOffset::east_opt(DEFAULT_TIMEZONE_OFFSET_SECS).expect("won't panic"); - // Convert to local time. Won't panic. - let datetime = offset.timestamp_millis_opt(ts.as_i64()).unwrap(); - - // Truncate week. Won't panic. - let week_offset = datetime.weekday().num_days_from_monday(); - let week_millis = 7 * 24 * 3600 * 1000; - let ts_offset = week_offset * week_millis; - // TODO(yingwen): Impl sub/divide for Timestamp - let week_millis = i64::from(week_millis); - let truncated_ts = (ts.as_i64() - i64::from(ts_offset)) / week_millis * week_millis; - - Timestamp::new(truncated_ts) - } - - fn truncate_month(ts: Timestamp) -> Timestamp { - let offset = FixedOffset::east_opt(DEFAULT_TIMEZONE_OFFSET_SECS).expect("won't panic"); - // Convert to local time. Won't panic. - let datetime = offset.timestamp_millis_opt(ts.as_i64()).unwrap(); - - // Truncate month. Won't panic. - let truncated_datetime = offset - .with_ymd_and_hms(datetime.year(), datetime.month(), 1, 0, 0, 0) - .unwrap(); - let truncated_ts = truncated_datetime.timestamp_millis(); - - Timestamp::new(truncated_ts) - } - - fn truncate_year(ts: Timestamp) -> Timestamp { - let offset = FixedOffset::east_opt(DEFAULT_TIMEZONE_OFFSET_SECS).expect("won't panic"); - // Convert to local time. Won't panic. - let datetime = offset.timestamp_millis_opt(ts.as_i64()).unwrap(); - - // Truncate year. Won't panic. - let truncated_datetime = offset - .with_ymd_and_hms(datetime.year(), 1, 1, 0, 0, 0) - .unwrap(); - let truncated_ts = truncated_datetime.timestamp_millis(); - - Timestamp::new(truncated_ts) - } -} diff --git a/src/df_operator/src/visitor.rs b/src/df_operator/src/visitor.rs deleted file mode 100644 index 788bead255..0000000000 --- a/src/df_operator/src/visitor.rs +++ /dev/null @@ -1,28 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Helper function and struct to find input columns for an Expr; - -use datafusion::logical_expr::expr::Expr as DfLogicalExpr; - -pub fn find_columns_by_expr(expr: &DfLogicalExpr) -> Vec { - expr.to_columns() - .unwrap() - .into_iter() - .map(|col| col.name) - .collect() -} diff --git a/src/horaedb/Cargo.toml b/src/horaedb/Cargo.toml deleted file mode 100644 index 5a6144d3cc..0000000000 --- a/src/horaedb/Cargo.toml +++ /dev/null @@ -1,80 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
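The `time_bucket` UDF removed above parses an ISO-8601-style period string and rounds timestamps down to bucket boundaries. Below is a minimal, self-contained sketch of that idea, assuming plain millisecond timestamps and only fixed-width periods (the calendar-aware week/month/year truncation handled above with chrono is omitted); all names here are illustrative and not part of the engine's API.

use std::time::Duration;

// Fixed-width bucket periods only; calendar periods (P1W/P1M/P1Y) need a
// calendar library and are left out of this sketch.
#[derive(Debug, PartialEq)]
enum FixedPeriod {
    Second(u64),
    Minute(u64),
    Hour(u64),
    Day(u64),
}

impl FixedPeriod {
    // Parse a subset of the period grammar accepted above, e.g. "PT30S",
    // "PT5M", "PT1H", "P1D".
    fn parse(s: &str) -> Option<Self> {
        if s.len() < 3 {
            return None;
        }
        let (body, unit) = s.split_at(s.len() - 1);
        let unit = unit.chars().next()?;
        if let Some(num) = body.strip_prefix("PT") {
            let n = num.parse().ok()?;
            match unit {
                'S' => Some(FixedPeriod::Second(n)),
                'M' => Some(FixedPeriod::Minute(n)),
                'H' => Some(FixedPeriod::Hour(n)),
                _ => None,
            }
        } else if let Some(num) = body.strip_prefix('P') {
            let n: u64 = num.parse().ok()?;
            if unit == 'D' {
                Some(FixedPeriod::Day(n))
            } else {
                None
            }
        } else {
            None
        }
    }

    fn as_millis(&self) -> i64 {
        let secs = match self {
            FixedPeriod::Second(n) => *n,
            FixedPeriod::Minute(n) => n * 60,
            FixedPeriod::Hour(n) => n * 3600,
            FixedPeriod::Day(n) => n * 24 * 3600,
        };
        Duration::from_secs(secs).as_millis() as i64
    }
}

// Round a millisecond timestamp down to the start of its bucket.
fn truncate_millis(ts_ms: i64, period: &FixedPeriod) -> i64 {
    let bucket = period.as_millis();
    ts_ms - ts_ms.rem_euclid(bucket)
}

fn main() {
    let period = FixedPeriod::parse("PT5M").expect("valid period");
    // 2021-01-01T00:07:41Z rounds down to the previous 5-minute boundary.
    let truncated = truncate_millis(1_609_459_661_000, &period);
    assert_eq!(truncated % 300_000, 0);
    assert!(truncated <= 1_609_459_661_000);
    println!("truncated to {truncated}");
}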
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "horaedb" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[features] -default = ["wal-table-kv", "wal-message-queue", "wal-local-storage"] -wal-table-kv = ["wal/wal-table-kv", "analytic_engine/wal-table-kv"] -wal-message-queue = ["wal/wal-message-queue", "analytic_engine/wal-message-queue"] -wal-rocksdb = ["wal/wal-rocksdb", "analytic_engine/wal-rocksdb"] -wal-local-storage = ["wal/wal-local-storage", "analytic_engine/wal-local-storage"] - -[dependencies] -analytic_engine = { workspace = true } -catalog = { workspace = true } -catalog_impls = { workspace = true } -clap = { workspace = true } -cluster = { workspace = true } -common_types = { workspace = true } -datafusion = { workspace = true } -df_operator = { workspace = true } -etcd-client = { workspace = true } -interpreters = { workspace = true } -logger = { workspace = true } -meta_client = { workspace = true } -moka = { version = "0.10", features = ["future"] } -panic_ext = { workspace = true } -proxy = { workspace = true } -query_engine = { workspace = true } -router = { workspace = true } -runtime = { workspace = true } -serde = { workspace = true } -server = { workspace = true } -signal-hook = "0.3" -size_ext = { workspace = true } -table_engine = { workspace = true } -toml = { workspace = true } -toml_ext = { workspace = true } -tracing_util = { workspace = true } -wal = { workspace = true } - -[build-dependencies] -vergen = { version = "8", default-features = false, features = [ - "build", - "cargo", - "git", - "gitcl", - "rustc", -] } - -[[bin]] -name = "horaedb-server" -path = "bin/horaedb-server.rs" diff --git a/src/horaedb/bin/horaedb-server.rs b/src/horaedb/bin/horaedb-server.rs deleted file mode 100644 index bf6662085d..0000000000 --- a/src/horaedb/bin/horaedb-server.rs +++ /dev/null @@ -1,131 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
The main entry point to start the server - -use std::env; - -use clap::{Arg, Command}; -use horaedb::{ - config::{ClusterDeployment, Config}, - setup, -}; -use logger::info; - -/// By this environment variable, the address of current node can be overridden. -/// And it could be domain name or ip address, but no port follows it. -const HORAEDB_SERVER_ADDR: &str = "HORAEDB_SERVER_ADDR"; -/// By this environment variable, the address of horaemeta can be overridden. -const HORAEMETA_SERVER_ADDR: &str = "HORAEMETA_SERVER_ADDR"; -/// By this environment variable, the etcd addresses can be overridden. -const ETCD_ADDRS: &str = "ETCD_ADDRS"; -/// By this environment variable, the cluster name of current node can be -/// overridden. -const CLUSTER_NAME: &str = "CLUSTER_NAME"; - -/// Default value for version information is not found from environment -const UNKNOWN: &str = "Unknown"; - -fn fetch_version() -> String { - let version = option_env!("CARGO_PKG_VERSION").unwrap_or(UNKNOWN); - let git_branch = option_env!("VERGEN_GIT_BRANCH").unwrap_or(UNKNOWN); - let git_commit_id = option_env!("VERGEN_GIT_SHA").unwrap_or(UNKNOWN); - let build_time = option_env!("VERGEN_BUILD_TIMESTAMP").unwrap_or(UNKNOWN); - let rustc_version = option_env!("VERGEN_RUSTC_SEMVER").unwrap_or(UNKNOWN); - let opt_level = option_env!("VERGEN_CARGO_OPT_LEVEL").unwrap_or(UNKNOWN); - let target = option_env!("VERGEN_CARGO_TARGET_TRIPLE").unwrap_or(UNKNOWN); - - [ - ("\nVersion", version), - ("Git commit", git_commit_id), - ("Git branch", git_branch), - ("Opt level", opt_level), - ("Rustc version", rustc_version), - ("Target", target), - ("Build date", build_time), - ] - .iter() - .map(|(label, value)| format!("{label}: {value}")) - .collect::>() - .join("\n") -} - -fn main() { - let version: &'static str = Box::leak(fetch_version().into_boxed_str()); - let matches = Command::new("HoraeDB Server") - .version(version) - .arg( - Arg::new("config") - .short('c') - .long("config") - .required(false) - .num_args(1) - .help("Set configuration file, eg: \"/path/server.toml\""), - ) - .get_matches(); - - let mut config = match matches.get_one::("config") { - Some(path) => { - let mut toml_buf = String::new(); - toml_ext::parse_toml_from_path(path, &mut toml_buf).expect("Failed to parse config.") - } - None => Config::default(), - }; - - if let Ok(node_addr) = env::var(HORAEDB_SERVER_ADDR) { - config.node.addr = node_addr; - } - if let Ok(meta_addr) = env::var(HORAEMETA_SERVER_ADDR) { - config.set_meta_addr(meta_addr); - } - if let Ok(etcd_addrs) = env::var(ETCD_ADDRS) { - config.set_etcd_addrs(etcd_addrs); - } - if let Ok(cluster) = env::var(CLUSTER_NAME) { - if let Some(ClusterDeployment::WithMeta(v)) = &mut config.cluster_deployment { - v.meta_client.cluster_name = cluster; - } - } - - println!("HoraeDB server tries starting with config:{config:?}"); - - // Setup log. - let runtime_level = setup::setup_logger(&config); - - // Setup tracing. - let _writer_guard = setup::setup_tracing(&config); - - panic_ext::set_panic_hook(false); - - // Log version. 
- info!("version:{}", version); - - setup::run_server(config, runtime_level); -} - -#[cfg(test)] -mod tests { - #[test] - fn test_env_reader() { - assert!(option_env!("CARGO_PKG_VERSION").is_some()); - assert!(option_env!("VERGEN_GIT_SHA").is_some()); - assert!(option_env!("VERGEN_BUILD_TIMESTAMP").is_some()); - assert!(option_env!("VERGEN_RUSTC_SEMVER").is_some()); - assert!(option_env!("VERGEN_CARGO_OPT_LEVEL").is_some()); - assert!(option_env!("VERGEN_CARGO_TARGET_TRIPLE").is_some()); - } -} diff --git a/src/horaedb/build.rs b/src/horaedb/build.rs deleted file mode 100644 index c92a7bec57..0000000000 --- a/src/horaedb/build.rs +++ /dev/null @@ -1,31 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Build script - -use vergen::EmitBuilder; - -fn main() { - EmitBuilder::builder() - .all_cargo() - .all_build() - .all_rustc() - .git_branch() - .git_sha(true) - .emit() - .expect("Should succeed emit version message"); -} diff --git a/src/horaedb/src/config.rs b/src/horaedb/src/config.rs deleted file mode 100644 index e7f19233f0..0000000000 --- a/src/horaedb/src/config.rs +++ /dev/null @@ -1,148 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// Config for horaedb server. - -use cluster::config::ClusterConfig; -use proxy::limiter::LimiterConfig; -use serde::{Deserialize, Serialize}; -use server::config::{ServerConfig, StaticRouteConfig}; -use size_ext::ReadableSize; - -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(default)] -pub struct NodeInfo { - /// The address of the horaedb (or compaction server) node. It can be a - /// domain name or an IP address without port followed. 
- pub addr: String, - pub zone: String, - pub idc: String, - pub binary_version: String, -} - -impl Default for NodeInfo { - fn default() -> Self { - Self { - addr: "127.0.0.1".to_string(), - zone: "".to_string(), - idc: "".to_string(), - binary_version: "".to_string(), - } - } -} - -#[derive(Clone, Debug, Default, Deserialize, Serialize)] -#[serde(default, deny_unknown_fields)] -pub struct Config { - /// The information of the host node. - pub node: NodeInfo, - - /// Config for service of server, including http, mysql and grpc. - pub server: ServerConfig, - - /// Runtime config. - pub runtime: RuntimeConfig, - - /// Logger config. - pub logger: logger::Config, - - /// Tracing config. - pub tracing: tracing_util::Config, - - /// Analytic engine config. - pub analytic: analytic_engine::Config, - - /// Query engine config. - pub query_engine: query_engine::config::Config, - - /// The deployment of the server. - pub cluster_deployment: Option, - - /// Config of limiter - pub limiter: LimiterConfig, -} - -impl Config { - pub fn set_meta_addr(&mut self, meta_addr: String) { - if let Some(ClusterDeployment::WithMeta(v)) = &mut self.cluster_deployment { - v.meta_client.meta_addr = meta_addr; - } - } - - // etcd_addrs: should be a string split by ",". - // Example: "etcd1:2379,etcd2:2379,etcd3:2379" - pub fn set_etcd_addrs(&mut self, etcd_addrs: String) { - if let Some(ClusterDeployment::WithMeta(v)) = &mut self.cluster_deployment { - v.etcd_client.server_addrs = etcd_addrs.split(',').map(|s| s.to_string()).collect(); - } - } -} - -/// The cluster deployment decides how to deploy the HoraeDB cluster. -/// -/// [ClusterDeployment::NoMeta] means to start one or multiple HoraeDB -/// instance(s) without HoraeMeta. -/// -/// [ClusterDeployment::WithMeta] means to start one or multiple HoraeDB -/// instance(s) under the control of HoraeMeta. -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(tag = "mode")] -#[allow(clippy::large_enum_variant)] -pub enum ClusterDeployment { - NoMeta(StaticRouteConfig), - WithMeta(ClusterConfig), -} - -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(default)] -pub struct RuntimeConfig { - /// High priority runtime for reading data - pub read_thread_num: usize, - /// The size of the stack used by the read thread - /// - /// The size should be a set as a large number if the complex query exists. - /// TODO: this config may be removed in the future when the complex query - /// won't overflow the stack. - pub read_thread_stack_size: ReadableSize, - /// Low priority runtime for reading data - pub low_read_thread_num: usize, - /// Runtime for writing data - pub write_thread_num: usize, - /// Runtime for communicating with meta cluster - pub meta_thread_num: usize, - /// Runtime for compaction - pub compact_thread_num: usize, - /// Runtime for other tasks which may not important - pub default_thread_num: usize, - /// Runtime for io - pub io_thread_num: usize, -} - -impl Default for RuntimeConfig { - fn default() -> Self { - Self { - read_thread_num: 8, - read_thread_stack_size: ReadableSize::mb(16), - low_read_thread_num: 1, - write_thread_num: 8, - meta_thread_num: 2, - compact_thread_num: 4, - default_thread_num: 8, - io_thread_num: 4, - } - } -} diff --git a/src/horaedb/src/lib.rs b/src/horaedb/src/lib.rs deleted file mode 100644 index b6a3bb5eb7..0000000000 --- a/src/horaedb/src/lib.rs +++ /dev/null @@ -1,20 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
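`ClusterDeployment` above is an internally tagged serde enum, so the deployment mode is selected by a `mode` key inside the TOML config. A small sketch of how such a tagged enum deserializes is below, assuming `serde` with the `derive` feature plus the `toml` crate; the two payload structs are illustrative stand-ins for `StaticRouteConfig` and `ClusterConfig`, not their real field layouts.

use serde::Deserialize;

// Illustrative stand-in for StaticRouteConfig.
#[derive(Debug, Deserialize)]
struct RouteRules {
    #[serde(default)]
    schemas: Vec<String>,
}

// Illustrative stand-in for ClusterConfig.
#[derive(Debug, Deserialize)]
struct MetaCfg {
    cluster_name: String,
}

// The `mode` key in the TOML picks the variant, mirroring the enum above.
#[derive(Debug, Deserialize)]
#[serde(tag = "mode")]
enum Deployment {
    NoMeta(RouteRules),
    WithMeta(MetaCfg),
}

fn main() {
    let with_meta = r#"
        mode = "WithMeta"
        cluster_name = "defaultCluster"
    "#;
    let parsed: Deployment = toml::from_str(with_meta).expect("valid deployment config");
    println!("{parsed:?}");

    let no_meta = "mode = \"NoMeta\"\n";
    let parsed: Deployment = toml::from_str(no_meta).expect("valid deployment config");
    println!("{parsed:?}");
}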
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -pub mod config; -pub mod setup; -mod signal_handler; diff --git a/src/horaedb/src/setup.rs b/src/horaedb/src/setup.rs deleted file mode 100644 index 33632b5524..0000000000 --- a/src/horaedb/src/setup.rs +++ /dev/null @@ -1,488 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Setup server - -use std::sync::Arc; - -use analytic_engine::{ - self, - setup::{EngineBuilder, TableEngineContext}, -}; -use catalog::{manager::ManagerRef, schema::OpenOptions, table_operator::TableOperator}; -use catalog_impls::{table_based::TableBasedManager, volatile, CatalogManagerImpl}; -use cluster::{cluster_impl::ClusterImpl, config::ClusterConfig, shard_set::ShardSet}; -use datafusion::execution::runtime_env::RuntimeConfig as DfRuntimeConfig; -use df_operator::registry::{FunctionRegistry, FunctionRegistryImpl}; -use interpreters::table_manipulator::{catalog_based, meta_based}; -use logger::{info, warn, RuntimeLevel}; -use meta_client::{meta_impl, types::NodeMetaInfo}; -use proxy::{ - limiter::Limiter, - schema_config_provider::{ - cluster_based::ClusterBasedProvider, config_based::ConfigBasedProvider, - }, -}; -use router::{rule_based::ClusterView, ClusterBasedRouter, RuleBasedRouter}; -use runtime::PriorityRuntime; -use server::{ - config::{StaticRouteConfig, StaticTopologyConfig}, - local_tables::LocalTablesRecoverer, - server::{Builder, DatafusionContext}, -}; -use table_engine::{ - engine::{EngineRuntimes, TableEngineRef}, - memory::MemoryTableEngine, - proxy::TableEngineProxy, -}; -use tracing_util::{ - self, - tracing_appender::{non_blocking::WorkerGuard, rolling::Rotation}, -}; -use wal::{ - config::StorageConfig, - manager::{WalRuntimes, WalsOpener}, -}; - -use crate::{ - config::{ClusterDeployment, Config, RuntimeConfig}, - signal_handler, -}; - -/// Setup log with given `config`, returns the runtime log level switch. -pub fn setup_logger(config: &Config) -> RuntimeLevel { - logger::init_log(&config.logger).expect("Failed to init log.") -} - -/// Setup tracing with given `config`, returns the writer guard. 
-pub fn setup_tracing(config: &Config) -> WorkerGuard { - tracing_util::init_tracing_with_file(&config.tracing, &config.node.addr, Rotation::NEVER) -} - -fn build_runtime_with_stack_size( - name: &str, - threads_num: usize, - stack_size: Option, -) -> runtime::Runtime { - let mut builder = runtime::Builder::default(); - - if let Some(stack_size) = stack_size { - builder.stack_size(stack_size); - } - - builder - .worker_threads(threads_num) - .thread_name(name) - .enable_all() - .build() - .expect("Failed to create runtime") -} - -fn build_runtime(name: &str, threads_num: usize) -> runtime::Runtime { - build_runtime_with_stack_size(name, threads_num, None) -} - -fn build_engine_runtimes(config: &RuntimeConfig) -> EngineRuntimes { - let read_stack_size = config.read_thread_stack_size.as_byte() as usize; - EngineRuntimes { - read_runtime: PriorityRuntime::new( - Arc::new(build_runtime_with_stack_size( - "read-low", - config.low_read_thread_num, - Some(read_stack_size), - )), - Arc::new(build_runtime_with_stack_size( - "read-high", - config.read_thread_num, - Some(read_stack_size), - )), - ), - write_runtime: Arc::new(build_runtime("horaedb-write", config.write_thread_num)), - compact_runtime: Arc::new(build_runtime("horaedb-compact", config.compact_thread_num)), - meta_runtime: Arc::new(build_runtime("horaedb-meta", config.meta_thread_num)), - default_runtime: Arc::new(build_runtime("horaedb-default", config.default_thread_num)), - io_runtime: Arc::new(build_runtime("horaedb-io", config.io_thread_num)), - } -} - -fn validate_config(config: &Config) { - let is_data_wal_disabled = config.analytic.wal.disable_data; - if is_data_wal_disabled { - warn!("disable data wal may cause data loss, please check whether this configuration is correct") - } -} - -/// Run a server, returns when the server is shutdown by user -pub fn run_server(config: Config, log_runtime: RuntimeLevel) { - let runtimes = Arc::new(build_engine_runtimes(&config.runtime)); - let engine_runtimes = runtimes.clone(); - let log_runtime = Arc::new(log_runtime); - - info!("Server starts up, config:{:#?}", config); - - validate_config(&config); - - runtimes.default_runtime.block_on(async { - match config.analytic.wal.storage { - StorageConfig::RocksDB(_) => { - #[cfg(feature = "wal-rocksdb")] - { - use wal::rocksdb_impl::manager::RocksDBWalsOpener; - run_server_with_runtimes::( - config, - engine_runtimes, - log_runtime, - ) - .await - } - #[cfg(not(feature = "wal-rocksdb"))] - { - panic!("RocksDB WAL not bundled!"); - } - } - - StorageConfig::Obkv(_) => { - #[cfg(feature = "wal-table-kv")] - { - use wal::table_kv_impl::wal::ObkvWalsOpener; - run_server_with_runtimes::( - config, - engine_runtimes, - log_runtime, - ) - .await; - } - #[cfg(not(feature = "wal-table-kv"))] - { - panic!("Table KV WAL not bundled!"); - } - } - - StorageConfig::Kafka(_) => { - #[cfg(feature = "wal-message-queue")] - { - use wal::message_queue_impl::wal::KafkaWalsOpener; - run_server_with_runtimes::( - config, - engine_runtimes, - log_runtime, - ) - .await; - } - #[cfg(not(feature = "wal-message-queue"))] - { - panic!("Message Queue WAL not bundled!"); - } - } - StorageConfig::Local(_) => { - #[cfg(feature = "wal-local-storage")] - { - use wal::local_storage_impl::wal_manager::LocalStorageWalsOpener; - run_server_with_runtimes::( - config, - engine_runtimes, - log_runtime, - ) - .await; - } - #[cfg(not(feature = "wal-local-storage"))] - { - panic!("Local Storage WAL not bundled!"); - } - } - } - }); -} - -async fn run_server_with_runtimes( - config: Config, - 
engine_runtimes: Arc, - log_runtime: Arc, -) where - T: WalsOpener, -{ - // Init function registry. - let mut function_registry = FunctionRegistryImpl::new(); - function_registry - .load_functions() - .expect("Failed to create function registry"); - let function_registry = Arc::new(function_registry); - let datafusion_context = DatafusionContext { - function_registry: function_registry.clone().to_df_function_registry(), - runtime_config: DfRuntimeConfig::default(), - }; - - // Config limiter - let limiter = Limiter::new(config.limiter.clone()); - let config_content = toml::to_string(&config).expect("Fail to serialize config"); - - let builder = Builder::new(config.server.clone()) - .node_addr(config.node.addr.clone()) - .config_content(config_content) - .engine_runtimes(engine_runtimes.clone()) - .log_runtime(log_runtime.clone()) - .function_registry(function_registry) - .limiter(limiter) - .datafusion_context(datafusion_context) - .query_engine_config(config.query_engine.clone()); - - let wal_builder = T::default(); - let builder = match &config.cluster_deployment { - None => { - build_without_meta( - &config, - &StaticRouteConfig::default(), - builder, - engine_runtimes.clone(), - wal_builder, - ) - .await - } - Some(ClusterDeployment::NoMeta(v)) => { - build_without_meta(&config, v, builder, engine_runtimes.clone(), wal_builder).await - } - Some(ClusterDeployment::WithMeta(cluster_config)) => { - build_with_meta( - &config, - cluster_config, - builder, - engine_runtimes.clone(), - wal_builder, - ) - .await - } - }; - - // Build and start server - let mut server = builder.build().expect("Failed to create server"); - server.start().await.expect("Failed to start server"); - - // Wait for signal - signal_handler::wait_for_signal(); - - // Stop server - server.stop().await; -} - -// Build proxy for all table engines. -async fn build_table_engine_proxy(analytic: TableEngineRef) -> Arc { - // Create memory engine - let memory = MemoryTableEngine; - - // Create table engine proxy - Arc::new(TableEngineProxy { - memory, - analytic: analytic.clone(), - }) -} - -fn make_wal_runtime(runtimes: Arc) -> WalRuntimes { - WalRuntimes { - write_runtime: runtimes.write_runtime.clone(), - // TODO: remove read_runtime from WalRuntimes - read_runtime: runtimes.read_runtime.high().clone(), - default_runtime: runtimes.default_runtime.clone(), - } -} - -async fn build_with_meta( - config: &Config, - cluster_config: &ClusterConfig, - builder: Builder, - runtimes: Arc, - wal_opener: T, -) -> Builder { - // Build meta related modules. 
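`build_engine_runtimes` above dedicates a separate runtime to each workload (read, write, compaction, meta, io) and gives the read runtimes a larger per-thread stack for deep query plans. The sketch below shows the same pattern with plain tokio (assuming the rt-multi-thread feature); the engine uses its own runtime wrapper, so the names and the `Runtimes` struct here are illustrative only.

use std::sync::Arc;

use tokio::runtime::{Builder, Runtime};

// Build a named multi-threaded runtime, optionally with a larger stack per
// worker thread (useful when complex query plans recurse deeply).
fn build_runtime(name: &str, threads: usize, stack_size: Option<usize>) -> Runtime {
    let mut builder = Builder::new_multi_thread();
    if let Some(size) = stack_size {
        builder.thread_stack_size(size);
    }
    builder
        .worker_threads(threads)
        .thread_name(name.to_string())
        .enable_all()
        .build()
        .expect("failed to build runtime")
}

// One runtime per workload keeps slow compactions from starving writes.
struct Runtimes {
    read: Arc<Runtime>,
    write: Arc<Runtime>,
    compact: Arc<Runtime>,
}

fn main() {
    let runtimes = Runtimes {
        read: Arc::new(build_runtime("read", 8, Some(16 * 1024 * 1024))),
        write: Arc::new(build_runtime("write", 8, None)),
        compact: Arc::new(build_runtime("compact", 4, None)),
    };
    runtimes.write.block_on(async {
        println!("write runtime is up");
    });
    // The read/compact runtimes would be handed to the engine in the same way.
    let _ = (&runtimes.read, &runtimes.compact);
}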
- let node_meta_info = NodeMetaInfo { - addr: config.node.addr.clone(), - port: config.server.grpc_port, - zone: config.node.zone.clone(), - idc: config.node.idc.clone(), - binary_version: config.node.binary_version.clone(), - node_type: cluster_config.node_type.clone(), - }; - - info!("Build horaedb with node meta info:{node_meta_info:?}"); - - let endpoint = node_meta_info.endpoint(); - let meta_client = - meta_impl::build_meta_client(cluster_config.meta_client.clone(), node_meta_info) - .await - .expect("fail to build meta client"); - - let shard_set = ShardSet::default(); - let cluster = { - let cluster_impl = ClusterImpl::try_new( - endpoint, - shard_set.clone(), - meta_client.clone(), - cluster_config.clone(), - runtimes.meta_runtime.clone(), - ) - .await - .unwrap(); - Arc::new(cluster_impl) - }; - let router = Arc::new(ClusterBasedRouter::new( - cluster.clone(), - config.server.route_cache.clone(), - )); - - let opened_wals = wal_opener - .open_wals(&config.analytic.wal, make_wal_runtime(runtimes.clone())) - .await - .expect("Failed to setup analytic engine"); - let engine_builder = EngineBuilder { - config: &config.analytic, - engine_runtimes: runtimes.clone(), - opened_wals: opened_wals.clone(), - meta_client: Some(meta_client.clone()), - }; - let TableEngineContext { - table_engine, - local_compaction_runner, - } = engine_builder - .build() - .await - .expect("Failed to setup analytic engine"); - let engine_proxy = build_table_engine_proxy(table_engine).await; - - let meta_based_manager_ref = Arc::new(volatile::ManagerImpl::new( - shard_set, - meta_client.clone(), - cluster.clone(), - )); - - // Build catalog manager. - let catalog_manager = Arc::new(CatalogManagerImpl::new(meta_based_manager_ref)); - - let table_manipulator = Arc::new(meta_based::TableManipulatorImpl::new(meta_client)); - - let schema_config_provider = Arc::new(ClusterBasedProvider::new(cluster.clone())); - - let mut builder = builder - .table_engine(engine_proxy) - .catalog_manager(catalog_manager) - .table_manipulator(table_manipulator) - .cluster(cluster) - .opened_wals(opened_wals) - .router(router) - .schema_config_provider(schema_config_provider); - builder = builder.compaction_runner(local_compaction_runner.expect("Empty compaction runner.")); - - builder -} - -async fn build_without_meta( - config: &Config, - static_route_config: &StaticRouteConfig, - builder: Builder, - runtimes: Arc, - wal_builder: T, -) -> Builder { - let opened_wals = wal_builder - .open_wals(&config.analytic.wal, make_wal_runtime(runtimes.clone())) - .await - .expect("Failed to setup analytic engine"); - - let engine_builder = EngineBuilder { - config: &config.analytic, - engine_runtimes: runtimes.clone(), - opened_wals: opened_wals.clone(), - meta_client: None, - }; - let TableEngineContext { table_engine, .. } = engine_builder - .build() - .await - .expect("Failed to setup analytic engine"); - let engine_proxy = build_table_engine_proxy(table_engine).await; - - // Create catalog manager, use analytic engine as backend. - let analytic = engine_proxy.analytic.clone(); - let mut table_based_manager = TableBasedManager::new(analytic) - .await - .expect("Failed to create catalog manager"); - - // Get collected table infos. 
- let table_infos = table_based_manager - .fetch_table_infos() - .await - .expect("Failed to fetch table infos for opening"); - - let catalog_manager = Arc::new(CatalogManagerImpl::new(Arc::new(table_based_manager))); - let table_operator = TableOperator::new(catalog_manager.clone()); - let table_manipulator = Arc::new(catalog_based::TableManipulatorImpl::new( - table_operator.clone(), - )); - - // Iterate the table infos to recover. - let open_opts = OpenOptions { - table_engine: engine_proxy.clone(), - }; - - // Create local tables recoverer. - let local_tables_recoverer = LocalTablesRecoverer::new(table_infos, table_operator, open_opts); - - // Create schema in default catalog. - create_static_topology_schema( - catalog_manager.clone(), - static_route_config.topology.clone(), - ) - .await; - - // Build static router and schema config provider - let cluster_view = ClusterView::from(&static_route_config.topology); - let schema_configs = cluster_view.schema_configs.clone(); - let router = Arc::new(RuleBasedRouter::new( - cluster_view, - static_route_config.rules.clone(), - )); - let schema_config_provider = Arc::new(ConfigBasedProvider::new( - schema_configs, - config.server.default_schema_config.clone(), - )); - - builder - .table_engine(engine_proxy) - .catalog_manager(catalog_manager) - .table_manipulator(table_manipulator) - .router(router) - .opened_wals(opened_wals) - .schema_config_provider(schema_config_provider) - .local_tables_recoverer(local_tables_recoverer) -} - -async fn create_static_topology_schema( - catalog_mgr: ManagerRef, - static_topology_config: StaticTopologyConfig, -) { - let default_catalog = catalog_mgr - .catalog_by_name(catalog_mgr.default_catalog_name()) - .expect("Fail to retrieve default catalog") - .expect("Default catalog doesn't exist"); - for schema_shard_view in static_topology_config.schema_shards { - default_catalog - .create_schema(&schema_shard_view.schema) - .await - .unwrap_or_else(|_| panic!("Fail to create schema:{}", schema_shard_view.schema)); - info!( - "Create static topology in default catalog:{}, schema:{}", - catalog_mgr.default_catalog_name(), - &schema_shard_view.schema - ); - } -} diff --git a/src/horaedb/src/signal_handler.rs b/src/horaedb/src/signal_handler.rs deleted file mode 100644 index 9c0cb76515..0000000000 --- a/src/horaedb/src/signal_handler.rs +++ /dev/null @@ -1,44 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Signal handler -//! -//! 
Only works on unix like environments - -pub use self::details::wait_for_signal; - -#[cfg(unix)] -mod details { - use logger::info; - use signal_hook::{consts::TERM_SIGNALS, iterator::Signals}; - - pub fn wait_for_signal() { - let mut sigs = Signals::new(TERM_SIGNALS).expect("Failed to register signal handlers"); - - for signal in &mut sigs { - if TERM_SIGNALS.contains(&signal) { - info!("Received signal {}, stopping server...", signal); - break; - } - } - } -} - -#[cfg(not(unix))] -mod details { - pub fn wait_for_signal() {} -} diff --git a/src/interpreters/Cargo.toml b/src/interpreters/Cargo.toml deleted file mode 100644 index d237d5b870..0000000000 --- a/src/interpreters/Cargo.toml +++ /dev/null @@ -1,64 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "interpreters" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -arrow = { workspace = true } -# In alphabetical order -async-trait = { workspace = true } -catalog = { workspace = true } -codec = { workspace = true } -common_types = { workspace = true } -datafusion = { workspace = true } -datafusion-proto = { workspace = true } -df_operator = { workspace = true } -futures = { workspace = true } -generic_error = { workspace = true } -hash_ext = { workspace = true } -lazy_static = { workspace = true } -logger = { workspace = true } -macros = { workspace = true } -meta_client = { workspace = true } -prometheus = { workspace = true } -query_engine = { workspace = true } -query_frontend = { workspace = true } -regex = { workspace = true } -runtime = { workspace = true } -snafu = { workspace = true } -table_engine = { workspace = true } -tokio = { workspace = true } - -[dev-dependencies] -analytic_engine = { workspace = true, features = ["test"] } -catalog_impls = { workspace = true } -query_frontend = { workspace = true, features = ["test"] } -test_util = { workspace = true } -tokio = { workspace = true } diff --git a/src/interpreters/src/alter_table.rs b/src/interpreters/src/alter_table.rs deleted file mode 100644 index ceb2e5a380..0000000000 --- a/src/interpreters/src/alter_table.rs +++ /dev/null @@ -1,147 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Interpreter for insert statement - -use async_trait::async_trait; -use common_types::{ - column_schema::{self, ColumnSchema}, - schema::{self, Schema}, -}; -use macros::define_result; -use query_frontend::plan::{AlterTableOperation, AlterTablePlan}; -use snafu::{ensure, ResultExt, Snafu}; -use table_engine::table::AlterSchemaRequest; - -use crate::interpreter::{self, AlterTable, Interpreter, InterpreterPtr, Output}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to add column to schema, err:{}", source))] - AddColumnSchema { source: common_types::schema::Error }, - - #[snafu(display("Failed to build schema, err:{}", source))] - BuildSchema { source: common_types::schema::Error }, - - #[snafu(display("Failed to alter table schema, err:{}", source))] - AlterSchema { source: table_engine::table::Error }, - - #[snafu(display("Failed to alter table options, err:{}", source))] - AlterOptions { source: table_engine::table::Error }, - - #[snafu(display("Not allow to add a not null column, name:{}", name))] - AddNotNull { name: String }, -} - -define_result!(Error); - -pub struct AlterTableInterpreter { - plan: AlterTablePlan, -} - -impl AlterTableInterpreter { - pub fn create(plan: AlterTablePlan) -> InterpreterPtr { - Box::new(Self { plan }) - } -} - -#[async_trait] -impl Interpreter for AlterTableInterpreter { - async fn execute(self: Box) -> interpreter::Result { - self.execute_alter().await.context(AlterTable) - } -} - -impl AlterTableInterpreter { - async fn execute_alter(self: Box) -> Result { - let AlterTablePlan { table, operations } = self.plan; - - match operations { - AlterTableOperation::AddColumn(columns) => { - let current_schema = table.schema(); - let new_schema = build_new_schema(¤t_schema, columns)?; - - let request = AlterSchemaRequest { - schema: new_schema, - pre_schema_version: current_schema.version(), - }; - - let num_rows = table.alter_schema(request).await.context(AlterSchema)?; - - Ok(Output::AffectedRows(num_rows)) - } - AlterTableOperation::ModifySetting(options) => { - let num_rows = table.alter_options(options).await.context(AlterOptions)?; - Ok(Output::AffectedRows(num_rows)) - } - } - } -} - -fn build_new_schema(current_schema: &Schema, column_schemas: Vec) -> Result { - let current_version = current_schema.version(); - - let mut builder = - schema::Builder::with_capacity(current_schema.num_columns() + column_schemas.len()) - .primary_key_indexes(current_schema.primary_key_indexes().to_vec()) - // Increment the schema version. - .version(current_version + 1); - for (idx, column) in current_schema.columns().iter().enumerate() { - if current_schema.is_primary_key_index(&idx) { - builder = builder - .add_key_column(column.clone()) - .context(AddColumnSchema)?; - } else { - builder = builder - .add_normal_column(column.clone()) - .context(AddColumnSchema)?; - } - } - - builder = builder - // Enable column id generation of the schema builder. - .auto_increment_column_id(true); - - // Add new columns - for mut column_schema in column_schemas { - // Uninit the id of the column schema. 
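`build_new_schema` above (it finishes just below) rebuilds the whole schema when columns are added: existing columns are copied over, the schema version is bumped, and only nullable columns may be appended, since a NOT NULL column could not be backfilled for rows written before the ALTER. A dependency-free sketch of that rule, with illustrative types rather than the engine's `Schema`/`ColumnSchema`:

#[derive(Clone, Debug)]
struct Column {
    name: String,
    nullable: bool,
}

#[derive(Debug)]
struct TableSchema {
    version: u32,
    columns: Vec<Column>,
}

// Build the next schema version: keep every existing column, bump the
// version, and append only nullable new columns.
fn add_columns(current: &TableSchema, new_cols: Vec<Column>) -> Result<TableSchema, String> {
    let mut columns = current.columns.clone();
    for col in new_cols {
        if !col.nullable {
            return Err(format!("not allowed to add a not-null column: {}", col.name));
        }
        columns.push(col);
    }
    Ok(TableSchema {
        version: current.version + 1,
        columns,
    })
}

fn main() {
    let current = TableSchema {
        version: 1,
        columns: vec![Column { name: "ts".into(), nullable: false }],
    };
    let next = add_columns(&current, vec![Column { name: "note".into(), nullable: true }])
        .expect("nullable column is accepted");
    assert_eq!(next.version, 2);
    assert_eq!(next.columns.len(), 2);
}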
- column_schema.id = column_schema::COLUMN_ID_UNINIT; - - validate_add_column(&column_schema)?; - - // Only allow to add normal column. - builder = builder - .add_normal_column(column_schema) - .context(AddColumnSchema)?; - } - - // Build the final schema. - let new_schema = builder.build().context(BuildSchema)?; - - Ok(new_schema) -} - -fn validate_add_column(column_schema: &ColumnSchema) -> Result<()> { - ensure!( - column_schema.is_nullable, - AddNotNull { - name: &column_schema.name - } - ); - - Ok(()) -} diff --git a/src/interpreters/src/context.rs b/src/interpreters/src/context.rs deleted file mode 100644 index ac80df08a5..0000000000 --- a/src/interpreters/src/context.rs +++ /dev/null @@ -1,135 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Interpreter context - -use std::{sync::Arc, time::Instant}; - -use common_types::request_id::RequestId; -use macros::define_result; -use query_engine::context::{Context as QueryContext, ContextRef as QueryContextRef}; -use runtime::Priority; -use snafu::Snafu; - -#[derive(Debug, Snafu)] -pub enum Error {} - -define_result!(Error); - -/// Interpreter context -/// -/// Contains information that all interpreters need -#[derive(Debug, Clone)] -pub struct Context { - request_id: RequestId, - deadline: Option, - default_catalog: String, - default_schema: String, - enable_partition_table_access: bool, - /// If time range exceeds this threshold, the query will be marked as - /// expensive - expensive_query_threshold: u64, -} - -impl Context { - pub fn builder(request_id: RequestId, deadline: Option) -> Builder { - Builder { - request_id, - deadline, - default_catalog: String::new(), - default_schema: String::new(), - enable_partition_table_access: false, - expensive_query_threshold: 24 * 3600 * 1000, // default 24 hours - } - } - - /// Create a new context of query executor - pub fn new_query_context(&self, priority: Priority) -> Result { - let ctx = QueryContext { - request_id: self.request_id.clone(), - deadline: self.deadline, - default_catalog: self.default_catalog.clone(), - default_schema: self.default_schema.clone(), - priority, - }; - Ok(Arc::new(ctx)) - } - - #[inline] - pub fn default_catalog(&self) -> &str { - &self.default_catalog - } - - #[inline] - pub fn default_schema(&self) -> &str { - &self.default_schema - } - - #[inline] - pub fn request_id(&self) -> RequestId { - self.request_id.clone() - } - - #[inline] - pub fn enable_partition_table_access(&self) -> bool { - self.enable_partition_table_access - } - - #[inline] - pub fn expensive_query_threshold(&self) -> u64 { - self.expensive_query_threshold - } -} - -#[must_use] -pub struct Builder { - request_id: RequestId, - deadline: Option, - default_catalog: String, - default_schema: String, - enable_partition_table_access: 
bool, - expensive_query_threshold: u64, -} - -impl Builder { - pub fn default_catalog_and_schema(mut self, catalog: String, schema: String) -> Self { - self.default_catalog = catalog; - self.default_schema = schema; - self - } - - pub fn enable_partition_table_access(mut self, enable_partition_table_access: bool) -> Self { - self.enable_partition_table_access = enable_partition_table_access; - self - } - - pub fn expensive_query_threshold(mut self, threshold: u64) -> Self { - self.expensive_query_threshold = threshold; - self - } - - pub fn build(self) -> Context { - Context { - request_id: self.request_id, - deadline: self.deadline, - default_catalog: self.default_catalog, - default_schema: self.default_schema, - enable_partition_table_access: self.enable_partition_table_access, - expensive_query_threshold: self.expensive_query_threshold, - } - } -} diff --git a/src/interpreters/src/create.rs b/src/interpreters/src/create.rs deleted file mode 100644 index ba618b54c1..0000000000 --- a/src/interpreters/src/create.rs +++ /dev/null @@ -1,81 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Interpreter for create statements - -use async_trait::async_trait; -use macros::define_result; -use query_frontend::plan::CreateTablePlan; -use snafu::{ResultExt, Snafu}; -use table_engine::engine::TableEngineRef; - -use crate::{ - context::Context, - interpreter::{Create, Interpreter, InterpreterPtr, Output, Result as InterpreterResult}, - table_manipulator::{self, TableManipulatorRef}, -}; - -#[derive(Debug, Snafu)] -#[snafu(visibility(pub(crate)))] -pub enum Error { - #[snafu(display("Failed to create table by table manipulator, err:{}", source))] - ManipulateTable { source: table_manipulator::Error }, -} - -define_result!(Error); - -/// Create interpreter -pub struct CreateInterpreter { - ctx: Context, - plan: CreateTablePlan, - table_engine: TableEngineRef, - table_manipulator: TableManipulatorRef, -} - -impl CreateInterpreter { - pub fn create( - ctx: Context, - plan: CreateTablePlan, - table_engine: TableEngineRef, - table_manipulator: TableManipulatorRef, - ) -> InterpreterPtr { - Box::new(Self { - ctx, - plan, - table_engine, - table_manipulator, - }) - } -} - -impl CreateInterpreter { - async fn execute_create(self: Box) -> Result { - self.table_manipulator - .create_table(self.ctx, self.plan, self.table_engine) - .await - .context(ManipulateTable) - } -} - -// TODO(yingwen): Wrap a method that returns self::Result, simplify some code to -// converting self::Error to super::Error -#[async_trait] -impl Interpreter for CreateInterpreter { - async fn execute(self: Box) -> InterpreterResult { - self.execute_create().await.context(Create) - } -} diff --git a/src/interpreters/src/describe.rs b/src/interpreters/src/describe.rs deleted file mode 100644 index 3047f54b98..0000000000 --- a/src/interpreters/src/describe.rs +++ /dev/null @@ -1,108 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::{convert::TryInto, sync::Arc}; - -use arrow::{ - array::{BooleanArray, StringArray}, - datatypes::{DataType, Field, Schema}, - record_batch::RecordBatch, -}; -use async_trait::async_trait; -use macros::define_result; -use query_frontend::plan::DescribeTablePlan; -use snafu::{ResultExt, Snafu}; -use table_engine::table::TableRef; - -use crate::{ - interpreter::{Describe, Interpreter, InterpreterPtr, Output, Result as InterpreterResult}, - RecordBatchVec, -}; - -#[derive(Debug, Snafu)] -pub enum Error {} - -define_result!(Error); - -pub struct DescribeInterpreter { - plan: DescribeTablePlan, -} - -impl DescribeInterpreter { - pub fn create(plan: DescribeTablePlan) -> InterpreterPtr { - Box::new(Self { plan }) - } - - async fn execute_describe(self: Box) -> Result { - let DescribeTablePlan { table } = self.plan; - - Self::table_ref_to_record_batch(table).map(Output::Records) - } - - fn table_ref_to_record_batch(table_ref: TableRef) -> Result { - let table_schema = table_ref.schema(); - let num_columns = table_schema.num_columns(); - - let mut names = Vec::with_capacity(num_columns); - let mut types = Vec::with_capacity(num_columns); - let mut is_primary_keys = Vec::with_capacity(num_columns); - let mut is_nullables = Vec::with_capacity(num_columns); - let mut is_tags = Vec::with_capacity(num_columns); - let mut is_dictionarys = Vec::with_capacity(num_columns); - for (idx, col) in table_schema.columns().iter().enumerate() { - names.push(col.name.to_string()); - types.push(col.data_type.to_string()); - is_primary_keys.push(table_schema.is_primary_key_index(&idx)); - is_nullables.push(col.is_nullable); - is_tags.push(col.is_tag); - is_dictionarys.push(col.is_dictionary); - } - - let schema = Schema::new(vec![ - Field::new("name", DataType::Utf8, false), - Field::new("type", DataType::Utf8, false), - Field::new("is_primary", DataType::Boolean, false), - Field::new("is_nullable", DataType::Boolean, false), - Field::new("is_tag", DataType::Boolean, false), - Field::new("is_dictionary", DataType::Boolean, false), - ]); - - let arrow_record_batch = RecordBatch::try_new( - Arc::new(schema), - vec![ - Arc::new(StringArray::from(names)), - Arc::new(StringArray::from(types)), - Arc::new(BooleanArray::from(is_primary_keys)), - Arc::new(BooleanArray::from(is_nullables)), - Arc::new(BooleanArray::from(is_tags)), - Arc::new(BooleanArray::from(is_dictionarys)), - ], - ) - .unwrap(); - - let record_batch = arrow_record_batch.try_into().unwrap(); - - Ok(vec![record_batch]) - } -} - -#[async_trait] -impl Interpreter for DescribeInterpreter { - async fn execute(self: Box) -> InterpreterResult { - self.execute_describe().await.context(Describe) - } -} diff --git a/src/interpreters/src/drop.rs b/src/interpreters/src/drop.rs deleted file mode 100644 index 6a164e1fed..0000000000 --- a/src/interpreters/src/drop.rs +++ /dev/null @@ -1,81 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Interpreter for drop statements - -use async_trait::async_trait; -use macros::define_result; -use query_frontend::plan::DropTablePlan; -use snafu::{ResultExt, Snafu}; -use table_engine::engine::TableEngineRef; - -use crate::{ - context::Context, - interpreter::{Drop, Interpreter, InterpreterPtr, Output, Result as InterpreterResult}, - table_manipulator::{self, TableManipulatorRef}, -}; - -#[derive(Debug, Snafu)] -#[snafu(visibility(pub(crate)))] -pub enum Error { - #[snafu(display("Failed to drop table by table manipulator, err:{}", source))] - ManipulateTable { source: table_manipulator::Error }, -} - -define_result!(Error); - -/// Drop interpreter -pub struct DropInterpreter { - ctx: Context, - plan: DropTablePlan, - table_engine: TableEngineRef, - table_manipulator: TableManipulatorRef, -} - -impl DropInterpreter { - pub fn create( - ctx: Context, - plan: DropTablePlan, - table_engine: TableEngineRef, - table_manipulator: TableManipulatorRef, - ) -> InterpreterPtr { - Box::new(Self { - ctx, - plan, - table_engine, - table_manipulator, - }) - } -} - -impl DropInterpreter { - async fn execute_drop(self: Box) -> Result { - self.table_manipulator - .drop_table(self.ctx, self.plan, self.table_engine) - .await - .context(ManipulateTable) - } -} - -// TODO(yingwen): Wrap a method that returns self::Result, simplify some code to -// converting self::Error to super::Error -#[async_trait] -impl Interpreter for DropInterpreter { - async fn execute(self: Box) -> InterpreterResult { - self.execute_drop().await.context(Drop) - } -} diff --git a/src/interpreters/src/exists.rs b/src/interpreters/src/exists.rs deleted file mode 100644 index fa237f615f..0000000000 --- a/src/interpreters/src/exists.rs +++ /dev/null @@ -1,78 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
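The DESCRIBE (and, below, EXISTS) interpreters return their results as in-memory Arrow record batches instead of touching any table. Below is a trimmed sketch of that construction using the `arrow` crate directly; it keeps only two of the columns DESCRIBE emits and uses an illustrative helper name.

use std::sync::Arc;

use arrow::{
    array::{BooleanArray, StringArray},
    datatypes::{DataType, Field, Schema},
    record_batch::RecordBatch,
};

// Turn (column name, is_nullable) pairs into a single-batch result set,
// the same shape the DESCRIBE interpreter produces above.
fn describe_batch(cols: &[(&str, bool)]) -> Result<RecordBatch, arrow::error::ArrowError> {
    let schema = Schema::new(vec![
        Field::new("name", DataType::Utf8, false),
        Field::new("is_nullable", DataType::Boolean, false),
    ]);
    let names: Vec<&str> = cols.iter().map(|(n, _)| *n).collect();
    let nullables: Vec<bool> = cols.iter().map(|(_, n)| *n).collect();
    RecordBatch::try_new(
        Arc::new(schema),
        vec![
            Arc::new(StringArray::from(names)),
            Arc::new(BooleanArray::from(nullables)),
        ],
    )
}

fn main() {
    let batch = describe_batch(&[("ts", false), ("value", true)]).expect("valid batch");
    println!("{} rows x {} columns", batch.num_rows(), batch.num_columns());
}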
- -use std::{convert::TryInto, sync::Arc}; - -use arrow::{ - array::UInt8Array, - datatypes::{DataType, Field, Schema}, - record_batch::RecordBatch, -}; -use async_trait::async_trait; -use macros::define_result; -use query_frontend::plan::ExistsTablePlan; -use snafu::{ResultExt, Snafu}; - -use crate::{ - interpreter::{Exists, Interpreter, InterpreterPtr, Output, Result as InterpreterResult}, - RecordBatchVec, -}; - -#[derive(Debug, Snafu)] -pub enum Error {} - -define_result!(Error); - -pub struct ExistsInterpreter { - plan: ExistsTablePlan, -} - -impl ExistsInterpreter { - pub fn create(plan: ExistsTablePlan) -> InterpreterPtr { - Box::new(Self { plan }) - } - - async fn execute_exists(self: Box) -> Result { - let ExistsTablePlan { exists } = self.plan; - - exists_table_result(exists).map(Output::Records) - } -} - -fn exists_table_result(exists: bool) -> Result { - let schema = Schema::new(vec![Field::new("result", DataType::UInt8, false)]); - - let arrow_record_batch = RecordBatch::try_new( - Arc::new(schema), - vec![Arc::new(UInt8Array::from_value( - if exists { 1u8 } else { 0u8 }, - 1, - ))], - ) - .unwrap(); - - let record_batch = arrow_record_batch.try_into().unwrap(); - - Ok(vec![record_batch]) -} - -#[async_trait] -impl Interpreter for ExistsInterpreter { - async fn execute(self: Box) -> InterpreterResult { - self.execute_exists().await.context(Exists) - } -} diff --git a/src/interpreters/src/factory.rs b/src/interpreters/src/factory.rs deleted file mode 100644 index a47a86b259..0000000000 --- a/src/interpreters/src/factory.rs +++ /dev/null @@ -1,102 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Interpreter factory - -use catalog::manager::ManagerRef; -use query_engine::{executor::ExecutorRef, physical_planner::PhysicalPlannerRef}; -use query_frontend::plan::Plan; -use runtime::PriorityRuntime; -use table_engine::engine::TableEngineRef; - -use crate::{ - alter_table::AlterTableInterpreter, - context::Context, - create::CreateInterpreter, - describe::DescribeInterpreter, - drop::DropInterpreter, - exists::ExistsInterpreter, - insert::InsertInterpreter, - interpreter::{InterpreterPtr, Result}, - select::SelectInterpreter, - show::ShowInterpreter, - table_manipulator::TableManipulatorRef, - validator::{ValidateContext, Validator}, -}; - -/// A factory to create interpreters -pub struct Factory { - query_executor: ExecutorRef, - query_runtime: PriorityRuntime, - physical_planner: PhysicalPlannerRef, - catalog_manager: ManagerRef, - table_engine: TableEngineRef, - table_manipulator: TableManipulatorRef, -} - -impl Factory { - pub fn new( - query_executor: ExecutorRef, - physical_planner: PhysicalPlannerRef, - catalog_manager: ManagerRef, - table_engine: TableEngineRef, - table_manipulator: TableManipulatorRef, - query_runtime: PriorityRuntime, - ) -> Self { - Self { - query_executor, - query_runtime, - physical_planner, - catalog_manager, - table_engine, - table_manipulator, - } - } - - pub fn create(self, ctx: Context, plan: Plan) -> Result { - let validate_ctx = ValidateContext { - enable_partition_table_access: ctx.enable_partition_table_access(), - }; - let validator = Validator::new(validate_ctx); - validator.validate(&plan)?; - - let interpreter = match plan { - Plan::Query(p) => SelectInterpreter::create( - ctx, - p, - self.query_executor, - self.physical_planner, - self.query_runtime, - ), - Plan::Insert(p) => { - InsertInterpreter::create(ctx, p, self.query_executor, self.physical_planner) - } - Plan::Create(p) => { - CreateInterpreter::create(ctx, p, self.table_engine, self.table_manipulator) - } - Plan::Drop(p) => { - DropInterpreter::create(ctx, p, self.table_engine, self.table_manipulator) - } - Plan::Describe(p) => DescribeInterpreter::create(p), - Plan::AlterTable(p) => AlterTableInterpreter::create(p), - Plan::Show(p) => ShowInterpreter::create(ctx, p, self.catalog_manager), - Plan::Exists(p) => ExistsInterpreter::create(p), - }; - - Ok(interpreter) - } -} diff --git a/src/interpreters/src/insert.rs b/src/interpreters/src/insert.rs deleted file mode 100644 index 5d9e254f1b..0000000000 --- a/src/interpreters/src/insert.rs +++ /dev/null @@ -1,614 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Interpreter for insert statement - -use std::{ - collections::{BTreeMap, HashMap}, - ops::IndexMut, - sync::Arc, -}; - -use arrow::{array::ArrayRef, error::ArrowError, record_batch::RecordBatch}; -use async_trait::async_trait; -use codec::{compact::MemCompactEncoder, Encoder}; -use common_types::{ - column_block::{ColumnBlock, ColumnBlockBuilder}, - column_schema::ColumnId, - datum::Datum, - record_batch::RecordBatch as CommonRecordBatch, - row::{Row, RowBuilder, RowGroup}, - schema::Schema, -}; -use datafusion::{ - common::ToDFSchema, - error::DataFusionError, - logical_expr::{expr::Expr as DfLogicalExpr, ColumnarValue as DfColumnarValue}, - optimizer::simplify_expressions::{ExprSimplifier, SimplifyContext}, - physical_expr::{ - create_physical_expr, execution_props::ExecutionProps, expressions::TryCastExpr, - }, -}; -use df_operator::visitor::find_columns_by_expr; -use futures::TryStreamExt; -use generic_error::{BoxError, GenericError}; -use hash_ext::hash64; -use macros::define_result; -use query_engine::{executor::ExecutorRef, physical_planner::PhysicalPlannerRef}; -use query_frontend::{ - plan::{InsertPlan, InsertSource, QueryPlan}, - planner::InsertMode, -}; -use runtime::Priority; -use snafu::{ensure, OptionExt, ResultExt, Snafu}; -use table_engine::{ - stream::SendableRecordBatchStream, - table::{TableRef, WriteRequest}, -}; -use tokio::sync::mpsc; - -use crate::{ - context::Context, - interpreter::{Insert, Interpreter, InterpreterPtr, Output, Result as InterpreterResult}, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to generate datafusion expr, err:{}", source))] - DatafusionExpr { source: DataFusionError }, - - #[snafu(display( - "Failed to get data type from datafusion physical expr, err:{}", - source - ))] - DatafusionDataType { source: DataFusionError }, - - #[snafu(display("Failed to get arrow schema, err:{}", source))] - ArrowSchema { source: ArrowError }, - - #[snafu(display("Failed to get datafusion schema, err:{}", source))] - DatafusionSchema { source: DataFusionError }, - - #[snafu(display("Failed to evaluate datafusion physical expr, err:{}", source))] - DatafusionExecutor { source: DataFusionError }, - - #[snafu(display("Failed to build arrow record batch, err:{}", source))] - BuildArrowRecordBatch { source: ArrowError }, - - #[snafu(display("Failed to write table, err:{}", source))] - WriteTable { source: table_engine::table::Error }, - - #[snafu(display("Failed to encode tsid, err:{}", source))] - EncodeTsid { source: codec::compact::Error }, - - #[snafu(display("Failed to convert arrow array to column block, err:{}", source))] - ConvertColumnBlock { - source: common_types::column_block::Error, - }, - - #[snafu(display("Failed to find input columns of expr, column_name:{}", column_name))] - FindExpressionInput { column_name: String }, - - #[snafu(display("Failed to build column block, err:{}", source))] - BuildColumnBlock { - source: common_types::column_block::Error, - }, - - #[snafu(display("Failed to create query context, err:{}", source))] - CreateQueryContext { source: crate::context::Error }, - - #[snafu(display("Failed to execute select physical plan, msg:{}, err:{}", msg, source))] - ExecuteSelectPlan { msg: String, source: GenericError }, - - #[snafu(display("Failed to build row, err:{}", source))] - BuildRow { source: common_types::row::Error }, - - #[snafu(display("Record columns not enough, len:{}, index:{}", len, index))] - RecordColumnsNotEnough { len: usize, index: usize }, - - #[snafu(display("Failed to do select, 
err:{}", source))] - Select { source: table_engine::stream::Error }, - - #[snafu(display("Failed to send msg in channel, err:{}", msg))] - MsgChannel { msg: String }, - - #[snafu(display("Failed to join async task, err:{}", msg))] - AsyncTask { msg: String }, -} - -define_result!(Error); - -// TODO: make those configurable -const INSERT_SELECT_ROW_BATCH_NUM: usize = 1000; -const INSERT_SELECT_PENDING_BATCH_NUM: usize = 3; - -pub struct InsertInterpreter { - ctx: Context, - plan: InsertPlan, - executor: ExecutorRef, - physical_planner: PhysicalPlannerRef, -} - -impl InsertInterpreter { - pub fn create( - ctx: Context, - plan: InsertPlan, - executor: ExecutorRef, - physical_planner: PhysicalPlannerRef, - ) -> InterpreterPtr { - Box::new(Self { - ctx, - plan, - executor, - physical_planner, - }) - } -} - -#[async_trait] -impl Interpreter for InsertInterpreter { - async fn execute(mut self: Box) -> InterpreterResult { - // Generate tsid if needed. - let InsertPlan { - table, - source, - default_value_map, - } = self.plan; - - match source { - InsertSource::Values { row_group: rows } => { - let num_rows = - prepare_and_write_table(table.clone(), rows, &default_value_map).await?; - - Ok(Output::AffectedRows(num_rows)) - } - InsertSource::Select { - query: query_plan, - column_index_in_insert, - } => { - let mut record_batches_stream = exec_select_logical_plan( - self.ctx, - query_plan, - self.executor, - self.physical_planner, - ) - .await - .context(Insert)?; - - let (tx, rx) = mpsc::channel(INSERT_SELECT_PENDING_BATCH_NUM); - let producer = tokio::spawn(async move { - while let Some(record_batch) = record_batches_stream - .try_next() - .await - .context(Select) - .context(Insert)? - { - if record_batch.is_empty() { - continue; - } - if let Err(e) = tx.send(record_batch).await { - return Err(Error::MsgChannel { - msg: format!("{}", e), - }) - .context(Insert)?; - } - } - Ok(()) - }); - - let consumer = tokio::spawn(async move { - let mut rx = rx; - let mut result_rows = 0; - let mut pending_rows = 0; - let mut record_batches = Vec::new(); - while let Some(record_batch) = rx.recv().await { - pending_rows += record_batch.num_rows(); - record_batches.push(record_batch); - if pending_rows >= INSERT_SELECT_ROW_BATCH_NUM { - pending_rows = 0; - let num_rows = write_record_batches( - &mut record_batches, - column_index_in_insert.as_slice(), - table.clone(), - &default_value_map, - ) - .await?; - result_rows += num_rows; - } - } - - if !record_batches.is_empty() { - let num_rows = write_record_batches( - &mut record_batches, - column_index_in_insert.as_slice(), - table, - &default_value_map, - ) - .await?; - result_rows += num_rows; - } - Ok(result_rows) - }); - - match tokio::try_join!(producer, consumer) { - Ok((select_res, write_rows)) => { - select_res?; - Ok(Output::AffectedRows(write_rows?)) - } - Err(e) => Err(Error::AsyncTask { - msg: format!("{}", e), - }) - .context(Insert)?, - } - } - } - } -} - -async fn write_record_batches( - record_batches: &mut Vec, - column_index_in_insert: &[InsertMode], - table: TableRef, - default_value_map: &BTreeMap, -) -> InterpreterResult { - let row_group = convert_records_to_row_group( - record_batches.as_slice(), - column_index_in_insert, - table.schema(), - ) - .context(Insert)?; - record_batches.clear(); - - prepare_and_write_table(table, row_group, default_value_map).await -} - -async fn prepare_and_write_table( - table: TableRef, - mut row_group: RowGroup, - default_value_map: &BTreeMap, -) -> InterpreterResult { - maybe_generate_tsid(&mut 
row_group).context(Insert)?; - - // Fill default values - fill_default_values(table.clone(), &mut row_group, default_value_map).context(Insert)?; - - let request = WriteRequest { row_group }; - - let num_rows = table - .write(request) - .await - .context(WriteTable) - .context(Insert)?; - - Ok(num_rows) -} - -async fn exec_select_logical_plan( - ctx: Context, - query_plan: QueryPlan, - executor: ExecutorRef, - physical_planner: PhysicalPlannerRef, -) -> Result { - let priority = Priority::High; - - let query_ctx = ctx - .new_query_context(priority) - .context(CreateQueryContext)?; - - // Create select physical plan. - let physical_plan = physical_planner - .plan(&query_ctx, query_plan) - .await - .box_err() - .context(ExecuteSelectPlan { - msg: "failed to build select physical plan", - })?; - - // Execute select physical plan. - let record_batch_stream: SendableRecordBatchStream = executor - .execute(&query_ctx, physical_plan) - .await - .box_err() - .context(ExecuteSelectPlan { - msg: "failed to execute select physical plan", - })?; - - Ok(record_batch_stream) -} - -fn convert_records_to_row_group( - record_batches: &[CommonRecordBatch], - column_index_in_insert: &[InsertMode], - schema: Schema, -) -> Result { - let mut data_rows: Vec = Vec::new(); - - for record in record_batches { - let num_cols = record.num_columns(); - let num_rows = record.num_rows(); - for row_idx in 0..num_rows { - let mut row_builder = RowBuilder::new(&schema); - // For each column in schema, append datum into row builder - for (index_opt, column_schema) in column_index_in_insert.iter().zip(schema.columns()) { - match index_opt { - InsertMode::Direct(index) => { - ensure!( - *index < num_cols, - RecordColumnsNotEnough { - len: num_cols, - index: *index - } - ); - let datum = record.column(*index).datum(row_idx); - row_builder = row_builder.append_datum(datum).context(BuildRow)?; - } - InsertMode::Null => { - // This is a null column - row_builder = row_builder.append_datum(Datum::Null).context(BuildRow)?; - } - InsertMode::Auto => { - // This is an auto generated column, fill by default value. - let kind = &column_schema.data_type; - row_builder = row_builder - .append_datum(Datum::empty(kind)) - .context(BuildRow)?; - } - } - } - let row = row_builder.finish().context(BuildRow)?; - data_rows.push(row); - } - } - RowGroup::try_new(schema, data_rows).context(BuildRow) -} - -fn maybe_generate_tsid(rows: &mut RowGroup) -> Result<()> { - let schema = rows.schema(); - let tsid_idx = schema.index_of_tsid(); - - if let Some(idx) = tsid_idx { - // Vec of (`index of tag`, `column id of tag`). - let tag_idx_column_ids: Vec<_> = schema - .columns() - .iter() - .enumerate() - .filter_map(|(i, column)| { - if column.is_tag { - Some((i, column.id)) - } else { - None - } - }) - .collect(); - - let mut hash_bytes = Vec::new(); - for i in 0..rows.num_rows() { - let row = rows.get_row_mut(i).unwrap(); - - let mut tsid_builder = TsidBuilder::new(&mut hash_bytes); - - for (idx, column_id) in &tag_idx_column_ids { - tsid_builder.maybe_write_datum(*column_id, &row[*idx])?; - } - - let tsid = tsid_builder.finish(); - row[idx] = Datum::UInt64(tsid); - } - } - Ok(()) -} - -struct TsidBuilder<'a> { - encoder: MemCompactEncoder, - hash_bytes: &'a mut Vec, -} - -impl<'a> TsidBuilder<'a> { - fn new(hash_bytes: &'a mut Vec) -> Self { - // Clear the bytes buffer. 
- hash_bytes.clear(); - - Self { - encoder: MemCompactEncoder, - hash_bytes, - } - } - - fn maybe_write_datum(&mut self, column_id: ColumnId, datum: &Datum) -> Result<()> { - // Null datum will be ignored, so tsid remains unchanged after adding a null - // column. - if datum.is_null() { - return Ok(()); - } - - // Write column id first. - self.encoder - .encode(self.hash_bytes, &Datum::UInt64(u64::from(column_id))) - .context(EncodeTsid)?; - // Write datum. - self.encoder - .encode(self.hash_bytes, datum) - .context(EncodeTsid)?; - Ok(()) - } - - fn finish(self) -> u64 { - hash64(&self.hash_bytes[..]) - } -} - -/// Fill missing columns which can be calculated via default value expr. -fn fill_default_values( - table: TableRef, - row_groups: &mut RowGroup, - default_value_map: &BTreeMap, -) -> Result<()> { - let mut cached_column_values: HashMap = HashMap::new(); - let table_arrow_schema = table.schema().to_arrow_schema_ref(); - let df_schema_ref = table_arrow_schema - .clone() - .to_dfschema_ref() - .context(DatafusionSchema)?; - - for (column_idx, default_value_expr) in default_value_map.iter() { - let execution_props = ExecutionProps::default(); - - // Optimize logical expr - let simplifier = ExprSimplifier::new( - SimplifyContext::new(&execution_props).with_schema(df_schema_ref.clone()), - ); - let default_value_expr = simplifier - .coerce(default_value_expr.clone(), df_schema_ref.clone()) - .context(DatafusionExpr)?; - let simplified_expr = simplifier - .simplify(default_value_expr) - .context(DatafusionExpr)?; - - // Find input columns - let required_column_idxes = find_columns_by_expr(&simplified_expr) - .iter() - .map(|column_name| { - table - .schema() - .index_of(column_name) - .context(FindExpressionInput { column_name }) - }) - .collect::>>()?; - let input_arrow_schema = table_arrow_schema - .project(&required_column_idxes) - .context(ArrowSchema)?; - let input_df_schema = input_arrow_schema - .clone() - .to_dfschema() - .context(DatafusionSchema)?; - - // Create physical expr - let physical_expr = create_physical_expr( - &simplified_expr, - &input_df_schema, - &input_arrow_schema, - &execution_props, - ) - .context(DatafusionExpr)?; - - let from_type = physical_expr - .data_type(&input_arrow_schema) - .context(DatafusionDataType)?; - let to_type = row_groups.schema().column(*column_idx).data_type; - - let casted_physical_expr = if from_type != to_type.into() { - Arc::new(TryCastExpr::new(physical_expr, to_type.into())) - } else { - physical_expr - }; - - // Build input record batch - let input_arrays = required_column_idxes - .into_iter() - .map(|col_idx| { - get_or_extract_column_from_row_groups( - col_idx, - row_groups, - &mut cached_column_values, - ) - }) - .collect::>>()?; - let input = if input_arrays.is_empty() { - RecordBatch::new_empty(Arc::new(input_arrow_schema)) - } else { - RecordBatch::try_new(Arc::new(input_arrow_schema), input_arrays) - .context(BuildArrowRecordBatch)? - }; - - let output = casted_physical_expr - .evaluate(&input) - .context(DatafusionExecutor)?; - - fill_column_to_row_group(*column_idx, &output, row_groups)?; - - // Write output to cache. 
- cached_column_values.insert(*column_idx, output); - } - - Ok(()) -} - -fn fill_column_to_row_group( - column_idx: usize, - column: &DfColumnarValue, - rows: &mut RowGroup, -) -> Result<()> { - match column { - DfColumnarValue::Array(array) => { - let datum_kind = rows.schema().column(column_idx).data_type; - let column_block = ColumnBlock::try_from_arrow_array_ref(&datum_kind, array) - .context(ConvertColumnBlock)?; - for row_idx in 0..rows.num_rows() { - let datum = column_block.datum(row_idx); - rows.get_row_mut(row_idx) - .map(|row| std::mem::replace(row.index_mut(column_idx), datum.clone())); - } - } - DfColumnarValue::Scalar(scalar) => { - if let Some(datum) = Datum::from_scalar_value(scalar) { - for row_idx in 0..rows.num_rows() { - rows.get_row_mut(row_idx) - .map(|row| std::mem::replace(row.index_mut(column_idx), datum.clone())); - } - } - } - }; - - Ok(()) -} - -/// This method is used to get specific column data. -/// There are two pathes: -/// 1. get from cached_column_values -/// 2. extract from row_groups -/// -/// For performance reasons, we cached the columns extracted from row_groups -/// before, and we will also cache the output of the exprs. -fn get_or_extract_column_from_row_groups( - column_idx: usize, - row_groups: &RowGroup, - cached_column_values: &mut HashMap, -) -> Result { - let num_rows = row_groups.num_rows(); - let column = cached_column_values - .get(&column_idx) - .map(|c| Ok(c.clone())) - .unwrap_or_else(|| { - let data_type = row_groups.schema().column(column_idx).data_type; - let iter = row_groups.iter_column(column_idx); - let mut builder = ColumnBlockBuilder::with_capacity( - &data_type, - iter.size_hint().0, - row_groups.schema().column(column_idx).is_dictionary, - ); - - for datum in iter { - builder.append(datum.clone()).context(BuildColumnBlock)?; - } - - let columnar_value = DfColumnarValue::Array(builder.build().to_arrow_array_ref()); - cached_column_values.insert(column_idx, columnar_value.clone()); - Ok(columnar_value) - })?; - - column.into_array(num_rows).context(DatafusionExecutor) -} diff --git a/src/interpreters/src/interpreter.rs b/src/interpreters/src/interpreter.rs deleted file mode 100644 index 5f594c4d8d..0000000000 --- a/src/interpreters/src/interpreter.rs +++ /dev/null @@ -1,98 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Interpreter trait - -use async_trait::async_trait; -use macros::define_result; -use snafu::Snafu; - -use crate::RecordBatchVec; - -// Make the variant closer to actual error code like invalid arguments. 
-#[derive(Debug, Snafu)] -#[snafu(visibility(pub(crate)))] -pub enum Error { - #[snafu(display("Failed to execute select, err:{}", source))] - Select { source: crate::select::Error }, - - #[snafu(display("Failed to execute create table, err:{}", source))] - Create { source: crate::create::Error }, - - #[snafu(display("Failed to execute drop table, err:{}", source))] - Drop { source: crate::drop::Error }, - - #[snafu(display("Failed to execute insert, err:{}", source))] - Insert { source: crate::insert::Error }, - - #[snafu(display("Failed to execute describe, err:{}", source))] - Describe { source: crate::describe::Error }, - - #[snafu(display("Failed to execute alter table, err:{}", source))] - AlterTable { source: crate::alter_table::Error }, - - #[snafu(display("Failed to execute show create tables, err:{}", source))] - ShowCreateTable { source: crate::show::Error }, - - #[snafu(display("Failed to execute show tables, err:{}", source))] - ShowTables { source: crate::show::Error }, - - #[snafu(display("Failed to execute show database, err:{}", source))] - ShowDatabases { source: crate::show::Error }, - - #[snafu(display("Failed to execute exists, err:{}", source))] - Exists { source: crate::exists::Error }, - - #[snafu(display("Failed to transfer output to records"))] - TryIntoRecords, - - #[snafu(display("Failed to check permission, msg:{}", msg))] - PermissionDenied { msg: String }, -} - -define_result!(Error); - -// TODO(yingwen): Maybe add a stream variant for streaming result -/// The interpreter output -#[derive(Clone)] -pub enum Output { - /// Affected rows number - AffectedRows(usize), - /// A vec of RecordBatch - Records(RecordBatchVec), -} - -impl TryFrom for RecordBatchVec { - type Error = Error; - - fn try_from(output: Output) -> Result { - if let Output::Records(records) = output { - Ok(records) - } else { - Err(Error::TryIntoRecords) - } - } -} - -/// Interpreter executes the plan it holds -#[async_trait] -pub trait Interpreter { - async fn execute(self: Box) -> Result; -} - -/// A pointer to Interpreter -pub type InterpreterPtr = Box; diff --git a/src/interpreters/src/lib.rs b/src/interpreters/src/lib.rs deleted file mode 100644 index 3f12eeab5a..0000000000 --- a/src/interpreters/src/lib.rs +++ /dev/null @@ -1,46 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Interpreters of query/insert/update/delete commands -//! -//! 
Inspired by fuse-query: and ClickHouse - -#![feature(string_remove_matches)] - -use common_types::record_batch::RecordBatch; - -pub mod alter_table; -pub mod context; -pub mod create; -pub mod describe; -pub mod drop; -pub mod exists; -pub mod factory; -pub mod insert; -pub mod interpreter; -mod metrics; -pub mod select; -pub mod show; -mod show_create; -pub mod table_manipulator; -pub mod validator; - -#[cfg(test)] -mod tests; - -// Use a type alias so that we are able to replace the implementation -pub type RecordBatchVec = Vec; diff --git a/src/interpreters/src/metrics.rs b/src/interpreters/src/metrics.rs deleted file mode 100644 index d406c5e5e0..0000000000 --- a/src/interpreters/src/metrics.rs +++ /dev/null @@ -1,28 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use lazy_static::lazy_static; -use prometheus::{register_int_counter_vec, IntCounterVec}; - -lazy_static! { - pub static ref ENGINE_QUERY_COUNTER: IntCounterVec = register_int_counter_vec!( - "engine_query_counter", - "engine_query_counter", - &["priority"] - ) - .unwrap(); -} diff --git a/src/interpreters/src/select.rs b/src/interpreters/src/select.rs deleted file mode 100644 index 3be55b5719..0000000000 --- a/src/interpreters/src/select.rs +++ /dev/null @@ -1,176 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Interpreter for select statement - -use async_trait::async_trait; -use futures::TryStreamExt; -use generic_error::{BoxError, GenericError}; -use logger::debug; -use macros::define_result; -use query_engine::{ - context::ContextRef as QueryContextRef, - executor::ExecutorRef, - physical_planner::{PhysicalPlanRef, PhysicalPlannerRef}, -}; -use query_frontend::plan::{PriorityContext, QueryPlan}; -use runtime::{Priority, PriorityRuntime}; -use snafu::{ResultExt, Snafu}; - -use crate::{ - context::Context, - interpreter::{Interpreter, InterpreterPtr, Output, Result as InterpreterResult, Select}, - metrics::ENGINE_QUERY_COUNTER, -}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Failed to create query context, err:{}", source))] - CreateQueryContext { source: crate::context::Error }, - - #[snafu(display("Failed to execute physical plan, msg:{}, err:{}", msg, source))] - ExecutePlan { msg: String, source: GenericError }, - - #[snafu(display("Failed to spawn task, err:{}", source))] - Spawn { source: runtime::Error }, -} - -define_result!(Error); - -/// Select interpreter -pub struct SelectInterpreter { - ctx: Context, - plan: QueryPlan, - executor: ExecutorRef, - physical_planner: PhysicalPlannerRef, - query_runtime: PriorityRuntime, -} - -impl SelectInterpreter { - pub fn create( - ctx: Context, - plan: QueryPlan, - executor: ExecutorRef, - physical_planner: PhysicalPlannerRef, - query_runtime: PriorityRuntime, - ) -> InterpreterPtr { - Box::new(Self { - ctx, - plan, - executor, - physical_planner, - query_runtime, - }) - } -} - -#[async_trait] -impl Interpreter for SelectInterpreter { - async fn execute(self: Box) -> InterpreterResult { - let request_id = self.ctx.request_id(); - let plan = self.plan; - let priority = match plan - .decide_query_priority(PriorityContext { - time_range_threshold: self.ctx.expensive_query_threshold(), - }) - .box_err() - .with_context(|| ExecutePlan { - msg: format!("decide query priority failed, id:{request_id}"), - }) - .context(Select)? - { - Some(v) => v, - None => { - debug!( - "Query has invalid query range, return empty result directly, id:{request_id}, plan:{plan:?}" - ); - return Ok(Output::Records(Vec::new())); - } - }; - - ENGINE_QUERY_COUNTER - .with_label_values(&[priority.as_str()]) - .inc(); - - let query_ctx = self - .ctx - .new_query_context(priority) - .context(CreateQueryContext) - .context(Select)?; - - debug!( - "Interpreter execute select begin, request_id:{request_id}, plan:{plan:?}, priority:{priority:?}" - ); - - // Create physical plan. 
- let physical_plan = self - .physical_planner - .plan(&query_ctx, plan) - .await - .box_err() - .context(ExecutePlan { - msg: "failed to build physical plan", - }) - .context(Select)?; - - if matches!(priority, Priority::Low) { - let executor = self.executor; - return self - .query_runtime - .spawn_with_priority( - async move { - execute_and_collect(query_ctx, executor, physical_plan) - .await - .context(Select) - }, - Priority::Low, - ) - .await - .context(Spawn) - .context(Select)?; - } - - execute_and_collect(query_ctx, self.executor, physical_plan) - .await - .context(Select) - } -} - -async fn execute_and_collect( - query_ctx: QueryContextRef, - executor: ExecutorRef, - physical_plan: PhysicalPlanRef, -) -> Result { - let record_batch_stream = executor - .execute(&query_ctx, physical_plan) - .await - .box_err() - .context(ExecutePlan { - msg: "failed to execute physical plan", - })?; - - let record_batches = - record_batch_stream - .try_collect() - .await - .box_err() - .context(ExecutePlan { - msg: "failed to collect execution results", - })?; - - Ok(Output::Records(record_batches)) -} diff --git a/src/interpreters/src/show.rs b/src/interpreters/src/show.rs deleted file mode 100644 index eac19f7453..0000000000 --- a/src/interpreters/src/show.rs +++ /dev/null @@ -1,284 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::{convert::TryInto, sync::Arc}; - -use arrow::{ - array::StringArray, - datatypes::{DataType, Field, Schema as DataSchema}, - record_batch::RecordBatch, -}; -use async_trait::async_trait; -use catalog::{manager::ManagerRef, schema::Schema, Catalog}; -use macros::define_result; -use query_frontend::{ - ast::ShowCreateObject, - plan::{QueryType, ShowCreatePlan, ShowPlan, ShowTablesPlan}, -}; -use regex::Regex; -use snafu::{Backtrace, OptionExt, ResultExt, Snafu}; - -use crate::{ - context::Context, - interpreter::{ - Interpreter, InterpreterPtr, Output, Result as InterpreterResult, ShowCreateTable, - ShowDatabases, ShowTables, - }, - show_create::ShowCreateInterpreter, -}; - -const SHOW_TABLES_COLUMN_SCHEMA: &str = "Tables"; -const SHOW_DATABASES_COLUMN_SCHEMA: &str = "Schemas"; - -#[derive(Debug, Snafu)] -#[snafu(visibility(pub(crate)))] -pub enum Error { - #[snafu(display( - "Unsupported show create type, type:{:?}.\nBacktrace:{}", - obj_type, - backtrace - ))] - UnsupportedType { - obj_type: ShowCreateObject, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to create a new arrow RecordBatch, err:{}", source))] - CreateRecordBatch { source: arrow::error::ArrowError }, - - #[snafu(display( - "Failed to convert arrow::RecordBatch to common_types::RecordBatch, err:{}", - source - ))] - ToCommonRecordType { - source: common_types::record_batch::Error, - }, - - #[snafu(display("Failed to fetch tables, err:{}", source))] - FetchTables { source: Box }, - - #[snafu(display("Failed to fetch databases, err:{}", source))] - FetchDatabases { source: catalog::Error }, - - #[snafu(display("Catalog does not exist, catalog:{}.\nBacktrace\n:{}", name, backtrace))] - CatalogNotExists { name: String, backtrace: Backtrace }, - - #[snafu(display("Schema does not exist, schema:{}.\nBacktrace\n:{}", name, backtrace))] - SchemaNotExists { name: String, backtrace: Backtrace }, - - #[snafu(display("Failed to fetch catalog, err:{}", source))] - FetchCatalog { source: catalog::manager::Error }, - - #[snafu(display("Failed to fetch schema, err:{}", source))] - FetchSchema { source: catalog::Error }, - - #[snafu(display("Invalid regexp, err:{}.\nBacktrace\n:{}", source, backtrace))] - InvalidRegexp { - source: regex::Error, - backtrace: Backtrace, - }, -} - -define_result!(Error); - -pub struct ShowInterpreter { - ctx: Context, - plan: ShowPlan, - catalog_manager: ManagerRef, -} - -impl ShowInterpreter { - pub fn create(ctx: Context, plan: ShowPlan, catalog_manager: ManagerRef) -> InterpreterPtr { - Box::new(Self { - ctx, - plan, - catalog_manager, - }) - } -} - -impl ShowInterpreter { - fn show_create(plan: ShowCreatePlan) -> Result { - let show_create = ShowCreateInterpreter::create(plan); - show_create.execute_show_create() - } - - fn show_tables( - ctx: Context, - catalog_manager: ManagerRef, - plan: ShowTablesPlan, - ) -> Result { - let schema = get_default_schema(&ctx, &catalog_manager)?; - let tables_names = match plan.pattern { - Some(pattern) => { - let pattern_re = to_pattern_re(&pattern)?; - schema - .all_tables() - .map_err(Box::new) - .context(FetchTables)? - .iter() - .map(|t| t.name().to_string()) - .filter(|table_name| pattern_re.is_match(table_name)) - .collect::>() - } - None => schema - .all_tables() - .map_err(Box::new) - .context(FetchTables)? 
- .iter() - .map(|t| t.name().to_string()) - .collect::>(), - }; - - let record_batch = match plan.query_type { - QueryType::Sql => { - let schema = DataSchema::new(vec![Field::new( - SHOW_TABLES_COLUMN_SCHEMA, - DataType::Utf8, - false, - )]); - - RecordBatch::try_new( - Arc::new(schema), - vec![Arc::new(StringArray::from(tables_names))], - ) - .context(CreateRecordBatch)? - } - QueryType::InfluxQL => { - // TODO: refactor those constants - let schema = DataSchema::new(vec![ - Field::new("iox::measurement", DataType::Utf8, false), - Field::new("name", DataType::Utf8, false), - ]); - - let measurements = vec!["measurements".to_string(); tables_names.len()]; - let measurements = Arc::new(StringArray::from(measurements)); - RecordBatch::try_new( - Arc::new(schema), - vec![measurements, Arc::new(StringArray::from(tables_names))], - ) - .context(CreateRecordBatch)? - } - }; - let record_batch = record_batch.try_into().context(ToCommonRecordType)?; - - Ok(Output::Records(vec![record_batch])) - } - - fn show_databases(ctx: Context, catalog_manager: ManagerRef) -> Result { - let catalog = get_default_catalog(&ctx, &catalog_manager)?; - let schema_names = catalog - .all_schemas() - .context(FetchDatabases)? - .iter() - .map(|t| t.name().to_string()) - .collect::>(); - - let schema = DataSchema::new(vec![Field::new( - SHOW_DATABASES_COLUMN_SCHEMA, - DataType::Utf8, - false, - )]); - let record_batch = RecordBatch::try_new( - Arc::new(schema), - vec![Arc::new(StringArray::from(schema_names))], - ) - .context(CreateRecordBatch)?; - - let record_batch = record_batch.try_into().context(ToCommonRecordType)?; - - Ok(Output::Records(vec![record_batch])) - } -} - -fn to_pattern_re(pattern: &str) -> Result { - // In MySQL - // `_` match any single character - // `% ` match an arbitrary number of characters (including zero characters). - // so replace those meta character to regexp syntax - // TODO: support escape char to match exact those two chars - let pattern = pattern.replace('_', ".").replace('%', ".*"); - let pattern = format!("^{pattern}$"); - Regex::new(&pattern).context(InvalidRegexp) -} - -#[async_trait] -impl Interpreter for ShowInterpreter { - async fn execute(self: Box) -> InterpreterResult { - match self.plan { - ShowPlan::ShowCreatePlan(t) => Self::show_create(t).context(ShowCreateTable), - ShowPlan::ShowTablesPlan(t) => { - Self::show_tables(self.ctx, self.catalog_manager, t).context(ShowTables) - } - ShowPlan::ShowDatabase => { - Self::show_databases(self.ctx, self.catalog_manager).context(ShowDatabases) - } - } - } -} - -fn get_default_catalog( - ctx: &Context, - catalog_manager: &ManagerRef, -) -> Result> { - let default_catalog = ctx.default_catalog(); - catalog_manager - .catalog_by_name(default_catalog) - .context(FetchCatalog)? - .context(CatalogNotExists { - name: default_catalog, - }) -} - -fn get_default_schema( - ctx: &Context, - catalog_manager: &ManagerRef, -) -> Result> { - let catalog = get_default_catalog(ctx, catalog_manager)?; - - let default_schema = ctx.default_schema(); - catalog - .schema_by_name(default_schema) - .context(FetchSchema)? 
- .context(SchemaNotExists { - name: default_schema, - }) -} - -#[cfg(test)] -mod tests { - use crate::show::to_pattern_re; - #[test] - - fn test_is_table_matched() { - let testcases = vec![ - // table, pattern, matched - ("abc", "abc", true), - ("abc", "abcd", false), - ("abc", "ab%", true), - ("abc", "%b%", true), - ("abc", "_b_", true), - ("aabcc", "%b%", true), - ("aabcc", "_b_", false), - ]; - - for (table_name, pattern, matched) in testcases { - let pattern = to_pattern_re(pattern).unwrap(); - assert_eq!(matched, pattern.is_match(table_name)); - } - } -} diff --git a/src/interpreters/src/show_create.rs b/src/interpreters/src/show_create.rs deleted file mode 100644 index 2230a912c4..0000000000 --- a/src/interpreters/src/show_create.rs +++ /dev/null @@ -1,255 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{collections::HashMap, convert::TryInto, sync::Arc}; - -use arrow::{ - array::StringArray, - datatypes::{DataType, Field, Schema}, - record_batch::RecordBatch, -}; -use datafusion::logical_expr::Expr; -use datafusion_proto::bytes::Serializeable; -use logger::error; -use query_frontend::{ast::ShowCreateObject, plan::ShowCreatePlan}; -use snafu::ensure; -use table_engine::{partition::PartitionInfo, table::TableRef}; - -use crate::{ - interpreter::Output, - show::{Result, UnsupportedType}, - RecordBatchVec, -}; - -pub struct ShowCreateInterpreter { - plan: ShowCreatePlan, -} - -impl ShowCreateInterpreter { - pub fn create(plan: ShowCreatePlan) -> ShowCreateInterpreter { - Self { plan } - } - - pub fn execute_show_create(self) -> Result { - let ShowCreatePlan { table, obj_type } = self.plan; - - ensure!( - obj_type == ShowCreateObject::Table, - UnsupportedType { obj_type } - ); - - Self::table_ref_to_record_batch(table).map(Output::Records) - } - - fn table_ref_to_record_batch(table_ref: TableRef) -> Result { - let tables = vec![table_ref.name().to_string()]; - let sqls = vec![Self::render_table_sql(table_ref)]; - - let schema = Schema::new(vec![ - Field::new("Table", DataType::Utf8, false), - Field::new("Create Table", DataType::Utf8, false), - ]); - - let arrow_record_batch = RecordBatch::try_new( - Arc::new(schema), - vec![ - Arc::new(StringArray::from(tables)), - Arc::new(StringArray::from(sqls)), - ], - ) - .unwrap(); - - let record_batch = arrow_record_batch.try_into().unwrap(); - - Ok(vec![record_batch]) - } - - fn render_table_sql(table_ref: TableRef) -> String { - // TODO(boyan) pretty output - format!( - "CREATE TABLE `{}` ({}){} ENGINE={}{}", - table_ref.name(), - Self::render_columns_and_constrains(&table_ref), - Self::render_partition_info(table_ref.partition_info()), - table_ref.engine_type(), - Self::render_options(table_ref.options()) - ) - } - - fn render_columns_and_constrains(table_ref: &TableRef) -> 
String { - let table_schema = table_ref.schema(); - let key_columns = table_schema.key_columns(); - let timestamp_key = table_schema.timestamp_name(); - - let mut res = String::new(); - for col in table_schema.columns() { - res += format!("`{}` {}", col.name, col.data_type).as_str(); - if col.is_tag { - res += " TAG"; - } - - if col.is_dictionary { - res += " DICTIONARY"; - } - - if !col.is_nullable { - res += " NOT NULL"; - } - - if let Some(expr) = &col.default_value { - res += format!(" DEFAULT {expr}").as_str(); - } - - if !col.comment.is_empty() { - res += format!(" COMMENT '{}'", col.comment).as_str(); - } - res += ", "; - } - let keys: Vec = key_columns.iter().map(|col| col.name.to_string()).collect(); - res += format!("PRIMARY KEY({}), ", keys.join(",")).as_str(); - res += format!("TIMESTAMP KEY({timestamp_key})").as_str(); - - res - } - - fn render_partition_info(partition_info: Option) -> String { - if partition_info.is_none() { - return String::new(); - } - - let partition_info = partition_info.unwrap(); - match partition_info { - PartitionInfo::Hash(v) => { - let expr = match Expr::from_bytes(&v.expr) { - Ok(expr) => expr, - Err(e) => { - error!("show create table parse partition info failed, err:{}", e); - return String::new(); - } - }; - - if v.linear { - format!( - " PARTITION BY LINEAR HASH({expr}) PARTITIONS {}", - v.definitions.len() - ) - } else { - format!( - " PARTITION BY HASH({expr}) PARTITIONS {}", - v.definitions.len() - ) - } - } - PartitionInfo::Key(v) => { - let rendered_partition_key = v.partition_key.join(","); - if v.linear { - format!( - " PARTITION BY LINEAR KEY({rendered_partition_key}) PARTITIONS {}", - v.definitions.len() - ) - } else { - format!( - " PARTITION BY KEY({rendered_partition_key}) PARTITIONS {}", - v.definitions.len() - ) - } - } - PartitionInfo::Random(v) => { - format!(" PARTITION BY RANDOM PARTITIONS {}", v.definitions.len()) - } - } - } - - fn render_options(opts: HashMap) -> String { - if !opts.is_empty() { - let mut v: Vec = opts - .into_iter() - .map(|(k, v)| format!("{k}='{v}'")) - .collect(); - // sorted by option name - v.sort(); - format!(" WITH({})", v.join(", ")) - } else { - "".to_string() - } - } -} - -#[cfg(test)] -mod test { - use std::ops::Add; - - use datafusion::logical_expr::col; - use datafusion_proto::bytes::Serializeable; - use table_engine::partition::{ - HashPartitionInfo, KeyPartitionInfo, PartitionDefinition, PartitionInfo, - }; - - use super::*; - - #[test] - fn test_render_hash_partition_info() { - let expr = col("col1").add(col("col2")); - let partition_info = PartitionInfo::Hash(HashPartitionInfo { - version: 0, - definitions: vec![ - PartitionDefinition { - name: "p0".to_string(), - origin_name: None, - }, - PartitionDefinition { - name: "p1".to_string(), - origin_name: None, - }, - ], - expr: expr.to_bytes().unwrap(), - linear: false, - }); - - let expected = " PARTITION BY HASH(col1 + col2) PARTITIONS 2".to_string(); - assert_eq!( - expected, - ShowCreateInterpreter::render_partition_info(Some(partition_info)) - ); - } - - #[test] - fn test_render_key_partition_info() { - let partition_key_col_name = "col1"; - let partition_info = PartitionInfo::Key(KeyPartitionInfo { - version: 0, - definitions: vec![ - PartitionDefinition { - name: "p0".to_string(), - origin_name: None, - }, - PartitionDefinition { - name: "p1".to_string(), - origin_name: None, - }, - ], - partition_key: vec![partition_key_col_name.to_string()], - linear: false, - }); - - let expected = " PARTITION BY KEY(col1) PARTITIONS 2".to_string(); 
- assert_eq!( - expected, - ShowCreateInterpreter::render_partition_info(Some(partition_info)) - ); - } -} diff --git a/src/interpreters/src/table_manipulator/catalog_based.rs b/src/interpreters/src/table_manipulator/catalog_based.rs deleted file mode 100644 index 261a3904a5..0000000000 --- a/src/interpreters/src/table_manipulator/catalog_based.rs +++ /dev/null @@ -1,126 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use async_trait::async_trait; -use catalog::{ - schema::{CreateOptions, CreateTableRequest, DropOptions, DropTableRequest}, - table_operator::TableOperator, -}; -use common_types::table::DEFAULT_SHARD_ID; -use query_frontend::plan::{CreateTablePlan, DropTablePlan}; -use snafu::{ensure, ResultExt}; -use table_engine::engine::{CreateTableParams, TableEngineRef, TableState}; - -use crate::{ - context::Context, - interpreter::Output, - table_manipulator::{ - PartitionTableNotSupported, Result, TableManipulator, TableOperator as TableOperatorErr, - }, -}; - -pub struct TableManipulatorImpl { - table_operator: TableOperator, -} - -impl TableManipulatorImpl { - pub fn new(table_operator: TableOperator) -> Self { - Self { table_operator } - } -} - -#[async_trait] -impl TableManipulator for TableManipulatorImpl { - async fn create_table( - &self, - ctx: Context, - plan: CreateTablePlan, - table_engine: TableEngineRef, - ) -> Result { - ensure!( - plan.partition_info.is_none(), - PartitionTableNotSupported { table: plan.table } - ); - let default_catalog = ctx.default_catalog(); - let default_schema = ctx.default_schema(); - - let CreateTablePlan { - engine, - table, - table_schema, - if_not_exists, - options, - .. 
- } = plan; - - let params = CreateTableParams { - catalog_name: default_catalog.to_string(), - schema_name: default_schema.to_string(), - table_name: table.clone(), - table_schema, - engine, - table_options: options, - partition_info: None, - }; - let request = CreateTableRequest { - params, - table_id: None, - state: TableState::Stable, - shard_id: DEFAULT_SHARD_ID, - }; - - let opts = CreateOptions { - table_engine, - create_if_not_exists: if_not_exists, - }; - - let _ = self - .table_operator - .create_table_on_shard(request, opts) - .await - .context(TableOperatorErr)?; - - Ok(Output::AffectedRows(0)) - } - - async fn drop_table( - &self, - ctx: Context, - plan: DropTablePlan, - table_engine: TableEngineRef, - ) -> Result { - let default_catalog = ctx.default_catalog(); - let default_schema = ctx.default_schema(); - - let table = plan.table; - let request = DropTableRequest { - catalog_name: default_catalog.to_string(), - schema_name: default_schema.to_string(), - table_name: table.clone(), - engine: plan.engine, - }; - - let opts = DropOptions { table_engine }; - - self.table_operator - .drop_table_on_shard(request, opts) - .await - .context(TableOperatorErr)?; - - Ok(Output::AffectedRows(0)) - } -} diff --git a/src/interpreters/src/table_manipulator/meta_based.rs b/src/interpreters/src/table_manipulator/meta_based.rs deleted file mode 100644 index 97d3db3cdf..0000000000 --- a/src/interpreters/src/table_manipulator/meta_based.rs +++ /dev/null @@ -1,161 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use async_trait::async_trait; -use common_types::schema::SchemaEncoder; -use generic_error::BoxError; -use logger::info; -use meta_client::{ - types::{CreateTableRequest, DropTableRequest, PartitionTableInfo}, - MetaClientRef, -}; -use query_frontend::plan::{CreateTablePlan, DropTablePlan}; -use snafu::ResultExt; -use table_engine::{ - engine::TableEngineRef, - partition::{format_sub_partition_table_name, PartitionInfo}, -}; - -use crate::{ - context::Context, - interpreter::Output, - table_manipulator::{CreateWithCause, DropWithCause, Result, TableManipulator}, -}; - -pub struct TableManipulatorImpl { - meta_client: MetaClientRef, -} - -impl TableManipulatorImpl { - pub fn new(meta_client: MetaClientRef) -> Self { - Self { meta_client } - } -} - -#[async_trait] -impl TableManipulator for TableManipulatorImpl { - async fn create_table( - &self, - ctx: Context, - plan: CreateTablePlan, - table_engine: TableEngineRef, - ) -> Result { - { - let params = table_engine::engine::CreateTableParams { - catalog_name: ctx.default_catalog().to_string(), - schema_name: ctx.default_schema().to_string(), - table_name: plan.table.clone(), - table_schema: plan.table_schema.clone(), - engine: plan.engine.clone(), - table_options: plan.options.clone(), - partition_info: plan.partition_info.clone(), - }; - table_engine - .validate_create_table(¶ms) - .await - .box_err() - .with_context(|| CreateWithCause { - msg: format!("invalid parameters to create table, plan:{plan:?}"), - })?; - } - - let encoded_schema = SchemaEncoder::default() - .encode(&plan.table_schema) - .box_err() - .with_context(|| CreateWithCause { - msg: format!("fail to encode table schema, plan:{plan:?}"), - })?; - - let partition_table_info = create_partition_table_info(&plan.table, &plan.partition_info); - - let req = CreateTableRequest { - schema_name: ctx.default_schema().to_string(), - name: plan.table, - encoded_schema, - engine: plan.engine, - create_if_not_exist: plan.if_not_exists, - options: plan.options, - partition_table_info, - }; - - let resp = self - .meta_client - .create_table(req.clone()) - .await - .box_err() - .with_context(|| CreateWithCause { - msg: format!("failed to create table by meta client, req:{req:?}"), - })?; - - info!( - "Create table by meta successfully, req:{:?}, resp:{:?}", - req, resp - ); - - Ok(Output::AffectedRows(0)) - } - - async fn drop_table( - &self, - ctx: Context, - plan: DropTablePlan, - _table_engine: TableEngineRef, - ) -> Result { - let partition_table_info = create_partition_table_info(&plan.table, &plan.partition_info); - - let req = DropTableRequest { - schema_name: ctx.default_schema().to_string(), - name: plan.table, - partition_table_info, - }; - - let resp = self - .meta_client - .drop_table(req.clone()) - .await - .box_err() - .context(DropWithCause { - msg: format!("failed to drop table by meta client, req:{req:?}"), - })?; - - info!( - "Drop table by meta successfully, req:{:?}, resp:{:?}", - req, resp - ); - - Ok(Output::AffectedRows(0)) - } -} - -fn create_partition_table_info( - table_name: &str, - partition_info: &Option, -) -> Option { - if let Some(info) = partition_info { - let sub_table_names = info - .get_definitions() - .iter() - .map(|def| format_sub_partition_table_name(table_name, &def.name)) - .collect::>(); - Some(PartitionTableInfo { - sub_table_names, - partition_info: info.clone(), - }) - } else { - None - } -} diff --git a/src/interpreters/src/table_manipulator/mod.rs b/src/interpreters/src/table_manipulator/mod.rs deleted file mode 100644 index 
cae06405db..0000000000 --- a/src/interpreters/src/table_manipulator/mod.rs +++ /dev/null @@ -1,103 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use async_trait::async_trait; -use generic_error::GenericError; -use macros::define_result; -use query_frontend::plan::{CreateTablePlan, DropTablePlan}; -use snafu::{Backtrace, Snafu}; -use table_engine::engine::TableEngineRef; - -use crate::{context::Context, interpreter::Output}; - -pub mod catalog_based; -pub mod meta_based; - -pub type TableManipulatorRef = Arc; - -#[derive(Debug, Snafu)] -#[snafu(visibility(pub(crate)))] -pub enum Error { - #[snafu(display("Failed to find catalog, name:{}, err:{}", name, source))] - FindCatalog { - name: String, - source: catalog::manager::Error, - }, - - #[snafu(display("Catalog not exists, name:{}.\nBacktrace:\n{}", name, backtrace))] - CatalogNotExists { name: String, backtrace: Backtrace }, - - #[snafu(display("Failed to find schema, name:{}, err:{}", name, source))] - FindSchema { - name: String, - source: catalog::Error, - }, - - #[snafu(display("Schema not exists, name:{}.\nBacktrace:\n{}", name, backtrace))] - SchemaNotExists { name: String, backtrace: Backtrace }, - - #[snafu(display("Failed to create table, name:{}, err:{}", table, source))] - SchemaCreateTable { - table: String, - source: catalog::schema::Error, - }, - - #[snafu(display("Failed to drop table in schema, name:{}, err:{}", table, source))] - SchemaDropTable { - table: String, - source: catalog::schema::Error, - }, - - #[snafu(display("Failed to drop table, name:{}, err:{}", table, source))] - DropTable { - table: String, - source: table_engine::engine::Error, - }, - - #[snafu(display("Failed to create table, msg:{}, err:{}", msg, source))] - CreateWithCause { msg: String, source: GenericError }, - - #[snafu(display("Failed to drop table, msg:{}, err:{}", msg, source))] - DropWithCause { msg: String, source: GenericError }, - - #[snafu(display("Failed to create partition table without horaemeta, table:{}", table))] - PartitionTableNotSupported { table: String }, - - #[snafu(display("Failed to operate table, err:{}", source))] - TableOperator { source: catalog::Error }, -} - -define_result!(Error); - -#[async_trait] -pub trait TableManipulator { - async fn create_table( - &self, - ctx: Context, - plan: CreateTablePlan, - table_engine: TableEngineRef, - ) -> Result; - - async fn drop_table( - &self, - ctx: Context, - plan: DropTablePlan, - table_engine: TableEngineRef, - ) -> Result; -} diff --git a/src/interpreters/src/tests.rs b/src/interpreters/src/tests.rs deleted file mode 100644 index f9c8c75bd9..0000000000 --- a/src/interpreters/src/tests.rs +++ /dev/null @@ -1,422 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more 
contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use analytic_engine::tests::util::{EngineBuildContext, RocksDBEngineBuildContext, TestEnv}; -use catalog::{ - consts::{DEFAULT_CATALOG, DEFAULT_SCHEMA}, - manager::ManagerRef, - table_operator::TableOperator, -}; -use catalog_impls::table_based::TableBasedManager; -use common_types::request_id::RequestId; -use datafusion::execution::runtime_env::RuntimeConfig; -use df_operator::registry::{FunctionRegistry, FunctionRegistryImpl}; -use query_engine::{datafusion_impl::DatafusionQueryEngineImpl, QueryEngineRef}; -use query_frontend::{ - config::DynamicConfig, parser::Parser, plan::Plan, planner::Planner, provider::MetaProvider, - tests::MockMetaProvider, -}; -use runtime::{Builder, PriorityRuntime}; -use table_engine::{engine::TableEngineRef, memory::MockRemoteEngine}; - -use crate::{ - context::Context, - factory::Factory, - interpreter::{Output, Result}, - table_manipulator::{catalog_based::TableManipulatorImpl, TableManipulatorRef}, -}; - -async fn build_catalog_manager(analytic: TableEngineRef) -> TableBasedManager { - // Create catalog manager, use analytic table as backend - TableBasedManager::new(analytic.clone()) - .await - .expect("Failed to create catalog manager") -} - -fn sql_to_plan(meta_provider: &M, sql: &str) -> Plan { - let dyn_config = DynamicConfig::default(); - let planner = Planner::new(meta_provider, RequestId::next_id(), 1, &dyn_config); - let mut statements = Parser::parse_sql(sql).unwrap(); - assert_eq!(statements.len(), 1); - planner.statement_to_plan(statements.remove(0)).unwrap() -} - -struct Env -where - M: MetaProvider, -{ - pub engine: TableEngineRef, - pub meta_provider: M, - pub catalog_manager: ManagerRef, - pub table_manipulator: TableManipulatorRef, - pub query_engine: QueryEngineRef, - pub read_runtime: PriorityRuntime, -} - -impl Env -where - M: MetaProvider, -{ - fn engine(&self) -> TableEngineRef { - self.engine.clone() - } -} - -impl Env -where - M: MetaProvider, -{ - async fn build_factory(&self) -> Factory { - Factory::new( - self.query_engine.executor(), - self.query_engine.physical_planner(), - self.catalog_manager.clone(), - self.engine(), - self.table_manipulator.clone(), - self.read_runtime.clone(), - ) - } - - async fn sql_to_output(&self, sql: &str) -> Result { - let ctx = Context::builder(RequestId::next_id(), None) - .default_catalog_and_schema(DEFAULT_CATALOG.to_string(), DEFAULT_SCHEMA.to_string()) - .build(); - self.sql_to_output_with_context(sql, ctx).await - } - - async fn sql_to_output_with_context(&self, sql: &str, ctx: Context) -> Result { - let plan = sql_to_plan(&self.meta_provider, sql); - let factory = self.build_factory().await; - let interpreter = factory.create(ctx, plan)?; - interpreter.execute().await - } - - async fn create_table_and_check( - &self, - table_name: &str, - 
enable_partition_table_access: bool, - ) -> Result<()> { - let ctx = Context::builder(RequestId::next_id(), None) - .default_catalog_and_schema(DEFAULT_CATALOG.to_string(), DEFAULT_SCHEMA.to_string()) - .enable_partition_table_access(enable_partition_table_access) - .build(); - let sql= format!("CREATE TABLE IF NOT EXISTS {table_name}(c1 string tag not null,ts timestamp not null, c3 string, timestamp key(ts),primary key(c1, ts)) \ - ENGINE=Analytic WITH (enable_ttl='false',update_mode='overwrite',arena_block_size='1KB')"); - - let output = self.sql_to_output_with_context(&sql, ctx).await?; - assert!( - matches!(output, Output::AffectedRows(v) if v == 0), - "create table should success" - ); - - Ok(()) - } - - async fn insert_table_and_check( - &self, - table_name: &str, - enable_partition_table_access: bool, - ) -> Result<()> { - let ctx = Context::builder(RequestId::next_id(), None) - .default_catalog_and_schema(DEFAULT_CATALOG.to_string(), DEFAULT_SCHEMA.to_string()) - .enable_partition_table_access(enable_partition_table_access) - .build(); - let sql = format!("INSERT INTO {table_name}(key1, key2, field1,field2,field3,field4) VALUES('tagk', 1638428434000,100, 'hello3','2022-10-10','10:10:10.234'),('tagk2', 1638428434000,100, 'hello3','2022-10-11','11:10:10.234');"); - let output = self.sql_to_output_with_context(&sql, ctx).await?; - assert!( - matches!(output, Output::AffectedRows(v) if v == 2), - "insert table should success" - ); - - Ok(()) - } - - async fn select_table_and_check( - &self, - table_name: &str, - enable_partition_table_access: bool, - ) -> Result<()> { - let ctx = Context::builder(RequestId::next_id(), None) - .default_catalog_and_schema(DEFAULT_CATALOG.to_string(), DEFAULT_SCHEMA.to_string()) - .enable_partition_table_access(enable_partition_table_access) - .build(); - let sql = format!("select * from {table_name}"); - let output = self.sql_to_output_with_context(&sql, ctx.clone()).await?; - let records = output.try_into().unwrap(); - let expected = vec![ - "+------------+---------------------+--------+--------+------------+--------------+", - "| key1 | key2 | field1 | field2 | field3 | field4 |", - "+------------+---------------------+--------+--------+------------+--------------+", - "| 7461676b | 2021-12-02T07:00:34 | 100.0 | hello3 | 2022-10-10 | 10:10:10.234 |", - "| 7461676b32 | 2021-12-02T07:00:34 | 100.0 | hello3 | 2022-10-11 | 11:10:10.234 |", - "+------------+---------------------+--------+--------+------------+--------------+", - ]; - test_util::assert_record_batches_eq(&expected, records); - - let sql = format!("select count(*) from {table_name}"); - let output = self.sql_to_output_with_context(&sql, ctx).await?; - let records = output.try_into().unwrap(); - let expected = vec![ - "+----------+", - "| COUNT(*) |", - "+----------+", - "| 2 |", - "+----------+", - ]; - test_util::assert_record_batches_eq(&expected, records); - - Ok(()) - } - - async fn test_create_table(&self) { - self.create_table_and_check("test_table", false) - .await - .unwrap(); - } - - async fn test_desc_table(&self) { - let sql = "desc table test_table"; - let output = self.sql_to_output(sql).await.unwrap(); - let records = output.try_into().unwrap(); - let expected = vec![ - "+--------+-----------+------------+-------------+--------+---------------+", - "| name | type | is_primary | is_nullable | is_tag | is_dictionary |", - "+--------+-----------+------------+-------------+--------+---------------+", - "| key1 | varbinary | true | false | false | false |", - "| key2 | timestamp | 
true | false | false | false |", - "| field1 | double | false | true | false | false |", - "| field2 | string | false | true | false | false |", - "| field3 | date | false | true | false | false |", - "| field4 | time | false | true | false | false |", - "+--------+-----------+------------+-------------+--------+---------------+", - ]; - test_util::assert_record_batches_eq(&expected, records); - } - - async fn test_exists_table(&self) { - let sql = "exists table test_table"; - let output = self.sql_to_output(sql).await.unwrap(); - let records = output.try_into().unwrap(); - let expected = vec![ - "+--------+", - "| result |", - "+--------+", - "| 1 |", - "+--------+", - ]; - test_util::assert_record_batches_eq(&expected, records); - } - - async fn test_insert_table(&self) { - self.insert_table_and_check("test_table", false) - .await - .unwrap(); - } - - async fn test_insert_table_with_missing_columns(&self) { - let catalog_manager = Arc::new(build_catalog_manager(self.engine()).await); - let ctx = Context::builder(RequestId::next_id(), None) - .default_catalog_and_schema(DEFAULT_CATALOG.to_string(), DEFAULT_SCHEMA.to_string()) - .build(); - let table_operator = TableOperator::new(catalog_manager.clone()); - let table_manipulator = Arc::new(TableManipulatorImpl::new(table_operator)); - let insert_factory = Factory::new( - self.query_engine.executor(), - self.query_engine.physical_planner(), - catalog_manager.clone(), - self.engine(), - table_manipulator.clone(), - self.read_runtime.clone(), - ); - let insert_sql = "INSERT INTO test_missing_columns_table(key1, key2, field4) VALUES('tagk', 1638428434000, 1), ('tagk2', 1638428434000, 10);"; - - let plan = sql_to_plan(&self.meta_provider, insert_sql); - let interpreter = insert_factory.create(ctx, plan).unwrap(); - let output = interpreter.execute().await.unwrap(); - assert!( - matches!(output, Output::AffectedRows(v) if v == 2), - "insert should success" - ); - - // Check data which just insert. 
- let select_sql = - "SELECT key1, key2, field1, field2, field3, field4, field5 from test_missing_columns_table"; - let select_factory = Factory::new( - self.query_engine.executor(), - self.query_engine.physical_planner(), - catalog_manager, - self.engine(), - table_manipulator, - self.read_runtime.clone(), - ); - let ctx = Context::builder(RequestId::next_id(), None) - .default_catalog_and_schema(DEFAULT_CATALOG.to_string(), DEFAULT_SCHEMA.to_string()) - .build(); - let plan = sql_to_plan(&self.meta_provider, select_sql); - let interpreter = select_factory.create(ctx, plan).unwrap(); - let output = interpreter.execute().await.unwrap(); - let records = output.try_into().unwrap(); - - #[rustfmt::skip] - // sql: CREATE TABLE `test_missing_columns_table` (`key1` varbinary NOT NULL, - // `key2` timestamp NOT NULL, - // `field1` bigint NOT NULL DEFAULT 10, - // `field2` uint32 NOT NULL DEFAULT 20, - // `field3` uint32 NOT NULL DEFAULT 1 + 2, - // `field4` uint32 NOT NULL, - // `field5` uint32 NOT NULL DEFAULT field4 + 2, - // PRIMARY KEY(key1,key2), TIMESTAMP KEY(key2)) ENGINE=Analytic - let expected = vec![ - "+------------+---------------------+--------+--------+--------+--------+--------+", - "| key1 | key2 | field1 | field2 | field3 | field4 | field5 |", - "+------------+---------------------+--------+--------+--------+--------+--------+", - "| 7461676b | 2021-12-02T07:00:34 | 10 | 20 | 3 | 1 | 3 |", - "| 7461676b32 | 2021-12-02T07:00:34 | 10 | 20 | 3 | 10 | 12 |", - "+------------+---------------------+--------+--------+--------+--------+--------+", - ]; - test_util::assert_record_batches_eq(&expected, records); - } - - async fn test_select_table(&self) { - self.select_table_and_check("test_table", false) - .await - .unwrap(); - } - - async fn test_show_create_table(&self) { - let sql = "show create table test_table"; - let output = self.sql_to_output(sql).await.unwrap(); - let records = output.try_into().unwrap(); - let expected = vec![ - "+------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", - "| Table | Create Table |", - "+------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", - "| test_table | CREATE TABLE `test_table` (`key1` varbinary NOT NULL, `key2` timestamp NOT NULL, `field1` double, `field2` string, `field3` date, `field4` time, PRIMARY KEY(key1,key2), TIMESTAMP KEY(key2)) ENGINE=Analytic |", - "+------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+" - ]; - test_util::assert_record_batches_eq(&expected, records); - } - - async fn test_alter_table(&self) { - let sql = "alter table test_table add column add_col string"; - let output = self.sql_to_output(sql).await.unwrap(); - assert!( - matches!(output, Output::AffectedRows(v) if v == 0), - "alter table should success" - ); - - let sql = "alter table test_table modify SETTING ttl='9d'"; - let output = self.sql_to_output(sql).await.unwrap(); - assert!( - matches!(output, Output::AffectedRows(v) if v == 0), - "alter table should success" - ); - } - - async fn test_drop_table(&self) { - let sql = "drop table test_table"; - let output 
= self.sql_to_output(sql).await.unwrap(); - assert!( - matches!(output, Output::AffectedRows(v) if v == 0), - "alter table should success" - ); - } - - async fn test_enable_partition_table_access(&self) { - // Disable partition table access, all of create, insert and select about sub - // table(in table partition) directly will failed. - let res = self.create_table_and_check("__test_table", false).await; - assert!(format!("{res:?}") - .contains("only can process sub tables in table partition directly when enable partition table access")); - let res1 = self.insert_table_and_check("__test_table", false).await; - assert!(format!("{res1:?}") - .contains("only can process sub tables in table partition directly when enable partition table access")); - let res2 = self.select_table_and_check("__test_table", false).await; - assert!(format!("{res2:?}") - .contains("only can process sub tables in table partition directly when enable partition table access")); - - // Enable partition table access, operations above will success. - self.create_table_and_check("__test_table", true) - .await - .unwrap(); - self.insert_table_and_check("__test_table", true) - .await - .unwrap(); - self.select_table_and_check("__test_table", true) - .await - .unwrap(); - } -} - -#[test] -fn test_interpreters_rocks() { - let rt = Arc::new(Builder::default().build().unwrap()); - let read_runtime = PriorityRuntime::new(rt.clone(), rt.clone()); - rt.block_on(async { - test_util::init_log_for_test(); - let rocksdb_ctx = RocksDBEngineBuildContext::default(); - test_interpreters(rocksdb_ctx, read_runtime).await; - }) -} - -async fn test_interpreters( - engine_context: T, - read_runtime: PriorityRuntime, -) { - let env = TestEnv::builder().build(); - let mut test_ctx = env.new_context(engine_context); - test_ctx.open().await; - let mock = MockMetaProvider::default(); - let engine = test_ctx.clone_engine(); - let catalog_manager = Arc::new(build_catalog_manager(engine.clone()).await); - let table_operator = TableOperator::new(catalog_manager.clone()); - let table_manipulator = Arc::new(TableManipulatorImpl::new(table_operator)); - let function_registry = Arc::new(FunctionRegistryImpl::default()); - let remote_engine = Arc::new(MockRemoteEngine); - let query_engine = Arc::new( - DatafusionQueryEngineImpl::new( - query_engine::config::Config::default(), - RuntimeConfig::default(), - function_registry.to_df_function_registry(), - remote_engine, - catalog_manager.clone(), - ) - .unwrap(), - ); - - let env = Env { - engine: test_ctx.clone_engine(), - meta_provider: mock, - catalog_manager, - table_manipulator, - query_engine, - read_runtime, - }; - - env.test_create_table().await; - env.test_desc_table().await; - env.test_exists_table().await; - env.test_insert_table().await; - env.test_select_table().await; - env.test_show_create_table().await; - env.test_alter_table().await; - env.test_drop_table().await; - env.test_insert_table_with_missing_columns().await; - env.test_enable_partition_table_access().await; -} diff --git a/src/interpreters/src/validator.rs b/src/interpreters/src/validator.rs deleted file mode 100644 index 91bc819948..0000000000 --- a/src/interpreters/src/validator.rs +++ /dev/null @@ -1,111 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use query_frontend::plan::{Plan, ShowPlan}; -use table_engine::partition; - -use crate::interpreter::{PermissionDenied, Result}; - -macro_rules! is_sub_table { - ($table_name:expr) => {{ - let table_name = $table_name; - partition::is_sub_partition_table(table_name) - }}; -} - -/// Validator for [Plan] -#[derive(Debug)] -pub(crate) struct Validator { - ctx: ValidateContext, -} - -impl Validator { - pub fn new(ctx: ValidateContext) -> Self { - Self { ctx } - } - - pub fn validate(&self, plan: &Plan) -> Result<()> { - self.validate_partition_table_access(plan)?; - - Ok(()) - } - - fn validate_partition_table_access(&self, plan: &Plan) -> Result<()> { - // Only can operate the sub tables(table partition) directly while enable - // partition table access. - if !self.ctx.enable_partition_table_access && Validator::contains_sub_tables(plan) { - PermissionDenied { - msg: "only can process sub tables in table partition directly when enable partition table access", - } - .fail() - } else { - Ok(()) - } - } - - // TODO: reduce duplicated codes. - fn contains_sub_tables(plan: &Plan) -> bool { - match plan { - Plan::Query(plan) => { - let res = plan.tables.visit::<_, ()>(|name, _| { - if partition::is_sub_partition_table(name.table.as_ref()) { - Err(()) - } else { - Ok(()) - } - }); - - res.is_err() - } - - Plan::Create(plan) => { - is_sub_table!(&plan.table) - } - - Plan::Drop(plan) => { - is_sub_table!(&plan.table) - } - - Plan::Insert(plan) => { - is_sub_table!(plan.table.name()) - } - - Plan::Describe(plan) => { - is_sub_table!(plan.table.name()) - } - - Plan::AlterTable(plan) => { - is_sub_table!(plan.table.name()) - } - - Plan::Show(show_plan) => { - if let ShowPlan::ShowCreatePlan(show_create_plan) = show_plan { - is_sub_table!(show_create_plan.table.name()) - } else { - false - } - } - - Plan::Exists(_) => false, - } - } -} - -#[derive(Debug, Default, Clone)] -pub struct ValidateContext { - pub enable_partition_table_access: bool, -} diff --git a/src/meta_client/Cargo.toml b/src/meta_client/Cargo.toml deleted file mode 100644 index 3ff3207718..0000000000 --- a/src/meta_client/Cargo.toml +++ /dev/null @@ -1,50 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
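The removed `validator.rs` above encodes a single rule: unless `enable_partition_table_access` is set on the context, a plan may not touch a sub table of a partition table directly. A minimal standalone sketch of that decision follows; `is_sub_partition_table` stands in for `table_engine::partition::is_sub_partition_table`, and the `__` prefix is only an assumption borrowed from the `__test_table` fixture in the deleted interpreter tests, not the authoritative naming convention.

```rust
// Minimal sketch of the partition-table access rule from the deleted validator.rs.
// `is_sub_partition_table` is a stand-in for table_engine::partition::is_sub_partition_table;
// the "__" prefix is an assumption taken from the `__test_table` fixture.
fn is_sub_partition_table(table_name: &str) -> bool {
    table_name.starts_with("__")
}

// Mirrors Validator::validate_partition_table_access: reject plans that touch
// sub tables directly unless partition table access is explicitly enabled.
fn validate_partition_table_access(
    enable_partition_table_access: bool,
    touched_tables: &[&str],
) -> Result<(), String> {
    let touches_sub_table = touched_tables.iter().any(|t| is_sub_partition_table(t));
    if !enable_partition_table_access && touches_sub_table {
        return Err(
            "only can process sub tables in table partition directly when enable partition table access"
                .to_string(),
        );
    }
    Ok(())
}

fn main() {
    assert!(validate_partition_table_access(false, &["test_table"]).is_ok());
    assert!(validate_partition_table_access(false, &["__test_table"]).is_err());
    assert!(validate_partition_table_access(true, &["__test_table"]).is_ok());
}
```

This matches the behaviour exercised by `test_enable_partition_table_access` above: the same create/insert/select statements fail without the flag and succeed with it.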
- -[package] -name = "meta_client" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -async-trait = { workspace = true } -common_types = { workspace = true } -futures = { workspace = true } -generic_error = { workspace = true } -horaedbproto = { workspace = true } -logger = { workspace = true } -macros = { workspace = true } -prost = { workspace = true } -reqwest = { workspace = true } -serde = { workspace = true } -serde_json = { workspace = true } -snafu = { workspace = true } -table_engine = { workspace = true } -time_ext = { workspace = true } -tokio = { workspace = true } -tonic = { workspace = true } -url = "2.2" diff --git a/src/meta_client/src/lib.rs b/src/meta_client/src/lib.rs deleted file mode 100644 index ba93313537..0000000000 --- a/src/meta_client/src/lib.rs +++ /dev/null @@ -1,127 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use async_trait::async_trait; -use generic_error::GenericError; -use macros::define_result; -use snafu::{Backtrace, Snafu}; -use types::{ - AllocSchemaIdRequest, AllocSchemaIdResponse, CreateTableRequest, CreateTableResponse, - DropTableRequest, DropTableResponse, FetchCompactionNodeRequest, FetchCompactionNodeResponse, - GetNodesRequest, GetNodesResponse, GetTablesOfShardsRequest, GetTablesOfShardsResponse, - RouteTablesRequest, RouteTablesResponse, ShardInfo, -}; - -pub mod meta_impl; -pub mod types; - -#[derive(Debug, Snafu)] -#[snafu(visibility = "pub")] -pub enum Error { - #[snafu(display("{msg}, err:{source}"))] - Convert { msg: String, source: GenericError }, - - #[snafu(display("Missing shard info, msg:{}.\nBacktrace:\n{}", msg, backtrace))] - MissingShardInfo { msg: String, backtrace: Backtrace }, - - #[snafu(display("Missing table info, msg:{}.\nBacktrace:\n{}", msg, backtrace))] - MissingTableInfo { msg: String, backtrace: Backtrace }, - - #[snafu(display("Missing header in rpc response.\nBacktrace:\n{}", backtrace))] - MissingHeader { backtrace: Backtrace }, - - #[snafu(display( - "Failed to connect the service endpoint:{}, err:{}\nBacktrace:\n{}", - addr, - source, - backtrace - ))] - FailConnect { - addr: String, - source: GenericError, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to send heartbeat, cluster:{}, err:{}", cluster, source))] - FailSendHeartbeat { - cluster: String, - source: GenericError, - }, - - #[snafu(display("Failed to alloc schema id, err:{}", source))] - FailAllocSchemaId { source: GenericError }, - - #[snafu(display("Failed to alloc table id, err:{}", source))] - FailCreateTable { source: GenericError }, - - #[snafu(display("Failed to drop table, err:{}", source))] - FailDropTable { source: 
GenericError }, - - #[snafu(display("Failed to get tables, err:{}", source))] - FailGetTables { source: GenericError }, - - #[snafu(display("Failed to fetch compaction node, err:{}", source))] - FailFetchCompactionNode { source: GenericError }, - - #[snafu(display("Failed to route tables, err:{}", source))] - FailRouteTables { source: GenericError }, - - #[snafu(display( - "Bad response, resp code:{}, msg:{}.\nBacktrace:\n{}", - code, - msg, - backtrace - ))] - BadResponse { - code: u32, - msg: String, - backtrace: Backtrace, - }, -} - -define_result!(Error); - -/// MetaClient is the abstraction of client used to communicate with HoraeMeta -/// cluster. -#[async_trait] -pub trait MetaClient: Send + Sync { - async fn alloc_schema_id(&self, req: AllocSchemaIdRequest) -> Result; - - async fn create_table(&self, req: CreateTableRequest) -> Result; - - async fn drop_table(&self, req: DropTableRequest) -> Result; - - async fn get_tables_of_shards( - &self, - req: GetTablesOfShardsRequest, - ) -> Result; - - async fn route_tables(&self, req: RouteTablesRequest) -> Result; - - async fn get_nodes(&self, req: GetNodesRequest) -> Result; - - async fn fetch_compaction_node( - &self, - req: FetchCompactionNodeRequest, - ) -> Result; - - async fn send_heartbeat(&self, req: Vec) -> Result<()>; -} - -pub type MetaClientRef = Arc; diff --git a/src/meta_client/src/load_balance.rs b/src/meta_client/src/load_balance.rs deleted file mode 100644 index 79ca96b2b9..0000000000 --- a/src/meta_client/src/load_balance.rs +++ /dev/null @@ -1,80 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Load balancer - -use macros::define_result; -use rand::Rng; -use snafu::{Backtrace, Snafu}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Meta Addresses empty.\nBacktrace:\n{}", backtrace))] - MetaAddressesEmpty { backtrace: Backtrace }, -} - -define_result!(Error); - -pub trait LoadBalancer { - fn select<'a>(&self, addresses: &'a [String]) -> Result<&'a String>; -} - -pub struct RandomLoadBalancer; - -impl LoadBalancer for RandomLoadBalancer { - fn select<'a>(&self, addresses: &'a [String]) -> Result<&'a String> { - if addresses.is_empty() { - return MetaAddressesEmpty.fail(); - } - - let len = addresses.len(); - if len == 1 { - return Ok(&addresses[0]); - } - let mut rng = rand::thread_rng(); - let idx = rng.gen_range(0..len); - - Ok(&addresses[idx]) - } -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_random_loadbalancer() { - let lb = RandomLoadBalancer; - let addresses = vec![ - "127.0.0.1:8080".to_string(), - "127.0.0.2:8080".to_string(), - "127.0.0.3:8080".to_string(), - "127.0.0.4:8080".to_string(), - "127.0.0.5:8080".to_string(), - ]; - for _idx in 0..100 { - let addr = lb.select(&addresses).unwrap(); - assert!(addresses.contains(addr)); - } - - // Empty case - assert!(lb.select(&[]).is_err()); - - let addresses = ["127.0.0.1:5000".to_string()]; - assert_eq!(&addresses[0], lb.select(&addresses).unwrap()); - } -} diff --git a/src/meta_client/src/meta_impl.rs b/src/meta_client/src/meta_impl.rs deleted file mode 100644 index ffe32faeb8..0000000000 --- a/src/meta_client/src/meta_impl.rs +++ /dev/null @@ -1,295 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
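The `RandomLoadBalancer` removed just above is the piece that picks which HoraeMeta endpoint to dial. A hedged sketch of how it plugs into `meta_impl.rs` (which follows): the function is written as if it lived inside `meta_impl.rs`, so `MetaClientConfig`, `build_meta_client`, `NodeMetaInfo`, `MetaClientRef` and `Result` are already in scope there; the `crate::load_balance` path and the caller-supplied address list are assumptions for illustration.

```rust
// Sketch, not part of the deleted code: the intended role of RandomLoadBalancer
// when more than one HoraeMeta endpoint is known. Assumes `load_balance` is
// declared as a module of the meta_client crate.
use crate::load_balance::{LoadBalancer, RandomLoadBalancer};

async fn connect_to_one_of(
    candidates: &[String],
    node_meta_info: NodeMetaInfo,
) -> Result<MetaClientRef> {
    // `select` only fails when the candidate list is empty.
    let addr = RandomLoadBalancer
        .select(candidates)
        .expect("at least one meta address must be configured");

    let config = MetaClientConfig {
        meta_addr: addr.clone(),
        ..MetaClientConfig::default()
    };
    // Wraps MetaClientImpl::connect and returns the Arc'd trait object.
    build_meta_client(config, node_meta_info).await
}
```

Note that the deleted `MetaClientConfig` carries only a single `meta_addr` string, so a caller holding several endpoints would resolve one up front along these lines.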
- -use std::sync::Arc; - -use async_trait::async_trait; -use generic_error::BoxError; -use horaedbproto::{ - common::ResponseHeader, - meta_service::{self, meta_rpc_service_client::MetaRpcServiceClient}, -}; -use logger::{debug, info}; -use serde::{Deserialize, Serialize}; -use snafu::{OptionExt, ResultExt}; -use time_ext::ReadableDuration; - -use crate::{ - types::{ - AllocSchemaIdRequest, AllocSchemaIdResponse, CreateTableRequest, CreateTableResponse, - DropTableRequest, DropTableResponse, FetchCompactionNodeRequest, - FetchCompactionNodeResponse, GetNodesRequest, GetNodesResponse, GetTablesOfShardsRequest, - GetTablesOfShardsResponse, NodeInfo, NodeMetaInfo, RequestHeader, RouteTablesRequest, - RouteTablesResponse, ShardInfo, - }, - BadResponse, FailAllocSchemaId, FailConnect, FailCreateTable, FailDropTable, FailGetTables, - FailRouteTables, FailSendHeartbeat, MetaClient, MetaClientRef, MissingHeader, Result, -}; - -type MetaServiceGrpcClient = MetaRpcServiceClient; - -#[derive(Debug, Deserialize, Clone, Serialize)] -#[serde(default)] -pub struct MetaClientConfig { - pub cluster_name: String, - pub meta_addr: String, - pub lease: ReadableDuration, - pub timeout: ReadableDuration, - pub cq_count: usize, -} - -impl Default for MetaClientConfig { - fn default() -> Self { - Self { - cluster_name: String::new(), - meta_addr: "127.0.0.1:8080".to_string(), - lease: ReadableDuration::secs(10), - timeout: ReadableDuration::secs(5), - cq_count: 8, - } - } -} - -/// Default meta client impl, will interact with a remote meta node. -pub struct MetaClientImpl { - config: MetaClientConfig, - node_meta_info: NodeMetaInfo, - client: MetaServiceGrpcClient, -} - -impl MetaClientImpl { - pub async fn connect(config: MetaClientConfig, node_meta_info: NodeMetaInfo) -> Result { - let client = { - let endpoint = tonic::transport::Endpoint::from_shared(config.meta_addr.to_string()) - .box_err() - .context(FailConnect { - addr: &config.meta_addr, - })? - .timeout(config.timeout.0); - MetaServiceGrpcClient::connect(endpoint) - .await - .box_err() - .context(FailConnect { - addr: &config.meta_addr, - })? - }; - - Ok(Self { - config, - node_meta_info, - client, - }) - } - - fn request_header(&self) -> RequestHeader { - RequestHeader { - node: self.node_meta_info.endpoint(), - cluster_name: self.config.cluster_name.clone(), - } - } - - #[inline] - fn client(&self) -> MetaServiceGrpcClient { - self.client.clone() - } -} - -#[async_trait] -impl MetaClient for MetaClientImpl { - async fn alloc_schema_id(&self, req: AllocSchemaIdRequest) -> Result { - let mut pb_req = meta_service::AllocSchemaIdRequest::from(req); - pb_req.header = Some(self.request_header().into()); - - info!("Meta client try to alloc schema id, req:{:?}", pb_req); - - let pb_resp = self - .client() - .alloc_schema_id(pb_req) - .await - .box_err() - .context(FailAllocSchemaId)? - .into_inner(); - - info!( - "Meta client finish allocating schema id, resp:{:?}", - pb_resp - ); - - check_response_header(&pb_resp.header)?; - Ok(AllocSchemaIdResponse::from(pb_resp)) - } - - async fn create_table(&self, req: CreateTableRequest) -> Result { - let mut pb_req = meta_service::CreateTableRequest::from(req); - pb_req.header = Some(self.request_header().into()); - - info!("Meta client try to create table, req:{:?}", pb_req); - - let pb_resp = self - .client() - .create_table(pb_req) - .await - .box_err() - .context(FailCreateTable)? 
- .into_inner(); - - info!("Meta client finish creating table, resp:{:?}", pb_resp); - - check_response_header(&pb_resp.header)?; - CreateTableResponse::try_from(pb_resp) - } - - async fn drop_table(&self, req: DropTableRequest) -> Result { - let mut pb_req = meta_service::DropTableRequest::from(req.clone()); - pb_req.header = Some(self.request_header().into()); - - info!("Meta client try to drop table, req:{:?}", pb_req); - - let pb_resp = self - .client() - .drop_table(pb_req) - .await - .box_err() - .context(FailDropTable)? - .into_inner(); - - info!("Meta client finish dropping table, resp:{:?}", pb_resp); - - check_response_header(&pb_resp.header)?; - DropTableResponse::try_from(pb_resp) - } - - async fn get_tables_of_shards( - &self, - req: GetTablesOfShardsRequest, - ) -> Result { - let mut pb_req = meta_service::GetTablesOfShardsRequest::from(req); - pb_req.header = Some(self.request_header().into()); - - info!("Meta client try to get tables, req:{:?}", pb_req); - - let pb_resp = self - .client() - .get_tables_of_shards(pb_req) - .await - .box_err() - .context(FailGetTables)? - .into_inner(); - - info!("Meta client finish getting tables, resp:{:?}", pb_resp); - - check_response_header(&pb_resp.header)?; - - GetTablesOfShardsResponse::try_from(pb_resp) - } - - async fn route_tables(&self, req: RouteTablesRequest) -> Result { - let mut pb_req = meta_service::RouteTablesRequest::from(req); - pb_req.header = Some(self.request_header().into()); - - debug!("Meta client try to route tables, req:{:?}", pb_req); - - let pb_resp = self - .client() - .route_tables(pb_req) - .await - .box_err() - .context(FailRouteTables)? - .into_inner(); - - debug!("Meta client finish routing tables, resp:{:?}", pb_resp); - - check_response_header(&pb_resp.header)?; - RouteTablesResponse::try_from(pb_resp) - } - - async fn get_nodes(&self, req: GetNodesRequest) -> Result { - let mut pb_req = meta_service::GetNodesRequest::from(req); - pb_req.header = Some(self.request_header().into()); - - debug!("Meta client try to get nodes, req:{:?}", pb_req); - - let pb_resp = self - .client() - .get_nodes(pb_req) - .await - .box_err() - .context(FailRouteTables)? - .into_inner(); - - debug!("Meta client finish getting nodes, resp:{:?}", pb_resp); - - check_response_header(&pb_resp.header)?; - GetNodesResponse::try_from(pb_resp) - } - - async fn fetch_compaction_node( - &self, - _req: FetchCompactionNodeRequest, - ) -> Result { - todo!() - } - - async fn send_heartbeat(&self, shard_infos: Vec) -> Result<()> { - let node_info = NodeInfo { - node_meta_info: self.node_meta_info.clone(), - shard_infos, - }; - let pb_req = meta_service::NodeHeartbeatRequest { - header: Some(self.request_header().into()), - info: Some(node_info.into()), - }; - - info!("Meta client try to send heartbeat req:{:?}", pb_req); - - let pb_resp = self - .client() - .node_heartbeat(pb_req) - .await - .box_err() - .context(FailSendHeartbeat { - cluster: &self.config.cluster_name, - })? - .into_inner(); - - info!("Meta client finish sending heartbeat, resp:{:?}", pb_resp); - - check_response_header(&pb_resp.header) - } -} - -fn check_response_header(header: &Option) -> Result<()> { - let header = header.as_ref().context(MissingHeader)?; - if header.code == 0 { - Ok(()) - } else { - BadResponse { - code: header.code, - msg: header.error.clone(), - } - .fail() - } -} - -/// Create a meta client with given `config`. 
-pub async fn build_meta_client( - config: MetaClientConfig, - node_meta_info: NodeMetaInfo, -) -> Result { - let meta_client = MetaClientImpl::connect(config, node_meta_info).await?; - Ok(Arc::new(meta_client)) -} diff --git a/src/meta_client/src/types.rs b/src/meta_client/src/types.rs deleted file mode 100644 index 524843620b..0000000000 --- a/src/meta_client/src/types.rs +++ /dev/null @@ -1,600 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{collections::HashMap, fmt, sync::Arc}; - -pub use common_types::table::{ShardId, ShardVersion}; -use common_types::{ - cluster::NodeType, - schema::{SchemaId, SchemaName}, - table::{TableId, TableName}, -}; -use generic_error::BoxError; -use horaedbproto::{cluster as cluster_pb, meta_service as meta_service_pb}; -use serde::{Deserialize, Serialize}; -use snafu::{OptionExt, ResultExt}; -use table_engine::partition::PartitionInfo; -use time_ext::ReadableDuration; - -use crate::{Convert, Error, MissingShardInfo, MissingTableInfo, Result}; -pub type ClusterNodesRef = Arc>; - -#[derive(Debug, Clone)] -pub struct RequestHeader { - pub node: String, - pub cluster_name: String, -} - -#[derive(Debug)] -pub struct AllocSchemaIdRequest { - pub name: String, -} - -#[derive(Debug)] -pub struct AllocSchemaIdResponse { - pub name: String, - pub id: SchemaId, -} - -#[derive(Clone, Debug)] -pub struct PartitionTableInfo { - pub sub_table_names: Vec, - pub partition_info: PartitionInfo, -} - -#[derive(Clone)] -pub struct CreateTableRequest { - pub schema_name: String, - pub name: String, - pub encoded_schema: Vec, - pub engine: String, - pub create_if_not_exist: bool, - pub options: HashMap, - pub partition_table_info: Option, -} - -impl fmt::Debug for CreateTableRequest { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // ignore encoded_schema - f.debug_struct("CreateTableRequest") - .field("schema_name", &self.schema_name) - .field("name", &self.name) - .field("engine", &self.engine) - .field("create_if_not_exist", &self.create_if_not_exist) - .field("options", &self.options) - .field("partition_table_info", &self.partition_table_info) - .finish() - } -} - -#[derive(Clone, Debug)] -pub struct CreateTableResponse { - pub created_table: TableInfo, - pub shard_info: ShardInfo, -} - -#[derive(Debug, Clone)] -pub struct DropTableRequest { - pub schema_name: String, - pub name: String, - pub partition_table_info: Option, -} - -#[derive(Debug, Clone)] -pub struct DropTableResponse { - /// The dropped table. - /// - /// And it will be None if drop a non-exist table. 
- pub dropped_table: Option, -} - -#[derive(Clone, Debug)] -pub struct GetTablesOfShardsRequest { - pub shard_ids: Vec, -} - -#[derive(Clone, Debug)] -pub struct GetTablesOfShardsResponse { - pub tables_by_shard: HashMap, -} - -#[derive(Clone, Debug)] -pub struct TableInfo { - pub id: TableId, - pub name: String, - pub schema_id: SchemaId, - pub schema_name: String, - pub partition_info: Option, -} - -impl TableInfo { - pub fn is_partition_table(&self) -> bool { - self.partition_info.is_some() - } -} - -impl TryFrom for TableInfo { - type Error = Error; - - fn try_from(pb_table_info: meta_service_pb::TableInfo) -> Result { - let partition_info = pb_table_info - .partition_info - .map(|v| { - PartitionInfo::try_from(v).box_err().context(Convert { - msg: "Failed to parse partition", - }) - }) - .transpose()?; - - Ok(TableInfo { - id: pb_table_info.id, - name: pb_table_info.name, - schema_id: pb_table_info.schema_id, - schema_name: pb_table_info.schema_name, - partition_info, - }) - } -} - -#[derive(Clone, Debug)] -pub struct TablesOfShard { - pub shard_info: ShardInfo, - pub tables: Vec, -} - -#[derive(Debug, Default, Clone, Deserialize)] -#[serde(default)] -pub struct NodeMetaInfo { - pub addr: String, - pub port: u16, - pub zone: String, - pub idc: String, - pub binary_version: String, - pub node_type: NodeType, -} - -impl NodeMetaInfo { - pub fn endpoint(&self) -> String { - format!("{}:{}", self.addr, self.port) - } -} - -#[derive(Debug, Clone)] -pub struct NodeInfo { - pub node_meta_info: NodeMetaInfo, - pub shard_infos: Vec, -} - -/// The status changes of a shard as following: -/// -/// ```plaintext -/// ┌────┐ -/// │Init│ -/// └──┬─┘ -/// ___▽___ -/// ╱ ╲ ┌─────┐ -/// ╱ Opening ╲____│Ready│ -/// ╲ ╱yes └──┬──┘ -/// ╲_______╱ ┌───▽──┐ -/// │Frozen│ -/// └──────┘ -/// ``` -/// When an open request comes in, shard can only be opened when it's in -/// - `Init`, which means it has not been opened before. -/// - `Opening`, which means it has been opened before, but failed. -#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize)] -pub enum ShardStatus { - /// Created, but not opened - #[default] - Init, - /// In opening - Opening, - /// Healthy - Ready, - /// Further updates are prohibited - Frozen, -} - -#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize)] -pub struct ShardInfo { - pub id: ShardId, - pub role: ShardRole, - pub version: ShardVersion, - // This status is only used for request horaedb send to horaemeta via heartbeat - // When horaedb receive this via open shard request, this field is meaningless. - // TODO: Use different request and response body between horaedb and - // horaemeta. 
- pub status: ShardStatus, -} - -impl ShardInfo { - #[inline] - pub fn is_leader(&self) -> bool { - self.role == ShardRole::Leader - } - - #[inline] - pub fn is_opened(&self) -> bool { - matches!(self.status, ShardStatus::Ready | ShardStatus::Frozen) - } - - #[inline] - pub fn is_ready(&self) -> bool { - matches!(self.status, ShardStatus::Ready) - } -} - -#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Serialize)] -pub enum ShardRole { - #[default] - Leader, - Follower, -} - -#[derive(Debug, Deserialize, Clone)] -#[serde(default)] -pub struct MetaClientConfig { - pub cluster_name: String, - pub meta_addr: String, - pub meta_members_url: String, - pub lease: ReadableDuration, - pub timeout: ReadableDuration, - pub cq_count: usize, -} - -impl Default for MetaClientConfig { - fn default() -> Self { - Self { - cluster_name: String::new(), - meta_addr: "127.0.0.1:8080".to_string(), - meta_members_url: "horaemeta/members".to_string(), - lease: ReadableDuration::secs(10), - timeout: ReadableDuration::secs(5), - cq_count: 8, - } - } -} - -impl From for meta_service_pb::NodeInfo { - fn from(node_info: NodeInfo) -> Self { - let shard_infos = node_info - .shard_infos - .into_iter() - .map(meta_service_pb::ShardInfo::from) - .collect(); - - Self { - endpoint: node_info.node_meta_info.endpoint(), - zone: node_info.node_meta_info.zone, - binary_version: node_info.node_meta_info.binary_version, - shard_infos, - lease: 0, - } - } -} - -impl From for meta_service_pb::ShardInfo { - fn from(shard_info: ShardInfo) -> Self { - let role = cluster_pb::ShardRole::from(shard_info.role); - - Self { - id: shard_info.id, - role: role as i32, - version: shard_info.version, - status: Some(if shard_info.is_opened() { - meta_service_pb::shard_info::Status::Ready - } else { - meta_service_pb::shard_info::Status::PartialOpen - } as i32), - } - } -} - -impl From<&meta_service_pb::ShardInfo> for ShardInfo { - fn from(pb_shard_info: &meta_service_pb::ShardInfo) -> Self { - ShardInfo { - id: pb_shard_info.id, - role: pb_shard_info.role().into(), - version: pb_shard_info.version, - status: ShardStatus::Init, - } - } -} - -impl From for cluster_pb::ShardRole { - fn from(shard_role: ShardRole) -> Self { - match shard_role { - ShardRole::Leader => cluster_pb::ShardRole::Leader, - ShardRole::Follower => cluster_pb::ShardRole::Follower, - } - } -} - -impl From for ShardRole { - fn from(pb_role: cluster_pb::ShardRole) -> Self { - match pb_role { - cluster_pb::ShardRole::Leader => ShardRole::Leader, - cluster_pb::ShardRole::Follower => ShardRole::Follower, - } - } -} - -impl From for meta_service_pb::GetTablesOfShardsRequest { - fn from(req: GetTablesOfShardsRequest) -> Self { - Self { - header: None, - shard_ids: req.shard_ids, - } - } -} - -impl TryFrom for GetTablesOfShardsResponse { - type Error = Error; - - fn try_from(pb_resp: meta_service_pb::GetTablesOfShardsResponse) -> Result { - let tables_by_shard = pb_resp - .tables_by_shard - .into_iter() - .map(|(k, v)| Ok((k, TablesOfShard::try_from(v)?))) - .collect::>>()?; - - Ok(Self { tables_by_shard }) - } -} - -impl TryFrom for TablesOfShard { - type Error = Error; - - fn try_from(pb_tables_of_shard: meta_service_pb::TablesOfShard) -> Result { - let shard_info = pb_tables_of_shard - .shard_info - .with_context(|| MissingShardInfo { - msg: "in meta_service_pb::TablesOfShard", - })?; - Ok(Self { - shard_info: ShardInfo::from(&shard_info), - tables: pb_tables_of_shard - .tables - .into_iter() - .map(TryInto::::try_into) - .collect::>>()?, - }) - } -} - -impl From for 
meta_service_pb::RequestHeader { - fn from(req: RequestHeader) -> Self { - Self { - node: req.node, - cluster_name: req.cluster_name, - } - } -} - -impl From for meta_service_pb::AllocSchemaIdRequest { - fn from(req: AllocSchemaIdRequest) -> Self { - Self { - header: None, - name: req.name, - } - } -} - -impl From for AllocSchemaIdResponse { - fn from(pb_resp: meta_service_pb::AllocSchemaIdResponse) -> Self { - Self { - name: pb_resp.name, - id: pb_resp.id, - } - } -} - -impl From for meta_service_pb::CreateTableRequest { - fn from(req: CreateTableRequest) -> Self { - let partition_table_info = - req.partition_table_info - .map(|v| meta_service_pb::PartitionTableInfo { - partition_info: Some(v.partition_info.into()), - sub_table_names: v.sub_table_names, - }); - Self { - header: None, - schema_name: req.schema_name, - name: req.name, - encoded_schema: req.encoded_schema, - engine: req.engine, - create_if_not_exist: req.create_if_not_exist, - options: req.options, - partition_table_info, - } - } -} - -impl TryFrom for CreateTableResponse { - type Error = Error; - - fn try_from(pb_resp: meta_service_pb::CreateTableResponse) -> Result { - let pb_table_info = pb_resp.created_table.context(MissingTableInfo { - msg: "created table is not found in the create table response", - })?; - let pb_shard_info = pb_resp.shard_info.context(MissingShardInfo { - msg: "shard info is not found in the create table response", - })?; - - Ok(Self { - created_table: TableInfo::try_from(pb_table_info)?, - shard_info: ShardInfo::from(&pb_shard_info), - }) - } -} - -impl From for meta_service_pb::DropTableRequest { - fn from(req: DropTableRequest) -> Self { - let partition_table_info = - req.partition_table_info - .map(|v| meta_service_pb::PartitionTableInfo { - partition_info: Some(v.partition_info.into()), - sub_table_names: v.sub_table_names, - }); - Self { - header: None, - schema_name: req.schema_name, - name: req.name, - partition_table_info, - } - } -} - -impl TryFrom for DropTableResponse { - type Error = Error; - - fn try_from(pb_resp: meta_service_pb::DropTableResponse) -> Result { - Ok(Self { - dropped_table: pb_resp.dropped_table.map(TableInfo::try_from).transpose()?, - }) - } -} - -#[derive(Debug, Clone)] -pub struct RouteTablesRequest { - pub schema_name: SchemaName, - pub table_names: Vec, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct NodeShard { - pub endpoint: String, - pub shard_info: ShardInfo, -} - -#[derive(Debug, Clone)] -pub struct RouteEntry { - pub table_info: TableInfo, - pub node_shards: Vec, -} - -#[derive(Debug, Clone)] -pub struct RouteTablesResponse { - pub cluster_topology_version: u64, - pub entries: HashMap, -} - -impl RouteTablesResponse { - pub fn contains_all_tables(&self, queried_tables: &[TableName]) -> bool { - queried_tables - .iter() - .all(|table_name| self.entries.contains_key(table_name)) - } -} - -impl From for meta_service_pb::RouteTablesRequest { - fn from(req: RouteTablesRequest) -> Self { - Self { - header: None, - schema_name: req.schema_name, - table_names: req.table_names, - } - } -} - -impl TryFrom for NodeShard { - type Error = Error; - - fn try_from(pb: meta_service_pb::NodeShard) -> Result { - let pb_shard_info = pb.shard_info.with_context(|| MissingShardInfo { - msg: "in meta_service_pb::NodeShard", - })?; - Ok(NodeShard { - endpoint: pb.endpoint, - shard_info: ShardInfo::from(&pb_shard_info), - }) - } -} - -impl TryFrom for RouteEntry { - type Error = Error; - - fn try_from(pb_entry: meta_service_pb::RouteEntry) -> Result { - let mut node_shards = 
Vec::with_capacity(pb_entry.node_shards.len()); - for pb_node_shard in pb_entry.node_shards { - let node_shard = NodeShard::try_from(pb_node_shard)?; - node_shards.push(node_shard); - } - - let table_info = pb_entry.table.context(MissingTableInfo { - msg: "table info is missing in route entry", - })?; - Ok(RouteEntry { - table_info: TableInfo::try_from(table_info)?, - node_shards, - }) - } -} - -impl TryFrom for RouteTablesResponse { - type Error = Error; - - fn try_from(pb_resp: meta_service_pb::RouteTablesResponse) -> Result { - let mut entries = HashMap::with_capacity(pb_resp.entries.len()); - for (table_name, entry) in pb_resp.entries { - let route_entry = RouteEntry::try_from(entry)?; - entries.insert(table_name, route_entry); - } - - Ok(RouteTablesResponse { - cluster_topology_version: pb_resp.cluster_topology_version, - entries, - }) - } -} - -#[derive(Debug, Clone, Default)] -pub struct GetNodesRequest {} - -pub struct GetNodesResponse { - pub cluster_topology_version: u64, - pub node_shards: Vec, -} - -impl From for meta_service_pb::GetNodesRequest { - fn from(_: GetNodesRequest) -> Self { - meta_service_pb::GetNodesRequest::default() - } -} - -impl TryFrom for GetNodesResponse { - type Error = Error; - - fn try_from(pb_resp: meta_service_pb::GetNodesResponse) -> Result { - let mut node_shards = Vec::with_capacity(pb_resp.node_shards.len()); - for node_shard in pb_resp.node_shards { - node_shards.push(NodeShard::try_from(node_shard)?); - } - - Ok(GetNodesResponse { - cluster_topology_version: pb_resp.cluster_topology_version, - node_shards, - }) - } -} - -#[derive(Debug, Clone, Default)] -pub struct FetchCompactionNodeRequest {} - -pub struct FetchCompactionNodeResponse { - pub endpoint: String, -} diff --git a/horaedb/metric_engine/Cargo.toml b/src/metric_engine/Cargo.toml similarity index 98% rename from horaedb/metric_engine/Cargo.toml rename to src/metric_engine/Cargo.toml index bfb70d449f..e84c408791 100644 --- a/horaedb/metric_engine/Cargo.toml +++ b/src/metric_engine/Cargo.toml @@ -43,7 +43,6 @@ datafusion = { workspace = true } futures = { workspace = true } itertools = { workspace = true } lazy_static = { workspace = true } -macros = { workspace = true } object_store = { workspace = true } parquet = { workspace = true, features = ["object_store"] } pb_types = { workspace = true } diff --git a/horaedb/metric_engine/src/compaction/mod.rs b/src/metric_engine/src/compaction/mod.rs similarity index 100% rename from horaedb/metric_engine/src/compaction/mod.rs rename to src/metric_engine/src/compaction/mod.rs diff --git a/horaedb/metric_engine/src/compaction/picker.rs b/src/metric_engine/src/compaction/picker.rs similarity index 100% rename from horaedb/metric_engine/src/compaction/picker.rs rename to src/metric_engine/src/compaction/picker.rs diff --git a/horaedb/metric_engine/src/compaction/scheduler.rs b/src/metric_engine/src/compaction/scheduler.rs similarity index 100% rename from horaedb/metric_engine/src/compaction/scheduler.rs rename to src/metric_engine/src/compaction/scheduler.rs diff --git a/horaedb/metric_engine/src/error.rs b/src/metric_engine/src/error.rs similarity index 100% rename from horaedb/metric_engine/src/error.rs rename to src/metric_engine/src/error.rs diff --git a/horaedb/metric_engine/src/lib.rs b/src/metric_engine/src/lib.rs similarity index 100% rename from horaedb/metric_engine/src/lib.rs rename to src/metric_engine/src/lib.rs diff --git a/horaedb/metric_engine/src/macros.rs b/src/metric_engine/src/macros.rs similarity index 67% rename from 
horaedb/metric_engine/src/macros.rs rename to src/metric_engine/src/macros.rs index 71c668dbcc..a6fefb319e 100644 --- a/horaedb/metric_engine/src/macros.rs +++ b/src/metric_engine/src/macros.rs @@ -28,3 +28,25 @@ macro_rules! compare_primitive_columns { )+ }; } + +/// Util for working with anyhow + thiserror +/// Works like anyhow's [ensure](https://docs.rs/anyhow/latest/anyhow/macro.ensure.html) +/// But return `Return` +#[macro_export] +macro_rules! ensure { + ($cond:expr, $msg:literal) => { + if !$cond { + return Err(anyhow::anyhow!($msg).into()); + } + }; + ($cond:expr, $err:expr) => { + if !$cond { + return Err($err.into()); + } + }; + ($cond:expr, $fmt:expr, $($arg:tt)*) => { + if !$cond { + return Err(anyhow::anyhow!($fmt, $($arg)*).into()); + } + }; +} diff --git a/horaedb/metric_engine/src/manifest.rs b/src/metric_engine/src/manifest.rs similarity index 99% rename from horaedb/metric_engine/src/manifest.rs rename to src/metric_engine/src/manifest.rs index 3277558b4b..0cb7f51fd4 100644 --- a/horaedb/metric_engine/src/manifest.rs +++ b/src/metric_engine/src/manifest.rs @@ -30,7 +30,6 @@ use async_scoped::TokioScope; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use bytes::Bytes; use futures::{StreamExt, TryStreamExt}; -use macros::ensure; use object_store::{path::Path, PutPayload}; use parquet::data_type::AsBytes; use prost::Message; @@ -44,6 +43,7 @@ use tokio::{ use tracing::error; use crate::{ + ensure, sst::{FileId, FileMeta, SstFile}, types::{ManifestMergeOptions, ObjectStoreRef, RuntimeRef, TimeRange}, AnyhowError, Error, Result, diff --git a/horaedb/metric_engine/src/operator.rs b/src/metric_engine/src/operator.rs similarity index 99% rename from horaedb/metric_engine/src/operator.rs rename to src/metric_engine/src/operator.rs index 9847c80e9f..653e56d8b1 100644 --- a/horaedb/metric_engine/src/operator.rs +++ b/src/metric_engine/src/operator.rs @@ -23,10 +23,9 @@ use arrow::{ buffer::OffsetBuffer, }; use arrow_schema::DataType; -use macros::ensure; use tracing::debug; -use crate::Result; +use crate::{ensure, Result}; pub trait MergeOperator: Send + Sync + Debug { fn merge(&self, batch: RecordBatch) -> Result; diff --git a/horaedb/metric_engine/src/read.rs b/src/metric_engine/src/read.rs similarity index 100% rename from horaedb/metric_engine/src/read.rs rename to src/metric_engine/src/read.rs diff --git a/horaedb/metric_engine/src/sst.rs b/src/metric_engine/src/sst.rs similarity index 99% rename from horaedb/metric_engine/src/sst.rs rename to src/metric_engine/src/sst.rs index 9534bd78f8..1cbfedec0e 100644 --- a/horaedb/metric_engine/src/sst.rs +++ b/src/metric_engine/src/sst.rs @@ -23,9 +23,8 @@ use std::{ time::SystemTime, }; -use macros::ensure; - use crate::{ + ensure, types::{TimeRange, Timestamp}, Error, }; diff --git a/horaedb/metric_engine/src/storage.rs b/src/metric_engine/src/storage.rs similarity index 99% rename from horaedb/metric_engine/src/storage.rs rename to src/metric_engine/src/storage.rs index f680bd944f..0aa2536108 100644 --- a/horaedb/metric_engine/src/storage.rs +++ b/src/metric_engine/src/storage.rs @@ -45,7 +45,6 @@ use datafusion::{ }; use futures::StreamExt; use itertools::Itertools; -use macros::ensure; use object_store::path::Path; use parquet::{ arrow::{async_writer::ParquetObjectWriter, AsyncArrowWriter}, @@ -57,6 +56,7 @@ use tokio::runtime::Runtime; use crate::{ compaction::{CompactionScheduler, SchedulerConfig}, + ensure, manifest::{Manifest, ManifestRef}, operator::{BytesMergeOperator, LastValueOperator}, 
read::{DefaultParquetFileReaderFactory, MergeExec}, diff --git a/horaedb/metric_engine/src/test_util.rs b/src/metric_engine/src/test_util.rs similarity index 100% rename from horaedb/metric_engine/src/test_util.rs rename to src/metric_engine/src/test_util.rs diff --git a/horaedb/metric_engine/src/types.rs b/src/metric_engine/src/types.rs similarity index 100% rename from horaedb/metric_engine/src/types.rs rename to src/metric_engine/src/types.rs diff --git a/src/partition_table_engine/Cargo.toml b/src/partition_table_engine/Cargo.toml deleted file mode 100644 index 64df048ab8..0000000000 --- a/src/partition_table_engine/Cargo.toml +++ /dev/null @@ -1,48 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "partition_table_engine" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -analytic_engine = { workspace = true } -arrow = { workspace = true } -async-trait = { workspace = true } -common_types = { workspace = true } -datafusion = { workspace = true } -df_engine_extensions = { workspace = true } -df_operator = { workspace = true } -futures = { workspace = true } -generic_error = { workspace = true } -lazy_static = { workspace = true } -logger = { workspace = true } -macros = { workspace = true } -prometheus = { workspace = true } -snafu = { workspace = true } -table_engine = { workspace = true } diff --git a/src/partition_table_engine/src/error.rs b/src/partition_table_engine/src/error.rs deleted file mode 100644 index 1f00106fc8..0000000000 --- a/src/partition_table_engine/src/error.rs +++ /dev/null @@ -1,31 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
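One functional change hidden among the renames above: `metric_engine` now ships its own `ensure!` macro (added to `src/macros.rs`) instead of pulling it from the removed shared `macros` crate, and `manifest.rs`, `sst.rs`, `operator.rs` and `storage.rs` switch to `use crate::ensure`. A condensed, self-contained sketch of its behaviour follows; the `Error` newtype and `check_sst_meta` helper are illustrative stand-ins, not the crate's real types.

```rust
// Illustrative only: how the crate-local `ensure!` macro added above behaves.
// `Error` is a stand-in that converts from anyhow::Error, mirroring (but not
// identical to) metric_engine's existing AnyhowError plumbing.
use anyhow::anyhow;

#[derive(Debug)]
struct Error(anyhow::Error);

impl From<anyhow::Error> for Error {
    fn from(e: anyhow::Error) -> Self {
        Error(e)
    }
}

// Two of the three arms of the relocated macro, copied for illustration:
// a failed condition becomes an early `return Err(anyhow!(...).into())`.
macro_rules! ensure {
    ($cond:expr, $msg:literal) => {
        if !$cond {
            return Err(anyhow!($msg).into());
        }
    };
    ($cond:expr, $fmt:expr, $($arg:tt)*) => {
        if !$cond {
            return Err(anyhow!($fmt, $($arg)*).into());
        }
    };
}

// Hypothetical validation helper, not taken from metric_engine.
fn check_sst_meta(num_rows: usize, size_bytes: usize) -> Result<(), Error> {
    ensure!(num_rows > 0, "sst file must not be empty");
    ensure!(size_bytes > 0, "invalid sst size: {} bytes", size_bytes);
    Ok(())
}

fn main() {
    assert!(check_sst_meta(10, 1024).is_ok());
    if let Err(e) = check_sst_meta(0, 0) {
        eprintln!("rejected as expected: {:?}", e.0);
    }
}
```

The early `return Err(... .into())` is what lets the same macro serve any error type implementing `From<anyhow::Error>`, which the crate's existing `AnyhowError` wiring (visible in the `manifest.rs` imports above) appears to provide.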
- -use generic_error::GenericError; -use macros::define_result; -use snafu::Snafu; -define_result!(Error); - -#[derive(Snafu, Debug)] -#[snafu(visibility(pub))] -pub enum Error { - #[snafu(display("Internal error, message:{}, err:{}", msg, source))] - Internal { msg: String, source: GenericError }, - - #[snafu(display("Datafusion error, message:{}, err:{}", msg, source))] - Datafusion { msg: String, source: GenericError }, -} diff --git a/src/partition_table_engine/src/lib.rs b/src/partition_table_engine/src/lib.rs deleted file mode 100644 index ee0b460c30..0000000000 --- a/src/partition_table_engine/src/lib.rs +++ /dev/null @@ -1,155 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Partition table engine implementations - -mod error; -mod metrics; -mod partition; -pub mod scan_builder; -pub mod test_util; - -use std::sync::Arc; - -use analytic_engine::TableOptions; -use async_trait::async_trait; -use datafusion::logical_expr::expr::{Expr, InList}; -use generic_error::BoxError; -use snafu::{ensure, OptionExt, ResultExt}; -use table_engine::{ - engine::{ - CloseShardRequest, CloseTableRequest, CreateTableParams, CreateTableRequest, - DropTableRequest, InvalidPartitionContext, OpenShardRequest, OpenShardResult, - OpenTableRequest, Result, TableEngine, Unexpected, UnexpectedNoCause, - }, - partition::rule::df_adapter::PartitionedFilterKeyIndex, - predicate::Predicate, - remote::RemoteEngineRef, - table::TableRef, - PARTITION_TABLE_ENGINE_TYPE, -}; - -use crate::partition::{PartitionTableImpl, TableData}; - -/// Partition table engine implementation. -pub struct PartitionTableEngine { - remote_engine_ref: RemoteEngineRef, -} - -impl PartitionTableEngine { - pub fn new(remote_engine_ref: RemoteEngineRef) -> Self { - Self { remote_engine_ref } - } -} - -#[async_trait] -impl TableEngine for PartitionTableEngine { - fn engine_type(&self) -> &str { - PARTITION_TABLE_ENGINE_TYPE - } - - async fn close(&self) -> Result<()> { - Ok(()) - } - - /// Validate the request of create table. 
- async fn validate_create_table(&self, _params: &CreateTableParams) -> Result<()> { - Ok(()) - } - - async fn create_table(&self, request: CreateTableRequest) -> Result { - let table_data = TableData { - catalog_name: request.params.catalog_name, - schema_name: request.params.schema_name, - table_name: request.params.table_name, - table_id: request.table_id, - table_schema: request.params.table_schema, - partition_info: request.params.partition_info.context(UnexpectedNoCause { - msg: "partition info not found", - })?, - options: TableOptions::from_map(&request.params.table_options, true) - .box_err() - .context(Unexpected)?, - engine_type: request.params.engine, - }; - Ok(Arc::new( - PartitionTableImpl::new(table_data, self.remote_engine_ref.clone()) - .box_err() - .context(Unexpected)?, - )) - } - - async fn drop_table(&self, _request: DropTableRequest) -> Result { - Ok(true) - } - - async fn open_table(&self, _request: OpenTableRequest) -> Result> { - Ok(None) - } - - async fn close_table(&self, _request: CloseTableRequest) -> Result<()> { - Ok(()) - } - - async fn open_shard(&self, _request: OpenShardRequest) -> Result { - Ok(OpenShardResult::default()) - } - - async fn close_shard(&self, _request: CloseShardRequest) -> Vec> { - vec![Ok("".to_string())] - } -} - -pub fn partitioned_predicates( - predicate: Arc, - partitions: &[usize], - partitioned_key_indices: &mut PartitionedFilterKeyIndex, -) -> Result> { - ensure!( - partitions.len() == partitioned_key_indices.keys().len(), - InvalidPartitionContext { - msg: format!( - "partitions length:{}, partitioned_key_indices length: {}", - partitions.len(), - partitioned_key_indices.keys().len() - ) - } - ); - let mut predicates = vec![(*predicate).clone(); partitions.len()]; - for (idx, predicate) in predicates.iter_mut().enumerate() { - let partition = partitions[idx]; - if let Some(filter_indices) = partitioned_key_indices.get(&partition) { - let exprs = predicate.mut_exprs(); - for (filter_idx, key_indices) in filter_indices { - if let Expr::InList(InList { - list, - negated: false, - .. - }) = &mut exprs[*filter_idx] - { - let mut idx = 0; - list.retain(|_| { - let should_kept = key_indices.contains(&idx); - idx += 1; - should_kept - }); - } - } - } - } - Ok(predicates) -} diff --git a/src/partition_table_engine/src/metrics.rs b/src/partition_table_engine/src/metrics.rs deleted file mode 100644 index 7bc42ea014..0000000000 --- a/src/partition_table_engine/src/metrics.rs +++ /dev/null @@ -1,39 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use lazy_static::lazy_static; -use prometheus::{exponential_buckets, register_histogram_vec, HistogramVec}; - -lazy_static! 
{ - // Buckets: 0, 0.01, .., 0.01 * 2^12 - pub static ref PARTITION_TABLE_WRITE_DURATION_HISTOGRAM: HistogramVec = register_histogram_vec!( - "partition_table_write_duration", - "Histogram for write duration of the partition table in seconds", - &["type"], - exponential_buckets(0.01, 2.0, 13).unwrap() - ) - .unwrap(); - - // Buckets: 0, 0.01, .., 0.01 * 2^12 - pub static ref PARTITION_TABLE_PARTITIONED_READ_DURATION_HISTOGRAM: HistogramVec = register_histogram_vec!( - "partition_table_partitioned_read_duration", - "Histogram for partitioned read duration of the partition table in seconds", - &["type"], - exponential_buckets(0.01, 2.0, 13).unwrap() - ) - .unwrap(); -} diff --git a/src/partition_table_engine/src/partition.rs b/src/partition_table_engine/src/partition.rs deleted file mode 100644 index 2eb0a6a32f..0000000000 --- a/src/partition_table_engine/src/partition.rs +++ /dev/null @@ -1,451 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Distributed Table implementation - -use std::{collections::HashMap, fmt}; - -use analytic_engine::{table::support_pushdown, TableOptions}; -use async_trait::async_trait; -use common_types::{ - row::{Row, RowGroup}, - schema::Schema, -}; -use futures::{stream::FuturesUnordered, StreamExt}; -use generic_error::BoxError; -use logger::error; -use snafu::ResultExt; -use table_engine::{ - partition::{ - format_sub_partition_table_name, - rule::{ - df_adapter::{DfPartitionRuleAdapter, PartitionedFilterKeyIndex}, - PartitionedRow, PartitionedRows, PartitionedRowsIter, - }, - PartitionInfo, - }, - remote::{ - model::{ - AlterTableOptionsRequest, AlterTableSchemaRequest, ReadRequest as RemoteReadRequest, - TableIdentifier, WriteBatchResult, WriteRequest as RemoteWriteRequest, - }, - RemoteEngineRef, - }, - stream::{PartitionedStreams, SendableRecordBatchStream}, - table::{ - AlterOptions, AlterSchema, AlterSchemaRequest, CreatePartitionRule, FlushRequest, - GetRequest, LocatePartitions, ReadRequest, Result, Scan, Table, TableId, TableStats, - UnexpectedWithMsg, UnsupportedMethod, WriteBatch, WriteRequest, - }, -}; - -use crate::metrics::{ - PARTITION_TABLE_PARTITIONED_READ_DURATION_HISTOGRAM, PARTITION_TABLE_WRITE_DURATION_HISTOGRAM, -}; - -#[derive(Debug)] -pub struct TableData { - pub catalog_name: String, - pub schema_name: String, - pub table_name: String, - pub table_id: TableId, - pub table_schema: Schema, - pub partition_info: PartitionInfo, - pub options: TableOptions, - pub engine_type: String, -} - -/// Table trait implementation -pub struct PartitionTableImpl { - table_data: TableData, - remote_engine: RemoteEngineRef, - partition_rule: DfPartitionRuleAdapter, -} - -impl PartitionTableImpl { - pub fn new(table_data: TableData, remote_engine: RemoteEngineRef) -> Result { - // Build 
partition rule. - let partition_rule = DfPartitionRuleAdapter::new( - table_data.partition_info.clone(), - &table_data.table_schema, - ) - .box_err() - .context(CreatePartitionRule)?; - - Ok(Self { - table_data, - remote_engine, - partition_rule, - }) - } - - fn get_sub_table_ident(&self, id: usize) -> TableIdentifier { - let partition_name = self.table_data.partition_info.get_definitions()[id] - .name - .clone(); - TableIdentifier { - catalog: self.table_data.catalog_name.clone(), - schema: self.table_data.schema_name.clone(), - table: format_sub_partition_table_name(&self.table_data.table_name, &partition_name), - } - } - - async fn write_single_row_group( - &self, - partition_id: usize, - row_group: RowGroup, - ) -> Result { - let sub_table_ident = self.get_sub_table_ident(partition_id); - - let request = RemoteWriteRequest { - table: sub_table_ident, - write_request: WriteRequest { row_group }, - }; - - self.remote_engine - .write(request) - .await - .box_err() - .with_context(|| WriteBatch { - tables: vec![self.table_data.table_name.clone()], - }) - } - - async fn write_partitioned_row_groups( - &self, - schema: Schema, - partitioned_rows: PartitionedRowsIter, - ) -> Result { - let mut split_rows = HashMap::new(); - for PartitionedRow { partition_id, row } in partitioned_rows { - split_rows - .entry(partition_id) - .or_insert_with(Vec::new) - .push(row); - } - - // Insert split write request through remote engine. - let mut request_batch = Vec::with_capacity(split_rows.len()); - for (partition, rows) in split_rows { - let sub_table_ident = self.get_sub_table_ident(partition); - // The rows should have the valid schema, so there is no need to do one more - // check here. - let row_group = RowGroup::new_unchecked(schema.clone(), rows); - - let request = RemoteWriteRequest { - table: sub_table_ident, - write_request: WriteRequest { row_group }, - }; - request_batch.push(request); - } - - let batch_results = self - .remote_engine - .write_batch(request_batch) - .await - .box_err() - .with_context(|| WriteBatch { - tables: vec![self.table_data.table_name.clone()], - })?; - let mut total_rows = 0; - for batch_result in batch_results { - let WriteBatchResult { - table_idents, - result, - } = batch_result; - - let written_rows = result.with_context(|| { - let tables = table_idents - .into_iter() - .map(|ident| ident.table) - .collect::>(); - WriteBatch { tables } - })?; - total_rows += written_rows; - } - - Ok(total_rows as usize) - } -} - -impl fmt::Debug for PartitionTableImpl { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("PartitionTableImpl") - .field("table_data", &self.table_data) - .finish() - } -} - -#[async_trait] -impl Table for PartitionTableImpl { - fn name(&self) -> &str { - &self.table_data.table_name - } - - fn id(&self) -> TableId { - self.table_data.table_id - } - - fn schema(&self) -> Schema { - self.table_data.table_schema.clone() - } - - // TODO: get options from sub partition table with remote engine - fn options(&self) -> HashMap { - self.table_data.options.to_raw_map() - } - - fn partition_info(&self) -> Option { - Some(self.table_data.partition_info.clone()) - } - - fn engine_type(&self) -> &str { - &self.table_data.engine_type - } - - fn stats(&self) -> TableStats { - TableStats::default() - } - - // TODO: maybe we should ask remote sub table whether support pushdown - fn support_pushdown(&self, read_schema: &Schema, col_names: &[String]) -> bool { - let need_dedup = self.table_data.options.need_dedup(); - - 
support_pushdown(read_schema, need_dedup, col_names) - } - - async fn write(&self, request: WriteRequest) -> Result { - let _timer = PARTITION_TABLE_WRITE_DURATION_HISTOGRAM - .with_label_values(&["total"]) - .start_timer(); - - // Split write request. - let schema = request.row_group.schema().clone(); - let partition_rows = { - let _locate_timer = PARTITION_TABLE_WRITE_DURATION_HISTOGRAM - .with_label_values(&["locate"]) - .start_timer(); - self.partition_rule - .locate_partitions_for_write(request.row_group) - .box_err() - .context(LocatePartitions)? - }; - - match partition_rows { - PartitionedRows::Single { - partition_id, - row_group, - } => self.write_single_row_group(partition_id, row_group).await, - PartitionedRows::Multiple(iter) => { - self.write_partitioned_row_groups(schema, iter).await - } - } - } - - async fn read(&self, _request: ReadRequest) -> Result { - UnsupportedMethod { - table: self.name(), - method: "read", - } - .fail() - } - - async fn get(&self, _request: GetRequest) -> Result> { - UnsupportedMethod { - table: self.name(), - method: "get", - } - .fail() - } - - async fn partitioned_read(&self, request: ReadRequest) -> Result { - let _timer = PARTITION_TABLE_PARTITIONED_READ_DURATION_HISTOGRAM - .with_label_values(&["total"]) - .start_timer(); - - // Build partition rule. - let df_partition_rule = match self.partition_info() { - None => UnexpectedWithMsg { - msg: "partition table partition info can't be empty", - } - .fail()?, - Some(partition_info) => { - DfPartitionRuleAdapter::new(partition_info, &self.table_data.table_schema) - .box_err() - .context(CreatePartitionRule)? - } - }; - let mut partitioned_key_indices = PartitionedFilterKeyIndex::new(); - // Evaluate expr and locate partition. - let partitions = { - let _locate_timer = PARTITION_TABLE_PARTITIONED_READ_DURATION_HISTOGRAM - .with_label_values(&["locate"]) - .start_timer(); - df_partition_rule - .locate_partitions_for_read(request.predicate.exprs(), &mut partitioned_key_indices) - .box_err() - .context(LocatePartitions)? - }; - - // Query streams through remote engine. - let mut futures = FuturesUnordered::new(); - for partition in partitions { - let read_partition = self.remote_engine.read(RemoteReadRequest { - table: self.get_sub_table_ident(partition), - read_request: request.clone(), - }); - futures.push(read_partition); - } - - let mut record_batch_streams = Vec::with_capacity(futures.len()); - while let Some(record_batch_stream) = futures.next().await { - let record_batch_stream = record_batch_stream - .box_err() - .context(Scan { table: self.name() })?; - record_batch_streams.push(record_batch_stream); - } - - let streams = { - let _remote_timer = PARTITION_TABLE_PARTITIONED_READ_DURATION_HISTOGRAM - .with_label_values(&["remote_read"]) - .start_timer(); - record_batch_streams - }; - - Ok(PartitionedStreams { streams }) - } - - async fn alter_schema(&self, request: AlterSchemaRequest) -> Result { - let partition_num = match self.partition_info() { - None => UnexpectedWithMsg { - msg: "partition table partition info can't be empty", - } - .fail()?, - Some(partition_info) => partition_info.get_partition_num(), - }; - - // Alter schema of partitions except the first one. - // Because the schema of partition table is stored in the first partition. 
- let mut futures = FuturesUnordered::new(); - for id in 1..partition_num { - let partition = self - .remote_engine - .alter_table_schema(AlterTableSchemaRequest { - table_ident: self.get_sub_table_ident(id), - table_schema: request.schema.clone(), - pre_schema_version: request.pre_schema_version, - }); - futures.push(partition); - } - - let mut alter_err = None; - while let Some(alter_ret) = futures.next().await { - if let Err(e) = &alter_ret { - error!("Alter schema failed, table_name:{}, err:{e}", self.name()); - alter_err.get_or_insert( - alter_ret - .box_err() - .context(AlterSchema { table: self.name() }), - ); - } - } - - // Remove the first error. - if let Some(ret) = alter_err { - ret?; - } - - // Alter schema of the first partition. - self.remote_engine - .alter_table_schema(AlterTableSchemaRequest { - table_ident: self.get_sub_table_ident(0), - table_schema: request.schema.clone(), - pre_schema_version: request.pre_schema_version, - }) - .await - .box_err() - .with_context(|| AlterSchema { - table: self.get_sub_table_ident(0).table, - })?; - - Ok(0) - } - - async fn alter_options(&self, options: HashMap) -> Result { - let partition_num = match self.partition_info() { - None => UnexpectedWithMsg { - msg: "partition table partition info can't be empty", - } - .fail()?, - Some(partition_info) => partition_info.get_partition_num(), - }; - - // Alter options of partitions except the first one. - // Because the schema of partition table is stored in the first partition. - let mut futures = FuturesUnordered::new(); - for id in 1..partition_num { - let partition = self - .remote_engine - .alter_table_options(AlterTableOptionsRequest { - table_ident: self.get_sub_table_ident(id), - options: options.clone(), - }); - futures.push(partition); - } - - let mut alter_err = None; - while let Some(alter_ret) = futures.next().await { - if let Err(e) = &alter_ret { - error!("Alter options failed, table_name:{}, err:{e}", self.name()); - alter_err.get_or_insert( - alter_ret - .box_err() - .context(AlterOptions { table: self.name() }), - ); - } - } - - // Remove the first error. - if let Some(ret) = alter_err { - ret?; - } - - // Alter options of the first partition. - self.remote_engine - .alter_table_options(AlterTableOptionsRequest { - table_ident: self.get_sub_table_ident(0), - options: options.clone(), - }) - .await - .box_err() - .with_context(|| AlterOptions { - table: self.get_sub_table_ident(0).table, - })?; - - Ok(0) - } - - // Partition table is a virtual table, so it don't need to flush. - async fn flush(&self, _request: FlushRequest) -> Result<()> { - Ok(()) - } - - // Partition table is a virtual table, so it don't need to compact. - async fn compact(&self) -> Result<()> { - Ok(()) - } -} diff --git a/src/partition_table_engine/src/scan_builder.rs b/src/partition_table_engine/src/scan_builder.rs deleted file mode 100644 index 25d080d3ff..0000000000 --- a/src/partition_table_engine/src/scan_builder.rs +++ /dev/null @@ -1,317 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Partitioned table scan builder - -use std::sync::Arc; - -use async_trait::async_trait; -use datafusion::{ - error::{DataFusionError, Result}, - physical_plan::ExecutionPlan, -}; -use df_engine_extensions::dist_sql_query::physical_plan::UnresolvedPartitionedScan; -use table_engine::{ - partition::{ - format_sub_partition_table_name, - rule::df_adapter::{DfPartitionRuleAdapter, PartitionedFilterKeyIndex}, - PartitionInfo, - }, - provider::TableScanBuilder, - remote::model::TableIdentifier, - table::ReadRequest, -}; - -use crate::partitioned_predicates; -#[derive(Debug)] -pub struct PartitionedTableScanBuilder { - table_name: String, - catalog_name: String, - schema_name: String, - partition_info: PartitionInfo, -} - -impl PartitionedTableScanBuilder { - pub fn new( - table_name: String, - catalog_name: String, - schema_name: String, - partition_info: PartitionInfo, - ) -> Self { - Self { - table_name, - catalog_name, - schema_name, - partition_info, - } - } - - fn get_sub_table_idents( - &self, - table_name: &str, - partition_info: &PartitionInfo, - partitions: &[usize], - ) -> Vec { - let definitions = partition_info.get_definitions(); - partitions - .iter() - .map(|p| { - let partition_name = &definitions[*p].name; - TableIdentifier { - catalog: self.catalog_name.clone(), - schema: self.schema_name.clone(), - table: format_sub_partition_table_name(table_name, partition_name), - } - }) - .collect() - } -} - -#[async_trait] -impl TableScanBuilder for PartitionedTableScanBuilder { - async fn build(&self, request: ReadRequest) -> Result> { - // Build partition rule. - let table_schema_snapshot = request.projected_schema.table_schema(); - let df_partition_rule = - DfPartitionRuleAdapter::new(self.partition_info.clone(), table_schema_snapshot) - .map_err(|e| { - DataFusionError::Internal(format!("failed to build partition rule, err:{e}")) - })?; - - let mut partitioned_key_indices = PartitionedFilterKeyIndex::new(); - // Evaluate expr and locate partition. - let partitions = df_partition_rule - .locate_partitions_for_read(request.predicate.exprs(), &mut partitioned_key_indices) - .map_err(|e| { - DataFusionError::Internal(format!("failed to locate partition for read, err:{e}")) - })?; - - let sub_tables = - self.get_sub_table_idents(&self.table_name, &self.partition_info, &partitions); - - let predicates = if partitioned_key_indices.len() == partitions.len() { - Some( - partitioned_predicates( - request.predicate.clone(), - &partitions, - &mut partitioned_key_indices, - ) - .map_err(|e| { - DataFusionError::Internal(format!("partition predicates failed, err:{e}")) - })?, - ) - } else { - // since FilterExtractor.extract only cover some specific expr - // cases, partitioned_key_indices.len() could be 0. - // All partition requests will have the same predicate. - None - }; - - // Build plan. 
- let plan = - UnresolvedPartitionedScan::new(&self.table_name, sub_tables, request, predicates); - - Ok(Arc::new(plan)) - } -} - -#[cfg(test)] -mod tests { - use common_types::{column_schema::Builder as ColBuilder, datum::DatumKind, schema::Builder}; - use datafusion::logical_expr::{binary_expr, in_list, Expr, Operator}; - use table_engine::{ - partition::{ - rule::df_adapter::{DfPartitionRuleAdapter, PartitionedFilterKeyIndex}, - KeyPartitionInfo, PartitionDefinition, PartitionInfo, - }, - predicate::PredicateBuilder, - }; - - use crate::partitioned_predicates; - - #[test] - fn test_partitioned_predicate() { - // conditions: - // 1) table schema: col_ts, col1, col2, in which col1 and col2 are both keys, - // and with two partitions - // 2) sql: select * from table where col1 = '33' and col2 in ("aa", "bb", - // "cc", "dd") - // partition expectations: - // 1) query fit in two partitions - // 2) yield two predicates, p0: col1 = '33' and col2 in ("aa", "bb", "cc"); - // p1: col1 = '33' and col2 in ("dd") - let definitions = vec![ - PartitionDefinition { - name: "p1".to_string(), - origin_name: None, - }, - PartitionDefinition { - name: "p2".to_string(), - origin_name: None, - }, - ]; - - let partition_info = PartitionInfo::Key(KeyPartitionInfo { - version: 0, - definitions, - partition_key: vec!["col1".to_string(), "col2".to_string()], - linear: false, - }); - - let schema = { - let builder = Builder::new(); - let col_ts = ColBuilder::new("col_ts".to_string(), DatumKind::Timestamp) - .build() - .expect("ts"); - let col1 = ColBuilder::new("col1".to_string(), DatumKind::String) - .build() - .expect("should succeed to build column schema"); - let col2 = ColBuilder::new("col2".to_string(), DatumKind::String) - .build() - .expect("should succeed to build column schema"); - let col3 = ColBuilder::new("col3".to_string(), DatumKind::String) - .build() - .expect("should succeed to build column schema"); - builder - .auto_increment_column_id(true) - .add_key_column(col_ts) - .unwrap() - .add_key_column(col1) - .unwrap() - .add_key_column(col2) - .unwrap() - .add_key_column(col3) - .unwrap() - .primary_key_indexes(vec![1, 2]) - .build() - .unwrap() - }; - - let df_partition_rule = DfPartitionRuleAdapter::new(partition_info, &schema).unwrap(); - - let exprs = vec![ - binary_expr( - Expr::Column("col1".into()), - Operator::Eq, - Expr::Literal("33".into()), - ), - in_list( - Expr::Column("col2".into()), - vec![ - Expr::Literal("aa".into()), - Expr::Literal("bb".into()), - Expr::Literal("cc".into()), - Expr::Literal("dd".into()), - ], - false, - ), - in_list( - Expr::Column("col3".into()), - vec![ - Expr::Literal("1".into()), - Expr::Literal("2".into()), - Expr::Literal("3".into()), - Expr::Literal("4".into()), - ], - false, - ), - ]; - let mut partitioned_key_indices = PartitionedFilterKeyIndex::new(); - let partitions = df_partition_rule - .locate_partitions_for_read(&exprs, &mut partitioned_key_indices) - .unwrap(); - assert!(partitions.len() == 2); - assert!(partitioned_key_indices.len() == 2); - - let predicate = PredicateBuilder::default() - .add_pushdown_exprs(exprs.as_slice()) - .build(); - - let predicates = partitioned_predicates( - predicate, - partitions.as_slice(), - &mut partitioned_key_indices, - ); - assert!(predicates.is_ok()); - let predicates = predicates.unwrap(); - assert!(predicates.len() == 2); - - assert!(predicates[0].exprs().len() == 3); - assert!( - predicates[0].exprs()[0] - == binary_expr( - Expr::Column("col1".into()), - Operator::Eq, - Expr::Literal("33".into()) - ) - ); - 
assert!( - predicates[0].exprs()[1] - == in_list( - Expr::Column("col2".into()), - vec![ - Expr::Literal("aa".into()), - Expr::Literal("bb".into()), - Expr::Literal("cc".into()), - ], - false, - ) - ); - assert!( - predicates[0].exprs()[2] - == in_list( - Expr::Column("col3".into()), - vec![ - Expr::Literal("1".into()), - Expr::Literal("2".into()), - Expr::Literal("3".into()), - Expr::Literal("4".into()), - ], - false, - ) - ); - assert!( - predicates[1].exprs()[0] - == binary_expr( - Expr::Column("col1".into()), - Operator::Eq, - Expr::Literal("33".into()) - ) - ); - assert!( - predicates[1].exprs()[1] - == in_list( - Expr::Column("col2".into()), - vec![Expr::Literal("dd".into()),], - false, - ) - ); - assert!( - predicates[1].exprs()[2] - == in_list( - Expr::Column("col3".into()), - vec![ - Expr::Literal("1".into()), - Expr::Literal("2".into()), - Expr::Literal("3".into()), - Expr::Literal("4".into()), - ], - false, - ) - ); - } -} diff --git a/src/partition_table_engine/src/test_util.rs b/src/partition_table_engine/src/test_util.rs deleted file mode 100644 index aebc593c90..0000000000 --- a/src/partition_table_engine/src/test_util.rs +++ /dev/null @@ -1,158 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! In-memory table engine implementations - -use std::{collections::HashMap, fmt}; - -use async_trait::async_trait; -use common_types::{row::Row, schema::Schema}; -use table_engine::{ - partition::PartitionInfo, - stream::{PartitionedStreams, SendableRecordBatchStream}, - table::{ - AlterSchemaRequest, FlushRequest, GetRequest, ReadRequest, Table, TableId, TableStats, - WriteRequest, - }, -}; - -/// In-memory table -/// -/// Mainly for test, DO NOT use it in production. All data inserted are buffered -/// in memory, does not support schema change. 
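The removed `partitioned_predicates` helper, exercised by the test above, prunes the values of an `InList` filter down to the subset recorded for each partition, so every sub-table only sees the IN-list values that can actually hit it. As a rough illustration of that idea only, here is a simplified, std-only sketch that works on plain strings instead of DataFusion `Expr`s; the function and type names are hypothetical and not part of the removed code:

```rust
use std::collections::{HashMap, HashSet};

/// Keep, for every partition, only the IN-list values whose indices were
/// recorded for that partition (mirroring how the removed helper edits the
/// cloned per-partition predicates).
fn prune_in_list_per_partition(
    full_list: &[String],
    kept_indices_by_partition: &HashMap<usize, HashSet<usize>>,
    partitions: &[usize],
) -> Vec<Vec<String>> {
    partitions
        .iter()
        .map(|p| match kept_indices_by_partition.get(p) {
            // No recorded indices for this partition: keep the full list.
            None => full_list.to_vec(),
            Some(kept) => full_list
                .iter()
                .enumerate()
                .filter(|(i, _)| kept.contains(i))
                .map(|(_, v)| v.clone())
                .collect(),
        })
        .collect()
}

fn main() {
    // col2 IN ("aa", "bb", "cc", "dd"), split across two partitions as in the test above.
    let list: Vec<String> = ["aa", "bb", "cc", "dd"].iter().map(|s| s.to_string()).collect();
    let mut kept = HashMap::new();
    kept.insert(0usize, HashSet::from([0usize, 1, 2]));
    kept.insert(1usize, HashSet::from([3usize]));
    let pruned = prune_in_list_per_partition(&list, &kept, &[0, 1]);
    assert_eq!(pruned[0], vec!["aa", "bb", "cc"]);
    assert_eq!(pruned[1], vec!["dd"]);
}
```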
-pub struct PartitionedMemoryTable { - /// Table name - name: String, - /// Table id - id: TableId, - /// Table schema - schema: Schema, - /// Engine type - engine_type: String, - /// Partition info - partition_info: PartitionInfo, -} - -impl PartitionedMemoryTable { - pub fn new( - name: String, - id: TableId, - schema: Schema, - engine_type: String, - partition_info: PartitionInfo, - ) -> Self { - Self { - name, - id, - schema, - engine_type, - partition_info, - } - } -} - -impl fmt::Debug for PartitionedMemoryTable { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("MemoryTable") - .field("name", &self.name) - .field("id", &self.id) - .field("schema", &self.schema) - .finish() - } -} - -#[async_trait] -impl Table for PartitionedMemoryTable { - fn name(&self) -> &str { - &self.name - } - - fn id(&self) -> TableId { - self.id - } - - fn options(&self) -> HashMap { - HashMap::new() - } - - fn partition_info(&self) -> Option { - Some(self.partition_info.clone()) - } - - fn schema(&self) -> Schema { - self.schema.clone() - } - - fn engine_type(&self) -> &str { - &self.engine_type - } - - fn stats(&self) -> TableStats { - TableStats::default() - } - - fn support_pushdown(&self, _read_schema: &Schema, _col_names: &[String]) -> bool { - false - } - - async fn write(&self, _request: WriteRequest) -> table_engine::table::Result { - unimplemented!() - } - - // batch_size is ignored now - async fn read( - &self, - _request: ReadRequest, - ) -> table_engine::table::Result { - unimplemented!() - } - - async fn get(&self, _request: GetRequest) -> table_engine::table::Result> { - unimplemented!() - } - - async fn partitioned_read( - &self, - _request: ReadRequest, - ) -> table_engine::table::Result { - unimplemented!() - } - - // TODO: Alter schema is not supported now - async fn alter_schema( - &self, - _request: AlterSchemaRequest, - ) -> table_engine::table::Result { - unimplemented!() - } - - // TODO: Alter modify setting is not supported now - async fn alter_options( - &self, - _options: HashMap, - ) -> table_engine::table::Result { - unimplemented!() - } - - async fn flush(&self, _request: FlushRequest) -> table_engine::table::Result<()> { - unimplemented!() - } - - async fn compact(&self) -> table_engine::table::Result<()> { - unimplemented!() - } -} diff --git a/horaedb/pb_types/Cargo.toml b/src/pb_types/Cargo.toml similarity index 100% rename from horaedb/pb_types/Cargo.toml rename to src/pb_types/Cargo.toml diff --git a/horaedb/pb_types/build.rs b/src/pb_types/build.rs similarity index 100% rename from horaedb/pb_types/build.rs rename to src/pb_types/build.rs diff --git a/horaedb/pb_types/protos/sst.proto b/src/pb_types/protos/sst.proto similarity index 100% rename from horaedb/pb_types/protos/sst.proto rename to src/pb_types/protos/sst.proto diff --git a/horaedb/pb_types/src/lib.rs b/src/pb_types/src/lib.rs similarity index 100% rename from horaedb/pb_types/src/lib.rs rename to src/pb_types/src/lib.rs diff --git a/src/proxy/Cargo.toml b/src/proxy/Cargo.toml deleted file mode 100644 index c6831b3682..0000000000 --- a/src/proxy/Cargo.toml +++ /dev/null @@ -1,84 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "proxy" - -[package.license] -workspace = true - -[package.version] -workspace = true - -[package.authors] -workspace = true - -[package.edition] -workspace = true - -[dependencies] -arrow = { workspace = true } -arrow_ext = { workspace = true } -async-trait = { workspace = true } -base64 = { workspace = true } -bytes = { workspace = true } -catalog = { workspace = true } -clru = { workspace = true } -cluster = { workspace = true } -common_types = { workspace = true } -datafusion = { workspace = true } -df_operator = { workspace = true } -futures = { workspace = true } -generic_error = { workspace = true } -horaedbproto = { workspace = true } -http = "0.2" -influxdb-line-protocol = "1.0" -influxql-query = { workspace = true } -interpreters = { workspace = true } -itertools = { workspace = true } -lazy_static = { workspace = true } -logger = { workspace = true } -macros = { workspace = true } -meta_client = { workspace = true } -notifier = { workspace = true } -paste = { workspace = true } -prom-remote-api = { workspace = true, features = ["warp"] } -prometheus = { workspace = true } -prometheus-static-metric = { workspace = true } -prost = { workspace = true } -query_engine = { workspace = true } -query_frontend = { workspace = true } -router = { workspace = true } -runtime = { workspace = true } -serde = { workspace = true } -serde_json = { workspace = true } -snafu = { workspace = true } -spin = { workspace = true } -sqlparser = { workspace = true } -table_engine = { workspace = true } -time_ext = { workspace = true } -timed_task = { workspace = true } -tokio = { workspace = true } -tokio-stream = { version = "0.1", features = ["net"] } -tonic = { workspace = true } -warp = "0.3" -zstd = { workspace = true } - -[dev-dependencies] -json_pretty = "0.1.2" -query_frontend = { workspace = true, features = ["test"] } -system_catalog = { workspace = true } diff --git a/src/proxy/src/auth/mod.rs b/src/proxy/src/auth/mod.rs deleted file mode 100644 index b0b5269123..0000000000 --- a/src/proxy/src/auth/mod.rs +++ /dev/null @@ -1,37 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
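The proxy auth module removed below reads credentials from a CSV-style file (one `user,password` pair per line) and, when enabled, checks the HTTP Basic `authorization` header against that map. A minimal std-only sketch of the credential-loading step; the file name and function name here are made up for illustration:

```rust
use std::{
    collections::HashMap,
    fs::File,
    io::{self, BufRead, BufReader},
    path::Path,
};

/// Parse a `user,password` file into a name -> password map, skipping blank lines.
fn load_credentials(path: &Path) -> io::Result<HashMap<String, String>> {
    let reader = BufReader::new(File::open(path)?);
    let mut users = HashMap::new();
    for line in reader.lines() {
        let line = line?;
        if line.trim().is_empty() {
            continue;
        }
        // Reject malformed lines instead of silently ignoring them.
        let (user, pass) = line.split_once(',').ok_or_else(|| {
            io::Error::new(io::ErrorKind::InvalidData, format!("invalid line: {line:?}"))
        })?;
        users.insert(user.to_string(), pass.to_string());
    }
    Ok(users)
}

fn main() -> io::Result<()> {
    // Hypothetical path; in the removed code the path comes from the auth config's `source` field.
    let users = load_credentials(Path::new("users.csv"))?;
    println!("loaded {} users", users.len());
    Ok(())
}
```

The removed `identify` method additionally base64-decodes the `Basic <credentials>` header value before splitting it on `:` and comparing against this map.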
- -use serde::{Deserialize, Serialize}; - -pub mod with_file; - -/// Header of authorization -pub const AUTHORIZATION: &str = "authorization"; - -#[derive(Debug, Clone, Deserialize, Serialize, Default)] -pub enum AuthType { - #[default] - #[serde(rename = "file")] - File, -} - -#[derive(Debug, Clone, Deserialize, Serialize, Default)] -pub struct Config { - pub enable: bool, - pub auth_type: AuthType, - pub source: String, -} diff --git a/src/proxy/src/auth/with_file.rs b/src/proxy/src/auth/with_file.rs deleted file mode 100644 index 116005cee4..0000000000 --- a/src/proxy/src/auth/with_file.rs +++ /dev/null @@ -1,136 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{ - collections::HashMap, - fs::File, - io::{self, BufRead}, - path::Path, -}; - -use generic_error::BoxError; -use snafu::{OptionExt, ResultExt}; -use tonic::service::Interceptor; - -use crate::{ - auth::AUTHORIZATION, - error::{Internal, InternalNoCause, Result}, -}; - -#[derive(Debug, Clone, Default)] -pub struct AuthWithFile { - enable: bool, - file_path: String, - // name -> password - users: HashMap, -} - -impl AuthWithFile { - pub fn new(enable: bool, file_path: String) -> Self { - Self { - enable, - file_path, - users: HashMap::new(), - } - } - - // Load a csv format config - pub fn load_credential(&mut self) -> Result<()> { - if !self.enable { - return Ok(()); - } - - let path = Path::new(&self.file_path); - if !path.exists() { - return InternalNoCause { - msg: format!("file not existed: {:?}", path), - } - .fail(); - } - - let file = File::open(path).box_err().context(Internal { - msg: "failed to open file", - })?; - let reader = io::BufReader::new(file); - - for line in reader.lines() { - let line = line.box_err().context(Internal { - msg: "failed to read line", - })?; - let (username, password) = line.split_once(',').with_context(|| InternalNoCause { - msg: format!("invalid line: {:?}", line), - })?; - self.users - .insert(username.to_string(), password.to_string()); - } - - Ok(()) - } - - // TODO: currently we only support basic auth - // This function should return Result - pub fn identify(&self, input: Option) -> bool { - if !self.enable { - return true; - } - - let input = match input { - Some(v) => v, - None => return false, - }; - let input = match input.split_once("Basic ") { - Some((_, encoded)) => match base64::decode(encoded) { - Ok(v) => v, - Err(_e) => return false, - }, - None => return false, - }; - let input = match std::str::from_utf8(&input) { - Ok(v) => v, - Err(_e) => return false, - }; - match input.split_once(':') { - Some((user, pass)) => self - .users - .get(user) - .map(|expected| expected == pass) - .unwrap_or_default(), - None => false, - } - } -} - -pub fn get_authorization(req: &tonic::Request) -> Option { - req.metadata() 
- .get(AUTHORIZATION) - .and_then(|value| value.to_str().ok().map(String::from)) -} - -impl Interceptor for AuthWithFile { - fn call( - &mut self, - request: tonic::Request<()>, - ) -> std::result::Result, tonic::Status> { - // TODO: extract username from request - let authorization = get_authorization(&request); - if self.identify(authorization) { - Ok(request) - } else { - Err(tonic::Status::unauthenticated("unauthenticated")) - } - } -} diff --git a/src/proxy/src/context.rs b/src/proxy/src/context.rs deleted file mode 100644 index 02aa48c9b2..0000000000 --- a/src/proxy/src/context.rs +++ /dev/null @@ -1,110 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Server context - -use std::time::Duration; - -use common_types::request_id::RequestId; -use macros::define_result; -use snafu::{ensure, Backtrace, Snafu}; - -#[allow(clippy::enum_variant_names)] -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Missing catalog.\nBacktrace:\n{}", backtrace))] - MissingCatalog { backtrace: Backtrace }, - - #[snafu(display("Missing schema.\nBacktrace:\n{}", backtrace))] - MissingSchema { backtrace: Backtrace }, - - #[snafu(display("Missing runtime.\nBacktrace:\n{}", backtrace))] - MissingRuntime { backtrace: Backtrace }, - - #[snafu(display("Missing router.\nBacktrace:\n{}", backtrace))] - MissingRouter { backtrace: Backtrace }, -} - -define_result!(Error); - -/// Server request context -/// -/// Context for request, may contains -/// 1. Request context and options -/// 2. 
Info from http headers -#[derive(Debug)] -pub struct RequestContext { - /// Catalog of the request - pub catalog: String, - /// Schema of request - pub schema: String, - /// Request timeout - pub timeout: Option, - /// Request id - pub request_id: RequestId, - /// authorization - pub authorization: Option, -} - -impl RequestContext { - pub fn builder() -> Builder { - Builder::default() - } -} - -#[derive(Default)] -pub struct Builder { - catalog: String, - schema: String, - timeout: Option, - authorization: Option, -} - -impl Builder { - pub fn catalog(mut self, catalog: String) -> Self { - self.catalog = catalog; - self - } - - pub fn schema(mut self, schema: String) -> Self { - self.schema = schema; - self - } - - pub fn timeout(mut self, timeout: Option) -> Self { - self.timeout = timeout; - self - } - - pub fn authorization(mut self, authorization: Option) -> Self { - self.authorization = authorization; - self - } - - pub fn build(self) -> Result { - ensure!(!self.catalog.is_empty(), MissingCatalog); - ensure!(!self.schema.is_empty(), MissingSchema); - - Ok(RequestContext { - catalog: self.catalog, - schema: self.schema, - timeout: self.timeout, - request_id: RequestId::next_id(), - authorization: self.authorization, - }) - } -} diff --git a/src/proxy/src/error.rs b/src/proxy/src/error.rs deleted file mode 100644 index 50c5c999f6..0000000000 --- a/src/proxy/src/error.rs +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use generic_error::GenericError; -use horaedbproto::common::ResponseHeader; -use http::StatusCode; -use macros::define_result; -use snafu::{Backtrace, Snafu}; - -use crate::error_util; - -define_result!(Error); - -#[derive(Snafu, Debug)] -#[snafu(visibility(pub))] -pub enum Error { - #[snafu(display("Internal error, msg:{}, err:{}", msg, source))] - Internal { msg: String, source: GenericError }, - - #[snafu(display("Internal error, msg:{}.\nBacktrace:\n{}", msg, backtrace))] - InternalNoCause { msg: String, backtrace: Backtrace }, - - #[snafu(display("Rpc error, code:{:?}, err:{}", code, msg))] - ErrNoCause { code: StatusCode, msg: String }, - - #[snafu(display("Rpc error, code:{:?}, message:{}, err:{}", code, msg, source))] - ErrWithCause { - code: StatusCode, - msg: String, - source: GenericError, - }, - - #[snafu(display("Query warning, msg:{msg}"))] - QueryMaybeExceedTTL { msg: String }, -} - -impl Error { - pub fn code(&self) -> StatusCode { - match *self { - Error::ErrNoCause { code, .. } => code, - Error::ErrWithCause { code, .. } => code, - Error::QueryMaybeExceedTTL { .. } => StatusCode::OK, - Error::Internal { .. } | Error::InternalNoCause { .. } => { - StatusCode::INTERNAL_SERVER_ERROR - } - } - } - - /// Get the error message returned to the user. 
- pub fn error_message(&self) -> String { - match self { - Error::ErrNoCause { msg, .. } | Error::InternalNoCause { msg, .. } => msg.clone(), - - Error::ErrWithCause { msg, source, .. } | Error::Internal { msg, source, .. } => { - let err_string = source.to_string(); - let first_line = error_util::remove_backtrace_from_err(&err_string); - format!("{msg}. Caused by: {first_line}") - } - Error::QueryMaybeExceedTTL { msg } => msg.clone(), - } - } -} - -pub fn build_err_header(err: Error) -> ResponseHeader { - ResponseHeader { - code: err.code().as_u16() as u32, - error: err.error_message(), - } -} - -pub fn build_ok_header() -> ResponseHeader { - ResponseHeader { - code: StatusCode::OK.as_u16() as u32, - ..Default::default() - } -} - -impl From for Error { - fn from(route_err: router::Error) -> Self { - match &route_err { - router::Error::RouteNotFound { .. } | router::Error::ShardNotFound { .. } => { - Error::ErrNoCause { - code: StatusCode::NOT_FOUND, - msg: route_err.to_string(), - } - } - router::Error::ParseEndpoint { .. } - | router::Error::OtherWithCause { .. } - | router::Error::OtherNoCause { .. } => Error::ErrNoCause { - code: StatusCode::INTERNAL_SERVER_ERROR, - msg: route_err.to_string(), - }, - } - } -} diff --git a/src/proxy/src/error_util.rs b/src/proxy/src/error_util.rs deleted file mode 100644 index 102d1295a7..0000000000 --- a/src/proxy/src/error_util.rs +++ /dev/null @@ -1,53 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// Our backtrace is defined like this -// #[snafu(display("Time range is not found.\nBacktrace\n:{}", backtrace))] -// -// So here we split by `Backtrace`, and return first part -pub fn remove_backtrace_from_err(err_string: &str) -> &str { - err_string - .split("Backtrace") - .next() - .map(|s| s.trim_end()) - .unwrap_or(err_string) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_remove_backtrace() { - let cases = vec![ - ( - r#"Failed to execute select, err:Failed to execute logical plan, err:Failed to do physical optimization, -err:DataFusion Failed to optimize physical plan, err:Error during planning. 
-Backtrace: - 0 ::generate::h996ee016dfa35e37"#, - r#"Failed to execute select, err:Failed to execute logical plan, err:Failed to do physical optimization, -err:DataFusion Failed to optimize physical plan, err:Error during planning."#, - ), - ("", ""), - ("some error", "some error"), - ]; - - for (input, expected) in cases { - assert_eq!(expected, remove_backtrace_from_err(input)); - } - } -} diff --git a/src/proxy/src/forward.rs b/src/proxy/src/forward.rs deleted file mode 100644 index 93d0dae96b..0000000000 --- a/src/proxy/src/forward.rs +++ /dev/null @@ -1,549 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Forward for grpc services -use std::{ - collections::HashMap, - net::Ipv4Addr, - sync::{Arc, RwLock}, -}; - -use async_trait::async_trait; -use horaedbproto::storage::{ - storage_service_client::StorageServiceClient, RequestContext, RouteRequest as RouteRequestPb, -}; -use logger::{debug, error, warn}; -use macros::define_result; -use router::{endpoint::Endpoint, RouteRequest, RouterRef}; -use serde::{Deserialize, Serialize}; -use snafu::{Backtrace, ResultExt, Snafu}; -use time_ext::ReadableDuration; -use tonic::{ - metadata::errors::InvalidMetadataValue, - transport::{self, Channel}, -}; - -use crate::{auth::AUTHORIZATION, FORWARDED_FROM}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display( - "Invalid endpoint, endpoint:{}, err:{}.\nBacktrace:\n{}", - endpoint, - source, - backtrace - ))] - InvalidEndpoint { - endpoint: String, - source: tonic::transport::Error, - backtrace: Backtrace, - }, - - #[snafu(display( - "Local ip addr should not be loopback, addr:{}.\nBacktrace:\n{}", - ip_addr, - backtrace - ))] - LoopbackLocalIpAddr { - ip_addr: String, - backtrace: Backtrace, - }, - - #[snafu(display( - "Invalid schema, schema:{}, err:{}.\nBacktrace:\n{}", - schema, - source, - backtrace - ))] - InvalidSchema { - schema: String, - source: InvalidMetadataValue, - backtrace: Backtrace, - }, - - #[snafu(display( - "Failed to connect endpoint, endpoint:{}, err:{}.\nBacktrace:\n{}", - endpoint, - source, - backtrace - ))] - Connect { - endpoint: String, - source: tonic::transport::Error, - backtrace: Backtrace, - }, - - #[snafu(display("Request should not be forwarded twice, forward from:{}", endpoint))] - ForwardedErr { endpoint: String }, -} - -define_result!(Error); - -pub type ForwarderRef = Arc>; -pub trait ForwarderRpc = FnOnce( - StorageServiceClient, - tonic::Request, - &Endpoint, -) -> Box< - dyn std::future::Future> + Send + Unpin, ->; - -#[derive(Debug, Clone, Deserialize, Serialize)] -#[serde(default)] -pub struct Config { - /// Sets an interval for HTTP2 Ping frames should be sent to keep a - /// connection alive. 
- pub keep_alive_interval: ReadableDuration, - /// A timeout for receiving an acknowledgement of the keep-alive ping - /// If the ping is not acknowledged within the timeout, the connection will - /// be closed - pub keep_alive_timeout: ReadableDuration, - /// default keep http2 connections alive while idle - pub keep_alive_while_idle: bool, - pub connect_timeout: ReadableDuration, - pub forward_timeout: Option, -} - -impl Default for Config { - fn default() -> Self { - Self { - keep_alive_interval: ReadableDuration::secs(60 * 10), - keep_alive_timeout: ReadableDuration::secs(3), - keep_alive_while_idle: true, - connect_timeout: ReadableDuration::secs(3), - forward_timeout: None, - } - } -} - -#[async_trait] -pub trait ClientBuilder { - async fn connect(&self, endpoint: &Endpoint) -> Result>; -} - -pub struct DefaultClientBuilder { - config: Config, -} - -impl DefaultClientBuilder { - #[inline] - fn make_endpoint_with_scheme(endpoint: &Endpoint) -> String { - format!("http://{}:{}", endpoint.addr, endpoint.port) - } -} - -#[async_trait] -impl ClientBuilder for DefaultClientBuilder { - async fn connect(&self, endpoint: &Endpoint) -> Result> { - let endpoint_with_scheme = Self::make_endpoint_with_scheme(endpoint); - let configured_endpoint = transport::Endpoint::from_shared(endpoint_with_scheme.clone()) - .context(InvalidEndpoint { - endpoint: &endpoint_with_scheme, - })?; - - let configured_endpoint = match self.config.keep_alive_while_idle { - true => configured_endpoint - .connect_timeout(self.config.connect_timeout.0) - .keep_alive_timeout(self.config.keep_alive_timeout.0) - .keep_alive_while_idle(true) - .http2_keep_alive_interval(self.config.keep_alive_interval.0), - false => configured_endpoint - .connect_timeout(self.config.connect_timeout.0) - .keep_alive_while_idle(false), - }; - let channel = configured_endpoint.connect().await.context(Connect { - endpoint: &endpoint_with_scheme, - })?; - - let client = StorageServiceClient::new(channel); - Ok(client) - } -} - -/// Forwarder does request forwarding. -/// -/// No forward happens if the router tells the target endpoint is the same as -/// the local endpoint. -/// -/// Assuming client wants to access some table which are located on server1 (the -/// router can tell the location information). Then here is the diagram -/// describing what the forwarder does: -/// peer-to-peer procedure: client --> server1 -/// forwarding procedure: client --> server0 (forwarding server) --> server1 -pub struct Forwarder { - config: Config, - router: RouterRef, - local_endpoint: Endpoint, - client_builder: B, - clients: RwLock>>, -} - -/// The result of forwarding. -/// -/// If no forwarding happens, [`Local`] can be used. -pub enum ForwardResult { - Local, - Forwarded(std::result::Result), -} - -#[derive(Debug)] -pub struct ForwardRequest { - pub schema: String, - pub table: String, - pub req: tonic::Request, - pub forwarded_from: Option, - pub authorization: Option, -} - -impl Forwarder { - pub fn new(config: Config, router: RouterRef, local_endpoint: Endpoint) -> Self { - let client_builder = DefaultClientBuilder { - config: config.clone(), - }; - - Self::new_with_client_builder(config, router, local_endpoint, client_builder) - } -} - -impl Forwarder { - #[inline] - fn is_loopback_ip(ip_addr: &str) -> bool { - ip_addr - .parse::() - .map(|ip| ip.is_loopback()) - .unwrap_or(false) - } - - /// Check whether the target endpoint is the same as the local endpoint. 
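The forwarding decision hinges on the `is_local_endpoint` check defined next: a request is served locally when the routed endpoint equals the local endpoint, or when it points at a loopback address on the same port. A small std-only sketch of that rule, using a hypothetical plain `Endpoint` struct in place of the router's type:

```rust
use std::net::Ipv4Addr;

/// Hypothetical stand-in for router::endpoint::Endpoint.
#[derive(PartialEq, Clone, Debug)]
struct Endpoint {
    addr: String,
    port: u16,
}

/// Non-IPv4 or unparsable addresses are treated as not loopback.
fn is_loopback_ip(addr: &str) -> bool {
    addr.parse::<Ipv4Addr>().map(|ip| ip.is_loopback()).unwrap_or(false)
}

/// Requests routed to the local node, or to a loopback address on the same port, stay local.
fn is_local_endpoint(local: &Endpoint, target: &Endpoint) -> bool {
    if local == target {
        return true;
    }
    local.port == target.port && is_loopback_ip(&target.addr)
}

fn main() {
    let local = Endpoint { addr: "192.168.1.1".to_string(), port: 8831 };
    let loopback = Endpoint { addr: "127.0.0.1".to_string(), port: 8831 };
    let remote = Endpoint { addr: "192.168.1.2".to_string(), port: 8831 };
    assert!(is_local_endpoint(&local, &loopback));
    assert!(!is_local_endpoint(&local, &remote));
}
```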
- pub fn is_local_endpoint(&self, target: &Endpoint) -> bool { - if &self.local_endpoint == target { - return true; - } - - if self.local_endpoint.port != target.port { - return false; - } - - // Only need to check the remote is loopback addr. - Self::is_loopback_ip(&target.addr) - } - - /// Release the client for the given endpoint. - fn release_client(&self, endpoint: &Endpoint) -> Option> { - let mut clients = self.clients.write().unwrap(); - clients.remove(endpoint) - } -} - -impl Forwarder { - pub fn new_with_client_builder( - config: Config, - router: RouterRef, - local_endpoint: Endpoint, - client_builder: B, - ) -> Self { - Self { - config, - local_endpoint, - router, - clients: RwLock::new(HashMap::new()), - client_builder, - } - } - - /// Forward the request according to the configured router. - /// - /// Error will be thrown if it happens in the forwarding procedure, that is - /// to say, some errors like the output from the `do_rpc` will be - /// wrapped in the [`ForwardResult::Forwarded`]. - pub async fn forward( - &self, - forward_req: ForwardRequest, - do_rpc: F, - ) -> Result> - where - F: ForwarderRpc, - Req: std::fmt::Debug + Clone, - { - let ForwardRequest { - schema, - table, - req, - forwarded_from, - authorization, - } = forward_req; - - let req_pb = RouteRequestPb { - context: Some(RequestContext { database: schema }), - tables: vec![table], - }; - - let request = RouteRequest::new(req_pb, true); - let endpoint = match self.router.route(request).await { - Ok(mut routes) => { - if routes.len() != 1 || routes[0].endpoint.is_none() { - warn!( - "Fail to forward request for multiple or empty route results, routes result:{:?}, req:{:?}", - routes, req - ); - return Ok(ForwardResult::Local); - } - - Endpoint::from(routes.remove(0).endpoint.unwrap()) - } - Err(e) => { - error!("Fail to route request, req:{:?}, err:{}", req, e); - return Ok(ForwardResult::Local); - } - }; - - self.forward_with_endpoint(endpoint, req, forwarded_from, authorization, do_rpc) - .await - } - - pub async fn forward_with_endpoint( - &self, - endpoint: Endpoint, - mut req: tonic::Request, - forwarded_from: Option, - authorization: Option, - do_rpc: F, - ) -> Result> - where - F: ForwarderRpc, - Req: std::fmt::Debug + Clone, - { - if self.is_local_endpoint(&endpoint) { - return Ok(ForwardResult::Local); - } - - // Update the request. - { - if let Some(timeout) = self.config.forward_timeout { - req.set_timeout(timeout.0); - } - } - - // TODO: add metrics to record the forwarding. - debug!( - "Try to forward request to {:?}, request:{:?}", - endpoint, req, - ); - - if let Some(endpoint) = forwarded_from { - return ForwardedErr { endpoint }.fail(); - } - - // mark forwarded - req.metadata_mut().insert( - FORWARDED_FROM, - self.local_endpoint.to_string().parse().unwrap(), - ); - - if let Some(authorization) = authorization { - req.metadata_mut() - .insert(AUTHORIZATION, authorization.parse().unwrap()); - } - - let client = self.get_or_create_client(&endpoint).await?; - match do_rpc(client, req, &endpoint).await { - Err(e) => { - // Release the grpc client for the error doesn't belong to the normal error. 
- self.release_client(&endpoint); - Ok(ForwardResult::Forwarded(Err(e))) - } - Ok(resp) => Ok(ForwardResult::Forwarded(Ok(resp))), - } - } - - async fn get_or_create_client( - &self, - endpoint: &Endpoint, - ) -> Result> { - { - let clients = self.clients.read().unwrap(); - if let Some(v) = clients.get(endpoint) { - return Ok(v.clone()); - } - } - - let new_client = self.client_builder.connect(endpoint).await?; - { - let mut clients = self.clients.write().unwrap(); - if let Some(v) = clients.get(endpoint) { - return Ok(v.clone()); - } - clients.insert(endpoint.clone(), new_client.clone()); - } - - Ok(new_client) - } -} - -#[cfg(test)] -mod tests { - use catalog::consts::DEFAULT_SCHEMA; - use futures::FutureExt; - use horaedbproto::storage::{Route, SqlQueryRequest, SqlQueryResponse}; - use meta_client::types::TableInfo; - use router::Router; - use tonic::IntoRequest; - - use super::*; - - #[test] - fn test_check_loopback_endpoint() { - let loopback_ips = vec!["127.0.0.1", "127.0.0.2"]; - for loopback_ip in loopback_ips { - assert!(Forwarder::::is_loopback_ip( - loopback_ip - )); - } - - let normal_ips = vec!["10.100.10.14", "192.168.1.2", "0.0.0.0"]; - for ip in normal_ips { - assert!(!Forwarder::::is_loopback_ip(ip)); - } - - let invalid_addrs = vec!["hello.world.com", "test", "localhost", ""]; - for ip in invalid_addrs { - assert!(!Forwarder::::is_loopback_ip(ip)); - } - } - - struct MockRouter { - routing_tables: HashMap, - } - - #[async_trait] - impl Router for MockRouter { - async fn route(&self, req: RouteRequest) -> router::Result> { - let endpoint = self.routing_tables.get(&req.inner.tables[0]); - match endpoint { - None => Ok(vec![]), - Some(v) => Ok(vec![Route { - table: req.inner.tables[0].clone(), - endpoint: Some(v.clone().into()), - }]), - } - } - - async fn fetch_table_info( - &self, - _schema: &str, - _table: &str, - ) -> router::Result> { - return Ok(None); - } - } - - struct MockClientBuilder; - - #[async_trait] - impl ClientBuilder for MockClientBuilder { - async fn connect(&self, _: &Endpoint) -> Result> { - let (channel, _) = Channel::balance_channel::(10); - Ok(StorageServiceClient::::new(channel)) - } - } - - #[tokio::test] - async fn test_normal_forward() { - let config = Config::default(); - - let mut mock_router = MockRouter { - routing_tables: HashMap::new(), - }; - let test_table0: &str = "test_table0"; - let test_table1: &str = "test_table1"; - let test_table2: &str = "test_table2"; - let test_table3: &str = "test_table3"; - let test_endpoint0 = Endpoint::new("192.168.1.12".to_string(), 8831); - let test_endpoint1 = Endpoint::new("192.168.1.2".to_string(), 8831); - let test_endpoint2 = Endpoint::new("192.168.1.2".to_string(), 8832); - let test_endpoint3 = Endpoint::new("192.168.1.1".to_string(), 8831); - mock_router - .routing_tables - .insert(test_table0.to_string(), test_endpoint0.clone()); - mock_router - .routing_tables - .insert(test_table1.to_string(), test_endpoint1.clone()); - mock_router - .routing_tables - .insert(test_table2.to_string(), test_endpoint2.clone()); - mock_router - .routing_tables - .insert(test_table3.to_string(), test_endpoint3.clone()); - let mock_router = Arc::new(mock_router); - - let local_endpoint = test_endpoint3.clone(); - let forwarder = Forwarder::new_with_client_builder( - config, - mock_router.clone() as _, - local_endpoint.clone(), - MockClientBuilder, - ); - - let make_forward_req = |table: &str| { - let query_request = SqlQueryRequest { - context: Some(RequestContext { - database: DEFAULT_SCHEMA.to_string(), - }), - 
tables: vec![table.to_string()], - sql: "".to_string(), - }; - ForwardRequest { - schema: DEFAULT_SCHEMA.to_string(), - table: table.to_string(), - req: query_request.into_request(), - forwarded_from: None, - authorization: None, - } - }; - - let do_rpc = |_client, req: tonic::Request, endpoint: &Endpoint| { - let req = req.into_inner(); - assert_eq!(req.context.unwrap().database, DEFAULT_SCHEMA); - let expect_endpoint = mock_router.routing_tables.get(&req.tables[0]).unwrap(); - assert_eq!(expect_endpoint, endpoint); - - let resp = SqlQueryResponse::default(); - Box::new(async move { Ok(resp) }.boxed()) as _ - }; - - for test_table in [test_table0, test_table1, test_table2, test_table3] { - let endpoint = mock_router.routing_tables.get(test_table).unwrap(); - let forward_req = make_forward_req(test_table); - let res: Result> = - forwarder.forward(forward_req, do_rpc).await; - let forward_res = res.expect("should succeed in forwarding"); - if endpoint == &local_endpoint { - assert!(forwarder.is_local_endpoint(endpoint)); - assert!( - matches!(forward_res, ForwardResult::Local), - "endpoint is:{endpoint:?}" - ); - } else { - assert!(!forwarder.is_local_endpoint(endpoint)); - assert!( - matches!(forward_res, ForwardResult::Forwarded(_)), - "endpoint is:{endpoint:?}" - ); - } - } - } -} diff --git a/src/proxy/src/grpc/mod.rs b/src/proxy/src/grpc/mod.rs deleted file mode 100644 index 4fa0f47ece..0000000000 --- a/src/proxy/src/grpc/mod.rs +++ /dev/null @@ -1,21 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -mod prom_query; -mod route; -mod sql_query; -mod write; diff --git a/src/proxy/src/grpc/prom_query.rs b/src/proxy/src/grpc/prom_query.rs deleted file mode 100644 index e596f61a1d..0000000000 --- a/src/proxy/src/grpc/prom_query.rs +++ /dev/null @@ -1,517 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
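The removed `grpc/prom_query.rs` below converts query output into Prometheus `TimeSeries` by grouping `(tsid, timestamp, value)` rows into one sample list per series id. A simplified std-only sketch of that grouping step; the `Sample` struct here is a hypothetical placeholder for the protobuf message:

```rust
use std::collections::HashMap;

/// Hypothetical stand-in for the protobuf Sample message.
#[derive(Debug, PartialEq)]
struct Sample {
    timestamp_ms: i64,
    value: f64,
}

/// Group flat rows by series id (tsid), preserving row order within each series.
fn group_samples_by_tsid(rows: Vec<(u64, i64, f64)>) -> HashMap<u64, Vec<Sample>> {
    let mut series: HashMap<u64, Vec<Sample>> = HashMap::new();
    for (tsid, timestamp_ms, value) in rows {
        series
            .entry(tsid)
            .or_insert_with(Vec::new)
            .push(Sample { timestamp_ms, value });
    }
    series
}

fn main() {
    let rows = vec![(1, 1000, 0.5), (2, 1000, 1.5), (1, 2000, 0.7)];
    let series = group_samples_by_tsid(rows);
    assert_eq!(series[&1].len(), 2);
    assert_eq!(series[&2].len(), 1);
}
```

In the removed code the per-tsid tag maps are collected alongside the samples and turned into `Label` pairs on each `TimeSeries`.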
- -use std::{ - collections::{BTreeMap, HashMap}, - sync::Arc, - time::Instant, -}; - -use common_types::{ - datum::DatumKind, - record_batch::RecordBatch, - schema::{RecordSchema, TSID_COLUMN}, -}; -use generic_error::BoxError; -use horaedbproto::{ - common::ResponseHeader, - prometheus::{Label, Sample, TimeSeries}, - storage::{PrometheusQueryRequest, PrometheusQueryResponse}, -}; -use http::StatusCode; -use interpreters::{interpreter::Output, RecordBatchVec}; -use logger::info; -use query_frontend::{ - frontend::{Context as SqlContext, Error as FrontendError, Frontend}, - promql::ColumnNames, - provider::CatalogMetaProvider, -}; -use snafu::{ensure, OptionExt, ResultExt}; - -use crate::{ - error, - error::{ErrNoCause, ErrWithCause, Error, Result}, - Context, Proxy, -}; - -impl Proxy { - /// Implement prometheus query in grpc service. - /// Note: not used in prod now. - pub async fn handle_prom_query( - &self, - ctx: Context, - req: PrometheusQueryRequest, - ) -> PrometheusQueryResponse { - self.hotspot_recorder.inc_promql_reqs(&req).await; - match self.handle_prom_query_internal(ctx, req).await { - Err(e) => { - error!("Failed to handle prom query, err:{e}"); - PrometheusQueryResponse { - header: Some(error::build_err_header(e)), - ..Default::default() - } - } - Ok(v) => v, - } - } - - async fn handle_prom_query_internal( - &self, - ctx: Context, - req: PrometheusQueryRequest, - ) -> Result { - let request_id = ctx.request_id; - let begin_instant = Instant::now(); - let deadline = ctx.timeout.map(|t| begin_instant + t); - let req_ctx = req.context.context(ErrNoCause { - msg: "Missing context", - code: StatusCode::BAD_REQUEST, - })?; - - let schema = req_ctx.database; - let catalog = self.instance.catalog_manager.default_catalog_name(); - - info!( - "Grpc handle prom query begin, catalog:{catalog}, schema:{schema}, request_id:{request_id}", - ); - - let provider = CatalogMetaProvider { - manager: self.instance.catalog_manager.clone(), - default_catalog: catalog, - default_schema: &schema, - function_registry: &*self.instance.function_registry, - }; - let frontend = Frontend::new(provider, self.instance.dyn_config.fronted.clone()); - - let mut sql_ctx = SqlContext::new(request_id.clone(), deadline); - let expr = frontend - .parse_promql(&mut sql_ctx, req.expr) - .box_err() - .context(ErrWithCause { - code: StatusCode::BAD_REQUEST, - msg: "Invalid request", - })?; - - let (plan, column_name) = frontend.promql_expr_to_plan(&sql_ctx, expr).map_err(|e| { - let code = if is_table_not_found_error(&e) { - StatusCode::NOT_FOUND - } else { - StatusCode::INTERNAL_SERVER_ERROR - }; - Error::ErrWithCause { - code, - msg: "Failed to create plan".to_string(), - source: Box::new(e), - } - })?; - - self.instance - .limiter - .try_limit(&plan) - .box_err() - .context(ErrWithCause { - code: StatusCode::FORBIDDEN, - msg: "Query is blocked", - })?; - - let output = self - .execute_plan(request_id.clone(), catalog, &schema, plan, deadline) - .await - .box_err() - .with_context(|| ErrWithCause { - code: StatusCode::INTERNAL_SERVER_ERROR, - msg: "Failed to execute plan", - })?; - - let resp = convert_output(output, column_name) - .box_err() - .context(ErrWithCause { - code: StatusCode::INTERNAL_SERVER_ERROR, - msg: "Failed to convert output", - })?; - - Ok(resp) - } -} - -fn is_table_not_found_error(e: &FrontendError) -> bool { - matches!(&e, FrontendError::CreatePlan { source } - if matches!(source, query_frontend::planner::Error::BuildPromPlanError { source } - if matches!(source, 
query_frontend::promql::Error::TableNotFound { .. }))) -} - -fn convert_output( - output: Output, - column_name: Arc, -) -> Result { - match output { - Output::Records(records) => convert_records(records, column_name), - _ => unreachable!(), - } -} - -fn convert_records( - records: RecordBatchVec, - column_name: Arc, -) -> Result { - if records.is_empty() { - return Ok(empty_ok_resp()); - } - - let mut tsid_to_tags = HashMap::new(); - let mut tsid_to_samples = HashMap::new(); - - // TODO(chenxiang): benchmark iterator by columns - for record_batch in records { - let converter = RecordConverter::try_new(&column_name, record_batch.schema())?; - - for (tsid, samples) in converter.convert_to_samples(record_batch, &mut tsid_to_tags) { - tsid_to_samples - .entry(tsid) - .or_insert_with(Vec::new) - .extend(samples) - } - } - - let series_set = tsid_to_samples - .into_iter() - .map(|(tsid, samples)| { - let tags = tsid_to_tags - .get(&tsid) - .expect("ensured in convert_to_samples"); - let labels = tags - .iter() - .map(|(k, v)| Label { - name: k.clone(), - value: v.clone(), - }) - .collect::>(); - - TimeSeries { labels, samples } - }) - .collect::>(); - - let mut resp = empty_ok_resp(); - resp.timeseries = series_set; - Ok(resp) -} - -fn empty_ok_resp() -> PrometheusQueryResponse { - let header = ResponseHeader { - code: StatusCode::OK.as_u16() as u32, - ..Default::default() - }; - - PrometheusQueryResponse { - header: Some(header), - ..Default::default() - } -} - -/// RecordConverter convert RecordBatch to time series format required by PromQL -struct RecordConverter { - tsid_idx: usize, - timestamp_idx: usize, - tags_idx: BTreeMap, // tag_key -> column_index - field_idx: usize, -} - -impl RecordConverter { - fn try_new(column_name: &ColumnNames, record_schema: &RecordSchema) -> Result { - let tsid_idx = record_schema - .index_of(TSID_COLUMN) - .with_context(|| ErrNoCause { - code: StatusCode::BAD_REQUEST, - msg: "Failed to find Tsid column", - })?; - let timestamp_idx = record_schema - .index_of(&column_name.timestamp) - .context(ErrNoCause { - code: StatusCode::BAD_REQUEST, - msg: "Failed to find Timestamp column", - })?; - ensure!( - record_schema.column(timestamp_idx).data_type == DatumKind::Timestamp, - ErrNoCause { - code: StatusCode::BAD_REQUEST, - msg: "Timestamp column should be timestamp type" - } - ); - let field_idx = record_schema - .index_of(&column_name.field) - .with_context(|| ErrNoCause { - code: StatusCode::BAD_REQUEST, - msg: format!("Failed to find {} column", column_name.field), - })?; - let field_type = record_schema.column(field_idx).data_type; - ensure!( - field_type.is_f64_castable(), - ErrNoCause { - code: StatusCode::BAD_REQUEST, - msg: format!("Field type must be f64-compatibile type, current:{field_type}") - } - ); - - let tags_idx: BTreeMap<_, _> = column_name - .tag_keys - .iter() - .filter_map(|tag_key| { - record_schema - .index_of(tag_key) - .map(|idx| (tag_key.to_string(), idx)) - }) - .collect(); - - Ok(Self { - tsid_idx, - timestamp_idx, - tags_idx, - field_idx, - }) - } - - fn convert_to_samples( - &self, - record_batch: RecordBatch, - tsid_to_tags: &mut HashMap>, - ) -> HashMap> { - let mut tsid_to_samples = HashMap::new(); - - let tsid_cols = record_batch.column(self.tsid_idx); - let timestamp_cols = record_batch.column(self.timestamp_idx); - let field_cols = record_batch.column(self.field_idx); - for row_idx in 0..record_batch.num_rows() { - let timestamp = timestamp_cols - .datum(row_idx) - .as_timestamp() - .expect("checked in try_new") - .as_i64(); - 
let field = field_cols - .datum(row_idx) - .as_f64() - .expect("checked in try_new"); - let tsid = tsid_cols - .datum(row_idx) - .as_u64() - .expect("checked in try_new"); - - tsid_to_tags.entry(tsid).or_insert_with(|| { - self.tags_idx - .iter() - .filter_map(|(tag_key, col_idx)| { - // TODO(chenxiang): avoid clone? - record_batch - .column(*col_idx) - .datum(row_idx) - .as_str() - .and_then(|tag_value| { - // filter empty tag value out, since Prometheus don't allow it. - if tag_value.is_empty() { - None - } else { - Some((tag_key.clone(), tag_value.to_string())) - } - }) - }) - .collect::>() - }); - - let samples = tsid_to_samples.entry(tsid).or_insert_with(Vec::new); - let sample = Sample { - value: field, - timestamp, - }; - samples.push(sample); - } - - tsid_to_samples - } -} - -#[cfg(test)] -mod tests { - - use common_types::{ - column_block::{ColumnBlock, ColumnBlockBuilder}, - column_schema, - datum::{Datum, DatumKind}, - row::Row, - schema, - string::StringBytes, - time::Timestamp, - }; - - use super::*; - - fn build_schema() -> schema::Schema { - schema::Builder::new() - .auto_increment_column_id(true) - .add_key_column( - column_schema::Builder::new("timestamp".to_string(), DatumKind::Timestamp) - .build() - .unwrap(), - ) - .unwrap() - .add_key_column( - column_schema::Builder::new(TSID_COLUMN.to_string(), DatumKind::UInt64) - .build() - .unwrap(), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("field1".to_string(), DatumKind::Double) - .build() - .unwrap(), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("tag1".to_string(), DatumKind::String) - .is_tag(true) - .build() - .unwrap(), - ) - .unwrap() - .add_normal_column( - column_schema::Builder::new("tag_dictionary".to_string(), DatumKind::String) - .is_tag(true) - .is_dictionary(true) - .is_nullable(true) - .build() - .unwrap(), - ) - .unwrap() - .primary_key_indexes(vec![0, 1]) - .build() - .unwrap() - } - - fn build_column_block() -> Vec { - let build_row = |ts: i64, tsid: u64, field1: f64, field2: &str, dic: Option<&str>| -> Row { - let datums = vec![ - Datum::Timestamp(Timestamp::new(ts)), - Datum::UInt64(tsid), - Datum::Double(field1), - Datum::String(StringBytes::from(field2)), - dic.map(|v| Datum::String(StringBytes::from(v))) - .unwrap_or(Datum::Null), - ]; - - Row::from_datums(datums) - }; - - let rows = vec![ - build_row(1000001, 1, 10.0, "v5", Some("d1")), - build_row(1000002, 1, 11.0, "v5", None), - build_row(1000000, 2, 10.0, "v4", Some("d2")), - build_row(1000000, 3, 10.0, "v3", None), - ]; - - let mut builder = ColumnBlockBuilder::with_capacity(&DatumKind::Timestamp, 2, false); - for row in &rows { - builder.append(row[0].clone()).unwrap(); - } - let timestamp_block = builder.build(); - - let mut builder = ColumnBlockBuilder::with_capacity(&DatumKind::UInt64, 2, false); - for row in &rows { - builder.append(row[1].clone()).unwrap(); - } - let tsid_block = builder.build(); - - let mut builder = ColumnBlockBuilder::with_capacity(&DatumKind::Double, 2, false); - for row in &rows { - builder.append(row[2].clone()).unwrap(); - } - let field_block = builder.build(); - - let mut builder = ColumnBlockBuilder::with_capacity(&DatumKind::String, 2, false); - for row in &rows { - builder.append(row[3].clone()).unwrap(); - } - let tag_block = builder.build(); - - let mut builder = ColumnBlockBuilder::with_capacity(&DatumKind::String, 2, true); - for row in &rows { - builder.append(row[4].clone()).unwrap(); - } - let dictionary_block = builder.build(); - - vec![ - timestamp_block, - 
tsid_block, - field_block, - tag_block, - dictionary_block, - ] - } - - fn make_sample(timestamp: i64, value: f64) -> Sample { - Sample { value, timestamp } - } - - fn make_tags(tags: Vec<(String, String)>) -> BTreeMap { - tags.into_iter().collect::>() - } - - #[test] - fn test_record_convert() { - let schema = build_schema(); - let record_schema = schema.to_record_schema(); - let column_blocks = build_column_block(); - let record_batch = RecordBatch::new(record_schema, column_blocks, 4).unwrap(); - - let column_name = ColumnNames { - timestamp: "timestamp".to_string(), - tag_keys: vec!["tag1".to_string(), "tag_dictionary".to_string()], - field: "field1".to_string(), - }; - let converter = RecordConverter::try_new(&column_name, &schema.to_record_schema()).unwrap(); - let mut tsid_to_tags = HashMap::new(); - let tsid_to_samples = converter.convert_to_samples(record_batch, &mut tsid_to_tags); - - assert_eq!( - tsid_to_samples.get(&1).unwrap().clone(), - vec![make_sample(1000001, 10.0), make_sample(1000002, 11.0)] - ); - assert_eq!( - tsid_to_samples.get(&2).unwrap().clone(), - vec![make_sample(1000000, 10.0)] - ); - assert_eq!( - tsid_to_samples.get(&3).unwrap().clone(), - vec![make_sample(1000000, 10.0)] - ); - assert_eq!( - tsid_to_tags.get(&1).unwrap().clone(), - make_tags(vec![ - ("tag1".to_string(), "v5".to_string()), - ("tag_dictionary".to_string(), "d1".to_string()) - ]) - ); - assert_eq!( - tsid_to_tags.get(&2).unwrap().clone(), - make_tags(vec![ - ("tag1".to_string(), "v4".to_string()), - ("tag_dictionary".to_string(), "d2".to_string()) - ]) - ); - assert_eq!( - tsid_to_tags.get(&3).unwrap().clone(), - make_tags(vec![("tag1".to_string(), "v3".to_string())]) - ); - } -} diff --git a/src/proxy/src/grpc/route.rs b/src/proxy/src/grpc/route.rs deleted file mode 100644 index 0955cec2bd..0000000000 --- a/src/proxy/src/grpc/route.rs +++ /dev/null @@ -1,45 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use horaedbproto::storage::{RouteRequest as RouteRequestPb, RouteResponse}; -use router::RouteRequest; - -use crate::{error, metrics::GRPC_HANDLER_COUNTER_VEC, Context, Proxy}; - -impl Proxy { - pub async fn handle_route(&self, _ctx: Context, req: RouteRequestPb) -> RouteResponse { - let request = RouteRequest::new(req, true); - let routes = self.route(request).await; - - let mut resp = RouteResponse::default(); - match routes { - Err(e) => { - GRPC_HANDLER_COUNTER_VEC.route_failed.inc(); - - error!("Failed to handle route, err:{e}"); - resp.header = Some(error::build_err_header(e)); - } - Ok(v) => { - GRPC_HANDLER_COUNTER_VEC.route_succeeded.inc(); - - resp.header = Some(error::build_ok_header()); - resp.routes = v; - } - } - resp - } -} diff --git a/src/proxy/src/grpc/sql_query.rs b/src/proxy/src/grpc/sql_query.rs deleted file mode 100644 index 16c6201703..0000000000 --- a/src/proxy/src/grpc/sql_query.rs +++ /dev/null @@ -1,408 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Query handler - -use std::sync::Arc; - -use arrow_ext::ipc::{CompressOptions, CompressionMethod, RecordBatchesEncoder}; -use common_types::record_batch::RecordBatch; -use futures::{stream, stream::BoxStream, FutureExt, StreamExt}; -use generic_error::BoxError; -use horaedbproto::{ - common::ResponseHeader, - storage::{ - arrow_payload, sql_query_response, storage_service_client::StorageServiceClient, - ArrowPayload, SqlQueryRequest, SqlQueryResponse, - }, -}; -use http::StatusCode; -use interpreters::interpreter::Output; -use logger::{error, warn}; -use router::endpoint::Endpoint; -use snafu::ResultExt; -use tonic::{transport::Channel, IntoRequest}; - -use crate::{ - error::{self, ErrNoCause, ErrWithCause, Error, Result}, - forward::{ForwardRequest, ForwardResult}, - metrics::GRPC_HANDLER_COUNTER_VEC, - read::SqlResponse, - Context, Proxy, -}; - -impl Proxy { - pub async fn handle_sql_query(&self, ctx: Context, req: SqlQueryRequest) -> SqlQueryResponse { - // Incoming query maybe larger than query_failed + query_succeeded for some - // corner case, like lots of time-consuming queries come in at the same time and - // cause server OOM. 
- GRPC_HANDLER_COUNTER_VEC.incoming_query.inc(); - - self.hotspot_recorder.inc_sql_query_reqs(&req).await; - match self.handle_sql_query_internal(&ctx, &req).await { - Err(err) => { - error!("Failed to handle sql query, ctx:{ctx:?}, err:{err}"); - GRPC_HANDLER_COUNTER_VEC.query_failed.inc(); - let header = ResponseHeader { - code: err.code().as_u16() as u32, - error: format!("{} sql:{}", err.error_message(), req.sql), - }; - - SqlQueryResponse { - header: Some(header), - ..Default::default() - } - } - Ok(v) => { - GRPC_HANDLER_COUNTER_VEC.query_succeeded.inc(); - v - } - } - } - - async fn handle_sql_query_internal( - &self, - ctx: &Context, - req: &SqlQueryRequest, - ) -> Result { - if req.context.is_none() { - return ErrNoCause { - code: StatusCode::BAD_REQUEST, - msg: "Database is not set", - } - .fail(); - } - - let req_context = req.context.as_ref().unwrap(); - let schema = &req_context.database; - - let result = match self.request_notifiers.clone() { - Some(request_notifiers) => { - self.dedup_handle_sql( - ctx, - schema, - &req.sql, - request_notifiers, - self.sub_table_access_perm.enable_others, - ) - .await? - } - None => { - self.handle_sql( - ctx, - schema, - &req.sql, - self.sub_table_access_perm.enable_others, - true, - ) - .await? - } - }; - - match result { - SqlResponse::Forwarded(resp) => Ok(resp), - SqlResponse::Local(output) => convert_output(&output, self.resp_compress_min_length), - } - } - - pub async fn handle_stream_sql_query( - self: Arc, - ctx: Context, - req: SqlQueryRequest, - ) -> BoxStream<'static, SqlQueryResponse> { - GRPC_HANDLER_COUNTER_VEC.stream_query.inc(); - self.hotspot_recorder.inc_sql_query_reqs(&req).await; - match self.clone().handle_stream_query_internal(&ctx, &req).await { - Err(e) => stream::once(async { - error!("Failed to handle stream sql query, err:{e}"); - GRPC_HANDLER_COUNTER_VEC.stream_query_failed.inc(); - SqlQueryResponse { - header: Some(error::build_err_header(e)), - ..Default::default() - } - }) - .boxed(), - Ok(v) => { - GRPC_HANDLER_COUNTER_VEC.stream_query_succeeded.inc(); - v - } - } - } - - async fn handle_stream_query_internal( - self: Arc, - ctx: &Context, - req: &SqlQueryRequest, - ) -> Result> { - if req.context.is_none() { - return ErrNoCause { - code: StatusCode::BAD_REQUEST, - msg: "Database is not set", - } - .fail(); - } - - let req_context = req.context.as_ref().unwrap(); - let schema = &req_context.database; - let req = match self.clone().maybe_forward_stream_sql_query(ctx, req).await { - Some(resp) => match resp { - ForwardResult::Forwarded(resp) => return resp, - ForwardResult::Local => req, - }, - None => req, - }; - - let resp_compress_min_length = self.resp_compress_min_length; - let output = self - .as_ref() - .fetch_sql_query_output( - ctx, - schema, - &req.sql, - self.sub_table_access_perm.enable_others, - true, - ) - .await?; - - match output { - Output::AffectedRows(rows) => { - GRPC_HANDLER_COUNTER_VEC - .query_affected_row - .inc_by(rows as u64); - - let resp = QueryResponseBuilder::with_ok_header().build_with_affected_rows(rows); - - Ok(Box::pin(stream::once(async { resp }))) - } - Output::Records(batches) => { - let mut num_rows = 0; - let mut results = Vec::with_capacity(batches.len()); - for batch in &batches { - let resp = { - let mut writer = QueryResponseWriter::new(resp_compress_min_length); - writer.write(batch)?; - writer.finish() - }?; - results.push(resp); - num_rows += batch.num_rows(); - } - - GRPC_HANDLER_COUNTER_VEC - .query_succeeded_row - .inc_by(num_rows as u64); - - 
Ok(Box::pin(stream::iter(results))) - } - } - } - - async fn maybe_forward_stream_sql_query( - self: Arc, - ctx: &Context, - req: &SqlQueryRequest, - ) -> Option, Error>> { - if req.tables.len() != 1 { - warn!("Unable to forward sql query without exactly one table, req:{req:?}",); - - return None; - } - - let req_ctx = req.context.as_ref().unwrap(); - let forward_req = ForwardRequest { - schema: req_ctx.database.clone(), - table: req.tables[0].clone(), - req: req.clone().into_request(), - forwarded_from: ctx.forwarded_from.clone(), - authorization: ctx.authorization.clone(), - }; - let do_query = |mut client: StorageServiceClient, - request: tonic::Request, - _: &Endpoint| { - let query = async move { - client - .stream_sql_query(request) - .await - .map(|resp| resp.into_inner().boxed()) - .box_err() - .context(ErrWithCause { - code: StatusCode::INTERNAL_SERVER_ERROR, - msg: "Forwarded stream sql query failed", - }) - .map(|stream| { - stream - .map(|item| { - item.box_err() - .context(ErrWithCause { - code: StatusCode::INTERNAL_SERVER_ERROR, - msg: "Fail to fetch stream sql query response", - }) - .unwrap_or_else(|e| SqlQueryResponse { - header: Some(error::build_err_header(e)), - ..Default::default() - }) - }) - .boxed() - }) - } - .boxed(); - - Box::new(query) as _ - }; - - let forward_result = self.forwarder.forward(forward_req, do_query).await; - - match forward_result { - Ok(forward_res) => Some(forward_res), - Err(e) => { - error!("Failed to forward stream sql req but the error is ignored, err:{e}"); - None - } - } - } -} - -// TODO(chenxiang): Output can have both `rows` and `affected_rows` -pub fn convert_output( - output: &Output, - resp_compress_min_length: usize, -) -> Result { - match output { - Output::Records(batches) => { - let mut writer = QueryResponseWriter::new(resp_compress_min_length); - writer.write_batches(batches)?; - let mut num_rows = 0; - for batch in batches { - num_rows += batch.num_rows(); - } - GRPC_HANDLER_COUNTER_VEC - .query_succeeded_row - .inc_by(num_rows as u64); - writer.finish() - } - Output::AffectedRows(rows) => { - GRPC_HANDLER_COUNTER_VEC - .query_affected_row - .inc_by(*rows as u64); - Ok(QueryResponseBuilder::with_ok_header().build_with_affected_rows(*rows)) - } - } -} - -/// Builder for building [`SqlQueryResponse`]. -#[derive(Debug, Default)] -pub struct QueryResponseBuilder { - header: ResponseHeader, -} - -impl QueryResponseBuilder { - pub fn with_ok_header() -> Self { - let header = ResponseHeader { - code: StatusCode::OK.as_u16() as u32, - ..Default::default() - }; - Self { header } - } - - pub fn build_with_affected_rows(self, affected_rows: usize) -> SqlQueryResponse { - let output = Some(sql_query_response::Output::AffectedRows( - affected_rows as u32, - )); - SqlQueryResponse { - header: Some(self.header), - output, - } - } - - pub fn build_with_empty_arrow_payload(self) -> SqlQueryResponse { - let payload = ArrowPayload { - record_batches: Vec::new(), - compression: arrow_payload::Compression::None as i32, - }; - self.build_with_arrow_payload(payload) - } - - pub fn build_with_arrow_payload(self, payload: ArrowPayload) -> SqlQueryResponse { - let output = Some(sql_query_response::Output::Arrow(payload)); - SqlQueryResponse { - header: Some(self.header), - output, - } - } -} - -/// Writer for encoding multiple [`RecordBatch`]es to the [`SqlQueryResponse`]. -/// -/// Whether to do compression depends on the size of the encoded bytes. 
-pub struct QueryResponseWriter { - encoder: RecordBatchesEncoder, -} - -impl QueryResponseWriter { - pub fn new(compress_min_length: usize) -> Self { - let compress_opts = CompressOptions { - compress_min_length, - method: CompressionMethod::Zstd, - }; - Self { - encoder: RecordBatchesEncoder::new(compress_opts), - } - } - - pub fn write(&mut self, batch: &RecordBatch) -> Result<()> { - if batch.is_empty() { - return Ok(()); - } - - self.encoder - .write(batch.as_arrow_record_batch()) - .box_err() - .context(ErrWithCause { - code: StatusCode::INTERNAL_SERVER_ERROR, - msg: "Failed to encode record batch", - }) - } - - pub fn write_batches(&mut self, record_batch: &[RecordBatch]) -> Result<()> { - for batch in record_batch { - self.write(batch)?; - } - - Ok(()) - } - - pub fn finish(self) -> Result { - let compress_output = self.encoder.finish().box_err().context(ErrWithCause { - code: StatusCode::INTERNAL_SERVER_ERROR, - msg: "Failed to encode record batch", - })?; - - if compress_output.payload.is_empty() { - return Ok(QueryResponseBuilder::with_ok_header().build_with_empty_arrow_payload()); - } - - let compression = match compress_output.method { - CompressionMethod::None => arrow_payload::Compression::None, - CompressionMethod::Zstd => arrow_payload::Compression::Zstd, - }; - let resp = QueryResponseBuilder::with_ok_header().build_with_arrow_payload(ArrowPayload { - record_batches: vec![compress_output.payload], - compression: compression as i32, - }); - - Ok(resp) - } -} diff --git a/src/proxy/src/grpc/write.rs b/src/proxy/src/grpc/write.rs deleted file mode 100644 index e1609aaca4..0000000000 --- a/src/proxy/src/grpc/write.rs +++ /dev/null @@ -1,61 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use horaedbproto::storage::{WriteRequest, WriteResponse}; - -use crate::{error, error::build_ok_header, metrics::GRPC_HANDLER_COUNTER_VEC, Context, Proxy}; - -impl Proxy { - pub async fn handle_write(&self, ctx: Context, req: WriteRequest) -> WriteResponse { - self.hotspot_recorder.inc_write_reqs(&req).await; - - let mut num_rows = 0; - for table_request in &req.table_requests { - for entry in &table_request.entries { - num_rows += entry.field_groups.len(); - } - } - - match self.handle_write_internal(ctx, req).await { - Err(e) => { - error!("Failed to handle write, err:{e}"); - GRPC_HANDLER_COUNTER_VEC.write_failed.inc(); - GRPC_HANDLER_COUNTER_VEC - .write_failed_row - .inc_by(num_rows as u64); - WriteResponse { - header: Some(error::build_err_header(e)), - ..Default::default() - } - } - Ok(v) => { - GRPC_HANDLER_COUNTER_VEC.write_succeeded.inc(); - GRPC_HANDLER_COUNTER_VEC - .write_failed_row - .inc_by(v.failed as u64); - GRPC_HANDLER_COUNTER_VEC - .write_succeeded_row - .inc_by(v.success as u64); - WriteResponse { - header: Some(build_ok_header()), - success: v.success, - failed: v.failed, - } - } - } - } -} diff --git a/src/proxy/src/handlers/admin.rs b/src/proxy/src/handlers/admin.rs deleted file mode 100644 index 57107db162..0000000000 --- a/src/proxy/src/handlers/admin.rs +++ /dev/null @@ -1,79 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::collections::BTreeSet; - -use crate::{handlers::prelude::*, limiter::BlockRule}; - -#[derive(Debug, Deserialize)] -pub enum Operation { - Add, - Set, - Remove, -} - -#[derive(Debug, Deserialize)] -pub struct BlockRequest { - operation: Operation, - write_block_list: Vec, - read_block_list: Vec, - block_rules: Vec, -} - -#[derive(Serialize)] -pub struct BlockResponse { - write_block_list: BTreeSet, - read_block_list: BTreeSet, - block_rules: BTreeSet, -} - -pub async fn handle_block( - _ctx: RequestContext, - instance: InstanceRef, - request: BlockRequest, -) -> Result { - let limiter = &instance.limiter; - match request.operation { - Operation::Add => { - limiter.add_write_block_list(request.write_block_list); - limiter.add_read_block_list(request.read_block_list); - limiter.add_block_rules(request.block_rules); - } - Operation::Set => { - limiter.set_write_block_list(request.write_block_list); - limiter.set_read_block_list(request.read_block_list); - limiter.set_block_rules(request.block_rules); - } - Operation::Remove => { - limiter.remove_write_block_list(request.write_block_list); - limiter.remove_read_block_list(request.read_block_list); - limiter.remove_block_rules(&request.block_rules); - } - } - - Ok(BlockResponse { - write_block_list: limiter - .get_write_block_list() - .into_iter() - .collect::>(), - read_block_list: limiter - .get_read_block_list() - .into_iter() - .collect::>(), - block_rules: limiter.get_block_rules().into_iter().collect(), - }) -} diff --git a/src/proxy/src/handlers/error.rs b/src/proxy/src/handlers/error.rs deleted file mode 100644 index 6247ffb00e..0000000000 --- a/src/proxy/src/handlers/error.rs +++ /dev/null @@ -1,93 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Error of handlers - -use macros::define_result; -use snafu::{Backtrace, Snafu}; -use warp::reject::Reject; - -use crate::limiter; -// TODO(yingwen): Avoid printing huge sql string -// TODO(yingwen): Maybe add an error type to sql sub mod - -#[derive(Debug, Snafu)] -#[snafu(visibility(pub(crate)))] -pub enum Error { - #[snafu(display("Failed to parse sql, err:{}", source))] - ParseSql { - source: query_frontend::frontend::Error, - }, - - #[snafu(display("Failed to create plan, query:{}, err:{}", query, source))] - CreatePlan { - query: String, - source: query_frontend::frontend::Error, - }, - - #[snafu(display( - "Only support execute one statement now, current num:{}, query:{}.\nBacktrace:\n{}", - len, - query, - backtrace, - ))] - TooMuchStmt { - len: usize, - query: String, - backtrace: Backtrace, - }, - - #[snafu(display("Failed to execute interpreter, query:{}, err:{}", query, source))] - InterpreterExec { - query: String, - source: interpreters::interpreter::Error, - }, - - #[snafu(display( - "Failed to convert arrow to string, query:{}, err:{}.\nBacktrace:\n{}", - query, - source, - backtrace - ))] - ArrowToString { - query: String, - source: arrow::error::ArrowError, - backtrace: Backtrace, - }, - - #[snafu(display("Query limited by block list, query:{}, err:{}", query, source))] - QueryBlock { - query: String, - source: limiter::Error, - }, - - #[snafu(display( - "Query timeout, query:{}, err:{}\nBacktrace:\n{}", - query, - source, - backtrace - ))] - QueryTimeout { - query: String, - source: tokio::time::error::Elapsed, - backtrace: Backtrace, - }, -} - -define_result!(Error); - -impl Reject for Error {} diff --git a/src/proxy/src/handlers/mod.rs b/src/proxy/src/handlers/mod.rs deleted file mode 100644 index 1b96784e92..0000000000 --- a/src/proxy/src/handlers/mod.rs +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Request handlers - -pub mod admin; -mod error; - -mod prelude { - pub use serde::{Deserialize, Serialize}; - - pub use crate::{context::RequestContext, handlers::error::Result, instance::InstanceRef}; -} diff --git a/src/proxy/src/hotspot.rs b/src/proxy/src/hotspot.rs deleted file mode 100644 index 34d6e97fdf..0000000000 --- a/src/proxy/src/hotspot.rs +++ /dev/null @@ -1,462 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! hotspot recorder -use std::{fmt::Write, sync::Arc}; - -use horaedbproto::storage::{ - PrometheusQueryRequest, RequestContext, SqlQueryRequest, WriteRequest, -}; -use logger::{info, warn}; -use runtime::Runtime; -use serde::{Deserialize, Serialize}; -use spin::Mutex as SpinMutex; -use time_ext::ReadableDuration; -use timed_task::TimedTask; -use tokio::sync::mpsc::{self, Sender}; - -use crate::{hotspot_lru::HotspotLru, util}; - -type QueryKey = String; -type WriteKey = String; -const TAG: &str = "hotspot autodump"; -const RECORDER_CHANNEL_CAP: usize = 64 * 1024; - -#[derive(Debug, Clone, Deserialize, Serialize)] -#[serde(default)] -pub struct Config { - /// Max items size for query hotspot - query_cap: Option, - /// Max items size for write hotspot - write_cap: Option, - /// The hotspot records will be auto dumped if set. - enable_auto_dump: bool, - /// The interval between two auto dumps - auto_dump_interval: ReadableDuration, - /// The number of items for auto dump - auto_dump_num_items: usize, -} - -impl Default for Config { - fn default() -> Self { - Self { - query_cap: Some(10_000), - write_cap: Some(10_000), - auto_dump_interval: ReadableDuration::minutes(1), - enable_auto_dump: true, - auto_dump_num_items: 10, - } - } -} - -pub enum Message { - Query(QueryKey), - Write { - key: WriteKey, - row_count: usize, - field_count: usize, - }, -} - -#[derive(Clone)] -pub struct HotspotRecorder { - tx: Arc>, - stat: HotspotStat, -} - -#[derive(Clone)] -pub struct HotspotStat { - hotspot_query: Option>>>, - hotspot_write: Option>>>, - hotspot_field_write: Option>>>, -} - -impl HotspotStat { - /// return read count / write row count / write field count - pub fn dump(&self) -> Dump { - Dump { - read_hots: self - .pop_read_hots() - .map_or_else(Vec::new, HotspotStat::format_hots), - write_hots: self - .pop_write_hots() - .map_or_else(Vec::new, HotspotStat::format_hots), - write_field_hots: self - .pop_write_field_hots() - .map_or_else(Vec::new, HotspotStat::format_hots), - } - } - - fn format_hots(hots: Vec<(String, u64)>) -> Vec { - hots.into_iter() - .map(|(k, v)| format!("metric={k}, heats={v}")) - .collect() - } - - fn pop_read_hots(&self) -> Option> { - HotspotStat::pop_hots(&self.hotspot_query) - } - - fn pop_write_hots(&self) -> Option> { - HotspotStat::pop_hots(&self.hotspot_write) - } - - fn pop_write_field_hots(&self) -> Option> { - HotspotStat::pop_hots(&self.hotspot_field_write) - } - - fn pop_hots(target: &Option>>>) -> Option> { - target.as_ref().map(|hotspot| { - let mut hots = hotspot.lock().pop_all(); - hots.sort_by(|a, b| b.1.cmp(&a.1)); - hots - }) - } -} - -#[derive(Clone)] -pub struct Dump { - pub read_hots: Vec, - pub write_hots: Vec, - pub write_field_hots: Vec, -} - -// TODO: move HotspotRecorder to components dir for reuse. 
-impl HotspotRecorder { - pub fn new(config: Config, runtime: Arc) -> Self { - let hotspot_query = Self::init_lru(config.query_cap); - let hotspot_write = Self::init_lru(config.write_cap); - let hotspot_field_write = Self::init_lru(config.write_cap); - - let stat = HotspotStat { - hotspot_query: hotspot_query.clone(), - hotspot_write: hotspot_write.clone(), - hotspot_field_write: hotspot_field_write.clone(), - }; - - let task_handle = if config.enable_auto_dump { - let interval = config.auto_dump_interval; - let dump_len = config.auto_dump_num_items; - let stat_clone = stat.clone(); - let builder = move || { - let stat_in_builder = stat_clone.clone(); - async move { - let Dump { - read_hots, - write_hots, - write_field_hots, - } = stat_in_builder.dump(); - - read_hots - .into_iter() - .take(dump_len) - .for_each(|hot| info!("{} query {}", TAG, hot)); - write_hots - .into_iter() - .take(dump_len) - .for_each(|hot| info!("{} write rows {}", TAG, hot)); - write_field_hots - .into_iter() - .take(dump_len) - .for_each(|hot| info!("{} write fields {}", TAG, hot)); - } - }; - - Some(TimedTask::start_timed_task( - String::from("hotspot_dump"), - &runtime, - interval.0, - builder, - )) - } else { - None - }; - - let (tx, mut rx) = mpsc::channel(RECORDER_CHANNEL_CAP); - runtime.spawn(async move { - loop { - match rx.recv().await { - None => { - warn!("Hotspot recoder sender stopped"); - if let Some(handle) = task_handle { - handle.stop_task().await.unwrap(); - } - break; - } - Some(msg) => match msg { - Message::Query(read_key) => { - if let Some(hotspot) = &hotspot_query { - hotspot.lock().inc(&read_key, 1); - } - } - Message::Write { - key, - row_count, - field_count, - } => { - if let Some(hotspot) = &hotspot_write { - hotspot.lock().inc(&key, row_count as u64); - } - - if let Some(hotspot) = &hotspot_field_write { - hotspot.lock().inc(&key, field_count as u64); - } - } - }, - } - } - }); - - Self { - tx: Arc::new(tx), - stat, - } - } - - #[inline] - fn init_lru(cap: Option) -> Option>>> { - HotspotLru::new(cap?).map(|lru| Arc::new(SpinMutex::new(lru))) - } - - fn key_prefix(context: &Option) -> String { - let mut prefix = String::new(); - match context { - Some(ctx) => { - // use database as prefix - if !ctx.database.is_empty() { - write!(prefix, "{}/", ctx.database).unwrap(); - } - } - None => {} - } - - prefix - } - - #[inline] - fn table_hot_key(context: &Option, table: &String) -> String { - let prefix = Self::key_prefix(context); - prefix + table - } - - pub async fn inc_sql_query_reqs(&self, req: &SqlQueryRequest) { - if self.stat.hotspot_query.is_none() { - return; - } - - for table in &req.tables { - self.send_msg_or_log( - "inc_query_reqs", - Message::Query(Self::table_hot_key(&req.context, table)), - ) - .await; - } - } - - pub async fn inc_write_reqs(&self, req: &WriteRequest) { - if self.stat.hotspot_write.is_some() && self.stat.hotspot_field_write.is_some() { - for table_request in &req.table_requests { - let hot_key = Self::table_hot_key(&req.context, &table_request.table); - let mut row_count = 0; - let mut field_count = 0; - for entry in &table_request.entries { - row_count += 1; - for field_group in &entry.field_groups { - field_count += field_group.fields.len(); - } - } - self.send_msg_or_log( - "inc_write_reqs", - Message::Write { - key: hot_key, - row_count, - field_count, - }, - ) - .await; - } - } - } - - pub async fn inc_promql_reqs(&self, req: &PrometheusQueryRequest) { - if self.stat.hotspot_query.is_none() { - return; - } - - if let Some(expr) = &req.expr { - if let 
Some(table) = util::table_from_expr(expr) { - let hot_key = Self::table_hot_key(&req.context, &table); - self.send_msg_or_log("inc_query_reqs", Message::Query(hot_key)) - .await - } - } - } - - pub async fn send_msg_or_log(&self, method: &str, msg: Message) { - if let Err(e) = self.tx.send(msg).await { - warn!( - "HotspotRecoder::{} fail to send \ - measurement to recoder, err:{}", - method, e - ); - } - } -} - -#[cfg(test)] -mod test { - use std::{thread, time::Duration}; - - use horaedbproto::{ - storage, - storage::{ - value::Value::StringValue, Field, FieldGroup, Value, WriteSeriesEntry, - WriteTableRequest, - }, - }; - use runtime::Builder; - - fn new_runtime() -> Arc { - let runtime = Builder::default() - .worker_threads(4) - .enable_all() - .build() - .unwrap(); - - Arc::new(runtime) - } - - use super::*; - - #[test] - #[allow(clippy::redundant_clone)] - fn test_hotspot() { - let hotspot_runtime = new_runtime(); - let basic_runtime = new_runtime(); - let runtime = hotspot_runtime.clone(); - basic_runtime.block_on(async move { - let read_cap: Option = Some(3); - let write_cap: Option = Some(3); - let options = Config { - query_cap: read_cap, - write_cap, - enable_auto_dump: false, - auto_dump_interval: ReadableDuration::millis(5000), - auto_dump_num_items: 10, - }; - let recorder = HotspotRecorder::new(options, runtime.clone()); - assert!(recorder.stat.pop_read_hots().unwrap().is_empty()); - assert!(recorder.stat.pop_write_hots().unwrap().is_empty()); - let table = String::from("table1"); - let context = mock_context(); - let req = SqlQueryRequest { - context, - tables: vec![table], - sql: String::from("select * from table1 limit 10"), - }; - - recorder.inc_sql_query_reqs(&req).await; - thread::sleep(Duration::from_millis(100)); - - let vec = recorder.stat.pop_read_hots().unwrap(); - assert_eq!(1, vec.len()); - assert_eq!("public/table1", vec.first().unwrap().0); - }) - } - - #[test] - #[allow(clippy::redundant_clone)] - fn test_hotspot_dump() { - let hotspot_runtime = new_runtime(); - let basic_runtime = new_runtime(); - let runtime = hotspot_runtime.clone(); - basic_runtime.block_on(async move { - let read_cap: Option = Some(10); - let write_cap: Option = Some(10); - let options = Config { - query_cap: read_cap, - write_cap, - enable_auto_dump: false, - auto_dump_interval: ReadableDuration::millis(5000), - auto_dump_num_items: 10, - }; - - let recorder = HotspotRecorder::new(options, runtime.clone()); - - assert!(recorder.stat.pop_read_hots().unwrap().is_empty()); - assert!(recorder.stat.pop_write_hots().unwrap().is_empty()); - - let table = String::from("table1"); - let context = mock_context(); - let query_req = SqlQueryRequest { - context, - tables: vec![table.clone()], - sql: String::from("select * from table1 limit 10"), - }; - recorder.inc_sql_query_reqs(&query_req).await; - - let write_req = WriteRequest { - context: mock_context(), - table_requests: vec![WriteTableRequest { - table, - tag_names: vec![String::from("name")], - field_names: vec![String::from("value1"), String::from("value2")], - entries: vec![WriteSeriesEntry { - tags: vec![storage::Tag { - name_index: 0, - value: Some(Value { - value: Some(StringValue(String::from("name1"))), - }), - }], - field_groups: vec![FieldGroup { - timestamp: 1679647020000, - fields: vec![ - Field { - name_index: 0, - value: Some(Value { value: None }), - }, - Field { - name_index: 1, - value: Some(Value { value: None }), - }, - ], - }], - }], - }], - }; - recorder.inc_write_reqs(&write_req).await; - - 
thread::sleep(Duration::from_millis(100)); - let Dump { - read_hots, - write_hots, - write_field_hots, - } = recorder.stat.dump(); - assert_eq!(vec!["metric=public/table1, heats=1",], write_hots); - assert_eq!(vec!["metric=public/table1, heats=1"], read_hots); - assert_eq!(vec!["metric=public/table1, heats=2",], write_field_hots); - thread::sleep(Duration::from_millis(100)); - }); - drop(hotspot_runtime); - } - - fn mock_context() -> Option { - Some(RequestContext { - database: String::from("public"), - }) - } -} diff --git a/src/proxy/src/hotspot_lru.rs b/src/proxy/src/hotspot_lru.rs deleted file mode 100644 index 265c518c1a..0000000000 --- a/src/proxy/src/hotspot_lru.rs +++ /dev/null @@ -1,122 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! hotspot LRU -use std::{hash::Hash, num::NonZeroUsize}; - -use clru::CLruCache; - -pub struct HotspotLru { - heats: CLruCache, -} - -impl HotspotLru { - /// Creates a new LRU Hotspot that holds at most `cap` items - pub fn new(cap: usize) -> Option> { - NonZeroUsize::new(cap).map(|cap| Self { - heats: CLruCache::new(cap), - }) - } - - /// Incs heat into hotspot cache, If the key already exists it - /// updates its heat value - pub fn inc(&mut self, key: &K, heat: u64) { - match self.heats.get_mut(key) { - Some(val) => *val += heat, - None => { - self.heats.put(key.clone(), heat); - } - } - } - - /// Removes and returns all items. 
- pub fn pop_all(&mut self) -> Vec<(K, u64)> { - let mut values = Vec::with_capacity(self.heats.len()); - - while let Some(value) = self.heats.pop_back() { - values.push(value); - } - - self.heats.clear(); - values - } -} - -#[cfg(test)] -mod tests { - use crate::hotspot_lru::HotspotLru; - - #[test] - fn test_inc_and_pop() { - let mut hotspot = HotspotLru::new(10).unwrap(); - hotspot.inc(&"apple", 1); - hotspot.inc(&"banana", 2); - hotspot.inc(&"orange", 3); - hotspot.inc(&"peach", 4); - hotspot.inc(&"cherry", 5); - - let result = hotspot.pop_all(); - assert_eq!(result.len(), 5); - assert_eq!(result[0], ("apple", 1)); - assert_eq!(result[1], ("banana", 2)); - assert_eq!(result[2], ("orange", 3)); - assert_eq!(result[3], ("peach", 4)); - assert_eq!(result[4], ("cherry", 5)); - } - - #[test] - fn test_update() { - let mut hotspot = HotspotLru::new(1).unwrap(); - - hotspot.inc(&"apple", 2); - hotspot.inc(&"apple", 1); - - let result = hotspot.pop_all(); - assert_eq!(result.len(), 1); - assert_eq!(result[0], ("apple", 3)); - } - - #[test] - fn test_removes_oldest() { - let mut hotspot = HotspotLru::new(2).unwrap(); - - hotspot.inc(&"apple", 1); - hotspot.inc(&"banana", 1); - hotspot.inc(&"pear", 2); - - let result = hotspot.pop_all(); - assert_eq!(result.len(), 2); - assert_eq!(result[0], ("banana", 1)); - assert_eq!(result[1], ("pear", 2)); - } - - #[test] - fn test_send() { - use std::thread; - - let mut hotspot = HotspotLru::new(4).unwrap(); - hotspot.inc(&"apple", 2); - - let handle = thread::spawn(move || { - let result = hotspot.pop_all(); - assert_eq!(result.len(), 1); - assert_eq!(result[0], ("apple", 2)); - }); - - assert!(handle.join().is_ok()); - } -} diff --git a/src/proxy/src/http/mod.rs b/src/proxy/src/http/mod.rs deleted file mode 100644 index d5f038dc0a..0000000000 --- a/src/proxy/src/http/mod.rs +++ /dev/null @@ -1,20 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -pub mod prom; -pub mod route; -pub mod sql; diff --git a/src/proxy/src/http/prom.rs b/src/proxy/src/http/prom.rs deleted file mode 100644 index 847da23d0a..0000000000 --- a/src/proxy/src/http/prom.rs +++ /dev/null @@ -1,688 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! This module implements prometheus remote storage API. -//! It converts write request to gRPC write request, and -//! translates query request to SQL for execution. - -use std::{collections::HashMap, result::Result as StdResult, time::Instant}; - -use async_trait::async_trait; -use catalog::consts::DEFAULT_CATALOG; -use common_types::{ - datum::DatumKind, - schema::{RecordSchema, TSID_COLUMN}, -}; -use generic_error::BoxError; -use horaedbproto::storage::{ - value, Field, FieldGroup, PrometheusRemoteQueryRequest, PrometheusRemoteQueryResponse, - RequestContext as GrpcRequestContext, Tag, Value, WriteRequest as GrpcWriteRequest, - WriteSeriesEntry, WriteTableRequest, -}; -use http::StatusCode; -use interpreters::{interpreter::Output, RecordBatchVec}; -use logger::{error, info}; -use prom_remote_api::types::{ - Label, LabelMatcher, Query, QueryResult, RemoteStorage, Sample, TimeSeries, WriteRequest, -}; -use prost::Message; -use query_frontend::{ - frontend::{Context, Frontend}, - promql::{RemoteQueryPlan, DEFAULT_FIELD_COLUMN, NAME_LABEL}, - provider::CatalogMetaProvider, -}; -use snafu::{ensure, OptionExt, ResultExt}; -use time_ext::InstantExt; -use warp::reject; - -use crate::{ - context::RequestContext, - error::{build_ok_header, ErrNoCause, ErrWithCause, Error, Internal, InternalNoCause, Result}, - forward::ForwardResult, - metrics::HTTP_HANDLER_COUNTER_VEC, - Context as ProxyContext, Proxy, -}; - -impl reject::Reject for Error {} - -impl Proxy { - /// Handle write samples to remote storage with remote storage protocol. - async fn handle_prom_remote_write(&self, ctx: RequestContext, req: WriteRequest) -> Result<()> { - let write_table_requests = convert_write_request(req)?; - let num_rows: usize = write_table_requests - .iter() - .map(|req| { - req.entries - .iter() - .map(|e| e.field_groups.len()) - .sum::() - }) - .sum(); - - let table_request = GrpcWriteRequest { - context: Some(GrpcRequestContext { - database: ctx.schema.clone(), - }), - table_requests: write_table_requests, - }; - let ctx = ProxyContext::new(ctx.timeout, None, ctx.authorization); - - match self.handle_write_internal(ctx, table_request).await { - Ok(result) => { - if result.failed != 0 { - HTTP_HANDLER_COUNTER_VEC.write_failed.inc(); - HTTP_HANDLER_COUNTER_VEC - .write_failed_row - .inc_by(result.failed as u64); - - ErrNoCause { - code: StatusCode::INTERNAL_SERVER_ERROR, - msg: format!("fail to write storage, failed rows:{:?}", result.failed), - } - .fail()?; - } - - Ok(()) - } - Err(e) => { - HTTP_HANDLER_COUNTER_VEC.write_failed.inc(); - HTTP_HANDLER_COUNTER_VEC - .write_failed_row - .inc_by(num_rows as u64); - - Err(e) - } - } - } - - /// Handle one query with remote storage protocol. - async fn handle_prom_remote_query( - &self, - ctx: &RequestContext, - metric: String, - query: Query, - ) -> Result { - let request_id = &ctx.request_id; - let begin_instant = Instant::now(); - let deadline = ctx.timeout.map(|t| begin_instant + t); - info!("Handle prom remote query begin, ctx:{ctx:?}, metric:{metric}, request:{query:?}"); - - // Open partition table if needed. 
- self.maybe_open_partition_table_if_not_exist(&ctx.catalog, &ctx.schema, &metric) - .await?; - - let provider = CatalogMetaProvider { - manager: self.instance.catalog_manager.clone(), - default_catalog: &ctx.catalog, - default_schema: &ctx.schema, - function_registry: &*self.instance.function_registry, - }; - let frontend = Frontend::new(provider, self.instance.dyn_config.fronted.clone()); - let plan_ctx = Context::new(request_id.clone(), deadline); - - let RemoteQueryPlan { - plan, - timestamp_col_name, - field_col_name, - } = frontend - .prom_remote_query_to_plan(&plan_ctx, query.clone()) - .box_err() - .with_context(|| ErrWithCause { - code: StatusCode::BAD_REQUEST, - msg: format!("Failed to build plan, query:{query:?}"), - })?; - - self.instance - .limiter - .try_limit(&plan) - .box_err() - .context(ErrWithCause { - code: StatusCode::INTERNAL_SERVER_ERROR, - msg: "Query is blocked", - })?; - let output = self - .execute_plan( - request_id.clone(), - &ctx.catalog, - &ctx.schema, - plan, - deadline, - ) - .await?; - - let cost = begin_instant.saturating_elapsed().as_millis(); - info!("Handle prom remote query successfully, ctx:{ctx:?}, cost:{cost}ms"); - - convert_query_result(metric, timestamp_col_name, field_col_name, output) - } - - /// This method is used to handle forwarded gRPC query from - /// another HoraeDB instance. - pub async fn handle_prom_grpc_query( - &self, - ctx: ProxyContext, - req: PrometheusRemoteQueryRequest, - ) -> Result { - let req_ctx = req.context.context(ErrNoCause { - code: StatusCode::BAD_REQUEST, - msg: "request context is missing", - })?; - let database = req_ctx.database.to_string(); - let query = Query::decode(req.query.as_ref()) - .box_err() - .context(Internal { - msg: "decode query failed", - })?; - let metric = find_metric(&query.matchers)?; - let builder = RequestContext::builder() - .timeout(ctx.timeout) - .authorization(ctx.authorization) - .schema(database) - // TODO: support different catalog - .catalog(DEFAULT_CATALOG.to_string()); - let ctx = builder.build().box_err().context(Internal { - msg: "build request context failed", - })?; - - self.handle_prom_remote_query(&ctx, metric, query) - .await - .map(|v| PrometheusRemoteQueryResponse { - header: Some(build_ok_header()), - response: v.encode_to_vec(), - }) - } -} - -#[async_trait] -impl RemoteStorage for Proxy { - type Context = RequestContext; - type Err = Error; - - async fn write(&self, ctx: Self::Context, req: WriteRequest) -> StdResult<(), Self::Err> { - self.handle_prom_remote_write(ctx, req).await - } - - async fn process_query( - &self, - ctx: &Self::Context, - query: Query, - ) -> StdResult { - HTTP_HANDLER_COUNTER_VEC.incoming_prom_query.inc(); - - let do_query = || async { - let metric = find_metric(&query.matchers)?; - let remote_req = PrometheusRemoteQueryRequest { - context: Some(horaedbproto::storage::RequestContext { - database: ctx.schema.to_string(), - }), - query: query.encode_to_vec(), - }; - if let Some(resp) = self - .maybe_forward_prom_remote_query(ctx, metric.clone(), remote_req) - .await - .map_err(|e| { - error!("Forward prom remote query failed, err:{e}"); - e - })? 
- { - match resp { - ForwardResult::Forwarded(resp) => { - return resp.and_then(|v| { - QueryResult::decode(v.response.as_ref()) - .box_err() - .context(Internal { - msg: "decode QueryResult failed", - }) - }); - } - ForwardResult::Local => {} - } - } - - self.handle_prom_remote_query(ctx, metric, query).await - }; - - match do_query().await { - Ok(v) => { - HTTP_HANDLER_COUNTER_VEC.prom_query_succeeded.inc(); - - Ok(v) - } - Err(e) => { - HTTP_HANDLER_COUNTER_VEC.prom_query_failed.inc(); - - error!("Prom remote query failed, err:{e}"); - Err(e) - } - } - } -} - -/// Converter converts Arrow's RecordBatch into Prometheus's QueryResult -struct Converter { - tsid_idx: usize, - timestamp_idx: usize, - value_idx: usize, - // (column_name, index) - tags: Vec<(String, usize)>, -} - -impl Converter { - fn try_new( - schema: &RecordSchema, - timestamp_col_name: &str, - field_col_name: &str, - ) -> Result { - let tsid_idx = schema.index_of(TSID_COLUMN).context(InternalNoCause { - msg: "TSID column is missing in query response", - })?; - let timestamp_idx = schema - .index_of(timestamp_col_name) - .context(InternalNoCause { - msg: "Timestamp column is missing in query response", - })?; - let value_idx = schema.index_of(field_col_name).context(InternalNoCause { - msg: "Value column is missing in query response", - })?; - let tags = schema - .columns() - .iter() - .enumerate() - .filter(|(_, col)| col.is_tag) - .map(|(i, col)| { - ensure!( - matches!(col.data_type, DatumKind::String), - InternalNoCause { - msg: format!("Tag must be string type, current:{}", col.data_type) - } - ); - - Ok((col.name.to_string(), i)) - }) - .collect::>>()?; - - ensure!( - matches!(schema.column(tsid_idx).data_type, DatumKind::UInt64), - InternalNoCause { - msg: format!( - "Tsid must be u64, current:{}", - schema.column(tsid_idx).data_type - ) - } - ); - ensure!( - schema.column(timestamp_idx).data_type.is_timestamp(), - InternalNoCause { - msg: format!( - "Timestamp wrong type, current:{}", - schema.column(timestamp_idx).data_type - ) - } - ); - ensure!( - schema.column(value_idx).data_type.is_f64_castable(), - InternalNoCause { - msg: format!( - "Value must be f64 compatible type, current:{}", - schema.column(value_idx).data_type - ) - } - ); - - Ok(Converter { - tsid_idx, - timestamp_idx, - value_idx, - tags, - }) - } - - fn convert(&self, metric: String, record_batches: RecordBatchVec) -> Result { - let mut series_by_tsid = HashMap::new(); - for batch in record_batches { - let tsid_col = batch.column(self.tsid_idx); - let timestamp_col = batch.column(self.timestamp_idx); - let value_col = batch.column(self.value_idx); - let tag_cols = self - .tags - .iter() - .map(|(_, idx)| batch.column(*idx)) - .collect::>(); - for row_idx in 0..batch.num_rows() { - let tsid = tsid_col.datum(row_idx).as_u64().context(ErrNoCause { - msg: "value should be non-nullable i64", - code: StatusCode::BAD_REQUEST, - })?; - let sample = Sample { - timestamp: timestamp_col - .datum(row_idx) - .as_timestamp() - .context(ErrNoCause { - msg: "timestamp should be non-nullable timestamp", - code: StatusCode::BAD_REQUEST, - })? 
- .as_i64(), - value: value_col.datum(row_idx).as_f64().context(ErrNoCause { - msg: "value should be non-nullable f64", - code: StatusCode::BAD_REQUEST, - })?, - }; - series_by_tsid - .entry(tsid) - .or_insert_with(|| { - let mut labels = self - .tags - .iter() - .enumerate() - .map(|(idx, (col_name, _))| { - let col_value = tag_cols[idx].datum(row_idx); - // for null tag value, use empty string instead - let col_value = col_value.as_str().unwrap_or_default(); - - Label { - name: col_name.to_string(), - value: col_value.to_string(), - } - }) - .collect::>(); - labels.push(Label { - name: NAME_LABEL.to_string(), - value: metric.clone(), - }); - - TimeSeries { - labels, - ..Default::default() - } - }) - .samples - .push(sample); - } - } - - Ok(QueryResult { - timeseries: series_by_tsid.into_values().collect(), - }) - } -} - -fn find_metric(matchers: &[LabelMatcher]) -> Result { - let idx = matchers - .iter() - .position(|m| m.name == NAME_LABEL) - .context(InternalNoCause { - msg: "Metric name is not found", - })?; - - Ok(matchers[idx].value.clone()) -} - -/// Separate metric from labels, and sort labels by name. -fn normalize_labels(mut labels: Vec