diff --git a/.env.example b/.env.example
index 8c751a648..297ff0d30 100644
--- a/.env.example
+++ b/.env.example
@@ -1,120 +1,23 @@
-# Required by Postgre container
-POSTGRE_DB_PATH="postgre/db/path"
+ASSETS_ROCKS_DB_PATH="path/to/assets/db"
 
-RUST_BACKTRACE=1
-# Ingester instance config
-INGESTER_LOG_LEVEL=info
+SLOTS_ROCKS_DB_PATH="path/to/slots/db"
 
-INGESTER_DATABASE_CONFIG='{max_postgres_connections=10, url="postgres://user:pass@0.0.0.0:5432/database"}'
-INGESTER_TCP_CONFIG='{receiver_addr="localhost:2000", receiver_reconnect_interval=5, snapshot_receiver_addr="localhost:5000"}'
-INGESTER_REDIS_MESSENGER_CONFIG='{messenger_type="Redis", connection_config={redis_connection_str="redis://:pass@localhost:6379"}}'
-INGESTER_MESSAGE_SOURCE=Redis #TCP or Redis
+INGESTER_SECONDARY_SLOTS_ROCKS_DB_PATH="path/to/slots_secondary/db"
+# each worker will have its own secondary DB directory
 
-INGESTER_ACCOUNTS_BUFFER_SIZE=250
-INGESTER_ACCOUNTS_PARSING_WORKERS=20
-INGESTER_TRANSACTIONS_PARSING_WORKERS=20
+SOLANA_RPC="http://localhost:8080"
 
-INGESTER_SNAPSHOT_PARSING_WORKERS=1
-INGESTER_SNAPSHOT_PARSING_BATCH_SIZE=250
+BIG_TABLE_CREDENTIALS="path/to/creds"
+BIG_TABLE_TIMEOUT=10
 
-INGESTER_GAPFILLER_PEER_ADDR="0.0.0.0"
-INGESTER_METRICS_PORT=9091
-INGESTER_SERVER_PORT=9092
-INGESTER_PEER_GRPC_PORT=9099
+SLOT_PERSISTER_START_SLOT=10
+SLOT_PERSISTER_SLOTS=[1,2,3]
+SLOT_PERSISTER_CHUNK_SIZE=1000
+SLOT_PERSISTER_MAX_CONCURRENCY=100
+SLOT_PERSISTER_METRICS_PORT=6090
 
-INGESTER_ROCKS_DB_PATH_CONTAINER="/usr/src/rocksdb-data"
-INGESTER_ROCKS_DB_PATH="path/to/rocks/on/disk"
+SLOT_CHECKER_SLOTS=[1,2,3]
 
-INGESTER_ARCHIVES_DIR="path/to/rocks/backup/archives"
-INGESTER_ROCKS_BACKUP_ARCHIVES_DIR="path/to/rocks/backup/archives"
-INGESTER_ROCKS_BACKUP_DIR="path/to/rocks/backup/"
-
-INGESTER_BACKFILL_RPC_ADDRESS='https://rpc:port'
-INGESTER_RPC_HOST='https://rpc:port'
-
-INGESTER_BACKFILLER_SOURCE_MODE=RPC #RPC or Bigtable
-INGESTER_BIG_TABLE_CONFIG='{creds="/usr/src/app/creds.json", timeout=1000}'
-
-INGESTER_RUN_SEQUENCE_CONSISTENT_CHECKER=true
-# Optional, required only if it needs to run fork cleaner, default is false. Unstable as it removes forked items, but also removes some valid leafs. Recommended to use only! for testing purposes.
-INGESTER_RUN_FORK_CLEANER=false
-INGESTER_RUN_BUBBLEGUM_BACKFILLER=true
-
-INGESTER_BACKFILLER_MODE=PersistAndIngest
-INGESTER_SLOT_UNTIL=0
-INGESTER_SLOT_START_FROM=0
-INGESTER_WORKERS_COUNT=100
-INGESTER_CHUNK_SIZE=20
-INGESTER_PERMITTED_TASKS=1
-INGESTER_WAIT_PERIOD_SEC=30
-INGESTER_SHOULD_REINGEST=false
-
-INGESTER_PEER_GRPC_MAX_GAP_SLOTS=1000000
-
-INGESTER_RUN_PROFILING=false
-INGESTER_PROFILING_FILE_PATH_CONTAINER="/usr/src/profiling"
-INGESTER_PROFILING_FILE_PATH="/path/to/profiling"
-
-INGESTER_FILE_STORAGE_PATH_CONTAINER="/usr/src/app/file_storage"
-INGESTER_FILE_STORAGE_PATH="path/to/file/storage"
-INGESTER_MIGRATION_STORAGE_PATH=/path/to/migration_storage
-
-INGESTER_ROCKS_FLUSH_BEFORE_BACKUP=false
-INGESTER_ROCKS_INTERVAL_IN_SECONDS=3600
-INGESTER_ROCKS_SYNC_INTERVAL_SECONDS=2
-
-INGESTER_SYNCHRONIZER_DUMP_PATH="/path/to/dump"
-
-# API instance config
-API_LOG_LEVEL=info
-
-API_DATABASE_CONFIG='{max_postgres_connections=250, url="postgres://user:pass@0.0.0.0:5432/database"}'
-
-API_ROCKS_DB_PATH_CONTAINER="/usr/src/rocksdb-data"
-API_ROCKS_DB_SECONDARY_PATH_CONTAINER="path/to/rocks/secondary/db"
-API_ARCHIVES_DIR="path/to/rocks/backup/archives"
-
-API_PEER_GRPC_PORT=8991
-API_METRICS_PORT=8985
-API_SERVER_PORT=8990
-
-API_RPC_HOST='https://rpc:port'
-
-API_ROCKS_SYNC_INTERVAL_SECONDS=2
-API_FILE_STORAGE_PATH_CONTAINER="/usr/src/app/file_storage"
-API_FILE_STORAGE_PATH="path/to/file/storage"
-
-API_PEER_GRPC_MAX_GAP_SLOTS=1000000
-API_JSON_MIDDLEWARE_CONFIG='{is_enabled=true, max_urls_to_parse=10}'
-
-API_CONSISTENCE_SYNCHRONIZATION_API_THRESHOLD=1000000
-API_CONSISTENCE_BACKFILLING_SLOTS_THRESHOLD=500
-
-# if set to true API will not check if tree where user requests assets from has any gaps
-API_SKIP_CHECK_TREE_GAPS=false
-
-# Synchronizer instance config
-SYNCHRONIZER_LOG_LEVEL=info
-
-SYNCHRONIZER_DATABASE_CONFIG='{max_postgres_connections=100, url="postgres://user:pass@0.0.0.0:5432/database"}'
-SYNCHRONIZER_ROCKS_DB_PATH_CONTAINER="/usr/src/rocksdb-data"
-SYNCHRONIZER_ROCKS_DB_SECONDARY_PATH_CONTAINER="path/to/rocks/secondary/db"
-
-SYNCHRONIZER_METRICS_PORT=6091
-
-SYNCHRONIZER_DUMP_PATH="/path/to/migration_data"
-
-SYNCHRONIZER_DUMP_SYNCHRONIZER_BATCH_SIZE=10000
-SYNCHRONIZER_DUMP_SYNC_THRESHOLD=50000000
-
-SYNCHRONIZER_PARALLEL_TASKS=30
-
-# Profiling config
-# Optional, required only if it needs to run memory profiling
-MALLOC_CONF="prof:true,prof_leak:true,prof_final:true,prof_active:true,prof_prefix:/usr/src/app/heaps/,lg_prof_interval:32,lg_prof_sample:19"
-
-# Integrity verification
-INTEGRITY_VERIFICATION_TEST_FILE_PATH="./test_keys/test_keys.txt"
-INTEGRITY_VERIFICATION_TEST_FILE_PATH_CONTAINER="/test_keys/test_keys.txt"
-INTEGRITY_VERIFICATION_SLOTS_COLLECT_PATH="./slots_collect"
-INTEGRITY_VERIFICATION_SLOTS_COLLECT_PATH_CONTAINER="/slots_collect"
\ No newline at end of file
+BACKFILL_START_SLOT=10
+BACKFILL_WORKERS=50
+BACKFILL_SLOTS=[1,2,3]
diff --git a/Cargo.toml b/Cargo.toml
index ecb94d9a7..8ea950cb3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -77,7 +77,7 @@ num-traits = "0.2.17"
 
 # Configuration, env-vars and cli parsing
 figment = { version = "0.10.6", features = ["env", "toml", "yaml"] }
-clap = { version = "4.2.2", features = ["derive", "cargo"] }
+clap = { version = "4.2.2", features = ["derive", "cargo", "env"] }
 dotenvy = "0.15.7"
 indicatif = "0.17"
 
diff --git a/docker-compose.yaml b/docker-compose.yaml
index e94ad889b..dae7d5155 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -97,51 +97,15 @@ services:
       options:
         max-size: "2048m"
 
-  raw-backfiller:
-    container_name: raw-backfiller
-    restart: always
-    entrypoint: sh -c "if [ -z '$$MALLOC_CONF' ]; then exec ./raw_backfiller; else exec ./profiling_raw_backfiller; fi"
-    env_file:
-      - .env
-    network_mode: host
-    volumes:
-      - ${INGESTER_ROCKS_DB_PATH}:${INGESTER_ROCKS_DB_PATH_CONTAINER}:rw
-      - ${INGESTER_PROFILING_FILE_PATH}:${INGESTER_PROFILING_FILE_PATH_CONTAINER}:rw
-      - ./creds.json:/usr/src/app/creds.json
-      - ./heaps:/usr/src/app/heaps:rw
-    stop_grace_period: 5m
-    build:
-      context: .
-      dockerfile: ingester.Dockerfile
-    logging:
-      options:
-        max-size: "2048m"
-
   slot-persister:
     container_name: slot-persister
     restart: always
-    entrypoint: |
-      sh -c "
-      ARGS=\"--target-db-path $target_db_path\"
-      ARGS=\"$$ARGS --rpc-host $rpc_host\"
-      [ -n \"$start_slot\" ] && ARGS=\"$$ARGS --start-slot $start_slot\"
-      [ -n \"$big_table_credentials\" ] && ARGS=\"$$ARGS --big-table-credentials $big_table_credentials\"
-      [ -n \"$big_table_timeout\" ] && ARGS=\"$$ARGS --big-table-timeout $big_table_timeout\"
-      [ -n \"$metrics_port\" ] && ARGS=\"$$ARGS --metrics-port $metrics_port\"
-      [ -n \"$chunk_size\" ] && ARGS=\"$$ARGS --chunk-size $chunk_size\"
-      [ -n \"$max_concurrency\" ] && ARGS=\"$$ARGS --max-concurrency $max_concurrency\"
-
-      if [ -z \"$MALLOC_CONF\" ]; then
-        exec ./slot_persister $$ARGS
-      else
-        exec ./profiling_slot_persister $$ARGS
-      fi"
+    entrypoint: "./slot_persister"
     env_file:
       - .env
     network_mode: host
     volumes:
-      - ${target_db_path}:${target_db_path}:rw
-      - ${INGESTER_PROFILING_FILE_PATH}:${INGESTER_PROFILING_FILE_PATH_CONTAINER}:rw
+      - ${TARGET_DB_PATH}:${TARGET_DB_PATH}:rw
       - ${big_table_credentials:-/tmp/creds.json}:${big_table_credentials:-/tmp/creds.json}
       - ./heaps:/usr/src/app/heaps:rw
     stop_grace_period: 5m
@@ -152,24 +116,6 @@ services:
       options:
         max-size: "2048m"
 
-  core-indexing:
-    container_name: core-indexing
-    restart: always
-    entrypoint: sh -c "if [ -z '$$MALLOC_CONF' ]; then exec ./core_indexing; else exec ./profiling_core_indexing; fi"
-    env_file:
-      - .env
-    network_mode: host
-    volumes:
-      - ${INGESTER_PROFILING_FILE_PATH}:${INGESTER_PROFILING_FILE_PATH_CONTAINER}:rw
-      - ./heaps:/usr/src/app/heaps:rw
-    stop_grace_period: 5m
-    build:
-      context: .
-      dockerfile: ingester.Dockerfile
-    logging:
-      options:
-        max-size: "2048m"
-
   db:
     container_name: db
     image: 'postgres:14'
diff --git a/ingester.Dockerfile b/ingester.Dockerfile
index 249ee2da7..53482b301 100644
--- a/ingester.Dockerfile
+++ b/ingester.Dockerfile
@@ -36,12 +36,12 @@ RUN cargo chef cook --release --recipe-path recipe.json
 # Building the services
 FROM cacher AS builder
 COPY . .
-RUN cargo build --release --bin ingester --bin api --bin raw_backfiller --bin synchronizer --bin slot_persister
+RUN cargo build --release --bin ingester --bin api --bin synchronizer --bin slot_persister
 
 # Building the profiling feature services
 FROM cacher AS builder-with-profiling
 COPY . .
-RUN cargo build --release --features profiling --bin ingester --bin api --bin raw_backfiller --bin synchronizer --bin slot_persister
+RUN cargo build --release --features profiling --bin ingester --bin api --bin synchronizer
 
 # Final image
 FROM rust:1.76-slim-bullseye AS runtime
@@ -52,15 +52,12 @@ ENV TZ=Etc/UTC APP_USER=appuser LD_PRELOAD="/usr/local/lib/libjemalloc.so.2"
 RUN groupadd $APP_USER && useradd -g $APP_USER $APP_USER && mkdir -p ${APP}
 
 COPY --from=builder /rust/target/release/ingester ${APP}/ingester
-COPY --from=builder /rust/target/release/raw_backfiller ${APP}/raw_backfiller
 COPY --from=builder /rust/target/release/api ${APP}/api
 COPY --from=builder /rust/target/release/synchronizer ${APP}/synchronizer
 COPY --from=builder /rust/target/release/slot_persister ${APP}/slot_persister
 COPY --from=builder-with-profiling /rust/target/release/ingester ${APP}/profiling_ingester
-COPY --from=builder-with-profiling /rust/target/release/raw_backfiller ${APP}/profiling_raw_backfiller
 COPY --from=builder-with-profiling /rust/target/release/api ${APP}/profiling_api
 COPY --from=builder-with-profiling /rust/target/release/synchronizer ${APP}/profiling_synchronizer
-COPY --from=builder-with-profiling /rust/target/release/slot_persister ${APP}/profiling_slot_persister
 
 WORKDIR ${APP}
 STOPSIGNAL SIGINT
\ No newline at end of file
diff --git a/nft_ingester/src/bin/slot_persister/main.rs b/nft_ingester/src/bin/slot_persister/main.rs
index a0913b913..44b3afb1d 100644
--- a/nft_ingester/src/bin/slot_persister/main.rs
+++ b/nft_ingester/src/bin/slot_persister/main.rs
@@ -41,40 +41,40 @@ const SLOT_COLLECTION_OFFSET: u64 = 300;
 )]
 struct Args {
     /// Path to the target RocksDB instance with slots
-    #[arg(short, long)]
+    #[arg(short, long, env="ASSETS_ROCKS_DB_PATH")]
     target_db_path: PathBuf,
 
     /// RPC host
-    #[arg(short, long)]
+    #[arg(short, long, env="SOLANA_RPC")]
     rpc_host: String,
 
     /// Optional starting slot number, this will override the last saved slot in the RocksDB
-    #[arg(short, long)]
+    #[arg(short, long, env="SLOT_PERSISTER_START_SLOT")]
     start_slot: Option<u64>,
 
     /// Big table credentials file path
-    #[arg(short, long)]
+    #[arg(short, long, env="BIG_TABLE_CREDENTIALS")]
     big_table_credentials: Option<String>,
 
     /// Optional big table timeout (default: 1000)
-    #[arg(short = 'B', long, default_value_t = 1000)]
+    #[arg(short = 'B', long, env="BIG_TABLE_TIMEOUT", default_value_t = 1000)]
     big_table_timeout: u32,
 
     /// Metrics port
     /// Default: 9090
-    #[arg(short, long, default_value = "9090")]
+    #[arg(short, long, env="SLOT_PERSISTER_METRICS_PORT", default_value = "9090")]
     metrics_port: u16,
 
     /// Number of slots to process in each batch
-    #[arg(short, long, default_value_t = 200)]
+    #[arg(short, long, env="SLOT_PERSISTER_CHUNK_SIZE", default_value_t = 200)]
     chunk_size: usize,
 
     /// Maximum number of concurrent requests
-    #[arg(short = 'M', long, default_value_t = 20)]
+    #[arg(short = 'M', long, env="SLOT_PERSISTER_MAX_CONCURRENCY", default_value_t = 20)]
     max_concurrency: usize,
 
     /// Optional comma-separated list of slot numbers to check
-    #[arg(long)]
+    #[arg(long, env="SLOT_PERSISTER_SLOTS")]
     slots: Option<String>,
 }
 
 pub struct InMemorySlotsDumper {