Check ClickHouse Keeper health manually because Docker Swarm doesn't support depends_on health conditions

2026-03-12 09:44:06 +00:00 · 2024-11-28 22:15:27 -07:00
parent 3f15f225f4
commit 030659c3e1
10 changed files with 88 additions and 2 deletions
--- a/clickhouse/clickhouse_keeper/keeper1-config.xml
+++ b/clickhouse/clickhouse_keeper/keeper1-config.xml
@@ -18,6 +18,12 @@
        <server_id>1</server_id>
        <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
        <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
        <!-- HTTP control interface exposing the readiness endpoint on port 9182 at /ready.
             The 00_wait_for_keeper.sh entrypoint scripts poll this port and path,
             so keep the values here and in those scripts in sync. -->
        <http_control>
            <port>9182</port>
            <readiness>
                <endpoint>/ready</endpoint>
            </readiness>
        </http_control>
        <coordination_settings>
            <operation_timeout_ms>10000</operation_timeout_ms>
            <session_timeout_ms>30000</session_timeout_ms>
--- a/clickhouse/clickhouse_keeper/keeper2-config.xml
+++ b/clickhouse/clickhouse_keeper/keeper2-config.xml
@@ -18,6 +18,12 @@
        <server_id>2</server_id>
        <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
        <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
        <!-- HTTP control interface exposing the readiness endpoint on port 9182 at /ready.
             The 00_wait_for_keeper.sh entrypoint scripts poll this port and path,
             so keep the values here and in those scripts in sync. -->
        <http_control>
            <port>9182</port>
            <readiness>
                <endpoint>/ready</endpoint>
            </readiness>
        </http_control>
        <coordination_settings>
            <operation_timeout_ms>10000</operation_timeout_ms>
            <session_timeout_ms>30000</session_timeout_ms>
--- a/clickhouse/clickhouse_keeper/keeper3-config.xml
+++ b/clickhouse/clickhouse_keeper/keeper3-config.xml
@@ -18,6 +18,12 @@
        <server_id>3</server_id>
        <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
        <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
        <!-- HTTP control interface exposing the readiness endpoint on port 9182 at /ready.
             The 00_wait_for_keeper.sh entrypoint scripts poll this port and path,
             so keep the values here and in those scripts in sync. -->
        <http_control>
            <port>9182</port>
            <readiness>
                <endpoint>/ready</endpoint>
            </readiness>
        </http_control>
        <coordination_settings>
            <operation_timeout_ms>10000</operation_timeout_ms>
            <session_timeout_ms>30000</session_timeout_ms>
--- a/clickhouse/node-entrypoints/common/00_wait_for_keeper.sh
+++ b/clickhouse/node-entrypoints/common/00_wait_for_keeper.sh
@@ -0,0 +1,29 @@
 #!/bin/bash
 # Block until every ClickHouse Keeper node answers its HTTP readiness endpoint.
 # Docker Swarm does not support depends_on health conditions, so readiness is
 # checked by hand here.
 set -e
 keeper_hostnames=(
 	"clickhouse-keeper1"
 	"clickhouse-keeper2"
 	"clickhouse-keeper3"
 )
 # Port and path of the readiness probe; must match <http_control> in the
 # keeper config files.
 keeper_ready_port=9182
 keeper_ready_path="/ready"
 # Cap each probe so a keeper that accepts the connection but never answers
 # cannot stall the loop for wget's default network timeouts.
 probe_timeout_seconds=5
 # Pause between polling rounds while at least one keeper is still not ready.
 retry_delay_seconds=5
 # One flag per hostname, derived from the list so the two cannot drift apart.
 # A keeper that has answered once stays marked healthy.
 keeper_healthy=()
 for keeper_idx in "${!keeper_hostnames[@]}"; do
 	keeper_healthy[$keeper_idx]=false
 done
 can_proceed=false
 while ! $can_proceed ; do
 	can_proceed=true
 	for keeper_idx in "${!keeper_hostnames[@]}"; do
 		# Already confirmed in an earlier round: do not probe or log it again.
 		if ${keeper_healthy[$keeper_idx]} ; then
 			continue
 		fi
 		# The probe runs inside `if`, so a failed wget does not trip `set -e`.
 		if wget -q --tries=1 --timeout="$probe_timeout_seconds" --spider "http://${keeper_hostnames[$keeper_idx]}:${keeper_ready_port}${keeper_ready_path}" ; then
 			echo "keeper healthy"
 			keeper_healthy[$keeper_idx]=true
 		else
 			can_proceed=false
 		fi
 	done
 	if ! $can_proceed ; then
 		sleep "$retry_delay_seconds"
 	fi
 done
--- a/clickhouse/node-entrypoints/common/01_table_create.sql
+++ b/clickhouse/node-entrypoints/common/01_table_create.sql
--- a/clickhouse/node-entrypoints/main/00_wait_for_keeper.sh
+++ b/clickhouse/node-entrypoints/main/00_wait_for_keeper.sh
@@ -0,0 +1,29 @@
 #!/bin/bash
 # Block until every ClickHouse Keeper node answers its HTTP readiness endpoint.
 # Docker Swarm does not support depends_on health conditions, so readiness is
 # checked by hand here.
 set -e
 keeper_hostnames=(
 	"clickhouse-keeper1"
 	"clickhouse-keeper2"
 	"clickhouse-keeper3"
 )
 # Port and path of the readiness probe; must match <http_control> in the
 # keeper config files.
 keeper_ready_port=9182
 keeper_ready_path="/ready"
 # Cap each probe so a keeper that accepts the connection but never answers
 # cannot stall the loop for wget's default network timeouts.
 probe_timeout_seconds=5
 # Pause between polling rounds while at least one keeper is still not ready.
 retry_delay_seconds=5
 # One flag per hostname, derived from the list so the two cannot drift apart.
 # A keeper that has answered once stays marked healthy.
 keeper_healthy=()
 for keeper_idx in "${!keeper_hostnames[@]}"; do
 	keeper_healthy[$keeper_idx]=false
 done
 can_proceed=false
 while ! $can_proceed ; do
 	can_proceed=true
 	for keeper_idx in "${!keeper_hostnames[@]}"; do
 		# Already confirmed in an earlier round: do not probe or log it again.
 		if ${keeper_healthy[$keeper_idx]} ; then
 			continue
 		fi
 		# The probe runs inside `if`, so a failed wget does not trip `set -e`.
 		if wget -q --tries=1 --timeout="$probe_timeout_seconds" --spider "http://${keeper_hostnames[$keeper_idx]}:${keeper_ready_port}${keeper_ready_path}" ; then
 			echo "keeper healthy"
 			keeper_healthy[$keeper_idx]=true
 		else
 			can_proceed=false
 		fi
 	done
 	if ! $can_proceed ; then
 		sleep "$retry_delay_seconds"
 	fi
 done
--- a/clickhouse/node-entrypoints/main/01_table_create.sql
+++ b/clickhouse/node-entrypoints/main/01_table_create.sql
--- a/clickhouse/node-entrypoints/main/02_dist_table_create.sql
+++ b/clickhouse/node-entrypoints/main/02_dist_table_create.sql
--- a/clickhouse/node-entrypoints/main/04_kafka_table_ingest.sql
+++ b/clickhouse/node-entrypoints/main/04_kafka_table_ingest.sql
@@ -11,6 +11,7 @@ kafka_topic_list = 'traffic_records_stream',
 kafka_group_name = 'clickhouse_consumer',
 kafka_format = 'JSONEachRow',
 kafka_num_consumers = 1;
 CREATE MATERIALIZED VIEW traffic_records_kafka_view TO traffic_records_all AS
 SELECT time AS time_stamp,
 	l4_proto AS l4_protocol,
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -1,4 +1,13 @@
-# Full setup
+# End-to-end stack management
 ## Windows (PowerShell)
 `deploy.ps1 -MasterNode` to deploy stack with current node as manager
 `deploy.ps1 -downStack` to bring down stack (run from manager node)
 ## Linux/macOS (Bash)
 `deploy.sh -M` to deploy stack with current node as manager; `deploy.sh -D` to bring down stack (run from manager node)
 Add `-S` if Docker requires `sudo` privileges