From 84e501cf4b49ba5e5fd45c5882a1d91bcab74f27 Mon Sep 17 00:00:00 2001
From: Kaushik Narayan R
Date: Thu, 28 Nov 2024 17:51:53 -0700
Subject: [PATCH 1/5] attempting swarm mode with Linux host

---
 clickhouse/docker-compose.yaml   | 17 ++++++++++----
 preprocessing/docker-compose.yml |  6 +++++
 scripts/deploy.sh                | 39 +++++++++++++++++---------------
 ui/docker-compose.yaml           |  3 +++
 4 files changed, 43 insertions(+), 22 deletions(-)
 mode change 100644 => 100755 scripts/deploy.sh

diff --git a/clickhouse/docker-compose.yaml b/clickhouse/docker-compose.yaml
index 4ba1a62..8c17f35 100644
--- a/clickhouse/docker-compose.yaml
+++ b/clickhouse/docker-compose.yaml
@@ -11,6 +11,9 @@ services:
       clickhouse-keeper-network:
         aliases:
           - clickhouse-keeper1
+    deploy:
+      placement:
+        constraints: [node.labels.main == true]
 
   clickhouse-keeper2:
     image: clickhouse/clickhouse-server:latest
@@ -23,6 +26,9 @@ services:
       clickhouse-keeper-network:
         aliases:
           - clickhouse-keeper2
+    deploy:
+      placement:
+        constraints: [node.labels.main == true]
 
   clickhouse-keeper3:
     image: clickhouse/clickhouse-server:latest
@@ -35,6 +41,9 @@ services:
       clickhouse-keeper-network:
         aliases:
           - clickhouse-keeper3
+    deploy:
+      placement:
+        constraints: [node.labels.main == true]
 
   clickhouse-server1:
     image: clickhouse/clickhouse-server:latest
@@ -56,8 +65,8 @@ services:
           - clickhouse-server1
     deploy:
       replicas: 1
-      # placement:
-      #   constraints: [node.labels.role == server]
+      placement:
+        constraints: [node.labels.main == true]
       update_config:
         delay: 10s
       resources:
@@ -90,8 +99,8 @@ services:
           - clickhouse-server2
     deploy:
       replicas: 1
-      # placement:
-      #   constraints: [node.labels.role == server]
+      placement:
+        constraints: [node.labels.main == true]
       update_config:
         delay: 10s
       resources:
diff --git a/preprocessing/docker-compose.yml b/preprocessing/docker-compose.yml
index a587882..634b231 100644
--- a/preprocessing/docker-compose.yml
+++ b/preprocessing/docker-compose.yml
@@ -7,6 +7,8 @@ services:
       - zookeeper
     deploy:
       replicas: 1
+      placement:
+        constraints: [node.labels.worker == true]
       restart_policy:
         condition: on-failure
     environment:
@@ -40,6 +42,8 @@ services:
       - kafka_data:/var/lib/kafka/data
     deploy:
       replicas: 1
+      placement:
+        constraints: [node.labels.worker == true]
       restart_policy:
         condition: on-failure
 
@@ -56,6 +60,8 @@ services:
     command: "sh -c 'sleep 30 && python /app/pcap_processor.py -c /data/csv/main.csv -x --stream_size 100000'"
     deploy:
       replicas: 1
+      placement:
+        constraints: [node.labels.worker == true]
       restart_policy:
         condition: on-failure
diff --git a/scripts/deploy.sh b/scripts/deploy.sh
old mode 100644
new mode 100755
index d1cc529..2ce11d8
--- a/scripts/deploy.sh
+++ b/scripts/deploy.sh
@@ -1,7 +1,8 @@
 #!/bin/bash
-while getopts "M:D:T:A" flag; do
+while getopts "SMDT:A:" flag; do
   case "${flag}" in
+    S) sudoRequired=true ;;
     M) masterNode=true ;;
     D) downStack=true ;;
     T) swarmToken=$OPTARG ;;
     A) managerAddr=$OPTARG ;;
   esac
 done
@@ -9,34 +10,36 @@
-echo "masterNode: $masterNode"
-echo "downStack: $downStack"
-echo "swarmToken: $swarmToken"
-echo "managerAddr: $managerAddr"
-
-$scriptDir = $(readlink -f "$0")
+scriptDir=$(dirname $(readlink -f "$0"))
 # echo $scriptDir # ===> /Project/scripts
-$stackName="TheWebFarm"
+stackName="TheWebFarm"
+
+dockerCmd="docker"
+if [[ $sudoRequired ]]; then
+  dockerCmd="sudo docker"
+fi
 
 if [[ $downStack ]]; then
   echo "[+] Removing stack..."
-  docker stack rm $stackName
-  docker service rm registry
+  echo "$dockerCmd stack rm $stackName"
+  $dockerCmd stack rm $stackName
+  $dockerCmd service rm registry
   sleep 20
-  docker volume rm $(docker volume ls --filter name=$stackName -q)
-elif ($MasterNode); then
+  $dockerCmd volume rm $($dockerCmd volume ls --filter name=$stackName -q)
+elif [[ $masterNode ]]; then
   echo "[+] swarm master"
-
+  $dockerCmd swarm init
+
   # data streaming
   cd $scriptDir/../preprocessing
-  docker service create --name registry -p 5000:5000 registry:2
-  # docker build -t 127.0.0.1:5000/data-streamer:latest --no-cache --push -f Dockerfile.python .
-  docker build -t 127.0.0.1:5000/data-streamer:latest --push -f Dockerfile.python .
+  $dockerCmd service create --name registry -p 5000:5000 registry:2
+  $dockerCmd build -t 127.0.0.1:5000/data-streamer:latest --no-cache --push -f Dockerfile.python .
+  # $dockerCmd build -t 127.0.0.1:5000/data-streamer:latest --push -f Dockerfile.python .
 
   # execute
   cd $scriptDir
-  docker stack deploy -d \
+  $dockerCmd stack deploy -d \
     -c ../preprocessing/docker-compose.yml \
     -c ../clickhouse/docker-compose.yaml \
     -c ../ui/docker-compose.yaml \
@@ -49,5 +52,5 @@
 else
   echo "[+] swarm follower"
   echo "[+] joining swarm with token $swarmToken"
-  docker swarm join --token $swarmToken $managerAddr
+  $dockerCmd swarm join --token $swarmToken $managerAddr
 fi
diff --git a/ui/docker-compose.yaml b/ui/docker-compose.yaml
index 3698694..7ac73c8 100644
--- a/ui/docker-compose.yaml
+++ b/ui/docker-compose.yaml
@@ -14,6 +14,9 @@ services:
       clickhouse-server-network:
         aliases:
           - grafana
+    deploy:
+      placement:
+        constraints: [node.labels.worker == true]
     depends_on:
      - clickhouse-server1
     environment:
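The `node.labels.main` / `node.labels.worker` constraints above only place tasks once the swarm nodes actually carry those labels, which is a separate, manual step on the manager. A minimal sketch, assuming a two-node swarm with hypothetical node names `node-main` and `node-worker`:

    # run on the swarm manager, after `docker swarm init`
    docker node update --label-add main=true node-main      # hosts the ClickHouse keepers and servers
    docker node update --label-add worker=true node-worker  # hosts Zookeeper, Kafka, the streamer, and Grafana

    # verify what the scheduler sees
    docker node inspect node-main --format '{{ .Spec.Labels }}'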
From 8b04cbdadb41451bbb45149e91232ec6c379c820 Mon Sep 17 00:00:00 2001
From: Kaushik Narayan R
Date: Thu, 28 Nov 2024 18:39:04 -0700
Subject: [PATCH 2/5] back to UI, WIP

---
 clickhouse/docker-compose.yaml   |  33 ++++--
 preprocessing/docker-compose.yml |  12 +-
 scripts/deploy.sh                |   4 +-
 ui/dashboard_1.json              | 183 ++++++------------------------
 ui/docker-compose.yaml           |   7 +-
 5 files changed, 71 insertions(+), 168 deletions(-)

diff --git a/clickhouse/docker-compose.yaml b/clickhouse/docker-compose.yaml
index 8c17f35..d0fd34e 100644
--- a/clickhouse/docker-compose.yaml
+++ b/clickhouse/docker-compose.yaml
@@ -12,8 +12,11 @@ services:
         aliases:
           - clickhouse-keeper1
     deploy:
-      placement:
-        constraints: [node.labels.main == true]
+      replicas: 1
+      # placement:
+      #   constraints: [node.labels.main == true]
+      restart_policy:
+        condition: on-failure
 
   clickhouse-keeper2:
     image: clickhouse/clickhouse-server:latest
@@ -27,8 +30,11 @@ services:
         aliases:
           - clickhouse-keeper2
     deploy:
-      placement:
-        constraints: [node.labels.main == true]
+      replicas: 1
+      # placement:
+      #   constraints: [node.labels.main == true]
+      restart_policy:
+        condition: on-failure
 
   clickhouse-keeper3:
     image: clickhouse/clickhouse-server:latest
@@ -42,8 +48,11 @@ services:
         aliases:
           - clickhouse-keeper3
     deploy:
-      placement:
-        constraints: [node.labels.main == true]
+      replicas: 1
+      # placement:
+      #   constraints: [node.labels.main == true]
+      restart_policy:
+        condition: on-failure
 
   clickhouse-server1:
     image: clickhouse/clickhouse-server:latest
@@ -65,8 +74,10 @@ services:
           - clickhouse-server1
     deploy:
       replicas: 1
-      placement:
-        constraints: [node.labels.main == true]
+      # placement:
+      #   constraints: [node.labels.main == true]
+      restart_policy:
+        condition: on-failure
       update_config:
         delay: 10s
       resources:
@@ -99,8 +110,10 @@ services:
           - clickhouse-server2
     deploy:
       replicas: 1
-      placement:
-        constraints: [node.labels.main == true]
+      # placement:
+      #   constraints: [node.labels.main == true]
+      restart_policy:
+        condition: on-failure
       update_config:
         delay: 10s
       resources:
diff --git a/preprocessing/docker-compose.yml b/preprocessing/docker-compose.yml
index 634b231..0973767 100644
--- a/preprocessing/docker-compose.yml
+++ b/preprocessing/docker-compose.yml
@@ -7,8 +7,8 @@ services:
       - zookeeper
     deploy:
       replicas: 1
-      placement:
-        constraints: [node.labels.worker == true]
+      # placement:
+      #   constraints: [node.labels.worker == true]
       restart_policy:
         condition: on-failure
     environment:
@@ -42,8 +42,8 @@ services:
       - kafka_data:/var/lib/kafka/data
     deploy:
       replicas: 1
-      placement:
-        constraints: [node.labels.worker == true]
+      # placement:
+      #   constraints: [node.labels.worker == true]
       restart_policy:
         condition: on-failure
 
@@ -60,8 +60,8 @@ services:
     command: "sh -c 'sleep 30 && python /app/pcap_processor.py -c /data/csv/main.csv -x --stream_size 100000'"
     deploy:
       replicas: 1
-      placement:
-        constraints: [node.labels.worker == true]
+      # placement:
+      #   constraints: [node.labels.worker == true]
       restart_policy:
         condition: on-failure
diff --git a/scripts/deploy.sh b/scripts/deploy.sh
index 2ce11d8..3409b90 100755
--- a/scripts/deploy.sh
+++ b/scripts/deploy.sh
@@ -34,8 +34,8 @@ elif [[ $masterNode ]]; then
 
   # data streaming
   cd $scriptDir/../preprocessing
   $dockerCmd service create --name registry -p 5000:5000 registry:2
-  $dockerCmd build -t 127.0.0.1:5000/data-streamer:latest --no-cache --push -f Dockerfile.python .
-  # $dockerCmd build -t 127.0.0.1:5000/data-streamer:latest --push -f Dockerfile.python .
+  # $dockerCmd build -t 127.0.0.1:5000/data-streamer:latest --no-cache --push -f Dockerfile.python .
+  $dockerCmd build -t 127.0.0.1:5000/data-streamer:latest --push -f Dockerfile.python .
# execute cd $scriptDir diff --git a/ui/dashboard_1.json b/ui/dashboard_1.json index 79d6e5c..97e5ba7 100644 --- a/ui/dashboard_1.json +++ b/ui/dashboard_1.json @@ -154,14 +154,17 @@ "legend": { "displayMode": "list", "placement": "bottom", - "showLegend": true + "showLegend": true, + "values": [ + "percent" + ] }, "pieType": "pie", "reduceOptions": { "calcs": [ "lastNotNull" ], - "fields": "/^ProtocolCount$/", + "fields": "/^Protocol frequency$/", "values": true }, "tooltip": { @@ -190,11 +193,11 @@ }, "pluginVersion": "4.5.1", "queryType": "table", - "rawSql": "SELECT\r\n l4_protocol as Protocol,\r\n COUNT(Protocol) as ProtocolCount\r\n FROM traffic_records_all\r\n GROUP BY Protocol", + "rawSql": "SELECT\r\n l4_protocol as Protocol,\r\n COUNT(Protocol) as \"Protocol frequency\"\r\n FROM traffic_records_all\r\n GROUP BY Protocol", "refId": "A" } ], - "title": "Distribution of L4 protocol", + "title": "Distribution of L4 protocol (frequency)", "type": "piechart" }, { @@ -287,11 +290,11 @@ }, "pluginVersion": "4.5.1", "queryType": "table", - "rawSql": "SELECT \r\n Port, \r\n SourcePortCount, \r\n DestPortCount\r\nFROM\r\n(\r\n SELECT \r\n src_port AS Port, \r\n COUNT(*) AS SourcePortCount\r\n FROM traffic_records_all\r\n GROUP BY src_port\r\n ORDER BY SourcePortCount DESC\r\n LIMIT 40\r\n) AS src\r\nINNER JOIN\r\n(\r\n SELECT \r\n dst_port AS Port, \r\n COUNT(*) AS DestPortCount\r\n FROM traffic_records_all\r\n GROUP BY dst_port\r\n ORDER BY DestPortCount DESC\r\n LIMIT 40\r\n) AS dst\r\nUSING (Port)\r\nORDER BY (SourcePortCount + DestPortCount) DESC\r\nLIMIT 40;\r\n", + "rawSql": "SELECT \r\n Port, \r\n SourcePortCount AS \"Source port frequency\",\r\n DestPortCount AS \"Destination port frequency\"\r\nFROM\r\n(\r\n SELECT \r\n src_port AS Port, \r\n COUNT(*) AS SourcePortCount\r\n FROM traffic_records_all\r\n GROUP BY src_port\r\n ORDER BY SourcePortCount DESC\r\n LIMIT 40\r\n) AS src\r\nINNER JOIN\r\n(\r\n SELECT \r\n dst_port AS Port, \r\n COUNT(*) AS DestPortCount\r\n FROM traffic_records_all\r\n GROUP BY dst_port\r\n ORDER BY DestPortCount DESC\r\n LIMIT 40\r\n) AS dst\r\nUSING (Port)\r\nORDER BY (SourcePortCount + DestPortCount) DESC\r\nLIMIT 40;\r\n", "refId": "A" } ], - "title": "Top ports (by count)", + "title": "Top ports (frequency)", "type": "barchart" }, { @@ -305,67 +308,48 @@ "mode": "palette-classic" }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "fillOpacity": 80, - "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false - }, - "lineWidth": 1, - "scaleDistribution": { - "type": "linear" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } + "mappings": [] }, "overrides": [] }, "gridPos": { "h": 8, - "w": 12, - "x": 0, - "y": 16 + "w": 6, + "x": 18, + "y": 8 }, - "id": 2, + "id": 6, "options": { - "barRadius": 0, - "barWidth": 0.97, - "fullHighlight": false, - "groupWidth": 0.7, + "displayLabels": [ + "percent", + "name" + ], "legend": { - "calcs": [], "displayMode": "list", "placement": "bottom", - "showLegend": true + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^Protocol bandwidth$/", + "values": true }, - "orientation": "horizontal", - "showValue": "auto", - "stacking": "none", "tooltip": { "mode": "single", "sort": "none" - }, - 
"xField": "SourcePort", - "xTickLabelRotation": 0, - "xTickLabelSpacing": 100 + } }, "pluginVersion": "11.3.1", "targets": [ @@ -384,109 +368,12 @@ }, "pluginVersion": "4.5.1", "queryType": "table", - "rawSql": "SELECT\r\n src_port as SourcePort,\r\n COUNT(SourcePort) as SourcePortCount\r\n FROM traffic_records_all\r\n GROUP BY SourcePort\r\n ORDER BY SourcePortCount DESC\r\n LIMIT 10", + "rawSql": "SELECT\n l4_protocol as Protocol,\n SUM(pkt_len)/1024.0/1024.0 as \"Protocol bandwidth\"\n FROM traffic_records_all\n GROUP BY Protocol", "refId": "A" } ], - "title": "Top 10 source ports (by count)", - "type": "barchart" - }, - { - "datasource": { - "type": "grafana-clickhouse-datasource", - "uid": "PDEE91DDB90597936" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "fillOpacity": 80, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineWidth": 1, - "scaleDistribution": { - "type": "linear" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "id": 3, - "options": { - "barRadius": 0, - "barWidth": 0.97, - "fullHighlight": false, - "groupWidth": 0.7, - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "orientation": "horizontal", - "showValue": "auto", - "stacking": "none", - "tooltip": { - "mode": "single", - "sort": "none" - }, - "xField": "DestPort", - "xTickLabelRotation": 0, - "xTickLabelSpacing": 100 - }, - "pluginVersion": "11.3.1", - "targets": [ - { - "editorType": "sql", - "format": 1, - "meta": { - "builderOptions": { - "columns": [], - "database": "", - "limit": 1000, - "mode": "list", - "queryType": "table", - "table": "" - } - }, - "pluginVersion": "4.5.1", - "queryType": "table", - "rawSql": "SELECT\r\n dst_port as DestPort,\r\n COUNT(DestPort) as DestPortCount\r\n FROM traffic_records_all\r\n GROUP BY DestPort\r\n ORDER BY DestPortCount DESC\r\n LIMIT 10", - "refId": "A" - } - ], - "title": "Top 10 destination ports (by count)", - "type": "barchart" + "title": "Distribution of L4 protocol (bandwidth)", + "type": "piechart" } ], "preload": false, @@ -503,6 +390,6 @@ "timezone": "browser", "title": "Internet traffic capture analysis", "uid": "be59fkbp3zs3kc", - "version": 11, + "version": 1, "weekStart": "" } \ No newline at end of file diff --git a/ui/docker-compose.yaml b/ui/docker-compose.yaml index 7ac73c8..ce6f69e 100644 --- a/ui/docker-compose.yaml +++ b/ui/docker-compose.yaml @@ -15,8 +15,11 @@ services: aliases: - grafana deploy: - placement: - constraints: [node.labels.worker == true] + replicas: 1 + # placement: + # constraints: [node.labels.worker == true] + restart_policy: + condition: on-failure depends_on: - clickhouse-server1 environment: From 3f15f225f47a7bf2e177c29251d75bf61b544f66 Mon Sep 17 00:00:00 2001 From: Kaushik Narayan R Date: Thu, 28 Nov 2024 22:11:32 -0700 Subject: [PATCH 3/5] load geoip data from csv --- .../data/preprocessed_configs/config.xml | 61 ------------------- clickhouse/dml/test_seed_traffic.sql | 36 ----------- clickhouse/docker-compose.yaml | 5 +- .../main/03_insert_geoip_csv.sql | 3 + preprocessing/geoip.csv | 2 +- 5 files changed, 7 
insertions(+), 100 deletions(-)
 delete mode 100644 clickhouse/clickhouse_data/data/preprocessed_configs/config.xml
 delete mode 100644 clickhouse/dml/test_seed_traffic.sql
 create mode 100644 clickhouse/node-entrypoints/main/03_insert_geoip_csv.sql

diff --git a/clickhouse/clickhouse_data/data/preprocessed_configs/config.xml b/clickhouse/clickhouse_data/data/preprocessed_configs/config.xml
deleted file mode 100644
index 08de94b..0000000
--- a/clickhouse/clickhouse_data/data/preprocessed_configs/config.xml
+++ /dev/null
@@ -1,61 +0,0 @@
-<?xml version="1.0"?>
-<clickhouse>
-    <logger>
-        <level>trace</level>
-        <log>/var/log/clickhouse-keeper/clickhouse-keeper.log</log>
-        <errorlog>/var/log/clickhouse-keeper/clickhouse-keeper.err.log</errorlog>
-        <size>1000M</size>
-        <count>3</count>
-    </logger>
-
-    <listen_host>::</listen_host>
-
-    <path>/var/lib/clickhouse/data/</path>
-    <tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
-    <user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
-    <format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>
-
-    <keeper_server>
-        <tcp_port>9181</tcp_port>
-        <server_id>2</server_id>
-        <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
-        <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
-
-        <coordination_settings>
-            <operation_timeout_ms>10000</operation_timeout_ms>
-            <session_timeout_ms>30000</session_timeout_ms>
-            <raft_logs_level>trace</raft_logs_level>
-        </coordination_settings>
-
-        <raft_configuration>
-            <server>
-                <id>1</id>
-                <hostname>clickhouse-keeper1</hostname>
-                <port>9234</port>
-            </server>
-            <server>
-                <id>2</id>
-                <hostname>clickhouse-keeper2</hostname>
-                <port>9234</port>
-            </server>
-            <server>
-                <id>3</id>
-                <hostname>clickhouse-keeper3</hostname>
-                <port>9234</port>
-            </server>
-        </raft_configuration>
-    </keeper_server>
-
-    <listen_host>0.0.0.0</listen_host>
-    <listen_try>1</listen_try>
-
-</clickhouse>
diff --git a/clickhouse/dml/test_seed_traffic.sql b/clickhouse/dml/test_seed_traffic.sql
deleted file mode 100644
index 2399221..0000000
--- a/clickhouse/dml/test_seed_traffic.sql
+++ /dev/null
@@ -1,36 +0,0 @@
-INSERT INTO
-    traffic_records_all
-VALUES
-    (
-        '1698728400.40122',
-        'UDP',
-        '142.12.217.111',
-        '163.213.146.100',
-        443,
-        47104,
-        74
-    ) (
-        '1698728400.401217',
-        'UDP',
-        '45.144.255.42',
-        '131.174.60.217',
-        51820,
-        63998,
-        42
-    ) (
-        '1698728400.401218',
-        'TCP',
-        '152.199.153.111',
-        '202.215.192.69',
-        80,
-        65305,
-        66
-    ) (
-        '1698728400.401219',
-        'UDP',
-        '45.144.255.42',
-        '131.174.60.217',
-        51820,
-        63998,
-        42
-    )
\ No newline at end of file
diff --git a/clickhouse/docker-compose.yaml b/clickhouse/docker-compose.yaml
index d0fd34e..dac8f4b 100644
--- a/clickhouse/docker-compose.yaml
+++ b/clickhouse/docker-compose.yaml
@@ -59,7 +59,8 @@ services:
     container_name: clickhouse-server1
     volumes:
       - ../clickhouse/node1-config/:/etc/clickhouse-server/config.d/
-      - ../clickhouse/ddl/main:/docker-entrypoint-initdb.d
+      - ../clickhouse/node-entrypoints/main:/docker-entrypoint-initdb.d
+      - ../preprocessing/geoip.csv:/tmp/seedData/csv/ip_region_map.csv
       - clickhouse_server1_data:/var/lib/clickhouse
       - clickhouse_server1_TTL:/clickhouse_data/server1
     networks:
@@ -98,7 +99,7 @@ services:
     container_name: clickhouse-server2
     volumes:
       - ../clickhouse/node2-config/:/etc/clickhouse-server/config.d/
-      - ../clickhouse/ddl/common:/docker-entrypoint-initdb.d
+      - ../clickhouse/node-entrypoints/common:/docker-entrypoint-initdb.d
       - clickhouse_server2_data:/var/lib/clickhouse
       - clickhouse_server2_TTL:/clickhouse_data/server2
     networks:
diff --git a/clickhouse/node-entrypoints/main/03_insert_geoip_csv.sql b/clickhouse/node-entrypoints/main/03_insert_geoip_csv.sql
new file mode 100644
index 0000000..9a1ea5c
--- /dev/null
+++ b/clickhouse/node-entrypoints/main/03_insert_geoip_csv.sql
@@ -0,0 +1,3 @@
+INSERT INTO ip_region_map
+FROM INFILE '/tmp/seedData/csv/ip_region_map.csv'
+FORMAT CSVWithNames;
\ No newline at end of file
diff --git a/preprocessing/geoip.csv b/preprocessing/geoip.csv
index db087c0..305b5d4 100644
--- a/preprocessing/geoip.csv
+++ b/preprocessing/geoip.csv
@@ -1,4 +1,4 @@
-ip_range_start,ip_range_end,country
+ip_range_start,ip_range_end,region
 0.0.0.0,0.255.255.255,-
 1.0.0.0,1.0.0.255,Australia
 1.0.1.0,1.0.3.255,China
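With `ip_region_map` seeded from the CSV, an address can be resolved to a region by range containment. A hypothetical spot-check (the table's schema is not shown in the patch; this sketch assumes the range columns hold IPv4 strings as in the CSV, converted with ClickHouse's `IPv4StringToNum`, and reuses an address from the removed seed data):

    clickhouse-client --query "
      SELECT region FROM ip_region_map
      WHERE IPv4StringToNum('142.12.217.111')
        BETWEEN IPv4StringToNum(ip_range_start) AND IPv4StringToNum(ip_range_end)
      LIMIT 1"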
From 030659c3e10b38f136cfeb0534d13747bc8c7d1f Mon Sep 17 00:00:00 2001
From: Kaushik Narayan R
Date: Thu, 28 Nov 2024 22:15:27 -0700
Subject: [PATCH 4/5] check ClickHouse Keeper health manually because swarm
 doesn't support depends_on with a healthy condition

---
 .../clickhouse_keeper/keeper1-config.xml      |  6 ++++
 .../clickhouse_keeper/keeper2-config.xml      |  6 ++++
 .../clickhouse_keeper/keeper3-config.xml      |  6 ++++
 .../common/00_wait_for_keeper.sh              | 29 +++++++++++++++++++
 .../common/01_table_create.sql                |  0
 .../main/00_wait_for_keeper.sh                | 29 +++++++++++++++++++
 .../main/01_table_create.sql                  |  0
 .../main/02_dist_table_create.sql             |  0
 .../main/04_kafka_table_ingest.sql}           |  3 +-
 scripts/README.md                             | 11 ++++++-
 10 files changed, 88 insertions(+), 2 deletions(-)
 create mode 100755 clickhouse/node-entrypoints/common/00_wait_for_keeper.sh
 rename clickhouse/{ddl => node-entrypoints}/common/01_table_create.sql (100%)
 create mode 100755 clickhouse/node-entrypoints/main/00_wait_for_keeper.sh
 rename clickhouse/{ddl => node-entrypoints}/main/01_table_create.sql (100%)
 rename clickhouse/{ddl => node-entrypoints}/main/02_dist_table_create.sql (100%)
 rename clickhouse/{ddl/main/03_create_kafka_table.sql => node-entrypoints/main/04_kafka_table_ingest.sql} (94%)

diff --git a/clickhouse/clickhouse_keeper/keeper1-config.xml b/clickhouse/clickhouse_keeper/keeper1-config.xml
index c5e5d4d..976fa3e 100644
--- a/clickhouse/clickhouse_keeper/keeper1-config.xml
+++ b/clickhouse/clickhouse_keeper/keeper1-config.xml
@@ -18,6 +18,12 @@
         <server_id>1</server_id>
         <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
         <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
+        <http_control>
+            <port>9182</port>
+            <readiness>
+                <endpoint>/ready</endpoint>
+            </readiness>
+        </http_control>
 
         <operation_timeout_ms>10000</operation_timeout_ms>
         <session_timeout_ms>30000</session_timeout_ms>
diff --git a/clickhouse/clickhouse_keeper/keeper2-config.xml b/clickhouse/clickhouse_keeper/keeper2-config.xml
index bd2914e..cc78c0b 100644
--- a/clickhouse/clickhouse_keeper/keeper2-config.xml
+++ b/clickhouse/clickhouse_keeper/keeper2-config.xml
@@ -18,6 +18,12 @@
         <server_id>2</server_id>
         <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
         <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
+        <http_control>
+            <port>9182</port>
+            <readiness>
+                <endpoint>/ready</endpoint>
+            </readiness>
+        </http_control>
 
         <operation_timeout_ms>10000</operation_timeout_ms>
         <session_timeout_ms>30000</session_timeout_ms>
diff --git a/clickhouse/clickhouse_keeper/keeper3-config.xml b/clickhouse/clickhouse_keeper/keeper3-config.xml
index 383f9d9..37411e7 100644
--- a/clickhouse/clickhouse_keeper/keeper3-config.xml
+++ b/clickhouse/clickhouse_keeper/keeper3-config.xml
@@ -18,6 +18,12 @@
         <server_id>3</server_id>
         <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
         <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
+        <http_control>
+            <port>9182</port>
+            <readiness>
+                <endpoint>/ready</endpoint>
+            </readiness>
+        </http_control>
 
         <operation_timeout_ms>10000</operation_timeout_ms>
         <session_timeout_ms>30000</session_timeout_ms>
diff --git a/clickhouse/node-entrypoints/common/00_wait_for_keeper.sh b/clickhouse/node-entrypoints/common/00_wait_for_keeper.sh
new file mode 100755
index 0000000..a229651
--- /dev/null
+++ b/clickhouse/node-entrypoints/common/00_wait_for_keeper.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+set -e
+
+keeper_hostnames=(
+    "clickhouse-keeper1"
+    "clickhouse-keeper2"
+    "clickhouse-keeper3"
+)
+keeper_healthy=(false false false)
+
+can_proceed=false
+
+while ! $can_proceed ; do
+    for keeper_idx in "${!keeper_hostnames[@]}"; do
+        if wget -q --tries=1 --spider "http://${keeper_hostnames[$keeper_idx]}:9182/ready" ; then
+            echo "keeper healthy"
+            keeper_healthy[$keeper_idx]=true
+        fi
+    done
+    can_proceed=true
+    for keeper_idx in "${!keeper_hostnames[@]}"; do
+        if ! ${keeper_healthy[$keeper_idx]} ; then
+            can_proceed=false
+            sleep 5
+            break
+        fi
+    done
+done
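The `http_control` block exposes each keeper's readiness as a plain HTTP endpoint, which is what the entrypoint script above polls in place of a swarm-side health condition. The same probe works by hand from any container on the keeper network:

    # exit status 0 once the keeper reports itself ready
    wget -q --tries=1 --spider http://clickhouse-keeper1:9182/ready && echo "keeper1 ready"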
diff --git a/clickhouse/ddl/common/01_table_create.sql b/clickhouse/node-entrypoints/common/01_table_create.sql
similarity index 100%
rename from clickhouse/ddl/common/01_table_create.sql
rename to clickhouse/node-entrypoints/common/01_table_create.sql
diff --git a/clickhouse/node-entrypoints/main/00_wait_for_keeper.sh b/clickhouse/node-entrypoints/main/00_wait_for_keeper.sh
new file mode 100755
index 0000000..a229651
--- /dev/null
+++ b/clickhouse/node-entrypoints/main/00_wait_for_keeper.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+set -e
+
+keeper_hostnames=(
+    "clickhouse-keeper1"
+    "clickhouse-keeper2"
+    "clickhouse-keeper3"
+)
+keeper_healthy=(false false false)
+
+can_proceed=false
+
+while ! $can_proceed ; do
+    for keeper_idx in "${!keeper_hostnames[@]}"; do
+        if wget -q --tries=1 --spider "http://${keeper_hostnames[$keeper_idx]}:9182/ready" ; then
+            echo "keeper healthy"
+            keeper_healthy[$keeper_idx]=true
+        fi
+    done
+    can_proceed=true
+    for keeper_idx in "${!keeper_hostnames[@]}"; do
+        if ! ${keeper_healthy[$keeper_idx]} ; then
+            can_proceed=false
+            sleep 5
+            break
+        fi
+    done
+done
diff --git a/clickhouse/ddl/main/01_table_create.sql b/clickhouse/node-entrypoints/main/01_table_create.sql
similarity index 100%
rename from clickhouse/ddl/main/01_table_create.sql
rename to clickhouse/node-entrypoints/main/01_table_create.sql
diff --git a/clickhouse/ddl/main/02_dist_table_create.sql b/clickhouse/node-entrypoints/main/02_dist_table_create.sql
similarity index 100%
rename from clickhouse/ddl/main/02_dist_table_create.sql
rename to clickhouse/node-entrypoints/main/02_dist_table_create.sql
diff --git a/clickhouse/ddl/main/03_create_kafka_table.sql b/clickhouse/node-entrypoints/main/04_kafka_table_ingest.sql
similarity index 94%
rename from clickhouse/ddl/main/03_create_kafka_table.sql
rename to clickhouse/node-entrypoints/main/04_kafka_table_ingest.sql
index b320fe0..f6e2e14 100644
--- a/clickhouse/ddl/main/03_create_kafka_table.sql
+++ b/clickhouse/node-entrypoints/main/04_kafka_table_ingest.sql
@@ -11,6 +11,7 @@
 kafka_topic_list = 'traffic_records_stream',
 kafka_group_name = 'clickhouse_consumer',
 kafka_format = 'JSONEachRow',
 kafka_num_consumers = 1;
+
 CREATE MATERIALIZED VIEW traffic_records_kafka_view TO traffic_records_all AS
 SELECT time AS time_stamp,
     l4_proto AS l4_protocol,
@@ -19,4 +20,4 @@ SELECT time AS time_stamp,
     src_port,
     dst_port,
     pkt_len
-FROM traffic_records_kafka_queue;
\ No newline at end of file
+FROM traffic_records_kafka_queue;
diff --git a/scripts/README.md b/scripts/README.md
index e28e606..1aa037a 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -1,4 +1,13 @@
-# Full setup
+# End-to-end stack management
+
+## Windows (PowerShell)
 
 `deploy.ps1 -MasterNode` to deploy stack with current node as manager
+
 `deploy.ps1 -downStack` to bring down stack (run from manager node)
+
+## Linux/macOS (Bash)
+
+`deploy.sh -M` to deploy and `deploy.sh -D` to bring down the stack
+
+Add `-S` if Docker requires `sudo` privileges
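The README covers the manager-side flags; a follower box joins with the token and manager address that `docker swarm init` prints on the manager. A hypothetical invocation (token and address are placeholders):

    # on each worker machine; add -S only if docker needs sudo there
    ./deploy.sh -T SWMTKN-1-xxxx -A 10.0.0.5:2377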
From fe7f2e570d44b7847b2d363887de851df267059b Mon Sep 17 00:00:00 2001
From: Kaushik Narayan R
Date: Thu, 28 Nov 2024 22:19:05 -0700
Subject: [PATCH 5/5] stream slow option

---
 preprocessing/docker-compose.yml |  2 +-
 preprocessing/pcap_processor.py  | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/preprocessing/docker-compose.yml b/preprocessing/docker-compose.yml
index 0973767..98bf52c 100644
--- a/preprocessing/docker-compose.yml
+++ b/preprocessing/docker-compose.yml
@@ -57,7 +57,7 @@ services:
       - data-streamer
     volumes:
       - "../preprocessing/10k_sample_2023_10_01-2023_10_31.csv:/data/csv/main.csv:ro"
-    command: "sh -c 'sleep 30 && python /app/pcap_processor.py -c /data/csv/main.csv -x --stream_size 100000'"
+    command: "sh -c 'sleep 30 && python /app/pcap_processor.py -c /data/csv/main.csv -x --stream_size 100000 -l 0.1'"
     deploy:
       replicas: 1
       # placement:
       #   constraints: [node.labels.worker == true]
diff --git a/preprocessing/pcap_processor.py b/preprocessing/pcap_processor.py
index 11a15fd..fc31e9f 100644
--- a/preprocessing/pcap_processor.py
+++ b/preprocessing/pcap_processor.py
@@ -1,6 +1,7 @@
 from argparse import ArgumentParser
 from datetime import datetime, timezone
 import csv
+import time
 
 from scapy.packet import Packet
 from scapy.utils import PcapReader
@@ -173,6 +174,13 @@ if __name__ == "__main__":
         dest="_stream",
         action="store_true",
     )
+    argp.add_argument(
+        "-l",
+        "--delay",
+        required=False,
+        default=0,
+        dest="_delay"
+    )
     argp.add_argument(
         "-d",
         "--debug",
@@ -187,6 +195,7 @@ if __name__ == "__main__":
     csv_file = args._csv
     out_file = args._out
     streaming = args._stream
+    batch_delay = float(args._delay)
     sample = args._sample
     DEBUG = args._debug
@@ -207,6 +216,8 @@ if __name__ == "__main__":
             producer.client.send(KAFKA_TOPIC, row_to_dict(row))
             dbg_print(row_to_dict(row))
             dbg_print("streamed packet", idx)
+            if idx > 0 and idx % batch_size == 0:
+                time.sleep(batch_delay)
             if sample and idx > sample_size:
                 break
     print(f"total streamed: {idx}")
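For local experiments outside the stack, the streamer can be throttled the same way the compose command does it. A sketch, assuming a local Python environment with the project's dependencies, a reachable Kafka broker, and a longer 0.5 s pause than the 0.1 s used above (paths are relative to the repo root):

    python preprocessing/pcap_processor.py \
        -c preprocessing/10k_sample_2023_10_01-2023_10_31.csv \
        -x --stream_size 100000 -l 0.5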