Merge pull request #5 from 20kaushik02/integration_2

Integration 2
Kaushik Narayan Ravishankar 2024-11-28 01:23:41 -07:00 committed by GitHub
commit 2f96ef5641
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 674 additions and 38 deletions

View File

@@ -18,8 +18,8 @@ SETTINGS storage_policy = 'hot_cold';
 
 CREATE TABLE ip_region_map (
     ip_range_start IPv4,
     ip_range_end IPv4,
-    region String,
+    region LowCardinality(String),
     INDEX region_idx region TYPE bloom_filter
 ) ENGINE = ReplicatedMergeTree(
     '/clickhouse/tables/{shard}/ip_region_map',

View File

@@ -0,0 +1,28 @@
-- local table creation
CREATE TABLE traffic_records (
    time_stamp DateTime64 (6, 'Japan') CODEC (Delta, ZSTD),
    l4_protocol Enum8 ('TCP' = 1, 'UDP' = 2),
    src_ip IPv4,
    dst_ip IPv4,
    src_port UInt16 CODEC (ZSTD),
    dst_port UInt16 CODEC (ZSTD),
    pkt_len UInt16 CODEC (ZSTD),
    INDEX port_idx src_port TYPE bloom_filter GRANULARITY 10
) ENGINE = ReplicatedMergeTree(
    '/clickhouse/tables/{shard}/traffic_records',
    '{replica}'
)
ORDER BY time_stamp
TTL toDateTime(time_stamp) + INTERVAL 15 DAY TO VOLUME 'cold_vol'
SETTINGS storage_policy = 'hot_cold';

CREATE TABLE ip_region_map (
    ip_range_start IPv4,
    ip_range_end IPv4,
    region LowCardinality(String),
    INDEX region_idx region TYPE bloom_filter
) ENGINE = ReplicatedMergeTree(
    '/clickhouse/tables/{shard}/ip_region_map',
    '{replica}'
)
ORDER BY ip_range_start;
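
Once the init scripts have run on a node, the schema is easy to sanity-check from Python. A minimal sketch, assuming the clickhouse-driver package (not part of this PR) and a node reachable on the native port; the host, port, and lookup IP are invented example values:

    # Sketch: confirm the replicated tables exist and try a region lookup.
    # Assumes `pip install clickhouse-driver`; host/port and the sample IP
    # are assumptions, not values taken from this repo.
    from clickhouse_driver import Client

    client = Client(host="localhost", port=9000)
    print(client.execute("SHOW TABLES"))

    # ip_region_map stores [start, end] ranges, so a containment scan
    # (narrowed by the ORDER BY ip_range_start key) resolves one address:
    rows = client.execute(
        "SELECT region FROM ip_region_map "
        "WHERE ip_range_start <= toIPv4('203.0.113.7') "
        "  AND toIPv4('203.0.113.7') <= ip_range_end "
        "LIMIT 1"
    )
    print(rows)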

View File

@@ -0,0 +1,22 @@
CREATE TABLE traffic_records_kafka_queue (
    time Float64,
    l4_proto String,
    src_addr String,
    dst_addr String,
    src_port UInt16,
    dst_port UInt16,
    pkt_len UInt32
) ENGINE = Kafka() SETTINGS kafka_broker_list = 'kafka:9092',
kafka_topic_list = 'traffic_records_stream',
kafka_group_name = 'clickhouse_consumer',
kafka_format = 'JSONEachRow',
kafka_num_consumers = 1;

CREATE MATERIALIZED VIEW traffic_records_kafka_view TO traffic_records_all AS
SELECT time AS time_stamp,
    l4_proto AS l4_protocol,
    src_addr AS src_ip,
    dst_addr AS dst_ip,
    src_port,
    dst_port,
    pkt_len
FROM traffic_records_kafka_queue;
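
Since kafka_format = 'JSONEachRow', every message on traffic_records_stream must be a flat JSON object carrying exactly these seven fields; the materialized view then renames them into traffic_records_all. A minimal conforming producer, reusing the kafka-python setup already in this repo (the payload values are invented):

    # Sketch: publish one well-formed message for the Kafka engine table.
    from kafka import KafkaProducer
    import json
    import time

    producer = KafkaProducer(
        bootstrap_servers=["kafka:9092"],
        value_serializer=lambda x: json.dumps(x).encode("utf-8"),
    )
    producer.send("traffic_records_stream", {
        "time": time.time(),     # Float64 epoch seconds -> time_stamp
        "l4_proto": "TCP",       # must be 'TCP' or 'UDP' to fit the Enum8
        "src_addr": "10.0.0.1",  # example addresses, not real captures
        "dst_addr": "10.0.0.2",
        "src_port": 443,
        "dst_port": 51234,
        "pkt_len": 1500,
    })
    producer.flush()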

View File

@@ -41,8 +41,7 @@ services:
     container_name: clickhouse-server1
     volumes:
       - ../clickhouse/node1-config/:/etc/clickhouse-server/config.d/
-      - ../clickhouse/ddl/common/table_create.sql:/docker-entrypoint-initdb.d/common_table_create.sql
-      - ../clickhouse/ddl/distr/table_create.sql:/docker-entrypoint-initdb.d/distr_table_create.sql
+      - ../clickhouse/ddl/main:/docker-entrypoint-initdb.d
       - clickhouse_server1_data:/var/lib/clickhouse
       - clickhouse_server1_TTL:/clickhouse_data/server1
     networks:
@@ -79,7 +78,7 @@ services:
     container_name: clickhouse-server2
     volumes:
       - ../clickhouse/node2-config/:/etc/clickhouse-server/config.d/
-      - ../clickhouse/ddl/common/table_create.sql:/docker-entrypoint-initdb.d/common_table_create.sql
+      - ../clickhouse/ddl/common:/docker-entrypoint-initdb.d
       - clickhouse_server2_data:/var/lib/clickhouse
       - clickhouse_server2_TTL:/clickhouse_data/server2
     networks:

View File

@@ -28,6 +28,8 @@ services:
       KAFKA_BROKER_ID: 1
       KAFKA_MESSAGE_MAX_BYTES: 200000000
       KAFKA_REPLICA_FETCH_MAX_BYTES: 200000000
+      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+    command: sh -c "/etc/confluent/docker/run && kafka-topics --create --bootstrap-server kafka:9092 --replication-factor 1 --partitions 1 --topic traffic_records_stream"
     networks:
       data-network:
         aliases:
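
The topic is created by the broker's startup command, so it is worth confirming it exists before streaming. A quick check with kafka-python, assuming it runs inside the compose network where the kafka alias resolves:

    # Sketch: verify the broker is reachable and the topic was created.
    from kafka import KafkaConsumer

    consumer = KafkaConsumer(bootstrap_servers=["kafka:9092"])
    assert "traffic_records_stream" in consumer.topics(), "topic not created yet"
    print("traffic_records_stream is ready")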

View File

@@ -12,36 +12,33 @@ import json
 dbg_print = lambda *x: DEBUG and print(f"[DEBUG] {x}")
+# Kafka Configuration
+KAFKA_TOPIC = "traffic_records_stream"
+KAFKA_SERVER = "kafka:9092"  # Adjust to your Kafka server
 class KafkaClient:
-    def __init__(self, topic_name=None, mode='producer'):
+    def __init__(self, topic_name=None, mode="producer"):
         self.mode = mode
         self.topic_name = topic_name
-        if mode == 'producer':
+        if mode == "producer":
             self.client = KafkaProducer(
-                bootstrap_servers=['kafka:9092'],
-                max_request_size = 200000000,
-                #api_version=(0,11,5),
-                value_serializer=lambda x: json.dumps(x).encode('utf-8'))
-        elif mode == 'consumer' and topic_name is not None:
+                bootstrap_servers=[KAFKA_SERVER],
+                max_request_size=200000000,
+                # api_version=(0,11,5),
+                value_serializer=lambda x: json.dumps(x).encode("utf-8"),
+            )
+        elif mode == "consumer" and topic_name is not None:
             self.client = KafkaConsumer(
-                topic_name,
-                bootstrap_servers=['localhost:9092'],
-                api_version=(0,11,5),
-                value_deserializer=lambda x: json.loads(x.decode('utf-8')))
+                topic_name,
+                bootstrap_servers=["localhost:9092"],
+                api_version=(0, 11, 5),
+                value_deserializer=lambda x: json.loads(x.decode("utf-8")),
+            )
         else:
             raise ValueError("Consumer mode requires a topic_name")
-# Kafka Configuration
-KAFKA_TOPIC = 'pcap_stream_new'
-KAFKA_SERVER = 'kafka:9092' # Adjust to your Kafka server
-#KAFKA_SERVER = 'kafka_service:9092'
-# Initialize Kafka Producer
-# producer = KafkaProducer(
-#     bootstrap_servers=KAFKA_SERVER,
-#     value_serializer=lambda v: v.encode('utf-8') if isinstance(v, str) else str(v).encode('utf-8') #remove intermediate JSON encoding
-# )
 producer = KafkaClient(topic_name=KAFKA_TOPIC)
@@ -108,7 +105,7 @@ def create_pkt_object(pkt: Packet) -> dict:
         "dst_addr": pkt[IP].dst,
         "src_port": pkt[l4_proto].sport,
         "dst_port": pkt[l4_proto].dport,
-        "pkt_len": len(pkt)
+        "pkt_len": len(pkt),
     }
     return res_json
@@ -157,7 +154,9 @@ if __name__ == "__main__":
     argp.add_argument("-f", "--pcap_file", required=False, dest="_pcap")
     argp.add_argument("-c", "--csv_file", required=False, dest="_csv")
     argp.add_argument("-o", "--out_file", required=False, dest="_out")
-    argp.add_argument("--stream_size", required=False, default=10000, dest="_streamsize")
+    argp.add_argument(
+        "--stream_size", required=False, default=10000, dest="_streamsize"
+    )
     argp.add_argument(
         "-x",
         "--sample",
@@ -193,21 +192,21 @@
     DEBUG = args._debug
     sample_size = int(args._streamsize)  # 100000
-    batch_size = 100 #100000
+    batch_size = 100  # 100000
     # if preprocessed data ready for streaming
     if csv_file:
-        #print("true")
         with open(csv_file, newline="") as f:
             csv_rdr = csv.reader(f)
             next(csv_rdr)  # skip headers
             pkts = []
+            print("started stream from csv")
             for idx, row in enumerate(csv_rdr):
                 # direct streaming to kafka goes here
                 producer.client.send(KAFKA_TOPIC, row_to_dict(row))
                 dbg_print(row_to_dict(row))
-                print("streamed packet", idx)
+                dbg_print("streamed packet", idx)
                 if sample and idx > sample_size:
                     break
             print(f"total streamed: {idx}")
@@ -222,6 +221,8 @@ if __name__ == "__main__":
             pkts = []
             cnt = 0
             seen_count = 0
+            print("started stream from pcap")
             for idx, pkt in enumerate(pcap_rdr):
                 seen_count += 1
                 # filter packets
@@ -243,8 +244,9 @@
                 packet_data = create_pkt_object(pkt)
                 producer.client.send(KAFKA_TOPIC, packet_data)
                 cnt += 1
-                #print(f"streamed packet at index {idx} ")
-                if idx > sample_size: break
+                # print(f"streamed packet at index {idx} ")
+                if idx > sample_size:
+                    break
     print(f"total seen: {seen_count-1}")
     print(f"total streamed: {cnt}")

View File

@@ -14,24 +14,24 @@ if ($downStack) {
     Write-Output "[+] Removing stack..."
     docker stack rm $stackName
     docker service rm registry
     Start-Sleep 20
     docker volume rm $(docker volume ls --filter name=$stackName -q)
 }
 elseif ($MasterNode) {
     Write-Output "[+] swarm master"
     # cleanup
     docker stack rm $stackName
     docker service rm registry
     # data streaming
     Set-Location $scriptDir/../preprocessing
     docker service create --name registry -p 5000:5000 registry:2
-    docker build -t 127.0.0.1:5000/data-streamer:latest --no-cache --push -f Dockerfile.python .
+    # docker build -t 127.0.0.1:5000/data-streamer:latest --no-cache --push -f Dockerfile.python .
+    docker build -t 127.0.0.1:5000/data-streamer:latest --push -f Dockerfile.python .
     # execute
     Set-Location $scriptDir
     docker stack deploy -d `
         -c ../preprocessing/docker-compose.yml `
         -c ../clickhouse/docker-compose.yaml `
         -c ../ui/docker-compose.yaml `
         $stackName
     # scripts

scripts/deploy.sh Normal file (53 lines added)
View File

@@ -0,0 +1,53 @@
#!/bin/bash
# M = master node, D = tear down stack, T = swarm join token, A = manager address
while getopts "MDT:A:" flag; do
    case "${flag}" in
        M) masterNode=true ;;
        D) downStack=true ;;
        T) swarmToken=$OPTARG ;;
        A) managerAddr=$OPTARG ;;
    esac
done
echo "masterNode: $masterNode"
echo "downStack: $downStack"
echo "swarmToken: $swarmToken"
echo "managerAddr: $managerAddr"
scriptDir=$(dirname "$(readlink -f "$0")")
# echo $scriptDir # ===> /Project/scripts
stackName="TheWebFarm"
if [[ $downStack ]]; then
    echo "[+] Removing stack..."
    docker stack rm $stackName
    docker service rm registry
    sleep 20
    docker volume rm $(docker volume ls --filter name=$stackName -q)
elif [[ $masterNode ]]; then
    echo "[+] swarm master"
    # data streaming
    cd $scriptDir/../preprocessing
    docker service create --name registry -p 5000:5000 registry:2
    # docker build -t 127.0.0.1:5000/data-streamer:latest --no-cache --push -f Dockerfile.python .
    docker build -t 127.0.0.1:5000/data-streamer:latest --push -f Dockerfile.python .
    # execute
    cd $scriptDir
    docker stack deploy -d \
        -c ../preprocessing/docker-compose.yml \
        -c ../clickhouse/docker-compose.yaml \
        -c ../ui/docker-compose.yaml \
        $stackName
    # scripts
    # pip install -r "$scriptDir/../final/config_update_scripts/requirements.txt"
    # cd $scriptDir/../preprocessing
    # python3 update_trigger.py
else
    echo "[+] swarm follower"
    echo "[+] joining swarm with token $swarmToken"
    docker swarm join --token $swarmToken $managerAddr
fi

ui/dashboard_1.json Normal file (508 lines added)
View File

@ -0,0 +1,508 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 1,
"links": [],
"panels": [
{
"datasource": {
"type": "grafana-clickhouse-datasource",
"uid": "PDEE91DDB90597936"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"fillOpacity": 80,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineWidth": 1,
"scaleDistribution": {
"type": "linear"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 18,
"x": 0,
"y": 0
},
"id": 5,
"options": {
"barRadius": 0,
"barWidth": 0.9,
"fullHighlight": false,
"groupWidth": 0.7,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"orientation": "auto",
"showValue": "never",
"stacking": "normal",
"tooltip": {
"mode": "single",
"sort": "none"
},
"xField": "Port",
"xTickLabelRotation": 0,
"xTickLabelSpacing": 0
},
"pluginVersion": "11.3.1",
"targets": [
{
"editorType": "sql",
"format": 1,
"meta": {
"builderOptions": {
"columns": [],
"database": "",
"limit": 1000,
"mode": "list",
"queryType": "table",
"table": ""
}
},
"pluginVersion": "4.5.1",
"queryType": "table",
"rawSql": "SELECT Port,\r\n src_bw/1024.0/1024.0 AS \"Source Port Bandwidth (MB)\",\r\n dst_bw/1024.0/1024.0 AS \"Destination Port Bandwidth (MB)\"\r\nFROM (\r\n SELECT src_port AS Port,\r\n SUM(pkt_len) AS src_bw\r\n FROM traffic_records_all\r\n GROUP BY src_port\r\n ORDER BY src_bw DESC\r\n LIMIT 40\r\n ) AS src\r\n INNER JOIN (\r\n SELECT dst_port AS Port,\r\n SUM(pkt_len) AS dst_bw\r\n FROM traffic_records_all\r\n GROUP BY dst_port\r\n ORDER BY dst_bw DESC\r\n LIMIT 40\r\n ) AS dst USING (Port)\r\nORDER BY (src_bw + dst_bw) DESC\r\nLIMIT 40;",
"refId": "A"
}
],
"title": "Top ports (by bandwidth)",
"type": "barchart"
},
{
"datasource": {
"type": "grafana-clickhouse-datasource",
"uid": "PDEE91DDB90597936"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": []
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 6,
"x": 18,
"y": 0
},
"id": 1,
"options": {
"displayLabels": [
"percent",
"name"
],
"legend": {
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"pieType": "pie",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "/^ProtocolCount$/",
"values": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.1",
"targets": [
{
"datasource": {
"type": "grafana-clickhouse-datasource",
"uid": "PDEE91DDB90597936"
},
"editorType": "sql",
"format": 1,
"meta": {
"builderOptions": {
"columns": [],
"database": "",
"limit": 1000,
"mode": "list",
"queryType": "table",
"table": ""
}
},
"pluginVersion": "4.5.1",
"queryType": "table",
"rawSql": "SELECT\r\n l4_protocol as Protocol,\r\n COUNT(Protocol) as ProtocolCount\r\n FROM traffic_records_all\r\n GROUP BY Protocol",
"refId": "A"
}
],
"title": "Distribution of L4 protocol",
"type": "piechart"
},
{
"datasource": {
"type": "grafana-clickhouse-datasource",
"uid": "PDEE91DDB90597936"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"fillOpacity": 100,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineWidth": 1,
"scaleDistribution": {
"type": "linear"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 18,
"x": 0,
"y": 8
},
"id": 4,
"options": {
"barRadius": 0,
"barWidth": 0.9,
"fullHighlight": false,
"groupWidth": 0.7,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"orientation": "auto",
"showValue": "never",
"stacking": "normal",
"tooltip": {
"mode": "single",
"sort": "none"
},
"xField": "Port",
"xTickLabelRotation": 0,
"xTickLabelSpacing": 0
},
"pluginVersion": "11.3.1",
"targets": [
{
"editorType": "sql",
"format": 1,
"meta": {
"builderOptions": {
"columns": [],
"database": "",
"limit": 1000,
"mode": "list",
"queryType": "table",
"table": ""
}
},
"pluginVersion": "4.5.1",
"queryType": "table",
"rawSql": "SELECT \r\n Port, \r\n SourcePortCount, \r\n DestPortCount\r\nFROM\r\n(\r\n SELECT \r\n src_port AS Port, \r\n COUNT(*) AS SourcePortCount\r\n FROM traffic_records_all\r\n GROUP BY src_port\r\n ORDER BY SourcePortCount DESC\r\n LIMIT 40\r\n) AS src\r\nINNER JOIN\r\n(\r\n SELECT \r\n dst_port AS Port, \r\n COUNT(*) AS DestPortCount\r\n FROM traffic_records_all\r\n GROUP BY dst_port\r\n ORDER BY DestPortCount DESC\r\n LIMIT 40\r\n) AS dst\r\nUSING (Port)\r\nORDER BY (SourcePortCount + DestPortCount) DESC\r\nLIMIT 40;\r\n",
"refId": "A"
}
],
"title": "Top ports (by count)",
"type": "barchart"
},
{
"datasource": {
"type": "grafana-clickhouse-datasource",
"uid": "PDEE91DDB90597936"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"fillOpacity": 80,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineWidth": 1,
"scaleDistribution": {
"type": "linear"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
},
"id": 2,
"options": {
"barRadius": 0,
"barWidth": 0.97,
"fullHighlight": false,
"groupWidth": 0.7,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"orientation": "horizontal",
"showValue": "auto",
"stacking": "none",
"tooltip": {
"mode": "single",
"sort": "none"
},
"xField": "SourcePort",
"xTickLabelRotation": 0,
"xTickLabelSpacing": 100
},
"pluginVersion": "11.3.1",
"targets": [
{
"editorType": "sql",
"format": 1,
"meta": {
"builderOptions": {
"columns": [],
"database": "",
"limit": 1000,
"mode": "list",
"queryType": "table",
"table": ""
}
},
"pluginVersion": "4.5.1",
"queryType": "table",
"rawSql": "SELECT\r\n src_port as SourcePort,\r\n COUNT(SourcePort) as SourcePortCount\r\n FROM traffic_records_all\r\n GROUP BY SourcePort\r\n ORDER BY SourcePortCount DESC\r\n LIMIT 10",
"refId": "A"
}
],
"title": "Top 10 source ports (by count)",
"type": "barchart"
},
{
"datasource": {
"type": "grafana-clickhouse-datasource",
"uid": "PDEE91DDB90597936"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"fillOpacity": 80,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineWidth": 1,
"scaleDistribution": {
"type": "linear"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"id": 3,
"options": {
"barRadius": 0,
"barWidth": 0.97,
"fullHighlight": false,
"groupWidth": 0.7,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"orientation": "horizontal",
"showValue": "auto",
"stacking": "none",
"tooltip": {
"mode": "single",
"sort": "none"
},
"xField": "DestPort",
"xTickLabelRotation": 0,
"xTickLabelSpacing": 100
},
"pluginVersion": "11.3.1",
"targets": [
{
"editorType": "sql",
"format": 1,
"meta": {
"builderOptions": {
"columns": [],
"database": "",
"limit": 1000,
"mode": "list",
"queryType": "table",
"table": ""
}
},
"pluginVersion": "4.5.1",
"queryType": "table",
"rawSql": "SELECT\r\n dst_port as DestPort,\r\n COUNT(DestPort) as DestPortCount\r\n FROM traffic_records_all\r\n GROUP BY DestPort\r\n ORDER BY DestPortCount DESC\r\n LIMIT 10",
"refId": "A"
}
],
"title": "Top 10 destination ports (by count)",
"type": "barchart"
}
],
"preload": false,
"schemaVersion": 40,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Internet traffic capture analysis",
"uid": "be59fkbp3zs3kc",
"version": 11,
"weekStart": ""
}

View File

@@ -4,7 +4,9 @@ services:
     container_name: grafana
     volumes:
       - grafana-storage:/var/lib/grafana
-      - ../ui/grafana_clickhouse_datasource.yaml:/etc/grafana/provisioning/datasources/clickhouse.yaml # relative to clickhouse directory
+      - ../ui/grafana_clickhouse_datasource.yaml:/etc/grafana/provisioning/datasources/clickhouse.yaml:ro
+      - ../ui/grafana_dashboards.yaml:/etc/grafana/provisioning/dashboards/grafana_dashboards.yaml:ro
+      - ../ui/dashboard_1.json:/var/lib/grafana/dashboards/dashboard_1.json
     networks:
       outside_net:
         aliases:
@@ -27,3 +29,4 @@ networks:
 volumes:
   grafana-storage:
     driver: local

View File

@@ -0,0 +1,19 @@
apiVersion: 1
providers:
  - name: 'The Web Farm'
    orgId: 1
    # <string> name of the dashboard folder.
    folder: ''
    # <string> provider type. Default to 'file'
    type: file
    # <bool> disable dashboard deletion
    disableDeletion: false
    # <int> how often Grafana will scan for changed dashboards
    updateIntervalSeconds: 10
    # <bool> allow updating provisioned dashboards from the UI
    allowUiUpdates: false
    options:
      # <string, required> path to dashboard files on disk. Required when using the 'file' type
      path: /var/lib/grafana/dashboards
      # <bool> use folder names from filesystem to create folders in Grafana
      foldersFromFilesStructure: true
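
With allowUiUpdates: false, the JSON file stays the source of truth for the dashboard. A quick local sanity check before deploying, run from the repo root; pure standard library, nothing assumed beyond the paths in this PR:

    # Sketch: confirm the provisioned dashboard parses and list its panels.
    import json

    with open("ui/dashboard_1.json") as f:
        dash = json.load(f)

    print(dash["title"])  # "Internet traffic capture analysis"
    for panel in dash["panels"]:
        print(f'- {panel["title"]} ({panel["type"]})')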