Mirror of https://github.com/20kaushik02/real-time-traffic-analysis-clickhouse.git (synced 2025-12-06 06:34:06 +00:00)

Commit ede2a760d6
BIN  CSE512 Final Report.pdf  (new file; binary file not shown)
BIN  Demo_Video.mp4  (new file; binary file not shown)
README.md  (35 lines changed)
@@ -1 +1,36 @@
# Real-time analytics of Internet traffic flow data

## Download the dataset

- The full preprocessed dataset (1.4 GB) is hosted [here](https://tmp.knravish.me/512_proj/1M_sample_2023_10_01-2023_10_31.csv) (a scripted download sketch follows this list)
- Place this file in the `preprocessing` directory
- For testing purposes, you can instead use the sample CSV that has 10k records from each day; change the bind path in the Compose file accordingly
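
If you'd rather script the download, here is a minimal Python sketch; it assumes the third-party `requests` package is installed and saves the file into `preprocessing/` under the filename taken from the URL above.

```python
# Minimal download sketch; assumes `requests` is installed
# (python3 -m pip install requests) and the file belongs in preprocessing/.
import os
import requests

URL = "https://tmp.knravish.me/512_proj/1M_sample_2023_10_01-2023_10_31.csv"
DEST = os.path.join("preprocessing", os.path.basename(URL))

# Stream the ~1.4 GB file to disk in chunks instead of buffering it in memory.
with requests.get(URL, stream=True, timeout=60) as resp:
    resp.raise_for_status()
    with open(DEST, "wb") as f:
        for chunk in resp.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
            f.write(chunk)

print(f"Saved dataset to {DEST}")
```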
## To run the project

- From the `scripts` directory:
  - Run `deploy.ps1 -M` on Windows
  - Run `deploy.sh -M` on Linux/macOS (add `-S` if sudo is needed for Docker)
  - See the `README` in `scripts` for more details
- This sets up the whole stack (a quick health-check sketch follows this list)
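
To confirm the stack came up, one option is to ask Swarm whether every service has reached its desired replica count. A minimal sketch, assuming the stack name `TheWebFarm` used by this repo's deploy scripts (it appears in the redeploy command later in this diff):

```python
# Sketch: check that every service in the "TheWebFarm" stack has reached
# its desired replica count (e.g. "1/1").
import subprocess

out = subprocess.check_output(
    ["docker", "stack", "services", "TheWebFarm",
     "--format", "{{.Name}} {{.Replicas}}"],
    text=True,
)
for line in out.splitlines():
    name, replicas = line.rsplit(" ", 1)
    current, desired = replicas.split("/")
    print(("OK  " if current == desired else "WAIT") + f"  {name} ({replicas})")
```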
### Access the UI

- The Grafana web interface is at `http://localhost:7602` (a reachability sketch follows this list)
- Login:
  - Username: `thewebfarm`
  - Password: `mrafbeweht`
- Go to `Dashboards` > `Internet traffic capture analysis`
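
Before logging in, you can verify Grafana is reachable via its standard unauthenticated `/api/health` endpoint; a minimal sketch, again assuming `requests`:

```python
# Sketch: poll Grafana's unauthenticated health endpoint (assumes `requests`).
import requests

resp = requests.get("http://localhost:7602/api/health", timeout=10)
resp.raise_for_status()
print(resp.json())  # expected shape: {"database": "ok", "version": "..."}
```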
### To run the shard creation and scaling script

- From the `scripts` directory:
  - Install dependencies: `python3 -m pip install -r ../clickhouse/config_update_scripts/requirements.txt`
  - Run `python3 ../clickhouse/config_update_scripts/update_trigger.py`
- Every 2 minutes, this checks container resource utilization and, when memory usage is high across the ClickHouse servers, creates a new shard backed by two server nodes (a sketch of the polling loop follows this list)
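
The 2-minute cadence comes from the `schedule` library that `update_trigger.py` imports (see the diff below). The driving loop is not shown in the hunks, but under the standard `schedule` pattern it plausibly looks like:

```python
# Plausible shape of the polling loop in update_trigger.py, assuming the
# standard `schedule` library pattern (python3 -m pip install schedule).
import time
import schedule

def check_util_exec():
    # Inspect `docker stats` output and trigger resharding when the
    # ClickHouse servers' memory utilization is high (full body in the
    # diff below).
    ...

schedule.every(2).minutes.do(check_util_exec)

while True:
    schedule.run_pending()
    time.sleep(1)
```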
## Limitations

- For multi-node deployments using Docker Swarm, the manager node must run on Linux with a standalone Docker installation (i.e., outside Docker Desktop) due to limitations in the Docker Swarm engine
BIN  architecture_diagram.png  (new file, 120 KiB; binary file not shown)
@@ -105,3 +105,4 @@ if __name__ == "__main__":
     with open(f'../clickhouse/node{curr_num_servers + i}-config/storage-policy.xml','w') as f4:
         f4.write(storage_policy_content)
 
+    print("Config Files Updated!")
@@ -5,6 +5,7 @@ import schedule
 import time
 
 def check_util_exec():
+    print("Performing check")
     # extracting details of each running container in json format
     try:
         all_services = subprocess.check_output(["sudo", "docker","stats","--no-stream","--format","json"],text=True).split('\n')[:-1]
@@ -16,14 +17,20 @@ def check_util_exec():
     resource_util_exceed_flag = True # Flag to check if all of the containers have exceeded the memory-utilization threshold
     for service in all_services:
         if re.findall(r'clickhouse-server',service['Name']):
-            if float(service['MemPerc'][:-1]) < 60:
+            if float(service['MemPerc'][:-1]) < 50:
                 resource_util_exceed_flag = False
 
     if resource_util_exceed_flag:
-        process = subprocess.Popen(['python3','../clickhouse/update_config_scripts/update_compose.py'],text=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
+        print("Config update triggered")
+        process = subprocess.Popen(['python3','../clickhouse/config_update_scripts/update_compose.py'],text=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
         stdout, stderr = process.communicate() # Wait for the process to finish and capture output
         print("Standard Output:", stdout)
         print("Standard Error:", stderr)
+        if stdout:
+            redeploy = subprocess.Popen(['docker','stack','deploy','-d','-c','../preprocessing/docker-compose.yml','-c','../clickhouse/docker-compose.yaml','-c','../ui/docker-compose.yaml','TheWebFarm'])
+            stdout1, stderr1 = redeploy.communicate() # Wait for the process to finish and capture output
+            print("Standard Output:", stdout1)
+            print("Standard Error:", stderr1)
+            time.sleep(120)
 # try:
 #     all_services = subprocess.check_output(["sudo", "docker","stats","--no-stream","--format","json"],text=True).split('\n')[:-1]
 # except subprocess.CalledProcessError as e:
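
The two hunks above elide the file's intervening lines, which must convert each line of the `docker stats --no-stream --format json` output into a dict before fields like `service['Name']` and `service['MemPerc']` can be indexed. A minimal sketch of that step, assuming the standard `json` module:

```python
# Sketch of the elided step between the hunks above: `docker stats
# --no-stream --format json` emits one JSON object per line, which must
# be decoded before service['Name'] / service['MemPerc'] can be indexed.
import json
import subprocess

raw = subprocess.check_output(
    ["docker", "stats", "--no-stream", "--format", "json"], text=True
)
all_services = [json.loads(line) for line in raw.split("\n") if line]

for service in all_services:
    # MemPerc looks like "42.35%"; drop the trailing '%' before comparing.
    print(service["Name"], float(service["MemPerc"][:-1]))
```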
preprocessing/geoip_cc.csv  (new file, 252,127 lines; diff suppressed because it is too large)
@@ -46,11 +46,13 @@ elif [[ $masterNode ]]; then
     -c ../clickhouse/docker-compose.yaml \
     -c ../ui/docker-compose.yaml \
     $stackName
-elif [[ $autoShard ]]; then
-  cd $scriptDir
-  python3 $scriptDir/../clickhouse/config_update_scripts/update_trigger.py
 else
   echo "[+] swarm follower"
   echo "[+] joining swarm with token $swarmToken"
   $dockerCmd swarm join --token $swarmToken $managerAddr
 fi
+
+if [[ $autoShard ]]; then
+  cd $scriptDir
+  python3 ../clickhouse/config_update_scripts/update_trigger.py
+fi
@@ -431,7 +431,7 @@
         "refId": "A"
       }
     ],
-    "title": "Top regions (packet count)",
+    "title": "Top regions (packet count in millions)",
     "type": "bargauge"
   },
   {
@@ -942,6 +942,6 @@
   "timezone": "browser",
   "title": "Internet traffic capture analysis",
   "uid": "be59fkbp3zs3kc",
-  "version": 4,
+  "version": 1,
   "weekStart": ""
 }