Merge pull request #8 from 20kaushik02/integration_2

fin.
This commit is contained in:
Kaushik Narayan Ravishankar 2024-12-03 00:33:25 -07:00 committed by GitHub
commit ede2a760d6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 252180 additions and 601827 deletions

BIN
CSE512 Final Report.pdf Normal file

Binary file not shown.

BIN
Demo_Video.mp4 Normal file

Binary file not shown.

View File

@ -1 +1,36 @@
# Real-time analytics of Internet traffic flow data
![Architecture diagram](./architecture_diagram.png)
## Download the dataset
- The full preprocessed dataset is hosted [here](https://tmp.knravish.me/512_proj/1M_sample_2023_10_01-2023_10_31.csv) - 1.4GB
- Place this file in the `preprocessing` directory
- For testing purposes, you can use the sample CSV that has 10k records from each day instead, change the bind path in the Compose file
## To run the project
- From the `scripts` directory:
- Run `deploy.ps1 -M` for Windows
- Run `deploy.sh -M` for Linux/macOS (add `-S` if sudo needed for docker)
- See the `README` in `scripts` for more
- This sets up the whole stack
### Access the UI
- The Grafana web interface is located at `http://localhost:7602`
- Login:
- Username: `thewebfarm`
- Password: `mrafbeweht`
- Go to `Dashboards` > `Internet traffic capture analysis`
### To run the shard creation and scaling script
- From the `scripts` directory:
- Install dependencies: `python3 -m pip install -r ../clickhouse/config_update_scripts/requirements.txt`
- Run `python3 ../clickhouse/config_update_scripts/update_trigger.py`
- This checks every 2 minutes and creates a new shard and two server nodes for it based on resource utilization
## Limitations
- For multi-node deployments using Docker Swarm, the manager node needs to be running on Linux (outside Docker Desktop i.e. standalone Docker installation) due to limitations in the Docker Swarm engine

BIN
architecture_diagram.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 120 KiB

View File

@ -105,3 +105,4 @@ if __name__ == "__main__":
with open(f'../clickhouse/node{curr_num_servers + i}-config/storage-policy.xml','w') as f4:
f4.write(storage_policy_content)
print("Config Files Updated!")

View File

@ -5,6 +5,7 @@ import schedule
import time
def check_util_exec():
print("Performing check")
# extracting details of each running container in json format
try:
all_services = subprocess.check_output(["sudo", "docker","stats","--no-stream","--format","json"],text=True).split('\n')[:-1]
@ -16,14 +17,20 @@ def check_util_exec():
resource_util_exceed_flag = True # Flag to check if all of the containers have exceeded 80% memory utilization
for service in all_services:
if re.findall(r'clickhouse-server',service['Name']):
if float(service['MemPerc'][:-1]) < 60:
if float(service['MemPerc'][:-1]) < 50:
resource_util_exceed_flag = False
if resource_util_exceed_flag:
process = subprocess.Popen(['python3','../clickhouse/update_config_scripts/update_compose.py'],text=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
print("Config update triggered")
process = subprocess.Popen(['python3','../clickhouse/config_update_scripts/update_compose.py'],text=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
stdout, stderr = process.communicate() # Wait for the process to finish and capture output
print("Standard Output:", stdout)
print("Standard Error:", stderr)
if stdout:
redeploy = subprocess.Popen(['docker','stack','deploy','-d','-c','../preprocessing/docker-compose.yml','-c','../clickhouse/docker-compose.yaml','-c','../ui/docker-compose.yaml','TheWebFarm'])
stdout1, stderr1= redeploy.communicate() # Wait for the process to finish and capture output
print("Standard Output:", stdout1)
print("Standard Error:", stderr1)
time.sleep(120)
# try:
# all_services = subprocess.check_output(["sudo", "docker","stats","--no-stream","--format","json"],text=True).split('\n')[:-1]
# except subprocess.CalledProcessError as e:

252127
preprocessing/geoip_cc.csv Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -46,11 +46,13 @@ elif [[ $masterNode ]]; then
-c ../clickhouse/docker-compose.yaml \
-c ../ui/docker-compose.yaml \
$stackName
elif [[ $autoShard ]]; then
cd $scriptDir
python3 $scriptDir/../clickhouse/config_update_scripts/update_trigger.py
else
echo "[+] swarm follower"
echo "[+] joining swarm with token $swarmToken"
$dockerCmd swarm join --token $swarmToken $managerAddr
fi
if [[ $autoShard ]]; then
cd $scriptDir
python3 ../clickhouse/config_update_scripts/update_trigger.py
fi

View File

@ -431,7 +431,7 @@
"refId": "A"
}
],
"title": "Top regions (packet count)",
"title": "Top regions (packet count in millions)",
"type": "bargauge"
},
{
@ -942,6 +942,6 @@
"timezone": "browser",
"title": "Internet traffic capture analysis",
"uid": "be59fkbp3zs3kc",
"version": 4,
"version": 1,
"weekStart": ""
}