Fixed integration issue with csv streaming

Akash Sivakumar
2024-11-26 21:41:17 -07:00
parent 5d20e14dbf
commit b1fc1dbc49
5 changed files with 11 additions and 10 deletions


@@ -49,3 +49,4 @@
 python pcap_processor.py -f C:/Users/akash/storage/Asu/sem3/dds/project/202310081400.pcap -s --stream_size 1000
+python pcap_processor.py -c sample_output.csv -s --stream_size 1000
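
These two commands cover both input paths: -f processes a raw .pcap capture, while -c streams rows from an already-preprocessed CSV; -s turns on streaming and --stream_size caps how many rows are sent. Below is a rough sketch of the argument parsing those flags imply; the long option names, help text, and defaults are guesses, not necessarily what pcap_processor.py actually defines.

import argparse

# Hypothetical sketch of the CLI suggested by the commands above; the real
# parser in pcap_processor.py may name and default these options differently.
parser = argparse.ArgumentParser(
    description="Process pcap captures or stream preprocessed CSV rows to Kafka")
parser.add_argument("-f", "--file", help="path to a .pcap capture to process")
parser.add_argument("-c", "--csv", help="path to a preprocessed CSV to stream")
parser.add_argument("-s", "--stream", action="store_true", help="stream rows/packets to Kafka")
parser.add_argument("--stream_size", type=int, default=1000,
                    help="maximum number of rows/packets to stream")
args = parser.parse_args()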


@@ -19,7 +19,7 @@ class KafkaClient:
         self.topic_name = topic_name
         if mode == 'producer':
             self.client = KafkaProducer(
-                bootstrap_servers=['localhost:9092'],
+                bootstrap_servers=['kafka:9092'],
                 max_request_size = 200000000,
                 #api_version=(0,11,5),
                 value_serializer=lambda x: json.dumps(x).encode('utf-8'))
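
Switching bootstrap_servers from localhost:9092 to kafka:9092 points the producer at the broker by its service hostname, which is what fixes the integration when the client and Kafka run as separate containers on a shared network. A minimal sketch of keeping both setups working is below, reading the address from an environment variable; KAFKA_BOOTSTRAP is a made-up variable name, not something this repository defines.

import json
import os
from kafka import KafkaProducer

# Sketch only: KAFKA_BOOTSTRAP is a hypothetical environment variable,
# falling back to the containerized broker address used in this commit.
bootstrap = os.environ.get("KAFKA_BOOTSTRAP", "kafka:9092")
producer = KafkaProducer(
    bootstrap_servers=[bootstrap],
    max_request_size=200000000,
    value_serializer=lambda x: json.dumps(x).encode('utf-8'))
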
@@ -198,6 +198,7 @@ if __name__ == "__main__":
     # if preprocessed data ready for streaming
     if csv_file:
         #print("true")
+        with open(csv_file, newline="") as f:
             csv_rdr = csv.reader(f)
             next(csv_rdr) # skip headers
@@ -207,10 +208,10 @@ if __name__ == "__main__":
                 # direct streaming to kafka goes here
                 producer.client.send(KAFKA_TOPIC, row_to_dict(row))
                 dbg_print(row_to_dict(row))
-                dbg_print("streamed packet", idx)
+                print("streamed packet", idx)
                 if idx > sample_size:
                     break
-            dbg_print(f"total streamed: {idx}")
+            print(f"total streamed: {idx}")
     # otherwise, process packets
     else:
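
Taken together, the hunks above describe the streaming path: open the CSV with newline="" (as the csv module recommends), skip the header row, convert each row to a dict, send it to the Kafka topic, and stop once the requested number of rows has gone out. The sketch below assembles that flow in one place under stated assumptions: KAFKA_TOPIC and sample_size stand in for values defined elsewhere in the script, and the dict(zip(...)) conversion is only a guess at what row_to_dict does.

import csv
import json
from kafka import KafkaProducer

KAFKA_TOPIC = "pcap_stream"   # assumed topic name; the script defines its own
sample_size = 1000            # corresponds to --stream_size

producer = KafkaProducer(
    bootstrap_servers=["kafka:9092"],
    value_serializer=lambda x: json.dumps(x).encode("utf-8"))

def stream_csv(csv_file):
    # newline="" is the csv-module-recommended way to open the file
    with open(csv_file, newline="") as f:
        csv_rdr = csv.reader(f)
        header = next(csv_rdr)            # skip (and keep) the header row
        idx = 0
        for idx, row in enumerate(csv_rdr):
            # stand-in for row_to_dict(row): pair header names with values
            producer.send(KAFKA_TOPIC, dict(zip(header, row)))
            if idx > sample_size:         # mirrors the cutoff in the diff
                break
        producer.flush()                  # push any buffered records to the broker
        print(f"total streamed: {idx}")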