geoip csv

This commit is contained in:
2024-11-26 22:19:11 -07:00
parent ea2017b251
commit 989b2adbe6
2 changed files with 252136 additions and 8 deletions

252127
preprocessing/geoip.csv Normal file

File diff suppressed because it is too large. (Load Diff)

View File

@@ -5,14 +5,15 @@ import csv
sample_size = 100 sample_size = 100
batch_size = 10000 batch_size = 10000
sample = True sample = False
def int_to_ipv4(num: int) -> str: def int_to_ipv4(num: int) -> str:
return socket.inet_ntoa(struct.pack("!L", num)) return socket.inet_ntoa(struct.pack("!L", num))
with open("IP2LOCATION-LITE-DB3.csv", "r") as input_file, open( # with open("IP2LOCATION-LITE-DB3.csv", "r") as input_file, open(
with open("IP2LOCATION-LITE-DB1.csv", "r") as input_file, open(
"geoip.csv", "w", newline="" "geoip.csv", "w", newline=""
) as output_file: ) as output_file:
reader = csv.reader(input_file) reader = csv.reader(input_file)
@@ -21,11 +22,11 @@ with open("IP2LOCATION-LITE-DB3.csv", "r") as input_file, open(
# header row # header row
writer.writerow( writer.writerow(
[ [
"ip_from", "ip_range_start",
"ip_to", "ip_range_end",
"country", "country",
"region", # "region",
"city", # "city",
] ]
) )
@@ -35,8 +36,8 @@ with open("IP2LOCATION-LITE-DB3.csv", "r") as input_file, open(
int_to_ipv4(int(record[0])), int_to_ipv4(int(record[0])),
int_to_ipv4(int(record[1])), int_to_ipv4(int(record[1])),
record[3], record[3],
record[4], # record[4],
record[5], # record[5],
] ]
records.append(new_record) records.append(new_record)
if sample and idx > sample_size: if sample and idx > sample_size: