Reorganise the ClickHouse node-entrypoint SQL scripts.

* UDFs (unmatchedBits, cidrSuffix, cidrAddress, IPv4RangeToCIDRString) move
  into 01_udf_create.sql under both common/ and main/.
* ip_region_map gets its ip_range_cidr MATERIALIZED column at CREATE TABLE
  time, and ip_region_dict is created in the same 02_table_dict_create.sql.
* The GeoIP CSV load becomes main/04_insert_geoip_csv.sql with an explicit
  column list; the later main/ scripts are renumbered.
* unmatchedBits now uses floor(log2(xor)) + 1 instead of ceil(log2(xor)):
  ceil() is one bit short when the XOR is an exact power of two (for example
  a two-address range, XOR = 1), which produced a /32 instead of a /31.

Note: blob ids on the "index" lines and the similarity percentage are kept
as originally recorded and were not recomputed after the unmatchedBits fix.

diff --git a/clickhouse/node-entrypoints/main/03_insert_geoip_csv.sql b/clickhouse/node-entrypoints/common/01_udf_create.sql
similarity index 53%
rename from clickhouse/node-entrypoints/main/03_insert_geoip_csv.sql
rename to clickhouse/node-entrypoints/common/01_udf_create.sql
index dc0d4b6..5a5fa17 100644
--- a/clickhouse/node-entrypoints/main/03_insert_geoip_csv.sql
+++ b/clickhouse/node-entrypoints/common/01_udf_create.sql
@@ -1,9 +1,5 @@
-INSERT INTO ip_region_map
-FROM INFILE '/var/lib/clickhouse/user_files/csv/ip_region_map.csv'
-FORMAT CSVWithNames;
-
 -- https://clickhouse.com/blog/geolocating-ips-in-clickhouse-and-grafana#using-bit-functions-to-convert-ip-ranges-to-cidr-notation
 
 CREATE FUNCTION unmatchedBits AS (ip_s, ip_e) -> if(
     bitXor(ip_s, ip_e) != 0,
-    ceil(log2(bitXor(ip_s, ip_e))), 0
+    floor(log2(bitXor(ip_s, ip_e))) + 1, 0 -- index of highest differing bit, plus one
@@ -24,14 +20,3 @@ CREATE FUNCTION IPv4RangeToCIDRString AS (ip_s, ip_e) -> CONCAT(
     toString(cidrSuffix(ip_s, ip_e))
 );
 
-ALTER TABLE ip_region_map
-ADD COLUMN ip_range_cidr String
-MATERIALIZED IPv4RangeToCIDRString(ip_range_start, ip_range_end);
-
-CREATE DICTIONARY ip_region_dict (ip_range_cidr String, region String) PRIMARY KEY ip_range_cidr SOURCE(CLICKHOUSE(TABLE 'ip_region_map')) LAYOUT(ip_trie) LIFETIME(3600);
-
--- SELECT
--- *,
--- dictGet('ip_region_dict', 'region', tuple(src_ip)) AS region
--- FROM traffic_records_all
--- LIMIT 10
\ No newline at end of file
diff --git a/clickhouse/node-entrypoints/common/01_table_create.sql b/clickhouse/node-entrypoints/common/02_table_dict_create.sql
similarity index 73%
rename from clickhouse/node-entrypoints/common/01_table_create.sql
rename to clickhouse/node-entrypoints/common/02_table_dict_create.sql
index 0be4597..53cb730 100644
--- a/clickhouse/node-entrypoints/common/01_table_create.sql
+++ b/clickhouse/node-entrypoints/common/02_table_dict_create.sql
@@ -20,9 +20,17 @@ CREATE TABLE ip_region_map (
     ip_range_start IPv4,
     ip_range_end IPv4,
     region LowCardinality(String),
+    ip_range_cidr String MATERIALIZED IPv4RangeToCIDRString(ip_range_start, ip_range_end),
     INDEX region_idx region TYPE bloom_filter
 ) ENGINE = ReplicatedMergeTree(
     '/clickhouse/tables/{shard}/ip_region_map',
     '{replica}'
 )
-ORDER BY ip_range_start;
\ No newline at end of file
+ORDER BY ip_range_start;
+
+CREATE DICTIONARY ip_region_dict
+(ip_range_cidr String, region String)
+PRIMARY KEY ip_range_cidr
+SOURCE(CLICKHOUSE(TABLE 'ip_region_map'))
+LAYOUT(ip_trie)
+LIFETIME(3600);
diff --git a/clickhouse/node-entrypoints/main/01_udf_create.sql b/clickhouse/node-entrypoints/main/01_udf_create.sql
new file mode 100644
index 0000000..5a5fa17
--- /dev/null
+++ b/clickhouse/node-entrypoints/main/01_udf_create.sql
@@ -0,0 +1,22 @@
+-- https://clickhouse.com/blog/geolocating-ips-in-clickhouse-and-grafana#using-bit-functions-to-convert-ip-ranges-to-cidr-notation
+
+CREATE FUNCTION unmatchedBits AS (ip_s, ip_e) -> if(
+    bitXor(ip_s, ip_e) != 0,
+    floor(log2(bitXor(ip_s, ip_e))) + 1, 0 -- index of highest differing bit, plus one
+);
+
+CREATE FUNCTION cidrSuffix AS (ip_s, ip_e) -> 32 - unmatchedBits(ip_s, ip_e);
+
+CREATE FUNCTION cidrAddress AS (ip_s, ip_e) -> toIPv4(
+    bitAnd(
+        bitNot(pow(2, unmatchedBits(ip_s, ip_e)) - 1),
+        ip_s
+    )::UInt64
+);
+
+CREATE FUNCTION IPv4RangeToCIDRString AS (ip_s, ip_e) -> CONCAT(
+    toString(cidrAddress(ip_s, ip_e)),
+    '/',
+    toString(cidrSuffix(ip_s, ip_e))
+);
+
diff --git a/clickhouse/node-entrypoints/main/01_table_create.sql b/clickhouse/node-entrypoints/main/02_table_dict_create.sql
similarity index 73%
rename from clickhouse/node-entrypoints/main/01_table_create.sql
rename to clickhouse/node-entrypoints/main/02_table_dict_create.sql
index 0be4597..53cb730 100644
--- a/clickhouse/node-entrypoints/main/01_table_create.sql
+++ b/clickhouse/node-entrypoints/main/02_table_dict_create.sql
@@ -20,9 +20,17 @@ CREATE TABLE ip_region_map (
     ip_range_start IPv4,
     ip_range_end IPv4,
     region LowCardinality(String),
+    ip_range_cidr String MATERIALIZED IPv4RangeToCIDRString(ip_range_start, ip_range_end),
     INDEX region_idx region TYPE bloom_filter
 ) ENGINE = ReplicatedMergeTree(
     '/clickhouse/tables/{shard}/ip_region_map',
     '{replica}'
 )
-ORDER BY ip_range_start;
\ No newline at end of file
+ORDER BY ip_range_start;
+
+CREATE DICTIONARY ip_region_dict
+(ip_range_cidr String, region String)
+PRIMARY KEY ip_range_cidr
+SOURCE(CLICKHOUSE(TABLE 'ip_region_map'))
+LAYOUT(ip_trie)
+LIFETIME(3600);
diff --git a/clickhouse/node-entrypoints/main/02_dist_table_create.sql b/clickhouse/node-entrypoints/main/03_dist_table_create.sql
similarity index 100%
rename from clickhouse/node-entrypoints/main/02_dist_table_create.sql
rename to clickhouse/node-entrypoints/main/03_dist_table_create.sql
diff --git a/clickhouse/node-entrypoints/main/04_insert_geoip_csv.sql b/clickhouse/node-entrypoints/main/04_insert_geoip_csv.sql
new file mode 100644
index 0000000..51060b1
--- /dev/null
+++ b/clickhouse/node-entrypoints/main/04_insert_geoip_csv.sql
@@ -0,0 +1,3 @@
+INSERT INTO ip_region_map (ip_range_start, ip_range_end, region)
+FROM INFILE '/var/lib/clickhouse/user_files/csv/ip_region_map.csv'
+FORMAT CSVWithNames;
diff --git a/clickhouse/node-entrypoints/main/04_kafka_table_ingest.sql b/clickhouse/node-entrypoints/main/05_kafka_table_ingest.sql
similarity index 100%
rename from clickhouse/node-entrypoints/main/04_kafka_table_ingest.sql
rename to clickhouse/node-entrypoints/main/05_kafka_table_ingest.sql