mirror of
https://github.com/20kaushik02/real-time-traffic-analysis-clickhouse.git
synced 2025-12-06 08:04:06 +00:00
dict on all nodes
This commit is contained in:
parent
6afaa613b2
commit
ece4ca4508
@ -1,7 +1,3 @@
|
||||
-- Bulk-load the IP-range-to-region mapping from a CSV file on the server.
-- CSVWithNames: the first row of the file is a header of column names.
INSERT INTO ip_region_map
    FROM INFILE '/var/lib/clickhouse/user_files/csv/ip_region_map.csv'
    FORMAT CSVWithNames;
|
||||
|
||||
-- https://clickhouse.com/blog/geolocating-ips-in-clickhouse-and-grafana#using-bit-functions-to-convert-ip-ranges-to-cidr-notation
|
||||
|
||||
CREATE FUNCTION unmatchedBits AS (ip_s, ip_e) -> if(
|
||||
@ -24,14 +20,3 @@ CREATE FUNCTION IPv4RangeToCIDRString AS (ip_s, ip_e) -> CONCAT(
|
||||
toString(cidrSuffix(ip_s, ip_e))
|
||||
);
|
||||
|
||||
-- Add a materialized column that stores each IP range as a CIDR string,
-- computed by IPv4RangeToCIDRString from the range's start and end addresses.
ALTER TABLE ip_region_map
    ADD COLUMN ip_range_cidr String MATERIALIZED IPv4RangeToCIDRString(ip_range_start, ip_range_end);
|
||||
|
||||
-- Dictionary mapping a CIDR prefix (ip_range_cidr) to its region, sourced from
-- the ip_region_map table.
--   LAYOUT(ip_trie): keys are network prefixes in CIDR notation.
--   LIFETIME(3600):  dictionary update interval, in seconds.
CREATE DICTIONARY ip_region_dict
(
    ip_range_cidr String,
    region String
)
PRIMARY KEY ip_range_cidr
SOURCE(CLICKHOUSE(TABLE 'ip_region_map'))
LAYOUT(ip_trie)
LIFETIME(3600);
|
||||
|
||||
-- SELECT
|
||||
-- *,
|
||||
-- dictGet('ip_region_dict', 'region', tuple(src_ip)) AS region
|
||||
-- FROM traffic_records_all
|
||||
-- LIMIT 10
|
||||
@ -20,9 +20,17 @@ CREATE TABLE ip_region_map (
|
||||
ip_range_start IPv4,
|
||||
ip_range_end IPv4,
|
||||
region LowCardinality(String),
|
||||
ip_range_cidr String MATERIALIZED IPv4RangeToCIDRString(ip_range_start, ip_range_end),
|
||||
INDEX region_idx region TYPE bloom_filter
|
||||
) ENGINE = ReplicatedMergeTree(
|
||||
'/clickhouse/tables/{shard}/ip_region_map',
|
||||
'{replica}'
|
||||
)
|
||||
ORDER BY ip_range_start;
|
||||
|
||||
-- Dictionary mapping a CIDR prefix (ip_range_cidr) to its region, sourced from
-- the ip_region_map table.
--   LAYOUT(ip_trie): keys are network prefixes in CIDR notation.
--   LIFETIME(3600):  dictionary update interval, in seconds.
CREATE DICTIONARY ip_region_dict
(
    ip_range_cidr String,
    region String
)
PRIMARY KEY ip_range_cidr
SOURCE(CLICKHOUSE(TABLE 'ip_region_map'))
LAYOUT(ip_trie)
LIFETIME(3600);
|
||||
22
clickhouse/node-entrypoints/main/01_udf_create.sql
Normal file
22
clickhouse/node-entrypoints/main/01_udf_create.sql
Normal file
@ -0,0 +1,22 @@
|
||||
-- https://clickhouse.com/blog/geolocating-ips-in-clickhouse-and-grafana#using-bit-functions-to-convert-ip-ranges-to-cidr-notation
|
||||
|
||||
-- unmatchedBits(ip_s, ip_e): number of low-order bits in which the start and
-- end address of a range can differ, i.e. the bit length of bitXor(ip_s, ip_e).
--   ip_s, ip_e: first and last address of the range.
--   Returns 0 when both addresses are equal, otherwise the position of the
--   highest differing bit (1-based).
-- The "+ 1" inside log2 is required: ceil(log2(x)) alone is one bit short
-- whenever x is an exact power of two (x = 1 yields 0, so a two-address range
-- such as a.b.c.0 - a.b.c.1 would be emitted as /32 instead of /31).
CREATE FUNCTION unmatchedBits AS (ip_s, ip_e) -> if(
    bitXor(ip_s, ip_e) != 0,
    ceil(log2(bitXor(ip_s, ip_e) + 1)),
    0
);
|
||||
|
||||
-- cidrSuffix(ip_s, ip_e): CIDR prefix length for the range, computed as 32
-- minus the bit count that unmatchedBits returns for the same two addresses.
CREATE FUNCTION cidrSuffix AS (ip_s, ip_e) -> 32 - unmatchedBits(ip_s, ip_e);
|
||||
|
||||
-- cidrAddress(ip_s, ip_e): base address of the CIDR block for the range.
-- Builds a mask whose low unmatchedBits(ip_s, ip_e) bits are cleared
-- (bitNot(2^n - 1)), applies it to ip_s, and converts the result to IPv4.
CREATE FUNCTION cidrAddress AS (ip_s, ip_e) -> toIPv4(
    CAST(
        bitAnd(bitNot(pow(2, unmatchedBits(ip_s, ip_e)) - 1), ip_s)
        AS UInt64
    )
);
|
||||
|
||||
-- IPv4RangeToCIDRString(ip_s, ip_e): renders the range as '<address>/<suffix>',
-- joining the string forms of cidrAddress and cidrSuffix with a slash.
CREATE FUNCTION IPv4RangeToCIDRString AS (ip_s, ip_e) ->
    toString(cidrAddress(ip_s, ip_e)) || '/' || toString(cidrSuffix(ip_s, ip_e));
|
||||
|
||||
@ -20,9 +20,17 @@ CREATE TABLE ip_region_map (
|
||||
ip_range_start IPv4,
|
||||
ip_range_end IPv4,
|
||||
region LowCardinality(String),
|
||||
ip_range_cidr String MATERIALIZED IPv4RangeToCIDRString(ip_range_start, ip_range_end),
|
||||
INDEX region_idx region TYPE bloom_filter
|
||||
) ENGINE = ReplicatedMergeTree(
|
||||
'/clickhouse/tables/{shard}/ip_region_map',
|
||||
'{replica}'
|
||||
)
|
||||
ORDER BY ip_range_start;
|
||||
|
||||
-- Dictionary mapping a CIDR prefix (ip_range_cidr) to its region, sourced from
-- the ip_region_map table.
--   LAYOUT(ip_trie): keys are network prefixes in CIDR notation.
--   LIFETIME(3600):  dictionary update interval, in seconds.
CREATE DICTIONARY ip_region_dict
(
    ip_range_cidr String,
    region String
)
PRIMARY KEY ip_range_cidr
SOURCE(CLICKHOUSE(TABLE 'ip_region_map'))
LAYOUT(ip_trie)
LIFETIME(3600);
|
||||
3
clickhouse/node-entrypoints/main/04_insert_geoip_csv.sql
Normal file
3
clickhouse/node-entrypoints/main/04_insert_geoip_csv.sql
Normal file
@ -0,0 +1,3 @@
|
||||
-- Bulk-load the IP-range-to-region mapping from a CSV file on the server.
-- Only the three listed columns are written by this statement.
-- CSVWithNames: the first row of the file is a header of column names.
INSERT INTO ip_region_map
(
    ip_range_start,
    ip_range_end,
    region
)
FROM INFILE '/var/lib/clickhouse/user_files/csv/ip_region_map.csv'
FORMAT CSVWithNames;
|
||||
Loading…
x
Reference in New Issue
Block a user