mirror of
https://github.com/20kaushik02/real-time-traffic-analysis-clickhouse.git
synced 2025-12-06 09:44:06 +00:00
dict on all nodes
This commit is contained in:
parent
6afaa613b2
commit
ece4ca4508
@ -1,7 +1,3 @@
|
|||||||
INSERT INTO ip_region_map
|
|
||||||
FROM INFILE '/var/lib/clickhouse/user_files/csv/ip_region_map.csv'
|
|
||||||
FORMAT CSVWithNames;
|
|
||||||
|
|
||||||
-- https://clickhouse.com/blog/geolocating-ips-in-clickhouse-and-grafana#using-bit-functions-to-convert-ip-ranges-to-cidr-notation
|
-- https://clickhouse.com/blog/geolocating-ips-in-clickhouse-and-grafana#using-bit-functions-to-convert-ip-ranges-to-cidr-notation
|
||||||
|
|
||||||
CREATE FUNCTION unmatchedBits AS (ip_s, ip_e) -> if(
|
CREATE FUNCTION unmatchedBits AS (ip_s, ip_e) -> if(
|
||||||
@ -24,14 +20,3 @@ CREATE FUNCTION IPv4RangeToCIDRString AS (ip_s, ip_e) -> CONCAT(
|
|||||||
toString(cidrSuffix(ip_s, ip_e))
|
toString(cidrSuffix(ip_s, ip_e))
|
||||||
);
|
);
|
||||||
|
|
||||||
ALTER TABLE ip_region_map
|
|
||||||
ADD COLUMN ip_range_cidr String
|
|
||||||
MATERIALIZED IPv4RangeToCIDRString(ip_range_start, ip_range_end);
|
|
||||||
|
|
||||||
CREATE DICTIONARY ip_region_dict (ip_range_cidr String, region String) PRIMARY KEY ip_range_cidr SOURCE(CLICKHOUSE(TABLE 'ip_region_map')) LAYOUT(ip_trie) LIFETIME(3600);
|
|
||||||
|
|
||||||
-- SELECT
|
|
||||||
-- *,
|
|
||||||
-- dictGet('ip_region_dict', 'region', tuple(src_ip)) AS region
|
|
||||||
-- FROM traffic_records_all
|
|
||||||
-- LIMIT 10
|
|
||||||
@ -20,9 +20,17 @@ CREATE TABLE ip_region_map (
|
|||||||
ip_range_start IPv4,
|
ip_range_start IPv4,
|
||||||
ip_range_end IPv4,
|
ip_range_end IPv4,
|
||||||
region LowCardinality(String),
|
region LowCardinality(String),
|
||||||
|
ip_range_cidr String MATERIALIZED IPv4RangeToCIDRString(ip_range_start, ip_range_end),
|
||||||
INDEX region_idx region TYPE bloom_filter
|
INDEX region_idx region TYPE bloom_filter
|
||||||
) ENGINE = ReplicatedMergeTree(
|
) ENGINE = ReplicatedMergeTree(
|
||||||
'/clickhouse/tables/{shard}/ip_region_map',
|
'/clickhouse/tables/{shard}/ip_region_map',
|
||||||
'{replica}'
|
'{replica}'
|
||||||
)
|
)
|
||||||
ORDER BY ip_range_start;
|
ORDER BY ip_range_start;
|
||||||
|
|
||||||
|
-- Dictionary mapping a CIDR string (ip_range_cidr) to its region, read from
-- the ip_region_map table and stored with the ip_trie layout.
-- LIFETIME is given as 3600.
CREATE DICTIONARY ip_region_dict
(
    ip_range_cidr String,
    region String
)
    PRIMARY KEY ip_range_cidr
    SOURCE(CLICKHOUSE(TABLE 'ip_region_map'))
    LAYOUT(ip_trie)
    LIFETIME(3600);
|
||||||
22
clickhouse/node-entrypoints/main/01_udf_create.sql
Normal file
22
clickhouse/node-entrypoints/main/01_udf_create.sql
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
-- https://clickhouse.com/blog/geolocating-ips-in-clickhouse-and-grafana#using-bit-functions-to-convert-ip-ranges-to-cidr-notation
|
||||||
|
|
||||||
|
-- unmatchedBits(ip_s, ip_e): number of low-order bits in which the two
-- addresses of a range can differ, i.e. the 1-based position of the highest
-- set bit of bitXor(ip_s, ip_e). Returns 0 when both addresses are identical.
--
-- The position of the highest set bit of x is floor(log2(x)) + 1.
-- ceil(log2(x)) gives the same value except when x is an exact power of two,
-- where it is one too small: XOR = 1 (a two-address /31 range) would report
-- 0 differing bits and be rendered as a /32, and XOR = 2 would yield a prefix
-- that does not contain the end address.
CREATE FUNCTION unmatchedBits AS (ip_s, ip_e) -> if(
    bitXor(ip_s, ip_e) != 0,
    floor(log2(bitXor(ip_s, ip_e))) + 1,
    0
);
|
||||||
|
|
||||||
|
-- cidrSuffix(ip_s, ip_e): CIDR prefix length for the range, computed as the
-- 32 bits of an IPv4 address minus the bit count reported by unmatchedBits.
CREATE FUNCTION cidrSuffix AS (ip_s, ip_e) ->
    32 - unmatchedBits(ip_s, ip_e);
|
||||||
|
|
||||||
|
-- cidrAddress(ip_s, ip_e): base address of the CIDR block for the range.
-- pow(2, n) - 1 has the low n bits set (n = unmatchedBits); bitNot inverts it
-- into a mask that keeps only the remaining high bits, and bitAnd applies that
-- mask to the range start. The masked value is cast to UInt64 before being
-- converted to IPv4.
CREATE FUNCTION cidrAddress AS (ip_s, ip_e) -> toIPv4(
    CAST(
        bitAnd(
            bitNot(pow(2, unmatchedBits(ip_s, ip_e)) - 1),
            ip_s
        ) AS UInt64
    )
);
|
||||||
|
|
||||||
|
-- IPv4RangeToCIDRString(ip_s, ip_e): renders the range as '<address>/<suffix>'
-- by joining cidrAddress and cidrSuffix (both converted to strings) with '/'.
CREATE FUNCTION IPv4RangeToCIDRString AS (ip_s, ip_e) ->
    toString(cidrAddress(ip_s, ip_e))
    || '/'
    || toString(cidrSuffix(ip_s, ip_e));
|
||||||
|
|
||||||
@ -20,9 +20,17 @@ CREATE TABLE ip_region_map (
|
|||||||
ip_range_start IPv4,
|
ip_range_start IPv4,
|
||||||
ip_range_end IPv4,
|
ip_range_end IPv4,
|
||||||
region LowCardinality(String),
|
region LowCardinality(String),
|
||||||
|
ip_range_cidr String MATERIALIZED IPv4RangeToCIDRString(ip_range_start, ip_range_end),
|
||||||
INDEX region_idx region TYPE bloom_filter
|
INDEX region_idx region TYPE bloom_filter
|
||||||
) ENGINE = ReplicatedMergeTree(
|
) ENGINE = ReplicatedMergeTree(
|
||||||
'/clickhouse/tables/{shard}/ip_region_map',
|
'/clickhouse/tables/{shard}/ip_region_map',
|
||||||
'{replica}'
|
'{replica}'
|
||||||
)
|
)
|
||||||
ORDER BY ip_range_start;
|
ORDER BY ip_range_start;
|
||||||
|
|
||||||
|
-- Dictionary mapping a CIDR string (ip_range_cidr) to its region, read from
-- the ip_region_map table and stored with the ip_trie layout.
-- LIFETIME is given as 3600.
CREATE DICTIONARY ip_region_dict
(
    ip_range_cidr String,
    region String
)
    PRIMARY KEY ip_range_cidr
    SOURCE(CLICKHOUSE(TABLE 'ip_region_map'))
    LAYOUT(ip_trie)
    LIFETIME(3600);
|
||||||
3
clickhouse/node-entrypoints/main/04_insert_geoip_csv.sql
Normal file
3
clickhouse/node-entrypoints/main/04_insert_geoip_csv.sql
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
-- Load the GeoIP ranges from the CSV file into ip_region_map.
-- Only the three stored columns are listed; ip_range_cidr is declared
-- MATERIALIZED on the table and is therefore not part of the insert.
-- CSVWithNames: the file is read as CSV with a header row of column names.
INSERT INTO ip_region_map (ip_range_start, ip_range_end, region)
    FROM INFILE '/var/lib/clickhouse/user_files/csv/ip_region_map.csv'
    FORMAT CSVWithNames;
|
||||||
Loading…
x
Reference in New Issue
Block a user