Sunday, November 30, 2014

Поиск полного адреса в КЛАДР

Зафиксируем формат поисковой строки.

S -> (([тип адреса] [значение адреса])|([значение адреса] [тип адреса]))O
O -> ((,[тип адреса] [значение адреса])|(,[значение адреса] [тип адреса]))O|ε

Для примера выберем адрес: Московская обл, г Жуковский, ул Дугина, дом 10

Создадим индексы
  1. CREATE UNIQUE INDEX W_LOCALITY_CODE_LEVEL_IDX ON
    W_LOCALITY(
    LOWER(NAME),
    get_locality_level(KLADR_ID),
    ID);
  2. CREATE UNIQUE INDEX W_LOCALITY_PARENT_TYPE_NAME_IDX ON
    W_LOCALITY(
    PARENT_ID,
    W_LOCALITY_TYPE_ID,
    LOWER(NAME),
    ID);
  3. CREATE UNIQUE INDEX W_LOCALITY_NAME_IDX ON
    W_LOCALITY(
    LOWER(NAME),
    ID);
  4. CREATE UNIQUE INDEX W_LOCALITY_TYPE_IDX ON
    W_LOCALITY_TYPE(
    ID,
    LOWER(NAME));
Создадим функцию

CREATE OR REPLACE FUNCTION get_locality_level(v_code varchar)
RETURNS integer AS $$
DECLARE
v_res integer := NULL;
BEGIN
IF v_code IS NOT NULL THEN
CASE
WHEN char_length(v_code)=13 THEN
BEGIN
v_res := 0;

IF substring(v_code from 1 for 2)<>lpad('', 2, '0') THEN
v_res := v_res + 1;
END IF;

IF substring(v_code from 3 for 3)<>lpad('', 3, '0') THEN
v_res := v_res + 1;
END IF;

IF substring(v_code from 6 for 3)<>lpad('', 3, '0') THEN
v_res := v_res + 1;
END IF;

IF substring(v_code from 9 for 3)<>lpad('', 3, '0') THEN
v_res := v_res + 1;
END IF;
END;

WHEN char_length(v_code)=17 THEN
BEGIN
v_res := 0;

IF substring(v_code from 1 for 2)<>lpad('', 2, '0') THEN
v_res := v_res + 1;
END IF;

IF substring(v_code from 3 for 3)<>lpad('', 3, '0') THEN
v_res := v_res + 1;
END IF;

IF substring(v_code from 6 for 3)<>lpad('', 3, '0') THEN
v_res := v_res + 1;
END IF;

IF substring(v_code from 9 for 3)<>lpad('', 3, '0') THEN
v_res := v_res + 1;
END IF;

IF substring(v_code from 12 for 4)<>lpad('', 4, '0') THEN
v_res := v_res + 1;
END IF;
END;

WHEN char_length(v_code)=19 THEN
BEGIN
v_res := 0;

IF substring(v_code from 1 for 2)<>lpad('', 2, '0') THEN
v_res := v_res + 1;
END IF;

IF substring(v_code from 3 for 3)<>lpad('', 3, '0') THEN
v_res := v_res + 1;
END IF;

IF substring(v_code from 6 for 3)<>lpad('', 3, '0') THEN
v_res := v_res + 1;
END IF;

IF substring(v_code from 9 for 3)<>lpad('', 3, '0') THEN
v_res := v_res + 1;
END IF;

IF substring(v_code from 12 for 4)<>lpad('', 4, '0') THEN
v_res := v_res + 1;
END IF;

IF substring(v_code from 16 for 4)<>lpad('', 4, '0') THEN
v_res := v_res + 1;
END IF;
END;

ELSE
v_res := NULL;
END CASE;
END IF;

RETURN v_res;
END;
$$ LANGUAGE plpgsql IMMUTABLE;

Запрос для поиска адреса

WITH RECURSIVE T1 AS (
select
TRIM(
unnest(
string_to_array(
'Московская обл, г Жуковский, ул Дугина, дом 10', ','))) as PART),
T2 AS (
SELECT
row_number() over() as RN,
string_to_array(PART, ' ') AS PART
FROM
T1
WHERE
array_length(string_to_array(PART, ' '), 1) = 2),
T2_1 AS (
SELECT COUNT(*) AS CNT FROM T1),
T2_2 AS (
SELECT COUNT(*) AS CNT FROM T2),
T2_3 AS (
SELECT
T2.RN AS RN,
W_LOCALITY.ID AS ID,
W_LOCALITY_TYPE.NAME AS PREFIX,
W_LOCALITY.NAME AS NAME,
W_LOCALITY.PARENT_ID AS PARENT_ID
FROM
W_LOCALITY INNER JOIN
W_LOCALITY_TYPE ON W_LOCALITY_TYPE.ID=W_LOCALITY.W_LOCALITY_TYPE_ID, T2, T2_1, T2_2
WHERE
get_locality_level(W_LOCALITY.KLADR_ID) < 3 AND
((LOWER(W_LOCALITY_TYPE.NAME)=LOWER(TRIM(T2.PART[1])) AND
LOWER(W_LOCALITY.NAME)=LOWER(TRIM(T2.PART[2]))) OR
(LOWER(W_LOCALITY_TYPE.NAME)=LOWER(TRIM(T2.PART[2])) AND
LOWER(W_LOCALITY.NAME)=LOWER(TRIM(T2.PART[1])))) AND
T2_1.CNT=T2_2.CNT),
T3 AS (
SELECT
T2_3.ID AS ID,
T2_3.PREFIX AS PREFIX,
T2_3.NAME AS NAME,
T2_3.PARENT_ID AS PARENT_ID
FROM
T2_3),
T4(ID, PARENT_ID, LEVEL) AS (
SELECT
ID,
PARENT_ID,
1 AS LEVEL
FROM
T3
UNION ALL
SELECT
T4.ID AS ID,
W_LOCALITY.PARENT_ID AS PARENT_ID,
T4.LEVEL + 1 AS LEVEL
FROM
W_LOCALITY INNER JOIN
T4 ON T4.PARENT_ID=W_LOCALITY.ID
WHERE
T4.PARENT_ID IS NOT NULL AND
T4.PARENT_ID IN (SELECT ID FROM T3)),
T5(ID, LEVEL) AS (
SELECT
ID,
MAX(LEVEL) AS LEVEL
FROM
T4
WHERE
PARENT_ID IN (SELECT ID FROM T3)
GROUP BY ID),
T6 AS (
SELECT COUNT(*) AS CNT FROM T3),
T7 AS (
SELECT
T5.ID AS ID,
T5.LEVEL AS LEVEL
FROM
W_LOCALITY INNER JOIN
T5 ON T5.ID=W_LOCALITY.ID,
T6
WHERE
(T6.CNT > 1 AND
   (SELECT ID FROM T3 WHERE T3.ID=W_LOCALITY.PARENT_ID) IS NOT NULL) OR
T6.CNT=1),
T8 AS (
SELECT
ID,
LEVEL
FROM
T7
WHERE
(SELECT ID FROM T4 WHERE PARENT_ID=T7.ID LIMIT 1) IS NULL),
T9(ROOT_ID, ID, PARENT_ID, LEVEL) AS (
SELECT
T8.ID AS ROOT_ID,
W_LOCALITY.ID AS ID,
W_LOCALITY.PARENT_ID AS PARENT_ID,
T8.LEVEL AS LEVEL
FROM
T8 INNER JOIN
W_LOCALITY ON T8.ID=W_LOCALITY.ID
UNION ALL
SELECT
T9.ROOT_ID AS ROOT_ID,
W_LOCALITY.ID AS ID,
W_LOCALITY.PARENT_ID AS PARENT_ID,
T9.LEVEL - 1 AS LEVEL
FROM
W_LOCALITY INNER JOIN
T9 ON W_LOCALITY.ID=T9.PARENT_ID
WHERE
T9.PARENT_ID IS NOT NULL),
T9_1 AS (
SELECT
RN,
PART
FROM
T2
WHERE
T2.RN NOT IN (SELECT RN FROM T2_3 INNER JOIN T9 ON T9.ID=T2_3.ID)),
T9_2 AS (
SELECT
MAX(LEVEL) AS LEVEL
FROM
T9),
T9_3 AS (
SELECT
T9.ROOT_ID AS ROOT_ID,
ID AS ID,
T9.LEVEL + 1 AS LEVEL
FROM
T9, T9_2
WHERE
T9.LEVEL=T9_2.LEVEL),
T9_4 AS (
SELECT
T9_1.RN AS RN,
T9_3.ROOT_ID AS ROOT_ID,
W_LOCALITY.ID AS ID,
W_LOCALITY.PARENT_ID AS PARENT_ID,
T9_3.LEVEL AS LEVEL
FROM
W_LOCALITY INNER JOIN
W_LOCALITY_TYPE ON W_LOCALITY_TYPE.ID=W_LOCALITY.W_LOCALITY_TYPE_ID INNER JOIN
T9_3 ON W_LOCALITY.PARENT_ID=T9_3.ID, T9_1
WHERE
((LOWER(W_LOCALITY_TYPE.NAME)=LOWER(TRIM(T9_1.PART[1])) AND
   LOWER(W_LOCALITY.NAME)=LOWER(TRIM(T9_1.PART[2]))) OR
(LOWER(W_LOCALITY_TYPE.NAME)=LOWER(TRIM(T9_1.PART[2])) AND
   LOWER(W_LOCALITY.NAME)=LOWER(TRIM(T9_1.PART[1]))))),
T9_5 AS (
SELECT
T9.ROOT_ID AS ROOT_ID,
T9.ID AS ID,
T9.PARENT_ID AS PARENT_ID,
T9.LEVEL AS LEVEL
FROM
T9
UNION ALL
SELECT
T9_4.ROOT_ID AS ROOT_ID,
T9_4.ID AS ID,
T9_4.PARENT_ID AS PARENT_ID,
T9_4.LEVEL AS LEVEL
FROM T9_4),
T9_6 AS (
SELECT
RN,
PART
FROM
T2
WHERE
T2.RN NOT IN (SELECT RN FROM T2_3 INNER JOIN T9 ON T9.ID=T2_3.ID)),
T9_7 AS (
SELECT
T9_6.RN AS RN,
T9_4.ROOT_ID AS ROOT_ID,
W_LOCALITY.ID AS ID,
W_LOCALITY.PARENT_ID AS PARENT_ID,
T9_4.LEVEL + 1 AS LEVEL
FROM
W_LOCALITY INNER JOIN
W_LOCALITY_TYPE ON W_LOCALITY_TYPE.ID=W_LOCALITY.W_LOCALITY_TYPE_ID INNER JOIN
T9_4 ON W_LOCALITY.PARENT_ID=T9_4.ID, T9_6
WHERE
((LOWER(W_LOCALITY_TYPE.NAME)=LOWER(TRIM(T9_6.PART[1])) AND
   LOWER(W_LOCALITY.NAME)=LOWER(TRIM(T9_6.PART[2]))) OR
(LOWER(W_LOCALITY_TYPE.NAME)=LOWER(TRIM(T9_6.PART[2])) AND
   LOWER(W_LOCALITY.NAME)=LOWER(TRIM(T9_6.PART[1]))))),
T9_8 AS (
SELECT
T9_5.ROOT_ID AS ROOT_ID,
T9_5.ID AS ID,
T9_5.PARENT_ID AS PARENT_ID,
T9_5.LEVEL AS LEVEL
FROM
T9_5
UNION ALL
SELECT
T9_7.ROOT_ID AS ROOT_ID,
T9_7.ID AS ID,
T9_7.PARENT_ID AS PARENT_ID,
T9_7.LEVEL AS LEVEL
FROM
T9_7),
T9_9 AS (
SELECT
ROOT_ID,
MAX(LEVEL) AS LEVEL
FROM
T9_8
GROUP BY ROOT_ID
), T9_10(ROOT_ID, ID, PARENT_ID, LEVEL) AS ( SELECT
T9_8.ROOT_ID AS ROOT_ID,
T9_8.ID AS ID,
T9_8.PARENT_ID AS PARENT_ID,
T9_8.LEVEL AS LEVEL
FROM
T9_8 INNER JOIN
T9_9 ON (T9_9.ROOT_ID=T9_8.ROOT_ID AND T9_9.LEVEL=T9_8.LEVEL)
UNION ALL
SELECT
T9_10.ROOT_ID AS ROOT_ID,
T9_8.ID AS ID,
T9_8.PARENT_ID AS PARENT_ID,
T9_8.LEVEL AS LEVEL
FROM
T9_10 INNER JOIN
T9_8 ON (T9_10.PARENT_ID=T9_8.ID AND T9_10.ROOT_ID=T9_8.ROOT_ID)),
T10 AS (
SELECT
1 AS ID
FROM
T6
WHERE
(SELECT COUNT(*) FROM T9_10)=(SELECT COUNT(*) FROM T1))
SELECT
T9_10.ROOT_ID as ROOT_ID,
T9_10.ID AS ID,
T9_10.PARENT_ID AS PARENT_ID,
T9_10.LEVEL AS LEVEL
W_LOCALITY_VIEW.NAME AS NAME,
W_LOCALITY_VIEW.PREFIX AS PREFIX
FROM
T9_10 INNER JOIN W_LOCALITY_VIEW ON W_LOCALITY_VIEW.ID=T9_10.ID, T10
WHERE
T10.ID=1;

Скорость выполнения - 33.126 мсек

Размер таблиц:

W_LOCALITY23,244,680
W_LOCALITY_TYPE114

Вывод данных:

ROOT_IDIDPARENT_IDLEVELNAMEPREFIX
65487146964151037965310ДОМ
654871037965654872Дугинаул
65487654871958361Жуковскийг
65487195836NULL0Московскаяобл

Скрипты для импорта КЛАДР (github)

No comments:

Post a Comment