Неправильное заполнение материализованного представления

Таблица test_sessions

CREATE TABLE IF NOT EXISTS test_sessions (
    id UInt64,
    name String,
    created_at DateTime
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_at)
ORDER BY name;

Данные таблицы test_sessions

INSERT INTO test_sessions(id, name, created_at) VALUES
(1, 'start', now()),
(1, 'stop',  now() + INTERVAL 1 day),
(2, 'start',  now() + INTERVAL 1 HOUR );

+----+-------+---------------------+
| id | name  | created_at          |
+----+-------+---------------------+
| 1  | start | 2020-11-10 07:58:19 |
+----+-------+---------------------+
| 2  | start | 2020-11-10 08:58:19 |
+----+-------+---------------------+
| 1  | stop  | 2020-11-11 07:58:19 |
+----+-------+---------------------+

Материализованное представление "finished_sessions"

CREATE MATERIALIZED VIEW finished_sessions (
    id UInt64,
    start_at DateTime,
    end_at DateTime
)
ENGINE = AggregatingMergeTree
PARTITION BY toYYYYMM(start_at)
ORDER BY (id)
POPULATE AS
SELECT
    id,
    minIf(created_at, name = 'start') AS start_at,
    maxIf(created_at, name = 'stop')  AS end_at
FROM test_sessions
GROUP BY id
HAVING end_at <> '1970-01-01 00:00:00';

Материализованные данные просмотра "finished_sessions"

SELECT * FROM finished_sessions;

+----+---------------------+---------------------+
| id | start_at            | end_at              |
+----+---------------------+---------------------+
| 1  | 2020-11-10 07:58:19 | 2020-11-11 07:58:19 |
+----+---------------------+---------------------+

До этого момента все работает корректно: есть только 1 закрытый сеанс

После закрытия второй сессии

INSERT INTO test_sessions(id, name, created_at) VALUES
(2, 'stop', now())

Произошло неправильное заполнение

SELECT * from finished_sessions ORDER BY id;

+----+-------------------------------+---------------------+
| id | start_at                      | end_at              |
+----+-------------------------------+---------------------+
| 1  | 2020-11-10 07:58:19           | 2020-11-11 07:58:19 |
+----+-------------------------------+---------------------+
| 2  | ---> 1970-01-01 00:00:00 <--- | 2020-11-10 08:06:24 |
+----+-------------------------------+---------------------+

Как это исправить?


person cetver    schedule 10.11.2020    source источник


Ответы (1)


  1. Вам следует использовать AggregateFunction или лучше SimpleAggregateFunction

  2. Невозможно разделить таблицу с помощью AggregateFunction. Поскольку AggregateFunction вычисляется во время слияния, а слияние выполняется по разделу.

  3. MV - это триггер вставки. https://youtu.be/ckChUkC3Pns?hl=ru&hl=ru = "https://den-crane.github.io/Everything_you_should_know_about_materialized_views_commented.pdf" rel = "nofollow noreferrer"> https://den-crane.github.io/Everything_you_should_know_about_materialized_viewdfs_commented.pdf

    CREATE TABLE IF NOT EXISTS test_sessions (
        id UInt64,
        name String,
        created_at DateTime
    )
    ENGINE = MergeTree()
    PARTITION BY toYYYYMM(created_at)
    ORDER BY name;
    
    INSERT INTO test_sessions(id, name, created_at) VALUES
    (1, 'start', now()),
    (1, 'stop',  now() + INTERVAL 1 day),
    (2, 'start',  now() + INTERVAL 1 HOUR );
    
    CREATE MATERIALIZED VIEW finished_sessions
    ENGINE = AggregatingMergeTree
    ORDER BY (id)
    POPULATE AS
    SELECT
        id, 
        minStateIf(created_at, name = 'start') AS start_at,
        maxStateIf(created_at, name = 'stop')  AS end_at
    FROM test_sessions
    GROUP BY id
    
    INSERT INTO test_sessions(id, name, created_at) VALUES
    (2, 'stop', now());
    
    SELECT
        id,
        minMerge(start_at),
        maxMerge(end_at)
    FROM finished_sessions
    GROUP BY id
    
    Query id: d797eee4-6088-40b8-aa12-b10da62b60c5
    
    ┌─id─┬──minMerge(start_at)─┬────maxMerge(end_at)─┐
    │  2 │ 2020-11-10 15:18:19 │ 2020-11-10 14:21:54 │
    │  1 │ 2020-11-10 14:18:19 │ 2020-11-11 14:18:19 │
    └────┴─────────────────────┴─────────────────────┘
CREATE TABLE IF NOT EXISTS test_sessions (
    id UInt64,
    name String,
    created_at DateTime
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_at)
ORDER BY name;

INSERT INTO test_sessions(id, name, created_at) VALUES
(1, 'start', now()),
(1, 'stop',  now() + INTERVAL 1 day),
(2, 'start',  now() + INTERVAL 1 HOUR );

CREATE MATERIALIZED VIEW finished_sessions
(
    id UInt64,
    start_at SimpleAggregateFunction(min,DateTime),
    end_at SimpleAggregateFunction(max,DateTime)
)
ENGINE = AggregatingMergeTree
ORDER BY (id)
POPULATE AS
SELECT
    id, 
    minIf(created_at, name = 'start') AS start_at,
    maxIf(created_at, name = 'stop')  AS end_at
FROM test_sessions
GROUP BY id;


INSERT INTO test_sessions(id, name, created_at) VALUES
(2, 'stop', now())

optimize table finished_sessions final;


SELECT
        id,
        min(start_at),
        max(end_at)
    FROM finished_sessions
    GROUP BY id

┌─id─┬───────min(start_at)─┬─────────max(end_at)─┐
│  2 │ 1970-01-01 00:00:00 │ 2020-11-10 14:29:30 │
│  1 │ 2020-11-10 14:29:15 │ 2020-11-11 14:29:15 │
└────┴─────────────────────┴─────────────────────┘    
person Denny Crane    schedule 10.11.2020
comment
пришел к такому же решению: mv body AggregateFunction(minIf, DateTime, UInt8), mv select minIfState(created_at, name = 'start'). В приведенном примере лучше SimpleAggregateFunction работает некорректно, знаете почему? - person cetver; 10.11.2020