关于sql:关系数据库中的行程时间计算?

关于sql:关系数据库中的行程时间计算?

Trip time calculation in relational databases?

我想到了这个问题,因为我刚刚发现了这个网站,所以我决定在这里发布它。

假设我有一个表,其中包含给定"对象"(一般意义上的对象,不是 OOP 对象)的时间戳和状态;是否有一种最佳方法可以使用单个 SQL 语句(内部 SELECT 和 UNION 不计算在内)计算一个状态与下一次出现另一个(或相同)状态之间的时间(我称之为"行程")?

例如:对于以下情况,Initial 和 Done 之间的行程时间为 6 天,但 Initial 和 Review 之间的行程时间为 2 天。

2008-08-01 13:30:00 - Initial
2008-08-02 13:30:00 - Work
2008-08-03 13:30:00 - Review
2008-08-04 13:30:00 - Work
2008-08-05 13:30:00 - Review
2008-08-06 13:30:00 - Accepted
2008-08-07 13:30:00 - Done

解决方案不必是通用的;如果不通用,只需说明它针对的是哪种 DBMS(数据库管理系统)。


PostgreSQL 语法:

1
2
3
4
5
6
7
8
-- Recreate the event log: one row per state change of an object.
-- IF EXISTS keeps the script runnable on a fresh database, where a bare
-- DROP TABLE would abort because the table does not exist yet.
DROP TABLE IF EXISTS ObjectState;
CREATE TABLE ObjectState (
    object_id INTEGER NOT NULL,     -- foreign key
    event_time TIMESTAMP NOT NULL,  -- when the state change was recorded
    state VARCHAR(10) NOT NULL,     -- state name, e.g. 'Initial', 'Work', 'Done'
    --Other fields
    -- At most one event per object at any given instant.
    CONSTRAINT pk_ObjectState PRIMARY KEY (object_id,event_time)
);

对于给定的状态,找到给定类型的第一个后续状态

1
2
3
4
5
6
7
8
9
10
-- For one starting event (object 1 at 2008-08-01 13:30:00), find the earliest
-- later event of that object whose state is 'Review', and the time elapsed until it.
SELECT
    parent.object_id,
    parent.event_time,
    parent.state,
    MIN(child.event_time) AS ch_event_time,                 -- earliest later 'Review' event
    MIN(child.event_time) - parent.event_time AS step_time  -- interval from the start to that event
FROM
    ObjectState parent
    -- Pair the starting row with every strictly later event of the same object.
    JOIN ObjectState child ON (parent.object_id = child.object_id AND parent.event_time < child.event_time)
WHERE
    --Starting state
    parent.object_id=1 AND parent.event_time=to_timestamp('01-Aug-2008 13:30:00','dd-Mon-yyyy hh24:mi:ss')
    --needed state
    AND child.state='Review'
GROUP BY parent.object_id,parent.event_time,parent.state;

这个查询不是最短的,但应该很容易理解并用作其他查询的一部分:

列出给定对象的事件及其持续时间

1
2
3
4
5
6
7
8
-- List every event of object 4 together with the time spent in that state.
-- The LEFT JOIN keeps the last event, which has no later row; for it the
-- duration runs up to the current local time unless the state is 'Done'.
SELECT
    parent.object_id,
    parent.event_time,
    parent.state,
    MIN(child.event_time) AS ch_event_time,  -- next event of the same object, NULL for the last one
    CASE
        -- Still-open state: measure up to now.
        WHEN parent.state <> 'Done' AND MIN(child.event_time) IS NULL
            THEN (SELECT localtimestamp) - parent.event_time
        -- Closed state: measure up to the next event (NULL for a final 'Done').
        ELSE MIN(child.event_time) - parent.event_time
    END AS step_time
FROM
    ObjectState parent
    LEFT OUTER JOIN ObjectState child ON (parent.object_id = child.object_id AND parent.event_time < child.event_time)
WHERE parent.object_id=4
GROUP BY parent.object_id,parent.event_time,parent.state
ORDER BY parent.object_id,parent.event_time,parent.state;

列出尚未"完成"的对象的当前状态

1
2
3
4
5
6
7
8
9
10
-- Current state of every object that has never reached 'Done', with the time
-- elapsed since it entered that state.
SELECT states.object_id,states.event_time,states.state,(SELECT localtimestamp)-states.event_time AS step_time
FROM
    -- Each event paired with the timestamp of the next event of the same object;
    -- ch_event_time is NULL for the most recent event of an object.
    (SELECT parent.object_id,parent.event_time,parent.state,MIN(child.event_time) AS ch_event_time,MIN(child.event_time)-parent.event_time AS step_time
     FROM
        ObjectState parent
        LEFT OUTER JOIN ObjectState child ON (parent.object_id = child.object_id AND parent.event_time < child.event_time)
     GROUP BY parent.object_id,parent.event_time,parent.state) states
WHERE
    -- Exclude objects that have a 'Done' event anywhere in their history.
    states.object_id NOT IN (SELECT object_id FROM ObjectState WHERE state='Done')
    -- Keep only the latest event of each remaining object.
    AND ch_event_time IS NULL;

测试数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
-- Object 1: full life cycle from 'Initial' to 'Done'.
INSERT INTO ObjectState (object_id, event_time, state)
VALUES
    (1, to_timestamp('01-Aug-2008 13:30:00','dd-Mon-yyyy hh24:mi:ss'), 'Initial'),
    (1, to_timestamp('02-Aug-2008 13:40:00','dd-Mon-yyyy hh24:mi:ss'), 'Work'),
    (1, to_timestamp('03-Aug-2008 13:50:00','dd-Mon-yyyy hh24:mi:ss'), 'Review'),
    (1, to_timestamp('04-Aug-2008 14:30:00','dd-Mon-yyyy hh24:mi:ss'), 'Work'),
    (1, to_timestamp('04-Aug-2008 16:20:00','dd-Mon-yyyy hh24:mi:ss'), 'Review'),
    (1, to_timestamp('06-Aug-2008 18:00:00','dd-Mon-yyyy hh24:mi:ss'), 'Accepted'),
    (1, to_timestamp('07-Aug-2008 21:30:00','dd-Mon-yyyy hh24:mi:ss'), 'Done');

-- Object 2: full life cycle with longer gaps between steps.
INSERT INTO ObjectState (object_id, event_time, state)
VALUES
    (2, to_timestamp('01-Aug-2008 13:30:00','dd-Mon-yyyy hh24:mi:ss'), 'Initial'),
    (2, to_timestamp('02-Aug-2008 13:40:00','dd-Mon-yyyy hh24:mi:ss'), 'Work'),
    (2, to_timestamp('07-Aug-2008 13:50:00','dd-Mon-yyyy hh24:mi:ss'), 'Review'),
    (2, to_timestamp('14-Aug-2008 14:30:00','dd-Mon-yyyy hh24:mi:ss'), 'Work'),
    (2, to_timestamp('15-Aug-2008 16:20:00','dd-Mon-yyyy hh24:mi:ss'), 'Review'),
    (2, to_timestamp('16-Aug-2008 18:02:00','dd-Mon-yyyy hh24:mi:ss'), 'Accepted'),
    (2, to_timestamp('17-Aug-2008 22:10:00','dd-Mon-yyyy hh24:mi:ss'), 'Done');

-- Object 3: not finished, last recorded state is 'Review'.
INSERT INTO ObjectState (object_id, event_time, state)
VALUES
    (3, to_timestamp('12-Sep-2008 13:30:00','dd-Mon-yyyy hh24:mi:ss'), 'Initial'),
    (3, to_timestamp('13-Sep-2008 13:40:00','dd-Mon-yyyy hh24:mi:ss'), 'Work'),
    (3, to_timestamp('14-Sep-2008 13:50:00','dd-Mon-yyyy hh24:mi:ss'), 'Review'),
    (3, to_timestamp('15-Sep-2008 14:30:00','dd-Mon-yyyy hh24:mi:ss'), 'Work'),
    (3, to_timestamp('16-Sep-2008 16:20:00','dd-Mon-yyyy hh24:mi:ss'), 'Review');

-- Object 4: not finished, last recorded state is 'Work'.
INSERT INTO ObjectState (object_id, event_time, state)
VALUES
    (4, to_timestamp('21-Aug-2008 03:10:00','dd-Mon-yyyy hh24:mi:ss'), 'Initial'),
    (4, to_timestamp('22-Aug-2008 03:40:00','dd-Mon-yyyy hh24:mi:ss'), 'Work'),
    (4, to_timestamp('23-Aug-2008 03:20:00','dd-Mon-yyyy hh24:mi:ss'), 'Review'),
    (4, to_timestamp('24-Aug-2008 04:30:00','dd-Mon-yyyy hh24:mi:ss'), 'Work');

这是使用分析函数的 Oracle 方法。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
-- Inline sample data: the seven steps of trip 1, one day apart.
WITH trip_steps AS (
    SELECT 1 AS trip_id, to_date('20080801 13:30:00','YYYYMMDD HH24:mi:ss') AS dt, 'Initial'  AS step FROM dual
    UNION ALL
    SELECT 1 AS trip_id, to_date('20080802 13:30:00','YYYYMMDD HH24:mi:ss') AS dt, 'Work'     AS step FROM dual
    UNION ALL
    SELECT 1 AS trip_id, to_date('20080803 13:30:00','YYYYMMDD HH24:mi:ss') AS dt, 'Review'   AS step FROM dual
    UNION ALL
    SELECT 1 AS trip_id, to_date('20080804 13:30:00','YYYYMMDD HH24:mi:ss') AS dt, 'Work'     AS step FROM dual
    UNION ALL
    SELECT 1 AS trip_id, to_date('20080805 13:30:00','YYYYMMDD HH24:mi:ss') AS dt, 'Review'   AS step FROM dual
    UNION ALL
    SELECT 1 AS trip_id, to_date('20080806 13:30:00','YYYYMMDD HH24:mi:ss') AS dt, 'Accepted' AS step FROM dual
    UNION ALL
    SELECT 1 AS trip_id, to_date('20080807 13:30:00','YYYYMMDD HH24:mi:ss') AS dt, 'Done'     AS step FROM dual
)
-- For each step, the difference between its date and the previous step's date
-- within the same trip; the first step of a trip has no predecessor, so it yields NULL.
SELECT
    trip_id,
    step,
    dt - LAG(dt) OVER (PARTITION BY trip_id ORDER BY dt) AS trip_time
FROM trip_steps
/


1   Initial
1   Work        1
1   Review      1
1   Work        1
1   Review      1
1   Accepted    1
1   Done        1

这些在传统上我们可能会使用自连接的情况下非常常用。


好吧,这有点极客过头了,但我在孩子出生前构建了一个 Web 应用程序来记录我妻子的宫缩,这样我在上班时就能看出什么时候该去医院了。无论如何,我很容易地用两个视图实现了这个基本功能。

1
2
3
4
5
6
7
8
9
10
11
12
-- One row per recorded contraction, keyed by its timestamp.
-- The column list must be wrapped in parentheses to be valid DDL.
CREATE TABLE contractions (
    time_date TIMESTAMP PRIMARY KEY
);

-- For each contraction, the timestamp of the closest earlier contraction.
-- The earliest contraction has no earlier row to join to, so it does not appear.
CREATE VIEW contraction_time AS
SELECT a.time_date, MAX(b.prev_time) AS prev_time
   FROM contractions a
   INNER JOIN ( SELECT contractions.time_date AS prev_time
           FROM contractions) b
      ON b.prev_time < a.time_date
  GROUP BY a.time_date;

-- Elapsed time between each contraction and the one before it.
CREATE VIEW time_between AS
SELECT contraction_time.time_date, contraction_time.prev_time, contraction_time.time_date - contraction_time.prev_time
   FROM contraction_time;

这显然也可以作为子选择来完成,但我也将中间视图用于其他事情,所以效果很好。


我尝试在 MySQL 中执行此操作。你需要使用一个变量,因为 MySQL 中没有 rank 函数,所以它会像这样:

1
2
3
4
5
6
7
-- Row counters used to number the 'Initial' rows and the 'Done' rows.
SET @trip1 = 0;
SET @trip2 = 0;

-- Pair the n-th numbered 'Initial' row with the n-th numbered 'Done' row
-- and report the difference in days between their dates.
SELECT
    trip1.`date` AS startdate,
    DATEDIFF(trip2.`date`, trip1.`date`) AS length_of_trip
FROM
    (SELECT @trip1 := @trip1 + 1 AS rank1, `date` FROM trip WHERE state = 'Initial') AS trip1
    INNER JOIN
    (SELECT @trip2 := @trip2 + 1 AS rank2, `date` FROM trip WHERE state = 'Done') AS trip2
        ON trip1.rank1 = trip2.rank2;

我假设您要计算"初始"和"完成"状态之间的时间。

1
2
3
4
5
+---------------------+----------------+
| startdate           | length_of_trip |
+---------------------+----------------+
| 2008-08-01 13:30:00 |              6 |
+---------------------+----------------+

我认为您的各个步骤(行程中的每条记录都可以视为一个步骤)可以按某种方式归为同一活动的一部分。这样就可以对数据进行分组,例如:

1
2
3
4
5
6
-- First and last step timestamps of one activity.
-- The trailing "_" characters of the original were host-language line
-- continuations, not SQL, and are removed so the statement parses on its own.
SELECT MIN(Tbl_Step.dateTimeStep) AS tripBegin,
       MAX(Tbl_Step.dateTimeStep) AS tripEnd
FROM
       Tbl_Step
WHERE
       id_Activity = 'AAAAAAA';

使用此原理,您可以计算其他聚合,例如活动中的步数等。但是您不会找到一种 SQL 方法来计算诸如两步之间的差距之类的值,因为这样的数据既不属于第一步也不属于第二步。一些报告工具使用他们所谓的"运行总和"来计算这些中间数据。根据您的目标,这可能是适合您的解决方案。


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
    -- Oracle SQl

    CREATE TABLE ObjectState
    (
        startdate DATE NOT NULL,
        state varchar2(10) NOT NULL
    );



   INSERT INTO ObjectState
   SELECT to_date('01-Aug-2008 13:30:00','dd-Mon-rrrr hh24:mi:ss'),'Initial' UNION ALL
   SELECT to_date('02-Aug-2008 13:30:00','dd-Mon-rrrr hh24:mi:ss'),'Work' UNION ALL
   SELECT to_date('03-Aug-2008 13:30:00','dd-Mon-rrrr hh24:mi:ss'),'Review' UNION ALL
   SELECT to_date('04-Aug-2008 13:30:00','dd-Mon-rrrr hh24:mi:ss'),'Work' UNION ALL
   SELECT to_date('05-Aug-2008 13:30:00','dd-Mon-rrrr hh24:mi:ss'),'Review' UNION ALL
   SELECT to_date('06-Aug-2008 13:30:00','dd-Mon-rrrr hh24:mi:ss'),'Accepted' UNION ALL
   SELECT to_date('07-Aug-2008 13:30:00','dd-Mon-rrrr hh24:mi:ss'),'Done';

-- Days in between two states
-- For each 'Initial' row, take the earliest 'Review' row that comes after it.
-- Without the ordering condition and MIN, the join returns one row for every
-- Initial/Review pair (two rows for the sample data) rather than the time to
-- the next 'Review'.

  SELECT  MIN(o2.startdate) - o1.startdate AS days
  FROM ObjectState o1
  INNER JOIN ObjectState o2
     ON o2.startdate > o1.startdate
  WHERE o1.state = 'Initial'
  AND o2.state = 'Review'
  GROUP BY o1.startdate;

如果你有一个序列号和时间戳可能会更容易:在大多数 RDBMS 中,你可以创建一个自动增量列而不更改任何 INSERT 语句。然后,您使用自身的副本加入表以获取 deltas

1
2
3
-- Pair every row with its immediate successor (sequence + 1) and report the
-- time delta between the two. The original predicate matched the predecessor
-- instead, which made every delta negative. The aliases are renamed because
-- BEFORE and AFTER are keywords in some dialects.
SELECT later.moment - earlier.moment, earlier.state, later.state
FROM object_states earlier
INNER JOIN object_states later
    ON later.sequence = earlier.sequence + 1;

(SQL 语法的详细信息将根据数据库系统而有所不同)。


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
-- Sample state history: one row per state change.
CREATE TABLE A (
    At    datetime    NOT NULL,
    State VARCHAR(20) NOT NULL
)
GO
-- Load the seven sample events with a single multi-row VALUES list.
INSERT INTO A (At, State)
VALUES
    ('2008-08-01T13:30:00', 'Initial'),
    ('2008-08-02T13:30:00', 'Work'),
    ('2008-08-03T13:30:00', 'Review'),
    ('2008-08-04T13:30:00', 'Work'),
    ('2008-08-05T13:30:00', 'Review'),
    ('2008-08-06T13:30:00', 'Accepted'),
    ('2008-08-07T13:30:00', 'Done')
GO
--Find trip time from Initial to Review
-- t1/t2 enumerate every (Initial, later Review) pair. The two anti-joins then
-- discard any pair that has another 'Initial' (t3) or another 'Review' (t4)
-- strictly between its endpoints, leaving the latest 'Initial' paired with
-- the first 'Review' that follows it.
SELECT DATEDIFF(DAY,t1.At,t2.At)
FROM
    A t1
        INNER JOIN
    A t2
        ON
            t1.State = 'Initial' AND
            t2.State = 'Review' AND
            t1.At < t2.At
        LEFT JOIN
    A t3
        ON
            -- A later 'Initial' that still precedes the candidate 'Review'.
            t3.State = 'Initial' AND
            t3.At > t1.At AND
            t3.At < t2.At
        LEFT JOIN
    A t4
        ON
            -- An earlier 'Review' that still follows the candidate 'Initial'.
            t4.State = 'Review' AND
            t4.At < t2.At AND
            t4.At > t1.At
WHERE
    -- Keep only pairs with no such intermediate rows.
    t3.At IS NULL AND
    t4.At IS NULL

问题没有说明是否允许使用连接(JOIN)。连接 t3 和 t4(以及其中的比较条件)可以让您指定想要的是开始状态和结束状态中最早还是最晚出现的那一条(在本例中,我要的是最晚的"Initial"和最早的"Review")

在实际代码中,我的开始和结束状态将是参数

编辑:糟糕,需要加上"t3.At < t2.At"和"t4.At > t1.At",以修复一些奇怪的状态序列(例如,如果我们删除了第二个"Review",然后把"Work"改为"Review",原来的查询就会失败)


我不确定我是否完全理解这个问题,但您可以执行以下操作,一次性读取表格,然后使用派生表格进行计算。 SQL Server 代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
-- Scratch table holding the sample state history.
CREATE TABLE #testing
(
    eventdatetime datetime NOT NULL,
    state VARCHAR(10) NOT NULL
);

INSERT INTO #testing (eventdatetime, state)
SELECT '20080801 13:30:00', 'Initial' UNION ALL
SELECT '20080802 13:30:00', 'Work' UNION ALL
SELECT '20080803 13:30:00', 'Review' UNION ALL
SELECT '20080804 13:30:00', 'Work' UNION ALL
SELECT '20080805 13:30:00', 'Review' UNION ALL
SELECT '20080806 13:30:00', 'Accepted' UNION ALL
SELECT '20080807 13:30:00', 'Done';

-- Single pass over the table: pick the earliest 'Initial' and the earliest
-- 'Review' timestamps, then report the difference between them in days.
WITH first_seen AS (
    SELECT
        MIN(CASE WHEN state = 'Initial' THEN eventdatetime END) AS Initial,
        MIN(CASE WHEN state = 'Review' THEN eventdatetime END) AS Review
    FROM #testing
)
SELECT DATEDIFF(dd, Initial, Review)
FROM first_seen;

DROP TABLE #testing;

我不认为你可以用一条 SQL 语句得到这个答案,因为你试图从许多记录中获取一个结果。在 SQL 中实现这一点的唯一方法是获取两个不同记录的时间戳字段并计算差异 (datediff)。因此,需要 UNIONS 或 Inner Joins。


推荐阅读