reducing table to avoid space inefficiency

2020-05-01 08:13发布

问题:

I have the following table :

 id    study_start_time  study_end_time 
____________________________________________
1234          168              256
2345          175              233
1234          256              300
1234          300              389
2345          400              425
4567          200              225 

I want to obtain the table below:

  id    study_start_time  study_end_time 
____________________________________________
1234          168              389               

Process:

1234 starts at minute 168 and works until minute 256.
1234 starts at minute 256 and works until minute 300.
1234 starts at minute 300 and works until minute 389.

However, the table should actually be reduced as shown below:

1234 starts at minute 168 and works until minute 389.

How can I do that?

回答1:

Won't win an award for beauty but works:

-- Collapse adjoining study intervals (one row ends exactly where another row
-- of the same id starts) stored in table c.
--
-- NOTE: part 1 groups by id only, so an id is returned as ONE row spanning
-- from its earliest chained start to its latest chained end. If a single id
-- has several separate runs, they are folded together.

-- Part 1: ids that have at least one pair of adjoining intervals.
SELECT x.c1id                    AS id,
       MIN(x.c1study_start_time) AS study_start_time,
       MAX(x.c2study_end_time)   AS study_end_time
FROM   (SELECT c1.id               AS c1id,
               c1.study_start_time AS c1study_start_time,
               c2.study_end_time   AS c2study_end_time
        FROM   c c1
               INNER JOIN c c2
                       -- Match on id as well as on time, otherwise an interval
                       -- of one id could be chained to an interval of another.
                       ON c2.id = c1.id
                      AND c2.study_start_time = c1.study_end_time) x
GROUP  BY x.c1id

UNION ALL

-- Part 2: rows with no following interval, for ids not returned by part 1.
SELECT x.c1id               AS id,
       x.c1study_start_time AS study_start_time,
       x.c1study_end_time   AS study_end_time
FROM   (SELECT c1.id               AS c1id,
               c1.study_start_time AS c1study_start_time,
               c1.study_end_time   AS c1study_end_time,
               c2.id               AS c2id
        FROM   c c1
               LEFT OUTER JOIN c c2
                       ON c2.id = c1.id
                      AND c2.study_start_time = c1.study_end_time) x
WHERE  x.c2id IS NULL
       -- NOT EXISTS instead of NOT IN: NOT IN matches nothing as soon as the
       -- subquery yields a NULL.
       AND NOT EXISTS (SELECT 1
                       FROM   c p1
                              INNER JOIN c p2
                                      ON p2.id = p1.id
                                     AND p2.study_start_time = p1.study_end_time
                       WHERE  p1.id = x.c1id);

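The first part of the union collapses every course (id) that has at least one pair of back-to-back intervals into a single row with no gaps. The second part returns the remaining rows, i.e. those with no following interval, and excludes the courses already returned by the first part.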



回答2:

Well, it appears I submitted the answer to the wrong question ("MySQL Query Optimization - inner queries"), and actually got points for it! I'll repost it here:

-- Merge chains of adjoining study intervals (same id, one row's end equals the
-- next row's start) into a single row per chain.
-- Prefix the statement with EXPLAIN ANALYZE to obtain its query plan.
WITH RECURSIVE tree AS (
    -- Anchor: rows that begin a chain, i.e. no row of the same id ends
    -- exactly where this row starts.
    SELECT t0.id
        , t0.study_start_time
        , t0.study_end_time
    FROM tab t0
    WHERE NOT EXISTS (
        SELECT 1
        FROM tab nx
        WHERE nx.id = t0.id
          AND nx.study_end_time = t0.study_start_time
        )
    -- UNION rather than UNION ALL: duplicate rows are discarded.
    UNION
    -- Recursive step: keep the chain's start and push its end forward to the
    -- end of the interval that begins where the chain currently stops.
    SELECT tt.id
        , tt.study_start_time
        , t1.study_end_time
    FROM tab t1
    JOIN tree tt
      ON t1.id = tt.id
     AND t1.study_start_time = tt.study_end_time
    )
SELECT tree.id
    , tree.study_start_time
    , tree.study_end_time
FROM tree
-- Keep only fully extended chains: no interval of the same id starts at the
-- chain's end, so the partial chains built along the way are dropped.
WHERE NOT EXISTS (
    SELECT 1
    FROM tab nx
    WHERE nx.id = tree.id
      AND tree.study_end_time = nx.study_start_time
    )
-- An id can yield several rows; the start time makes the order deterministic.
ORDER BY tree.id, tree.study_start_time
    ;

Results:

DROP TABLE
NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "tab_pkey" for table "tab"
CREATE TABLE
CREATE INDEX
INSERT 0 15
  id  | study_start_time | study_end_time 
------+------------------+----------------
 1234 |              168 |            480
 2345 |              175 |            233
 2345 |              400 |            425
 4567 |              200 |            225
 4567 |              250 |            289
 4567 |              300 |            310
 4567 |              320 |            340
 4567 |              360 |            390
(8 rows)

Query plan:

 Merge Anti Join  (cost=16209.59..16292.13 rows=6386 width=12) (actual time=0.393..0.406 rows=8 loops=1)
   Merge Cond: ((tree.id = nx.id) AND (tree.study_end_time = nx.study_start_time))
   CTE tree
     ->  Recursive Union  (cost=0.00..15348.09 rows=8515 width=12) (actual time=0.038..0.287 rows=15 loops=1)
           ->  Merge Anti Join  (cost=0.00..175.04 rows=1455 width=12) (actual time=0.031..0.081 rows=8 loops=1)
                 Merge Cond: ((t0.id = nx.id) AND (t0.study_start_time = nx.study_end_time))
                 ->  Index Scan using tab_pkey on tab t0  (cost=0.00..77.35 rows=1940 width=12) (actual time=0.015..0.030 rows=15 loops=1)
                 ->  Index Scan using sssss on tab nx  (cost=0.00..77.35 rows=1940 width=8) (actual time=0.006..0.015 rows=14 loops=1)
           ->  Merge Join  (cost=1297.04..1500.28 rows=706 width=12) (actual time=0.021..0.028 rows=1 loops=6)
                 Merge Cond: ((t1.id = tt.id) AND (t1.study_start_time = tt.study_end_time))
                 ->  Index Scan using tab_pkey on tab t1  (cost=0.00..77.35 rows=1940 width=12) (actual time=0.004..0.008 rows=9 loops=6)
                 ->  Sort  (cost=1297.04..1333.42 rows=14550 width=12) (actual time=0.011..0.011 rows=2 loops=6)
                       Sort Key: tt.id, tt.study_end_time
                       Sort Method: quicksort  Memory: 25kB
                       ->  WorkTable Scan on tree tt  (cost=0.00..291.00 rows=14550 width=12) (actual time=0.001..0.001 rows=2 loops=6)
   ->  Sort  (cost=726.15..747.44 rows=8515 width=12) (actual time=0.342..0.346 rows=15 loops=1)
         Sort Key: tree.id, tree.study_end_time
         Sort Method: quicksort  Memory: 25kB
         ->  CTE Scan on tree  (cost=0.00..170.30 rows=8515 width=12) (actual time=0.041..0.306 rows=15 loops=1)
   ->  Sort  (cost=135.34..140.19 rows=1940 width=8) (actual time=0.037..0.040 rows=15 loops=1)
         Sort Key: nx.id, nx.study_start_time
         Sort Method: quicksort  Memory: 25kB
         ->  Seq Scan on tab nx  (cost=0.00..29.40 rows=1940 width=8) (actual time=0.005..0.012 rows=15 loops=1)
 Total runtime: 0.925 ms
(24 rows)


回答3:

Prepare sample data:

-- Sample table: one row per study interval of an id.
create table tab (
    id               int,
    study_start_time int,
    study_end_time   int
);

-- 15 sample rows. Several intervals of the same id adjoin each other
-- (one ends exactly where the next starts); others are separated by gaps.
insert into tab (id, study_start_time, study_end_time)
          select 1234, 168, 256
union all select 2345, 175, 233
union all select 1234, 256, 300
union all select 1234, 300, 389
union all select 1234, 389, 439
union all select 1234, 439, 460
union all select 1234, 460, 480
union all select 2345, 400, 425
union all select 4567, 200, 225
union all select 4567, 250, 270
union all select 4567, 270, 289
union all select 4567, 300, 310
union all select 4567, 320, 340
union all select 4567, 360, 370
union all select 4567, 370, 390;

The way to "merge" rows:

Maybe I've overcomplicated it, but the result is as expected :-). It could be simplified in SQL Server 2005+ by using a CTE while still remaining a single statement ("one-liner"), or, in most RDBMSs, by using a temporary table and splitting the work into more than one statement.

-- Merge runs of adjoining intervals (same id, one row ends exactly where the
-- next one starts) into one row per run.
--
-- The same derived table appears four times below. It tags every row of tab with:
--   et = end time of the row that directly follows it    (null when none follows)
--   st = start time of the row that directly precedes it (null when none precedes)
select merged.id,
       merged.study_start_time,
       merged.study_end_time
from (
    -- Runs of two or more rows: the start comes from the run's first row,
    -- the end from the run's last row.
    select run_first.id,
           run_first.study_start_time,
           run_last.study_end_time
    from (
        select cur.id,
               cur.study_start_time,
               cur.study_end_time,
               nxt.study_end_time   as et,
               prv.study_start_time as st
        from tab cur
        left join tab nxt
               on nxt.id = cur.id
              and nxt.study_start_time = cur.study_end_time
        left join tab prv
               on prv.id = cur.id
              and prv.study_end_time = cur.study_start_time
    ) run_first
    join (
        -- Last row of a run: it has a predecessor but no successor.
        select linked.*
        from (
            select cur.id,
                   cur.study_start_time,
                   cur.study_end_time,
                   nxt.study_end_time   as et,
                   prv.study_start_time as st
            from tab cur
            left join tab nxt
                   on nxt.id = cur.id
                  and nxt.study_start_time = cur.study_end_time
            left join tab prv
                   on prv.id = cur.id
                  and prv.study_end_time = cur.study_start_time
        ) linked
        where linked.et is null
          and linked.st is not null
    ) run_last
      on run_last.id = run_first.id
     and run_last.study_end_time = (
            -- Pick the earliest run end of this id that is not before the
            -- first row's own end.
            select min(cand.study_end_time)
            from (
                select cur.id,
                       cur.study_start_time,
                       cur.study_end_time,
                       nxt.study_end_time   as et,
                       prv.study_start_time as st
                from tab cur
                left join tab nxt
                       on nxt.id = cur.id
                      and nxt.study_start_time = cur.study_end_time
                left join tab prv
                       on prv.id = cur.id
                      and prv.study_end_time = cur.study_start_time
            ) cand
            where cand.id = run_first.id
              and cand.study_end_time >= run_first.study_end_time
              and cand.et is null
              and cand.st is not null
         )
    -- First row of a run: it has a successor but no predecessor.
    where run_first.et is not null
      and run_first.st is null
    union
    -- Isolated rows (neither predecessor nor successor) pass through unchanged.
    select lone.id,
           lone.study_start_time,
           lone.study_end_time
    from (
        select cur.id,
               cur.study_start_time,
               cur.study_end_time,
               nxt.study_end_time   as et,
               prv.study_start_time as st
        from tab cur
        left join tab nxt
               on nxt.id = cur.id
              and nxt.study_start_time = cur.study_end_time
        left join tab prv
               on prv.id = cur.id
              and prv.study_end_time = cur.study_start_time
    ) lone
    where lone.et is null
      and lone.st is null
) merged
order by merged.id, merged.study_start_time;

Final result:

id          study_start_time study_end_time
----------- ---------------- --------------
1234        168              480
2345        175              233
2345        400              425
4567        200              225
4567        250              289
4567        300              310
4567        320              340
4567        360              390


回答4:

 (Select id, study_start_time where MIN(study_start_time) from (Select id, 
 study_start_time from table)) UNION 
 (Select id, study_end_time where
 MAX(study_end_time) from (Select id, study_end_time from table)