I have the following table :
id study_start_time study_end_time
____________________________________________
1234 168 256
2345 175 233
1234 256 300
1234 300 389
2345 400 425
4567 200 225
I want to obtain the table below:
id study_start_time study_end_time
____________________________________________
1234 168 389
Process:
1234 starts at minute 168 and works until minute 256
1234 starts at minute 256 and works until minute 300
1234 starts at minute 300 and works until minute 389
but the actual table should be reduced as shown below:
1234 starts at minute 168 and works until minute 389
How can I do that?
Won't win an award for beauty but works:
-- Merge back-to-back study sessions of the same id into one row per
-- uninterrupted run. A session continues another one when it has the same
-- id and starts at exactly the minute the other one ends.
-- NOTE(review): assumes sessions of one id never overlap and that
-- study_start_time < study_end_time on every row -- confirm against the data.
--
-- Part 1: runs made of two or more sessions (no gap between them).
SELECT c1.id                  AS id,
       c1.study_start_time    AS study_start_time,
       -- The nearest run-closing session after c1 is the end of c1's own run.
       MIN(c2.study_end_time) AS study_end_time
FROM   c c1
       INNER JOIN c c2
               ON c2.id = c1.id
              AND c2.study_end_time > c1.study_end_time
WHERE  -- c1 opens a run: nothing of the same id ends where it starts ...
       NOT EXISTS (SELECT 1
                   FROM   c prev
                   WHERE  prev.id = c1.id
                          AND prev.study_end_time = c1.study_start_time)
       -- ... and something of the same id starts where it ends.
       AND EXISTS (SELECT 1
                   FROM   c nxt
                   WHERE  nxt.id = c1.id
                          AND nxt.study_start_time = c1.study_end_time)
       -- c2 closes a run: nothing of the same id starts where it ends.
       AND NOT EXISTS (SELECT 1
                       FROM   c nxt
                       WHERE  nxt.id = c2.id
                              AND nxt.study_start_time = c2.study_end_time)
GROUP  BY c1.id,
          c1.study_start_time
UNION ALL
-- Part 2: standalone sessions (a gap on both sides). These can never be
-- produced by part 1, so UNION ALL does not create duplicates.
SELECT c1.id               AS id,
       c1.study_start_time AS study_start_time,
       c1.study_end_time   AS study_end_time
FROM   c c1
WHERE  NOT EXISTS (SELECT 1
                   FROM   c prev
                   WHERE  prev.id = c1.id
                          AND prev.study_end_time = c1.study_start_time)
       AND NOT EXISTS (SELECT 1
                       FROM   c nxt
                       WHERE  nxt.id = c1.id
                              AND nxt.study_start_time = c1.study_end_time)
ORDER  BY id,
          study_start_time;
The first part of the union gets you only the courses with no gaps.
The second part gets the gaps and excludes the courses already received
in the first part.
Well, it appears I submitted this answer to the wrong question ("MySQL Query Optimization - inner queries"), and actually got points for it! I'll repost it here:
-- Collapse back-to-back sessions of one id into a single start/end row by
-- walking each chain with a recursive CTE.
-- EXPLAIN ANALYZE
WITH RECURSIVE tree AS (
    -- Seed: sessions that no other session of the same id leads into.
    SELECT
        t0.id,
        t0.study_start_time,
        t0.study_end_time
    FROM tab t0
    WHERE NOT EXISTS (
        SELECT 1
        FROM tab nx
        WHERE nx.id = t0.id
          AND nx.study_end_time = t0.study_start_time
    )

    UNION

    -- Step: keep the chain's original start and push its end forward to the
    -- end of the session that begins exactly where the chain currently stops.
    SELECT
        tt.id,
        tt.study_start_time,
        t1.study_end_time
    FROM tab t1
    INNER JOIN tree tt
        ON t1.id = tt.id
       AND t1.study_start_time = tt.study_end_time
)
-- tree also holds every partially extended chain; keep only the rows whose
-- end is not the start of another session of the same id.
SELECT
    tree.id,
    tree.study_start_time,
    tree.study_end_time
FROM tree
WHERE NOT EXISTS (
    SELECT 1
    FROM tab nx
    WHERE nx.id = tree.id
      AND tree.study_end_time = nx.study_start_time
)
ORDER BY tree.id
;
Results:
DROP TABLE
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "tab_pkey" for table "tab"
CREATE TABLE
CREATE INDEX
INSERT 0 15
id | study_start_time | study_end_time
------+------------------+----------------
1234 | 168 | 480
2345 | 175 | 233
2345 | 400 | 425
4567 | 200 | 225
4567 | 250 | 289
4567 | 300 | 310
4567 | 320 | 340
4567 | 360 | 390
(8 rows)
Query plan:
Merge Anti Join (cost=16209.59..16292.13 rows=6386 width=12) (actual time=0.393..0.406 rows=8 loops=1)
Merge Cond: ((tree.id = nx.id) AND (tree.study_end_time = nx.study_start_time))
CTE tree
-> Recursive Union (cost=0.00..15348.09 rows=8515 width=12) (actual time=0.038..0.287 rows=15 loops=1)
-> Merge Anti Join (cost=0.00..175.04 rows=1455 width=12) (actual time=0.031..0.081 rows=8 loops=1)
Merge Cond: ((t0.id = nx.id) AND (t0.study_start_time = nx.study_end_time))
-> Index Scan using tab_pkey on tab t0 (cost=0.00..77.35 rows=1940 width=12) (actual time=0.015..0.030 rows=15 loops=1)
-> Index Scan using sssss on tab nx (cost=0.00..77.35 rows=1940 width=8) (actual time=0.006..0.015 rows=14 loops=1)
-> Merge Join (cost=1297.04..1500.28 rows=706 width=12) (actual time=0.021..0.028 rows=1 loops=6)
Merge Cond: ((t1.id = tt.id) AND (t1.study_start_time = tt.study_end_time))
-> Index Scan using tab_pkey on tab t1 (cost=0.00..77.35 rows=1940 width=12) (actual time=0.004..0.008 rows=9 loops=6)
-> Sort (cost=1297.04..1333.42 rows=14550 width=12) (actual time=0.011..0.011 rows=2 loops=6)
Sort Key: tt.id, tt.study_end_time
Sort Method: quicksort Memory: 25kB
-> WorkTable Scan on tree tt (cost=0.00..291.00 rows=14550 width=12) (actual time=0.001..0.001 rows=2 loops=6)
-> Sort (cost=726.15..747.44 rows=8515 width=12) (actual time=0.342..0.346 rows=15 loops=1)
Sort Key: tree.id, tree.study_end_time
Sort Method: quicksort Memory: 25kB
-> CTE Scan on tree (cost=0.00..170.30 rows=8515 width=12) (actual time=0.041..0.306 rows=15 loops=1)
-> Sort (cost=135.34..140.19 rows=1940 width=8) (actual time=0.037..0.040 rows=15 loops=1)
Sort Key: nx.id, nx.study_start_time
Sort Method: quicksort Memory: 25kB
-> Seq Scan on tab nx (cost=0.00..29.40 rows=1940 width=8) (actual time=0.005..0.012 rows=15 loops=1)
Total runtime: 0.925 ms
(24 rows)
Prepare sample data:
-- Sample sessions: one row per (id, start minute, end minute).
CREATE TABLE tab (
    id               INT,
    study_start_time INT,
    study_end_time   INT
);

-- 1234 is one unbroken chain from 168 to 480, 2345 has two separate
-- sessions, and 4567 mixes standalone sessions with two short chains.
INSERT INTO tab (id, study_start_time, study_end_time)
SELECT 1234, 168, 256 UNION ALL
SELECT 2345, 175, 233 UNION ALL
SELECT 1234, 256, 300 UNION ALL
SELECT 1234, 300, 389 UNION ALL
SELECT 1234, 389, 439 UNION ALL
SELECT 1234, 439, 460 UNION ALL
SELECT 1234, 460, 480 UNION ALL
SELECT 2345, 400, 425 UNION ALL
SELECT 4567, 200, 225 UNION ALL
SELECT 4567, 250, 270 UNION ALL
SELECT 4567, 270, 289 UNION ALL
SELECT 4567, 300, 310 UNION ALL
SELECT 4567, 320, 340 UNION ALL
SELECT 4567, 360, 370 UNION ALL
SELECT 4567, 370, 390;
The way to "merge" rows:
Maybe I've overcomplicated it, but the result is as expected :-). It could be simplified in SQL Server 2005+ by using a CTE while still remaining a single statement, or by using a temporary table (available in most RDBMSs) and splitting the work into several statements.
-- Merge back-to-back sessions of one id in a single statement, without CTEs
-- or temporary tables. The same derived table is spelled out four times:
-- every row of tab plus
--   next_end   = study_end_time of the same-id session starting where this one ends
--   prev_start = study_start_time of the same-id session ending where this one starts
-- (each is NULL when its LEFT JOIN finds no match).
SELECT
    merged.id,
    merged.study_start_time,
    merged.study_end_time
FROM (
    -- Branch 1: the first session of each chain, paired with the chain's
    -- last session to supply the merged end time.
    SELECT
        head.id,
        head.study_start_time,
        tail.study_end_time
    FROM (
        SELECT
            cur.id,
            cur.study_start_time,
            cur.study_end_time,
            nxt.study_end_time   AS next_end,
            prv.study_start_time AS prev_start
        FROM tab cur
        LEFT JOIN tab nxt
            ON nxt.id = cur.id
           AND nxt.study_start_time = cur.study_end_time
        LEFT JOIN tab prv
            ON prv.id = cur.id
           AND prv.study_end_time = cur.study_start_time
    ) head
    INNER JOIN (
        -- Chain tails: preceded by a session but not followed by one.
        SELECT
            lnk.id,
            lnk.study_end_time
        FROM (
            SELECT
                cur.id,
                cur.study_start_time,
                cur.study_end_time,
                nxt.study_end_time   AS next_end,
                prv.study_start_time AS prev_start
            FROM tab cur
            LEFT JOIN tab nxt
                ON nxt.id = cur.id
               AND nxt.study_start_time = cur.study_end_time
            LEFT JOIN tab prv
                ON prv.id = cur.id
               AND prv.study_end_time = cur.study_start_time
        ) lnk
        WHERE lnk.next_end IS NULL
          AND lnk.prev_start IS NOT NULL
    ) tail
        ON tail.id = head.id
       AND tail.study_end_time = (
            -- Earliest chain tail of this id that ends at or after the head.
            SELECT MIN(cand.study_end_time)
            FROM (
                SELECT
                    cur.id,
                    cur.study_start_time,
                    cur.study_end_time,
                    nxt.study_end_time   AS next_end,
                    prv.study_start_time AS prev_start
                FROM tab cur
                LEFT JOIN tab nxt
                    ON nxt.id = cur.id
                   AND nxt.study_start_time = cur.study_end_time
                LEFT JOIN tab prv
                    ON prv.id = cur.id
                   AND prv.study_end_time = cur.study_start_time
            ) cand
            WHERE cand.id = head.id
              AND cand.study_end_time >= head.study_end_time
              AND cand.next_end IS NULL
              AND cand.prev_start IS NOT NULL
       )
    -- Chain heads: followed by a session but not preceded by one.
    WHERE head.next_end IS NOT NULL
      AND head.prev_start IS NULL

    UNION

    -- Branch 2: standalone sessions with no neighbour on either side.
    SELECT
        solo.id,
        solo.study_start_time,
        solo.study_end_time
    FROM (
        SELECT
            cur.id,
            cur.study_start_time,
            cur.study_end_time,
            nxt.study_end_time   AS next_end,
            prv.study_start_time AS prev_start
        FROM tab cur
        LEFT JOIN tab nxt
            ON nxt.id = cur.id
           AND nxt.study_start_time = cur.study_end_time
        LEFT JOIN tab prv
            ON prv.id = cur.id
           AND prv.study_end_time = cur.study_start_time
    ) solo
    WHERE solo.next_end IS NULL
      AND solo.prev_start IS NULL
) merged
ORDER BY merged.id, merged.study_start_time;
Final result:
id study_start_time study_end_time
----------- ---------------- --------------
1234 168 480
2345 175 233
2345 400 425
4567 200 225
4567 250 289
4567 300 310
4567 320 340
4567 360 390
-- Earliest start and latest end per id, returned as separate rows:
-- the first branch yields (id, earliest study_start_time) and the second
-- yields (id, latest study_end_time).
-- NOTE(review): MIN/MAX per id span any gaps between sessions, so an id with
-- non-contiguous sessions is NOT split into separate runs here.
-- NOTE(review): the original used the placeholder name "table" (a reserved
-- word); "tab" is assumed to be the real table -- adjust if it differs.
(SELECT id,
        MIN(study_start_time) AS study_time
 FROM tab
 GROUP BY id)
UNION
(SELECT id,
        MAX(study_end_time)
 FROM tab
 GROUP BY id);