Saturday, March 30, 2019

How to detect changed rows and Bound Changes Between Row Values in Table?


Input Data

emp_id WorkingDay first_name Department UpdateTimeStamp
1 3/03/2019 EMP1 D1 3/04/2019 12:01:31
1 3/03/2019 EMP1 D1 3/04/2019 12:11:31
1 3/03/2019 EMP2 D2 3/04/2019 12:21:31
1 3/03/2019 EMP1 D2 3/04/2019 12:26:31
1 3/03/2019 EMP1 D3 3/04/2019 12:29:31
1 3/03/2019 EMP1 D3 3/04/2019 12:31:31
2 3/03/2019 EMP2 D1 3/04/2019 12:01:31
2 3/03/2019 EMP2 D1 3/04/2019 12:11:31
2 3/03/2019 EMP2 D1 3/04/2019 12:21:31
2 3/03/2019 EMP2 D1 3/04/2019 12:31:31
1 2/03/2019 EMP1 D1 2/04/2019 12:01:31
1 2/03/2019 EMP1 D1 2/04/2019 12:11:31
1 2/03/2019 EMP1 D2 2/04/2019 12:21:31
1 2/03/2019 EMP1 D2 2/04/2019 12:26:31
1 2/03/2019 EMP1 D3 2/04/2019 12:31:31





DROP TABLE employee;


CREATE TABLE employee (emp_id INT , WorkingDay date NOT NULL,
                                                    first_name VARCHAR(30) NOT NULL,
                                                                           Department varchar(10) NOT NULL,
                                                                                                  UpdateTimeStamp datetime NOT NULL);


INSERT INTO employee
VALUES (1,'2019-03-03','EMP1','D1',DATEADD(MINUTE,-30, GETDATE())) ,
       (1,'2019-03-03','EMP1','D1',DATEADD(MINUTE,-20, GETDATE())) ,
       (1,'2019-03-03','EMP2','D2',DATEADD(MINUTE,-10, GETDATE())) ,
       (1,'2019-03-03','EMP1','D2',DATEADD(MINUTE,-5, GETDATE())) ,
       (1,'2019-03-03','EMP1','D3',DATEADD(MINUTE,-2, GETDATE())) ,
       (1,'2019-03-03','EMP1','D3',DATEADD(MINUTE,-0, GETDATE())) ,
       (2,'2019-03-03','EMP2','D1',DATEADD(MINUTE,-30, GETDATE())) ,
       (2,'2019-03-03','EMP2','D1',DATEADD(MINUTE,-20, GETDATE())) ,
       (2,'2019-03-03','EMP2','D1',DATEADD(MINUTE,-10, GETDATE())) ,
       (2,'2019-03-03','EMP2','D1',DATEADD(MINUTE,-0, GETDATE())) ,
       (1,'2019-03-02','EMP1','D1',dateAdd(DAY,-1,(DATEADD(MINUTE,-30, GETDATE())))) ,
       (1,'2019-03-02','EMP1','D1',dateAdd(DAY,-1,(DATEADD(MINUTE,-20, GETDATE())))) ,
       (1,'2019-03-02','EMP1','D2',dateAdd(DAY,-1,(DATEADD(MINUTE,-10, GETDATE())))) ,
       (1,'2019-03-02','EMP1','D2',dateAdd(DAY,-1,(DATEADD(MINUTE,-5, GETDATE())))) ,
       (1,'2019-03-02','EMP1','D3',dateAdd(DAY,-1,(DATEADD(MINUTE,-0, GETDATE()))));

WITH GroupedData AS
  (SELECT emp_id,
          WorkingDay,
          first_name,
          Department,
          UpdateTimeStamp,
          dense_rank() OVER (
                             ORDER BY emp_id,
                                      WorkingDay) Group1 --making a group to identify which record to compare with in a Group
,
                            ROW_NUMBER() OVER (
                                               ORDER BY emp_id,
                                                        WorkingDay,
                                                        first_name,
                                                        Department,
                                                        UpdateTimeStamp) rownum
   FROM employee --order by Group1
),
     FirstRowOfChangeData AS
  (SELECT g1.*,
          ROW_NUMBER() OVER (
                             ORDER BY g1.emp_id,
                                      g1.WorkingDay,
                                      g1.first_name,
                                      g1.Department,
                                      g1.UpdateTimeStamp) rownum1 -- another rownum over full dataset to compare 1 row with next one when doing self join

   FROM GroupedData g1
   LEFT OUTER JOIN GroupedData g2 ON g1.Group1=g2.Group1
   AND g1.emp_id=g2.emp_id
   AND g1.WorkingDay=g2.WorkingDay
   AND g1.emp_id=g2.emp_id
   AND g1.Department=g2.Department
   AND g2.rownum=g1.rownum-1 --Join rownumber with 1 less rownumber to detect the row where data is changed.

   WHERE g2.emp_id IS NULL -- pick the 1st row where data was changed.
 )
SELECT f1.*,
       f2.UpdateTimeStamp
FROM FirstRowOfChangeData f1
LEFT OUTER JOIN FirstRowOfChangeData f2 ON f2.rownum1=f1.rownum1+1
AND f2.Group1=f1.Group1 --now lets calculate how much time each employe Spent in a department each day.

Output Format


emp_id WorkingDay first_name Department StartTime EndTime
1 2/03/2019   EMP1 D1 2/04/2019 12:01:31 2/04/2019 12:21:31
1 2/03/2019   EMP1 D2 2/04/2019 12:21:31 2/04/2019 12:31:31
1 2/03/2019   EMP1 D3 2/04/2019 12:31:31 NULL
1 3/03/2019   EMP1 D1 3/04/2019 12:01:31 3/04/2019 12:26:31
1 3/03/2019   EMP1 D2 3/04/2019 12:26:31 3/04/2019 12:29:31
1 3/03/2019   EMP1 D3 3/04/2019 12:29:31 3/04/2019 12:21:31
1 3/03/2019   EMP2 D2 3/04/2019 12:21:31 NULL
2 3/03/2019   EMP2 D1 3/04/2019 12:01:31 NULL

You could use the above approach close endTime where it's NULL when the day ends.


No comments:

Post a Comment

Recently Executed queries

 SELECT     txt.TEXT AS [SQL Statement],     qs.EXECUTION_COUNT [No. Times Executed],     qs.LAST_EXECUTION_TIME AS [Last Time Executed],   ...