Automatic transfer data from the sql to R

2019-07-26 01:20发布

问题:

I get the data from the sql server to perform regression analysis, and then the regression results i return back to another sql table.

library("RODBC")
library(sqldf)

dbHandle <- odbcDriverConnect("driver={SQL Server};server=MYSERVER;database=MYBASE;trusted_connection=true")

sql <- 
  "select
Dt
,CustomerName
,ItemRelation
,SaleCount
,DocumentNum
,DocumentYear
,IsPromo

from dbo.mytable"

df <- sqlQuery(dbHandle, sql)

reg=lm(SaleCount~IsPromo,data=df)

#save to sql table
sqlSave(dbHandle, as.data.frame(reg), "dbo.mytableforecast", verbose = TRUE)  # use "append = TRUE" to add rows to an existing table

odbcClose(dbHandle)

The question:

The script works automatically, i.e. in the scheduler there is task that script in certain time was launched. How to do that in the next time, when the script runs, it must work not with all table, but only the data that was loaded in sql?

For example, today was loaded 100 observations. From 01.01.2017-10.04.2017 Script performed regression and returned data to sql table. Tomorrow will loaded new 100 observations. 11.04.2017-20.07.2017 I.E. when tomorrow the data will loaded and the script will start at 10 pm, it must work only with data from 11.04.2017-20.07.2017, and not from 01.01.2017-20.07.2017

How can I do this?

data example from sql

df=structure(list(Dt = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 
8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L, 13L, 13L, 14L, 14L, 
15L, 15L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 
18L, 19L), .Label = c("2017-10-12 00:00:00.000", "2017-10-13 00:00:00.000", 
"2017-10-14 00:00:00.000", "2017-10-15 00:00:00.000", "2017-10-16 00:00:00.000", 
"2017-10-17 00:00:00.000", "2017-10-18 00:00:00.000", "2017-10-19 00:00:00.000", 
"2017-10-20 00:00:00.000", "2017-10-21 00:00:00.000", "2017-10-22 00:00:00.000", 
"2017-10-23 00:00:00.000", "2017-10-24 00:00:00.000", "2017-10-25 00:00:00.000", 
"2017-10-26 00:00:00.000", "2017-10-27 00:00:00.000", "2017-10-28 00:00:00.000", 
"2017-10-29 00:00:00.000", "2017-10-30 00:00:00.000"), class = "factor"), 
    CustomerName = structure(c(1L, 11L, 12L, 13L, 14L, 15L, 16L, 
    17L, 18L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 11L, 12L, 
    13L, 14L, 15L, 16L, 17L, 18L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 
    9L, 10L), .Label = c("x1", "x10", "x11", "x12", "x13", "x14", 
    "x15", "x16", "x17", "x18", "x2", "x3", "x4", "x5", "x6", 
    "x7", "x8", "x9"), class = "factor"), ItemRelation = c(13322L, 
    13322L, 13322L, 13322L, 13322L, 13322L, 13322L, 11706L, 13322L, 
    11706L, 13322L, 11706L, 13322L, 11706L, 13322L, 11706L, 13322L, 
    11706L, 13322L, 11706L, 13322L, 11706L, 13322L, 11706L, 13163L, 
    13322L, 158010L, 11706L, 13163L, 13322L, 158010L, 11706L, 
    13163L, 13322L, 158010L, 11706L), SaleCount = c(10L, 3L, 
    1L, 0L, 9L, 5L, 5L, 11L, 7L, 0L, 5L, 11L, 1L, 0L, 0L, 19L, 
    10L, 0L, 1L, 12L, 1L, 11L, 6L, 0L, 167L, 7L, 0L, 16L, 165L, 
    1L, 0L, 0L, 29L, 0L, 0L, 11L), DocumentNum = c(36L, 36L, 
    36L, 36L, 36L, 36L, 36L, 51L, 36L, 51L, 36L, 51L, 36L, 51L, 
    36L, 51L, 36L, 51L, 36L, 51L, 36L, 51L, 36L, 51L, 131L, 36L, 
    89L, 51L, 131L, 36L, 89L, 51L, 131L, 36L, 89L, 51L), DocumentYear = c(2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L), 
    IsPromo = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("Dt", "CustomerName", 
"ItemRelation", "SaleCount", "DocumentNum", "DocumentYear", "IsPromo"
), class = "data.frame", row.names = c(NA, -36L))

回答1:

Add a date to your result sql table dbo.mytableforecast

Change the select SQL statement such that it only selects data after

select
Dt
,CustomerName
,ItemRelation
,SaleCount
,DocumentNum
,DocumentYear
,IsPromo

from dbo.mytable

where Dt > (select max(Dt) from dbo.mytableforecast)