Microsoft R Server Row by Row Insert

2019-09-15 11:13发布

问题:

I have a for loop that usually writes to a flat file. This way, if anything breaks, I can start where I left off. I want to convert this process to read and write from a SQL table directly using the new RevoScaleR functions in a SQL Server 2016 stored procedure that executes my R code.

Here is a simple SPROC:

-- The procedure definition that follows is created in the master database.
USE master;
GO

/* Object: StoredProcedure [dbo].[Rscript_geocodeUSACities_TEST] -- Script Date: 8/8/2017 11:40:40 AM */
-- Turn on ANSI NULL comparison semantics for this session.
SET ANSI_NULLS ON;
GO

-- Treat double-quoted text as identifiers for this session.
SET QUOTED_IDENTIFIER ON;
GO




-- Rscript_geocodeUSACities_TEST (version from the question)
--
-- Purpose: assembles an R script as an nvarchar(max) string and runs it with
-- sp_execute_external_script. The R script loads several packages, switches
-- the RevoScaleR compute context to RxInSqlServer, prints the input data
-- frame and then calls rxDataStep to append it to imp.USA_Cities_Map.
--
-- Parameters:
--   @usrOutputFilePath  concatenated with @usrOutputFileName to form the R
--   @usrOutputFileName  variable usrOutputFile; that variable is assigned but
--                       not read again anywhere in the script shown here.
--
-- Known problem: rxDataStep receives outFile=imp.USA_Cities_Map, a bare R
-- symbol that is never defined in the script, so R stops with an
-- object-not-found error for imp.USA_Cities_Map.
CREATE PROCEDURE [dbo].[Rscript_geocodeUSACities_TEST]
    @usrOutputFilePath varchar(150)
    ,@usrOutputFileName varchar(150)

AS
BEGIN

    SET NOCOUNT ON;

-- Build the R source text. The two parameters are spliced directly into the
-- script between double quotes.
-- NOTE(review): a parameter value containing a double quote would alter the
-- generated R code -- confirm that callers are trusted.
-- NOTE(review): if either parameter is NULL the + concatenation presumably
-- makes the whole script NULL -- confirm against the session settings.
DECLARE @rScript nvarchar(max) = N'

#### USER INPUTS ####

usrOutputFile <- "' + @usrOutputFilePath + @usrOutputFileName + '"


#### ESTABLISH ENVIRONMENT ####

library(data.table)
library(foreach)
library(XML)
library(RCurl)
library(RJSONIO)

##turn off scientific notation
options(scipen=999)

##establish compute context
sqlServerConnString <- "Server=.;Database=External;Trusted_Connection=true"
sqlServerCC <- RxInSqlServer(connectionString=sqlServerConnString)
rxSetComputeContext(sqlServerCC)
print(rxGetComputeContext())


#### GEOCODE ####

print(dfInputData)
rxDataStep(data=dfInputData,outFile=imp.USA_Cities_Map,append="rows")

'

-- Run the script in the R runtime. The one-row query result (column
-- test_insert with value 5) is handed to R under the name dfInputData.
EXECUTE  sp_execute_external_script
                @language = N'R'
              , @script = @rScript
              ,@input_data_1 =N'select 5 as test_insert'
            ,@input_data_1_name =N'dfInputData'
              ;

END

Error output:

Error in rxDataStep(data = dfInputData, outFile = imp.USA_Cities_Map,  : 
  object 'imp.USA_Cities_Map' not found

回答1:

Here you go. You don't need to set the compute context to SQL Server, but you do have to grant login permissions to the local users running the R external processes. They are all added to a local group called SqlRUserGroup, so you only need to create a login for that group; in the script below, replace 'dbrownebook' with your server name.

Note that you don't add a database user for the sqlrusergroup, but only add a login. SQL R Services will impersonate the user calling sp_execute_external_script. This is explained in: https://docs.microsoft.com/en-us/sql/advanced-analytics/r/security-considerations-for-the-r-runtime-in-sql-server

-- One-time setup: a server login for the R worker accounts, plus the
-- database, schema and table that the procedure writes to.
USE master;
GO

-- Login only (no database user) for the local Windows group of R worker
-- accounts; substitute your own machine name for dbrownebook.
CREATE LOGIN [dbrownebook\sqlrusergroup] FROM WINDOWS;

CREATE DATABASE [External];
GO

USE [External];
GO

CREATE SCHEMA imp;
GO

-- Single int column, named to match the test_insert alias of the
-- procedure's input query.
CREATE TABLE imp.USA_Cities_Map (test_insert int);
GO


/* Object: StoredProcedure [dbo].[Rscript_geocodeUSACities_TEST] -- Script Date: 8/8/2017 11:40:40 AM */
-- Turn on ANSI NULL comparison semantics for this session.
SET ANSI_NULLS ON;
GO

-- Treat double-quoted text as identifiers for this session.
SET QUOTED_IDENTIFIER ON;
GO




-- Rscript_geocodeUSACities_TEST (working version)
--
-- Purpose: runs an R script through sp_execute_external_script that wraps
-- imp.USA_Cities_Map in an RxSqlServerData data source and appends the input
-- data frame to it with rxDataStep. The append is issued three times in a
-- row, so each execution adds the input row three times. No compute context
-- is set inside the script.
--
-- Parameters:
--   @usrOutputFilePath, @usrOutputFileName
--       accepted for signature compatibility; neither is referenced in the
--       procedure body.
CREATE OR ALTER PROCEDURE [dbo].[Rscript_geocodeUSACities_TEST]
    @usrOutputFilePath varchar(150),
    @usrOutputFileName varchar(150)
AS
BEGIN
    SET NOCOUNT ON;

    DECLARE @rScript nvarchar(max);

    -- R source text; its lines stay at column 0 so the script content is unchanged.
    SET @rScript = N'

sqlServerConnString <- "Server=.;Database=External;Trusted_Connection=true"
sqlTable <- RxSqlServerData(table = "imp.USA_Cities_Map", connectionString = sqlServerConnString)

rxDataStep(data=dfInputData,outFile=sqlTable,append="rows")
rxDataStep(data=dfInputData,outFile=sqlTable,append="rows")
rxDataStep(data=dfInputData,outFile=sqlTable,append="rows")

';

    -- The one-row query result is exposed to R as the data frame dfInputData.
    EXEC sp_execute_external_script
        @language          = N'R',
        @script            = @rScript,
        @input_data_1      = N'select 5 as test_insert',
        @input_data_1_name = N'dfInputData';
END

GO

-- Smoke test: run the procedure, passing empty strings for both path
-- arguments. The call is schema-qualified and uses named parameters so it
-- does not depend on the default schema of the caller or on argument order.
EXEC dbo.Rscript_geocodeUSACities_TEST
    @usrOutputFilePath = '',
    @usrOutputFileName = '';
GO

-- Read back what the procedure wrote. The column is listed explicitly
-- instead of SELECT *; test_insert is the only column of the table.
SELECT test_insert
FROM imp.USA_Cities_Map;