Store big JSON files into Oracle DB

2020-02-13 02:50发布

问题:

I'm using the following script in order to read data from MongoDB as JSON files.

DECLARE
  l_param_list     VARCHAR2(512);

  l_http_request   UTL_HTTP.req;
  l_http_response  UTL_HTTP.resp;
  l_response_text  CLOB;
  --l_response_text  VARCHAR2(32767);

l_list json_list;
A_id           VARCHAR2(100);
Photo          VARCHAR2(32767);
A_Name         VARCHAR2(100);
Remarks        VARCHAR2(100);
Status         VARCHAR2(100);
UserId         VARCHAR2(100);
A_Date         VARCHAR2(100);
A_Time         VARCHAR2(100);
MSG_status     VARCHAR2(100);
Oracle_Flag    VARCHAR2(100);
acl            VARCHAR2(100);


BEGIN

  -- service's input parameters

  -- preparing Request...
  l_http_request := UTL_HTTP.begin_request('https://api.appery.io/rest/1/db/collections/Photos?where=%7B%22Oracle_Flag%22%3A%22Y%22%7D'
                                          , 'GET'
                                          , 'HTTP/1.1');

  -- ...set header's attributes
  UTL_HTTP.set_header(l_http_request, 'X-Appery-Database-Id', '53f2dac5e4b02cca64021dbe');
  --UTL_HTTP.set_header(l_http_request, 'Content-Length', LENGTH(l_param_list));

  -- ...set input parameters
 -- UTL_HTTP.write_text(l_http_request, l_param_list);

  -- get Response and obtain received value
  l_http_response := UTL_HTTP.get_response(l_http_request);

  UTL_HTTP.read_text(l_http_response, l_response_text);

  DBMS_OUTPUT.put_line(l_response_text);
 l_list := json_list(l_response_text);

FOR i IN 1..l_list.count
LOOP
A_id        := json_ext.get_string(json(l_list.get(i)),'_id');
Photo       := json_ext.get_string(json(l_list.get(i)),'Photo');
A_Name      := json_ext.get_string(json(l_list.get(i)),'Name');
Remarks     := json_ext.get_string(json(l_list.get(i)),'Remarks');
Status      := json_ext.get_string(json(l_list.get(i)),'Status');
UserId      := json_ext.get_string(json(l_list.get(i)),'UserId');
A_Date      := json_ext.get_string(json(l_list.get(i)),'Date');
A_Time      := json_ext.get_string(json(l_list.get(i)),'Time');
MSG_status  := json_ext.get_string(json(l_list.get(i)),'MSG_status');
Oracle_Flag := json_ext.get_string(json(l_list.get(i)),'Oracle_Flag');
acl         := json_ext.get_string(json(l_list.get(i)),'acl');


insert into Appery_Photos values(
  A_id,
  Photo,
  A_Name,
  Remarks,
  Status,
  UserId,
  A_Date,
  A_Time,
  MSG_status ,
  Oracle_Flag,
  acl
);
  end loop;

-- finalizing
  UTL_HTTP.end_response(l_http_response);

EXCEPTION 
  WHEN UTL_HTTP.end_of_body 
    THEN UTL_HTTP.end_response(l_http_response);  
END;
/

The script works fine with small JSON files. However, when the file contains base64 file (photos represeted in base64 formate) the script fails giving the error (string ending not found).

Apparently, the error results from the fact that not the entire file is copied, such that the JSON parser cannot find the end of the string "]" or "}".

I tried using CLOB and VARCHAR2 with maximum size 32767 but that is not enough at all.

I thought of decoding base64 file, but the problme is I need to read the file first before I can decode that field.

Any suggestion would be deeply appreciated.

Result

Both answers provide solutions for reading large JSON files (>32KB), I used @Jeffrey Kemp one. However, as a next problem, one of the json_values/fields itself is >32KB, the json_ext.get_string only returns a VARCHAR2, which means it is limited to 32767 byes max. So for the field Photo, where the value is >32KB I used json_ext.get_json_value along with dbms_lob.createtemporary. The resultant relevant script after tidy up a bit is as follows:

DECLARE
  l_val            json_value;
  l_param_list     VARCHAR2(512);
  l_http_request   UTL_HTTP.req;
  l_http_response  UTL_HTTP.resp;
  l_response_text  CLOB;
  --l_response_text  VARCHAR2(32767);

l_list json_list;
A_id           VARCHAR2(100);
Photo          VARCHAR2(32767);
A_Name         VARCHAR2(100);
Remarks        VARCHAR2(100);
Status         VARCHAR2(100);
UserId         VARCHAR2(100);
A_Date         VARCHAR2(100);
A_Time         VARCHAR2(100);
MSG_status     VARCHAR2(100);
Oracle_Flag    VARCHAR2(100);
acl            VARCHAR2(100);


BEGIN

  -- service's input parameters

  -- preparing Request...
  l_http_request := UTL_HTTP.begin_request('https://api.appery.io/rest/1/db/collections/Photos?where=%7B%22Oracle_Flag%22%3A%22Y%22%7D'
                                          , 'GET'
                                          , 'HTTP/1.1');

  -- ...set header's attributes
  UTL_HTTP.set_header(l_http_request, 'X-Appery-Database-Id', '53f2dac5e4b02cca64021dbe');
  --UTL_HTTP.set_header(l_http_request, 'Content-Length', LENGTH(l_param_list));

  -- ...set input parameters
 -- UTL_HTTP.write_text(l_http_request, l_param_list);

  -- get Response and obtain received value
  l_http_response := UTL_HTTP.get_response(l_http_request);
  BEGIN
    LOOP
      UTL_HTTP.read_text(l_http_response, buf);
      l_response_text := l_response_text || buf;
    END LOOP;
  EXCEPTION
  WHEN UTL_HTTP.end_of_body THEN
    NULL;
  END;
  l_list := json_list(l_response_text);


FOR i IN 1..l_list.count
LOOP
A_id        := json_ext.get_string(json(l_list.get(i)),'_id');
l_val := json_ext.get_json_value(json(l_list.get(i)),'Photo');
dbms_lob.createtemporary(Photo, true, 2);
json_value.get_string(l_val, Photo);
A_Name      := json_ext.get_string(json(l_list.get(i)),'Name');
Remarks     := json_ext.get_string(json(l_list.get(i)),'Remarks');
Status      := json_ext.get_string(json(l_list.get(i)),'Status');
UserId      := json_ext.get_string(json(l_list.get(i)),'UserId');
A_Date      := json_ext.get_string(json(l_list.get(i)),'Date');
A_Time      := json_ext.get_string(json(l_list.get(i)),'Time');
MSG_status  := json_ext.get_string(json(l_list.get(i)),'MSG_status');
Oracle_Flag := json_ext.get_string(json(l_list.get(i)),'Oracle_Flag');
acl         := json_ext.get_string(json(l_list.get(i)),'acl');


insert into Appery_Photos values(
  A_id,
  Photo,
  A_Name,
  Remarks,
  Status,
  UserId,
  A_Date,
  A_Time,
  MSG_status ,
  Oracle_Flag,
  acl
);
  end loop;

-- finalizing
  UTL_HTTP.end_response(l_http_response);

EXCEPTION 
  WHEN UTL_HTTP.end_of_body 
    THEN UTL_HTTP.end_response(l_http_response);  
END;
/

回答1:

Your problem is in your call to UTL_HTTP.read_text. You are passing a CLOB, but read_text only accepts VARCHAR2, so it can return a maximum of 32k bytes.

You need to call it in a loop, using a VARCHAR2 buffer, and concatenate the results into your CLOB, e.g.:

DECLARE
  buf VARCHAR2(32767);
BEGIN
  LOOP
    UTL_HTTP.read_text(l_http_response, buf);
    l_response_text := l_response_text || buf;
  END LOOP;
EXCEPTION
  WHEN UTL_HTTP.end_of_body THEN
    NULL;
END;

http://docs.oracle.com/cd/E11882_01/appdev.112/e40758/u_http.htm#ARPLS71074

Your second problem is that json_ext.get_string only returns a VARCHAR2, which means it is limited to 32767 byes max. I've had a browse of the PL/json wiki, you might need to reach out to one of the authors to find out how to use it to get a CLOB value.



回答2:

CLOB have a size limit of 4G

But the limitation here is with UTL_HTTP.read_text that returns the result as a VARCHAR2 (you have an implicit conversion here).

To retrieve large text objects from the web easily, you probably need HttpUriType.getClob


If for some reason you want to stick with UTL_HTTP, you will have to loop to read your data chunk by chunk. Something like that:

BEGIN
  ...
  l_clob           CLOB;
  l_text           VARCHAR2(32767);
BEGIN
  DBMS_LOB.createtemporary(l_clob, FALSE);

  ...
  l_http_request  := UTL_HTTP.begin_request(your_URI);
  l_http_response := UTL_HTTP.get_response(l_http_request);

  -- Loop to read data chunk by chunk up to the end
  BEGIN
    LOOP
      UTL_HTTP.read_text(l_http_response, l_text, 32766);
      DBMS_LOB.writeappend (l_clob, LENGTH(l_text), l_text);
    END LOOP;
  EXCEPTION
    WHEN UTL_HTTP.end_of_body THEN
      UTL_HTTP.end_response(l_http_response);
  END;

See http://oracle-base.com/articles/misc/retrieving-html-and-binaries-into-tables-over-http.php vor various examples