I'm trying to import a large JSON file from Amazon S3 into AWS RDS for PostgreSQL using Python, but I keep hitting the error below. If I read the traceback correctly, it happens when obj['Body'].read() tries to load the entire object into memory at once:
Traceback (most recent call last):
File "my_code.py", line 67, in
file_content = obj['Body'].read().decode('utf-8').splitlines(True)
File "/home/user/asd-to-qwe/fgh-to-hjk/env/local/lib/python3.6/site-packages/botocore/response.py", line 76, in read
chunk = self._raw_stream.read(amt)
File "/home/user/asd-to-qwe/fgh-to-hjk/env/local/lib/python3.6/site-packages/botocore/vendored/requests/packages/urllib3/response.py", line 239, in read
data = self._fp.read()
File "/usr/lib64/python3.6/http/client.py", line 462, in read
s = self._safe_read(self.length)
File "/usr/lib64/python3.6/http/client.py", line 617, in _safe_read
return b"".join(s)
MemoryError
# my_code.py
import sys
import boto3
import psycopg2
import zipfile
import io
import json
s3 = boto3.client('s3', aws_access_key_id=<aws_access_key_id>, aws_secret_access_key=<aws_secret_access_key>)
connection = psycopg2.connect(host=<host>, dbname=<dbname>, user=<user>, password=<password>)
cursor = connection.cursor()
bucket = sys.argv[1]
key = sys.argv[2]
obj = s3.get_object(Bucket=bucket, Key=key)
def insert_query(data):
    # Cast the JSON line to JSONB and insert the fields I need
    query = """
        INSERT INTO data_table
        SELECT
            (src.test->>'url')::varchar, (src.test->>'id')::bigint,
            (src.test->>'external_id')::bigint, (src.test->>'via')::jsonb
        FROM (SELECT CAST(%s AS JSONB) AS test) src
    """
    cursor.execute(query, (json.dumps(data),))

# .zip archives: read the whole archive into memory, then insert each JSON line
if key.endswith('.zip'):
    zip_files = obj['Body'].read()
    with io.BytesIO(zip_files) as zf:
        zf.seek(0)
        with zipfile.ZipFile(zf, mode='r') as z:
            for filename in z.namelist():
                with z.open(filename) as f:
                    for line in f:
                        insert_query(json.loads(line.decode('utf-8')))

# plain .json files: read the whole body into memory and insert line by line
if key.endswith('.json'):
    file_content = obj['Body'].read().decode('utf-8').splitlines(True)
    for line in file_content:
        insert_query(json.loads(line))

connection.commit()
connection.close()
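One direction I've been considering (but haven't verified) is to stream the object line by line instead of calling read() on the whole body. The sketch below assumes that botocore's StreamingBody exposes iter_lines() in the version I have installed, and it only covers the plain .json case:

# Hypothetical sketch: stream the object instead of reading it all at once.
# Assumes StreamingBody.iter_lines() is available in my botocore version.
obj = s3.get_object(Bucket=bucket, Key=key)

if key.endswith('.json'):
    for raw_line in obj['Body'].iter_lines():
        line = raw_line.decode('utf-8').strip()
        if line:  # skip blank lines
            insert_query(json.loads(line))
    connection.commit()

But I'm not sure whether this is the right approach, and I don't see how to apply the same idea to the .zip case without buffering the whole archive in memory.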
Are there any solutions to this problem? Any help would be appreciated, thank you so much!