Failing to get a CSV from S3 and convert it with Python


Question:

I need to read a CSV file from an S3 bucket and insert each row into DynamoDB.

def load_users_dynamodb():
    s3 = boto3.client('s3')
    dynamodb = boto3.resource('dynamodb')
    table = dynamodb.Table("test")

    obj = s3.get_object(Bucket='test-app-config', Key='extract_Users.csv')
    #return obj
    data = obj['Body'].read().split('\n')
    #return json.dumps(data)

    with table.batch_writer() as batch:
        for row in data:
            batch.put_item(Item={
                'registration': row.split(',')[0],
                'name': row.split(',')[1],
                'role': row.split(',')[2],
                'company': row.split(',')[3],
                'hiredcompany': row.split(',')[4],
                'region': row.split(',')[5]
            })

    return 'OK'

I'm getting an exception and can't proceed:

Response:
{
  "errorMessage": "a bytes-like object is required, not 'str'",
  "errorType": "TypeError",
  "stackTrace": [
    "  File \"/var/task/lambda_function.py\", line 10, in lambda_handler\n    'body': load_users_dynamodb()\n",
    "  File \"/var/task/lambda_function.py\", line 21, in load_users_dynamodb\n    data = obj['Body'].read().split('\\n')\n"
  ]
}

Can someone help me, please? o/

Answer 1:

Your issue is related to decoding the object returned from S3: in Python 3, the body is bytes, not str, so you need to decode it and then read the file as CSV.
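To see why the error occurs, here is a minimal reproduction independent of S3 (the sample bytes are just for illustration):

raw = b'a,b,c\nd,e,f'   # StreamingBody.read() returns bytes like this
# raw.split('\n')       # TypeError: a bytes-like object is required, not 'str'
print(raw.decode('utf-8').split('\n'))  # decode first -> ['a,b,c', 'd,e,f']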

Take a look at the following code snippet:

import boto3
import csv

s3 = boto3.client('s3')

def lambda_handler(event, context):
    obj = s3.get_object(Bucket='Bucket_Name', Key='File_Name.csv')
    # Decode the bytes payload to str, then split it into lines
    data = obj['Body'].read().decode('utf-8').splitlines()
    # Parse the lines as CSV (correctly handles quoted fields)
    lines = csv.reader(data)
    headers = next(lines)
    print('headers: %s' % (headers))
    for line in lines:
        print(line)

Output (for a dummy CSV): the header row is printed first, followed by each remaining row as a list of fields.
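To finish the original task, the same pattern can feed table.batch_writer(). This is a minimal sketch, not a tested implementation: it assumes the bucket, key, and table name from the question, exactly six columns per row, and a header row (drop the next(lines) call if there is none). Note that csv.reader also handles quoted fields containing commas, which row.split(',') would break on.

import boto3
import csv

s3 = boto3.client('s3')
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('test')

def load_users_dynamodb():
    obj = s3.get_object(Bucket='test-app-config', Key='extract_Users.csv')
    # Decode the bytes payload before treating it as text
    data = obj['Body'].read().decode('utf-8').splitlines()
    lines = csv.reader(data)
    headers = next(lines)  # skip the header row, if present
    with table.batch_writer() as batch:
        for line in lines:
            # Assumes every row has exactly these six fields
            registration, name, role, company, hiredcompany, region = line
            batch.put_item(Item={
                'registration': registration,
                'name': name,
                'role': role,
                'company': company,
                'hiredcompany': hiredcompany,
                'region': region
            })
    return 'OK'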