Does writing to S3(aws-sdk nodeJS) conflict with l

2019-08-17 03:47发布

问题:

Please bear in mind that I can, at best, be described as a rookie in both node and amazon S3. I have an app that writes to S3 in the background. I want to read from S3 when the file has been written, and only once it's been written. I attempt to check the number of objects and return the result:

/**
 * Lists the objects currently visible in a bucket and passes them to `callback`.
 *
 * A failed listing (where the SDK hands back no data) no longer throws inside
 * the SDK callback: the error is written to stderr and the caller receives an
 * empty list, with the error as an optional second argument.
 *
 * @param {string} bucketName - Name of the bucket to list.
 * @param {function(Array, Error=)} callback - Receives the listed objects
 *   (possibly empty) and, only when the listing failed, the error.
 */
function haveFilesBeenWrittenToBucket(bucketName, callback) {
    s3.listObjects({ Bucket: bucketName }, function (err, data) {
        if (err) {
            console.error("listObjects failed for bucket " + bucketName + ": " + err);
            callback([], err);
            return;
        }
        // A response without Contents is treated the same as an empty bucket.
        const items = (data && data.Contents) || [];
        callback(items);
    });
}

and the readFile function:

/**
 * Converts one comma-separated field into the value stored in the result array.
 *
 * - Blank fields are returned unchanged (Number("") is 0, which would otherwise
 *   turn an empty field into a spurious zero).
 * - Fields that parse as numbers are returned as numbers.
 * - Other fields are returned as strings; one pair of surrounding quotation
 *   marks is removed only when it is actually present, so unquoted text is
 *   no longer truncated.
 *
 * @param {string} value - Raw field text produced by splitting a line on ",".
 * @returns {string|number} The converted field.
 */
function parseCsvField(value) {
    if (value.trim() === "") {
        return value;
    }
    const asNumber = Number(value);
    if (!Number.isNaN(asNumber)) {
        return asNumber;
    }
    const first = value[0];
    const isQuoted = value.length >= 2 &&
        (first === '"' || first === "'") &&
        value.endsWith(first);
    return isQuoted ? value.slice(1, -1) : value;
}

/**
 * Reads a comma-separated object from S3 once the bucket contains at least one
 * object, and delivers all of its fields as one flat array.
 *
 * While the bucket listing is missing or empty the call re-schedules itself
 * every 5 seconds.
 *
 * @param {Object} params - Passed unchanged to `s3.getObject`; `params.Bucket`
 *   is also used for the bucket listing.
 * @param {function(Array, Error=)} callback - Called once with the parsed
 *   fields. If the read stream failed, the second argument is the error and
 *   the array holds whatever was parsed before the failure.
 */
OSClient.prototype.readFile = function (params, callback) {
    haveFilesBeenWrittenToBucket(params.Bucket, (items) => {
        const itemCount = items ? items.length : 0;
        console.log("Number of items " + itemCount);

        if (itemCount === 0) {
            // Nothing visible yet: poll again later. The arrow function keeps
            // `this` bound to the client instance.
            setTimeout(() => this.readFile(params, callback), 5000);
            return;
        }

        const input = s3.getObject(params).createReadStream();
        const rl = readline.createInterface({ input });
        const fields = [];
        let streamError = null;

        // Without an 'error' listener a failed download is thrown as an
        // uncaught exception. Depending on the Node version the error shows up
        // on the input stream, on the readline interface, or on both, so the
        // same guarded handler is attached to each.
        const onStreamError = (err) => {
            if (streamError) {
                return;
            }
            streamError = err;
            console.error("Reading object from S3 failed: " + err);
            rl.close(); // triggers 'close', which reports back to the caller
        };
        input.on("error", onStreamError);
        rl.on("error", onStreamError);

        rl.on("line", (line) => {
            for (const value of line.split(",")) {
                fields.push(parseCsvField(value));
            }
        });
        rl.on("close", () => {
            callback(fields, streamError);
        });
    });
};

and lastly:

     // Read the object through the client and keep the parsed fields.
     targetClient.readFile(params, (arrayResult) => {
         logger.info(`Read file:${fileName}${OS_FILE_SUFFIX}`);
         readArray = arrayResult;
     });

If I put a breakpoint on callback(items) (in 'haveFilesBeenWrittenToBucket') everything works fine and I get back the file written in the bucket, but if not, nothing seems to get written to S3. Seems like some race condition, but I'm really clueless and I really would appreciate some help. Is there a conflict between listing objects and writing to S3? (At least, nothing gets written until much later, in some other test, when it shouldn't be - it's part of a mocha test suite, and the readFile is in async.waterfall.) I have been on this for days and got nowhere. As I said, it's my first exposure to node, so please be patient with me. Thanks.

回答1:

S3 provides eventual consistency for list-after-write. So, you might observe the following:

A process writes a new object to Amazon S3 and immediately lists keys within its bucket. Until the change is fully propagated, the object might not appear in the list.

The only situation in which S3 provides immediate consistency is read-after-write for PUTS of new objects (with a minor caveat, documented here). More details at S3 consistency model.

Here is an example of how you can use async retry to wait for an object and then retrieve its contents (assumed to be text in this example).

var aws = require("aws-sdk");
var async = require("async");

// S3 client shared by the polling and download calls below.
const s3 = new aws.S3();
// Bucket that is polled for objects.
const bucket = 'mybucket';
// Count of polling attempts so far; bumped by each waitForObjects call.
let iteration = 0;

/**
 * Makes one attempt to list the bucket and reports whether it holds objects.
 *
 * Logs the running attempt number to stderr, then calls `callback`
 * node-style: with the listing error, with an Error("No objects") when the
 * listing has no contents, or with (null, listing) when at least one object
 * is present.
 *
 * @param {string} bucket - Name of the bucket to list.
 * @param {function(Error|null, Object=)} callback - Node-style completion callback.
 */
function waitForObjects(bucket, callback) {
    console.error(`Iteration: ${++iteration}`);

    s3.listObjects({ Bucket: bucket }, (listErr, listing) => {
        if (listErr) {
            callback(listErr);
            return;
        }

        const found = listing.Contents;
        if (found && found.length) {
            callback(null, listing);
            return;
        }

        callback(new Error("No objects"));
    });
}

// Call waitForObjects up to 10 times, backing off exponentially between
// attempts (intervals of 100, 200, 400, 800, 1600, ... milliseconds).
// Once objects are listed, print each key and fetch each body as text.
async.retry({
    times: 10,
    interval: function(retryCount) {
        return 50 * Math.pow(2, retryCount);
    }
}, async.apply(waitForObjects, bucket), function(err, data) {
    if (err) {
        console.error(`Error waitForObjects: ${err}`);
        return;
    }

    console.log(`Object count: ${data.Contents.length}`);

    data.Contents.forEach(function(item, index) {
        console.log(`Object ${index+1} key: ${item.Key}`);

        // Distinct callback names avoid shadowing the outer err/data.
        s3.getObject({Bucket:bucket, Key:item.Key}, function(getErr, object) {
            if (getErr) {
                // A failed fetch delivers no data; report it instead of
                // dereferencing object.Body and throwing.
                console.error(`Error getObject ${item.Key}: ${getErr}`);
                return;
            }
            console.log(`Object ${index+1} txt: ${object.Body.toString()}`);
        });
    });
});


回答2:

Two things. Firstly, it turns out that my issue was not nodeJS related. Sigh. Secondly, the API now provides a 'waitFor' method for polling whether a bucket or object exists:

http://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#waitFor-property