Node.js bulk download and Bluebird promises

2020-06-30 00:47发布

I'm writing a bulk downloader for node.js and trying to understand bluebird promises. I want to limit the number of parallel requests and disk writes. As I understand it, Promise.map() with {concurrent: } should do what I want.

Because pipe() and http.get() can't automatically be promisified, I'm trying to use custom promises.

But I don't fully understand the then() mechanism. To me, it sounds like the returned promise should only be fulfilled when the whole chain has been fulfilled.

However, in my code only the first promise in the chain appears to be waited for by map(), and many requests and disk writes happen in parallel.

import Promise from 'bluebird';
import fs from 'fs';
import https from 'https';

// Ask Bluebird to add promise-returning variants of the fs functions.
// NOTE(review): none of those variants is called in the code below.
Promise.promisifyAll(fs);
// For every entry of Images: open <dir>/<id>.jpg for writing, fetch image.url
// over HTTPS, pipe the response into the file and wait until the file stream
// has finished.
// NOTE(review): `Images` and `dir` are not defined in this snippet; presumably
// they come from the surrounding code — confirm.
Promise.map(Images, image => {
        console.log("Opening image " + image.id);
        // The write stream is created as soon as the mapper is invoked for this image.
        let file = fs.createWriteStream(dir + '/' + image.id + '.jpg');
        // Step 1: adapt the callback-style https.get() to a promise that is
        // resolved with the response object, or rejected on a request error.
        return new Promise((resolve, reject) => {
              console.log("Downloading image " + image.id);
              https.get(image.url, resolve).on("error", reject);
          })
          // Step 2: stream the response body to disk. Returning a new promise
          // here makes the chain wait for the write stream's finish/error event.
          .then(response => {
              response.pipe(file);
              console.log("Saving image " + image.id);
              return new Promise((resolve, reject) => {
                  file.on("finish", resolve);
                  file.on("error", reject);
              });
          })
          // Step 3: runs only after the file stream reported "finish".
          .then(() => {
              console.log("Finished writing image " + image.id);
              file.close();
          })
          // This handler logs the failure and returns nothing, so the promise
          // handed back to map() is fulfilled even when this image failed.
          // Rejections from the chain above therefore do not reach the outer
          // catch() below.
          .catch(e => {
              console.log("Error during image save of " + image.id + ": " + e.code)
          });
    // NOTE(review): Bluebird documents this option as `concurrency`; a key
    // named `concurrent` is presumably ignored, which would leave map()
    // unthrottled and start every download at once — confirm against the
    // Bluebird Promise.map documentation.
    }, {concurrent: 50})
      // Runs once every promise returned by the mapper has been fulfilled.
      .then(res => {
          console.log("Finished writing all images")
      })
      // Runs if the mapper throws synchronously or one of its promises rejects.
      .catch(e => {
          console.log("Some images failed to be written: " + e.code)
      });
}

What am I doing wrong? Can you help me understand the flow of promise fulfillment and rejection?

1条回答
何必那么认真
2楼-- · 2020-06-30 01:29

From what I can understand, you're trying to download multiple images with promises. Actually, you don't need to promisify fs. You should use the request module for easier downloading.

Here's the shortest working example I can come up with:

var Promise = require('bluebird');
var path = require('path');
var fs = require('fs');
var request = require('request');

// Sample download jobs: each entry pairs a source URL with the file name the
// image is saved under. Written as [url, file_name] pairs and expanded into
// objects so the table stays compact.
var images = [
    ['http://bluebirdjs.com/img/logo.png', 'bluebird.png'],
    ['http://design.ubuntu.com/wp-content/uploads/ubuntu-logo32.png', 'ubuntu.png'],
    ['https://www.raspberrypi.org/wp-content/uploads/2012/03/raspberry-pi-logo.png', 'raspberry-pi.png']
].map(([url, file_name]) => ({ url: url, file_name: file_name }));

// To Download Serially
// Promise.each() invokes the callback for the next image only after the
// promise returned for the previous one has been fulfilled, so at most one
// download is in flight at any time. The first rejection aborts the sequence
// and is reported by the catch() at the end.
Promise.each(images, image => new Promise((resolve, reject) => {
    console.log('Downloading Image: ' + image.file_name);
    const target = fs.createWriteStream(path.join(__dirname, image.file_name));
    // pipe() does not forward errors from one stream to the other, so the
    // destination needs its own listener. Without it a disk or permission
    // failure is thrown as an unhandled 'error' event and this promise
    // never settles.
    target.on('error', reject);
    // 'finish' fires once all data has been flushed to the file.
    target.on('finish', () => {
        console.log('Downloaded Image: ' + image.file_name);
        resolve();
    });
    request(image.url)
        .on('error', err => {
            // A failed request never ends the destination stream; destroy it
            // so the file handle is released before reporting the failure.
            target.destroy();
            reject(err);
        })
        .pipe(target);
})).then(() => {
    console.log('All Image Downloaded!');
}).catch(err => {
    console.error('Failed: ' + err.message);
});

// To Download in Parallel (with 2 maximum concurrent jobs)
// The `concurrency` option caps how many mapper promises may be pending at
// once; the next image is started only when one of the running downloads
// settles. The first rejection is reported by the catch() at the end.
Promise.map(images, image => new Promise((resolve, reject) => {
    console.log('Downloading Image: ' + image.file_name);
    const target = fs.createWriteStream(path.join(__dirname, image.file_name));
    // pipe() does not forward errors from one stream to the other, so the
    // destination needs its own listener. Without it a disk or permission
    // failure is thrown as an unhandled 'error' event and this promise
    // never settles, which would also block one concurrency slot for good.
    target.on('error', reject);
    // 'finish' fires once all data has been flushed to the file.
    target.on('finish', () => {
        console.log('Downloaded Image: ' + image.file_name);
        resolve();
    });
    request(image.url)
        .on('error', err => {
            // A failed request never ends the destination stream; destroy it
            // so the file handle is released before reporting the failure.
            target.destroy();
            reject(err);
        })
        .pipe(target);
}), {
    concurrency: 2
}).then(() => {
    console.log('All Image Downloaded!');
}).catch(err => {
    console.error('Failed: ' + err.message);
});
查看更多
登录 后发表回答