Querying a Large Dataset in an Oracle Database from Node.js

Posted 2020-07-23 04:46

I'm currently working on a project for work where I have an Oracle 10g database table with roughly 310K rows, give or take 10-30K.

The goal is to display those rows in an Angular frontend; however, returning all of them through Node.js takes a long time.

Given that I'm using both Node.js and oracledb for the first time, I'm assuming I must be missing something?

var oracledb = require('oracledb');
var config = require(__dirname+'/../db.js');

function get(req,res,next)
{
var table = req.query.table;
var meta;

oracledb.getConnection(config.oracle)
.then( function(connection)
{
    var stream = connection.queryStream('SELECT * FROM '+table);

    stream.on('error', function (error) 
    {
        console.error(error);
        return next(error);
    });

    stream.on('metadata', function (metadata) {
        console.log(metadata);
    });

    stream.on('data', function (data) {
        console.log(data);
    });

    stream.on('end', function () 
    {
      connection.release(
        function(err) {
          if (err) {
            console.error(err.message);
            return next(err);
          }
        });
    });
})
.catch(function(err){
    if(err){
        connection.close(function(err){
            if(err){
                console.error(err.message);
                return next(err);
            }
        });
    }
})
}

module.exports.get = get;

1 Answer

Anthone · 2020-07-23 05:20

30 MB is a lot of data to load into the front end. It can work in some cases, such as desktop web apps where the benefits of "caching" the data offset the time needed to load it (and where somewhat stale data is acceptable). But it will not work well in other cases, such as mobile.

Keep in mind that the 30 MB must be moved from the DB to Node.js and then from Node.js to the client. The network connections between these will greatly impact performance.
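
Also, if the client doesn't truly need all of the rows at once, the biggest win is to page the data server-side and fetch it on demand. None of the code in this answer does that, so treat the following as an optional sketch: Oracle 10g predates the 12c OFFSET/FETCH syntax, so row limiting is typically done with ROWNUM. The table and column names here are illustrative, not from the original code:

const sql = `
  SELECT *
    FROM (SELECT t.*, ROWNUM rn
            FROM (SELECT * FROM my_table ORDER BY id) t
           WHERE ROWNUM <= :maxRow)
   WHERE rn > :minRow`;

// Rows 1-100; later pages just shift the bind values.
connection.execute(sql, { minRow: 0, maxRow: 100 })
  .then(result => {
    console.log(result.rows);
  });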

I'll point out a few things that can help performance, though not all are exactly related to this question.

First, if you're using a web server, you should be using a connection pool, not dedicated/one-off connections. Generally, you'd create the connection pool in your index/main/app.js and start the web server after that's done and ready.

Here's an example:

const oracledb = require('oracledb');
const express = require('express');
const config = require('./db-config.js');
const thingController = require('./things-controller.js');

// Node.js uses 4 background threads by default; increase this to handle the max DB pool size.
// This must be done before any other calls that will use the libuv threadpool.
process.env.UV_THREADPOOL_SIZE = config.poolMax + 4;

// This setting can be used to reduce the number of round trips between Node.js
// and the database.
oracledb.prefetchRows = 10000;

function initDBConnectionPool() {
  console.log('Initializing database connection pool');

  return oracledb.createPool(config);
}

function initWebServer() {
  console.log('Initializing webserver');

  const app = express();

  let router = new express.Router();

  router.route('/things')
    .get(thingController.get);  

  app.use('/api', router);

  app.listen(3000, () => {
    console.log('Webserver listening on localhost:3000');
  });
}

initDBConnectionPool()
  .then(() => {
    initWebServer();
  })
  .catch(err => {
    console.log(err);
  });

That will create a pool which is added to the internal pool cache in the driver. This allows you to easily access it from other modules, as the sketch below and the controller example later both show.
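
For instance, another module can grab the cached pool without importing the DB config again. A minimal sketch (no poolAlias was passed to createPool() above, so the pool is registered under the 'default' alias):

const oracledb = require('oracledb');

function doSomeWork() {
  // Returns the 'default' pool from the driver's internal cache.
  // Only valid after createPool() has completed.
  const pool = oracledb.getPool();

  return pool.getConnection()
    .then(conn => {
      // ... run queries here, then return the connection to the pool.
      return conn.close();
    });
}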

Note that when using connection pools, it's generally a good idea to increase the thread pool available to Node.js to allow each connection in the pool to work concurrently. An example of this is included above.

In addition, I'm increasing the value of oracledb.prefetchRows. This setting is directly related to your question. Network round trips are used to move the data between the DB and Node.js, and this setting controls how many rows are fetched with each round trip. So as prefetchRows goes higher, fewer round trips are needed and performance improves. Just be careful not to go too high, given the memory available on your Node.js server.
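
If you'd rather not change the global default, the value can also be overridden per query. A sketch, with a version caveat: node-oracledb 1.x calls this option prefetchRows, while later releases renamed the equivalent knob to fetchArraySize:

// Tune the fetch size for this one large query only.
// Option name assumes node-oracledb 1.x; use fetchArraySize on newer drivers.
const stream = connection.queryStream(
  'SELECT * FROM ' + table,
  [],                      // no bind variables
  { prefetchRows: 10000 }  // rows per DB round trip for this query
);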

I ran a generic test that mocked the 30 MB dataset size. When oracledb.prefetchRows was left at the default of 100, the test finished in 1 minute 6 seconds. When I bumped this up to 10,000, it finished in 27 seconds.

Okay, moving on to "things-controller.js" which is based on your code. I've updated the code to do the following:

  • Assert that table is a valid table name. Your current code is vulnerable to SQL injection.
  • Use a promise chain that emulates a try/catch/finally block to close the connection just once and return the first error encountered (if needed).
  • Work so I could run the test.

Here's the result:

const oracledb = require('oracledb');

function get(req, res, next) {
    const table = req.query.table;
    const rows = [];
    let conn;
    let err; // Will store the first error encountered

    // You need something like this to prevent SQL injection. The current code
    // is wide open.
    if (!isSimpleSqlName(table)) {
        next(new Error('Not simple SQL name'));
        return;
    }

    // If you don't pass a config, the connection is pulled from the 'default'
    // pool in the cache.
    oracledb.getConnection() 
        .then(c => {
            return new Promise((resolve, reject) => {
                conn = c;

                const stream = conn.queryStream('SELECT * FROM ' + table);

                stream.on('error', err => {
                    reject(err);
                });

                stream.on('data', data => {
                    rows.push(data); 
                });

                stream.on('end', function () {
                    resolve();
                });
            });
        })
        .catch(e => {
            err = err || e;
        })
        .then(() => {
            if (conn) { // conn assignment worked, need to close/release conn
                return conn.close();
            }
        })
        .catch(e => {
            console.log(e); // Just log, error during release doesn't affect other work
        })
        .then(() => {
            if (err) {
                next(err);
                return;
            }

            res.status(200).json(rows);
        });
}

module.exports.get = get;

function isSimpleSqlName(name) {
  if (name.length > 30) {
    return false;
  }

  // Fairly generic, but effective. Would need to be adjusted to accommodate quoted identifiers,
  // schemas, etc.
  if (!/^[a-zA-Z0-9#_$]+$/.test(name)) {
    return false;
  }

  return true;
}
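
With the router from the first snippet, a request like the following exercises the controller (EMPLOYEES is just an example table name):

GET http://localhost:3000/api/things?table=EMPLOYEES

Any value that fails the isSimpleSqlName() check is handed to Express's error handler instead of being concatenated into the SQL.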

I hope that helps. Let me know if you have questions.
