Node.js error when inserting data into PostgreSQL

Question:

I have a weird error using Node.js with PostgreSQL, and I hope you can help me out.

I have a huge amount of data, about 2 million entries, that I want to insert into my DB.

Each entry consists of 4 columns:

id: string,
points: float[][],
mid: float[],
occurences: json[]
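
For illustration, a single entry as a JavaScript object might look like this (the field names come from the question; the values are invented):

// Hypothetical sample entry; the values are illustrative only.
const entry = {
    id: 'img_0001',                       // string
    points: [[1.5, 2.5], [3.0, 4.0]],     // float[][]
    mid: [2.25, 3.25],                    // float[]
    occurences: [{file: 'a.jpg'}]         // json[]
};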

I am inserting data like so:

let pgp = require('pg-promise')(options);
let connectionString = 'postgres://archiv:archiv@localhost:5432/fotoarchivDB';
let db = pgp(connectionString);

cityNet.forEach((arr) => {
    db
    .none(
        "INSERT INTO currentcitynet(id,points,mid,occurences) VALUES $1",
        Inserts("${id},${points}::double precision[],${mid}::double precision[],${occurences}::json[]",arr))
    .then(data => {
        //success
    })
    .catch(error => {
        console.log(error);
        //error
    });
})

function Inserts(template, data) {
    if (!(this instanceof Inserts)) {
        return new Inserts(template, data);
    }
    this._rawDBType = true;
    this.formatDBType = function () {
        return data.map(d => "(" + pgp.as.format(template, d) + ")").join(",");
    };
}

This works for exactly the first 309248 data pieces, then it suddenly errors out with the following for (what seems like) every subsequent entry it tries to insert:

{ error: syntax error at end of input
at Connection.parseE (/home/christian/Masterarbeit_reworked/projekt/server/node_modules/pg-promise/node_modules/pg/lib/connection.js:539:11)
at Connection.parseMessage (/home/christian/Masterarbeit_reworked/projekt/server/node_modules/pg-promise/node_modules/pg/lib/connection.js:366:17)
at Socket.<anonymous> (/home/christian/Masterarbeit_reworked/projekt/server/node_modules/pg-promise/node_modules/pg/lib/connection.js:105:22)
at emitOne (events.js:96:13)
at Socket.emit (events.js:188:7)
at readableAddChunk (_stream_readable.js:176:18)
at Socket.Readable.push (_stream_readable.js:134:10)
at TCP.onread (net.js:548:20)
name: 'error',
length: 88,
severity: 'ERROR',
code: '42601',
detail: undefined,
hint: undefined,
position: '326824',
internalPosition: undefined,
internalQuery: undefined,
where: undefined,
schema: undefined,
table: undefined,
column: undefined,
dataType: undefined,
constraint: undefined,
file: 'scan.l',
line: '1074',
routine: 'scanner_yyerror' }

The 'position' entry changes with every error message. (In PostgreSQL, '42601' is the generic syntax_error code, and 'position' is the character offset within the submitted query string where parsing failed, so it varies with each generated statement.)

I can redo this, and it will always error after 309248 entries. When I try to insert fewer, for example 1000 entries, the error does not occur.

That really confuses me. I thought PostgreSQL did not have a maximum number of rows. The error message does not help me at all, either.

SOLVED: The error was found. Some "null" entries had slipped into my data, and filtering them out fixed the problem. I will still try the other recommendations for inserting data, since the current way works but its performance is very poor.
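
For reference, a minimal sketch of such a filter, assuming cityNet is a plain array and that either whole entries or single fields may be null:

// Sketch only: drop null entries and entries with missing fields.
// The field list mirrors the four columns from the question.
const cleanedCityNet = cityNet.filter(e =>
    e != null &&
    e.id != null &&
    e.points != null &&
    e.mid != null &&
    e.occurences != null
);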

Answer 1:

I'm not sure, but it looks like the data structure is wrong at element 309249, and PostgreSQL cannot parse some property of it.

Answer 2:

I'm the author of pg-promise. Your whole approach should be changed to the one below.

Proper way to do massive inserts via pg-promise:

const pgp = require('pg-promise')({
    capSQL: true
});

const db = pgp(/*connection details*/);

const cs = new pgp.helpers.ColumnSet([
    'id',
    {name: 'points', cast: 'double precision[]'},
    {name: 'mid', cast: 'double precision[]'},
    {name: 'occurences', cast: 'json[]'}
], {table: 'currentcitynet'});

function getNextInsertBatch(index) {
    // retrieves the next data batch, according to the index, and returns it
    // as an array of objects. A normal batch size: 1000 - 10,000 objects,
    // depending on the size of the objects.
    //
    // returns null when there is no more data left.
}

db.tx('massive-insert', t => {
    return t.sequence(index => {
        const data = getNextInsertBatch(index);
        if (data) {
            const inserts = pgp.helpers.insert(data, cs);
            return t.none(inserts);
        }
    });
})
    .then(data => {
        console.log('Total batches:', data.total, ', Duration:', data.duration);
    })
    .catch(error => {
        console.log(error);
    });
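
For illustration, if the data were already fully in memory, as with the cityNet array from the question, getNextInsertBatch could be a simple slice (the batch size and the array name are assumptions):

const BATCH_SIZE = 1000;

function getNextInsertBatch(index) {
    const start = index * BATCH_SIZE;
    if (start >= cityNet.length) {
        return null; // no data left, which ends the sequence
    }
    return cityNet.slice(start, start + BATCH_SIZE);
}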

UPDATE

And if getNextInsertBatch can only get the data asynchronously, then return a promise from it, and update the sequence->source callback accordingly:

return t.sequence(index => {
    return getNextInsertBatch(index)
        .then(data => {
            if (data) {
                const inserts = pgp.helpers.insert(data, cs);
                return t.none(inserts);
            }
        });
});
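
As an example, a promise-returning getNextInsertBatch might look like this, where fetchBatchFromSource is a hypothetical placeholder for any asynchronous source (a file, an HTTP endpoint, another database):

// Sketch only: fetchBatchFromSource is not a real API, just a stand-in
// for whatever promise-based source provides the rows.
function getNextInsertBatch(index) {
    return fetchBatchFromSource(index)
        .then(rows => (rows && rows.length ? rows : null)); // null means no more data
}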

Related Links:

  • tx
  • sequence / spex.sequence
  • ColumnSet
  • Multi-row insert with pg-promise