Javascript code to parse CSV data [duplicate]

2018-12-31 02:16发布

This question already has an answer here:

Does someone have an idea on where I could find some javascript code to parse CSV data ?

8条回答
君临天下
2楼-- · 2018-12-31 02:48

I have an implementation as part of a spreadsheet project.

This code is not yet tested thoroughly, but anyone is welcome to use it.

As some of the answers noted though, your implementation can be much simpler if you actually have DSV or TSV file, as they disallow the use of the record and field separators in the values. CSV, on the other hand can actually have commas and newlines inside a field, which breaks most regex and split-based approaches.

var CSV = {
parse: function(csv, reviver) {
    reviver = reviver || function(r, c, v) { return v; };
    var chars = csv.split(''), c = 0, cc = chars.length, start, end, table = [], row;
    while (c < cc) {
        table.push(row = []);
        while (c < cc && '\r' !== chars[c] && '\n' !== chars[c]) {
            start = end = c;
            if ('"' === chars[c]){
                start = end = ++c;
                while (c < cc) {
                    if ('"' === chars[c]) {
                        if ('"' !== chars[c+1]) { break; }
                        else { chars[++c] = ''; } // unescape ""
                    }
                    end = ++c;
                }
                if ('"' === chars[c]) { ++c; }
                while (c < cc && '\r' !== chars[c] && '\n' !== chars[c] && ',' !== chars[c]) { ++c; }
            } else {
                while (c < cc && '\r' !== chars[c] && '\n' !== chars[c] && ',' !== chars[c]) { end = ++c; }
            }
            row.push(reviver(table.length-1, row.length, chars.slice(start, end).join('')));
            if (',' === chars[c]) { ++c; }
        }
        if ('\r' === chars[c]) { ++c; }
        if ('\n' === chars[c]) { ++c; }
    }
    return table;
},

stringify: function(table, replacer) {
    replacer = replacer || function(r, c, v) { return v; };
    var csv = '', c, cc, r, rr = table.length, cell;
    for (r = 0; r < rr; ++r) {
        if (r) { csv += '\r\n'; }
        for (c = 0, cc = table[r].length; c < cc; ++c) {
            if (c) { csv += ','; }
            cell = replacer(r, c, table[r][c]);
            if (/[,\r\n"]/.test(cell)) { cell = '"' + cell.replace(/"/g, '""') + '"'; }
            csv += (cell || 0 === cell) ? cell : '';
        }
    }
    return csv;
}
};
查看更多
人气声优
3楼-- · 2018-12-31 02:49

Here's my PEG(.js) grammar that seems to do ok at RFC 4180 (i.e. it handles the examples at http://en.wikipedia.org/wiki/Comma-separated_values):

start
  = [\n\r]* first:line rest:([\n\r]+ data:line { return data; })* [\n\r]* { rest.unshift(first); return rest; }

line
  = first:field rest:("," text:field { return text; })*
    & { return !!first || rest.length; } // ignore blank lines
    { rest.unshift(first); return rest; }

field
  = '"' text:char* '"' { return text.join(''); }
  / text:[^\n\r,]* { return text.join(''); }

char
  = '"' '"' { return '"'; }
  / [^"]

Try it out at http://jsfiddle.net/knvzk/10 or http://pegjs.majda.cz/online. Download the generated parser at https://gist.github.com/3362830.

查看更多
深知你不懂我心
4楼-- · 2018-12-31 02:49

Im not sure why I couldn't kirtans ex. to work for me. It seemed to be failing on empty fields or maybe fields with trailing commas...

This one seems to handle both.

I did not write the parser code, just a wrapper around the parser function to make this work for a file. see Attribution

    var Strings = {
        /**
         * Wrapped csv line parser
         * @param s string delimited csv string
         * @param sep separator override
         * @attribution : http://www.greywyvern.com/?post=258 (comments closed on blog :( )
         */
        parseCSV : function(s,sep) {
            // http://stackoverflow.com/questions/1155678/javascript-string-newline-character
            var universalNewline = /\r\n|\r|\n/g;
            var a = s.split(universalNewline);
            for(var i in a){
                for (var f = a[i].split(sep = sep || ","), x = f.length - 1, tl; x >= 0; x--) {
                    if (f[x].replace(/"\s+$/, '"').charAt(f[x].length - 1) == '"') {
                        if ((tl = f[x].replace(/^\s+"/, '"')).length > 1 && tl.charAt(0) == '"') {
                            f[x] = f[x].replace(/^\s*"|"\s*$/g, '').replace(/""/g, '"');
                          } else if (x) {
                        f.splice(x - 1, 2, [f[x - 1], f[x]].join(sep));
                      } else f = f.shift().split(sep).concat(f);
                    } else f[x].replace(/""/g, '"');
                  } a[i] = f;
        }
        return a;
        }
    }
查看更多
深知你不懂我心
5楼-- · 2018-12-31 02:53

Here's an extremely simple CSV parser that handles quoted fields with commas, new lines, and escaped double quotation marks. There's no splitting or RegEx. It scans the input string 1-2 characters at a time and builds an array.

Test it at http://jsfiddle.net/vHKYH/.

function parseCSV(str) {
    var arr = [];
    var quote = false;  // true means we're inside a quoted field

    // iterate over each character, keep track of current row and column (of the returned array)
    for (var row = 0, col = 0, c = 0; c < str.length; c++) {
        var cc = str[c], nc = str[c+1];        // current character, next character
        arr[row] = arr[row] || [];             // create a new row if necessary
        arr[row][col] = arr[row][col] || '';   // create a new column (start with empty string) if necessary

        // If the current character is a quotation mark, and we're inside a
        // quoted field, and the next character is also a quotation mark,
        // add a quotation mark to the current column and skip the next character
        if (cc == '"' && quote && nc == '"') { arr[row][col] += cc; ++c; continue; }  

        // If it's just one quotation mark, begin/end quoted field
        if (cc == '"') { quote = !quote; continue; }

        // If it's a comma and we're not in a quoted field, move on to the next column
        if (cc == ',' && !quote) { ++col; continue; }

        // If it's a newline (CRLF) and we're not in a quoted field, skip the next character
        // and move on to the next row and move to column 0 of that new row
        if (cc == '\r' && nc == '\n' && !quote) { ++row; col = 0; ++c; continue; }

        // If it's a newline (LF or CR) and we're not in a quoted field,
        // move on to the next row and move to column 0 of that new row
        if (cc == '\n' && !quote) { ++row; col = 0; continue; }
        if (cc == '\r' && !quote) { ++row; col = 0; continue; }

        // Otherwise, append the current character to the current column
        arr[row][col] += cc;
    }
    return arr;
}
查看更多
浮光初槿花落
6楼-- · 2018-12-31 02:54

I think I can sufficiently beat Kirtan's answer

Enter jQuery-CSV

It's a jquery plugin designed to work as an end-to-end solution for parsing CSV into Javascript data. It handles every single edge case presented in RFC 4180, as well as some that pop up for Excel/Google Spreadsheed exports (ie mostly involving null values) that the spec is missing.

Example:

track,artist,album,year

Dangerous,'Busta Rhymes','When Disaster Strikes',1997

// calling this
music = $.csv.toArrays(csv)

// outputs...
[
  ["track","artist","album","year"],
  ["Dangerous","Busta Rhymes","When Disaster Strikes","1997"]
]

console.log(music[1][2]) // outputs: 'When Disaster Strikes'

Update:

Oh yeah, I should also probably mention that it's completely configurable.

music = $.csv.toArrays(csv, {
  delimiter:"'", // sets a custom value delimiter character
  separator:';', // sets a custom field separator character
});

Update 2:

It now works with jQuery on Node.js too. So you have the option of doing either client-side or server-side parsing with the same lib.

Update 3:

Since Google Code shutdown, jquery-csv has been migrated to GitHub.

Disclaimer: I am also the author of jQuery-CSV.

查看更多
大哥的爱人
7楼-- · 2018-12-31 02:55

Why not just use .split(',') ?

http://www.w3schools.com/jsref/jsref_split.asp

var str="How are you doing today?";
var n=str.split(" "); 
查看更多
登录 后发表回答