Parsing function for comma-delimited string

2019-08-15 18:26发布

问题:

Suppose I have a string like this "cmd,param1,param2". The String is the Arduino String type. https://www.arduino.cc/en/Reference/String

I want to extract each of the substrings separated by commas. I have successfully written the code for a specific case like this. Here's the code:

// Special case: a command string with exactly two commas (three fields).
String str_data = "cmd,param1,param2";
// Locate the two commas; the second search starts just past the first comma.
int firstCommaIndex = str_data.indexOf(',');
int secondCommaIndex = str_data.indexOf(',', firstCommaIndex + 1);
// Cut out the three fields around the two comma positions.
String cmd = str_data.substring(0, firstCommaIndex);
String param1 = str_data.substring(firstCommaIndex + 1, secondCommaIndex);
String param2 = str_data.substring(secondCommaIndex + 1);

My problem is to have a function that solves the general case. The string can be delimited with any number of commas. I would like to have a function that looks like this:

// Desired helper (not yet implemented): return the nth comma-separated field
// of input_delimited_str. nth_param_num is 1-based, e.g. for
// "cmd,param1,param2,param3,param4" a value of 1 should yield "cmd" and a
// value of 5 should yield "param4".
String parserCommaDelimited(String input_delimited_str, int nth_param_num)
{
    // implementation goes here -- this is the function being asked for
}

Suppose input_delimited_str="cmd,param1,param2,param3,param4"

parserCommaDelimited(input_delimited_str, 1) returns "cmd". parserCommaDelimited(input_delimited_str, 5) returns "param4".

回答1:

The following is a basic CSV parser:

void readCSVline(char *line);
char *readCSVfield(char *line, char *buf);
/* readCSVdemo feeds one hard-coded sample CSV record to readCSVline.
 * The record contains an empty field, quoted fields, and an empty quoted field.
 */
void readCSVdemo(void)
{
    /* Kept in a writable array because readCSVline takes a non-const char *. */
    char sample[] =
        "0,,10004,10004,\"Albany Hwy After Galliers Av\",\"\",-32.13649428,116.0176090070,3";

    readCSVline(sample);
}
/* readCSVline walks one CSV record field by field; this is the place to add
 * your own "intelligence" about which fields to read and what to do with them.
 * As written it prints every field on its own line, followed by the field count.
 *
 * Notes:
 *  - Each field is copied into an 80-byte scratch buffer, and readCSVfield is
 *    given no size limit, so fields must be shorter than 80 characters.
 *  - Scanning stops as soon as the cursor reaches the terminating NUL, so a
 *    record ending in a comma does not report a final empty field.
 */
void readCSVline(char *line)
{
    char field[80];
    char *cursor;
    int count = 0;

    for (cursor = line; *cursor != '\0'; count++) {
        /* readCSVfield returns the position at which the next field starts. */
        cursor = readCSVfield(cursor, field);
        printf("%s\n", field);
    }

    printf("%d fields read.\n", count);
}
/* readCSVfield reads a field from a CSV line until the next comma or end-of-line.
 *
 * line: start of the field inside a NUL-terminated CSV record (not modified).
 * buf:  destination for the decoded field; always NUL-terminated on return.
 *       No size is passed, so the caller must supply a buffer large enough
 *       for the longest possible field (the remaining line length plus one
 *       is always sufficient).
 *
 * Decoding rules:
 *  - A double quote toggles "quoted" mode; the quote itself is not copied.
 *  - Inside quotes every character is copied verbatim (commas and blanks
 *    included), and a doubled quote ("") yields one literal quote.
 *  - Outside quotes, space, tab, CR and LF are dropped -- also in the middle
 *    of an unquoted field.
 *
 * It returns where the reading stopped: just past the delimiting comma, or at
 * the terminating NUL when the line is exhausted.
 */
char *readCSVfield(char *line, char *buf)
{
    /* Plain int flag so the function does not depend on TRUE/FALSE macros. */
    int in_quotes = 0;
    char *cptr = line;

    while (*cptr) {
        if (in_quotes) {
            if (*cptr == '"') {
                if (cptr[1] == '"') {
                    /* Escaped double quote: emit one quote and step over the
                     * first of the pair; the common increment below steps
                     * over the second. */
                    *buf++ = '"';
                    cptr++;
                } else {
                    /* Closing quote: leave quoted mode. */
                    in_quotes = 0;
                }
            } else {
                *buf++ = *cptr;
            }
        } else {
            switch (*cptr) {
            case '"':
                in_quotes = 1;
                break;
            case ',':
                /* End of field: terminate and resume after the comma. */
                *buf = '\0';
                return cptr + 1;
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                /* Unquoted whitespace is skipped. */
                break;
            default:
                *buf++ = *cptr;
                break;
            }
        }
        cptr++;
    }

    /* Ran off the end of the line: this was the last field. */
    *buf = '\0';
    return cptr;
}


回答2:

You can split string as below and get whatever you want.

/*
 * split - cut str into the non-empty pieces found between occurrences of tok.
 *
 * result: caller-provided array that receives one newly malloc'ed,
 *         NUL-terminated copy per piece. The caller must make it large enough
 *         for every piece (no capacity is passed in) and must free() each
 *         returned entry.
 * str:    NUL-terminated input string; it is not modified.
 * tok:    delimiter character.
 *
 * Runs of consecutive delimiters, as well as leading and trailing delimiters,
 * produce no entries (empty fields are skipped).
 *
 * Returns the number of entries stored in result, 0 when result or str is
 * NULL, or -1 if an allocation fails (entries stored so far are freed first).
 */
int split(char *result[], const char *str, char tok) {
    int ent_cnt = 0;
    const char *cur = str;

    if (result == NULL || str == NULL)
        return 0;

    while (*cur != '\0') {
        const char *start;
        size_t len;
        size_t i;
        char *entry;

        /* Skip any run of delimiters in front of the next piece. */
        while (*cur != '\0' && *cur == tok)
            cur++;

        /* Measure the piece in place, so no fixed-size scratch buffer is
         * needed and pieces of any length are handled. */
        start = cur;
        while (*cur != '\0' && *cur != tok)
            cur++;

        len = (size_t)(cur - start);
        if (len == 0)
            break;  /* only delimiters were left */

        entry = malloc(len + 1);
        if (entry == NULL) {
            /* Do not hand back a partially filled array on failure. */
            while (ent_cnt > 0)
                free(result[--ent_cnt]);
            return -1;
        }

        for (i = 0; i < len; i++)
            entry[i] = start[i];
        entry[len] = '\0';

        result[ent_cnt++] = entry;
    }

    return ent_cnt;
}

/*
 * Small self-check for split(): splits a sample string, prints every piece on
 * its own line, and prints "Fail!" when the piece count is not the expected 5.
 * Always returns 0.
 */
int main(int argc, char *argv[]) {
    char *result[10];   /* room for more than the 5 pieces expected below */
    int cnt = split(result, "s,this,is,a,test,", ',');
    int i;

    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    for (i = 0; i < cnt; i++) {
        printf("%s\n", result[i]);
    }

    if (cnt != 5) {
        printf("Fail!\n");
    }

    /* split() allocates every entry with malloc; release them here. */
    for (i = 0; i < cnt; i++) {
        free(result[i]);
    }

    return 0;
}


回答3:

Try splitting the string; in C/C++ the function for that is strtok:

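variable = strtok(variable,"delimiter"); splits the string into tokens. In C/C++ it returns one token per call rather than an array, so call it again with NULL as the first argument to get each following token. I don't know the C++ details well, since I program mostly in PHP and JavaScript, but you can read about it at: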

http://www.cplusplus.com/reference/cstring/strtok/

I hope it helps you!