可以将文章内容翻译成中文,广告屏蔽插件可能会导致该功能失效(如失效,请关闭广告屏蔽插件后再试):
问题:
I have a comma separated string which might contain empty fields. For example:
1,2,,4
Using a basic
sscanf(string,"%[^,],%[^,],%[^,],%[^,],%[^,]", &val1, &val2, &val3, &val4);
I get all the values prior to the empty field, and unexpected results from the empty field onwards.
When I remove the expression for the empty field from the sscanf(),
sscanf(string,"%[^,],%[^,],,%[^,],%[^,]", &val1, &val2, &val3, &val4);
everything works out fine.
Since I don't know when I'm going to get an empty field, is there a way to rewrite the expression to handle empty fields elegantly?
回答1:
If you use strtok
with the comma as your separator character you'll get a list of strings one or more of which will be null/zero length.
Have a look at my answer here for more information.
回答2:
man sscanf:
[
Matches a nonempty sequence of characters from the specified set of
accepted characters;
(emphasis added).
回答3:
This looks like you are currently dealing with CSV values. If you need to extend it to handle quoted strings (so that fields can contain commas, for example), you will find that the scanf
-family can't handle all the complexities of the format. Thus, you will need to use code specifically designed to handle (your variant of) CSV-format.
You will find a discussion of a set of CSV library implementations in 'The Practice of Programming' - in C and C++. No doubt there are many others available.
回答4:
scanf()
returns the number of items assigned. Maybe you can use that info ...
/*
 * Try to read six comma-separated ints; if fewer than six were assigned,
 * retry with a format that expects an empty field right after the last
 * value that was read successfully.
 */
const char *data = "1, 2,,, 5, 6";
int a[6];
int assigned = sscanf(data, "%d,%d,%d,%d,%d,%d", a, a+1, a+2, a+3, a+4, a+5);
if (assigned < 6) {
    /* A pointer, not an array: arrays cannot be assigned to, and every
     * candidate format is a string literal anyway. */
    const char *fmt = NULL;
    switch (assigned) {
        case 0: fmt = ",%d,%d,%d,%d,%d"; break;
        case 1: fmt = "%d,,%d,%d,%d,%d"; break;
        case 2: fmt = "%d,%d,,%d,%d,%d"; break;
        case 3: fmt = "%d,%d,%d,,%d,%d"; break;
        case 4: fmt = "%d,%d,%d,%d,,%d"; break;
        case 5: fmt = "%d,%d,%d,%d,%d,"; break;
        default: assert(0 && "this did not happen"); break;
    }
    /* fmt stays NULL if the default arm is reached with assertions
     * compiled out (e.g. sscanf returned EOF); skip the retry then. */
    if (fmt != NULL) {
        /* Each (assigned<=k) term is 0 or 1: targets at or after the
         * empty field are shifted one slot to the right. */
        sscanf(data, fmt, a+(assigned<=0), a+1+(assigned<=1), a+2+(assigned<=2),
               a+3+(assigned<=3), a+4+(assigned<=4));
    }
}
Ugh! And that's only for 1 missing value
As has been pointed out by other answers, you're much better off parsing the string in the 'usual' way: fgets()
and strtok()
.
回答5:
Here is my version to scan comma-separated int values. The code detects empty and non-integer fields.
#include <stdio.h>
#include <string.h>
/*
 * Splits a fixed sample string on commas and prints, for every field,
 * either its integer value, "(empty)" or "(syntax error)".
 */
int main(){
    char input[] = " 1 , 2 x, , 4 ";
    char *rest = input;

    printf("str: '%s'\n", input );
    while( rest != NULL ){
        /* Step over leading blanks so a blank-only field looks empty. */
        rest += strspn( rest, " \t" );

        /* strsep() cuts the field at the next comma and sets rest to
         * NULL once the last field has been handed out. */
        char *field = strsep( &rest, "," );
        if( *field == '\0' ){
            printf("val: (empty)\n" );
            continue;
        }

        int number;
        char extra;
        /* The trailing %c only converts when something other than white
         * space follows the integer, so exactly 1 means "clean integer". */
        if( sscanf( field, " %i %c", &number, &extra ) == 1 ){
            printf("val: %i\n", number );
        }
        else{
            printf("val: (syntax error)\n" );
        }
    }
    return 0;
}
Result:
str: ' 1 , 2 x, , 4 '
val: 1
val: (syntax error)
val: (empty)
val: 4
回答6:
Put a '*' after the '%' to skip reading a field. In addition, it is possible to read at most 3 characters by writing '%3s', for example.
回答7:
I arrived here looking for answers to the same question. I didn't want to leave behind the scanf function either.
In the end, I built a zsscanf myself, where I parsed the format, sscanf'ed each item one by one and checked the return value of sscanf to see if I got an empty read in any of them. This was somewhat my particular case: I wanted only some of the fields, some of which could be empty, and could not assume the separator.
#include <stdarg.h>
#include <stdio.h>
/*
 * sscanf() variant that tolerates empty fields.
 *
 * The format is cut into pieces at every '%'. Each piece is handed to
 * sscanf() on its own with "%n" appended, so the number of characters
 * consumed from data is known after every step. A non-suppressed conversion
 * that fails to match stores an empty string in its target and still counts
 * as saved.
 *
 * Limitations:
 *  - targets are treated as char buffers (%...s, %...c, %...[...]);
 *  - a piece (conversion plus any literal text up to the next '%') is copied
 *    into a 32-byte buffer, at most 28 characters at a time;
 *  - scansets must not contain '%'.
 *
 * data    string to scan
 * format  scanf-style format, subject to the limitations above
 * ...     one char buffer per non-suppressed conversion
 *
 * Returns the number of targets written (empty fields included). If none
 * was written, returns 0, or a negative value when the input ended before
 * a non-suppressed conversion, a "%%" did not match, or the format ended
 * with a lone '%'.
 */
int zsscanf(char *data, char *format, ...)
{
    va_list argp;
    int fptr = 0, sptr = 0, iptr = 0, isptr = 0, ok = 0, saved = 0;
    char def[32];

    va_start(argp, format);
    /* Stop at the end of the format: scanning the terminating NUL makes
     * sscanf() return EOF, which must not be mistaken for progress. */
    while (format[fptr] != '\0')
    {
        if (format[fptr] != '%')
        {
            /* Literal text: copy it up to the next '%'. */
            ok = sscanf(&format[fptr], "%28[^%]%n", def, &iptr);
            if (ok != 1) break;
            fptr += iptr;
            def[iptr] = '%';
            def[iptr+1] = 'n';
            def[iptr+2] = 0;
            /* A literal-only format assigns nothing, so the return value
             * cannot tell a match from a mismatch; %n is reached only
             * when the whole literal matched. */
            isptr = -1;
            ok = sscanf(&data[sptr], def, &isptr);
            if (isptr < 0) break;
            sptr += isptr;
        }
        else if (format[fptr+1] == '%')
        {
            /* "%%" stands for one literal '%' in the input. */
            if (data[sptr] != '%')
            {
                ok = -1;
                break;
            }
            fptr += 2;
            sptr += 1;
        }
        else
        {
            void *savehere = NULL;

            /* Conversion: copy everything after '%' up to the next '%'. */
            ok = sscanf(&format[fptr], "%%%28[^%]%n", &def[1], &iptr);
            if (ok != 1)
            {
                /* Lone '%' at the very end of the format. */
                ok = -1;
                break;
            }
            fptr += iptr;
            def[0] = '%';
            def[iptr] = '%';
            def[iptr+1] = 'n';
            def[iptr+2] = 0;
            isptr = 0;
            if (def[1] != '*')
            {
                savehere = va_arg(argp, void*);
                ok = sscanf(&data[sptr], def, savehere, &isptr);
                if (ok == 0 && isptr == 0)
                {
                    // Empty field. Let's assume only char types.
                    ((char*)savehere)[0] = 0;
                    ok = 1;
                }
                if (ok > 0)
                {
                    saved++;
                }
            }
            else
            {
                /* Suppressed conversion: nothing to store; isptr stays 0
                 * when nothing matched, so the position is unchanged. */
                ok = sscanf(&data[sptr], def, &isptr) == 0;
            }
            if (ok < 0) break;
            sptr += isptr;
        }
    }
    va_end(argp);

    if (saved > 0)
    {
        return saved;
    }
    return ok < 0 ? ok : 0;
}
// Demo: pull the first and fifth fields out of two sample records, one of
// them with empty fields, and print what zsscanf() reports for each.
int main()
{
    char *fields = "%15[^\t;,]%*1[\t;,]" // NameId
                   "%*[^\t;,]%*1[\t;,]" // Name
                   "%*[^\t;,]%*1[\t;,]" // Abbreviation
                   "%*[^\t;,]%*1[\t;,]" // Description
                   "%31[^\t;,]"; // Electrical Line
    char *records[] = {
        "TVC-CCTV-0002\tTVC-CCTV-0002\tTVC-CCTV-0002\tCCTV DOMO CAMERA 21-32-29\tELECTRICAL_TopoLine_823\tfoo\tbar",
        "TVC-CCTV-0000;;;;;foo;bar;"
    };
    char id[16];
    char line[32];
    int i;

    for (i = 0; i < 2; i++)
    {
        int count = zsscanf(records[i], fields, id, line);
        printf ("%d: |%s|%s|\n", count, id, line);
    }
    return 0;
}
Output:
2: |TVC-CCTV-0002|ELECTRICAL_TopoLine_823|
2: |TVC-CCTV-0000||
Be warned, it's not fully tested and has severe limitations (the most obvious ones: accepts only %...s
, %...c
, %...[...]
and requires separators as %...[...]
; otherwise I'd really have to care about the format string, this way I only care about %
).
回答8:
I had to modify this code a bit to work properly:
//rm token_pure;gcc -Wall -O3 -o token_pure token_pure.c; ./token_pure
#include <stdio.h>
#include <string.h>
/*
 * Splits a fixed sample string on commas and prints, for every field,
 * either its integer value, "(empty)" or "(syntax error)".
 */
int main (void)
{
    char str[] = " 1 , 2 x, , 4 ";
    char *s1;
    char *s2;

    /* The array decays to a pointer to its first character, so neither a
     * cast nor the address-of operator is needed here. */
    s2 = str;
    do {
        /* Skip leading blanks so a blank-only field is reported as empty. */
        while( *s2 == ' ' || *s2 == '\t' ) s2++;
        /* strsep() ends the field at the next comma and sets s2 to NULL
         * after the last field. */
        s1 = strsep( &s2, "," );
        if( !*s1 ){
            printf("val: (empty)\n" );
        }
        else{
            int val;
            char ch;
            /* %c only converts if non-blank text follows the integer, so
             * a return value of exactly 1 means the field is a clean int. */
            int ret = sscanf( s1, " %i %c", &val, &ch );
            if( ret != 1 ){
                printf("val: (syntax error)\n" );
            }
            else{
                printf("val: %i\n", val );
            }
        }
    } while (s2 != NULL);
    return 0;
}
and the output:
val: 1
val: (syntax error)
val: (empty)
val: 4
回答9:
I made a modification for tab delimited TSV files, hopefully it may help:
//rm token_tab;gcc -Wall -O3 -o token_tab token_tab.c; ./token_tab
#include <stdio.h>
#include <string.h>
/*
 * Splits a fixed sample string on tabs and prints, for every field, either
 * its integer value, "(empty)", or the raw text when it is not a clean
 * integer.
 */
int main (void)
{
    // char str[] = " 1 2 x text 4 ";
    char str[] = " 1\t 2 x\t\t text\t4 ";
    char *s1;
    char *s2;

    /* The array decays to a pointer to its first character, so neither a
     * cast nor the address-of operator is needed here. */
    s2 = str;
    do {
        /* Only spaces are skipped: a tab is the field separator here. */
        while( *s2 == ' ') s2++;
        /* strsep() ends the field at the next tab and sets s2 to NULL
         * after the last field. */
        s1 = strsep( &s2, "\t" );
        if( !*s1 ){
            printf("val: (empty)\n" );
        }
        else{
            int val;
            char ch;
            /* %c only converts if non-blank text follows the integer, so
             * a return value of exactly 1 means the field is a clean int. */
            int ret = sscanf( s1, " %i %c", &val, &ch );
            if( ret != 1 ){
                printf("val: (syntax error or string)=%s\n", s1 );
            }
            else{
                printf("val: %i\n", val );
            }
        }
    } while (s2 != NULL);
    return 0;
}
And the output:
val: 1
val: (syntax error or string)=2 x
val: (empty)
val: (syntax error or string)=text
val: 4
回答10:
There are some problems with strtok() listed here: http://benpfaff.org/writings/clc/strtok.html
Hence, it is better to avoid strtok.
Now, consider a string containing an empty field as follows:
char myCSVString[101] = "-1.4,2.6,,-0.24,1.26"; // specify input here
You can use a simple function to convert a string in CSV format into a float array:
int strCSV2Float(float *strFloatArray , char *myCSVStringing);
Please find the Usage below:
#include <stdio.h>
#include <stdlib.h>
int strCSV2Float(float *strFloatArray , char *myCSVStringing);
/*
 * Demo driver: converts a sample CSV string with an empty field into floats
 * and prints every converted value.
 */
int main(void)
{
    char myCSVString[101] = "-1.4,2.6,,-0.24,1.26"; // specify input here
    float floatArr[10]; // specify size here
    int totalValues = 0;

    printf("myCSVString == %s \n", myCSVString);
    // NOTE(review): strCSV2Float() is given no capacity, so the input must
    // not hold more fields than floatArr has elements.
    totalValues = strCSV2Float(floatArr, myCSVString); // call the function here
    for (int floatValueCount = 0; floatValueCount < totalValues; floatValueCount++)
    {
        printf("floatArr[%d] = %f\n", floatValueCount, floatArr[floatValueCount]);
    }
    return 0;
}
/*
 * Converts a comma-separated list of numbers into floats.
 *
 * Fields end at a ',' or at the end of the string. An empty or non-numeric
 * field is stored as 0.0. A trailing comma does not start another (empty)
 * field, so "1,2," yields two values and "" yields none.
 *
 * strFloatArray   output array; the caller must provide room for one float
 *                 per field, since no capacity is passed in
 * myCSVStringing  NUL-terminated input string; it is not modified
 *
 * Returns the number of values stored, or 0 if either argument is NULL.
 */
int strCSV2Float(float *strFloatArray , char *myCSVStringing)
{
    int fieldCount = 0;
    const char *field = myCSVStringing;

    if (strFloatArray == NULL || myCSVStringing == NULL)
    {
        return 0;
    }

    while (*field != '\0')
    {
        char word[64];
        size_t fieldLength = 0;
        size_t copyLength;
        size_t i;

        /* Find the end of the current field. */
        while (field[fieldLength] != '\0' && field[fieldLength] != ',')
        {
            fieldLength++;
        }

        /* Work on a bounded copy: an over-long field is truncated instead
         * of overrunning the buffer, and strtod() never sees the comma. */
        copyLength = fieldLength < sizeof word - 1 ? fieldLength : sizeof word - 1;
        for (i = 0; i < copyLength; i++)
        {
            word[i] = field[i];
        }
        word[copyLength] = '\0';

        /* strtod() returns 0.0 when no number can be parsed. */
        strFloatArray[fieldCount++] = (float)strtod(word, NULL);

        field += fieldLength;
        if (*field == ',')
        {
            field++; /* step over the separator */
        }
    }
    return fieldCount;
}
Output is as follows :
myCSVString == -1.4,2.6,,-0.24,1.26
floatArr[0] = -1.400000
floatArr[1] = 2.600000
floatArr[2] = 0.000000
floatArr[3] = -0.240000
floatArr[4] = 1.260000