C read space-separated values from file

2019-02-19 15:52发布

问题:

I need to read text from a file and assign values to a struct based on information read.

Here is the format of the text file:

First Middle Last   Address          city     state zip age sex tenure salary  
\--------------------------------------------------------------  
ADA     A AGUSTA    33 BABBAGE ROAD  LOVELACE    GB 19569 28 F 2 350.50  
ISSAC   A ASIMOV    99 FICTION WAY   AMHERST     MA 63948 58 M 6 423.88  
HUMPHRY R BOGART    71 SAM STREET    HOLLYWOOD   CA 48482 56 M 5 366.00  

And the struct I have to read it into:

/* One employee record; the members follow the columns of the input file. */
typedef struct
{
    char  first[8];    /* "First" column */
    char  initial;     /* "Middle" column (a single letter) */
    char  last[10];    /* "Last" column */
    char  street[17];  /* "Address" column; may hold several words */
    char  city[12];    /* "city" column */
    char  state[3];    /* "state" column */
    int   age;
    int   tenure;
    int   zip;
    float salary;
    char  sex;
} Employee;

The code I have used so far has been:

/*
 * Question's first attempt: fill *e from the stream `pay` (declared
 * outside this function) with a single fscanf() call.
 *
 * NOTE(review): kept exactly as posted; this is the code that misbehaves.
 *  - %s reads one whitespace-delimited word, so the street and the city
 *    are each read as two words into the same buffer (e->street and
 *    e->city are passed twice); the second word overwrites the first.
 *  - &e->initial and &e->sex point at single chars but are converted with
 *    %s, which also stores a terminating '\0' after the character(s) read.
 *  - No %s conversion has a field width, and the return value of fscanf()
 *    is ignored.
 */
void inputLine(Employee* e)  
{  
fscanf(pay, "%s %s %s %s %s %s %s %s %d %d %s %d %f",  
           e->first, &e->initial, e->last, e->street,  
           e->street, e->city, e->city, e->state,  
           &e->zip, &e->age, &e->sex, &e->tenure,  
           &e->salary);  
}  

But when I print each line, the first few lines are fine, then things start messing up:

TED L KOPPEL ABC WASHINGTON DC 37376 48 M 9 909.44  
DAVID T LETTERMAN WNBC NEW YORK 0 0   
NY 1 47 5 STEVIE R 0 0   
NICKS 3 MUSIC CHICAGO 23459 38 0 0   
F 8 460.88 P 76 SILLY 0 89   
STREET L GB 44 2 320.50 0 12341   

I have also tried:

/*
 * Question's second attempt: read one line from the stream `pay` (declared
 * outside this function) with fgets(), echo it, then parse it with sscanf().
 *
 * NOTE(review): kept exactly as posted. The sscanf() format and argument
 * list have the same problems as a plain fscanf() with this format: %s
 * stops at white space, e->street and e->city are each passed twice, and
 * the single chars e->initial and e->sex are converted with %s.
 */
void inputLine(Employee* e)  
{  
char line[53];  
/* fgets() stores at most 51 characters plus '\0' here; the return value is not checked. */
fgets(line, 52, pay);  
/* Echo the raw text that was read, flushing so it appears immediately. */
printf("%s\n", line);  
fflush(stdout);  
sscanf(line, "%s %s %s %s %s %s %s %s %d %d %s %d %f",  
e->first, &e->initial, e->last, e->street,  
e->street, e->city, e->city, e->state,  
&e->zip, &e->age, &e->sex, &e->tenure,  
&e->salary);  
}  

But that gives the same result.

The code cannot be in C++. I am using the Linux GNU GCC 4.8.1 compiler, but I have also tested this on Mac's compiler and it did not work.

回答1:

Global variables such as pay — presumably defined as FILE *pay; — are a bad idea, and completely unnecessary in the example code; pass the stream to the function as a parameter instead. Always test the return value from fscanf() and its relatives to ensure that you got all the data you expected.

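However, your main trouble is that %s stops at the first white space, so you have immense problems reading the multi-word address field. Your inputs are unconstrained too: none of the %s conversions specifies a maximum field width, so an over-long word can overflow the destination array. You also try to read multiple words of the street address by passing e->street multiple times; that won't work, since each later word overwrites the one read before it.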

You need something like:

/*
 * Read one employee record from fp into *e.
 *
 * The street and city are read as fixed-width character runs (%16c and
 * %11c) so that they may contain spaces; every %s conversion carries a
 * width so the destination arrays cannot overflow.
 *
 * Returns 0 when all eleven fields were converted, -1 otherwise.
 */
int inputLine(FILE *fp, Employee* e)
{
    enum { FIELDS_EXPECTED = 11 };
    int converted;

    converted = fscanf(fp, "%7s %c %9s %16c %11c %2s %d %d %c %d %f",
                       e->first, &e->initial, e->last,
                       e->street, e->city, e->state,
                       &e->zip, &e->age, &e->sex,
                       &e->tenure, &e->salary);
    if (converted == FIELDS_EXPECTED)
    {
        /* %Nc does not append a terminator, so add one by hand. */
        e->street[16] = '\0';
        e->city[11] = '\0';
        return 0;
    }
    return -1;
}

This uses %c to read single characters; it uses %16c to read the multi-word street address and %11c to read the (possibly multi-word) city. It uses %7s, %9s, and %2s to prevent overflows of other strings. The assignments after the fscanf() call ensure that the counted %c strings are null terminated; by itself, %16c does not add a null terminator.

The inputLine() function now returns an error indication (-1 chosen) when there's a problem, and 0 to indicate success. This is a common pattern with the Unix system calls, but different from the behaviour of the underlying scanf()-family of functions, as noted in a comment by chux.

Working code 1

This code reads standard input, using fscanf() as in the question. It ensures that there's no overflow of the emp array, too.

#include <stdio.h>

/* One employee record; the members follow the columns of the input file. */
typedef struct
{
    char  first[8];    /* first name */
    char  initial;     /* middle initial */
    char  last[10];    /* last name */
    char  street[17];  /* street address; may hold several words */
    char  city[12];
    char  state[3];
    int   age;
    int   tenure;
    int   zip;
    float salary;
    char  sex;
} Employee;

/* Print one employee record to fp as a single line prefixed by tag. */
void dump_employee(FILE *fp, const char *tag, const Employee *e);
/* Read one employee record from fp into *e; returns 0 on success, -1 on failure. */
int inputLine(FILE *fp, Employee *e);

/* Number of elements in the emp array declared in main(). */
enum { MAXEMP = 10 };

/*
 * Read employee records from standard input and print each one.
 *
 * The first two input lines (the column titles and the dashed separator)
 * are skipped.  After that, up to MAXEMP records are read with inputLine()
 * and echoed with dump_employee(); reading stops at the first record that
 * cannot be converted or at end of input.
 *
 * Returns 1 if either header line is missing, 0 otherwise.
 */
int main(void)
{
    char line[4096];
    Employee emp[MAXEMP];

    /* Discard the two header lines that precede the data rows. */
    if (fgets(line, sizeof(line), stdin) == 0 ||
        fgets(line, sizeof(line), stdin) == 0)
        return 1;
    /*
     * inputLine() returns 0 on success and -1 on failure, so the loop must
     * continue only while it returns 0.  The bound on i keeps the emp array
     * from overflowing.
     */
    for (int i = 0; i < MAXEMP && inputLine(stdin, &emp[i]) == 0; i++)
        dump_employee(stdout, "Employee", &emp[i]);
    return 0;
}

/*
 * Read one employee record from fp into *e.
 *
 * Street and city are read as fixed-width character runs (%16c, %11c) so
 * they may contain spaces; the %s conversions carry widths that match the
 * destination arrays.
 *
 * Returns 0 when all eleven fields were converted, -1 otherwise.
 */
int inputLine(FILE *fp, Employee *e)
{
    enum { FIELDS_EXPECTED = 11 };
    int converted;

    converted = fscanf(fp, "%7s %c %9s %16c %11c %2s %d %d %c %d %f",
                       e->first, &e->initial, e->last,
                       e->street, e->city, e->state,
                       &e->zip, &e->age, &e->sex,
                       &e->tenure, &e->salary);
    if (converted == FIELDS_EXPECTED)
    {
        /* %Nc does not append a terminator, so add one by hand. */
        e->street[16] = '\0';
        e->city[11] = '\0';
        return 0;
    }
    return -1;
}

/*
 * Write one employee record to fp as a single line.
 *
 * tag is printed first, followed by a colon; the string fields are
 * left-justified in fixed-width columns and the zip code is printed with
 * at least five digits.
 */
void dump_employee(FILE *fp, const char *tag, const Employee *e)
{
    static const char layout[] =
        "%s: %-7s %c %-9s %-16s %-11s %-2s %.5d %3d %c %d %6.2f\n";

    fprintf(fp, layout,
            tag,
            e->first, e->initial, e->last,
            e->street, e->city, e->state,
            e->zip, e->age, e->sex, e->tenure, e->salary);
}

Sample output

Employee: ADA     A AGUSTA    33 BABBAGE ROAD  LOVELACE    GB 19569  28 F 2 350.50
Employee: ISSAC   A ASIMOV    99 FICTION WAY   AMHERST     MA 63948  58 M 6 423.88
Employee: HUMPHRY R BOGART    71 SAM STREET    HOLLYWOOD   CA 48482  56 M 5 366.00

Working code 2

This code uses fgets() to read lines and sscanf() to convert the data. It would be much easier to report errors sanely with this version of the code.

#include <stdio.h>

/* One employee record; the members follow the columns of the input file. */
typedef struct
{
    char  first[8];    /* first name */
    char  initial;     /* middle initial */
    char  last[10];    /* last name */
    char  street[17];  /* street address; may hold several words */
    char  city[12];
    char  state[3];
    int   age;
    int   tenure;
    int   zip;
    float salary;
    char  sex;
} Employee;

/* Print one employee record to fp as a single line prefixed by tag. */
void dump_employee(FILE *fp, const char *tag, const Employee *e);
/* Parse one line of text into *e; returns 0 on success, -1 on failure. */
int scan_employee(Employee *e, const char *line);

/* Number of elements in the emp array declared in main(). */
enum { MAXEMP = 10 };

/*
 * Read employee records from standard input, one line at a time, and
 * print every line that scan_employee() converts successfully.
 *
 * The first two input lines are skipped.  At most MAXEMP further lines are
 * read; a line that fails to convert is not printed but still uses up one
 * element of emp.
 *
 * Returns 1 if either of the first two lines is missing, 0 otherwise.
 */
int main(void)
{
    char line[4096];
    Employee emp[MAXEMP];
    int used = 0;

    /* Discard the two leading lines. */
    for (int skipped = 0; skipped < 2; skipped++)
    {
        if (fgets(line, sizeof(line), stdin) == NULL)
            return 1;
    }

    while (used < MAXEMP && fgets(line, sizeof(line), stdin) != NULL)
    {
        Employee *slot = &emp[used];

        if (scan_employee(slot, line) == 0)
            dump_employee(stdout, "Employee", slot);
        used++;
    }
    return 0;
}

/*
 * Convert one line of text into the employee record *e.
 *
 * Street and city are read as fixed-width character runs (%16c, %11c) so
 * they may contain spaces; the %s conversions carry widths that match the
 * destination arrays.
 *
 * Returns 0 when all eleven fields were converted, -1 otherwise.
 */
int scan_employee(Employee *e, const char *line)
{
    enum { FIELDS_EXPECTED = 11 };
    int converted;

    converted = sscanf(line, "%7s %c %9s %16c %11c %2s %d %d %c %d %f",
                       e->first, &e->initial, e->last,
                       e->street, e->city, e->state,
                       &e->zip, &e->age, &e->sex,
                       &e->tenure, &e->salary);
    if (converted == FIELDS_EXPECTED)
    {
        /* %Nc does not append a terminator, so add one by hand. */
        e->street[16] = '\0';
        e->city[11] = '\0';
        return 0;
    }
    return -1;
}

/*
 * Write one employee record to fp as a single line.
 *
 * tag is printed first, followed by a colon; the string fields are
 * left-justified in fixed-width columns and the zip code is printed with
 * at least five digits.
 */
void dump_employee(FILE *fp, const char *tag, const Employee *e)
{
    static const char layout[] =
        "%s: %-7s %c %-9s %-16s %-11s %-2s %.5d %3d %c %d %6.2f\n";

    fprintf(fp, layout,
            tag,
            e->first, e->initial, e->last,
            e->street, e->city, e->state,
            e->zip, e->age, e->sex, e->tenure, e->salary);
}

The output from this program is identical to the output from Working code 1 for the sample data in the question.



标签: c file fgets scanf