Reading matrices from a file and retrieving their dimensions

2019-08-18 08:06发布

I have a text file containing two matrices in this form:

1 2 3
4 5 6
1 2 3
*
4 5 6
1 2 3]

I want to be able to read the dimension of the two matrices and the type of operation * + / -. I'd like to retrieve the dimension and read data at the same time.

In my code the get_dim() function goes through the data in the file to get the dimensions of the two matrices. I don't know if there is a way to store the values of the matrices already at this point with dynamic memory allocation. Then, once I know the dimensions of the matrices, the function read_matrix() reads the same data a second time.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <math.h>

/* Two-state flag values (IN = 1, OUT = 0) available to the scanning code in this file. */
#define IN 1
#define OUT 0

/* A dense matrix of doubles kept in one contiguous, row-major block. */
struct matrix{
    int rows;       /* number of rows */
    int cols;       /* number of columns */
    double *data;   /* rows * cols values; element (i, j) lives at data[i * cols + j] */
};

void f(double x); /* print x and a space, without decimals when its fractional part is below .00001 */
/* Scan `file`, store the row/column counts of the two matrices in r[0..1] and col[0..1],
   and return the operator code. */
int get_dim(char *file, int *r, int *col);
/* Fill A->data and B->data from `file`; both matrices must already carry their
   dimensions and an allocated data block. */
void read_matrix(char *file, struct matrix *A, struct matrix *B);
/* Print A to stdout, one text line per matrix row. */
void print_matrix(struct matrix *A);
/* Store the matrix product A * B in C. */
void multiply(struct matrix *A, struct matrix *B, struct matrix *C);

/*
 * Usage: <program> <matrix-file>
 *
 * Measures the two matrices stored in the file, loads them, prints them and,
 * when their shapes allow it, prints their product.
 *
 * Returns 0 on success and 1 when the argument is missing, the file does not
 * describe two non-empty matrices, memory runs out, or the shapes cannot be
 * multiplied.
 */
int main (int argc, char *argv[])
{
    int rows[2]= {0,0};
    int cols[2]= {0,0};
    int operation; /* operator code reported by get_dim(); the product is always computed */
    int status = 0;

    /* argv[1] is only valid when an argument was actually given */
    if(argc < 2){
        fprintf(stderr, "usage: %s <matrix-file>\n", argc > 0 ? argv[0] : "matrix");
        return 1;
    }

    operation = get_dim(argv[1], rows, cols);
    (void)operation;

    if(rows[0] <= 0 || cols[0] <= 0 || rows[1] <= 0 || cols[1] <= 0){
        fprintf(stderr, "matrix: %s does not contain two non-empty matrices\n", argv[1]);
        return 1;
    }

    struct matrix A;
    struct matrix B;
    struct matrix C;

    A.rows = rows[0];
    A.cols = cols[0];

    B.rows = rows[1];
    B.cols = cols[1];

    C.rows = rows[0];
    C.cols = cols[1];

    /* size_t arithmetic so large dimensions cannot overflow an int product */
    A.data = malloc(sizeof *A.data * (size_t)A.rows * (size_t)A.cols);
    B.data = malloc(sizeof *B.data * (size_t)B.rows * (size_t)B.cols);
    C.data = malloc(sizeof *C.data * (size_t)C.rows * (size_t)C.cols);

    if(A.data == NULL || B.data == NULL || C.data == NULL){
        fprintf(stderr, "matrix: out of memory\n");
        status = 1;
    }
    else{
        read_matrix(argv[1],&A,&B);

        print_matrix(&A);

        printf("\n*\n");

        print_matrix(&B);

        if(A.cols != B.rows){
            /* multiplying anyway would index past the end of B.data */
            fprintf(stderr, "matrix: cannot multiply a %dx%d matrix by a %dx%d matrix\n",
                    A.rows, A.cols, B.rows, B.cols);
            status = 1;
        }
        else{
            printf("\n=\n");

            multiply(&A,&B,&C);
            print_matrix(&C);
        }
    }

    /* free(NULL) is a no-op, so a partial allocation failure is handled too */
    free(A.data);
    free(B.data);
    free(C.data);

    return status;
}

/*
 * Read M->rows * M->cols numbers from fp into M->data in row-major order.
 * Returns 0 on success, -1 as soon as a number is missing or malformed.
 */
static int read_elements(FILE *fp, struct matrix *M){

    int i,j;

    for(i=0; i < M->rows; i++)
        for(j=0; j < M->cols; j++)
            if(fscanf(fp, "%lf", (M->data + (i * M->cols + j))) != 1)
                return -1;

    return 0;
}

/*
 * read_matrix: load the values of both matrices from `file`.
 *
 * A and B must already carry their dimensions and point to data blocks large
 * enough for rows * cols doubles. The file holds the values of A, a single
 * operator character (for example '*'), then the values of B.
 *
 * On failure (file cannot be opened, or fewer numbers than expected) a
 * message is written to stderr and the program exits with status 1, matching
 * the way get_dim() reports an unreadable file.
 */
void read_matrix(char *file, struct matrix *A, struct matrix *B){

    FILE *fp;
    int c;
    int ok;

    if(file == NULL || (fp = fopen(file, "r")) == NULL){
        fprintf(stderr, "matrix: I cannot open %s\n", file ? file : "(null)");
        exit(1);
    }

    ok = (read_elements(fp, A) == 0);

    if(ok){
        /* skip the operator: first find the next non-blank character ... */
        do{
            c = fgetc(fp);
        }while(c != EOF && isspace(c));

        /* ... and push it back only when it already belongs to a number.
           Anything else is the operator and stays consumed, so a following
           sign or leading '.' of B's first value is left for fscanf. */
        if(c != EOF && (isdigit(c) || c == '.'))
            ungetc(c,fp);

        ok = (read_elements(fp, B) == 0);
    }

    fclose(fp);

    if(!ok){
        fprintf(stderr, "matrix: %s does not contain the expected matrix data\n", file);
        exit(1);
    }
}

/*
 * Close the text line currently being scanned for matrix m: when it held at
 * least one number it counts as a row, and the first such row fixes the
 * column count. Resets *count for the next line.
 */
static void end_row(int *rows, int *cols, int m, int *count){

    if(*count > 0){
        if(rows[m] == 0)
            cols[m] = *count;
        rows[m]++;
        *count = 0;
    }
}

/*
 * get_dim: scan `file` once and report the shape of the two matrices in it.
 *
 * The file holds a first matrix (one row per line, numbers separated by
 * blanks), an operator character, then a second matrix that may be closed
 * by ']'.
 *
 * file  path of the file to scan
 * rows  rows[0] / rows[1] receive the row counts of the first / second matrix
 * cols  cols[0] / cols[1] receive the column counts, taken from each
 *       matrix's first row
 *
 * Returns the operator code: 1 for '*', 2 for '+', 3 for '-', 4 for '/',
 * 0 when no operator was found. Exits with status 1 when the file cannot
 * be opened.
 */
int get_dim(char *file, int *rows, int *cols){

    FILE *fp;
    double a;
    int c;
    int op=0;
    int m=0;        /* matrix being measured: 0 = first, 1 = second */
    int count=0;    /* numbers seen so far on the current line */
    int done=0;

    if(file == NULL || (fp = fopen(file, "r")) == NULL){
        fprintf(stderr, "matrix: I cannot open %s\n", file ? file : "(null)");
        exit(1);
    }

    rows[0] = rows[1] = 0;
    cols[0] = cols[1] = 0;

    /* Character-driven scan: the loop ends on EOF, so a missing ']' or a
       missing final newline can no longer make it spin forever. */
    while(!done && (c = fgetc(fp)) != EOF){

        int number = 0;

        if(isdigit(c) || c == '.'){
            ungetc(c,fp);
            number = 1;
        }
        else if(c == '+' || c == '-'){
            /* a sign directly followed by a digit or '.' starts a number,
               otherwise it is an operator; the sign itself is irrelevant
               for counting, so it is simply dropped */
            int next = fgetc(fp);

            if(next != EOF)
                ungetc(next,fp);
            number = (next == '.' || isdigit(next));
        }

        if(number){
            if(fscanf(fp,"%lf",&a) == 1)
                count++;
        }
        else if(c == '\n')
            end_row(rows, cols, m, &count);

        else if(m == 0 && (c == '*' || c == '+' || c == '-' || c == '/')){
            end_row(rows, cols, m, &count);
            switch(c){
                case '*': op=1; break;
                case '+': op=2; break;
                case '-': op=3; break;
                default:  op=4; break;
            }
            m = 1;
        }
        else if(m == 1 && c == ']')
            done = 1;
        /* any other character (blanks, '[', ...) carries no information */
    }

    /* the last row may end at EOF or at ']' without a newline */
    end_row(rows, cols, m, &count);

    printf("\n");   /* kept: the original emitted one blank line while scanning */

    fclose(fp);
    return op;
}

/*
 * print_matrix: write A to stdout, one text line per matrix row.
 * Each element is printed through f(), in row-major order.
 */
void print_matrix(struct matrix *A){

    double *cell = A->data;     /* walks the elements in storage order */
    int r = 0;

    while(r < A->rows){
        int c = 0;

        while(c < A->cols){
            f(*cell);
            cell++;
            c++;
        }
        printf("\n");
        r++;
    }
}

/*
 * multiply: store the matrix product A * B in C.
 *
 * A  left operand  (A->rows x A->cols)
 * B  right operand (B->rows x B->cols); B->rows must equal A->cols
 * C  result, already allocated as A->rows x B->cols
 *
 * C is always zero-filled first. When the shapes do not fit together a
 * message is written to stderr and C is left zero-filled, instead of
 * indexing past the end of B->data or C->data.
 */
void multiply(struct matrix *A, struct matrix *B, struct matrix *C) 
{ 
    int i, j, k;

    /* initialize C to 0 */
    for (i = 0; i < C->rows; i++){
        for (j = 0; j < C->cols; j++)
            C->data[i * C->cols + j] = 0;
    }

    if (A->cols != B->rows || C->rows != A->rows || C->cols != B->cols){
        fprintf(stderr,
                "multiply: incompatible dimensions (%dx%d) * (%dx%d) -> (%dx%d)\n",
                A->rows, A->cols, B->rows, B->cols, C->rows, C->cols);
        return;
    }

    /* C(i,j) = sum over k of A(i,k) * B(k,j) */
    for (i = 0; i < A->rows; ++i)
        for (j = 0; j < B->cols; ++j){
            double sum = 0;

            for (k = 0; k < A->cols; ++k)
                sum += A->data[i * A->cols + k] * B->data[k * B->cols + j];

            C->data[i * C->cols + j] = sum;
        }
}

/*
 * f: print x followed by a space.
 *
 * A value whose fractional part is smaller than .00001 in magnitude is
 * printed as a whole number ("%.f"); any other value is printed with "%f".
 * The magnitude is compared because modf() returns a negative fractional
 * part for negative x, which would otherwise make -2.5 print as "-2".
 */
void f(double x)
{
    double whole;
    double frac = modf(x, &whole);

    if (fabs(frac) < .00001)
        printf("%.f ", whole);
    else
        printf("%f ", x);
}

标签: c file matrix
2条回答
戒情不戒烟
2楼-- · 2019-08-18 08:30

I don't know if there is a way to store the values of the matrices already at this point with dynamic memory allocation.

There is, but understand that even if you assume there are only two matrices separated by a single operator, a truly dynamic read of matrices with an unknown number of rows and columns requires tracking the number of rows and columns of each matrix encountered in the file, and careful attention to allocation and reallocation throughout the read process.

To simplify the approach you can first assume there will be less than or equal to 512 columns per-row (or some reasonable number that fits your data). This allows you to read a row of integer values into a temporary array before having to allocate storage for the row. (you can of course dynamically allocate and reallocate the temporary array to get to that point, but for purposes here, that just adds an additional set of conditional checks and reallocation -- where there are plenty already).

Now knowing the number of columns-per-row, (which you save in a variable to validate subsequent rows against), you can allocate storage for that row (and the remainder in that matrix until a row begins with a non-digit)

One way to simplify things is to store the rows and columns of the matrix, together with the variables that hold the number of rows and columns, as a single unit: use a struct to hold the matrix and its size. (This lends itself to 2 matrices, or any number you like.) This allows you to allocate an array of structs for any number of arrays to be read (or you can simply declare an array of 2 of them for your case and avoid the allocation/reallocation checks). For example:

/* One matrix: its row pointers together with its dimensions. */
typedef struct {
    int **a;            /* a[i] points to row i, so elements are addressed as a[i][j] */
    size_t row, col;    /* number of rows and columns stored in a */
} arr_t;

Where a is a pointer-to-pointer-to int and row and col hold the number of rows and columns in the allocated and filled a. The choice of pointer-to-pointer allows native indexing as a[i][j] in normal 2D fashion without having to map 2D indexing into a single offset. (you can do it either way, choice is yours)

The base storage scheme is simple: you allocate (or statically declare) some number of structs, then allocate some initial number of pointers for a, and as you read and convert each line into your temporary array, you allocate storage for each row, copy your temporary row to that block of memory, and assign the starting address of that block to the next available pointer. When the number of pointers used equals the number you initially allocated, you realloc more pointers and keep going. (When you are done with the arrays, make sure you free() everything you have allocated.)

That's basically it. The rest is just dotting your i's and crossing your t's on tracking what is allocated, and when a reallocation needs to occur. It's not complicated, it just takes careful attention to detail. The following implementation allocates (and will reallocate) for the structs (number of arrays) as well as the arrays themselves. It reads each line with fgets() assuming each will fit into a 1024 byte buffer (that too can be allocated and reallocated as required, but just as with the temporary array, that additional layer of allocation/reallocation has been omitted for example purposes).

Each line is then converted into integers using sscanf into the temporary array tmp (you would ideally use strtol for the error detecting benefit, but that was omitted to simplify the example). A block of memory is then allocated, the integers from tmp are copied to the new block of memory, and its address is assigned as the next pointer in the current array. aidx is used as your array of struct index (e.g. for arr[0], arr[1], ...). When a non-digit is encountered as the first character in the line, it is taken as the operator between arrays and it is stored in an array of char, the array index aidx is incremented, and filling of the next array proceeds.

At the end, the arrays are printed out and all memory previously allocated is freed. Take the time to work through it and understand what is happening at each point and why. (Use a piece of paper and a pencil to track the iteration logic through -- often much better than staring at a computer screen.)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#define MINA    2   /* initial number of structs, and of row pointers per array */
#define MAXC 1024   /* size of the line buffer and of the operator array */

/* One matrix read from the input: its row pointers together with its dimensions. */
typedef struct {
    int **a;            /* a[i] points to the separately allocated row i */
    size_t row, col;    /* number of rows and columns stored in a */
} arr_t;

/*
 * Read any number of integer arrays, separated by single-character operator
 * lines, from the file named by the first argument (stdin by default), then
 * print each array with its size and the operator that followed it.
 *
 * A line is data when its first non-blank character is a digit, or a sign
 * directly followed by a digit; blank lines are ignored; any other line is
 * taken as an operator and ends the current array.
 *
 * Returns 0 on success, 1 on an open, allocation or format error.
 */
int main (int argc, char **argv) {

    arr_t *arr;                     /* pointer to array type */
    size_t  aidx = 0, maxa = MINA,  /* arr index, max no. allocated */
            nrow = 0, ncol = 0,     /* current row/col count */
            maxrow = MINA, nop = 0; /* alloc'ed rows current array, no. op */
    char buf[MAXC],                 /* buffer to hold each line */
        op[MAXC];                   /* array to hold operators */
    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }
    /* allocate/validate maxa no. of arr_t */
    if (!(arr = calloc (maxa, sizeof *arr))) {
        perror ("calloc-arr");
        return 1;
    }

    while (fgets (buf, MAXC, fp)) { /* read each line into buf */
        int off = 0, n;         /* offset from start of line, n for "%n" */
        size_t tidx = 0;        /* temporary array index */
        char *p = buf;          /* pointer to advance in sscanf */
        int tmp[MAXC / 2 + 1];  /* temporary array, sized for max no. ints */

        /* <ctype.h> functions need an unsigned char value, hence the casts */
        while (isspace ((unsigned char)*p))     /* skip leading whitespace */
            p++;
        if (!*p)                /* blank line: neither data nor operator */
            continue;

        /* not a digit and not a signed number: end of array, operator line */
        if (!(isdigit ((unsigned char)*p) ||
              ((*p == '-' || *p == '+') && isdigit ((unsigned char)p[1])))) {
            if (nop == MAXC) {  /* op[] is full, refuse to write past it */
                fputs ("error: too many operators\n", stderr);
                return 1;
            }
            op[nop++] = *p;     /* store operator */
            if (nrow)           /* if rows stored */
                arr[aidx++].row = nrow; /* set final number of rows */
            nrow = ncol = 0;    /* reset nrow/ncol counters */
            maxrow = MINA;      /* reset allocate rows */
            continue;           /* get next line of data */
        }
        if (aidx == maxa) {     /* check if no. of structs need realloc */
            void *atmp = realloc (arr, 2 * maxa * sizeof *arr);  /* realloc */
            if (!atmp) {        /* validate */
                perror ("realloc-arr");
                return 1;
            }
            arr = atmp;         /* assign new block to arr */
            /* set all new bytes zero (realloc doesn't initialize) */
            memset (arr + maxa, 0, maxa * sizeof *arr); 
            maxa *= 2;      /* update struct count */
        }

        /* read all integers in line into tmp array */
        while (sscanf (p + off, "%d%n", &tmp[tidx], &n) == 1) {
            off +=  n;
            tidx++;
        }
        if (tidx) { /* if integers stored in tmp */
            if (nrow == 0) {   /* if first row in array */
                /* allocate/validate maxrow pointers */
                if (!(arr[aidx].a = malloc (maxrow * sizeof *arr[aidx].a))) {
                    perror ("malloc-arr[aidx].a");
                    return 1;
                }
                arr[aidx].col = tidx;   /* fix no. cols on 1st row */                
            }
            else if (nrow == maxrow) {  /* realloc of row ptrs req'd? */
                void *atmp =            /* always realloc with temp ptr */
                    realloc (arr[aidx].a, 2 * maxrow * sizeof *arr[aidx].a);
                if (!atmp) {            /* validate every alloc/realloc */
                    perror ("realloc-arr[aidx].a");
                    return 1;
                }
                arr[aidx].a = atmp;     /* assign realloced block to ptr */
                maxrow *= 2;            /* update maxrow to current alloc */
            }
            if (tidx != arr[aidx].col) {    /* validate no. of columns */
                fprintf (stderr, "error: invalid number of columns "
                        "arr[%zu].a[%zu]\n", aidx, nrow);
                return 1;
            }
            if (!(arr[aidx].a[nrow] =   /* allocate storage for integers */
                            malloc (tidx * sizeof *arr[aidx].a[nrow]))) {
                perror ("malloc-arr[aidx].a[nrow]");
                return 1;
            }
            /* copy integers from tmp to row, increment row count */
            memcpy (arr[aidx].a[nrow++], tmp, tidx * sizeof *tmp);
        }
    }
    if (nrow)   /* handle final array */
        arr[aidx++].row = nrow; /* set final number of rows */

    if (fp != stdin) fclose (fp);   /* close file if not stdin */

    for (size_t i = 0; i < aidx; i++) {     /* for each struct */
        printf ("\narray %zu:\n(%zu x %zu)\n",      /* output no. and size */
                i + 1, arr[i].row, arr[i].col);
        for (size_t j = 0; j < arr[i].row; j++) {   /* for each row */
            for (size_t k = 0; k < arr[i].col; k++) /* for each col */
                printf ("%4d", arr[i].a[j][k]);     /* output int */
            putchar ('\n');         /* tidy up with '\n' */
            free (arr[i].a[j]);     /* free row */
        }
        free (arr[i].a);    /* free pointers */
        if (i < nop)
            printf ("\noperator: '%c'\n", op[i]);
    }
    free (arr);     /* free structs */

    return 0;
}

Example Input File

$ cat dat/unknown_arrays.txt
1 2 3
4 5 6
1 2 3
*
4 5 6
1 2 3

Example Use/Output

$ ./bin/read_unknown_arrays dat/unknown_arrays.txt

array 1:
(3 x 3)
   1   2   3
   4   5   6
   1   2   3

operator: '*'

array 2:
(2 x 3)
   4   5   6
   1   2   3

Memory Use/Error Check

In any code you write that dynamically allocates memory, you have 2 responsibilities regarding any block of memory allocated: (1) always preserve a pointer to the starting address for the block of memory so, (2) it can be freed when it is no longer needed.

It is imperative that you use a memory error checking program to ensure you do not attempt to access memory or write beyond/outside the bounds of your allocated block, attempt to read or base a conditional jump on an uninitialized value, and finally, to confirm that you free all the memory you have allocated.

For Linux valgrind is the normal choice. There are similar memory checkers for every platform. They are all simple to use, just run your program through it.

$ valgrind ./bin/read_unknown_arrays dat/unknown_arrays.txt
==7714== Memcheck, a memory error detector
==7714== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==7714== Using Valgrind-3.12.0 and LibVEX; rerun with -h for copyright info
==7714== Command: ./bin/read_unknown_arrays dat/unknown_arrays.txt
==7714==

array 1:
(3 x 3)
   1   2   3
   4   5   6
   1   2   3

  operator: '*'

array 2:
(2 x 3)
   4   5   6
   1   2   3
==7714==
==7714== HEAP SUMMARY:
==7714==     in use at exit: 0 bytes in 0 blocks
==7714==   total heap usage: 10 allocs, 10 frees, 724 bytes allocated
==7714==
==7714== All heap blocks were freed -- no leaks are possible
==7714==
==7714== For counts of detected and suppressed errors, rerun with: -v
==7714== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

Always confirm that you have freed all memory you have allocated and that there are no memory errors.

Work through the example. Understand that it doesn't matter whether you are allocating/reallocating for a 2x2 or a 150x150 array: the validation and reallocation checks are the same, which is what makes the code for short input files like yours seem deceptively over-complicated. It isn't; it just takes the same code to handle a 2x2 or a 150x150. Let me know if you have further questions.

查看更多
女痞
3楼-- · 2019-08-18 08:36

Some remarks on the first version of your question

Your loop

 while (chr != EOF)
 {
   //Count whenever new line is encountered
   if (chr == '\n')
       rows++;
   //take next character from file.
   chr = getc(fp);
 }

reads up to the end of the file, so it counts the rows of both matrices together; you need to detect the "*" in order to separate the first matrix from the second

you do not detect the number of columns, you need to read line per line, then to count the number of values per line (at least the first)

In

str = (char *) malloc(6 * sizeof(char));

if( fgets (str, 24, fp)!=NULL ) {

you can have an undefined behavior because you read up to 24 characters doing fgets (str, 24, fp) while you allocated only 6

c != EOF requires c is an int, not a char


Here is a proposal, I do not know what are the kind of number you expect so I do not try to read number, I just look at elements separated by space, you can add a sscanf or equivalent to check each element is a number. I also suppose the operator is a character alone on its line (there is a newline just after)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Read one complete line (including its '\n' when present) from fp into
 * *buf, growing the buffer as needed. *buf may start as NULL with *cap == 0;
 * the caller frees *buf. Written with fgets so it needs only ISO C.
 * Returns 1 when a line was read, 0 at end of file or on allocation failure.
 */
static int read_line(FILE * fp, char ** buf, size_t * cap)
{
  size_t len = 0;

  for (;;) {
    if (*cap - len < 2) {
      /* fgets needs room for at least one character plus the terminator */
      size_t ncap = (*cap != 0) ? *cap * 2 : 128;
      char * tmp = realloc(*buf, ncap);

      if (tmp == NULL)
        return 0;
      *buf = tmp;
      *cap = ncap;
    }

    /* read in bounded chunks so the size always fits fgets' int parameter */
    int room = (*cap - len > 1024) ? 1024 : (int) (*cap - len);

    if (fgets(*buf + len, room, fp) == NULL)
      break;

    len += strlen(*buf + len);

    if ((len > 0) && ((*buf)[len - 1] == '\n'))
      return 1;
  }

  /* a last line without '\n' still counts */
  return len > 0;
}

/*
 * readMat: measure the next matrix in fp.
 *
 * Reads lines until an operator line (a single '*', '/', '-' or '+',
 * optionally surrounded by blanks) or the end of the file. Each other
 * non-blank line is one row; its elements are separated by spaces, tabs or
 * line-end characters, so trailing blanks and CRLF line ends do not change
 * the column count. Blank lines are skipped.
 *
 * cols  receives the number of columns, 0 when the input is invalid
 * rows  receives the number of rows read
 * oper  receives the operator that ended the matrix, 0 when the matrix
 *       ended at the end of the file
 *
 * Returns 1 when a matrix with a consistent column count was read, else 0.
 */
int readMat(FILE * fp, int * cols, int * rows, char * oper)
{
  static const char blanks[] = " \t\r\n";

  *cols = *rows = 0;
  *oper = 0;

  char * lineptr = NULL;
  size_t n = 0;

  while (read_line(fp, &lineptr, &n)) {
    const char * p = lineptr + strspn(lineptr, blanks);

    if (*p == '\0')
      continue; /* blank line */

    /* operator: one of * / - + with nothing but blanks after it */
    if (((*p == '*') || (*p == '/') || (*p == '-') || (*p == '+'))
        && (p[1 + strspn(p + 1, blanks)] == '\0')) {
      *oper = *p;
      break;
    }

    *rows += 1;

    /* count the blank-separated elements of the row */
    int c = 0;

    while (*p != '\0') {
      c += 1;
      p += strcspn(p, blanks);
      p += strspn(p, blanks);
    }

    if (*cols == 0)
      *cols = c;
    else if (*cols != c) {
      /* invalid input */
      *cols = 0;
      break;
    }
  }

  free(lineptr);

  return *cols != 0;
}

/*
 * Usage: <program> <file>
 *
 * Reports the dimensions of the two matrices in the file and the operator
 * between them. Diagnostics go to stderr, each on its own line, and the exit
 * status is EXIT_FAILURE whenever the file cannot be processed, so scripts
 * can detect the error.
 */
int main(int argc, char ** argv)
{
  if (argc != 2) {
    fprintf(stderr, "Usage: %s <file>\n", (argc > 0) ? *argv : "prog");
    return EXIT_FAILURE;
  }

  FILE * fp = fopen(argv[1], "r");

  if (fp == NULL) {
    fprintf(stderr, "cannot open '%s'\n", argv[1]);
    return EXIT_FAILURE;
  }

  int status = EXIT_FAILURE;
  int cols, rows;
  char oper;

  if (!readMat(fp, &cols, &rows, &oper))
    fputs("invalid first matrice\n", stderr);
  else if (oper == 0)
    fputs("operator is missing\n", stderr);
  else {
    printf("first matrice has %d columns and %d rows\noper is %c\n", cols, rows, oper);

    if (!readMat(fp, &cols, &rows, &oper))
      fputs("invalid second matrice\n", stderr);
    else if (oper != 0)
      fputs("unexpected operator\n", stderr);
    else {
      printf("second matrice has %d columns and %d rows\n", cols, rows);
      status = EXIT_SUCCESS;
    }
  }

  fclose(fp);

  return status;
}

Compilation and execution :

pi@raspberrypi:/tmp $ gcc -g -pedantic -Wall -Wextra m.c
pi@raspberrypi:/tmp $ cat m
1 2 3
4 5 6
1 2 3
*
  44  5.2   6e12
-1     2   333
pi@raspberrypi:/tmp $ ./a.out m
first matrice has 3 columns and 3 rows
oper is *
second matrice has 3 columns and 2 rows
pi@raspberrypi:/tmp $ 

If you do not have getline replace

  char * lineptr = NULL;
  size_t n = 0;

  while (getline(&lineptr, &n, fp) > 0) {

by for instance

  char * lineptr = malloc(1024);

  while (fgets(lineptr, 1024, fp) != NULL) {
查看更多
登录 后发表回答