Dynamic array and prebuilt data

2020-05-14 18:38发布

问题:

In C, I am trying to do the following:

/* Length-prefixed array: `length` is followed by a C99 flexible array member. */
typedef struct {
    int length;
    int items[];     /* 1 */
} wchararray_t;

/* A hash followed by an embedded wchararray_t as the last member. */
typedef struct {
    long hash;
    wchararray_t chars;   /* 2 */
} string_t;

/* The prebuilt static string the author would like to write.  The nested
   initializer for `items` at marker 3 is the part GCC rejects. */
static string_t s1 = {
    617862378,
    { 5, { 'H', 'e', 'l', 'l', 'o' } }  /* 3 */
};

In full words, I would like a type string_t that ends in another type wchararray_t that is itself dynamically sized -- its size being stored in length. Moreover, I would also like to write a prebuilt particular string, as static data, here s1 of length 5.

The code above assumes C99 support for /* 1 */. The inclusion of the substructure into the bigger structure at /* 2 */ is, as far as I understand, not supported even by the C99 standard -- but GCC accepts it. However, at /* 3 */ GCC gives up:

error: initialization of flexible array member in a nested context

As a workaround, the ideal code above is so far written as the following hack, which "kind of works":

/* Generic types: `items` is declared with a single element and stands in
   for an array whose real size is given by `length`. */
typedef struct { int length; int items[1]; } wchararray_t;
typedef struct { long hash; wchararray_t chars; } string_t;

/* Fixed-size twins with room for exactly five items; they exist only so the
   static data below can be declared and initialized. */
typedef struct { int length; int items[5]; } wchararray_len5_t;
typedef struct { long hash; wchararray_len5_t chars; } string_len5_t;

/* The initializer fills the first union member `a`; `b` overlays the same
   storage and is the member the rest of the code is meant to use. */
static union { string_len5_t a; string_t b; } s1 = {
    617862378,
    { 5, { 'H', 'e', 'l', 'l', 'o' } }
};

...and we'd use "s1.b" as the prebuilt string_t (and never refer to "s1.a", which is here only for the static declaration of s1). However, it breaks in the newest GCC 4.8, which optimizes away parts of our code because -- obviously -- any loop over the items of a wchararray_t can iterate only once, given that it is an array of length 1.

This particular issue is fixed by giving gcc the option -fno-aggressive-loop-optimizations. It can probably also be fixed by not declaring the length in wchararray_t's items[] array, making it a dynamic array "just because". However, this way to write code is such a hack that I'd prefer a fully different way to approach the problem...

(Note that it is all generated C code produced by PyPy, as opposed to hand-written code; any change is fine, including if it requires changing the way we access the data everywhere, as long as the "valid" C optimizations are not prevented.)

EDIT: replaced "char[]" with "int[]", which doesn't accept the double-quote syntax "hello". This is because I'm looking for a solution for any array type.

NOT RESOLVED: thanks everybody for your suggestions. It seems there is no clean way, so I have implemented the hackish solution: declaring the types k+1 times, once with a flexible array "int items[];" and the k other times with "int items[N];" for the various values of N that are needed. This requires some additional hacks: e.g. not using flexible arrays for MSVC (they work differently there; I didn't investigate whether exactly the same syntax would work); and GCC follows what C99 says and is not happy with structs that would contain "int items[];" as their only field. It is however happy if we add a dummy field "char _dummy[0];"... which is not strictly C99 as far as I know...

回答1:

It's hackish, but could this work?

#include <stdio.h>

/* Length-prefixed array ending in a flexible array member. */
typedef struct {
    int length;
    int items[];     /* 1 */
} wchararray_t;

/* string_t gets its own trailing flexible array member right after `chars`.
   The static initializer can target `dummy`, which is not nested, instead of
   chars.items. */
typedef struct {
    long hash;
    wchararray_t chars;   /* 2 */
    int dummy[]; /* hack here */
} string_t;

/* `{ 5 }` sets chars.length only; the characters are supplied as the
   initializer of `dummy`.  Reading them back through chars.items assumes that
   `dummy` starts at the same offset as chars.items (not guaranteed in
   general; see the alignment discussion that follows in the answer). */
static string_t s1 = {
    617862378, { 5 },
    { 'H', 'e', 'l', 'l', 'o' }  /* 3: changed assignment */
};

/* Write the five prebuilt characters, read through chars.items, followed by
   a newline. */
int main(void)
{
    int pos = 0;

    while (pos < 5) {
        putchar(s1.chars.items[pos]);
        pos++;
    }
    putchar('\n');
    return 0;
}

GCC gives me warnings:

xx.c:10:22: warning: invalid use of structure with flexible array member [-pedantic]
xx.c:16:9: warning: initialization of a flexible array member [-pedantic]
xx.c:16:9: warning: (near initialization for ‘s1.dummy’) [-pedantic]

But it seems to work.

Reference

Edit: How about adding a "padding member" that makes sure items[] is always properly aligned?

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>

/* change to the strictest alignment type */
typedef long aligner;

/* Array header with an extra `padding` member of the strictest-aligned type
   placed immediately before the flexible array, so that `items` begins on an
   `aligner` boundary. */
typedef struct {
    long stuff;   /* to show misalignment on 64-bit */
    int length;
    aligner padding;
    int items[];
} chararray_t;

/* `dummy` is a second flexible array member directly after `chars`; it is the
   member the static initializer actually fills. */
typedef struct {
    long hash;
    chararray_t chars;
    int dummy[];
} string_t;

/* `{ 42, 5 }` sets chars.stuff and chars.length (padding is left zero); the
   last brace group initializes `dummy`. */
static string_t b1 = {
    617862378,
    { 42, 5 },
    {-1, -2, -3, -4, -5}
};

int main(void)
{
    int i;

    printf("sizeof chararray_t: %zu\n", sizeof(chararray_t));
    printf("offsetof items: %zu\n", offsetof(chararray_t, items));

    printf("sizeof string_t: %zu\n", sizeof(string_t));
    printf("offsetof dummy: %zu\n", offsetof(string_t, dummy));

    for (i=0; i < 5; ++i) {
        printf("%d ", b1.chars.items[i]);
    }
    putchar('\n');
    for (i=0; i < 5; ++i) {
        printf("%d ", b1.dummy[i]);
    }
    putchar('\n');
    return 0;
}

When I run the above, I seem to get the correct answer:

sizeof chararray_t: 24
offsetof items: 24
sizeof string_t: 32
offsetof dummy: 32
-1 -2 -3 -4 -5 
-1 -2 -3 -4 -5 


回答2:

Answering my own question to write it down. Yet another hack would be to build on top of Alok's suggestion, which may occasionally give a bogus alignment -- and then fix the alignment with init-time code. This assumes that the vast majority of such types used in a program happen to be correctly aligned. Code:

/* Array header without any padding member: on a 64-bit target `items` may
   start at an offset that differs from where string_t.dummy lands. */
typedef struct {
    long stuff;   /* to show misalignment on 64-bit */
    int length;
    int items[];
} chararray_t;

/* `dummy` is the trailing flexible array member that static initializers
   fill in place of chars.items. */
typedef struct {
    long hash;
    chararray_t chars;
    int dummy[];
} string_t;


/* `{ 42, 5 }` sets chars.stuff and chars.length; the last brace group is
   stored in `dummy`. */
static string_t b1 = {
    617862378,
    { 42, 5 },
    {-1, -2, -3, -4, -5}
};
/* same with b2 .. b6 */

/* Init-time fix-up for the prebuilt strings.  When string_t.dummy does not
   start at the same offset as chars.items, slide each string's data from
   `dummy` down to `chars.items` so that normal accesses find it. */
void fixme(void) {
    const size_t dummy_at = offsetof(string_t, dummy);
    const size_t items_at =
        offsetof(string_t, chars) + offsetof(chararray_t, items);

    /* often compares as equal, and the whole function is removed */
    if (dummy_at == items_at) {
        return;
    }

    static string_t *p_array[] = { &b1, &b2, &b3, &b4, &b5, &b6 };

    for (int idx = 0; idx < 6; idx++) {
        string_t *cur = p_array[idx];
        /* memmove, not memcpy: source and destination may overlap. */
        memmove(cur->chars.items, cur->dummy, cur->chars.length * sizeof(int));
    }
}


回答3:

#include <stdio.h>
/* Length-prefixed character array ending in a flexible array member. */
typedef struct {
    int length;
    char items[];     /* 1 */
} chararray_t;

/* A hash followed by the embedded, variable-length character array. */
typedef struct {
    long hash;
    chararray_t chars;   /* 2 */
} string_t;

/* Original brace-list form from the question, kept for comparison. */
/*static string_t s1 = {
    617862378,
    { 5, { 'H', 'e', 'l', 'l', 'o' } }  // 3
};*/

/* Same data, but the flexible array is initialized from a string literal.
   The length is 6 because the literal's terminating NUL is counted too. */
static string_t s1 =
{
    617862378,
    {6,"Hello"} /* 3 */
};

/* Print the hash, the stored length and the characters of the prebuilt
   string on one line. */
int main(void)
{
    /* hash is a long, so it must be printed with %ld; passing a long to %d
       is undefined behaviour. */
    printf("%ld %d %s\n", s1.hash, s1.chars.length, s1.chars.items);
    return 0;
}

Add 1 for the null character, et voila! :)

Edit, Also works for 2 levels of nesting (GCC 4.8.0)

#include <stdio.h>
/* Length-prefixed character array ending in a flexible array member. */
typedef struct {
    int length;
    char items[];     /* 1 */
} chararray_t;

/* A hash followed by the embedded, variable-length character array. */
typedef struct {
    long hash;
    chararray_t chars;   /* 2 */
} string_t;

/* One more level of nesting: string_t is itself the last member here. */
typedef struct {
    long number;
    string_t arr;
}experiment_t;

/* number = 617862378, arr.hash = 786, arr.chars.length = 6 (five letters plus
   the terminating NUL), and arr.chars.items comes from the string literal. */
static experiment_t s1 =
{
    617862378,
    {786,{6,"Hello"}} /* 3 */
};

/* Print both long fields, the stored length and the characters of the
   doubly nested prebuilt string on one line. */
int main(void)
{
    /* number and hash are both long, so they must be printed with %ld;
       passing a long to %d is undefined behaviour. */
    printf("%ld %ld %d %s\n", s1.number, s1.arr.hash, s1.arr.chars.length, s1.arr.chars.items);
    return 0;
}

----------EDIT 2------------------ Found a way around the limitation (see the related question "C initialize array within structure").

Final code:

#include <stdio.h>
/* Length plus a pointer to the elements; the data lives outside the struct. */
typedef struct {
    int length;
    int *items;     /* 1 */
} intarray_t;

/* A hash, the embedded array descriptor, and two trailing ints. */
typedef struct {
    long hash;
    intarray_t chars;   /* 2 */
    int dummy[2];
} string_t;

/* Attempted form, kept for comparison: a brace list cannot initialize the
   pointer member `items`. */
/*string_t s1 =
{
    617862378,
    {
        6,
        {1,2,3,4,5,6}
    },
    {
        0,0
    }
};*/

/* `chars` starts out as length 0 with a null `items` pointer and is filled in
   at run time.  `{0}` is used rather than empty braces, which are not valid
   C before C23. */
string_t s1 = {617862378,{0},{0,0}};

/* Attach a six-element array to s1 at run time, then print the hash and
   length on one line and the elements (unseparated) on the next. */
int main(void)
{
    /* The compound literal has automatic storage tied to main's body, so the
       pointer stored in s1.chars stays valid for everything below. */
    intarray_t t1 = {.length = 6, .items = (int[6]){1,2,3,4,5,6}};
    s1.chars = t1;

    /* hash is a long, so it must be printed with %ld; passing a long to %d
       is undefined behaviour. */
    printf("%ld %d\n", s1.hash, s1.chars.length);
    for (int i = 0; i < s1.chars.length; i++)
    {
        printf("%d", s1.chars.items[i]);
    }
    putchar('\n');
    return 0;
}


回答4:

I assume there is some reason for keeping the string "inside" the struct and that you want to save a char, by not initializing with a C-string.

But, if not, you could do:

/* Length plus a pointer to the characters; the data lives outside the struct. */
typedef struct {
    int length;
    char *items;     /* 1 */
} chararray_t;

/* A hash followed by the array descriptor. */
typedef struct {
    long hash;
    chararray_t chars;   /* 2 */
} string_t;

/* Exactly five characters with no terminating NUL, held in writable static
   storage.  This replaces patching a string literal after the fact, which is
   undefined behaviour and cannot be written as a statement at file scope. */
static char s1_items[5] = { 'H', 'e', 'l', 'l', 'o' };

static string_t s1 = {
    617862378,
    { 5, s1_items }  /* 3 */
};

It looks like you can do the union trick, but with a typecast instead?

#include <stdio.h>

/* Generic types: the array ends in a flexible array member. */
typedef struct { int length; int items[]; } wchararray_t;
typedef struct { long hash; wchararray_t chars; } string_t;

/* Fixed-size twins with room for exactly five items, used only to declare
   and initialize the static data. */
typedef struct { int length; int items[5]; } wchararray_len5_t;
typedef struct { long hash; wchararray_len5_t chars; } string_len5_t;

/* The initializer fills the first union member `a`. */
static union { string_len5_t a; string_t b; } s5 = {
    617862378,
    { 5, { 'H', 'e', 'l', 'l', 'o' } }
};

/* The rest of the code reaches the data through this generic pointer, which
   is obtained by casting the address of the whole union. */
string_t *s1 = (string_t*) &s5 ;

/* Print every character of the prebuilt string through the generic pointer,
   then a newline.  argc and argv are unused. */
int main( int argc, char *argv[])
{
  int pos = 0;

  while ( pos < s1->chars.length )
    {
      printf ( "%c", s1->chars.items[pos] );
      pos++;
    }
  printf( "\n" );
  return 0;
}


标签: c arrays static