How to extract all Numbers out of a String using sscanf

2019-03-06 19:09发布

问题:

How to extract all Numbers out of a String with following format using sscanf in C ONLY:

" (3, 2, 4,5, 10 )"

Whitespaces can be everywhere but not between Digits of course. There can be 0 Whitespaces or more at every position.

The String has to have the right format, or it's an error. " (3,2, " is an error, for example. Every character that is not whitespace or part of the format is an error.

Using only sscanf (its not optional)

My Idea, of course, is to while loop with sscanf, but the formatting is my problem

EDIT: The number of values inside the brackets can vary. So we can have 0 numbers or n numbers inside the brackets, with n being any number: (a, a+1, ..., a+n).

EDIT2: Of course it's allowed to use everything included in stdio.h, but no external libraries.

回答1:

Preamble

It seems that the format of the data can be summarized as: white space anywhere; open parenthesis followed by a sequence of one or more plain integers (no signs, no decimal points) separated by commas and terminated by a close parenthesis. Alternatively, there could be zero integers between ( and ).

The data must be processed using sscanf(); there is no stated rule about a single call to sscanf(). One of the advantages of using sscanf() over the file I/O alternatives (scanf(), fscanf(), etc) is that you can retry a scan if necessary.

Note that the scanf() family does not handle 'optional item' very well. White space is mostly trivial — all conversion specifiers except %c, %[…] (scan sets) and %n skip leading white space, and a white space character in a format string matches zero or more white space characters in the input.

Code

The function string_analyzer() does the job; it has a modestly complex interface because it does no I/O itself — it analyzes the string and adds the numbers to an array structure that is passed into the function. It also sets a message that tells the caller a bit about what went wrong. The other functions are support functions (for the struct int_array type) and the test harness (main() plus test_string_analyzer()).

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Growable array of int values.
 *
 * base   - heap buffer holding the elements (NULL while empty); it must be
 *          an int pointer so that stored values are not truncated to the
 *          range of char.
 * maxlen - number of elements the buffer currently has room for.
 * curlen - number of elements currently stored (curlen <= maxlen).
 */
struct int_array
{
    int    *base;
    size_t  maxlen;
    size_t  curlen;
};

/* Forward declarations: these helpers are defined after main(). */

/* Print the tag, the element count and the elements of the array. */
static void dump_int_array(const char *tag, const struct int_array *array);
/* Append one value to the array, growing its buffer when it is full. */
static void add_to_int_array(struct int_array *array, int data);
/* Release the array's buffer and reset it to the empty state. */
static void free_int_array(struct int_array *array);
/* Run string_analyzer() on one string and print the outcome. */
static void test_string_analyzer(const char *str);

/*
 * Parse a parenthesised, comma-separated list of integers such as
 * " (3, 2, 4,5, 10 )" or "()" using only sscanf().
 *
 * str   - NUL-terminated string to analyze; white space is allowed
 *         anywhere except inside a number.
 * array - receives every number that was successfully parsed, in order.
 *         On failure it still holds the numbers parsed before the error.
 * msg   - set to a static string describing the outcome.
 *
 * Returns 0 when the whole string is a well-formed list, -1 otherwise.
 */
static int string_analyzer(const char *str, struct int_array *array, char **msg)
{
    const char *end = str + strlen(str);
    /*
     * A %[...] conversion stores the matched characters followed by a NUL,
     * so the target needs two bytes and the conversion needs a width of 1.
     */
    char trailer[2] = "";
    int offset = 0;
    int data;

    if (sscanf(str, " ( %1[)] %n", trailer, &offset) == 1)
    {
        /* Empty list */
        assert(trailer[0] == ')');
        if (str + offset == end)
        {
            *msg = "it is an empty list";
            return 0;
        }
        else
        {
            *msg = "it has extra characters after an empty list";
            return -1;
        }
    }

    /* First element: "( number" followed by exactly one ',' or ')'. */
    if (sscanf(str, " ( %d %1[,)] %n", &data, trailer, &offset) != 2)
    {
        *msg = "it does not start correctly";
        return -1;
    }
    add_to_int_array(array, data);

    /* The trailing blank before %n makes offset skip white space too. */
    const char *source = str + offset;

    /* Keep reading "number separator" pairs until the closing parenthesis. */
    while (trailer[0] != ')')
    {
        if (sscanf(source, "%d %1[,)] %n", &data, trailer, &offset) != 2)
        {
            *msg = "it is incorrectly formatted";
            return -1;
        }
        add_to_int_array(array, data);
        source += offset;
    }

    /* Nothing but (already skipped) white space may follow the ')'. */
    if (source != end)
    {
        *msg = "it does not end correctly";
        return -1;
    }
    *msg = "it is valid";
    return 0;
}

/* Test driver: feeds a fixed set of valid and invalid strings to the analyzer. */
int main(void)
{
    const char *const inputs[] =
    {
        /* Valid */
        " (3, 2, 4,5, 10 )",
        " ( 4 , 6 ) ",
        "(4)",
        "()",
        /* random -n 15 10 99 | commalist -t -B 8 -b '"( ' -T ' )",' */
        "( 65, 83, 81, 60, 61, 23, 48, 89, 67, 27, 73, 25, 92, 13, 67 )",
        /* Invalid */
        " (3,2, ",
        "3,2",
        " (3,2,1) apathy",
        " () apathy",
        " (3,X,1)",
        " (3X,1)",
    };
    const size_t input_count = sizeof(inputs) / sizeof(inputs[0]);
    size_t k = 0;

    while (k < input_count)
    {
        test_string_analyzer(inputs[k]);
        k++;
    }

    return 0;
}

static void test_string_analyzer(const char *str)
{
    struct int_array array = { 0, 0, 0 };
    char *msg = 0;

    printf("Analyzing [%s]\n", str);
    int rc = string_analyzer(str, &array, &msg);
    if (rc == 0)
        printf("String '%s' OK: %s\n", str, msg);
    else
        printf("String '%s' is misformatted: %s\n", str, msg);
    dump_int_array("List contents", &array);

    free_int_array(&array);
}

/*
 * Print "<tag> (<count>): " followed by the elements separated by ", ".
 * Once 64 or more characters have been written on the current line, the
 * line is ended and the next element starts on a new, indented line.
 */
static void dump_int_array(const char *tag, const struct int_array *array)
{
    const char *separator = "";
    int column = printf("%s (%zu): ", tag, array->curlen);
    size_t idx = 0;

    while (idx < array->curlen)
    {
        column += printf("%s%d", separator, array->base[idx]);
        idx++;
        if (column < 64)
        {
            separator = ", ";
            continue;
        }
        /* Line is full: break it and indent the continuation. */
        putchar('\n');
        separator = "    ";
        column = 0;
    }

    /* Terminate the last line unless it was just broken. */
    if (column > 0)
    {
        putchar('\n');
    }
}

/*
 * Append data to the array, growing the buffer (to 2 * maxlen + 2
 * elements) when it is full.
 *
 * Allocation failure is fatal: a message is written to stderr and the
 * program exits, so the caller never sees a NULL buffer.
 */
static void add_to_int_array(struct int_array *array, int data)
{
    if (array->curlen >= array->maxlen)
    {
        assert(array->curlen == array->maxlen);
        const size_t elem_size = sizeof(array->base[0]);
        const size_t max_elems = (size_t)-1 / elem_size;

        /* Refuse to grow if the new element or byte count would wrap. */
        if (max_elems < 2 || array->maxlen > (max_elems - 2) / 2)
        {
            fprintf(stderr, "Integer array too large to grow\n");
            exit(EXIT_FAILURE);
        }

        size_t newlen = array->maxlen * 2 + 2;
        /* Keep the old pointer until realloc() is known to have succeeded. */
        void  *newarr = realloc(array->base, newlen * elem_size);
        if (newarr == NULL)
        {
            fprintf(stderr, "Failed to allocate %zu bytes of memory\n",
                    newlen * elem_size);
            exit(EXIT_FAILURE);
        }
        array->base = newarr;
        array->maxlen = newlen;
    }
    array->base[array->curlen++] = data;
}

/* Release the element buffer and return the array to its empty state. */
static void free_int_array(struct int_array *array)
{
    free(array->base);
    /* Clear all three fields so the array can be reused safely. */
    array->curlen = 0;
    array->maxlen = 0;
    array->base = 0;
}

Sample output:

Analyzing [ (3, 2, 4,5, 10 )]
String ' (3, 2, 4,5, 10 )' OK: it is valid
List contents (5): 3, 2, 4, 5, 10
Analyzing [ ( 4 , 6 ) ]
String ' ( 4 , 6 ) ' OK: it is valid
List contents (2): 4, 6
Analyzing [(4)]
String '(4)' OK: it is valid
List contents (1): 4
Analyzing [()]
String '()' OK: it is an empty list
List contents (0): 
Analyzing [( 65, 83, 81, 60, 61, 23, 48, 89, 67, 27, 73, 25, 92, 13, 67 )]
String '( 65, 83, 81, 60, 61, 23, 48, 89, 67, 27, 73, 25, 92, 13, 67 )' OK: it is valid
List contents (15): 65, 83, 81, 60, 61, 23, 48, 89, 67, 27, 73, 25
    92, 13, 67
Analyzing [ (3,2, ]
String ' (3,2, ' is misformatted: it is incorrectly formatted
List contents (2): 3, 2
Analyzing [3,2]
String '3,2' is misformatted: it does not start correctly
List contents (0): 
Analyzing [ (3,2,1) apathy]
String ' (3,2,1) apathy' is misformatted: it does not end correctly
List contents (3): 3, 2, 1
Analyzing [ () apathy]
String ' () apathy' is misformatted: it has extra characters after an empty list
List contents (0): 
Analyzing [ (3,X,1)]
String ' (3,X,1)' is misformatted: it is incorrectly formatted
List contents (1): 3
Analyzing [ (3X,1)]
String ' (3X,1)' is misformatted: it does not start correctly
List contents (0): 


标签: c string scanf