Split string with delimiters in C

2018-12-31 02:17发布

How do I write a function to split and return an array for a string with delimiters in the C programming language?

char* str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
str_split(str,',');

标签: c string split
25条回答
只若初见
2楼-- · 2018-12-31 02:58

This is a string splitting function that can handle multi-character delimiters. Note that if the delimiter is longer than the string that is being split, then buffer and stringLengths will be set to (void *) 0, and numStrings will be set to 0.

This algorithm has been tested, and works. (Disclaimer: It has not been tested for non-ASCII strings, and it assumes that the caller gave valid parameters)

void splitString(const char *original, const char *delimiter, char ** & buffer, int & numStrings, int * & stringLengths){
    const int lo = strlen(original);
    const int ld = strlen(delimiter);
    if(ld > lo){
        buffer = (void *)0;
        numStrings = 0;
        stringLengths = (void *)0;
        return;
    }

    numStrings = 1;

    for(int i = 0;i < (lo - ld);i++){
        if(strncmp(&original[i], delimiter, ld) == 0) {
            i += (ld - 1);
            numStrings++;
        }
    }

    stringLengths = (int *) malloc(sizeof(int) * numStrings);

    int currentStringLength = 0;
    int currentStringNumber = 0;
    int delimiterTokenDecrementCounter = 0;
    for(int i = 0;i < lo;i++){
        if(delimiterTokenDecrementCounter > 0){
            delimiterTokenDecrementCounter--;
        } else if(i < (lo - ld)){
            if(strncmp(&original[i], delimiter, ld) == 0){
                stringLengths[currentStringNumber] = currentStringLength;
                currentStringNumber++;
                currentStringLength = 0;
                delimiterTokenDecrementCounter = ld - 1;
            } else {
                currentStringLength++;
            }
        } else {
            currentStringLength++;
        }

        if(i == (lo - 1)){
            stringLengths[currentStringNumber] = currentStringLength;
        }
    }

    buffer = (char **) malloc(sizeof(char *) * numStrings);
    for(int i = 0;i < numStrings;i++){
        buffer[i] = (char *) malloc(sizeof(char) * (stringLengths[i] + 1));
    }

    currentStringNumber = 0;
    currentStringLength = 0;
    delimiterTokenDecrementCounter = 0;
    for(int i = 0;i < lo;i++){
        if(delimiterTokenDecrementCounter > 0){
            delimiterTokenDecrementCounter--;
        } else if(currentStringLength >= stringLengths[currentStringNumber]){
            buffer[currentStringNumber][currentStringLength] = 0;
            delimiterTokenDecrementCounter = ld - 1;
            currentStringLength = 0;
            currentStringNumber++;
        } else {
            buffer[currentStringNumber][currentStringLength] = (char)original[i];
            currentStringLength++;
        }
    }
    buffer[currentStringNumber][currentStringLength] = 0;
}

Sample code:

int main(){
    const char *string = "STRING-1 DELIM string-2 DELIM sTrInG-3";
    char **buffer;
    int numStrings;
    int * stringLengths;

    splitString(string, " DELIM ", buffer, numStrings, stringLengths);

    for(int i = 0;i < numStrings;i++){
        printf("String: %s\n", buffer[i]);
    }
}

Libraries:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
查看更多
美炸的是我
3楼-- · 2018-12-31 02:59

Try use this.

char** strsplit(char* str, const char* delim){
    char** res = NULL;
    char*  part;
    int i = 0;

    char* aux = strdup(str);

    part = strdup(strtok(aux, delim));

    while(part){
        res = (char**)realloc(res, (i + 1) * sizeof(char*));
        *(res + i) = strdup(part);

        part = strdup(strtok(NULL, delim));
        i++;
    }

    res = (char**)realloc(res, i * sizeof(char*));
    *(res + i) = NULL;

    return res;
}
查看更多
几人难应
4楼-- · 2018-12-31 03:00

You can use the strtok() function to split a string (and specify the delimiter to use). Note that strtok() will modify the string passed into it. If the original string is required elsewhere make a copy of it and pass the copy to strtok().

EDIT:

Example (note it does not handle consecutive delimiters, "JAN,,,FEB,MAR" for example):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

char** str_split(char* a_str, const char a_delim)
{
    char** result    = 0;
    size_t count     = 0;
    char* tmp        = a_str;
    char* last_comma = 0;
    char delim[2];
    delim[0] = a_delim;
    delim[1] = 0;

    /* Count how many elements will be extracted. */
    while (*tmp)
    {
        if (a_delim == *tmp)
        {
            count++;
            last_comma = tmp;
        }
        tmp++;
    }

    /* Add space for trailing token. */
    count += last_comma < (a_str + strlen(a_str) - 1);

    /* Add space for terminating null string so caller
       knows where the list of returned strings ends. */
    count++;

    result = malloc(sizeof(char*) * count);

    if (result)
    {
        size_t idx  = 0;
        char* token = strtok(a_str, delim);

        while (token)
        {
            assert(idx < count);
            *(result + idx++) = strdup(token);
            token = strtok(0, delim);
        }
        assert(idx == count - 1);
        *(result + idx) = 0;
    }

    return result;
}

int main()
{
    char months[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
    char** tokens;

    printf("months=[%s]\n\n", months);

    tokens = str_split(months, ',');

    if (tokens)
    {
        int i;
        for (i = 0; *(tokens + i); i++)
        {
            printf("month=[%s]\n", *(tokens + i));
            free(*(tokens + i));
        }
        printf("\n");
        free(tokens);
    }

    return 0;
}

Output:

$ ./main.exe
months=[JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC]

month=[JAN]
month=[FEB]
month=[MAR]
month=[APR]
month=[MAY]
month=[JUN]
month=[JUL]
month=[AUG]
month=[SEP]
month=[OCT]
month=[NOV]
month=[DEC]
查看更多
裙下三千臣
5楼-- · 2018-12-31 03:03

String tokenizer this code should put you in the right direction.

int main(void) {
  char st[] ="Where there is will, there is a way.";
  char *ch;
  ch = strtok(st, " ");
  while (ch != NULL) {
  printf("%s\n", ch);
  ch = strtok(NULL, " ,");
  }
  getch();
  return 0;
}
查看更多
与风俱净
6楼-- · 2018-12-31 03:04
static int count_token(char *iptr, char delim) {

        int token_count = 0;
        while (*iptr && isspace(*iptr))
            iptr++;
        while (*iptr) {
            if ((*iptr != delim)) {
                token_count++;
                while (*iptr && (*iptr != delim))
                    iptr++;
            }
            else {
                iptr++;
            }
        }
        return token_count;
    }

    static char** split(char* input, int* argc){
        char**  argv;
        int token_count = count_token(input, ' ');
        argv = (char**)malloc(sizeof(char*)*token_count);

        int i = 0;
        char *token = strtok(input, " ");
        while(token) {
            puts(token);
            argv[i] = strdup(token);
            token = strtok(NULL, " ");
            i++;
        }
        assert(i == token_count);
        *argc = token_count;
        return argv;
    }
查看更多
只若初见
7楼-- · 2018-12-31 03:06

This function takes a char* string and splits it by the deliminator. There can be multiple deliminators in a row. Note that the function modifies the orignal string. You must make a copy of the original string first if you need the original to stay unaltered. This function doesn't use any cstring function calls so it might be a little faster than others. If you don't care about memory allocation, you can allocate sub_strings at the top of the function with size strlen(src_str)/2 and (like the c++ "version" mentioned) skip the bottom half of the function. If you do this, the function is reduced to O(N), but the memory optimized way shown below is O(2N).

The function:

char** str_split(char *src_str, const char deliminator, size_t &num_sub_str){
  //replace deliminator's with zeros and count how many
  //sub strings with length >= 1 exist
  num_sub_str = 0;
  char *src_str_tmp = src_str;
  bool found_delim = true;
  while(*src_str_tmp){
    if(*src_str_tmp == deliminator){
      *src_str_tmp = 0;
      found_delim = true;
    }
    else if(found_delim){ //found first character of a new string
      num_sub_str++;
      found_delim = false;
      //sub_str_vec.push_back(src_str_tmp); //for c++
    }
    src_str_tmp++;
  }
  printf("Start - found %d sub strings\n", num_sub_str);
  if(num_sub_str <= 0){
    printf("str_split() - no substrings were found\n");
    return(0);
  }

  //if you want to use a c++ vector and push onto it, the rest of this function
  //can be omitted (obviously modifying input parameters to take a vector, etc)

  char **sub_strings = (char **)malloc( (sizeof(char*) * num_sub_str) + 1);
  const char *src_str_terminator = src_str_tmp;
  src_str_tmp = src_str;
  bool found_null = true;
  size_t idx = 0;
  while(src_str_tmp < src_str_terminator){
    if(!*src_str_tmp) //found a NULL
      found_null = true;
    else if(found_null){
      sub_strings[idx++] = src_str_tmp;
      //printf("sub_string_%d: [%s]\n", idx-1, sub_strings[idx-1]);
      found_null = false;
    }
    src_str_tmp++;
  }
  sub_strings[num_sub_str] = NULL;

  return(sub_strings);
}

How to use it:

  char months[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
  char *str = strdup(months);
  size_t num_sub_str;
  char **sub_strings = str_split(str, ',', num_sub_str);
  char *endptr;
  if(sub_strings){
    for(int i = 0; sub_strings[i]; i++)
      printf("[%s]\n", sub_strings[i]);
  }
  free(sub_strings);
  free(str);
查看更多
登录 后发表回答