Split string with delimiters in C

2018-12-31 02:17发布

How do I write a function to split and return an array for a string with delimiters in the C programming language?

char* str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
str_split(str,',');

标签: c string split
25条回答
泛滥B
2楼-- · 2018-12-31 02:50
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>

/**
 *  splits str on delim and dynamically allocates an array of pointers.
 *
 *  On error -1 is returned, check errno
 *  On success size of array is returned, which may be 0 on an empty string
 *  or 1 if no delim was found.  
 *
 *  You could rewrite this to return the char ** array instead and upon NULL
 *  know it's an allocation problem but I did the triple array here.  Note that
 *  upon the hitting two delim's in a row "foo,,bar" the array would be:
 *  { "foo", NULL, "bar" } 
 * 
 *  You need to define the semantics of a trailing delim Like "foo," is that a
 *  2 count array or an array of one?  I choose the two count with the second entry
 *  set to NULL since it's valueless.
 *  Modifies str so make a copy if this is a problem
 */
int split( char * str, char delim, char ***array, int *length ) {
  char *p;
  char **res;
  int count=0;
  int k=0;

  p = str;
  // Count occurance of delim in string
  while( (p=strchr(p,delim)) != NULL ) {
    *p = 0; // Null terminate the deliminator.
    p++; // Skip past our new null
    count++;
  }

  // allocate dynamic array
  res = calloc( 1, count * sizeof(char *));
  if( !res ) return -1;

  p = str;
  for( k=0; k<count; k++ ){
    if( *p ) res[k] = p;  // Copy start of string
    p = strchr(p, 0 );    // Look for next null
    p++; // Start of next string
  }

  *array = res;
  *length = count;

  return 0;
}

char str[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,";

int main() {
  char **res;
  int k=0;
  int count =0;
  int rc;

  rc = split( str, ',', &res, &count );
  if( rc ) {
    printf("Error: %s errno: %d \n", strerror(errno), errno);
  }

  printf("count: %d\n", count );
  for( k=0; k<count; k++ ) {
    printf("str: %s\n", res[k]);
  }

  free(res );
  return 0;
}
查看更多
浅入江南
3楼-- · 2018-12-31 02:50

This optimized method create (or update an existing) array of pointers in *result and returns the number of elements in *count.

Use "max" to indicate the maximum number of strings you expect (when you specify an existing array or any other reaseon), else set it to 0

To compare against a list of delimiters, define delim as a char* and replace the line:

if (str[i]==delim) {

with the two following lines:

 char *c=delim; while(*c && *c!=str[i]) c++;
 if (*c) {

Enjoy

#include <stdlib.h>
#include <string.h>

char **split(char *str, size_t len, char delim, char ***result, unsigned long *count, unsigned long max) {
  size_t i;
  char **_result;

  // there is at least one string returned
  *count=1;

  _result= *result;

  // when the result array is specified, fill it during the first pass
  if (_result) {
    _result[0]=str;
  }

  // scan the string for delimiter, up to specified length
  for (i=0; i<len; ++i) {

    // to compare against a list of delimiters,
    // define delim as a string and replace 
    // the next line:
    //     if (str[i]==delim) {
    //
    // with the two following lines:
    //     char *c=delim; while(*c && *c!=str[i]) c++;
    //     if (*c) {
    //       
    if (str[i]==delim) {

      // replace delimiter with zero
      str[i]=0;

      // when result array is specified, fill it during the first pass
      if (_result) {
        _result[*count]=str+i+1;
      }

      // increment count for each separator found
      ++(*count);

      // if max is specified, dont go further
      if (max && *count==max)  {
        break;
      }

    }
  }

  // when result array is specified, we are done here
  if (_result) {
    return _result;
  }

  // else allocate memory for result
  // and fill the result array                                                                                    

  *result=malloc((*count)*sizeof(char*));
  if (!*result) {
    return NULL;
  }
  _result=*result;

  // add first string to result
  _result[0]=str;

  // if theres more strings
  for (i=1; i<*count; ++i) {

    // find next string
    while(*str) ++str;
    ++str;

    // add next string to result
    _result[i]=str;

  }

  return _result;
}  

Usage example:

#include <stdio.h>

int main(int argc, char **argv) {
  char *str="JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
  char **result=malloc(6*sizeof(char*));
  char **result2=0;
  unsigned long count;
  unsigned long count2;
  unsigned long i;

  split(strdup(str),strlen(str),',',&result,&count,6);
  split(strdup(str),strlen(str),',',&result2,&count2,0);

  if (result)
  for (i=0; i<count; ++i) {
    printf("%s\n",result[i]);
  }

  printf("\n");

  if (result2)
  for (i=0; i<count2; ++i) {
    printf("%s\n", result2[i]);
  }

  return 0;

}
查看更多
伤终究还是伤i
4楼-- · 2018-12-31 02:51

My approach is to scan the string and let the pointers point to every character after the deliminators(and the first character), at the same time assign the appearances of deliminator in string to '\0'.
First make a copy of original string(since it's constant), then get the number of splits by scan it pass it to pointer parameter len. After that, point the first result pointer to the copy string pointer, then scan the copy string: once encounter a deliminator, assign it to '\0' thus the previous result string is terminated, and point the next result string pointer to the next character pointer.

char** split(char* a_str, const char a_delim, int* len){
    char* s = (char*)malloc(sizeof(char) * strlen(a_str));
    strcpy(s, a_str);
    char* tmp = a_str;
    int count = 0;
    while (*tmp != '\0'){
        if (*tmp == a_delim) count += 1;
        tmp += 1;
    }
    *len = count;
    char** results = (char**)malloc(count * sizeof(char*));
    results[0] = s;
    int i = 1;
    while (*s!='\0'){
        if (*s == a_delim){
            *s = '\0';
            s += 1;
            results[i++] = s;
        }
        else s += 1;
    }
    return results;
}
查看更多
回忆,回不去的记忆
5楼-- · 2018-12-31 02:51

I think the following solution is ideal:

  • Doesn't destroy the source string
  • Re-entrant - i.e., you can safely call it from anywhere in one or more threads
  • Portable
  • Handles multiple separators correctly
  • Fast and efficient

Explanation of the code:

  1. Define a structure token to store the address and lengths of the tokens
  2. Allocate enough memory for these in the worst case, which is when str is made up entirely of separators so there are strlen(str) + 1 tokens, all of them empty strings
  3. Scan str recording the address and length of every token
  4. Use this to allocate the output array of the correct size, including an extra space for a NULL sentinel value
  5. Allocate, copy, and add the tokens using the start and length information - use memcpy as it's faster than strcpy and we know the lengths
  6. Free the token address and length array
  7. Return the array of tokens
typedef struct {
    const char *start;
    size_t len;
} token;

char **split(const char *str, char sep)
{
    char **array;
    unsigned int start = 0, stop, toks = 0, t;
    token *tokens = malloc((strlen(str) + 1) * sizeof(token));
    for (stop = 0; str[stop]; stop++) {
        if (str[stop] == sep) {
            tokens[toks].start = str + start;
            tokens[toks].len = stop - start;
            toks++;
            start = stop + 1;
        }
    }
    /* Mop up the last token */
    tokens[toks].start = str + start;
    tokens[toks].len = stop - start;
    toks++;
    array = malloc((toks + 1) * sizeof(char*));
    for (t = 0; t < toks; t++) {
        /* Calloc makes it nul-terminated */
        char *token = calloc(tokens[t].len + 1, 1);
        memcpy(token, tokens[t].start, tokens[t].len);
        array[t] = token;
    }
    /* Add a sentinel */
    array[t] = NULL; 
    free(tokens);
    return array;
}

Note malloc checking omitted for brevity.

In general, I wouldn't return an array of char * pointers from a split function like this as it places a lot of responsibility on the caller to free them correctly. An interface I prefer is to allow the caller to pass a callback function and call this for every token, as I have described here: Split a String in C.

查看更多
琉璃瓶的回忆
6楼-- · 2018-12-31 02:51

There are some problems with strtok() listed here: http://benpfaff.org/writings/clc/strtok.html

Hence, it is better to avoid strtok.

Now, consider a string containing a empty field as follows:

char myCSVString[101] = "-1.4,2.6,,-0.24,1.26"; // specify input here

You can use simple function to be able convert String in CSV format to read them to a float Array:

int strCSV2Float(float *strFloatArray , char *myCSVStringing , char delim);

We specified the delimiter here as a comma. It works with other single character delimiter.

Please find the Usage below:

#include <stdio.h>
#include <stdlib.h>



int strCSV2Float(float *strFloatArray , char *myCSVStringing , char delim);

  void main()
 {

    char myCSVString[101] = "-1.4,2.6,,-0.24,1.26"; // specify input here
    float floatArr[10]; // specify size of float array here 
    int totalValues = 0;
    char myDelim = ','; // specify delimiter here 

    printf("myCSVString == %s \n",&myCSVString[0]);

    totalValues = strCSV2Float(&floatArr[0] , &myCSVString[0], myDelim); // call the function here 

    int floatValueCount = 0;

    for (floatValueCount = 0 ; floatValueCount < totalValues ; floatValueCount++)
    {

      printf("floatArr[%d] = %f\n",floatValueCount , floatArr[floatValueCount]);

    }

 }




int strCSV2Float(float *strFloatArray , char *myCSVStringing , char delim)
{

int strLen = 0;
int commaCount =0; // count the number of commas
int commaCountOld =0; // count the number of commas
int wordEndChar = 0;
int wordStartChar = -1;
int wordLength =0;


   for(strLen=0; myCSVStringing[strLen] != '\0'; strLen++) // first get the string length
   {

       if ( (myCSVStringing[strLen] == delim)  || ( myCSVStringing[strLen+1] == '\0' ))
        {
           commaCount++;
           wordEndChar = strLen;
        }
       if ( (commaCount - commaCountOld) > 0 )
        {
          int aIter =0;
          wordLength = (wordEndChar - wordStartChar);
          char word[55] = "";
          for (aIter = 0;  aIter < wordLength; aIter++)
          {
            word[aIter] = myCSVStringing[strLen-wordLength+aIter+1];
          }

          if (word[aIter-1] == delim) 
           word[aIter-1] = '\0';

          //  printf("\n");
          word[wordLength] = '\0';
          strFloatArray[commaCount-1] = atof(&word[0]);

          wordLength = 0;
          wordStartChar = wordEndChar;
          commaCountOld = commaCount;

        }  
  }

  return commaCount;

}

Output is as follows :

myCSVString == -1.4,2.6,,-0.24,1.26 
floatArr[0] = -1.400000
floatArr[1] = 2.600000
floatArr[2] = 0.000000
floatArr[3] = -0.240000
floatArr[4] = 1.260000
查看更多
闭嘴吧你
7楼-- · 2018-12-31 02:54

Here is my two cents:

int split (const char *txt, char delim, char ***tokens)
{
    int *tklen, *t, count = 1;
    char **arr, *p = (char *) txt;

    while (*p != '\0') if (*p++ == delim) count += 1;
    t = tklen = calloc (count, sizeof (int));
    for (p = (char *) txt; *p != '\0'; p++) *p == delim ? *t++ : (*t)++;
    *tokens = arr = malloc (count * sizeof (char *));
    t = tklen;
    p = *arr++ = calloc (*(t++) + 1, sizeof (char *));
    while (*txt != '\0')
    {
        if (*txt == delim)
        {
            p = *arr++ = calloc (*(t++) + 1, sizeof (char *));
            txt++;
        }
        else *p++ = *txt++;
    }
    free (tklen);
    return count;
}

Usage:

char **tokens;
int count, i;
const char *str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";

count = split (str, ',', &tokens);
for (i = 0; i < count; i++) printf ("%s\n", tokens[i]);

/* freeing tokens */
for (i = 0; i < count; i++) free (tokens[i]);
free (tokens);
查看更多
登录 后发表回答