I need to extract the text between two string patterns in C.
Example:
aaaaaa<BBBB>TEXT TO EXTRACT</BBBB>aaaaaaaaa
PATTERN1=<BBBB>
PATTERN2=</BBBB>
Thanks.
I need to extract the text between two string patterns in C.
Example:
aaaaaa<BBBB>TEXT TO EXTRACT</BBBB>aaaaaaaaa
PATTERN1=<BBBB>
PATTERN2=</BBBB>
Thanks.
Here is a live example of how to do this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Demo: copy the text that lies between PATTERN1 and PATTERN2 in s into a
 * freshly allocated buffer and print it on its own line.
 *
 * Nothing is printed when either marker is missing. Returns 0 on success
 * (including "marker not found") and 1 if the buffer cannot be allocated.
 */
int main(void)
{
    const char *s = "aaaaaa<BBBB>TEXT TO EXTRACT</BBBB>aaaaaaaaa";
    const char *PATTERN1 = "<BBBB>";
    const char *PATTERN2 = "</BBBB>";
    char *target = NULL;

    const char *start = strstr(s, PATTERN1);
    if (start != NULL) {
        /* Skip the opening marker so that start points at the payload. */
        start += strlen(PATTERN1);

        /* Search for the closing marker only after the opening one. */
        const char *end = strstr(start, PATTERN2);
        if (end != NULL) {
            const size_t len = (size_t)(end - start);

            target = malloc(len + 1);
            if (target == NULL) {
                fputs("out of memory\n", stderr);
                return 1;
            }
            memcpy(target, start, len);
            target[len] = '\0';
        }
    }

    if (target != NULL) {
        printf("%s\n", target);
    }
    free(target); /* free(NULL) is a no-op, so no guard is needed */
    return 0;
}
The output is
TEXT TO EXTRACT
Just use strstr().
First call it once to find the start marker, then call it again with a pointer to the first character after the start marker, to find the end marker:
/**
 * Extract the text between two markers.
 *
 * Finds the first occurrence of p1 in str, then the first occurrence of p2
 * that follows it, and returns a newly allocated, NUL-terminated copy of the
 * characters in between (an empty string if the markers are adjacent).
 *
 * @param str  String to search; must not be NULL.
 * @param p1   Start marker; must not be NULL.
 * @param p2   End marker; must not be NULL.
 * @return     Heap-allocated copy that the caller must free(), or NULL if
 *             either marker is not found or the allocation fails.
 *
 * Note from the original answer: please test this for off-by-ones, it was
 * written pretty quickly.
 */
char * extract_between(const char *str, const char *p1, const char *p2)
{
    const char *i1 = strstr(str, p1);
    if (i1 == NULL) {
        return NULL;
    }

    /* The payload starts right after the start marker. */
    const char *begin = i1 + strlen(p1);

    /* Check the search result (i2), not the pattern argument (p2). */
    const char *i2 = strstr(begin, p2);
    if (i2 == NULL) {
        return NULL;
    }

    /* Found both markers, extract text. */
    const size_t mlen = (size_t)(i2 - begin);
    char *ret = malloc(mlen + 1);
    if (ret == NULL) {
        return NULL;
    }
    memcpy(ret, begin, mlen);
    ret[mlen] = '\0';
    return ret;
}
This may not be optimal in performance, but it is very simple to implement, get right, read, and understand.
char * start = strstr(PATTERN1, text);
char * end = strstr(PATTERN2, text);
if (end == NULL || start == NULL)
return;
*end = '\0';
printf("%s\n", start);
Note: if you can't modify the original string, copy it to a buffer you can play with.
Alternate version:
char * start = strstr(PATTERN1, text);
char * end = strstr(PATTERN2, text);
if (end == NULL || start == NULL)
return;
int len = end - start;
if (len <= 0)
return;
char * parsed = (char *)calloc(len+1, sizeof(char));
if (parsed == NULL)
return;
memcpy(parsed, start, len);
printf("%s\n", parsed);
free(parsed);
ps: I haven't tested any of the code