Hey guys I am needing to meet a certain client demand for PCI . I am fairly comfortable in C and I really don't want to redo the wheel here. I have an regex example in python that I would like to have applied in C.
pan_regexs = {'Mastercard': re.compile('(?:\D|^)(5[1-5][0-9]{2}(?:\ |\-|)[0-9]{4}(?:\ |\-|)[0-9]{4}(?:\ |\-|)[0-9]{4})(?:\D|$)'), \
'Visa': re.compile('(?:\D|^)(4[0-9]{3}(?:\ |\-|)[0-9]{4}(?:\ |\-|)[0-9]{4}(?:\ |\-|)[0-9]{4})(?:\D|$)'), \
'AMEX': re.compile('(?:\D|^)((?:34|37)[0-9]{2}(?:\ |\-|)[0-9]{6}(?:\ |\-|)[0-9]{5})(?:\D|$)')}
I have found some POSIX library "regex.h " and this seems to used the really old regex standard.
I have found two examples one uses POSIX regex which seems to be limited at best. Stolen from Here
#include <regex.h>
regex_t regex;
int reti;
char msgbuf[100];
/* Compile regular expression */
reti = regcomp(®ex, "^a[[:alnum:]]", 0);
if (reti) {
fprintf(stderr, "Could not compile regex\n");
exit(1);
}
/* Execute regular expression */
reti = regexec(®ex, "abc", 0, NULL, 0);
if (!reti) {
puts("Match");
}
else if (reti == REG_NOMATCH) {
puts("No match");
}
else {
regerror(reti, ®ex, msgbuf, sizeof(msgbuf));
fprintf(stderr, "Regex match failed: %s\n", msgbuf);
exit(1);
}
/* Free memory allocated to the pattern buffer by regcomp() */
regfree(®ex);
The problem I see with the above is that it uses (from what I have gathered) Old Regex which doesn't support removing spaces and dashes.It also only really seems to perform some matching and looking for decent examples for this is not been turning many results for me in Google. So I looked further and in that answer it ( the question above) mentions the use of PCRE.
I found some demo Here
As I said before, I don't want to reinvent the wheel . I think it would be terrible to write my own regex for what could contain potential flaws when something clean and simple probably exists.
The PCI question comes from a client that we need to be able to monitor systems for how/where they are storing PANs and such. It's sort of a DLP, sphere of audit,and proving that CCNS are stored correctly.
How can I use regex in C to search for credit card numbers?
PS. I am ok with this regex, and open for better ways of doing this regex.
Ok I ended up using PCRE2 and it works remarkably well.
The source code I think I saw on Github, but it's is also Here too.
I downloaded it, compiled it , and installed it. Doing the following..
Keep in mind there is an 8,16,and a 32 bit formatting for this. All the examples I saw used the 8 bit and I found that it worked best for what I was doing.
./configure --enable-pcre2-8
make
make install
I had to also make the libraries it produces searchable by the linked by using.
ldconfig
Of course when you're compiling your program you need to link your program to the library. using -lpcre2-8 the library will be named pcre2-16 , pcre2-32 if you're using these versions.
After that I compiled their example using
cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
If you don't want to read all the things that they wrote in their gigantic example, I made my own... Warning I am stripping this apart from a few functions right now so you might want to double check for problems. The example I will provide does work however.
// YOU MUST SPECIFY THE UNIT WIDTH BEFORE THE INCLUDE OF THE pcre.h
#define PCRE2_CODE_UNIT_WIDTH 8
#include <stdio.h>
#include <string.h>
#include <pcre2.h>
#include <stdbool.h>
int main(){
bool Debug = true;
bool Found = false;
pcre2_code *re;
PCRE2_SPTR pattern;
PCRE2_SPTR subject;
int errornumber;
int i;
int rc;
PCRE2_SIZE erroroffset;
PCRE2_SIZE *ovector;
size_t subject_length;
pcre2_match_data *match_data;
char * RegexStr = "(?:\\D|^)(5[1-5][0-9]{2}(?:\\ |\\-|)[0-9]{4}(?:\\ |\\-|)[0-9]{4}(?:\\ |\\-|)[0-9]{4})(?:\\D|$)";
char * source = "5111 2222 3333 4444";
pattern = (PCRE2_SPTR)RegexStr;// <<<<< This is where you pass your REGEX
subject = (PCRE2_SPTR)source;// <<<<< This is where you pass your bufer that will be checked.
subject_length = strlen((char *)subject);
re = pcre2_compile(
pattern, /* the pattern */
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
0, /* default options */
&errornumber, /* for error number */
&erroroffset, /* for error offset */
NULL); /* use default compile context */
/* Compilation failed: print the error message and exit. */
if (re == NULL)
{
PCRE2_UCHAR buffer[256];
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,buffer);
return 1;
}
match_data = pcre2_match_data_create_from_pattern(re, NULL);
rc = pcre2_match(
re,
subject, /* the subject string */
subject_length, /* the length of the subject */
0, /* start at offset 0 in the subject */
0, /* default options */
match_data, /* block for storing the result */
NULL);
if (rc < 0)
{
switch(rc)
{
case PCRE2_ERROR_NOMATCH: //printf("No match\n"); //
pcre2_match_data_free(match_data);
pcre2_code_free(re);
Found = 0;
return Found;
// break;
/*
Handle other special cases if you like
*/
default: printf("Matching error %d\n", rc); //break;
}
pcre2_match_data_free(match_data); /* Release memory used for the match */
pcre2_code_free(re);
Found = 0; /* data and the compiled pattern. */
return Found;
}
if (Debug){
ovector = pcre2_get_ovector_pointer(match_data);
printf("Match succeeded at offset %d\n", (int)ovector[0]);
if (rc == 0)
printf("ovector was not big enough for all the captured substrings\n");
if (ovector[0] > ovector[1])
{
printf("\\K was used in an assertion to set the match start after its end.\n"
"From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]),
(char *)(subject + ovector[1]));
printf("Run abandoned\n");
pcre2_match_data_free(match_data);
pcre2_code_free(re);
return 0;
}
for (i = 0; i < rc; i++)
{
PCRE2_SPTR substring_start = subject + ovector[2*i];
size_t substring_length = ovector[2*i+1] - ovector[2*i];
printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
}
}
else{
if(rc > 0){
Found = true;
}
}
pcre2_match_data_free(match_data);
pcre2_code_free(re);
return Found;
}