#include <regex.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

// Receives a regular expression by command line argument
// Test texts given in standard input against the regular expression
// http://pubs.opengroup.org/onlinepubs/009695399/functions/regcomp.html
// Compile with: gcc -Wall -Wextra -std=c11 -o test_regex test_regex.c

#define SZ_LINE 1024

bool analyze_flags(int argc, char* argv[], int* cflags, int *eflags);
void print_subexpression(const char* line, size_t pos, size_t number, regoff_t start, regoff_t end);
void print_regex_error(int error, regex_t* regex);

// Program entry point.
// argv[1] is the regular expression; the remaining arguments are flag names
// (see analyze_flags). Every line read from stdin is matched against the
// expression and each match is printed with its subexpressions.
// Returns 1 on usage error or if the expression does not compile, 0 otherwise.
int main(int argc, char* argv[])
{
	// One argument is mandatory: the regular expression
	if ( argc <= 1 )
	{
		fprintf(stderr, "usage: test_regex 'regular expression'"
			" [global] [extended|icase|nosub|newline] [notbol|noteol]\n");
		return 1;
	}

	// Set flags from command line arguments
	int cflags = 0, eflags = 0;
	const bool global = analyze_flags(argc, argv, &cflags, &eflags);

	// Compile the regular expression
	regex_t regex;
	int error = regcomp(&regex, argv[1], cflags);
	if ( error )
	{
		fprintf(stderr, "error: could not compile regex '%s'\n", argv[1]);
		return 1;
	}

	// With REG_NOSUB, regexec() only reports success or failure: it ignores
	// the match array, and re_nsub is not guaranteed to be set by regcomp().
	// In that mode neither value may be read.
	const bool nosub = (cflags & REG_NOSUB) != 0;
	const size_t nsub = nosub ? 0 : regex.re_nsub;

	// Array of subexpressions (expressions inside round parentheses).
	// Element 0 is the whole match.
	regmatch_t subexpressions[nsub + 1];

	// While there are lines in stdin
	char line[SZ_LINE];
	while ( fgets(line, SZ_LINE, stdin) )
	{
		// Remove the trailing new line char, only if there is one: the last
		// line of the input, or a line longer than the buffer, has none
		line[strcspn(line, "\n")] = '\0';
		const size_t len = strlen(line);
		size_t pos = 0;

		bool first_match = true;
		do
		{
			if ( nosub )
			{
				// No offsets are available, so there is nothing to iterate on:
				// just tell whether the line matches
				error = regexec(&regex, line, 0, NULL, eflags);
				if ( error == 0 )
					puts("\tmatch");
				else if ( error != REG_NOMATCH )
					print_regex_error(error, &regex);
				break;
			}

			// Searches after the first one start in the middle of the line,
			// so '^' must not match there
			const int flags = eflags | (pos > 0 ? REG_NOTBOL : 0);

			// Execute regular expression against the rest of the line
			error = regexec(&regex, line + pos, nsub + 1, subexpressions, flags);
			if ( error == REG_NOMATCH )
				break;
			if ( error != 0 )
			{
				print_regex_error(error, &regex);
				break;
			}

			// Separate this match from previous ones with a new line
			if ( first_match )
				first_match = false;
			else
				putchar('\n');

			// Print the whole match and its subexpressions
			for ( size_t index = 0; index <= nsub; ++index )
				print_subexpression(line, pos, index, subexpressions[index].rm_so, subexpressions[index].rm_eo);

			// Move pos after this match. An empty match consumes nothing, so
			// step one char forward to avoid finding it again forever
			const regmatch_t whole = subexpressions[0];
			pos += (size_t)whole.rm_eo;
			if ( whole.rm_eo == whole.rm_so )
				++pos;

		// pos == len is still tried: the empty string at the end of the line
		// may match (e.g. '$')
		} while ( global && pos <= len );
	}

	// Free memory allocated to the pattern buffer by regcomp()
	regfree(&regex);
	return 0;
}

// Translates the flag names in argv[2..argc-1] into regcomp() flags (*cflags)
// and regexec() flags (*eflags). Both are only OR-ed into, so callers must
// initialize them. Unknown names produce a warning on stderr and are ignored.
// Returns true if the "global" flag was given (find all matches per line).
bool analyze_flags(int argc, char* argv[], int* cflags, int *eflags)
{
	bool global = false;

	for ( int index = 2; index < argc; ++index )
	{
		const char* flag = argv[index];

		if ( strcmp(flag, "global") == 0 )
			global = true;
		else if ( strcmp(flag, "extended") == 0 )
			*cflags |= REG_EXTENDED;
		else if ( strcmp(flag, "icase") == 0 )
			*cflags |= REG_ICASE;
		// "nosub" is the documented spelling; "NOSUB" is kept because it is
		// the only spelling previous versions accepted
		else if ( strcmp(flag, "nosub") == 0 || strcmp(flag, "NOSUB") == 0 )
			*cflags |= REG_NOSUB;
		else if ( strcmp(flag, "newline") == 0 )
			*cflags |= REG_NEWLINE;
		else if ( strcmp(flag, "notbol") == 0 )
			*eflags |= REG_NOTBOL;
		else if ( strcmp(flag, "noteol") == 0 )
			*eflags |= REG_NOTEOL;
		else
			fprintf(stderr, "warning: ignoring flag: %s\n", flag);
	}

	return global;
}

// Prints one (sub)expression match as: <tab>number [from, to[ = 'text'
// line:   the full line that was searched
// pos:    offset in line where the search started; start and end are relative to it
// number: index of the subexpression (0 is the whole match)
// start, end: offsets reported by regexec(), end exclusive
// A subexpression that did not take part in the match has both offsets set
// to -1 by regexec(); it is printed with an empty text.
void print_subexpression(const char* line, size_t pos, size_t number, regoff_t start, regoff_t end)
{
	// Convert explicitly: the width of regoff_t varies between platforms and
	// must agree with the %lld conversion
	const long long from = (long long)pos + start;
	const long long to = (long long)pos + end;

	printf("\t%zu [%lld, %lld[ = '", number, from, to);
	if ( start >= 0 && end > start )
		fwrite(line + pos + start, 1, (size_t)(end - start), stdout);
	printf("'\n");
}

// Prints to stderr the message that regerror() gives for a regexec() error code.
void print_regex_error(int error, regex_t* regex)
{
	char message[SZ_LINE];
	regerror(error, regex, message, sizeof(message));
	fprintf(stderr, "Regex match failed: %s\n", message);
}