// User:Justin545/regtst.cpp
//
// From Wikipedia, the free encyclopedia
////////////////////////////////////////////////////////////////////////////////
//
//  * Examples
//
//      justin@justin-desktop:~/workspace/regtst/Debug$ ./regtst -e '^a(.*)c' 'a
//      b
//      c'
//      cflags |= REG_EXTENDED;
//      *** Flags ***
//      cflags = 0x1;
//      eflags = 0x0;
//      *** Regex ***
//      ^a(.*)c
//      *** Text ***
//      a
//      b
//      c
//      *** Substrings ***
//      \0 (0, 5): "a
//      b
//      c"
//      \1 (1, 4): "
//      b
//      "
//      justin@justin-desktop:~/workspace/regtst/Debug$ ./regtst -en '^a(.*)c' 'ac'
//      cflags |= REG_EXTENDED;
//      cflags |= REG_NEWLINE;
//      *** Flags ***
//      cflags = 0x5;
//      eflags = 0x0;
//      *** Regex ***
//      ^a(.*)c
//      *** Text ***
//      ac
//      *** Substrings ***
//      \0 (0, 2): "ac"
//      \1 (1, 1): ""
//      justin@justin-desktop:~/workspace/regtst/Debug$
//
////////////////////////////////////////////////////////////////////////////////

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/types.h>
#include <regex.h>

//  Command-line tester for the POSIX regex API (regcomp/regexec).
//
//  Usage: regtst [-eisnbc] REGEX TEXT
//     -e  REG_EXTENDED (cflags)      -n  REG_NEWLINE (cflags)
//     -i  REG_ICASE    (cflags)      -b  REG_NOTBOL  (eflags)
//     -s  REG_NOSUB    (cflags)      -c  REG_NOTEOL  (eflags)
//
//  Prints the effective flags, the regex, the text and then one line per
//  sub-match in the form `\N (start, end): "substring"'.
//
//  Exit status:
//     0  the regex matched
//     1  unknown option
//     2  wrong number of non-option arguments
//     3  regcomp() failed
//     4  regexec() returned non-zero (this includes "no match")
int main(int argc, char** argv)
{
   int aRes, aCFlags, aEFlags;
   const char* aRegex;
   const char* aText;
   regex_t aReg;
   regmatch_t aMatch[32];
   char aBuf[1024];   // Only used for regerror() messages.
   size_t aI, aCount;

   aCFlags = aEFlags = 0;
   while (1)
   {
      aRes = getopt(argc, argv, "eisnbc"); // REG_EXTENDED (e), REG_ICASE (i), REG_NOSUB (s), REG_NEWLINE (n), REG_NOTBOL (b), REG_NOTEOL (c).
      if (aRes == -1) break;
      switch(aRes)
      {
      case 'e': aCFlags |= REG_EXTENDED; printf("cflags |= REG_EXTENDED;\n"); break;
      case 'i': aCFlags |= REG_ICASE;    printf("cflags |= REG_ICASE;\n"   ); break;
      case 's': aCFlags |= REG_NOSUB;    printf("cflags |= REG_NOSUB;\n");    break;
      case 'n': aCFlags |= REG_NEWLINE;  printf("cflags |= REG_NEWLINE;\n");  break;
      case 'b': aEFlags |= REG_NOTBOL;   printf("eflags |= REG_NOTBOL;\n");   break;
      case 'c': aEFlags |= REG_NOTEOL;   printf("eflags |= REG_NOTEOL;\n");   break;
      default: // '?'.
         printf("Unknown option \"-%c\".\n", optopt);
         return 1;
      }
   }

   printf("*** Flags ***\ncflags = 0x%X;\neflags = 0x%X;\n", aCFlags, aEFlags);

   if ((optind + 2) != argc)
   {
      //  GETOPT(3): By default, getopt() permutes the contents of argv as it
      //  scans, so that eventually all the non-options are at the end.
      printf("Expected exactly 2 arguments (regex, text) beside options.\n");
      return 2;
   }
   aRegex = argv[optind];
   aText = argv[optind + 1];

   printf("*** Regex ***\n%s\n", aRegex);
   printf("*** Text ***\n%s\n", aText);

   aRes = regcomp(&aReg, aRegex, aCFlags);
   if (aRes != 0)
   {
      regerror(aRes, &aReg, aBuf, sizeof(aBuf));
      printf("Fail to compile the regex: %s\n", aBuf);
      return 3;
   }

   //  Number of `regmatch_t' entries to request: the whole match (\0) plus
   //  one per parenthesized subexpression, capped by the array size.  With
   //  REG_NOSUB `regexec()' ignores the match array altogether, so request
   //  (and later print) nothing instead of reporting stale array contents.
   //  `re_nsub' must be read here, before `regfree()' invalidates `aReg'.
   aCount = 0;
   if ((aCFlags & REG_NOSUB) == 0)
   {
      aCount = aReg.re_nsub + 1;
      if (aCount > sizeof(aMatch) / sizeof(aMatch[0]))
      {
         aCount = sizeof(aMatch) / sizeof(aMatch[0]);
      }
   }

   //  `regfree()' can be put right after the `regexec()' (see the example in
   //  <http://www.opengroup.org/onlinepubs/000095399/functions/regcomp.html>)
   //  but we are calling `regerror()' which needs a valid `regex_t'.
   aRes = regexec(&aReg, aText, aCount, aMatch, aEFlags);
   if (aRes != 0)
   {
      regerror(aRes, &aReg, aBuf, sizeof(aBuf));
      regfree(&aReg);
      printf("Problem occurs when execute the regex: %s\n", aBuf);
      return 4;
   }
   regfree(&aReg);

   printf("*** Substrings ***\n");
   if ((aCFlags & REG_NOSUB) != 0)
   {
      printf("(matched; offsets are not reported with REG_NOSUB)\n");
   }
   for (aI = 0; aI < aCount; aI++)
   {
      //  A subexpression that did not take part in the match (e.g. the
      //  losing side of an alternation) has offsets of -1.  Later groups may
      //  still have matched, so report it and keep going.
      if (aMatch[aI].rm_so == -1)
      {
         printf("\\%d (-1, -1): (did not participate)\n", (int)aI);
         continue;
      }
      //  Print straight out of the text with a precision instead of copying
      //  into a fixed-size buffer, so arbitrarily long matches are safe.
      printf("\\%d (%d, %d): \"%.*s\"\n",
             (int)aI,
             (int)aMatch[aI].rm_so,
             (int)aMatch[aI].rm_eo,
             (int)(aMatch[aI].rm_eo - aMatch[aI].rm_so),
             aText + aMatch[aI].rm_so);
   }

   return 0;
}