The Machine Perception Toolbox

[Introduction]- [News]- [Download]- [Screenshots]- [Manual (pdf)]- [Forums]- [API Reference]- [Repository ]

 

Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

regexp.c File Reference

#include "regexp.h"
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

Include dependency graph for regexp.c:

Include dependency graph

Go to the source code of this file.

Defines

#define ANY   3
#define ANYBUT   5
#define ANYOF   4
#define BACK   7
#define BOL   1
#define BRANCH   6
#define CLOSE   30
#define END   0
#define EOL   2
#define EXACTLY   8
#define FAIL(m)   { regerror(m); return(NULL); }
#define HASWIDTH   01
#define ISMULT(c)   ((c) == '*' || (c) == '+' || (c) == '?')
#define NEXT(p)   (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
#define NOTHING   9
#define OP(p)   (*(p))
#define OPEN   20
#define OPERAND(p)   ((p) + 3)
#define PLUS   11
#define SIMPLE   02
#define SPSTART   04
#define STAR   10
#define STATIC   static
#define UCHARAT(p)   ((int)*(unsigned char *)(p))
#define WORDA   12
#define WORDZ   13
#define WORST   0

Functions

STATIC char * reg (int paren, int *flagp)
STATIC char * regatom (int *flagp)
STATIC char * regbranch (int *flagp)
STATIC void regc (int b)
regexpregcomp (char *exp)
int regexec (register regexp *prog, register char *string)
STATIC void reginsert (char op, char *opnd)
STATIC int regmatch (char *prog)
STATIC char * regnext (register char *p)
STATIC char * regnode (int op)
STATIC void regoptail (char *p, char *val)
STATIC char * regpiece (int *flagp)
STATIC int regrepeat (char *p)
STATIC void regtail (char *p, char *val)
STATIC int regtry (regexp *prog, char *string)

Variables

char * regbol
char * regcode
char regdummy
char ** regendp
char * reginput
int regnpar
char * regparse
long regsize
char ** regstartp


Define Documentation

#define ANY   3
 

Definition at line 92 of file regexp.c.

Referenced by regatom(), regmatch(), and regrepeat().

#define ANYBUT   5
 

Definition at line 94 of file regexp.c.

Referenced by regatom(), regmatch(), and regrepeat().

#define ANYOF   4
 

Definition at line 93 of file regexp.c.

Referenced by regatom(), regmatch(), and regrepeat().

#define BACK   7
 

Definition at line 96 of file regexp.c.

Referenced by regmatch(), and regpiece().

#define BOL   1
 

Definition at line 90 of file regexp.c.

Referenced by regatom(), and regmatch().

#define BRANCH   6
 

Definition at line 95 of file regexp.c.

Referenced by regbranch(), regmatch(), and regpiece().

#define CLOSE   30
 

Definition at line 105 of file regexp.c.

Referenced by reg(), and regmatch().

#define END   0
 

Definition at line 89 of file regexp.c.

Referenced by reg(), and regmatch().

#define EOL   2
 

Definition at line 91 of file regexp.c.

Referenced by regatom(), and regmatch().

#define EXACTLY   8
 

Definition at line 97 of file regexp.c.

Referenced by regatom(), regcomp(), regmatch(), and regrepeat().

#define FAIL m   )     { regerror(m); return(NULL); }
 

Definition at line 157 of file regexp.c.

Referenced by reg(), regatom(), regcomp(), and regpiece().

#define HASWIDTH   01
 

Definition at line 163 of file regexp.c.

Referenced by regatom().

#define ISMULT c   )     ((c) == '*' || (c) == '+' || (c) == '?')
 

Definition at line 158 of file regexp.c.

Referenced by regpiece().

#define NEXT p   )     (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
 

Definition at line 140 of file regexp.c.

Referenced by regnext().

#define NOTHING   9
 

Definition at line 98 of file regexp.c.

Referenced by regbranch(), regmatch(), and regpiece().

#define OP p   )     (*(p))
 

Definition at line 139 of file regexp.c.

Referenced by regcomp(), regmatch(), regnext(), regoptail(), regrepeat(), and regtail().

#define OPEN   20
 

Definition at line 103 of file regexp.c.

Referenced by reg(), and regmatch().

#define OPERAND p   )     ((p) + 3)
 

Definition at line 141 of file regexp.c.

Referenced by regcomp(), regmatch(), regoptail(), and regrepeat().

#define PLUS   11
 

Definition at line 100 of file regexp.c.

Referenced by regmatch(), and regpiece().

#define SIMPLE   02
 

Definition at line 164 of file regexp.c.

#define SPSTART   04
 

Definition at line 165 of file regexp.c.

#define STAR   10
 

Definition at line 99 of file regexp.c.

Referenced by regmatch(), and regpiece().

#define STATIC   static
 

Definition at line 181 of file regexp.c.

#define UCHARAT p   )     ((int)*(unsigned char *)(p))
 

Definition at line 152 of file regexp.c.

Referenced by regatom(), and regexec().

#define WORDA   12
 

Definition at line 101 of file regexp.c.

Referenced by regatom(), and regmatch().

#define WORDZ   13
 

Definition at line 102 of file regexp.c.

Referenced by regatom(), and regmatch().

#define WORST   0
 

Definition at line 166 of file regexp.c.

Referenced by regpiece().


Function Documentation

char * reg int  paren,
int *  flagp
 

Definition at line 302 of file regexp.c.

References br, CLOSE, END, FAIL, OPEN, regbranch(), regnext(), regnode(), regnpar, regoptail(), regparse, and regtail().

Referenced by regatom(), and regcomp().

00305 {
00306         register char *ret;
00307         register char *br;
00308         register char *ender;
00309         register int parno;
00310         int flags;
00311 
00312         *flagp = HASWIDTH;      /* Tentatively. */
00313 
00314         /* Make an OPEN node, if parenthesized. */
00315         if (paren) {
00316                 if (regnpar >= NSUBEXP)
00317                         FAIL("too many ()");
00318                 parno = regnpar;
00319                 regnpar++;
00320                 ret = regnode(OPEN+parno);
00321         } else
00322                 ret = NULL;
00323 
00324         /* Pick up the branches, linking them together. */
00325         br = regbranch(&flags);
00326         if (br == NULL)
00327                 return(NULL);
00328         if (ret != NULL)
00329                 regtail(ret, br);       /* OPEN -> first. */
00330         else
00331                 ret = br;
00332         if (!(flags&HASWIDTH))
00333                 *flagp &= ~HASWIDTH;
00334         *flagp |= flags&SPSTART;
00335         while (*regparse == '|' || *regparse == '\n') {
00336                 regparse++;
00337                 br = regbranch(&flags);
00338                 if (br == NULL)
00339                         return(NULL);
00340                 regtail(ret, br);       /* BRANCH -> BRANCH. */
00341                 if (!(flags&HASWIDTH))
00342                         *flagp &= ~HASWIDTH;
00343                 *flagp |= flags&SPSTART;
00344         }
00345 
00346         /* Make a closing node, and hook it on the end. */
00347         ender = regnode((paren) ? CLOSE+parno : END);   
00348         regtail(ret, ender);
00349 
00350         /* Hook the tails of the branches to the closing node. */
00351         for (br = ret; br != NULL; br = regnext(br))
00352                 regoptail(br, ender);
00353 
00354         /* Check for proper termination. */
00355         if (paren && *regparse++ != ')') {
00356                 FAIL("unmatched ()");
00357         } else if (!paren && *regparse != '\0') {
00358                 if (*regparse == ')') {
00359                         FAIL("unmatched ()");
00360                 } else
00361                         FAIL("junk on end");    /* "Can't happen". */
00362                 /* NOTREACHED */
00363         }
00364 
00365         return(ret);
00366 }

Here is the call graph for this function:

char * regatom int *  flagp  ) 
 

Definition at line 476 of file regexp.c.

References ANY, ANYBUT, ANYOF, BOL, EOL, EXACTLY, FAIL, HASWIDTH, reg(), regc(), regnode(), regparse, UCHARAT, WORDA, and WORDZ.

Referenced by regpiece().

00477 {
00478         register char *ret;
00479         int flags;
00480 
00481         *flagp = WORST;         /* Tentatively. */
00482 
00483         switch (*regparse++) {
00484         /* FIXME: these chars only have meaning at beg/end of pat? */
00485         case '^':
00486                 ret = regnode(BOL);
00487                 break;
00488         case '$':
00489                 ret = regnode(EOL);
00490                 break;
00491         case '.':
00492                 ret = regnode(ANY);
00493                 *flagp |= HASWIDTH|SIMPLE;
00494                 break;
00495         case '[': {
00496                         register int classr;
00497                         register int classend;
00498 
00499                         if (*regparse == '^') { /* Complement of range. */
00500                                 ret = regnode(ANYBUT);
00501                                 regparse++;
00502                         } else
00503                                 ret = regnode(ANYOF);
00504                         if (*regparse == ']' || *regparse == '-')
00505                                 regc(*regparse++);
00506                         while (*regparse != '\0' && *regparse != ']') {
00507                                 if (*regparse == '-') {
00508                                         regparse++;
00509                                         if (*regparse == ']' || *regparse == '\0')
00510                                                 regc('-');
00511                                         else {
00512                                                 classr = UCHARAT(regparse-2)+1;
00513                                                 classend = UCHARAT(regparse);
00514                                                 if (classr > classend+1)
00515                                                         FAIL("invalid [] range");
00516                                                 for (; classr <= classend; classr++)
00517                                                         regc(classr);
00518                                                 regparse++;
00519                                         }
00520                                 } else
00521                                         regc(*regparse++);
00522                         }
00523                         regc('\0');
00524                         if (*regparse != ']')
00525                                 FAIL("unmatched []");
00526                         regparse++;
00527                         *flagp |= HASWIDTH|SIMPLE;
00528                 }
00529                 break;
00530         case '(':
00531                 ret = reg(1, &flags);
00532                 if (ret == NULL)
00533                         return(NULL);
00534                 *flagp |= flags&(HASWIDTH|SPSTART);
00535                 break;
00536         case '\0':
00537         case '|':
00538         case '\n':
00539         case ')':
00540                 FAIL("internal urp");   /* Supposed to be caught earlier. */
00541                 break;
00542         case '?':
00543         case '+':
00544         case '*':
00545                 FAIL("?+* follows nothing");
00546                 break;
00547         case '\\':
00548                 switch (*regparse++) {
00549                 case '\0':
00550                         FAIL("trailing \\");
00551                         break;
00552                 case '<':
00553                         ret = regnode(WORDA);
00554                         break;
00555                 case '>':
00556                         ret = regnode(WORDZ);
00557                         break;
00558                 /* FIXME: Someday handle \1, \2, ... */
00559                 default:
00560                         /* Handle general quoted chars in exact-match routine */
00561                         goto de_fault;
00562                 }
00563                 break;
00564         de_fault:
00565         default:
00566                 /*
00567                  * Encode a string of characters to be matched exactly.
00568                  *
00569                  * This is a bit tricky due to quoted chars and due to
00570                  * '*', '+', and '?' taking the SINGLE char previous
00571                  * as their operand.
00572                  *
00573                  * On entry, the char at regparse[-1] is going to go
00574                  * into the string, no matter what it is.  (It could be
00575                  * following a \ if we are entered from the '\' case.)
00576                  * 
00577                  * Basic idea is to pick up a good char in  ch  and
00578                  * examine the next char.  If it's *+? then we twiddle.
00579                  * If it's \ then we frozzle.  If it's other magic char
00580                  * we push  ch  and terminate the string.  If none of the
00581                  * above, we push  ch  on the string and go around again.
00582                  *
00583                  *  regprev  is used to remember where "the current char"
00584                  * starts in the string, if due to a *+? we need to back
00585                  * up and put the current char in a separate, 1-char, string.
00586                  * When  regprev  is NULL,  ch  is the only char in the
00587                  * string; this is used in *+? handling, and in setting
00588                  * flags |= SIMPLE at the end.
00589                  */
00590                 {
00591                         char *regprev;
00592                         register char ch;
00593 
00594                         regparse--;                     /* Look at cur char */
00595                         ret = regnode(EXACTLY);
00596                         for ( regprev = 0 ; ; ) {
00597                                 ch = *regparse++;       /* Get current char */
00598                                 switch (*regparse) {    /* look at next one */
00599 
00600                                 default:
00601                                         regc(ch);       /* Add cur to string */
00602                                         break;
00603 
00604                                 case '.': case '[': case '(':
00605                                 case ')': case '|': case '\n':
00606                                 case '$': case '^':
00607                                 case '\0':
00608                                 /* FIXME, $ and ^ should not always be magic */
00609                                 magic:
00610                                         regc(ch);       /* dump cur char */
00611                                         goto done;      /* and we are done */
00612 
00613                                 case '?': case '+': case '*':
00614                                         if (!regprev)   /* If just ch in str, */
00615                                                 goto magic;     /* use it */
00616                                         /* End mult-char string one early */
00617                                         regparse = regprev; /* Back up parse */
00618                                         goto done;
00619 
00620                                 case '\\':
00621                                         regc(ch);       /* Cur char OK */
00622                                         switch (regparse[1]){ /* Look after \ */
00623                                         case '\0':
00624                                         case '<':
00625                                         case '>':
00626                                         /* FIXME: Someday handle \1, \2, ... */
00627                                                 goto done; /* Not quoted */
00628                                         default:
00629                                                 /* Backup point is \, scan                                                       * point is after it. */
00630                                                 regprev = regparse;
00631                                                 regparse++; 
00632                                                 continue;       /* NOT break; */
00633                                         }
00634                                 }
00635                                 regprev = regparse;     /* Set backup point */
00636                         }
00637                 done:
00638                         regc('\0');
00639                         *flagp |= HASWIDTH;
00640                         if (!regprev)           /* One char? */
00641                                 *flagp |= SIMPLE;
00642                 }
00643                 break;
00644         }
00645 
00646         return(ret);
00647 }

Here is the call graph for this function:

char * regbranch int *  flagp  ) 
 

Definition at line 374 of file regexp.c.

References BRANCH, NOTHING, regnode(), regparse, regpiece(), and regtail().

Referenced by reg().

00375 {
00376         register char *ret;
00377         register char *chain;
00378         register char *latest;
00379         int flags;
00380 
00381         *flagp = WORST;         /* Tentatively. */
00382 
00383         ret = regnode(BRANCH);
00384         chain = NULL;
00385         while (*regparse != '\0' && *regparse != ')' &&
00386                *regparse != '\n' && *regparse != '|') {
00387                 latest = regpiece(&flags);
00388                 if (latest == NULL)
00389                         return(NULL);
00390                 *flagp |= flags&HASWIDTH;
00391                 if (chain == NULL)      /* First piece. */
00392                         *flagp |= flags&SPSTART;
00393                 else
00394                         regtail(chain, latest);
00395                 chain = latest;
00396         }
00397         if (chain == NULL)      /* Loop ran zero times. */
00398                 (void) regnode(NOTHING);
00399 
00400         return(ret);
00401 }

Here is the call graph for this function:

void regc int  b  ) 
 

Definition at line 677 of file regexp.c.

References regcode, and regsize.

Referenced by regatom(), and regcomp().

00678 {
00679         if (regcode != &regdummy)
00680                 *regcode++ = b;
00681         else
00682                 regsize++;
00683 }

regexp* regcomp char *  exp  ) 
 

Definition at line 213 of file regexp.c.

References EXACTLY, FAIL, MAGIC, OP, OPERAND, regexp::program, r, reg(), regexp::reganch, regc(), regcode, regexp::regmlen, regexp::regmust, regnext(), regnpar, regparse, regsize, and regexp::regstart.

Referenced by regex_compile().

00214 {
00215         register regexp *r;
00216         register char *scan;
00217         register char *longest;
00218         register unsigned len;
00219         int flags;
00220 
00221         if (exp == NULL)
00222                 FAIL("NULL argument");
00223 
00224         /* First pass: determine size, legality. */
00225 #ifdef notdef
00226         if (exp[0] == '.' && exp[1] == '*') exp += 2;  /* aid grep */
00227 #endif
00228         regparse = (char *)exp;
00229         regnpar = 1;
00230         regsize = 0L;
00231         regcode = &regdummy;
00232         regc(MAGIC);
00233         if (reg(0, &flags) == NULL)
00234                 return(NULL);
00235 
00236         /* Small enough for pointer-storage convention? */
00237         if (regsize >= 32767L)          /* Probably could be 65535L. */
00238                 FAIL("regexp too big");
00239 
00240         /* Allocate space. */
00241         r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize);
00242         if (r == NULL)
00243                 FAIL("out of space");
00244 
00245         /* Second pass: emit code. */
00246         regparse = (char *)exp;
00247         regnpar = 1;
00248         regcode = r->program;
00249         regc(MAGIC);
00250         if (reg(0, &flags) == NULL)
00251                 return(NULL);
00252 
00253         /* Dig out information for optimizations. */
00254         r->regstart = '\0';     /* Worst-case defaults. */
00255         r->reganch = 0;
00256         r->regmust = NULL;
00257         r->regmlen = 0;
00258         scan = r->program+1;                    /* First BRANCH. */
00259         if (OP(regnext(scan)) == END) {         /* Only one top-level choice. */
00260                 scan = OPERAND(scan);
00261 
00262                 /* Starting-point info. */
00263                 if (OP(scan) == EXACTLY)
00264                         r->regstart = *OPERAND(scan);
00265                 else if (OP(scan) == BOL)
00266                         r->reganch++;
00267 
00268                 /*
00269                  * If there's something expensive in the r.e., find the
00270                  * longest literal string that must appear and make it the
00271                  * regmust.  Resolve ties in favor of later strings, since
00272                  * the regstart check works with the beginning of the r.e.
00273                  * and avoiding duplication strengthens checking.  Not a
00274                  * strong reason, but sufficient in the absence of others.
00275                  */
00276                 if (flags&SPSTART) {
00277                         longest = NULL;
00278                         len = 0;
00279                         for (; scan != NULL; scan = regnext(scan))
00280                                 if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
00281                                         longest = OPERAND(scan);
00282                                         len = strlen(OPERAND(scan));
00283                                 }
00284                         r->regmust = longest;
00285                         r->regmlen = len;
00286                 }
00287         }
00288 
00289         return(r);
00290 }

Here is the call graph for this function:

int regexec register regexp prog,
register char *  string
 

Definition at line 792 of file regexp.c.

References regbol, regerror(), regtry(), s, string, and UCHARAT.

00795 {
00796         register char *s;
00797 
00798         /* Be paranoid... */
00799         if (prog == NULL || string == NULL) {
00800                 regerror("NULL parameter");
00801                 return(0);
00802         }
00803 
00804         /* Check validity of program. */
00805         if (UCHARAT(prog->program) != MAGIC) {
00806                 regerror("corrupted program");
00807                 return(0);
00808         }
00809 
00810         /* If there is a "must appear" string, look for it. */
00811         if (prog->regmust != NULL) {
00812                 s = (char *)string;
00813                 while ((s = strchr(s, prog->regmust[0])) != NULL) {
00814                         if (strncmp(s, prog->regmust, prog->regmlen) == 0)
00815                                 break;  /* Found it. */
00816                         s++;
00817                 }
00818                 if (s == NULL)  /* Not present. */
00819                         return(0);
00820         }
00821 
00822         /* Mark beginning of line for ^ . */
00823         regbol = (char *)string;
00824 
00825         /* Simplest case:  anchored match need be tried only once. */
00826         if (prog->reganch)
00827                 return(regtry(prog, string));
00828 
00829         /* Messy cases:  unanchored match. */
00830         s = (char *)string;
00831         if (prog->regstart != '\0')
00832                 /* We know what char it must start with. */
00833                 while ((s = strchr(s, prog->regstart)) != NULL) {
00834                         if (regtry(prog, s))
00835                                 return(1);
00836                         s++;
00837                 }
00838         else
00839                 /* We don't -- general case. */
00840                 do {
00841                         if (regtry(prog, s))
00842                                 return(1);
00843                 } while (*s++ != '\0');
00844 
00845         /* Failure. */
00846         return(0);
00847 }

Here is the call graph for this function:

void reginsert char  op,
char *  opnd
 

Definition at line 691 of file regexp.c.

References regcode, and regsize.

Referenced by regpiece().

00694 {
00695         register char *src;
00696         register char *dst;
00697         register char *place;
00698 
00699         if (regcode == &regdummy) {
00700                 regsize += 3;
00701                 return;
00702         }
00703 
00704         src = regcode;
00705         regcode += 3;
00706         dst = regcode;
00707         while (src > opnd)
00708                 *--dst = *--src;
00709 
00710         place = opnd;           /* Op node, where operand used to be. */
00711         *place++ = op;
00712         *place++ = '\0';
00713         *place++ = '\0';
00714 }

int regmatch char *  prog  ) 
 

Definition at line 890 of file regexp.c.

References ANY, ANYBUT, ANYOF, BACK, BOL, BRANCH, CLOSE, END, EOL, EXACTLY, fprintf(), NOTHING, OP, OPEN, OPERAND, PLUS, regbol, regendp, regerror(), reginput, regnext(), regrepeat(), regstartp, STAR, WORDA, and WORDZ.

Referenced by regtry().

00891 {
00892         register char *scan;    /* Current node. */
00893         char *next;             /* Next node. */
00894 
00895         scan = prog;
00896 #ifdef DEBUG
00897         if (scan != NULL && regnarrate)
00898                 fprintf(stderr, "%s(\n", regprop(scan));
00899 #endif
00900         while (scan != NULL) {
00901 #ifdef DEBUG
00902                 if (regnarrate)
00903                         fprintf(stderr, "%s...\n", regprop(scan));
00904 #endif
00905                 next = regnext(scan);
00906 
00907                 switch (OP(scan)) {
00908                 case BOL:
00909                         if (reginput != regbol)
00910                                 return(0);
00911                         break;
00912                 case EOL:
00913                         if (*reginput != '\0')
00914                                 return(0);
00915                         break;
00916                 case WORDA:
00917                         /* Must be looking at a letter, digit, or _ */
00918                         if ((!isalnum(*reginput)) && *reginput != '_')
00919                                 return(0);
00920                         /* Prev must be BOL or nonword */
00921                         if (reginput > regbol &&
00922                             (isalnum(reginput[-1]) || reginput[-1] == '_'))
00923                                 return(0);
00924                         break;
00925                 case WORDZ:
00926                         /* Must be looking at non letter, digit, or _ */
00927                         if (isalnum(*reginput) || *reginput == '_')
00928                                 return(0);
00929                         /* We don't care what the previous char was */
00930                         break;
00931                 case ANY:
00932                         if (*reginput == '\0')
00933                                 return(0);
00934                         reginput++;
00935                         break;
00936                 case EXACTLY: {
00937                                 register int len;
00938                                 register char *opnd;
00939 
00940                                 opnd = OPERAND(scan);
00941                                 /* Inline the first character, for speed. */
00942                                 if (*opnd != *reginput)
00943                                         return(0);
00944                                 len = strlen(opnd);
00945                                 if (len > 1 && strncmp(opnd, reginput, len) != 0)
00946                                         return(0);
00947                                 reginput += len;
00948                         }
00949                         break;
00950                 case ANYOF:
00951                         if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL)
00952                                 return(0);
00953                         reginput++;
00954                         break;
00955                 case ANYBUT:
00956                         if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL)
00957                                 return(0);
00958                         reginput++;
00959                         break;
00960                 case NOTHING:
00961                         break;
00962                 case BACK:
00963                         break;
00964                 case OPEN+1:
00965                 case OPEN+2:
00966                 case OPEN+3:
00967                 case OPEN+4:
00968                 case OPEN+5:
00969                 case OPEN+6:
00970                 case OPEN+7:
00971                 case OPEN+8:
00972                 case OPEN+9: {
00973                                 register int no;
00974                                 register char *save;
00975 
00976                                 no = OP(scan) - OPEN;
00977                                 save = reginput;
00978 
00979                                 if (regmatch(next)) {
00980                                         /*
00981                                          * Don't set startp if some later
00982                                          * invocation of the same parentheses
00983                                          * already has.
00984                                          */
00985                                         if (regstartp[no] == NULL)
00986                                                 regstartp[no] = save;
00987                                         return(1);
00988                                 } else
00989                                         return(0);
00990                         }
00991                         break;
00992                 case CLOSE+1:
00993                 case CLOSE+2:
00994                 case CLOSE+3:
00995                 case CLOSE+4:
00996                 case CLOSE+5:
00997                 case CLOSE+6:
00998                 case CLOSE+7:
00999                 case CLOSE+8:
01000                 case CLOSE+9: {
01001                                 register int no;
01002                                 register char *save;
01003 
01004                                 no = OP(scan) - CLOSE;
01005                                 save = reginput;
01006 
01007                                 if (regmatch(next)) {
01008                                         /*
01009                                          * Don't set endp if some later
01010                                          * invocation of the same parentheses
01011                                          * already has.
01012                                          */
01013                                         if (regendp[no] == NULL)
01014                                                 regendp[no] = save;
01015                                         return(1);
01016                                 } else
01017                                         return(0);
01018                         }
01019                         break;
01020                 case BRANCH: {
01021                                 register char *save;
01022 
01023                                 if (OP(next) != BRANCH)         /* No choice. */
01024                                         next = OPERAND(scan);   /* Avoid recursion. */
01025                                 else {
01026                                         do {
01027                                                 save = reginput;
01028                                                 if (regmatch(OPERAND(scan)))
01029                                                         return(1);
01030                                                 reginput = save;
01031                                                 scan = regnext(scan);
01032                                         } while (scan != NULL && OP(scan) == BRANCH);
01033                                         return(0);
01034                                         /* NOTREACHED */
01035                                 }
01036                         }
01037                         break;
01038                 case STAR:
01039                 case PLUS: {
01040                                 register char nextch;
01041                                 register int no;
01042                                 register char *save;
01043                                 register int min;
01044 
01045                                 /*
01046                                  * Lookahead to avoid useless match attempts
01047                                  * when we know what character comes next.
01048                                  */
01049                                 nextch = '\0';
01050                                 if (OP(next) == EXACTLY)
01051                                         nextch = *OPERAND(next);
01052                                 min = (OP(scan) == STAR) ? 0 : 1;
01053                                 save = reginput;
01054                                 no = regrepeat(OPERAND(scan));
01055                                 while (no >= min) {
01056                                         /* If it could work, try it. */
01057                                         if (nextch == '\0' || *reginput == nextch)
01058                                                 if (regmatch(next))
01059                                                         return(1);
01060                                         /* Couldn't or didn't -- back up. */
01061                                         no--;
01062                                         reginput = save + no;
01063                                 }
01064                                 return(0);
01065                         }
01066                         break;
01067                 case END:
01068                         return(1);      /* Success! */
01069                         break;
01070                 default:
01071                         regerror("memory corruption");
01072                         return(0);
01073                         break;
01074                 }
01075 
01076                 scan = next;
01077         }
01078 
01079         /*
01080          * We get here only if there's trouble -- normally "case END" is
01081          * the terminating point.
01082          */
01083         regerror("corrupted pointers");
01084         return(0);
01085 }

Here is the call graph for this function:

char * regnext register char *  p  ) 
 

Definition at line 1136 of file regexp.c.

References NEXT, OP, and p.

Referenced by reg(), regcomp(), regmatch(), and regtail().

01137 {
01138         register int offset;
01139 
01140         if (p == &regdummy)
01141                 return(NULL);
01142 
01143         offset = NEXT(p);
01144         if (offset == 0)
01145                 return(NULL);
01146 
01147         if (OP(p) == BACK)
01148                 return(p-offset);
01149         else
01150                 return(p+offset);
01151 }

char * regnode int  op  ) 
 

Definition at line 653 of file regexp.c.

References regcode, and regsize.

Referenced by reg(), regatom(), regbranch(), and regpiece().

00654 {
00655         register char *ret;
00656         register char *ptr;
00657 
00658         ret = regcode;
00659         if (ret == &regdummy) {
00660                 regsize += 3;
00661                 return(ret);
00662         }
00663 
00664         ptr = ret;
00665         *ptr++ = op;
00666         *ptr++ = '\0';          /* Null "next" pointer. */
00667         *ptr++ = '\0';
00668         regcode = ptr;
00669 
00670         return(ret);
00671 }

void regoptail char *  p,
char *  val
 

Definition at line 753 of file regexp.c.

References OP, OPERAND, p, regdummy, and regtail().

Referenced by reg(), and regpiece().

00756 {
00757         /* "Operandless" and "op != BRANCH" are synonymous in practice. */
00758         if (p == NULL || p == &regdummy || OP(p) != BRANCH)
00759                 return;
00760         regtail(OPERAND(p), val);
00761 }

Here is the call graph for this function:

char * regpiece int *  flagp  ) 
 

Definition at line 413 of file regexp.c.

References BACK, BRANCH, FAIL, ISMULT, NOTHING, PLUS, regatom(), reginsert(), regnode(), regoptail(), regparse, regtail(), STAR, and WORST.

Referenced by regbranch().

00414 {
00415         register char *ret;
00416         register char op;
00417         register char *next;
00418         int flags;
00419 
00420         ret = regatom(&flags);
00421         if (ret == NULL)
00422                 return(NULL);
00423 
00424         op = *regparse;
00425         if (!ISMULT(op)) {
00426                 *flagp = flags;
00427                 return(ret);
00428         }
00429 
00430         if (!(flags&HASWIDTH) && op != '?')
00431                 FAIL("*+ operand could be empty");
00432         *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
00433 
00434         if (op == '*' && (flags&SIMPLE))
00435                 reginsert(STAR, ret);
00436         else if (op == '*') {
00437                 /* Emit x* as (x&|), where & means "self". */
00438                 reginsert(BRANCH, ret);                 /* Either x */
00439                 regoptail(ret, regnode(BACK));          /* and loop */
00440                 regoptail(ret, ret);                    /* back */
00441                 regtail(ret, regnode(BRANCH));          /* or */
00442                 regtail(ret, regnode(NOTHING));         /* null. */
00443         } else if (op == '+' && (flags&SIMPLE))
00444                 reginsert(PLUS, ret);
00445         else if (op == '+') {
00446                 /* Emit x+ as x(&|), where & means "self". */
00447                 next = regnode(BRANCH);                 /* Either */
00448                 regtail(ret, next);
00449                 regtail(regnode(BACK), ret);            /* loop back */
00450                 regtail(next, regnode(BRANCH));         /* or */
00451                 regtail(ret, regnode(NOTHING));         /* null. */
00452         } else if (op == '?') {
00453                 /* Emit x? as (x|) */
00454                 reginsert(BRANCH, ret);                 /* Either x */
00455                 regtail(ret, regnode(BRANCH));          /* or */
00456                 next = regnode(NOTHING);                /* null. */
00457                 regtail(ret, next);
00458                 regoptail(ret, next);
00459         }
00460         regparse++;
00461         if (ISMULT(*regparse))
00462                 FAIL("nested *?+");
00463 
00464         return(ret);
00465 }

Here is the call graph for this function:

int regrepeat char *  p  ) 
 

Definition at line 1091 of file regexp.c.

References ANY, ANYBUT, ANYOF, EXACTLY, OP, OPERAND, p, regerror(), and reginput.

Referenced by regmatch().

01092 {
01093         register int count = 0;
01094         register char *scan;
01095         register char *opnd;
01096 
01097         scan = reginput;
01098         opnd = OPERAND(p);
01099         switch (OP(p)) {
01100         case ANY:
01101                 count = strlen(scan);
01102                 scan += count;
01103                 break;
01104         case EXACTLY:
01105                 while (*opnd == *scan) {
01106                         count++;
01107                         scan++;
01108                 }
01109                 break;
01110         case ANYOF:
01111                 while (*scan != '\0' && strchr(opnd, *scan) != NULL) {
01112                         count++;
01113                         scan++;
01114                 }
01115                 break;
01116         case ANYBUT:
01117                 while (*scan != '\0' && strchr(opnd, *scan) == NULL) {
01118                         count++;
01119                         scan++;
01120                 }
01121                 break;
01122         default:                /* Oh dear.  Called inappropriately. */
01123                 regerror("internal foulup");
01124                 count = 0;      /* Best compromise. */
01125                 break;
01126         }
01127         reginput = scan;
01128 
01129         return(count);
01130 }

Here is the call graph for this function:

void regtail char *  p,
char *  val
 

Definition at line 720 of file regexp.c.

References OP, p, and regnext().

Referenced by reg(), regbranch(), regoptail(), and regpiece().

00723 {
00724         register char *scan;
00725         register char *temp;
00726         register int offset;
00727 
00728         if (p == &regdummy)
00729                 return;
00730 
00731         /* Find last node. */
00732         scan = p;
00733         for (;;) {
00734                 temp = regnext(scan);
00735                 if (temp == NULL)
00736                         break;
00737                 scan = temp;
00738         }
00739 
00740         if (OP(scan) == BACK)
00741                 offset = scan - val;
00742         else
00743                 offset = val - scan;
00744         *(scan+1) = (offset>>8)&0377;
00745         *(scan+2) = offset&0377;
00746 }

Here is the call graph for this function:

int regtry regexp prog,
char *  string
 

Definition at line 853 of file regexp.c.

References regexp::endp, i, regexp::program, regendp, reginput, regmatch(), regstartp, and regexp::startp.

Referenced by regexec().

00856 {
00857         register int i;
00858         register char **sp;
00859         register char **ep;
00860 
00861         reginput = string;
00862         regstartp = prog->startp;
00863         regendp = prog->endp;
00864 
00865         sp = prog->startp;
00866         ep = prog->endp;
00867         for (i = NSUBEXP; i > 0; i--) {
00868                 *sp++ = NULL;
00869                 *ep++ = NULL;
00870         }
00871         if (regmatch(prog->program + 1)) {
00872                 prog->startp[0] = string;
00873                 prog->endp[0] = reginput;
00874                 return(1);
00875         } else
00876                 return(0);
00877 }

Here is the call graph for this function:


Variable Documentation

char* regbol [static]
 

Definition at line 771 of file regexp.c.

Referenced by regexec(), and regmatch().

char* regcode [static]
 

Definition at line 174 of file regexp.c.

Referenced by regc(), regcomp(), reginsert(), and regnode().

char regdummy [static]
 

Definition at line 173 of file regexp.c.

Referenced by regoptail().

char** regendp [static]
 

Definition at line 773 of file regexp.c.

Referenced by regmatch(), and regtry().

char* reginput [static]
 

Definition at line 770 of file regexp.c.

Referenced by regmatch(), regrepeat(), and regtry().

int regnpar [static]
 

Definition at line 172 of file regexp.c.

Referenced by reg(), and regcomp().

char* regparse [static]
 

Definition at line 171 of file regexp.c.

Referenced by reg(), regatom(), regbranch(), regcomp(), and regpiece().

long regsize [static]
 

Definition at line 175 of file regexp.c.

Referenced by regc(), regcomp(), reginsert(), and regnode().

char** regstartp [static]
 

Definition at line 772 of file regexp.c.

Referenced by regmatch(), and regtry().


Generated on Mon Nov 8 17:08:16 2004 for MPT by  doxygen 1.3.9.1