Main Page | Data Structures | Directories | File List | Data Fields | Globals | Related Pages

tcregex.c

Go to the documentation of this file.
00001 /*
00002  * finflect - Algorithms and tools for inflecting Finnish nouns
00003  * Copyright (C) 2004, 2005  The FinFlect Team
00004  * 
00005  * This library is free software; you can redistribute it and/or
00006  * modify it under the terms of the GNU Lesser General Public
00007  * License as published by the Free Software Foundation; either
00008  * version 2.1 of the License, or (at your option) any later version.
00009  * 
00010  * This library is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * Lesser General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU Lesser General Public
00016  * License along with this library; if not, write to the Free Software
00017  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00018  * 
00019  * 
00020  * For the complete legal text of the GNU Lesser General Public License,
00021  * see the file LICENSE. For a complete list of authors and copyright
00022  * holders, see the file AUTHORS.
00023  */
00024 
00025 /**
00026  * @file tcregex.c Regex substitution implementation from tclib.
00027  */
00028 
00029 /*
00030     Copyright (C) 2003  Michael Ahlberg, Måns Rullgård
00031  
00032     Permission is hereby granted, free of charge, to any person
00033     obtaining a copy of this software and associated documentation
00034     files (the "Software"), to deal in the Software without
00035     restriction, including without limitation the rights to use, copy,
00036     modify, merge, publish, distribute, sublicense, and/or sell copies
00037     of the Software, and to permit persons to whom the Software is
00038     furnished to do so, subject to the following conditions:
00039  
00040     The above copyright notice and this permission notice shall be
00041     included in all copies or substantial portions of the Software.
00042  
00043     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00044     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00045     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00046     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
00047     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
00048     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00049     OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
00050     DEALINGS IN THE SOFTWARE.
00051 **/
00052 
00053 #include <stdio.h>
00054 #include <stdlib.h>
00055 #include <string.h>
00056 #include <strings.h>
00057 /*#include <tcstring.h>
00058 #include <tctypes.h>*/
00059 
00060 #ifdef __WIN32__
00061 #include "../windows/regex/regex.h"
00062 #else
00063 #include <regex.h>
00064 #endif
00065 
00066 #include "tcrip.h"
00067 
/**
 * Per-match state handed to rs_lookup() through its opaque data pointer.
 */
typedef struct regsub {
  regex_t rx;     /* compiled pattern; rx.re_nsub is the highest group index */
  regmatch_t *m;  /* match offsets, indexed by group number (0 = whole match) */
  const char *s;  /* the string the offsets in m are relative to */
} regsub_t;

/**
 * Resolve a group reference to the text that group matched.
 *
 * @param n  Reference name. It must consist entirely of an integer as
 *           parsed by strtol() with base 0 (decimal, 0x hex or 0 octal).
 * @param d  Pointer to the regsub_t describing the current match.
 * @return   A newly malloc()ed, NUL-terminated copy of the text matched by
 *           group n; an empty string if the group did not take part in the
 *           match. NULL if n is empty, not a number, out of the range
 *           0..re_nsub, or if the allocation fails. The caller owns the
 *           returned buffer.
 */
static char *rs_lookup(char *n, void *d) {
  regsub_t *rs = d;
  char *end;
  char *text;
  size_t len = 0;
  long idx;

  idx = strtol(n, &end, 0);

  /* Reject empty names and trailing garbage. */
  if (end == n || *end)
    return NULL;

  /* Range-check as long first: narrowing to unsigned int before the check
   * would let values such as 2^32 wrap around to a valid group number. */
  if (idx < 0 || (size_t)idx > rs->rx.re_nsub)
    return NULL;

  /* rm_so < 0 marks a group that did not participate; it yields "". */
  if (rs->m[idx].rm_so >= 0)
    len = (size_t)(rs->m[idx].rm_eo - rs->m[idx].rm_so);

  text = malloc(len + 1);
  if (!text)
    return NULL;

  if (len)
    memcpy(text, rs->s + rs->m[idx].rm_so, len);
  text[len] = '\0';

  return text;
}
00095 
00096 char* tcregsub(const char *str, const char *pat, const char *sub, int cflags) {
00097   regsub_t rs;
00098   const char *s;
00099   char *ss, *p;
00100   int r, l;
00101 
00102   if((r = regcomp(&rs.rx, pat, cflags))) {
00103     char buf[256];
00104     regerror(r, &rs.rx, buf, sizeof(buf));
00105     fprintf(stderr, "tcregsub: %s\n", buf);
00106     return NULL;
00107   }
00108 
00109   rs.m = calloc(rs.rx.re_nsub + 1, sizeof(*rs.m));
00110   l = strlen(str);
00111   ss = malloc(l + 1);
00112   p = ss;
00113   s = str;
00114 
00115   while(*s) {
00116     int ml;
00117     if(regexec(&rs.rx, s, rs.rx.re_nsub + 1, rs.m, 0))
00118       break;
00119 
00120     strncpy(p, s, rs.m[0].rm_so);
00121     p += rs.m[0].rm_so;
00122     ml = rs.m[0].rm_eo - rs.m[0].rm_so;
00123     if(ml > 0) {
00124       char *ms = malloc(ml + 1);
00125       char *rp;
00126       int sl, o;
00127 
00128       rs.s = s;
00129       rp = tcstrexp(sub, "{", "}", 0, rs_lookup, &rs,
00130                     TCSTREXP_KEEPUNDEF | TCSTREXP_FREE);
00131       sl = strlen(rp);
00132       o = p - ss;
00133       ss = realloc(ss, l += sl);
00134       p = ss + o;
00135 
00136       strcpy(p, rp);
00137       p += sl;
00138       free(rp);
00139       free(ms);
00140     }
00141     s += rs.m[0].rm_eo;
00142   }
00143 
00144   strcpy(p, s);
00145   free(rs.m);
00146   regfree(&rs.rx);
00147   return ss;
00148 }
00149 

Generated on Thu Jun 2 23:16:59 2005 for FinFlect by  doxygen 1.4.2