Main Page | Data Structures | Directories | File List | Data Fields | Globals | Related Pages

tcregex.c

Go to the documentation of this file.
00001 /*
00002  * finflect - Algorithms and tools for inflecting Finnish nouns
00003  * Copyright (C) 2004, 2005  The FinFlect Team
00004  * 
00005  * This library is free software; you can redistribute it and/or
00006  * modify it under the terms of the GNU Lesser General Public
00007  * License as published by the Free Software Foundation; either
00008  * version 2.1 of the License, or (at your option) any later version.
00009  * 
00010  * This library is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * Lesser General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU Lesser General Public
00016  * License along with this library; if not, write to the Free Software
00017  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00018  * 
00019  * 
00020  * For the complete legal text of the GNU Lesser General Public License,
00021  * see the file LICENSE. For a complete list of authors and copyright
00022  * holders, see the file AUTHORS.
00023  */
00024 
00025 /**
00026  * @file tcregex.c Regex substitution implementation from tclib.
00027  */
00028  
00029 /*
00030     Copyright (C) 2003  Michael Ahlberg, Måns Rullgård
00031  
00032     Permission is hereby granted, free of charge, to any person
00033     obtaining a copy of this software and associated documentation
00034     files (the "Software"), to deal in the Software without
00035     restriction, including without limitation the rights to use, copy,
00036     modify, merge, publish, distribute, sublicense, and/or sell copies
00037     of the Software, and to permit persons to whom the Software is
00038     furnished to do so, subject to the following conditions:
00039  
00040     The above copyright notice and this permission notice shall be
00041     included in all copies or substantial portions of the Software.
00042  
00043     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00044     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00045     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00046     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
00047     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
00048     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00049     OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
00050     DEALINGS IN THE SOFTWARE.
00051 **/
00052 
00053 #include <stdio.h>
00054 #include <stdlib.h>
00055 #include <string.h>
00056 #include <strings.h>
00057 /*#include <tcstring.h>
00058 #include <tctypes.h>*/
00059 #include <regex.h>
00060 
00061 #include "tcrip.h"
00062 
/**
 * State shared between tcregsub() and its rs_lookup() callback for a
 * single match.
 */
typedef struct regsub {
  regex_t rx;       /* compiled pattern; rx.re_nsub is the group count */
  regmatch_t *m;    /* match offsets, indexed by group number (0 = whole match) */
  const char *s;    /* string the offsets in m are relative to */
} regsub_t;
00070 
00071 static char *rs_lookup(char *n, void *d)
00072 {
00073   regsub_t *rs = d;
00074   char *t;
00075   int ml;
00076   unsigned int m = strtol(n, &t, 0);
00077 
00078   if(*t)
00079     return NULL;
00080   if(m > rs->rx.re_nsub)
00081     return NULL;
00082   if(rs->m[m].rm_so < 0)
00083     return strdup("");
00084 
00085   ml = rs->m[m].rm_eo - rs->m[m].rm_so;
00086   t = malloc(ml + 1);
00087   strncpy(t, rs->s + rs->m[m].rm_so, ml);
00088   t[ml] = 0;
00089 
00090   return t;
00091 }
00092 
00093 char* tcregsub(const char *str, const char *pat, const char *sub, int cflags)
00094 {
00095   regsub_t rs;
00096   const char *s;
00097   char *ss, *p;
00098   int r, l;
00099 
00100   if((r = regcomp(&rs.rx, pat, cflags)))
00101   {
00102     char buf[256];
00103     regerror(r, &rs.rx, buf, sizeof(buf));
00104     fprintf(stderr, "tcregsub: %s\n", buf);
00105     return NULL;
00106   }
00107 
00108   rs.m = calloc(rs.rx.re_nsub + 1, sizeof(*rs.m));
00109   l = strlen(str);
00110   ss = malloc(l + 1);
00111   p = ss;
00112   s = str;
00113 
00114   while(*s)
00115   {
00116     int ml;
00117     if(regexec(&rs.rx, s, rs.rx.re_nsub + 1, rs.m, 0))
00118       break;
00119 
00120     strncpy(p, s, rs.m[0].rm_so);
00121     p += rs.m[0].rm_so;
00122     ml = rs.m[0].rm_eo - rs.m[0].rm_so;
00123     if(ml > 0)
00124     {
00125       char *ms = malloc(ml + 1);
00126       char *rp;
00127       int sl, o;
00128 
00129       rs.s = s;
00130       rp = tcstrexp(sub, "{", "}", 0, rs_lookup, &rs,
00131                     TCSTREXP_KEEPUNDEF | TCSTREXP_FREE);
00132       sl = strlen(rp);
00133       o = p - ss;
00134       ss = realloc(ss, l += sl);
00135       p = ss + o;
00136 
00137       strcpy(p, rp);
00138       p += sl;
00139       free(rp);
00140       free(ms);
00141     }
00142     s += rs.m[0].rm_eo;
00143   }
00144 
00145   strcpy(p, s);
00146   free(rs.m);
00147   regfree(&rs.rx);
00148   return ss;
00149 }
00150 

Generated on Sun May 15 21:50:47 2005 for FinFlect by  doxygen 1.4.1