Main Page | Data Structures | Directories | File List | Data Fields | Globals | Related Pages

tcstring.c

Go to the documentation of this file.
00001 /*
00002  * finflect - Algorithms and tools for inflecting Finnish nouns
00003  * Copyright (C) 2004, 2005  The FinFlect Team
00004  * 
00005  * This library is free software; you can redistribute it and/or
00006  * modify it under the terms of the GNU Lesser General Public
00007  * License as published by the Free Software Foundation; either
00008  * version 2.1 of the License, or (at your option) any later version.
00009  * 
00010  * This library is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * Lesser General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU Lesser General Public
00016  * License along with this library; if not, write to the Free Software
00017  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00018  * 
00019  * 
00020  * For the complete legal text of the GNU Lesser General Public License,
00021  * see the file LICENSE. For a complete list of authors and copyright
00022  * holders, see the file AUTHORS.
00023  */
00024 
00025 /**
00026  * @file tcstring.c A few support functions necessary for tcregsub.
00027  */
00028 
00029 
00030 /**
00031     Copyright (C) 2003  Michael Ahlberg, Måns Rullgård
00032  
00033     Permission is hereby granted, free of charge, to any person
00034     obtaining a copy of this software and associated documentation
00035     files (the "Software"), to deal in the Software without
00036     restriction, including without limitation the rights to use, copy,
00037     modify, merge, publish, distribute, sublicense, and/or sell copies
00038     of the Software, and to permit persons to whom the Software is
00039     furnished to do so, subject to the following conditions:
00040  
00041     The above copyright notice and this permission notice shall be
00042     included in all copies or substantial portions of the Software.
00043  
00044     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00045     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00046     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00047     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
00048     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
00049     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00050     OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
00051     DEALINGS IN THE SOFTWARE.
00052 **/
00053 
00054 #include <stdio.h>
00055 #include <stdlib.h>
00056 #include <string.h>
00057 #include <strings.h>
00058 /*#include <tcstring.h>
00059 #include <tctypes.h>*/
00060 #include <ctype.h>
00061 #include <limits.h>
00062 
00063 #include "tcrip.h"
00064 
/*
 * Numeric value (0-15) of an ASCII hexadecimal digit character.
 * The argument must already be known to be a hex digit, and it is
 * evaluated more than once, so it must not have side effects.
 */
#define hex(x) (((x)<0x3a)? ((x)-'0'): (((x)<0x60)? ((x)-0x37): ((x)-0x57)))

/**
 * Decode the body of one backslash escape sequence.
 *
 * Recognised sequences (the leading backslash has already been consumed
 * by the caller):
 *   t n r f b a e   the usual control characters (e = ESC, 0x1b)
 *   0-3 + digits    up to three octal digits in total
 *   xHH             exactly two hexadecimal digits
 *   cX              control character: (toupper(X) - 0x40) & 0x7f
 * Any other character stands for itself.  An "x" or "c" that is not
 * followed by the required characters also stands for itself.
 *
 * @param d  Receives the single decoded character.
 * @param s  Points at the character following the backslash.
 * @return   Pointer to the first character after the escape sequence.
 *           Never points past the string terminator.
 */
static const char *
escape(char *d, const char *s) {
  unsigned int oct;
  int i;

  switch(*s) {
  case '\0':
    /* Lone backslash at the end of the string: keep it literally and do
       NOT step over the terminator, otherwise the caller's scan loop
       would run off the end of the input. */
    *d = '\\';
    break;
  case 't': *d = '\t'; s++; break;
  case 'n': *d = '\n'; s++; break;
  case 'r': *d = '\r'; s++; break;
  case 'f': *d = '\f'; s++; break;
  case 'b': *d = '\b'; s++; break;
  case 'a': *d = '\a'; s++; break;
  case 'e': *d = 0x1b; s++; break;
  case '0':
  case '1':
  case '2':
  case '3':
    /* Only 0-7 are octal digits; 8 and 9 end the sequence. */
    oct = 0;
    for(i = 0; i < 3 && *s >= '0' && *s <= '7'; i++)
      oct = oct * 8 + (unsigned int) (*s++ - '0');
    *d = (char) oct;
    break;
  case 'x':
    /* isxdigit(0) is false, so the terminator is never skipped and
       s[2] is only inspected when s[1] is a real character. */
    if(isxdigit((unsigned char) s[1]) && isxdigit((unsigned char) s[2])) {
      *d = (char) (hex(s[1]) * 16 + hex(s[2]));
      s += 3;
    } else {
      *d = *s++;
    }
    break;
  case 'c':
    if(s[1]) {
      *d = (char) ((toupper((unsigned char) s[1]) - 0x40) & 0x7f);
      s += 2;
    } else {
      *d = *s++;
    }
    break;
  default:
    *d = *s++;
    break;
  }

  return s;
}
00114 
00115 extern char *
00116   tcstrexp(const char *s, const char *sd, const char *ed, char fs,
00117            char *(*lookup)(char *, void *), void *ld, int flags) {
00118   int l = strlen(s) + 1;
00119   char *expo = malloc(l);
00120   char *p = expo;
00121   char *d, *f;
00122 
00123 #define ext(n) do {                             \
00124     int o = p - expo;                           \
00125     expo = realloc(expo, l += n);                       \
00126     p = expo + o;                               \
00127 } while(0)
00128 
00129   while(*s) {
00130     switch(*s) {
00131     case '\\':
00132       if(flags & TCSTREXP_ESCAPE) {
00133         s = escape(p++, ++s);
00134       } else {
00135         *p++ = *s++;
00136       }
00137       break;
00138 
00139     case '$':
00140       d = strchr(sd, *++s);
00141       if(d) {
00142         const char *e = ++s;
00143         char ec = ed[d - sd];
00144         int n = 0;
00145 
00146         /* Find the matching closing paren */
00147         while(*e) {
00148           if(*e == *d && *(e - 1) == '$') {
00149             n++;
00150           } else if(*e == ec) {
00151             if(!n)
00152               break;
00153             n--;
00154           }
00155           e++;
00156         }
00157 
00158         if(*e) {
00159           int vl = e - s;
00160           char *vn = malloc(vl + 1);
00161           char *v;
00162           char *def = NULL, *alt = NULL;
00163           int upcase = 0, downcase = 0;
00164           int sss = 0, ssl = INT_MAX;
00165           char *rx = NULL, *rsub = NULL, rd;
00166 
00167           strncpy(vn, s, vl);
00168           vn[vl] = 0;
00169           if(fs && (f = strchr(vn, fs))) {
00170             int fl = 1;
00171             *f++ = 0;
00172             while(fl && *f) {
00173               switch(*f++) {
00174               case '-':
00175                 def = f;
00176                 fl = 0;
00177                 break;
00178               case '+':
00179                 alt = f;
00180                 fl = 0;
00181                 break;
00182               case 'u':
00183                 upcase = 1;
00184                 break;
00185               case 'l':
00186                 downcase = 1;
00187                 break;
00188               case 's':
00189                 f++;
00190               case '0':
00191               case '1':
00192               case '2':
00193               case '3':
00194               case '4':
00195               case '5':
00196               case '6':
00197               case '7':
00198               case '8':
00199               case '9':
00200                 sss = strtol(f-1, &f, 0);
00201                 if(*f == ':') {
00202                   f++;
00203                   ssl = strtol(f, &f, 0);
00204                 }
00205                 break;
00206               case '/':
00207                 f--;
00208               case 'r':
00209                 rd = *f;
00210                 rx = ++f;
00211                 if((rsub = strchr(rx, rd))) {
00212                   char *re;
00213                   *rsub++ = 0;
00214                   if((re = strchr(rsub, rd))) {
00215                     *re = 0;
00216                     f = re + 1;
00217                   } else {
00218                     fl = 0;
00219                   }
00220                 } else {
00221                   rx = NULL;
00222                 }
00223                 break;
00224               }
00225             }
00226           }
00227           if((v = lookup(vn, ld))) {
00228             char *ov = v;
00229             if(alt)
00230               v = tcstrexp(alt, sd, ed, fs, lookup, ld, flags);
00231             else
00232               v = strdup(v);
00233             if(flags & TCSTREXP_FREE)
00234               free(ov);
00235           } else if(def) {
00236             v = tcstrexp(def, sd, ed, fs, lookup, ld, flags);
00237           }
00238           if(v) {
00239             int sl = strlen(v);
00240             char *vo = v;
00241 
00242             if(sss < 0) {
00243               if(-sss < sl) {
00244                 v += sl + sss;
00245                 sl = -sss;
00246               }
00247             } else if(sss <= sl) {
00248               v += sss;
00249               sl -= sss;
00250             } else {
00251               v += sl;
00252               sl = 0;
00253             }
00254 
00255             if(ssl < 0) {
00256               if(-ssl < sl) {
00257                 v[sl + ssl] = 0;
00258                 sl += ssl;
00259               } else {
00260                 sl = 0;
00261               }
00262             } else if(ssl < sl) {
00263               v[ssl] = 0;
00264               sl = ssl;
00265             }
00266 
00267             if(rx) {
00268               char *rs = tcregsub(v, rx, rsub, 0);
00269               if(rs) {
00270                 free(vo);
00271                 vo = v = rs;
00272                 sl = strlen(rs);
00273               }
00274             }
00275 
00276             if(sl) {
00277               ext(sl + 1);
00278 
00279               if(upcase) {
00280                 char *c = v;
00281                 while(*c)
00282                   *p++ = toupper(*c++);
00283               } else if(downcase) {
00284                 char *c = v;
00285                 while(*c)
00286                   *p++ = tolower(*c++);
00287               } else {
00288                 strcpy(p, v);
00289                 p += sl;
00290               }
00291             }
00292             free(vo);
00293           } else if(flags & TCSTREXP_KEEPUNDEF) {
00294             int n = e - s + 3;
00295             ext(n);
00296             memcpy(p, s - 2, n);
00297             p += n;
00298           }
00299           s = e + 1;
00300           free(vn);
00301         }
00302       } else {
00303         *p++ = '$';
00304       }
00305       break;
00306     default:
00307       *p++ = *s++;
00308       break;
00309     }
00310   }
00311 
00312   *p = 0;
00313 
00314 #undef ext
00315 
00316   return expo;
00317 }
00318 
/**
 * Copy src to dst, decoding backslash escape sequences on the way.
 *
 * Every escape sequence consumes at least two input characters and
 * produces exactly one output character, so the result is never longer
 * than the input: dst needs room for strlen(src) + 1 bytes.
 *
 * A backslash that is the last character of src is copied literally.
 *
 * @param dst  Destination buffer; receives a NUL-terminated string.
 * @param src  NUL-terminated input string.
 * @return     Number of characters written to dst, not counting the
 *             terminating NUL.
 */
extern int
tcstresc(char *dst, const char *src) {
  char *d = dst;

  while(*src) {
    /* Only hand over to escape() when something follows the backslash;
       otherwise the scan could step over the string terminator. */
    if(*src == '\\' && src[1])
      src = escape(d++, src + 1);
    else
      *d++ = *src++;
  }

  *d = 0;

  return d - dst;
}

Generated on Thu Jun 2 23:16:59 2005 for FinFlect by  doxygen 1.4.2