Main Page | Data Structures | Directories | File List | Data Fields | Globals | Related Pages

tcstring.c

Go to the documentation of this file.
00001 /*
00002  * finflect - Algorithms and tools for inflecting Finnish nouns
00003  * Copyright (C) 2004, 2005  The FinFlect Team
00004  * 
00005  * This library is free software; you can redistribute it and/or
00006  * modify it under the terms of the GNU Lesser General Public
00007  * License as published by the Free Software Foundation; either
00008  * version 2.1 of the License, or (at your option) any later version.
00009  * 
00010  * This library is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * Lesser General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU Lesser General Public
00016  * License along with this library; if not, write to the Free Software
00017  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00018  * 
00019  * 
00020  * For the complete legal text of the GNU Lesser General Public License,
00021  * see the file LICENSE. For a complete list of authors and copyright
00022  * holders, see the file AUTHORS.
00023  */
00024 
00025 /** 
00026  * @file tcstring.c A few support functions necessary for tcregsub.
00027  */
00028 
00029 
00030 /**
00031     Copyright (C) 2003  Michael Ahlberg, Måns Rullgård
00032  
00033     Permission is hereby granted, free of charge, to any person
00034     obtaining a copy of this software and associated documentation
00035     files (the "Software"), to deal in the Software without
00036     restriction, including without limitation the rights to use, copy,
00037     modify, merge, publish, distribute, sublicense, and/or sell copies
00038     of the Software, and to permit persons to whom the Software is
00039     furnished to do so, subject to the following conditions:
00040  
00041     The above copyright notice and this permission notice shall be
00042     included in all copies or substantial portions of the Software.
00043  
00044     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00045     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00046     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00047     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
00048     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
00049     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00050     OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
00051     DEALINGS IN THE SOFTWARE.
00052 **/
00053 
00054 #include <stdio.h>
00055 #include <stdlib.h>
00056 #include <string.h>
00057 #include <strings.h>
00058 /*#include <tcstring.h>
00059 #include <tctypes.h>*/
00060 #include <ctype.h>
00061 #include <limits.h>
00062 
00063 #include "tcrip.h"
00064 
/*
 * Numeric value (0-15) of the hexadecimal digit character x.
 * Only meaningful when x has already been validated with isxdigit();
 * note that x is evaluated more than once.
 */
#define hex(x) (((x)<0x3a)? ((x)-'0'): (((x)<0x60)? ((x)-0x37): ((x)-0x57)))

/**
 * Decode one backslash escape sequence.
 *
 * Recognised sequences (the leading backslash has already been consumed
 * by the caller):
 *   t n r f b a e   the usual control characters ('e' is ESC, 0x1b)
 *   0-3 ...         up to three octal digits, first digit 0-3
 *   xHH             exactly two hexadecimal digits
 *   cX              control character derived from X
 * A malformed 'x' or 'c' sequence, or any other character, is copied
 * through unchanged.
 *
 * @param d  Destination; exactly one byte is always stored at *d.
 * @param s  Points at the character that follows the backslash.
 * @return   Pointer to the first character of s that was not consumed.
 *
 * If s points at the terminating NUL (the input ended with a lone
 * backslash) a literal backslash is stored and s is returned unchanged,
 * so the caller's scan pointer never moves past the end of the string.
 */
static const char *
escape(char *d, const char *s)
{
  unsigned int value;
  int i;

  switch(*s)
  {
  case '\0':
    /* Nothing follows the backslash: keep it, consume nothing. */
    *d = '\\';
    break;
  case 't':
    *d = '\t';
    s++;
    break;
  case 'n':
    *d = '\n';
    s++;
    break;
  case 'r':
    *d = '\r';
    s++;
    break;
  case 'f':
    *d = '\f';
    s++;
    break;
  case 'b':
    *d = '\b';
    s++;
    break;
  case 'a':
    *d = '\a';
    s++;
    break;
  case 'e':
    *d = 0x1b;
    s++;
    break;
  case '0':
  case '1':
  case '2':
  case '3':
    /* Only 0-7 are octal digits; '8' and '9' end the sequence. */
    value = 0;
    for(i = 0; i < 3 && *s >= '0' && *s <= '7'; i++)
    {
      value = value * 8 + (unsigned int) (*s++ - '0');
    }
    *d = (char) value;
    break;
  case 'x':
    /* isxdigit(0) is false, so the && chain never reads past the NUL. */
    if(isxdigit((unsigned char) s[1]) && isxdigit((unsigned char) s[2]))
    {
      *d = (char) (hex(s[1]) * 16 + hex(s[2]));
      s += 3;
    }
    else
    {
      *d = *s++;
    }
    break;
  case 'c':
    if(s[1])
    {
      /* <ctype.h> functions require a value representable as
         unsigned char. */
      *d = (char) ((toupper((unsigned char) s[1]) - 0x40) & 0x7f);
      s += 2;
    }
    else
    {
      *d = *s++;
    }
    break;
  default:
    *d = *s++;
    break;
  }

  return s;
}
00123 
00124 extern char *
00125   tcstrexp(const char *s, const char *sd, const char *ed, char fs,
00126            char *(*lookup)(char *, void *), void *ld, int flags)
00127 {
00128   int l = strlen(s) + 1;
00129   char *expo = malloc(l);
00130   char *p = expo;
00131   char *d, *f;
00132 
00133 #define ext(n) do {                             \
00134     int o = p - expo;                           \
00135     expo = realloc(expo, l += n);                       \
00136     p = expo + o;                               \
00137 } while(0)
00138 
00139   while(*s)
00140   {
00141     switch(*s)
00142     {
00143     case '\\':
00144       if(flags & TCSTREXP_ESCAPE)
00145       {
00146         s = escape(p++, ++s);
00147       }
00148       else
00149       {
00150         *p++ = *s++;
00151       }
00152       break;
00153 
00154     case '$':
00155       d = strchr(sd, *++s);
00156       if(d)
00157       {
00158         const char *e = ++s;
00159         char ec = ed[d - sd];
00160         int n = 0;
00161 
00162         /* Find the matching closing paren */
00163         while(*e)
00164         {
00165           if(*e == *d && *(e - 1) == '$')
00166           {
00167             n++;
00168           }
00169           else if(*e == ec)
00170           {
00171             if(!n)
00172               break;
00173             n--;
00174           }
00175           e++;
00176         }
00177 
00178         if(*e)
00179         {
00180           int vl = e - s;
00181           char *vn = malloc(vl + 1);
00182           char *v;
00183           char *def = NULL, *alt = NULL;
00184           int upcase = 0, downcase = 0;
00185           int sss = 0, ssl = INT_MAX;
00186           char *rx = NULL, *rsub = NULL, rd;
00187 
00188           strncpy(vn, s, vl);
00189           vn[vl] = 0;
00190           if(fs && (f = strchr(vn, fs)))
00191           {
00192             int fl = 1;
00193             *f++ = 0;
00194             while(fl && *f)
00195             {
00196               switch(*f++)
00197               {
00198               case '-':
00199                 def = f;
00200                 fl = 0;
00201                 break;
00202               case '+':
00203                 alt = f;
00204                 fl = 0;
00205                 break;
00206               case 'u':
00207                 upcase = 1;
00208                 break;
00209               case 'l':
00210                 downcase = 1;
00211                 break;
00212               case 's':
00213                 f++;
00214               case '0':
00215               case '1':
00216               case '2':
00217               case '3':
00218               case '4':
00219               case '5':
00220               case '6':
00221               case '7':
00222               case '8':
00223               case '9':
00224                 sss = strtol(f-1, &f, 0);
00225                 if(*f == ':')
00226                 {
00227                   f++;
00228                   ssl = strtol(f, &f, 0);
00229                 }
00230                 break;
00231               case '/':
00232                 f--;
00233               case 'r':
00234                 rd = *f;
00235                 rx = ++f;
00236                 if((rsub = strchr(rx, rd)))
00237                 {
00238                   char *re;
00239                   *rsub++ = 0;
00240                   if((re = strchr(rsub, rd)))
00241                   {
00242                     *re = 0;
00243                     f = re + 1;
00244                   }
00245                   else
00246                   {
00247                     fl = 0;
00248                   }
00249                 }
00250                 else
00251                 {
00252                   rx = NULL;
00253                 }
00254                 break;
00255               }
00256             }
00257           }
00258           if((v = lookup(vn, ld)))
00259           {
00260             char *ov = v;
00261             if(alt)
00262               v = tcstrexp(alt, sd, ed, fs, lookup, ld, flags);
00263             else
00264               v = strdup(v);
00265             if(flags & TCSTREXP_FREE)
00266               free(ov);
00267           }
00268           else if(def)
00269           {
00270             v = tcstrexp(def, sd, ed, fs, lookup, ld, flags);
00271           }
00272           if(v)
00273           {
00274             int sl = strlen(v);
00275             char *vo = v;
00276 
00277             if(sss < 0)
00278             {
00279               if(-sss < sl)
00280               {
00281                 v += sl + sss;
00282                 sl = -sss;
00283               }
00284             }
00285             else if(sss <= sl)
00286             {
00287               v += sss;
00288               sl -= sss;
00289             }
00290             else
00291             {
00292               v += sl;
00293               sl = 0;
00294             }
00295 
00296             if(ssl < 0)
00297             {
00298               if(-ssl < sl)
00299               {
00300                 v[sl + ssl] = 0;
00301                 sl += ssl;
00302               }
00303               else
00304               {
00305                 sl = 0;
00306               }
00307             }
00308             else if(ssl < sl)
00309             {
00310               v[ssl] = 0;
00311               sl = ssl;
00312             }
00313 
00314             if(rx)
00315             {
00316               char *rs = tcregsub(v, rx, rsub, 0);
00317               if(rs)
00318               {
00319                 free(vo);
00320                 vo = v = rs;
00321                 sl = strlen(rs);
00322               }
00323             }
00324 
00325             if(sl)
00326             {
00327               ext(sl + 1);
00328 
00329               if(upcase)
00330               {
00331                 char *c = v;
00332                 while(*c)
00333                   *p++ = toupper(*c++);
00334               }
00335               else if(downcase)
00336               {
00337                 char *c = v;
00338                 while(*c)
00339                   *p++ = tolower(*c++);
00340               }
00341               else
00342               {
00343                 strcpy(p, v);
00344                 p += sl;
00345               }
00346             }
00347             free(vo);
00348           }
00349           else if(flags & TCSTREXP_KEEPUNDEF)
00350           {
00351             int n = e - s + 3;
00352             ext(n);
00353             memcpy(p, s - 2, n);
00354             p += n;
00355           }
00356           s = e + 1;
00357           free(vn);
00358         }
00359       }
00360       else
00361       {
00362         *p++ = '$';
00363       }
00364       break;
00365     default:
00366       *p++ = *s++;
00367       break;
00368     }
00369   }
00370 
00371   *p = 0;
00372 
00373 #undef ext
00374   return expo;
00375 }
00376 
/**
 * Copy src to dst, decoding backslash escape sequences on the way.
 *
 * Every loop iteration consumes at least one byte of src and stores
 * exactly one byte in dst, so dst needs room for at most
 * strlen(src) + 1 bytes.
 *
 * @param dst  Destination buffer; always NUL-terminated on return.
 * @param src  NUL-terminated input string.
 * @return     Length of the decoded string, not counting the
 *             terminating NUL.
 */
extern int
  tcstresc(char *dst, const char *src)
{
  char *d = dst;

  while(*src)
  {
    /* A backslash that ends the string has nothing to escape.  Copy it
       literally instead of handing the terminator to escape(), which
       would otherwise let src run past the end of the input. */
    if(*src == '\\' && src[1])
    {
      src = escape(d, src + 1);
      d++;
    }
    else
    {
      *d++ = *src++;
    }
  }

  *d = 0;

  return (int) (d - dst);
}

Generated on Sun May 15 21:50:47 2005 for FinFlect by  doxygen 1.4.1