mirror of
https://repo.or.cz/socat.git
synced 2025-07-12 06:33:24 +00:00
socat V1.6.0.0 (initial GIT commit)
This commit is contained in:
commit
b819572f5e
170 changed files with 59193 additions and 0 deletions
237
nestlex.c
Normal file
237
nestlex.c
Normal file
|
@ -0,0 +1,237 @@
|
|||
/* $Id: nestlex.c,v 1.4 2006/06/23 17:04:36 gerhard Exp $ */
|
||||
/* Copyright Gerhard Rieger 2006 */
|
||||
/* Published under the GNU General Public License V.2, see file COPYING */
|
||||
|
||||
/* a function for lexical scanning of nested character patterns */
|
||||
|
||||
#include "config.h"
|
||||
#include "mytypes.h"
|
||||
|
||||
#include "sysincludes.h"
|
||||
|
||||
|
||||
/* nestlex: scan a string and copy one token from it to an output buffer.

   Scanning stops at the first unescaped, unquoted, unnested occurrence of
   one of the strings in ends[], or at the end of the input string. The end
   pattern itself is neither consumed nor copied. No trailing \0 is written
   to the token; the caller may store one at *token afterwards.

   A backslash escapes the following character; the backslash is removed.
   With c_esc, the escapes \0 \a \b \f \n \r \t \v are translated to the
   corresponding control characters (\x, \u and \U are not implemented).
   A backslash that is the last character of the input is dropped.

   A string from hquotes[] starts a hard quoted section that lasts until the
   same string appears again; inside it no quote or nest patterns are
   recognised. A string from squotes[] starts a soft quoted section that
   lasts until the same string appears again; inside it quote and nest
   patterns are still recognised. With dropquotes, the quote strings of the
   outermost level are removed, otherwise they are copied.

   nests[] holds pairs: an opening string followed by its closing string.
   Nested sections are copied including their opening and closing strings.

   Pattern lists are terminated by NULL; each list pointer may itself be
   NULL, which means "no patterns". html_esc is only handed on to the
   recursive calls; this function does not evaluate it.

   On return, *addr points to the first input character that was not
   consumed, *token points to the first unwritten output byte, and *len has
   been reduced by the number of bytes written.

   returns  0 if a token was extracted (end pattern or end of input reached)
   returns -1 if the output space was exhausted (*len reached 0)
   returns  1 if the input ended inside a quoted or nested section
*/

/* Returns the first entry of the NULL terminated pattern list that is a
   prefix of `in`, or NULL when there is none. stride is 1 for plain lists
   and 2 for lists of open/close pairs; a pair without closing string ends
   the search. */
static const char **nestlex_find(const char *in, const char *list[],
                                 int stride) {
   const char **entry;

   if (list == NULL) {
      return NULL;
   }
   for (entry = list; *entry != NULL; entry += stride) {
      if (stride > 1 && entry[1] == NULL) {
         break;   /* incomplete pair: do not step over the terminator */
      }
      if (strncmp(in, *entry, strlen(*entry)) == 0) {
         return entry;
      }
   }
   return NULL;
}

/* Copies n bytes from *in to *out, advancing both and reducing *len.
   Returns 0 when space is left afterwards, -1 as soon as *len reaches 0. */
static int nestlex_copy(const char **in, char **out, size_t *len, size_t n) {
   for (; n > 0; --n) {
      if (*len == 0) {
         return -1;   /* never write without space */
      }
      *(*out)++ = *(*in)++;
      if (--*len == 0) {
         return -1;
      }
   }
   return 0;
}

int nestlex(const char **addr,	/* input string; aft points to end token */
	    char **token,	/* output token; aft points to first unwritten
				   char (caller might want to set it to \0) */
	    size_t *len,	/* remaining bytes in token space (incl. \0) */
	    const char *ends[],	/* list of end strings */
	    const char *hquotes[],/* list of strings that quote (hard qu.) */
	    const char *squotes[],/* list of strings that quote softly */
	    const char *nests[],/* list of strings that start nesting;
				   every second one is matching end */
	    bool dropquotes,	/* drop the outermost quotes */
	    bool c_esc,		/* solve C char escapes: \n \t \0 etc */
	    bool html_esc	/* solve HTML char escapes: %0d %08 etc */
	    ) {
   const char *in = *addr;	/* pointer into input string */
   char *out = *token;		/* pointer into output token */
   int result = 0;		/* stays 0 unless we leave with an error */

   while (*in != '\0') {
      const char **hit;		/* matching entry of a pattern list */
      const char *endnest[2];	/* end list for the recursive call */
      bool hard;
      char c;

      /* first check the end patterns (e.g. for ']') */
      if (nestlex_find(in, ends, 1) != NULL) {
         break;
      }

      /* check for a quoting pattern; hard quotes take precedence */
      hard = true;
      hit = nestlex_find(in, hquotes, 1);
      if (hit == NULL) {
         hard = false;
         hit = nestlex_find(in, squotes, 1);
      }
      if (hit != NULL) {
         size_t qlen = strlen(*hit);

         /* opening quote: strip or copy */
         if (dropquotes) {
            in += qlen;
         } else if (nestlex_copy(&in, &out, len, qlen) < 0) {
            result = -1;
            break;
         }

         /* the quoted part ends where the same quote string reappears */
         endnest[0] = *hit;
         endnest[1] = NULL;
         if (hard) {
            result = nestlex(&in, &out, len, endnest, NULL/*hquotes*/,
                             NULL/*squotes*/, NULL/*nests*/,
                             false, c_esc, html_esc);
         } else {
            result = nestlex(&in, &out, len, endnest, hquotes,
                             squotes, nests,
                             false, c_esc, html_esc);
         }
         if (result != 0) {
            break;   /* hand overflow or unterminated section upwards */
         }
         if (*in == '\0') {
            /* the recursion stopped at end of input, not at the closing
               quote; there is nothing behind the \0 we may touch */
            result = 1;
            break;
         }

         /* closing quote: strip or copy */
         if (dropquotes) {
            in += qlen;
         } else if (nestlex_copy(&in, &out, len, qlen) < 0) {
            result = -1;
            break;
         }
         continue;   /* string might continue with another quote */
      }

      /* check patterns that start a nested clause */
      hit = nestlex_find(in, nests, 2);
      if (hit != NULL) {
         /* copy the opening pattern, using its own length */
         if (nestlex_copy(&in, &out, len, strlen(hit[0])) < 0) {
            result = -1;
            break;
         }

         endnest[0] = hit[1];
         endnest[1] = NULL;
         result = nestlex(&in, &out, len, endnest, hquotes, squotes, nests,
                          false, c_esc, html_esc);
         if (result != 0) {
            break;
         }
         if (*in == '\0') {
            result = 1;   /* closing pattern is missing */
            break;
         }

         /* copy the closing pattern */
         if (nestlex_copy(&in, &out, len, strlen(hit[1])) < 0) {
            result = -1;
            break;
         }
         continue;
      }

      /* "normal" data, possibly escaped */
      if (*len == 0) {
         result = -1;   /* no room for even one more char */
         break;
      }
      c = *in++;
      if (c == '\\') {
         if (*in == '\0') {
            break;   /* trailing '\\': drop it, stay on the \0 */
         }
         c = *in++;
         if (c_esc) {	/* solve C char escapes: \n \t \0 etc */
            switch (c) {
            case '0': c = '\0'; break;
            case 'a': c = '\a'; break;
            case 'b': c = '\b'; break;
            case 'f': c = '\f'; break;
            case 'n': c = '\n'; break;
            case 'r': c = '\r'; break;
            case 't': c = '\t'; break;
            case 'v': c = '\v'; break;
            default: break;	/* any other char stands for itself */
            }
         }
      }
      *out++ = c;
      if (--*len == 0) {
         result = -1;   /* output overflow */
         break;
      }
   }

   *addr = in;
   *token = out;
   return result;
}
|
Loading…
Add table
Add a link
Reference in a new issue