Add wordsplit test (from mailutils)

* tests/wordsplit.at: New testcase.
* tests/wsp.c: New test program.
This commit is contained in:
Sergey Poznyakoff 2014-09-09 00:35:55 +03:00 committed by Sergey Poznyakoff
parent 6e9ebcab71
commit 29fb748305
2 changed files with 818 additions and 0 deletions

441
tests/wordsplit.at Normal file
View file

@ -0,0 +1,441 @@
# This file is part of grecs -*- Autotest -*-
# Copyright (C) 2014 Sergey Poznyakoff
#
# Grecs is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# Grecs is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Grecs. If not, see <http://www.gnu.org/licenses/>.
AT_BANNER(Wordsplit)
dnl ------------------------------------------------------------
dnl TESTWSP([NAME], [KW = `'], [OPTS], [INPUT], [STDOUT = `'],
dnl [STDERR = `'], [ENV])
dnl
dnl Define one wordsplit test group.
dnl   NAME    test title, passed to AT_SETUP
dnl   KW      extra keywords, appended to `wordsplit wsp'
dnl   OPTS    command line arguments for the wsp test program
dnl   INPUT   text fed to wsp on standard input through a here-document
dnl   STDOUT  expected standard output
dnl   STDERR  expected standard error
dnl   ENV     shell text placed in front of the wsp command: variable
dnl           assignments, or commands such as `unset FOO;'
dnl The wsp command is always expected to exit with status 0.
dnl
m4_pushdef([TESTWSP],[
AT_SETUP([$1])
AT_KEYWORDS([wordsplit wsp $2])
AT_CHECK([$7 wsp $3 <<'EOT'
[$4]
EOT
],
[0],
[$5],
[$6])
AT_CLEANUP
])
dnl ------------------------------------------------------------
dnl The first part reproduces legacy argcv tests
dnl ------------------------------------------------------------
TESTWSP([simple input],[wsp-simple wsp00],[],
[1 2 3],
[NF: 3
0: 1
1: 2
2: 3
])
dnl A backslash-escaped space does not split the word.
TESTWSP([quoted space],[wsp-quoted wsp01],[],
[quoted\ space],
[NF: 1
0: "quoted space"
])
dnl The two words inside the quotes below must be separated by a literal
dnl TAB character, not a space: the expected output shows it as \t and
dnl leaves the word unquoted.
TESTWSP([tab character],[wsp-tab wsp02],[],
[a "tab	character"],
[NF: 2
0: a
1: tab\tcharacter
])
dnl Octal (\NNN) and hexadecimal (\xNN) escapes are decoded in the input.
TESTWSP([octal and hex escapes],[wsp-escape wsp-escape0 wsp03],[],
[\157\143\164\141\154\40and\x20\x68\x65\x78],
[NF: 1
0: "octal and hex"
])
TESTWSP([octal and hex escapes 2],[wsp-escape wsp-escape1 wsp04],[],
[\157\143\164\141\154\40 and \x20\x68\x65\x78],
[NF: 3
0: "octal "
1: and
2: " hex"
])
dnl Non-printable characters are printed back as three-digit octal escapes.
TESTWSP([escape representation],[wsp-escape wsp-escape2 wsp05],[],
[A\x3-\48\39],
[NF: 1
0: A\003-\0048\0039
])
dnl ------------------------------------------------------------
dnl Test wordsplit-specific behavior
dnl ------------------------------------------------------------
dnl With `append', the words of each input line are added to those
dnl collected from the previous lines.
TESTWSP([append],[wsp-append wsp06],[append],
[jeden dwa trzy
cztery
piec szesc],
[NF: 3
0: jeden
1: dwa
2: trzy
NF: 4
0: jeden
1: dwa
2: trzy
3: cztery
NF: 6
0: jeden
1: dwa
2: trzy
3: cztery
4: piec
5: szesc
])
dnl `dooffs COUNT ARGS...' reserves COUNT leading slots, which wsp fills
dnl with ARGS and prints with their index in parentheses.
dnl The misspelled keyword `wsp-doofs' is kept so that existing keyword
dnl selections continue to work.
TESTWSP([dooffs],[wsp-dooffs wsp-doofs wsp07],[dooffs 3 jeden dwa trzy],
[cztery piec],
[NF: 2 (3)
(0): jeden
(1): dwa
(2): trzy
3: cztery
4: piec
])
dnl ------------------------------------------------------------
dnl Variable expansion.  The last TESTWSP argument sets up (or unsets)
dnl the environment variables seen by wsp.
dnl ------------------------------------------------------------
TESTWSP([variable substitutions: single var],[wsp-var wsp-var00 wsp08],[],
[a $FOO test],
[NF: 3
0: a
1: bar
2: test
],
[],
[FOO=bar])
TESTWSP([variable substitutions: concatenated vars],[wsp-var wsp-var01 wsp09],
[],
[a $FOO${BAR}ent test],
[NF: 3
0: a
1: stringent
2: test
],
[],
[FOO=str BAR=ing])
dnl An unquoted expansion is itself split into words ...
TESTWSP([variable substitutions: field splitting],[wsp-var wsp-var02 wsp10],[],
[a $FOO test],
[NF: 4
0: a
1: variable
2: substitution
3: test
],
[],
[FOO="variable substitution"])
dnl ... a double-quoted one stays a single word ...
TESTWSP([variable substitutions: double-quoted variable],
[wsp-var wsp-var03 wsp11],[],
[a "$FOO" test],
[NF: 3
0: a
1: "variable substitution"
2: test
],
[],
[FOO="variable substitution"])
dnl ... and single quotes suppress the expansion altogether.
TESTWSP([variable substitutions: single-quoted variable],
[wsp-var wsp-var04 wsp12],[],
[a '$FOO' test],
[NF: 3
0: a
1: $FOO
2: test
],
[],
[FOO="variable substitution"])
dnl Handling of undefined variables: dropped by default, kept verbatim
dnl with `keepundef', reported with `warnundef', an error with `undef'.
TESTWSP([undefined variables 1],[wsp-var wsp-var05 wsp13],[],
[a $FOO test a${FOO}b],
[NF: 3
0: a
1: test
2: ab
],
[],
[unset FOO;])
TESTWSP([undefined variables 2],[wsp-var wsp-var06 wsp14],[keepundef],
[a $FOO test a${FOO}b],
[NF: 4
0: a
1: $FOO
2: test
3: a${FOO}b
],
[],
[unset FOO;])
TESTWSP([warn about undefined variables],[wsp-var wsp-var07 wsp15],[warnundef],
[$FOO],
[NF: 0
],
[warning: undefined variable `FOO'
],
[unset FOO;])
dnl wsp skips the failed line and still exits with status 0, hence the
dnl empty expected stdout.
TESTWSP([bail out on undefined variables],[wsp-var wsp-var08 wsp16],[undef],
[$FOO],
[],
[undefined variable
],
[unset FOO;])
TESTWSP([disable variable expansion],[wsp-var wsp-var09 wsp17],[novar],
[$FOO],
[NF: 1
0: $FOO
],
[],
[FOO=bar])
dnl `env_kv' makes wsp pass the environment as separate name and value
dnl entries instead of NAME=VALUE strings.
TESTWSP([K/V environment],[wsp-var wsp-var10 wsp18 wsp-env-kv wsp-env_kv],
[env_kv],
[$FOO a$BAZ],
[NF: 2
0: bar
1: aqux
],
[],
[FOO=bar BAZ=qux])
dnl With `nosplit' the whole line, including its trailing newline, is
dnl returned as one word.
TESTWSP([nosplit with expansion],
[wsp-var wsp-var11 wsp19 wsp-var-nosplit],[nosplit],
[a $FOO test],
[NF: 1
0: "a variable expansion test\n"
],
[],
[FOO="variable expansion"])
TESTWSP([nosplit without expansion],[wsp-var wsp-var12 wsp20],[nosplit novar],
[a $FOO test],
[NF: 1
0: "a $FOO test\n"
],
[],
[FOO="variable expansion"])
dnl ------------------------------------------------------------
dnl Quote handling, custom delimiters and sed expressions
dnl ------------------------------------------------------------
dnl `-quote' turns quote processing off: the quote characters are
dnl ordinary word constituents.
TESTWSP([ignore quotes],[wsp-ignore-quotes wsp21],[-quote],
["a text"],
[NF: 2
0: "\"a"
1: "text\""
])
dnl The four tests below split the same input on `:' with whitespace
dnl trimming disabled (-ws); they differ in whether adjacent delimiters
dnl are squeezed and whether delimiters are returned as words.
TESTWSP([custom delimiters (squeeze)],[wsp-delim wsp-delim00 wsp22],
[delim : -ws trimnl],
[semicolon: separated::list: of :words],
[NF: 5
0: semicolon
1: " separated"
2: list
3: " of "
4: words
])
TESTWSP([custom delimiters (no squeeze)],[wsp-delim wsp-delim01 wsp23],
[delim : -ws -squeeze_delims trimnl],
[semicolon: separated::list: of :words],
[NF: 6
0: semicolon
1: " separated"
2: ""
3: list
4: " of "
5: words
])
TESTWSP([custom, with returned delimiters],[wsp-delim wsp-delim02 wsp24],
[delim : -ws trimnl return_delims],
[semicolon: separated::list: of :words],
[NF: 9
0: semicolon
1: :
2: " separated"
3: :
4: list
5: :
6: " of "
7: :
8: words
])
dnl NOTE(review): despite the title, this test passes -squeeze_delims,
dnl i.e. squeezing is turned off and both adjacent `:' are returned.
TESTWSP([custom, with returned & squeezed delimiters],
[wsp-delim wsp-delim03 wsp25],
[delim : -ws trimnl return_delims -squeeze_delims],
[semicolon: separated::list: of :words],
[NF: 10
0: semicolon
1: :
2: " separated"
3: :
4: :
5: list
6: :
7: " of "
8: :
9: words
])
dnl With `sed', a sed-style s/// expression is kept as one word even
dnl though it contains whitespace.
TESTWSP([sed expressions],[wsp-sed wsp-sed00 wsp26],[sed],
[arg1 s/foo/bar/g;s/bar baz/quz quux/ arg2],
[NF: 3
0: arg1
1: "s/foo/bar/g;s/bar baz/quz quux/"
2: arg2
])
dnl C escape sequences (\t, \f, \n) are decoded only when `cescapes' is
dnl set; otherwise the backslash is simply dropped.
dnl The misspelled `wcp-' keywords are kept so that existing keyword
dnl selections continue to work; the `wsp-' spellings match the rest of
dnl the file.
TESTWSP([C escapes on],[wsp-c-escape wcp-c-escape wsp27],[cescapes],
[a\ttab form\ffeed and new\nline],
[NF: 4
0: a\ttab
1: form\ffeed
2: and
3: new\nline
])
TESTWSP([C escapes off],[wsp-c-escape-off wcp-c-escape-off wsp28],[-cescapes],
[a\ttab form\ffeed and new\nline],
[NF: 4
0: attab
1: formffeed
2: and
3: newnline
])
dnl ------------------------------------------------------------
dnl Whitespace trimming, empty quotes and quote selection
dnl ------------------------------------------------------------
dnl Space is both a delimiter and trimmed whitespace here, so only the
dnl parentheses are returned as delimiter words.
TESTWSP([ws elimination],[wsp-ws-elim wsp29],[delim ' ()' ws return_delims],
[( list items )],
[NF: 4
0: (
1: list
2: items
3: )
])
TESTWSP([empty quotes],[wsp-empty-quotes wsp30],[delim : ws return_delims],
[t=""],
[NF: 1
0: t=
])
TESTWSP([delimiter following empty quotes],
[wsp31],[delim : ws return_delims],
[t="":r],
[NF: 3
0: t=
1: :
2: r
])
dnl The trailing ", " is inside double quotes and must survive both
dnl delimiter splitting and whitespace trimming.
TESTWSP([suppress ws trimming within quotes],
[wsp32],
[default delim , ws return_delims],
[nocomponent,nonewline, formatfield="In message %{text}, "],
[NF: 5
0: nocomponent
1: ,
2: nonewline
3: ,
4: "formatfield=In message %{text}, "
])
dnl Only the characters listed after `escape' (backslash and double
dnl quote) are unescaped; the backslash in \Seen is therefore retained.
TESTWSP([unescape],
[wsp-unescape wsp33],[-default novar nocmd quote escape '\"'],
[\Seen "quote \"" "bs \\"],
[NF: 3
0: \\Seen
1: "quote \""
2: "bs \\"
])
dnl `dquote' enables double quotes only, `squote' single quotes only;
dnl the other quote character is treated as an ordinary character.
TESTWSP([dquote],[wsp34],[-default novar nocmd dquote],
[a "quoted example" isn't it],
[NF: 4
0: a
1: "quoted example"
2: isn't
3: it
])
TESTWSP([squote],[wsp35],[-default novar nocmd squote],
[a 'quoted example' isn"t it],
[NF: 4
0: a
1: "quoted example"
2: "isn\"t"
3: it
])
dnl ------------------------------------------------------------
dnl Incremental mode
dnl ------------------------------------------------------------
dnl In incremental mode wsp calls wordsplit once per input line and each
dnl call yields a single word.  An empty line makes wsp continue with the
dnl previous input.  Every test therefore needs the text line followed by
dnl three empty lines: two more words, then the `input exhausted' error.
dnl The macro itself adds one newline after INPUT, so two blank lines are
dnl spelled out below.
TESTWSP([incremental],[wsp-incr wsp36],[incremental],
[incremental "input test" line


],
[NF: 1
0: incremental
NF: 1
0: "input test"
NF: 1
0: line
],
[input exhausted
])
TESTWSP([incremental append],[wsp-incr wsp37],[incremental append],
[incremental "input test" line


],
[NF: 1
0: incremental
NF: 2
0: incremental
1: "input test"
NF: 3
0: incremental
1: "input test"
2: line
],
[input exhausted
])
TESTWSP([incremental ws],
[wsp-incr wsp38],[return_delims -squeeze_delims incremental ws],
[a list test


],
[NF: 1
0: a
NF: 1
0: list
NF: 1
0: test
],
[input exhausted
])
dnl Remove the helper macro defined at the top of this file.
m4_popdef([TESTWSP])

377
tests/wsp.c Normal file
View file

@ -0,0 +1,377 @@
/* grecs - Gray's Extensible Configuration System
Copyright (C) 2014 Sergey Poznyakoff
Grecs is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3 of the License, or (at your
option) any later version.
Grecs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with Grecs. If not, see <http://www.gnu.org/licenses/>. */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "wordsplit.h"
extern char **environ;
char *progname;
/* One entry of a keyword table: maps an option name accepted on the
   command line to the flag value it stands for.  Tables are terminated
   by an entry whose name is NULL. */
struct kwd
{
  const char *name;	/* option keyword */
  int tok;		/* associated flag value */
};
/* Keywords that switch a wordsplit flag on, or off when prefixed with `-'.
   "reuse" is deliberately not offered: main () sets WRDSF_REUSE itself
   after the first successful split. */
struct kwd bool_keytab[] = {
  { "append",         WRDSF_APPEND },
  { "undef",          WRDSF_UNDEF },
  { "novar",          WRDSF_NOVAR },
  { "nocmd",          WRDSF_NOCMD },
  { "ws",             WRDSF_WS },
  { "quote",          WRDSF_QUOTE },
  { "squote",         WRDSF_SQUOTE },
  { "dquote",         WRDSF_DQUOTE },
  { "squeeze_delims", WRDSF_SQUEEZE_DELIMS },
  { "return_delims",  WRDSF_RETURN_DELIMS },
  { "sed",            WRDSF_SED_EXPR },
  { "debug",          WRDSF_SHOWDBG },
  { "nosplit",        WRDSF_NOSPLIT },
  { "keepundef",      WRDSF_KEEPUNDEF },
  { "warnundef",      WRDSF_WARNUNDEF },
  { "cescapes",       WRDSF_CESCAPES },
  { "default",        WRDSF_DEFFLAGS },
  { "env_kv",         WRDSF_ENV_KV },
  { "incremental",    WRDSF_INCREMENTAL },
  { NULL,             0 }
};
/* Keywords that take a string argument.  main () stores the argument in
   the matching field of struct wordsplit and sets the flag; the `-'
   form only clears the flag. */
struct kwd string_keytab[] = {
  { "delim",   WRDSF_DELIM },
  { "comment", WRDSF_COMMENT },
  { "escape",  WRDSF_ESCAPE },
  { NULL,      0 }
};
/* Look STR up in the NULL-terminated keyword table KWP.
   On an exact match store the entry's flag value in *RES and return 0.
   Return -1, leaving *RES untouched, if STR is not in the table. */
static int
kwxlat (struct kwd *kwp, const char *str, int *res)
{
  struct kwd *entry = kwp;

  while (entry->name != NULL)
    {
      if (strcmp (entry->name, str) != 0)
	{
	  entry++;
	  continue;
	}
      *res = entry->tok;
      return 0;
    }
  return -1;
}
static void
help ()
{
size_t i;
printf ("usage: %s [options]\n", progname);
printf ("options are:\n");
printf (" [-]trimnl\n");
printf (" [-]plaintext\n");
putchar ('\n');
for (i = 0; bool_keytab[i].name; i++)
printf (" [-]%s\n", bool_keytab[i].name);
putchar ('\n');
for (i = 0; string_keytab[i].name; i++)
{
printf (" -%s\n", bool_keytab[i].name);
printf (" %s ARG\n", bool_keytab[i].name);
}
putchar ('\n');
printf (" -dooffs\n");
printf (" dooffs COUNT ARGS...\n");
exit (0);
}
/* Print WORD on standard output.
   If PLAINTEXT is non-zero the word is printed verbatim.  Otherwise it is
   printed in the quoted form produced by wordsplit_c_quote_copy, and is
   enclosed in double quotes when wordsplit_c_quoted_length asks for
   quoting or when the word is empty.
   The conversion buffer is static and only ever grows, so it is reused
   between calls. */
void
print_qword (const char *word, int plaintext)
{
  static char *qbuf = NULL;
  static size_t qlen = 0;
  int need_quotes;
  size_t qsize = wordsplit_c_quoted_length (word, 0, &need_quotes);

  if (plaintext)
    {
      printf ("%s", word);
      return;
    }

  /* An empty word would otherwise be invisible in the output. */
  if (word[0] == '\0')
    need_quotes = 1;

  /* Make room for the quoted text plus the terminating NUL. */
  if (qlen <= qsize)
    {
      qlen = qsize + 1;
      qbuf = realloc (qbuf, qlen);
      assert (qbuf != NULL);
    }

  wordsplit_c_quote_copy (qbuf, word, 0);
  qbuf[qsize] = '\0';

  if (need_quotes)
    putchar ('"');
  fputs (qbuf, stdout);
  if (need_quotes)
    putchar ('"');
}
/* Return a newly allocated, NUL-terminated copy of the first LEN bytes
   of STR.  Aborts through assert if memory is exhausted. */
static char *
env_copy (const char *str, size_t len)
{
  char *p = malloc (len + 1);
  assert (p != NULL);
  memcpy (p, str, len);
  p[len] = 0;
  return p;
}

/* Convert environment to K/V form.

   Every NAME=VALUE entry of environ becomes two consecutive strings,
   NAME and VALUE, in the returned array; an entry that contains no `='
   yields its whole text as NAME and an empty VALUE.  The array is
   terminated by a NULL pointer.  The array and all strings are allocated
   with malloc/calloc and belong to the caller.  Allocation failure
   aborts through assert. */
static char **
make_env_kv (void)
{
  /* POSIX leaves the declaration of environ to the program; repeating
     it at block scope keeps this function self-contained. */
  extern char **environ;
  size_t i, j, count;
  char **newenv;

  /* Count the number of entries */
  for (count = 0; environ && environ[count]; count++)
    ;

  /* Two slots per entry plus the terminating NULL. */
  newenv = calloc (2 * count + 1, sizeof (newenv[0]));
  assert (newenv != NULL);

  for (i = j = 0; i < count; i++)
    {
      const char *entry = environ[i];
      size_t len = strcspn (entry, "=");
      const char *value = entry + len;

      /* Step over the separator only if it is really there, so that an
	 entry without `=' is not read past its terminating NUL. */
      if (*value == '=')
	value++;

      newenv[j++] = env_copy (entry, len);
      newenv[j++] = env_copy (value, strlen (value));
    }
  newenv[j] = NULL;
  return newenv;
}
/* Test driver for wordsplit.

   The command line consists of option keywords (see help ()).  A leading
   `-' negates a keyword; a leading `+' is accepted and ignored.  After
   the options are processed, standard input is read line by line; each
   line is passed to wordsplit () and the resulting word count and words
   are printed on standard output as "NF: N" followed by "INDEX: WORD"
   lines.

   Exits with status 1 on an invalid command line.  A line that fails to
   split is skipped; the program still returns 0 at end of input. */
int
main (int argc, char **argv)
{
char buf[1024], *ptr;
/* offarg: index in argv of the first `dooffs' argument, 0 if none pending */
int i, offarg = 0;
int trimnl_option = 0;
int plaintext_option = 0;
/* Start from the library defaults with WRDSF_NOVAR cleared, and add
   WRDSF_ENOMEMABRT, WRDSF_ENV and WRDSF_SHOWERR.  With WRDSF_SHOWERR set
   this program does not call wordsplit_perror itself (see below). */
int wsflags = (WRDSF_DEFFLAGS & ~WRDSF_NOVAR) |
WRDSF_ENOMEMABRT |
WRDSF_ENV | WRDSF_SHOWERR;
/* NOTE(review): ws is not initialized here; only the fields selected by
   the options are assigned before wordsplit () is called.  Presumably
   wordsplit () ignores fields whose flag is not set -- confirm. */
struct wordsplit ws;
int next_call = 0;
progname = argv[0];
/* Pass 1: translate command line keywords into flags and ws fields. */
for (i = 1; i < argc; i++)
{
char *opt = argv[i];
int negate;
int flag;
if (opt[0] == '-')
{
negate = 1;
opt++;
}
else if (opt[0] == '+')
{
negate = 0;
opt++;
}
else
negate = 0;
/* "-help" is compared after one leading `-' was stripped, so this
   matches h, help, -h, -help and --help.  help () does not return. */
if (strcmp (opt, "h") == 0 ||
strcmp (opt, "help") == 0 ||
strcmp (opt, "-help") == 0)
{
help ();
}
/* Options local to this program (not wordsplit flags). */
if (strcmp (opt, "trimnl") == 0)
{
trimnl_option = !negate;
continue;
}
if (strcmp (opt, "plaintext") == 0)
{
plaintext_option = !negate;
continue;
}
/* Boolean wordsplit flags. */
if (kwxlat (bool_keytab, opt, &flag) == 0)
{
if (negate)
wsflags &= ~flag;
else
wsflags |= flag;
continue;
}
/* String-valued options: the next argument is the value.  The negated
   form takes no argument and only clears the flag. */
if (kwxlat (string_keytab, opt, &flag) == 0)
{
if (negate)
wsflags &= ~flag;
else
{
i++;
if (i == argc)
{
fprintf (stderr, "%s: missing argument for %s\n",
progname, opt);
exit (1);
}
switch (flag)
{
case WRDSF_DELIM:
ws.ws_delim = argv[i];
break;
case WRDSF_COMMENT:
ws.ws_comment = argv[i];
break;
case WRDSF_ESCAPE:
ws.ws_escape = argv[i];
break;
}
wsflags |= flag;
}
continue;
}
/* dooffs COUNT ARGS...: reserve COUNT leading slots in the word vector
   and remember where the COUNT arguments that will fill them start. */
if (strcmp (opt, "dooffs") == 0)
{
if (negate)
wsflags &= ~WRDSF_DOOFFS;
else
{
char *p;
i++;
if (i == argc)
{
fprintf (stderr, "%s: missing arguments for %s\n",
progname, opt);
exit (1);
}
ws.ws_offs = strtoul (argv[i], &p, 10);
if (*p)
{
fprintf (stderr, "%s: invalid number: %s\n",
progname, argv[i]);
exit (1);
}
i++;
/* All COUNT arguments must be present on the command line. */
if (i + ws.ws_offs > argc)
{
fprintf (stderr, "%s: not enough arguments for %s\n",
progname, opt);
exit (1);
}
offarg = i;
/* Skip the COUNT arguments; the loop increment steps past the last. */
i += ws.ws_offs - 1;
wsflags |= WRDSF_DOOFFS;
}
continue;
}
/* NOTE(review): the text says "argument for" but OPT is the unrecognized
   keyword itself. */
fprintf (stderr, "%s: unrecognized argument for %s\n",
progname, opt);
exit (1);
}
/* Select the environment representation: K/V pairs or environ as is. */
if (wsflags & WRDSF_ENV_KV)
ws.ws_env = (const char **) make_env_kv ();
else
ws.ws_env = (const char **) environ;
/* Incremental mode uses an empty line as the "continue" request, so the
   trailing newline must always be stripped. */
if (wsflags & WRDSF_INCREMENTAL)
trimnl_option = 1;
next_call = 0;
/* Pass 2: split every input line and print the result. */
while ((ptr = fgets (buf, sizeof (buf), stdin)))
{
int rc;
/* Shadows the outer int i for the rest of this loop body. */
size_t i;
if (trimnl_option)
{
size_t len = strlen (ptr);
if (len && ptr[len-1] == '\n')
ptr[len-1] = 0;
}
/* Incremental mode: the first line is passed as a heap copy.  On later
   calls an empty line passes NULL, while a non-empty line frees the
   previous input and supplies a new heap copy. */
if (wsflags & WRDSF_INCREMENTAL)
{
if (next_call)
{
if (*ptr == 0)
ptr = NULL;
else
free ((void*)ws.ws_input);
}
else
next_call = 1;
if (ptr)
{
ptr = strdup (ptr);
assert (ptr != NULL);
}
}
rc = wordsplit (ptr, &ws, wsflags);
if (rc)
{
/* Report the error here only if wordsplit was not asked to do so. */
if (!(wsflags & WRDSF_SHOWERR))
wordsplit_perror (&ws);
continue;
}
/* After the first successful split, fill the reserved slots with the
   arguments saved from the command line; this is done only once. */
if (offarg)
{
for (i = 0; i < ws.ws_offs; i++)
ws.ws_wordv[i] = argv[offarg + i];
offarg = 0;
}
/* Subsequent calls are made with WRDSF_REUSE set. */
wsflags |= WRDSF_REUSE;
printf ("NF: %lu", (unsigned long) ws.ws_wordc);
if (wsflags & WRDSF_DOOFFS)
printf (" (%lu)", (unsigned long) ws.ws_offs);
putchar ('\n');
/* Reserved slots are printed with their index in parentheses ... */
for (i = 0; i < ws.ws_offs; i++)
{
printf ("(%lu): ", (unsigned long) i);
print_qword (ws.ws_wordv[i], plaintext_option);
putchar ('\n');
}
/* ... followed by the words produced by the split. */
for (; i < ws.ws_offs + ws.ws_wordc; i++)
{
printf ("%lu: ", (unsigned long) i);
print_qword (ws.ws_wordv[i], plaintext_option);
putchar ('\n');
}
}
return 0;
}