From 29fb74830536284532988805c8ffeb65f5ad150e Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Tue, 9 Sep 2014 00:35:55 +0300 Subject: [PATCH] Add wordsplit test (from mailutils) * tests/wordsplit.at: New testcase. * tests/wsp.c: New test program. --- tests/wordsplit.at | 441 +++++++++++++++++++++++++++++++++++++++++++++ tests/wsp.c | 377 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 818 insertions(+) create mode 100644 tests/wordsplit.at create mode 100644 tests/wsp.c diff --git a/tests/wordsplit.at b/tests/wordsplit.at new file mode 100644 index 0000000..d74cbe9 --- /dev/null +++ b/tests/wordsplit.at @@ -0,0 +1,441 @@ +# This file is part of grecs -*- Autotest -*- +# Copyright (C) 2014 Sergey Poznyakoff +# +# Grecs is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# Grecs is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Grecs. If not, see <http://www.gnu.org/licenses/>. 
+ +AT_BANNER(Wordsplit) + +dnl ------------------------------------------------------------ +dnl TESTWSP([NAME], [KW = `'], [OPTS], [INPUT], [STDOUT = `'], +dnl [STDERR = `'], [ENV]) +dnl +dnl Define a test group called NAME that runs wsp with the arguments OPTS, +dnl feeding it INPUT on standard input through a here-document. ENV is a +dnl shell fragment placed in front of the command. The check expects exit +dnl status 0, STDOUT on standard output and STDERR on standard error. +dnl KW is appended to the keywords "wordsplit wsp". +dnl +m4_pushdef([TESTWSP],[ +AT_SETUP([$1]) +AT_KEYWORDS([wordsplit wsp $2]) +AT_CHECK([$7 wsp $3 <<'EOT' +[$4] +EOT +], +[0], +[$5], +[$6]) +AT_CLEANUP +]) + +dnl ------------------------------------------------------------ +dnl The first part reproduces legacy argcv tests +dnl ------------------------------------------------------------ + +TESTWSP([simple input],[wsp-simple wsp00],[], +[1 2 3], +[NF: 3 +0: 1 +1: 2 +2: 3 +]) + +TESTWSP([quoted space],[wsp-quoted wsp01],[], +[quoted\ space], +[NF: 1 +0: "quoted space" +]) + +TESTWSP([tab character],[wsp-tab wsp02],[], +[a "tab character"], +[NF: 2 +0: a +1: tab\tcharacter +]) + +TESTWSP([octal and hex escapes],[wsp-escape wsp-escape0 wsp03],[], +[\157\143\164\141\154\40and\x20\x68\x65\x78], +[NF: 1 +0: "octal and hex" +]) + +TESTWSP([octal and hex escapes 2],[wsp-escape wsp-escape1 wsp04],[], +[\157\143\164\141\154\40 and \x20\x68\x65\x78], +[NF: 3 +0: "octal " +1: and +2: " hex" +]) + +TESTWSP([escape representation],[wsp-escape wsp-escape2 wsp05],[], +[A\x3-\48\39], +[NF: 1 +0: A\003-\0048\0039 +]) + +dnl ------------------------------------------------------------ +dnl Test wordsplit-specific behavior +dnl ------------------------------------------------------------ +TESTWSP([append],[wsp-append wsp06],[append], +[jeden dwa trzy +cztery +piec szesc], +[NF: 3 +0: jeden +1: dwa +2: trzy +NF: 4 +0: jeden +1: dwa +2: trzy +3: cztery +NF: 6 +0: jeden +1: dwa +2: trzy +3: cztery +4: piec +5: szesc +]) + +TESTWSP([dooffs],[wsp-doofs wsp07],[dooffs 3 jeden dwa trzy], +[cztery piec], +[NF: 2 (3) +(0): jeden +(1): dwa +(2): trzy +3: cztery +4: piec +]) + +TESTWSP([variable substitutions: single var],[wsp-var wsp-var00 wsp08],[], +[a $FOO test], +[NF: 3 +0: a +1: bar +2: test +], +[], +[FOO=bar]) + +TESTWSP([variable substitutions: 
concatenated vars],[wsp-var wsp-var01 wsp09], +[], +[a $FOO${BAR}ent test], +[NF: 3 +0: a +1: stringent +2: test +], +[], +[FOO=str BAR=ing]) + +TESTWSP([variable substitutions: field splitting],[wsp-var wsp-var02 wsp10],[], +[a $FOO test], +[NF: 4 +0: a +1: variable +2: substitution +3: test +], +[], +[FOO="variable substitution"]) + +TESTWSP([variable substitutions: double-quoted variable], +[wsp-var wsp-var03 wsp11],[], +[a "$FOO" test], +[NF: 3 +0: a +1: "variable substitution" +2: test +], +[], +[FOO="variable substitution"]) + +TESTWSP([variable substitutions: single-quoted variable], +[wsp-var wsp-var04 wsp12],[], +[a '$FOO' test], +[NF: 3 +0: a +1: $FOO +2: test +], +[], +[FOO="variable substitution"]) + +TESTWSP([undefined variables 1],[wsp-var wsp-var05 wsp13],[], +[a $FOO test a${FOO}b], +[NF: 3 +0: a +1: test +2: ab +], +[], +[unset FOO;]) + +TESTWSP([undefined variables 2],[wsp-var wsp-var06 wsp14],[keepundef], +[a $FOO test a${FOO}b], +[NF: 4 +0: a +1: $FOO +2: test +3: a${FOO}b +], +[], +[unset FOO;]) + +TESTWSP([warn about undefined variables],[wsp-var wsp-var07 wsp15],[warnundef], +[$FOO], +[NF: 0 +], +[warning: undefined variable `FOO' +], +[unset FOO;]) + +TESTWSP([bail out on undefined variables],[wsp-var wsp-var08 wsp16],[undef], +[$FOO], +[], +[undefined variable +], +[unset FOO;]) + +TESTWSP([disable variable expansion],[wsp-var wsp-var09 wsp17],[novar], +[$FOO], +[NF: 1 +0: $FOO +], +[], +[FOO=bar]) + +TESTWSP([K/V environment],[wsp-var wsp-var10 wsp18 wsp-env-kv wsp-env_kv], +[env_kv], +[$FOO a$BAZ], +[NF: 2 +0: bar +1: aqux +], +[], +[FOO=bar BAZ=qux]) + +TESTWSP([nosplit with expansion], +[wsp-var wsp-var11 wsp19 wsp-var-nosplit],[nosplit], +[a $FOO test], +[NF: 1 +0: "a variable expansion test\n" +], +[], +[FOO="variable expansion"]) + +TESTWSP([nosplit without expansion],[wsp-var wsp-var12 wsp20],[nosplit novar], +[a $FOO test], +[NF: 1 +0: "a $FOO test\n" +], +[], +[FOO="variable expansion"]) + +TESTWSP([ignore 
quotes],[wsp-ignore-quotes wsp21],[-quote], +["a text"], +[NF: 2 +0: "\"a" +1: "text\"" +]) + +TESTWSP([custom delimiters (squeeze)],[wsp-delim wsp-delim00 wsp22], +[delim : -ws trimnl], +[semicolon: separated::list: of :words], +[NF: 5 +0: semicolon +1: " separated" +2: list +3: " of " +4: words +]) + +TESTWSP([custom delimiters (no squeeze)],[wsp-delim wsp-delim01 wsp23], +[delim : -ws -squeeze_delims trimnl], +[semicolon: separated::list: of :words], +[NF: 6 +0: semicolon +1: " separated" +2: "" +3: list +4: " of " +5: words +]) + +TESTWSP([custom, with returned delimiters],[wsp-delim wsp-delim02 wsp24], +[delim : -ws trimnl return_delims], +[semicolon: separated::list: of :words], +[NF: 9 +0: semicolon +1: : +2: " separated" +3: : +4: list +5: : +6: " of " +7: : +8: words +]) + +TESTWSP([custom, with returned & squeezed delimiters], +[wsp-delim wsp-delim03 wsp25], +[delim : -ws trimnl return_delims -squeeze_delims], +[semicolon: separated::list: of :words], +[NF: 10 +0: semicolon +1: : +2: " separated" +3: : +4: : +5: list +6: : +7: " of " +8: : +9: words +]) + +TESTWSP([sed expressions],[wsp-sed wsp-sed00 wsp26],[sed], +[arg1 s/foo/bar/g;s/bar baz/quz quux/ arg2], +[NF: 3 +0: arg1 +1: "s/foo/bar/g;s/bar baz/quz quux/" +2: arg2 +]) + +TESTWSP([C escapes on],[wcp-c-escape wsp27],[cescapes], +[a\ttab form\ffeed and new\nline], +[NF: 4 +0: a\ttab +1: form\ffeed +2: and +3: new\nline +]) + +TESTWSP([C escapes off],[wcp-c-escape-off wsp28],[-cescapes], +[a\ttab form\ffeed and new\nline], +[NF: 4 +0: attab +1: formffeed +2: and +3: newnline +]) + +TESTWSP([ws elimination],[wsp-ws-elim wsp29],[delim ' ()' ws return_delims], +[( list items )], +[NF: 4 +0: ( +1: list +2: items +3: ) +]) + +TESTWSP([empty quotes],[wsp-empty-quotes wsp30],[delim : ws return_delims], +[t=""], +[NF: 1 +0: t= +]) + +TESTWSP([delimiter following empty quotes], +[wsp31],[delim : ws return_delims], +[t="":r], +[NF: 3 +0: t= +1: : +2: r +]) + +TESTWSP([suppress ws trimming within quotes], 
+[wsp32], +[default delim , ws return_delims], +[nocomponent,nonewline, formatfield="In message %{text}, "], +[NF: 5 +0: nocomponent +1: , +2: nonewline +3: , +4: "formatfield=In message %{text}, " +]) + +TESTWSP([unescape], +[wsp-unescape wsp33],[-default novar nocmd quote escape '\"'], +[\Seen "quote \"" "bs \\"], +[NF: 3 +0: \\Seen +1: "quote \"" +2: "bs \\" +]) + +TESTWSP([dquote],[wsp34],[-default novar nocmd dquote], +[a "quoted example" isn't it], +[NF: 4 +0: a +1: "quoted example" +2: isn't +3: it +]) + +TESTWSP([squote],[wsp35],[-default novar nocmd squote], +[a 'quoted example' isn"t it], +[NF: 4 +0: a +1: "quoted example" +2: "isn\"t" +3: it +]) + +TESTWSP([incremental],[wsp-incr wsp36],[incremental], +[incremental "input test" line + + +], +[NF: 1 +0: incremental +NF: 1 +0: "input test" +NF: 1 +0: line +], +[input exhausted +]) + +TESTWSP([incremental append],[wsp-incr wsp37],[incremental append], +[incremental "input test" line + + +], +[NF: 1 +0: incremental +NF: 2 +0: incremental +1: "input test" +NF: 3 +0: incremental +1: "input test" +2: line +], +[input exhausted +]) + +TESTWSP([incremental ws], +[wsp-incr wsp38],[return_delims -squeeze_delims incremental ws], +[a list test + + +], +[NF: 1 +0: a +NF: 1 +0: list +NF: 1 +0: test +], +[input exhausted +]) + +m4_popdef([TESTWSP]) diff --git a/tests/wsp.c b/tests/wsp.c new file mode 100644 index 0000000..e06008f --- /dev/null +++ b/tests/wsp.c @@ -0,0 +1,377 @@ +/* grecs - Gray's Extensible Configuration System + Copyright (C) 2014 Sergey Poznyakoff + + Grecs is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3 of the License, or (at your + option) any later version. + + Grecs is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License along
+   with Grecs.  If not, see <http://www.gnu.org/licenses/>. */
+
+/* wsp - test driver for the wordsplit library.  Command line words select
+   wordsplit flags; each line read from standard input is then split and
+   the resulting words are printed on standard output. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include "wordsplit.h"
+
+extern char **environ;
+
+/* Program name (argv[0]), used in diagnostics and in the usage text. */
+char *progname;
+
+/* Maps a command line keyword to a WRDSF_* flag.  Tables of these
+   entries end with an element whose name is NULL. */
+struct kwd
+{
+  const char *name;
+  int tok;
+};
+
+/* Keywords that take no argument.  main sets the flag, or clears it when
+   the keyword is prefixed with `-'. */
+struct kwd bool_keytab[] = {
+  { "append", WRDSF_APPEND },
+  /*{ "reuse", WRDSF_REUSE },*/
+  { "undef", WRDSF_UNDEF },
+  { "novar", WRDSF_NOVAR },
+  { "nocmd", WRDSF_NOCMD },
+  { "ws", WRDSF_WS },
+  { "quote", WRDSF_QUOTE },
+  { "squote", WRDSF_SQUOTE },
+  { "dquote", WRDSF_DQUOTE },
+  { "squeeze_delims", WRDSF_SQUEEZE_DELIMS },
+  { "return_delims", WRDSF_RETURN_DELIMS },
+  { "sed", WRDSF_SED_EXPR },
+  { "debug", WRDSF_SHOWDBG },
+  { "nosplit", WRDSF_NOSPLIT },
+  { "keepundef", WRDSF_KEEPUNDEF },
+  { "warnundef", WRDSF_WARNUNDEF },
+  { "cescapes", WRDSF_CESCAPES },
+  { "default", WRDSF_DEFFLAGS },
+  { "env_kv", WRDSF_ENV_KV },
+  { "incremental", WRDSF_INCREMENTAL },
+  { NULL, 0 }
+};
+
+/* Keywords that consume the following command line word as their value
+   (stored by main in ws_delim, ws_comment or ws_escape). */
+struct kwd string_keytab[] = {
+  { "delim", WRDSF_DELIM },
+  { "comment", WRDSF_COMMENT },
+  { "escape", WRDSF_ESCAPE },
+  { NULL, 0 }
+};
+
+/* Look up STR in the NULL-terminated table KWP.  On an exact match store
+   the entry's token in *RES and return 0; otherwise return -1 and leave
+   *RES untouched. */
+static int
+kwxlat (struct kwd *kwp, const char *str, int *res)
+{
+  for (; kwp->name; kwp++)
+    if (strcmp (kwp->name, str) == 0)
+      {
+        *res = kwp->tok;
+        return 0;
+      }
+  return -1;
+}
+
+/* Print a usage summary generated from the keyword tables and exit with
+   status 0. */
+static void
+help ()
+{
+  size_t i;
+
+  printf ("usage: %s [options]\n", progname);
+  printf ("options are:\n");
+  printf (" [-]trimnl\n");
+  printf (" [-]plaintext\n");
+  putchar ('\n');
+  for (i = 0; bool_keytab[i].name; i++)
+    printf (" [-]%s\n", bool_keytab[i].name);
+  putchar ('\n');
+  /* String-valued options are listed from their own table. */
+  for (i = 0; string_keytab[i].name; i++)
+    {
+      printf (" -%s\n", string_keytab[i].name);
+      printf (" %s ARG\n", string_keytab[i].name);
+    }
+  putchar ('\n');
+  printf (" -dooffs\n");
+  printf (" dooffs COUNT ARGS...\n");
+  exit (0);
+}
+
+/* Print WORD on standard output.  If PLAINTEXT is nonzero, the word is
+   printed verbatim.  Otherwise it is copied through
+   wordsplit_c_quote_copy and printed, enclosed in double quotes when the
+   QUOTE flag is set.
+   NOTE(review): QUOTE is filled in by wordsplit_c_quoted_length,
+   presumably to tell whether the word needs quoting -- confirm against
+   wordsplit.h. */
+void
+print_qword (const char *word, int plaintext)
+{
+  /* Scratch buffer kept between calls; it only ever grows. */
+  static char *qbuf = NULL;
+  static size_t qlen = 0;
+  int quote;
+  size_t size = wordsplit_c_quoted_length (word, 0, &quote);
+
+  if (plaintext)
+    {
+      printf ("%s", word);
+      return;
+    }
+
+  /* An empty word is always shown as "". */
+  if (*word == 0)
+    quote = 1;
+
+  if (size >= qlen)
+    {
+      qlen = size + 1;
+      qbuf = realloc (qbuf, qlen);
+      assert (qbuf != NULL);
+    }
+  wordsplit_c_quote_copy (qbuf, word, 0);
+  qbuf[size] = 0;
+  if (quote)
+    printf ("\"%s\"", qbuf);
+  else
+    printf ("%s", qbuf);
+}
+
+/* Convert environment to K/V form.  Returns a newly allocated,
+   NULL-terminated array in which every NAME=VALUE entry of environ
+   occupies two consecutive elements: NAME, then VALUE.  An entry without
+   `=' yields an empty VALUE.  Allocation failures trip an assert. */
+static char **
+make_env_kv ()
+{
+  size_t i, j, size;
+  char **newenv;
+
+  /* Count the number of entries */
+  for (i = 0; environ[i]; i++)
+    ;
+
+  /* Two elements per entry plus the terminating NULL. */
+  size = i * 2 + 1;
+  newenv = calloc (size, sizeof (newenv[0]));
+  assert (newenv != NULL);
+
+  for (i = j = 0; environ[i]; i++)
+    {
+      size_t len = strcspn (environ[i], "=");
+      char *p = malloc (len+1);
+      assert (p != NULL);
+      memcpy (p, environ[i], len);
+      p[len] = 0;
+      newenv[j++] = p;
+      /* environ[i][len] is either `=' or the terminating NUL; only skip
+         over it in the former case. */
+      p = strdup (environ[i][len] ? environ[i] + len + 1 : "");
+      assert (p != NULL);
+      newenv[j++] = p;
+    }
+  newenv[j] = NULL;
+  return newenv;
+}
+
+/* Parse the command line into wordsplit flags and settings, then split
+   every line of standard input and print the word count ("NF: n")
+   followed by the words, one per line.  Returns 0; exits with status 1 on
+   a command line error. */
+int
+main (int argc, char **argv)
+{
+  char buf[1024], *ptr;
+  int i, offarg = 0;
+  int trimnl_option = 0;
+  int plaintext_option = 0;
+  int wsflags = (WRDSF_DEFFLAGS & ~WRDSF_NOVAR) |
+                 WRDSF_ENOMEMABRT |
+                 WRDSF_ENV | WRDSF_SHOWERR;
+  struct wordsplit ws;
+  int next_call = 0;
+
+  progname = argv[0];
+
+  for (i = 1; i < argc; i++)
+    {
+      char *opt = argv[i];
+      int negate;
+      int flag;
+
+      /* A leading `-' negates the option, a leading `+' is accepted and
+         ignored. */
+      if (opt[0] == '-')
+        {
+          negate = 1;
+          opt++;
+        }
+      else if (opt[0] == '+')
+        {
+          negate = 0;
+          opt++;
+        }
+      else
+        negate = 0;
+
+      if (strcmp (opt, "h") == 0 ||
+          strcmp (opt, "help") == 0 ||
+          strcmp (opt, "-help") == 0)
+        {
+          help ();
+        }
+
+      if (strcmp (opt, "trimnl") == 0)
+        {
+          trimnl_option = !negate;
+          continue;
+        }
+
+      if (strcmp (opt, "plaintext") == 0)
+        {
+          plaintext_option = !negate;
+          continue;
+        }
+
+      if (kwxlat (bool_keytab, opt, &flag) == 0)
+        {
+          if (negate)
+            wsflags &= ~flag;
+          else
+            wsflags |= flag;
+          continue;
+        }
+
+      if (kwxlat (string_keytab, opt, &flag) == 0)
+        {
+          if (negate)
+            wsflags &= ~flag;
+          else
+            {
+              /* The value is the next command line word. */
+              i++;
+              if (i == argc)
+                {
+                  fprintf (stderr, "%s: missing argument for %s\n",
+                           progname, opt);
+                  exit (1);
+                }
+
+              switch (flag)
+                {
+                case WRDSF_DELIM:
+                  ws.ws_delim = argv[i];
+                  break;
+
+                case WRDSF_COMMENT:
+                  ws.ws_comment = argv[i];
+                  break;
+
+                case WRDSF_ESCAPE:
+                  ws.ws_escape = argv[i];
+                  break;
+                }
+
+              wsflags |= flag;
+            }
+          continue;
+        }
+
+      if (strcmp (opt, "dooffs") == 0)
+        {
+          if (negate)
+            wsflags &= ~WRDSF_DOOFFS;
+          else
+            {
+              char *p;
+
+              i++;
+
+              if (i == argc)
+                {
+                  fprintf (stderr, "%s: missing arguments for %s\n",
+                           progname, opt);
+                  exit (1);
+                }
+              ws.ws_offs = strtoul (argv[i], &p, 10);
+              if (*p)
+                {
+                  fprintf (stderr, "%s: invalid number: %s\n",
+                           progname, argv[i]);
+                  exit (1);
+                }
+
+              /* The COUNT words that follow are remembered by position
+                 (offarg) and skipped here; they are stored into the
+                 reserved ws_wordv slots after the first split. */
+              i++;
+              if (i + ws.ws_offs > argc)
+                {
+                  fprintf (stderr, "%s: not enough arguments for %s\n",
+                           progname, opt);
+                  exit (1);
+                }
+              offarg = i;
+              i += ws.ws_offs - 1;
+              wsflags |= WRDSF_DOOFFS;
+            }
+          continue;
+        }
+
+      fprintf (stderr, "%s: unrecognized argument for %s\n",
+               progname, opt);
+      exit (1);
+    }
+
+  if (wsflags & WRDSF_ENV_KV)
+    ws.ws_env = (const char **) make_env_kv ();
+  else
+    ws.ws_env = (const char **) environ;
+
+  /* Incremental mode always strips the trailing newline, so that an
+     empty input line can be recognized below. */
+  if (wsflags & WRDSF_INCREMENTAL)
+    trimnl_option = 1;
+
+  next_call = 0;
+  while ((ptr = fgets (buf, sizeof (buf), stdin)))
+    {
+      int rc;
+      size_t i;
+
+      if (trimnl_option)
+        {
+          size_t len = strlen (ptr);
+          if (len && ptr[len-1] == '\n')
+            ptr[len-1] = 0;
+        }
+
+      if (wsflags & WRDSF_INCREMENTAL)
+        {
+          /* After the first call, an empty line passes NULL to wordsplit
+             (presumably continuing with the previous input -- confirm
+             against wordsplit.h), while a non-empty line releases the
+             previous input copy and supplies a fresh one. */
+          if (next_call)
+            {
+              if (*ptr == 0)
+                ptr = NULL;
+              else
+                free ((void*)ws.ws_input);
+            }
+          else
+            next_call = 1;
+          if (ptr)
+            {
+              ptr = strdup (ptr);
+              assert (ptr != NULL);
+            }
+        }
+
+      rc = wordsplit (ptr, &ws, wsflags);
+      if (rc)
+        {
+          if (!(wsflags & WRDSF_SHOWERR))
+            wordsplit_perror (&ws);
+          continue;
+        }
+
+      /* Fill the reserved leading slots once, after the first successful
+         split. */
+      if (offarg)
+        {
+          for (i = 0; i < ws.ws_offs; i++)
+            ws.ws_wordv[i] = argv[offarg + i];
+          offarg = 0;
+        }
+
+      /* Subsequent calls reuse the same wordsplit structure. */
+      wsflags |= WRDSF_REUSE;
+      printf ("NF: %lu", (unsigned long) ws.ws_wordc);
+      if (wsflags & WRDSF_DOOFFS)
+        printf (" (%lu)", (unsigned long) ws.ws_offs);
+      putchar ('\n');
+      /* Reserved slots are printed with their index in parentheses. */
+      for (i = 0; i < ws.ws_offs; i++)
+        {
+          printf ("(%lu): ", (unsigned long) i);
+          print_qword (ws.ws_wordv[i], plaintext_option);
+          putchar ('\n');
+        }
+      for (; i < ws.ws_offs + ws.ws_wordc; i++)
+        {
+          printf ("%lu: ", (unsigned long) i);
+          print_qword (ws.ws_wordv[i], plaintext_option);
+          putchar ('\n');
+        }
+    }
+  return 0;
+}