diff --git a/README b/README
index bfd1bb6..9e68b9a 100644
--- a/README
+++ b/README
@@ -278,7 +278,7 @@ the following information:
* Copying
-Copyright (C) 2009-2023 Sergey Poznyakoff
+Copyright (C) 2009-2025 Sergey Poznyakoff
Permission is granted to anyone to make or distribute verbatim copies
of this document as received, in any medium, provided that the
diff --git a/wordsplit.3 b/wordsplit.3
index 4f86f1a..06af7bd 100644
--- a/wordsplit.3
+++ b/wordsplit.3
@@ -1,5 +1,5 @@
.\" This file is part of wordsplit -*- nroff -*-
-.\" Copyright (C) 2009-2021 Sergey Poznyakoff
+.\" Copyright (C) 2009-2025 Sergey Poznyakoff
.\"
.\" Wordsplit is free software; you can redistribute it and/or modify
.\" it under the terms of the GNU General Public License as published by
@@ -14,7 +14,7 @@
.\" You should have received a copy of the GNU General Public License
.\" along with wordsplit. If not, see <http://www.gnu.org/licenses/>.
.\"
-.TH WORDSPLIT 3 "June 22, 2023" "WORDSPLIT" "Wordsplit User Reference"
+.TH WORDSPLIT 3 "March 15, 2025" "WORDSPLIT" "Wordsplit User Reference"
.SH NAME
wordsplit \- split string into words
.SH SYNOPSIS
@@ -558,6 +558,43 @@ the last word. For example, if the input to the above fragment were
"is"
"the time for all good men"
.EE
+.SH COMPATIBILITY QUIRKS
+If
+.B WRDSF_RETURN_DELIMS
+is set and
+.B WRDSF_SQUEEZE_DELIMS
+is not,
+.B wordsplit
+returns an empty word between each pair of contiguous delimiters.
+Consider, for example, the following fragment:
+.PP
+.EX
+struct wordsplit ws;
+ws.ws_delim = ":";
+wordsplit(str, &ws, WRDSF_DELIM | WRDSF_RETURN_DELIMS);
+.EE
+.PP
+If \fIstr\fR contained \fBroot:x:0:0::/root:/bin/sh\fR, the
+resulting \fBws.ws_wordv\fR array would be:
+.PP
+.EX
+{ "root", ":", "x", ":", "0", ":", "0", ":", "", ":", "/root", ":", "/bin/sh" }
+.EE
+.PP
+Notice the empty word at index 8. Earlier versions of
+.B wordsplit
+(up to v1.1-7-g0e1a09c) behaved differently: several contiguous
+delimiters were returned one after another, without empty words in
+between, like this:
+.PP
+.EX
+{ "root", ":", "x", ":", "0", ":", "0", ":", ":", "/root", ":", "/bin/sh" }
+.EE
+.PP
+To request the earlier behavior, use the
+.B WRDSO_RETDELNOTEMPTY
+option. Its use is not advised, except to
+ensure backward compatibility with earlier wordsplit versions.
.SH WORDSPLIT_T STRUCTURE
The data type \fBwordsplit_t\fR has three members that contain
output data upon return from \fBwordsplit\fR or \fBwordsplit_len\fR,
@@ -1256,7 +1293,7 @@ Backtick command expansion is not supported.
.SH "BUG REPORTS"
Report bugs to .
.SH COPYRIGHT
-Copyright \(co 2009-2019 Sergey Poznyakoff
+Copyright \(co 2009\(en2025 Sergey Poznyakoff
.br
.na
License GPLv3+: GNU GPL version 3 or later
diff --git a/wordsplit.at b/wordsplit.at
index 38114c5..c22711b 100644
--- a/wordsplit.at
+++ b/wordsplit.at
@@ -1,5 +1,5 @@
# Test suite for wordsplit -*- Autotest -*-
-# Copyright (C) 2014-2023 Sergey Poznyakoff
+# Copyright (C) 2014-2025 Sergey Poznyakoff
#
# Wordsplit is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -496,6 +496,24 @@ TOTAL: 9
TESTWSP([custom, with returned & squeezed delimiters],[],
[-delim : -nows -trimnl -return_delims -nosqueeze_delims],
[semicolon: separated::list: of :words],
+[NF: 11
+0: semicolon
+1: :
+2: " separated"
+3: :
+4: ""
+5: :
+6: list
+7: :
+8: " of "
+9: :
+10: words
+TOTAL: 11
+])
+
+TESTWSP([custom, with returned & squeezed delimiters (compat)],[],
+[-delim : -nows -trimnl -return_delims -retdelnotempty -nosqueeze_delims],
+[semicolon: separated::list: of :words],
[NF: 10
0: semicolon
1: :
@@ -510,6 +528,17 @@ TESTWSP([custom, with returned & squeezed delimiters],[],
TOTAL: 10
])
+TESTWSP([with maxwords limit],[],
+[-nodefault -delim : -trimnl -maxwords 4],
+[foo::baz:qux],
+[NF: 4
+0: foo
+1: ""
+2: baz
+3: qux
+TOTAL: 4
+])
+
WSPGROUP(wsp-sed)
TESTWSP([sed expressions],[],[-sed],
@@ -922,6 +951,21 @@ TESTWSP([maxwords return_delims -squeeze_delims],[],
[NF: 8
0: foo
1: :
+2: ""
+3: :
+4: ""
+5: :
+6: bar
+7: -:baz:qux-
+TOTAL: 8
+])
+
+TESTWSP([maxwords return_delims -squeeze_delims (compat)],[],
+[-trimnl -maxwords 8 -return_delims -retdelnotempty -nosqueeze_delims -delim :-],
+[foo:::bar-:baz:qux-],
+[NF: 8
+0: foo
+1: :
2: :
3: :
4: bar
@@ -1161,7 +1205,6 @@ TESTWSP([alternate value],[wsp-alt wsp-alt03],
TOTAL: 2
])
-
m4_popdef([TESTWSP])
m4_popdef([wspnum])
m4_popdef([wspid])
diff --git a/wordsplit.c b/wordsplit.c
index 59830f0..2ce6d3b 100644
--- a/wordsplit.c
+++ b/wordsplit.c
@@ -1,5 +1,5 @@
/* wordsplit - a word splitter
- Copyright (C) 2009-2023 Sergey Poznyakoff
+ Copyright (C) 2009-2025 Sergey Poznyakoff
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -185,7 +185,7 @@ _wsplt_subsplit (struct wordsplit *wsp, struct wordsplit *wss,
wss->ws_options = wsp->ws_options & ~WRDSO_MAXWORDS;
wss->ws_namechar = wsp->ws_namechar;
-
+
flags |= WRDSF_DELIM
| WRDSF_ALLOC_DIE
| WRDSF_ERROR
@@ -330,7 +330,7 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
wsp->ws_escape[WRDSX_WORD] = wordsplit_escape[WS_ESC_C_WS];
wsp->ws_escape[WRDSX_QUOTE] = wordsplit_escape[WS_ESC_C];
wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD
- | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD;
+ | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD;
}
else
{
@@ -353,7 +353,7 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
}
else
wsp->ws_namechar = NULL;
-
+
wsp->ws_endp = 0;
wsp->ws_wordi = 0;
@@ -794,7 +794,6 @@ wordsplit_finish (struct wordsplit *wsp)
is set.
*/
again:
- delim = 0; /* Delimiter being processed (if any) */
n = 0; /* Number of words processed so far */
p = wsp->ws_head; /* Current node */
@@ -803,56 +802,48 @@ wordsplit_finish (struct wordsplit *wsp)
struct wordsplit_node *next = p->next;
if (p->flags & _WSNF_DELIM)
{
- if (wsp->ws_flags & WRDSF_RETURN_DELIMS)
+ if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS)
{
- if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS)
+ if (next)
{
- char const *s = wsnode_ptr (wsp, p);
- if (delim)
+ if ((next->flags & _WSNF_DELIM) &&
+ (wsnode_ptr (wsp, p))[0] == (wsnode_ptr (wsp, next))[0])
{
- if (delim == *s)
- {
- wsnode_remove (wsp, p);
- p = next;
- continue;
- }
- else
- {
- delim = 0;
- n++; /* Count this node; it will be returned */
- }
- }
- else
- {
- delim = *s;
+ wsnode_remove (wsp, p);
p = next;
continue;
}
}
+ else if (wsp->ws_flags & WRDSF_INCREMENTAL)
+ goto restart;
}
- else if (wsp->ws_options & WRDSO_MAXWORDS)
+ else if ((next && (next->flags & _WSNF_DELIM)) &&
+ (!(wsp->ws_options & WRDSO_RETDELNOTEMPTY)))
+ {
+ int rc;
+ struct wordsplit_node *nulnode;
+ if ((rc = wsnode_new (wsp, &nulnode)) != 0)
+ return rc;
+ nulnode->flags = _WSNF_NULL | _WSNF_NOEXPAND;
+ wsnode_insert (wsp, nulnode, p, 0);
+ next = nulnode;
+ }
+
+ if ((wsp->ws_options & WRDSO_MAXWORDS) &&
+ !(wsp->ws_flags & WRDSF_RETURN_DELIMS))
{
wsnode_remove (wsp, p);
p = next;
continue;
}
}
- else
- {
- if (delim)
- {
- /* Last node was a delimiter or a compressed run of delimiters;
- Count it, and clear the delimiter marker */
- n++;
- delim = 0;
- }
- if (wsp->ws_options & WRDSO_MAXWORDS)
- {
- if (wsp->ws_wordi + n + 1 == wsp->ws_maxwords)
- break;
- }
- }
+
n++;
+
+ if ((wsp->ws_options & WRDSO_MAXWORDS) &&
+ (wsp->ws_wordi + n == wsp->ws_maxwords))
+ break;
+
if (wsp->ws_flags & WRDSF_INCREMENTAL)
p = NULL; /* Break the loop */
else
@@ -875,6 +866,7 @@ wordsplit_finish (struct wordsplit *wsp)
if (wsp->ws_flags & WRDSF_INCREMENTAL)
{
/* Restart the processing, if there's any input left. */
+ restart:
if (wsp->ws_endp < wsp->ws_len)
{
int rc;
@@ -1072,7 +1064,7 @@ wsplt_env_find (struct wordsplit *wsp, const char *name, size_t len)
break;
}
}
- else
+ else
{
/* Usual (A=B) environment. */
for (i = 0; wsp->ws_env[i]; i++)
@@ -1680,7 +1672,7 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
return 1;
wsnode_insert (wsp, newnode, *ptail, 0);
*ptail = newnode;
- newnode->flags = _WSNF_NULL;
+ newnode->flags = _WSNF_NULL | _WSNF_NOEXPAND;
}
else
{
@@ -1726,7 +1718,7 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
return 1;
wsnode_insert (wsp, newnode, *ptail, 0);
*ptail = newnode;
- newnode->flags = _WSNF_NULL;
+ newnode->flags = _WSNF_NULL | _WSNF_NOEXPAND;
}
return 0;
}
@@ -1897,7 +1889,7 @@ expcmd (struct wordsplit *wsp, const char *str, size_t len,
return 1;
wsnode_insert (wsp, newnode, *ptail, 0);
*ptail = newnode;
- newnode->flags = _WSNF_NULL;
+ newnode->flags = _WSNF_NULL | _WSNF_NOEXPAND;
}
else
{
@@ -1928,7 +1920,7 @@ expcmd (struct wordsplit *wsp, const char *str, size_t len,
return 1;
wsnode_insert (wsp, newnode, *ptail, 0);
*ptail = newnode;
- newnode->flags = _WSNF_NULL;
+ newnode->flags = _WSNF_NULL | _WSNF_NOEXPAND;
}
return 0;
}
@@ -1983,7 +1975,7 @@ wordsplit_trimws (struct wordsplit *wsp)
n > p->v.segm.beg && ISWS (wsp->ws_input[n - 1]); n--);
p->v.segm.end = n;
if (p->v.segm.beg == p->v.segm.end)
- p->flags |= _WSNF_NULL;
+ p->flags |= _WSNF_NULL | _WSNF_NOEXPAND;
}
wsnode_nullelim (wsp);
@@ -2355,7 +2347,7 @@ scan_word (struct wordsplit *wsp, size_t start, int consume_all)
else if (WSP_RETURN_DELIMS (wsp))
{
i++;
- flags |= _WSNF_DELIM;
+ flags |= _WSNF_DELIM | _WSNF_NOEXPAND;
}
else if (!(wsp->ws_flags & WRDSF_SQUEEZE_DELIMS))
flags |= _WSNF_EMPTYOK;
diff --git a/wordsplit.h b/wordsplit.h
index 768df34..c5eb54c 100644
--- a/wordsplit.h
+++ b/wordsplit.h
@@ -1,5 +1,5 @@
/* wordsplit - a word splitter
- Copyright (C) 2009-2023 Sergey Poznyakoff
+ Copyright (C) 2009-2025 Sergey Poznyakoff
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -254,6 +254,16 @@ struct wordsplit
/* ws_namechar member is initialized */
#define WRDSO_NAMECHAR 0x00010000
+/* If WRDSF_RETURN_DELIMS is set and WRDSF_SQUEEZE_DELIMS is not, wordsplit
+ returns an empty word between each pair of contiguous delimiters. This
+ behavior matches what wordsplit does when WRDSF_RETURN_DELIMS is not set.
+ However, earlier versions (v1.1) behaved differently: several contiguous
+ delimiters were returned one after another, without empty words in between.
+ The WRDSO_RETDELNOTEMPTY option restores that earlier behavior. Its use is
+ not advised, except to ensure backward compatibility with earlier wordsplit
+ versions. */
+#define WRDSO_RETDELNOTEMPTY 0x00020000
+
#define WRDSO_BSKEEP WRDSO_BSKEEP_WORD
#define WRDSO_OESC WRDSO_OESC_WORD
#define WRDSO_XESC WRDSO_XESC_WORD
diff --git a/wsp.c b/wsp.c
index 58a9c8d..79d577e 100644
--- a/wsp.c
+++ b/wsp.c
@@ -1,5 +1,5 @@
/* wsp - test program for wordsplit
- Copyright (C) 2014-2023 Sergey Poznyakoff
+ Copyright (C) 2014-2025 Sergey Poznyakoff
Wordsplit is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -458,6 +458,7 @@ struct wsopt opttab[] = {
{ "nocmdsplit", WRDSO_NOCMDSPLIT, ws_boolean, setfn_option },
{ "maxwords", WRDSO_MAXWORDS, ws_required_argument, setfn_maxwords },
{ "namechar", WRDSO_NAMECHAR, ws_required_argument, setfn_namechar },
+ { "retdelnotempty", WRDSO_RETDELNOTEMPTY, ws_boolean, setfn_option },
/* String options */
{ "delim", WRDSF_DELIM, ws_required_argument, setfn_delim },
{ "comment", WRDSF_COMMENT,ws_required_argument, setfn_comment },