Skip to content

Commit c5d6918

Browse files
committed
Add apr_strqtok() function to tokenise quoted strings.
git-svn-id: https://svn.apache.org/repos/asf/apr/apr/trunk@1928796 13f79535-47bb-0310-9956-ffa450edef68
1 parent c7f0d22 commit c5d6918

File tree

4 files changed

+203
-2
lines changed

4 files changed

+203
-2
lines changed

CHANGES

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
-*- coding: utf-8 -*-
22
Changes for APR 2.0.0
33

4+
*) apr_strings: Add apr_strqtok() function to tokenise quoted strings.
5+
[Graham Leggett]
6+
47
*) apr_uri: Update apr_uri_parse() and apr_uri_unparse() to decode / encode
58
a % separating a zone identifier in an IPv6 literal if detected following
69
RFC6874. PR69754. [Jens Finkhaeuser <[email protected]>, Ruediger Pluem]

include/apr_strings.h

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -291,19 +291,40 @@ APR_DECLARE(apr_status_t) apr_tokenize_to_argv(const char *arg_str,
291291
* argument.
292292
* @param str The string to separate; this should be specified on the
293293
* first call to apr_strtok() for a given string, and NULL
294-
* on subsequent calls.
294+
* on subsequent calls. This string is modified in place.
295295
* @param sep The set of delimiters
296296
* @param last State saved by apr_strtok() between calls.
297297
* @return The next token from the string
298298
* @note the 'last' state points to the trailing NUL char of the final
299299
* token, otherwise it points to the character following the current
300-
* token (all successive or empty occurances of sep are skiped on the
300+
* token (all successive or empty occurrences of sep are skipped on the
301301
* subsequent call to apr_strtok). Therefore it is possible to avoid
302302
* a strlen() determination, with the following logic;
303303
* toklen = last - retval; if (*last) --toklen;
304304
*/
305305
APR_DECLARE(char *) apr_strtok(char *str, const char *sep, char **last);
306306

307+
/**
308+
* Split a string into separate null-terminated possibly quoted tokens.
309+
* The tokens are delimited in the string by one or more characters
310+
* from the sep argument. A quoted token may be enclosed in single or
311+
* double quotes, and quoted sections may appear more than once in each
312+
* token. The backslash character escapes each quote. The apr_strqtok
313+
* function can be used interchangeably with the apr_strtok function
314+
* using the same state variable.
315+
* @param str The string to separate; this should be specified on the
316+
* first call to apr_strqtok() for a given string, and NULL
317+
* on subsequent calls. This string is modified in place.
318+
* @param sep The set of delimiters
319+
* @param last State saved by apr_strqtok() between calls.
320+
* @return The next token from the string
321+
* @note while the 'last' state points to the trailing NUL char of the
322+
* final token, otherwise it points to the character following the
323+
* current token, no string length can be inferred as quoted characters
324+
* and backslash escape characters are removed from the final token.
325+
*/
326+
APR_DECLARE(char *) apr_strqtok(char *str, const char *sep, char **last);
327+
307328
/**
308329
* @defgroup APR_Strings_Snprintf snprintf implementations
309330
* @warning

strings/apr_strtok.c

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,85 @@ APR_DECLARE(char *) apr_strtok(char *str, const char *sep, char **last)
5454

5555
return token;
5656
}
57+
58+
/*
 * Quote-aware variant of apr_strtok().
 *
 * Returns the next token of str (or of *last when str is NULL), where
 * tokens are separated by any character in sep.  Single and double quotes
 * open a quoted section in which sep characters are ordinary characters;
 * inside a quoted section a backslash makes the following character
 * literal.  Outside a quoted section a backslash is an ordinary character.
 * The quote characters and the escaping backslashes are removed by
 * compacting the token in place, so the returned token may be shorter than
 * the span of input it was read from.
 *
 * Returns NULL when only separators (or nothing) remain; *last is not
 * updated in that case.
 */
APR_DECLARE(char *) apr_strqtok(char *str, const char *sep, char **last)
{
    char *token;
    char *out;          /* position where the next kept character belongs */
    char quote = 0;     /* quote character currently open, 0 if none */
    int escaped = 0;    /* previous character was a backslash inside quotes */
    char c;

    if (!str) {         /* subsequent call */
        str = *last;    /* start where we left off */
    }

    /* skip characters in sep (will terminate at '\0') */
    while (*str && strchr(sep, *str)) {
        ++str;
    }

    if (!*str) {        /* no more tokens */
        return NULL;
    }

    token = out = str;

    /*
     * Walk the token with a read pointer (str) and a write pointer (out).
     * The write pointer only falls behind once a quote or escape character
     * has been dropped; it never moves ahead of the read pointer, so all
     * pointer arithmetic stays inside the caller's buffer.
     */
    while ((c = *str) != '\0') {
        int keep = 0;

        if (escaped) {
            /* character following a backslash is taken literally */
            escaped = 0;
            keep = 1;
        }
        else if (quote) {
            if ('\\' == c) {
                escaped = 1;
            }
            else if (quote == c) {
                quote = 0;          /* closing quote */
            }
            else {
                keep = 1;
            }
        }
        else if ('\'' == c || '\"' == c) {
            quote = c;              /* opening quote */
        }
        else if (strchr(sep, c)) {
            break;                  /* unquoted separator ends the token */
        }
        else {
            keep = 1;
        }

        if (keep) {
            /* nothing has been dropped yet: the character is already in
             * place, so leave the buffer untouched */
            if (out != str) {
                *out = c;
            }
            ++out;
        }

        ++str;
    }

    /* prepare for the next call (will terminate at '\0') */
    *last = str;

    if (*str) {
        *str = '\0';
        ++*last;
    }

    /* terminate the compacted token if it ended before the read pointer */
    if (out != str) {
        *out = '\0';
    }

    return token;
}
138+

test/teststr.c

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,100 @@ static void test_strtok(abts_case *tc, void *data)
9494
}
9595
}
9696

97+
/*
 * Exercise apr_strqtok() side by side with apr_strtok() on the same input.
 * retval1/state1 track apr_strtok(), retval2/state2 track apr_strqtok();
 * each input is duplicated because both functions modify it in place.
 */
static void test_strqtok(abts_case *tc, void *data)
{
    char *retval1, *retval2;
    char *str1, *str2;
    char *state1 = NULL, *state2 = NULL;

    /* test empty string */
    str1 = apr_pstrdup(p, "");
    str2 = apr_pstrdup(p, "");

    retval1 = apr_strtok(str1, ",", &state1);
    retval2 = apr_strqtok(str2, ",", &state2);

    ABTS_TRUE(tc, retval1 == NULL);
    ABTS_TRUE(tc, retval2 == NULL);

    /* test delimiters only: no token may be returned */
    str1 = apr_pstrdup(p, ",");
    str2 = apr_pstrdup(p, ",");

    retval1 = apr_strtok(str1, ",", &state1);
    retval2 = apr_strqtok(str2, ",", &state2);

    ABTS_TRUE(tc, retval1 == NULL);
    ABTS_TRUE(tc, retval2 == NULL);

    /* test unquoted string */
    str1 = apr_pstrdup(p, "key");
    str2 = apr_pstrdup(p, "key");

    retval1 = apr_strtok(str1, "=", &state1);
    retval2 = apr_strqtok(str2, "=", &state2);

    ABTS_STR_EQUAL(tc, retval1, "key");
    ABTS_STR_EQUAL(tc, retval2, "key");

    /* test quoted string: only apr_strqtok() strips the quotes */
    str1 = apr_pstrdup(p, "\"key\"");
    str2 = apr_pstrdup(p, "\"key\"");

    retval1 = apr_strtok(str1, "=", &state1);
    retval2 = apr_strqtok(str2, "=", &state2);

    ABTS_STR_EQUAL(tc, retval1, "\"key\"");
    ABTS_STR_EQUAL(tc, retval2, "key");

    /* test quoted key value pair */
    str1 = apr_pstrdup(p, "\"key\"='value'");
    str2 = apr_pstrdup(p, "\"key\"='value'");

    retval1 = apr_strtok(str1, "=", &state1);
    retval2 = apr_strqtok(str2, "=", &state2);

    ABTS_STR_EQUAL(tc, retval1, "\"key\"");
    ABTS_STR_EQUAL(tc, retval2, "key");

    retval1 = apr_strtok(NULL, "=", &state1);
    retval2 = apr_strqtok(NULL, "=", &state2);

    ABTS_STR_EQUAL(tc, retval1, "'value'");
    ABTS_STR_EQUAL(tc, retval2, "value");

    retval1 = apr_strtok(NULL, "=", &state1);
    retval2 = apr_strqtok(NULL, "=", &state2);

    ABTS_TRUE(tc, retval1 == NULL);
    ABTS_TRUE(tc, retval2 == NULL);

    /* test quoted against quoted: adjacent quoted sections form one token */
    str1 = apr_pstrdup(p, "\"k\"'ey'");
    str2 = apr_pstrdup(p, "\"k\"'ey'");

    retval1 = apr_strtok(str1, "=", &state1);
    retval2 = apr_strqtok(str2, "=", &state2);

    ABTS_STR_EQUAL(tc, retval1, "\"k\"'ey'");
    ABTS_STR_EQUAL(tc, retval2, "key");

    /* test escapes, unquoted against quoted: the backslash outside the
     * quotes is kept, the one inside escapes the quote, and the separator
     * inside the quotes does not split the token */
    str1 = apr_pstrdup(p, "outside\\'in\\'sid=e'");
    str2 = apr_pstrdup(p, "outside\\'in\\'sid=e'");

    retval1 = apr_strtok(str1, "=", &state1);
    retval2 = apr_strqtok(str2, "=", &state2);

    ABTS_STR_EQUAL(tc, retval1, "outside\\'in\\'sid");
    ABTS_STR_EQUAL(tc, retval2, "outside\\in'sid=e");
}
190+
97191
static void snprintf_noNULL(abts_case *tc, void *data)
98192
{
99193
char buff[100];
@@ -512,6 +606,7 @@ abts_suite *teststr(abts_suite *suite)
512606
abts_run_test(suite, snprintf_noNULL, NULL);
513607
abts_run_test(suite, snprintf_underflow, NULL);
514608
abts_run_test(suite, test_strtok, NULL);
609+
abts_run_test(suite, test_strqtok, NULL);
515610
abts_run_test(suite, string_error, NULL);
516611
abts_run_test(suite, string_long, NULL);
517612
abts_run_test(suite, string_strtoi64, NULL);

0 commit comments

Comments
 (0)