Tokenizer.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 1996-2018 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 /* DEBUG: section 24 SBuf */
10 
11 #include "squid.h"
12 #include "Debug.h"
13 #include "parser/Tokenizer.h"
14 
15 #include <cerrno>
16 #if HAVE_CTYPE_H
17 #include <ctype.h>
18 #endif
19 
21 SBuf
23 {
24  // careful: n may be npos!
25  debugs(24, 5, "consuming " << n << " bytes");
26  const SBuf result = buf_.consume(n);
27  parsed_ += result.length();
28  return result;
29 }
30 
34 {
35  return consume(n).length();
36 }
37 
39 SBuf
41 {
42  debugs(24, 5, "consuming " << n << " bytes");
43 
44  // If n is npos, we consume everything from buf_ (and nothing from result).
45  const SBuf::size_type parsed = (n == SBuf::npos) ? buf_.length() : n;
46 
47  SBuf result = buf_;
48  buf_ = result.consume(buf_.length() - parsed);
49  parsed_ += parsed;
50  return result;
51 }
52 
56 {
57  return consumeTrailing(n).length();
58 }
59 
60 bool
61 Parser::Tokenizer::token(SBuf &returnedToken, const CharacterSet &delimiters)
62 {
63  const Tokenizer saved(*this);
64  skipAll(delimiters);
65  const SBuf::size_type tokenLen = buf_.findFirstOf(delimiters); // not found = npos => consume to end
66  if (tokenLen == SBuf::npos) {
67  debugs(24, 8, "no token found for delimiters " << delimiters.name);
68  *this = saved;
69  return false;
70  }
71  returnedToken = consume(tokenLen); // cannot be empty
72  skipAll(delimiters);
73  debugs(24, DBG_DATA, "token found for delimiters " << delimiters.name << ": '" <<
74  returnedToken << '\'');
75  return true;
76 }
77 
78 bool
79 Parser::Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
80 {
81  SBuf::size_type prefixLen = buf_.substr(0,limit).findFirstNotOf(tokenChars);
82  if (prefixLen == 0) {
83  debugs(24, 8, "no prefix for set " << tokenChars.name);
84  return false;
85  }
86  if (prefixLen == SBuf::npos && (atEnd() || limit == 0)) {
87  debugs(24, 8, "no char in set " << tokenChars.name << " while looking for prefix");
88  return false;
89  }
90  if (prefixLen == SBuf::npos && limit > 0) {
91  debugs(24, 8, "whole haystack matched");
92  prefixLen = limit;
93  }
94  debugs(24, 8, "found with length " << prefixLen);
95  returnedToken = consume(prefixLen); // cannot be empty after the npos check
96  return true;
97 }
98 
99 bool
100 Parser::Tokenizer::suffix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
101 {
102  SBuf span = buf_;
103 
104  if (limit < buf_.length())
105  span.consume(buf_.length() - limit); // ignore the N prefix characters
106 
107  auto i = span.rbegin();
108  SBuf::size_type found = 0;
109  while (i != span.rend() && tokenChars[*i]) {
110  ++i;
111  ++found;
112  }
113  if (!found)
114  return false;
115  returnedToken = consumeTrailing(found);
116  return true;
117 }
118 
121 {
122  const SBuf::size_type prefixLen = buf_.findFirstNotOf(tokenChars);
123  if (prefixLen == 0) {
124  debugs(24, 8, "no match when trying to skipAll " << tokenChars.name);
125  return 0;
126  }
127  debugs(24, 8, "skipping all in " << tokenChars.name << " len " << prefixLen);
128  return success(prefixLen);
129 }
130 
131 bool
133 {
134  if (!buf_.isEmpty() && chars[buf_[0]]) {
135  debugs(24, 8, "skipping one-of " << chars.name);
136  return success(1);
137  }
138  debugs(24, 8, "no match while skipping one-of " << chars.name);
139  return false;
140 }
141 
142 bool
144 {
145  if (buf_.length() < tokenToSkip.length())
146  return false;
147 
148  SBuf::size_type offset = 0;
149  if (tokenToSkip.length() < buf_.length())
150  offset = buf_.length() - tokenToSkip.length();
151 
152  if (buf_.substr(offset, SBuf::npos).cmp(tokenToSkip) == 0) {
153  debugs(24, 8, "skipping " << tokenToSkip.length());
154  return successTrailing(tokenToSkip.length());
155  }
156  return false;
157 }
158 
159 bool
160 Parser::Tokenizer::skip(const SBuf &tokenToSkip)
161 {
162  if (buf_.startsWith(tokenToSkip)) {
163  debugs(24, 8, "skipping " << tokenToSkip.length());
164  return success(tokenToSkip.length());
165  }
166  debugs(24, 8, "no match, not skipping '" << tokenToSkip << '\'');
167  return false;
168 }
169 
170 bool
171 Parser::Tokenizer::skip(const char tokenChar)
172 {
173  if (!buf_.isEmpty() && buf_[0] == tokenChar) {
174  debugs(24, 8, "skipping char '" << tokenChar << '\'');
175  return success(1);
176  }
177  debugs(24, 8, "no match, not skipping char '" << tokenChar << '\'');
178  return false;
179 }
180 
181 bool
183 {
184  if (!buf_.isEmpty() && skippable[buf_[buf_.length()-1]]) {
185  debugs(24, 8, "skipping one-of " << skippable.name);
186  return successTrailing(1);
187  }
188  debugs(24, 8, "no match while skipping one-of " << skippable.name);
189  return false;
190 }
191 
194 {
195  const SBuf::size_type prefixEnd = buf_.findLastNotOf(skippable);
196  const SBuf::size_type prefixLen = prefixEnd == SBuf::npos ?
197  0 : (prefixEnd + 1);
198  const SBuf::size_type suffixLen = buf_.length() - prefixLen;
199  if (suffixLen == 0) {
200  debugs(24, 8, "no match when trying to skip " << skippable.name);
201  return 0;
202  }
203  debugs(24, 8, "skipping in " << skippable.name << " len " << suffixLen);
204  return successTrailing(suffixLen);
205 }
206 
207 /* reworked from compat/strtoll.c */
208 bool
209 Parser::Tokenizer::int64(int64_t & result, int base, bool allowSign, const SBuf::size_type limit)
210 {
211  if (atEnd() || limit == 0)
212  return false;
213 
214  const SBuf range(buf_.substr(0,limit));
215 
216  //fixme: account for buf_.size()
217  bool neg = false;
218  const char *s = range.rawContent();
219  const char *end = range.rawContent() + range.length();
220 
221  if (allowSign) {
222  if (*s == '-') {
223  neg = true;
224  ++s;
225  } else if (*s == '+') {
226  ++s;
227  }
228  if (s >= end) return false;
229  }
230  if (( base == 0 || base == 16) && *s == '0' && (s+1 < end ) &&
231  tolower(*(s+1)) == 'x') {
232  s += 2;
233  base = 16;
234  }
235  if (base == 0) {
236  if ( *s == '0') {
237  base = 8;
238  ++s;
239  } else {
240  base = 10;
241  }
242  }
243  if (s >= end) return false;
244 
245  uint64_t cutoff;
246 
247  cutoff = neg ? -static_cast<uint64_t>(INT64_MIN) : INT64_MAX;
248  const int cutlim = cutoff % static_cast<int64_t>(base);
249  cutoff /= static_cast<uint64_t>(base);
250 
251  int any = 0, c;
252  int64_t acc = 0;
253  do {
254  c = *s;
255  if (xisdigit(c)) {
256  c -= '0';
257  } else if (xisalpha(c)) {
258  c -= xisupper(c) ? 'A' - 10 : 'a' - 10;
259  } else {
260  break;
261  }
262  if (c >= base)
263  break;
264  if (any < 0 || static_cast<uint64_t>(acc) > cutoff || (static_cast<uint64_t>(acc) == cutoff && c > cutlim))
265  any = -1;
266  else {
267  any = 1;
268  acc *= base;
269  acc += c;
270  }
271  } while (++s < end);
272 
273  if (any == 0) // nothing was parsed
274  return false;
275  if (any < 0) {
276  acc = neg ? INT64_MIN : INT64_MAX;
277  errno = ERANGE;
278  return false;
279  } else if (neg)
280  acc = -acc;
281 
282  result = acc;
283  return success(s - range.rawContent());
284 }
285 
const char * name
optional set label for debugging (default: "anonymous")
Definition: CharacterSet.h:69
SBuf consume(const SBuf::size_type n)
convenience method: consumes up to n bytes, counts, and returns them
Definition: Tokenizer.cc:22
Definition: SBuf.h:86
int i
Definition: membanger.c:49
const_reverse_iterator rbegin() const
Definition: SBuf.h:580
SBuf::size_type successTrailing(const SBuf::size_type n)
convenience method: consumes up to n last bytes and returns their count
Definition: Tokenizer.cc:55
bool skipSuffix(const SBuf &tokenToSkip)
Definition: Tokenizer.cc:143
SBuf consumeTrailing(const SBuf::size_type n)
convenience method: consumes up to n last bytes and returns them
Definition: Tokenizer.cc:40
SBuf::size_type parsed_
bytes successfully parsed, including skipped
Definition: Tokenizer.h:157
size_type length() const
Returns the number of bytes stored in SBuf.
Definition: SBuf.h:404
#define debugs(SECTION, LEVEL, CONTENT)
Definition: Debug.h:124
SBuf::size_type skipAllTrailing(const CharacterSet &discardables)
Definition: Tokenizer.cc:193
SBuf buf_
yet unparsed input
Definition: Tokenizer.h:156
bool skipOneTrailing(const CharacterSet &discardables)
Definition: Tokenizer.cc:182
optimized set of C chars, with quick membership test and merge support
Definition: CharacterSet.h:17
bool skipOne(const CharacterSet &discardables)
Definition: Tokenizer.cc:132
SBuf consume(size_type n=npos)
Definition: SBuf.cc:491
SBuf::size_type success(const SBuf::size_type n)
convenience method: consume()s up to n bytes and returns their count
Definition: Tokenizer.cc:33
bool skip(const SBuf &tokenToSkip)
Definition: Tokenizer.cc:160
bool prefix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit=SBuf::npos)
Definition: Tokenizer.cc:79
#define INT64_MAX
Definition: strtoll.c:70
bool suffix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit=SBuf::npos)
Definition: Tokenizer.cc:100
#define xisdigit(x)
Definition: xis.h:20
static const size_type npos
Definition: SBuf.h:92
SBuf::size_type skipAll(const CharacterSet &discardables)
Definition: Tokenizer.cc:120
#define xisupper(x)
Definition: xis.h:28
const_reverse_iterator rend() const
Definition: SBuf.h:584
#define xisalpha(x)
Definition: xis.h:23
#define DBG_DATA
Definition: Debug.h:48
const char * rawContent() const
Definition: SBuf.cc:519
MemBlob::size_type size_type
Definition: SBuf.h:89
#define INT64_MIN
Definition: strtoll.c:60
bool token(SBuf &returnedToken, const CharacterSet &delimiters)
Definition: Tokenizer.cc:61
bool int64(int64_t &result, int base=0, bool allowSign=true, SBuf::size_type limit=SBuf::npos)
Definition: Tokenizer.cc:209

 

Introduction

Documentation

Support

Miscellaneous

Web Site Translations

Mirrors