Libparserutils
filter.c
Go to the documentation of this file.
1 /*
2  * This file is part of LibParserUtils.
3  * Licensed under the MIT License,
4  * http://www.opensource.org/licenses/mit-license.php
5  * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
6  */
7 
8 #include <errno.h>
9 #include <stdbool.h>
10 #include <stdlib.h>
11 #include <string.h>
12 
13 #ifndef WITHOUT_ICONV_FILTER
14 #include <iconv.h>
15 #endif
16 
19 
20 #include "input/filter.h"
21 #include "utils/utils.h"
22 
25 #ifndef WITHOUT_ICONV_FILTER
26  iconv_t cd;
27  uint16_t int_enc;
28 #else
29  parserutils_charset_codec *read_codec;
30  parserutils_charset_codec *write_codec;
32  uint32_t pivot_buf[64];
34  bool leftover;
35  uint8_t *pivot_left;
36  size_t pivot_len;
37 #endif
38 
39  struct {
40  uint16_t encoding;
41  } settings;
42 };
43 
46  const char *enc);
47 
59  parserutils_filter **filter)
60 {
62  parserutils_error error;
63 
64  if (int_enc == NULL || filter == NULL)
65  return PARSERUTILS_BADPARM;
66 
67  f = malloc(sizeof(parserutils_filter));
68  if (f == NULL)
69  return PARSERUTILS_NOMEM;
70 
71 #ifndef WITHOUT_ICONV_FILTER
72  f->cd = (iconv_t) -1;
74  int_enc, strlen(int_enc));
75  if (f->int_enc == 0) {
76  free(f);
78  }
79 #else
80  f->leftover = false;
81  f->pivot_left = NULL;
82  f->pivot_len = 0;
83 #endif
84 
85  error = filter_set_defaults(f);
86  if (error != PARSERUTILS_OK) {
87  free(f);
88  return error;
89  }
90 
91 #ifdef WITHOUT_ICONV_FILTER
92  error = parserutils_charset_codec_create(int_enc, &f->write_codec);
93  if (error != PARSERUTILS_OK) {
94  if (f->read_codec != NULL) {
95  parserutils_charset_codec_destroy(f->read_codec);
96  f->read_codec = NULL;
97  }
98  free(f);
99  return error;
100  }
101 #endif
102 
103  *filter = f;
104 
105  return PARSERUTILS_OK;
106 }
107 
115 {
116  if (input == NULL)
117  return PARSERUTILS_BADPARM;
118 
119 #ifndef WITHOUT_ICONV_FILTER
120  if (input->cd != (iconv_t) -1) {
121  iconv_close(input->cd);
122  input->cd = (iconv_t) -1;
123  }
124 #else
125  if (input->read_codec != NULL) {
126  parserutils_charset_codec_destroy(input->read_codec);
127  input->read_codec = NULL;
128  }
129 
130  if (input->write_codec != NULL) {
131  parserutils_charset_codec_destroy(input->write_codec);
132  input->write_codec = NULL;
133  }
134 #endif
135 
136  free(input);
137 
138  return PARSERUTILS_OK;
139 }
140 
152 {
154 
155  if (input == NULL || params == NULL)
156  return PARSERUTILS_BADPARM;
157 
158  switch (type) {
160  error = filter_set_encoding(input, params->encoding.name);
161  break;
162  }
163 
164  return error;
165 }
166 
180  const uint8_t **data, size_t *len,
181  uint8_t **output, size_t *outlen)
182 {
183  if (input == NULL || data == NULL || *data == NULL || len == NULL ||
184  output == NULL || *output == NULL || outlen == NULL)
185  return PARSERUTILS_BADPARM;
186 
187 #ifndef WITHOUT_ICONV_FILTER
188  if (iconv(input->cd, (void *) data, len,
189  (char **) output, outlen) == (size_t) -1) {
190  switch (errno) {
191  case E2BIG:
192  return PARSERUTILS_NOMEM;
193  case EILSEQ:
194  if (*outlen < 3)
195  return PARSERUTILS_NOMEM;
196 
197  (*output)[0] = 0xef;
198  (*output)[1] = 0xbf;
199  (*output)[2] = 0xbd;
200 
201  *output += 3;
202  *outlen -= 3;
203 
204  (*data)++;
205  (*len)--;
206 
207  while (*len > 0) {
208  size_t ret;
209 
210  ret = iconv(input->cd, (void *) data, len,
211  (char **) output, outlen);
212  if (ret != (size_t) -1 || errno != EILSEQ)
213  break;
214 
215  if (*outlen < 3)
216  return PARSERUTILS_NOMEM;
217 
218  (*output)[0] = 0xef;
219  (*output)[1] = 0xbf;
220  (*output)[2] = 0xbd;
221 
222  *output += 3;
223  *outlen -= 3;
224 
225  (*data)++;
226  (*len)--;
227  }
228 
229  return errno == E2BIG ? PARSERUTILS_NOMEM
230  : PARSERUTILS_OK;
231  }
232  }
233 
234  return PARSERUTILS_OK;
235 #else
236  if (input->leftover) {
237  parserutils_error write_error;
238 
239  /* Some data left to be written from last call */
240 
241  /* Attempt to flush the remaining data. */
242  write_error = parserutils_charset_codec_encode(
243  input->write_codec,
244  (const uint8_t **) &input->pivot_left,
245  &input->pivot_len,
246  output, outlen);
247 
248  if (write_error != PARSERUTILS_OK)
249  return write_error;
250 
251 
252  /* And clear leftover */
253  input->pivot_left = NULL;
254  input->pivot_len = 0;
255  input->leftover = false;
256  }
257 
258  while (*len > 0) {
259  parserutils_error read_error, write_error;
260  size_t pivot_len = sizeof(input->pivot_buf);
261  uint8_t *pivot = (uint8_t *) input->pivot_buf;
262 
263  read_error = parserutils_charset_codec_decode(input->read_codec,
264  data, len,
265  (uint8_t **) &pivot, &pivot_len);
266 
267  pivot = (uint8_t *) input->pivot_buf;
268  pivot_len = sizeof(input->pivot_buf) - pivot_len;
269 
270  if (pivot_len > 0) {
271  write_error = parserutils_charset_codec_encode(
272  input->write_codec,
273  (const uint8_t **) &pivot,
274  &pivot_len,
275  output, outlen);
276 
277  if (write_error != PARSERUTILS_OK) {
278  input->leftover = true;
279  input->pivot_left = pivot;
280  input->pivot_len = pivot_len;
281 
282  return write_error;
283  }
284  }
285 
286  if (read_error != PARSERUTILS_OK &&
287  read_error != PARSERUTILS_NOMEM)
288  return read_error;
289  }
290 
291  return PARSERUTILS_OK;
292 #endif
293 }
294 
302 {
304 
305  if (input == NULL)
306  return PARSERUTILS_BADPARM;
307 
308 #ifndef WITHOUT_ICONV_FILTER
309  iconv(input->cd, NULL, 0, NULL, 0);
310 #else
311  /* Clear pivot buffer leftovers */
312  input->pivot_left = NULL;
313  input->pivot_len = 0;
314  input->leftover = false;
315 
316  /* Reset read codec */
317  error = parserutils_charset_codec_reset(input->read_codec);
318  if (error != PARSERUTILS_OK)
319  return error;
320 
321  /* Reset write codec */
322  error = parserutils_charset_codec_reset(input->write_codec);
323  if (error != PARSERUTILS_OK)
324  return error;
325 #endif
326 
327  return error;
328 }
329 
337 {
338  parserutils_error error;
339 
340  if (input == NULL)
341  return PARSERUTILS_BADPARM;
342 
343 #ifdef WITHOUT_ICONV_FILTER
344  input->read_codec = NULL;
345  input->write_codec = NULL;
346 #endif
347 
348  input->settings.encoding = 0;
349  error = filter_set_encoding(input, "UTF-8");
350  if (error != PARSERUTILS_OK)
351  return error;
352 
353  return PARSERUTILS_OK;
354 }
355 
364  const char *enc)
365 {
367  uint16_t mibenum;
368 
369  if (input == NULL || enc == NULL)
370  return PARSERUTILS_BADPARM;
371 
372  mibenum = parserutils_charset_mibenum_from_name(enc, strlen(enc));
373  if (mibenum == 0)
375 
376  /* Exit early if we're already using this encoding */
377  if (input->settings.encoding == mibenum)
378  return PARSERUTILS_OK;
379 
380 #ifndef WITHOUT_ICONV_FILTER
381  if (input->cd != (iconv_t) -1) {
382  iconv_close(input->cd);
383  input->cd = (iconv_t) -1;
384  }
385 
386  input->cd = iconv_open(
389  if (input->cd == (iconv_t) -1) {
390  return (errno == EINVAL) ? PARSERUTILS_BADENCODING
392  }
393 #else
394  if (input->read_codec != NULL) {
395  parserutils_charset_codec_destroy(input->read_codec);
396  input->read_codec = NULL;
397  }
398 
399  error = parserutils_charset_codec_create(enc, &input->read_codec);
400  if (error != PARSERUTILS_OK)
401  return error;
402 #endif
403 
404  input->settings.encoding = mibenum;
405 
406  return error;
407 
408 }
static parserutils_error filter_set_defaults(parserutils_filter *input)
Set an input filter's default settings.
Definition: filter.c:336
struct parserutils_filter_optparams::@5 encoding
Parameters for encoding setting.
uint16_t int_enc
The internal encoding.
Definition: filter.c:27
const char * parserutils_charset_mibenum_to_name(uint16_t mibenum)
Retrieve the canonical name of an encoding from the MIB enum.
Definition: aliases.c:127
Input filter option parameters.
Definition: filter.h:28
struct parserutils_filter::@4 settings
Filter settings.
uint16_t encoding
Input encoding.
Definition: filter.c:40
parserutils_error parserutils__filter_setopt(parserutils_filter *input, parserutils_filter_opttype type, parserutils_filter_optparams *params)
Configure an input filter.
Definition: filter.c:149
parserutils_error parserutils_charset_codec_encode(parserutils_charset_codec *codec, const uint8_t **source, size_t *sourcelen, uint8_t **dest, size_t *destlen)
Encode a chunk of UCS-4 data into a codec's charset.
Definition: codec.c:136
parserutils_error parserutils_charset_codec_reset(parserutils_charset_codec *codec)
Clear a charset codec's encoding state.
Definition: codec.c:182
const char * name
Encoding name.
Definition: filter.h:32
parserutils_error parserutils__filter_process_chunk(parserutils_filter *input, const uint8_t **data, size_t *len, uint8_t **output, size_t *outlen)
Process a chunk of data.
Definition: filter.c:179
parserutils_error
Definition: errors.h:18
iconv_t cd
Iconv conversion descriptor.
Definition: filter.c:26
parserutils_error parserutils_charset_codec_destroy(parserutils_charset_codec *codec)
Destroy a charset codec.
Definition: codec.c:86
static parserutils_error filter_set_encoding(parserutils_filter *input, const char *enc)
Set an input filter's encoding.
Definition: filter.c:363
size_t len
Definition: codec_8859.c:23
Input filter.
Definition: filter.c:24
parserutils_error parserutils_charset_codec_decode(parserutils_charset_codec *codec, const uint8_t **source, size_t *sourcelen, uint8_t **dest, size_t *destlen)
Decode a chunk of data in a codec's charset into UCS-4.
Definition: codec.c:163
parserutils_error parserutils_charset_codec_create(const char *charset, parserutils_charset_codec **codec)
Create a charset codec.
Definition: codec.c:38
parserutils_error parserutils__filter_destroy(parserutils_filter *input)
Destroy an input filter.
Definition: filter.c:114
parserutils_filter_opttype
Input filter option types.
Definition: filter.h:21
parserutils_error parserutils__filter_reset(parserutils_filter *input)
Reset an input filter's state.
Definition: filter.c:301
Core charset codec definition; implementations extend this.
Definition: codec_impl.h:19
uint16_t parserutils_charset_mibenum_from_name(const char *alias, size_t len)
Retrieve the MIB enum value assigned to an encoding name.
Definition: aliases.c:107
parserutils_error parserutils__filter_create(const char *int_enc, parserutils_filter **filter)
Create an input filter.
Definition: filter.c:58