1954 lines
47 KiB
C
1954 lines
47 KiB
C
/*
|
|
* Copyright (C) 2000 Ximian Inc.
|
|
*
|
|
* Authors: Michael Zucchi <notzed@ximian.com>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Library General Public License
|
|
* as published by the Free Software Foundation; either version 2 of
|
|
* the License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU Library General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Library General Public
|
|
* License along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
*/
|
|
|
|
/* What should hopefully be a fast mail parser */
|
|
|
|
/* Do not change this code without asking me (Michael Zucchi) first
|
|
|
|
There is almost always a reason something was done a certain way.
|
|
*/
|
|
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <fcntl.h>
|
|
#include <unistd.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdio.h>
|
|
#include <errno.h>
|
|
|
|
#include <regex.h>
|
|
#include <ctype.h>
|
|
|
|
#include <glib.h>
|
|
#include "camel-mime-parser.h"
|
|
#include "camel-mime-utils.h"
|
|
#include "camel-mime-filter.h"
|
|
#include "camel-stream.h"
|
|
#include "camel-seekable-stream.h"
|
|
|
|
/*
 * Trace hooks.  Each macro discards its argument, so any statement wrapped
 * in one of them is compiled out.  Redefine a hook as `x' to enable it.
 */
#define r(x)	/* wraps the read tracing in folder_read() */
#define h(x)	/* wraps the boundary tracing in folder_boundary_check() */
#define c(x)	/* not used in the part of the file reviewed here */
#define d(x)	/* wraps the state tracing; also gates the states[] table */

/* Uncomment to compile in the purify watch-point calls. */
/*#define PURIFY*/

/* Allocate raw headers from a per-part memory pool. */
#define MEMPOOL

/* Granularity to which pool allocation sizes are rounded. */
#define STRUCT_ALIGN 4
|
|
|
|
#ifdef PURIFY
/* Watch-point ids handed back by purify; -1 until a watch has been set. */
int inend_id = -1;
int inbuffer_id = -1;
#endif
|
|
|
|
#if 0
/* Disabled instrumentation: count every g_strdup/g_malloc/g_free call
   in counters that are defined elsewhere. */
extern int strdup_count;
extern int malloc_count;
extern int free_count;

#define g_strdup(x)	(strdup_count++, g_strdup(x))
#define g_malloc(x)	(malloc_count++, g_malloc(x))
#define g_free(x)	(free_count++, g_free(x))
#endif
|
|
|
|
#ifdef MEMPOOL
|
|
/* One fixed-size block of pool storage. */
typedef struct _MemPoolNode {
	struct _MemPoolNode *next;	/* next block owned by the same pool */

	int free;	/* bytes of data[] not yet handed out; space is carved off the top */
	char data[1];	/* start of the storage; the node is allocated with blocksize bytes here */
} MemPoolNode;
|
|
|
|
/* A stand-alone allocation made for a request at or above the pool threshold. */
typedef struct _MemPoolThresholdNode {
	struct _MemPoolThresholdNode *next;	/* next oversized allocation in the pool */
	char data[1];	/* start of the storage; the node is allocated with the requested size here */
} MemPoolThresholdNode;
|
|
|
|
typedef struct _MemPool {
|
|
int blocksize;
|
|
int threshold;
|
|
struct _MemPoolNode *blocks;
|
|
struct _MemPoolThresholdNode *threshold_blocks;
|
|
} MemPool;
|
|
|
|
/* Pool interface: create, allocate, reset (optionally freeing the blocks), destroy. */
MemPool	*mempool_new(int blocksize, int threshold);
void	*mempool_alloc(MemPool *pool, int size);
void	 mempool_flush(MemPool *pool, int freeall);
void	 mempool_free(MemPool *pool);
|
|
|
|
MemPool *mempool_new(int blocksize, int threshold)
|
|
{
|
|
MemPool *pool;
|
|
|
|
pool = g_malloc(sizeof(*pool));
|
|
if (threshold >= blocksize)
|
|
threshold = blocksize * 2 / 3;
|
|
pool->blocksize = blocksize;
|
|
pool->threshold = threshold;
|
|
pool->blocks = NULL;
|
|
pool->threshold_blocks = NULL;
|
|
return pool;
|
|
}
|
|
|
|
/*
 * Allocate size bytes from the pool.
 *
 * The size is first bumped by STRUCT_ALIGN and masked down, which yields
 * a multiple of STRUCT_ALIGN strictly larger than the request.  Sizes at
 * or above the pool threshold get a private node; anything smaller is
 * carved from the top of the first shared block with enough room, and a
 * new block is pushed on the front of the list when none has.
 *
 * The memory is only released by mempool_flush()/mempool_free().
 */
void *mempool_alloc(MemPool *pool, int size)
{
	MemPoolNode *blk;

	size = (size + STRUCT_ALIGN) & ~(STRUCT_ALIGN - 1);

	if (size >= pool->threshold) {
		MemPoolThresholdNode *big;

		/* data[1] already accounts for one byte of the payload */
		big = g_malloc(sizeof(MemPoolThresholdNode) - sizeof(char) + size);
		big->next = pool->threshold_blocks;
		pool->threshold_blocks = big;
		return big->data;
	}

	for (blk = pool->blocks; blk != NULL; blk = blk->next) {
		if (blk->free >= size) {
			blk->free -= size;
			return blk->data + blk->free;
		}
	}

	/* nothing had room: start a new block and take the space from its end */
	blk = g_malloc(sizeof(MemPoolNode) - sizeof(char) + pool->blocksize);
	blk->next = pool->blocks;
	pool->blocks = blk;
	blk->free = pool->blocksize - size;

	return blk->data + blk->free;
}
|
|
|
|
/*
 * Discard everything allocated from the pool.
 *
 * Oversized (threshold) allocations are always freed.  The shared blocks
 * are freed as well when freeall is non-zero; otherwise they are kept and
 * simply marked completely unused so they can be handed out again.
 */
void mempool_flush(MemPool *pool, int freeall)
{
	MemPoolThresholdNode *big, *big_next;
	MemPoolNode *blk, *blk_next;

	for (big = pool->threshold_blocks; big != NULL; big = big_next) {
		big_next = big->next;
		g_free(big);
	}
	pool->threshold_blocks = NULL;

	if (!freeall) {
		for (blk = pool->blocks; blk != NULL; blk = blk->next)
			blk->free = pool->blocksize;
		return;
	}

	for (blk = pool->blocks; blk != NULL; blk = blk_next) {
		blk_next = blk->next;
		g_free(blk);
	}
	pool->blocks = NULL;
}
|
|
|
|
/* Destroy a pool together with everything allocated from it.  NULL is ignored. */
void mempool_free(MemPool *pool)
{
	if (pool == NULL)
		return;

	mempool_flush(pool, 1);
	g_free(pool);
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define SCAN_BUF 4096		/* size of the read buffer */
#define SCAN_HEAD 128		/* headroom guaranteed to precede each read buffer */

/* A little hacky: the scanner state doubles as the object's private data,
   so the historical struct name is simply mapped onto the private one. */
#define _header_scan_state _CamelMimeParserPrivate
#define _PRIVATE(o) (((CamelMimeParser *)(o))->priv)
|
|
|
|
struct _header_scan_state {
|
|
|
|
/* global state */
|
|
|
|
enum _header_state state;
|
|
|
|
/* for building headers during scanning */
|
|
char *outbuf;
|
|
char *outptr;
|
|
char *outend;
|
|
|
|
int fd; /* input for a fd input */
|
|
CamelStream *stream; /* or for a stream */
|
|
|
|
/* for scanning input buffers */
|
|
char *realbuf; /* the real buffer, SCAN_HEAD*2 + SCAN_BUF bytes */
|
|
char *inbuf; /* points to a subset of the allocated memory, the underflow */
|
|
char *inptr; /* (upto SCAN_HEAD) is for use by filters so they dont copy all data */
|
|
char *inend;
|
|
|
|
int atleast;
|
|
|
|
int seek; /* current offset to start of buffer */
|
|
int unstep; /* how many states to 'unstep' (repeat the current state) */
|
|
|
|
unsigned int midline:1; /* are we mid-line interrupted? */
|
|
unsigned int scan_from:1; /* do we care about From lines? */
|
|
unsigned int scan_pre_from:1; /* do we return pre-from data? */
|
|
|
|
int start_of_from; /* where from started */
|
|
int start_of_headers; /* where headers started from the last scan */
|
|
|
|
int header_start; /* start of last header, or -1 */
|
|
|
|
/* filters to apply to all content before output */
|
|
int filterid; /* id of next filter */
|
|
struct _header_scan_filter *filters;
|
|
|
|
/* per message/part info */
|
|
struct _header_scan_stack *parts;
|
|
|
|
};
|
|
|
|
struct _header_scan_stack {
|
|
struct _header_scan_stack *parent;
|
|
|
|
enum _header_state savestate; /* state at invocation of this part */
|
|
|
|
#ifdef MEMPOOL
|
|
MemPool *pool; /* memory pool to keep track of headers/etc at this level */
|
|
#endif
|
|
struct _header_raw *headers; /* headers for this part */
|
|
|
|
struct _header_content_type *content_type;
|
|
|
|
/* I dont use GString's casue you can't efficiently append a buffer to them */
|
|
GByteArray *pretext; /* for multipart types, save the pre-boundary data here */
|
|
GByteArray *posttext; /* for multipart types, save the post-boundary data here */
|
|
int prestage; /* used to determine if it is a pre-boundary or post-boundary data segment */
|
|
|
|
GByteArray *from_line; /* the from line */
|
|
|
|
char *boundary; /* for multipart/ * boundaries, including leading -- and trailing -- for the final part */
|
|
int boundarylen; /* actual length of boundary, including leading -- if there is one */
|
|
int boundarylenfinal; /* length of boundary, including trailing -- if there is one */
|
|
int atleast; /* the biggest boundary from here to the parent */
|
|
};
|
|
|
|
struct _header_scan_filter {
|
|
struct _header_scan_filter *next;
|
|
int id;
|
|
CamelMimeFilter *filter;
|
|
};
|
|
|
|
static void folder_scan_step(struct _header_scan_state *s, char **databuffer, int *datalength);
|
|
static void folder_scan_drop_step(struct _header_scan_state *s);
|
|
static int folder_scan_init_with_fd(struct _header_scan_state *s, int fd);
|
|
static int folder_scan_init_with_stream(struct _header_scan_state *s, CamelStream *stream);
|
|
static struct _header_scan_state *folder_scan_init(void);
|
|
static void folder_scan_close(struct _header_scan_state *s);
|
|
static struct _header_scan_stack *folder_scan_content(struct _header_scan_state *s, int *lastone, char **data, int *length);
|
|
static struct _header_scan_stack *folder_scan_header(struct _header_scan_state *s, int *lastone);
|
|
static int folder_scan_skip_line(struct _header_scan_state *s, GByteArray *save);
|
|
static off_t folder_seek(struct _header_scan_state *s, off_t offset, int whence);
|
|
static off_t folder_tell(struct _header_scan_state *s);
|
|
static int folder_read(struct _header_scan_state *s);
|
|
#ifdef MEMPOOL
|
|
static void header_append_mempool(struct _header_scan_state *s, struct _header_scan_stack *h, char *header, int offset);
|
|
#endif
|
|
|
|
static void camel_mime_parser_class_init (CamelMimeParserClass *klass);
|
|
static void camel_mime_parser_init (CamelMimeParser *obj);
|
|
|
|
/*
 * Printable names of the parser states, indexed by s->state in the d()
 * trace statements.  The guard expands to `#if 0' while d() discards its
 * argument and to `#if !0' once d(x) is defined as x, so the table only
 * exists when that tracing is switched on.
 * NOTE(review): the order has to follow enum _header_state, which is
 * declared outside this file -- confirm when adding states.
 */
#if d(!)0
static char *states[] = {
	"HSCAN_INITIAL",
	"HSCAN_PRE_FROM",	/* pre-from data */
	"HSCAN_FROM",		/* got 'From' line */
	"HSCAN_HEADER",		/* toplevel header */
	"HSCAN_BODY",		/* scanning body of message */
	"HSCAN_MULTIPART",	/* got multipart header */
	"HSCAN_MESSAGE",	/* rfc822/news message */

	"HSCAN_PART",		/* part of a multipart */

	"HSCAN_EOF",		/* end of file */
	"HSCAN_PRE_FROM_END",
	"HSCAN_FROM_END",
	"HSCAN_HEADER_END",	/* was misspelt "HSCAN_HEAER_END" */
	"HSCAN_BODY_END",
	"HSCAN_MULTIPART_END",
	"HSCAN_MESSAGE_END",
};
#endif
|
|
|
|
static CamelObjectClass *camel_mime_parser_parent;
|
|
|
|
static void
|
|
camel_mime_parser_class_init (CamelMimeParserClass *klass)
|
|
{
|
|
camel_mime_parser_parent = camel_type_get_global_classfuncs (camel_object_get_type ());
|
|
}
|
|
|
|
static void
|
|
camel_mime_parser_init (CamelMimeParser *obj)
|
|
{
|
|
struct _header_scan_state *s;
|
|
|
|
s = folder_scan_init();
|
|
_PRIVATE(obj) = s;
|
|
}
|
|
|
|
static void
|
|
camel_mime_parser_finalise(CamelObject *o)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(o);
|
|
#ifdef PURIFY
|
|
purify_watch_remove_all();
|
|
#endif
|
|
folder_scan_close(s);
|
|
}
|
|
|
|
CamelType
|
|
camel_mime_parser_get_type (void)
|
|
{
|
|
static CamelType type = CAMEL_INVALID_TYPE;
|
|
|
|
if (type == CAMEL_INVALID_TYPE) {
|
|
type = camel_type_register (camel_object_get_type (), "CamelMimeParser",
|
|
sizeof (CamelMimeParser),
|
|
sizeof (CamelMimeParserClass),
|
|
(CamelObjectClassInitFunc) camel_mime_parser_class_init,
|
|
NULL,
|
|
(CamelObjectInitFunc) camel_mime_parser_init,
|
|
(CamelObjectFinalizeFunc) camel_mime_parser_finalise);
|
|
}
|
|
|
|
return type;
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_new:
|
|
*
|
|
* Create a new CamelMimeParser object.
|
|
*
|
|
* Return value: A new CamelMimeParser widget.
|
|
**/
|
|
CamelMimeParser *
|
|
camel_mime_parser_new (void)
|
|
{
|
|
CamelMimeParser *new = CAMEL_MIME_PARSER ( camel_object_new (camel_mime_parser_get_type ()));
|
|
return new;
|
|
}
|
|
|
|
|
|
/**
|
|
* camel_mime_parser_filter_add:
|
|
* @m:
|
|
* @mf:
|
|
*
|
|
* Add a filter that will be applied to any body content before it is passed
|
|
* to the caller. Filters may be pipelined to perform multi-pass operations
|
|
* on the content, and are applied in the order they were added.
|
|
*
|
|
* Note that filters are only applied to the body content of messages, and once
|
|
* a filter has been set, all content returned by a filter_step() with a state
|
|
* of HSCAN_BODY will have passed through the filter.
|
|
*
|
|
* Return value: An id that may be passed to filter_remove() to remove
|
|
* the filter, or -1 if the operation failed.
|
|
**/
|
|
int
|
|
camel_mime_parser_filter_add(CamelMimeParser *m, CamelMimeFilter *mf)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
struct _header_scan_filter *f, *new;
|
|
|
|
new = g_malloc(sizeof(*new));
|
|
new->filter = mf;
|
|
new->id = s->filterid++;
|
|
if (s->filterid == -1)
|
|
s->filterid++;
|
|
new->next = 0;
|
|
camel_object_ref((CamelObject *)mf);
|
|
|
|
/* yes, this is correct, since 'next' is the first element of the struct */
|
|
f = (struct _header_scan_filter *)&s->filters;
|
|
while (f->next)
|
|
f = f->next;
|
|
f->next = new;
|
|
return new->id;
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_filter_remove:
|
|
* @m:
|
|
* @id:
|
|
*
|
|
* Remove a processing filter from the pipeline. There is no
|
|
* restriction on the order the filters can be removed.
|
|
**/
|
|
void
|
|
camel_mime_parser_filter_remove(CamelMimeParser *m, int id)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
struct _header_scan_filter *f, *old;
|
|
|
|
f = (struct _header_scan_filter *)&s->filters;
|
|
while (f && f->next) {
|
|
old = f->next;
|
|
if (old->id == id) {
|
|
camel_object_unref((CamelObject *)old->filter);
|
|
f->next = old->next;
|
|
g_free(old);
|
|
/* there should only be a single matching id, but
|
|
scan the whole lot anyway */
|
|
}
|
|
f = f->next;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_header:
|
|
* @m:
|
|
* @name: Name of header.
|
|
* @offset: Pointer that can receive the offset of the header in
|
|
* the stream from the start of parsing.
|
|
*
|
|
* Lookup a header by name.
|
|
*
|
|
* Return value: The header value, or NULL if the header is not
|
|
* defined.
|
|
**/
|
|
const char *
|
|
camel_mime_parser_header(CamelMimeParser *m, const char *name, int *offset)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
|
|
if (s->parts &&
|
|
s->parts->headers) {
|
|
return header_raw_find(&s->parts->headers, name, offset);
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_headers_raw:
|
|
* @m:
|
|
*
|
|
* Get the list of the raw headers which are defined for the
|
|
* current state of the parser. These headers are valid
|
|
* until the next call to parser_step(), or parser_drop_step().
|
|
*
|
|
* Return value: The raw headers, or NULL if there are no headers
|
|
* defined for the current part or state. These are READ ONLY.
|
|
**/
|
|
struct _header_raw *
|
|
camel_mime_parser_headers_raw(CamelMimeParser *m)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
|
|
if (s->parts)
|
|
return s->parts->headers;
|
|
return NULL;
|
|
}
|
|
|
|
static const char *
|
|
byte_array_to_string(GByteArray *array)
|
|
{
|
|
if (array == NULL)
|
|
return NULL;
|
|
|
|
if (array->len == 0 || array->data[array->len-1] != '\0')
|
|
g_byte_array_append(array, "", 1);
|
|
|
|
return array->data;
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_preface:
|
|
* @m:
|
|
*
|
|
* Retrieve the preface text for the current multipart.
|
|
* Can only be used when the state is HSCAN_MULTIPART_END.
|
|
*
|
|
* Return value: The preface text, or NULL if there wasn't any.
|
|
**/
|
|
const char *
|
|
camel_mime_parser_preface(CamelMimeParser *m)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
|
|
if (s->parts)
|
|
return byte_array_to_string(s->parts->pretext);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_postface:
|
|
* @m:
|
|
*
|
|
* Retrieve the postface text for the current multipart.
|
|
* Only returns valid data when the current state if
|
|
* HSCAN_MULTIPART_END.
|
|
*
|
|
* Return value: The postface text, or NULL if there wasn't any.
|
|
**/
|
|
const char *
|
|
camel_mime_parser_postface(CamelMimeParser *m)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
|
|
if (s->parts)
|
|
return byte_array_to_string(s->parts->posttext);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_from_line:
|
|
* @m:
|
|
*
|
|
* Get the last scanned "From " line, from a recently scanned from.
|
|
* This should only be called in the HSCAN_FROM state. The
|
|
* from line will include the closing \n found (if there was one).
|
|
*
|
|
* The return value will remain valid while in the HSCAN_FROM
|
|
* state, or any deeper state.
|
|
*
|
|
* Return value: The From line, or NULL if called out of context.
|
|
**/
|
|
const char *
|
|
camel_mime_parser_from_line(CamelMimeParser *m)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
|
|
if (s->parts)
|
|
return byte_array_to_string(s->parts->from_line);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_init_with_fd:
|
|
* @m:
|
|
* @fd: A valid file descriptor.
|
|
*
|
|
* Initialise the scanner with an fd. The scanner's offsets
|
|
* will be relative to the current file position of the file
|
|
* descriptor. As a result, seekable descritors should
|
|
* be seeked using the parser seek functions.
|
|
*
|
|
* An initial buffer will be read from the file descriptor
|
|
* immediately, although no parsing will occur.
|
|
*
|
|
* Return value: Returns -1 on error.
|
|
**/
|
|
int
|
|
camel_mime_parser_init_with_fd(CamelMimeParser *m, int fd)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
|
|
return folder_scan_init_with_fd(s, fd);
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_init_with_stream:
|
|
* @m:
|
|
* @stream:
|
|
*
|
|
* Initialise the scanner with a source stream. The scanner's
|
|
* offsets will be relative to the current file position of
|
|
* the stream. As a result, seekable streams should only
|
|
* be seeked using the parser seek function.
|
|
*
|
|
* An initial buffer will be read from the stream
|
|
* immediately, although no parsing will occur.
|
|
*
|
|
* Return value: -1 on error.
|
|
**/
|
|
int
|
|
camel_mime_parser_init_with_stream(CamelMimeParser *m, CamelStream *stream)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
|
|
return folder_scan_init_with_stream(s, stream);
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_scan_from:
|
|
* @m:
|
|
* @scan_from: #TRUE if the scanner should scan From lines.
|
|
*
|
|
* Tell the scanner if it should scan "^From " lines or not.
|
|
*
|
|
* If the scanner is scanning from lines, two additional
|
|
* states HSCAN_FROM and HSCAN_FROM_END will be returned
|
|
* to the caller during parsing.
|
|
*
|
|
* This may also be preceeded by an optional
|
|
* HSCAN_PRE_FROM state which contains the scanned data
|
|
* found before the From line is encountered. See also
|
|
* scan_pre_from().
|
|
**/
|
|
void
|
|
camel_mime_parser_scan_from(CamelMimeParser *m, int scan_from)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
s->scan_from = scan_from;
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_scan_pre_from:
|
|
* @:
|
|
* @scan_pre_from: #TRUE if we want to get pre-from data.
|
|
*
|
|
* Tell the scanner whether we want to know abou the pre-from
|
|
* data during a scan. If we do, then we may get an additional
|
|
* state HSCAN_PRE_FROM which returns the specified data.
|
|
**/
|
|
void
|
|
camel_mime_parser_scan_pre_from(CamelMimeParser *m, int scan_pre_from)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
s->scan_pre_from = scan_pre_from;
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_content_type:
|
|
* @m:
|
|
*
|
|
* Get the content type defined in the current part.
|
|
*
|
|
* Return value: A content_type structure, or NULL if there
|
|
* is no content-type defined for this part of state of the
|
|
* parser.
|
|
**/
|
|
struct _header_content_type *
|
|
camel_mime_parser_content_type(CamelMimeParser *m)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
|
|
/* FIXME: should this search up until it's found the 'right'
|
|
content-type? can it? */
|
|
if (s->parts)
|
|
return s->parts->content_type;
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_unstep:
|
|
* @m:
|
|
*
|
|
* Cause the last step operation to repeat itself. If this is
|
|
* called repeated times, then the same step will be repeated
|
|
* that many times.
|
|
*
|
|
* Note that it is not possible to scan back using this function,
|
|
* only to have a way of peeking the next state.
|
|
**/
|
|
void camel_mime_parser_unstep(CamelMimeParser *m)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
|
|
s->unstep++;
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_drop_step:
|
|
* @m:
|
|
*
|
|
* Drop the last step call. This should only be used
|
|
* in conjunction with seeking of the stream as the
|
|
* stream may be in an undefined state relative to the
|
|
* state of the parser.
|
|
*
|
|
* Use this call with care.
|
|
**/
|
|
void camel_mime_parser_drop_step(CamelMimeParser *m)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
|
|
s->unstep = 0;
|
|
folder_scan_drop_step(s);
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_step:
|
|
* @m:
|
|
* @databuffer: Pointer to accept a pointer to the data
|
|
* associated with this step (if any). May be #NULL,
|
|
* in which case datalength is also ingored.
|
|
* @datalength: Pointer to accept a pointer to the data
|
|
* length associated with this step (if any).
|
|
*
|
|
* Parse the next part of the MIME message. If _unstep()
|
|
* has been called, then continue to return the same state
|
|
* for that many calls.
|
|
*
|
|
* If the step is HSCAN_BODY then the databuffer and datalength
|
|
* pointers will be setup to point to the internal data buffer
|
|
* of the scanner and may be processed as required. Any
|
|
* filters will have already been applied to this data.
|
|
*
|
|
* Refer to the state diagram elsewhere for a full listing of
|
|
* the states an application is gauranteed to get from the
|
|
* scanner.
|
|
*
|
|
* Return value: The current new state of the parser
|
|
* is returned.
|
|
**/
|
|
enum _header_state
|
|
camel_mime_parser_step(CamelMimeParser *m, char **databuffer, int *datalength)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
|
|
d(printf("OLD STATE: '%s' :\n", states[s->state]));
|
|
|
|
if (s->unstep <= 0) {
|
|
char *dummy;
|
|
int dummylength;
|
|
|
|
if (databuffer == NULL) {
|
|
databuffer = &dummy;
|
|
datalength = &dummylength;
|
|
}
|
|
|
|
folder_scan_step(s, databuffer, datalength);
|
|
} else
|
|
s->unstep--;
|
|
|
|
d(printf("NEW STATE: '%s' :\n", states[s->state]));
|
|
|
|
return s->state;
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_read:
|
|
* @m:
|
|
* @databuffer:
|
|
* @len:
|
|
*
|
|
* Read at most @len bytes from the internal mime parser buffer.
|
|
*
|
|
* Returns the address of the internal buffer in @databuffer,
|
|
* and the length of useful data.
|
|
*
|
|
* @len may be specified as INT_MAX, in which case you will
|
|
* get the full remainder of the buffer at each call.
|
|
*
|
|
* Note that no parsing of the data read through this function
|
|
* occurs, so no state changes occur, but the seek position
|
|
* is updated appropriately.
|
|
*
|
|
* Return value: The number of bytes available, or -1 on error.
|
|
**/
|
|
int
|
|
camel_mime_parser_read(CamelMimeParser *m, const char **databuffer, int len)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
int there;
|
|
|
|
if (len == 0)
|
|
return 0;
|
|
|
|
d(printf("parser::read() reading %d bytes\n", len));
|
|
|
|
there = MIN(s->inend - s->inptr, len);
|
|
d(printf("parser::read() there = %d bytes\n", there));
|
|
if (there > 0) {
|
|
*databuffer = s->inptr;
|
|
s->inptr += there;
|
|
return there;
|
|
}
|
|
|
|
if (folder_read(s) == -1)
|
|
return -1;
|
|
|
|
there = MIN(s->inend - s->inptr, len);
|
|
d(printf("parser::read() had to re-read, now there = %d bytes\n", there));
|
|
|
|
*databuffer = s->inptr;
|
|
s->inptr += there;
|
|
|
|
return there;
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_tell:
|
|
* @m:
|
|
*
|
|
* Return the current scanning offset. The meaning of this
|
|
* value will depend on the current state of the parser.
|
|
*
|
|
* An incomplete listing of the states:
|
|
*
|
|
* HSCAN_INITIAL, The start of the current message.
|
|
* HSCAN_HEADER, HSCAN_MESSAGE, HSCAN_MULTIPART, the character
|
|
* position immediately after the end of the header.
|
|
* HSCAN_BODY, Position within the message of the start
|
|
* of the current data block.
|
|
* HSCAN_*_END, The position of the character starting
|
|
* the next section of the scan (the last position + 1 of
|
|
* the respective current state).
|
|
*
|
|
* Return value: See above.
|
|
**/
|
|
off_t camel_mime_parser_tell(CamelMimeParser *m)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
|
|
return folder_tell(s);
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_tell_start_headers:
|
|
* @m:
|
|
*
|
|
* Find out the position within the file of where the
|
|
* headers started, this is cached by the parser
|
|
* at the time.
|
|
*
|
|
* Return value: The header start position, or -1 if
|
|
* no headers were scanned in the current state.
|
|
**/
|
|
off_t camel_mime_parser_tell_start_headers(CamelMimeParser *m)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
|
|
return s->start_of_headers;
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_tell_start_from:
|
|
* @m:
|
|
*
|
|
* If the parser is scanning From lines, then this returns
|
|
* the position of the start of the From line.
|
|
*
|
|
* Return value: The start of the from line, or -1 if there
|
|
* was no From line, or From lines are not being scanned.
|
|
**/
|
|
off_t camel_mime_parser_tell_start_from(CamelMimeParser *m)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
|
|
return s->start_of_from;
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_seek:
|
|
* @m:
|
|
* @off: Number of bytes to offset the seek by.
|
|
* @whence: SEEK_SET, SEEK_CUR, SEEK_END
|
|
*
|
|
* Reset the source position to a known value.
|
|
*
|
|
* Note that if the source stream/descriptor was not
|
|
* positioned at 0 to begin with, and an absolute seek
|
|
* is specified (whence != SEEK_CUR), then the seek
|
|
* position may not match the desired seek position.
|
|
*
|
|
* Return value: The new seek offset, or -1 on
|
|
* an error (for example, trying to seek on a non-seekable
|
|
* stream or file descriptor).
|
|
**/
|
|
off_t camel_mime_parser_seek(CamelMimeParser *m, off_t off, int whence)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
return folder_seek(s, off, whence);
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_state:
|
|
* @m:
|
|
*
|
|
* Get the current parser state.
|
|
*
|
|
* Return value: The current parser state.
|
|
**/
|
|
enum _header_state camel_mime_parser_state(CamelMimeParser *m)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
return s->state;
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_stream:
|
|
* @m:
|
|
*
|
|
* Get the stream, if any, the parser has been initialised
|
|
* with. May be used to setup sub-streams, but should not
|
|
* be read from directly (without saving and restoring
|
|
* the seek position in between).
|
|
*
|
|
* Return value: The stream from _init_with_stream(), or NULL
|
|
* if the parser is reading from a file descriptor or is
|
|
* uninitialised.
|
|
**/
|
|
CamelStream *camel_mime_parser_stream(CamelMimeParser *m)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
return s->stream;
|
|
}
|
|
|
|
/**
|
|
* camel_mime_parser_fd:
|
|
* @m:
|
|
*
|
|
* Return the file descriptor, if any, the parser has been
|
|
* initialised with.
|
|
*
|
|
* Should not be read from unless the parser it to terminate,
|
|
* or the seek offset can be reset before the next parse
|
|
* step.
|
|
*
|
|
* Return value: The file descriptor or -1 if the parser
|
|
* is reading from a stream or has not been initialised.
|
|
**/
|
|
int camel_mime_parser_fd(CamelMimeParser *m)
|
|
{
|
|
struct _header_scan_state *s = _PRIVATE(m);
|
|
return s->fd;
|
|
}
|
|
|
|
/* ********************************************************************** */
|
|
/* Implementation */
|
|
/* ********************************************************************** */
|
|
|
|
/* read the next bit of data, ensure there is enough room 'atleast' bytes */
|
|
static int
|
|
folder_read(struct _header_scan_state *s)
|
|
{
|
|
int len;
|
|
int inoffset;
|
|
|
|
if (s->inptr<s->inend-s->atleast)
|
|
return s->inend-s->inptr;
|
|
#ifdef PURIFY
|
|
purify_watch_remove(inend_id);
|
|
purify_watch_remove(inbuffer_id);
|
|
#endif
|
|
/* check for any remaning bytes (under the atleast limit( */
|
|
inoffset = s->inend - s->inptr;
|
|
if (inoffset>0) {
|
|
memcpy(s->inbuf, s->inptr, inoffset);
|
|
}
|
|
if (s->stream) {
|
|
len = camel_stream_read(s->stream, s->inbuf+inoffset, SCAN_BUF-inoffset);
|
|
} else {
|
|
len = read(s->fd, s->inbuf+inoffset, SCAN_BUF-inoffset);
|
|
}
|
|
r(printf("read %d bytes, offset = %d\n", len, inoffset));
|
|
if (len>=0) {
|
|
/* add on the last read block */
|
|
s->seek += s->inptr - s->inbuf;
|
|
s->inptr = s->inbuf;
|
|
s->inend = s->inbuf+len+inoffset;
|
|
r(printf("content = %d '%.*s'\n",s->inend - s->inptr, s->inend - s->inptr, s->inptr));
|
|
}
|
|
|
|
g_assert(s->inptr<=s->inend);
|
|
#ifdef PURIFY
|
|
inend_id = purify_watch(&s->inend);
|
|
inbuffer_id = purify_watch_n(s->inend+1, SCAN_HEAD-1, "rw");
|
|
#endif
|
|
r(printf("content = %d '%.*s'\n", s->inend - s->inptr, s->inend - s->inptr, s->inptr));
|
|
/* set a sentinal, for the inner loops to check against */
|
|
s->inend[0] = '\n';
|
|
return s->inend-s->inptr;
|
|
}
|
|
|
|
/* return the current absolute position of the data pointer */
|
|
static off_t
|
|
folder_tell(struct _header_scan_state *s)
|
|
{
|
|
return s->seek + (s->inptr - s->inbuf);
|
|
}
|
|
|
|
/*
|
|
need some way to prime the parser state, so this actually works for
|
|
other than top-level messages
|
|
*/
|
|
static off_t
|
|
folder_seek(struct _header_scan_state *s, off_t offset, int whence)
|
|
{
|
|
off_t newoffset;
|
|
int len;
|
|
|
|
if (s->stream) {
|
|
if (CAMEL_IS_SEEKABLE_STREAM(s->stream)) {
|
|
/* NOTE: assumes whence seekable stream == whence libc, which is probably
|
|
the case (or bloody well should've been) */
|
|
newoffset = camel_seekable_stream_seek((CamelSeekableStream *)s->stream, offset, whence);
|
|
} else {
|
|
newoffset = -1;
|
|
errno = EINVAL;
|
|
}
|
|
} else {
|
|
newoffset = lseek(s->fd, offset, whence);
|
|
}
|
|
#ifdef PURIFY
|
|
purify_watch_remove(inend_id);
|
|
purify_watch_remove(inbuffer_id);
|
|
#endif
|
|
if (newoffset != -1) {
|
|
s->seek = newoffset;
|
|
s->inptr = s->inbuf;
|
|
s->inend = s->inbuf;
|
|
if (s->stream)
|
|
len = camel_stream_read(s->stream, s->inbuf, SCAN_BUF);
|
|
else
|
|
len = read(s->fd, s->inbuf, SCAN_BUF);
|
|
if (len>=0) {
|
|
s->inend = s->inbuf+len;
|
|
s->inend[0] = '\n';
|
|
} else
|
|
newoffset = -1;
|
|
}
|
|
#ifdef PURIFY
|
|
inend_id = purify_watch(&s->inend);
|
|
inbuffer_id = purify_watch_n(s->inend+1, SCAN_HEAD-1, "rw");
|
|
#endif
|
|
return newoffset;
|
|
}
|
|
|
|
static void
|
|
folder_push_part(struct _header_scan_state *s, struct _header_scan_stack *h)
|
|
{
|
|
if (s->parts && s->parts->atleast > h->boundarylenfinal)
|
|
h->atleast = s->parts->atleast;
|
|
else
|
|
h->atleast = MAX(h->boundarylenfinal, 1);
|
|
|
|
h->parent = s->parts;
|
|
s->parts = h;
|
|
}
|
|
|
|
static void
|
|
folder_pull_part(struct _header_scan_state *s)
|
|
{
|
|
struct _header_scan_stack *h;
|
|
|
|
h = s->parts;
|
|
if (h) {
|
|
s->parts = h->parent;
|
|
g_free(h->boundary);
|
|
#ifdef MEMPOOL
|
|
mempool_free(h->pool);
|
|
#else
|
|
header_raw_clear(&h->headers);
|
|
#endif
|
|
header_content_type_unref(h->content_type);
|
|
if (h->pretext)
|
|
g_byte_array_free(h->pretext, TRUE);
|
|
if (h->posttext)
|
|
g_byte_array_free(h->posttext, TRUE);
|
|
if (h->from_line)
|
|
g_byte_array_free(h->from_line, TRUE);
|
|
g_free(h);
|
|
} else {
|
|
g_warning("Header stack underflow!\n");
|
|
}
|
|
}
|
|
|
|
static int
|
|
folder_scan_skip_line(struct _header_scan_state *s, GByteArray *save)
|
|
{
|
|
int atleast = s->atleast;
|
|
register char *inptr, *inend, c;
|
|
int len;
|
|
|
|
s->atleast = 1;
|
|
|
|
while ( (len = folder_read(s)) > 0 && len > s->atleast) { /* ensure we have at least enough room here */
|
|
inptr = s->inptr;
|
|
inend = s->inend-1;
|
|
|
|
c = -1;
|
|
while (inptr<inend
|
|
&& (c = *inptr++)!='\n')
|
|
;
|
|
|
|
if (save)
|
|
g_byte_array_append(save, s->inptr, inptr-s->inptr);
|
|
|
|
s->inptr = inptr;
|
|
|
|
if (c=='\n') {
|
|
s->atleast = atleast;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
s->atleast = atleast;
|
|
|
|
return -1; /* not found */
|
|
}
|
|
|
|
/* TODO: Is there any way to make this run faster? It gets called a lot ... */
|
|
static struct _header_scan_stack *
|
|
folder_boundary_check(struct _header_scan_state *s, const char *boundary, int *lastone)
|
|
{
|
|
struct _header_scan_stack *part;
|
|
int len = s->atleast; /* make sure we dont access past the buffer */
|
|
|
|
h(printf("checking boundary marker upto %d bytes\n", len));
|
|
part = s->parts;
|
|
while (part) {
|
|
h(printf(" boundary: %s\n", part->boundary));
|
|
h(printf(" against: '%.*s'\n", s->atleast, boundary));
|
|
if (part->boundary
|
|
&& part->boundarylen <= len
|
|
&& memcmp(boundary, part->boundary, part->boundarylen)==0) {
|
|
h(printf("matched boundary: %s\n", part->boundary));
|
|
/* again, make sure we're in range */
|
|
if (part->boundarylenfinal <= len) {
|
|
int extra = part->boundarylenfinal - part->boundarylen;
|
|
|
|
/* check the extra stuff on an final boundary, normally -- for mime parts */
|
|
if (extra>0) {
|
|
*lastone = memcmp(&boundary[part->boundarylen],
|
|
&part->boundary[part->boundarylen],
|
|
extra) == 0;
|
|
} else {
|
|
*lastone = TRUE;
|
|
}
|
|
h(printf("checking lastone = %s\n", *lastone?"TRUE":"FALSE"));
|
|
} else {
|
|
h(printf("not enough room to check last one?\n"));
|
|
*lastone = FALSE;
|
|
}
|
|
/*printf("ok, we found it! : %s \n", (*lastone)?"Last one":"More to come?");*/
|
|
return part;
|
|
}
|
|
part = part->parent;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
#ifdef MEMPOOL
/* Split the NUL-terminated "Name:value" text at @header on its first ':' and
 * append the result, as a new raw-header node allocated from h->pool, to the
 * end of the list at h->headers.
 *
 * The value runs from just after the ':' up to s->outptr, so @header is
 * expected to live in the output buffer of @s.  @offset is stored in the
 * node as-is.  Text without a ':' is ignored.
 */
static void
header_append_mempool(struct _header_scan_state *s, struct _header_scan_stack *h, char *header, int offset)
{
	struct _header_raw *tail, *node;
	char *colon, *value;
	register int n;

	colon = strchr(header, ':');
	if (colon == NULL)
		return;

	node = mempool_alloc(h->pool, sizeof(*node));
	node->next = NULL;

	/* header name: everything before the colon */
	n = colon - header;
	node->name = mempool_alloc(h->pool, n + 1);
	memcpy(node->name, header, n);
	node->name[n] = 0;

	/* header value: everything after the colon, up to the output pointer */
	value = colon + 1;
	n = s->outptr - value;
	node->value = mempool_alloc(h->pool, n + 1);
	memcpy(node->value, value, n);
	node->value[n] = 0;

	node->offset = offset;

	/* the list head is treated as a node here so that an empty list needs
	   no special case */
	for (tail = (struct _header_raw *)&h->headers; tail->next; tail = tail->next)
		;
	tail->next = node;
}

#define header_raw_append_parse(a, b, c) (header_append_mempool(s, h, b, c))

#endif
|
|
|
|
/* Copy the string start->inptr into the header buffer (s->outbuf),
   grow if necessary
   remove trailing \r chars (\n's assumed already removed)
   and track the start offset of the header */
/* Basically an optimised version of g_byte_array_append() */
/* The buffer is grown whenever the data would fill it completely, so there
   is always at least one spare byte at s->outptr for a terminating NUL.
   Wrapped in do { } while (0) so that it behaves as a single statement
   (safe in un-braced if/else); arguments are parenthesised, but note they
   are still evaluated more than once. */
#define header_append(s, start, inptr)							\
do {											\
	register int headerlen = (inptr)-(start);					\
											\
	if (headerlen > 0) {								\
		if (headerlen >= ((s)->outend - (s)->outptr)) {				\
			register char *outnew;						\
			register int outnewlen = (((s)->outend - (s)->outbuf)+headerlen)*2+1; \
			outnew = g_realloc((s)->outbuf, outnewlen);			\
			(s)->outptr = (s)->outptr - (s)->outbuf + outnew;		\
			(s)->outbuf = outnew;						\
			(s)->outend = outnew + outnewlen;				\
		}									\
		if ((start)[headerlen-1] == '\r')					\
			headerlen--;							\
		memcpy((s)->outptr, (start), headerlen);				\
		(s)->outptr += headerlen;						\
	}										\
	if ((s)->header_start == -1)							\
		(s)->header_start = ((start)-(s)->inbuf) + (s)->seek;			\
} while (0)
|
|
|
|
static struct _header_scan_stack *
|
|
folder_scan_header(struct _header_scan_state *s, int *lastone)
|
|
{
|
|
int atleast = s->atleast, newatleast;
|
|
char *start = NULL;
|
|
int len;
|
|
struct _header_scan_stack *h;
|
|
char *inend;
|
|
register char *inptr;
|
|
|
|
h(printf("scanning first bit\n"));
|
|
|
|
h = g_malloc0(sizeof(*h));
|
|
#ifdef MEMPOOL
|
|
h->pool = mempool_new(8192, 4096);
|
|
#endif
|
|
|
|
if (s->parts)
|
|
newatleast = s->parts->atleast;
|
|
else
|
|
newatleast = 1;
|
|
*lastone = FALSE;
|
|
|
|
do {
|
|
s->atleast = newatleast;
|
|
|
|
h(printf("atleast = %d\n", s->atleast));
|
|
|
|
while ((len = folder_read(s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */
|
|
inptr = s->inptr;
|
|
inend = s->inend-s->atleast+1;
|
|
|
|
while (inptr<inend) {
|
|
if (!s->midline) {
|
|
if (folder_boundary_check(s, inptr, lastone)) {
|
|
if ((s->outptr>s->outbuf))
|
|
goto header_truncated; /* may not actually be truncated */
|
|
|
|
goto header_done;
|
|
}
|
|
}
|
|
|
|
start = inptr;
|
|
|
|
/* goto next line/sentinal */
|
|
while ((*inptr++)!='\n')
|
|
;
|
|
|
|
g_assert(inptr<=s->inend+1);
|
|
|
|
/* check for sentinal or real end of line */
|
|
if (inptr > inend) {
|
|
h(printf("not at end of line yet, going further\n"));
|
|
/* didn't find end of line within our allowed area */
|
|
inptr = inend;
|
|
s->midline = TRUE;
|
|
header_append(s, start, inptr);
|
|
} else {
|
|
h(printf("got line part: '%.*s'\n", inptr-1-start, start));
|
|
/* got a line, strip and add it, process it */
|
|
s->midline = FALSE;
|
|
header_append(s, start, inptr-1);
|
|
|
|
/* check for end of headers */
|
|
if (s->outbuf == s->outptr)
|
|
goto header_done;
|
|
|
|
/* check for continuation/compress headers, we have atleast 1 char here to work with */
|
|
if (inptr[0] == ' ' || inptr[0] == '\t') {
|
|
h(printf("continuation\n"));
|
|
/* TODO: this wont catch multiple space continuation across a read boundary, but
|
|
that is assumed rare, and not fatal anyway */
|
|
do
|
|
inptr++;
|
|
while (*inptr == ' ' || *inptr == '\t');
|
|
inptr--;
|
|
*inptr = ' ';
|
|
} else {
|
|
/* otherwise, complete header, add it */
|
|
s->outptr[0] = 0;
|
|
|
|
h(printf("header '%.20s' at %d\n", s->outbuf, s->header_start));
|
|
|
|
header_raw_append_parse(&h->headers, s->outbuf, s->header_start);
|
|
s->outptr = s->outbuf;
|
|
s->header_start = -1;
|
|
}
|
|
}
|
|
}
|
|
s->inptr = inptr;
|
|
}
|
|
h(printf("end of file? read %d bytes\n", len));
|
|
newatleast = 1;
|
|
} while (s->atleast > 1);
|
|
|
|
if ((s->outptr > s->outbuf) || s->inend > s->inptr) {
|
|
start = s->inptr;
|
|
inptr = s->inend;
|
|
if (inptr > start) {
|
|
if (inptr[-1] == '\n')
|
|
inptr--;
|
|
}
|
|
goto header_truncated;
|
|
}
|
|
|
|
s->atleast = atleast;
|
|
|
|
return h;
|
|
|
|
header_truncated:
|
|
header_append(s, start, inptr);
|
|
|
|
s->outptr[0] = 0;
|
|
if (s->outbuf == s->outptr)
|
|
goto header_done;
|
|
|
|
header_raw_append_parse(&h->headers, s->outbuf, s->header_start);
|
|
|
|
s->outptr = s->outbuf;
|
|
header_done:
|
|
s->inptr = inptr;
|
|
s->atleast = atleast;
|
|
s->header_start = -1;
|
|
return h;
|
|
}
|
|
|
|
static struct _header_scan_stack *
|
|
folder_scan_content(struct _header_scan_state *s, int *lastone, char **data, int *length)
|
|
{
|
|
int atleast = s->atleast, newatleast;
|
|
register char *inptr;
|
|
char *inend;
|
|
char *start;
|
|
int len;
|
|
struct _header_scan_stack *part;
|
|
int onboundary = FALSE;
|
|
|
|
c(printf("scanning content\n"));
|
|
|
|
part = s->parts;
|
|
if (part)
|
|
newatleast = part->atleast;
|
|
else
|
|
newatleast = 1;
|
|
*lastone = FALSE;
|
|
|
|
c(printf("atleast = %d\n", s->atleast));
|
|
|
|
do {
|
|
s->atleast = newatleast;
|
|
|
|
while ((len = folder_read(s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */
|
|
inptr = s->inptr;
|
|
inend = s->inend-s->atleast+1;
|
|
start = inptr;
|
|
|
|
c(printf("inptr = %p, inend = %p\n", inptr, inend));
|
|
|
|
while (inptr<inend) {
|
|
if (!s->midline
|
|
&& (part = folder_boundary_check(s, inptr, lastone))) {
|
|
onboundary = TRUE;
|
|
|
|
/* since we truncate the boundary data, we need at least 1 char here spare,
|
|
to remain in the same state */
|
|
if ( (inptr-start) > 1)
|
|
goto content;
|
|
|
|
/* otherwise, jump to the state of the boundary we actually found */
|
|
goto normal_exit;
|
|
}
|
|
|
|
/* goto the next line */
|
|
while ((*inptr++)!='\n')
|
|
;
|
|
|
|
/* check the sentinal, if we went past the atleast limit, and reset it to there */
|
|
if (inptr > inend) {
|
|
s->midline = TRUE;
|
|
inptr = inend;
|
|
} else {
|
|
s->midline = FALSE;
|
|
}
|
|
}
|
|
|
|
c(printf("ran out of input, dumping what i have (%d) bytes midline = %s\n",
|
|
inptr-start, s->midline?"TRUE":"FALSE"));
|
|
goto content;
|
|
}
|
|
newatleast = 1;
|
|
} while (s->atleast > 1);
|
|
|
|
c(printf("length read = %d\n", len));
|
|
|
|
if (s->inend > s->inptr) {
|
|
start = s->inptr;
|
|
inptr = s->inend;
|
|
goto content;
|
|
}
|
|
|
|
*length = 0;
|
|
s->atleast = atleast;
|
|
return NULL;
|
|
|
|
content:
|
|
part = s->parts;
|
|
normal_exit:
|
|
s->atleast = atleast;
|
|
s->inptr = inptr;
|
|
|
|
*data = start;
|
|
/* if we hit a boundary, we should not include the closing \n */
|
|
if (onboundary && (inptr-start)>0)
|
|
*length = inptr-start-1;
|
|
else
|
|
*length = inptr-start;
|
|
|
|
/*printf("got %scontent: '%.*s'\n", s->midline?"partial ":"", inptr-start, start);*/
|
|
|
|
return part;
|
|
}
|
|
|
|
|
|
static void
|
|
folder_scan_close(struct _header_scan_state *s)
|
|
{
|
|
g_free(s->realbuf);
|
|
g_free(s->outbuf);
|
|
while (s->parts)
|
|
folder_pull_part(s);
|
|
if (s->fd != -1)
|
|
close(s->fd);
|
|
if (s->stream) {
|
|
camel_object_unref((CamelObject *)s->stream);
|
|
}
|
|
g_free(s);
|
|
}
|
|
|
|
|
|
static struct _header_scan_state *
|
|
folder_scan_init(void)
|
|
{
|
|
struct _header_scan_state *s;
|
|
|
|
s = g_malloc(sizeof(*s));
|
|
|
|
s->fd = -1;
|
|
s->stream = NULL;
|
|
|
|
s->outbuf = g_malloc(1024);
|
|
s->outptr = s->outbuf;
|
|
s->outend = s->outbuf+1024;
|
|
|
|
s->realbuf = g_malloc(SCAN_BUF + SCAN_HEAD*2);
|
|
s->inbuf = s->realbuf + SCAN_HEAD;
|
|
s->inptr = s->inbuf;
|
|
s->inend = s->inbuf;
|
|
s->atleast = 0;
|
|
|
|
s->seek = 0; /* current character position in file of the last read block */
|
|
s->unstep = 0;
|
|
|
|
s->header_start = -1;
|
|
|
|
s->start_of_from = -1;
|
|
s->start_of_headers = -1;
|
|
|
|
s->midline = FALSE;
|
|
s->scan_from = FALSE;
|
|
s->scan_pre_from = FALSE;
|
|
|
|
s->filters = NULL;
|
|
s->filterid = 1;
|
|
|
|
s->parts = NULL;
|
|
|
|
s->state = HSCAN_INITIAL;
|
|
return s;
|
|
}
|
|
|
|
static int
|
|
folder_scan_init_with_fd(struct _header_scan_state *s, int fd)
|
|
{
|
|
int len;
|
|
|
|
len = read(fd, s->inbuf, SCAN_BUF);
|
|
if (len>=0) {
|
|
s->inend = s->inbuf+len;
|
|
s->inptr = s->inbuf;
|
|
s->inend[0] = '\n';
|
|
if (s->fd != -1)
|
|
close(s->fd);
|
|
s->fd = fd;
|
|
if (s->stream) {
|
|
camel_object_unref((CamelObject *)s->stream);
|
|
s->stream = NULL;
|
|
}
|
|
return 0;
|
|
} else {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
static int
|
|
folder_scan_init_with_stream(struct _header_scan_state *s, CamelStream *stream)
|
|
{
|
|
int len;
|
|
|
|
len = camel_stream_read(stream, s->inbuf, SCAN_BUF);
|
|
if (len >= 0) {
|
|
s->inend = s->inbuf+len;
|
|
s->inptr = s->inbuf;
|
|
s->inend[0] = '\n';
|
|
if (s->stream)
|
|
camel_object_unref((CamelObject *)s->stream);
|
|
s->stream = stream;
|
|
camel_object_ref((CamelObject *)stream);
|
|
if (s->fd != -1) {
|
|
close(s->fd);
|
|
s->fd = -1;
|
|
}
|
|
return 0;
|
|
} else {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* When defined, folder_scan_step() gives the HSCAN_INITIAL and
   HSCAN_PRE_FROM states their own handling (scanning for "From " lines when
   s->scan_from is set); when undefined those states fall straight through
   to header scanning. */
#define USE_FROM
|
|
|
|
static void
|
|
folder_scan_step(struct _header_scan_state *s, char **databuffer, int *datalength)
|
|
{
|
|
struct _header_scan_stack *h, *hb;
|
|
const char *content;
|
|
const char *bound;
|
|
int type;
|
|
int state;
|
|
struct _header_content_type *ct = NULL;
|
|
struct _header_scan_filter *f;
|
|
size_t presize;
|
|
|
|
/* printf("\nSCAN PASS: state = %d '%s'\n", s->state, states[s->state]);*/
|
|
|
|
tail_recurse:
|
|
d({
|
|
printf("\nSCAN STACK:\n");
|
|
printf(" '%s' :\n", states[s->state]);
|
|
hb = s->parts;
|
|
while (hb) {
|
|
printf(" '%s' : %s ", states[hb->savestate], hb->boundary);
|
|
if (hb->content_type) {
|
|
printf("(%s/%s)", hb->content_type->type, hb->content_type->subtype);
|
|
} else {
|
|
printf("(default)");
|
|
}
|
|
printf("\n");
|
|
hb = hb->parent;
|
|
}
|
|
printf("\n");
|
|
});
|
|
|
|
switch (s->state) {
|
|
|
|
#ifdef USE_FROM
|
|
case HSCAN_INITIAL:
|
|
if (s->scan_from) {
|
|
h = g_malloc0(sizeof(*h));
|
|
h->boundary = g_strdup("From ");
|
|
h->boundarylen = strlen(h->boundary);
|
|
h->boundarylenfinal = h->boundarylen;
|
|
h->from_line = g_byte_array_new();
|
|
folder_push_part(s, h);
|
|
s->state = HSCAN_PRE_FROM;
|
|
} else {
|
|
s->start_of_from = -1;
|
|
goto scan_header;
|
|
}
|
|
|
|
case HSCAN_PRE_FROM:
|
|
|
|
h = s->parts;
|
|
do {
|
|
hb = folder_scan_content(s, &state, databuffer, datalength);
|
|
if (s->scan_pre_from && *datalength > 0) {
|
|
d(printf("got pre-from content %d bytes\n", *datalength));
|
|
return;
|
|
}
|
|
} while (hb==h && *datalength>0);
|
|
|
|
if (*datalength==0 && hb==h) {
|
|
d(printf("found 'From '\n"));
|
|
s->start_of_from = folder_tell(s);
|
|
folder_scan_skip_line(s, h->from_line);
|
|
h->savestate = HSCAN_INITIAL;
|
|
s->state = HSCAN_FROM;
|
|
} else {
|
|
folder_pull_part(s);
|
|
s->state = HSCAN_EOF;
|
|
}
|
|
return;
|
|
#else
|
|
case HSCAN_INITIAL:
|
|
case HSCAN_PRE_FROM:
|
|
#endif /* !USE_FROM */
|
|
|
|
scan_header:
|
|
case HSCAN_FROM:
|
|
s->start_of_headers = folder_tell(s);
|
|
h = folder_scan_header(s, &state);
|
|
#ifdef USE_FROM
|
|
if (s->scan_from)
|
|
h->savestate = HSCAN_FROM_END;
|
|
else
|
|
#endif
|
|
h->savestate = HSCAN_EOF;
|
|
|
|
/* FIXME: should this check for MIME-Version: 1.0 as well? */
|
|
|
|
type = HSCAN_HEADER;
|
|
if ( (content = header_raw_find(&h->headers, "Content-Type", NULL))
|
|
&& (ct = header_content_type_decode(content))) {
|
|
if (!strcasecmp(ct->type, "multipart")) {
|
|
bound = header_content_type_param(ct, "boundary");
|
|
if (bound) {
|
|
d(printf("multipart, boundary = %s\n", bound));
|
|
h->boundarylen = strlen(bound)+2;
|
|
h->boundarylenfinal = h->boundarylen+2;
|
|
h->boundary = g_malloc(h->boundarylen+3);
|
|
sprintf(h->boundary, "--%s--", bound);
|
|
type = HSCAN_MULTIPART;
|
|
} else {
|
|
header_content_type_unref(ct);
|
|
ct = header_content_type_decode("text/plain");
|
|
/* We can't quite do this, as it will mess up all the offsets ... */
|
|
/* header_raw_replace(&h->headers, "Content-Type", "text/plain", offset);*/
|
|
g_warning("Multipart with no boundary, treating as text/plain");
|
|
}
|
|
} else if (!strcasecmp(ct->type, "message")) {
|
|
if (!strcasecmp(ct->subtype, "rfc822")
|
|
|| !strcasecmp(ct->subtype, "news")
|
|
/*|| !strcasecmp(ct->subtype, "partial")*/) {
|
|
type = HSCAN_MESSAGE;
|
|
}
|
|
}
|
|
} else {
|
|
/* make the default type for multipart/digest be message/rfc822 */
|
|
if ((s->parts
|
|
&& header_content_type_is(s->parts->content_type, "multipart", "digest"))) {
|
|
ct = header_content_type_decode("message/rfc822");
|
|
type = HSCAN_MESSAGE;
|
|
d(printf("parent was multipart/digest, autoupgrading to message/rfc822?\n"));
|
|
/* maybe we should do this too?
|
|
header_raw_append_parse(&h->headers, "Content-Type: message/rfc822", -1);*/
|
|
}
|
|
}
|
|
h->content_type = ct;
|
|
folder_push_part(s, h);
|
|
s->state = type;
|
|
return;
|
|
|
|
case HSCAN_HEADER:
|
|
s->state = HSCAN_BODY;
|
|
|
|
case HSCAN_BODY:
|
|
h = s->parts;
|
|
*datalength = 0;
|
|
presize = SCAN_HEAD;
|
|
f = s->filters;
|
|
|
|
do {
|
|
hb = folder_scan_content (s, &state, databuffer, datalength);
|
|
|
|
d(printf ("\n\nOriginal content: '"));
|
|
d(fwrite(*databuffer, sizeof(char), *datalength, stdout));
|
|
d(printf("'\n"));
|
|
|
|
if (*datalength > 0) {
|
|
while (f) {
|
|
camel_mime_filter_filter(f->filter, *databuffer, *datalength, presize,
|
|
databuffer, datalength, &presize);
|
|
d(printf ("Filtered content (%s): '",
|
|
camel_type_to_name(((CamelObject *)f->filter)->s.type)));
|
|
d(fwrite(*databuffer, sizeof(char), *datalength, stdout));
|
|
d(printf("'\n"));
|
|
f = f->next;
|
|
}
|
|
return;
|
|
}
|
|
} while (hb == h && *datalength > 0);
|
|
|
|
/* check for any filter completion data */
|
|
while (f) {
|
|
camel_mime_filter_complete(f->filter, *databuffer, *datalength, presize,
|
|
databuffer, datalength, &presize);
|
|
f = f->next;
|
|
}
|
|
|
|
if (*datalength > 0)
|
|
return;
|
|
|
|
s->state = HSCAN_BODY_END;
|
|
break;
|
|
|
|
case HSCAN_MULTIPART:
|
|
h = s->parts;
|
|
do {
|
|
do {
|
|
hb = folder_scan_content(s, &state, databuffer, datalength);
|
|
if (*datalength>0) {
|
|
/* instead of a new state, we'll just store it locally and provide
|
|
an accessor function */
|
|
d(printf("Multipart %s Content %p: '%.*s'\n",
|
|
h->prestage>0?"post":"pre", h, *datalength, *databuffer));
|
|
if (h->prestage > 0) {
|
|
if (h->posttext == NULL)
|
|
h->posttext = g_byte_array_new();
|
|
g_byte_array_append(h->posttext, *databuffer, *datalength);
|
|
} else {
|
|
if (h->pretext == NULL)
|
|
h->pretext = g_byte_array_new();
|
|
g_byte_array_append(h->pretext, *databuffer, *datalength);
|
|
}
|
|
}
|
|
} while (hb==h && *datalength>0);
|
|
h->prestage++;
|
|
if (*datalength==0 && hb==h) {
|
|
d(printf("got boundary: %s\n", hb->boundary));
|
|
folder_scan_skip_line(s, NULL);
|
|
if (!state) {
|
|
s->state = HSCAN_FROM;
|
|
folder_scan_step(s, databuffer, datalength);
|
|
s->parts->savestate = HSCAN_MULTIPART; /* set return state for the new head part */
|
|
return;
|
|
}
|
|
} else {
|
|
break;
|
|
}
|
|
} while (1);
|
|
|
|
s->state = HSCAN_MULTIPART_END;
|
|
break;
|
|
|
|
case HSCAN_MESSAGE:
|
|
s->state = HSCAN_FROM;
|
|
folder_scan_step(s, databuffer, datalength);
|
|
s->parts->savestate = HSCAN_MESSAGE_END;
|
|
break;
|
|
|
|
case HSCAN_FROM_END:
|
|
case HSCAN_BODY_END:
|
|
case HSCAN_MULTIPART_END:
|
|
case HSCAN_MESSAGE_END:
|
|
s->state = s->parts->savestate;
|
|
folder_pull_part(s);
|
|
if (s->state & HSCAN_END)
|
|
return;
|
|
goto tail_recurse;
|
|
|
|
case HSCAN_EOF:
|
|
return;
|
|
|
|
default:
|
|
g_warning("Invalid state in camel-mime-parser: %d", s->state);
|
|
break;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/* drops the current state back one */
|
|
static void
|
|
folder_scan_drop_step(struct _header_scan_state *s)
|
|
{
|
|
switch (s->state) {
|
|
case HSCAN_INITIAL:
|
|
case HSCAN_EOF:
|
|
return;
|
|
|
|
case HSCAN_FROM:
|
|
case HSCAN_PRE_FROM:
|
|
s->state = HSCAN_INITIAL;
|
|
folder_pull_part(s);
|
|
return;
|
|
|
|
case HSCAN_MESSAGE:
|
|
case HSCAN_HEADER:
|
|
case HSCAN_MULTIPART:
|
|
|
|
case HSCAN_FROM_END:
|
|
case HSCAN_BODY_END:
|
|
case HSCAN_MULTIPART_END:
|
|
case HSCAN_MESSAGE_END:
|
|
|
|
s->state = s->parts->savestate;
|
|
folder_pull_part(s);
|
|
if (s->state & HSCAN_END) {
|
|
s->state &= ~HSCAN_END;
|
|
}
|
|
return;
|
|
default:
|
|
/* FIXME: not sure if this is entirely right */
|
|
}
|
|
}
|
|
|
|
#ifdef STANDALONE
/* Stand-alone test driver: runs the scanner over every file named on the
 * command line and prints each parser step.  Filter set-up is compiled out
 * (#if 0); only the decisions about which filters would be used are
 * printed.
 *
 * Returns 0 on success, 1 if a file could not be opened or read.
 */
int main(int argc, char **argv)
{
	int fd;
	struct _header_scan_state *s;
	char *data;
	int len;
	char *name = "/tmp/evmail/Inbox";
	struct _header_scan_stack *h;	/* only used by the disabled block below */
	int i;
	int attach = 0;

	if (argc==2)
		name = argv[1];

	printf("opening: %s", name);

	for (i=1;i<argc;i++) {
		const char *encoding = NULL, *charset = NULL;
		char *attachname;

		name = argv[i];
		printf("opening: %s", name);

		fd = open(name, O_RDONLY);
		if (fd==-1) {
			perror("Cannot open mailbox");
			return 1;
		}
		s = folder_scan_init();
		if (folder_scan_init_with_fd(s, fd) == -1) {
			/* the scanner did not take ownership of fd */
			perror("Cannot read mailbox");
			close(fd);
			folder_scan_close(s);
			return 1;
		}
		s->scan_from = FALSE;
#if 0
		h = g_malloc0(sizeof(*h));
		h->savestate = HSCAN_EOF;
		folder_push_part(s, h);
#endif
		while (s->state != HSCAN_EOF) {
			folder_scan_step(s, &data, &len);
			printf("\n -- PARSER STEP RETURN -- %d '%s'\n\n", s->state, states[s->state]);
			switch (s->state) {
			case HSCAN_HEADER:
				if (s->parts->content_type
				    && (charset = header_content_type_param(s->parts->content_type, "charset"))) {
					if (strcasecmp(charset, "us-ascii")) {
#if 0
						folder_push_filter_charset(s, "UTF-8", charset);
#endif
					} else {
						charset = NULL;
					}
				} else {
					charset = NULL;
				}

				encoding = header_raw_find(&s->parts->headers, "Content-transfer-encoding", 0);
				/* passing NULL for %s is undefined behaviour */
				printf("encoding = '%s'\n", encoding ? encoding : "(null)");
				if (encoding && !strncasecmp(encoding, " base64", 7)) {
					printf("adding base64 filter\n");
					attachname = g_strdup_printf("attach.%d.%d", i, attach++);
#if 0
					folder_push_filter_save(s, attachname);
#endif
					g_free(attachname);
#if 0
					folder_push_filter_mime(s, 0);
#endif
				}
				if (encoding && !strncasecmp(encoding, " quoted-printable", 17)) {
					printf("adding quoted-printable filter\n");
					attachname = g_strdup_printf("attach.%d.%d", i, attach++);
#if 0
					folder_push_filter_save(s, attachname);
#endif
					g_free(attachname);
#if 0
					folder_push_filter_mime(s, 1);
#endif
				}

				break;
			case HSCAN_BODY:
				printf("got body %d '%.*s'\n", len, len, data);
				break;
			case HSCAN_BODY_END:
				printf("end body %d '%.*s'\n", len, len, data);
				if (encoding && !strncasecmp(encoding, " base64", 7)) {
					printf("removing filters\n");
#if 0
					folder_filter_pull(s);
					folder_filter_pull(s);
#endif
				}
				if (encoding && !strncasecmp(encoding, " quoted-printable", 17)) {
					printf("removing filters\n");
#if 0
					folder_filter_pull(s);
					folder_filter_pull(s);
#endif
				}
				if (charset) {
#if 0
					folder_filter_pull(s);
#endif
					charset = NULL;
				}
				encoding = NULL;
				break;
			default:
				break;
			}
		}
		/* folder_scan_close() closes s->fd, which is our fd; closing it
		   again here would be a double close */
		folder_scan_close(s);
	}
	return 0;
}

#endif /* STANDALONE */
|
|
|