/*
 * This file is part of DisOrder
 * Copyright (C) 2005, 2007, 2008 Richard Kettlewell
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
/** @file lib/mime.c
 * @brief Support for MIME and allied protocols
 */

#include "common.h"

#include <ctype.h>

#include "mem.h"
#include "mime.h"
#include "vector.h"
#include "hex.h"
#include "log.h"
#include "base64.h"
#include "kvp.h"

/** @brief Match whitespace characters
 * @param c Character to test
 * @return 1 if @p c is space, tab, CR or LF, else 0
 */
static int whitespace(int c) {
  switch(c) {
  case ' ':
  case '\t':
  case '\r':
  case '\n':
    return 1;
  default:
    return 0;
  }
}

/** @brief Match RFC2045 tspecial characters
 * @param c Character to test
 * @return 1 if @p c is a tspecial, else 0
 */
int mime_tspecial(int c) {
  switch(c) {
  case '(':
  case ')':
  case '<':
  case '>':
  case '@':
  case ',':
  case ';':
  case ':':
  case '\\':
  case '"':
  case '/':
  case '[':
  case ']':
  case '?':
  case '=':
    return 1;
  default:
    return 0;
  }
}

/** @brief Match RFC2616 separator characters
 * @param c Character to test
 * @return 1 if @p c is a separator, else 0
 */
int mime_http_separator(int c) {
  switch(c) {
  case '(':
  case ')':
  case '<':
  case '>':
  case '@':
  case ',':
  case ';':
  case ':':
  case '\\':
  case '"':
  case '/':
  case '[':
  case ']':
  case '?':
  case '=':
  case '{':
  case '}':
  case ' ':
  case '\t':
    return 1;
  default:
    return 0;
  }
}

/** @brief Match CRLF
 * @param ptr Pointer into a 0-terminated string
 * @return Nonzero if @p ptr points at a CR immediately followed by LF
 */
static int iscrlf(const char *ptr) {
  return ptr[0] == '\r' && ptr[1] == '\n';
}

/** @brief Skip whitespace
 * @param s Pointer into string
 * @param rfc822_comments If true, skip RFC822 nested comments
 * @return Pointer into string after whitespace, or NULL if a comment is
 * not terminated
 *
 * NULL can only be returned when @p rfc822_comments is nonzero.
 */
static const char *skipwhite(const char *s, int rfc822_comments) {
  int c, depth;

  for(;;) {
    switch(c = *s) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
      ++s;
      break;
    case '(':
      if(!rfc822_comments)
        return s;
      ++s;
      depth = 1;
      while(*s && depth) {
        c = *s++;
        switch(c) {
        case '(':
          ++depth;
          break;
        case ')':
          --depth;
          break;
        case '\\':
          /* A backslash quotes the next character; it must exist */
          if(!*s)
            return 0;
          ++s;
          break;
        }
      }
      if(depth)
        return 0;
      break;
    default:
      return s;
    }
  }
}

/** @brief Test for a word character
 * @param c Character to test
 * @param special mime_tspecial() (MIME/RFC2045) or mime_http_separator() (HTTP/RFC2616)
 * @return 1 if @p c is a word character, else 0
 */
static int iswordchar(int c, int (*special)(int)) {
  return !(c <= ' ' || c > '~' || special(c));
}

/** @brief Parse an RFC1521/RFC2616 word
 * @param s Pointer to start of word
 * @param valuep Where to store value
 * @param special mime_tspecial() (MIME/RFC2045) or mime_http_separator() (HTTP/RFC2616)
 * @return Pointer just after end of word or NULL if there's no word
 *
 * A word is a token or a quoted-string.  Within a quoted-string a backslash
 * quotes the following character.  A quoted-string that is not terminated
 * before the end of the input is rejected.
 */
const char *mime_parse_word(const char *s, char **valuep,
                            int (*special)(int)) {
  struct dynstr value[1];
  int c;

  dynstr_init(value);
  if(*s == '"') {
    ++s;
    while((c = *s++) != '"') {
      switch(c) {
      case 0:
        /* Ran off the end of the input without finding the closing quote */
        return 0;
      case '\\':
        if(!(c = *s++))
          return 0;
        /* fall through */
      default:
        dynstr_append(value, c);
        break;
      }
    }
  } else {
    if(!iswordchar((unsigned char)*s, special))
      return NULL;
    while(iswordchar((unsigned char)*s, special))
      dynstr_append(value, *s++);
  }
  dynstr_terminate(value);
  *valuep = value->vec;
  return s;
}

/** @brief Parse an RFC1521/RFC2616 token
 * @param s Pointer to start of token
 * @param valuep Where to store value
 * @param special mime_tspecial() (MIME/RFC2045) or mime_http_separator() (HTTP/RFC2616)
 * @return Pointer just after end of token or NULL if there's no token
 */
static const char *parsetoken(const char *s, char **valuep,
                              int (*special)(int)) {
  /* A token is a word that is not a quoted-string */
  if(*s == '"')
    return 0;
  return mime_parse_word(s, valuep, special);
}

/** @brief Parse a MIME content-type field
 * @param s Start of field
 * @param typep Where to store type
 * @param parametersp Where to store parameter list
 * @return 0 on success, non-0 on error
 *
 * The type and the parameter names are forced to lower case.
 *
 * See RFC 2045 s5.
 */
int mime_content_type(const char *s,
                      char **typep,
                      struct kvp **parametersp) {
  struct dynstr type, parametername;
  struct kvp *parameters = 0;
  char *parametervalue;

  dynstr_init(&type);
  if(!(s = skipwhite(s, 1)))
    return -1;
  if(!*s)
    return -1;
  /* Major type */
  while(*s && !mime_tspecial(*s) && !whitespace(*s))
    dynstr_append(&type, tolower((unsigned char)*s++));
  if(!(s = skipwhite(s, 1)))
    return -1;
  if(*s++ != '/')
    return -1;
  dynstr_append(&type, '/');
  if(!(s = skipwhite(s, 1)))
    return -1;
  /* Subtype */
  while(*s && !mime_tspecial(*s) && !whitespace(*s))
    dynstr_append(&type, tolower((unsigned char)*s++));
  if(!(s = skipwhite(s, 1)))
    return -1;

  /* Parameters, each introduced by a semicolon */
  while(*s == ';') {
    dynstr_init(&parametername);
    ++s;
    if(!(s = skipwhite(s, 1)))
      return -1;
    if(!*s)
      return -1;
    while(*s && !mime_tspecial(*s) && !whitespace(*s))
      dynstr_append(&parametername, tolower((unsigned char)*s++));
    if(!(s = skipwhite(s, 1)))
      return -1;
    if(*s++ != '=')
      return -1;
    if(!(s = skipwhite(s, 1)))
      return -1;
    if(!(s = mime_parse_word(s, &parametervalue, mime_tspecial)))
      return -1;
    if(!(s = skipwhite(s, 1)))
      return -1;
    dynstr_terminate(&parametername);
    kvp_set(&parameters, parametername.vec, parametervalue);
  }
  dynstr_terminate(&type);
  *typep = type.vec;
  *parametersp = parameters;
  return 0;
}

/** @brief Parse a MIME message
 * @param s Start of message
 * @param callback Called for each header field
 * @param u Passed to callback
 * @return Pointer to decoded body (might be in original string), or NULL on error
 *
 * This does an RFC 822-style parse and honors Content-Transfer-Encoding as
 * described in RFC 2045 s6.  @p callback is called for each header field
 * encountered, in order, with ASCII characters in the header name forced to
 * lower case.  If @p callback returns nonzero then parsing stops and NULL is
 * returned.
 */
const char *mime_parse(const char *s,
                       int (*callback)(const char *name, const char *value,
                                       void *u),
                       void *u) {
  struct dynstr name, value;
  char *cte = 0, *p;

  /* The header ends at an empty line (or the end of the input) */
  while(*s && !iscrlf(s)) {
    dynstr_init(&name);
    dynstr_init(&value);
    while(*s && !mime_tspecial(*s) && !whitespace(*s))
      dynstr_append(&name, tolower((unsigned char)*s++));
    if(!(s = skipwhite(s, 1)))
      return 0;
    if(*s != ':')
      return 0;
    ++s;
    /* The value runs up to a newline that is not followed by a space or tab,
     * i.e. continuation lines are part of the value */
    while(*s && !(*s == '\n' && !(s[1] == ' ' || s[1] == '\t'))) {
      const int c = *s++;
      /* Strip leading whitespace */
      if(value.nvec || !(c == ' ' || c == '\t' || c == '\n' || c == '\r'))
        dynstr_append(&value, c);
    }
    /* Strip trailing whitespace */
    while(value.nvec > 0 && (value.vec[value.nvec - 1] == ' '
                             || value.vec[value.nvec - 1] == '\t'
                             || value.vec[value.nvec - 1] == '\n'
                             || value.vec[value.nvec - 1] == '\r'))
      --value.nvec;
    /* Step over the newline that ended the field */
    if(*s)
      ++s;
    dynstr_terminate(&name);
    dynstr_terminate(&value);
    if(!strcmp(name.vec, "content-transfer-encoding")) {
      cte = xstrdup(value.vec);
      for(p = cte; *p; p++)
        *p = tolower((unsigned char)*p);
    }
    if(callback(name.vec, value.vec, u))
      return 0;
  }
  /* Step over the CRLF that separates header from body */
  if(*s)
    s += 2;
  if(cte) {
    if(!strcmp(cte, "base64"))
      return mime_base64(s, 0);
    if(!strcmp(cte, "quoted-printable"))
      return mime_qp(s);
    if(!strcmp(cte, "7bit") || !strcmp(cte, "8bit"))
      return s;
    disorder_error(0, "unknown content-transfer-encoding '%s'", cte);
    return 0;
  }
  return s;
}

/** @brief Match the boundary string
 * @param ptr Pointer to start of candidate line
 * @param boundary Boundary string
 * @param bl Length of @p boundary
 * @return Nonzero if @p ptr points at a boundary line (final or otherwise)
 */
static int isboundary(const char *ptr, const char *boundary, size_t bl) {
  return (ptr[0] == '-'
          && ptr[1] == '-'
          && !strncmp(ptr + 2, boundary, bl)
          && (iscrlf(ptr + bl + 2)
              || (ptr[bl + 2] == '-'
                  && ptr[bl + 3] == '-'
                  && (iscrlf(ptr + bl + 4) || *(ptr + bl + 4) == 0))));
}

/** @brief Match the final boundary string
 * @param ptr Pointer to start of candidate line
 * @param boundary Boundary string
 * @param bl Length of @p boundary
 * @return Nonzero if @p ptr points at the final boundary line
 */
static int isfinal(const char *ptr, const char *boundary, size_t bl) {
  return (ptr[0] == '-'
          && ptr[1] == '-'
          && !strncmp(ptr + 2, boundary, bl)
          && ptr[bl + 2] == '-'
          && ptr[bl + 3] == '-'
          && (iscrlf(ptr + bl + 4) || *(ptr + bl + 4) == 0));
}

/** @brief Parse a multipart MIME body
 * @param s Start of message
 * @param callback Callback for each part
 * @param boundary Boundary string
 * @param u Passed to callback
 * @return 0 on success, non-0 on error
 *
 * See RFC 2046 s5.1.  @p callback is called for each part (not yet decoded in
 * any way) in succession; you should probably call mime_parse() for each part.
 * If @p callback returns nonzero then parsing stops and that value is
 * returned.
 */
int mime_multipart(const char *s,
                   int (*callback)(const char *s, void *u),
                   const char *boundary,
                   void *u) {
  size_t bl = strlen(boundary);
  const char *start, *e;
  int ret;

  /* We must start with a boundary string */
  if(!isboundary(s, boundary, bl)) {
    disorder_error(0, "mime_multipart: first line is not the boundary string");
    return -1;
  }
  /* Keep going until we hit a final boundary */
  while(!isfinal(s, boundary, bl)) {
    /* s points at a non-final boundary, which isboundary() only accepts when
     * it is followed by CRLF, so strstr() cannot fail here */
    s = strstr(s, "\r\n") + 2;
    start = s;
    while(!isboundary(s, boundary, bl)) {
      if(!(e = strstr(s, "\r\n"))) {
        disorder_error(0, "mime_multipart: line does not end CRLF");
        return -1;
      }
      s = e + 2;
    }
    /* The CRLF before the boundary is not part of the part */
    if((ret = callback(xstrndup(start,
                                s == start ? 0 : s - start - 2),
                       u)))
      return ret;
  }
  return 0;
}

/** @brief Parse an RFC2388-style content-disposition field
 * @param s Start of field
 * @param dispositionp Where to store disposition
 * @param parameternamep Where to store parameter name
 * @param parametervaluep Where to store parameter value
 * @return 0 on success, non-0 on error
 *
 * At most one parameter is parsed.  If there is no parameter then
 * @p *parameternamep and @p *parametervaluep are set to NULL.
 *
 * See RFC 2388 s3 and RFC 2183.
 */
int mime_rfc2388_content_disposition(const char *s,
                                     char **dispositionp,
                                     char **parameternamep,
                                     char **parametervaluep) {
  struct dynstr disposition, parametername;

  dynstr_init(&disposition);
  if(!(s = skipwhite(s, 1)))
    return -1;
  if(!*s)
    return -1;
  while(*s && !mime_tspecial(*s) && !whitespace(*s))
    dynstr_append(&disposition, tolower((unsigned char)*s++));
  if(!(s = skipwhite(s, 1)))
    return -1;

  if(*s == ';') {
    dynstr_init(&parametername);
    ++s;
    if(!(s = skipwhite(s, 1)))
      return -1;
    if(!*s)
      return -1;
    while(*s && !mime_tspecial(*s) && !whitespace(*s))
      dynstr_append(&parametername, tolower((unsigned char)*s++));
    if(!(s = skipwhite(s, 1)))
      return -1;
    if(*s++ != '=')
      return -1;
    if(!(s = skipwhite(s, 1)))
      return -1;
    if(!(s = mime_parse_word(s, parametervaluep, mime_tspecial)))
      return -1;
    if(!(s = skipwhite(s, 1)))
      return -1;
    dynstr_terminate(&parametername);
    *parameternamep = parametername.vec;
  } else
    *parametervaluep = *parameternamep = 0;
  dynstr_terminate(&disposition);
  *dispositionp = disposition.vec;
  return 0;
}

/** @brief Convert MIME quoted-printable
 * @param s Quoted-printable data
 * @return Decoded data, or NULL if an @c = is followed by neither two hex
 * digits nor a soft line break
 *
 * See RFC 2045 s6.7.
 */
char *mime_qp(const char *s) {
  struct dynstr d;
  int c, a, b;
  const char *t;

  dynstr_init(&d);
  while((c = *s++)) {
    switch(c) {
    case '=':
      if((a = unhexdigitq(s[0])) != -1
         && (b = unhexdigitq(s[1])) != -1) {
        dynstr_append(&d, a * 16 + b);
        s += 2;
      } else {
        t = s;
        while(*t == ' ' || *t == '\t')
          ++t;
        if(iscrlf(t)) {
          /* soft line break */
          s = t + 2;
        } else
          return 0;
      }
      break;
    case ' ':
    case '\t':
      t = s;
      while(*t == ' ' || *t == '\t')
        ++t;
      if(iscrlf(t))
        /* trailing space is always eliminated */
        s = t;
      else
        dynstr_append(&d, c);
      break;
    default:
      dynstr_append(&d, c);
      break;
    }
  }
  dynstr_terminate(&d);
  return d.vec;
}

/** @brief Match cookie separator characters
 *
 * This is a subset of the RFC2616 specials, and technically is in breach of
 * the specification.  However rejecting (in particular) slashes is
 * unreasonably strict and has broken at least one (admittedly somewhat
 * obscure) browser, so we're more forgiving.
 */
static int cookie_separator(int c) {
  switch(c) {
  case '(':
  case ')':
  case ',':
  case ';':
  case '=':
  case ' ':
  case '"':
  case '\t':
    return 1;
  default:
    return 0;
  }
}

/** @brief Match cookie value separator characters
 *
 * Same as cookie_separator() but allows for @c = in cookie values.
 */
static int cookie_value_separator(int c) {
  switch(c) {
  case '(':
  case ')':
  case ',':
  case ';':
  case ' ':
  case '"':
  case '\t':
    return 1;
  default:
    return 0;
  }
}

/** @brief Parse a RFC2109 Cookie: header
 * @param s Header field value
 * @param cd Where to store result
 * @return 0 on success, non-0 on error
 *
 * @p cd is zeroed before parsing starts.
 *
 * See RFC 2109.
 */
int parse_cookie(const char *s,
                 struct cookiedata *cd) {
  char *n = 0, *v = 0;

  memset(cd, 0, sizeof *cd);
  /* skipwhite() cannot return NULL when comment skipping is disabled */
  s = skipwhite(s, 0);
  while(*s) {
    /* Skip separators */
    if(*s == ';' || *s == ',') {
      ++s;
      s = skipwhite(s, 0);
      continue;
    }
    if(!(s = parsetoken(s, &n, cookie_separator))) {
      disorder_error(0, "parse_cookie: cannot parse attribute name");
      return -1;
    }
    s = skipwhite(s, 0);
    if(*s++ != '=') {
      disorder_error(0, "parse_cookie: did not find expected '='");
      return -1;
    }
    s = skipwhite(s, 0);
    if(!(s = mime_parse_word(s, &v, cookie_value_separator))) {
      disorder_error(0, "parse_cookie: cannot parse value for '%s'", n);
      return -1;
    }
    if(n[0] == '$') {
      /* Some bit of meta-information */
      if(!strcmp(n, "$Version"))
        cd->version = v;
      else if(!strcmp(n, "$Path")) {
        /* Applies to the most recent cookie, at most once */
        if(cd->ncookies > 0 && cd->cookies[cd->ncookies-1].path == 0)
          cd->cookies[cd->ncookies-1].path = v;
        else {
          disorder_error(0, "redundant $Path in Cookie: header");
          return -1;
        }
      } else if(!strcmp(n, "$Domain")) {
        /* Applies to the most recent cookie, at most once */
        if(cd->ncookies > 0 && cd->cookies[cd->ncookies-1].domain == 0)
          cd->cookies[cd->ncookies-1].domain = v;
        else {
          disorder_error(0, "redundant $Domain in Cookie: header");
          return -1;
        }
      }
    } else {
      /* It's a new cookie */
      cd->cookies = xrealloc(cd->cookies,
                             (cd->ncookies + 1) * sizeof (struct cookie));
      cd->cookies[cd->ncookies].name = n;
      cd->cookies[cd->ncookies].value = v;
      cd->cookies[cd->ncookies].path = 0;
      cd->cookies[cd->ncookies].domain = 0;
      ++cd->ncookies;
    }
    s = skipwhite(s, 0);
    if(*s && (*s != ',' && *s != ';')) {
      disorder_error(0, "missing separator in Cookie: header");
      return -1;
    }
  }
  return 0;
}

/** @brief Find a named cookie
 * @param cd Parsed cookie data
 * @param name Name of cookie
 * @return Cookie structure or NULL if not found
 */
const struct cookie *find_cookie(const struct cookiedata *cd,
                                 const char *name) {
  int n;

  for(n = 0; n < cd->ncookies; ++n)
    if(!strcmp(cd->cookies[n].name, name))
      return &cd->cookies[n];
  return 0;
}

/** @brief RFC822 quoting
 * @param s String to quote
 * @param force If non-0, always quote
 * @return Possibly quoted string
 *
 * If @p force is 0 then the string is only quoted if it contains a
 * mime_tspecial() character, a mime_http_separator() character or whitespace.
 * The result is always a fresh copy.
 */
char *quote822(const char *s, int force) {
  const char *t;
  struct dynstr d[1];
  int c;

  if(!force) {
    /* See if we need to quote */
    for(t = s; (c = (unsigned char)*t); ++t) {
      if(mime_tspecial(c) || mime_http_separator(c) || whitespace(c))
        break;
    }
    if(*t)
      force = 1;
  }

  if(!force)
    return xstrdup(s);

  dynstr_init(d);
  dynstr_append(d, '"');
  for(t = s; (c = (unsigned char)*t); ++t) {
    /* Quotes and backslashes need a backslash in front of them */
    if(c == '"' || c == '\\')
      dynstr_append(d, '\\');
    dynstr_append(d, c);
  }
  dynstr_append(d, '"');
  dynstr_terminate(d);
  return d->vec;
}

/** @brief Return true if @p ptr points at trailing space
 *
 * Trailing space is a run of one or more spaces and tabs that extends to the
 * next newline or to the end of the string.
 */
static int is_trailing_space(const char *ptr) {
  if(*ptr == ' ' || *ptr == '\t') {
    while(*ptr == ' ' || *ptr == '\t')
      ++ptr;
    return *ptr == '\n' || *ptr == 0;
  } else
    return 0;
}

/** @brief Encoding text as quoted-printable
 * @param text String to encode
 * @return Encoded string
 *
 * See RFC2045 s6.7.
 */
char *mime_to_qp(const char *text) {
  struct dynstr d[1];
  int linelength = 0;                   /* length of current line */
  char buffer[10];

  dynstr_init(d);
  /* The rules are:
   * 1. Anything except newline can be replaced with =%02X
   * 2. Newline, 33-60 and 62-126 stand for themselves (i.e. not '=')
   * 3. Non-trailing space/tab stand for themselves.
   * 4. Output lines are limited to 76 chars, with =<newline> being used
   *    as a soft line break
   * 5. Newlines aren't counted towards the 76 char limit.
   */
  while(*text) {
    const int c = (unsigned char)*text;
    if(c == '\n') {
      /* Newline stands as itself */
      dynstr_append(d, '\n');
      linelength = 0;
    } else if((c >= 33 && c <= 126 && c != '=')
              || ((c == ' ' || c == '\t')
                  && !is_trailing_space(text))) {
      /* Things that can stand for themselves  */
      dynstr_append(d, c);
      ++linelength;
    } else {
      /* Anything else that needs encoding */
      snprintf(buffer, sizeof buffer, "=%02X", c);
      dynstr_append_string(d, buffer);
      linelength += 3;
    }
    ++text;
    if(linelength > 73 && *text && *text != '\n') {
      /* Next character might overflow 76 character limit if encoded, so we
       * insert a soft break */
      dynstr_append_string(d, "=\n");
      linelength = 0;
    }
  }
  /* Ensure there is a final newline */
  if(linelength)
    dynstr_append(d, '\n');
  /* That's all */
  dynstr_terminate(d);
  return d->vec;
}

/** @brief Encode text
 * @param text Underlying UTF-8 text
 * @param charsetp Where to store charset string
 * @param encodingp Where to store encoding string
 * @return Encoded text (might be @p text)
 *
 * Pure ASCII text is returned unchanged and described as us-ascii/7bit;
 * anything else is encoded with mime_to_qp() and described as
 * utf-8/quoted-printable.
 */
const char *mime_encode_text(const char *text,
                             const char **charsetp,
                             const char **encodingp) {
  const char *ptr;

  /* See if there are in fact any non-ASCII characters */
  for(ptr = text; *ptr; ++ptr)
    if((unsigned char)*ptr >= 128)
      break;
  if(!*ptr) {
    /* Plain old ASCII, no encoding required */
    *charsetp = "us-ascii";
    *encodingp = "7bit";
    return text;
  }
  *charsetp = "utf-8";
  *encodingp = "quoted-printable";
  return mime_to_qp(text);
}

/*
Local Variables:
c-basic-offset:2
comment-column:40
fill-column:79
End:
*/