libkdenetwork Library API Documentation

kmime_header_parsing.cpp

00001 /* -*- c++ -*- 00002 kmime_header_parsing.cpp 00003 00004 This file is part of KMime, the KDE internet mail/usenet news message library. 00005 Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org> 00006 00007 KMime is free software; you can redistribute it and/or modify it 00008 under the terms of the GNU General Public License, version 2, as 00009 published by the Free Software Foundation. 00010 00011 KMime is distributed in the hope that it will be useful, but 00012 WITHOUT ANY WARRANTY; without even the implied warranty of 00013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 General Public License for more details. 00015 00016 You should have received a copy of the GNU General Public License 00017 along with this library; if not, write to the Free Software 00018 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00019 00020 In addition, as a special exception, the copyright holders give 00021 permission to link the code of this library with any edition of 00022 the Qt library by Trolltech AS, Norway (or with modified versions 00023 of Qt that use the same license as Qt), and distribute linked 00024 combinations including the two. You must obey the GNU General 00025 Public License in all respects for all of the code used other than 00026 Qt. If you modify this file, you may extend this exception to 00027 your version of the file, but you are not obligated to do so. If 00028 you do not wish to do so, delete this exception statement from 00029 your version. 00030 */ 00031 00032 #include <config.h> 00033 #include "kmime_header_parsing.h" 00034 00035 #include "kmime_codecs.h" 00036 #include "kmime_util.h" 00037 #include "kmime_warning.h" 00038 00039 #include <kglobal.h> 00040 #include <kcharsets.h> 00041 00042 #include <qtextcodec.h> 00043 #include <qmap.h> 00044 #include <qcstring.h> 00045 #include <qstringlist.h> 00046 00047 #include <ctype.h> // for isdigit 00048 #include <cassert> 00049 00050 using namespace KMime; 00051 using namespace KMime::Types; 00052 00053 namespace KMime { 00054 00055 namespace Types { 00056 00057 QString AddrSpec::asString() const { 00058 bool needsQuotes = false; 00059 QString result; 00060 for ( unsigned int i = 0 ; i < localPart.length() ; ++i ) { 00061 const char ch = localPart[i].latin1(); 00062 if ( ch == '.' || isAText( ch ) ) 00063 result += ch; 00064 else { 00065 needsQuotes = true; 00066 if ( ch == '\\' || ch == '"' ) 00067 result += '\\'; 00068 result += ch; 00069 } 00070 } 00071 if ( needsQuotes ) 00072 return '"' + result + "\"@" + domain; 00073 else 00074 return result + '@' + domain; 00075 } 00076 00077 } 00078 00079 namespace HeaderParsing { 00080 00081 // parse the encoded-word (scursor points to after the initial '=') 00082 bool parseEncodedWord( const char* & scursor, const char * const send, 00083 QString & result, QCString & language ) { 00084 00085 // make sure the caller already did a bit of the work. 00086 assert( *(scursor-1) == '=' ); 00087 00088 // 00089 // STEP 1: 00090 // scan for the charset/language portion of the encoded-word 00091 // 00092 00093 char ch = *scursor++; 00094 00095 if ( ch != '?' ) { 00096 kdDebug() << "first" << endl; 00097 KMIME_WARN_PREMATURE_END_OF(EncodedWord); 00098 return false; 00099 } 00100 00101 // remember start of charset (ie. just after the initial "=?") and 00102 // language (just after the first '*') fields: 00103 const char * charsetStart = scursor; 00104 const char * languageStart = 0; 00105 00106 // find delimiting '?' (and the '*' separating charset and language 00107 // tags, if any): 00108 for ( ; scursor != send ; scursor++ ) 00109 if ( *scursor == '?') 00110 break; 00111 else if ( *scursor == '*' && !languageStart ) 00112 languageStart = scursor + 1; 00113 00114 // not found? can't be an encoded-word! 00115 if ( scursor == send || *scursor != '?' ) { 00116 kdDebug() << "second" << endl; 00117 KMIME_WARN_PREMATURE_END_OF(EncodedWord); 00118 return false; 00119 } 00120 00121 // extract the language information, if any (if languageStart is 0, 00122 // language will be null, too): 00123 QCString maybeLanguage( languageStart, scursor - languageStart + 1 /*for NUL*/); 00124 // extract charset information (keep in mind: the size given to the 00125 // ctor is one off due to the \0 terminator): 00126 QCString maybeCharset( charsetStart, ( languageStart ? languageStart : scursor + 1 ) - charsetStart ); 00127 00128 // 00129 // STEP 2: 00130 // scan for the encoding portion of the encoded-word 00131 // 00132 00133 00134 // remember start of encoding (just _after_ the second '?'): 00135 scursor++; 00136 const char * encodingStart = scursor; 00137 00138 // find next '?' (ending the encoding tag): 00139 for ( ; scursor != send ; scursor++ ) 00140 if ( *scursor == '?' ) break; 00141 00142 // not found? Can't be an encoded-word! 00143 if ( scursor == send || *scursor != '?' ) { 00144 kdDebug() << "third" << endl; 00145 KMIME_WARN_PREMATURE_END_OF(EncodedWord); 00146 return false; 00147 } 00148 00149 // extract the encoding information: 00150 QCString maybeEncoding( encodingStart, scursor - encodingStart + 1 ); 00151 00152 00153 kdDebug() << "parseEncodedWord: found charset == \"" << maybeCharset 00154 << "\"; language == \"" << maybeLanguage 00155 << "\"; encoding == \"" << maybeEncoding << "\"" << endl; 00156 00157 // 00158 // STEP 3: 00159 // scan for encoded-text portion of encoded-word 00160 // 00161 00162 00163 // remember start of encoded-text (just after the third '?'): 00164 scursor++; 00165 const char * encodedTextStart = scursor; 00166 00167 // find next '?' (ending the encoded-text): 00168 for ( ; scursor != send ; scursor++ ) 00169 if ( *scursor == '?' ) break; 00170 00171 // not found? Can't be an encoded-word! 00172 // ### maybe evaluate it nonetheless if the rest is OK? 00173 if ( scursor == send || *scursor != '?' ) { 00174 kdDebug() << "fourth" << endl; 00175 KMIME_WARN_PREMATURE_END_OF(EncodedWord); 00176 return false; 00177 } 00178 scursor++; 00179 // check for trailing '=': 00180 if ( scursor == send || *scursor != '=' ) { 00181 kdDebug() << "fifth" << endl; 00182 KMIME_WARN_PREMATURE_END_OF(EncodedWord); 00183 return false; 00184 } 00185 scursor++; 00186 00187 // set end sentinel for encoded-text: 00188 const char * const encodedTextEnd = scursor - 2; 00189 00190 // 00191 // STEP 4: 00192 // setup decoders for the transfer encoding and the charset 00193 // 00194 00195 00196 // try if there's a codec for the encoding found: 00197 Codec * codec = Codec::codecForName( maybeEncoding ); 00198 if ( !codec ) { 00199 KMIME_WARN_UNKNOWN(Encoding,maybeEncoding); 00200 return false; 00201 } 00202 00203 // get an instance of a corresponding decoder: 00204 Decoder * dec = codec->makeDecoder(); 00205 assert( dec ); 00206 00207 // try if there's a (text)codec for the charset found: 00208 bool matchOK = false; 00209 QTextCodec 00210 *textCodec = KGlobal::charsets()->codecForName( maybeCharset, matchOK ); 00211 00212 if ( !matchOK || !textCodec ) { 00213 KMIME_WARN_UNKNOWN(Charset,maybeCharset); 00214 delete dec; 00215 return false; 00216 }; 00217 00218 kdDebug() << "mimeName(): \"" << textCodec->mimeName() << "\"" << endl; 00219 00220 // allocate a temporary buffer to store the 8bit text: 00221 int encodedTextLength = encodedTextEnd - encodedTextStart; 00222 QByteArray buffer( codec->maxDecodedSizeFor( encodedTextLength ) ); 00223 QByteArray::Iterator bit = buffer.begin(); 00224 QByteArray::ConstIterator bend = buffer.end(); 00225 00226 // 00227 // STEP 5: 00228 // do the actual decoding 00229 // 00230 00231 if ( !dec->decode( encodedTextStart, encodedTextEnd, bit, bend ) ) 00232 KMIME_WARN << codec->name() << " codec lies about it's maxDecodedSizeFor( " 00233 << encodedTextLength << " )\nresult may be truncated" << endl; 00234 00235 result = textCodec->toUnicode( buffer.begin(), bit - buffer.begin() ); 00236 00237 kdDebug() << "result now: \"" << result << "\"" << endl; 00238 // cleanup: 00239 delete dec; 00240 language = maybeLanguage; 00241 00242 return true; 00243 } 00244 00245 static inline void eatWhiteSpace( const char* & scursor, const char * const send ) { 00246 while ( scursor != send 00247 && ( *scursor == ' ' || *scursor == '\n' || 00248 *scursor == '\t' || *scursor == '\r' ) ) 00249 scursor++; 00250 } 00251 00252 bool parseAtom( const char * & scursor, const char * const send, 00253 QString & result, bool allow8Bit ) 00254 { 00255 QPair<const char*,int> maybeResult; 00256 00257 if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) { 00258 result += QString::fromLatin1( maybeResult.first, maybeResult.second ); 00259 return true; 00260 } 00261 00262 return false; 00263 } 00264 00265 bool parseAtom( const char * & scursor, const char * const send, 00266 QPair<const char*,int> & result, bool allow8Bit ) { 00267 bool success = false; 00268 const char * start = scursor; 00269 00270 while ( scursor != send ) { 00271 signed char ch = *scursor++; 00272 if ( ch > 0 && isAText(ch) ) { 00273 // AText: OK 00274 success = true; 00275 } else if ( allow8Bit && ch < 0 ) { 00276 // 8bit char: not OK, but be tolerant. 00277 KMIME_WARN_8BIT(ch); 00278 success = true; 00279 } else { 00280 // CTL or special - marking the end of the atom: 00281 // re-set sursor to point to the offending 00282 // char and return: 00283 scursor--; 00284 break; 00285 } 00286 } 00287 result.first = start; 00288 result.second = scursor - start; 00289 return success; 00290 } 00291 00292 bool parseToken( const char * & scursor, const char * const send, 00293 QString & result, bool allow8Bit ) 00294 { 00295 QPair<const char*,int> maybeResult; 00296 00297 if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) { 00298 result += QString::fromLatin1( maybeResult.first, maybeResult.second ); 00299 return true; 00300 } 00301 00302 return false; 00303 } 00304 00305 bool parseToken( const char * & scursor, const char * const send, 00306 QPair<const char*,int> & result, bool allow8Bit ) 00307 { 00308 bool success = false; 00309 const char * start = scursor; 00310 00311 while ( scursor != send ) { 00312 signed char ch = *scursor++; 00313 if ( ch > 0 && isTText(ch) ) { 00314 // TText: OK 00315 success = true; 00316 } else if ( allow8Bit && ch < 0 ) { 00317 // 8bit char: not OK, but be tolerant. 00318 KMIME_WARN_8BIT(ch); 00319 success = true; 00320 } else { 00321 // CTL or tspecial - marking the end of the atom: 00322 // re-set sursor to point to the offending 00323 // char and return: 00324 scursor--; 00325 break; 00326 } 00327 } 00328 result.first = start; 00329 result.second = scursor - start; 00330 return success; 00331 } 00332 00333 #define READ_ch_OR_FAIL if ( scursor == send ) { \ 00334 KMIME_WARN_PREMATURE_END_OF(GenericQuotedString); \ 00335 return false; \ 00336 } else { \ 00337 ch = *scursor++; \ 00338 } 00339 00340 // known issues: 00341 // 00342 // - doesn't handle quoted CRLF 00343 00344 bool parseGenericQuotedString( const char* & scursor, const char * const send, 00345 QString & result, bool isCRLF, 00346 const char openChar, const char closeChar ) 00347 { 00348 char ch; 00349 // We are in a quoted-string or domain-literal or comment and the 00350 // cursor points to the first char after the openChar. 00351 // We will apply unfolding and quoted-pair removal. 00352 // We return when we either encounter the end or unescaped openChar 00353 // or closeChar. 00354 00355 assert( *(scursor-1) == openChar || *(scursor-1) == closeChar ); 00356 00357 while ( scursor != send ) { 00358 ch = *scursor++; 00359 00360 if ( ch == closeChar || ch == openChar ) { 00361 // end of quoted-string or another opening char: 00362 // let caller decide what to do. 00363 return true; 00364 } 00365 00366 switch( ch ) { 00367 case '\\': // quoted-pair 00368 // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5 00369 READ_ch_OR_FAIL; 00370 KMIME_WARN_IF_8BIT(ch); 00371 result += QChar(ch); 00372 break; 00373 case '\r': 00374 // ### 00375 // The case of lonely '\r' is easy to solve, as they're 00376 // not part of Unix Line-ending conventions. 00377 // But I see a problem if we are given Unix-native 00378 // line-ending-mails, where we cannot determine anymore 00379 // whether a given '\n' was part of a CRLF or was occurring 00380 // on it's own. 00381 READ_ch_OR_FAIL; 00382 if ( ch != '\n' ) { 00383 // CR on it's own... 00384 KMIME_WARN_LONE(CR); 00385 result += QChar('\r'); 00386 scursor--; // points to after the '\r' again 00387 } else { 00388 // CRLF encountered. 00389 // lookahead: check for folding 00390 READ_ch_OR_FAIL; 00391 if ( ch == ' ' || ch == '\t' ) { 00392 // correct folding; 00393 // position cursor behind the CRLF WSP (unfolding) 00394 // and add the WSP to the result 00395 result += QChar(ch); 00396 } else { 00397 // this is the "shouldn't happen"-case. There is a CRLF 00398 // inside a quoted-string without it being part of FWS. 00399 // We take it verbatim. 00400 KMIME_WARN_NON_FOLDING(CRLF); 00401 result += "\r\n"; 00402 // the cursor is decremented again, so's we need not 00403 // duplicate the whole switch here. "ch" could've been 00404 // everything (incl. openChar or closeChar). 00405 scursor--; 00406 } 00407 } 00408 break; 00409 case '\n': 00410 // Note: CRLF has been handled above already! 00411 // ### LF needs special treatment, depending on whether isCRLF 00412 // is true (we can be sure a lonely '\n' was meant this way) or 00413 // false ('\n' alone could have meant LF or CRLF in the original 00414 // message. This parser assumes CRLF iff the LF is followed by 00415 // either WSP (folding) or NULL (premature end of quoted-string; 00416 // Should be fixed, since NULL is allowed as per rfc822). 00417 READ_ch_OR_FAIL; 00418 if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) { 00419 // folding 00420 // correct folding 00421 result += QChar(ch); 00422 } else { 00423 // non-folding 00424 KMIME_WARN_LONE(LF); 00425 result += QChar('\n'); 00426 // pos is decremented, so's we need not duplicate the whole 00427 // switch here. ch could've been everything (incl. <">, "\"). 00428 scursor--; 00429 } 00430 break; 00431 default: 00432 KMIME_WARN_IF_8BIT(ch); 00433 result += QChar(ch); 00434 } 00435 } 00436 00437 return false; 00438 } 00439 00440 // known issues: 00441 // 00442 // - doesn't handle encoded-word inside comments. 00443 00444 bool parseComment( const char* & scursor, const char * const send, 00445 QString & result, bool isCRLF, bool reallySave ) 00446 { 00447 int commentNestingDepth = 1; 00448 const char * afterLastClosingParenPos = 0; 00449 QString maybeCmnt; 00450 const char * oldscursor = scursor; 00451 00452 assert( *(scursor-1) == '(' ); 00453 00454 while ( commentNestingDepth ) { 00455 QString cmntPart; 00456 if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) { 00457 assert( *(scursor-1) == ')' || *(scursor-1) == '(' ); 00458 // see the kdoc for above function for the possible conditions 00459 // we have to check: 00460 switch ( *(scursor-1) ) { 00461 case ')': 00462 if ( reallySave ) { 00463 // add the chunk that's now surely inside the comment. 00464 result += maybeCmnt; 00465 result += cmntPart; 00466 if ( commentNestingDepth > 1 ) // don't add the outermost ')'... 00467 result += QChar(')'); 00468 maybeCmnt = QString::null; 00469 } 00470 afterLastClosingParenPos = scursor; 00471 --commentNestingDepth; 00472 break; 00473 case '(': 00474 if ( reallySave ) { 00475 // don't add to "result" yet, because we might find that we 00476 // are already outside the (broken) comment... 00477 maybeCmnt += cmntPart; 00478 maybeCmnt += QChar('('); 00479 } 00480 ++commentNestingDepth; 00481 break; 00482 default: assert( 0 ); 00483 } // switch 00484 } else { 00485 // !parseGenericQuotedString, ie. premature end 00486 if ( afterLastClosingParenPos ) 00487 scursor = afterLastClosingParenPos; 00488 else 00489 scursor = oldscursor; 00490 return false; 00491 } 00492 } // while 00493 00494 return true; 00495 } 00496 00497 00498 // known issues: none. 00499 00500 bool parsePhrase( const char* & scursor, const char * const send, 00501 QString & result, bool isCRLF ) 00502 { 00503 enum { None, Phrase, Atom, EncodedWord, QuotedString } found = None; 00504 QString tmp; 00505 QCString lang; 00506 const char * successfullyParsed = 0; 00507 // only used by the encoded-word branch 00508 const char * oldscursor; 00509 // used to suppress whitespace between adjacent encoded-words 00510 // (rfc2047, 6.2): 00511 bool lastWasEncodedWord = false; 00512 00513 while ( scursor != send ) { 00514 char ch = *scursor++; 00515 switch ( ch ) { 00516 case '.': // broken, but allow for intorop's sake 00517 if ( found == None ) { 00518 --scursor; 00519 return false; 00520 } else { 00521 if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) ) 00522 result += ". "; 00523 else 00524 result += '.'; 00525 successfullyParsed = scursor; 00526 } 00527 break; 00528 case '"': // quoted-string 00529 tmp = QString::null; 00530 if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) { 00531 successfullyParsed = scursor; 00532 assert( *(scursor-1) == '"' ); 00533 switch ( found ) { 00534 case None: 00535 found = QuotedString; 00536 break; 00537 case Phrase: 00538 case Atom: 00539 case EncodedWord: 00540 case QuotedString: 00541 found = Phrase; 00542 result += QChar(' '); // rfc822, 3.4.4 00543 break; 00544 default: 00545 assert( 0 ); 00546 } 00547 lastWasEncodedWord = false; 00548 result += tmp; 00549 } else { 00550 // premature end of quoted string. 00551 // What to do? Return leading '"' as special? Return as quoted-string? 00552 // We do the latter if we already found something, else signal failure. 00553 if ( found == None ) { 00554 return false; 00555 } else { 00556 result += QChar(' '); // rfc822, 3.4.4 00557 result += tmp; 00558 return true; 00559 } 00560 } 00561 break; 00562 case '(': // comment 00563 // parse it, but ignore content: 00564 tmp = QString::null; 00565 if ( parseComment( scursor, send, tmp, isCRLF, 00566 false /*don't bother with the content*/ ) ) { 00567 successfullyParsed = scursor; 00568 lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2 00569 } else { 00570 if ( found == None ) 00571 return false; 00572 else { 00573 scursor = successfullyParsed; 00574 return true; 00575 } 00576 } 00577 break; 00578 case '=': // encoded-word 00579 tmp = QString::null; 00580 oldscursor = scursor; 00581 lang = 0; 00582 if ( parseEncodedWord( scursor, send, tmp, lang ) ) { 00583 successfullyParsed = scursor; 00584 switch ( found ) { 00585 case None: 00586 found = EncodedWord; 00587 break; 00588 case Phrase: 00589 case EncodedWord: 00590 case Atom: 00591 case QuotedString: 00592 if ( !lastWasEncodedWord ) 00593 result += QChar(' '); // rfc822, 3.4.4 00594 found = Phrase; 00595 break; 00596 default: assert( 0 ); 00597 } 00598 lastWasEncodedWord = true; 00599 result += tmp; 00600 break; 00601 } else 00602 // parse as atom: 00603 scursor = oldscursor; 00604 // fall though... 00605 00606 default: //atom 00607 tmp = QString::null; 00608 scursor--; 00609 if ( parseAtom( scursor, send, tmp, true /* allow 8bit */ ) ) { 00610 successfullyParsed = scursor; 00611 switch ( found ) { 00612 case None: 00613 found = Atom; 00614 break; 00615 case Phrase: 00616 case Atom: 00617 case EncodedWord: 00618 case QuotedString: 00619 found = Phrase; 00620 result += QChar(' '); // rfc822, 3.4.4 00621 break; 00622 default: 00623 assert( 0 ); 00624 } 00625 lastWasEncodedWord = false; 00626 result += tmp; 00627 } else { 00628 if ( found == None ) 00629 return false; 00630 else { 00631 scursor = successfullyParsed; 00632 return true; 00633 } 00634 } 00635 } 00636 eatWhiteSpace( scursor, send ); 00637 } 00638 00639 return ( found != None ); 00640 } 00641 00642 00643 bool parseDotAtom( const char* & scursor, const char * const send, 00644 QString & result, bool isCRLF ) 00645 { 00646 // always points to just after the last atom parsed: 00647 const char * successfullyParsed; 00648 00649 QString tmp; 00650 if ( !parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) 00651 return false; 00652 result += tmp; 00653 successfullyParsed = scursor; 00654 00655 while ( scursor != send ) { 00656 eatCFWS( scursor, send, isCRLF ); 00657 00658 // end of header or no '.' -> return 00659 if ( scursor == send || *scursor != '.' ) return true; 00660 scursor++; // eat '.' 00661 00662 eatCFWS( scursor, send, isCRLF ); 00663 00664 if ( scursor == send || !isAText( *scursor ) ) { 00665 // end of header or no AText, but this time following a '.'!: 00666 // reset cursor to just after last successfully parsed char and 00667 // return: 00668 scursor = successfullyParsed; 00669 return true; 00670 } 00671 00672 // try to parse the next atom: 00673 QString maybeAtom; 00674 if ( !parseAtom( scursor, send, maybeAtom, false /*no 8bit*/ ) ) { 00675 scursor = successfullyParsed; 00676 return true; 00677 } 00678 00679 result += QChar('.'); 00680 result += maybeAtom; 00681 successfullyParsed = scursor; 00682 } 00683 00684 scursor = successfullyParsed; 00685 return true; 00686 } 00687 00688 00689 void eatCFWS( const char* & scursor, const char * const send, bool isCRLF ) { 00690 QString dummy; 00691 00692 while ( scursor != send ) { 00693 const char * oldscursor = scursor; 00694 00695 char ch = *scursor++; 00696 00697 switch( ch ) { 00698 case ' ': 00699 case '\t': // whitespace 00700 case '\r': 00701 case '\n': // folding 00702 continue; 00703 00704 case '(': // comment 00705 if ( parseComment( scursor, send, dummy, isCRLF, false /*don't save*/ ) ) 00706 continue; 00707 scursor = oldscursor; 00708 return; 00709 00710 default: 00711 scursor = oldscursor; 00712 return; 00713 } 00714 00715 } 00716 } 00717 00718 bool parseDomain( const char* & scursor, const char * const send, 00719 QString & result, bool isCRLF ) { 00720 eatCFWS( scursor, send, isCRLF ); 00721 if ( scursor == send ) return false; 00722 00723 // domain := dot-atom / domain-literal / atom *("." atom) 00724 // 00725 // equivalent to: 00726 // domain = dot-atom / domain-literal, 00727 // since parseDotAtom does allow CFWS between atoms and dots 00728 00729 if ( *scursor == '[' ) { 00730 // domain-literal: 00731 QString maybeDomainLiteral; 00732 // eat '[': 00733 scursor++; 00734 while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral, 00735 isCRLF, '[', ']' ) ) { 00736 if ( scursor == send ) { 00737 // end of header: check for closing ']': 00738 if ( *(scursor-1) == ']' ) { 00739 // OK, last char was ']': 00740 result = maybeDomainLiteral; 00741 return true; 00742 } else { 00743 // not OK, domain-literal wasn't closed: 00744 return false; 00745 } 00746 } 00747 // we hit openChar in parseGenericQuotedString. 00748 // include it in maybeDomainLiteral and keep on parsing: 00749 if ( *(scursor-1) == '[' ) { 00750 maybeDomainLiteral += QChar('['); 00751 continue; 00752 } 00753 // OK, real end of domain-literal: 00754 result = maybeDomainLiteral; 00755 return true; 00756 } 00757 } else { 00758 // dot-atom: 00759 QString maybeDotAtom; 00760 if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) { 00761 result = maybeDotAtom; 00762 return true; 00763 } 00764 } 00765 return false; 00766 } 00767 00768 bool parseObsRoute( const char* & scursor, const char* const send, 00769 QStringList & result, bool isCRLF, bool save ) { 00770 while ( scursor != send ) { 00771 eatCFWS( scursor, send, isCRLF ); 00772 if ( scursor == send ) return false; 00773 00774 // empty entry: 00775 if ( *scursor == ',' ) { 00776 scursor++; 00777 if ( save ) result.append( QString::null ); 00778 continue; 00779 } 00780 00781 // empty entry ending the list: 00782 if ( *scursor == ':' ) { 00783 scursor++; 00784 if ( save ) result.append( QString::null ); 00785 return true; 00786 } 00787 00788 // each non-empty entry must begin with '@': 00789 if ( *scursor != '@' ) 00790 return false; 00791 else 00792 scursor++; 00793 00794 QString maybeDomain; 00795 if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) return false; 00796 if ( save ) result.append( maybeDomain ); 00797 00798 // eat the following (optional) comma: 00799 eatCFWS( scursor, send, isCRLF ); 00800 if ( scursor == send ) return false; 00801 if ( *scursor == ':' ) { scursor++; return true; } 00802 if ( *scursor == ',' ) scursor++; 00803 00804 } 00805 00806 return false; 00807 } 00808 00809 bool parseAddrSpec( const char* & scursor, const char * const send, 00810 AddrSpec & result, bool isCRLF ) { 00811 // 00812 // STEP 1: 00813 // local-part := dot-atom / quoted-string / word *("." word) 00814 // 00815 // this is equivalent to: 00816 // local-part := word *("." word) 00817 00818 QString maybeLocalPart; 00819 QString tmp; 00820 00821 while ( scursor != send ) { 00822 // first, eat any whitespace 00823 eatCFWS( scursor, send, isCRLF ); 00824 00825 char ch = *scursor++; 00826 switch ( ch ) { 00827 case '.': // dot 00828 maybeLocalPart += QChar('.'); 00829 break; 00830 00831 case '@': 00832 goto SAW_AT_SIGN; 00833 break; 00834 00835 case '"': // quoted-string 00836 tmp = QString::null; 00837 if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) 00838 maybeLocalPart += tmp; 00839 else 00840 return false; 00841 break; 00842 00843 default: // atom 00844 scursor--; // re-set scursor to point to ch again 00845 tmp = QString::null; 00846 if ( parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) 00847 maybeLocalPart += tmp; 00848 else 00849 return false; // parseAtom can only fail if the first char is non-atext. 00850 break; 00851 } 00852 } 00853 00854 return false; 00855 00856 00857 // 00858 // STEP 2: 00859 // domain 00860 // 00861 00862 SAW_AT_SIGN: 00863 00864 assert( *(scursor-1) == '@' ); 00865 00866 QString maybeDomain; 00867 if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) 00868 return false; 00869 00870 result.localPart = maybeLocalPart; 00871 result.domain = maybeDomain; 00872 00873 return true; 00874 } 00875 00876 00877 bool parseAngleAddr( const char* & scursor, const char * const send, 00878 AddrSpec & result, bool isCRLF ) { 00879 // first, we need an opening angle bracket: 00880 eatCFWS( scursor, send, isCRLF ); 00881 if ( scursor == send || *scursor != '<' ) return false; 00882 scursor++; // eat '<' 00883 00884 eatCFWS( scursor, send, isCRLF ); 00885 if ( scursor == send ) return false; 00886 00887 if ( *scursor == '@' || *scursor == ',' ) { 00888 // obs-route: parse, but ignore: 00889 KMIME_WARN << "obsolete source route found! ignoring." << endl; 00890 QStringList dummy; 00891 if ( !parseObsRoute( scursor, send, dummy, 00892 isCRLF, false /* don't save */ ) ) 00893 return false; 00894 // angle-addr isn't complete until after the '>': 00895 if ( scursor == send ) return false; 00896 } 00897 00898 // parse addr-spec: 00899 AddrSpec maybeAddrSpec; 00900 if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) return false; 00901 00902 eatCFWS( scursor, send, isCRLF ); 00903 if ( scursor == send || *scursor != '>' ) return false; 00904 scursor++; 00905 00906 result = maybeAddrSpec; 00907 return true; 00908 00909 } 00910 00911 bool parseMailbox( const char* & scursor, const char * const send, 00912 Mailbox & result, bool isCRLF ) { 00913 00914 // rfc: 00915 // mailbox := addr-spec / ([ display-name ] angle-addr) 00916 // us: 00917 // mailbox := addr-spec / ([ display-name ] angle-addr) 00918 // / (angle-addr "(" display-name ")") 00919 00920 eatCFWS( scursor, send, isCRLF ); 00921 if ( scursor == send ) return false; 00922 00923 AddrSpec maybeAddrSpec; 00924 00925 // first, try if it's a vanilla addr-spec: 00926 const char * oldscursor = scursor; 00927 if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) { 00928 result.displayName = QString::null; 00929 result.addrSpec = maybeAddrSpec; 00930 return true; 00931 } 00932 scursor = oldscursor; 00933 00934 // second, see if there's a display-name: 00935 QString maybeDisplayName; 00936 if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) { 00937 // failed: reset cursor, note absent display-name 00938 maybeDisplayName = QString::null; 00939 scursor = oldscursor; 00940 } else { 00941 // succeeded: eat CFWS 00942 eatCFWS( scursor, send, isCRLF ); 00943 if ( scursor == send ) return false; 00944 } 00945 00946 // third, parse the angle-addr: 00947 if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) ) 00948 return false; 00949 00950 if ( maybeDisplayName.isNull() ) { 00951 // check for the obsolete form of display-name (as comment): 00952 eatWhiteSpace( scursor, send ); 00953 if ( scursor != send && *scursor == '(' ) { 00954 scursor++; 00955 if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) 00956 return false; 00957 } 00958 } 00959 00960 result.displayName = maybeDisplayName; 00961 result.addrSpec = maybeAddrSpec; 00962 return true; 00963 } 00964 00965 bool parseGroup( const char* & scursor, const char * const send, 00966 Address & result, bool isCRLF ) { 00967 // group := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS] 00968 // 00969 // equivalent to: 00970 // group := display-name ":" [ obs-mbox-list ] ";" 00971 00972 eatCFWS( scursor, send, isCRLF ); 00973 if ( scursor == send ) return false; 00974 00975 // get display-name: 00976 QString maybeDisplayName; 00977 if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) 00978 return false; 00979 00980 // get ":": 00981 eatCFWS( scursor, send, isCRLF ); 00982 if ( scursor == send || *scursor != ':' ) return false; 00983 00984 result.displayName = maybeDisplayName; 00985 00986 // get obs-mbox-list (may contain empty entries): 00987 scursor++; 00988 while ( scursor != send ) { 00989 eatCFWS( scursor, send, isCRLF ); 00990 if ( scursor == send ) return false; 00991 00992 // empty entry: 00993 if ( *scursor == ',' ) { scursor++; continue; } 00994 00995 // empty entry ending the list: 00996 if ( *scursor == ';' ) { scursor++; return true; } 00997 00998 Mailbox maybeMailbox; 00999 if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) 01000 return false; 01001 result.mailboxList.append( maybeMailbox ); 01002 01003 eatCFWS( scursor, send, isCRLF ); 01004 // premature end: 01005 if ( scursor == send ) return false; 01006 // regular end of the list: 01007 if ( *scursor == ';' ) { scursor++; return true; } 01008 // eat regular list entry separator: 01009 if ( *scursor == ',' ) scursor++; 01010 } 01011 return false; 01012 } 01013 01014 01015 bool parseAddress( const char* & scursor, const char * const send, 01016 Address & result, bool isCRLF ) { 01017 // address := mailbox / group 01018 01019 eatCFWS( scursor, send, isCRLF ); 01020 if ( scursor == send ) return false; 01021 01022 // first try if it's a single mailbox: 01023 Mailbox maybeMailbox; 01024 const char * oldscursor = scursor; 01025 if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) { 01026 // yes, it is: 01027 result.displayName = QString::null; 01028 result.mailboxList.append( maybeMailbox ); 01029 return true; 01030 } 01031 scursor = oldscursor; 01032 01033 Address maybeAddress; 01034 01035 // no, it's not a single mailbox. Try if it's a group: 01036 if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) ) 01037 return false; 01038 01039 result = maybeAddress; 01040 return true; 01041 } 01042 01043 bool parseAddressList( const char* & scursor, const char * const send, 01044 AddressList & result, bool isCRLF ) { 01045 while ( scursor != send ) { 01046 eatCFWS( scursor, send, isCRLF ); 01047 // end of header: this is OK. 01048 if ( scursor == send ) return true; 01049 // empty entry: ignore: 01050 if ( *scursor == ',' ) { scursor++; continue; } 01051 01052 // parse one entry 01053 Address maybeAddress; 01054 if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) return false; 01055 result.append( maybeAddress ); 01056 01057 eatCFWS( scursor, send, isCRLF ); 01058 // end of header: this is OK. 01059 if ( scursor == send ) return true; 01060 // comma separating entries: eat it. 01061 if ( *scursor == ',' ) scursor++; 01062 } 01063 return true; 01064 } 01065 01066 01067 static QString asterisk = QString::fromLatin1("*0*",1); 01068 static QString asteriskZero = QString::fromLatin1("*0*",2); 01069 //static QString asteriskZeroAsterisk = QString::fromLatin1("*0*",3); 01070 01071 bool parseParameter( const char* & scursor, const char * const send, 01072 QPair<QString,QStringOrQPair> & result, bool isCRLF ) { 01073 // parameter = regular-parameter / extended-parameter 01074 // regular-parameter = regular-parameter-name "=" value 01075 // extended-parameter = 01076 // value = token / quoted-string 01077 // 01078 // note that rfc2231 handling is out of the scope of this function. 01079 // Therefore we return the attribute as QString and the value as 01080 // (start,length) tupel if we see that the value is encoded 01081 // (trailing asterisk), for parseParameterList to decode... 01082 01083 eatCFWS( scursor, send, isCRLF ); 01084 if ( scursor == send ) return false; 01085 01086 // 01087 // parse the parameter name: 01088 // 01089 QString maybeAttribute; 01090 if ( !parseToken( scursor, send, maybeAttribute, false /* no 8bit */ ) ) 01091 return false; 01092 01093 eatCFWS( scursor, send, isCRLF ); 01094 // premature end: not OK (haven't seen '=' yet). 01095 if ( scursor == send || *scursor != '=' ) return false; 01096 scursor++; // eat '=' 01097 01098 eatCFWS( scursor, send, isCRLF ); 01099 if ( scursor == send ) { 01100 // don't choke on attribute=, meaning the value was omitted: 01101 if ( maybeAttribute.endsWith( asterisk ) ) { 01102 KMIME_WARN << "attribute ends with \"*\", but value is empty! " 01103 "Chopping away \"*\"." << endl; 01104 maybeAttribute.truncate( maybeAttribute.length() - 1 ); 01105 } 01106 result = qMakePair( maybeAttribute.lower(), QStringOrQPair() ); 01107 return true; 01108 } 01109 01110 const char * oldscursor = scursor; 01111 01112 // 01113 // parse the parameter value: 01114 // 01115 QStringOrQPair maybeValue; 01116 if ( *scursor == '"' ) { 01117 // value is a quoted-string: 01118 scursor++; 01119 if ( maybeAttribute.endsWith( asterisk ) ) { 01120 // attributes ending with "*" designate extended-parameters, 01121 // which cannot have quoted-strings as values. So we remove the 01122 // trailing "*" to not confuse upper layers. 01123 KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string! " 01124 "Chopping away \"*\"." << endl; 01125 maybeAttribute.truncate( maybeAttribute.length() - 1 ); 01126 } 01127 01128 if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) { 01129 scursor = oldscursor; 01130 result = qMakePair( maybeAttribute.lower(), QStringOrQPair() ); 01131 return false; // this case needs further processing by upper layers!! 01132 } 01133 } else { 01134 // value is a token: 01135 if ( !parseToken( scursor, send, maybeValue.qpair, false /* no 8bit */ ) ) { 01136 scursor = oldscursor; 01137 result = qMakePair( maybeAttribute.lower(), QStringOrQPair() ); 01138 return false; // this case needs further processing by upper layers!! 01139 } 01140 } 01141 01142 result = qMakePair( maybeAttribute.lower(), maybeValue ); 01143 return true; 01144 } 01145 01146 01147 01148 bool parseRawParameterList( const char* & scursor, const char * const send, 01149 QMap<QString,QStringOrQPair> & result, 01150 bool isCRLF ) { 01151 // we use parseParameter() consecutively to obtain a map of raw 01152 // attributes to raw values. "Raw" here means that we don't do 01153 // rfc2231 decoding and concatenation. This is left to 01154 // parseParameterList(), which will call this function. 01155 // 01156 // The main reason for making this chunk of code a separate 01157 // (private) method is that we can deal with broken parameters 01158 // _here_ and leave the rfc2231 handling solely to 01159 // parseParameterList(), which will still be enough work. 01160 01161 while ( scursor != send ) { 01162 eatCFWS( scursor, send, isCRLF ); 01163 // empty entry ending the list: OK. 01164 if ( scursor == send ) return true; 01165 // empty list entry: ignore. 01166 if ( *scursor == ';' ) { scursor++; continue; } 01167 01168 QPair<QString,QStringOrQPair> maybeParameter; 01169 if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) { 01170 // we need to do a bit of work if the attribute is not 01171 // NULL. These are the cases marked with "needs further 01172 // processing" in parseParameter(). Specifically, parsing of the 01173 // token or the quoted-string, which should represent the value, 01174 // failed. We take the easy way out and simply search for the 01175 // next ';' to start parsing again. (Another option would be to 01176 // take the text between '=' and ';' as value) 01177 if ( maybeParameter.first.isNull() ) return false; 01178 while ( scursor != send ) { 01179 if ( *scursor++ == ';' ) goto IS_SEMICOLON; 01180 } 01181 // scursor == send case: end of list. 01182 return true; 01183 IS_SEMICOLON: 01184 // *scursor == ';' case: parse next entry. 01185 continue; 01186 } 01187 // successful parsing brings us here: 01188 result.insert( maybeParameter.first, maybeParameter.second ); 01189 01190 eatCFWS( scursor, send, isCRLF ); 01191 // end of header: ends list. 01192 if ( scursor == send ) return true; 01193 // regular separator: eat it. 01194 if ( *scursor == ';' ) scursor++; 01195 } 01196 return true; 01197 } 01198 01199 01200 static void decodeRFC2231Value( Codec* & rfc2231Codec, 01201 QTextCodec* & textcodec, 01202 bool isContinuation, QString & value, 01203 QPair<const char*,int> & source ) { 01204 01205 // 01206 // parse the raw value into (charset,language,text): 01207 // 01208 01209 const char * decBegin = source.first; 01210 const char * decCursor = decBegin; 01211 const char * decEnd = decCursor + source.second; 01212 01213 if ( !isContinuation ) { 01214 // find the first single quote 01215 while ( decCursor != decEnd ) { 01216 if ( *decCursor == '\'' ) break; 01217 else decCursor++; 01218 } 01219 01220 if ( decCursor == decEnd ) { 01221 // there wasn't a single single quote at all! 01222 // take the whole value to be in latin-1: 01223 KMIME_WARN << "No charset in extended-initial-value. " 01224 "Assuming \"iso-8859-1\"." << endl; 01225 value += QString::fromLatin1( decBegin, source.second ); 01226 return; 01227 } 01228 01229 QCString charset( decBegin, decCursor - decBegin + 1 ); 01230 01231 const char * oldDecCursor = ++decCursor; 01232 // find the second single quote (we ignore the language tag): 01233 while ( decCursor != decEnd ) { 01234 if ( *decCursor == '\'' ) break; 01235 else decCursor++; 01236 } 01237 if ( decCursor == decEnd ) { 01238 KMIME_WARN << "No language in extended-initial-value. " 01239 "Trying to recover." << endl; 01240 decCursor = oldDecCursor; 01241 } else 01242 decCursor++; 01243 01244 // decCursor now points to the start of the 01245 // "extended-other-values": 01246 01247 // 01248 // get the decoders: 01249 // 01250 01251 bool matchOK = false; 01252 textcodec = KGlobal::charsets()->codecForName( charset, matchOK ); 01253 if ( !matchOK ) { 01254 textcodec = 0; 01255 KMIME_WARN_UNKNOWN(Charset,charset); 01256 } 01257 } 01258 01259 if ( !rfc2231Codec ) { 01260 rfc2231Codec = Codec::codecForName("x-kmime-rfc2231"); 01261 assert( rfc2231Codec ); 01262 } 01263 01264 if ( !textcodec ) { 01265 value += QString::fromLatin1( decCursor, decEnd - decCursor ); 01266 return; 01267 } 01268 01269 Decoder * dec = rfc2231Codec->makeDecoder(); 01270 assert( dec ); 01271 01272 // 01273 // do the decoding: 01274 // 01275 01276 QByteArray buffer( rfc2231Codec->maxDecodedSizeFor( decEnd - decCursor ) ); 01277 QByteArray::Iterator bit = buffer.begin(); 01278 QByteArray::ConstIterator bend = buffer.end(); 01279 01280 if ( !dec->decode( decCursor, decEnd, bit, bend ) ) 01281 KMIME_WARN << rfc2231Codec->name() 01282 << " codec lies about it's maxDecodedSizeFor()\n" 01283 "result may be truncated" << endl; 01284 01285 value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() ); 01286 01287 kdDebug() << "value now: \"" << value << "\"" << endl; 01288 // cleanup: 01289 delete dec; 01290 } 01291 01292 // known issues: 01293 // - permutes rfc2231 continuations when the total number of parts 01294 // exceeds 10 (other-sections then becomes *xy, ie. two digits) 01295 01296 bool parseParameterList( const char* & scursor, const char * const send, 01297 QMap<QString,QString> & result, bool isCRLF ) { 01298 // parse the list into raw attribute-value pairs: 01299 QMap<QString,QStringOrQPair> rawParameterList; 01300 if (!parseRawParameterList( scursor, send, rawParameterList, isCRLF ) ) 01301 return false; 01302 01303 if ( rawParameterList.isEmpty() ) return true; 01304 01305 // decode rfc 2231 continuations and alternate charset encoding: 01306 01307 // NOTE: this code assumes that what QMapIterator delivers is sorted 01308 // by the key! 01309 01310 Codec * rfc2231Codec = 0; 01311 QTextCodec * textcodec = 0; 01312 QString attribute; 01313 QString value; 01314 enum Modes { NoMode = 0x0, Continued = 0x1, Encoded = 0x2 } mode; 01315 01316 QMapIterator<QString,QStringOrQPair> it, end = rawParameterList.end(); 01317 01318 for ( it = rawParameterList.begin() ; it != end ; ++it ) { 01319 if ( attribute.isNull() || !it.key().startsWith( attribute ) ) { 01320 // 01321 // new attribute: 01322 // 01323 01324 // store the last attribute/value pair in the result map now: 01325 if ( !attribute.isNull() ) result.insert( attribute, value ); 01326 // and extract the information from the new raw attribute: 01327 value = QString::null; 01328 attribute = it.key(); 01329 mode = NoMode; 01330 // is the value encoded? 01331 if ( attribute.endsWith( asterisk ) ) { 01332 attribute.truncate( attribute.length() - 1 ); 01333 mode = (Modes) ((int) mode | Encoded); 01334 } 01335 // is the value continued? 01336 if ( attribute.endsWith( asteriskZero ) ) { 01337 attribute.truncate( attribute.length() - 2 ); 01338 mode = (Modes) ((int) mode | Continued); 01339 } 01340 // 01341 // decode if necessary: 01342 // 01343 if ( mode & Encoded ) { 01344 decodeRFC2231Value( rfc2231Codec, textcodec, 01345 false, /* isn't continuation */ 01346 value, (*it).qpair ); 01347 } else { 01348 // not encoded. 01349 if ( (*it).qpair.first ) 01350 value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second ); 01351 else 01352 value += (*it).qstring; 01353 } 01354 01355 // 01356 // shortcut-processing when the value isn't encoded: 01357 // 01358 01359 if ( !(mode & Continued) ) { 01360 // save result already: 01361 result.insert( attribute, value ); 01362 // force begin of a new attribute: 01363 attribute = QString::null; 01364 } 01365 } else /* it.key().startsWith( attribute ) */ { 01366 // 01367 // continuation 01368 // 01369 01370 // ignore the section and trust QMap to have sorted the keys: 01371 if ( it.key().endsWith( asterisk ) ) { 01372 // encoded 01373 decodeRFC2231Value( rfc2231Codec, textcodec, 01374 true, /* is continuation */ 01375 value, (*it).qpair ); 01376 } else { 01377 // not encoded 01378 if ( (*it).qpair.first ) 01379 value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second ); 01380 else 01381 value += (*it).qstring; 01382 } 01383 } 01384 } 01385 01386 // write last attr/value pair: 01387 if ( !attribute.isNull() ) 01388 result.insert( attribute, value ); 01389 01390 return true; 01391 } 01392 01393 static const char * stdDayNames[] = { 01394 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" 01395 }; 01396 static const int stdDayNamesLen = sizeof stdDayNames / sizeof *stdDayNames; 01397 01398 static bool parseDayName( const char* & scursor, const char * const send ) 01399 { 01400 // check bounds: 01401 if ( send - scursor < 3 ) return false; 01402 01403 for ( int i = 0 ; i < stdDayNamesLen ; ++i ) 01404 if ( qstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) { 01405 scursor += 3; 01406 kdDebug() << "found " << stdDayNames[i] << endl; 01407 return true; 01408 } 01409 01410 return false; 01411 } 01412 01413 01414 static const char * stdMonthNames[] = { 01415 "Jan", "Feb", "Mar", "Apr", "May", "Jun", 01416 "Jul", "Aug", "Sep", "Oct", "Nov", "Dez" 01417 }; 01418 static const int stdMonthNamesLen = 01419 sizeof stdMonthNames / sizeof *stdMonthNames; 01420 01421 static bool parseMonthName( const char* & scursor, const char * const send, 01422 int & result ) 01423 { 01424 // check bounds: 01425 if ( send - scursor < 3 ) return false; 01426 01427 for ( result = 0 ; result < stdMonthNamesLen ; ++result ) 01428 if ( qstrnicmp( scursor, stdMonthNames[result], 3 ) == 0 ) { 01429 scursor += 3; 01430 return true; 01431 } 01432 01433 // not found: 01434 return false; 01435 } 01436 01437 static const struct { 01438 const char * tzName; 01439 long int secsEastOfGMT; 01440 } timeZones[] = { 01441 // rfc 822 timezones: 01442 { "GMT", 0 }, 01443 { "UT", 0 }, 01444 { "EDT", -4*3600 }, 01445 { "EST", -5*3600 }, 01446 { "MST", -5*3600 }, 01447 { "CST", -6*3600 }, 01448 { "MDT", -6*3600 }, 01449 { "MST", -7*3600 }, 01450 { "PDT", -7*3600 }, 01451 { "PST", -8*3600 }, 01452 // common, non-rfc-822 zones: 01453 { "CET", 1*3600 }, 01454 { "MET", 1*3600 }, 01455 { "UTC", 0 }, 01456 { "CEST", 2*3600 }, 01457 { "BST", 1*3600 }, 01458 // rfc 822 military timezones: 01459 { "Z", 0 }, 01460 { "A", -1*3600 }, 01461 { "B", -2*3600 }, 01462 { "C", -3*3600 }, 01463 { "D", -4*3600 }, 01464 { "E", -5*3600 }, 01465 { "F", -6*3600 }, 01466 { "G", -7*3600 }, 01467 { "H", -8*3600 }, 01468 { "I", -9*3600 }, 01469 // J is not used! 01470 { "K", -10*3600 }, 01471 { "L", -11*3600 }, 01472 { "M", -12*3600 }, 01473 { "N", 1*3600 }, 01474 { "O", 2*3600 }, 01475 { "P", 3*3600 }, 01476 { "Q", 4*3600 }, 01477 { "R", 5*3600 }, 01478 { "S", 6*3600 }, 01479 { "T", 7*3600 }, 01480 { "U", 8*3600 }, 01481 { "V", 9*3600 }, 01482 { "W", 10*3600 }, 01483 { "X", 11*3600 }, 01484 { "Y", 12*3600 }, 01485 }; 01486 static const int timeZonesLen = sizeof timeZones / sizeof *timeZones; 01487 01488 static bool parseAlphaNumericTimeZone( const char* & scursor, 01489 const char * const send, 01490 long int & secsEastOfGMT, 01491 bool & timeZoneKnown ) 01492 { 01493 QPair<const char*,int> maybeTimeZone(0,0); 01494 if ( !parseToken( scursor, send, maybeTimeZone, false /*no 8bit*/ ) ) 01495 return false; 01496 for ( int i = 0 ; i < timeZonesLen ; ++i ) 01497 if ( qstrnicmp( timeZones[i].tzName, 01498 maybeTimeZone.first, maybeTimeZone.second ) == 0 ) { 01499 scursor += maybeTimeZone.second; 01500 secsEastOfGMT = timeZones[i].secsEastOfGMT; 01501 timeZoneKnown = true; 01502 return true; 01503 } 01504 01505 // don't choke just because we don't happen to know the time zone 01506 KMIME_WARN_UNKNOWN(time zone,QCString( maybeTimeZone.first, maybeTimeZone.second+1 )); 01507 secsEastOfGMT = 0; 01508 timeZoneKnown = false; 01509 return true; 01510 } 01511 01512 // parse a number and return the number of digits parsed: 01513 static int parseDigits( const char* & scursor, const char * const send, 01514 int & result ) 01515 { 01516 result = 0; 01517 int digits = 0; 01518 for ( ; scursor != send && isdigit( *scursor ) ; scursor++, digits++ ) { 01519 result *= 10; 01520 result += int( *scursor - '0' ); 01521 } 01522 return digits; 01523 } 01524 01525 static bool parseTimeOfDay( const char* & scursor, const char * const send, 01526 int & hour, int & min, int & sec, bool isCRLF=false ) 01527 { 01528 // time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ] 01529 01530 // 01531 // 2DIGIT representing "hour": 01532 // 01533 if ( !parseDigits( scursor, send, hour ) ) return false; 01534 01535 eatCFWS( scursor, send, isCRLF ); 01536 if ( scursor == send || *scursor != ':' ) return false; 01537 scursor++; // eat ':' 01538 01539 eatCFWS( scursor, send, isCRLF ); 01540 if ( scursor == send ) return false; 01541 01542 // 01543 // 2DIGIT representing "minute": 01544 // 01545 if ( !parseDigits( scursor, send, min ) ) return false; 01546 01547 eatCFWS( scursor, send, isCRLF ); 01548 if ( scursor == send ) return true; // seconds are optional 01549 01550 // 01551 // let's see if we have a 2DIGIT representing "second": 01552 // 01553 if ( *scursor == ':' ) { 01554 // yepp, there are seconds: 01555 scursor++; // eat ':' 01556 eatCFWS( scursor, send, isCRLF ); 01557 if ( scursor == send ) return false; 01558 01559 if ( !parseDigits( scursor, send, sec ) ) return false; 01560 } else { 01561 sec = 0; 01562 } 01563 01564 return true; 01565 } 01566 01567 01568 bool parseTime( const char* & scursor, const char * send, 01569 int & hour, int & min, int & sec, long int & secsEastOfGMT, 01570 bool & timeZoneKnown, bool isCRLF ) 01571 { 01572 // time := time-of-day CFWS ( zone / obs-zone ) 01573 // 01574 // obs-zone := "UT" / "GMT" / 01575 // "EST" / "EDT" / ; -0500 / -0400 01576 // "CST" / "CDT" / ; -0600 / -0500 01577 // "MST" / "MDT" / ; -0700 / -0600 01578 // "PST" / "PDT" / ; -0800 / -0700 01579 // "A"-"I" / "a"-"i" / 01580 // "K"-"Z" / "k"-"z" 01581 01582 eatCFWS( scursor, send, isCRLF ); 01583 if ( scursor == send ) return false; 01584 01585 if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) ) 01586 return false; 01587 01588 eatCFWS( scursor, send, isCRLF ); 01589 if ( scursor == send ) { 01590 timeZoneKnown = false; 01591 secsEastOfGMT = 0; 01592 return true; // allow missing timezone 01593 } 01594 01595 timeZoneKnown = true; 01596 if ( *scursor == '+' || *scursor == '-' ) { 01597 // remember and eat '-'/'+': 01598 const char sign = *scursor++; 01599 // numerical timezone: 01600 int maybeTimeZone; 01601 if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) return false; 01602 secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 ); 01603 if ( sign == '-' ) { 01604 secsEastOfGMT *= -1; 01605 if ( secsEastOfGMT == 0 ) 01606 timeZoneKnown = false; // -0000 means indetermined tz 01607 } 01608 } else { 01609 // maybe alphanumeric timezone: 01610 if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) ) 01611 return false; 01612 } 01613 return true; 01614 } 01615 01616 01617 bool parseDateTime( const char* & scursor, const char * const send, 01618 Types::DateTime & result, bool isCRLF ) 01619 { 01620 // Parsing date-time; strict mode: 01621 // 01622 // date-time := [ [CFWS] day-name [CFWS] "," ] ; wday 01623 // (expanded) [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date 01624 // time 01625 // 01626 // day-name := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" 01627 // month-name := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" / 01628 // "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dez" 01629 01630 struct tm maybeDateTime = { 01631 #ifdef HAVE_TM_GMTOFF 01632 0, 0, // initializers for members tm_gmtoff and tm_zone 01633 #endif 01634 0, 0, 0, 0, 0, 0, 0, 0, 0 01635 }; 01636 01637 eatCFWS( scursor, send, isCRLF ); 01638 if ( scursor == send ) return false; 01639 01640 // 01641 // let's see if there's a day-of-week: 01642 // 01643 if ( parseDayName( scursor, send ) ) { 01644 eatCFWS( scursor, send, isCRLF ); 01645 if ( scursor == send ) return false; 01646 // day-name should be followed by ',' but we treat it as optional: 01647 if ( *scursor == ',' ) { 01648 scursor++; // eat ',' 01649 eatCFWS( scursor, send, isCRLF ); 01650 } 01651 } 01652 01653 // 01654 // 1*2DIGIT representing "day" (of month): 01655 // 01656 int maybeDay; 01657 if ( !parseDigits( scursor, send, maybeDay ) ) return false; 01658 01659 eatCFWS( scursor, send, isCRLF ); 01660 if ( scursor == send ) return false; 01661 01662 // success: store maybeDay in maybeDateTime: 01663 maybeDateTime.tm_mday = maybeDay; 01664 01665 // 01666 // month-name: 01667 // 01668 int maybeMonth = 0; 01669 if ( !parseMonthName( scursor, send, maybeMonth ) ) return false; 01670 if ( scursor == send ) return false; 01671 assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 ); 01672 01673 eatCFWS( scursor, send, isCRLF ); 01674 if ( scursor == send ) return false; 01675 01676 // success: store maybeMonth in maybeDateTime: 01677 maybeDateTime.tm_mon = maybeMonth; 01678 01679 // 01680 // 2*DIGIT representing "year": 01681 // 01682 int maybeYear; 01683 if ( !parseDigits( scursor, send, maybeYear ) ) return false; 01684 // RFC 2822 4.3 processing: 01685 if ( maybeYear < 50 ) 01686 maybeYear += 2000; 01687 else if ( maybeYear < 1000 ) 01688 maybeYear += 1900; 01689 // else keep as is 01690 if ( maybeYear < 1900 ) return false; // rfc2822, 3.3 01691 01692 eatCFWS( scursor, send, isCRLF ); 01693 if ( scursor == send ) return false; 01694 01695 // success: store maybeYear in maybeDateTime: 01696 maybeDateTime.tm_year = maybeYear - 1900; 01697 01698 // 01699 // time 01700 // 01701 int maybeHour, maybeMinute, maybeSecond; 01702 long int secsEastOfGMT; 01703 bool timeZoneKnown = true; 01704 01705 if ( !parseTime( scursor, send, 01706 maybeHour, maybeMinute, maybeSecond, 01707 secsEastOfGMT, timeZoneKnown, isCRLF ) ) 01708 return false; 01709 01710 // success: store everything in maybeDateTime: 01711 maybeDateTime.tm_hour = maybeHour; 01712 maybeDateTime.tm_min = maybeMinute; 01713 maybeDateTime.tm_sec = maybeSecond; 01714 maybeDateTime.tm_isdst = DateFormatter::isDaylight(); 01715 // now put everything together and check if mktime(3) likes it: 01716 result.time = mktime( &maybeDateTime ); 01717 if ( result.time == (time_t)(-1) ) return false; 01718 01719 // adjust to UTC/GMT: 01720 //result.time -= secsEastOfGMT; 01721 result.secsEastOfGMT = secsEastOfGMT; 01722 result.timeZoneKnown = timeZoneKnown; 01723 01724 return true; 01725 } 01726 01727 #if 0 01728 bool tryToMakeAnySenseOfDateString( const char* & scursor, 01729 const char * const send, 01730 time_t & result, bool isCRLF ) 01731 { 01732 return false; 01733 } 01734 #endif 01735 01736 } // namespace HeaderParsing 01737 01738 } // namespace KMime
KDE Logo
This file is part of the documentation for libkdenetwork Library Version 3.3.0.
Documentation copyright © 1996-2004 the KDE developers.
Generated on Thu Oct 21 19:46:16 2004 by doxygen 1.3.7 written by Dimitri van Heesch, © 1997-2003