00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
#include "kmime_charfreq.h"
00016
00017
namespace KMime {
00018
00019 CharFreq::CharFreq(
const QByteArray & buf )
00020 : NUL(0),
00021 CTL(0),
00022 CR(0), LF(0),
00023 CRLF(0),
00024 printable(0),
00025 eightBit(0),
00026 total(0),
00027 lineMin(0xffffffff),
00028 lineMax(0),
00029 mTrailingWS(false),
00030 mLeadingFrom(false)
00031 {
00032
if ( !buf.isEmpty() )
00033 count( buf.data(), buf.size() );
00034 }
00035
00036 CharFreq::CharFreq(
const char * buf, size_t len )
00037 : NUL(0),
00038 CTL(0),
00039 CR(0), LF(0),
00040 CRLF(0),
00041 printable(0),
00042 eightBit(0),
00043 total(0),
00044 lineMin(0xffffffff),
00045 lineMax(0),
00046 mTrailingWS(false),
00047 mLeadingFrom(false)
00048 {
00049
if ( buf && len > 0 )
00050 count( buf, len );
00051 }
00052
00053
/** Returns true if @p ch is a horizontal tab or a space, false otherwise. */
static inline bool isWS( char ch ) {
  switch ( ch ) {
  case '\t':
  case ' ':
    return true;
  default:
    return false;
  }
}
00054
00055
void CharFreq::count(
const char * it, size_t len ) {
00056
00057
const char * end = it + len;
00058 uint currentLineLength = 0;
00059
00060
00061
char prevChar =
'\n';
00062
char prevPrevChar = 0;
00063
00064
for ( ; it != end ; ++it ) {
00065 ++currentLineLength;
00066
switch ( *it ) {
00067
case '\0': ++NUL;
break;
00068
case '\r': ++CR;
break;
00069
case '\n': ++LF;
00070
if ( prevChar ==
'\r' ) { --currentLineLength; ++CRLF; }
00071
if ( currentLineLength >= lineMax ) lineMax = currentLineLength-1;
00072
if ( currentLineLength <= lineMin ) lineMin = currentLineLength-1;
00073
if ( !mTrailingWS )
00074
if ( isWS( prevChar ) || ( prevChar ==
'\r' && isWS( prevPrevChar ) ) )
00075 mTrailingWS =
true;
00076 currentLineLength = 0;
00077
break;
00078
case 'F':
00079
if ( !mLeadingFrom )
00080
if ( prevChar ==
'\n' && end - it >= 5 && !qstrncmp(
"From ", it, 5 ) )
00081 mLeadingFrom =
true;
00082 ++printable;
00083
break;
00084
default:
00085 {
00086 uchar c = *it;
00087
if ( c ==
'\t' || c >=
' ' && c <=
'~' )
00088 ++printable;
00089
else if ( c == 127 || c <
' ' )
00090 ++CTL;
00091
else
00092 ++eightBit;
00093 }
00094 }
00095 prevPrevChar = prevChar;
00096 prevChar = *it;
00097 }
00098
00099
00100
if ( currentLineLength >= lineMax ) lineMax = currentLineLength;
00101
if ( currentLineLength <= lineMin ) lineMin = currentLineLength;
00102
00103
00104
if ( isWS( prevChar ) )
00105 mTrailingWS =
true;
00106
00107 total = len;
00108 }
00109
00110
bool CharFreq::isEightBitData()
const {
00111
return type() == EightBitData;
00112 }
00113
00114
bool CharFreq::isEightBitText()
const {
00115
return type() == EightBitText;
00116 }
00117
00118
bool CharFreq::isSevenBitData()
const {
00119
return type() == SevenBitData;
00120 }
00121
00122
bool CharFreq::isSevenBitText()
const {
00123
return type() == SevenBitText;
00124 }
00125
00126
bool CharFreq::hasTrailingWhitespace()
const {
00127
return mTrailingWS;
00128 }
00129
00130
bool CharFreq::hasLeadingFrom()
const {
00131
return mLeadingFrom;
00132 }
00133
00134 CharFreq::Type CharFreq::type()
const {
00135
#if 0
00136
qDebug(
"Total: %d; NUL: %d; CTL: %d;\n"
00137
"CR: %d; LF: %d; CRLF: %d;\n"
00138
"lineMin: %d; lineMax: %d;\n"
00139
"printable: %d; eightBit: %d;\n"
00140
"trailing whitespace: %s;\n"
00141
"leading 'From ': %s;\n",
00142 total, NUL, CTL, CR, LF, CRLF, lineMin, lineMax,
00143 printable, eightBit,
00144 mTrailingWS ?
"yes" :
"no" , mLeadingFrom ?
"yes" :
"no" );
00145
#endif
00146
if ( NUL )
00147
return Binary;
00148
00149
00150
if ( eightBit ) {
00151
if ( lineMax > 988 )
return EightBitData;
00152
if ( CR != CRLF || controlCodesRatio() > 0.2 )
return EightBitData;
00153
return EightBitText;
00154 }
00155
00156
00157
if ( lineMax > 988 )
return SevenBitData;
00158
if ( CR != CRLF || controlCodesRatio() > 0.2 )
return SevenBitData;
00159
00160
00161
return SevenBitText;
00162 }
00163
00164
float CharFreq::printableRatio()
const {
00165
if ( total )
return float(printable) / float(total);
00166
else return 0;
00167 }
00168
00169
float CharFreq::controlCodesRatio()
const {
00170
if ( total )
return float(CTL) / float(total);
00171
else return 0;
00172 }
00173
00174 }
00175
00176