00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "loader.h"
00012 #include "document.h"
00013
00014 #include <kio/job.h>
00015 #include <kprocess.h>
00016 #include <kstaticdeleter.h>
00017 #include <kurl.h>
00018 #include <kdebug.h>
00019
00020 #include <qdom.h>
00021 #include <qbuffer.h>
00022 #include <qregexp.h>
00023 #include <qstring.h>
00024 #include <qstringlist.h>
00025 #include <qtimer.h>
00026
00027 using namespace RSS;
00028
00029 DataRetriever::DataRetriever()
00030 {
00031 }
00032
00033 DataRetriever::~DataRetriever()
00034 {
00035 }
00036
00037 class FileRetriever::Private
00038 {
00039 public:
00040
00041 Private()
00042 : buffer(NULL),
00043 lastError(0), job(NULL)
00044 {
00045 }
00046
00047 ~Private()
00048 {
00049 delete buffer;
00050 }
00051
00052 QBuffer *buffer;
00053 int lastError;
00054 KIO::Job *job;
00055 static KStaticDeleter<QString> userAgentsd;
00056 static QString* userAgent;
00057 };
00058
00059 KStaticDeleter<QString> FileRetriever::Private::userAgentsd;
00060 QString* FileRetriever::Private::userAgent = 0L;
00061 FileRetriever::FileRetriever()
00062 : d(new Private)
00063 {
00064 }
00065
00066 FileRetriever::~FileRetriever()
00067 {
00068 delete d;
00069 }
00070
00071 bool FileRetriever::m_useCache = true;
00072
00073 QString FileRetriever::userAgent()
00074 {
00075 if (Private::userAgent == 0L)
00076 FileRetriever::Private::userAgentsd.setObject(Private::userAgent, new QString);
00077 return *Private::userAgent;
00078 }
00079
00080 void FileRetriever::setUserAgent(const QString &ua)
00081 {
00082 if (Private::userAgent == 0L)
00083 FileRetriever::Private::userAgentsd.setObject(Private::userAgent, new QString);
00084 (*Private::userAgent) = ua;
00085 }
00086
00087 void FileRetriever::setUseCache(bool enabled)
00088 {
00089 m_useCache = enabled;
00090 }
00091
00092 void FileRetriever::retrieveData(const KURL &url)
00093 {
00094 if (d->buffer)
00095 return;
00096
00097 d->buffer = new QBuffer;
00098 d->buffer->open(IO_WriteOnly);
00099
00100 KURL u=url;
00101
00102 if (u.protocol()=="feed")
00103 u.setProtocol("http");
00104
00105 d->job = KIO::get(u, false, false);
00106 d->job->addMetaData("cache", m_useCache ? "refresh" : "reload");
00107
00108 QString ua = userAgent();
00109 if (!ua.isEmpty())
00110 d->job->addMetaData("UserAgent", ua);
00111
00112
00113 QTimer::singleShot(1000*90, this, SLOT(slotTimeout()));
00114
00115 connect(d->job, SIGNAL(data(KIO::Job *, const QByteArray &)),
00116 SLOT(slotData(KIO::Job *, const QByteArray &)));
00117 connect(d->job, SIGNAL(result(KIO::Job *)), SLOT(slotResult(KIO::Job *)));
00118 connect(d->job, SIGNAL(permanentRedirection(KIO::Job *, const KURL &, const KURL &)),
00119 SLOT(slotPermanentRedirection(KIO::Job *, const KURL &, const KURL &)));
00120 }
00121
00122 void FileRetriever::slotTimeout()
00123 {
00124 abort();
00125
00126 delete d->buffer;
00127 d->buffer = NULL;
00128
00129 d->lastError = KIO::ERR_SERVER_TIMEOUT;
00130
00131 emit dataRetrieved(QByteArray(), false);
00132 }
00133
00134 int FileRetriever::errorCode() const
00135 {
00136 return d->lastError;
00137 }
00138
00139 void FileRetriever::slotData(KIO::Job *, const QByteArray &data)
00140 {
00141 d->buffer->writeBlock(data.data(), data.size());
00142 }
00143
00144 void FileRetriever::slotResult(KIO::Job *job)
00145 {
00146 QByteArray data = d->buffer->buffer();
00147 data.detach();
00148
00149 delete d->buffer;
00150 d->buffer = NULL;
00151
00152 d->lastError = job->error();
00153 emit dataRetrieved(data, d->lastError == 0);
00154 }
00155
00156 void FileRetriever::slotPermanentRedirection(KIO::Job *, const KURL &, const KURL &newUrl)
00157 {
00158 emit permanentRedirection(newUrl);
00159 }
00160
00161 void FileRetriever::abort()
00162 {
00163 if (d->job)
00164 {
00165 d->job->kill(true);
00166 d->job = NULL;
00167 }
00168 }
00169
00170 struct OutputRetriever::Private
00171 {
00172 Private() : process(NULL),
00173 buffer(NULL),
00174 lastError(0)
00175 {
00176 }
00177
00178 ~Private()
00179 {
00180 delete process;
00181 delete buffer;
00182 }
00183
00184 KShellProcess *process;
00185 QBuffer *buffer;
00186 int lastError;
00187 };
00188
00189 OutputRetriever::OutputRetriever() :
00190 d(new Private)
00191 {
00192 }
00193
00194 OutputRetriever::~OutputRetriever()
00195 {
00196 delete d;
00197 }
00198
00199 void OutputRetriever::retrieveData(const KURL &url)
00200 {
00201
00202 if (d->buffer || d->process)
00203 return;
00204
00205 d->buffer = new QBuffer;
00206 d->buffer->open(IO_WriteOnly);
00207
00208 d->process = new KShellProcess();
00209 connect(d->process, SIGNAL(processExited(KProcess *)),
00210 SLOT(slotExited(KProcess *)));
00211 connect(d->process, SIGNAL(receivedStdout(KProcess *, char *, int)),
00212 SLOT(slotOutput(KProcess *, char *, int)));
00213 *d->process << url.path();
00214 d->process->start(KProcess::NotifyOnExit, KProcess::Stdout);
00215 }
00216
00217 int OutputRetriever::errorCode() const
00218 {
00219 return d->lastError;
00220 }
00221
00222 void OutputRetriever::slotOutput(KProcess *, char *data, int length)
00223 {
00224 d->buffer->writeBlock(data, length);
00225 }
00226
00227 void OutputRetriever::slotExited(KProcess *p)
00228 {
00229 if (!p->normalExit())
00230 d->lastError = p->exitStatus();
00231
00232 QByteArray data = d->buffer->buffer();
00233 data.detach();
00234
00235 delete d->buffer;
00236 d->buffer = NULL;
00237
00238 delete d->process;
00239 d->process = NULL;
00240
00241 emit dataRetrieved(data, p->normalExit() && p->exitStatus() == 0);
00242 }
00243
00244 struct Loader::Private
00245 {
00246 Private() : retriever(NULL),
00247 lastError(0)
00248 {
00249 }
00250
00251 ~Private()
00252 {
00253 delete retriever;
00254 }
00255
00256 DataRetriever *retriever;
00257 int lastError;
00258 KURL discoveredFeedURL;
00259 KURL url;
00260 };
00261
00262 Loader *Loader::create()
00263 {
00264 return new Loader;
00265 }
00266
00267 Loader *Loader::create(QObject *object, const char *slot)
00268 {
00269 Loader *loader = create();
00270 connect(loader, SIGNAL(loadingComplete(Loader *, Document, Status)),
00271 object, slot);
00272 return loader;
00273 }
00274
00275 Loader::Loader() : d(new Private)
00276 {
00277 }
00278
00279 Loader::~Loader()
00280 {
00281 delete d;
00282 }
00283
00284 void Loader::loadFrom(const KURL &url, DataRetriever *retriever)
00285 {
00286 if (d->retriever != NULL)
00287 return;
00288
00289 d->url=url;
00290 d->retriever = retriever;
00291
00292 connect(d->retriever, SIGNAL(dataRetrieved(const QByteArray &, bool)),
00293 this, SLOT(slotRetrieverDone(const QByteArray &, bool)));
00294
00295 d->retriever->retrieveData(url);
00296 }
00297
00298 int Loader::errorCode() const
00299 {
00300 return d->lastError;
00301 }
00302
00303 void Loader::abort()
00304 {
00305 if (d && d->retriever)
00306 {
00307 d->retriever->abort();
00308 delete d->retriever;
00309 d->retriever=NULL;
00310 }
00311 emit loadingComplete(this, QDomDocument(), Aborted);
00312 delete this;
00313 }
00314
00315 const KURL &Loader::discoveredFeedURL() const
00316 {
00317 return d->discoveredFeedURL;
00318 }
00319
00320 void Loader::slotRetrieverDone(const QByteArray &data, bool success)
00321 {
00322 d->lastError = d->retriever->errorCode();
00323
00324 delete d->retriever;
00325 d->retriever = NULL;
00326
00327 Document rssDoc;
00328 Status status = Success;
00329
00330 if (success) {
00331 QDomDocument doc;
00332
00333
00334
00335
00336
00337
00338 const char *charData = data.data();
00339 int len = data.count();
00340
00341 while (len && QChar(*charData).isSpace()) {
00342 --len;
00343 ++charData;
00344 }
00345
00346 if ( len > 3 && QChar(*charData) == QChar(0357) ) {
00347 len -= 3;
00348 charData += 3;
00349 }
00350 QByteArray tmpData;
00351 tmpData.setRawData(charData, len);
00352
00353 if (doc.setContent(tmpData))
00354 {
00355 rssDoc = Document(doc);
00356 if (!rssDoc.isValid())
00357 {
00358 discoverFeeds(tmpData);
00359 status = ParseError;
00360 }
00361 }
00362 else
00363 {
00364 discoverFeeds(tmpData);
00365 status = ParseError;
00366 }
00367
00368 tmpData.resetRawData(charData, len);
00369 } else
00370 status = RetrieveError;
00371
00372 emit loadingComplete(this, rssDoc, status);
00373
00374 delete this;
00375 }
00376
00377 void Loader::discoverFeeds(const QByteArray &data)
00378 {
00379 QString str = QString(data).simplifyWhiteSpace();
00380 QString s2;
00381
00382
00383
00384
00385
00386
00387 QRegExp rx( "(?:REL)[^=]*=[^sAa]*(?:service.feed|ALTERNATE)[\\s]*[^s][^s](?:[^>]*)(?:HREF)[^=]*=[^A-Z0-9-_~,./$]*([^'\">\\s]*)", false);
00388 if (rx.search(str)!=-1)
00389 s2=rx.cap(1);
00390 else{
00391
00392 int pos=0;
00393 QStringList feeds;
00394 QString host=d->url.host();
00395 rx.setPattern("(?:<A )[^H]*(?:HREF)[^=]*=[^A-Z0-9-_~,./]*([^'\">\\s]*)");
00396 while ( pos >= 0 ) {
00397 pos = rx.search( str, pos );
00398 s2=rx.cap(1);
00399 if (s2.endsWith(".rdf") || s2.endsWith(".rss") || s2.endsWith(".xml"))
00400 feeds.append(s2);
00401 if ( pos >= 0 ) {
00402 pos += rx.matchedLength();
00403 }
00404 }
00405
00406 s2=feeds.first();
00407 KURL testURL;
00408
00409 QStringList::Iterator end( feeds.end() );
00410 for ( QStringList::Iterator it = feeds.begin(); it != end; ++it ) {
00411 testURL=*it;
00412 if (testURL.host()==host)
00413 {
00414 s2=*it;
00415 break;
00416 }
00417 }
00418 }
00419
00420 if (s2.isNull()) {
00421
00422 return;
00423 }
00424
00425 if (KURL::isRelativeURL(s2))
00426 {
00427 if (s2.startsWith("//"))
00428 {
00429 s2=s2.prepend(d->url.protocol()+":");
00430 d->discoveredFeedURL=s2;
00431 }
00432 else if (s2.startsWith("/"))
00433 {
00434 d->discoveredFeedURL=d->url;
00435 d->discoveredFeedURL.setPath(s2);
00436 }
00437 else
00438 {
00439 d->discoveredFeedURL=d->url;
00440 d->discoveredFeedURL.addPath(s2);
00441 }
00442 d->discoveredFeedURL.cleanPath();
00443 }
00444 else
00445 d->discoveredFeedURL=s2;
00446
00447 d->discoveredFeedURL.cleanPath();
00448 }
00449
00450 #include "loader.moc"
00451