akregator/src/librss

loader.cpp

00001 /*
00002  * loader.cpp
00003  *
00004  * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org>
00005  *
00006  * This program is distributed in the hope that it will be useful, but WITHOUT
00007  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00008  * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the
00009  * accompanying file 'COPYING'.
00010  */
00011 #include "loader.h"
00012 #include "document.h"
00013 
00014 #include <kio/job.h>
00015 #include <kprocess.h>
00016 #include <kstaticdeleter.h>
00017 #include <kurl.h>
00018 #include <kdebug.h>
00019 
00020 #include <qdom.h>
00021 #include <qbuffer.h>
00022 #include <qregexp.h>
00023 #include <qstring.h>
00024 #include <qstringlist.h>
00025 #include <qtimer.h>
00026 
00027 using namespace RSS;
00028 
00029 DataRetriever::DataRetriever()
00030 {
00031 }
00032 
00033 DataRetriever::~DataRetriever()
00034 {
00035 }
00036 
00037 class FileRetriever::Private
00038 {
00039     public:
00040         
00041    Private()
00042       : buffer(NULL),
00043         lastError(0), job(NULL)
00044    {
00045    }
00046 
00047    ~Private()
00048    {
00049       delete buffer;
00050    }
00051 
00052    QBuffer *buffer;
00053    int lastError;
00054    KIO::Job *job;
00055    static KStaticDeleter<QString> userAgentsd;
00056    static QString* userAgent;
00057 };
00058 
00059 KStaticDeleter<QString> FileRetriever::Private::userAgentsd;
00060 QString* FileRetriever::Private::userAgent = 0L;
00061 FileRetriever::FileRetriever()
00062    : d(new Private)
00063 {
00064 }
00065 
00066 FileRetriever::~FileRetriever()
00067 {
00068    delete d;
00069 }
00070 
00071 bool FileRetriever::m_useCache = true;
00072 
00073 QString FileRetriever::userAgent()
00074 {
00075     if (Private::userAgent == 0L)
00076         FileRetriever::Private::userAgentsd.setObject(Private::userAgent, new QString);
00077     return *Private::userAgent;
00078 }
00079 
00080 void FileRetriever::setUserAgent(const QString &ua)
00081 {
00082     if (Private::userAgent == 0L)
00083         FileRetriever::Private::userAgentsd.setObject(Private::userAgent, new QString);
00084     (*Private::userAgent) = ua;
00085 }
00086 
00087 void FileRetriever::setUseCache(bool enabled)
00088 {
00089     m_useCache = enabled;
00090 }
00091 
00092 void FileRetriever::retrieveData(const KURL &url)
00093 {
00094    if (d->buffer)
00095       return;
00096 
00097    d->buffer = new QBuffer;
00098    d->buffer->open(IO_WriteOnly);
00099 
00100    KURL u=url;
00101 
00102    if (u.protocol()=="feed")
00103        u.setProtocol("http");
00104 
00105    d->job = KIO::get(u, false, false);
00106    d->job->addMetaData("cache", m_useCache ? "refresh" : "reload");
00107 
00108    QString ua = userAgent();
00109    if (!ua.isEmpty())
00110       d->job->addMetaData("UserAgent", ua);
00111 
00112 
00113    QTimer::singleShot(1000*90, this, SLOT(slotTimeout()));
00114 
00115    connect(d->job, SIGNAL(data(KIO::Job *, const QByteArray &)),
00116                 SLOT(slotData(KIO::Job *, const QByteArray &)));
00117    connect(d->job, SIGNAL(result(KIO::Job *)), SLOT(slotResult(KIO::Job *)));
00118    connect(d->job, SIGNAL(permanentRedirection(KIO::Job *, const KURL &, const KURL &)),
00119                 SLOT(slotPermanentRedirection(KIO::Job *, const KURL &, const KURL &)));
00120 }
00121 
00122 void FileRetriever::slotTimeout()
00123 {
00124     abort();
00125 
00126     delete d->buffer;
00127     d->buffer = NULL;
00128 
00129     d->lastError = KIO::ERR_SERVER_TIMEOUT;
00130 
00131     emit dataRetrieved(QByteArray(), false);
00132 }
00133 
00134 int FileRetriever::errorCode() const
00135 {
00136    return d->lastError;
00137 }
00138 
00139 void FileRetriever::slotData(KIO::Job *, const QByteArray &data)
00140 {
00141    d->buffer->writeBlock(data.data(), data.size());
00142 }
00143 
00144 void FileRetriever::slotResult(KIO::Job *job)
00145 {
00146    QByteArray data = d->buffer->buffer();
00147    data.detach();
00148 
00149    delete d->buffer;
00150    d->buffer = NULL;
00151 
00152    d->lastError = job->error();
00153    emit dataRetrieved(data, d->lastError == 0);
00154 }
00155 
00156 void FileRetriever::slotPermanentRedirection(KIO::Job *, const KURL &, const KURL &newUrl)
00157 {
00158    emit permanentRedirection(newUrl);
00159 }
00160 
00161 void FileRetriever::abort()
00162 {
00163     if (d->job)
00164     {
00165         d->job->kill(true);
00166         d->job = NULL;
00167     }
00168 }
00169 
00170 struct OutputRetriever::Private
00171 {
00172    Private() : process(NULL),
00173       buffer(NULL),
00174       lastError(0)
00175    {
00176    }
00177 
00178    ~Private()
00179    {
00180       delete process;
00181       delete buffer;
00182    }
00183 
00184    KShellProcess *process;
00185    QBuffer *buffer;
00186    int lastError;
00187 };
00188 
00189 OutputRetriever::OutputRetriever() :
00190    d(new Private)
00191 {
00192 }
00193 
00194 OutputRetriever::~OutputRetriever()
00195 {
00196    delete d;
00197 }
00198 
00199 void OutputRetriever::retrieveData(const KURL &url)
00200 {
00201    // Ignore subsequent calls if we didn't finish the previous job yet.
00202    if (d->buffer || d->process)
00203       return;
00204 
00205    d->buffer = new QBuffer;
00206    d->buffer->open(IO_WriteOnly);
00207 
00208    d->process = new KShellProcess();
00209    connect(d->process, SIGNAL(processExited(KProcess *)),
00210                        SLOT(slotExited(KProcess *)));
00211    connect(d->process, SIGNAL(receivedStdout(KProcess *, char *, int)),
00212                        SLOT(slotOutput(KProcess *, char *, int)));
00213    *d->process << url.path();
00214    d->process->start(KProcess::NotifyOnExit, KProcess::Stdout);
00215 }
00216 
00217 int OutputRetriever::errorCode() const
00218 {
00219    return d->lastError;
00220 }
00221 
00222 void OutputRetriever::slotOutput(KProcess *, char *data, int length)
00223 {
00224    d->buffer->writeBlock(data, length);
00225 }
00226 
00227 void OutputRetriever::slotExited(KProcess *p)
00228 {
00229    if (!p->normalExit())
00230       d->lastError = p->exitStatus();
00231 
00232    QByteArray data = d->buffer->buffer();
00233    data.detach();
00234 
00235    delete d->buffer;
00236    d->buffer = NULL;
00237 
00238    delete d->process;
00239    d->process = NULL;
00240 
00241    emit dataRetrieved(data, p->normalExit() && p->exitStatus() == 0);
00242 }
00243 
00244 struct Loader::Private
00245 {
00246    Private() : retriever(NULL),
00247       lastError(0)
00248    {
00249    }
00250 
00251    ~Private()
00252    {
00253       delete retriever;
00254    }
00255 
00256    DataRetriever *retriever;
00257    int lastError;
00258    KURL discoveredFeedURL;
00259    KURL url;
00260 };
00261 
00262 Loader *Loader::create()
00263 {
00264    return new Loader;
00265 }
00266 
00267 Loader *Loader::create(QObject *object, const char *slot)
00268 {
00269    Loader *loader = create();
00270    connect(loader, SIGNAL(loadingComplete(Loader *, Document, Status)),
00271            object, slot);
00272    return loader;
00273 }
00274 
00275 Loader::Loader() : d(new Private)
00276 {
00277 }
00278 
00279 Loader::~Loader()
00280 {
00281     delete d;
00282 }
00283 
00284 void Loader::loadFrom(const KURL &url, DataRetriever *retriever)
00285 {
00286    if (d->retriever != NULL)
00287       return;
00288 
00289    d->url=url;
00290    d->retriever = retriever;
00291 
00292    connect(d->retriever, SIGNAL(dataRetrieved(const QByteArray &, bool)),
00293            this, SLOT(slotRetrieverDone(const QByteArray &, bool)));
00294 
00295    d->retriever->retrieveData(url);
00296 }
00297 
00298 int Loader::errorCode() const
00299 {
00300    return d->lastError;
00301 }
00302 
00303 void Loader::abort()
00304 {
00305     if (d && d->retriever)
00306     {
00307         d->retriever->abort();
00308         delete d->retriever;
00309         d->retriever=NULL;
00310     }
00311     emit loadingComplete(this, QDomDocument(), Aborted);
00312     delete this;
00313 }
00314 
00315 const KURL &Loader::discoveredFeedURL() const
00316 {
00317    return d->discoveredFeedURL;
00318 }
00319 
00320 void Loader::slotRetrieverDone(const QByteArray &data, bool success)
00321 {
00322    d->lastError = d->retriever->errorCode();
00323 
00324    delete d->retriever;
00325    d->retriever = NULL;
00326 
00327    Document rssDoc;
00328    Status status = Success;
00329 
00330    if (success) {
00331       QDomDocument doc;
00332 
00333       /* Some servers insert whitespace before the <?xml...?> declaration.
00334        * QDom doesn't tolerate that (and it's right, that's invalid XML),
00335        * so we strip that.
00336        */
00337 
00338       const char *charData = data.data();
00339       int len = data.count();
00340 
00341       while (len && QChar(*charData).isSpace()) {
00342          --len;
00343          ++charData;
00344       }
00345 
00346       if ( len > 3 && QChar(*charData) == QChar(0357) ) { // 0357 0273 0277
00347               len -= 3;
00348               charData += 3;
00349       }
00350       QByteArray tmpData;
00351       tmpData.setRawData(charData, len);
00352 
00353       if (doc.setContent(tmpData))
00354       {
00355          rssDoc = Document(doc);
00356          if (!rssDoc.isValid())
00357          {
00358             discoverFeeds(tmpData);
00359             status = ParseError;
00360          }
00361       }
00362       else
00363       {
00364          discoverFeeds(tmpData);
00365          status = ParseError;
00366       }
00367 
00368       tmpData.resetRawData(charData, len);
00369    } else
00370       status = RetrieveError;
00371 
00372    emit loadingComplete(this, rssDoc, status);
00373 
00374    delete this;
00375 }
00376 
00377 void Loader::discoverFeeds(const QByteArray &data)
00378 {
00379     QString str = QString(data).simplifyWhiteSpace();
00380     QString s2;
00381     //QTextStream ts( &str, IO_WriteOnly );
00382     //ts << data.data();
00383 
00384     // "<[\\s]link[^>]*rel[\\s]=[\\s]\\\"[\\s]alternate[\\s]\\\"[^>]*>"
00385     // "type[\\s]=[\\s]\\\"application/rss+xml\\\""
00386     // "href[\\s]=[\\s]\\\"application/rss+xml\\\""
00387     QRegExp rx( "(?:REL)[^=]*=[^sAa]*(?:service.feed|ALTERNATE)[\\s]*[^s][^s](?:[^>]*)(?:HREF)[^=]*=[^A-Z0-9-_~,./$]*([^'\">\\s]*)", false);
00388     if (rx.search(str)!=-1)
00389         s2=rx.cap(1);
00390     else{
00391     // does not support Atom/RSS autodiscovery.. try finding feeds by brute force....
00392         int pos=0;
00393         QStringList feeds;
00394         QString host=d->url.host();
00395         rx.setPattern("(?:<A )[^H]*(?:HREF)[^=]*=[^A-Z0-9-_~,./]*([^'\">\\s]*)");
00396         while ( pos >= 0 ) {
00397             pos = rx.search( str, pos );
00398             s2=rx.cap(1);
00399             if (s2.endsWith(".rdf") || s2.endsWith(".rss") || s2.endsWith(".xml"))
00400                     feeds.append(s2);
00401             if ( pos >= 0 ) {
00402                 pos += rx.matchedLength();
00403             }
00404         }
00405 
00406         s2=feeds.first();
00407         KURL testURL;
00408         // loop through, prefer feeds on same host
00409         QStringList::Iterator end( feeds.end() );
00410         for ( QStringList::Iterator it = feeds.begin(); it != end; ++it ) {
00411             testURL=*it;
00412             if (testURL.host()==host)
00413             {
00414                 s2=*it;
00415                 break;
00416             }
00417         }
00418     }
00419 
00420     if (s2.isNull()) {
00421         //kdDebug() << "No feed found for a site" << endl;
00422         return;
00423     }
00424 
00425     if (KURL::isRelativeURL(s2))
00426     {
00427         if (s2.startsWith("//"))
00428         {
00429             s2=s2.prepend(d->url.protocol()+":");
00430             d->discoveredFeedURL=s2;
00431         }
00432         else if (s2.startsWith("/"))
00433         {
00434             d->discoveredFeedURL=d->url;
00435             d->discoveredFeedURL.setPath(s2);
00436         }
00437         else
00438         {
00439             d->discoveredFeedURL=d->url;
00440             d->discoveredFeedURL.addPath(s2);
00441         }
00442         d->discoveredFeedURL.cleanPath();
00443     }
00444     else
00445         d->discoveredFeedURL=s2;
00446 
00447     d->discoveredFeedURL.cleanPath();
00448 }
00449 
00450 #include "loader.moc"
00451 // vim:noet:ts=4
KDE Home | KDE Accessibility Home | Description of Access Keys