Syndication Library
loader.cpp
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "loader.h"
00012 #include "dataretriever.h"
00013 #include "documentsource.h"
00014 #include "feed.h"
00015 #include "global.h"
00016 #include "parsercollection.h"
00017
00018 #include <kio/global.h>
00019 #include <kurl.h>
00020
00021 #include <QtCore/QBuffer>
00022 #include <QtCore/QRegExp>
00023 #include <QtCore/QStringList>
00024
00025 #include <boost/shared_ptr.hpp>
00026
00027
00028 #include <iostream>
00029
00030 namespace Syndication {
00031
00032 struct Loader::LoaderPrivate
00033 {
00034 LoaderPrivate() : retriever(0), lastError(Success),
00035 retrieverError(0)
00036 {
00037 }
00038
00039 ~LoaderPrivate()
00040 {
00041 delete retriever;
00042 }
00043
00044 DataRetriever* retriever;
00045 Syndication::ErrorCode lastError;
00046 int retrieverError;
00047 KUrl discoveredFeedURL;
00048 KUrl url;
00049 };
00050
00051 Loader* Loader::create()
00052 {
00053 return new Loader;
00054 }
00055
00056 Loader *Loader::create(QObject* object, const char* slot)
00057 {
00058 Loader *loader = create();
00059 connect(loader, SIGNAL(loadingComplete(Syndication::Loader*,
00060 Syndication::FeedPtr, Syndication::ErrorCode)),
00061 object, slot);
00062 return loader;
00063 }
00064
00065 Loader::Loader() : d(new LoaderPrivate)
00066 {
00067 }
00068
00069 Loader::~Loader()
00070 {
00071 delete d;
00072 }
00073
00074 void Loader::loadFrom(const KUrl& url)
00075 {
00076 loadFrom(url, new FileRetriever);
00077 }
00078
00079 void Loader::loadFrom(const KUrl &url, DataRetriever *retriever)
00080 {
00081 if (d->retriever != 0L)
00082 return;
00083
00084 d->url = url;
00085 d->retriever = retriever;
00086
00087 connect(d->retriever, SIGNAL(dataRetrieved(const QByteArray&, bool)),
00088 this, SLOT(slotRetrieverDone(const QByteArray&, bool)));
00089
00090 d->retriever->retrieveData(url);
00091 }
00092
00093 int Loader::retrieverError() const
00094 {
00095 return d->retrieverError;
00096 }
00097
00098 Syndication::ErrorCode Loader::errorCode() const
00099 {
00100 return d->lastError;
00101 }
00102
00103 void Loader::abort()
00104 {
00105 if (d && d->retriever)
00106 {
00107 d->retriever->abort();
00108 delete d->retriever;
00109 d->retriever = 0L;
00110 }
00111
00112 emit loadingComplete(this, FeedPtr(), Aborted);
00113 delete this;
00114 }
00115
00116 KUrl Loader::discoveredFeedURL() const
00117 {
00118 return d->discoveredFeedURL;
00119 }
00120
00121 void Loader::slotRetrieverDone(const QByteArray& data, bool success)
00122 {
00123 d->retrieverError = d->retriever->errorCode();
00124 ErrorCode status = Success;
00125 FeedPtr feed;
00126 bool isFileRetriever = dynamic_cast<FileRetriever*>(d->retriever) != 0;
00127 delete d->retriever;
00128 d->retriever = 0;
00129
00130 if (success)
00131 {
00132 DocumentSource src(data, d->url.url());
00133 feed = parserCollection()->parse(src);
00134
00135 if (parserCollection()->lastError() != Syndication::Success)
00136 {
00137 status = parserCollection()->lastError();
00138 discoverFeeds(data);
00139 }
00140 }
00141 else
00142 {
00143 if (isFileRetriever)
00144 {
00145
00146
00147 status = FileNotFound;
00148 std::cout << "file retriever error: " << d->retrieverError << std::endl;
00149 }
00150 else
00151 {
00152
00153 status = OtherRetrieverError;
00154 }
00155 }
00156
00157 emit loadingComplete(this, feed, status);
00158
00159 delete this;
00160 }
00161
00162 void Loader::discoverFeeds(const QByteArray &data)
00163 {
00164 QString str = QString(data).simplified();
00165 QString s2;
00166
00167
00168
00169
00170
00171
00172 QRegExp rx( "(?:REL)[^=]*=[^sAa]*(?:service.feed|ALTERNATE)[\\s]*[^s][^s](?:[^>]*)(?:HREF)[^=]*=[^A-Z0-9-_~,./$]*([^'\">\\s]*)", Qt::CaseInsensitive );
00173 if (rx.indexIn(str)!=-1)
00174 s2=rx.cap(1);
00175 else{
00176
00177 int pos=0;
00178 QStringList feeds;
00179 QString host=d->url.host();
00180 rx.setPattern("(?:<A )[^H]*(?:HREF)[^=]*=[^A-Z0-9-_~,./]*([^'\">\\s]*)");
00181 while ( pos >= 0 ) {
00182 pos = rx.indexIn( str, pos );
00183 s2=rx.cap(1);
00184 if (s2.endsWith(".rdf") || s2.endsWith(".rss") || s2.endsWith(".xml"))
00185 feeds.append(s2);
00186 if ( pos >= 0 ) {
00187 pos += rx.matchedLength();
00188 }
00189 }
00190
00191 KUrl testURL;
00192
00193 QStringList::const_iterator end( feeds.constEnd() );
00194 for ( QStringList::const_iterator it = feeds.constBegin(); it != end; ++it ) {
00195 testURL=*it;
00196 if (testURL.host()==host)
00197 {
00198 s2=*it;
00199 break;
00200 }
00201 }
00202 }
00203
00204 if (s2.isNull())
00205 {
00206 return;
00207 }
00208
00209 if (KUrl::isRelativeUrl(s2))
00210 {
00211 if (s2.startsWith("//"))
00212 {
00213 s2=s2.prepend(d->url.protocol()+':');
00214 d->discoveredFeedURL=s2;
00215 }
00216 else if (s2.startsWith('/'))
00217 {
00218 d->discoveredFeedURL=d->url;
00219 d->discoveredFeedURL.setPath(s2);
00220 }
00221 else
00222 {
00223 d->discoveredFeedURL=d->url;
00224 d->discoveredFeedURL.addPath(s2);
00225 }
00226 d->discoveredFeedURL.cleanPath();
00227 }
00228 else
00229 d->discoveredFeedURL=s2;
00230
00231 d->discoveredFeedURL.cleanPath();
00232 }
00233
00234 }
00235
00236 #include "loader.moc"