qBittorrent
RSS::Private::Parser Class Reference

#include <rss_parser.h>

Inheritance diagram for RSS::Private::Parser:
Collaboration diagram for RSS::Private::Parser:

Signals

void finished (const RSS::Private::ParsingResult &result)
 

Public Member Functions

 Parser (QString lastBuildDate)
 
void parse (const QByteArray &feedData)
 

Private Member Functions

Q_INVOKABLE void parse_impl (const QByteArray &feedData)
 
void parseRssArticle (QXmlStreamReader &xml)
 
void parseRSSChannel (QXmlStreamReader &xml)
 
void parseAtomArticle (QXmlStreamReader &xml)
 
void parseAtomChannel (QXmlStreamReader &xml)
 
void addArticle (QVariantHash article)
 

Private Attributes

QString m_baseUrl
 
ParsingResult m_result
 
QSet< QString > m_articleIDs
 

Detailed Description

Definition at line 52 of file rss_parser.h.

Constructor & Destructor Documentation

◆ Parser()

Parser::Parser ( QString  lastBuildDate)
explicit

Definition at line 545 of file rss_parser.cpp.

546 {
547  m_result.lastBuildDate = lastBuildDate;
548 }
ParsingResult m_result
Definition: rss_parser.h:72

References RSS::Private::ParsingResult::lastBuildDate, and m_result.

Member Function Documentation

◆ addArticle()

void Parser::addArticle ( QVariantHash  article)
private

Definition at line 822 of file rss_parser.cpp.

823 {
824  QVariant &torrentURL = article[Article::KeyTorrentURL];
825  if (torrentURL.toString().isEmpty())
826  torrentURL = article.value(Article::KeyLink);
827 
828  // If item does not have an ID, fall back to some other identifier.
829  QVariant &localId = article[Article::KeyId];
830  if (localId.toString().isEmpty())
831  {
832  localId = article.value(Article::KeyTorrentURL);
833  if (localId.toString().isEmpty())
834  {
835  localId = article.value(Article::KeyTitle);
836  if (localId.toString().isEmpty())
837  {
838  // The article could not be uniquely identified
839  // since it has no appropriate data.
840  // Just ignore it.
841  return;
842  }
843  }
844  }
845 
846  if (m_articleIDs.contains(localId.toString()))
847  {
848  // The article could not be uniquely identified
849  // since the Feed has duplicate identifiers.
850  // Just ignore it.
851  return;
852  }
853 
854  m_articleIDs.insert(localId.toString());
855  m_result.articles.prepend(article);
856 }
static const QString KeyLink
Definition: rss_article.h:59
static const QString KeyId
Definition: rss_article.h:53
static const QString KeyTitle
Definition: rss_article.h:55
static const QString KeyTorrentURL
Definition: rss_article.h:58
QSet< QString > m_articleIDs
Definition: rss_parser.h:73
QList< QVariantHash > articles
Definition: rss_parser.h:49

References RSS::Private::ParsingResult::articles, RSS::Article::KeyId, RSS::Article::KeyLink, RSS::Article::KeyTitle, RSS::Article::KeyTorrentURL, m_articleIDs, and m_result.

Referenced by parseAtomArticle(), and parseRssArticle().

Here is the caller graph for this function:

◆ finished

void RSS::Private::Parser::finished ( const RSS::Private::ParsingResult &  result)
signal

Referenced by RSS::Feed::Feed(), and parse_impl().

Here is the caller graph for this function:

◆ parse()

void Parser::parse ( const QByteArray &  feedData)

Definition at line 550 of file rss_parser.cpp.

551 {
552  QMetaObject::invokeMethod(this, [this, feedData]() { parse_impl(feedData); }
553  , Qt::QueuedConnection);
554 }
Q_INVOKABLE void parse_impl(const QByteArray &feedData)
Definition: rss_parser.cpp:557

References parse_impl().

Referenced by RSS::Feed::handleDownloadFinished().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ parse_impl()

void Parser::parse_impl ( const QByteArray &  feedData)
private

Definition at line 557 of file rss_parser.cpp.

558 {
559  QXmlStreamReader xml(feedData);
560  XmlStreamEntityResolver resolver;
561  xml.setEntityResolver(&resolver);
562  bool foundChannel = false;
563 
564  while (xml.readNextStartElement())
565  {
566  if (xml.name() == QLatin1String("rss"))
567  {
568  // Find channels
569  while (xml.readNextStartElement())
570  {
571  if (xml.name() == QLatin1String("channel"))
572  {
573  parseRSSChannel(xml);
574  foundChannel = true;
575  break;
576  }
577 
578  qDebug() << "Skip rss item: " << xml.name();
579  xml.skipCurrentElement();
580  }
581  break;
582  }
583  if (xml.name() == QLatin1String("feed"))
584  { // Atom feed
585  parseAtomChannel(xml);
586  foundChannel = true;
587  break;
588  }
589 
590  qDebug() << "Skip root item: " << xml.name();
591  xml.skipCurrentElement();
592  }
593 
594  if (!foundChannel)
595  {
596  m_result.error = tr("Invalid RSS feed.");
597  }
598  else if (xml.hasError())
599  {
600  m_result.error = tr("%1 (line: %2, column: %3, offset: %4).")
601  .arg(xml.errorString()).arg(xml.lineNumber())
602  .arg(xml.columnNumber()).arg(xml.characterOffset());
603  }
604 
605  emit finished(m_result);
606  m_result.articles.clear(); // clear articles only
607  m_articleIDs.clear();
608 }
void parseRSSChannel(QXmlStreamReader &xml)
Definition: rss_parser.cpp:673
void parseAtomChannel(QXmlStreamReader &xml)
Definition: rss_parser.cpp:787
void finished(const RSS::Private::ParsingResult &result)

References RSS::Private::ParsingResult::articles, RSS::Private::ParsingResult::error, finished(), m_articleIDs, m_result, parseAtomChannel(), and parseRSSChannel().

Referenced by parse().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ parseAtomArticle()

void Parser::parseAtomArticle ( QXmlStreamReader &  xml)
private

Definition at line 706 of file rss_parser.cpp.

707 {
708  QVariantHash article;
709  bool doubleContent = false;
710 
711  while (!xml.atEnd())
712  {
713  xml.readNext();
714  const QString name(xml.name().toString());
715 
716  if (xml.isEndElement() && (name == QLatin1String("entry")))
717  break;
718 
719  if (xml.isStartElement())
720  {
721  if (name == QLatin1String("title"))
722  {
723  article[Article::KeyTitle] = xml.readElementText().trimmed();
724  }
725  else if (name == QLatin1String("link"))
726  {
727  const QString link = (xml.attributes().isEmpty()
728  ? xml.readElementText().trimmed()
729  : xml.attributes().value(QLatin1String("href")).toString());
730 
731  if (link.startsWith(QLatin1String("magnet:"), Qt::CaseInsensitive))
732  article[Article::KeyTorrentURL] = link; // magnet link instead of a news URL
733  else
734  // Atom feeds can have relative links, work around this and
735  // take the stress of figuring article full URI from UI
736  // Assemble full URI
737  article[Article::KeyLink] = (m_baseUrl.isEmpty() ? link : m_baseUrl + link);
738 
739  }
740  else if ((name == QLatin1String("summary")) || (name == QLatin1String("content")))
741  {
742  if (doubleContent)
743  { // Duplicate content -> ignore
744  xml.skipCurrentElement();
745  continue;
746  }
747 
748  // Try to also parse broken articles, which don't use html '&' escapes
749  // Actually works great for non-broken content too
750  const QString feedText = xml.readElementText(QXmlStreamReader::IncludeChildElements).trimmed();
751  if (!feedText.isEmpty())
752  {
753  article[Article::KeyDescription] = feedText;
754  doubleContent = true;
755  }
756  }
757  else if (name == QLatin1String("updated"))
758  {
759  // ATOM uses standard compliant date, don't do fancy stuff
760  const QDateTime articleDate = QDateTime::fromString(xml.readElementText().trimmed(), Qt::ISODate);
761  article[Article::KeyDate] = (articleDate.isValid() ? articleDate : QDateTime::currentDateTime());
762  }
763  else if (name == QLatin1String("author"))
764  {
765  while (xml.readNextStartElement())
766  {
767  if (xml.name() == QLatin1String("name"))
768  article[Article::KeyAuthor] = xml.readElementText().trimmed();
769  else
770  xml.skipCurrentElement();
771  }
772  }
773  else if (name == QLatin1String("id"))
774  {
775  article[Article::KeyId] = xml.readElementText().trimmed();
776  }
777  else
778  {
779  article[name] = xml.readElementText(QXmlStreamReader::IncludeChildElements);
780  }
781  }
782  }
783 
784  addArticle(article);
785 }
static const QString KeyAuthor
Definition: rss_article.h:56
static const QString KeyDate
Definition: rss_article.h:54
static const QString KeyDescription
Definition: rss_article.h:57
void addArticle(QVariantHash article)
Definition: rss_parser.cpp:822

References addArticle(), RSS::Article::KeyAuthor, RSS::Article::KeyDate, RSS::Article::KeyDescription, RSS::Article::KeyId, RSS::Article::KeyLink, RSS::Article::KeyTitle, RSS::Article::KeyTorrentURL, and m_baseUrl.

Referenced by parseAtomChannel().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ parseAtomChannel()

void Parser::parseAtomChannel ( QXmlStreamReader &  xml)
private

Definition at line 787 of file rss_parser.cpp.

788 {
789  m_baseUrl = xml.attributes().value("xml:base").toString();
790 
791  while (!xml.atEnd())
792  {
793  xml.readNext();
794 
795  if (xml.isStartElement())
796  {
797  if (xml.name() == QLatin1String("title"))
798  {
799  m_result.title = xml.readElementText();
800  }
801  else if (xml.name() == QLatin1String("updated"))
802  {
803  const QString lastBuildDate = xml.readElementText();
804  if (!lastBuildDate.isEmpty())
805  {
806  if (m_result.lastBuildDate == lastBuildDate)
807  {
808  qDebug() << "The RSS feed has not changed since last time, aborting parsing.";
809  return;
810  }
811  m_result.lastBuildDate = lastBuildDate;
812  }
813  }
814  else if (xml.name() == QLatin1String("entry"))
815  {
816  parseAtomArticle(xml);
817  }
818  }
819  }
820 }
void parseAtomArticle(QXmlStreamReader &xml)
Definition: rss_parser.cpp:706

References RSS::Private::ParsingResult::lastBuildDate, m_baseUrl, m_result, parseAtomArticle(), and RSS::Private::ParsingResult::title.

Referenced by parse_impl().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ parseRssArticle()

void Parser::parseRssArticle ( QXmlStreamReader &  xml)
private

Definition at line 610 of file rss_parser.cpp.

611 {
612  QVariantHash article;
613  QString altTorrentUrl;
614 
615  while (!xml.atEnd())
616  {
617  xml.readNext();
618  const QString name(xml.name().toString());
619 
620  if (xml.isEndElement() && (name == QLatin1String("item")))
621  break;
622 
623  if (xml.isStartElement())
624  {
625  if (name == QLatin1String("title"))
626  {
627  article[Article::KeyTitle] = xml.readElementText().trimmed();
628  }
629  else if (name == QLatin1String("enclosure"))
630  {
631  if (xml.attributes().value("type") == QLatin1String("application/x-bittorrent"))
632  article[Article::KeyTorrentURL] = xml.attributes().value(QLatin1String("url")).toString();
633  else if (xml.attributes().value("type").isEmpty())
634  altTorrentUrl = xml.attributes().value(QLatin1String("url")).toString();
635  }
636  else if (name == QLatin1String("link"))
637  {
638  const QString text {xml.readElementText().trimmed()};
639  if (text.startsWith(QLatin1String("magnet:"), Qt::CaseInsensitive))
640  article[Article::KeyTorrentURL] = text; // magnet link instead of a news URL
641  else
642  article[Article::KeyLink] = text;
643  }
644  else if (name == QLatin1String("description"))
645  {
646  article[Article::KeyDescription] = xml.readElementText(QXmlStreamReader::IncludeChildElements);
647  }
648  else if (name == QLatin1String("pubDate"))
649  {
650  article[Article::KeyDate] = parseDate(xml.readElementText().trimmed());
651  }
652  else if (name == QLatin1String("author"))
653  {
654  article[Article::KeyAuthor] = xml.readElementText().trimmed();
655  }
656  else if (name == QLatin1String("guid"))
657  {
658  article[Article::KeyId] = xml.readElementText().trimmed();
659  }
660  else
661  {
662  article[name] = xml.readElementText(QXmlStreamReader::IncludeChildElements);
663  }
664  }
665  }
666 
667  if (article[Article::KeyTorrentURL].toString().isEmpty())
668  article[Article::KeyTorrentURL] = altTorrentUrl;
669 
670  addArticle(article);
671 }
QDateTime parseDate(const QString &string)
Definition: rss_parser.cpp:361
QString toString(const lt::socket_type_t socketType)
Definition: session.cpp:183

References addArticle(), RSS::Article::KeyAuthor, RSS::Article::KeyDate, RSS::Article::KeyDescription, RSS::Article::KeyId, RSS::Article::KeyLink, RSS::Article::KeyTitle, RSS::Article::KeyTorrentURL, anonymous_namespace{rss_parser.cpp}::parseDate(), and anonymous_namespace{session.cpp}::toString().

Referenced by parseRSSChannel().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ parseRSSChannel()

void Parser::parseRSSChannel ( QXmlStreamReader &  xml)
private

Definition at line 673 of file rss_parser.cpp.

674 {
675  while (!xml.atEnd())
676  {
677  xml.readNext();
678 
679  if (xml.isStartElement())
680  {
681  if (xml.name() == QLatin1String("title"))
682  {
683  m_result.title = xml.readElementText();
684  }
685  else if (xml.name() == QLatin1String("lastBuildDate"))
686  {
687  const QString lastBuildDate = xml.readElementText();
688  if (!lastBuildDate.isEmpty())
689  {
690  if (m_result.lastBuildDate == lastBuildDate)
691  {
692  qDebug() << "The RSS feed has not changed since last time, aborting parsing.";
693  return;
694  }
695  m_result.lastBuildDate = lastBuildDate;
696  }
697  }
698  else if (xml.name() == QLatin1String("item"))
699  {
700  parseRssArticle(xml);
701  }
702  }
703  }
704 }
void parseRssArticle(QXmlStreamReader &xml)
Definition: rss_parser.cpp:610

References RSS::Private::ParsingResult::lastBuildDate, m_result, parseRssArticle(), and RSS::Private::ParsingResult::title.

Referenced by parse_impl().

Here is the call graph for this function:
Here is the caller graph for this function:

Member Data Documentation

◆ m_articleIDs

QSet<QString> RSS::Private::Parser::m_articleIDs
private

Definition at line 73 of file rss_parser.h.

Referenced by addArticle(), and parse_impl().

◆ m_baseUrl

QString RSS::Private::Parser::m_baseUrl
private

Definition at line 71 of file rss_parser.h.

Referenced by parseAtomArticle(), and parseAtomChannel().

◆ m_result

ParsingResult RSS::Private::Parser::m_result
private

Definition at line 72 of file rss_parser.h.

Referenced by addArticle(), parse_impl(), parseAtomChannel(), Parser(), and parseRSSChannel().


The documentation for this class was generated from the following files: