53#include <QWebEnginePage>
54#include <QWebEngineUrlRequestInfo>
57#include <QtCore/private/qurl_p.h>
58#include <QtNetwork/private/qtldurl_p.h>
// Derives the top-level-domain part of url's host: the lower-cased host is split on
// '.', the labels are re-joined from the right with a leading '.', and each candidate
// suffix (minus that leading dot) is tested with qIsEffectiveTLD().
// NOTE(review): this listing is lossy - the leading numbers are source line numbers
// fused into the text, and lines 61-64, 68-70, 74-78 and 80-81 are absent, so the
// declarations of `level`/`tld` and the bodies of both `if`s are not visible here.
60static QString getTopLevelDomain(
const QUrl &url)
65 const QString domainLower = url.host().toLower();
// Qt::SkipEmptyParts drops empty labels produced by consecutive or trailing dots.
66 QVector<QStringView> sections = QStringView{domainLower}.split(QLatin1Char(
'.'), Qt::SkipEmptyParts);
67 if (sections.isEmpty())
// Walk the labels from the rightmost one towards the left, growing `level` each step.
71 for (
int j = sections.count() - 1; j >= 0; --j) {
72 level.prepend(QLatin1Char(
'.') + sections.at(j));
// right(size - 1) strips the leading '.' before the effective-TLD lookup.
73 if (qIsEffectiveTLD(QStringView{level}.right(level.size() - 1)))
// The result is `tld` passed through QUrl::toAce() and decoded back into a QString.
79 return QString(QString::fromUtf8(QUrl::toAce(tld)));
// Reduces url's host to the last label that precedes the top-level domain, followed
// by that top-level domain (as returned by getTopLevelDomain()).
// NOTE(review): lines 83, 86, 88-90, 92, 94-96 and 99-100 of the original are missing
// from this listing, so the bodies of the two `if`s are not visible.
82static QString toSecondLevelDomain(
const QUrl &url)
84 const QString topLevelDomain = getTopLevelDomain(url);
85 const QString urlHost = url.host();
// Guard for hosts without a usable TLD (the guarded statement is not visible).
87 if (topLevelDomain.isEmpty() || urlHost.isEmpty()) {
// Host with the top-level-domain suffix cut off by length.
91 QString domain = urlHost.left(urlHost.size() - topLevelDomain.size());
// Branch taken when the remainder contains no '.' at all (body not visible).
93 if (domain.count(
QL1C(
'.')) == 0) {
// Drop everything up to and including the first '.' until no '.' remains,
// which leaves only the rightmost label of the remainder.
97 while (domain.count(
QL1C(
'.')) != 0) {
98 domain = domain.mid(domain.indexOf(
QL1C(
'.')) + 1);
101 return domain + topLevelDomain;
// Member-initializer fragment; presumably belongs to the AdBlockRule constructor
// listed in the signature index at the end of this file. Its header line and the
// initializer on line 108 are not visible here - TODO confirm.
105 : m_subscription(subscription)
// The initial rule type is StringContainsMatchRule with case-insensitive matching.
106 , m_type(StringContainsMatchRule)
107 , m_caseSensitivity(Qt::CaseInsensitive)
109 , m_isException(false)
110 , m_isInternalDisabled(false)
// Field-by-field copy of this rule into `rule`; presumably the body of
// AdBlockRule::copy() (the header and the creation of `rule` are not visible in this
// listing) - TODO confirm.
124 rule->m_subscription = m_subscription;
125 rule->m_type = m_type;
126 rule->m_options = m_options;
127 rule->m_exceptions = m_exceptions;
128 rule->m_filter = m_filter;
129 rule->m_matchString = m_matchString;
130 rule->m_caseSensitivity = m_caseSensitivity;
131 rule->m_isEnabled = m_isEnabled;
132 rule->m_isException = m_isException;
133 rule->m_isInternalDisabled = m_isInternalDisabled;
134 rule->m_allowedDomains = m_allowedDomains;
135 rule->m_blockedDomains = m_blockedDomains;
// The RegExp holder is duplicated rather than shared: a new RegExp is allocated and
// its regExp and matchers members are copied over.
// NOTE(review): m_regExp is dereferenced below; lines 136-137 (not visible) presumably
// guard against it being null - confirm.
138 rule->m_regExp =
new RegExp;
139 rule->m_regExp->regExp = m_regExp->regExp;
140 rule->m_regExp->matchers = m_regExp->matchers;
// One-line bodies of a run of small member functions whose header lines are missing
// from this listing. Each statement belongs to a different function; the names are
// presumably among those in the signature index at the end of this file - TODO confirm.
// Returns the stored subscription pointer.
148 return m_subscription;
// True when the rule type is CssRule.
169 return m_type == CssRule;
// Returns the stored match string.
174 return m_matchString;
// Extended-CSS rules, snippet rules and internally disabled rules are reported together.
179 return m_type == ExtendedCssRule || m_type == SnippetRule || m_isInternalDisabled;
// The next four statements each test a single rule option via hasOption().
184 return hasOption(DocumentOption);
189 return hasOption(ElementHideOption);
194 return hasOption(GenericHideOption);
199 return hasOption(DomainRestrictedOption);
// Returns the exception flag.
204 return m_isException;
// True when the raw filter text starts with '!'.
209 return m_filter.startsWith(
QL1C(
'!'));
// Stores the enabled flag.
219 m_isEnabled = enabled;
// True when a RegExp holder has been allocated for this rule.
224 return m_regExp !=
nullptr;
// Returns the internally-disabled flag.
229 return m_isInternalDisabled;
// Fragment, presumably from AdBlockRule::urlMatch(const QUrl &url) - the header is
// not visible in this listing.
// Rules carrying none of the document / elemhide / generichide / genericblock options
// take this branch (its body, lines 235-237, is not visible).
234 if (!hasOption(DocumentOption) && !hasOption(ElementHideOption) && !hasOption(GenericHideOption) && !hasOption(GenericBlockOption)) {
// The encoded form of the URL and its host are extracted here; their use is on
// lines that are not visible.
238 const QString encodedUrl = QString::fromUtf8(url.toEncoded());
239 const QString domain = url.host();
// Fragment, presumably from AdBlockRule::networkMatch() - the header and the bodies
// of all three checks are not visible in this listing.
// CSS rules, disabled rules and internally disabled rules take this branch.
246 if (m_type == CssRule || !m_isEnabled || m_isInternalDisabled) {
// Domain-restricted rules are checked against the host of the first-party URL.
254 if (hasOption(DomainRestrictedOption) && !
matchDomain(request.firstPartyUrl().host())) {
// When any TypeOptions bit is set in either the options or the exceptions, the
// request's resource type has to pass matchType().
264 if (((m_exceptions | m_options) & TypeOptions) && !
matchType(request))
// Fragment, presumably from AdBlockRule::matchDomain(const QString &domain) - the
// header and every loop body are missing from this listing.
// Rules without a domain restriction take this branch (body not visible).
277 if (!hasOption(DomainRestrictedOption)) {
// Case: no blocked domains are listed - only the allowed domains are scanned.
281 if (m_blockedDomains.isEmpty()) {
282 for (
const QString &d : std::as_const(m_allowedDomains)) {
// Case: no allowed domains are listed - only the blocked domains are scanned.
288 else if (m_allowedDomains.isEmpty()) {
289 for (
const QString &d : std::as_const(m_blockedDomains)) {
// Presumably the remaining case (both lists populated): blocked domains are scanned
// first, then allowed domains - TODO confirm against the missing lines.
297 for (
const QString &d : std::as_const(m_blockedDomains)) {
303 for (
const QString &d : std::as_const(m_allowedDomains)) {
// Fragment, presumably from AdBlockRule::matchThirdParty() - the header is not
// visible in this listing.
// Both URLs are reduced with toSecondLevelDomain() before being compared.
316 const QString firstPartyHost = toSecondLevelDomain(request.firstPartyUrl());
317 const QString host = toSecondLevelDomain(request.requestUrl());
// `match` is true when the two reduced domains differ.
319 bool match = firstPartyHost != host;
// When the third-party exception is set, the result is inverted.
321 return hasException(ThirdPartyOption) ? !match : match;
// Fragment, presumably from AdBlockRule::matchType() - the header, the declaration
// of `type`, the `break`s and several `type = ...` assignments are missing from this
// listing. The switch maps the request's QWebEngine resource type to a rule option.
327 switch (request.resourceType()) {
328 case QWebEngineUrlRequestInfo::ResourceTypeMainFrame:
329 type = DocumentOption;
331 case QWebEngineUrlRequestInfo::ResourceTypeSubFrame:
332 type = SubdocumentOption;
334 case QWebEngineUrlRequestInfo::ResourceTypeStylesheet:
335 type = StyleSheetOption;
// The assignments for the next five cases (lines 338, 341, 344, 347, 350) are not visible.
337 case QWebEngineUrlRequestInfo::ResourceTypeScript:
340 case QWebEngineUrlRequestInfo::ResourceTypeImage:
343 case QWebEngineUrlRequestInfo::ResourceTypeFontResource:
346 case QWebEngineUrlRequestInfo::ResourceTypeObject:
349 case QWebEngineUrlRequestInfo::ResourceTypeMedia:
352 case QWebEngineUrlRequestInfo::ResourceTypeXhr:
353 type = XMLHttpRequestOption;
// The assignment for the ping case (line 356) is not visible.
355 case QWebEngineUrlRequestInfo::ResourceTypePing:
358 case QWebEngineUrlRequestInfo::ResourceTypePluginResource:
359 type = ObjectSubrequestOption;
// The remaining resource types share one branch whose body is not visible.
361 case QWebEngineUrlRequestInfo::ResourceTypeSubResource:
362 case QWebEngineUrlRequestInfo::ResourceTypeWorker:
363 case QWebEngineUrlRequestInfo::ResourceTypeSharedWorker:
364 case QWebEngineUrlRequestInfo::ResourceTypePrefetch:
365 case QWebEngineUrlRequestInfo::ResourceTypeFavicon:
366 case QWebEngineUrlRequestInfo::ResourceTypeServiceWorker:
367 case QWebEngineUrlRequestInfo::ResourceTypeCspReport:
368 case QWebEngineUrlRequestInfo::ResourceTypeNavigationPreloadMainFrame:
369 case QWebEngineUrlRequestInfo::ResourceTypeNavigationPreloadSubFrame:
370 case QWebEngineUrlRequestInfo::ResourceTypeUnknown:
// Two alternative results: the option flag being set, or the exception flag not
// being set. The condition choosing between them (line 375) is not visible.
376 return m_options.testFlag(type);
377 return !m_exceptions.testFlag(type);
// Parses the raw filter text in m_filter and fills in the rule's type, options,
// exceptions, domain lists, match string and (for pattern rules) the RegExp holder.
// NOTE(review): this listing is lossy - the leading numbers are source line numbers
// fused into the text, and many lines (closing braces, early returns, the
// `handledOptions` increments) are absent, so the exact control flow between the
// sections below cannot be confirmed from here.
380void AdBlockRule::parseFilter()
// All trimming is done on a working copy; m_filter itself is left untouched.
382 QString parsedLine = m_filter;
// Blank filters and lines starting with '!' are marked as internally disabled
// (lines 386-388 and 390-394 of this branch are not visible).
385 if (m_filter.trimmed().isEmpty() || m_filter.startsWith(
QL1C(
'!'))) {
389 m_isInternalDisabled =
true;
// A leading "@@" marks an exception rule; the marker is stripped.
395 if (parsedLine.startsWith(
QL1S(
"@@"))) {
396 m_isException =
true;
397 parsedLine.remove(0, 2);
// "#?#" rules: text before the first '#' is a comma-separated domain list, text
// after the three-character marker becomes the match string.
401 if (parsedLine.contains(
QL1S(
"#?#"))) {
402 m_type = ExtendedCssRule;
403 int pos = parsedLine.indexOf(
QL1C(
'#'));
404 if (!parsedLine.startsWith(
QL1S(
"#"))) {
405 QString domains = parsedLine.left(pos);
406 parseDomains(domains,
QL1C(
','));
408 m_matchString = parsedLine.mid(pos + 3);
// "#$#" rules: same layout as above, but the type is SnippetRule.
414 if (parsedLine.contains(
QL1S(
"#$#"))) {
415 m_type = SnippetRule;
416 int pos = parsedLine.indexOf(
QL1C(
'#'));
417 if (!parsedLine.startsWith(
QL1S(
"#"))) {
418 QString domains = parsedLine.left(pos);
419 parseDomains(domains,
QL1C(
','));
421 m_matchString = parsedLine.mid(pos + 3);
// "##" and "#@#" rules (the type assignment on line 427 is not visible).
426 if (parsedLine.contains(
QL1S(
"##")) || parsedLine.contains(
QL1S(
"#@#"))) {
428 int pos = parsedLine.indexOf(
QL1C(
'#'));
431 if (!parsedLine.startsWith(
QL1S(
"#"))) {
432 QString domains = parsedLine.left(pos);
433 parseDomains(domains,
QL1C(
','));
// '@' right after the first '#' means the "#@#" form, which is an exception and has
// a three-character marker instead of two.
436 m_isException = parsedLine.at(pos + 1) ==
QL1C(
'@');
437 m_matchString = parsedLine.mid(m_isException ? pos + 3 : pos + 2);
// Everything after the first '$' is a comma-separated option list.
444 int optionsIndex = parsedLine.indexOf(
QL1C(
'$'));
445 if (optionsIndex >= 0) {
446 const QStringList options = parsedLine.mid(optionsIndex + 1).split(
QL1C(
','), Qt::SkipEmptyParts);
// handledOptions is compared with options.count() after the loop; the increments
// themselves are on lines that are not visible in this listing.
448 int handledOptions = 0;
449 for (
const QString &option : options) {
// "domain=" carries a '|'-separated domain list (7 == length of "domain=").
450 if (option.startsWith(
QL1S(
"domain="))) {
451 parseDomains(option.mid(7),
QL1C(
'|'));
454 else if (option ==
QL1S(
"match-case")) {
455 m_caseSensitivity = Qt::CaseSensitive;
// The options below are matched with endsWith() so that a leading '~' is accepted;
// that prefix also turns the option into an exception.
// NOTE(review): endsWith() accepts any other prefix as well, so an unrelated option
// that merely ends in one of these words would be treated as that option - confirm
// whether this is intended.
458 else if (option.endsWith(
QL1S(
"third-party"))) {
459 setOption(ThirdPartyOption);
460 setException(ThirdPartyOption, option.startsWith(
QL1C(
'~')));
463 else if (option.endsWith(
QL1S(
"object"))) {
464 setOption(ObjectOption);
465 setException(ObjectOption, option.startsWith(
QL1C(
'~')));
468 else if (option.endsWith(
QL1S(
"subdocument"))) {
469 setOption(SubdocumentOption);
470 setException(SubdocumentOption, option.startsWith(
QL1C(
'~')));
473 else if (option.endsWith(
QL1S(
"xmlhttprequest"))) {
474 setOption(XMLHttpRequestOption);
475 setException(XMLHttpRequestOption, option.startsWith(
QL1C(
'~')));
478 else if (option.endsWith(
QL1S(
"image"))) {
479 setOption(ImageOption);
480 setException(ImageOption, option.startsWith(
QL1C(
'~')));
483 else if (option.endsWith(
QL1S(
"script"))) {
484 setOption(ScriptOption);
485 setException(ScriptOption, option.startsWith(
QL1C(
'~')));
488 else if (option.endsWith(
QL1S(
"stylesheet"))) {
489 setOption(StyleSheetOption);
490 setException(StyleSheetOption, option.startsWith(
QL1C(
'~')));
493 else if (option.endsWith(
QL1S(
"object-subrequest"))) {
494 setOption(ObjectSubrequestOption);
495 setException(ObjectSubrequestOption, option.startsWith(
QL1C(
'~')));
498 else if (option.endsWith(
QL1S(
"ping"))) {
499 setOption(PingOption);
500 setException(PingOption, option.startsWith(
QL1C(
'~')));
503 else if (option.endsWith(
QL1S(
"media"))) {
504 setOption(MediaOption);
505 setException(MediaOption, option.startsWith(
QL1C(
'~')));
508 else if (option.endsWith(
QL1S(
"font"))) {
509 setOption(FontOption);
510 setException(FontOption, option.startsWith(
QL1C(
'~')));
513 else if (option.endsWith(
QL1S(
"other"))) {
514 setOption(OtherOption);
515 setException(OtherOption, option.startsWith(
QL1C(
'~')));
// "collapse" is recognised, but no option is set for it on the visible lines.
518 else if (option ==
QL1S(
"collapse")) {
522 else if (option ==
QL1S(
"popup")) {
524 setOption(PopupOption);
// The following four options are only honoured on exception ("@@") rules.
527 else if (option ==
QL1S(
"document") && m_isException) {
528 setOption(DocumentOption);
531 else if (option ==
QL1S(
"elemhide") && m_isException) {
532 setOption(ElementHideOption);
535 else if (option ==
QL1S(
"generichide") && m_isException) {
536 setOption(GenericHideOption);
539 else if (option ==
QL1S(
"genericblock") && m_isException) {
541 setOption(GenericBlockOption);
// A rule with at least one option that was not handled is internally disabled.
547 if (handledOptions != options.count()) {
548 m_isInternalDisabled =
true;
// Cut the "$options" tail off the pattern.
553 parsedLine.truncate(optionsIndex);
// "/.../" - the text between the slashes is used directly as a regular expression.
557 if (parsedLine.startsWith(
QL1C(
'/')) && parsedLine.endsWith(
QL1C(
'/'))) {
558 parsedLine.remove(0, 1);
559 parsedLine = parsedLine.left(parsedLine.size() - 1);
561 m_type = RegExpMatchRule;
562 m_regExp =
new RegExp;
563 m_regExp->regExp = QRegularExpression(parsedLine, QRegularExpression::InvertedGreedinessOption);
564 if (m_caseSensitivity == Qt::CaseInsensitive) {
565 m_regExp->regExp.setPatternOptions(m_regExp->regExp.patternOptions() | QRegularExpression::CaseInsensitiveOption);
// A single leading and a single trailing '*' are stripped from the pattern.
572 if (parsedLine.startsWith(
QL1C(
'*'))) {
573 parsedLine.remove(0, 1);
576 if (parsedLine.endsWith(
QL1C(
'*'))) {
577 parsedLine = parsedLine.left(parsedLine.size() - 1);
// Domain-only filters: the first two characters and the last one are stripped and
// the rest is matched as a domain.
581 if (filterIsOnlyDomain(parsedLine)) {
582 parsedLine.remove(0, 2);
583 parsedLine = parsedLine.left(parsedLine.size() - 1);
585 m_type = DomainMatchRule;
586 m_matchString = parsedLine;
// Ends-with filters: the last character is stripped and the rest is matched against
// the end of the URL.
591 if (filterIsOnlyEndsMatch(parsedLine)) {
592 parsedLine = parsedLine.left(parsedLine.size() - 1);
594 m_type = StringEndsMatchRule;
595 m_matchString = parsedLine;
// Any remaining '*', '^' or '|' means the filter has to be converted into a regular
// expression by createRegExpFromFilter().
601 if (parsedLine.contains(
QL1C(
'*')) ||
602 parsedLine.contains(
QL1C(
'^')) ||
603 parsedLine.contains(
QL1C(
'|'))
605 m_type = RegExpMatchRule;
606 m_regExp =
new RegExp;
607 m_regExp->regExp = QRegularExpression(createRegExpFromFilter(parsedLine), QRegularExpression::InvertedGreedinessOption);
608 if (m_caseSensitivity == Qt::CaseInsensitive) {
609 m_regExp->regExp.setPatternOptions(m_regExp->regExp.patternOptions() | QRegularExpression::CaseInsensitiveOption);
// An empty pattern matches every URL; without any option it is internally disabled
// and a warning is logged.
616 if (parsedLine.isEmpty()) {
617 if (m_options == NoOption) {
618 qWarning() <<
"Disabling unrestricted rule that would block all requests" << m_filter;
619 m_isInternalDisabled =
true;
623 m_type = MatchAllUrlsRule;
// Otherwise: plain substring rule.
628 m_type = StringContainsMatchRule;
629 m_matchString = parsedLine;
// Splits `domains` on `separator` and sorts the entries into m_blockedDomains
// (entries prefixed with '~', stored without the prefix) and m_allowedDomains.
// DomainRestrictedOption is set when either list ends up non-empty.
// NOTE(review): braces and the line between 641 and 644 (presumably an `else`) are
// missing from this listing.
632void AdBlockRule::parseDomains(
const QString &domains,
const QChar &separator)
634 const QStringList domainsList = domains.split(separator, Qt::SkipEmptyParts);
636 for (
const QString &domain : domainsList) {
// Empty entries take this branch (body not visible).
637 if (domain.isEmpty()) {
640 if (domain.startsWith(
QL1C(
'~'))) {
641 m_blockedDomains.append(domain.mid(1));
644 m_allowedDomains.append(domain);
648 if (!m_blockedDomains.isEmpty() || !m_allowedDomains.isEmpty()) {
649 setOption(DomainRestrictedOption);
// Predicate over `filter`, used by parseFilter() to select DomainMatchRule.
// Only the character-by-character scan is visible here: each character is converted
// with toLatin1() and switched on. The pre-checks (lines 654-657), the case labels
// and the return statements are missing from this listing.
653bool AdBlockRule::filterIsOnlyDomain(
const QString &filter)
const
658 for (
int i = 0;
i <
filter.size(); ++
i) {
659 switch (
filter.at(
i).toLatin1()) {
// Predicate over `filter`, used by parseFilter() to select StringEndsMatchRule.
// Only the character-by-character scan is visible here: each character is converted
// with toLatin1() and switched on. The case labels and the return statements are
// missing from this listing.
675bool AdBlockRule::filterIsOnlyEndsMatch(
const QString &filter)
const
677 for (
int i = 0;
i <
filter.size(); ++
i) {
678 switch (
filter.at(
i).toLatin1()) {
// Returns true when `c` is a letter, a number, a mark character or an underscore.
// createRegExpFromFilter() escapes every character for which this returns false.
692static bool wordCharacter(
const QChar &c)
694 return c.isLetterOrNumber() || c.isMark() || c ==
QL1C(
'_');
// Translates `filter` into a regular-expression pattern string, one character at a
// time, appending the result to `parsed`.
// NOTE(review): the declaration of `parsed`/`c`, all `case` labels, the `break`s and
// the final return are missing from this listing; the notes below about which
// filter character triggers each append are therefore presumptions - TODO confirm.
697QString AdBlockRule::createRegExpFromFilter(
const QString &filter)
const
// Pre-size the output to at least the input length.
700 parsed.reserve(
filter.size());
// Records whether the previous character was '*' (updated at the end of each iteration).
702 bool hadWildcard =
false;
704 for (
int i = 0;
i <
filter.size(); ++
i) {
706 switch (c.toLatin1()) {
// Presumably the '^' placeholder: matches one character outside [\w\d\-.%] or the
// end of the input.
708 parsed.append(
QL1S(
"(?:[^\\w\\d\\-.%]|$)"));
// Presumably '*': any run of characters.
713 parsed.append(
QL1S(
".*"));
// Presumably a leading "||": a scheme, then slashes, then an optional subdomain prefix.
719 parsed.append(
QL1S(
"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"));
// Presumably a leading '|': anchor at the start of the input.
723 parsed.append(
QL1C(
'^'));
// At the last position of the filter an end-of-input anchor is emitted.
727 else if (
i ==
filter.size() - 1) {
728 parsed.append(
QL1C(
'$'));
// Characters that are not word characters are escaped with a backslash.
734 if (!wordCharacter(c))
735 parsed.append(
QL1C(
'\\') + c);
740 hadWildcard = c ==
QL1C(
'*');
// Builds one QStringMatcher per entry of `filters`, each using this rule's case
// sensitivity. The closing lines (presumably returning `matchers`) are missing from
// this listing.
746QList<QStringMatcher> AdBlockRule::createStringMatchers(
const QStringList &filters)
const
748 QList<QStringMatcher> matchers;
// Reserve up front so the appends below do not reallocate.
749 matchers.reserve(filters.size());
751 for (
const QString &
filter : filters) {
752 matchers.append(QStringMatcher(
filter, m_caseSensitivity));
// Fragment of a switch over rule types, presumably from AdBlockRule::stringMatch()
// (header, switch head and several case bodies are missing from this listing).
// Substring test on the encoded URL.
761 case StringContainsMatchRule:
762 return encodedUrl.contains(m_matchString, m_caseSensitivity);
// Body of the domain case (lines 765-766) is not visible.
764 case DomainMatchRule:
// Suffix test on the encoded URL.
767 case StringEndsMatchRule:
768 return encodedUrl.endsWith(m_matchString, m_caseSensitivity);
// Regular-expression test; lines 771-773 (not visible) precede the actual match.
770 case RegExpMatchRule:
774 return m_regExp->regExp.match(encodedUrl).hasMatch();
// Body of the match-all case is not visible.
776 case MatchAllUrlsRule:
// Fragment, presumably from AdBlockRule::isMatchingRegExpStrings(const QString &url)
// - the header and the return statements are missing from this listing.
// Works on a copy of the matcher list stored in the RegExp holder.
793 const auto matchers = m_regExp->matchers;
794 for (
const QStringMatcher &matcher : matchers) {
// indexIn() == -1 means this matcher's pattern does not occur in `url`
// (the statement guarded by this test is not visible).
795 if (matcher.indexIn(url) == -1)
// Fragment, presumably from AdBlockRule::parseRegExpFilter(const QString &filter) -
// the header, the declarations of `list`/`startPos`/`c` and the handling of each
// `sub` are missing from this listing.
// The filter is scanned for the special characters '|', '*' and '^'; the text
// between two of them is cut out as `sub`.
809 for (
int i = 0;
i <
filter.size(); ++
i) {
812 if (c ==
QL1C(
'|') || c ==
QL1C(
'*') || c ==
QL1C(
'^')) {
813 const QString sub =
filter.mid(startPos,
i - startPos);
// Remaining text from startPos to the end of the filter.
820 const QString sub =
filter.mid(startPos);
// Repeated entries are removed from the collected list.
824 list.removeDuplicates();
// Returns true when `opt` is set in m_options (the bitwise AND is converted to bool).
829bool AdBlockRule::hasOption(
const AdBlockRule::RuleOption &opt)
const
831 return (m_options & opt);
// Returns true when `opt` is set in m_exceptions (the bitwise AND is converted to bool).
834bool AdBlockRule::hasException(
const AdBlockRule::RuleOption &opt)
const
836 return (m_exceptions & opt);
// Setter counterpart of hasOption(); called by parseFilter() and parseDomains().
// NOTE(review): only the signature is present in this listing - the body is missing,
// so what exactly is written cannot be confirmed from here.
839void AdBlockRule::setOption(
const AdBlockRule::RuleOption &opt)
// Setter counterpart of hasException(); parseFilter() calls it with `on` set to
// whether the option text started with '~'.
// NOTE(review): only the signature is present in this listing - the body is missing,
// so how `on` is applied cannot be confirmed from here.
844void AdBlockRule::setException(
const AdBlockRule::RuleOption &opt,
bool on)
bool isDomainRestricted() const
bool isGenerichide() const
bool isMatchingRegExpStrings(const QString &url) const
AdBlockSubscription * subscription() const
bool isMatchingDomain(const QString &domain, const QString &filter) const
bool matchDomain(const QString &domain) const
QString cssSelector() const
QStringList parseRegExpFilter(const QString &filter) const
AdBlockRule(const QString &filter=QString(), AdBlockSubscription *subscription=nullptr)
bool isInternalDisabled() const
bool isUnsupportedRule() const
bool stringMatch(const QString &domain, const QString &encodedUrl) const
bool networkMatch(const QWebEngineUrlRequestInfo &request, const QString &domain, const QString &encodedUrl) const
bool urlMatch(const QUrl &url) const
bool matchType(const QWebEngineUrlRequestInfo &request) const
AdBlockRule * copy() const
void setEnabled(bool enabled)
void setFilter(const QString &filter)
void setSubscription(AdBlockSubscription *subscription)
bool matchThirdParty(const QWebEngineUrlRequestInfo &request) const