Falkon Develop
Cross-platform Qt-based web browser
htmlimporter.cpp
Go to the documentation of this file.
1/* ============================================================
2* Falkon - Qt web browser
3* Copyright (C) 2010-2017 David Rosca <nowrep@gmail.com>
4*
5* This program is free software: you can redistribute it and/or modify
6* it under the terms of the GNU General Public License as published by
7* the Free Software Foundation, either version 3 of the License, or
8* (at your option) any later version.
9*
10* This program is distributed in the hope that it will be useful,
11* but WITHOUT ANY WARRANTY; without even the implied warranty of
12* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13* GNU General Public License for more details.
14*
15* You should have received a copy of the GNU General Public License
16* along with this program. If not, see <http://www.gnu.org/licenses/>.
17* ============================================================ */
18#include "htmlimporter.h"
19#include "bookmarkitem.h"
20
21#include <QUrl>
22#include <QFileDialog>
23#include <QRegularExpression>
24
26 : BookmarksImporter(parent)
27{
28}
29
31{
32 return BookmarksImporter::tr("You can import bookmarks from any browser that supports HTML exporting. "
33 "This file has usually these suffixes");
34}
35
37{
38 return QStringLiteral(".htm, .html");
39}
40
41QString HtmlImporter::getPath(QWidget* parent)
42{
43 const QString filter = BookmarksImporter::tr("HTML Bookmarks") + QLatin1String(" (*.htm *.html)");
44 m_path = QFileDialog::getOpenFileName(parent, BookmarksImporter::tr("Choose file..."), QDir::homePath(), filter);
45 return m_path;
46}
47
49{
50 m_file.setFileName(m_path);
51
52 if (!m_file.open(QFile::ReadOnly)) {
53 setError(BookmarksImporter::tr("Unable to open file."));
54 return false;
55 }
56
57 return true;
58}
59
// Minimum of two positions where only values greater than -1 count as valid.
//  - both valid   -> the smaller one
//  - only a valid -> a
//  - otherwise    -> b (which may itself be invalid, e.g. -1)
static int qzMin(int a, int b)
{
    const bool aValid = a > -1;
    const bool bValid = b > -1;

    if (aValid && bValid) {
        return a < b ? a : b;
    }

    return aValid ? a : b;
}
73
75{
76 QString bookmarks = QString::fromUtf8(m_file.readAll());
77 m_file.close();
78
79 // Converting tags to lower case -,-
80 // For some reason Qt::CaseInsensitive is not every time insensitive :-D
81
82 bookmarks.replace(QLatin1String("<DL"), QLatin1String("<dl"));
83 bookmarks.replace(QLatin1String("</DL"), QLatin1String("</dl"));
84 bookmarks.replace(QLatin1String("<DT"), QLatin1String("<dt"));
85 bookmarks.replace(QLatin1String("</DT"), QLatin1String("</dt"));
86 bookmarks.replace(QLatin1String("<P"), QLatin1String("<p"));
87 bookmarks.replace(QLatin1String("</P"), QLatin1String("</p"));
88 bookmarks.replace(QLatin1String("<A"), QLatin1String("<a"));
89 bookmarks.replace(QLatin1String("</A"), QLatin1String("</a"));
90 bookmarks.replace(QLatin1String("HREF="), QLatin1String("href="));
91 bookmarks.replace(QLatin1String("<H3"), QLatin1String("<h3"));
92 bookmarks.replace(QLatin1String("</H3"), QLatin1String("</h3"));
93
94 bookmarks = bookmarks.left(bookmarks.lastIndexOf(QLatin1String("</dl><p>")));
95 int start = bookmarks.indexOf(QLatin1String("<dl><p>"));
96
97 auto* root = new BookmarkItem(BookmarkItem::Folder);
98 root->setTitle(QStringLiteral("HTML Import"));
99
100 QList<BookmarkItem*> folders;
101 folders.append(root);
102
103 while (start > 0) {
104 QString string = bookmarks.mid(start);
105
106 int posOfFolder = string.indexOf(QLatin1String("<dt><h3"));
107 int posOfEndFolder = string.indexOf(QLatin1String("</dl><p>"));
108 int posOfLink = string.indexOf(QLatin1String("<dt><a"));
109
110 int nearest = qzMin(posOfLink, qzMin(posOfFolder, posOfEndFolder));
111 if (nearest == -1) {
112 break;
113 }
114
115 if (nearest == posOfFolder) {
116 // Next is folder
117 QRegularExpression rx(QSL("<dt><h3(.*)>(.*)</h3>"), QRegularExpression::InvertedGreedinessOption | QRegularExpression::DotMatchesEverythingOption);
118 QRegularExpressionMatch match = rx.match(string);
119 QString folderName = match.captured(2).trimmed();
120
121 auto* folder = new BookmarkItem(BookmarkItem::Folder, folders.isEmpty() ? root : folders.last());
122 folder->setTitle(folderName);
123 folders.append(folder);
124
125 start += posOfFolder + match.captured(0).size();
126 }
127 else if (nearest == posOfEndFolder) {
128 // Next is end of folder
129 if (!folders.isEmpty()) {
130 folders.removeLast();
131 }
132
133 start += posOfEndFolder + 8;
134 }
135 else {
136 // Next is link
137 QRegularExpression rx(QSL("<dt><a(.*)>(.*)</a>"), QRegularExpression::InvertedGreedinessOption | QRegularExpression::DotMatchesEverythingOption);
138 QRegularExpressionMatch match = rx.match(string);
139
140 QString arguments = match.captured(1);
141 QString linkName = match.captured(2).trimmed();
142
143 QRegularExpression rx2(QSL("href=\"(.*)\""), QRegularExpression::InvertedGreedinessOption | QRegularExpression::DotMatchesEverythingOption);
144 QRegularExpressionMatch match2 = rx2.match(arguments);
145
146 QUrl url = QUrl::fromEncoded(match2.captured(1).trimmed().toUtf8());
147
148 start += posOfLink + match.captured(0).size();
149
150 if (url.isEmpty() || url.scheme() == QL1S("place") || url.scheme() == QL1S("about"))
151 continue;
152
153 auto* b = new BookmarkItem(BookmarkItem::Url, folders.isEmpty() ? root : folders.last());
154 b->setTitle(linkName.isEmpty() ? url.toString() : linkName);
155 b->setUrl(url);
156 }
157 }
158
159 return root;
160}
void setError(const QString &error)
QString standardPath() const override
QString getPath(QWidget *parent) override
QString description() const override
HtmlImporter(QObject *parent=nullptr)
bool prepareImport() override
BookmarkItem * importBookmarks() override
#define QL1S(x)
Definition: qzcommon.h:44
#define QSL(x)
Definition: qzcommon.h:40