Switch to FTS5 with unicode61 (#229)

* Switch to FTS5 with unicode61

* Update required sqlite version in README

* Update README

* Change back db file
This commit is contained in:
Jonas Kvinge 2019-07-30 22:45:22 +02:00 committed by GitHub
parent 02cda47c28
commit 8a0e66bf11
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 104 additions and 263 deletions

View File

@ -105,7 +105,7 @@ pkg_check_modules(GSTREAMER_TAG gstreamer-tag-1.0)
pkg_check_modules(LIBXINE libxine)
pkg_check_modules(LIBVLC libvlc)
pkg_check_modules(PHONON phonon4qt5)
pkg_check_modules(SQLITE REQUIRED sqlite3>=3.7)
pkg_check_modules(SQLITE REQUIRED sqlite3>=3.9)
pkg_check_modules(LIBPULSE libpulse)
pkg_check_modules(CHROMAPRINT libchromaprint)
pkg_check_modules(LIBGPOD libgpod-1.0>=0.7.92)

View File

@ -42,7 +42,7 @@ To build Strawberry from source you need the following installed on your system
* [Protobuf library and compiler](https://developers.google.com/protocol-buffers/)
* [Qt 5 with components Core, Gui, Widgets, Concurrent, Network and Sql](https://www.qt.io/)
* [Qt 5 components X11Extras and DBus for Linux/BSD, MacExtras for macOS and WinExtras for Windows](https://www.qt.io/)
* [SQLite3](https://www.sqlite.org)
* [SQLite 3.9 or newer with FTS5](https://www.sqlite.org)
* [Chromaprint library](https://acoustid.org/chromaprint)
* [ALSA library (linux)](https://www.alsa-project.org/)
* [DBus (linux)](https://www.freedesktop.org/wiki/Software/dbus/)

View File

@ -9,6 +9,7 @@
<file>schema/schema-6.sql</file>
<file>schema/schema-7.sql</file>
<file>schema/schema-8.sql</file>
<file>schema/schema-9.sql</file>
<file>schema/device-schema.sql</file>
<file>style/strawberry.css</file>
<file>html/playing-tooltip-plain.html</file>

View File

@ -70,9 +70,9 @@ CREATE INDEX idx_device_%deviceid_songs_album ON device_%deviceid_songs (album);
CREATE INDEX idx_device_%deviceid_songs_comp_artist ON device_%deviceid_songs (compilation_effective, artist);
CREATE VIRTUAL TABLE device_%deviceid_fts USING fts3(
CREATE VIRTUAL TABLE device_%deviceid_fts USING fts5(
ftstitle, ftsalbum, ftsartist, ftsalbumartist, ftscomposer, ftsperformer, ftsgrouping, ftsgenre, ftscomment,
tokenize=unicode
tokenize=unicode61
);
UPDATE devices SET schema_version=0 WHERE ROWID=%deviceid;

41
data/schema/schema-9.sql Normal file
View File

@ -0,0 +1,41 @@
DROP TABLE %allsongstables_fts;
DROP TABLE playlist_items_fts_;
CREATE VIRTUAL TABLE %allsongstables_fts USING fts5(
ftstitle,
ftsalbum,
ftsartist,
ftsalbumartist,
ftscomposer,
ftsperformer,
ftsgrouping,
ftsgenre,
ftscomment,
tokenize = "unicode61 remove_diacritics 0"
);
CREATE VIRTUAL TABLE playlist_items_fts_ USING fts5(
ftstitle,
ftsalbum,
ftsartist,
ftsalbumartist,
ftscomposer,
ftsperformer,
ftsgrouping,
ftsgenre,
ftscomment,
tokenize = "unicode61 remove_diacritics 0"
);
INSERT INTO %allsongstables_fts (ROWID, ftstitle, ftsalbum, ftsartist, ftsalbumartist, ftscomposer, ftsperformer, ftsgrouping, ftsgenre, ftscomment)
SELECT ROWID, title, album, artist, albumartist, composer, performer, grouping, genre, comment FROM %allsongstables;
INSERT INTO playlist_items_fts_ (ROWID, ftstitle, ftsalbum, ftsartist, ftsalbumartist, ftscomposer, ftsperformer, ftsgrouping, ftsgenre, ftscomment)
SELECT ROWID, title, album, artist, albumartist, composer, performer, grouping, genre, comment FROM playlist_items;
UPDATE schema_version SET version=9;

View File

@ -4,7 +4,7 @@ CREATE TABLE IF NOT EXISTS schema_version (
DELETE FROM schema_version;
INSERT INTO schema_version (version) VALUES (8);
INSERT INTO schema_version (version) VALUES (9);
CREATE TABLE IF NOT EXISTS directories (
path TEXT NOT NULL,
@ -560,13 +560,17 @@ CREATE INDEX IF NOT EXISTS idx_url ON songs (url);
CREATE INDEX IF NOT EXISTS idx_comp_artist ON songs (compilation_effective, artist);
CREATE INDEX IF NOT EXISTS idx_albumartist ON songs (albumartist);
CREATE INDEX IF NOT EXISTS idx_artist ON songs (artist);
CREATE INDEX IF NOT EXISTS idx_album ON songs (album);
CREATE INDEX IF NOT EXISTS idx_title ON songs (title);
CREATE VIEW IF NOT EXISTS duplicated_songs as select artist dup_artist, album dup_album, title dup_title from songs as inner_songs where artist != '' and album != '' and title != '' and unavailable = 0 group by artist, album , title having count(*) > 1;
CREATE VIRTUAL TABLE IF NOT EXISTS songs_fts USING fts3(
CREATE VIRTUAL TABLE IF NOT EXISTS songs_fts USING fts5(
ftstitle,
ftsalbum,
@ -577,11 +581,11 @@ CREATE VIRTUAL TABLE IF NOT EXISTS songs_fts USING fts3(
ftsgrouping,
ftsgenre,
ftscomment,
tokenize=unicode
tokenize = "unicode61 remove_diacritics 0"
);
CREATE VIRTUAL TABLE IF NOT EXISTS tidal_artists_songs_fts USING fts3(
CREATE VIRTUAL TABLE IF NOT EXISTS tidal_artists_songs_fts USING fts5(
ftstitle,
ftsalbum,
@ -592,11 +596,11 @@ CREATE VIRTUAL TABLE IF NOT EXISTS tidal_artists_songs_fts USING fts3(
ftsgrouping,
ftsgenre,
ftscomment,
tokenize=unicode
tokenize = "unicode61 remove_diacritics 0"
);
CREATE VIRTUAL TABLE IF NOT EXISTS tidal_albums_songs_fts USING fts3(
CREATE VIRTUAL TABLE IF NOT EXISTS tidal_albums_songs_fts USING fts5(
ftstitle,
ftsalbum,
@ -607,11 +611,11 @@ CREATE VIRTUAL TABLE IF NOT EXISTS tidal_albums_songs_fts USING fts3(
ftsgrouping,
ftsgenre,
ftscomment,
tokenize=unicode
tokenize = "unicode61 remove_diacritics 0"
);
CREATE VIRTUAL TABLE IF NOT EXISTS tidal_songs_fts USING fts3(
CREATE VIRTUAL TABLE IF NOT EXISTS tidal_songs_fts USING fts5(
ftstitle,
ftsalbum,
@ -622,11 +626,11 @@ CREATE VIRTUAL TABLE IF NOT EXISTS tidal_songs_fts USING fts3(
ftsgrouping,
ftsgenre,
ftscomment,
tokenize=unicode
tokenize = "unicode61 remove_diacritics 0"
);
CREATE VIRTUAL TABLE IF NOT EXISTS subsonic_songs_fts USING fts3(
CREATE VIRTUAL TABLE IF NOT EXISTS subsonic_songs_fts USING fts5(
ftstitle,
ftsalbum,
@ -637,11 +641,11 @@ CREATE VIRTUAL TABLE IF NOT EXISTS subsonic_songs_fts USING fts3(
ftsgrouping,
ftsgenre,
ftscomment,
tokenize=unicode
tokenize = "unicode61 remove_diacritics 0"
);
CREATE VIRTUAL TABLE IF NOT EXISTS qobuz_artists_songs_fts USING fts3(
CREATE VIRTUAL TABLE IF NOT EXISTS qobuz_artists_songs_fts USING fts5(
ftstitle,
ftsalbum,
@ -652,11 +656,11 @@ CREATE VIRTUAL TABLE IF NOT EXISTS qobuz_artists_songs_fts USING fts3(
ftsgrouping,
ftsgenre,
ftscomment,
tokenize=unicode
tokenize = "unicode61 remove_diacritics 0"
);
CREATE VIRTUAL TABLE IF NOT EXISTS qobuz_albums_songs_fts USING fts3(
CREATE VIRTUAL TABLE IF NOT EXISTS qobuz_albums_songs_fts USING fts5(
ftstitle,
ftsalbum,
@ -667,11 +671,11 @@ CREATE VIRTUAL TABLE IF NOT EXISTS qobuz_albums_songs_fts USING fts3(
ftsgrouping,
ftsgenre,
ftscomment,
tokenize=unicode
tokenize = "unicode61 remove_diacritics 0"
);
CREATE VIRTUAL TABLE IF NOT EXISTS qobuz_songs_fts USING fts3(
CREATE VIRTUAL TABLE IF NOT EXISTS qobuz_songs_fts USING fts5(
ftstitle,
ftsalbum,
@ -682,11 +686,11 @@ CREATE VIRTUAL TABLE IF NOT EXISTS qobuz_songs_fts USING fts3(
ftsgrouping,
ftsgenre,
ftscomment,
tokenize=unicode
tokenize = "unicode61 remove_diacritics 0"
);
CREATE VIRTUAL TABLE IF NOT EXISTS playlist_items_fts_ USING fts3(
CREATE VIRTUAL TABLE IF NOT EXISTS playlist_items_fts_ USING fts5(
ftstitle,
ftsalbum,
@ -697,11 +701,11 @@ CREATE VIRTUAL TABLE IF NOT EXISTS playlist_items_fts_ USING fts3(
ftsgrouping,
ftsgenre,
ftscomment,
tokenize=unicode
tokenize = "unicode61 remove_diacritics 0"
);
CREATE VIRTUAL TABLE IF NOT EXISTS %allsongstables_fts USING fts3(
CREATE VIRTUAL TABLE IF NOT EXISTS %allsongstables_fts USING fts5(
ftstitle,
ftsalbum,
@ -712,12 +716,6 @@ CREATE VIRTUAL TABLE IF NOT EXISTS %allsongstables_fts USING fts3(
ftsgrouping,
ftsgenre,
ftscomment,
tokenize=unicode
tokenize = "unicode61 remove_diacritics 0"
);
INSERT INTO songs_fts (ROWID, ftstitle, ftsalbum, ftsartist, ftsalbumartist, ftscomposer, ftsperformer, ftsgrouping, ftsgenre, ftscomment)
SELECT ROWID, title, album, artist, albumartist, composer, performer, grouping, genre, comment FROM songs;
INSERT INTO %allsongstables_fts (ROWID, ftstitle, ftsalbum, ftsartist, ftsalbumartist, ftscomposer, ftsperformer, ftsgrouping, ftsgenre, ftscomment)
SELECT ROWID, title, album, artist, albumartist, composer, performer, grouping, genre, comment FROM %allsongstables;

View File

@ -44,7 +44,7 @@ BuildRequires: pkgconfig(dbus-1)
BuildRequires: pkgconfig(gnutls)
BuildRequires: pkgconfig(alsa)
BuildRequires: pkgconfig(protobuf)
BuildRequires: pkgconfig(sqlite3)
BuildRequires: pkgconfig(sqlite3) >= 3.9
%if ! 0%{?centos} && ! 0%{?mageia}
BuildRequires: pkgconfig(taglib)
%endif

View File

@ -40,7 +40,7 @@ CollectionQuery::CollectionQuery(const QueryOptions &options)
: include_unavailable_(false), join_with_fts_(false), limit_(-1) {
if (!options.filter().isEmpty()) {
// We need to munge the filter text a little bit to get it to work as expected with sqlite's FTS3:
// We need to munge the filter text a little bit to get it to work as expected with sqlite's FTS5:
// 1) Append * to all tokens.
// 2) Prefix "fts" to column names.
// 3) Remove colons which don't correspond to column names.

View File

@ -41,6 +41,7 @@
#include <QSqlDriver>
#include <QSqlDatabase>
#include <QSqlQuery>
#include <QSqlResult>
#include <QSqlError>
#include <QStandardPaths>
#include <QtDebug>
@ -52,177 +53,12 @@
#include "scopedtransaction.h"
const char *Database::kDatabaseFilename = "strawberry.db";
const int Database::kSchemaVersion = 8;
const int Database::kSchemaVersion = 9;
const char *Database::kMagicAllSongsTables = "%allsongstables";
int Database::sNextConnectionId = 1;
QMutex Database::sNextConnectionIdMutex;
Database::Token::Token(const QString &token, int start, int end)
: token(token), start_offset(start), end_offset(end) {}
struct sqlite3_tokenizer_module {
int iVersion;
int (*xCreate)(int argc, /* Size of argv array */
const char *const *argv, /* Tokenizer argument strings */
sqlite3_tokenizer** ppTokenizer); /* OUT: Created tokenizer */
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
int (*xOpen)(
sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
const char *pInput, int nBytes, /* Input buffer */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
);
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
int (*xNext)(
sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */
const char* *ppToken, int *pnBytes, /* OUT: Normalized text for token */
int *piStartOffset, /* OUT: Byte offset of token in input buffer */
int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
int *piPosition); /* OUT: Number of tokens returned before this one */
};
struct sqlite3_tokenizer {
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
/* Tokenizer implementations will typically add additional fields */
};
struct sqlite3_tokenizer_cursor {
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
/* Tokenizer implementations will typically add additional fields */
};
sqlite3_tokenizer_module *Database::sFTSTokenizer = nullptr;
int Database::FTSCreate(int argc, const char *const *argv, sqlite3_tokenizer **tokenizer) {
*tokenizer = reinterpret_cast<sqlite3_tokenizer*>(new UnicodeTokenizer);
return SQLITE_OK;
}
int Database::FTSDestroy(sqlite3_tokenizer *tokenizer) {
UnicodeTokenizer *real_tokenizer = reinterpret_cast<UnicodeTokenizer*>(tokenizer);
delete real_tokenizer;
return SQLITE_OK;
}
int Database::FTSOpen(sqlite3_tokenizer *pTokenizer, const char *input, int bytes, sqlite3_tokenizer_cursor **cursor) {
UnicodeTokenizerCursor *new_cursor = new UnicodeTokenizerCursor;
new_cursor->pTokenizer = pTokenizer;
new_cursor->position = 0;
QString str = QString::fromUtf8(input, bytes).toLower();
QChar *data = str.data();
// Decompose and strip punctuation.
QList<Token> tokens;
QString token;
int start_offset = 0;
int offset = 0;
for (int i = 0; i < str.length(); ++i) {
QChar c = data[i];
ushort unicode = c.unicode();
if (unicode <= 0x007f) {
offset += 1;
}
else if (unicode >= 0x0080 && unicode <= 0x07ff) {
offset += 2;
}
else if (unicode >= 0x0800) {
offset += 3;
}
// Unicode astral planes unsupported in Qt?
/*else if (unicode >= 0x010000 && unicode <= 0x10ffff) {
offset += 4;
}*/
if (!data[i].isLetterOrNumber()) {
// Token finished.
if (token.length() != 0) {
tokens << Token(token, start_offset, offset - 1);
start_offset = offset;
token.clear();
}
else {
++start_offset;
}
}
else {
if (data[i].decompositionTag() != QChar::NoDecomposition) {
token.push_back(data[i].decomposition()[0]);
} else {
token.push_back(data[i]);
}
}
if (i == str.length() - 1) {
if (token.length() != 0) {
tokens << Token(token, start_offset, offset);
token.clear();
}
}
}
new_cursor->tokens = tokens;
*cursor = reinterpret_cast<sqlite3_tokenizer_cursor*>(new_cursor);
return SQLITE_OK;
}
int Database::FTSClose(sqlite3_tokenizer_cursor *cursor) {
UnicodeTokenizerCursor *real_cursor = reinterpret_cast<UnicodeTokenizerCursor*>(cursor);
delete real_cursor;
return SQLITE_OK;
}
int Database::FTSNext(sqlite3_tokenizer_cursor *cursor, const char* *token, int *bytes, int *start_offset, int *end_offset, int *position) {
UnicodeTokenizerCursor *real_cursor = reinterpret_cast<UnicodeTokenizerCursor*>(cursor);
QList<Token> tokens = real_cursor->tokens;
if (real_cursor->position >= tokens.size()) {
return SQLITE_DONE;
}
Token t = tokens[real_cursor->position];
QByteArray utf8 = t.token.toUtf8();
*token = utf8.constData();
*bytes = utf8.size();
*start_offset = t.start_offset;
*end_offset = t.end_offset;
*position = real_cursor->position++;
real_cursor->current_utf8 = utf8;
return SQLITE_OK;
}
void Database::StaticInit() {
if (sFTSTokenizer) return;
sFTSTokenizer = new sqlite3_tokenizer_module;
sFTSTokenizer->iVersion = 0;
sFTSTokenizer->xCreate = &Database::FTSCreate;
sFTSTokenizer->xDestroy = &Database::FTSDestroy;
sFTSTokenizer->xOpen = &Database::FTSOpen;
sFTSTokenizer->xNext = &Database::FTSNext;
sFTSTokenizer->xClose = &Database::FTSClose;
return;
}
Database::Database(Application *app, QObject *parent, const QString &database_name) :
QObject(parent),
app_(app),
@ -254,9 +90,6 @@ Database::~Database() {
qLog(Error) << "Connection" << connection_id << "is still open!";
}
if (sFTSTokenizer)
delete sFTSTokenizer;
}
void Database::ExitAsync() {
@ -308,14 +141,14 @@ QSqlDatabase Database::Connect() {
return db;
}
// Find Sqlite3 functions in the Qt plugin.
if (!sFTSTokenizer) StaticInit();
{
if (db.tables().count() == 0) {
// Set up initial schema
qLog(Info) << "Creating initial database schema";
UpdateDatabaseSchema(0, db);
}
else if (SchemaVersion(&db) <= 8) { // Register unicode from unicode61 tokenizer to drop old FTS3 tables.
#ifdef SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER
// In case sqlite>=3.12 is compiled without -DSQLITE_ENABLE_FTS3_TOKENIZER
// (generally a good idea due to security reasons) the fts3 support should be enabled explicitly.
QVariant v = db.driver()->handle();
if (v.isValid() && qstrcmp(v.typeName(), "sqlite3*") == 0) {
sqlite3 *handle = *static_cast<sqlite3**>(v.data());
@ -326,20 +159,21 @@ QSqlDatabase Database::Connect() {
else qLog(Fatal) << "Unable to enable FTS3 tokenizer";
}
#endif
QSqlQuery set_fts_tokenizer(db);
set_fts_tokenizer.prepare("SELECT fts3_tokenizer(:name, :pointer)");
set_fts_tokenizer.bindValue(":name", "unicode");
set_fts_tokenizer.bindValue(":pointer", QByteArray(reinterpret_cast<const char*>(&sFTSTokenizer), sizeof(&sFTSTokenizer)));
if (!set_fts_tokenizer.exec()) {
qLog(Warning) << "Couldn't register FTS3 tokenizer : " << set_fts_tokenizer.lastError();
QSqlQuery get_fts_tokenizer(db);
get_fts_tokenizer.prepare("SELECT fts3_tokenizer(:name)");
get_fts_tokenizer.bindValue(":name", "unicode61");
if (get_fts_tokenizer.exec() && get_fts_tokenizer.next()) {
QSqlQuery set_fts_tokenizer(db);
set_fts_tokenizer.prepare("SELECT fts3_tokenizer(:name, :pointer)");
set_fts_tokenizer.bindValue(":name", "unicode");
set_fts_tokenizer.bindValue(":pointer", get_fts_tokenizer.value(0));
if (!set_fts_tokenizer.exec()) {
qLog(Warning) << "Couldn't register FTS3 tokenizer : " << set_fts_tokenizer.lastError();
}
}
else {
qLog(Warning) << "Couldn't get FTS3 tokenizer : " << get_fts_tokenizer.lastError();
}
// Implicit invocation of ~QSqlQuery() when leaving the scope to release any remaining database locks!
}
if (db.tables().count() == 0) {
// Set up initial schema
qLog(Info) << "Creating initial database schema";
UpdateDatabaseSchema(0, db);
}
// Attach external databases
@ -399,7 +233,7 @@ void Database::Close() {
}
void Database::UpdateMainSchema(QSqlDatabase *db) {
int Database::SchemaVersion(QSqlDatabase *db) {
// Get the database's schema version
int schema_version = 0;
@ -408,7 +242,13 @@ void Database::UpdateMainSchema(QSqlDatabase *db) {
if (q.next()) schema_version = q.value(0).toInt();
// Implicit invocation of ~QSqlQuery() when leaving the scope to release any remaining database locks!
}
return schema_version;
}
void Database::UpdateMainSchema(QSqlDatabase *db) {
int schema_version = SchemaVersion(db);
startup_schema_version_ = schema_version;
if (schema_version > kSchemaVersion) {
@ -591,6 +431,7 @@ void Database::ExecSongTablesCommands(QSqlDatabase &db, const QStringList &song_
if (CheckErrors(query)) qFatal("Unable to update music collection database");
}
}
}
QStringList Database::SongsTables(QSqlDatabase &db, int schema_version) const {
@ -745,4 +586,3 @@ void Database::BackupFile(const QString &filename) {
sqlite3_backup_finish(backup);
}

View File

@ -37,13 +37,6 @@
#include <QString>
#include <QStringList>
extern "C" {
struct sqlite3_tokenizer;
struct sqlite3_tokenizer_cursor;
struct sqlite3_tokenizer_module;
}
class QThread;
class Application;
@ -95,6 +88,7 @@ class Database : public QObject {
void DoBackup();
private:
int SchemaVersion(QSqlDatabase *db);
void UpdateMainSchema(QSqlDatabase *db);
void ExecSchemaCommandsFromFile(QSqlDatabase &db, const QString &filename, int schema_version, bool in_transaction = false);
@ -133,38 +127,6 @@ class Database : public QObject {
QThread *original_thread_;
// Do static initialisation like loading sqlite functions.
static void StaticInit();
typedef int (*Sqlite3CreateFunc)(sqlite3*, const char*, int, int, void*, void (*)(sqlite3_context*, int, sqlite3_value**), void (*)(sqlite3_context*, int, sqlite3_value**), void (*)(sqlite3_context*));
static sqlite3_tokenizer_module *sFTSTokenizer;
static int FTSCreate(int argc, const char *const *argv, sqlite3_tokenizer **tokenizer);
static int FTSDestroy(sqlite3_tokenizer *tokenizer);
static int FTSOpen(sqlite3_tokenizer *tokenizer, const char *input, int bytes, sqlite3_tokenizer_cursor **cursor);
static int FTSClose(sqlite3_tokenizer_cursor *cursor);
static int FTSNext(sqlite3_tokenizer_cursor *cursor, const char **token, int *bytes, int *start_offset, int *end_offset, int *position);
struct Token {
Token(const QString &token, int start, int end);
QString token;
int start_offset;
int end_offset;
};
// Based on sqlite3_tokenizer.
struct UnicodeTokenizer {
const sqlite3_tokenizer_module *pModule;
};
struct UnicodeTokenizerCursor {
const sqlite3_tokenizer *pTokenizer;
QList<Token> tokens;
int position;
QByteArray current_utf8;
};
};
class MemoryDatabase : public Database {
@ -178,4 +140,3 @@ class MemoryDatabase : public Database {
};
#endif // DATABASE_H