2019-04-01 18:34:39 +02:00
|
|
|
/*
|
|
|
|
|
* This file is part of the Flowee project
|
2022-09-07 15:08:34 +02:00
|
|
|
* Copyright (C) 2019-2022 Tom Zander <tom@flowee.org>
|
2019-04-01 18:34:39 +02:00
|
|
|
*
|
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <QString>
|
|
|
|
|
#include <Logger.h>
|
2019-04-01 22:53:39 +02:00
|
|
|
#include <QDataStream>
|
2019-04-04 10:36:15 +02:00
|
|
|
#include <qfileinfo.h>
|
2019-04-09 13:24:18 +02:00
|
|
|
#include <QMapIterator>
|
2019-04-01 18:34:39 +02:00
|
|
|
|
|
|
|
|
#include "HashStorage.h"
|
|
|
|
|
#include "HashStorage_p.h"
|
|
|
|
|
|
2019-03-31 15:11:03 +02:00
|
|
|
#include <boost/filesystem.hpp>
|
2021-02-07 13:46:57 +01:00
|
|
|
#include <unordered_map>
|
2019-03-31 15:11:03 +02:00
|
|
|
|
2019-10-16 22:42:44 +02:00
|
|
|
#define WIDTH 32
|
2019-04-02 12:07:08 +02:00
|
|
|
|
2019-04-03 18:55:33 +02:00
|
|
|
namespace {
|
2019-04-08 13:19:54 +02:00
|
|
|
struct Pair {
|
2019-10-16 22:42:44 +02:00
|
|
|
Pair(const uint256 *h, int i) : hash(h), index(i) {}
|
|
|
|
|
const uint256 *hash;
|
2019-04-08 13:19:54 +02:00
|
|
|
int index;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct PairSorter {
|
2022-09-07 15:08:34 +02:00
|
|
|
explicit PairSorter(const std::unordered_map<uint256, int, HashShortener, HashComparison> &orig) {
|
2019-04-08 13:19:54 +02:00
|
|
|
pairs.reserve(orig.size());
|
2019-10-16 22:42:44 +02:00
|
|
|
for (auto iter = orig.begin(); iter != orig.end(); ++iter) {
|
|
|
|
|
pairs.push_back(Pair(&iter->first, iter->second));
|
2019-04-08 13:19:54 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::vector<Pair> pairs;
|
|
|
|
|
};
|
2019-04-09 11:19:03 +02:00
|
|
|
|
2019-04-09 12:02:45 +02:00
|
|
|
bool sortPairs(const Pair &a, const Pair &b) {
|
|
|
|
|
return a.hash->Compare(*b.hash) <= 0;
|
|
|
|
|
}
|
|
|
|
|
|
2019-04-09 11:19:03 +02:00
|
|
|
struct PartHashTip {
|
|
|
|
|
int partIndex;
|
|
|
|
|
int value;
|
2019-10-16 22:42:44 +02:00
|
|
|
const uint256 *key;
|
2019-04-09 11:19:03 +02:00
|
|
|
};
|
|
|
|
|
struct HashListPartProxy {
|
|
|
|
|
const uchar *file;
|
|
|
|
|
int pos, rows;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct HashCollector {
|
2022-09-07 15:08:34 +02:00
|
|
|
explicit HashCollector(const QList<HashListPart*> &parts)
|
2019-04-09 11:19:03 +02:00
|
|
|
{
|
|
|
|
|
m_tips.reserve(parts.size());
|
|
|
|
|
m_parts.reserve(parts.size());
|
|
|
|
|
for (int i = 0; i < parts.size(); ++i) {
|
|
|
|
|
auto &p = parts.at(i);
|
2021-02-07 10:57:45 +01:00
|
|
|
m_parts.push_back({p->sorted, 0, (int)(p->sortedFileSize / (WIDTH + sizeof(int)))});
|
2019-04-09 11:19:03 +02:00
|
|
|
sortInTip(i);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void sortInTip(int partIndex) {
|
|
|
|
|
PartHashTip tip;
|
|
|
|
|
tip.partIndex = partIndex;
|
2022-09-07 15:08:34 +02:00
|
|
|
assert(int(m_parts.size()) > partIndex);
|
2019-04-09 11:19:03 +02:00
|
|
|
HashListPartProxy &p = m_parts.at(partIndex);
|
2022-09-07 15:08:34 +02:00
|
|
|
assert(p.pos < p.rows);
|
2019-04-09 11:19:03 +02:00
|
|
|
const uchar *recordStart = (p.file + p.pos * (WIDTH + sizeof(int)));
|
2019-10-16 22:42:44 +02:00
|
|
|
tip.key = reinterpret_cast<const uint256*>(recordStart);
|
2019-04-09 11:19:03 +02:00
|
|
|
tip.value = *reinterpret_cast<const int*>(recordStart + WIDTH);
|
|
|
|
|
++p.pos;
|
|
|
|
|
|
|
|
|
|
if (m_tips.empty()) {
|
|
|
|
|
m_tips.append(tip);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
int l = 0;
|
|
|
|
|
int r = m_tips.size() - 1;
|
|
|
|
|
while (l <= r) {
|
|
|
|
|
int m = (l + r) / 2;
|
|
|
|
|
int comp = m_tips.at(m).key->Compare(*tip.key);
|
|
|
|
|
if (comp < 0)
|
|
|
|
|
l = m + 1;
|
|
|
|
|
else if (comp > 0)
|
|
|
|
|
r = m - 1;
|
|
|
|
|
else {
|
|
|
|
|
assert(false);
|
|
|
|
|
throw std::runtime_error("Duplicate entries in HashStorage");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
m_tips.insert(l, tip);
|
|
|
|
|
|
|
|
|
|
// logFatal() << "ran a sortInTip";
|
|
|
|
|
// for (int i = 0; i < m_tips.size(); ++i) {
|
|
|
|
|
// logFatal() << *m_tips.at(i).key;
|
|
|
|
|
// }
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void writeHashesToFile(QFile *outFile) {
|
2022-09-07 15:08:34 +02:00
|
|
|
assert(outFile);
|
|
|
|
|
assert(outFile->isOpen());
|
2019-04-09 11:19:03 +02:00
|
|
|
while (!m_tips.isEmpty()) {
|
|
|
|
|
auto item = m_tips.takeFirst();
|
|
|
|
|
outFile->write(reinterpret_cast<const char*>(item.key->begin()), WIDTH);
|
|
|
|
|
outFile->write(reinterpret_cast<const char*>(&item.value), sizeof(int));
|
|
|
|
|
|
|
|
|
|
auto &p = m_parts.at(item.partIndex);
|
|
|
|
|
m_revertLookup.insert(item.value, m_revertLookup.size());
|
|
|
|
|
if (p.pos < p.rows)
|
|
|
|
|
sortInTip(item.partIndex);
|
|
|
|
|
}
|
|
|
|
|
outFile->close();
|
|
|
|
|
}
|
|
|
|
|
void writeRevertLookup(QFile *outFile) {
|
2022-09-07 15:08:34 +02:00
|
|
|
assert(outFile);
|
|
|
|
|
assert(outFile->isOpen());
|
2019-04-09 11:19:03 +02:00
|
|
|
for (auto iter = m_revertLookup.begin(); iter != m_revertLookup.end(); ++iter) {
|
|
|
|
|
outFile->write(reinterpret_cast<const char*>(&iter.value()), sizeof(int));
|
|
|
|
|
}
|
|
|
|
|
m_revertLookup.clear();
|
|
|
|
|
outFile->close();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
QList<PartHashTip> m_tips;
|
|
|
|
|
std::vector<HashListPartProxy> m_parts;
|
|
|
|
|
QMap<int, int> m_revertLookup;
|
|
|
|
|
};
|
|
|
|
|
|
2019-04-03 18:55:33 +02:00
|
|
|
}
|
|
|
|
|
|
2019-10-16 22:42:44 +02:00
|
|
|
uint256 HashStoragePrivate::s_null = uint256();
|
2019-04-01 18:34:39 +02:00
|
|
|
|
2019-04-09 11:19:03 +02:00
|
|
|
// -----------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
HashListPart::HashListPart(const QString &partBase)
|
|
|
|
|
: sortedFile(partBase + ".db"),
|
|
|
|
|
reverseLookupFile(partBase + ".index")
|
|
|
|
|
{
|
2019-04-09 13:24:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
HashListPart::~HashListPart()
|
|
|
|
|
{
|
|
|
|
|
closeFiles();
|
2019-04-09 11:19:03 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void HashListPart::openFiles()
|
|
|
|
|
{
|
2022-09-07 15:08:34 +02:00
|
|
|
assert(sorted == nullptr);
|
|
|
|
|
assert(reverseLookup == nullptr);
|
2019-04-09 11:19:03 +02:00
|
|
|
if (sortedFile.open(QIODevice::ReadOnly)) {
|
2021-02-07 10:57:45 +01:00
|
|
|
sortedFileSize = sortedFile.size();
|
|
|
|
|
sorted = sortedFile.map(0, sortedFileSize);
|
2019-04-09 11:19:03 +02:00
|
|
|
sortedFile.close();
|
|
|
|
|
}
|
|
|
|
|
if (reverseLookupFile.open(QIODevice::ReadOnly)) {
|
|
|
|
|
reverseLookup = reverseLookupFile.map(0, reverseLookupFile.size());
|
|
|
|
|
reverseLookupFile.close();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void HashListPart::closeFiles()
|
|
|
|
|
{
|
|
|
|
|
// technically speaking, the files are not actually open. They are just mapped.
|
2019-04-09 13:24:18 +02:00
|
|
|
if (sorted) {
|
|
|
|
|
sortedFile.unmap(sorted);
|
|
|
|
|
sorted = nullptr;
|
|
|
|
|
}
|
|
|
|
|
if (reverseLookup) {
|
|
|
|
|
reverseLookupFile.unmap(reverseLookup);
|
|
|
|
|
reverseLookup = nullptr;
|
|
|
|
|
}
|
2019-04-09 11:19:03 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// -----------------------------------------------------------------
|
|
|
|
|
|
2019-04-01 18:34:39 +02:00
|
|
|
HashStorage::HashStorage(const boost::filesystem::path &basedir)
|
|
|
|
|
: d(new HashStoragePrivate(basedir))
|
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
HashStorage::~HashStorage()
|
|
|
|
|
{
|
|
|
|
|
delete d;
|
|
|
|
|
}
|
|
|
|
|
|
2019-04-05 13:50:41 +02:00
|
|
|
int HashStorage::databaseCount() const
|
|
|
|
|
{
|
|
|
|
|
return d->dbs.length();
|
|
|
|
|
}
|
|
|
|
|
|
2019-10-16 22:42:44 +02:00
|
|
|
HashIndexPoint HashStorage::append(const uint256 &hash)
|
2019-04-01 18:34:39 +02:00
|
|
|
{
|
|
|
|
|
QList<HashList*> dbs(d->dbs);
|
2022-09-07 15:08:34 +02:00
|
|
|
assert(!dbs.isEmpty());
|
2019-04-04 10:36:15 +02:00
|
|
|
auto db = dbs.last();
|
|
|
|
|
int index = db->append(hash);
|
2022-09-07 15:08:34 +02:00
|
|
|
assert(index >= 0);
|
2021-02-07 20:02:10 +01:00
|
|
|
|
|
|
|
|
// Because we are going to memmap the total file we aim to have a file
|
|
|
|
|
// that is approx (but absolutely not one byte over) a power of two.
|
|
|
|
|
// Lets aim for 0xFFFFFFF (256MB)
|
|
|
|
|
// for similar reasons we have 8 parts with an equal share of the whole each
|
|
|
|
|
if (db->m_cacheMap.size() > 932064)
|
2019-04-09 11:19:03 +02:00
|
|
|
db->stabilize();
|
2021-02-07 20:02:10 +01:00
|
|
|
else if (db->m_parts.size() > 7 && dbs == d->dbs)
|
2019-04-09 11:19:03 +02:00
|
|
|
finalize();
|
2022-09-07 13:04:30 +02:00
|
|
|
return HashIndexPoint {static_cast<int>(dbs.size() - 1), index};
|
2019-04-01 18:34:39 +02:00
|
|
|
}
|
|
|
|
|
|
2019-10-16 22:42:44 +02:00
|
|
|
const uint256 &HashStorage::find(HashIndexPoint point) const
|
2019-04-01 18:34:39 +02:00
|
|
|
{
|
2022-09-07 15:08:34 +02:00
|
|
|
assert(point.db >= 0);
|
|
|
|
|
assert(point.row >= 0);
|
2019-04-01 18:34:39 +02:00
|
|
|
QList<HashList*> dbs(d->dbs);
|
2022-09-07 13:04:30 +02:00
|
|
|
if (point.db >= dbs.size())
|
2019-04-01 18:34:39 +02:00
|
|
|
return HashStoragePrivate::s_null;
|
|
|
|
|
return dbs.at(point.db)->at(point.row);
|
|
|
|
|
}
|
|
|
|
|
|
2019-10-16 22:42:44 +02:00
|
|
|
HashIndexPoint HashStorage::lookup(const uint256 &hash) const
|
2019-04-01 18:34:39 +02:00
|
|
|
{
|
|
|
|
|
QList<HashList*> dbs(d->dbs);
|
2022-09-07 15:08:34 +02:00
|
|
|
assert(!dbs.isEmpty());
|
2019-11-07 16:09:42 +01:00
|
|
|
for (int i = dbs.length() - 1; i >= 0; --i) {
|
2019-04-09 13:24:18 +02:00
|
|
|
int result = dbs.at(i)->lookup(hash);
|
2019-04-01 18:34:39 +02:00
|
|
|
if (result >= 0) {
|
2022-08-20 19:18:34 +02:00
|
|
|
return HashIndexPoint {i, result};
|
2019-04-01 18:34:39 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return HashIndexPoint();
|
|
|
|
|
}
|
|
|
|
|
|
2019-04-01 21:15:24 +02:00
|
|
|
void HashStorage::finalize()
|
|
|
|
|
{
|
2019-04-08 13:19:54 +02:00
|
|
|
d->dbs.last()->finalize();
|
|
|
|
|
d->dbs.append(HashList::createEmpty(d->basedir, d->dbs.size() + 1));
|
2019-04-01 21:15:24 +02:00
|
|
|
}
|
|
|
|
|
|
2019-04-01 18:34:39 +02:00
|
|
|
|
2019-04-01 22:53:39 +02:00
|
|
|
// -----------------------------------------------------------------
|
2019-04-01 18:34:39 +02:00
|
|
|
|
2019-04-04 10:36:15 +02:00
|
|
|
HashList::HashList(const QString &dbBase)
|
2019-04-08 13:19:54 +02:00
|
|
|
: m_filebase(dbBase),
|
|
|
|
|
m_sortedFile(m_filebase + ".db"),
|
|
|
|
|
m_reverseLookupFile(m_filebase + ".index")
|
2019-04-01 18:34:39 +02:00
|
|
|
{
|
2021-02-07 18:08:59 +01:00
|
|
|
memset(m_offsets, 0, sizeof(uint32_t) * 256);
|
2019-04-01 22:53:39 +02:00
|
|
|
QFile info(m_filebase + ".info");
|
2019-04-09 11:19:03 +02:00
|
|
|
int partCount = 0;
|
2019-04-01 22:53:39 +02:00
|
|
|
if (info.open(QIODevice::ReadOnly)) {
|
|
|
|
|
QDataStream in(&info);
|
|
|
|
|
in >> m_nextId;
|
2019-04-09 11:19:03 +02:00
|
|
|
in >> partCount;
|
2022-09-07 15:08:34 +02:00
|
|
|
for (int i = 0; i < 256; ++i) {
|
|
|
|
|
in >> m_offsets[i];
|
2021-02-07 18:08:59 +01:00
|
|
|
}
|
2019-04-01 22:53:39 +02:00
|
|
|
}
|
2021-02-07 18:08:59 +01:00
|
|
|
info.close();
|
2019-04-01 22:53:39 +02:00
|
|
|
|
2019-04-08 13:19:54 +02:00
|
|
|
// is it finalized?
|
|
|
|
|
if (m_sortedFile.open(QIODevice::ReadOnly)) {
|
2022-09-07 15:08:34 +02:00
|
|
|
assert(partCount == 0);
|
2021-02-07 10:57:45 +01:00
|
|
|
m_sortedFileSize = m_sortedFile.size();
|
|
|
|
|
m_sorted = m_sortedFile.map(0, m_sortedFileSize);
|
2019-04-08 13:19:54 +02:00
|
|
|
m_sortedFile.close();
|
|
|
|
|
if (m_reverseLookupFile.open(QIODevice::ReadOnly)) {
|
2022-09-07 15:08:34 +02:00
|
|
|
m_reverseLookupFileSize = m_reverseLookupFile.size();
|
|
|
|
|
m_reverseLookup = m_reverseLookupFile.map(0, m_reverseLookupFileSize);
|
2019-04-08 13:19:54 +02:00
|
|
|
m_reverseLookupFile.close();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else { // We are not finalized, so we should have a log
|
|
|
|
|
m_log = new QFile(m_filebase + ".log");
|
|
|
|
|
if (!m_log->open(QIODevice::ReadWrite))
|
|
|
|
|
throw std::runtime_error("HashList: failed to open log file");
|
|
|
|
|
while(true) {
|
2019-10-16 22:42:44 +02:00
|
|
|
uint256 item;
|
2019-04-08 13:19:54 +02:00
|
|
|
auto byteCount = m_log->read(reinterpret_cast<char*>(item.begin()), WIDTH);
|
|
|
|
|
if (byteCount == WIDTH) {
|
2019-10-16 22:42:44 +02:00
|
|
|
m_cacheMap.insert(std::make_pair(item, m_nextId++));
|
2019-04-08 13:19:54 +02:00
|
|
|
} else if (byteCount == 0) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
2019-04-01 18:34:39 +02:00
|
|
|
}
|
2019-04-09 11:19:03 +02:00
|
|
|
m_parts.reserve(partCount);
|
|
|
|
|
for (int i = 0; i < partCount; ++i) {
|
|
|
|
|
m_parts.append(new HashListPart(QString("%1_%2").arg(m_filebase)
|
|
|
|
|
.arg(i, 2, 10, QChar('0'))));
|
2019-04-09 13:24:18 +02:00
|
|
|
m_parts.last()->openFiles();
|
2019-04-09 11:19:03 +02:00
|
|
|
}
|
2019-04-01 18:34:39 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
HashList::~HashList()
|
|
|
|
|
{
|
2019-04-08 13:19:54 +02:00
|
|
|
if (m_log) {
|
|
|
|
|
m_log->close();
|
|
|
|
|
delete m_log;
|
|
|
|
|
}
|
2019-04-01 22:53:39 +02:00
|
|
|
if (m_sorted)
|
2019-04-08 13:19:54 +02:00
|
|
|
m_sortedFile.unmap(m_sorted);
|
|
|
|
|
if (m_reverseLookup)
|
|
|
|
|
m_reverseLookupFile.unmap(m_reverseLookup);
|
2019-04-01 18:34:39 +02:00
|
|
|
}
|
|
|
|
|
|
2019-04-08 13:19:54 +02:00
|
|
|
HashList *HashList::createEmpty(const QString &dbBase, int index)
|
2019-03-31 15:11:03 +02:00
|
|
|
{
|
2019-10-16 22:42:44 +02:00
|
|
|
QString name("/hashlist-%1");
|
2019-04-08 13:19:54 +02:00
|
|
|
HashList *answer = new HashList(dbBase + name.arg(index, 3, 10, QChar('0')));
|
2019-03-31 15:11:03 +02:00
|
|
|
return answer;
|
|
|
|
|
}
|
|
|
|
|
|
2019-10-16 22:42:44 +02:00
|
|
|
int HashList::append(const uint256 &hash)
|
2019-04-01 18:34:39 +02:00
|
|
|
{
|
|
|
|
|
QMutexLocker lock(&m_mutex);
|
|
|
|
|
const int id = m_nextId++;
|
2019-10-16 22:42:44 +02:00
|
|
|
m_cacheMap.insert(std::make_pair(hash, id));
|
2022-09-07 15:08:34 +02:00
|
|
|
assert(m_log);
|
2019-04-02 12:07:08 +02:00
|
|
|
m_log->write(reinterpret_cast<const char*>(hash.begin()), WIDTH);
|
2019-04-01 18:34:39 +02:00
|
|
|
|
|
|
|
|
return id;
|
|
|
|
|
}
|
|
|
|
|
|
2019-10-16 22:42:44 +02:00
|
|
|
int HashList::lookup(const uint256 &hash) const
|
2019-04-01 18:34:39 +02:00
|
|
|
{
|
|
|
|
|
QMutexLocker lock(&m_mutex);
|
2019-04-05 13:50:41 +02:00
|
|
|
auto item = m_cacheMap.find(hash);
|
|
|
|
|
if (item != m_cacheMap.end())
|
2019-10-16 22:42:44 +02:00
|
|
|
return item->second;
|
2019-04-01 18:34:39 +02:00
|
|
|
|
2021-02-07 18:08:59 +01:00
|
|
|
if (m_sortedFileSize > 0) {
|
|
|
|
|
const uint8_t firstByte = hash.begin()[WIDTH -1]; // due to our sorting method this is actually the last byte of the hash
|
|
|
|
|
// Limit our search through the items starting with the same byte
|
|
|
|
|
int pos = m_offsets[firstByte] / (WIDTH + sizeof(int));
|
|
|
|
|
int endpos;
|
|
|
|
|
if (firstByte == 255)
|
|
|
|
|
endpos = m_sortedFileSize;
|
2019-04-02 12:07:08 +02:00
|
|
|
else
|
2021-02-07 18:08:59 +01:00
|
|
|
endpos = m_offsets[firstByte +1];
|
|
|
|
|
endpos = endpos / (WIDTH + sizeof(int));
|
|
|
|
|
endpos -= 1; // last item of section
|
|
|
|
|
while (pos <= endpos) {
|
|
|
|
|
int m = (pos + endpos) / 2;
|
2022-09-07 15:08:34 +02:00
|
|
|
uint256 *i = reinterpret_cast<uint256*>(m_sorted + m * (WIDTH + sizeof(int)));
|
|
|
|
|
const int comp = i->Compare(hash);
|
2021-02-07 18:08:59 +01:00
|
|
|
if (comp < 0)
|
|
|
|
|
pos = m + 1;
|
|
|
|
|
else if (comp > 0)
|
|
|
|
|
endpos = m - 1;
|
|
|
|
|
else
|
|
|
|
|
return *reinterpret_cast<int*>(m_sorted + m * (WIDTH + sizeof(int)) + WIDTH);
|
|
|
|
|
}
|
2019-04-01 22:53:39 +02:00
|
|
|
}
|
2019-04-09 11:19:03 +02:00
|
|
|
for (auto part : m_parts) {
|
2022-09-07 15:08:34 +02:00
|
|
|
assert(part->reverseLookup);
|
|
|
|
|
assert(part->sorted);
|
2021-02-07 18:08:59 +01:00
|
|
|
int pos = 0;
|
|
|
|
|
int endpos = part->sortedFileSize / (WIDTH + sizeof(int)) - 1;
|
2019-04-09 11:19:03 +02:00
|
|
|
while (pos <= endpos) {
|
|
|
|
|
int m = (pos + endpos) / 2;
|
2022-09-07 15:08:34 +02:00
|
|
|
uint256 *i = reinterpret_cast<uint256*>(part->sorted + m * (WIDTH + sizeof(int)));
|
|
|
|
|
const int comp = i->Compare(hash);
|
2019-04-09 11:19:03 +02:00
|
|
|
if (comp < 0)
|
|
|
|
|
pos = m + 1;
|
|
|
|
|
else if (comp > 0)
|
|
|
|
|
endpos = m - 1;
|
|
|
|
|
else
|
|
|
|
|
return *reinterpret_cast<int*>(part->sorted + m * (WIDTH + sizeof(int)) + WIDTH);
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-04-01 22:53:39 +02:00
|
|
|
|
2019-04-01 18:34:39 +02:00
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
2019-10-16 22:42:44 +02:00
|
|
|
const uint256 &HashList::at(int index) const
|
2019-04-01 18:34:39 +02:00
|
|
|
{
|
2022-09-07 15:08:34 +02:00
|
|
|
assert(index >= 0);
|
2019-04-01 18:34:39 +02:00
|
|
|
QMutexLocker lock(&m_mutex);
|
2019-04-08 13:19:54 +02:00
|
|
|
if (m_reverseLookup) {
|
2019-10-16 22:42:44 +02:00
|
|
|
if (m_reverseLookupFile.size() / qint64(sizeof(int)) < qint64(index))
|
2019-04-08 13:19:54 +02:00
|
|
|
throw std::runtime_error("row out of bounds");
|
|
|
|
|
|
|
|
|
|
// map index to row
|
2022-09-07 15:08:34 +02:00
|
|
|
assert(m_reverseLookupFileSize > index * sizeof(int));
|
|
|
|
|
const int *mapped = reinterpret_cast<int*>(m_reverseLookup);
|
|
|
|
|
const int row = mapped[index];
|
|
|
|
|
const int offset = row * (WIDTH + sizeof(int)); // full row-width is hash + int
|
|
|
|
|
assert(offset + (WIDTH + (int)sizeof(int)) <= m_sortedFileSize); // ensure we don't read out of bounds
|
|
|
|
|
const uint256 *dummy = reinterpret_cast<uint256*>(m_sorted + offset);
|
2019-04-08 13:19:54 +02:00
|
|
|
return *dummy;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// also check the dirty cache. Do this at end as this is a slow lookup
|
2019-10-16 22:42:44 +02:00
|
|
|
for (auto iter = m_cacheMap.begin(); iter != m_cacheMap.end(); ++iter) {
|
|
|
|
|
if (iter->second == index)
|
|
|
|
|
return iter->first;
|
2019-04-01 18:34:39 +02:00
|
|
|
}
|
2019-04-08 13:19:54 +02:00
|
|
|
return HashStoragePrivate::s_null;
|
2019-04-01 21:15:24 +02:00
|
|
|
}
|
2019-04-01 18:34:39 +02:00
|
|
|
|
2019-04-09 11:19:03 +02:00
|
|
|
void HashList::stabilize()
|
|
|
|
|
{
|
|
|
|
|
QMutexLocker lock(&m_mutex);
|
|
|
|
|
auto *part = new HashListPart(QString("%1_%2").arg(m_filebase)
|
|
|
|
|
.arg(m_parts.size(), 2, 10, QChar('0')));
|
|
|
|
|
if (!part->sortedFile.open(QIODevice::ReadWrite | QIODevice::Truncate)) {
|
|
|
|
|
throw std::runtime_error("Failed to open db file for writing");
|
|
|
|
|
}
|
|
|
|
|
if (!part->reverseLookupFile.open(QIODevice::ReadWrite | QIODevice::Truncate)) {
|
|
|
|
|
throw std::runtime_error("Failed to open index file for writing");
|
|
|
|
|
}
|
|
|
|
|
m_parts.append(part);
|
|
|
|
|
|
|
|
|
|
PairSorter sorted(m_cacheMap);
|
2019-04-09 12:02:45 +02:00
|
|
|
std::sort(sorted.pairs.begin(), sorted.pairs.end(), &sortPairs);
|
2021-02-07 13:46:57 +01:00
|
|
|
std::map<int, int> lookupTable;
|
2019-04-09 11:19:03 +02:00
|
|
|
for (auto iter = sorted.pairs.begin(); iter != sorted.pairs.end(); ++iter) {
|
|
|
|
|
assert(iter->index >= 0); // no negative numbers, please.
|
|
|
|
|
part->sortedFile.write(reinterpret_cast<const char*>(iter->hash->begin()), WIDTH);
|
|
|
|
|
part->sortedFile.write(reinterpret_cast<const char*>(&iter->index), sizeof(int));
|
2021-02-07 13:46:57 +01:00
|
|
|
lookupTable.insert(std::make_pair(iter->index, lookupTable.size()));
|
2019-04-09 11:19:03 +02:00
|
|
|
}
|
|
|
|
|
sorted.pairs.clear();
|
|
|
|
|
m_cacheMap.clear();
|
|
|
|
|
part->sortedFile.close();
|
2021-02-07 10:57:45 +01:00
|
|
|
part->sortedFileSize = part->sortedFile.size();
|
2019-04-09 11:19:03 +02:00
|
|
|
for (auto iter = lookupTable.begin(); iter != lookupTable.end(); ++iter) {
|
2021-02-07 13:46:57 +01:00
|
|
|
part->reverseLookupFile.write(reinterpret_cast<const char*>(&iter->second), sizeof(int));
|
2019-04-09 11:19:03 +02:00
|
|
|
}
|
|
|
|
|
lookupTable.clear();
|
|
|
|
|
part->reverseLookupFile.close();
|
|
|
|
|
m_log->close();
|
|
|
|
|
m_log->open(QIODevice::WriteOnly | QIODevice::Truncate);
|
|
|
|
|
part->openFiles();
|
|
|
|
|
writeInfoFile();
|
|
|
|
|
}
|
|
|
|
|
|
2021-02-07 18:08:59 +01:00
|
|
|
void HashList::fillOffsetsTable()
|
|
|
|
|
{
|
2022-09-07 15:08:34 +02:00
|
|
|
/*
|
|
|
|
|
* This populates 'm_offsets'
|
|
|
|
|
* The idea is that m_offets is a list of offsets where the 'key' is the last byte of the hash and
|
|
|
|
|
* the value is the offset in the 'm_sorted' bytearray where that hash is stored.
|
|
|
|
|
*
|
|
|
|
|
* The biggest trick is that all 'keys' that don't actually map to any hashes are given the offset of the
|
|
|
|
|
* next item.
|
|
|
|
|
*/
|
|
|
|
|
assert(m_sorted);
|
2021-02-07 18:08:59 +01:00
|
|
|
uint32_t data = 0; // the first byte of the KEY
|
|
|
|
|
uint32_t offset = 0; // the offset in file.
|
|
|
|
|
while (offset < m_sortedFileSize) {
|
|
|
|
|
// we sort the file by the 'WIDTH'-bytes hash, but the least signficant byte first.
|
|
|
|
|
const uint8_t x = m_sorted[offset + WIDTH - 1];
|
|
|
|
|
if (x > data) {
|
|
|
|
|
do {
|
|
|
|
|
m_offsets[++data] = offset;
|
|
|
|
|
} while (data < x);
|
|
|
|
|
}
|
|
|
|
|
offset += WIDTH + sizeof(int);
|
|
|
|
|
}
|
|
|
|
|
while (data < 255) {
|
|
|
|
|
m_offsets[++data] = offset;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-04-09 11:19:03 +02:00
|
|
|
void HashList::writeInfoFile() const
|
|
|
|
|
{
|
|
|
|
|
QFile info(m_filebase + ".info");
|
|
|
|
|
if (info.open(QIODevice::WriteOnly)) {
|
|
|
|
|
QDataStream out(&info);
|
|
|
|
|
out << m_nextId;
|
2022-09-07 15:08:34 +02:00
|
|
|
out << (int) m_parts.length();
|
2021-02-07 18:08:59 +01:00
|
|
|
|
|
|
|
|
for (int i = 0; i < 256; ++i) {
|
|
|
|
|
out << m_offsets[i];
|
|
|
|
|
}
|
2019-04-09 11:19:03 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-04-01 21:15:24 +02:00
|
|
|
void HashList::finalize()
|
|
|
|
|
{
|
2019-10-16 22:42:44 +02:00
|
|
|
if (!m_cacheMap.empty())
|
2019-04-09 11:19:03 +02:00
|
|
|
stabilize();
|
2022-09-07 15:08:34 +02:00
|
|
|
assert(m_cacheMap.empty());
|
|
|
|
|
assert(!m_sortedFile.exists());
|
|
|
|
|
assert(!m_sorted);
|
|
|
|
|
assert(!m_reverseLookup);
|
2019-04-01 22:53:39 +02:00
|
|
|
QMutexLocker lock(&m_mutex);
|
2019-04-08 13:19:54 +02:00
|
|
|
if (!m_sortedFile.open(QIODevice::ReadWrite | QIODevice::Truncate)) {
|
|
|
|
|
throw std::runtime_error("Failed to open db file for writing");
|
2019-04-01 21:15:24 +02:00
|
|
|
}
|
2019-04-08 13:19:54 +02:00
|
|
|
if (!m_reverseLookupFile.open(QIODevice::ReadWrite | QIODevice::Truncate)) {
|
|
|
|
|
throw std::runtime_error("Failed to open index file for writing");
|
2019-04-05 13:50:41 +02:00
|
|
|
}
|
2019-04-01 22:53:39 +02:00
|
|
|
|
2019-04-09 11:19:03 +02:00
|
|
|
HashCollector collector(m_parts);
|
|
|
|
|
collector.writeHashesToFile(&m_sortedFile);
|
|
|
|
|
collector.writeRevertLookup(&m_reverseLookupFile);
|
|
|
|
|
|
2020-12-25 23:51:06 +01:00
|
|
|
for (auto &p : m_parts) {
|
2019-04-09 11:19:03 +02:00
|
|
|
p->closeFiles();
|
|
|
|
|
p->reverseLookupFile.remove();
|
|
|
|
|
p->sortedFile.remove();
|
2019-04-01 21:15:24 +02:00
|
|
|
}
|
2019-04-09 11:19:03 +02:00
|
|
|
qDeleteAll(m_parts);
|
|
|
|
|
m_parts.clear();
|
2019-04-08 13:19:54 +02:00
|
|
|
m_log->close();
|
|
|
|
|
bool ok = m_log->remove();
|
2022-09-07 15:08:34 +02:00
|
|
|
assert(ok); Q_UNUSED(ok)
|
2019-04-08 13:19:54 +02:00
|
|
|
delete m_log;
|
|
|
|
|
m_log = nullptr;
|
2019-04-09 11:19:03 +02:00
|
|
|
m_sortedFile.close();
|
2021-02-07 10:57:45 +01:00
|
|
|
m_sortedFileSize = 0;
|
2019-04-08 13:19:54 +02:00
|
|
|
if (m_sortedFile.open(QIODevice::ReadOnly)) {
|
2021-02-07 10:57:45 +01:00
|
|
|
m_sortedFileSize = m_sortedFile.size();
|
|
|
|
|
m_sorted = m_sortedFile.map(0, m_sortedFileSize);
|
2019-04-08 13:19:54 +02:00
|
|
|
m_sortedFile.close();
|
2021-02-07 18:08:59 +01:00
|
|
|
|
|
|
|
|
fillOffsetsTable();
|
2019-04-08 13:19:54 +02:00
|
|
|
}
|
2019-04-09 11:19:03 +02:00
|
|
|
m_reverseLookupFile.close();
|
2019-04-08 13:19:54 +02:00
|
|
|
if (m_reverseLookupFile.open(QIODevice::ReadOnly)) {
|
2022-09-07 15:08:34 +02:00
|
|
|
m_reverseLookupFileSize = m_reverseLookupFile.size();
|
|
|
|
|
m_reverseLookup = m_reverseLookupFile.map(0, m_reverseLookupFileSize);
|
2019-04-08 13:19:54 +02:00
|
|
|
m_reverseLookupFile.close();
|
2019-04-01 22:53:39 +02:00
|
|
|
}
|
2019-04-09 11:19:03 +02:00
|
|
|
writeInfoFile();
|
2019-04-01 18:34:39 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2019-04-01 22:53:39 +02:00
|
|
|
// -----------------------------------------------------------------
|
|
|
|
|
|
2019-04-04 10:36:15 +02:00
|
|
|
HashStoragePrivate::HashStoragePrivate(const boost::filesystem::path &basedir_)
|
|
|
|
|
: basedir(QString::fromStdWString(basedir_.wstring()))
|
2019-04-01 18:34:39 +02:00
|
|
|
{
|
2020-11-14 23:59:01 +01:00
|
|
|
boost::system::error_code error;
|
|
|
|
|
boost::filesystem::create_directories(basedir_, error);
|
|
|
|
|
if (error && !boost::filesystem::exists(basedir_) && !boost::filesystem::is_directory(basedir_)) {
|
|
|
|
|
logFatal() << "HashStorage can't save. Failed creating the dir:" << basedir_.string();
|
|
|
|
|
return;
|
|
|
|
|
}
|
2019-04-04 10:36:15 +02:00
|
|
|
int index = 1;
|
2019-10-16 22:42:44 +02:00
|
|
|
const QString fileBase = QString("%1/hashlist-%2").arg(basedir);
|
2019-04-04 10:36:15 +02:00
|
|
|
while (true) {
|
2019-04-08 13:19:54 +02:00
|
|
|
QString dbFilename = fileBase.arg(index, 3, 10, QChar('0'));
|
|
|
|
|
QFileInfo dbInfo(dbFilename + ".db");
|
|
|
|
|
if (!dbInfo.exists()) {
|
|
|
|
|
// not finalized yet only has a log
|
|
|
|
|
QFileInfo dbLog(dbFilename + ".log");
|
|
|
|
|
if (!dbLog.exists())
|
|
|
|
|
break;
|
|
|
|
|
}
|
2019-04-04 10:36:15 +02:00
|
|
|
dbs.append(new HashList(dbFilename));
|
|
|
|
|
++index;
|
|
|
|
|
}
|
2019-03-31 15:11:03 +02:00
|
|
|
if (dbs.isEmpty()) {
|
2019-04-08 13:19:54 +02:00
|
|
|
dbs.append(HashList::createEmpty(basedir, 1));
|
2019-03-31 15:11:03 +02:00
|
|
|
}
|
2019-04-01 18:34:39 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
HashStoragePrivate::~HashStoragePrivate()
|
|
|
|
|
{
|
|
|
|
|
qDeleteAll(dbs);
|
|
|
|
|
dbs.clear();
|
|
|
|
|
}
|