From e396abbb5ef7ef015215c5267f8f3cb921663e89 Mon Sep 17 00:00:00 2001 From: TomZ Date: Wed, 9 Apr 2025 21:50:32 +0200 Subject: [PATCH] Make the basic work. --- design | 6 +- src/Processor.cpp | 181 +++++++++++++++++++++++++++++++++++++++++++++- src/Processor.h | 25 ++++++- 3 files changed, 207 insertions(+), 5 deletions(-) diff --git a/design b/design index 7bd61fd..3e75c26 100644 --- a/design +++ b/design @@ -116,8 +116,10 @@ What about an app that takes as input a directory (git repo) of all known bcmr files. input: bcmrs/ - hash - hash.tx (the raw auth-head transaction) + by-name + by-name.tx (the raw auth-head transaction) + I should be able to just parse this and based on the hash + check if we have the bcmr already trust/ category-hash with a trust-level. Really only if it is known spam. diff --git a/src/Processor.cpp b/src/Processor.cpp index b996d40..57932e2 100644 --- a/src/Processor.cpp +++ b/src/Processor.cpp @@ -2,10 +2,22 @@ #include #include +#include +#include +#include +#include +#include + +// if the passed path doesn't have a trailing slash, add one. 
+static QString pathWithSlash(const QString &input) {
+    if (input.endsWith('/'))
+        return input;
+    return input + '/';
+}
 
 Processor::Processor(const QString &inDir, const QString &outDir)
-    : m_inDir(inDir),
-    m_outDir(outDir)
+    : m_inDir(pathWithSlash(inDir)), // run() relies on the trailing slash to match 'bcmrs/'
+    m_outDir(pathWithSlash(outDir))
 {
 }
 
@@ -17,7 +29,188 @@ int Processor::run()
     }
     QDirIterator iter(m_inDir, QDir::NoDotAndDotDot | QDir::Files | QDir::Dirs, QDirIterator::Subdirectories);
     while (iter.hasNext()) {
-        logFatal() << "File:" << iter.next();
+        const QString path = iter.next();
+        // directories are listed as well, but only regular files can hold a BCMR.
+        if (!iter.fileInfo().isFile())
+            continue;
+        // only entries below <inDir>/bcmrs/ are BCMR candidates.
+        if (path.indexOf("bcmrs/", m_inDir.size()) == m_inDir.size()) {
+            parseBCMR(path);
+        }
     }
+
+    // place every BCMR we found in the out dir, named after the hash of its content.
+    for (auto *source : m_sources) {
+        assert(source);
+        const QString outPath = m_outDir + source->hash + ".json";
+        if (QFile::exists(outPath))
+            continue;
+        logCritical() << "Placing BCMR " << source->name << "as" << source->hash;
+        if (!QFile::copy(source->origFilename, outPath)) {
+            logCritical() << "Failed to copy" << source->origFilename << "to" << outPath;
+            logFatal() << "Giving up";
+            return 10;
+        }
+    }
+
+    // write one index file per category, listing the BCMRs that describe it.
+    logInfo() << "num categories found:" << m_categories.size();
+    for (auto i = m_categories.begin(); i != m_categories.end(); ++i) {
+        const MetaCategory *mc = i->second;
+        QJsonArray sources;
+        for (auto owner : mc->owners) {
+            QJsonObject o;
+            o.insert("bcmr", owner->hash);
+            o.insert("name", owner->name);
+            sources.append(o);
+        }
+        QJsonObject root;
+        root.insert("sources", sources);
+        QJsonDocument doc;
+        doc.setObject(root);
+        const auto memData = doc.toJson(QJsonDocument::Compact);
+
+        const QString outPath = m_outDir + mc->category + ".json";
+        if (QFile::exists(outPath)) {
+            // only overwrite if different.
+            QFile disk(outPath);
+            if (disk.open(QIODevice::ReadOnly) && disk.readAll() == memData)
+                continue;
+        }
+        logCritical() << "Placing or updating category file" << mc->category;
+        QFile out(outPath);
+        if (!out.open(QIODevice::WriteOnly)) {
+            logCritical() << "Failed to open file for writing:" << outPath;
+            logFatal() << "Giving up";
+            return 10;
+        }
+        if (out.write(memData) != memData.size()) {
+            logCritical() << "Failed to write file:" << outPath;
+            logFatal() << "Giving up";
+            return 10;
+        }
+    }
+
     return 0;
 }
+
+// Detect the type of the file at path and, when it is a BCMR json document,
+// register it in m_sources and link it to every token category it describes.
+void Processor::parseBCMR(const QString &path)
+{
+    logFatal() << "doing it" << path;
+    QFile in(path);
+    if (!in.open(QIODevice::ReadOnly)) {
+        logCritical() << "Failed to open file for reading:" << path;
+        return;
+    }
+    // sniff the first bytes to find out what kind of file this is.
+    char signature[4];
+    if (in.read(signature, 4) != 4) {
+        logCritical() << "Magic detection of file:" << path << "failed. What is it?";
+        return;
+    }
+    // 0: undecided, 1: leading whitespace, 2: found the opening brace, 10: not json.
+    int isJson = 0;
+    for (int i = 0; i < 4; ++i) {
+        const uint8_t k = static_cast<uint8_t>(signature[i]);
+        if (k == ' ' || k == '\n' || k == '\r' || k == '\t') {
+            // whitespace is fine both before and after the opening brace.
+            if (isJson == 0)
+                isJson = 1;
+        }
+        else if (k < 10) {
+            isJson = 10; // control bytes never start a json text.
+        }
+        else if (isJson < 2 && k == '{') {
+            isJson = 2;
+        }
+        else if (isJson == 2 && (k < 34 || k > 'z')) {
+            isJson = 10; // fail
+        }
+    }
+    if (isJson == 0) {
+        // no json marker seen at all, treat it as a transaction.
+        parseBCHTx(in);
+        return;
+    }
+    if (isJson != 2) {
+        logCritical() << "Magic detection of file:" << path << "failed. What is it?";
+        return;
+    }
+    // still here, then it is a json.
+    in.seek(0);
+    auto data = in.readAll();
+    QJsonDocument doc = QJsonDocument::fromJson(data);
+    in.close();
+    if (!doc.isObject()) {
+        logCritical() << "Failed to parse json document:" << path;
+        return;
+    }
+    auto ids = doc.object().value("identities").toObject();
+    if (ids.isEmpty()) {
+        logCritical() << "No identities in BCMR:" << path;
+        return;
+    }
+    MetaBCMR *me = nullptr;
+    for (auto identity = ids.begin(); identity != ids.end(); ++identity) {
+        // The 'key' can be a category or an auth-base, relevant unless
+        // duplicated in the 'token' section below.
+        auto revisions = identity->toObject();
+        auto revisionDates = revisions.keys(); // sorted list.
+        if (revisionDates.isEmpty())
+            continue;
+        const auto revision = revisions[revisionDates.back()].toObject();
+        if (revision.isEmpty())
+            continue;
+        if (me == nullptr) {
+            // first usable identity; it also provides the name of this BCMR.
+            me = new MetaBCMR();
+            me->origFilename = path;
+            me->name = revision["name"].toString();
+            // calc hash
+            CSHA256 hasher;
+            hasher.write(data.constData(), data.size());
+            char buf[CSHA256::OUTPUT_SIZE];
+            hasher.finalize(buf);
+            me->hash = QByteArray(buf, sizeof(buf)).toHex();
+
+            m_sources.push_back(me);
+        }
+        logInfo() << "Found BCMR for" << me->name;
+        const auto token_ = revision["token"];
+        if (token_.isObject()) {
+            const auto token = token_.toObject();
+            const auto cat = token["category"].toString();
+            if (cat.size() != 64) {
+                logCritical() << "found token, but category is invalid length" << cat.size();
+            } else {
+                MetaCategory *mc = fetchOrCreate(cat);
+                me->tokens.push_back(mc);
+                mc->owners.insert(me);
+            }
+        }
+    }
+}
+
+// Parse a raw transaction file, not implemented yet.
+void Processor::parseBCHTx(QFile &file)
+{
+    // TODO
+    logFatal() << "parse tx is a TODO";
+}
+
+// Return the MetaCategory registered for catId, creating it on first use.
+Processor::MetaCategory *Processor::fetchOrCreate(const QString &catId)
+{
+    auto iter = m_categories.find(catId);
+    if (iter != m_categories.end()) {
+        return iter->second;
+    }
+
+    auto *mc = new MetaCategory();
+    mc->category = catId;
+    m_categories.insert({mc->category, mc});
+
+    return mc;
+}
diff --git
a/src/Processor.h b/src/Processor.h
index 8f14b71..0730072 100644
--- a/src/Processor.h
+++ b/src/Processor.h
@@ -2,7 +2,12 @@
 #define PROCESSOR_H
 
 #include <QString>
-
+#include <set>
+#include <unordered_map>
+#include <vector>
+
+// only used by reference in this header.
+class QFile;
 
 class Processor
 {
@@ -12,7 +17,33 @@ public:
     int run();
 
 private:
+    /// Inspect the file at \a path and register it when it is a BCMR json document.
+    void parseBCMR(const QString &path);
+    /// Parse a raw transaction from \a file (a TODO in the implementation).
+    void parseBCHTx(QFile &file);
+    struct MetaCategory;
+    /// Find the category with id \a catId, creating it when it is not known yet.
+    MetaCategory *fetchOrCreate(const QString &catId);
+
+    /// One BCMR document found in the input dir.
+    struct MetaBCMR {
+        QString hash;          ///< hex encoded sha256 of the file content
+        QString origFilename;  ///< path the document was read from
+        QString name;          ///< name taken from the first usable identity
+        std::vector<MetaCategory*> tokens; ///< categories this document has a token entry for
+    };
+
+    /// One token category and the BCMR documents that describe it.
+    struct MetaCategory {
+        QString category;           ///< the 64 character category id
+        std::set<MetaBCMR*> owners; ///< documents that have a token entry for this category
+    };
+
     const QString m_inDir, m_outDir;
+    // NOTE(review): both containers hold objects created with new in Processor.cpp; no matching delete is visible, confirm ownership.
+    std::vector<MetaBCMR*> m_sources;
+    using CatMap = std::unordered_map<QString, MetaCategory*>;
+    CatMap m_categories;
 };
 
 #endif