diff --git a/tooling/files.sh b/tooling/files.sh
index 582afe4d..4a0ec799 100644
--- a/tooling/files.sh
+++ b/tooling/files.sh
@@ -18,3 +18,4 @@ done
 
 $tool @$file_list -v --log-file=log-functions.txt
 $tool tmps/gh_global.h -mglobals -v --log-file=log-globals.txt
+$tool -mduplicates -v --log-file=log-duplicates.txt
diff --git a/tooling/tool.cpp b/tooling/tool.cpp
index a6378546..a68c667d 100644
--- a/tooling/tool.cpp
+++ b/tooling/tool.cpp
@@ -192,6 +192,126 @@ public:
   void rollbackTransaction() {
     sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr);
   }
+
+  // New methods for duplicate checking
+  bool checkDuplicateAddresses() {
+    const char *sql = R"(
+      WITH all_addresses AS (
+        SELECT 'Functions' as table_name, name, address, filepath FROM Functions WHERE address != ''
+        UNION ALL
+        SELECT 'Imports' as table_name, name, address, filepath FROM Imports WHERE address != ''
+        UNION ALL
+        SELECT 'Globals' as table_name, name, address, filepath FROM Globals WHERE address != ''
+      )
+      SELECT address, COUNT(*) as count,
+             GROUP_CONCAT(table_name || ':' || name || ' (' || filepath || ')', '; ') as entries
+      FROM all_addresses
+      GROUP BY address
+      HAVING COUNT(*) > 1
+      ORDER BY address;
+    )";
+
+    sqlite3_stmt *stmt;
+    if (sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr) != SQLITE_OK) {
+      spdlog::error("Failed to prepare duplicate address query: {}",
+                    sqlite3_errmsg(db));
+      return false;
+    }
+
+    bool found_duplicates = false;
+    while (sqlite3_step(stmt) == SQLITE_ROW) {
+      found_duplicates = true;
+      const char *address = (const char *)sqlite3_column_text(stmt, 0);
+      int count = sqlite3_column_int(stmt, 1);
+      const char *entries = (const char *)sqlite3_column_text(stmt, 2);
+
+      spdlog::error("DUPLICATE ADDRESS: {} appears {} times in: {}", address,
+                    count, entries);
+    }
+
+    sqlite3_finalize(stmt);
+    return found_duplicates;
+  }
+
+  bool checkDuplicateNames() {
+    bool found_duplicates = false;
+
+    // Check Functions table
+    const char *functions_sql = R"(
+      SELECT name, COUNT(*) as count,
+             GROUP_CONCAT(filepath, '; ') as filepaths
+      FROM Functions
+      GROUP BY name
+      HAVING COUNT(*) > 1
+      ORDER BY name;
+    )";
+
+    sqlite3_stmt *stmt;
+    if (sqlite3_prepare_v2(db, functions_sql, -1, &stmt, nullptr) ==
+        SQLITE_OK) {
+      while (sqlite3_step(stmt) == SQLITE_ROW) {
+        found_duplicates = true;
+        const char *name = (const char *)sqlite3_column_text(stmt, 0);
+        int count = sqlite3_column_int(stmt, 1);
+        const char *filepaths = (const char *)sqlite3_column_text(stmt, 2);
+
+        spdlog::error(
+            "DUPLICATE FUNCTION NAME: '{}' appears {} times in files: {}", name,
+            count, filepaths);
+      }
+      sqlite3_finalize(stmt);
+    }
+
+    // Check Imports table
+    const char *imports_sql = R"(
+      SELECT name, COUNT(*) as count,
+             GROUP_CONCAT(filepath, '; ') as filepaths
+      FROM Imports
+      GROUP BY name
+      HAVING COUNT(*) > 1
+      ORDER BY name;
+    )";
+
+    if (sqlite3_prepare_v2(db, imports_sql, -1, &stmt, nullptr) == SQLITE_OK) {
+      while (sqlite3_step(stmt) == SQLITE_ROW) {
+        found_duplicates = true;
+        const char *name = (const char *)sqlite3_column_text(stmt, 0);
+        int count = sqlite3_column_int(stmt, 1);
+        const char *filepaths = (const char *)sqlite3_column_text(stmt, 2);
+
+        spdlog::error(
+            "DUPLICATE IMPORT NAME: '{}' appears {} times in files: {}", name,
+            count, filepaths);
+      }
+      sqlite3_finalize(stmt);
+    }
+
+    // Check Globals table
+    const char *globals_sql = R"(
+      SELECT name, COUNT(*) as count,
+             GROUP_CONCAT(filepath, '; ') as filepaths
+      FROM Globals
+      GROUP BY name
+      HAVING COUNT(*) > 1
+      ORDER BY name;
+    )";
+
+    if (sqlite3_prepare_v2(db, globals_sql, -1, &stmt, nullptr) == SQLITE_OK) {
+      while (sqlite3_step(stmt) == SQLITE_ROW) {
+        found_duplicates = true;
+        const char *name = (const char *)sqlite3_column_text(stmt, 0);
+        int count = sqlite3_column_int(stmt, 1);
+        const char *filepaths = (const char *)sqlite3_column_text(stmt, 2);
+
+        spdlog::error(
+            "DUPLICATE GLOBAL NAME: '{}' appears {} times in files: {}", name,
+            count, filepaths);
+      }
+      sqlite3_finalize(stmt);
+    }
+
+    return found_duplicates;
+  }
 };
 
 std::string extractAddress(const std::string &comment) {
@@ -611,6 +731,29 @@ bool dumpTreeFile(const std::string &filepath) {
   return true;
 }
 
+bool processDuplicates(DatabaseManager &db) {
+  spdlog::info("=== Checking for duplicate addresses ===");
+  bool found_address_duplicates = db.checkDuplicateAddresses();
+
+  spdlog::info("=== Checking for duplicate names ===");
+  bool found_name_duplicates = db.checkDuplicateNames();
+
+  if (!found_address_duplicates && !found_name_duplicates) {
+    spdlog::info("No duplicates found in the database.");
+    return true;
+  }
+
+  if (found_address_duplicates) {
+    spdlog::error("Found duplicate addresses in the database!");
+  }
+
+  if (found_name_duplicates) {
+    spdlog::error("Found duplicate names in the database!");
+  }
+
+  return false; // Return false to indicate errors were found
+}
+
 int main(int argc, char *argv[]) {
   // Initialize spdlog
   auto console = spdlog::stdout_color_mt("console");
@@ -634,9 +777,12 @@ int main(int argc, char *argv[]) {
                  "File containing list of files to process");
   app.add_option("-d,--database", db_path, "SQLite database path")
       ->default_val("gh.db");
-  app.add_option("-m,--mode", mode, "Processing mode: 'functions', 'globals', or 'dump-tree'")
+  app.add_option("-m,--mode", mode,
+                 "Processing mode: 'functions', 'globals', 'duplicates', or "
+                 "'dump-tree'")
       ->default_val("functions")
-      ->check(CLI::IsMember({"functions", "globals", "dump-tree"}));
+      ->check(
+          CLI::IsMember({"functions", "globals", "duplicates", "dump-tree"}));
   app.add_flag("-v,--verbose", verbose,
                "Enable verbose logging (debug level)");
   app.add_flag("--log-file", log_file, "Enable logging to file");
@@ -647,6 +793,8 @@ int main(int argc, char *argv[]) {
     spdlog::set_level(spdlog::level::debug);
   }
 
+  spdlog::set_pattern(std::string("[%^%l%$] %v"));
+
   if (!log_file.empty()) {
     auto log_sink =
         std::make_shared<spdlog::sinks::basic_file_sink_mt>(log_file, true);
@@ -654,28 +802,31 @@ int main(int argc, char *argv[]) {
   }
 
   std::vector<std::string> files_to_process;
+  bool needFiles = mode != "duplicates";
 
-  if (!list_file.empty()) {
-    auto list_files = readFileList(list_file);
-    files_to_process.insert(files_to_process.end(), list_files.begin(),
-                            list_files.end());
-  }
-
-  for (const auto &input : input_files) {
-    if (input.starts_with("@")) {
-      auto list_files = readFileList(input.substr(1));
+  if (needFiles) {
+    if (!list_file.empty()) {
+      auto list_files = readFileList(list_file);
       files_to_process.insert(files_to_process.end(), list_files.begin(),
                               list_files.end());
-    } else if (std::filesystem::exists(input)) {
-      files_to_process.push_back(input);
-    } else {
-      spdlog::warn("File not found: {}", input);
     }
-  }
 
-  if (files_to_process.empty()) {
-    spdlog::error("No files to process. Use --help for usage information.");
-    return 1;
+    for (const auto &input : input_files) {
+      if (input.starts_with("@")) {
+        auto list_files = readFileList(input.substr(1));
+        files_to_process.insert(files_to_process.end(), list_files.begin(),
+                                list_files.end());
+      } else if (std::filesystem::exists(input)) {
+        files_to_process.push_back(input);
+      } else {
+        spdlog::warn("File not found: {}", input);
+      }
+    }
+
+    if (files_to_process.empty()) {
+      spdlog::error("No files to process. Use --help for usage information.");
+      return 1;
+    }
   }
 
   try {
@@ -689,8 +840,18 @@ int main(int argc, char *argv[]) {
           processed_count++;
        }
      }
+    } else if (mode == "duplicates") {
+      DatabaseManager db(db_path);
+      // For duplicates mode, we only check the database, no file processing
+      spdlog::info("=== Checking database for duplicates ===");
+      bool has_duplicates = !processDuplicates(db);
+      spdlog::info("=== Summary ===");
+      spdlog::info("Mode: {}", mode);
+      spdlog::info("Database: {}", db_path);
+      return has_duplicates ? 1 : 0; // Return 1 if duplicates found, 0 if none
    } else {
      DatabaseManager db(db_path);
+
      const size_t batch_size = 50;
      size_t current_batch = 0;
@@ -710,7 +871,8 @@ int main(int argc, char *argv[]) {
 
      if (++current_batch >= batch_size) {
        db.commitTransaction();
-        spdlog::info("Committed batch of {} files to database", current_batch);
+        spdlog::info("Committed batch of {} files to database",
+                     current_batch);
        db.beginTransaction();
        current_batch = 0;
      }
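
Note on the new mode's exit-code contract: processDuplicates() returns false when anything is duplicated, and main() maps that to exit status 1, so -mduplicates can gate a pipeline. Below is a minimal shell sketch of how files.sh (or a CI step) might consume that contract; the guard and the error message are illustrative additions, not part of this patch, and it assumes the $tool variable and the default gh.db database already used in files.sh above.

    # Fail the pipeline when -mduplicates reports duplicate names/addresses.
    # The tool exits 1 if duplicates were found, 0 otherwise (see main()).
    if ! $tool -mduplicates -d gh.db -v --log-file=log-duplicates.txt; then
      echo "duplicates found; see log-duplicates.txt" >&2
      exit 1
    fi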