WIP Duplicate detection mode

parent ec5d40be0c
commit a958b0268e
@@ -18,3 +18,4 @@ done
 $tool @$file_list -v --log-file=log-functions.txt
 $tool tmps/gh_global.h -mglobals -v --log-file=log-globals.txt
+$tool -mduplicates -v --log-file=log-duplicates.txt
tooling/tool.cpp
@@ -192,6 +192,126 @@ public:
   void rollbackTransaction() {
     sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr);
   }
 
+  // New methods for duplicate checking
+  bool checkDuplicateAddresses() {
+    const char *sql = R"(
+        WITH all_addresses AS (
+            SELECT 'Functions' as table_name, name, address, filepath FROM Functions WHERE address != ''
+            UNION ALL
+            SELECT 'Imports' as table_name, name, address, filepath FROM Imports WHERE address != ''
+            UNION ALL
+            SELECT 'Globals' as table_name, name, address, filepath FROM Globals WHERE address != ''
+        )
+        SELECT address, COUNT(*) as count,
+               GROUP_CONCAT(table_name || ':' || name || ' (' || filepath || ')', '; ') as entries
+        FROM all_addresses
+        GROUP BY address
+        HAVING COUNT(*) > 1
+        ORDER BY address;
+    )";
+
+    sqlite3_stmt *stmt;
+    if (sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr) != SQLITE_OK) {
+      spdlog::error("Failed to prepare duplicate address query: {}",
+                    sqlite3_errmsg(db));
+      return false;
+    }
+
+    bool found_duplicates = false;
+    while (sqlite3_step(stmt) == SQLITE_ROW) {
+      found_duplicates = true;
+      const char *address = (const char *)sqlite3_column_text(stmt, 0);
+      int count = sqlite3_column_int(stmt, 1);
+      const char *entries = (const char *)sqlite3_column_text(stmt, 2);
+
+      spdlog::error("DUPLICATE ADDRESS: {} appears {} times in: {}", address,
+                    count, entries);
+    }
+
+    sqlite3_finalize(stmt);
+    return found_duplicates;
+  }
+
+  bool checkDuplicateNames() {
+    bool found_duplicates = false;
+
+    // Check Functions table
+    const char *functions_sql = R"(
+        SELECT name, COUNT(*) as count,
+               GROUP_CONCAT(filepath, '; ') as filepaths
+        FROM Functions
+        GROUP BY name
+        HAVING COUNT(*) > 1
+        ORDER BY name;
+    )";
+
+    sqlite3_stmt *stmt;
+    if (sqlite3_prepare_v2(db, functions_sql, -1, &stmt, nullptr) ==
+        SQLITE_OK) {
+      while (sqlite3_step(stmt) == SQLITE_ROW) {
+        found_duplicates = true;
+        const char *name = (const char *)sqlite3_column_text(stmt, 0);
+        int count = sqlite3_column_int(stmt, 1);
+        const char *filepaths = (const char *)sqlite3_column_text(stmt, 2);
+
+        spdlog::error(
+            "DUPLICATE FUNCTION NAME: '{}' appears {} times in files: {}", name,
+            count, filepaths);
+      }
+      sqlite3_finalize(stmt);
+    }
+
+    // Check Imports table
+    const char *imports_sql = R"(
+        SELECT name, COUNT(*) as count,
+               GROUP_CONCAT(filepath, '; ') as filepaths
+        FROM Imports
+        GROUP BY name
+        HAVING COUNT(*) > 1
+        ORDER BY name;
+    )";
+
+    if (sqlite3_prepare_v2(db, imports_sql, -1, &stmt, nullptr) == SQLITE_OK) {
+      while (sqlite3_step(stmt) == SQLITE_ROW) {
+        found_duplicates = true;
+        const char *name = (const char *)sqlite3_column_text(stmt, 0);
+        int count = sqlite3_column_int(stmt, 1);
+        const char *filepaths = (const char *)sqlite3_column_text(stmt, 2);
+
+        spdlog::error(
+            "DUPLICATE IMPORT NAME: '{}' appears {} times in files: {}", name,
+            count, filepaths);
+      }
+      sqlite3_finalize(stmt);
+    }
+
+    // Check Globals table
+    const char *globals_sql = R"(
+        SELECT name, COUNT(*) as count,
+               GROUP_CONCAT(filepath, '; ') as filepaths
+        FROM Globals
+        GROUP BY name
+        HAVING COUNT(*) > 1
+        ORDER BY name;
+    )";
+
+    if (sqlite3_prepare_v2(db, globals_sql, -1, &stmt, nullptr) == SQLITE_OK) {
+      while (sqlite3_step(stmt) == SQLITE_ROW) {
+        found_duplicates = true;
+        const char *name = (const char *)sqlite3_column_text(stmt, 0);
+        int count = sqlite3_column_int(stmt, 1);
+        const char *filepaths = (const char *)sqlite3_column_text(stmt, 2);
+
+        spdlog::error(
+            "DUPLICATE GLOBAL NAME: '{}' appears {} times in files: {}", name,
+            count, filepaths);
+      }
+      sqlite3_finalize(stmt);
+    }
+
+    return found_duplicates;
+  }
 };
 
 std::string extractAddress(const std::string &comment) {
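A note on the address query: because all three tables are collapsed with UNION ALL before grouping, the check also flags the same address appearing across tables, not just within one, while the name checks are run per table. The query can be tried outside the tool with a sketch along these lines, assuming only the name/address/filepath columns used above (the real schema likely has more):

// sketch_duplicate_address_query.cpp, illustration only, not part of the commit
#include <cstdio>
#include <sqlite3.h>

int main() {
  sqlite3 *db = nullptr;
  sqlite3_open(":memory:", &db);

  // Toy schema with just the columns the query needs.
  const char *setup =
      "CREATE TABLE Functions(name TEXT, address TEXT, filepath TEXT);"
      "CREATE TABLE Imports(name TEXT, address TEXT, filepath TEXT);"
      "CREATE TABLE Globals(name TEXT, address TEXT, filepath TEXT);"
      "INSERT INTO Functions VALUES('foo', '0x1000', 'a.c');"
      "INSERT INTO Imports VALUES('foo_imp', '0x1000', 'b.c');"
      "INSERT INTO Globals VALUES('g_bar', '0x2000', 'c.c');";
  sqlite3_exec(db, setup, nullptr, nullptr, nullptr);

  // Same query as checkDuplicateAddresses() above.
  const char *sql = R"(
      WITH all_addresses AS (
          SELECT 'Functions' as table_name, name, address, filepath FROM Functions WHERE address != ''
          UNION ALL
          SELECT 'Imports' as table_name, name, address, filepath FROM Imports WHERE address != ''
          UNION ALL
          SELECT 'Globals' as table_name, name, address, filepath FROM Globals WHERE address != ''
      )
      SELECT address, COUNT(*) as count,
             GROUP_CONCAT(table_name || ':' || name || ' (' || filepath || ')', '; ') as entries
      FROM all_addresses
      GROUP BY address
      HAVING COUNT(*) > 1
      ORDER BY address;
  )";

  sqlite3_stmt *stmt = nullptr;
  if (sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr) == SQLITE_OK) {
    while (sqlite3_step(stmt) == SQLITE_ROW) {
      // Prints: 0x1000 appears 2 times: Functions:foo (a.c); Imports:foo_imp (b.c)
      std::printf("%s appears %d times: %s\n",
                  (const char *)sqlite3_column_text(stmt, 0),
                  sqlite3_column_int(stmt, 1),
                  (const char *)sqlite3_column_text(stmt, 2));
    }
    sqlite3_finalize(stmt);
  }
  sqlite3_close(db);
  return 0;
}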
@@ -611,6 +731,29 @@ bool dumpTreeFile(const std::string &filepath) {
   return true;
 }
 
+bool processDuplicates(DatabaseManager &db) {
+  spdlog::info("=== Checking for duplicate addresses ===");
+  bool found_address_duplicates = db.checkDuplicateAddresses();
+
+  spdlog::info("=== Checking for duplicate names ===");
+  bool found_name_duplicates = db.checkDuplicateNames();
+
+  if (!found_address_duplicates && !found_name_duplicates) {
+    spdlog::info("No duplicates found in the database.");
+    return true;
+  }
+
+  if (found_address_duplicates) {
+    spdlog::error("Found duplicate addresses in the database!");
+  }
+
+  if (found_name_duplicates) {
+    spdlog::error("Found duplicate names in the database!");
+  }
+
+  return false; // Return false to indicate errors were found
+}
+
 int main(int argc, char *argv[]) {
   // Initialize spdlog
   auto console = spdlog::stdout_color_mt("console");
@@ -634,9 +777,12 @@ int main(int argc, char *argv[]) {
                  "File containing list of files to process");
   app.add_option("-d,--database", db_path, "SQLite database path")
       ->default_val("gh.db");
-  app.add_option("-m,--mode", mode, "Processing mode: 'functions', 'globals', or 'dump-tree'")
+  app.add_option("-m,--mode", mode,
+                 "Processing mode: 'functions', 'globals', 'duplicates', or "
+                 "'dump-tree'")
       ->default_val("functions")
-      ->check(CLI::IsMember({"functions", "globals", "dump-tree"}));
+      ->check(
+          CLI::IsMember({"functions", "globals", "duplicates", "dump-tree"}));
   app.add_flag("-v,--verbose", verbose, "Enable verbose logging (debug level)");
   app.add_flag("--log-file", log_file, "Enable logging to file");
 
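The extended -m/--mode option can be exercised on its own with a small CLI11 sketch like the one below; the option string, default, and member list mirror the commit, while the app name and the printed output are placeholders:

// sketch_mode_option.cpp, illustration only
#include <CLI/CLI.hpp>
#include <cstdio>
#include <string>

int main(int argc, char *argv[]) {
  CLI::App app{"mode option sketch"};
  std::string mode;
  app.add_option("-m,--mode", mode,
                 "Processing mode: 'functions', 'globals', 'duplicates', or "
                 "'dump-tree'")
      ->default_val("functions")
      ->check(
          CLI::IsMember({"functions", "globals", "duplicates", "dump-tree"}));
  CLI11_PARSE(app, argc, argv);

  // e.g. `./sketch -m duplicates` prints "mode = duplicates";
  // `./sketch -m bogus` is rejected by the IsMember validator.
  std::printf("mode = %s\n", mode.c_str());
  return 0;
}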
@@ -647,6 +793,8 @@ int main(int argc, char *argv[]) {
     spdlog::set_level(spdlog::level::debug);
   }
 
+  spdlog::set_pattern(std::string("[%^%l%$] %v"));
+
   if (!log_file.empty()) {
     auto log_sink =
         std::make_shared<spdlog::sinks::basic_file_sink_mt>(log_file, true);
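This pattern plus the optional file sink are what route the DUPLICATE error lines into log-duplicates.txt when the script above passes --log-file. Roughly, the logger wiring looks like the following sketch; the file name and messages are placeholders, and the real tool builds its sinks from the CLI flags:

// sketch_logger_wiring.cpp, illustration only
#include <memory>
#include <spdlog/sinks/basic_file_sink.h>
#include <spdlog/sinks/stdout_color_sinks.h>
#include <spdlog/spdlog.h>

int main() {
  // Colour console logger as the default, like the tool's "console" logger.
  auto console = spdlog::stdout_color_mt("console");
  spdlog::set_default_logger(console);

  // Optional file sink, analogous to the --log-file branch.
  auto file_sink = std::make_shared<spdlog::sinks::basic_file_sink_mt>(
      "log-duplicates.txt", true);
  spdlog::default_logger()->sinks().push_back(file_sink);

  // Level and pattern applied to both sinks of the default logger.
  spdlog::set_level(spdlog::level::debug);
  spdlog::set_pattern("[%^%l%$] %v");

  spdlog::info("=== Checking database for duplicates ===");
  spdlog::error("DUPLICATE ADDRESS: {} appears {} times in: {}", "0x1000", 2,
                "Functions:foo (a.c); Imports:foo_imp (b.c)");
  return 0;
}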
@@ -654,28 +802,31 @@ int main(int argc, char *argv[]) {
   }
 
   std::vector<std::string> files_to_process;
+  bool needFiles = mode != "duplicates";
 
-  if (!list_file.empty()) {
-    auto list_files = readFileList(list_file);
-    files_to_process.insert(files_to_process.end(), list_files.begin(),
-                            list_files.end());
-  }
-
-  for (const auto &input : input_files) {
-    if (input.starts_with("@")) {
-      auto list_files = readFileList(input.substr(1));
-      files_to_process.insert(files_to_process.end(), list_files.begin(),
-                              list_files.end());
-    } else if (std::filesystem::exists(input)) {
-      files_to_process.push_back(input);
-    } else {
-      spdlog::warn("File not found: {}", input);
-    }
-  }
-
-  if (files_to_process.empty()) {
-    spdlog::error("No files to process. Use --help for usage information.");
-    return 1;
-  }
+  if (needFiles) {
+    if (!list_file.empty()) {
+      auto list_files = readFileList(list_file);
+      files_to_process.insert(files_to_process.end(), list_files.begin(),
+                              list_files.end());
+    }
+
+    for (const auto &input : input_files) {
+      if (input.starts_with("@")) {
+        auto list_files = readFileList(input.substr(1));
+        files_to_process.insert(files_to_process.end(), list_files.begin(),
+                                list_files.end());
+      } else if (std::filesystem::exists(input)) {
+        files_to_process.push_back(input);
+      } else {
+        spdlog::warn("File not found: {}", input);
+      }
+    }
+
+    if (files_to_process.empty()) {
+      spdlog::error("No files to process. Use --help for usage information.");
+      return 1;
+    }
+  }
 
   try {
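readFileList is defined elsewhere in tool.cpp and is not shown in this diff. For reading purposes it can be assumed to behave roughly like the sketch below (one path per line, blank lines skipped); this is an assumption, not code from the commit:

// Hypothetical shape of the readFileList helper used above.
#include <fstream>
#include <string>
#include <vector>

std::vector<std::string> readFileList(const std::string &list_path) {
  std::vector<std::string> files;
  std::ifstream in(list_path);
  std::string line;
  while (std::getline(in, line)) {
    if (!line.empty()) {
      files.push_back(line);
    }
  }
  return files;
}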
@@ -689,8 +840,18 @@ int main(int argc, char *argv[]) {
         processed_count++;
       }
     }
+  } else if (mode == "duplicates") {
+    DatabaseManager db(db_path);
+    // For duplicates mode, we only check the database, no file processing
+    spdlog::info("=== Checking database for duplicates ===");
+    bool has_duplicates = !processDuplicates(db);
+    spdlog::info("=== Summary ===");
+    spdlog::info("Mode: {}", mode);
+    spdlog::info("Database: {}", db_path);
+    return has_duplicates ? 1 : 0; // Return 1 if duplicates found, 0 if none
   } else {
     DatabaseManager db(db_path);
 
     const size_t batch_size = 50;
     size_t current_batch = 0;
@@ -710,7 +871,8 @@ int main(int argc, char *argv[]) {
 
       if (++current_batch >= batch_size) {
        db.commitTransaction();
-        spdlog::info("Committed batch of {} files to database", current_batch);
+        spdlog::info("Committed batch of {} files to database",
+                     current_batch);
         db.beginTransaction();
         current_batch = 0;
       }