WIP Duplicate detection mode
This commit is contained in:
parent
ec5d40be0c
commit
a958b0268e
|
@ -18,3 +18,4 @@ done
|
||||||
|
|
||||||
$tool @$file_list -v --log-file=log-functions.txt
|
$tool @$file_list -v --log-file=log-functions.txt
|
||||||
$tool tmps/gh_global.h -mglobals -v --log-file=log-globals.txt
|
$tool tmps/gh_global.h -mglobals -v --log-file=log-globals.txt
|
||||||
|
$tool -mduplicates -v --log-file=log-duplicates.txt
|
||||||
|
|
202
tooling/tool.cpp
202
tooling/tool.cpp
|
@ -192,6 +192,126 @@ public:
|
||||||
void rollbackTransaction() {
|
void rollbackTransaction() {
|
||||||
sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr);
|
sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// New methods for duplicate checking
|
||||||
|
bool checkDuplicateAddresses() {
|
||||||
|
const char *sql = R"(
|
||||||
|
WITH all_addresses AS (
|
||||||
|
SELECT 'Functions' as table_name, name, address, filepath FROM Functions WHERE address != ''
|
||||||
|
UNION ALL
|
||||||
|
SELECT 'Imports' as table_name, name, address, filepath FROM Imports WHERE address != ''
|
||||||
|
UNION ALL
|
||||||
|
SELECT 'Globals' as table_name, name, address, filepath FROM Globals WHERE address != ''
|
||||||
|
)
|
||||||
|
SELECT address, COUNT(*) as count,
|
||||||
|
GROUP_CONCAT(table_name || ':' || name || ' (' || filepath || ')', '; ') as entries
|
||||||
|
FROM all_addresses
|
||||||
|
GROUP BY address
|
||||||
|
HAVING COUNT(*) > 1
|
||||||
|
ORDER BY address;
|
||||||
|
)";
|
||||||
|
|
||||||
|
sqlite3_stmt *stmt;
|
||||||
|
if (sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr) != SQLITE_OK) {
|
||||||
|
spdlog::error("Failed to prepare duplicate address query: {}",
|
||||||
|
sqlite3_errmsg(db));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool found_duplicates = false;
|
||||||
|
while (sqlite3_step(stmt) == SQLITE_ROW) {
|
||||||
|
found_duplicates = true;
|
||||||
|
const char *address = (const char *)sqlite3_column_text(stmt, 0);
|
||||||
|
int count = sqlite3_column_int(stmt, 1);
|
||||||
|
const char *entries = (const char *)sqlite3_column_text(stmt, 2);
|
||||||
|
|
||||||
|
spdlog::error("DUPLICATE ADDRESS: {} appears {} times in: {}", address,
|
||||||
|
count, entries);
|
||||||
|
}
|
||||||
|
|
||||||
|
sqlite3_finalize(stmt);
|
||||||
|
return found_duplicates;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool checkDuplicateNames() {
|
||||||
|
bool found_duplicates = false;
|
||||||
|
|
||||||
|
// Check Functions table
|
||||||
|
const char *functions_sql = R"(
|
||||||
|
SELECT name, COUNT(*) as count,
|
||||||
|
GROUP_CONCAT(filepath, '; ') as filepaths
|
||||||
|
FROM Functions
|
||||||
|
GROUP BY name
|
||||||
|
HAVING COUNT(*) > 1
|
||||||
|
ORDER BY name;
|
||||||
|
)";
|
||||||
|
|
||||||
|
sqlite3_stmt *stmt;
|
||||||
|
if (sqlite3_prepare_v2(db, functions_sql, -1, &stmt, nullptr) ==
|
||||||
|
SQLITE_OK) {
|
||||||
|
while (sqlite3_step(stmt) == SQLITE_ROW) {
|
||||||
|
found_duplicates = true;
|
||||||
|
const char *name = (const char *)sqlite3_column_text(stmt, 0);
|
||||||
|
int count = sqlite3_column_int(stmt, 1);
|
||||||
|
const char *filepaths = (const char *)sqlite3_column_text(stmt, 2);
|
||||||
|
|
||||||
|
spdlog::error(
|
||||||
|
"DUPLICATE FUNCTION NAME: '{}' appears {} times in files: {}", name,
|
||||||
|
count, filepaths);
|
||||||
|
}
|
||||||
|
sqlite3_finalize(stmt);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check Imports table
|
||||||
|
const char *imports_sql = R"(
|
||||||
|
SELECT name, COUNT(*) as count,
|
||||||
|
GROUP_CONCAT(filepath, '; ') as filepaths
|
||||||
|
FROM Imports
|
||||||
|
GROUP BY name
|
||||||
|
HAVING COUNT(*) > 1
|
||||||
|
ORDER BY name;
|
||||||
|
)";
|
||||||
|
|
||||||
|
if (sqlite3_prepare_v2(db, imports_sql, -1, &stmt, nullptr) == SQLITE_OK) {
|
||||||
|
while (sqlite3_step(stmt) == SQLITE_ROW) {
|
||||||
|
found_duplicates = true;
|
||||||
|
const char *name = (const char *)sqlite3_column_text(stmt, 0);
|
||||||
|
int count = sqlite3_column_int(stmt, 1);
|
||||||
|
const char *filepaths = (const char *)sqlite3_column_text(stmt, 2);
|
||||||
|
|
||||||
|
spdlog::error(
|
||||||
|
"DUPLICATE IMPORT NAME: '{}' appears {} times in files: {}", name,
|
||||||
|
count, filepaths);
|
||||||
|
}
|
||||||
|
sqlite3_finalize(stmt);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check Globals table
|
||||||
|
const char *globals_sql = R"(
|
||||||
|
SELECT name, COUNT(*) as count,
|
||||||
|
GROUP_CONCAT(filepath, '; ') as filepaths
|
||||||
|
FROM Globals
|
||||||
|
GROUP BY name
|
||||||
|
HAVING COUNT(*) > 1
|
||||||
|
ORDER BY name;
|
||||||
|
)";
|
||||||
|
|
||||||
|
if (sqlite3_prepare_v2(db, globals_sql, -1, &stmt, nullptr) == SQLITE_OK) {
|
||||||
|
while (sqlite3_step(stmt) == SQLITE_ROW) {
|
||||||
|
found_duplicates = true;
|
||||||
|
const char *name = (const char *)sqlite3_column_text(stmt, 0);
|
||||||
|
int count = sqlite3_column_int(stmt, 1);
|
||||||
|
const char *filepaths = (const char *)sqlite3_column_text(stmt, 2);
|
||||||
|
|
||||||
|
spdlog::error(
|
||||||
|
"DUPLICATE GLOBAL NAME: '{}' appears {} times in files: {}", name,
|
||||||
|
count, filepaths);
|
||||||
|
}
|
||||||
|
sqlite3_finalize(stmt);
|
||||||
|
}
|
||||||
|
|
||||||
|
return found_duplicates;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
std::string extractAddress(const std::string &comment) {
|
std::string extractAddress(const std::string &comment) {
|
||||||
|
@ -611,6 +731,29 @@ bool dumpTreeFile(const std::string &filepath) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs the duplicate-address check followed by the duplicate-name check on
// the given database and logs a summary line for each kind that was found.
//
// Returns true when neither check reported duplicates, false otherwise.
bool processDuplicates(DatabaseManager &db) {
  spdlog::info("=== Checking for duplicate addresses ===");
  const bool address_clash = db.checkDuplicateAddresses();

  spdlog::info("=== Checking for duplicate names ===");
  const bool name_clash = db.checkDuplicateNames();

  const bool database_clean = !(address_clash || name_clash);

  if (database_clean) {
    spdlog::info("No duplicates found in the database.");
  } else {
    if (address_clash) {
      spdlog::error("Found duplicate addresses in the database!");
    }
    if (name_clash) {
      spdlog::error("Found duplicate names in the database!");
    }
  }

  // false signals to the caller that duplicates were found
  return database_clean;
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
// Initialize spdlog
|
// Initialize spdlog
|
||||||
auto console = spdlog::stdout_color_mt("console");
|
auto console = spdlog::stdout_color_mt("console");
|
||||||
|
@ -634,9 +777,12 @@ int main(int argc, char *argv[]) {
|
||||||
"File containing list of files to process");
|
"File containing list of files to process");
|
||||||
app.add_option("-d,--database", db_path, "SQLite database path")
|
app.add_option("-d,--database", db_path, "SQLite database path")
|
||||||
->default_val("gh.db");
|
->default_val("gh.db");
|
||||||
app.add_option("-m,--mode", mode, "Processing mode: 'functions', 'globals', or 'dump-tree'")
|
app.add_option("-m,--mode", mode,
|
||||||
|
"Processing mode: 'functions', 'globals', 'duplicates', or "
|
||||||
|
"'dump-tree'")
|
||||||
->default_val("functions")
|
->default_val("functions")
|
||||||
->check(CLI::IsMember({"functions", "globals", "dump-tree"}));
|
->check(
|
||||||
|
CLI::IsMember({"functions", "globals", "duplicates", "dump-tree"}));
|
||||||
app.add_flag("-v,--verbose", verbose, "Enable verbose logging (debug level)");
|
app.add_flag("-v,--verbose", verbose, "Enable verbose logging (debug level)");
|
||||||
app.add_flag("--log-file", log_file, "Enable logging to file");
|
app.add_flag("--log-file", log_file, "Enable logging to file");
|
||||||
|
|
||||||
|
@ -647,6 +793,8 @@ int main(int argc, char *argv[]) {
|
||||||
spdlog::set_level(spdlog::level::debug);
|
spdlog::set_level(spdlog::level::debug);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spdlog::set_pattern(std::string("[%^%l%$] %v"));
|
||||||
|
|
||||||
if (!log_file.empty()) {
|
if (!log_file.empty()) {
|
||||||
auto log_sink =
|
auto log_sink =
|
||||||
std::make_shared<spdlog::sinks::basic_file_sink_mt>(log_file, true);
|
std::make_shared<spdlog::sinks::basic_file_sink_mt>(log_file, true);
|
||||||
|
@ -654,28 +802,31 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> files_to_process;
|
std::vector<std::string> files_to_process;
|
||||||
|
bool needFiles = mode != "duplicates";
|
||||||
|
|
||||||
if (!list_file.empty()) {
|
if (needFiles) {
|
||||||
auto list_files = readFileList(list_file);
|
if (!list_file.empty()) {
|
||||||
files_to_process.insert(files_to_process.end(), list_files.begin(),
|
auto list_files = readFileList(list_file);
|
||||||
list_files.end());
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const auto &input : input_files) {
|
|
||||||
if (input.starts_with("@")) {
|
|
||||||
auto list_files = readFileList(input.substr(1));
|
|
||||||
files_to_process.insert(files_to_process.end(), list_files.begin(),
|
files_to_process.insert(files_to_process.end(), list_files.begin(),
|
||||||
list_files.end());
|
list_files.end());
|
||||||
} else if (std::filesystem::exists(input)) {
|
|
||||||
files_to_process.push_back(input);
|
|
||||||
} else {
|
|
||||||
spdlog::warn("File not found: {}", input);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (files_to_process.empty()) {
|
for (const auto &input : input_files) {
|
||||||
spdlog::error("No files to process. Use --help for usage information.");
|
if (input.starts_with("@")) {
|
||||||
return 1;
|
auto list_files = readFileList(input.substr(1));
|
||||||
|
files_to_process.insert(files_to_process.end(), list_files.begin(),
|
||||||
|
list_files.end());
|
||||||
|
} else if (std::filesystem::exists(input)) {
|
||||||
|
files_to_process.push_back(input);
|
||||||
|
} else {
|
||||||
|
spdlog::warn("File not found: {}", input);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (files_to_process.empty()) {
|
||||||
|
spdlog::error("No files to process. Use --help for usage information.");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
@ -689,8 +840,18 @@ int main(int argc, char *argv[]) {
|
||||||
processed_count++;
|
processed_count++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else if (mode == "duplicates") {
|
||||||
|
DatabaseManager db(db_path);
|
||||||
|
// For duplicates mode, we only check the database, no file processing
|
||||||
|
spdlog::info("=== Checking database for duplicates ===");
|
||||||
|
bool has_duplicates = !processDuplicates(db);
|
||||||
|
spdlog::info("=== Summary ===");
|
||||||
|
spdlog::info("Mode: {}", mode);
|
||||||
|
spdlog::info("Database: {}", db_path);
|
||||||
|
return has_duplicates ? 1 : 0; // Return 1 if duplicates found, 0 if none
|
||||||
} else {
|
} else {
|
||||||
DatabaseManager db(db_path);
|
DatabaseManager db(db_path);
|
||||||
|
|
||||||
const size_t batch_size = 50;
|
const size_t batch_size = 50;
|
||||||
size_t current_batch = 0;
|
size_t current_batch = 0;
|
||||||
|
|
||||||
|
@ -710,7 +871,8 @@ int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
if (++current_batch >= batch_size) {
|
if (++current_batch >= batch_size) {
|
||||||
db.commitTransaction();
|
db.commitTransaction();
|
||||||
spdlog::info("Committed batch of {} files to database", current_batch);
|
spdlog::info("Committed batch of {} files to database",
|
||||||
|
current_batch);
|
||||||
db.beginTransaction();
|
db.beginTransaction();
|
||||||
current_batch = 0;
|
current_batch = 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue