diff --git a/tooling/CMakeLists.txt b/tooling/CMakeLists.txt index 381bbcc4..454657b2 100644 --- a/tooling/CMakeLists.txt +++ b/tooling/CMakeLists.txt @@ -15,9 +15,18 @@ target_include_directories(sqlite3 PUBLIC ${SQLITE_SRC}) add_library(CLI11 INTERFACE) target_include_directories(CLI11 INTERFACE third_party/CLI11) -add_executable(r3_gh_tool tool.cpp) -target_link_libraries(r3_gh_tool PRIVATE spdlog::spdlog tree-sitter tree-sitter-cpp sqlite3 CLI11) -target_compile_features(r3_gh_tool PRIVATE cxx_std_23) +add_executable(gh_tool + tool.cpp + file_processor.cpp + database.cpp + parser.cpp + cmd_scan.cpp + cmd_dump.cpp + cmd_verify.cpp + cmd_hooks.cpp +) +target_link_libraries(gh_tool PRIVATE spdlog::spdlog tree-sitter tree-sitter-cpp sqlite3 CLI11) +target_compile_features(gh_tool PRIVATE cxx_std_23) add_executable(generate_dbg_sec generate_dbg_sec.cpp) target_compile_features(generate_dbg_sec PRIVATE cxx_std_23) diff --git a/tooling/cmd_dump.cpp b/tooling/cmd_dump.cpp new file mode 100644 index 00000000..2132d590 --- /dev/null +++ b/tooling/cmd_dump.cpp @@ -0,0 +1,88 @@ +#include "tool.hpp" +#include +#include + +// Forward declarations +extern "C" TSLanguage *tree_sitter_cpp(); + +static std::string filepath; + +// Helper function to dump Tree-sitter AST +void dumpTreeSitterAST(TSNode node, const char *source_code, int depth) { + std::string indent(depth * 2, ' '); + const char *type = ts_node_type(node); + + uint32_t start = ts_node_start_byte(node); + uint32_t end = ts_node_end_byte(node); + + // Get the text content for leaf nodes or small nodes + std::string content; + if (end - start < 100) { // Only show content for small nodes + content = extractNodeText(node, source_code); + // Replace newlines with \n for better readability + std::regex newline_regex("\n"); + content = std::regex_replace(content, newline_regex, "\\n"); + // Truncate if still too long + if (content.length() > 50) { + content = content.substr(0, 47) + "..."; + } + } + + if (!content.empty()) { + spdlog::info("{}{}[{}:{}] \"{}\"", indent, type, start, end, content); + } else { + spdlog::info("{}{}[{}:{}]", indent, type, start, end); + } + + // Recursively dump children + uint32_t child_count = ts_node_child_count(node); + for (uint32_t i = 0; i < child_count; i++) { + TSNode child = ts_node_child(node, i); + dumpTreeSitterAST(child, source_code, depth + 1); + } +} + +bool dumpTreeFile(const std::string &filepath) { + std::ifstream file(filepath); + if (!file.is_open()) { + spdlog::error("Could not open file {}", filepath); + return false; + } + + std::string file_content((std::istreambuf_iterator(file)), + std::istreambuf_iterator()); + + TSParser *parser = ts_parser_new(); + ts_parser_set_language(parser, tree_sitter_cpp()); + + TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(), + file_content.length()); + TSNode root_node = ts_tree_root_node(tree); + + if (ts_node_is_null(root_node)) { + spdlog::error("Failed to parse file {}", filepath); + ts_tree_delete(tree); + ts_parser_delete(parser); + return false; + } + + spdlog::info("=== Tree-sitter AST for {} ===", filepath); + dumpTreeSitterAST(root_node, file_content.c_str()); + spdlog::info("=== End of AST dump ==="); + + ts_tree_delete(tree); + ts_parser_delete(parser); + return true; +} + +void register_cmd_dump(CLI::App &app) { + auto cmd = + app.add_subcommand("dump-tree", "Dump the tree-sitter AST for a file"); + cmd->add_option("-f,--filepath", filepath, + "File to dump the tree-sitter AST for") + ->required(); + cmd->final_callback([]() { + spdlog::info("=== Processing: {} ===", filepath); + dumpTreeFile(filepath); + }); +} diff --git a/tooling/cmd_hooks.cpp b/tooling/cmd_hooks.cpp new file mode 100644 index 00000000..e69de29b diff --git a/tooling/cmd_scan.cpp b/tooling/cmd_scan.cpp new file mode 100644 index 00000000..71874753 --- /dev/null +++ b/tooling/cmd_scan.cpp @@ -0,0 +1,109 @@ +#include "tool.hpp" +#include + +static std::vector files; +static std::string list_file; +static std::string type_str; +FileType file_type; // Add type string variable + +std::vector getFilesToProcess() { + std::vector files_to_process; + if (!list_file.empty()) { + auto list_files = readFileList(list_file); + files_to_process.insert(files_to_process.end(), list_files.begin(), + list_files.end()); + } + + for (const auto &input : files) { + if (input.starts_with("@")) { + auto list_files = readFileList(input.substr(1)); + files_to_process.insert(files_to_process.end(), list_files.begin(), + list_files.end()); + } else if (std::filesystem::exists(input)) { + files_to_process.push_back(input); + } else { + spdlog::warn("File not found: {}", input); + } + } + + return files_to_process; +} + +static void setupCommand(CLI::App &app, std::string mode) { + auto cmd = app.add_subcommand(mode, "Scan for functions and globals"); + + cmd->add_option("files", files, + "Input C++ files to parse (supports @listfile.txt syntax)"); + cmd->add_option("-l,--list", list_file, + "File containing list of files to process"); + cmd->add_option("-t,--type", type_str, + "File type: 'auto', 'fix', 'stub', or 'ref'") + ->default_val("auto") + ->check(CLI::IsMember({"auto", "fix", "stub", "ref"})); + cmd->final_callback([mode]() { + if (files.empty() && list_file.empty()) { + spdlog::error("No files to process. Use --help for usage information."); + exit(1); + } + + // Convert string to FileType enum + try { + file_type = stringToFileType(type_str); + } catch (const std::invalid_argument &e) { + spdlog::error("Invalid file type: {}", type_str); + exit(1); + } + + auto files_to_process = getFilesToProcess(); + if (files_to_process.empty()) { + spdlog::error("No files to process. Use --help for usage information."); + exit(1); + } + + auto &options = Options::get(); + DatabaseManager db(options.db_path); + + const size_t batch_size = 50; + size_t current_batch = 0; + int processed_count = 0; + + db.beginTransaction(); + + for (const auto &filepath : files_to_process) { + spdlog::info("=== Processing: {} (type: {}) ===", filepath, type_str); + bool success = false; + if (mode == "functions") { + success = processFile(filepath, db, file_type); + } else if (mode == "globals") { + success = processGlobalsFile(filepath, db); + } + + if (success) + processed_count++; + + if (++current_batch >= batch_size) { + db.commitTransaction(); + spdlog::info("Committed batch of {} files to database", current_batch); + db.beginTransaction(); + current_batch = 0; + } + } + + if (current_batch > 0) { + db.commitTransaction(); + spdlog::info("Committed final batch of {} files to database", + current_batch); + } + + spdlog::info("=== Summary ==="); + spdlog::info("Processed {} files successfully", processed_count); + spdlog::info("Mode: {}", mode); + spdlog::info("File type: {}", type_str); + spdlog::info("Database saved to: {}", options.db_path); + }); +} + +void register_cmd_scan(CLI::App &app) { + setupCommand(app, "functions"); + setupCommand(app, "globals"); +} diff --git a/tooling/cmd_verify.cpp b/tooling/cmd_verify.cpp new file mode 100644 index 00000000..99ffc6a0 --- /dev/null +++ b/tooling/cmd_verify.cpp @@ -0,0 +1,38 @@ +#include "tool.hpp" +#include + +bool processDuplicates(DatabaseManager &db) { + spdlog::info("=== Checking for duplicate addresses ==="); + bool found_address_duplicates = db.checkDuplicateAddresses(); + if (found_address_duplicates) { + spdlog::error("Found duplicate addresses in the database!"); + } else { + spdlog::info("No duplicate addresses found in the database."); + } + + spdlog::info("=== Checking for duplicate names ==="); + bool found_name_duplicates = db.checkDuplicateNames(); + if (found_name_duplicates) { + spdlog::error("Found duplicate names in the database!"); + } else { + spdlog::info("No duplicate names found in the database."); + } + + return !found_address_duplicates && !found_name_duplicates; +} + +void register_cmd_verify(CLI::App &app) { + auto cmd = app.add_subcommand("verify", "Verify the database"); + + cmd->final_callback([]() { + auto &options = Options::get(); + DatabaseManager db(options.db_path); + // For duplicates mode, we only check the database, no file processing + spdlog::info("=== Checking database for duplicates ==="); + bool has_duplicates = !processDuplicates(db); + spdlog::info("=== Summary ==="); + spdlog::info("Mode: {}", options.mode); + spdlog::info("Database: {}", options.db_path); + return has_duplicates ? 1 : 0; // Return 1 if duplicates found, 0 if none + }); +} diff --git a/tooling/database.cpp b/tooling/database.cpp new file mode 100644 index 00000000..c5410597 --- /dev/null +++ b/tooling/database.cpp @@ -0,0 +1,222 @@ +#include "tool.hpp" +#include +#include + +// Database classes +class PreparedStatements { +public: + sqlite3 *db; + sqlite3_stmt *delete_functions_stmt; + sqlite3_stmt *delete_imports_stmt; + sqlite3_stmt *insert_functions_stmt; + sqlite3_stmt *insert_imports_stmt; + sqlite3_stmt *delete_globals_stmt; + sqlite3_stmt *insert_globals_stmt; + + void prepareStatement(const char *sql, sqlite3_stmt **stmt, + const std::string &error_msg); + + PreparedStatements(sqlite3 *database) : db(database) { + prepareStatement("DELETE FROM Functions WHERE filepath = ?", + &delete_functions_stmt, + "Failed to prepare delete functions statement"); + prepareStatement("DELETE FROM Imports WHERE filepath = ?", + &delete_imports_stmt, + "Failed to prepare delete imports statement"); + prepareStatement("INSERT OR REPLACE INTO Functions (filepath, name, " + "address, type) VALUES (?, ?, ?, ?)", + &insert_functions_stmt, + "Failed to prepare insert functions statement"); + prepareStatement("INSERT OR REPLACE INTO Imports (filepath, name, address, " + "type) VALUES (?, ?, ?, ?)", + &insert_imports_stmt, + "Failed to prepare insert imports statement"); + prepareStatement("DELETE FROM Globals WHERE filepath = ?", + &delete_globals_stmt, + "Failed to prepare delete globals statement"); + prepareStatement("INSERT OR REPLACE INTO Globals (filepath, name, address) " + "VALUES (?, ?, ?)", + &insert_globals_stmt, + "Failed to prepare insert globals statement"); + } + + ~PreparedStatements() { + sqlite3_finalize(delete_functions_stmt); + sqlite3_finalize(delete_imports_stmt); + sqlite3_finalize(insert_functions_stmt); + sqlite3_finalize(insert_imports_stmt); + sqlite3_finalize(delete_globals_stmt); + sqlite3_finalize(insert_globals_stmt); + } +}; + +void PreparedStatements::prepareStatement(const char *sql, sqlite3_stmt **stmt, + const std::string &error_msg) { + if (sqlite3_prepare_v2(db, sql, -1, stmt, nullptr) != SQLITE_OK) { + throw std::runtime_error(error_msg + ": " + sqlite3_errmsg(db)); + } +} + +DatabaseManager::DatabaseManager(const std::string &db_path) : db(nullptr) { + if (sqlite3_open(db_path.c_str(), &db) != SQLITE_OK) { + spdlog::error("Can't open database: {}", sqlite3_errmsg(db)); + sqlite3_close(db); + throw std::runtime_error("Failed to open database"); + } + + const char *create_tables = R"( + CREATE TABLE IF NOT EXISTS Functions (filepath TEXT, name TEXT, address TEXT, type INTEGER DEFAULT 0, PRIMARY KEY (name, filepath)); + CREATE TABLE IF NOT EXISTS Imports (filepath TEXT, name TEXT, address TEXT, type INTEGER DEFAULT 0, PRIMARY KEY (name, filepath)); + CREATE TABLE IF NOT EXISTS Globals (filepath TEXT, name TEXT, address TEXT); + )"; + + sqlite3_exec(db, create_tables, nullptr, nullptr, nullptr); + prepared_stmts = std::make_shared(db); +} + +DatabaseManager::~DatabaseManager() { + if (db) + sqlite3_close(db); +} + +void DatabaseManager::clearEntriesForFile(const std::string &filepath) { + for (auto stmt : {prepared_stmts->delete_functions_stmt, + prepared_stmts->delete_imports_stmt}) { + sqlite3_reset(stmt); + sqlite3_bind_text(stmt, 1, filepath.c_str(), -1, SQLITE_STATIC); + sqlite3_step(stmt); + } +} + +void DatabaseManager::clearGlobalsForFile(const std::string &filepath) { + sqlite3_reset(prepared_stmts->delete_globals_stmt); + sqlite3_bind_text(prepared_stmts->delete_globals_stmt, 1, filepath.c_str(), + -1, SQLITE_STATIC); + sqlite3_step(prepared_stmts->delete_globals_stmt); +} + +void DatabaseManager::insertFunction(const FunctionInfo &func) { + sqlite3_stmt *stmt = func.is_import ? prepared_stmts->insert_imports_stmt + : prepared_stmts->insert_functions_stmt; + sqlite3_reset(stmt); + sqlite3_bind_text(stmt, 1, func.filepath.c_str(), -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 2, func.name.c_str(), -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 3, func.address.c_str(), -1, SQLITE_STATIC); + sqlite3_bind_int(stmt, 4, static_cast(func.type)); + sqlite3_step(stmt); +} + +void DatabaseManager::insertGlobal(const GlobalInfo &global) { + sqlite3_reset(prepared_stmts->insert_globals_stmt); + sqlite3_bind_text(prepared_stmts->insert_globals_stmt, 1, + global.filepath.c_str(), -1, SQLITE_STATIC); + sqlite3_bind_text(prepared_stmts->insert_globals_stmt, 2, global.name.c_str(), + -1, SQLITE_STATIC); + sqlite3_bind_text(prepared_stmts->insert_globals_stmt, 3, + global.address.c_str(), -1, SQLITE_STATIC); + sqlite3_step(prepared_stmts->insert_globals_stmt); +} + +void DatabaseManager::beginTransaction() { + sqlite3_exec(db, "BEGIN TRANSACTION", nullptr, nullptr, nullptr); +} + +void DatabaseManager::commitTransaction() { + sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr); +} + +void DatabaseManager::rollbackTransaction() { + sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr); +} + +bool DatabaseManager::checkDuplicateAddresses() { + const char *sql = R"( + WITH all_addresses AS ( + SELECT 'Functions' as table_name, name, address, filepath FROM Functions WHERE address != '' AND type != 3 + UNION ALL + SELECT 'Globals' as table_name, name, address, filepath FROM Globals WHERE address != '' + ) + SELECT address, COUNT(*) as count, + GROUP_CONCAT(table_name || ':' || name || ' (' || filepath || ')', '; ') as entries + FROM all_addresses + GROUP BY address + HAVING COUNT(*) > 1 + ORDER BY address; + )"; + + sqlite3_stmt *stmt; + if (sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr) != SQLITE_OK) { + spdlog::error("Failed to prepare duplicate address query: {}", + sqlite3_errmsg(db)); + return false; + } + + bool found_duplicates = false; + while (sqlite3_step(stmt) == SQLITE_ROW) { + found_duplicates = true; + const char *address = (const char *)sqlite3_column_text(stmt, 0); + int count = sqlite3_column_int(stmt, 1); + const char *entries = (const char *)sqlite3_column_text(stmt, 2); + + spdlog::error("DUPLICATE ADDRESS: {} appears {} times in: {}", address, + count, entries); + } + + sqlite3_finalize(stmt); + return found_duplicates; +} + +bool DatabaseManager::checkDuplicateNames() { + bool found_duplicates = false; + + // Check Functions table + const char *functions_sql = R"( + SELECT name, COUNT(*) as count, + GROUP_CONCAT(filepath, '; ') as filepaths + FROM Functions + WHERE type != 3 + GROUP BY name + HAVING COUNT(*) > 1 + ORDER BY name; + )"; + + sqlite3_stmt *stmt; + if (sqlite3_prepare_v2(db, functions_sql, -1, &stmt, nullptr) == SQLITE_OK) { + while (sqlite3_step(stmt) == SQLITE_ROW) { + found_duplicates = true; + const char *name = (const char *)sqlite3_column_text(stmt, 0); + int count = sqlite3_column_int(stmt, 1); + const char *filepaths = (const char *)sqlite3_column_text(stmt, 2); + + spdlog::error( + "DUPLICATE FUNCTION NAME: '{}' appears {} times in files: {}", name, + count, filepaths); + } + sqlite3_finalize(stmt); + } + + // Check Globals table + const char *globals_sql = R"( + SELECT name, COUNT(*) as count, + GROUP_CONCAT(filepath, '; ') as filepaths + FROM Globals + GROUP BY name + HAVING COUNT(*) > 1 + ORDER BY name; + )"; + + if (sqlite3_prepare_v2(db, globals_sql, -1, &stmt, nullptr) == SQLITE_OK) { + while (sqlite3_step(stmt) == SQLITE_ROW) { + found_duplicates = true; + const char *name = (const char *)sqlite3_column_text(stmt, 0); + int count = sqlite3_column_int(stmt, 1); + const char *filepaths = (const char *)sqlite3_column_text(stmt, 2); + + spdlog::error("DUPLICATE GLOBAL NAME: '{}' appears {} times in files: {}", + name, count, filepaths); + } + sqlite3_finalize(stmt); + } + + return found_duplicates; +} diff --git a/tooling/file_processor.cpp b/tooling/file_processor.cpp new file mode 100644 index 00000000..d9088b59 --- /dev/null +++ b/tooling/file_processor.cpp @@ -0,0 +1,126 @@ +#include "tool.hpp" +#include +#include +#include +#include +#include + +// Forward declarations +extern "C" TSLanguage *tree_sitter_cpp(); + +std::vector readFileList(const std::string &list_file) { + std::vector files; + std::ifstream file(list_file); + if (!file.is_open()) { + spdlog::error("Could not open list file {}", list_file); + return files; + } + + std::string line; + while (std::getline(file, line)) { + if (line.empty() || line[0] == '#') + continue; + + if (line.find('*') != std::string::npos) { + spdlog::info("Skipping wildcard pattern: {}", line); + continue; + } + + if (std::filesystem::exists(line)) { + files.push_back(line); + } else { + spdlog::warn("File not found: {}", line); + } + } + return files; +} + +bool processFile(const std::string &filepath, DatabaseManager &db, + FileType file_type) { + std::ifstream file(filepath); + if (!file.is_open()) { + spdlog::error("Could not open file {}", filepath); + return false; + } + + std::string file_content((std::istreambuf_iterator(file)), + std::istreambuf_iterator()); + + TSParser *parser = ts_parser_new(); + ts_parser_set_language(parser, tree_sitter_cpp()); + + TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(), + file_content.length()); + TSNode root_node = ts_tree_root_node(tree); + + if (ts_node_is_null(root_node)) { + spdlog::error("Failed to parse file {}", filepath); + ts_tree_delete(tree); + ts_parser_delete(parser); + return false; + } + + db.clearEntriesForFile(filepath); + + std::vector functions; + findFunctions(root_node, file_content.c_str(), file_content.length(), + functions, file_type); + + for (auto &func : functions) { + func.filepath = filepath; + db.insertFunction(func); + spdlog::debug("{}: {} @ {} in {} (type: {})", + func.is_import ? "Import" : "Function", func.name, + func.address, filepath, fileTypeToString(func.type)); + } + + spdlog::info("Processed {} functions/imports from {} (type: {})", + functions.size(), filepath, fileTypeToString(file_type)); + + ts_tree_delete(tree); + ts_parser_delete(parser); + return true; +} + +bool processGlobalsFile(const std::string &filepath, DatabaseManager &db) { + std::ifstream file(filepath); + if (!file.is_open()) { + spdlog::error("Could not open file {}", filepath); + return false; + } + + std::string file_content((std::istreambuf_iterator(file)), + std::istreambuf_iterator()); + + TSParser *parser = ts_parser_new(); + ts_parser_set_language(parser, tree_sitter_cpp()); + + TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(), + file_content.length()); + TSNode root_node = ts_tree_root_node(tree); + + if (ts_node_is_null(root_node)) { + spdlog::error("Failed to parse file {}", filepath); + ts_tree_delete(tree); + ts_parser_delete(parser); + return false; + } + + db.clearGlobalsForFile(filepath); + + std::vector globals; + findGlobals(root_node, file_content.c_str(), file_content.length(), globals); + + for (auto &global : globals) { + global.filepath = filepath; + db.insertGlobal(global); + spdlog::debug("Global: {} @ {} in {}", global.name, global.address, + filepath); + } + + spdlog::info("Processed {} globals from {}", globals.size(), filepath); + + ts_tree_delete(tree); + ts_parser_delete(parser); + return true; +} diff --git a/tooling/parser.cpp b/tooling/parser.cpp new file mode 100644 index 00000000..01d27d2c --- /dev/null +++ b/tooling/parser.cpp @@ -0,0 +1,296 @@ +#include "tool.hpp" +#include +#include +#include + +// Global address regex pattern +const std::regex ADDRESS_REGEX(R"(//\s*([0-9a-fA-F]{8}))"); + +// Helper function to convert string to FileType +FileType stringToFileType(const std::string &type_str) { + if (type_str == "auto") + return FileType::Auto; + if (type_str == "fix") + return FileType::Fix; + if (type_str == "stub") + return FileType::Stub; + if (type_str == "ref") + return FileType::Ref; + throw std::invalid_argument("Invalid file type: " + type_str); +} + +// Helper function to convert FileType to string +std::string fileTypeToString(FileType type) { + switch (type) { + case FileType::Auto: + return "auto"; + case FileType::Fix: + return "fix"; + case FileType::Stub: + return "stub"; + case FileType::Ref: + return "ref"; + default: + throw std::logic_error("Invalid file type: " + std::to_string((int)type)); + } +} + +// Helper function to check if a comment contains an address +bool hasAddressPattern(const std::string &comment) { + return std::regex_search(comment, ADDRESS_REGEX); +} + +std::string extractAddress(const std::string &comment) { + std::smatch match; + return std::regex_search(comment, match, ADDRESS_REGEX) ? match[1].str() : ""; +} + +// Helper function to extract text from a TSNode +std::string extractNodeText(TSNode node, const char *source_code) { + uint32_t start = ts_node_start_byte(node); + uint32_t end = ts_node_end_byte(node); + return std::string(source_code + start, end - start); +} + +// Helper function to find first identifier in a node +std::string findIdentifierInNode(TSNode node, const char *source_code) { + uint32_t child_count = ts_node_child_count(node); + for (uint32_t i = 0; i < child_count; i++) { + TSNode child = ts_node_child(node, i); + if (strcmp(ts_node_type(child), "identifier") == 0) { + return extractNodeText(child, source_code); + } + } + return ""; +} + +// Helper function to recursively find identifier in any declarator +std::string findIdentifierInDeclarator(TSNode node, const char *source_code) { + const char *type = ts_node_type(node); + + // If this is an identifier, return it + if (strcmp(type, "identifier") == 0) { + return extractNodeText(node, source_code); + } + + // Recursively search all children + uint32_t child_count = ts_node_child_count(node); + for (uint32_t i = 0; i < child_count; i++) { + TSNode child = ts_node_child(node, i); + std::string result = findIdentifierInDeclarator(child, source_code); + if (!result.empty()) { + return result; + } + } + + return ""; +} + +std::string getFunctionName(TSNode node, const char *source_code) { + uint32_t child_count = ts_node_child_count(node); + + for (uint32_t i = 0; i < child_count; i++) { + TSNode child = ts_node_child(node, i); + const char *type = ts_node_type(child); + + if (strcmp(type, "function_declarator") == 0) { + std::string name = findIdentifierInNode(child, source_code); + if (!name.empty()) + return name; + } else if (strcmp(type, "identifier") == 0) { + return extractNodeText(child, source_code); + } else if (strcmp(type, "pointer_declarator") == 0) { + std::string name = getFunctionName(child, source_code); + if (!name.empty()) + return name; + } + } + return ""; +} + +std::string getGlobalName(TSNode node, const char *source_code) { + uint32_t child_count = ts_node_child_count(node); + + for (uint32_t i = 0; i < child_count; i++) { + TSNode child = ts_node_child(node, i); + const char *type = ts_node_type(child); + + // Look for any kind of declarator and recursively search for identifier + if (strcmp(type, "init_declarator") == 0 || + strcmp(type, "declarator") == 0 || + strcmp(type, "reference_declarator") == 0 || + strcmp(type, "pointer_declarator") == 0 || + strcmp(type, "parenthesized_declarator") == 0 || + strcmp(type, "array_declarator") == 0) { + std::string name = findIdentifierInDeclarator(child, source_code); + if (!name.empty()) { + return name; + } + } + // Direct identifier child + else if (strcmp(type, "identifier") == 0) { + return extractNodeText(child, source_code); + } + } + return ""; +} + +std::string getComment(TSNode node, const char *source_code, + uint32_t source_length, bool search_before) { + TSNode current = node; + + if (search_before) { + // Look for comments before the current node + while (!ts_node_is_null(current)) { + TSNode prev_sibling = ts_node_prev_sibling(current); + + while (!ts_node_is_null(prev_sibling)) { + const char *type = ts_node_type(prev_sibling); + + if (strcmp(type, "comment") == 0) { + std::string comment_text = extractNodeText(prev_sibling, source_code); + + // Check if it contains an address pattern + if (hasAddressPattern(comment_text)) { + return comment_text; + } + } + // Skip whitespace and continue looking + else if (strcmp(type, "ERROR") != 0) { + // If we hit non-comment, non-whitespace content, stop searching + break; + } + + prev_sibling = ts_node_prev_sibling(prev_sibling); + } + + // Move up to parent and continue searching + current = ts_node_parent(current); + } + } else { + // Look for comments after the current node + TSNode next_sibling = ts_node_next_sibling(node); + + while (!ts_node_is_null(next_sibling)) { + const char *type = ts_node_type(next_sibling); + + if (strcmp(type, "comment") == 0) { + std::string comment_text = extractNodeText(next_sibling, source_code); + + // Check if it contains an address pattern + if (hasAddressPattern(comment_text)) { + return comment_text; + } + } + // Skip whitespace and continue looking + else if (strcmp(type, "ERROR") != 0) { + // If we hit non-comment, non-whitespace content, stop searching + break; + } + + next_sibling = ts_node_next_sibling(next_sibling); + } + } + + return ""; +} + +bool hasFunctionBody(TSNode node) { + if (strcmp(ts_node_type(node), "function_definition") != 0) + return false; + + uint32_t child_count = ts_node_child_count(node); + for (uint32_t i = 0; i < child_count; i++) { + if (strcmp(ts_node_type(ts_node_child(node, i)), "compound_statement") == + 0) { + return true; + } + } + return false; +} + +void findFunctions(TSNode node, const char *source_code, uint32_t source_length, + std::vector &functions, FileType file_type) { + const char *type = ts_node_type(node); + + if (strcmp(type, "function_definition") == 0 || + strcmp(type, "declaration") == 0) { + std::string func_name = getFunctionName(node, source_code); + if (!func_name.empty()) { + std::string address = + extractAddress(getComment(node, source_code, source_length, false)); + + if (address.empty() && strcmp(type, "function_definition") == 0) { + address = + extractAddress(getComment(node, source_code, source_length, true)); + } + + if (!address.empty()) { + FunctionInfo func{func_name, address, "", + strcmp(type, "function_definition") == 0 + ? !hasFunctionBody(node) + : true, + file_type}; // Add file_type parameter + functions.push_back(func); + } + // We'll never nest function declarations + return; + } else { + spdlog::error("Failed to get function name for {}", + extractNodeText(node, source_code)); + } + } + + uint32_t child_count = ts_node_child_count(node); + for (uint32_t i = 0; i < child_count; i++) { + findFunctions(ts_node_child(node, i), source_code, source_length, functions, + file_type); + } +} + +void findGlobals(TSNode node, const char *source_code, uint32_t source_length, + std::vector &globals) { + const char *type = ts_node_type(node); + + // Look for extern declarations + if (strcmp(type, "declaration") == 0) { + // Check if this is an extern declaration + uint32_t child_count = ts_node_child_count(node); + bool is_extern = false; + + for (uint32_t i = 0; i < child_count; i++) { + TSNode child = ts_node_child(node, i); + if (strcmp(ts_node_type(child), "storage_class_specifier") == 0) { + std::string storage_class = extractNodeText(child, source_code); + if (storage_class == "extern") { + is_extern = true; + break; + } + } + } + + if (is_extern) { + std::string global_name = getGlobalName(node, source_code); + if (!global_name.empty()) { + // Look for address comment after the declaration + std::string address = + extractAddress(getComment(node, source_code, source_length, false)); + + if (!address.empty()) { + GlobalInfo global{global_name, address, ""}; + globals.push_back(global); + } + } else { + std::string src = extractNodeText(node, source_code); + spdlog::error("Failed to get global name for {}", src); + } + return; + } + } + + // Recursively search child nodes + uint32_t child_count = ts_node_child_count(node); + for (uint32_t i = 0; i < child_count; i++) { + findGlobals(ts_node_child(node, i), source_code, source_length, globals); + } +} diff --git a/tooling/tool.cpp b/tooling/tool.cpp index fcb0560f..7f73327a 100644 --- a/tooling/tool.cpp +++ b/tooling/tool.cpp @@ -1,926 +1,45 @@ -#include -#include -#include -#include -#include -#include -#include +#include "tool.hpp" #include -#include -#include -#include #include #include #include -extern "C" TSLanguage *tree_sitter_cpp(); - -// Global address regex pattern -const std::regex ADDRESS_REGEX(R"(//\s*([0-9a-fA-F]{8}))"); - -// Add enum for file types -enum class FileType { Auto, Fix, Stub, Ref }; - -// Helper function to convert string to FileType -FileType stringToFileType(const std::string &type_str) { - if (type_str == "auto") - return FileType::Auto; - if (type_str == "fix") - return FileType::Fix; - if (type_str == "stub") - return FileType::Stub; - if (type_str == "ref") - return FileType::Ref; - throw std::invalid_argument("Invalid file type: " + type_str); -} - -// Helper function to convert FileType to string -std::string fileTypeToString(FileType type) { - switch (type) { - case FileType::Auto: - return "auto"; - case FileType::Fix: - return "fix"; - case FileType::Stub: - return "stub"; - case FileType::Ref: - return "ref"; - default: - throw std::logic_error(fmt::format("Invalid file type: {}", (int)type)); - } -} - -// Helper function to check if a comment contains an address -bool hasAddressPattern(const std::string &comment) { - return std::regex_search(comment, ADDRESS_REGEX); -} - -// Helper function to extract text from a TSNode -std::string extractNodeText(TSNode node, const char *source_code) { - uint32_t start = ts_node_start_byte(node); - uint32_t end = ts_node_end_byte(node); - return std::string(source_code + start, end - start); -} - -// Helper function to find first identifier in a node -std::string findIdentifierInNode(TSNode node, const char *source_code) { - uint32_t child_count = ts_node_child_count(node); - for (uint32_t i = 0; i < child_count; i++) { - TSNode child = ts_node_child(node, i); - if (strcmp(ts_node_type(child), "identifier") == 0) { - return extractNodeText(child, source_code); - } - } - return ""; -} - -struct FunctionInfo { - std::string name; - std::string address; - std::string filepath; - bool is_import; - FileType type; // Add type field -}; - -struct GlobalInfo { - std::string name; - std::string address; - std::string filepath; -}; - -class PreparedStatements { -private: - sqlite3 *db; - sqlite3_stmt *delete_functions_stmt; - sqlite3_stmt *delete_imports_stmt; - sqlite3_stmt *insert_functions_stmt; - sqlite3_stmt *insert_imports_stmt; - sqlite3_stmt *delete_globals_stmt; - sqlite3_stmt *insert_globals_stmt; - - void prepareStatement(const char *sql, sqlite3_stmt **stmt, - const std::string &error_msg) { - if (sqlite3_prepare_v2(db, sql, -1, stmt, nullptr) != SQLITE_OK) { - throw std::runtime_error(error_msg + ": " + sqlite3_errmsg(db)); - } - } - -public: - PreparedStatements(sqlite3 *database) : db(database) { - prepareStatement("DELETE FROM Functions WHERE filepath = ?", - &delete_functions_stmt, - "Failed to prepare delete functions statement"); - prepareStatement("DELETE FROM Imports WHERE filepath = ?", - &delete_imports_stmt, - "Failed to prepare delete imports statement"); - prepareStatement("INSERT OR REPLACE INTO Functions (filepath, name, " - "address, type) VALUES (?, ?, ?, ?)", - &insert_functions_stmt, - "Failed to prepare insert functions statement"); - prepareStatement("INSERT OR REPLACE INTO Imports (filepath, name, address, " - "type) VALUES (?, ?, ?, ?)", - &insert_imports_stmt, - "Failed to prepare insert imports statement"); - prepareStatement("DELETE FROM Globals WHERE filepath = ?", - &delete_globals_stmt, - "Failed to prepare delete globals statement"); - prepareStatement("INSERT OR REPLACE INTO Globals (filepath, name, address) " - "VALUES (?, ?, ?)", - &insert_globals_stmt, - "Failed to prepare insert globals statement"); - } - - ~PreparedStatements() { - sqlite3_finalize(delete_functions_stmt); - sqlite3_finalize(delete_imports_stmt); - sqlite3_finalize(insert_functions_stmt); - sqlite3_finalize(insert_imports_stmt); - sqlite3_finalize(delete_globals_stmt); - sqlite3_finalize(insert_globals_stmt); - } - - void clearEntriesForFile(const std::string &filepath) { - for (auto stmt : {delete_functions_stmt, delete_imports_stmt}) { - sqlite3_reset(stmt); - sqlite3_bind_text(stmt, 1, filepath.c_str(), -1, SQLITE_STATIC); - sqlite3_step(stmt); - } - } - - void clearGlobalsForFile(const std::string &filepath) { - sqlite3_reset(delete_globals_stmt); - sqlite3_bind_text(delete_globals_stmt, 1, filepath.c_str(), -1, - SQLITE_STATIC); - sqlite3_step(delete_globals_stmt); - } - - void insertFunction(const FunctionInfo &func) { - sqlite3_stmt *stmt = - func.is_import ? insert_imports_stmt : insert_functions_stmt; - sqlite3_reset(stmt); - sqlite3_bind_text(stmt, 1, func.filepath.c_str(), -1, SQLITE_STATIC); - sqlite3_bind_text(stmt, 2, func.name.c_str(), -1, SQLITE_STATIC); - sqlite3_bind_text(stmt, 3, func.address.c_str(), -1, SQLITE_STATIC); - sqlite3_bind_int(stmt, 4, static_cast(func.type)); - sqlite3_step(stmt); - } - - void insertGlobal(const GlobalInfo &global) { - sqlite3_reset(insert_globals_stmt); - sqlite3_bind_text(insert_globals_stmt, 1, global.filepath.c_str(), -1, - SQLITE_STATIC); - sqlite3_bind_text(insert_globals_stmt, 2, global.name.c_str(), -1, - SQLITE_STATIC); - sqlite3_bind_text(insert_globals_stmt, 3, global.address.c_str(), -1, - SQLITE_STATIC); - sqlite3_step(insert_globals_stmt); - } -}; - -class DatabaseManager { -private: - sqlite3 *db; - std::unique_ptr prepared_stmts; - -public: - DatabaseManager(const std::string &db_path) : db(nullptr) { - if (sqlite3_open(db_path.c_str(), &db) != SQLITE_OK) { - spdlog::error("Can't open database: {}", sqlite3_errmsg(db)); - sqlite3_close(db); - throw std::runtime_error("Failed to open database"); - } - - const char *create_tables = R"( - CREATE TABLE IF NOT EXISTS Functions (filepath TEXT, name TEXT, address TEXT, type INTEGER DEFAULT 0, PRIMARY KEY (name, filepath)); - CREATE TABLE IF NOT EXISTS Imports (filepath TEXT, name TEXT, address TEXT, type INTEGER DEFAULT 0, PRIMARY KEY (name, filepath)); - CREATE TABLE IF NOT EXISTS Globals (filepath TEXT, name TEXT, address TEXT); - )"; - - sqlite3_exec(db, create_tables, nullptr, nullptr, nullptr); - prepared_stmts = std::make_unique(db); - } - - ~DatabaseManager() { - if (db) - sqlite3_close(db); - } - - void clearEntriesForFile(const std::string &filepath) { - prepared_stmts->clearEntriesForFile(filepath); - } - void insertFunction(const FunctionInfo &func) { - prepared_stmts->insertFunction(func); - } - void clearGlobalsForFile(const std::string &filepath) { - prepared_stmts->clearGlobalsForFile(filepath); - } - void insertGlobal(const GlobalInfo &global) { - prepared_stmts->insertGlobal(global); - } - void beginTransaction() { - sqlite3_exec(db, "BEGIN TRANSACTION", nullptr, nullptr, nullptr); - } - void commitTransaction() { - sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr); - } - void rollbackTransaction() { - sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr); - } - - // New methods for duplicate checking - bool checkDuplicateAddresses() { - const char *sql = R"( - WITH all_addresses AS ( - SELECT 'Functions' as table_name, name, address, filepath FROM Functions WHERE address != '' AND type != 3 - UNION ALL - SELECT 'Globals' as table_name, name, address, filepath FROM Globals WHERE address != '' - ) - SELECT address, COUNT(*) as count, - GROUP_CONCAT(table_name || ':' || name || ' (' || filepath || ')', '; ') as entries - FROM all_addresses - GROUP BY address - HAVING COUNT(*) > 1 - ORDER BY address; - )"; - - sqlite3_stmt *stmt; - if (sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr) != SQLITE_OK) { - spdlog::error("Failed to prepare duplicate address query: {}", - sqlite3_errmsg(db)); - return false; - } - - bool found_duplicates = false; - while (sqlite3_step(stmt) == SQLITE_ROW) { - found_duplicates = true; - const char *address = (const char *)sqlite3_column_text(stmt, 0); - int count = sqlite3_column_int(stmt, 1); - const char *entries = (const char *)sqlite3_column_text(stmt, 2); - - spdlog::error("DUPLICATE ADDRESS: {} appears {} times in: {}", address, - count, entries); - } - - sqlite3_finalize(stmt); - return found_duplicates; - } - - bool checkDuplicateNames() { - bool found_duplicates = false; - - // Check Functions table - const char *functions_sql = R"( - SELECT name, COUNT(*) as count, - GROUP_CONCAT(filepath, '; ') as filepaths - FROM Functions - WHERE type != 3 - GROUP BY name - HAVING COUNT(*) > 1 - ORDER BY name; - )"; - - sqlite3_stmt *stmt; - if (sqlite3_prepare_v2(db, functions_sql, -1, &stmt, nullptr) == - SQLITE_OK) { - while (sqlite3_step(stmt) == SQLITE_ROW) { - found_duplicates = true; - const char *name = (const char *)sqlite3_column_text(stmt, 0); - int count = sqlite3_column_int(stmt, 1); - const char *filepaths = (const char *)sqlite3_column_text(stmt, 2); - - spdlog::error( - "DUPLICATE FUNCTION NAME: '{}' appears {} times in files: {}", name, - count, filepaths); - } - sqlite3_finalize(stmt); - } - - // Check Globals table - const char *globals_sql = R"( - SELECT name, COUNT(*) as count, - GROUP_CONCAT(filepath, '; ') as filepaths - FROM Globals - GROUP BY name - HAVING COUNT(*) > 1 - ORDER BY name; - )"; - - if (sqlite3_prepare_v2(db, globals_sql, -1, &stmt, nullptr) == SQLITE_OK) { - while (sqlite3_step(stmt) == SQLITE_ROW) { - found_duplicates = true; - const char *name = (const char *)sqlite3_column_text(stmt, 0); - int count = sqlite3_column_int(stmt, 1); - const char *filepaths = (const char *)sqlite3_column_text(stmt, 2); - - spdlog::error( - "DUPLICATE GLOBAL NAME: '{}' appears {} times in files: {}", name, - count, filepaths); - } - sqlite3_finalize(stmt); - } - - return found_duplicates; - } -}; - -std::string extractAddress(const std::string &comment) { - std::smatch match; - return std::regex_search(comment, match, ADDRESS_REGEX) ? match[1].str() : ""; -} - -std::string getFunctionName(TSNode node, const char *source_code) { - uint32_t child_count = ts_node_child_count(node); - - for (uint32_t i = 0; i < child_count; i++) { - TSNode child = ts_node_child(node, i); - const char *type = ts_node_type(child); - - if (strcmp(type, "function_declarator") == 0) { - std::string name = findIdentifierInNode(child, source_code); - if (!name.empty()) - return name; - } else if (strcmp(type, "identifier") == 0) { - return extractNodeText(child, source_code); - } else if (strcmp(type, "pointer_declarator") == 0) { - std::string name = getFunctionName(child, source_code); - if (!name.empty()) - return name; - } - } - return ""; -} - -std::string getComment(TSNode node, const char *source_code, - uint32_t source_length, bool search_before) { - TSNode current = node; - - if (search_before) { - // Look for comments before the current node - while (!ts_node_is_null(current)) { - TSNode prev_sibling = ts_node_prev_sibling(current); - - while (!ts_node_is_null(prev_sibling)) { - const char *type = ts_node_type(prev_sibling); - - if (strcmp(type, "comment") == 0) { - std::string comment_text = extractNodeText(prev_sibling, source_code); - - // Check if it contains an address pattern - if (hasAddressPattern(comment_text)) { - return comment_text; - } - } - // Skip whitespace and continue looking - else if (strcmp(type, "ERROR") != 0) { - // If we hit non-comment, non-whitespace content, stop searching - break; - } - - prev_sibling = ts_node_prev_sibling(prev_sibling); - } - - // Move up to parent and continue searching - current = ts_node_parent(current); - } - } else { - // Look for comments after the current node - TSNode next_sibling = ts_node_next_sibling(node); - - while (!ts_node_is_null(next_sibling)) { - const char *type = ts_node_type(next_sibling); - - if (strcmp(type, "comment") == 0) { - std::string comment_text = extractNodeText(next_sibling, source_code); - - // Check if it contains an address pattern - if (hasAddressPattern(comment_text)) { - return comment_text; - } - } - // Skip whitespace and continue looking - else if (strcmp(type, "ERROR") != 0) { - // If we hit non-comment, non-whitespace content, stop searching - break; - } - - next_sibling = ts_node_next_sibling(next_sibling); - } - } - - return ""; -} - -bool hasFunctionBody(TSNode node) { - if (strcmp(ts_node_type(node), "function_definition") != 0) - return false; - - uint32_t child_count = ts_node_child_count(node); - for (uint32_t i = 0; i < child_count; i++) { - if (strcmp(ts_node_type(ts_node_child(node, i)), "compound_statement") == - 0) { - return true; - } - } - return false; -} - -void findFunctions(TSNode node, const char *source_code, uint32_t source_length, - std::vector &functions, FileType file_type) { - const char *type = ts_node_type(node); - - if (strcmp(type, "function_definition") == 0 || - strcmp(type, "declaration") == 0) { - std::string func_name = getFunctionName(node, source_code); - if (!func_name.empty()) { - std::string address = - extractAddress(getComment(node, source_code, source_length, false)); - - if (address.empty() && strcmp(type, "function_definition") == 0) { - address = - extractAddress(getComment(node, source_code, source_length, true)); - } - - if (!address.empty()) { - FunctionInfo func{func_name, address, "", - strcmp(type, "function_definition") == 0 - ? !hasFunctionBody(node) - : true, - file_type}; // Add file_type parameter - functions.push_back(func); - } - // We'll never nest function declarations - return; - } else { - spdlog::error("Failed to get function name for {}", - extractNodeText(node, source_code)); - } - } - - uint32_t child_count = ts_node_child_count(node); - for (uint32_t i = 0; i < child_count; i++) { - findFunctions(ts_node_child(node, i), source_code, source_length, functions, - file_type); - } -} - -std::vector readFileList(const std::string &list_file) { - std::vector files; - std::ifstream file(list_file); - if (!file.is_open()) { - spdlog::error("Could not open list file {}", list_file); - return files; - } - - std::string line; - while (std::getline(file, line)) { - if (line.empty() || line[0] == '#') - continue; - - if (line.find('*') != std::string::npos) { - spdlog::info("Skipping wildcard pattern: {}", line); - continue; - } - - if (std::filesystem::exists(line)) { - files.push_back(line); - } else { - spdlog::warn("File not found: {}", line); - } - } - return files; -} - -bool processFile(const std::string &filepath, DatabaseManager &db, - FileType file_type) { - std::ifstream file(filepath); - if (!file.is_open()) { - spdlog::error("Could not open file {}", filepath); - return false; - } - - std::string file_content((std::istreambuf_iterator(file)), - std::istreambuf_iterator()); - - TSParser *parser = ts_parser_new(); - ts_parser_set_language(parser, tree_sitter_cpp()); - - TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(), - file_content.length()); - TSNode root_node = ts_tree_root_node(tree); - - if (ts_node_is_null(root_node)) { - spdlog::error("Failed to parse file {}", filepath); - ts_tree_delete(tree); - ts_parser_delete(parser); - return false; - } - - db.clearEntriesForFile(filepath); - - std::vector functions; - findFunctions(root_node, file_content.c_str(), file_content.length(), - functions, file_type); - - for (auto &func : functions) { - func.filepath = filepath; - db.insertFunction(func); - spdlog::debug("{}: {} @ {} in {} (type: {})", - func.is_import ? "Import" : "Function", func.name, - func.address, filepath, fileTypeToString(func.type)); - } - - spdlog::info("Processed {} functions/imports from {} (type: {})", - functions.size(), filepath, fileTypeToString(file_type)); - - ts_tree_delete(tree); - ts_parser_delete(parser); - return true; -} - -// Helper function to recursively find identifier in any declarator -std::string findIdentifierInDeclarator(TSNode node, const char *source_code) { - const char *type = ts_node_type(node); - - // If this is an identifier, return it - if (strcmp(type, "identifier") == 0) { - return extractNodeText(node, source_code); - } - - // Recursively search all children - uint32_t child_count = ts_node_child_count(node); - for (uint32_t i = 0; i < child_count; i++) { - TSNode child = ts_node_child(node, i); - std::string result = findIdentifierInDeclarator(child, source_code); - if (!result.empty()) { - return result; - } - } - - return ""; -} - -std::string getGlobalName(TSNode node, const char *source_code) { - uint32_t child_count = ts_node_child_count(node); - - for (uint32_t i = 0; i < child_count; i++) { - TSNode child = ts_node_child(node, i); - const char *type = ts_node_type(child); - - // Look for any kind of declarator and recursively search for identifier - if (strcmp(type, "init_declarator") == 0 || - strcmp(type, "declarator") == 0 || - strcmp(type, "reference_declarator") == 0 || - strcmp(type, "pointer_declarator") == 0 || - strcmp(type, "parenthesized_declarator") == 0 || - strcmp(type, "array_declarator") == 0) { - std::string name = findIdentifierInDeclarator(child, source_code); - if (!name.empty()) { - return name; - } - } - // Direct identifier child - else if (strcmp(type, "identifier") == 0) { - return extractNodeText(child, source_code); - } - } - return ""; -} - -void findGlobals(TSNode node, const char *source_code, uint32_t source_length, - std::vector &globals) { - const char *type = ts_node_type(node); - - // Look for extern declarations - if (strcmp(type, "declaration") == 0) { - // Check if this is an extern declaration - uint32_t child_count = ts_node_child_count(node); - bool is_extern = false; - - for (uint32_t i = 0; i < child_count; i++) { - TSNode child = ts_node_child(node, i); - if (strcmp(ts_node_type(child), "storage_class_specifier") == 0) { - std::string storage_class = extractNodeText(child, source_code); - if (storage_class == "extern") { - is_extern = true; - break; - } - } - } - - if (is_extern) { - std::string global_name = getGlobalName(node, source_code); - if (!global_name.empty()) { - // Look for address comment after the declaration - std::string address = - extractAddress(getComment(node, source_code, source_length, false)); - - if (!address.empty()) { - GlobalInfo global{global_name, address, ""}; - globals.push_back(global); - } - } else { - std::string src = extractNodeText(node, source_code); - SPDLOG_ERROR("Failed to get global name for {}", src); - } - return; - } - } - - // Recursively search child nodes - uint32_t child_count = ts_node_child_count(node); - for (uint32_t i = 0; i < child_count; i++) { - findGlobals(ts_node_child(node, i), source_code, source_length, globals); - } -} - -bool processGlobalsFile(const std::string &filepath, DatabaseManager &db) { - std::ifstream file(filepath); - if (!file.is_open()) { - spdlog::error("Could not open file {}", filepath); - return false; - } - - std::string file_content((std::istreambuf_iterator(file)), - std::istreambuf_iterator()); - - TSParser *parser = ts_parser_new(); - ts_parser_set_language(parser, tree_sitter_cpp()); - - TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(), - file_content.length()); - TSNode root_node = ts_tree_root_node(tree); - - if (ts_node_is_null(root_node)) { - spdlog::error("Failed to parse file {}", filepath); - ts_tree_delete(tree); - ts_parser_delete(parser); - return false; - } - - db.clearGlobalsForFile(filepath); - - std::vector globals; - findGlobals(root_node, file_content.c_str(), file_content.length(), globals); - - for (auto &global : globals) { - global.filepath = filepath; - db.insertGlobal(global); - spdlog::debug("Global: {} @ {} in {}", global.name, global.address, - filepath); - } - - spdlog::info("Processed {} globals from {}", globals.size(), filepath); - - ts_tree_delete(tree); - ts_parser_delete(parser); - return true; -} - -// Helper function to dump Tree-sitter AST -void dumpTreeSitterAST(TSNode node, const char *source_code, int depth = 0) { - std::string indent(depth * 2, ' '); - const char *type = ts_node_type(node); - - uint32_t start = ts_node_start_byte(node); - uint32_t end = ts_node_end_byte(node); - - // Get the text content for leaf nodes or small nodes - std::string content; - if (end - start < 100) { // Only show content for small nodes - content = extractNodeText(node, source_code); - // Replace newlines with \n for better readability - std::regex newline_regex("\n"); - content = std::regex_replace(content, newline_regex, "\\n"); - // Truncate if still too long - if (content.length() > 50) { - content = content.substr(0, 47) + "..."; - } - } - - if (!content.empty()) { - spdlog::info("{}{}[{}:{}] \"{}\"", indent, type, start, end, content); - } else { - spdlog::info("{}{}[{}:{}]", indent, type, start, end); - } - - // Recursively dump children - uint32_t child_count = ts_node_child_count(node); - for (uint32_t i = 0; i < child_count; i++) { - TSNode child = ts_node_child(node, i); - dumpTreeSitterAST(child, source_code, depth + 1); - } -} - -bool dumpTreeFile(const std::string &filepath) { - std::ifstream file(filepath); - if (!file.is_open()) { - spdlog::error("Could not open file {}", filepath); - return false; - } - - std::string file_content((std::istreambuf_iterator(file)), - std::istreambuf_iterator()); - - TSParser *parser = ts_parser_new(); - ts_parser_set_language(parser, tree_sitter_cpp()); - - TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(), - file_content.length()); - TSNode root_node = ts_tree_root_node(tree); - - if (ts_node_is_null(root_node)) { - spdlog::error("Failed to parse file {}", filepath); - ts_tree_delete(tree); - ts_parser_delete(parser); - return false; - } - - spdlog::info("=== Tree-sitter AST for {} ===", filepath); - dumpTreeSitterAST(root_node, file_content.c_str()); - spdlog::info("=== End of AST dump ==="); - - ts_tree_delete(tree); - ts_parser_delete(parser); - return true; -} - -bool processDuplicates(DatabaseManager &db) { - spdlog::info("=== Checking for duplicate addresses ==="); - bool found_address_duplicates = db.checkDuplicateAddresses(); - if (found_address_duplicates) { - spdlog::error("Found duplicate addresses in the database!"); - } else { - spdlog::info("No duplicate addresses found in the database."); - } - - spdlog::info("=== Checking for duplicate names ==="); - bool found_name_duplicates = db.checkDuplicateNames(); - if (found_name_duplicates) { - spdlog::error("Found duplicate names in the database!"); - } else { - spdlog::info("No duplicate names found in the database."); - } - - return !found_address_duplicates && !found_name_duplicates; -} +void register_cmd_scan(CLI::App &app); +void register_cmd_dump(CLI::App &app); +void register_cmd_verify(CLI::App &app); int main(int argc, char *argv[]) { // Initialize spdlog auto console = spdlog::stdout_color_mt("console"); spdlog::set_default_logger(console); spdlog::set_level(spdlog::level::info); // Default to info level - spdlog::set_pattern("[%H:%M:%S] [%^%l%$] %v"); - - CLI::App app{"C++ Function/Global Parser - Extracts function addresses or " - "global variable addresses from C++ files"}; - - std::vector input_files; - std::string list_file; - std::string db_path = "gh.db"; - std::string mode = "functions"; - std::string log_file = ""; - std::string type_str = "auto"; // Add type string variable - bool verbose = false; - - app.add_option("files", input_files, - "Input C++ files to parse (supports @listfile.txt syntax)"); - app.add_option("-l,--list", list_file, - "File containing list of files to process"); - app.add_option("-d,--database", db_path, "SQLite database path") - ->default_val("gh.db"); - app.add_option("-m,--mode", mode, - "Processing mode: 'functions', 'globals', 'duplicates', or " - "'dump-tree'") - ->default_val("functions") - ->check( - CLI::IsMember({"functions", "globals", "duplicates", "dump-tree"})); - app.add_option("-t,--type", type_str, - "File type: 'auto', 'fix', 'stub', or 'ref'") - ->default_val("auto") - ->check(CLI::IsMember({"auto", "fix", "stub", "ref"})); - app.add_flag("-v,--verbose", verbose, "Enable verbose logging (debug level)"); - app.add_flag("--log-file", log_file, "Enable logging to file"); - - CLI11_PARSE(app, argc, argv); - - // Convert string to FileType enum - FileType file_type; - try { - file_type = stringToFileType(type_str); - } catch (const std::invalid_argument &e) { - spdlog::error("Invalid file type: {}", type_str); - return 1; - } - - // Set log level based on verbose flag - if (verbose) { - spdlog::set_level(spdlog::level::debug); - } - spdlog::set_pattern(std::string("[%^%l%$] %v")); - if (!log_file.empty()) { - auto log_sink = - std::make_shared(log_file, true); - spdlog::get("console")->sinks().push_back(log_sink); - } + CLI::App app{"Ghidra decompile database tool"}; - std::vector files_to_process; - bool needFiles = mode != "duplicates"; + std::string type_str = "auto"; - if (needFiles) { - if (!list_file.empty()) { - auto list_files = readFileList(list_file); - files_to_process.insert(files_to_process.end(), list_files.begin(), - list_files.end()); - } - - for (const auto &input : input_files) { - if (input.starts_with("@")) { - auto list_files = readFileList(input.substr(1)); - files_to_process.insert(files_to_process.end(), list_files.begin(), - list_files.end()); - } else if (std::filesystem::exists(input)) { - files_to_process.push_back(input); - } else { - spdlog::warn("File not found: {}", input); - } - } - - if (files_to_process.empty()) { - spdlog::error("No files to process. Use --help for usage information."); - return 1; - } - } - - try { - int processed_count = 0; - - // For dump-tree mode, we don't need database operations - if (mode == "dump-tree") { - for (const auto &filepath : files_to_process) { - spdlog::info("=== Processing: {} ===", filepath); - if (dumpTreeFile(filepath)) { - processed_count++; + Options &options = Options::get(); + app.add_option("-d,--database", options.db_path, "SQLite database path") + ->default_val("gh.db"); + app.add_flag("-v,--verbose", options.verbose, + "Enable verbose logging (debug level)") + ->each([&](std::string) { + if (options.verbose) { + spdlog::set_level(spdlog::level::debug); } - } - } else if (mode == "duplicates") { - DatabaseManager db(db_path); - // For duplicates mode, we only check the database, no file processing - spdlog::info("=== Checking database for duplicates ==="); - bool has_duplicates = !processDuplicates(db); - spdlog::info("=== Summary ==="); - spdlog::info("Mode: {}", mode); - spdlog::info("Database: {}", db_path); - return has_duplicates ? 1 : 0; // Return 1 if duplicates found, 0 if none - } else { - DatabaseManager db(db_path); - - const size_t batch_size = 50; - size_t current_batch = 0; - - db.beginTransaction(); - - for (const auto &filepath : files_to_process) { - spdlog::info("=== Processing: {} (type: {}) ===", filepath, type_str); - bool success = false; - if (mode == "functions") { - success = processFile(filepath, db, file_type); - } else if (mode == "globals") { - success = processGlobalsFile(filepath, db); - } - - if (success) - processed_count++; - - if (++current_batch >= batch_size) { - db.commitTransaction(); - spdlog::info("Committed batch of {} files to database", - current_batch); - db.beginTransaction(); - current_batch = 0; - } - } - - if (current_batch > 0) { - db.commitTransaction(); - spdlog::info("Committed final batch of {} files to database", - current_batch); - } - } - - spdlog::info("=== Summary ==="); - spdlog::info("Processed {} files successfully", processed_count); - spdlog::info("Mode: {}", mode); - spdlog::info("File type: {}", type_str); - if (mode != "dump-tree") { - spdlog::info("Database saved to: {}", db_path); - } - - } catch (const std::exception &e) { - spdlog::error("Database error: {}", e.what()); - return 1; - } + }); + app.add_flag("--log-file", options.log_file, "Enable logging to file") + ->each([&](std::string) { + auto log_sink = std::make_shared( + options.log_file, true); + console->sinks().push_back(log_sink); + }); + app.require_subcommand(); + register_cmd_scan(app); + register_cmd_dump(app); + register_cmd_verify(app); + CLI11_PARSE(app, argc, argv); return 0; } diff --git a/tooling/tool.hpp b/tooling/tool.hpp new file mode 100644 index 00000000..14951b63 --- /dev/null +++ b/tooling/tool.hpp @@ -0,0 +1,90 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +// Global constants +extern const std::regex ADDRESS_REGEX; + +// Enums +enum class FileType { Auto, Fix, Stub, Ref }; + +// Data structures +struct FunctionInfo { + std::string name; + std::string address; + std::string filepath; + bool is_import; + FileType type; +}; + +struct GlobalInfo { + std::string name; + std::string address; + std::string filepath; +}; + +// Utility functions +FileType stringToFileType(const std::string &type_str); +std::string fileTypeToString(FileType type); +bool hasAddressPattern(const std::string &comment); +std::string extractAddress(const std::string &comment); + +// Tree-sitter parsing functions +std::string extractNodeText(TSNode node, const char *source_code); +std::string findIdentifierInNode(TSNode node, const char *source_code); +std::string findIdentifierInDeclarator(TSNode node, const char *source_code); +std::string getFunctionName(TSNode node, const char *source_code); +std::string getGlobalName(TSNode node, const char *source_code); +std::string getComment(TSNode node, const char *source_code, uint32_t source_length, bool search_before); +bool hasFunctionBody(TSNode node); +void findFunctions(TSNode node, const char *source_code, uint32_t source_length, + std::vector &functions, FileType file_type); +void findGlobals(TSNode node, const char *source_code, uint32_t source_length, + std::vector &globals); +void dumpTreeSitterAST(TSNode node, const char *source_code, int depth = 0); + +struct PreparedStatements; +class DatabaseManager { +private: + sqlite3 *db; + std::shared_ptr prepared_stmts; + +public: + DatabaseManager(const std::string &db_path); + ~DatabaseManager(); + + void clearEntriesForFile(const std::string &filepath); + void insertFunction(const FunctionInfo &func); + void clearGlobalsForFile(const std::string &filepath); + void insertGlobal(const GlobalInfo &global); + void beginTransaction(); + void commitTransaction(); + void rollbackTransaction(); + bool checkDuplicateAddresses(); + bool checkDuplicateNames(); +}; + +// File processing functions +std::vector readFileList(const std::string &list_file); +bool processFile(const std::string &filepath, DatabaseManager &db, FileType file_type); +bool processGlobalsFile(const std::string &filepath, DatabaseManager &db); +bool dumpTreeFile(const std::string &filepath); +bool processDuplicates(DatabaseManager &db); + +struct Options { + std::string db_path = "gh.db"; + std::string mode = "functions"; + std::string log_file = ""; + bool verbose = false; + + static Options &get() { + static Options options; + return options; + } +};