From dfe339bcd36c06e2c5cc1b249765fe27b4edde1a Mon Sep 17 00:00:00 2001 From: Guus Waals <_@guusw.nl> Date: Tue, 27 May 2025 00:18:17 +0800 Subject: [PATCH] Simplify tool --- tooling2/tool.cpp | 357 +++++++++++----------------------------------- 1 file changed, 85 insertions(+), 272 deletions(-) diff --git a/tooling2/tool.cpp b/tooling2/tool.cpp index d999c2c5..d2f346ac 100644 --- a/tooling2/tool.cpp +++ b/tooling2/tool.cpp @@ -16,7 +16,7 @@ struct FunctionInfo { std::string name; std::string address; std::string filepath; - bool is_import; // true for extern declarations, false for definitions + bool is_import; }; class PreparedStatements { @@ -27,61 +27,37 @@ private: sqlite3_stmt* insert_functions_stmt; sqlite3_stmt* insert_imports_stmt; + void prepareStatement(const char* sql, sqlite3_stmt** stmt, const std::string& error_msg) { + if (sqlite3_prepare_v2(db, sql, -1, stmt, nullptr) != SQLITE_OK) { + throw std::runtime_error(error_msg + ": " + sqlite3_errmsg(db)); + } + } + public: PreparedStatements(sqlite3* database) : db(database) { - // Prepare delete statements - const char* delete_functions_sql = "DELETE FROM Functions WHERE filepath = ?"; - const char* delete_imports_sql = "DELETE FROM Imports WHERE filepath = ?"; - - // Prepare insert statements - const char* insert_functions_sql = "INSERT OR REPLACE INTO Functions (filepath, name, address) VALUES (?, ?, ?)"; - const char* insert_imports_sql = "INSERT OR REPLACE INTO Imports (filepath, name, address) VALUES (?, ?, ?)"; - - // Prepare all statements - int rc; - rc = sqlite3_prepare_v2(db, delete_functions_sql, -1, &delete_functions_stmt, nullptr); - if (rc != SQLITE_OK) { - throw std::runtime_error("Failed to prepare delete functions statement: " + std::string(sqlite3_errmsg(db))); - } - - rc = sqlite3_prepare_v2(db, delete_imports_sql, -1, &delete_imports_stmt, nullptr); - if (rc != SQLITE_OK) { - throw std::runtime_error("Failed to prepare delete imports statement: " + std::string(sqlite3_errmsg(db))); - } - - rc = sqlite3_prepare_v2(db, insert_functions_sql, -1, &insert_functions_stmt, nullptr); - if (rc != SQLITE_OK) { - throw std::runtime_error("Failed to prepare insert functions statement: " + std::string(sqlite3_errmsg(db))); - } - - rc = sqlite3_prepare_v2(db, insert_imports_sql, -1, &insert_imports_stmt, nullptr); - if (rc != SQLITE_OK) { - throw std::runtime_error("Failed to prepare insert imports statement: " + std::string(sqlite3_errmsg(db))); - } + prepareStatement("DELETE FROM Functions WHERE filepath = ?", &delete_functions_stmt, "Failed to prepare delete functions statement"); + prepareStatement("DELETE FROM Imports WHERE filepath = ?", &delete_imports_stmt, "Failed to prepare delete imports statement"); + prepareStatement("INSERT OR REPLACE INTO Functions (filepath, name, address) VALUES (?, ?, ?)", &insert_functions_stmt, "Failed to prepare insert functions statement"); + prepareStatement("INSERT OR REPLACE INTO Imports (filepath, name, address) VALUES (?, ?, ?)", &insert_imports_stmt, "Failed to prepare insert imports statement"); } ~PreparedStatements() { - if (delete_functions_stmt) sqlite3_finalize(delete_functions_stmt); - if (delete_imports_stmt) sqlite3_finalize(delete_imports_stmt); - if (insert_functions_stmt) sqlite3_finalize(insert_functions_stmt); - if (insert_imports_stmt) sqlite3_finalize(insert_imports_stmt); + sqlite3_finalize(delete_functions_stmt); + sqlite3_finalize(delete_imports_stmt); + sqlite3_finalize(insert_functions_stmt); + sqlite3_finalize(insert_imports_stmt); } void clearEntriesForFile(const std::string& filepath) { - // Clear functions - sqlite3_reset(delete_functions_stmt); - sqlite3_bind_text(delete_functions_stmt, 1, filepath.c_str(), -1, SQLITE_STATIC); - sqlite3_step(delete_functions_stmt); - - // Clear imports - sqlite3_reset(delete_imports_stmt); - sqlite3_bind_text(delete_imports_stmt, 1, filepath.c_str(), -1, SQLITE_STATIC); - sqlite3_step(delete_imports_stmt); + for (auto stmt : {delete_functions_stmt, delete_imports_stmt}) { + sqlite3_reset(stmt); + sqlite3_bind_text(stmt, 1, filepath.c_str(), -1, SQLITE_STATIC); + sqlite3_step(stmt); + } } void insertFunction(const FunctionInfo& func) { sqlite3_stmt* stmt = func.is_import ? insert_imports_stmt : insert_functions_stmt; - sqlite3_reset(stmt); sqlite3_bind_text(stmt, 1, func.filepath.c_str(), -1, SQLITE_STATIC); sqlite3_bind_text(stmt, 2, func.name.c_str(), -1, SQLITE_STATIC); @@ -97,81 +73,39 @@ private: public: DatabaseManager(const std::string& db_path) : db(nullptr) { - int rc = sqlite3_open(db_path.c_str(), &db); - if (rc) { + if (sqlite3_open(db_path.c_str(), &db) != SQLITE_OK) { std::cerr << "Can't open database: " << sqlite3_errmsg(db) << std::endl; sqlite3_close(db); - db = nullptr; throw std::runtime_error("Failed to open database"); } - // Create tables if they don't exist - const char* create_functions_table = R"( - CREATE TABLE IF NOT EXISTS Functions ( - filepath TEXT, - name TEXT, - address TEXT, - PRIMARY KEY (name, filepath) - ) + const char* create_tables = R"( + CREATE TABLE IF NOT EXISTS Functions (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath)); + CREATE TABLE IF NOT EXISTS Imports (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath)); )"; - const char* create_imports_table = R"( - CREATE TABLE IF NOT EXISTS Imports ( - filepath TEXT, - name TEXT, - address TEXT, - PRIMARY KEY (name, filepath) - ) - )"; - - sqlite3_exec(db, create_functions_table, nullptr, nullptr, nullptr); - sqlite3_exec(db, create_imports_table, nullptr, nullptr, nullptr); - - // Initialize prepared statements + sqlite3_exec(db, create_tables, nullptr, nullptr, nullptr); prepared_stmts = std::make_unique(db); } ~DatabaseManager() { - // prepared_stmts will be destroyed automatically before db is closed - if (db) { - sqlite3_close(db); - } + if (db) sqlite3_close(db); } - void clearEntriesForFile(const std::string& filepath) { - prepared_stmts->clearEntriesForFile(filepath); - } - - void insertFunction(const FunctionInfo& func) { - prepared_stmts->insertFunction(func); - } - - void beginTransaction() { - sqlite3_exec(db, "BEGIN TRANSACTION", nullptr, nullptr, nullptr); - } - - void commitTransaction() { - sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr); - } - - void rollbackTransaction() { - sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr); - } + void clearEntriesForFile(const std::string& filepath) { prepared_stmts->clearEntriesForFile(filepath); } + void insertFunction(const FunctionInfo& func) { prepared_stmts->insertFunction(func); } + void beginTransaction() { sqlite3_exec(db, "BEGIN TRANSACTION", nullptr, nullptr, nullptr); } + void commitTransaction() { sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr); } + void rollbackTransaction() { sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr); } }; std::string extractAddress(const std::string& comment) { - // Look for hex addresses in comments like "// 0043e4f0" or "// 0043e4f0 // FUN_0043e4f0" std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))"); std::smatch match; - - if (std::regex_search(comment, match, addr_regex)) { - return match[1].str(); - } - return ""; + return std::regex_search(comment, match, addr_regex) ? match[1].str() : ""; } std::string getFunctionName(TSNode node, const char* source_code) { - // For function declarations/definitions, find the function name uint32_t child_count = ts_node_child_count(node); for (uint32_t i = 0; i < child_count; i++) { @@ -179,7 +113,6 @@ std::string getFunctionName(TSNode node, const char* source_code) { const char* type = ts_node_type(child); if (strcmp(type, "function_declarator") == 0) { - // Find the identifier within the function_declarator uint32_t declarator_children = ts_node_child_count(child); for (uint32_t j = 0; j < declarator_children; j++) { TSNode declarator_child = ts_node_child(child, j); @@ -191,7 +124,6 @@ std::string getFunctionName(TSNode node, const char* source_code) { } } else if (strcmp(type, "identifier") == 0) { - // Direct identifier (simpler cases) uint32_t start = ts_node_start_byte(child); uint32_t end = ts_node_end_byte(child); return std::string(source_code + start, end - start); @@ -200,165 +132,80 @@ std::string getFunctionName(TSNode node, const char* source_code) { return ""; } -std::string getCommentBeforeNode(TSNode node, const char* source_code) { - uint32_t start_byte = ts_node_start_byte(node); - - // Look backwards from the start of the node to find comments - if (start_byte == 0) return ""; - - // Get text before the node - std::string before_text(source_code, start_byte); - - // Find all "//" comments before this node and look for addresses - std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))"); - std::smatch match; - std::string found_address; - - // Search backwards through all comment lines - size_t search_pos = before_text.length(); - while (search_pos > 0) { - size_t comment_pos = before_text.rfind("//", search_pos - 1); - if (comment_pos == std::string::npos) { - break; - } +std::string getComment(TSNode node, const char* source_code, uint32_t source_length, bool search_before) { + if (search_before) { + uint32_t start_byte = ts_node_start_byte(node); + if (start_byte == 0) return ""; - // Find the end of this comment line - size_t line_end = before_text.find('\n', comment_pos); - if (line_end == std::string::npos) { - line_end = before_text.length(); - } + std::string before_text(source_code, start_byte); + std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))"); + std::smatch match; - // Extract this comment line - std::string comment_line = before_text.substr(comment_pos, line_end - comment_pos); - - // Check if this comment contains an address - if (std::regex_search(comment_line, match, addr_regex)) { - // Check if this comment is reasonably close to the function (within 20 lines) - size_t newlines_between = 0; - for (size_t i = comment_pos; i < start_byte; i++) { - if (before_text[i] == '\n') newlines_between++; - } + size_t search_pos = before_text.length(); + while (search_pos > 0) { + size_t comment_pos = before_text.rfind("//", search_pos - 1); + if (comment_pos == std::string::npos) break; - if (newlines_between <= 20) { - return comment_line; + size_t line_end = before_text.find('\n', comment_pos); + if (line_end == std::string::npos) line_end = before_text.length(); + + std::string comment_line = before_text.substr(comment_pos, line_end - comment_pos); + + if (std::regex_search(comment_line, match, addr_regex)) { + size_t newlines_between = std::count(before_text.begin() + comment_pos, before_text.begin() + start_byte, '\n'); + if (newlines_between <= 20) return comment_line; } + search_pos = comment_pos; } + } else { + uint32_t end_byte = ts_node_end_byte(node); + std::string remaining(source_code + end_byte, source_length - end_byte); - search_pos = comment_pos; + size_t comment_pos = remaining.find("//"); + if (comment_pos != std::string::npos) { + size_t line_end = remaining.find('\n', comment_pos); + if (line_end == std::string::npos) line_end = remaining.length(); + return remaining.substr(comment_pos, line_end - comment_pos); + } } - return ""; } -std::string getCommentAfterNode(TSNode node, const char* source_code, uint32_t source_length) { - uint32_t end_byte = ts_node_end_byte(node); - - // Look for comment on the same line or next line - std::string remaining(source_code + end_byte, source_length - end_byte); - - // Find the first comment marker "//" - size_t comment_pos = remaining.find("//"); - if (comment_pos != std::string::npos) { - // Extract until end of line - size_t line_end = remaining.find('\n', comment_pos); - if (line_end == std::string::npos) { - line_end = remaining.length(); - } - return remaining.substr(comment_pos, line_end - comment_pos); - } - - return ""; -} - -bool isExternDeclaration(TSNode node, const char* source_code) { - // Check if this is inside an extern "C" block or has extern storage class - TSNode current = ts_node_parent(node); - while (!ts_node_is_null(current)) { - const char* type = ts_node_type(current); - if (strcmp(type, "linkage_specification") == 0) { - return true; - } - current = ts_node_parent(current); - } - - // Also check for explicit extern keyword - const char* node_type = ts_node_type(node); - if (strcmp(node_type, "declaration") == 0) { - uint32_t child_count = ts_node_child_count(node); - for (uint32_t i = 0; i < child_count; i++) { - TSNode child = ts_node_child(node, i); - if (strcmp(ts_node_type(child), "storage_class_specifier") == 0) { - uint32_t start = ts_node_start_byte(child); - uint32_t end = ts_node_end_byte(child); - std::string text(source_code + start, end - start); - if (text == "extern") { - return true; - } - } - } - } - - return false; -} - bool hasFunctionBody(TSNode node) { - // Check if this function definition has a compound statement (body) - if (strcmp(ts_node_type(node), "function_definition") == 0) { - uint32_t child_count = ts_node_child_count(node); - for (uint32_t i = 0; i < child_count; i++) { - TSNode child = ts_node_child(node, i); - const char* child_type = ts_node_type(child); - if (strcmp(child_type, "compound_statement") == 0) { - return true; - } + if (strcmp(ts_node_type(node), "function_definition") != 0) return false; + + uint32_t child_count = ts_node_child_count(node); + for (uint32_t i = 0; i < child_count; i++) { + if (strcmp(ts_node_type(ts_node_child(node, i)), "compound_statement") == 0) { + return true; } } return false; } -void findFunctions(TSNode node, const char* source_code, uint32_t source_length, - std::vector& functions) { +void findFunctions(TSNode node, const char* source_code, uint32_t source_length, std::vector& functions) { const char* type = ts_node_type(node); - // Check for function declarations and definitions - if (strcmp(type, "function_definition") == 0 || - strcmp(type, "declaration") == 0) { - + if (strcmp(type, "function_definition") == 0 || strcmp(type, "declaration") == 0) { std::string func_name = getFunctionName(node, source_code); if (!func_name.empty()) { - std::string comment = getCommentAfterNode(node, source_code, source_length); - std::string address = extractAddress(comment); + std::string address = extractAddress(getComment(node, source_code, source_length, false)); - // If no address found after, try looking before (for function definitions) if (address.empty() && strcmp(type, "function_definition") == 0) { - comment = getCommentBeforeNode(node, source_code); - address = extractAddress(comment); + address = extractAddress(getComment(node, source_code, source_length, true)); } if (!address.empty()) { - FunctionInfo func; - func.name = func_name; - func.address = address; - - // Determine if it's an import based on whether it has a body - // Function definitions with bodies are actual functions - // Declarations without bodies are imports - if (strcmp(type, "function_definition") == 0) { - func.is_import = !hasFunctionBody(node); - } else { - func.is_import = true; // Pure declarations are always imports - } - + FunctionInfo func{func_name, address, "", + strcmp(type, "function_definition") == 0 ? !hasFunctionBody(node) : true}; functions.push_back(func); } } } - // Recursively check children uint32_t child_count = ts_node_child_count(node); for (uint32_t i = 0; i < child_count; i++) { - TSNode child = ts_node_child(node, i); - findFunctions(child, source_code, source_length, functions); + findFunctions(ts_node_child(node, i), source_code, source_length, functions); } } @@ -372,26 +219,19 @@ std::vector readFileList(const std::string& list_file) { std::string line; while (std::getline(file, line)) { - // Skip empty lines and comments - if (line.empty() || line[0] == '#') { - continue; - } + if (line.empty() || line[0] == '#') continue; - // Handle wildcard patterns like "tmps/gh_fix/*.h" if (line.find('*') != std::string::npos) { - // For now, skip wildcard patterns as they need more complex handling std::cout << "Skipping wildcard pattern: " << line << std::endl; continue; } - // Check if file exists if (std::filesystem::exists(line)) { files.push_back(line); } else { std::cout << "Warning: File not found: " << line << std::endl; } } - return files; } @@ -402,15 +242,12 @@ bool processFile(const std::string& filepath, DatabaseManager& db) { return false; } - std::stringstream buffer; - buffer << file.rdbuf(); - std::string file_content = buffer.str(); - const char *source_code = file_content.c_str(); + std::string file_content((std::istreambuf_iterator(file)), std::istreambuf_iterator()); TSParser *parser = ts_parser_new(); ts_parser_set_language(parser, tree_sitter_cpp()); - TSTree *tree = ts_parser_parse_string(parser, nullptr, source_code, file_content.length()); + TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(), file_content.length()); TSNode root_node = ts_tree_root_node(tree); if (ts_node_is_null(root_node)) { @@ -420,18 +257,14 @@ bool processFile(const std::string& filepath, DatabaseManager& db) { return false; } - // Clear existing entries for this file db.clearEntriesForFile(filepath); - // Find all functions with addresses std::vector functions; - findFunctions(root_node, source_code, file_content.length(), functions); + findFunctions(root_node, file_content.c_str(), file_content.length(), functions); - // Insert into database for (auto& func : functions) { func.filepath = filepath; db.insertFunction(func); - std::cout << (func.is_import ? "Import: " : "Function: ") << func.name << " @ " << func.address << " in " << filepath << std::endl; } @@ -440,7 +273,6 @@ bool processFile(const std::string& filepath, DatabaseManager& db) { ts_tree_delete(tree); ts_parser_delete(parser); - return true; } @@ -451,36 +283,27 @@ int main(int argc, char* argv[]) { std::string list_file; std::string db_path = "functions.db"; - // Add options app.add_option("files", input_files, "Input C++ files to parse (supports @listfile.txt syntax)"); app.add_option("-l,--list", list_file, "File containing list of files to process"); app.add_option("-d,--database", db_path, "SQLite database path")->default_val("functions.db"); CLI11_PARSE(app, argc, argv); - // Collect all files to process std::vector files_to_process; - // Handle list file option if (!list_file.empty()) { auto list_files = readFileList(list_file); files_to_process.insert(files_to_process.end(), list_files.begin(), list_files.end()); } - // Handle input files (including @listfile.txt syntax) for (const auto& input : input_files) { if (input.starts_with("@")) { - // Handle @listfile.txt syntax - std::string list_path = input.substr(1); - auto list_files = readFileList(list_path); + auto list_files = readFileList(input.substr(1)); files_to_process.insert(files_to_process.end(), list_files.begin(), list_files.end()); + } else if (std::filesystem::exists(input)) { + files_to_process.push_back(input); } else { - // Regular file - if (std::filesystem::exists(input)) { - files_to_process.push_back(input); - } else { - std::cout << "Warning: File not found: " << input << std::endl; - } + std::cout << "Warning: File not found: " << input << std::endl; } } @@ -491,26 +314,17 @@ int main(int argc, char* argv[]) { try { DatabaseManager db(db_path); - int processed_count = 0; - int total_functions = 0; - - // Use transactions for better performance when processing multiple files - const size_t batch_size = 50; // Process files in batches for optimal performance + const size_t batch_size = 50; size_t current_batch = 0; db.beginTransaction(); for (const auto& filepath : files_to_process) { std::cout << "\n=== Processing: " << filepath << " ===" << std::endl; - if (processFile(filepath, db)) { - processed_count++; - } + if (processFile(filepath, db)) processed_count++; - current_batch++; - - // Commit transaction every batch_size files to avoid long-running transactions - if (current_batch >= batch_size) { + if (++current_batch >= batch_size) { db.commitTransaction(); std::cout << "Committed batch of " << current_batch << " files to database" << std::endl; db.beginTransaction(); @@ -518,7 +332,6 @@ int main(int argc, char* argv[]) { } } - // Commit any remaining files in the final batch if (current_batch > 0) { db.commitTransaction(); std::cout << "Committed final batch of " << current_batch << " files to database" << std::endl;