diff --git a/tooling2/tool.cpp b/tooling2/tool.cpp index d2f346ac..51f60df2 100644 --- a/tooling2/tool.cpp +++ b/tooling2/tool.cpp @@ -10,341 +10,398 @@ #include #include #include + extern "C" TSLanguage *tree_sitter_cpp(); struct FunctionInfo { - std::string name; - std::string address; - std::string filepath; - bool is_import; + std::string name; + std::string address; + std::string filepath; + bool is_import; }; class PreparedStatements { private: - sqlite3* db; - sqlite3_stmt* delete_functions_stmt; - sqlite3_stmt* delete_imports_stmt; - sqlite3_stmt* insert_functions_stmt; - sqlite3_stmt* insert_imports_stmt; + sqlite3 *db; + sqlite3_stmt *delete_functions_stmt; + sqlite3_stmt *delete_imports_stmt; + sqlite3_stmt *insert_functions_stmt; + sqlite3_stmt *insert_imports_stmt; - void prepareStatement(const char* sql, sqlite3_stmt** stmt, const std::string& error_msg) { - if (sqlite3_prepare_v2(db, sql, -1, stmt, nullptr) != SQLITE_OK) { - throw std::runtime_error(error_msg + ": " + sqlite3_errmsg(db)); - } + void prepareStatement(const char *sql, sqlite3_stmt **stmt, + const std::string &error_msg) { + if (sqlite3_prepare_v2(db, sql, -1, stmt, nullptr) != SQLITE_OK) { + throw std::runtime_error(error_msg + ": " + sqlite3_errmsg(db)); } + } public: - PreparedStatements(sqlite3* database) : db(database) { - prepareStatement("DELETE FROM Functions WHERE filepath = ?", &delete_functions_stmt, "Failed to prepare delete functions statement"); - prepareStatement("DELETE FROM Imports WHERE filepath = ?", &delete_imports_stmt, "Failed to prepare delete imports statement"); - prepareStatement("INSERT OR REPLACE INTO Functions (filepath, name, address) VALUES (?, ?, ?)", &insert_functions_stmt, "Failed to prepare insert functions statement"); - prepareStatement("INSERT OR REPLACE INTO Imports (filepath, name, address) VALUES (?, ?, ?)", &insert_imports_stmt, "Failed to prepare insert imports statement"); - } - - ~PreparedStatements() { - sqlite3_finalize(delete_functions_stmt); - sqlite3_finalize(delete_imports_stmt); - sqlite3_finalize(insert_functions_stmt); - sqlite3_finalize(insert_imports_stmt); - } - - void clearEntriesForFile(const std::string& filepath) { - for (auto stmt : {delete_functions_stmt, delete_imports_stmt}) { - sqlite3_reset(stmt); - sqlite3_bind_text(stmt, 1, filepath.c_str(), -1, SQLITE_STATIC); - sqlite3_step(stmt); - } - } - - void insertFunction(const FunctionInfo& func) { - sqlite3_stmt* stmt = func.is_import ? insert_imports_stmt : insert_functions_stmt; - sqlite3_reset(stmt); - sqlite3_bind_text(stmt, 1, func.filepath.c_str(), -1, SQLITE_STATIC); - sqlite3_bind_text(stmt, 2, func.name.c_str(), -1, SQLITE_STATIC); - sqlite3_bind_text(stmt, 3, func.address.c_str(), -1, SQLITE_STATIC); - sqlite3_step(stmt); + PreparedStatements(sqlite3 *database) : db(database) { + prepareStatement("DELETE FROM Functions WHERE filepath = ?", + &delete_functions_stmt, + "Failed to prepare delete functions statement"); + prepareStatement("DELETE FROM Imports WHERE filepath = ?", + &delete_imports_stmt, + "Failed to prepare delete imports statement"); + prepareStatement("INSERT OR REPLACE INTO Functions (filepath, name, " + "address) VALUES (?, ?, ?)", + &insert_functions_stmt, + "Failed to prepare insert functions statement"); + prepareStatement("INSERT OR REPLACE INTO Imports (filepath, name, address) " + "VALUES (?, ?, ?)", + &insert_imports_stmt, + "Failed to prepare insert imports statement"); + } + + ~PreparedStatements() { + sqlite3_finalize(delete_functions_stmt); + sqlite3_finalize(delete_imports_stmt); + sqlite3_finalize(insert_functions_stmt); + sqlite3_finalize(insert_imports_stmt); + } + + void clearEntriesForFile(const std::string &filepath) { + for (auto stmt : {delete_functions_stmt, delete_imports_stmt}) { + sqlite3_reset(stmt); + sqlite3_bind_text(stmt, 1, filepath.c_str(), -1, SQLITE_STATIC); + sqlite3_step(stmt); } + } + + void insertFunction(const FunctionInfo &func) { + sqlite3_stmt *stmt = + func.is_import ? insert_imports_stmt : insert_functions_stmt; + sqlite3_reset(stmt); + sqlite3_bind_text(stmt, 1, func.filepath.c_str(), -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 2, func.name.c_str(), -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 3, func.address.c_str(), -1, SQLITE_STATIC); + sqlite3_step(stmt); + } }; class DatabaseManager { private: - sqlite3* db; - std::unique_ptr prepared_stmts; - + sqlite3 *db; + std::unique_ptr prepared_stmts; + public: - DatabaseManager(const std::string& db_path) : db(nullptr) { - if (sqlite3_open(db_path.c_str(), &db) != SQLITE_OK) { - std::cerr << "Can't open database: " << sqlite3_errmsg(db) << std::endl; - sqlite3_close(db); - throw std::runtime_error("Failed to open database"); - } - - const char* create_tables = R"( + DatabaseManager(const std::string &db_path) : db(nullptr) { + if (sqlite3_open(db_path.c_str(), &db) != SQLITE_OK) { + std::cerr << "Can't open database: " << sqlite3_errmsg(db) << std::endl; + sqlite3_close(db); + throw std::runtime_error("Failed to open database"); + } + + const char *create_tables = R"( CREATE TABLE IF NOT EXISTS Functions (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath)); CREATE TABLE IF NOT EXISTS Imports (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath)); )"; - - sqlite3_exec(db, create_tables, nullptr, nullptr, nullptr); - prepared_stmts = std::make_unique(db); - } - - ~DatabaseManager() { - if (db) sqlite3_close(db); - } - - void clearEntriesForFile(const std::string& filepath) { prepared_stmts->clearEntriesForFile(filepath); } - void insertFunction(const FunctionInfo& func) { prepared_stmts->insertFunction(func); } - void beginTransaction() { sqlite3_exec(db, "BEGIN TRANSACTION", nullptr, nullptr, nullptr); } - void commitTransaction() { sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr); } - void rollbackTransaction() { sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr); } + + sqlite3_exec(db, create_tables, nullptr, nullptr, nullptr); + prepared_stmts = std::make_unique(db); + } + + ~DatabaseManager() { + if (db) + sqlite3_close(db); + } + + void clearEntriesForFile(const std::string &filepath) { + prepared_stmts->clearEntriesForFile(filepath); + } + void insertFunction(const FunctionInfo &func) { + prepared_stmts->insertFunction(func); + } + void beginTransaction() { + sqlite3_exec(db, "BEGIN TRANSACTION", nullptr, nullptr, nullptr); + } + void commitTransaction() { + sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr); + } + void rollbackTransaction() { + sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr); + } }; -std::string extractAddress(const std::string& comment) { +std::string extractAddress(const std::string &comment) { + std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))"); + std::smatch match; + return std::regex_search(comment, match, addr_regex) ? match[1].str() : ""; +} + +std::string getFunctionName(TSNode node, const char *source_code) { + uint32_t child_count = ts_node_child_count(node); + + for (uint32_t i = 0; i < child_count; i++) { + TSNode child = ts_node_child(node, i); + const char *type = ts_node_type(child); + + if (strcmp(type, "function_declarator") == 0) { + uint32_t declarator_children = ts_node_child_count(child); + for (uint32_t j = 0; j < declarator_children; j++) { + TSNode declarator_child = ts_node_child(child, j); + if (strcmp(ts_node_type(declarator_child), "identifier") == 0) { + uint32_t start = ts_node_start_byte(declarator_child); + uint32_t end = ts_node_end_byte(declarator_child); + return std::string(source_code + start, end - start); + } + } + } else if (strcmp(type, "identifier") == 0) { + uint32_t start = ts_node_start_byte(child); + uint32_t end = ts_node_end_byte(child); + return std::string(source_code + start, end - start); + } + } + return ""; +} + +std::string getComment(TSNode node, const char *source_code, + uint32_t source_length, bool search_before) { + if (search_before) { + uint32_t start_byte = ts_node_start_byte(node); + if (start_byte == 0) + return ""; + + std::string before_text(source_code, start_byte); std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))"); std::smatch match; - return std::regex_search(comment, match, addr_regex) ? match[1].str() : ""; -} -std::string getFunctionName(TSNode node, const char* source_code) { - uint32_t child_count = ts_node_child_count(node); - - for (uint32_t i = 0; i < child_count; i++) { - TSNode child = ts_node_child(node, i); - const char* type = ts_node_type(child); - - if (strcmp(type, "function_declarator") == 0) { - uint32_t declarator_children = ts_node_child_count(child); - for (uint32_t j = 0; j < declarator_children; j++) { - TSNode declarator_child = ts_node_child(child, j); - if (strcmp(ts_node_type(declarator_child), "identifier") == 0) { - uint32_t start = ts_node_start_byte(declarator_child); - uint32_t end = ts_node_end_byte(declarator_child); - return std::string(source_code + start, end - start); - } - } - } - else if (strcmp(type, "identifier") == 0) { - uint32_t start = ts_node_start_byte(child); - uint32_t end = ts_node_end_byte(child); - return std::string(source_code + start, end - start); - } - } - return ""; -} + size_t search_pos = before_text.length(); + while (search_pos > 0) { + size_t comment_pos = before_text.rfind("//", search_pos - 1); + if (comment_pos == std::string::npos) + break; -std::string getComment(TSNode node, const char* source_code, uint32_t source_length, bool search_before) { - if (search_before) { - uint32_t start_byte = ts_node_start_byte(node); - if (start_byte == 0) return ""; - - std::string before_text(source_code, start_byte); - std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))"); - std::smatch match; - - size_t search_pos = before_text.length(); - while (search_pos > 0) { - size_t comment_pos = before_text.rfind("//", search_pos - 1); - if (comment_pos == std::string::npos) break; - - size_t line_end = before_text.find('\n', comment_pos); - if (line_end == std::string::npos) line_end = before_text.length(); - - std::string comment_line = before_text.substr(comment_pos, line_end - comment_pos); - - if (std::regex_search(comment_line, match, addr_regex)) { - size_t newlines_between = std::count(before_text.begin() + comment_pos, before_text.begin() + start_byte, '\n'); - if (newlines_between <= 20) return comment_line; - } - search_pos = comment_pos; - } - } else { - uint32_t end_byte = ts_node_end_byte(node); - std::string remaining(source_code + end_byte, source_length - end_byte); - - size_t comment_pos = remaining.find("//"); - if (comment_pos != std::string::npos) { - size_t line_end = remaining.find('\n', comment_pos); - if (line_end == std::string::npos) line_end = remaining.length(); - return remaining.substr(comment_pos, line_end - comment_pos); - } + size_t line_end = before_text.find('\n', comment_pos); + if (line_end == std::string::npos) + line_end = before_text.length(); + + std::string comment_line = + before_text.substr(comment_pos, line_end - comment_pos); + + if (std::regex_search(comment_line, match, addr_regex)) { + size_t newlines_between = + std::count(before_text.begin() + comment_pos, + before_text.begin() + start_byte, '\n'); + if (newlines_between <= 20) + return comment_line; + } + search_pos = comment_pos; } - return ""; + } else { + uint32_t end_byte = ts_node_end_byte(node); + std::string remaining(source_code + end_byte, source_length - end_byte); + + size_t comment_pos = remaining.find("//"); + if (comment_pos != std::string::npos) { + size_t line_end = remaining.find('\n', comment_pos); + if (line_end == std::string::npos) + line_end = remaining.length(); + return remaining.substr(comment_pos, line_end - comment_pos); + } + } + return ""; } bool hasFunctionBody(TSNode node) { - if (strcmp(ts_node_type(node), "function_definition") != 0) return false; - - uint32_t child_count = ts_node_child_count(node); - for (uint32_t i = 0; i < child_count; i++) { - if (strcmp(ts_node_type(ts_node_child(node, i)), "compound_statement") == 0) { - return true; - } - } + if (strcmp(ts_node_type(node), "function_definition") != 0) return false; + + uint32_t child_count = ts_node_child_count(node); + for (uint32_t i = 0; i < child_count; i++) { + if (strcmp(ts_node_type(ts_node_child(node, i)), "compound_statement") == + 0) { + return true; + } + } + return false; } -void findFunctions(TSNode node, const char* source_code, uint32_t source_length, std::vector& functions) { - const char* type = ts_node_type(node); - - if (strcmp(type, "function_definition") == 0 || strcmp(type, "declaration") == 0) { - std::string func_name = getFunctionName(node, source_code); - if (!func_name.empty()) { - std::string address = extractAddress(getComment(node, source_code, source_length, false)); - - if (address.empty() && strcmp(type, "function_definition") == 0) { - address = extractAddress(getComment(node, source_code, source_length, true)); - } - - if (!address.empty()) { - FunctionInfo func{func_name, address, "", - strcmp(type, "function_definition") == 0 ? !hasFunctionBody(node) : true}; - functions.push_back(func); - } - } - } - - uint32_t child_count = ts_node_child_count(node); - for (uint32_t i = 0; i < child_count; i++) { - findFunctions(ts_node_child(node, i), source_code, source_length, functions); +void findFunctions(TSNode node, const char *source_code, uint32_t source_length, + std::vector &functions) { + const char *type = ts_node_type(node); + + if (strcmp(type, "function_definition") == 0 || + strcmp(type, "declaration") == 0) { + std::string func_name = getFunctionName(node, source_code); + if (!func_name.empty()) { + std::string address = + extractAddress(getComment(node, source_code, source_length, false)); + + if (address.empty() && strcmp(type, "function_definition") == 0) { + address = + extractAddress(getComment(node, source_code, source_length, true)); + } + + if (!address.empty()) { + FunctionInfo func{func_name, address, "", + strcmp(type, "function_definition") == 0 + ? !hasFunctionBody(node) + : true}; + functions.push_back(func); + } } + } + + uint32_t child_count = ts_node_child_count(node); + for (uint32_t i = 0; i < child_count; i++) { + findFunctions(ts_node_child(node, i), source_code, source_length, + functions); + } } -std::vector readFileList(const std::string& list_file) { - std::vector files; - std::ifstream file(list_file); - if (!file.is_open()) { - std::cerr << "Error: Could not open list file " << list_file << std::endl; - return files; - } - - std::string line; - while (std::getline(file, line)) { - if (line.empty() || line[0] == '#') continue; - - if (line.find('*') != std::string::npos) { - std::cout << "Skipping wildcard pattern: " << line << std::endl; - continue; - } - - if (std::filesystem::exists(line)) { - files.push_back(line); - } else { - std::cout << "Warning: File not found: " << line << std::endl; - } - } +std::vector readFileList(const std::string &list_file) { + std::vector files; + std::ifstream file(list_file); + if (!file.is_open()) { + std::cerr << "Error: Could not open list file " << list_file << std::endl; return files; + } + + std::string line; + while (std::getline(file, line)) { + if (line.empty() || line[0] == '#') + continue; + + if (line.find('*') != std::string::npos) { + std::cout << "Skipping wildcard pattern: " << line << std::endl; + continue; + } + + if (std::filesystem::exists(line)) { + files.push_back(line); + } else { + std::cout << "Warning: File not found: " << line << std::endl; + } + } + return files; } -bool processFile(const std::string& filepath, DatabaseManager& db) { - std::ifstream file(filepath); - if (!file.is_open()) { - std::cerr << "Error: Could not open file " << filepath << std::endl; - return false; - } - - std::string file_content((std::istreambuf_iterator(file)), std::istreambuf_iterator()); - - TSParser *parser = ts_parser_new(); - ts_parser_set_language(parser, tree_sitter_cpp()); - - TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(), file_content.length()); - TSNode root_node = ts_tree_root_node(tree); - - if (ts_node_is_null(root_node)) { - std::cerr << "Error: Failed to parse file " << filepath << std::endl; - ts_tree_delete(tree); - ts_parser_delete(parser); - return false; - } - - db.clearEntriesForFile(filepath); - - std::vector functions; - findFunctions(root_node, file_content.c_str(), file_content.length(), functions); - - for (auto& func : functions) { - func.filepath = filepath; - db.insertFunction(func); - std::cout << (func.is_import ? "Import: " : "Function: ") - << func.name << " @ " << func.address << " in " << filepath << std::endl; - } - - std::cout << "Processed " << functions.size() << " functions/imports from " << filepath << std::endl; - +bool processFile(const std::string &filepath, DatabaseManager &db) { + std::ifstream file(filepath); + if (!file.is_open()) { + std::cerr << "Error: Could not open file " << filepath << std::endl; + return false; + } + + std::string file_content((std::istreambuf_iterator(file)), + std::istreambuf_iterator()); + + TSParser *parser = ts_parser_new(); + ts_parser_set_language(parser, tree_sitter_cpp()); + + TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(), + file_content.length()); + TSNode root_node = ts_tree_root_node(tree); + + if (ts_node_is_null(root_node)) { + std::cerr << "Error: Failed to parse file " << filepath << std::endl; ts_tree_delete(tree); ts_parser_delete(parser); - return true; + return false; + } + + db.clearEntriesForFile(filepath); + + std::vector functions; + findFunctions(root_node, file_content.c_str(), file_content.length(), + functions); + + for (auto &func : functions) { + func.filepath = filepath; + db.insertFunction(func); + std::cout << (func.is_import ? "Import: " : "Function: ") << func.name + << " @ " << func.address << " in " << filepath << std::endl; + } + + std::cout << "Processed " << functions.size() << " functions/imports from " + << filepath << std::endl; + + ts_tree_delete(tree); + ts_parser_delete(parser); + return true; } -int main(int argc, char* argv[]) { - CLI::App app{"C++ Function Parser - Extracts function addresses from C++ files"}; - - std::vector input_files; - std::string list_file; - std::string db_path = "functions.db"; - - app.add_option("files", input_files, "Input C++ files to parse (supports @listfile.txt syntax)"); - app.add_option("-l,--list", list_file, "File containing list of files to process"); - app.add_option("-d,--database", db_path, "SQLite database path")->default_val("functions.db"); - - CLI11_PARSE(app, argc, argv); - - std::vector files_to_process; - - if (!list_file.empty()) { - auto list_files = readFileList(list_file); - files_to_process.insert(files_to_process.end(), list_files.begin(), list_files.end()); +int main(int argc, char *argv[]) { + CLI::App app{ + "C++ Function Parser - Extracts function addresses from C++ files"}; + + std::vector input_files; + std::string list_file; + std::string db_path = "functions.db"; + + app.add_option("files", input_files, + "Input C++ files to parse (supports @listfile.txt syntax)"); + app.add_option("-l,--list", list_file, + "File containing list of files to process"); + app.add_option("-d,--database", db_path, "SQLite database path") + ->default_val("functions.db"); + + CLI11_PARSE(app, argc, argv); + + std::vector files_to_process; + + if (!list_file.empty()) { + auto list_files = readFileList(list_file); + files_to_process.insert(files_to_process.end(), list_files.begin(), + list_files.end()); + } + + for (const auto &input : input_files) { + if (input.starts_with("@")) { + auto list_files = readFileList(input.substr(1)); + files_to_process.insert(files_to_process.end(), list_files.begin(), + list_files.end()); + } else if (std::filesystem::exists(input)) { + files_to_process.push_back(input); + } else { + std::cout << "Warning: File not found: " << input << std::endl; } - - for (const auto& input : input_files) { - if (input.starts_with("@")) { - auto list_files = readFileList(input.substr(1)); - files_to_process.insert(files_to_process.end(), list_files.begin(), list_files.end()); - } else if (std::filesystem::exists(input)) { - files_to_process.push_back(input); - } else { - std::cout << "Warning: File not found: " << input << std::endl; - } - } - - if (files_to_process.empty()) { - std::cerr << "No files to process. Use --help for usage information." << std::endl; - return 1; - } - - try { - DatabaseManager db(db_path); - int processed_count = 0; - const size_t batch_size = 50; - size_t current_batch = 0; - + } + + if (files_to_process.empty()) { + std::cerr << "No files to process. Use --help for usage information." + << std::endl; + return 1; + } + + try { + DatabaseManager db(db_path); + int processed_count = 0; + const size_t batch_size = 50; + size_t current_batch = 0; + + db.beginTransaction(); + + for (const auto &filepath : files_to_process) { + std::cout << "\n=== Processing: " << filepath << " ===" << std::endl; + if (processFile(filepath, db)) + processed_count++; + + if (++current_batch >= batch_size) { + db.commitTransaction(); + std::cout << "Committed batch of " << current_batch + << " files to database" << std::endl; db.beginTransaction(); - - for (const auto& filepath : files_to_process) { - std::cout << "\n=== Processing: " << filepath << " ===" << std::endl; - if (processFile(filepath, db)) processed_count++; - - if (++current_batch >= batch_size) { - db.commitTransaction(); - std::cout << "Committed batch of " << current_batch << " files to database" << std::endl; - db.beginTransaction(); - current_batch = 0; - } - } - - if (current_batch > 0) { - db.commitTransaction(); - std::cout << "Committed final batch of " << current_batch << " files to database" << std::endl; - } - - std::cout << "\n=== Summary ===" << std::endl; - std::cout << "Processed " << processed_count << " files successfully" << std::endl; - std::cout << "Database saved to: " << db_path << std::endl; - - } catch (const std::exception& e) { - std::cerr << "Database error: " << e.what() << std::endl; - return 1; + current_batch = 0; + } } - - return 0; + + if (current_batch > 0) { + db.commitTransaction(); + std::cout << "Committed final batch of " << current_batch + << " files to database" << std::endl; + } + + std::cout << "\n=== Summary ===" << std::endl; + std::cout << "Processed " << processed_count << " files successfully" + << std::endl; + std::cout << "Database saved to: " << db_path << std::endl; + + } catch (const std::exception &e) { + std::cerr << "Database error: " << e.what() << std::endl; + return 1; + } + + return 0; }