#include #include #include #include #include #include #include #include #include #include #include extern "C" TSLanguage *tree_sitter_cpp(); struct FunctionInfo { std::string name; std::string address; std::string filepath; bool is_import; // true for extern declarations, false for definitions }; class DatabaseManager { private: sqlite3* db; public: DatabaseManager(const std::string& db_path) : db(nullptr) { int rc = sqlite3_open(db_path.c_str(), &db); if (rc) { std::cerr << "Can't open database: " << sqlite3_errmsg(db) << std::endl; sqlite3_close(db); db = nullptr; throw std::runtime_error("Failed to open database"); } // Create tables if they don't exist const char* create_functions_table = R"( CREATE TABLE IF NOT EXISTS Functions ( filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath) ) )"; const char* create_imports_table = R"( CREATE TABLE IF NOT EXISTS Imports ( filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath) ) )"; sqlite3_exec(db, create_functions_table, nullptr, nullptr, nullptr); sqlite3_exec(db, create_imports_table, nullptr, nullptr, nullptr); } ~DatabaseManager() { if (db) { sqlite3_close(db); } } void clearEntriesForFile(const std::string& filepath) { const char* delete_functions = "DELETE FROM Functions WHERE filepath = ?"; const char* delete_imports = "DELETE FROM Imports WHERE filepath = ?"; sqlite3_stmt* stmt; // Clear functions sqlite3_prepare_v2(db, delete_functions, -1, &stmt, nullptr); sqlite3_bind_text(stmt, 1, filepath.c_str(), -1, SQLITE_STATIC); sqlite3_step(stmt); sqlite3_finalize(stmt); // Clear imports sqlite3_prepare_v2(db, delete_imports, -1, &stmt, nullptr); sqlite3_bind_text(stmt, 1, filepath.c_str(), -1, SQLITE_STATIC); sqlite3_step(stmt); sqlite3_finalize(stmt); } void insertFunction(const FunctionInfo& func) { const char* table = func.is_import ? "Imports" : "Functions"; std::string sql = "INSERT OR REPLACE INTO " + std::string(table) + " (filepath, name, address) VALUES (?, ?, ?)"; sqlite3_stmt* stmt; sqlite3_prepare_v2(db, sql.c_str(), -1, &stmt, nullptr); sqlite3_bind_text(stmt, 1, func.filepath.c_str(), -1, SQLITE_STATIC); sqlite3_bind_text(stmt, 2, func.name.c_str(), -1, SQLITE_STATIC); sqlite3_bind_text(stmt, 3, func.address.c_str(), -1, SQLITE_STATIC); sqlite3_step(stmt); sqlite3_finalize(stmt); } }; std::string extractAddress(const std::string& comment) { // Look for hex addresses in comments like "// 0043e4f0" or "// 0043e4f0 // FUN_0043e4f0" std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))"); std::smatch match; if (std::regex_search(comment, match, addr_regex)) { return match[1].str(); } return ""; } std::string getFunctionName(TSNode node, const char* source_code) { // For function declarations/definitions, find the function name uint32_t child_count = ts_node_child_count(node); for (uint32_t i = 0; i < child_count; i++) { TSNode child = ts_node_child(node, i); const char* type = ts_node_type(child); if (strcmp(type, "function_declarator") == 0) { // Find the identifier within the function_declarator uint32_t declarator_children = ts_node_child_count(child); for (uint32_t j = 0; j < declarator_children; j++) { TSNode declarator_child = ts_node_child(child, j); if (strcmp(ts_node_type(declarator_child), "identifier") == 0) { uint32_t start = ts_node_start_byte(declarator_child); uint32_t end = ts_node_end_byte(declarator_child); return std::string(source_code + start, end - start); } } } else if (strcmp(type, "identifier") == 0) { // Direct identifier (simpler cases) uint32_t start = ts_node_start_byte(child); uint32_t end = ts_node_end_byte(child); return std::string(source_code + start, end - start); } } return ""; } std::string getCommentBeforeNode(TSNode node, const char* source_code) { uint32_t start_byte = ts_node_start_byte(node); // Look backwards from the start of the node to find comments if (start_byte == 0) return ""; // Get text before the node std::string before_text(source_code, start_byte); // Find all "//" comments before this node and look for addresses std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))"); std::smatch match; std::string found_address; // Search backwards through all comment lines size_t search_pos = before_text.length(); while (search_pos > 0) { size_t comment_pos = before_text.rfind("//", search_pos - 1); if (comment_pos == std::string::npos) { break; } // Find the end of this comment line size_t line_end = before_text.find('\n', comment_pos); if (line_end == std::string::npos) { line_end = before_text.length(); } // Extract this comment line std::string comment_line = before_text.substr(comment_pos, line_end - comment_pos); // Check if this comment contains an address if (std::regex_search(comment_line, match, addr_regex)) { // Check if this comment is reasonably close to the function (within 20 lines) size_t newlines_between = 0; for (size_t i = comment_pos; i < start_byte; i++) { if (before_text[i] == '\n') newlines_between++; } if (newlines_between <= 20) { return comment_line; } } search_pos = comment_pos; } return ""; } std::string getCommentAfterNode(TSNode node, const char* source_code, uint32_t source_length) { uint32_t end_byte = ts_node_end_byte(node); // Look for comment on the same line or next line std::string remaining(source_code + end_byte, source_length - end_byte); // Find the first comment marker "//" size_t comment_pos = remaining.find("//"); if (comment_pos != std::string::npos) { // Extract until end of line size_t line_end = remaining.find('\n', comment_pos); if (line_end == std::string::npos) { line_end = remaining.length(); } return remaining.substr(comment_pos, line_end - comment_pos); } return ""; } bool isExternDeclaration(TSNode node, const char* source_code) { // Check if this is inside an extern "C" block or has extern storage class TSNode current = ts_node_parent(node); while (!ts_node_is_null(current)) { const char* type = ts_node_type(current); if (strcmp(type, "linkage_specification") == 0) { return true; } current = ts_node_parent(current); } // Also check for explicit extern keyword const char* node_type = ts_node_type(node); if (strcmp(node_type, "declaration") == 0) { uint32_t child_count = ts_node_child_count(node); for (uint32_t i = 0; i < child_count; i++) { TSNode child = ts_node_child(node, i); if (strcmp(ts_node_type(child), "storage_class_specifier") == 0) { uint32_t start = ts_node_start_byte(child); uint32_t end = ts_node_end_byte(child); std::string text(source_code + start, end - start); if (text == "extern") { return true; } } } } return false; } bool hasFunctionBody(TSNode node) { // Check if this function definition has a compound statement (body) if (strcmp(ts_node_type(node), "function_definition") == 0) { uint32_t child_count = ts_node_child_count(node); for (uint32_t i = 0; i < child_count; i++) { TSNode child = ts_node_child(node, i); const char* child_type = ts_node_type(child); if (strcmp(child_type, "compound_statement") == 0) { return true; } } } return false; } void findFunctions(TSNode node, const char* source_code, uint32_t source_length, std::vector& functions) { const char* type = ts_node_type(node); // Check for function declarations and definitions if (strcmp(type, "function_definition") == 0 || strcmp(type, "declaration") == 0) { std::string func_name = getFunctionName(node, source_code); if (!func_name.empty()) { std::string comment = getCommentAfterNode(node, source_code, source_length); std::string address = extractAddress(comment); // If no address found after, try looking before (for function definitions) if (address.empty() && strcmp(type, "function_definition") == 0) { comment = getCommentBeforeNode(node, source_code); address = extractAddress(comment); } if (!address.empty()) { FunctionInfo func; func.name = func_name; func.address = address; // Determine if it's an import based on whether it has a body // Function definitions with bodies are actual functions // Declarations without bodies are imports if (strcmp(type, "function_definition") == 0) { func.is_import = !hasFunctionBody(node); } else { func.is_import = true; // Pure declarations are always imports } functions.push_back(func); } } } // Recursively check children uint32_t child_count = ts_node_child_count(node); for (uint32_t i = 0; i < child_count; i++) { TSNode child = ts_node_child(node, i); findFunctions(child, source_code, source_length, functions); } } std::vector readFileList(const std::string& list_file) { std::vector files; std::ifstream file(list_file); if (!file.is_open()) { std::cerr << "Error: Could not open list file " << list_file << std::endl; return files; } std::string line; while (std::getline(file, line)) { // Skip empty lines and comments if (line.empty() || line[0] == '#') { continue; } // Handle wildcard patterns like "tmps/gh_fix/*.h" if (line.find('*') != std::string::npos) { // For now, skip wildcard patterns as they need more complex handling std::cout << "Skipping wildcard pattern: " << line << std::endl; continue; } // Check if file exists if (std::filesystem::exists(line)) { files.push_back(line); } else { std::cout << "Warning: File not found: " << line << std::endl; } } return files; } bool processFile(const std::string& filepath, DatabaseManager& db) { std::ifstream file(filepath); if (!file.is_open()) { std::cerr << "Error: Could not open file " << filepath << std::endl; return false; } std::stringstream buffer; buffer << file.rdbuf(); std::string file_content = buffer.str(); const char *source_code = file_content.c_str(); TSParser *parser = ts_parser_new(); ts_parser_set_language(parser, tree_sitter_cpp()); TSTree *tree = ts_parser_parse_string(parser, nullptr, source_code, file_content.length()); TSNode root_node = ts_tree_root_node(tree); if (ts_node_is_null(root_node)) { std::cerr << "Error: Failed to parse file " << filepath << std::endl; ts_tree_delete(tree); ts_parser_delete(parser); return false; } // Clear existing entries for this file db.clearEntriesForFile(filepath); // Find all functions with addresses std::vector functions; findFunctions(root_node, source_code, file_content.length(), functions); // Insert into database for (auto& func : functions) { func.filepath = filepath; db.insertFunction(func); std::cout << (func.is_import ? "Import: " : "Function: ") << func.name << " @ " << func.address << " in " << filepath << std::endl; } std::cout << "Processed " << functions.size() << " functions/imports from " << filepath << std::endl; ts_tree_delete(tree); ts_parser_delete(parser); return true; } int main(int argc, char* argv[]) { CLI::App app{"C++ Function Parser - Extracts function addresses from C++ files"}; std::vector input_files; std::string list_file; std::string db_path = "functions.db"; // Add options app.add_option("files", input_files, "Input C++ files to parse (supports @listfile.txt syntax)"); app.add_option("-l,--list", list_file, "File containing list of files to process"); app.add_option("-d,--database", db_path, "SQLite database path")->default_val("functions.db"); CLI11_PARSE(app, argc, argv); // Collect all files to process std::vector files_to_process; // Handle list file option if (!list_file.empty()) { auto list_files = readFileList(list_file); files_to_process.insert(files_to_process.end(), list_files.begin(), list_files.end()); } // Handle input files (including @listfile.txt syntax) for (const auto& input : input_files) { if (input.starts_with("@")) { // Handle @listfile.txt syntax std::string list_path = input.substr(1); auto list_files = readFileList(list_path); files_to_process.insert(files_to_process.end(), list_files.begin(), list_files.end()); } else { // Regular file if (std::filesystem::exists(input)) { files_to_process.push_back(input); } else { std::cout << "Warning: File not found: " << input << std::endl; } } } if (files_to_process.empty()) { std::cerr << "No files to process. Use --help for usage information." << std::endl; return 1; } try { DatabaseManager db(db_path); int processed_count = 0; int total_functions = 0; for (const auto& filepath : files_to_process) { std::cout << "\n=== Processing: " << filepath << " ===" << std::endl; if (processFile(filepath, db)) { processed_count++; } } std::cout << "\n=== Summary ===" << std::endl; std::cout << "Processed " << processed_count << " files successfully" << std::endl; std::cout << "Database saved to: " << db_path << std::endl; } catch (const std::exception& e) { std::cerr << "Database error: " << e.what() << std::endl; return 1; } return 0; }