From c7309d1b2955c66482675581378a5b615165824b Mon Sep 17 00:00:00 2001 From: Guus Waals <_@guusw.nl> Date: Wed, 28 May 2025 00:32:14 +0800 Subject: [PATCH] Test with globals and stuff --- tooling/files.sh | 5 +- tooling/tool.cpp | 191 ++++++++++++++++++++++++++++------------------- 2 files changed, 117 insertions(+), 79 deletions(-) diff --git a/tooling/files.sh b/tooling/files.sh index 7d33192e..0edcd538 100644 --- a/tooling/files.sh +++ b/tooling/files.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e tool=build/clang-x86_64-pc-windows-msvc/Release/r3_gh_tool +# cmake --build build/clang-x86_64-pc-windows-msvc/Release --target r3_gh_tool scan_dir=tmps/gh_auto file_list=files.txt @@ -10,7 +11,7 @@ do echo $file >> $file_list done for file in tmps/gh_stub/*.cxx -do +do echo $file >> $file_list done for file in tmps/gh_fix/*.cxx @@ -18,4 +19,4 @@ do echo $file >> $file_list done -$tool @$file_list +$tool @$file_list -v diff --git a/tooling/tool.cpp b/tooling/tool.cpp index 72accceb..3b6dced4 100644 --- a/tooling/tool.cpp +++ b/tooling/tool.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -10,6 +9,7 @@ #include #include #include +#include #include extern "C" TSLanguage *tree_sitter_cpp(); @@ -88,7 +88,8 @@ public: void clearGlobalsForFile(const std::string &filepath) { sqlite3_reset(delete_globals_stmt); - sqlite3_bind_text(delete_globals_stmt, 1, filepath.c_str(), -1, SQLITE_STATIC); + sqlite3_bind_text(delete_globals_stmt, 1, filepath.c_str(), -1, + SQLITE_STATIC); sqlite3_step(delete_globals_stmt); } @@ -104,9 +105,12 @@ public: void insertGlobal(const GlobalInfo &global) { sqlite3_reset(insert_globals_stmt); - sqlite3_bind_text(insert_globals_stmt, 1, global.filepath.c_str(), -1, SQLITE_STATIC); - sqlite3_bind_text(insert_globals_stmt, 2, global.name.c_str(), -1, SQLITE_STATIC); - sqlite3_bind_text(insert_globals_stmt, 3, global.address.c_str(), -1, SQLITE_STATIC); + sqlite3_bind_text(insert_globals_stmt, 1, global.filepath.c_str(), -1, + SQLITE_STATIC); + sqlite3_bind_text(insert_globals_stmt, 2, global.name.c_str(), -1, + SQLITE_STATIC); + sqlite3_bind_text(insert_globals_stmt, 3, global.address.c_str(), -1, + SQLITE_STATIC); sqlite3_step(insert_globals_stmt); } }; @@ -119,7 +123,7 @@ private: public: DatabaseManager(const std::string &db_path) : db(nullptr) { if (sqlite3_open(db_path.c_str(), &db) != SQLITE_OK) { - std::cerr << "Can't open database: " << sqlite3_errmsg(db) << std::endl; + spdlog::error("Can't open database: {}", sqlite3_errmsg(db)); sqlite3_close(db); throw std::runtime_error("Failed to open database"); } @@ -196,49 +200,67 @@ std::string getFunctionName(TSNode node, const char *source_code) { std::string getComment(TSNode node, const char *source_code, uint32_t source_length, bool search_before) { + TSNode current = node; + if (search_before) { - uint32_t start_byte = ts_node_start_byte(node); - if (start_byte == 0) - return ""; + // Look for comments before the current node + while (!ts_node_is_null(current)) { + TSNode prev_sibling = ts_node_prev_sibling(current); - std::string before_text(source_code, start_byte); - std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))"); - std::smatch match; + while (!ts_node_is_null(prev_sibling)) { + const char *type = ts_node_type(prev_sibling); - size_t search_pos = before_text.length(); - while (search_pos > 0) { - size_t comment_pos = before_text.rfind("//", search_pos - 1); - if (comment_pos == std::string::npos) - break; + if (strcmp(type, "comment") == 0) { + uint32_t start = ts_node_start_byte(prev_sibling); + uint32_t end = ts_node_end_byte(prev_sibling); + std::string comment_text(source_code + start, end - start); - size_t line_end = before_text.find('\n', comment_pos); - if (line_end == std::string::npos) - line_end = before_text.length(); + // Check if it contains an address pattern + std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))"); + if (std::regex_search(comment_text, addr_regex)) { + return comment_text; + } + } + // Skip whitespace and continue looking + else if (strcmp(type, "ERROR") != 0) { + // If we hit non-comment, non-whitespace content, stop searching + break; + } - std::string comment_line = - before_text.substr(comment_pos, line_end - comment_pos); - - if (std::regex_search(comment_line, match, addr_regex)) { - size_t newlines_between = - std::count(before_text.begin() + comment_pos, - before_text.begin() + start_byte, '\n'); - if (newlines_between <= 20) - return comment_line; + prev_sibling = ts_node_prev_sibling(prev_sibling); } - search_pos = comment_pos; + + // Move up to parent and continue searching + current = ts_node_parent(current); } } else { - uint32_t end_byte = ts_node_end_byte(node); - std::string remaining(source_code + end_byte, source_length - end_byte); + // Look for comments after the current node + TSNode next_sibling = ts_node_next_sibling(node); - size_t comment_pos = remaining.find("//"); - if (comment_pos != std::string::npos) { - size_t line_end = remaining.find('\n', comment_pos); - if (line_end == std::string::npos) - line_end = remaining.length(); - return remaining.substr(comment_pos, line_end - comment_pos); + while (!ts_node_is_null(next_sibling)) { + const char *type = ts_node_type(next_sibling); + + if (strcmp(type, "comment") == 0) { + uint32_t start = ts_node_start_byte(next_sibling); + uint32_t end = ts_node_end_byte(next_sibling); + std::string comment_text(source_code + start, end - start); + + // Check if it contains an address pattern + std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))"); + if (std::regex_search(comment_text, addr_regex)) { + return comment_text; + } + } + // Skip whitespace and continue looking + else if (strcmp(type, "ERROR") != 0) { + // If we hit non-comment, non-whitespace content, stop searching + break; + } + + next_sibling = ts_node_next_sibling(next_sibling); } } + return ""; } @@ -293,7 +315,7 @@ std::vector readFileList(const std::string &list_file) { std::vector files; std::ifstream file(list_file); if (!file.is_open()) { - std::cerr << "Error: Could not open list file " << list_file << std::endl; + spdlog::error("Could not open list file {}", list_file); return files; } @@ -303,14 +325,14 @@ std::vector readFileList(const std::string &list_file) { continue; if (line.find('*') != std::string::npos) { - std::cout << "Skipping wildcard pattern: " << line << std::endl; + spdlog::info("Skipping wildcard pattern: {}", line); continue; } if (std::filesystem::exists(line)) { files.push_back(line); } else { - std::cout << "Warning: File not found: " << line << std::endl; + spdlog::warn("File not found: {}", line); } } return files; @@ -319,7 +341,7 @@ std::vector readFileList(const std::string &list_file) { bool processFile(const std::string &filepath, DatabaseManager &db) { std::ifstream file(filepath); if (!file.is_open()) { - std::cerr << "Error: Could not open file " << filepath << std::endl; + spdlog::error("Could not open file {}", filepath); return false; } @@ -334,7 +356,7 @@ bool processFile(const std::string &filepath, DatabaseManager &db) { TSNode root_node = ts_tree_root_node(tree); if (ts_node_is_null(root_node)) { - std::cerr << "Error: Failed to parse file " << filepath << std::endl; + spdlog::error("Failed to parse file {}", filepath); ts_tree_delete(tree); ts_parser_delete(parser); return false; @@ -349,12 +371,12 @@ bool processFile(const std::string &filepath, DatabaseManager &db) { for (auto &func : functions) { func.filepath = filepath; db.insertFunction(func); - std::cout << (func.is_import ? "Import: " : "Function: ") << func.name - << " @ " << func.address << " in " << filepath << std::endl; + spdlog::debug("{}: {} @ {} in {}", func.is_import ? "Import" : "Function", + func.name, func.address, filepath); } - std::cout << "Processed " << functions.size() << " functions/imports from " - << filepath << std::endl; + spdlog::info("Processed {} functions/imports from {}", functions.size(), + filepath); ts_tree_delete(tree); ts_parser_delete(parser); @@ -368,7 +390,8 @@ std::string getGlobalName(TSNode node, const char *source_code) { TSNode child = ts_node_child(node, i); const char *type = ts_node_type(child); - // Handle reference declarators like "undefined& DAT_00000004" (direct child) + // Handle reference declarators like "undefined& DAT_00000004" (direct + // child) if (strcmp(type, "reference_declarator") == 0) { uint32_t ref_children = ts_node_child_count(child); for (uint32_t k = 0; k < ref_children; k++) { @@ -381,7 +404,8 @@ std::string getGlobalName(TSNode node, const char *source_code) { } } // Look for declarator in the declaration - else if (strcmp(type, "init_declarator") == 0 || strcmp(type, "declarator") == 0) { + else if (strcmp(type, "init_declarator") == 0 || + strcmp(type, "declarator") == 0) { uint32_t declarator_children = ts_node_child_count(child); for (uint32_t j = 0; j < declarator_children; j++) { TSNode declarator_child = ts_node_child(child, j); @@ -399,12 +423,14 @@ std::string getGlobalName(TSNode node, const char *source_code) { } } } - // Handle array declarators like "char(&s_or_press_ESC_to_quit_Rayman_3__005b662c)[32]" + // Handle array declarators like + // "char(&s_or_press_ESC_to_quit_Rayman_3__005b662c)[32]" else if (strcmp(child_type, "parenthesized_declarator") == 0) { uint32_t paren_children = ts_node_child_count(declarator_child); for (uint32_t k = 0; k < paren_children; k++) { TSNode paren_child = ts_node_child(declarator_child, k); - if (strcmp(ts_node_type(paren_child), "reference_declarator") == 0) { + if (strcmp(ts_node_type(paren_child), "reference_declarator") == + 0) { uint32_t ref_children = ts_node_child_count(paren_child); for (uint32_t l = 0; l < ref_children; l++) { TSNode ref_child = ts_node_child(paren_child, l); @@ -462,7 +488,8 @@ void findGlobals(TSNode node, const char *source_code, uint32_t source_length, std::string global_name = getGlobalName(node, source_code); if (!global_name.empty()) { // Look for address comment after the declaration - std::string address = extractAddress(getComment(node, source_code, source_length, false)); + std::string address = + extractAddress(getComment(node, source_code, source_length, false)); if (!address.empty()) { GlobalInfo global{global_name, address, ""}; @@ -471,7 +498,8 @@ void findGlobals(TSNode node, const char *source_code, uint32_t source_length, } else { size_t start = ts_node_start_byte(node); size_t end = ts_node_end_byte(node); - std::string_view src = std::string_view(source_code + start, end - start); + std::string_view src = + std::string_view(source_code + start, end - start); SPDLOG_ERROR("Failed to get global name for {}", src); } } @@ -487,7 +515,7 @@ void findGlobals(TSNode node, const char *source_code, uint32_t source_length, bool processGlobalsFile(const std::string &filepath, DatabaseManager &db) { std::ifstream file(filepath); if (!file.is_open()) { - std::cerr << "Error: Could not open file " << filepath << std::endl; + spdlog::error("Could not open file {}", filepath); return false; } @@ -502,7 +530,7 @@ bool processGlobalsFile(const std::string &filepath, DatabaseManager &db) { TSNode root_node = ts_tree_root_node(tree); if (ts_node_is_null(root_node)) { - std::cerr << "Error: Failed to parse file " << filepath << std::endl; + spdlog::error("Failed to parse file {}", filepath); ts_tree_delete(tree); ts_parser_delete(parser); return false; @@ -516,12 +544,11 @@ bool processGlobalsFile(const std::string &filepath, DatabaseManager &db) { for (auto &global : globals) { global.filepath = filepath; db.insertGlobal(global); - std::cout << "Global: " << global.name << " @ " << global.address - << " in " << filepath << std::endl; + spdlog::debug("Global: {} @ {} in {}", global.name, global.address, + filepath); } - std::cout << "Processed " << globals.size() << " globals from " - << filepath << std::endl; + spdlog::info("Processed {} globals from {}", globals.size(), filepath); ts_tree_delete(tree); ts_parser_delete(parser); @@ -529,26 +556,39 @@ bool processGlobalsFile(const std::string &filepath, DatabaseManager &db) { } int main(int argc, char *argv[]) { - CLI::App app{ - "C++ Function/Global Parser - Extracts function addresses or global variable addresses from C++ files"}; + // Initialize spdlog + auto console = spdlog::stdout_color_mt("console"); + spdlog::set_default_logger(console); + spdlog::set_level(spdlog::level::info); // Default to info level + spdlog::set_pattern("[%H:%M:%S] [%^%l%$] %v"); + + CLI::App app{"C++ Function/Global Parser - Extracts function addresses or " + "global variable addresses from C++ files"}; std::vector input_files; std::string list_file; - std::string db_path = "functions.db"; + std::string db_path = "gh.db"; std::string mode = "functions"; + bool verbose = false; app.add_option("files", input_files, "Input C++ files to parse (supports @listfile.txt syntax)"); app.add_option("-l,--list", list_file, "File containing list of files to process"); app.add_option("-d,--database", db_path, "SQLite database path") - ->default_val("functions.db"); + ->default_val("gh.db"); app.add_option("-m,--mode", mode, "Processing mode: 'functions' or 'globals'") ->default_val("functions") ->check(CLI::IsMember({"functions", "globals"})); + app.add_flag("-v,--verbose", verbose, "Enable verbose logging (debug level)"); CLI11_PARSE(app, argc, argv); + // Set log level based on verbose flag + if (verbose) { + spdlog::set_level(spdlog::level::debug); + } + std::vector files_to_process; if (!list_file.empty()) { @@ -565,13 +605,12 @@ int main(int argc, char *argv[]) { } else if (std::filesystem::exists(input)) { files_to_process.push_back(input); } else { - std::cout << "Warning: File not found: " << input << std::endl; + spdlog::warn("File not found: {}", input); } } if (files_to_process.empty()) { - std::cerr << "No files to process. Use --help for usage information." - << std::endl; + spdlog::error("No files to process. Use --help for usage information."); return 1; } @@ -584,7 +623,7 @@ int main(int argc, char *argv[]) { db.beginTransaction(); for (const auto &filepath : files_to_process) { - std::cout << "\n=== Processing: " << filepath << " ===" << std::endl; + spdlog::info("=== Processing: {} ===", filepath); bool success = false; if (mode == "functions") { success = processFile(filepath, db); @@ -597,8 +636,7 @@ int main(int argc, char *argv[]) { if (++current_batch >= batch_size) { db.commitTransaction(); - std::cout << "Committed batch of " << current_batch - << " files to database" << std::endl; + spdlog::info("Committed batch of {} files to database", current_batch); db.beginTransaction(); current_batch = 0; } @@ -606,18 +644,17 @@ int main(int argc, char *argv[]) { if (current_batch > 0) { db.commitTransaction(); - std::cout << "Committed final batch of " << current_batch - << " files to database" << std::endl; + spdlog::info("Committed final batch of {} files to database", + current_batch); } - std::cout << "\n=== Summary ===" << std::endl; - std::cout << "Processed " << processed_count << " files successfully" - << std::endl; - std::cout << "Mode: " << mode << std::endl; - std::cout << "Database saved to: " << db_path << std::endl; + spdlog::info("=== Summary ==="); + spdlog::info("Processed {} files successfully", processed_count); + spdlog::info("Mode: {}", mode); + spdlog::info("Database saved to: {}", db_path); } catch (const std::exception &e) { - std::cerr << "Database error: " << e.what() << std::endl; + spdlog::error("Database error: {}", e.what()); return 1; }