diff --git a/tooling/files.sh b/tooling/files.sh index 0edcd538..582afe4d 100644 --- a/tooling/files.sh +++ b/tooling/files.sh @@ -1,22 +1,20 @@ #!/bin/bash set -e tool=build/clang-x86_64-pc-windows-msvc/Release/r3_gh_tool -# cmake --build build/clang-x86_64-pc-windows-msvc/Release --target r3_gh_tool +cmake --build build/clang-x86_64-pc-windows-msvc/Release --target r3_gh_tool scan_dir=tmps/gh_auto file_list=files.txt rm -f $file_list -for file in tmps/gh_auto/*.cxx -do - echo $file >> $file_list +for file in tmps/gh_auto/*.cxx; do + echo $file >>$file_list done -for file in tmps/gh_stub/*.cxx -do - echo $file >> $file_list +for file in tmps/gh_stub/*.cxx; do + echo $file >>$file_list done -for file in tmps/gh_fix/*.cxx -do - echo $file >> $file_list +for file in tmps/gh_fix/*.cxx; do + echo $file >>$file_list done -$tool @$file_list -v +$tool @$file_list -v --log-file=log-functions.txt +$tool tmps/gh_global.h -mglobals -v --log-file=log-globals.txt diff --git a/tooling/tool.cpp b/tooling/tool.cpp index 3b6dced4..c68f3073 100644 --- a/tooling/tool.cpp +++ b/tooling/tool.cpp @@ -10,10 +10,38 @@ #include #include #include +#include #include extern "C" TSLanguage *tree_sitter_cpp(); +// Global address regex pattern +const std::regex ADDRESS_REGEX(R"(//\s*([0-9a-fA-F]{8}))"); + +// Helper function to check if a comment contains an address +bool hasAddressPattern(const std::string &comment) { + return std::regex_search(comment, ADDRESS_REGEX); +} + +// Helper function to extract text from a TSNode +std::string extractNodeText(TSNode node, const char *source_code) { + uint32_t start = ts_node_start_byte(node); + uint32_t end = ts_node_end_byte(node); + return std::string(source_code + start, end - start); +} + +// Helper function to find first identifier in a node +std::string findIdentifierInNode(TSNode node, const char *source_code) { + uint32_t child_count = ts_node_child_count(node); + for (uint32_t i = 0; i < child_count; i++) { + TSNode child = ts_node_child(node, i); + if (strcmp(ts_node_type(child), "identifier") == 0) { + return extractNodeText(child, source_code); + } + } + return ""; +} + struct FunctionInfo { std::string name; std::string address; @@ -167,9 +195,8 @@ public: }; std::string extractAddress(const std::string &comment) { - std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))"); std::smatch match; - return std::regex_search(comment, match, addr_regex) ? match[1].str() : ""; + return std::regex_search(comment, match, ADDRESS_REGEX) ? match[1].str() : ""; } std::string getFunctionName(TSNode node, const char *source_code) { @@ -180,19 +207,15 @@ std::string getFunctionName(TSNode node, const char *source_code) { const char *type = ts_node_type(child); if (strcmp(type, "function_declarator") == 0) { - uint32_t declarator_children = ts_node_child_count(child); - for (uint32_t j = 0; j < declarator_children; j++) { - TSNode declarator_child = ts_node_child(child, j); - if (strcmp(ts_node_type(declarator_child), "identifier") == 0) { - uint32_t start = ts_node_start_byte(declarator_child); - uint32_t end = ts_node_end_byte(declarator_child); - return std::string(source_code + start, end - start); - } - } + std::string name = findIdentifierInNode(child, source_code); + if (!name.empty()) + return name; } else if (strcmp(type, "identifier") == 0) { - uint32_t start = ts_node_start_byte(child); - uint32_t end = ts_node_end_byte(child); - return std::string(source_code + start, end - start); + return extractNodeText(child, source_code); + } else if (strcmp(type, "pointer_declarator") == 0) { + std::string name = getFunctionName(child, source_code); + if (!name.empty()) + return name; } } return ""; @@ -211,13 +234,10 @@ std::string getComment(TSNode node, const char *source_code, const char *type = ts_node_type(prev_sibling); if (strcmp(type, "comment") == 0) { - uint32_t start = ts_node_start_byte(prev_sibling); - uint32_t end = ts_node_end_byte(prev_sibling); - std::string comment_text(source_code + start, end - start); + std::string comment_text = extractNodeText(prev_sibling, source_code); // Check if it contains an address pattern - std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))"); - if (std::regex_search(comment_text, addr_regex)) { + if (hasAddressPattern(comment_text)) { return comment_text; } } @@ -241,13 +261,10 @@ std::string getComment(TSNode node, const char *source_code, const char *type = ts_node_type(next_sibling); if (strcmp(type, "comment") == 0) { - uint32_t start = ts_node_start_byte(next_sibling); - uint32_t end = ts_node_end_byte(next_sibling); - std::string comment_text(source_code + start, end - start); + std::string comment_text = extractNodeText(next_sibling, source_code); // Check if it contains an address pattern - std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))"); - if (std::regex_search(comment_text, addr_regex)) { + if (hasAddressPattern(comment_text)) { return comment_text; } } @@ -301,6 +318,11 @@ void findFunctions(TSNode node, const char *source_code, uint32_t source_length, : true}; functions.push_back(func); } + // We'll never nest function declarations + return; + } else { + spdlog::error("Failed to get function name for {}", + extractNodeText(node, source_code)); } } @@ -397,9 +419,7 @@ std::string getGlobalName(TSNode node, const char *source_code) { for (uint32_t k = 0; k < ref_children; k++) { TSNode ref_child = ts_node_child(child, k); if (strcmp(ts_node_type(ref_child), "identifier") == 0) { - uint32_t start = ts_node_start_byte(ref_child); - uint32_t end = ts_node_end_byte(ref_child); - return std::string(source_code + start, end - start); + return extractNodeText(ref_child, source_code); } } } @@ -417,9 +437,7 @@ std::string getGlobalName(TSNode node, const char *source_code) { for (uint32_t k = 0; k < ref_children; k++) { TSNode ref_child = ts_node_child(declarator_child, k); if (strcmp(ts_node_type(ref_child), "identifier") == 0) { - uint32_t start = ts_node_start_byte(ref_child); - uint32_t end = ts_node_end_byte(ref_child); - return std::string(source_code + start, end - start); + return extractNodeText(ref_child, source_code); } } } @@ -435,9 +453,7 @@ std::string getGlobalName(TSNode node, const char *source_code) { for (uint32_t l = 0; l < ref_children; l++) { TSNode ref_child = ts_node_child(paren_child, l); if (strcmp(ts_node_type(ref_child), "identifier") == 0) { - uint32_t start = ts_node_start_byte(ref_child); - uint32_t end = ts_node_end_byte(ref_child); - return std::string(source_code + start, end - start); + return extractNodeText(ref_child, source_code); } } } @@ -445,17 +461,13 @@ std::string getGlobalName(TSNode node, const char *source_code) { } // Handle simple identifiers else if (strcmp(child_type, "identifier") == 0) { - uint32_t start = ts_node_start_byte(declarator_child); - uint32_t end = ts_node_end_byte(declarator_child); - return std::string(source_code + start, end - start); + return extractNodeText(declarator_child, source_code); } } } // Direct identifier child else if (strcmp(type, "identifier") == 0) { - uint32_t start = ts_node_start_byte(child); - uint32_t end = ts_node_end_byte(child); - return std::string(source_code + start, end - start); + return extractNodeText(child, source_code); } } return ""; @@ -474,9 +486,7 @@ void findGlobals(TSNode node, const char *source_code, uint32_t source_length, for (uint32_t i = 0; i < child_count; i++) { TSNode child = ts_node_child(node, i); if (strcmp(ts_node_type(child), "storage_class_specifier") == 0) { - uint32_t start = ts_node_start_byte(child); - uint32_t end = ts_node_end_byte(child); - std::string storage_class(source_code + start, end - start); + std::string storage_class = extractNodeText(child, source_code); if (storage_class == "extern") { is_extern = true; break; @@ -496,10 +506,7 @@ void findGlobals(TSNode node, const char *source_code, uint32_t source_length, globals.push_back(global); } } else { - size_t start = ts_node_start_byte(node); - size_t end = ts_node_end_byte(node); - std::string_view src = - std::string_view(source_code + start, end - start); + std::string src = extractNodeText(node, source_code); SPDLOG_ERROR("Failed to get global name for {}", src); } } @@ -569,6 +576,7 @@ int main(int argc, char *argv[]) { std::string list_file; std::string db_path = "gh.db"; std::string mode = "functions"; + std::string log_file = ""; bool verbose = false; app.add_option("files", input_files, @@ -581,6 +589,7 @@ int main(int argc, char *argv[]) { ->default_val("functions") ->check(CLI::IsMember({"functions", "globals"})); app.add_flag("-v,--verbose", verbose, "Enable verbose logging (debug level)"); + app.add_flag("--log-file", log_file, "Enable logging to file"); CLI11_PARSE(app, argc, argv); @@ -589,6 +598,12 @@ int main(int argc, char *argv[]) { spdlog::set_level(spdlog::level::debug); } + if (!log_file.empty()) { + auto log_sink = + std::make_shared(log_file, true); + spdlog::get("console")->sinks().push_back(log_sink); + } + std::vector files_to_process; if (!list_file.empty()) {