Cleanup tool

This commit is contained in:
Guus Waals 2025-05-28 00:50:59 +08:00
parent c7309d1b29
commit c9c9e9c6e6
2 changed files with 70 additions and 57 deletions

View File

@ -1,22 +1,20 @@
#!/bin/bash
set -e
tool=build/clang-x86_64-pc-windows-msvc/Release/r3_gh_tool
# cmake --build build/clang-x86_64-pc-windows-msvc/Release --target r3_gh_tool
cmake --build build/clang-x86_64-pc-windows-msvc/Release --target r3_gh_tool
scan_dir=tmps/gh_auto
file_list=files.txt
rm -f $file_list
for file in tmps/gh_auto/*.cxx
do
echo $file >> $file_list
for file in tmps/gh_auto/*.cxx; do
echo $file >>$file_list
done
for file in tmps/gh_stub/*.cxx
do
echo $file >> $file_list
for file in tmps/gh_stub/*.cxx; do
echo $file >>$file_list
done
for file in tmps/gh_fix/*.cxx
do
echo $file >> $file_list
for file in tmps/gh_fix/*.cxx; do
echo $file >>$file_list
done
$tool @$file_list -v
$tool @$file_list -v --log-file=log-functions.txt
$tool tmps/gh_global.h -mglobals -v --log-file=log-globals.txt

View File

@ -10,10 +10,38 @@
#include <tree_sitter/api.h>
#include <spdlog/spdlog.h>
#include <spdlog/sinks/stdout_color_sinks.h>
#include <spdlog/sinks/basic_file_sink.h>
#include <CLI11.hpp>
extern "C" TSLanguage *tree_sitter_cpp();
// Shared pattern for address comments: "//", optional whitespace, then eight
// hexadecimal digits (captured as group 1).
const std::regex ADDRESS_REGEX(R"(//\s*([0-9a-fA-F]{8}))");
// Reports whether the given comment text contains an address marker matching
// ADDRESS_REGEX anywhere within it.
bool hasAddressPattern(const std::string &comment) {
  const bool found =
      std::regex_search(comment.cbegin(), comment.cend(), ADDRESS_REGEX);
  return found;
}
// Copies the slice of source_code covered by node into a std::string. The
// slice begins at the node's start byte and its length is the distance to the
// node's end byte.
std::string extractNodeText(TSNode node, const char *source_code) {
  const uint32_t first_byte = ts_node_start_byte(node);
  const uint32_t past_last_byte = ts_node_end_byte(node);
  const char *text_begin = source_code + first_byte;
  return std::string(text_begin, past_last_byte - first_byte);
}
// Walks the direct children of node in order and returns the source text of
// the first one whose type is "identifier". Only immediate children are
// inspected; an empty string is returned when none of them qualifies.
std::string findIdentifierInNode(TSNode node, const char *source_code) {
  const uint32_t num_children = ts_node_child_count(node);
  for (uint32_t idx = 0; idx != num_children; ++idx) {
    TSNode candidate = ts_node_child(node, idx);
    const bool is_identifier =
        strcmp(ts_node_type(candidate), "identifier") == 0;
    if (!is_identifier) {
      continue;
    }
    return extractNodeText(candidate, source_code);
  }
  return std::string();
}
struct FunctionInfo {
std::string name;
std::string address;
@ -167,9 +195,8 @@ public:
};
std::string extractAddress(const std::string &comment) {
std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))");
std::smatch match;
return std::regex_search(comment, match, addr_regex) ? match[1].str() : "";
return std::regex_search(comment, match, ADDRESS_REGEX) ? match[1].str() : "";
}
std::string getFunctionName(TSNode node, const char *source_code) {
@ -180,19 +207,15 @@ std::string getFunctionName(TSNode node, const char *source_code) {
const char *type = ts_node_type(child);
if (strcmp(type, "function_declarator") == 0) {
uint32_t declarator_children = ts_node_child_count(child);
for (uint32_t j = 0; j < declarator_children; j++) {
TSNode declarator_child = ts_node_child(child, j);
if (strcmp(ts_node_type(declarator_child), "identifier") == 0) {
uint32_t start = ts_node_start_byte(declarator_child);
uint32_t end = ts_node_end_byte(declarator_child);
return std::string(source_code + start, end - start);
}
}
std::string name = findIdentifierInNode(child, source_code);
if (!name.empty())
return name;
} else if (strcmp(type, "identifier") == 0) {
uint32_t start = ts_node_start_byte(child);
uint32_t end = ts_node_end_byte(child);
return std::string(source_code + start, end - start);
return extractNodeText(child, source_code);
} else if (strcmp(type, "pointer_declarator") == 0) {
std::string name = getFunctionName(child, source_code);
if (!name.empty())
return name;
}
}
return "";
@ -211,13 +234,10 @@ std::string getComment(TSNode node, const char *source_code,
const char *type = ts_node_type(prev_sibling);
if (strcmp(type, "comment") == 0) {
uint32_t start = ts_node_start_byte(prev_sibling);
uint32_t end = ts_node_end_byte(prev_sibling);
std::string comment_text(source_code + start, end - start);
std::string comment_text = extractNodeText(prev_sibling, source_code);
// Check if it contains an address pattern
std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))");
if (std::regex_search(comment_text, addr_regex)) {
if (hasAddressPattern(comment_text)) {
return comment_text;
}
}
@ -241,13 +261,10 @@ std::string getComment(TSNode node, const char *source_code,
const char *type = ts_node_type(next_sibling);
if (strcmp(type, "comment") == 0) {
uint32_t start = ts_node_start_byte(next_sibling);
uint32_t end = ts_node_end_byte(next_sibling);
std::string comment_text(source_code + start, end - start);
std::string comment_text = extractNodeText(next_sibling, source_code);
// Check if it contains an address pattern
std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))");
if (std::regex_search(comment_text, addr_regex)) {
if (hasAddressPattern(comment_text)) {
return comment_text;
}
}
@ -301,6 +318,11 @@ void findFunctions(TSNode node, const char *source_code, uint32_t source_length,
: true};
functions.push_back(func);
}
// We'll never nest function declarations
return;
} else {
spdlog::error("Failed to get function name for {}",
extractNodeText(node, source_code));
}
}
@ -397,9 +419,7 @@ std::string getGlobalName(TSNode node, const char *source_code) {
for (uint32_t k = 0; k < ref_children; k++) {
TSNode ref_child = ts_node_child(child, k);
if (strcmp(ts_node_type(ref_child), "identifier") == 0) {
uint32_t start = ts_node_start_byte(ref_child);
uint32_t end = ts_node_end_byte(ref_child);
return std::string(source_code + start, end - start);
return extractNodeText(ref_child, source_code);
}
}
}
@ -417,9 +437,7 @@ std::string getGlobalName(TSNode node, const char *source_code) {
for (uint32_t k = 0; k < ref_children; k++) {
TSNode ref_child = ts_node_child(declarator_child, k);
if (strcmp(ts_node_type(ref_child), "identifier") == 0) {
uint32_t start = ts_node_start_byte(ref_child);
uint32_t end = ts_node_end_byte(ref_child);
return std::string(source_code + start, end - start);
return extractNodeText(ref_child, source_code);
}
}
}
@ -435,9 +453,7 @@ std::string getGlobalName(TSNode node, const char *source_code) {
for (uint32_t l = 0; l < ref_children; l++) {
TSNode ref_child = ts_node_child(paren_child, l);
if (strcmp(ts_node_type(ref_child), "identifier") == 0) {
uint32_t start = ts_node_start_byte(ref_child);
uint32_t end = ts_node_end_byte(ref_child);
return std::string(source_code + start, end - start);
return extractNodeText(ref_child, source_code);
}
}
}
@ -445,17 +461,13 @@ std::string getGlobalName(TSNode node, const char *source_code) {
}
// Handle simple identifiers
else if (strcmp(child_type, "identifier") == 0) {
uint32_t start = ts_node_start_byte(declarator_child);
uint32_t end = ts_node_end_byte(declarator_child);
return std::string(source_code + start, end - start);
return extractNodeText(declarator_child, source_code);
}
}
}
// Direct identifier child
else if (strcmp(type, "identifier") == 0) {
uint32_t start = ts_node_start_byte(child);
uint32_t end = ts_node_end_byte(child);
return std::string(source_code + start, end - start);
return extractNodeText(child, source_code);
}
}
return "";
@ -474,9 +486,7 @@ void findGlobals(TSNode node, const char *source_code, uint32_t source_length,
for (uint32_t i = 0; i < child_count; i++) {
TSNode child = ts_node_child(node, i);
if (strcmp(ts_node_type(child), "storage_class_specifier") == 0) {
uint32_t start = ts_node_start_byte(child);
uint32_t end = ts_node_end_byte(child);
std::string storage_class(source_code + start, end - start);
std::string storage_class = extractNodeText(child, source_code);
if (storage_class == "extern") {
is_extern = true;
break;
@ -496,10 +506,7 @@ void findGlobals(TSNode node, const char *source_code, uint32_t source_length,
globals.push_back(global);
}
} else {
size_t start = ts_node_start_byte(node);
size_t end = ts_node_end_byte(node);
std::string_view src =
std::string_view(source_code + start, end - start);
std::string src = extractNodeText(node, source_code);
SPDLOG_ERROR("Failed to get global name for {}", src);
}
}
@ -569,6 +576,7 @@ int main(int argc, char *argv[]) {
std::string list_file;
std::string db_path = "gh.db";
std::string mode = "functions";
std::string log_file = "";
bool verbose = false;
app.add_option("files", input_files,
@ -581,6 +589,7 @@ int main(int argc, char *argv[]) {
->default_val("functions")
->check(CLI::IsMember({"functions", "globals"}));
app.add_flag("-v,--verbose", verbose, "Enable verbose logging (debug level)");
app.add_flag("--log-file", log_file, "Enable logging to file");
CLI11_PARSE(app, argc, argv);
@ -589,6 +598,12 @@ int main(int argc, char *argv[]) {
spdlog::set_level(spdlog::level::debug);
}
if (!log_file.empty()) {
auto log_sink =
std::make_shared<spdlog::sinks::basic_file_sink_mt>(log_file, true);
spdlog::get("console")->sinks().push_back(log_sink);
}
std::vector<std::string> files_to_process;
if (!list_file.empty()) {