Test with globals and stuff
This commit is contained in:
parent
4070a99bf8
commit
c7309d1b29
|
@ -1,6 +1,7 @@
|
|||
#!/bin/bash
|
||||
set -e
|
||||
tool=build/clang-x86_64-pc-windows-msvc/Release/r3_gh_tool
|
||||
# cmake --build build/clang-x86_64-pc-windows-msvc/Release --target r3_gh_tool
|
||||
|
||||
scan_dir=tmps/gh_auto
|
||||
file_list=files.txt
|
||||
|
@ -10,7 +11,7 @@ do
|
|||
echo $file >> $file_list
|
||||
done
|
||||
for file in tmps/gh_stub/*.cxx
|
||||
do
|
||||
do
|
||||
echo $file >> $file_list
|
||||
done
|
||||
for file in tmps/gh_fix/*.cxx
|
||||
|
@ -18,4 +19,4 @@ do
|
|||
echo $file >> $file_list
|
||||
done
|
||||
|
||||
$tool @$file_list
|
||||
$tool @$file_list -v
|
||||
|
|
191
tooling/tool.cpp
191
tooling/tool.cpp
|
@ -1,4 +1,3 @@
|
|||
#include <iostream>
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
|
@ -10,6 +9,7 @@
|
|||
#include <memory>
|
||||
#include <tree_sitter/api.h>
|
||||
#include <spdlog/spdlog.h>
|
||||
#include <spdlog/sinks/stdout_color_sinks.h>
|
||||
#include <CLI11.hpp>
|
||||
|
||||
extern "C" TSLanguage *tree_sitter_cpp();
|
||||
|
@ -88,7 +88,8 @@ public:
|
|||
|
||||
void clearGlobalsForFile(const std::string &filepath) {
|
||||
sqlite3_reset(delete_globals_stmt);
|
||||
sqlite3_bind_text(delete_globals_stmt, 1, filepath.c_str(), -1, SQLITE_STATIC);
|
||||
sqlite3_bind_text(delete_globals_stmt, 1, filepath.c_str(), -1,
|
||||
SQLITE_STATIC);
|
||||
sqlite3_step(delete_globals_stmt);
|
||||
}
|
||||
|
||||
|
@ -104,9 +105,12 @@ public:
|
|||
|
||||
void insertGlobal(const GlobalInfo &global) {
|
||||
sqlite3_reset(insert_globals_stmt);
|
||||
sqlite3_bind_text(insert_globals_stmt, 1, global.filepath.c_str(), -1, SQLITE_STATIC);
|
||||
sqlite3_bind_text(insert_globals_stmt, 2, global.name.c_str(), -1, SQLITE_STATIC);
|
||||
sqlite3_bind_text(insert_globals_stmt, 3, global.address.c_str(), -1, SQLITE_STATIC);
|
||||
sqlite3_bind_text(insert_globals_stmt, 1, global.filepath.c_str(), -1,
|
||||
SQLITE_STATIC);
|
||||
sqlite3_bind_text(insert_globals_stmt, 2, global.name.c_str(), -1,
|
||||
SQLITE_STATIC);
|
||||
sqlite3_bind_text(insert_globals_stmt, 3, global.address.c_str(), -1,
|
||||
SQLITE_STATIC);
|
||||
sqlite3_step(insert_globals_stmt);
|
||||
}
|
||||
};
|
||||
|
@ -119,7 +123,7 @@ private:
|
|||
public:
|
||||
DatabaseManager(const std::string &db_path) : db(nullptr) {
|
||||
if (sqlite3_open(db_path.c_str(), &db) != SQLITE_OK) {
|
||||
std::cerr << "Can't open database: " << sqlite3_errmsg(db) << std::endl;
|
||||
spdlog::error("Can't open database: {}", sqlite3_errmsg(db));
|
||||
sqlite3_close(db);
|
||||
throw std::runtime_error("Failed to open database");
|
||||
}
|
||||
|
@ -196,49 +200,67 @@ std::string getFunctionName(TSNode node, const char *source_code) {
|
|||
|
||||
std::string getComment(TSNode node, const char *source_code,
|
||||
uint32_t source_length, bool search_before) {
|
||||
TSNode current = node;
|
||||
|
||||
if (search_before) {
|
||||
uint32_t start_byte = ts_node_start_byte(node);
|
||||
if (start_byte == 0)
|
||||
return "";
|
||||
// Look for comments before the current node
|
||||
while (!ts_node_is_null(current)) {
|
||||
TSNode prev_sibling = ts_node_prev_sibling(current);
|
||||
|
||||
std::string before_text(source_code, start_byte);
|
||||
std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))");
|
||||
std::smatch match;
|
||||
while (!ts_node_is_null(prev_sibling)) {
|
||||
const char *type = ts_node_type(prev_sibling);
|
||||
|
||||
size_t search_pos = before_text.length();
|
||||
while (search_pos > 0) {
|
||||
size_t comment_pos = before_text.rfind("//", search_pos - 1);
|
||||
if (comment_pos == std::string::npos)
|
||||
break;
|
||||
if (strcmp(type, "comment") == 0) {
|
||||
uint32_t start = ts_node_start_byte(prev_sibling);
|
||||
uint32_t end = ts_node_end_byte(prev_sibling);
|
||||
std::string comment_text(source_code + start, end - start);
|
||||
|
||||
size_t line_end = before_text.find('\n', comment_pos);
|
||||
if (line_end == std::string::npos)
|
||||
line_end = before_text.length();
|
||||
// Check if it contains an address pattern
|
||||
std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))");
|
||||
if (std::regex_search(comment_text, addr_regex)) {
|
||||
return comment_text;
|
||||
}
|
||||
}
|
||||
// Skip whitespace and continue looking
|
||||
else if (strcmp(type, "ERROR") != 0) {
|
||||
// If we hit non-comment, non-whitespace content, stop searching
|
||||
break;
|
||||
}
|
||||
|
||||
std::string comment_line =
|
||||
before_text.substr(comment_pos, line_end - comment_pos);
|
||||
|
||||
if (std::regex_search(comment_line, match, addr_regex)) {
|
||||
size_t newlines_between =
|
||||
std::count(before_text.begin() + comment_pos,
|
||||
before_text.begin() + start_byte, '\n');
|
||||
if (newlines_between <= 20)
|
||||
return comment_line;
|
||||
prev_sibling = ts_node_prev_sibling(prev_sibling);
|
||||
}
|
||||
search_pos = comment_pos;
|
||||
|
||||
// Move up to parent and continue searching
|
||||
current = ts_node_parent(current);
|
||||
}
|
||||
} else {
|
||||
uint32_t end_byte = ts_node_end_byte(node);
|
||||
std::string remaining(source_code + end_byte, source_length - end_byte);
|
||||
// Look for comments after the current node
|
||||
TSNode next_sibling = ts_node_next_sibling(node);
|
||||
|
||||
size_t comment_pos = remaining.find("//");
|
||||
if (comment_pos != std::string::npos) {
|
||||
size_t line_end = remaining.find('\n', comment_pos);
|
||||
if (line_end == std::string::npos)
|
||||
line_end = remaining.length();
|
||||
return remaining.substr(comment_pos, line_end - comment_pos);
|
||||
while (!ts_node_is_null(next_sibling)) {
|
||||
const char *type = ts_node_type(next_sibling);
|
||||
|
||||
if (strcmp(type, "comment") == 0) {
|
||||
uint32_t start = ts_node_start_byte(next_sibling);
|
||||
uint32_t end = ts_node_end_byte(next_sibling);
|
||||
std::string comment_text(source_code + start, end - start);
|
||||
|
||||
// Check if it contains an address pattern
|
||||
std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))");
|
||||
if (std::regex_search(comment_text, addr_regex)) {
|
||||
return comment_text;
|
||||
}
|
||||
}
|
||||
// Skip whitespace and continue looking
|
||||
else if (strcmp(type, "ERROR") != 0) {
|
||||
// If we hit non-comment, non-whitespace content, stop searching
|
||||
break;
|
||||
}
|
||||
|
||||
next_sibling = ts_node_next_sibling(next_sibling);
|
||||
}
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
|
@ -293,7 +315,7 @@ std::vector<std::string> readFileList(const std::string &list_file) {
|
|||
std::vector<std::string> files;
|
||||
std::ifstream file(list_file);
|
||||
if (!file.is_open()) {
|
||||
std::cerr << "Error: Could not open list file " << list_file << std::endl;
|
||||
spdlog::error("Could not open list file {}", list_file);
|
||||
return files;
|
||||
}
|
||||
|
||||
|
@ -303,14 +325,14 @@ std::vector<std::string> readFileList(const std::string &list_file) {
|
|||
continue;
|
||||
|
||||
if (line.find('*') != std::string::npos) {
|
||||
std::cout << "Skipping wildcard pattern: " << line << std::endl;
|
||||
spdlog::info("Skipping wildcard pattern: {}", line);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (std::filesystem::exists(line)) {
|
||||
files.push_back(line);
|
||||
} else {
|
||||
std::cout << "Warning: File not found: " << line << std::endl;
|
||||
spdlog::warn("File not found: {}", line);
|
||||
}
|
||||
}
|
||||
return files;
|
||||
|
@ -319,7 +341,7 @@ std::vector<std::string> readFileList(const std::string &list_file) {
|
|||
bool processFile(const std::string &filepath, DatabaseManager &db) {
|
||||
std::ifstream file(filepath);
|
||||
if (!file.is_open()) {
|
||||
std::cerr << "Error: Could not open file " << filepath << std::endl;
|
||||
spdlog::error("Could not open file {}", filepath);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -334,7 +356,7 @@ bool processFile(const std::string &filepath, DatabaseManager &db) {
|
|||
TSNode root_node = ts_tree_root_node(tree);
|
||||
|
||||
if (ts_node_is_null(root_node)) {
|
||||
std::cerr << "Error: Failed to parse file " << filepath << std::endl;
|
||||
spdlog::error("Failed to parse file {}", filepath);
|
||||
ts_tree_delete(tree);
|
||||
ts_parser_delete(parser);
|
||||
return false;
|
||||
|
@ -349,12 +371,12 @@ bool processFile(const std::string &filepath, DatabaseManager &db) {
|
|||
for (auto &func : functions) {
|
||||
func.filepath = filepath;
|
||||
db.insertFunction(func);
|
||||
std::cout << (func.is_import ? "Import: " : "Function: ") << func.name
|
||||
<< " @ " << func.address << " in " << filepath << std::endl;
|
||||
spdlog::debug("{}: {} @ {} in {}", func.is_import ? "Import" : "Function",
|
||||
func.name, func.address, filepath);
|
||||
}
|
||||
|
||||
std::cout << "Processed " << functions.size() << " functions/imports from "
|
||||
<< filepath << std::endl;
|
||||
spdlog::info("Processed {} functions/imports from {}", functions.size(),
|
||||
filepath);
|
||||
|
||||
ts_tree_delete(tree);
|
||||
ts_parser_delete(parser);
|
||||
|
@ -368,7 +390,8 @@ std::string getGlobalName(TSNode node, const char *source_code) {
|
|||
TSNode child = ts_node_child(node, i);
|
||||
const char *type = ts_node_type(child);
|
||||
|
||||
// Handle reference declarators like "undefined& DAT_00000004" (direct child)
|
||||
// Handle reference declarators like "undefined& DAT_00000004" (direct
|
||||
// child)
|
||||
if (strcmp(type, "reference_declarator") == 0) {
|
||||
uint32_t ref_children = ts_node_child_count(child);
|
||||
for (uint32_t k = 0; k < ref_children; k++) {
|
||||
|
@ -381,7 +404,8 @@ std::string getGlobalName(TSNode node, const char *source_code) {
|
|||
}
|
||||
}
|
||||
// Look for declarator in the declaration
|
||||
else if (strcmp(type, "init_declarator") == 0 || strcmp(type, "declarator") == 0) {
|
||||
else if (strcmp(type, "init_declarator") == 0 ||
|
||||
strcmp(type, "declarator") == 0) {
|
||||
uint32_t declarator_children = ts_node_child_count(child);
|
||||
for (uint32_t j = 0; j < declarator_children; j++) {
|
||||
TSNode declarator_child = ts_node_child(child, j);
|
||||
|
@ -399,12 +423,14 @@ std::string getGlobalName(TSNode node, const char *source_code) {
|
|||
}
|
||||
}
|
||||
}
|
||||
// Handle array declarators like "char(&s_or_press_ESC_to_quit_Rayman_3__005b662c)[32]"
|
||||
// Handle array declarators like
|
||||
// "char(&s_or_press_ESC_to_quit_Rayman_3__005b662c)[32]"
|
||||
else if (strcmp(child_type, "parenthesized_declarator") == 0) {
|
||||
uint32_t paren_children = ts_node_child_count(declarator_child);
|
||||
for (uint32_t k = 0; k < paren_children; k++) {
|
||||
TSNode paren_child = ts_node_child(declarator_child, k);
|
||||
if (strcmp(ts_node_type(paren_child), "reference_declarator") == 0) {
|
||||
if (strcmp(ts_node_type(paren_child), "reference_declarator") ==
|
||||
0) {
|
||||
uint32_t ref_children = ts_node_child_count(paren_child);
|
||||
for (uint32_t l = 0; l < ref_children; l++) {
|
||||
TSNode ref_child = ts_node_child(paren_child, l);
|
||||
|
@ -462,7 +488,8 @@ void findGlobals(TSNode node, const char *source_code, uint32_t source_length,
|
|||
std::string global_name = getGlobalName(node, source_code);
|
||||
if (!global_name.empty()) {
|
||||
// Look for address comment after the declaration
|
||||
std::string address = extractAddress(getComment(node, source_code, source_length, false));
|
||||
std::string address =
|
||||
extractAddress(getComment(node, source_code, source_length, false));
|
||||
|
||||
if (!address.empty()) {
|
||||
GlobalInfo global{global_name, address, ""};
|
||||
|
@ -471,7 +498,8 @@ void findGlobals(TSNode node, const char *source_code, uint32_t source_length,
|
|||
} else {
|
||||
size_t start = ts_node_start_byte(node);
|
||||
size_t end = ts_node_end_byte(node);
|
||||
std::string_view src = std::string_view(source_code + start, end - start);
|
||||
std::string_view src =
|
||||
std::string_view(source_code + start, end - start);
|
||||
SPDLOG_ERROR("Failed to get global name for {}", src);
|
||||
}
|
||||
}
|
||||
|
@ -487,7 +515,7 @@ void findGlobals(TSNode node, const char *source_code, uint32_t source_length,
|
|||
bool processGlobalsFile(const std::string &filepath, DatabaseManager &db) {
|
||||
std::ifstream file(filepath);
|
||||
if (!file.is_open()) {
|
||||
std::cerr << "Error: Could not open file " << filepath << std::endl;
|
||||
spdlog::error("Could not open file {}", filepath);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -502,7 +530,7 @@ bool processGlobalsFile(const std::string &filepath, DatabaseManager &db) {
|
|||
TSNode root_node = ts_tree_root_node(tree);
|
||||
|
||||
if (ts_node_is_null(root_node)) {
|
||||
std::cerr << "Error: Failed to parse file " << filepath << std::endl;
|
||||
spdlog::error("Failed to parse file {}", filepath);
|
||||
ts_tree_delete(tree);
|
||||
ts_parser_delete(parser);
|
||||
return false;
|
||||
|
@ -516,12 +544,11 @@ bool processGlobalsFile(const std::string &filepath, DatabaseManager &db) {
|
|||
for (auto &global : globals) {
|
||||
global.filepath = filepath;
|
||||
db.insertGlobal(global);
|
||||
std::cout << "Global: " << global.name << " @ " << global.address
|
||||
<< " in " << filepath << std::endl;
|
||||
spdlog::debug("Global: {} @ {} in {}", global.name, global.address,
|
||||
filepath);
|
||||
}
|
||||
|
||||
std::cout << "Processed " << globals.size() << " globals from "
|
||||
<< filepath << std::endl;
|
||||
spdlog::info("Processed {} globals from {}", globals.size(), filepath);
|
||||
|
||||
ts_tree_delete(tree);
|
||||
ts_parser_delete(parser);
|
||||
|
@ -529,26 +556,39 @@ bool processGlobalsFile(const std::string &filepath, DatabaseManager &db) {
|
|||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
CLI::App app{
|
||||
"C++ Function/Global Parser - Extracts function addresses or global variable addresses from C++ files"};
|
||||
// Initialize spdlog
|
||||
auto console = spdlog::stdout_color_mt("console");
|
||||
spdlog::set_default_logger(console);
|
||||
spdlog::set_level(spdlog::level::info); // Default to info level
|
||||
spdlog::set_pattern("[%H:%M:%S] [%^%l%$] %v");
|
||||
|
||||
CLI::App app{"C++ Function/Global Parser - Extracts function addresses or "
|
||||
"global variable addresses from C++ files"};
|
||||
|
||||
std::vector<std::string> input_files;
|
||||
std::string list_file;
|
||||
std::string db_path = "functions.db";
|
||||
std::string db_path = "gh.db";
|
||||
std::string mode = "functions";
|
||||
bool verbose = false;
|
||||
|
||||
app.add_option("files", input_files,
|
||||
"Input C++ files to parse (supports @listfile.txt syntax)");
|
||||
app.add_option("-l,--list", list_file,
|
||||
"File containing list of files to process");
|
||||
app.add_option("-d,--database", db_path, "SQLite database path")
|
||||
->default_val("functions.db");
|
||||
->default_val("gh.db");
|
||||
app.add_option("-m,--mode", mode, "Processing mode: 'functions' or 'globals'")
|
||||
->default_val("functions")
|
||||
->check(CLI::IsMember({"functions", "globals"}));
|
||||
app.add_flag("-v,--verbose", verbose, "Enable verbose logging (debug level)");
|
||||
|
||||
CLI11_PARSE(app, argc, argv);
|
||||
|
||||
// Set log level based on verbose flag
|
||||
if (verbose) {
|
||||
spdlog::set_level(spdlog::level::debug);
|
||||
}
|
||||
|
||||
std::vector<std::string> files_to_process;
|
||||
|
||||
if (!list_file.empty()) {
|
||||
|
@ -565,13 +605,12 @@ int main(int argc, char *argv[]) {
|
|||
} else if (std::filesystem::exists(input)) {
|
||||
files_to_process.push_back(input);
|
||||
} else {
|
||||
std::cout << "Warning: File not found: " << input << std::endl;
|
||||
spdlog::warn("File not found: {}", input);
|
||||
}
|
||||
}
|
||||
|
||||
if (files_to_process.empty()) {
|
||||
std::cerr << "No files to process. Use --help for usage information."
|
||||
<< std::endl;
|
||||
spdlog::error("No files to process. Use --help for usage information.");
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -584,7 +623,7 @@ int main(int argc, char *argv[]) {
|
|||
db.beginTransaction();
|
||||
|
||||
for (const auto &filepath : files_to_process) {
|
||||
std::cout << "\n=== Processing: " << filepath << " ===" << std::endl;
|
||||
spdlog::info("=== Processing: {} ===", filepath);
|
||||
bool success = false;
|
||||
if (mode == "functions") {
|
||||
success = processFile(filepath, db);
|
||||
|
@ -597,8 +636,7 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
if (++current_batch >= batch_size) {
|
||||
db.commitTransaction();
|
||||
std::cout << "Committed batch of " << current_batch
|
||||
<< " files to database" << std::endl;
|
||||
spdlog::info("Committed batch of {} files to database", current_batch);
|
||||
db.beginTransaction();
|
||||
current_batch = 0;
|
||||
}
|
||||
|
@ -606,18 +644,17 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
if (current_batch > 0) {
|
||||
db.commitTransaction();
|
||||
std::cout << "Committed final batch of " << current_batch
|
||||
<< " files to database" << std::endl;
|
||||
spdlog::info("Committed final batch of {} files to database",
|
||||
current_batch);
|
||||
}
|
||||
|
||||
std::cout << "\n=== Summary ===" << std::endl;
|
||||
std::cout << "Processed " << processed_count << " files successfully"
|
||||
<< std::endl;
|
||||
std::cout << "Mode: " << mode << std::endl;
|
||||
std::cout << "Database saved to: " << db_path << std::endl;
|
||||
spdlog::info("=== Summary ===");
|
||||
spdlog::info("Processed {} files successfully", processed_count);
|
||||
spdlog::info("Mode: {}", mode);
|
||||
spdlog::info("Database saved to: {}", db_path);
|
||||
|
||||
} catch (const std::exception &e) {
|
||||
std::cerr << "Database error: " << e.what() << std::endl;
|
||||
spdlog::error("Database error: {}", e.what());
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue