New tool split up

This commit is contained in:
Guus Waals 2025-05-31 23:34:29 +08:00
parent af2c8c1384
commit 2c305c7a03
10 changed files with 1007 additions and 910 deletions

View File

@ -15,9 +15,18 @@ target_include_directories(sqlite3 PUBLIC ${SQLITE_SRC})
add_library(CLI11 INTERFACE)
target_include_directories(CLI11 INTERFACE third_party/CLI11)
add_executable(r3_gh_tool tool.cpp)
target_link_libraries(r3_gh_tool PRIVATE spdlog::spdlog tree-sitter tree-sitter-cpp sqlite3 CLI11)
target_compile_features(r3_gh_tool PRIVATE cxx_std_23)
# The gh_tool executable is built from the translation units listed below.
add_executable(gh_tool
tool.cpp
file_processor.cpp
database.cpp
parser.cpp
cmd_scan.cpp
cmd_dump.cpp
cmd_verify.cpp
cmd_hooks.cpp
)
# spdlog, tree-sitter (with its C++ grammar), sqlite3 and CLI11 are linked
# privately, i.e. they are not propagated to anything that links gh_tool.
target_link_libraries(gh_tool PRIVATE spdlog::spdlog tree-sitter tree-sitter-cpp sqlite3 CLI11)
# The tool's sources are compiled as C++23.
target_compile_features(gh_tool PRIVATE cxx_std_23)
add_executable(generate_dbg_sec generate_dbg_sec.cpp)
target_compile_features(generate_dbg_sec PRIVATE cxx_std_23)

88
tooling/cmd_dump.cpp Normal file
View File

@ -0,0 +1,88 @@
#include "tool.hpp"
#include <CLI11.hpp>
#include <tree_sitter/api.h>
// Forward declarations
extern "C" TSLanguage *tree_sitter_cpp();
static std::string filepath;
// Logs the subtree rooted at `node` through spdlog, one line per node,
// indented two spaces per nesting level.
//
// Every line shows the node type and its [start:end] byte range. Nodes that
// span fewer than 100 bytes also show their source text, with each newline
// rendered as a literal backslash-n and anything longer than 50 characters
// cut to its first 47 characters followed by "...".
//
// `source_code` is the buffer the node's byte offsets refer to; `depth` is the
// nesting level used for indentation.
void dumpTreeSitterAST(TSNode node, const char *source_code, int depth) {
  const std::string indent(depth * 2, ' ');
  const char *type = ts_node_type(node);
  const uint32_t start = ts_node_start_byte(node);
  const uint32_t end = ts_node_end_byte(node);

  std::string content;
  if (end - start < 100) { // Only show content for small nodes
    content = extractNodeText(node, source_code);
    // Escape newlines with a plain search-and-replace. This function runs once
    // per syntax node, so compiling a std::regex on every call is wasted work.
    for (std::string::size_type pos = content.find('\n');
         pos != std::string::npos; pos = content.find('\n', pos + 2)) {
      content.replace(pos, 1, "\\n");
    }
    // Truncate if still too long
    if (content.length() > 50) {
      content = content.substr(0, 47) + "...";
    }
  }

  if (!content.empty()) {
    spdlog::info("{}{}[{}:{}] \"{}\"", indent, type, start, end, content);
  } else {
    spdlog::info("{}{}[{}:{}]", indent, type, start, end);
  }

  // Recursively dump children
  const uint32_t child_count = ts_node_child_count(node);
  for (uint32_t i = 0; i < child_count; i++) {
    dumpTreeSitterAST(ts_node_child(node, i), source_code, depth + 1);
  }
}
bool dumpTreeFile(const std::string &filepath) {
std::ifstream file(filepath);
if (!file.is_open()) {
spdlog::error("Could not open file {}", filepath);
return false;
}
std::string file_content((std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
TSParser *parser = ts_parser_new();
ts_parser_set_language(parser, tree_sitter_cpp());
TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(),
file_content.length());
TSNode root_node = ts_tree_root_node(tree);
if (ts_node_is_null(root_node)) {
spdlog::error("Failed to parse file {}", filepath);
ts_tree_delete(tree);
ts_parser_delete(parser);
return false;
}
spdlog::info("=== Tree-sitter AST for {} ===", filepath);
dumpTreeSitterAST(root_node, file_content.c_str());
spdlog::info("=== End of AST dump ===");
ts_tree_delete(tree);
ts_parser_delete(parser);
return true;
}
// Adds the "dump-tree" subcommand to `app`. It takes one required option,
// -f/--filepath, stored in the file-level `filepath` variable, and dumps the
// tree-sitter AST of that file once parsing of the command line is complete.
void register_cmd_dump(CLI::App &app) {
  auto *dump_cmd =
      app.add_subcommand("dump-tree", "Dump the tree-sitter AST for a file");

  auto *path_option = dump_cmd->add_option(
      "-f,--filepath", filepath, "File to dump the tree-sitter AST for");
  path_option->required();

  const auto run_dump = []() {
    spdlog::info("=== Processing: {} ===", filepath);
    dumpTreeFile(filepath);
  };
  dump_cmd->final_callback(run_dump);
}

0
tooling/cmd_hooks.cpp Normal file
View File

109
tooling/cmd_scan.cpp Normal file
View File

@ -0,0 +1,109 @@
#include "tool.hpp"
#include <CLI11.hpp>
// Positional "files" arguments of the scan subcommands; an entry starting
// with '@' names a list file instead of a source file.
static std::vector<std::string> files;
// Value of -l/--list: path of a file that lists the files to process.
static std::string list_file;
// Raw value of -t/--type ("auto", "fix", "stub" or "ref").
static std::string type_str;
FileType file_type; // Parsed form of type_str, assigned in the subcommand callback
// Builds the list of files the scan commands should work on.
//
// Entries from the --list file come first, followed by the positional
// arguments in order. A positional argument starting with '@' is expanded
// through readFileList; any other argument is kept only if it exists on disk,
// otherwise a warning is logged and it is dropped.
std::vector<std::string> getFilesToProcess() {
  std::vector<std::string> collected;

  const auto append_list = [&collected](const std::string &list_path) {
    auto entries = readFileList(list_path);
    collected.insert(collected.end(), entries.begin(), entries.end());
  };

  if (!list_file.empty()) {
    append_list(list_file);
  }

  for (const auto &argument : files) {
    if (argument.starts_with("@")) {
      append_list(argument.substr(1));
      continue;
    }
    if (!std::filesystem::exists(argument)) {
      spdlog::warn("File not found: {}", argument);
      continue;
    }
    collected.push_back(argument);
  }
  return collected;
}
// Registers one scan subcommand named `mode` ("functions" or "globals") on
// `app`. Both subcommands share the same options and the same file-level
// option storage.
//
// When the subcommand runs it resolves the input files, opens the database at
// Options::get().db_path and processes the files inside transactions that are
// committed every 50 files. The process exits with status 1 when no input was
// given, the type string is invalid, or no file is left after resolution.
static void setupCommand(CLI::App &app, std::string mode) {
  auto cmd = app.add_subcommand(mode, "Scan for functions and globals");
  cmd->add_option("files", files,
                  "Input C++ files to parse (supports @listfile.txt syntax)");
  cmd->add_option("-l,--list", list_file,
                  "File containing list of files to process");
  cmd->add_option("-t,--type", type_str,
                  "File type: 'auto', 'fix', 'stub', or 'ref'")
      ->default_val("auto")
      ->check(CLI::IsMember({"auto", "fix", "stub", "ref"}));

  cmd->final_callback([mode]() {
    if (files.empty() && list_file.empty()) {
      spdlog::error("No files to process. Use --help for usage information.");
      exit(1);
    }

    // Convert string to FileType enum
    try {
      file_type = stringToFileType(type_str);
    } catch (const std::invalid_argument &) {
      spdlog::error("Invalid file type: {}", type_str);
      exit(1);
    }

    auto files_to_process = getFilesToProcess();
    if (files_to_process.empty()) {
      spdlog::error("No files to process. Use --help for usage information.");
      exit(1);
    }

    auto &options = Options::get();
    DatabaseManager db(options.db_path);

    const size_t batch_size = 50;
    size_t current_batch = 0;
    int processed_count = 0;

    db.beginTransaction();
    for (const auto &filepath : files_to_process) {
      spdlog::info("=== Processing: {} (type: {}) ===", filepath, type_str);

      bool success = false;
      if (mode == "functions") {
        success = processFile(filepath, db, file_type);
      } else if (mode == "globals") {
        success = processGlobalsFile(filepath, db);
      }
      if (success)
        processed_count++;

      if (++current_batch >= batch_size) {
        db.commitTransaction();
        spdlog::info("Committed batch of {} files to database", current_batch);
        db.beginTransaction();
        current_batch = 0;
      }
    }

    // A transaction is always open at this point: either the first one or the
    // one reopened after a full batch. Commit it unconditionally so it is
    // never left dangling when the file count is a multiple of the batch size.
    db.commitTransaction();
    if (current_batch > 0) {
      spdlog::info("Committed final batch of {} files to database",
                   current_batch);
    }

    spdlog::info("=== Summary ===");
    spdlog::info("Processed {} files successfully", processed_count);
    spdlog::info("Mode: {}", mode);
    spdlog::info("File type: {}", type_str);
    spdlog::info("Database saved to: {}", options.db_path);
  });
}
// Registers the two scan subcommands, "functions" first and then "globals".
void register_cmd_scan(CLI::App &app) {
  for (const char *mode_name : {"functions", "globals"}) {
    setupCommand(app, mode_name);
  }
}

38
tooling/cmd_verify.cpp Normal file
View File

@ -0,0 +1,38 @@
#include "tool.hpp"
#include <CLI11.hpp>
// Runs both duplicate checks on `db` (addresses first, then names) and logs
// the outcome of each. Returns true only when neither check found duplicates.
bool processDuplicates(DatabaseManager &db) {
  spdlog::info("=== Checking for duplicate addresses ===");
  const bool addresses_unique = !db.checkDuplicateAddresses();
  if (addresses_unique) {
    spdlog::info("No duplicate addresses found in the database.");
  } else {
    spdlog::error("Found duplicate addresses in the database!");
  }

  spdlog::info("=== Checking for duplicate names ===");
  const bool names_unique = !db.checkDuplicateNames();
  if (names_unique) {
    spdlog::info("No duplicate names found in the database.");
  } else {
    spdlog::error("Found duplicate names in the database!");
  }

  return addresses_unique && names_unique;
}
// Adds the "verify" subcommand to `app`. It opens the database at
// Options::get().db_path, checks it for duplicate addresses and names, logs a
// summary, and terminates the process with status 1 when duplicates exist.
void register_cmd_verify(CLI::App &app) {
  auto cmd = app.add_subcommand("verify", "Verify the database");
  cmd->final_callback([]() {
    auto &options = Options::get();

    bool has_duplicates = false;
    {
      // Scoped so the database is closed before a possible exit() below.
      DatabaseManager db(options.db_path);
      // For duplicates mode, we only check the database, no file processing
      spdlog::info("=== Checking database for duplicates ===");
      has_duplicates = !processDuplicates(db);
    }

    spdlog::info("=== Summary ===");
    spdlog::info("Mode: {}", options.mode);
    spdlog::info("Database: {}", options.db_path);

    // The callback is invoked as a void function, so a value returned from it
    // is discarded. Report failure through the exit status instead, the same
    // way the scan subcommands do.
    if (has_duplicates) {
      exit(1);
    }
  });
}

222
tooling/database.cpp Normal file
View File

@ -0,0 +1,222 @@
#include "tool.hpp"
#include <stdexcept>
#include <spdlog/spdlog.h>
// Database classes

// Holds the prepared statements used to delete and insert rows of the
// Functions, Imports and Globals tables. The statements are prepared in the
// constructor and finalized in the destructor. The members are public because
// DatabaseManager binds and steps them directly.
class PreparedStatements {
public:
  sqlite3 *db;
  // All statements start out null so that finalizing is safe at any point;
  // sqlite3_finalize on a null statement is a no-op.
  sqlite3_stmt *delete_functions_stmt = nullptr;
  sqlite3_stmt *delete_imports_stmt = nullptr;
  sqlite3_stmt *insert_functions_stmt = nullptr;
  sqlite3_stmt *insert_imports_stmt = nullptr;
  sqlite3_stmt *delete_globals_stmt = nullptr;
  sqlite3_stmt *insert_globals_stmt = nullptr;

  // Compiles `sql` into `*stmt`; throws std::runtime_error built from
  // `error_msg` and the SQLite error text on failure.
  void prepareStatement(const char *sql, sqlite3_stmt **stmt,
                        const std::string &error_msg);

  // Prepares every statement against `database`. Throws std::runtime_error if
  // any of them fails to compile.
  explicit PreparedStatements(sqlite3 *database) : db(database) {
    try {
      prepareStatement("DELETE FROM Functions WHERE filepath = ?",
                       &delete_functions_stmt,
                       "Failed to prepare delete functions statement");
      prepareStatement("DELETE FROM Imports WHERE filepath = ?",
                       &delete_imports_stmt,
                       "Failed to prepare delete imports statement");
      prepareStatement("INSERT OR REPLACE INTO Functions (filepath, name, "
                       "address, type) VALUES (?, ?, ?, ?)",
                       &insert_functions_stmt,
                       "Failed to prepare insert functions statement");
      prepareStatement("INSERT OR REPLACE INTO Imports (filepath, name, address, "
                       "type) VALUES (?, ?, ?, ?)",
                       &insert_imports_stmt,
                       "Failed to prepare insert imports statement");
      prepareStatement("DELETE FROM Globals WHERE filepath = ?",
                       &delete_globals_stmt,
                       "Failed to prepare delete globals statement");
      prepareStatement("INSERT OR REPLACE INTO Globals (filepath, name, address) "
                       "VALUES (?, ?, ?)",
                       &insert_globals_stmt,
                       "Failed to prepare insert globals statement");
    } catch (...) {
      // The destructor does not run for a constructor that throws, so release
      // whatever was prepared before the failure.
      finalizeAll();
      throw;
    }
  }

  // Copying would finalize every statement twice.
  PreparedStatements(const PreparedStatements &) = delete;
  PreparedStatements &operator=(const PreparedStatements &) = delete;

  ~PreparedStatements() { finalizeAll(); }

private:
  // Finalizes every statement.
  void finalizeAll() {
    sqlite3_finalize(delete_functions_stmt);
    sqlite3_finalize(delete_imports_stmt);
    sqlite3_finalize(insert_functions_stmt);
    sqlite3_finalize(insert_imports_stmt);
    sqlite3_finalize(delete_globals_stmt);
    sqlite3_finalize(insert_globals_stmt);
  }
};
// Compiles `sql` on this object's connection and stores the result in `*stmt`.
// On failure throws std::runtime_error whose message is `error_msg`, a colon,
// and the connection's current SQLite error text.
void PreparedStatements::prepareStatement(const char *sql, sqlite3_stmt **stmt,
                                          const std::string &error_msg) {
  const int status = sqlite3_prepare_v2(db, sql, -1, stmt, nullptr);
  if (status == SQLITE_OK) {
    return;
  }
  throw std::runtime_error(error_msg + ": " + sqlite3_errmsg(db));
}
// Opens (creating if needed) the SQLite database at `db_path`, makes sure the
// Functions, Imports and Globals tables exist, and prepares the statements
// used by the insert/clear methods.
//
// Throws std::runtime_error when the database cannot be opened, the tables
// cannot be created, or a statement fails to prepare. The connection is closed
// before any of these exceptions leaves the constructor, because the
// destructor does not run for a constructor that throws.
DatabaseManager::DatabaseManager(const std::string &db_path) : db(nullptr) {
  if (sqlite3_open(db_path.c_str(), &db) != SQLITE_OK) {
    spdlog::error("Can't open database: {}", sqlite3_errmsg(db));
    sqlite3_close(db);
    db = nullptr;
    throw std::runtime_error("Failed to open database");
  }

  const char *create_tables = R"(
CREATE TABLE IF NOT EXISTS Functions (filepath TEXT, name TEXT, address TEXT, type INTEGER DEFAULT 0, PRIMARY KEY (name, filepath));
CREATE TABLE IF NOT EXISTS Imports (filepath TEXT, name TEXT, address TEXT, type INTEGER DEFAULT 0, PRIMARY KEY (name, filepath));
CREATE TABLE IF NOT EXISTS Globals (filepath TEXT, name TEXT, address TEXT);
)";
  char *exec_error = nullptr;
  if (sqlite3_exec(db, create_tables, nullptr, nullptr, &exec_error) !=
      SQLITE_OK) {
    const std::string detail = exec_error ? exec_error : "unknown error";
    sqlite3_free(exec_error);
    spdlog::error("Can't create database tables: {}", detail);
    sqlite3_close(db);
    db = nullptr;
    throw std::runtime_error("Failed to create database tables: " + detail);
  }

  try {
    prepared_stmts = std::make_shared<PreparedStatements>(db);
  } catch (...) {
    sqlite3_close(db);
    db = nullptr;
    throw;
  }
}
// Releases the prepared statements and then closes the connection.
DatabaseManager::~DatabaseManager() {
  // Members are destroyed only after this body has run, so without this reset
  // the statements would still be unfinalized when sqlite3_close is called.
  // In that situation sqlite3_close returns SQLITE_BUSY and leaves the
  // connection open.
  prepared_stmts.reset();
  if (db)
    sqlite3_close(db);
}
void DatabaseManager::clearEntriesForFile(const std::string &filepath) {
for (auto stmt : {prepared_stmts->delete_functions_stmt,
prepared_stmts->delete_imports_stmt}) {
sqlite3_reset(stmt);
sqlite3_bind_text(stmt, 1, filepath.c_str(), -1, SQLITE_STATIC);
sqlite3_step(stmt);
}
}
void DatabaseManager::clearGlobalsForFile(const std::string &filepath) {
sqlite3_reset(prepared_stmts->delete_globals_stmt);
sqlite3_bind_text(prepared_stmts->delete_globals_stmt, 1, filepath.c_str(),
-1, SQLITE_STATIC);
sqlite3_step(prepared_stmts->delete_globals_stmt);
}
void DatabaseManager::insertFunction(const FunctionInfo &func) {
sqlite3_stmt *stmt = func.is_import ? prepared_stmts->insert_imports_stmt
: prepared_stmts->insert_functions_stmt;
sqlite3_reset(stmt);
sqlite3_bind_text(stmt, 1, func.filepath.c_str(), -1, SQLITE_STATIC);
sqlite3_bind_text(stmt, 2, func.name.c_str(), -1, SQLITE_STATIC);
sqlite3_bind_text(stmt, 3, func.address.c_str(), -1, SQLITE_STATIC);
sqlite3_bind_int(stmt, 4, static_cast<int>(func.type));
sqlite3_step(stmt);
}
void DatabaseManager::insertGlobal(const GlobalInfo &global) {
sqlite3_reset(prepared_stmts->insert_globals_stmt);
sqlite3_bind_text(prepared_stmts->insert_globals_stmt, 1,
global.filepath.c_str(), -1, SQLITE_STATIC);
sqlite3_bind_text(prepared_stmts->insert_globals_stmt, 2, global.name.c_str(),
-1, SQLITE_STATIC);
sqlite3_bind_text(prepared_stmts->insert_globals_stmt, 3,
global.address.c_str(), -1, SQLITE_STATIC);
sqlite3_step(prepared_stmts->insert_globals_stmt);
}
void DatabaseManager::beginTransaction() {
sqlite3_exec(db, "BEGIN TRANSACTION", nullptr, nullptr, nullptr);
}
void DatabaseManager::commitTransaction() {
sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr);
}
void DatabaseManager::rollbackTransaction() {
sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr);
}
bool DatabaseManager::checkDuplicateAddresses() {
const char *sql = R"(
WITH all_addresses AS (
SELECT 'Functions' as table_name, name, address, filepath FROM Functions WHERE address != '' AND type != 3
UNION ALL
SELECT 'Globals' as table_name, name, address, filepath FROM Globals WHERE address != ''
)
SELECT address, COUNT(*) as count,
GROUP_CONCAT(table_name || ':' || name || ' (' || filepath || ')', '; ') as entries
FROM all_addresses
GROUP BY address
HAVING COUNT(*) > 1
ORDER BY address;
)";
sqlite3_stmt *stmt;
if (sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr) != SQLITE_OK) {
spdlog::error("Failed to prepare duplicate address query: {}",
sqlite3_errmsg(db));
return false;
}
bool found_duplicates = false;
while (sqlite3_step(stmt) == SQLITE_ROW) {
found_duplicates = true;
const char *address = (const char *)sqlite3_column_text(stmt, 0);
int count = sqlite3_column_int(stmt, 1);
const char *entries = (const char *)sqlite3_column_text(stmt, 2);
spdlog::error("DUPLICATE ADDRESS: {} appears {} times in: {}", address,
count, entries);
}
sqlite3_finalize(stmt);
return found_duplicates;
}
bool DatabaseManager::checkDuplicateNames() {
bool found_duplicates = false;
// Check Functions table
const char *functions_sql = R"(
SELECT name, COUNT(*) as count,
GROUP_CONCAT(filepath, '; ') as filepaths
FROM Functions
WHERE type != 3
GROUP BY name
HAVING COUNT(*) > 1
ORDER BY name;
)";
sqlite3_stmt *stmt;
if (sqlite3_prepare_v2(db, functions_sql, -1, &stmt, nullptr) == SQLITE_OK) {
while (sqlite3_step(stmt) == SQLITE_ROW) {
found_duplicates = true;
const char *name = (const char *)sqlite3_column_text(stmt, 0);
int count = sqlite3_column_int(stmt, 1);
const char *filepaths = (const char *)sqlite3_column_text(stmt, 2);
spdlog::error(
"DUPLICATE FUNCTION NAME: '{}' appears {} times in files: {}", name,
count, filepaths);
}
sqlite3_finalize(stmt);
}
// Check Globals table
const char *globals_sql = R"(
SELECT name, COUNT(*) as count,
GROUP_CONCAT(filepath, '; ') as filepaths
FROM Globals
GROUP BY name
HAVING COUNT(*) > 1
ORDER BY name;
)";
if (sqlite3_prepare_v2(db, globals_sql, -1, &stmt, nullptr) == SQLITE_OK) {
while (sqlite3_step(stmt) == SQLITE_ROW) {
found_duplicates = true;
const char *name = (const char *)sqlite3_column_text(stmt, 0);
int count = sqlite3_column_int(stmt, 1);
const char *filepaths = (const char *)sqlite3_column_text(stmt, 2);
spdlog::error("DUPLICATE GLOBAL NAME: '{}' appears {} times in files: {}",
name, count, filepaths);
}
sqlite3_finalize(stmt);
}
return found_duplicates;
}

126
tooling/file_processor.cpp Normal file
View File

@ -0,0 +1,126 @@
#include "tool.hpp"
#include <fstream>
#include <sstream>
#include <filesystem>
#include <spdlog/spdlog.h>
#include <tree_sitter/api.h>
// Forward declarations
extern "C" TSLanguage *tree_sitter_cpp();
// Reads a list file with one path per line and returns the paths that exist.
//
// Trailing spaces, tabs and carriage returns are stripped from every line, so
// list files with Windows (CRLF) line endings work. Empty lines and lines
// starting with '#' are ignored, lines containing '*' are skipped with an
// info message, and paths that do not exist are skipped with a warning.
// Returns an empty vector (after logging an error) when the list file cannot
// be opened.
std::vector<std::string> readFileList(const std::string &list_file) {
  std::vector<std::string> files;
  std::ifstream file(list_file);
  if (!file.is_open()) {
    spdlog::error("Could not open list file {}", list_file);
    return files;
  }

  std::string line;
  while (std::getline(file, line)) {
    // getline only removes '\n'; a leftover '\r' would become part of the
    // path and make the existence check below fail for every entry.
    const auto last_kept = line.find_last_not_of(" \t\r");
    line.erase(last_kept == std::string::npos ? 0 : last_kept + 1);

    if (line.empty() || line[0] == '#')
      continue;
    if (line.find('*') != std::string::npos) {
      spdlog::info("Skipping wildcard pattern: {}", line);
      continue;
    }
    if (std::filesystem::exists(line)) {
      files.push_back(line);
    } else {
      spdlog::warn("File not found: {}", line);
    }
  }
  return files;
}
// Parses `filepath` as C++ and stores the functions/imports found in it.
//
// The existing Functions and Imports rows for the file are removed first,
// then one row is inserted per entry reported by findFunctions, each tagged
// with `filepath`. Returns false, after logging an error, when the file
// cannot be opened or parsed; true otherwise.
bool processFile(const std::string &filepath, DatabaseManager &db,
                 FileType file_type) {
  std::ifstream file(filepath);
  if (!file.is_open()) {
    spdlog::error("Could not open file {}", filepath);
    return false;
  }
  std::string file_content((std::istreambuf_iterator<char>(file)),
                           std::istreambuf_iterator<char>());

  TSParser *parser = ts_parser_new();
  ts_parser_set_language(parser, tree_sitter_cpp());
  TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(),
                                        file_content.length());
  // The parser returns a null tree when it could not parse at all; taking the
  // root node of a null tree would dereference it, so check the tree first.
  if (tree == nullptr) {
    spdlog::error("Failed to parse file {}", filepath);
    ts_parser_delete(parser);
    return false;
  }

  TSNode root_node = ts_tree_root_node(tree);
  if (ts_node_is_null(root_node)) {
    spdlog::error("Failed to parse file {}", filepath);
    ts_tree_delete(tree);
    ts_parser_delete(parser);
    return false;
  }

  db.clearEntriesForFile(filepath);

  std::vector<FunctionInfo> functions;
  findFunctions(root_node, file_content.c_str(), file_content.length(),
                functions, file_type);
  for (auto &func : functions) {
    func.filepath = filepath;
    db.insertFunction(func);
    spdlog::debug("{}: {} @ {} in {} (type: {})",
                  func.is_import ? "Import" : "Function", func.name,
                  func.address, filepath, fileTypeToString(func.type));
  }
  spdlog::info("Processed {} functions/imports from {} (type: {})",
               functions.size(), filepath, fileTypeToString(file_type));

  ts_tree_delete(tree);
  ts_parser_delete(parser);
  return true;
}
// Parses `filepath` as C++ and stores the globals found in it.
//
// The existing Globals rows for the file are removed first, then one row is
// inserted per entry reported by findGlobals, each tagged with `filepath`.
// Returns false, after logging an error, when the file cannot be opened or
// parsed; true otherwise.
bool processGlobalsFile(const std::string &filepath, DatabaseManager &db) {
  std::ifstream file(filepath);
  if (!file.is_open()) {
    spdlog::error("Could not open file {}", filepath);
    return false;
  }
  std::string file_content((std::istreambuf_iterator<char>(file)),
                           std::istreambuf_iterator<char>());

  TSParser *parser = ts_parser_new();
  ts_parser_set_language(parser, tree_sitter_cpp());
  TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(),
                                        file_content.length());
  // The parser returns a null tree when it could not parse at all; taking the
  // root node of a null tree would dereference it, so check the tree first.
  if (tree == nullptr) {
    spdlog::error("Failed to parse file {}", filepath);
    ts_parser_delete(parser);
    return false;
  }

  TSNode root_node = ts_tree_root_node(tree);
  if (ts_node_is_null(root_node)) {
    spdlog::error("Failed to parse file {}", filepath);
    ts_tree_delete(tree);
    ts_parser_delete(parser);
    return false;
  }

  db.clearGlobalsForFile(filepath);

  std::vector<GlobalInfo> globals;
  findGlobals(root_node, file_content.c_str(), file_content.length(), globals);
  for (auto &global : globals) {
    global.filepath = filepath;
    db.insertGlobal(global);
    spdlog::debug("Global: {} @ {} in {}", global.name, global.address,
                  filepath);
  }
  spdlog::info("Processed {} globals from {}", globals.size(), filepath);

  ts_tree_delete(tree);
  ts_parser_delete(parser);
  return true;
}

296
tooling/parser.cpp Normal file
View File

@ -0,0 +1,296 @@
#include "tool.hpp"
#include <cstring>
#include <stdexcept>
#include <spdlog/spdlog.h>
// Global address regex pattern
const std::regex ADDRESS_REGEX(R"(//\s*([0-9a-fA-F]{8}))");
// Translates the textual file type ("auto", "fix", "stub", "ref") into the
// matching FileType value. Any other string raises std::invalid_argument.
FileType stringToFileType(const std::string &type_str) {
  struct Mapping {
    const char *label;
    FileType value;
  };
  static const Mapping kMappings[] = {
      {"auto", FileType::Auto},
      {"fix", FileType::Fix},
      {"stub", FileType::Stub},
      {"ref", FileType::Ref},
  };

  for (const Mapping &mapping : kMappings) {
    if (type_str == mapping.label) {
      return mapping.value;
    }
  }
  throw std::invalid_argument("Invalid file type: " + type_str);
}
// Returns the lowercase name of `type` ("auto", "fix", "stub" or "ref").
// A value outside the enumeration raises std::logic_error carrying its
// integer value.
std::string fileTypeToString(FileType type) {
  if (type == FileType::Auto) {
    return "auto";
  }
  if (type == FileType::Fix) {
    return "fix";
  }
  if (type == FileType::Stub) {
    return "stub";
  }
  if (type == FileType::Ref) {
    return "ref";
  }
  throw std::logic_error("Invalid file type: " +
                         std::to_string(static_cast<int>(type)));
}
// True when `comment` contains a match for ADDRESS_REGEX anywhere in its text.
bool hasAddressPattern(const std::string &comment) {
  const bool matched =
      std::regex_search(comment.cbegin(), comment.cend(), ADDRESS_REGEX);
  return matched;
}
// Returns the first capture group of the first ADDRESS_REGEX match in
// `comment`, or an empty string when nothing matches.
std::string extractAddress(const std::string &comment) {
  std::smatch captures;
  if (!std::regex_search(comment, captures, ADDRESS_REGEX)) {
    return "";
  }
  return captures[1].str();
}
// Copies the bytes of `source_code` covered by `node`, from its start byte up
// to (not including) its end byte, into a new string.
std::string extractNodeText(TSNode node, const char *source_code) {
  const uint32_t first_byte = ts_node_start_byte(node);
  const uint32_t byte_count = ts_node_end_byte(node) - first_byte;
  const char *text_begin = source_code + first_byte;
  return std::string(text_begin, byte_count);
}
// Returns the text of the first direct child of `node` whose type is
// "identifier", or an empty string when no direct child is an identifier.
std::string findIdentifierInNode(TSNode node, const char *source_code) {
  const uint32_t total = ts_node_child_count(node);
  for (uint32_t index = 0; index < total; ++index) {
    const TSNode candidate = ts_node_child(node, index);
    if (strcmp(ts_node_type(candidate), "identifier") != 0) {
      continue;
    }
    return extractNodeText(candidate, source_code);
  }
  return "";
}
// Depth-first search for the first "identifier" node in the subtree rooted at
// `node` (the node itself included). Returns its text, or an empty string if
// the subtree holds no identifier with non-empty text.
std::string findIdentifierInDeclarator(TSNode node, const char *source_code) {
  if (strcmp(ts_node_type(node), "identifier") == 0) {
    return extractNodeText(node, source_code);
  }

  const uint32_t total = ts_node_child_count(node);
  for (uint32_t index = 0; index < total; ++index) {
    std::string nested =
        findIdentifierInDeclarator(ts_node_child(node, index), source_code);
    if (nested.empty()) {
      continue;
    }
    return nested;
  }
  return "";
}
// Finds the name of the function declared or defined by `node` by scanning
// its direct children in order:
//  - an "identifier" child ends the search with its text;
//  - a "function_declarator" child is searched for a direct identifier;
//  - a "pointer_declarator" child is searched recursively.
// Returns an empty string when no name is found.
std::string getFunctionName(TSNode node, const char *source_code) {
  const uint32_t total = ts_node_child_count(node);
  for (uint32_t index = 0; index < total; ++index) {
    const TSNode candidate = ts_node_child(node, index);
    const char *kind = ts_node_type(candidate);

    if (strcmp(kind, "identifier") == 0) {
      return extractNodeText(candidate, source_code);
    }

    std::string found;
    if (strcmp(kind, "function_declarator") == 0) {
      found = findIdentifierInNode(candidate, source_code);
    } else if (strcmp(kind, "pointer_declarator") == 0) {
      found = getFunctionName(candidate, source_code);
    }
    if (!found.empty()) {
      return found;
    }
  }
  return "";
}
// Finds the name of the variable declared by `node` by scanning its direct
// children in order. A direct "identifier" child ends the search with its
// text; a child of one of the declarator kinds listed below is searched
// recursively for its first identifier. Returns an empty string when no name
// is found.
std::string getGlobalName(TSNode node, const char *source_code) {
  static const char *const kDeclaratorKinds[] = {
      "init_declarator",      "declarator",
      "reference_declarator", "pointer_declarator",
      "parenthesized_declarator", "array_declarator",
  };

  const uint32_t total = ts_node_child_count(node);
  for (uint32_t index = 0; index < total; ++index) {
    const TSNode candidate = ts_node_child(node, index);
    const char *kind = ts_node_type(candidate);

    if (strcmp(kind, "identifier") == 0) {
      return extractNodeText(candidate, source_code);
    }

    bool is_declarator = false;
    for (const char *declarator_kind : kDeclaratorKinds) {
      if (strcmp(kind, declarator_kind) == 0) {
        is_declarator = true;
        break;
      }
    }
    if (!is_declarator) {
      continue;
    }

    std::string found = findIdentifierInDeclarator(candidate, source_code);
    if (!found.empty()) {
      return found;
    }
  }
  return "";
}
// Returns the text of the nearest comment sibling that contains an address
// pattern, or an empty string when there is none.
//
// Siblings are walked one at a time. "comment" nodes without an address and
// "ERROR" nodes are stepped over; any other node type ends that walk.
//  - search_before == false: only the siblings following `node` are walked.
//  - search_before == true: the siblings preceding `node` are walked, then
//    those preceding its parent, and so on up to the root.
// `source_length` is not used.
std::string getComment(TSNode node, const char *source_code,
                       uint32_t source_length, bool search_before) {
  if (!search_before) {
    for (TSNode sibling = ts_node_next_sibling(node); !ts_node_is_null(sibling);
         sibling = ts_node_next_sibling(sibling)) {
      const char *kind = ts_node_type(sibling);
      if (strcmp(kind, "comment") == 0) {
        std::string text = extractNodeText(sibling, source_code);
        if (hasAddressPattern(text)) {
          return text;
        }
        continue;
      }
      if (strcmp(kind, "ERROR") != 0) {
        break;
      }
    }
    return "";
  }

  for (TSNode scope = node; !ts_node_is_null(scope);
       scope = ts_node_parent(scope)) {
    for (TSNode sibling = ts_node_prev_sibling(scope);
         !ts_node_is_null(sibling); sibling = ts_node_prev_sibling(sibling)) {
      const char *kind = ts_node_type(sibling);
      if (strcmp(kind, "comment") == 0) {
        std::string text = extractNodeText(sibling, source_code);
        if (hasAddressPattern(text)) {
          return text;
        }
        continue;
      }
      if (strcmp(kind, "ERROR") != 0) {
        break;
      }
    }
  }
  return "";
}
// True when `node` is a "function_definition" that has a direct
// "compound_statement" child; false for every other node.
bool hasFunctionBody(TSNode node) {
  const bool is_definition =
      strcmp(ts_node_type(node), "function_definition") == 0;
  if (!is_definition) {
    return false;
  }

  const uint32_t total = ts_node_child_count(node);
  uint32_t index = 0;
  while (index < total) {
    const char *kind = ts_node_type(ts_node_child(node, index));
    if (strcmp(kind, "compound_statement") == 0) {
      return true;
    }
    ++index;
  }
  return false;
}
// Walks the subtree under `node` and appends a FunctionInfo to `functions`
// for every named "function_definition" or "declaration" that carries an
// address comment.
//
// The address is taken from a comment after the node; for definitions a
// comment before the node is tried as a fallback. An entry counts as an
// import when it is a declaration, or a definition without a body. Once a
// name has been found the node's children are not visited. When no name can
// be found an error is logged and the search continues into the children.
void findFunctions(TSNode node, const char *source_code, uint32_t source_length,
                   std::vector<FunctionInfo> &functions, FileType file_type) {
  const char *kind = ts_node_type(node);
  const bool is_definition = strcmp(kind, "function_definition") == 0;
  const bool is_declaration = strcmp(kind, "declaration") == 0;

  if (is_definition || is_declaration) {
    const std::string name = getFunctionName(node, source_code);
    if (name.empty()) {
      spdlog::error("Failed to get function name for {}",
                    extractNodeText(node, source_code));
    } else {
      std::string address =
          extractAddress(getComment(node, source_code, source_length, false));
      if (address.empty() && is_definition) {
        address =
            extractAddress(getComment(node, source_code, source_length, true));
      }
      if (!address.empty()) {
        const bool is_import = !is_definition || !hasFunctionBody(node);
        functions.push_back(
            FunctionInfo{name, address, "", is_import, file_type});
      }
      // Function declarations are never nested, so stop descending here.
      return;
    }
  }

  const uint32_t total = ts_node_child_count(node);
  for (uint32_t index = 0; index < total; ++index) {
    findFunctions(ts_node_child(node, index), source_code, source_length,
                  functions, file_type);
  }
}
// Walks the subtree under `node` and appends a GlobalInfo to `globals` for
// every extern "declaration" that has a name and is followed by an address
// comment.
//
// A declaration counts as extern when one of its direct children is a
// "storage_class_specifier" whose text is "extern". Extern declarations are
// not descended into; an extern declaration without a discoverable name is
// reported as an error.
void findGlobals(TSNode node, const char *source_code, uint32_t source_length,
                 std::vector<GlobalInfo> &globals) {
  if (strcmp(ts_node_type(node), "declaration") == 0) {
    bool is_extern = false;
    const uint32_t specifier_count = ts_node_child_count(node);
    for (uint32_t index = 0; index < specifier_count && !is_extern; ++index) {
      const TSNode candidate = ts_node_child(node, index);
      if (strcmp(ts_node_type(candidate), "storage_class_specifier") != 0) {
        continue;
      }
      is_extern = extractNodeText(candidate, source_code) == "extern";
    }

    if (is_extern) {
      const std::string name = getGlobalName(node, source_code);
      if (name.empty()) {
        std::string declaration_text = extractNodeText(node, source_code);
        spdlog::error("Failed to get global name for {}", declaration_text);
      } else {
        // The address is expected in a comment after the declaration.
        const std::string address =
            extractAddress(getComment(node, source_code, source_length, false));
        if (!address.empty()) {
          globals.push_back(GlobalInfo{name, address, ""});
        }
      }
      return;
    }
  }

  const uint32_t total = ts_node_child_count(node);
  for (uint32_t index = 0; index < total; ++index) {
    findGlobals(ts_node_child(node, index), source_code, source_length,
                globals);
  }
}

View File

@ -1,926 +1,45 @@
#include <string>
#include <cstring>
#include <fstream>
#include <sstream>
#include <vector>
#include <regex>
#include <sqlite3.h>
#include "tool.hpp"
#include <filesystem>
#include <memory>
#include <tree_sitter/api.h>
#include <spdlog/spdlog.h>
#include <spdlog/sinks/stdout_color_sinks.h>
#include <spdlog/sinks/basic_file_sink.h>
#include <CLI11.hpp>
extern "C" TSLanguage *tree_sitter_cpp();
// Global address regex pattern
const std::regex ADDRESS_REGEX(R"(//\s*([0-9a-fA-F]{8}))");
// Kind of source file being scanned; selected with the type strings "auto",
// "fix", "stub" and "ref".
// The integer value of each enumerator is what gets written to the `type`
// column on insert (static_cast<int>), and the duplicate checks filter rows
// with `type != 3`, which corresponds to Ref with the implicit numbering
// below. Reordering the enumerators would change both.
enum class FileType { Auto, Fix, Stub, Ref };
// Parses one of "auto", "fix", "stub" or "ref" into its FileType value and
// throws std::invalid_argument for anything else.
FileType stringToFileType(const std::string &type_str) {
  if (type_str == "auto") {
    return FileType::Auto;
  } else if (type_str == "fix") {
    return FileType::Fix;
  } else if (type_str == "stub") {
    return FileType::Stub;
  } else if (type_str == "ref") {
    return FileType::Ref;
  } else {
    throw std::invalid_argument("Invalid file type: " + type_str);
  }
}
// Gives the lowercase name of `type`; a value that is not one of the four
// enumerators raises std::logic_error mentioning its integer value.
std::string fileTypeToString(FileType type) {
  const char *label = nullptr;
  switch (type) {
  case FileType::Auto:
    label = "auto";
    break;
  case FileType::Fix:
    label = "fix";
    break;
  case FileType::Stub:
    label = "stub";
    break;
  case FileType::Ref:
    label = "ref";
    break;
  default:
    break;
  }
  if (label == nullptr) {
    throw std::logic_error(
        fmt::format("Invalid file type: {}", static_cast<int>(type)));
  }
  return label;
}
// Reports whether ADDRESS_REGEX matches somewhere inside `comment`.
bool hasAddressPattern(const std::string &comment) {
  std::smatch ignored_match;
  return std::regex_search(comment, ignored_match, ADDRESS_REGEX);
}
// Returns the slice of `source_code` between the start byte and the end byte
// of `node`.
std::string extractNodeText(TSNode node, const char *source_code) {
  const uint32_t begin_offset = ts_node_start_byte(node);
  const uint32_t end_offset = ts_node_end_byte(node);
  std::string text;
  text.assign(source_code + begin_offset, end_offset - begin_offset);
  return text;
}
// Scans the direct children of `node` in order and returns the text of the
// first one typed "identifier"; returns an empty string if there is none.
std::string findIdentifierInNode(TSNode node, const char *source_code) {
  const uint32_t total = ts_node_child_count(node);
  uint32_t position = 0;
  while (position < total) {
    const TSNode candidate = ts_node_child(node, position);
    const bool is_identifier =
        strcmp(ts_node_type(candidate), "identifier") == 0;
    if (is_identifier) {
      return extractNodeText(candidate, source_code);
    }
    ++position;
  }
  return "";
}
// One function or import found in a scanned file. It stays an aggregate, so
// brace initialization in member order keeps working.
struct FunctionInfo {
  std::string name;     // Function identifier
  std::string address;  // Address text captured from the address comment
  std::string filepath; // File the entry belongs to
  // Defaults keep a default-constructed FunctionInfo from carrying
  // indeterminate values in its non-class members.
  bool is_import = false;          // Stored in Imports when true, else Functions
  FileType type = FileType::Auto;  // File type recorded with the entry
};
// One global variable destined for the Globals table; the three fields map
// to the name, address and filepath columns written by insertGlobal.
struct GlobalInfo {
std::string name;
std::string address;
std::string filepath;
};
// Owns the prepared statements used to delete and insert rows of the
// Functions, Imports and Globals tables, and exposes one method per
// operation. Statements are prepared in the constructor and finalized in the
// destructor.
class PreparedStatements {
private:
  sqlite3 *db;
  // Null-initialized so that finalizing is safe even if preparation stopped
  // part-way; sqlite3_finalize on a null statement is a no-op.
  sqlite3_stmt *delete_functions_stmt = nullptr;
  sqlite3_stmt *delete_imports_stmt = nullptr;
  sqlite3_stmt *insert_functions_stmt = nullptr;
  sqlite3_stmt *insert_imports_stmt = nullptr;
  sqlite3_stmt *delete_globals_stmt = nullptr;
  sqlite3_stmt *insert_globals_stmt = nullptr;

  // Compiles `sql` into `*stmt`; throws std::runtime_error built from
  // `error_msg` and the SQLite error text on failure.
  void prepareStatement(const char *sql, sqlite3_stmt **stmt,
                        const std::string &error_msg) {
    if (sqlite3_prepare_v2(db, sql, -1, stmt, nullptr) != SQLITE_OK) {
      throw std::runtime_error(error_msg + ": " + sqlite3_errmsg(db));
    }
  }

  // Finalizes every statement.
  void finalizeAll() {
    sqlite3_finalize(delete_functions_stmt);
    sqlite3_finalize(delete_imports_stmt);
    sqlite3_finalize(insert_functions_stmt);
    sqlite3_finalize(insert_imports_stmt);
    sqlite3_finalize(delete_globals_stmt);
    sqlite3_finalize(insert_globals_stmt);
  }

public:
  // Prepares every statement against `database`. Throws std::runtime_error if
  // any of them fails to compile.
  explicit PreparedStatements(sqlite3 *database) : db(database) {
    try {
      prepareStatement("DELETE FROM Functions WHERE filepath = ?",
                       &delete_functions_stmt,
                       "Failed to prepare delete functions statement");
      prepareStatement("DELETE FROM Imports WHERE filepath = ?",
                       &delete_imports_stmt,
                       "Failed to prepare delete imports statement");
      prepareStatement("INSERT OR REPLACE INTO Functions (filepath, name, "
                       "address, type) VALUES (?, ?, ?, ?)",
                       &insert_functions_stmt,
                       "Failed to prepare insert functions statement");
      prepareStatement("INSERT OR REPLACE INTO Imports (filepath, name, address, "
                       "type) VALUES (?, ?, ?, ?)",
                       &insert_imports_stmt,
                       "Failed to prepare insert imports statement");
      prepareStatement("DELETE FROM Globals WHERE filepath = ?",
                       &delete_globals_stmt,
                       "Failed to prepare delete globals statement");
      prepareStatement("INSERT OR REPLACE INTO Globals (filepath, name, address) "
                       "VALUES (?, ?, ?)",
                       &insert_globals_stmt,
                       "Failed to prepare insert globals statement");
    } catch (...) {
      // The destructor does not run for a constructor that throws, so release
      // whatever was prepared before the failure.
      finalizeAll();
      throw;
    }
  }

  // Copying would finalize every statement twice.
  PreparedStatements(const PreparedStatements &) = delete;
  PreparedStatements &operator=(const PreparedStatements &) = delete;

  ~PreparedStatements() { finalizeAll(); }

  // Deletes the Functions and Imports rows recorded for `filepath`.
  void clearEntriesForFile(const std::string &filepath) {
    for (auto stmt : {delete_functions_stmt, delete_imports_stmt}) {
      sqlite3_reset(stmt);
      sqlite3_bind_text(stmt, 1, filepath.c_str(), -1, SQLITE_STATIC);
      sqlite3_step(stmt);
    }
  }

  // Deletes the Globals rows recorded for `filepath`.
  void clearGlobalsForFile(const std::string &filepath) {
    sqlite3_reset(delete_globals_stmt);
    sqlite3_bind_text(delete_globals_stmt, 1, filepath.c_str(), -1,
                      SQLITE_STATIC);
    sqlite3_step(delete_globals_stmt);
  }

  // Inserts or replaces `func` in Imports (when func.is_import) or Functions.
  void insertFunction(const FunctionInfo &func) {
    sqlite3_stmt *stmt =
        func.is_import ? insert_imports_stmt : insert_functions_stmt;
    sqlite3_reset(stmt);
    sqlite3_bind_text(stmt, 1, func.filepath.c_str(), -1, SQLITE_STATIC);
    sqlite3_bind_text(stmt, 2, func.name.c_str(), -1, SQLITE_STATIC);
    sqlite3_bind_text(stmt, 3, func.address.c_str(), -1, SQLITE_STATIC);
    sqlite3_bind_int(stmt, 4, static_cast<int>(func.type));
    sqlite3_step(stmt);
  }

  // Inserts `global` into Globals.
  void insertGlobal(const GlobalInfo &global) {
    sqlite3_reset(insert_globals_stmt);
    sqlite3_bind_text(insert_globals_stmt, 1, global.filepath.c_str(), -1,
                      SQLITE_STATIC);
    sqlite3_bind_text(insert_globals_stmt, 2, global.name.c_str(), -1,
                      SQLITE_STATIC);
    sqlite3_bind_text(insert_globals_stmt, 3, global.address.c_str(), -1,
                      SQLITE_STATIC);
    sqlite3_step(insert_globals_stmt);
  }
};
class DatabaseManager {
private:
sqlite3 *db;
std::unique_ptr<PreparedStatements> prepared_stmts;
public:
DatabaseManager(const std::string &db_path) : db(nullptr) {
if (sqlite3_open(db_path.c_str(), &db) != SQLITE_OK) {
spdlog::error("Can't open database: {}", sqlite3_errmsg(db));
sqlite3_close(db);
throw std::runtime_error("Failed to open database");
}
const char *create_tables = R"(
CREATE TABLE IF NOT EXISTS Functions (filepath TEXT, name TEXT, address TEXT, type INTEGER DEFAULT 0, PRIMARY KEY (name, filepath));
CREATE TABLE IF NOT EXISTS Imports (filepath TEXT, name TEXT, address TEXT, type INTEGER DEFAULT 0, PRIMARY KEY (name, filepath));
CREATE TABLE IF NOT EXISTS Globals (filepath TEXT, name TEXT, address TEXT);
)";
sqlite3_exec(db, create_tables, nullptr, nullptr, nullptr);
prepared_stmts = std::make_unique<PreparedStatements>(db);
}
~DatabaseManager() {
if (db)
sqlite3_close(db);
}
void clearEntriesForFile(const std::string &filepath) {
prepared_stmts->clearEntriesForFile(filepath);
}
void insertFunction(const FunctionInfo &func) {
prepared_stmts->insertFunction(func);
}
void clearGlobalsForFile(const std::string &filepath) {
prepared_stmts->clearGlobalsForFile(filepath);
}
void insertGlobal(const GlobalInfo &global) {
prepared_stmts->insertGlobal(global);
}
void beginTransaction() {
sqlite3_exec(db, "BEGIN TRANSACTION", nullptr, nullptr, nullptr);
}
void commitTransaction() {
sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr);
}
void rollbackTransaction() {
sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr);
}
// New methods for duplicate checking
bool checkDuplicateAddresses() {
const char *sql = R"(
WITH all_addresses AS (
SELECT 'Functions' as table_name, name, address, filepath FROM Functions WHERE address != '' AND type != 3
UNION ALL
SELECT 'Globals' as table_name, name, address, filepath FROM Globals WHERE address != ''
)
SELECT address, COUNT(*) as count,
GROUP_CONCAT(table_name || ':' || name || ' (' || filepath || ')', '; ') as entries
FROM all_addresses
GROUP BY address
HAVING COUNT(*) > 1
ORDER BY address;
)";
sqlite3_stmt *stmt;
if (sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr) != SQLITE_OK) {
spdlog::error("Failed to prepare duplicate address query: {}",
sqlite3_errmsg(db));
return false;
}
bool found_duplicates = false;
while (sqlite3_step(stmt) == SQLITE_ROW) {
found_duplicates = true;
const char *address = (const char *)sqlite3_column_text(stmt, 0);
int count = sqlite3_column_int(stmt, 1);
const char *entries = (const char *)sqlite3_column_text(stmt, 2);
spdlog::error("DUPLICATE ADDRESS: {} appears {} times in: {}", address,
count, entries);
}
sqlite3_finalize(stmt);
return found_duplicates;
}
bool checkDuplicateNames() {
bool found_duplicates = false;
// Check Functions table
const char *functions_sql = R"(
SELECT name, COUNT(*) as count,
GROUP_CONCAT(filepath, '; ') as filepaths
FROM Functions
WHERE type != 3
GROUP BY name
HAVING COUNT(*) > 1
ORDER BY name;
)";
sqlite3_stmt *stmt;
if (sqlite3_prepare_v2(db, functions_sql, -1, &stmt, nullptr) ==
SQLITE_OK) {
while (sqlite3_step(stmt) == SQLITE_ROW) {
found_duplicates = true;
const char *name = (const char *)sqlite3_column_text(stmt, 0);
int count = sqlite3_column_int(stmt, 1);
const char *filepaths = (const char *)sqlite3_column_text(stmt, 2);
spdlog::error(
"DUPLICATE FUNCTION NAME: '{}' appears {} times in files: {}", name,
count, filepaths);
}
sqlite3_finalize(stmt);
}
// Check Globals table
const char *globals_sql = R"(
SELECT name, COUNT(*) as count,
GROUP_CONCAT(filepath, '; ') as filepaths
FROM Globals
GROUP BY name
HAVING COUNT(*) > 1
ORDER BY name;
)";
if (sqlite3_prepare_v2(db, globals_sql, -1, &stmt, nullptr) == SQLITE_OK) {
while (sqlite3_step(stmt) == SQLITE_ROW) {
found_duplicates = true;
const char *name = (const char *)sqlite3_column_text(stmt, 0);
int count = sqlite3_column_int(stmt, 1);
const char *filepaths = (const char *)sqlite3_column_text(stmt, 2);
spdlog::error(
"DUPLICATE GLOBAL NAME: '{}' appears {} times in files: {}", name,
count, filepaths);
}
sqlite3_finalize(stmt);
}
return found_duplicates;
}
};
// Returns capture group 1 of the first ADDRESS_REGEX match found in
// `comment`, or an empty string when the pattern does not occur.
std::string extractAddress(const std::string &comment) {
  std::smatch found;
  if (!std::regex_search(comment, found, ADDRESS_REGEX)) {
    return "";
  }
  return found[1].str();
}
// Looks through the direct children of `node` for the declared name.
//
// - an `identifier` child is returned as-is;
// - a `function_declarator` child is searched with findIdentifierInNode;
// - a `pointer_declarator` child is searched by recursing into it.
// A declarator that yields no name is skipped and the scan continues.
// Returns "" when no child produces a name.
std::string getFunctionName(TSNode node, const char *source_code) {
  const uint32_t total = ts_node_child_count(node);
  for (uint32_t idx = 0; idx < total; ++idx) {
    TSNode candidate = ts_node_child(node, idx);
    const char *kind = ts_node_type(candidate);

    if (strcmp(kind, "identifier") == 0) {
      return extractNodeText(candidate, source_code);
    }

    std::string found;
    if (strcmp(kind, "function_declarator") == 0) {
      found = findIdentifierInNode(candidate, source_code);
    } else if (strcmp(kind, "pointer_declarator") == 0) {
      found = getFunctionName(candidate, source_code);
    }
    if (!found.empty()) {
      return found;
    }
  }
  return "";
}
// Finds the nearest comment next to `node` that carries an address pattern.
//
// search_before == false: scans the siblings following `node`.
// search_before == true:  scans the siblings preceding `node`, then repeats
//                         the backwards scan for each ancestor in turn.
// A sibling scan skips ERROR nodes and comments without an address, and stops
// at the first sibling that is neither a comment nor an ERROR node.
// Returns the comment text, or "" when nothing matched.
// `source_length` is accepted for interface compatibility but not used.
std::string getComment(TSNode node, const char *source_code,
                       uint32_t source_length, bool search_before) {
  // Scans siblings of `start` in the direction given by `advance`; on a hit
  // stores the comment in `out` and returns true.
  const auto scanSiblings = [source_code](TSNode start,
                                          TSNode (*advance)(TSNode),
                                          std::string &out) -> bool {
    for (TSNode sib = advance(start); !ts_node_is_null(sib);
         sib = advance(sib)) {
      const char *kind = ts_node_type(sib);
      if (strcmp(kind, "comment") == 0) {
        std::string text = extractNodeText(sib, source_code);
        if (hasAddressPattern(text)) {
          out = text;
          return true;
        }
      } else if (strcmp(kind, "ERROR") != 0) {
        // Real code sits between us and any further comment: give up here.
        break;
      }
    }
    return false;
  };

  std::string result;
  if (!search_before) {
    if (scanSiblings(node, ts_node_next_sibling, result)) {
      return result;
    }
    return "";
  }

  // Walk outwards through the ancestors, scanning backwards at each level.
  for (TSNode scope = node; !ts_node_is_null(scope);
       scope = ts_node_parent(scope)) {
    if (scanSiblings(scope, ts_node_prev_sibling, result)) {
      return result;
    }
  }
  return "";
}
// True when `node` is a `function_definition` that has a direct
// `compound_statement` child; false for every other node.
bool hasFunctionBody(TSNode node) {
  if (strcmp(ts_node_type(node), "function_definition") == 0) {
    const uint32_t total = ts_node_child_count(node);
    for (uint32_t idx = 0; idx < total; ++idx) {
      const char *kind = ts_node_type(ts_node_child(node, idx));
      if (strcmp(kind, "compound_statement") == 0) {
        return true;
      }
    }
  }
  return false;
}
// Collects address-annotated functions from the subtree rooted at `node`.
//
// For each `function_definition` or `declaration` whose name can be resolved:
//   - the address comes from a comment after the node; definitions fall back
//     to a comment before the node;
//   - an entry is appended only when an address was found;
//   - the entry is marked as an import for declarations and for definitions
//     without a body;
//   - the subtree below that node is not visited.
// Nodes whose name cannot be resolved are logged and then searched like any
// other node. The `filepath` of appended entries is left empty.
void findFunctions(TSNode node, const char *source_code, uint32_t source_length,
                   std::vector<FunctionInfo> &functions, FileType file_type) {
  const char *kind = ts_node_type(node);
  const bool is_definition = strcmp(kind, "function_definition") == 0;
  const bool is_declaration = strcmp(kind, "declaration") == 0;

  if (is_definition || is_declaration) {
    const std::string symbol = getFunctionName(node, source_code);
    if (symbol.empty()) {
      spdlog::error("Failed to get function name for {}",
                    extractNodeText(node, source_code));
    } else {
      std::string addr =
          extractAddress(getComment(node, source_code, source_length, false));
      if (addr.empty() && is_definition) {
        addr =
            extractAddress(getComment(node, source_code, source_length, true));
      }
      if (!addr.empty()) {
        const bool is_import = is_definition ? !hasFunctionBody(node) : true;
        functions.push_back(
            FunctionInfo{symbol, addr, "", is_import, file_type});
      }
      // We'll never nest function declarations
      return;
    }
  }

  const uint32_t total = ts_node_child_count(node);
  for (uint32_t idx = 0; idx < total; ++idx) {
    findFunctions(ts_node_child(node, idx), source_code, source_length,
                  functions, file_type);
  }
}
// Reads a list file containing one path per line and returns the paths that
// exist on disk.
//
// Each line is trimmed of surrounding spaces, tabs and carriage returns, so
// list files with CRLF line endings or padded entries resolve correctly.
// Blank lines and lines starting with '#' are ignored; lines containing '*'
// are skipped with an info message; missing files produce a warning.
// Returns an empty vector (after logging) when the list file cannot be opened.
std::vector<std::string> readFileList(const std::string &list_file) {
  std::vector<std::string> files;
  std::ifstream file(list_file);
  if (!file.is_open()) {
    spdlog::error("Could not open list file {}", list_file);
    return files;
  }

  static const char *const kBlank = " \t\r";
  std::string line;
  while (std::getline(file, line)) {
    const std::string::size_type first = line.find_first_not_of(kBlank);
    if (first == std::string::npos)
      continue; // empty or whitespace-only line
    const std::string::size_type last = line.find_last_not_of(kBlank);
    line = line.substr(first, last - first + 1);

    if (line[0] == '#')
      continue;
    if (line.find('*') != std::string::npos) {
      spdlog::info("Skipping wildcard pattern: {}", line);
      continue;
    }
    if (std::filesystem::exists(line)) {
      files.push_back(line);
    } else {
      spdlog::warn("File not found: {}", line);
    }
  }
  return files;
}
bool processFile(const std::string &filepath, DatabaseManager &db,
FileType file_type) {
std::ifstream file(filepath);
if (!file.is_open()) {
spdlog::error("Could not open file {}", filepath);
return false;
}
std::string file_content((std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
TSParser *parser = ts_parser_new();
ts_parser_set_language(parser, tree_sitter_cpp());
TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(),
file_content.length());
TSNode root_node = ts_tree_root_node(tree);
if (ts_node_is_null(root_node)) {
spdlog::error("Failed to parse file {}", filepath);
ts_tree_delete(tree);
ts_parser_delete(parser);
return false;
}
db.clearEntriesForFile(filepath);
std::vector<FunctionInfo> functions;
findFunctions(root_node, file_content.c_str(), file_content.length(),
functions, file_type);
for (auto &func : functions) {
func.filepath = filepath;
db.insertFunction(func);
spdlog::debug("{}: {} @ {} in {} (type: {})",
func.is_import ? "Import" : "Function", func.name,
func.address, filepath, fileTypeToString(func.type));
}
spdlog::info("Processed {} functions/imports from {} (type: {})",
functions.size(), filepath, fileTypeToString(file_type));
ts_tree_delete(tree);
ts_parser_delete(parser);
return true;
}
// Depth-first search for the first `identifier` node at or below `node`.
// Returns that identifier's text, or "" when the subtree has none.
std::string findIdentifierInDeclarator(TSNode node, const char *source_code) {
  if (strcmp(ts_node_type(node), "identifier") == 0) {
    return extractNodeText(node, source_code);
  }

  const uint32_t total = ts_node_child_count(node);
  for (uint32_t idx = 0; idx < total; ++idx) {
    std::string nested =
        findIdentifierInDeclarator(ts_node_child(node, idx), source_code);
    if (!nested.empty()) {
      return nested;
    }
  }
  return "";
}
// Resolves the variable name declared by `node` from its direct children.
//
// Declarator children (init, plain, reference, pointer, parenthesized, array)
// are searched recursively for an identifier; a declarator without one is
// skipped. A direct `identifier` child is returned immediately.
// Returns "" when no child yields a name.
std::string getGlobalName(TSNode node, const char *source_code) {
  static const char *const kDeclaratorKinds[] = {
      "init_declarator",    "declarator",
      "reference_declarator", "pointer_declarator",
      "parenthesized_declarator", "array_declarator"};

  const uint32_t total = ts_node_child_count(node);
  for (uint32_t idx = 0; idx < total; ++idx) {
    TSNode child = ts_node_child(node, idx);
    const char *kind = ts_node_type(child);

    bool is_declarator = false;
    for (const char *declarator_kind : kDeclaratorKinds) {
      if (strcmp(kind, declarator_kind) == 0) {
        is_declarator = true;
        break;
      }
    }

    if (is_declarator) {
      std::string found = findIdentifierInDeclarator(child, source_code);
      if (!found.empty()) {
        return found;
      }
    } else if (strcmp(kind, "identifier") == 0) {
      return extractNodeText(child, source_code);
    }
  }
  return "";
}
// Collects address-annotated `extern` declarations from the subtree rooted
// at `node`.
//
// A `declaration` counts as extern when one of its direct children is a
// `storage_class_specifier` whose text is exactly "extern". For such a node
// the name is resolved with getGlobalName and the address is taken from a
// comment following the declaration; an entry is appended only when both are
// present. A missing name is logged. Extern declarations are not descended
// into; every other node is searched recursively.
// The `filepath` of appended entries is left empty.
void findGlobals(TSNode node, const char *source_code, uint32_t source_length,
                 std::vector<GlobalInfo> &globals) {
  const uint32_t total = ts_node_child_count(node);

  if (strcmp(ts_node_type(node), "declaration") == 0) {
    bool has_extern = false;
    for (uint32_t idx = 0; idx < total && !has_extern; ++idx) {
      TSNode part = ts_node_child(node, idx);
      if (strcmp(ts_node_type(part), "storage_class_specifier") == 0) {
        has_extern = extractNodeText(part, source_code) == "extern";
      }
    }

    if (has_extern) {
      const std::string symbol = getGlobalName(node, source_code);
      if (symbol.empty()) {
        std::string src = extractNodeText(node, source_code);
        SPDLOG_ERROR("Failed to get global name for {}", src);
      } else {
        // Only a comment after the declaration is considered here.
        const std::string addr =
            extractAddress(getComment(node, source_code, source_length, false));
        if (!addr.empty()) {
          globals.push_back(GlobalInfo{symbol, addr, ""});
        }
      }
      return;
    }
  }

  for (uint32_t idx = 0; idx < total; ++idx) {
    findGlobals(ts_node_child(node, idx), source_code, source_length, globals);
  }
}
bool processGlobalsFile(const std::string &filepath, DatabaseManager &db) {
std::ifstream file(filepath);
if (!file.is_open()) {
spdlog::error("Could not open file {}", filepath);
return false;
}
std::string file_content((std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
TSParser *parser = ts_parser_new();
ts_parser_set_language(parser, tree_sitter_cpp());
TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(),
file_content.length());
TSNode root_node = ts_tree_root_node(tree);
if (ts_node_is_null(root_node)) {
spdlog::error("Failed to parse file {}", filepath);
ts_tree_delete(tree);
ts_parser_delete(parser);
return false;
}
db.clearGlobalsForFile(filepath);
std::vector<GlobalInfo> globals;
findGlobals(root_node, file_content.c_str(), file_content.length(), globals);
for (auto &global : globals) {
global.filepath = filepath;
db.insertGlobal(global);
spdlog::debug("Global: {} @ {} in {}", global.name, global.address,
filepath);
}
spdlog::info("Processed {} globals from {}", globals.size(), filepath);
ts_tree_delete(tree);
ts_parser_delete(parser);
return true;
}
// Logs the syntax tree rooted at `node`, one line per node, indented two
// spaces per `depth` level.
//
// Each line shows the node type and its [start:end] byte range. Nodes that
// span fewer than 100 bytes also show their source text, with newlines
// written as the two characters `\n` and the text cut to 47 characters plus
// "..." when it is longer than 50.
void dumpTreeSitterAST(TSNode node, const char *source_code, int depth = 0) {
  std::string indent(depth * 2, ' ');
  const char *type = ts_node_type(node);
  uint32_t start = ts_node_start_byte(node);
  uint32_t end = ts_node_end_byte(node);

  // Get the text content for leaf nodes or small nodes
  std::string content;
  if (end - start < 100) { // Only show content for small nodes
    content = extractNodeText(node, source_code);
    // Escape newlines in place; a plain search avoids compiling a regex for
    // every node of the tree.
    for (std::string::size_type pos = content.find('\n');
         pos != std::string::npos; pos = content.find('\n', pos + 2)) {
      content.replace(pos, 1, "\\n");
    }
    // Truncate if still too long
    if (content.length() > 50) {
      content = content.substr(0, 47) + "...";
    }
  }

  if (!content.empty()) {
    spdlog::info("{}{}[{}:{}] \"{}\"", indent, type, start, end, content);
  } else {
    spdlog::info("{}{}[{}:{}]", indent, type, start, end);
  }

  // Recursively dump children
  uint32_t child_count = ts_node_child_count(node);
  for (uint32_t i = 0; i < child_count; i++) {
    TSNode child = ts_node_child(node, i);
    dumpTreeSitterAST(child, source_code, depth + 1);
  }
}
bool dumpTreeFile(const std::string &filepath) {
std::ifstream file(filepath);
if (!file.is_open()) {
spdlog::error("Could not open file {}", filepath);
return false;
}
std::string file_content((std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
TSParser *parser = ts_parser_new();
ts_parser_set_language(parser, tree_sitter_cpp());
TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(),
file_content.length());
TSNode root_node = ts_tree_root_node(tree);
if (ts_node_is_null(root_node)) {
spdlog::error("Failed to parse file {}", filepath);
ts_tree_delete(tree);
ts_parser_delete(parser);
return false;
}
spdlog::info("=== Tree-sitter AST for {} ===", filepath);
dumpTreeSitterAST(root_node, file_content.c_str());
spdlog::info("=== End of AST dump ===");
ts_tree_delete(tree);
ts_parser_delete(parser);
return true;
}
// Runs both duplicate checks (addresses first, then names) against `db` and
// logs a one-line verdict for each.
// Returns true only when neither check reported a duplicate.
bool processDuplicates(DatabaseManager &db) {
  spdlog::info("=== Checking for duplicate addresses ===");
  const bool addresses_clean = !db.checkDuplicateAddresses();
  if (addresses_clean) {
    spdlog::info("No duplicate addresses found in the database.");
  } else {
    spdlog::error("Found duplicate addresses in the database!");
  }

  spdlog::info("=== Checking for duplicate names ===");
  const bool names_clean = !db.checkDuplicateNames();
  if (names_clean) {
    spdlog::info("No duplicate names found in the database.");
  } else {
    spdlog::error("Found duplicate names in the database!");
  }

  return addresses_clean && names_clean;
}
// Sub-command registration hooks. Each one receives the root CLI app and is
// called by main() before the command line is parsed.
// NOTE(review): presumably defined in the matching cmd_*.cpp translation
// units listed in the build file — confirm.
void register_cmd_scan(CLI::App &app);
void register_cmd_dump(CLI::App &app);
void register_cmd_verify(CLI::App &app);
// Program entry point: configures spdlog, parses the command line with CLI11
// and dispatches on the selected mode.
//
// NOTE(review): the body below appears to interleave two revisions of main().
// `app` and `type_str` are each declared twice, a second batch of
// `app.add_option`/`add_flag` calls starts inside the dump-tree loop, and the
// `->each([&](std::string) {` lambda opened there is only closed after the
// catch block. As shown it will not compile — confirm against the real file
// before changing any logic here.
int main(int argc, char *argv[]) {
// Initialize spdlog
auto console = spdlog::stdout_color_mt("console");
spdlog::set_default_logger(console);
spdlog::set_level(spdlog::level::info); // Default to info level
spdlog::set_pattern("[%H:%M:%S] [%^%l%$] %v");
CLI::App app{"C++ Function/Global Parser - Extracts function addresses or "
"global variable addresses from C++ files"};
std::vector<std::string> input_files;
std::string list_file;
std::string db_path = "gh.db";
std::string mode = "functions";
std::string log_file = "";
std::string type_str = "auto"; // Add type string variable
bool verbose = false;
// Mode-based command line: positional files, list file, database path,
// processing mode, file type, verbosity and log file.
app.add_option("files", input_files,
"Input C++ files to parse (supports @listfile.txt syntax)");
app.add_option("-l,--list", list_file,
"File containing list of files to process");
app.add_option("-d,--database", db_path, "SQLite database path")
->default_val("gh.db");
app.add_option("-m,--mode", mode,
"Processing mode: 'functions', 'globals', 'duplicates', or "
"'dump-tree'")
->default_val("functions")
->check(
CLI::IsMember({"functions", "globals", "duplicates", "dump-tree"}));
app.add_option("-t,--type", type_str,
"File type: 'auto', 'fix', 'stub', or 'ref'")
->default_val("auto")
->check(CLI::IsMember({"auto", "fix", "stub", "ref"}));
app.add_flag("-v,--verbose", verbose, "Enable verbose logging (debug level)");
app.add_flag("--log-file", log_file, "Enable logging to file");
CLI11_PARSE(app, argc, argv);
// Convert string to FileType enum
FileType file_type;
try {
file_type = stringToFileType(type_str);
} catch (const std::invalid_argument &e) {
spdlog::error("Invalid file type: {}", type_str);
return 1;
}
// Set log level based on verbose flag
if (verbose) {
spdlog::set_level(spdlog::level::debug);
}
spdlog::set_pattern(std::string("[%^%l%$] %v"));
if (!log_file.empty()) {
auto log_sink =
std::make_shared<spdlog::sinks::basic_file_sink_mt>(log_file, true);
spdlog::get("console")->sinks().push_back(log_sink);
}
// NOTE(review): second declaration of `app` (and of `type_str` just below).
CLI::App app{"Ghidra decompile database tool"};
std::vector<std::string> files_to_process;
bool needFiles = mode != "duplicates";
std::string type_str = "auto";
// Every mode except "duplicates" needs at least one input file; entries
// starting with '@' are expanded through readFileList.
if (needFiles) {
if (!list_file.empty()) {
auto list_files = readFileList(list_file);
files_to_process.insert(files_to_process.end(), list_files.begin(),
list_files.end());
}
for (const auto &input : input_files) {
if (input.starts_with("@")) {
auto list_files = readFileList(input.substr(1));
files_to_process.insert(files_to_process.end(), list_files.begin(),
list_files.end());
} else if (std::filesystem::exists(input)) {
files_to_process.push_back(input);
} else {
spdlog::warn("File not found: {}", input);
}
}
if (files_to_process.empty()) {
spdlog::error("No files to process. Use --help for usage information.");
return 1;
}
}
try {
int processed_count = 0;
// For dump-tree mode, we don't need database operations
if (mode == "dump-tree") {
for (const auto &filepath : files_to_process) {
spdlog::info("=== Processing: {} ===", filepath);
if (dumpTreeFile(filepath)) {
processed_count++;
// NOTE(review): the option registration below does not belong to the
// dump-tree loop; the loop and `if` above are never closed here.
Options &options = Options::get();
app.add_option("-d,--database", options.db_path, "SQLite database path")
->default_val("gh.db");
app.add_flag("-v,--verbose", options.verbose,
"Enable verbose logging (debug level)")
->each([&](std::string) {
if (options.verbose) {
spdlog::set_level(spdlog::level::debug);
}
}
} else if (mode == "duplicates") {
DatabaseManager db(db_path);
// For duplicates mode, we only check the database, no file processing
spdlog::info("=== Checking database for duplicates ===");
bool has_duplicates = !processDuplicates(db);
spdlog::info("=== Summary ===");
spdlog::info("Mode: {}", mode);
spdlog::info("Database: {}", db_path);
return has_duplicates ? 1 : 0; // Return 1 if duplicates found, 0 if none
} else {
DatabaseManager db(db_path);
// Files are committed in batches of `batch_size`; every file counts
// towards the batch whether or not it was processed successfully.
const size_t batch_size = 50;
size_t current_batch = 0;
db.beginTransaction();
for (const auto &filepath : files_to_process) {
spdlog::info("=== Processing: {} (type: {}) ===", filepath, type_str);
bool success = false;
if (mode == "functions") {
success = processFile(filepath, db, file_type);
} else if (mode == "globals") {
success = processGlobalsFile(filepath, db);
}
if (success)
processed_count++;
if (++current_batch >= batch_size) {
db.commitTransaction();
spdlog::info("Committed batch of {} files to database",
current_batch);
db.beginTransaction();
current_batch = 0;
}
}
if (current_batch > 0) {
db.commitTransaction();
spdlog::info("Committed final batch of {} files to database",
current_batch);
}
}
spdlog::info("=== Summary ===");
spdlog::info("Processed {} files successfully", processed_count);
spdlog::info("Mode: {}", mode);
spdlog::info("File type: {}", type_str);
if (mode != "dump-tree") {
spdlog::info("Database saved to: {}", db_path);
}
} catch (const std::exception &e) {
spdlog::error("Database error: {}", e.what());
return 1;
}
// NOTE(review): this closes the `->each(` lambda opened inside the try block.
});
app.add_flag("--log-file", options.log_file, "Enable logging to file")
->each([&](std::string) {
auto log_sink = std::make_shared<spdlog::sinks::basic_file_sink_mt>(
options.log_file, true);
console->sinks().push_back(log_sink);
});
// Sub-command based command line: one sub-command is required, and each
// register_cmd_* call receives the root app before parsing.
app.require_subcommand();
register_cmd_scan(app);
register_cmd_dump(app);
register_cmd_verify(app);
CLI11_PARSE(app, argc, argv);
return 0;
}

90
tooling/tool.hpp Normal file
View File

@ -0,0 +1,90 @@
#pragma once
#include <string>
#include <vector>
#include <memory>
#include <regex>
#include <sqlite3.h>
#include <spdlog/spdlog.h>
#include <tree_sitter/api.h>
// Global constants
// Pattern searched for in comment text by extractAddress(); capture group 1
// of a match is what gets returned as the address. Declared here only — the
// definition lives in a source file.
extern const std::regex ADDRESS_REGEX;
// Enums
// Kind of source file a function record came from.
// The numeric value is what gets written to the `type` column of the
// database, and the duplicate-check queries filter on `type != 3` (Ref), so
// the values are spelled out to keep that coupling visible.
enum class FileType {
  Auto = 0,
  Fix = 1,
  Stub = 2,
  Ref = 3
};
// Data structures
// One function (or import) discovered in a source file.
// Remains an aggregate, so `FunctionInfo{name, address, filepath, is_import,
// type}` keeps working; the defaults only matter for default construction,
// where the two non-string members would otherwise be indeterminate.
struct FunctionInfo {
  std::string name;     // identifier of the function
  std::string address;  // address text extracted from the adjacent comment
  std::string filepath; // file the record came from; filled in by the caller
  // True for declarations and for definitions without a body; such records
  // are stored through the imports statement instead of the functions one.
  bool is_import = false;
  // File kind; stored as an integer in the `type` column.
  FileType type = FileType::Auto;
};
// One extern global variable discovered in a source file.
struct GlobalInfo {
  std::string name{};     // identifier of the variable
  std::string address{};  // address text extracted from the trailing comment
  std::string filepath{}; // file the record came from; filled in by the caller
};
// Utility functions

// Converts the textual file type to a FileType.
// NOTE(review): presumably throws std::invalid_argument for unknown text —
// confirm against the definition.
FileType stringToFileType(const std::string &type_str);
// Returns the textual form of `type`; used when formatting log messages.
std::string fileTypeToString(FileType type);
// Tells whether `comment` contains an address annotation; getComment() uses
// it to decide which comment to return.
bool hasAddressPattern(const std::string &comment);
// Returns capture group 1 of the first ADDRESS_REGEX match in `comment`, or
// an empty string when there is no match.
std::string extractAddress(const std::string &comment);
// Tree-sitter parsing functions
// `source_code` is the buffer the tree was parsed from in every function
// below.

// Returns the source text covered by `node`.
std::string extractNodeText(TSNode node, const char *source_code);
// Used by getFunctionName() to pull the name out of a function declarator.
// NOTE(review): exact search order is defined elsewhere — confirm.
std::string findIdentifierInNode(TSNode node, const char *source_code);
// Depth-first search for the first `identifier` at or below `node`; returns
// "" when there is none.
std::string findIdentifierInDeclarator(TSNode node, const char *source_code);
// Resolves the declared function name from the direct children of `node`;
// returns "" when no name is found.
std::string getFunctionName(TSNode node, const char *source_code);
// Resolves the declared variable name from the direct children of `node`;
// returns "" when no name is found.
std::string getGlobalName(TSNode node, const char *source_code);
// Returns the nearest comment carrying an address pattern, looking at the
// siblings after `node`, or (search_before) at the siblings before `node` and
// before each of its ancestors. Returns "" when nothing matches.
std::string getComment(TSNode node, const char *source_code, uint32_t source_length, bool search_before);
// True when `node` is a function_definition with a compound_statement child.
bool hasFunctionBody(TSNode node);
// Appends every address-annotated function/declaration below `node` to
// `functions`, tagging each entry with `file_type`.
void findFunctions(TSNode node, const char *source_code, uint32_t source_length,
std::vector<FunctionInfo> &functions, FileType file_type);
// Appends every address-annotated extern declaration below `node` to
// `globals`.
void findGlobals(TSNode node, const char *source_code, uint32_t source_length,
std::vector<GlobalInfo> &globals);
// Logs the syntax tree below `node`, indenting two spaces per `depth` level.
void dumpTreeSitterAST(TSNode node, const char *source_code, int depth = 0);
struct PreparedStatements;
// Owns the SQLite connection used by the tool together with the prepared
// statements built on it.
//
// The class holds a raw `sqlite3*`, so copying is disabled: the shared_ptr
// member would otherwise make the class implicitly copyable and two copies
// would refer to (and try to release) the same connection.
class DatabaseManager {
private:
  sqlite3 *db;
  std::shared_ptr<PreparedStatements> prepared_stmts;

public:
  // Opens the database at `db_path`.
  DatabaseManager(const std::string &db_path);
  ~DatabaseManager();

  DatabaseManager(const DatabaseManager &) = delete;
  DatabaseManager &operator=(const DatabaseManager &) = delete;

  // Removes the function and import rows recorded for `filepath`.
  void clearEntriesForFile(const std::string &filepath);
  // Stores one function or import record.
  void insertFunction(const FunctionInfo &func);
  // Removes the global rows recorded for `filepath`.
  void clearGlobalsForFile(const std::string &filepath);
  // Stores one global-variable record.
  void insertGlobal(const GlobalInfo &global);

  // Transaction control.
  void beginTransaction();
  void commitTransaction();
  void rollbackTransaction();

  // Duplicate checks; each returns true when duplicates were found.
  bool checkDuplicateAddresses();
  bool checkDuplicateNames();
};
// File processing functions

// Reads a list file (one path per line) and returns the listed paths that
// exist; '#' comment lines and wildcard lines are skipped.
std::vector<std::string> readFileList(const std::string &list_file);
// Parses `filepath`, replaces its function/import rows in `db` and returns
// false when the file cannot be opened or parsed.
bool processFile(const std::string &filepath, DatabaseManager &db, FileType file_type);
// Parses `filepath`, replaces its global rows in `db` and returns false when
// the file cannot be opened or parsed.
bool processGlobalsFile(const std::string &filepath, DatabaseManager &db);
// Logs the syntax tree of `filepath`; returns false when the file cannot be
// opened or parsed.
bool dumpTreeFile(const std::string &filepath);
// Runs the duplicate address and name checks; returns true when the database
// is free of duplicates.
bool processDuplicates(DatabaseManager &db);
// Process-wide command line settings, reachable through Options::get().
struct Options {
  std::string db_path{"gh.db"};  // SQLite database path
  std::string mode{"functions"}; // processing mode
  std::string log_file{""};      // log file path; empty means none was given
  bool verbose{false};           // verbose (debug level) logging requested

  // Returns the single shared instance, created on first use.
  static Options &get() {
    static Options instance;
    return instance;
  }
};