Compare commits
10 Commits
c20f540b47
...
a958b0268e
Author | SHA1 | Date |
---|---|---|
|
a958b0268e | |
|
ec5d40be0c | |
|
99aaebba82 | |
|
c9c9e9c6e6 | |
|
c7309d1b29 | |
|
4070a99bf8 | |
|
04b92cbd08 | |
|
f2ca59b3b1 | |
|
84ebde858e | |
|
c2397e8e24 |
|
@ -2,8 +2,8 @@
|
|||
path = game_re/third_party/spdlog
|
||||
url = https://github.com/gabime/spdlog.git
|
||||
[submodule "tooling2/third_party/tree-sitter"]
|
||||
path = tooling2/third_party/tree-sitter
|
||||
path = tooling/third_party/tree-sitter
|
||||
url = https://github.com/guusw/tree-sitter.git
|
||||
[submodule "tooling2/third_party/tree-sitter-cpp"]
|
||||
path = tooling2/third_party/tree-sitter-cpp
|
||||
path = tooling/third_party/tree-sitter-cpp
|
||||
url = https://github.com/guusw/tree-sitter-cpp.git
|
||||
|
|
|
@ -3,4 +3,6 @@ gh_ref/*
|
|||
*.bak
|
||||
gh_cmake_timestamp
|
||||
functions.dat
|
||||
*.zip
|
||||
*.zip
|
||||
.tmp
|
||||
.txt
|
|
@ -0,0 +1,19 @@
|
|||
#!/bin/bash
# Collects every *.cxx file under gh_auto, gh_stub and gh_fix (relative to this
# script's directory) into files.txt and passes that list to r3_gh_tool.

# Abort on the first failing command, including the path resolution below.
set -e

script_dir=$(readlink -f "$(dirname "$0")")
tool="$script_dir/../tooling/bin/r3_gh_tool"

# Work from the script directory so the relative globs below resolve.
pushd "$script_dir"

file_list=files.txt
rm -f "$file_list"
for dir in gh_auto gh_stub gh_fix; do
    # A glob with no match stays literal and is written as-is to the list.
    for file in "$dir"/*.cxx; do
        echo "$file" >>"$file_list"
    done
done

# The tool receives the list as a single @<file> argument.
"$tool" "@$file_list"

popd
|
|
@ -1,61 +1,20 @@
|
|||
cmake_minimum_required(VERSION 3.26.4)
|
||||
project(reman3_tooling)
|
||||
|
||||
set(LLVM_PATH "" CACHE STRING "Path to the LLVM source tree")
|
||||
|
||||
set(LLVM_ADD_PATH ${LLVM_PATH}/llvm)
|
||||
if(NOT EXISTS ${LLVM_ADD_PATH}/CMakeLists.txt)
|
||||
message(FATAL_ERROR "LLVM_PATH is invalid (${LLVM_PATH})")
|
||||
endif()
|
||||
cmake_minimum_required(VERSION 4.0)
|
||||
project(ShardsSandbox)
|
||||
|
||||
set(BUILD_SHARED_LIBS OFF)
|
||||
set(LLVM_INCLUDE_UTILS OFF CACHE BOOL "" FORCE)
|
||||
set(LLVM_INCLUDE_RUNTIMES OFF CACHE BOOL "" FORCE)
|
||||
set(LLVM_BUILD_RUNTIME OFF CACHE BOOL "" FORCE)
|
||||
set(LLVM_INCLUDE_TESTS OFF CACHE BOOL "" FORCE)
|
||||
set(LLVM_INCLUDE_EXAMPLES OFF CACHE BOOL "" FORCE)
|
||||
set(LLVM_INCLUDE_DOCS OFF CACHE BOOL "" FORCE)
|
||||
set(LLVM_ENABLE_OCAMLDOC OFF CACHE BOOL "" FORCE)
|
||||
set(LLVM_ENABLE_BINDINGS OFF CACHE BOOL "" FORCE)
|
||||
set(LLVM_ENABLE_TELEMETRY OFF CACHE BOOL "" FORCE)
|
||||
set(LLVM_INCLUDE_BENCHMARKS OFF CACHE BOOL "" FORCE)
|
||||
set(LLVM_TARGETS_TO_BUILD "X86" CACHE STRING "" FORCE)
|
||||
set(LLVM_ENABLE_PROJECTS "clang" CACHE STRING "" FORCE)
|
||||
add_subdirectory(../third_party/spdlog deps/spdlog)
|
||||
add_subdirectory(third_party/tree-sitter/lib deps/tree-sitter)
|
||||
add_subdirectory(third_party/tree-sitter-cpp deps/tree-sitter-cpp)
|
||||
|
||||
set(LLVM_BINARY_DIR ${CMAKE_BINARY_DIR}/external/llvm)
|
||||
add_subdirectory(${LLVM_ADD_PATH} ${LLVM_BINARY_DIR})
|
||||
add_subdirectory(../game_re/third_party/spdlog spdlog)
|
||||
|
||||
# Add the clang tooling executable
|
||||
add_executable(clang_tool main.cpp)
|
||||
|
||||
target_include_directories(clang_tool PRIVATE
|
||||
${LLVM_PATH}/llvm/include
|
||||
${LLVM_PATH}/clang/include
|
||||
${LLVM_BINARY_DIR}/include
|
||||
${LLVM_BINARY_DIR}/tools/clang/include
|
||||
set(SQLITE_SRC third_party/sqlite)
|
||||
add_library(sqlite3
|
||||
${SQLITE_SRC}/sqlite3.c
|
||||
)
|
||||
target_include_directories(sqlite3 PUBLIC ${SQLITE_SRC})
|
||||
|
||||
# Link against clang tooling libraries
|
||||
target_link_libraries(clang_tool
|
||||
clangTooling
|
||||
clangFrontend
|
||||
clangASTMatchers
|
||||
clangAST
|
||||
clangBasic
|
||||
clangSerialization
|
||||
clangDriver
|
||||
clangParse
|
||||
clangSema
|
||||
clangAnalysis
|
||||
clangEdit
|
||||
clangRewrite
|
||||
clangLex
|
||||
spdlog::spdlog
|
||||
)
|
||||
add_library(CLI11 INTERFACE)
|
||||
target_include_directories(CLI11 INTERFACE third_party/CLI11)
|
||||
|
||||
# Include LLVM/Clang headers
|
||||
target_include_directories(clang_tool PRIVATE
|
||||
${LLVM_INCLUDE_DIRS}
|
||||
${CLANG_INCLUDE_DIRS}
|
||||
)
|
||||
add_executable(r3_gh_tool tool.cpp)
|
||||
target_link_libraries(r3_gh_tool PRIVATE spdlog::spdlog tree-sitter tree-sitter-cpp sqlite3 CLI11)
|
||||
target_compile_features(r3_gh_tool PRIVATE cxx_std_23)
|
|
@ -0,0 +1,21 @@
|
|||
#!/bin/bash
# Builds r3_gh_tool, then runs it three times: over the *.cxx files listed in
# files.txt, over tmps/gh_global.h with -mglobals, and with -mduplicates.
set -e

build_dir=build/clang-x86_64-pc-windows-msvc/Release
tool="$build_dir/r3_gh_tool"
cmake --build "$build_dir" --target r3_gh_tool

scan_root=tmps
file_list=files.txt
rm -f "$file_list"
for dir in gh_auto gh_stub gh_fix; do
    # A glob with no match stays literal and is written as-is to the list.
    for file in "$scan_root/$dir"/*.cxx; do
        echo "$file" >>"$file_list"
    done
done

"$tool" "@$file_list" -v --log-file=log-functions.txt
"$tool" "$scan_root/gh_global.h" -mglobals -v --log-file=log-globals.txt
"$tool" -mduplicates -v --log-file=log-duplicates.txt
|
122
tooling/main.cpp
122
tooling/main.cpp
|
@ -1,122 +0,0 @@
|
|||
#include "clang/AST/ASTConsumer.h"
|
||||
#include "clang/AST/RecursiveASTVisitor.h"
|
||||
#include "clang/Frontend/CompilerInstance.h"
|
||||
#include "clang/Frontend/FrontendAction.h"
|
||||
#include "clang/Tooling/CommonOptionsParser.h"
|
||||
#include "clang/Tooling/Tooling.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include <spdlog/spdlog.h>
|
||||
#include "clang/AST/ASTConcept.h"
|
||||
|
||||
using namespace clang;
|
||||
using namespace clang::tooling;
|
||||
using namespace llvm;
|
||||
|
||||
// Apply a custom category to all command-line options so that they are the
|
||||
// only ones displayed.
|
||||
static cl::OptionCategory MyToolCategory("my-tool options");
|
||||
|
||||
// CommonOptionsParser declares HelpMessage with a description of the common
|
||||
// command-line options related to the compilation database and input files.
|
||||
// It's nice to have this help message in all tools.
|
||||
static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage);
|
||||
|
||||
// A help message for this specific tool can be added afterwards.
|
||||
static cl::extrahelp MoreHelp("\nMore help text...\n");
|
||||
|
||||
// AST visitor that logs every function definition and every variable
// declaration located in the main file of the translation unit.
class FunctionVisitor : public RecursiveASTVisitor<FunctionVisitor> {
public:
  explicit FunctionVisitor(ASTContext *Context) : Context(Context) {}

  // Logs name, line and parameter count of functions that have a body and
  // live in the main file. Always returns true so traversal continues.
  bool VisitFunctionDecl(FunctionDecl *Declaration) {
    if (!Declaration->hasBody())
      return true;

    SourceManager &Sources = Context->getSourceManager();
    const SourceLocation Where = Declaration->getLocation();
    if (!Sources.isInMainFile(Where))
      return true;

    const std::string Name =
        Declaration->getNameInfo().getName().getAsString();
    const unsigned LineNo = Sources.getExpansionLineNumber(Where);
    spdlog::info("Found function '{}' at line {}", Name, LineNo);

    const unsigned Params = Declaration->getNumParams();
    spdlog::debug(" - Function '{}' has {} parameters", Name, Params);

    // The entry point gets an extra, more visible log line.
    if (Name == "main")
      spdlog::warn("Found main function at line {}", LineNo);

    return true;
  }

  // Logs name and line of variables declared in the main file.
  // Always returns true so traversal continues.
  bool VisitVarDecl(VarDecl *Declaration) {
    SourceManager &Sources = Context->getSourceManager();
    const SourceLocation Where = Declaration->getLocation();
    if (!Sources.isInMainFile(Where))
      return true;

    const std::string Name = Declaration->getNameAsString();
    const unsigned LineNo = Sources.getExpansionLineNumber(Where);
    spdlog::debug("Found variable '{}' at line {}", Name, LineNo);
    return true;
  }

private:
  ASTContext *Context;
};
|
||||
|
||||
class FunctionConsumer : public ASTConsumer {
|
||||
private:
|
||||
FunctionVisitor Visitor;
|
||||
|
||||
public:
|
||||
explicit FunctionConsumer(ASTContext *Context) : Visitor(Context) {}
|
||||
|
||||
void HandleTranslationUnit(ASTContext &Context) override {
|
||||
spdlog::info("Starting AST traversal...");
|
||||
Visitor.TraverseDecl(Context.getTranslationUnitDecl());
|
||||
spdlog::info("AST traversal completed");
|
||||
}
|
||||
};
|
||||
|
||||
class FunctionAction : public ASTFrontendAction {
|
||||
public:
|
||||
std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &Compiler,
|
||||
StringRef InFile) override {
|
||||
spdlog::info("Processing file: {}", InFile.str());
|
||||
return std::make_unique<FunctionConsumer>(&Compiler.getASTContext());
|
||||
}
|
||||
};
|
||||
|
||||
int main(int argc, const char **argv) {
|
||||
// Set up logging
|
||||
spdlog::set_level(spdlog::level::debug);
|
||||
spdlog::info("Starting clang tooling example");
|
||||
|
||||
auto ExpectedParser = CommonOptionsParser::create(argc, argv, MyToolCategory);
|
||||
if (!ExpectedParser) {
|
||||
// Fail gracefully for unsupported options.
|
||||
llvm::errs() << ExpectedParser.takeError();
|
||||
return 1;
|
||||
}
|
||||
CommonOptionsParser &OptionsParser = ExpectedParser.get();
|
||||
ClangTool Tool(OptionsParser.getCompilations(),
|
||||
OptionsParser.getSourcePathList());
|
||||
|
||||
spdlog::info("Running tool on {} source files",
|
||||
OptionsParser.getSourcePathList().size());
|
||||
|
||||
int Result = Tool.run(newFrontendActionFactory<FunctionAction>().get());
|
||||
|
||||
if (Result == 0) {
|
||||
spdlog::info("Tool completed successfully");
|
||||
} else {
|
||||
spdlog::error("Tool failed with exit code {}", Result);
|
||||
}
|
||||
|
||||
return Result;
|
||||
}
|
|
@ -1,20 +0,0 @@
|
|||
#include <iostream>
|
||||
|
||||
// Returns the sum of the two arguments.
int add(int a, int b) {
    const int sum = a + b;
    return sum;
}
|
||||
|
||||
// Writes "Hello, <name>!" followed by std::endl to standard output.
void greet(const std::string& name) {
    std::ostream& out = std::cout;
    out << "Hello, ";
    out << name;
    out << "!";
    out << std::endl;
}
|
||||
|
||||
int main() {
|
||||
int x = 5;
|
||||
int y = 10;
|
||||
int result = add(x, y);
|
||||
|
||||
greet("World");
|
||||
|
||||
std::cout << "Result: " << result << std::endl;
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,901 @@
|
|||
#include <string>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <regex>
|
||||
#include <sqlite3.h>
|
||||
#include <filesystem>
|
||||
#include <memory>
|
||||
#include <tree_sitter/api.h>
|
||||
#include <spdlog/spdlog.h>
|
||||
#include <spdlog/sinks/stdout_color_sinks.h>
|
||||
#include <spdlog/sinks/basic_file_sink.h>
|
||||
#include <CLI11.hpp>
|
||||
|
||||
extern "C" TSLanguage *tree_sitter_cpp();
|
||||
|
||||
// Address annotation pattern: a `//` marker, optional whitespace, then eight
// hexadecimal digits. The digits are capture group 1.
const std::regex ADDRESS_REGEX(R"(//\s*([0-9a-fA-F]{8}))");

// Returns true when `comment` contains a match for ADDRESS_REGEX anywhere in
// its text.
bool hasAddressPattern(const std::string &comment) {
  const bool matched =
      std::regex_search(comment.begin(), comment.end(), ADDRESS_REGEX);
  return matched;
}
|
||||
|
||||
// Helper function to extract text from a TSNode
|
||||
std::string extractNodeText(TSNode node, const char *source_code) {
|
||||
uint32_t start = ts_node_start_byte(node);
|
||||
uint32_t end = ts_node_end_byte(node);
|
||||
return std::string(source_code + start, end - start);
|
||||
}
|
||||
|
||||
// Helper function to find first identifier in a node
|
||||
std::string findIdentifierInNode(TSNode node, const char *source_code) {
|
||||
uint32_t child_count = ts_node_child_count(node);
|
||||
for (uint32_t i = 0; i < child_count; i++) {
|
||||
TSNode child = ts_node_child(node, i);
|
||||
if (strcmp(ts_node_type(child), "identifier") == 0) {
|
||||
return extractNodeText(child, source_code);
|
||||
}
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
// One function or import found in a scanned source file.
struct FunctionInfo {
  std::string name;     // identifier of the function
  std::string address;  // hex digits captured from the address comment
  std::string filepath; // file the entry was found in
  // Selects the Imports table instead of Functions on insert. Defaulted so a
  // default-constructed FunctionInfo never carries an indeterminate value.
  bool is_import = false;
};

// One extern global found in a scanned source file.
struct GlobalInfo {
  std::string name;     // identifier of the global
  std::string address;  // hex digits captured from the address comment
  std::string filepath; // file the entry was found in
};
|
||||
|
||||
class PreparedStatements {
|
||||
private:
|
||||
sqlite3 *db;
|
||||
sqlite3_stmt *delete_functions_stmt;
|
||||
sqlite3_stmt *delete_imports_stmt;
|
||||
sqlite3_stmt *insert_functions_stmt;
|
||||
sqlite3_stmt *insert_imports_stmt;
|
||||
sqlite3_stmt *delete_globals_stmt;
|
||||
sqlite3_stmt *insert_globals_stmt;
|
||||
|
||||
void prepareStatement(const char *sql, sqlite3_stmt **stmt,
|
||||
const std::string &error_msg) {
|
||||
if (sqlite3_prepare_v2(db, sql, -1, stmt, nullptr) != SQLITE_OK) {
|
||||
throw std::runtime_error(error_msg + ": " + sqlite3_errmsg(db));
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
PreparedStatements(sqlite3 *database) : db(database) {
|
||||
prepareStatement("DELETE FROM Functions WHERE filepath = ?",
|
||||
&delete_functions_stmt,
|
||||
"Failed to prepare delete functions statement");
|
||||
prepareStatement("DELETE FROM Imports WHERE filepath = ?",
|
||||
&delete_imports_stmt,
|
||||
"Failed to prepare delete imports statement");
|
||||
prepareStatement("INSERT OR REPLACE INTO Functions (filepath, name, "
|
||||
"address) VALUES (?, ?, ?)",
|
||||
&insert_functions_stmt,
|
||||
"Failed to prepare insert functions statement");
|
||||
prepareStatement("INSERT OR REPLACE INTO Imports (filepath, name, address) "
|
||||
"VALUES (?, ?, ?)",
|
||||
&insert_imports_stmt,
|
||||
"Failed to prepare insert imports statement");
|
||||
prepareStatement("DELETE FROM Globals WHERE filepath = ?",
|
||||
&delete_globals_stmt,
|
||||
"Failed to prepare delete globals statement");
|
||||
prepareStatement("INSERT OR REPLACE INTO Globals (filepath, name, address) "
|
||||
"VALUES (?, ?, ?)",
|
||||
&insert_globals_stmt,
|
||||
"Failed to prepare insert globals statement");
|
||||
}
|
||||
|
||||
~PreparedStatements() {
|
||||
sqlite3_finalize(delete_functions_stmt);
|
||||
sqlite3_finalize(delete_imports_stmt);
|
||||
sqlite3_finalize(insert_functions_stmt);
|
||||
sqlite3_finalize(insert_imports_stmt);
|
||||
sqlite3_finalize(delete_globals_stmt);
|
||||
sqlite3_finalize(insert_globals_stmt);
|
||||
}
|
||||
|
||||
void clearEntriesForFile(const std::string &filepath) {
|
||||
for (auto stmt : {delete_functions_stmt, delete_imports_stmt}) {
|
||||
sqlite3_reset(stmt);
|
||||
sqlite3_bind_text(stmt, 1, filepath.c_str(), -1, SQLITE_STATIC);
|
||||
sqlite3_step(stmt);
|
||||
}
|
||||
}
|
||||
|
||||
void clearGlobalsForFile(const std::string &filepath) {
|
||||
sqlite3_reset(delete_globals_stmt);
|
||||
sqlite3_bind_text(delete_globals_stmt, 1, filepath.c_str(), -1,
|
||||
SQLITE_STATIC);
|
||||
sqlite3_step(delete_globals_stmt);
|
||||
}
|
||||
|
||||
void insertFunction(const FunctionInfo &func) {
|
||||
sqlite3_stmt *stmt =
|
||||
func.is_import ? insert_imports_stmt : insert_functions_stmt;
|
||||
sqlite3_reset(stmt);
|
||||
sqlite3_bind_text(stmt, 1, func.filepath.c_str(), -1, SQLITE_STATIC);
|
||||
sqlite3_bind_text(stmt, 2, func.name.c_str(), -1, SQLITE_STATIC);
|
||||
sqlite3_bind_text(stmt, 3, func.address.c_str(), -1, SQLITE_STATIC);
|
||||
sqlite3_step(stmt);
|
||||
}
|
||||
|
||||
void insertGlobal(const GlobalInfo &global) {
|
||||
sqlite3_reset(insert_globals_stmt);
|
||||
sqlite3_bind_text(insert_globals_stmt, 1, global.filepath.c_str(), -1,
|
||||
SQLITE_STATIC);
|
||||
sqlite3_bind_text(insert_globals_stmt, 2, global.name.c_str(), -1,
|
||||
SQLITE_STATIC);
|
||||
sqlite3_bind_text(insert_globals_stmt, 3, global.address.c_str(), -1,
|
||||
SQLITE_STATIC);
|
||||
sqlite3_step(insert_globals_stmt);
|
||||
}
|
||||
};
|
||||
|
||||
class DatabaseManager {
|
||||
private:
|
||||
sqlite3 *db;
|
||||
std::unique_ptr<PreparedStatements> prepared_stmts;
|
||||
|
||||
public:
|
||||
DatabaseManager(const std::string &db_path) : db(nullptr) {
|
||||
if (sqlite3_open(db_path.c_str(), &db) != SQLITE_OK) {
|
||||
spdlog::error("Can't open database: {}", sqlite3_errmsg(db));
|
||||
sqlite3_close(db);
|
||||
throw std::runtime_error("Failed to open database");
|
||||
}
|
||||
|
||||
const char *create_tables = R"(
|
||||
CREATE TABLE IF NOT EXISTS Functions (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath));
|
||||
CREATE TABLE IF NOT EXISTS Imports (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath));
|
||||
CREATE TABLE IF NOT EXISTS Globals (filepath TEXT, name TEXT, address TEXT);
|
||||
)";
|
||||
|
||||
sqlite3_exec(db, create_tables, nullptr, nullptr, nullptr);
|
||||
prepared_stmts = std::make_unique<PreparedStatements>(db);
|
||||
}
|
||||
|
||||
~DatabaseManager() {
|
||||
if (db)
|
||||
sqlite3_close(db);
|
||||
}
|
||||
|
||||
void clearEntriesForFile(const std::string &filepath) {
|
||||
prepared_stmts->clearEntriesForFile(filepath);
|
||||
}
|
||||
void insertFunction(const FunctionInfo &func) {
|
||||
prepared_stmts->insertFunction(func);
|
||||
}
|
||||
void clearGlobalsForFile(const std::string &filepath) {
|
||||
prepared_stmts->clearGlobalsForFile(filepath);
|
||||
}
|
||||
void insertGlobal(const GlobalInfo &global) {
|
||||
prepared_stmts->insertGlobal(global);
|
||||
}
|
||||
void beginTransaction() {
|
||||
sqlite3_exec(db, "BEGIN TRANSACTION", nullptr, nullptr, nullptr);
|
||||
}
|
||||
void commitTransaction() {
|
||||
sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr);
|
||||
}
|
||||
void rollbackTransaction() {
|
||||
sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr);
|
||||
}
|
||||
|
||||
// New methods for duplicate checking
|
||||
bool checkDuplicateAddresses() {
|
||||
const char *sql = R"(
|
||||
WITH all_addresses AS (
|
||||
SELECT 'Functions' as table_name, name, address, filepath FROM Functions WHERE address != ''
|
||||
UNION ALL
|
||||
SELECT 'Imports' as table_name, name, address, filepath FROM Imports WHERE address != ''
|
||||
UNION ALL
|
||||
SELECT 'Globals' as table_name, name, address, filepath FROM Globals WHERE address != ''
|
||||
)
|
||||
SELECT address, COUNT(*) as count,
|
||||
GROUP_CONCAT(table_name || ':' || name || ' (' || filepath || ')', '; ') as entries
|
||||
FROM all_addresses
|
||||
GROUP BY address
|
||||
HAVING COUNT(*) > 1
|
||||
ORDER BY address;
|
||||
)";
|
||||
|
||||
sqlite3_stmt *stmt;
|
||||
if (sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr) != SQLITE_OK) {
|
||||
spdlog::error("Failed to prepare duplicate address query: {}",
|
||||
sqlite3_errmsg(db));
|
||||
return false;
|
||||
}
|
||||
|
||||
bool found_duplicates = false;
|
||||
while (sqlite3_step(stmt) == SQLITE_ROW) {
|
||||
found_duplicates = true;
|
||||
const char *address = (const char *)sqlite3_column_text(stmt, 0);
|
||||
int count = sqlite3_column_int(stmt, 1);
|
||||
const char *entries = (const char *)sqlite3_column_text(stmt, 2);
|
||||
|
||||
spdlog::error("DUPLICATE ADDRESS: {} appears {} times in: {}", address,
|
||||
count, entries);
|
||||
}
|
||||
|
||||
sqlite3_finalize(stmt);
|
||||
return found_duplicates;
|
||||
}
|
||||
|
||||
bool checkDuplicateNames() {
|
||||
bool found_duplicates = false;
|
||||
|
||||
// Check Functions table
|
||||
const char *functions_sql = R"(
|
||||
SELECT name, COUNT(*) as count,
|
||||
GROUP_CONCAT(filepath, '; ') as filepaths
|
||||
FROM Functions
|
||||
GROUP BY name
|
||||
HAVING COUNT(*) > 1
|
||||
ORDER BY name;
|
||||
)";
|
||||
|
||||
sqlite3_stmt *stmt;
|
||||
if (sqlite3_prepare_v2(db, functions_sql, -1, &stmt, nullptr) ==
|
||||
SQLITE_OK) {
|
||||
while (sqlite3_step(stmt) == SQLITE_ROW) {
|
||||
found_duplicates = true;
|
||||
const char *name = (const char *)sqlite3_column_text(stmt, 0);
|
||||
int count = sqlite3_column_int(stmt, 1);
|
||||
const char *filepaths = (const char *)sqlite3_column_text(stmt, 2);
|
||||
|
||||
spdlog::error(
|
||||
"DUPLICATE FUNCTION NAME: '{}' appears {} times in files: {}", name,
|
||||
count, filepaths);
|
||||
}
|
||||
sqlite3_finalize(stmt);
|
||||
}
|
||||
|
||||
// Check Imports table
|
||||
const char *imports_sql = R"(
|
||||
SELECT name, COUNT(*) as count,
|
||||
GROUP_CONCAT(filepath, '; ') as filepaths
|
||||
FROM Imports
|
||||
GROUP BY name
|
||||
HAVING COUNT(*) > 1
|
||||
ORDER BY name;
|
||||
)";
|
||||
|
||||
if (sqlite3_prepare_v2(db, imports_sql, -1, &stmt, nullptr) == SQLITE_OK) {
|
||||
while (sqlite3_step(stmt) == SQLITE_ROW) {
|
||||
found_duplicates = true;
|
||||
const char *name = (const char *)sqlite3_column_text(stmt, 0);
|
||||
int count = sqlite3_column_int(stmt, 1);
|
||||
const char *filepaths = (const char *)sqlite3_column_text(stmt, 2);
|
||||
|
||||
spdlog::error(
|
||||
"DUPLICATE IMPORT NAME: '{}' appears {} times in files: {}", name,
|
||||
count, filepaths);
|
||||
}
|
||||
sqlite3_finalize(stmt);
|
||||
}
|
||||
|
||||
// Check Globals table
|
||||
const char *globals_sql = R"(
|
||||
SELECT name, COUNT(*) as count,
|
||||
GROUP_CONCAT(filepath, '; ') as filepaths
|
||||
FROM Globals
|
||||
GROUP BY name
|
||||
HAVING COUNT(*) > 1
|
||||
ORDER BY name;
|
||||
)";
|
||||
|
||||
if (sqlite3_prepare_v2(db, globals_sql, -1, &stmt, nullptr) == SQLITE_OK) {
|
||||
while (sqlite3_step(stmt) == SQLITE_ROW) {
|
||||
found_duplicates = true;
|
||||
const char *name = (const char *)sqlite3_column_text(stmt, 0);
|
||||
int count = sqlite3_column_int(stmt, 1);
|
||||
const char *filepaths = (const char *)sqlite3_column_text(stmt, 2);
|
||||
|
||||
spdlog::error(
|
||||
"DUPLICATE GLOBAL NAME: '{}' appears {} times in files: {}", name,
|
||||
count, filepaths);
|
||||
}
|
||||
sqlite3_finalize(stmt);
|
||||
}
|
||||
|
||||
return found_duplicates;
|
||||
}
|
||||
};
|
||||
|
||||
std::string extractAddress(const std::string &comment) {
|
||||
std::smatch match;
|
||||
return std::regex_search(comment, match, ADDRESS_REGEX) ? match[1].str() : "";
|
||||
}
|
||||
|
||||
// Looks through the direct children of `node` for a name:
//  - a "function_declarator" child yields its first direct identifier,
//  - an "identifier" child yields its own text,
//  - a "pointer_declarator" child is searched recursively.
// Children are tried in order; returns an empty string if none gives a name.
std::string getFunctionName(TSNode node, const char *source_code) {
  const auto isKind = [](const char *actual, const char *expected) {
    return strcmp(actual, expected) == 0;
  };

  const uint32_t total = ts_node_child_count(node);
  for (uint32_t index = 0; index != total; ++index) {
    const TSNode candidate = ts_node_child(node, index);
    const char *kind = ts_node_type(candidate);

    // A direct identifier ends the search immediately.
    if (isKind(kind, "identifier")) {
      return extractNodeText(candidate, source_code);
    }

    std::string nested;
    if (isKind(kind, "function_declarator")) {
      nested = findIdentifierInNode(candidate, source_code);
    } else if (isKind(kind, "pointer_declarator")) {
      nested = getFunctionName(candidate, source_code);
    }

    if (!nested.empty()) {
      return nested;
    }
  }
  return "";
}
|
||||
|
||||
// Finds a nearby comment that contains an address pattern and returns its
// text, or an empty string if there is none.
//
// search_before == false: walks the next siblings of `node`.
// search_before == true:  walks the previous siblings of `node`, then of its
//                         parent, and so on up to the root.
// In both directions, comments without an address and "ERROR" nodes are
// stepped over; any other node type ends the walk at that level.
// `source_length` is not used.
std::string getComment(TSNode node, const char *source_code,
                       uint32_t source_length, bool search_before) {
  (void)source_length;

  // Walks a sibling chain starting at `sibling`. Stores the comment text in
  // `found` and returns true on success.
  const auto scanSiblings = [source_code](TSNode sibling, bool backwards,
                                          std::string &found) {
    for (; !ts_node_is_null(sibling);
         sibling = backwards ? ts_node_prev_sibling(sibling)
                             : ts_node_next_sibling(sibling)) {
      const char *kind = ts_node_type(sibling);

      if (strcmp(kind, "comment") == 0) {
        std::string text = extractNodeText(sibling, source_code);
        if (hasAddressPattern(text)) {
          found.swap(text);
          return true;
        }
        continue;
      }

      // Anything other than an ERROR node stops the walk.
      if (strcmp(kind, "ERROR") != 0) {
        return false;
      }
    }
    return false;
  };

  std::string result;

  if (!search_before) {
    scanSiblings(ts_node_next_sibling(node), false, result);
    return result;
  }

  for (TSNode anchor = node; !ts_node_is_null(anchor);
       anchor = ts_node_parent(anchor)) {
    if (scanSiblings(ts_node_prev_sibling(anchor), true, result)) {
      return result;
    }
  }
  return result;
}
|
||||
|
||||
// Returns true when `node` is a "function_definition" that has a direct
// "compound_statement" child; false for every other node.
bool hasFunctionBody(TSNode node) {
  const bool is_definition =
      strcmp(ts_node_type(node), "function_definition") == 0;
  if (!is_definition) {
    return false;
  }

  bool body_found = false;
  const uint32_t total = ts_node_child_count(node);
  for (uint32_t index = 0; index != total && !body_found; ++index) {
    const TSNode candidate = ts_node_child(node, index);
    body_found = strcmp(ts_node_type(candidate), "compound_statement") == 0;
  }
  return body_found;
}
|
||||
|
||||
// Recursively collects functions and imports below `node` into `functions`.
//
// A "function_definition" or "declaration" node with a resolvable name is
// recorded when an address comment is found: first after the node, and for
// definitions also before it. The entry is an import unless the node is a
// definition with a body. Such nodes are not descended into. Nodes of those
// two types without a resolvable name are logged as errors and then
// descended into like any other node. `filepath` is left empty.
void findFunctions(TSNode node, const char *source_code, uint32_t source_length,
                   std::vector<FunctionInfo> &functions) {
  const char *kind = ts_node_type(node);
  const bool is_definition = strcmp(kind, "function_definition") == 0;
  const bool is_declaration = strcmp(kind, "declaration") == 0;

  if (is_definition || is_declaration) {
    const std::string name = getFunctionName(node, source_code);

    if (name.empty()) {
      spdlog::error("Failed to get function name for {}",
                    extractNodeText(node, source_code));
    } else {
      std::string address =
          extractAddress(getComment(node, source_code, source_length, false));
      if (is_definition && address.empty()) {
        address =
            extractAddress(getComment(node, source_code, source_length, true));
      }

      if (!address.empty()) {
        FunctionInfo entry;
        entry.name = name;
        entry.address = address;
        entry.filepath = "";
        entry.is_import = !(is_definition && hasFunctionBody(node));
        functions.push_back(entry);
      }

      // We'll never nest function declarations
      return;
    }
  }

  const uint32_t total = ts_node_child_count(node);
  for (uint32_t index = 0; index != total; ++index) {
    const TSNode child = ts_node_child(node, index);
    findFunctions(child, source_code, source_length, functions);
  }
}
|
||||
|
||||
std::vector<std::string> readFileList(const std::string &list_file) {
|
||||
std::vector<std::string> files;
|
||||
std::ifstream file(list_file);
|
||||
if (!file.is_open()) {
|
||||
spdlog::error("Could not open list file {}", list_file);
|
||||
return files;
|
||||
}
|
||||
|
||||
std::string line;
|
||||
while (std::getline(file, line)) {
|
||||
if (line.empty() || line[0] == '#')
|
||||
continue;
|
||||
|
||||
if (line.find('*') != std::string::npos) {
|
||||
spdlog::info("Skipping wildcard pattern: {}", line);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (std::filesystem::exists(line)) {
|
||||
files.push_back(line);
|
||||
} else {
|
||||
spdlog::warn("File not found: {}", line);
|
||||
}
|
||||
}
|
||||
return files;
|
||||
}
|
||||
|
||||
bool processFile(const std::string &filepath, DatabaseManager &db) {
|
||||
std::ifstream file(filepath);
|
||||
if (!file.is_open()) {
|
||||
spdlog::error("Could not open file {}", filepath);
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string file_content((std::istreambuf_iterator<char>(file)),
|
||||
std::istreambuf_iterator<char>());
|
||||
|
||||
TSParser *parser = ts_parser_new();
|
||||
ts_parser_set_language(parser, tree_sitter_cpp());
|
||||
|
||||
TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(),
|
||||
file_content.length());
|
||||
TSNode root_node = ts_tree_root_node(tree);
|
||||
|
||||
if (ts_node_is_null(root_node)) {
|
||||
spdlog::error("Failed to parse file {}", filepath);
|
||||
ts_tree_delete(tree);
|
||||
ts_parser_delete(parser);
|
||||
return false;
|
||||
}
|
||||
|
||||
db.clearEntriesForFile(filepath);
|
||||
|
||||
std::vector<FunctionInfo> functions;
|
||||
findFunctions(root_node, file_content.c_str(), file_content.length(),
|
||||
functions);
|
||||
|
||||
for (auto &func : functions) {
|
||||
func.filepath = filepath;
|
||||
db.insertFunction(func);
|
||||
spdlog::debug("{}: {} @ {} in {}", func.is_import ? "Import" : "Function",
|
||||
func.name, func.address, filepath);
|
||||
}
|
||||
|
||||
spdlog::info("Processed {} functions/imports from {}", functions.size(),
|
||||
filepath);
|
||||
|
||||
ts_tree_delete(tree);
|
||||
ts_parser_delete(parser);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Helper function to recursively find identifier in any declarator
|
||||
std::string findIdentifierInDeclarator(TSNode node, const char *source_code) {
|
||||
const char *type = ts_node_type(node);
|
||||
|
||||
// If this is an identifier, return it
|
||||
if (strcmp(type, "identifier") == 0) {
|
||||
return extractNodeText(node, source_code);
|
||||
}
|
||||
|
||||
// Recursively search all children
|
||||
uint32_t child_count = ts_node_child_count(node);
|
||||
for (uint32_t i = 0; i < child_count; i++) {
|
||||
TSNode child = ts_node_child(node, i);
|
||||
std::string result = findIdentifierInDeclarator(child, source_code);
|
||||
if (!result.empty()) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
// Returns the declared name found among the direct children of `node`:
// a direct "identifier" child yields its text; a declarator child (init,
// plain, reference, pointer, parenthesized or array) is searched recursively
// for an identifier. Children are tried in order; returns an empty string if
// none gives a name.
std::string getGlobalName(TSNode node, const char *source_code) {
  static const char *const kDeclaratorKinds[] = {
      "init_declarator",    "declarator",
      "reference_declarator", "pointer_declarator",
      "parenthesized_declarator", "array_declarator"};

  const uint32_t total = ts_node_child_count(node);
  for (uint32_t index = 0; index != total; ++index) {
    const TSNode candidate = ts_node_child(node, index);
    const char *kind = ts_node_type(candidate);

    // Direct identifier child
    if (strcmp(kind, "identifier") == 0) {
      return extractNodeText(candidate, source_code);
    }

    bool is_declarator = false;
    for (const char *declarator_kind : kDeclaratorKinds) {
      if (strcmp(kind, declarator_kind) == 0) {
        is_declarator = true;
        break;
      }
    }
    if (!is_declarator) {
      continue;
    }

    const std::string nested = findIdentifierInDeclarator(candidate, source_code);
    if (!nested.empty()) {
      return nested;
    }
  }
  return "";
}
|
||||
|
||||
// Recursively collects extern globals below `node` into `globals`.
//
// A "declaration" node counts when one of its direct children is a
// "storage_class_specifier" whose text is "extern". It is recorded if its
// name can be resolved and an address comment follows it; an unresolvable
// name is logged as an error. Every node, matching or not, is descended
// into. `filepath` is left empty.
void findGlobals(TSNode node, const char *source_code, uint32_t source_length,
                 std::vector<GlobalInfo> &globals) {
  const uint32_t total = ts_node_child_count(node);

  // True when a direct child is the `extern` storage class specifier.
  const auto declaredExtern = [&]() {
    for (uint32_t index = 0; index != total; ++index) {
      const TSNode part = ts_node_child(node, index);
      if (strcmp(ts_node_type(part), "storage_class_specifier") != 0) {
        continue;
      }
      if (extractNodeText(part, source_code) == "extern") {
        return true;
      }
    }
    return false;
  };

  const bool is_declaration = strcmp(ts_node_type(node), "declaration") == 0;
  if (is_declaration && declaredExtern()) {
    const std::string name = getGlobalName(node, source_code);
    if (name.empty()) {
      std::string src = extractNodeText(node, source_code);
      SPDLOG_ERROR("Failed to get global name for {}", src);
    } else {
      // Only a comment after the declaration is considered.
      const std::string address =
          extractAddress(getComment(node, source_code, source_length, false));
      if (!address.empty()) {
        GlobalInfo entry;
        entry.name = name;
        entry.address = address;
        entry.filepath = "";
        globals.push_back(entry);
      }
    }
  }

  // Recursively search child nodes
  for (uint32_t index = 0; index != total; ++index) {
    findGlobals(ts_node_child(node, index), source_code, source_length, globals);
  }
}
|
||||
|
||||
bool processGlobalsFile(const std::string &filepath, DatabaseManager &db) {
|
||||
std::ifstream file(filepath);
|
||||
if (!file.is_open()) {
|
||||
spdlog::error("Could not open file {}", filepath);
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string file_content((std::istreambuf_iterator<char>(file)),
|
||||
std::istreambuf_iterator<char>());
|
||||
|
||||
TSParser *parser = ts_parser_new();
|
||||
ts_parser_set_language(parser, tree_sitter_cpp());
|
||||
|
||||
TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(),
|
||||
file_content.length());
|
||||
TSNode root_node = ts_tree_root_node(tree);
|
||||
|
||||
if (ts_node_is_null(root_node)) {
|
||||
spdlog::error("Failed to parse file {}", filepath);
|
||||
ts_tree_delete(tree);
|
||||
ts_parser_delete(parser);
|
||||
return false;
|
||||
}
|
||||
|
||||
db.clearGlobalsForFile(filepath);
|
||||
|
||||
std::vector<GlobalInfo> globals;
|
||||
findGlobals(root_node, file_content.c_str(), file_content.length(), globals);
|
||||
|
||||
for (auto &global : globals) {
|
||||
global.filepath = filepath;
|
||||
db.insertGlobal(global);
|
||||
spdlog::debug("Global: {} @ {} in {}", global.name, global.address,
|
||||
filepath);
|
||||
}
|
||||
|
||||
spdlog::info("Processed {} globals from {}", globals.size(), filepath);
|
||||
|
||||
ts_tree_delete(tree);
|
||||
ts_parser_delete(parser);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Helper function to dump Tree-sitter AST
|
||||
void dumpTreeSitterAST(TSNode node, const char *source_code, int depth = 0) {
|
||||
std::string indent(depth * 2, ' ');
|
||||
const char *type = ts_node_type(node);
|
||||
|
||||
uint32_t start = ts_node_start_byte(node);
|
||||
uint32_t end = ts_node_end_byte(node);
|
||||
|
||||
// Get the text content for leaf nodes or small nodes
|
||||
std::string content;
|
||||
if (end - start < 100) { // Only show content for small nodes
|
||||
content = extractNodeText(node, source_code);
|
||||
// Replace newlines with \n for better readability
|
||||
std::regex newline_regex("\n");
|
||||
content = std::regex_replace(content, newline_regex, "\\n");
|
||||
// Truncate if still too long
|
||||
if (content.length() > 50) {
|
||||
content = content.substr(0, 47) + "...";
|
||||
}
|
||||
}
|
||||
|
||||
if (!content.empty()) {
|
||||
spdlog::info("{}{}[{}:{}] \"{}\"", indent, type, start, end, content);
|
||||
} else {
|
||||
spdlog::info("{}{}[{}:{}]", indent, type, start, end);
|
||||
}
|
||||
|
||||
// Recursively dump children
|
||||
uint32_t child_count = ts_node_child_count(node);
|
||||
for (uint32_t i = 0; i < child_count; i++) {
|
||||
TSNode child = ts_node_child(node, i);
|
||||
dumpTreeSitterAST(child, source_code, depth + 1);
|
||||
}
|
||||
}
|
||||
|
||||
bool dumpTreeFile(const std::string &filepath) {
|
||||
std::ifstream file(filepath);
|
||||
if (!file.is_open()) {
|
||||
spdlog::error("Could not open file {}", filepath);
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string file_content((std::istreambuf_iterator<char>(file)),
|
||||
std::istreambuf_iterator<char>());
|
||||
|
||||
TSParser *parser = ts_parser_new();
|
||||
ts_parser_set_language(parser, tree_sitter_cpp());
|
||||
|
||||
TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(),
|
||||
file_content.length());
|
||||
TSNode root_node = ts_tree_root_node(tree);
|
||||
|
||||
if (ts_node_is_null(root_node)) {
|
||||
spdlog::error("Failed to parse file {}", filepath);
|
||||
ts_tree_delete(tree);
|
||||
ts_parser_delete(parser);
|
||||
return false;
|
||||
}
|
||||
|
||||
spdlog::info("=== Tree-sitter AST for {} ===", filepath);
|
||||
dumpTreeSitterAST(root_node, file_content.c_str());
|
||||
spdlog::info("=== End of AST dump ===");
|
||||
|
||||
ts_tree_delete(tree);
|
||||
ts_parser_delete(parser);
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Runs both duplicate checks on the database and reports the outcome.
///
/// Both checks are always executed, even when the first one already found
/// duplicates.
///
/// @param db Database to inspect.
/// @return true when neither check found duplicates, false otherwise.
bool processDuplicates(DatabaseManager &db) {
  spdlog::info("=== Checking for duplicate addresses ===");
  const bool address_clash = db.checkDuplicateAddresses();

  spdlog::info("=== Checking for duplicate names ===");
  const bool name_clash = db.checkDuplicateNames();

  if (address_clash) {
    spdlog::error("Found duplicate addresses in the database!");
  }
  if (name_clash) {
    spdlog::error("Found duplicate names in the database!");
  }

  const bool clean = !(address_clash || name_clash);
  if (clean) {
    spdlog::info("No duplicates found in the database.");
  }
  // false signals to the caller that errors were found.
  return clean;
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
// Initialize spdlog
|
||||
auto console = spdlog::stdout_color_mt("console");
|
||||
spdlog::set_default_logger(console);
|
||||
spdlog::set_level(spdlog::level::info); // Default to info level
|
||||
spdlog::set_pattern("[%H:%M:%S] [%^%l%$] %v");
|
||||
|
||||
CLI::App app{"C++ Function/Global Parser - Extracts function addresses or "
|
||||
"global variable addresses from C++ files"};
|
||||
|
||||
std::vector<std::string> input_files;
|
||||
std::string list_file;
|
||||
std::string db_path = "gh.db";
|
||||
std::string mode = "functions";
|
||||
std::string log_file = "";
|
||||
bool verbose = false;
|
||||
|
||||
app.add_option("files", input_files,
|
||||
"Input C++ files to parse (supports @listfile.txt syntax)");
|
||||
app.add_option("-l,--list", list_file,
|
||||
"File containing list of files to process");
|
||||
app.add_option("-d,--database", db_path, "SQLite database path")
|
||||
->default_val("gh.db");
|
||||
app.add_option("-m,--mode", mode,
|
||||
"Processing mode: 'functions', 'globals', 'duplicates', or "
|
||||
"'dump-tree'")
|
||||
->default_val("functions")
|
||||
->check(
|
||||
CLI::IsMember({"functions", "globals", "duplicates", "dump-tree"}));
|
||||
app.add_flag("-v,--verbose", verbose, "Enable verbose logging (debug level)");
|
||||
app.add_flag("--log-file", log_file, "Enable logging to file");
|
||||
|
||||
CLI11_PARSE(app, argc, argv);
|
||||
|
||||
// Set log level based on verbose flag
|
||||
if (verbose) {
|
||||
spdlog::set_level(spdlog::level::debug);
|
||||
}
|
||||
|
||||
spdlog::set_pattern(std::string("[%^%l%$] %v"));
|
||||
|
||||
if (!log_file.empty()) {
|
||||
auto log_sink =
|
||||
std::make_shared<spdlog::sinks::basic_file_sink_mt>(log_file, true);
|
||||
spdlog::get("console")->sinks().push_back(log_sink);
|
||||
}
|
||||
|
||||
std::vector<std::string> files_to_process;
|
||||
bool needFiles = mode != "duplicates";
|
||||
|
||||
if (needFiles) {
|
||||
if (!list_file.empty()) {
|
||||
auto list_files = readFileList(list_file);
|
||||
files_to_process.insert(files_to_process.end(), list_files.begin(),
|
||||
list_files.end());
|
||||
}
|
||||
|
||||
for (const auto &input : input_files) {
|
||||
if (input.starts_with("@")) {
|
||||
auto list_files = readFileList(input.substr(1));
|
||||
files_to_process.insert(files_to_process.end(), list_files.begin(),
|
||||
list_files.end());
|
||||
} else if (std::filesystem::exists(input)) {
|
||||
files_to_process.push_back(input);
|
||||
} else {
|
||||
spdlog::warn("File not found: {}", input);
|
||||
}
|
||||
}
|
||||
|
||||
if (files_to_process.empty()) {
|
||||
spdlog::error("No files to process. Use --help for usage information.");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
int processed_count = 0;
|
||||
|
||||
// For dump-tree mode, we don't need database operations
|
||||
if (mode == "dump-tree") {
|
||||
for (const auto &filepath : files_to_process) {
|
||||
spdlog::info("=== Processing: {} ===", filepath);
|
||||
if (dumpTreeFile(filepath)) {
|
||||
processed_count++;
|
||||
}
|
||||
}
|
||||
} else if (mode == "duplicates") {
|
||||
DatabaseManager db(db_path);
|
||||
// For duplicates mode, we only check the database, no file processing
|
||||
spdlog::info("=== Checking database for duplicates ===");
|
||||
bool has_duplicates = !processDuplicates(db);
|
||||
spdlog::info("=== Summary ===");
|
||||
spdlog::info("Mode: {}", mode);
|
||||
spdlog::info("Database: {}", db_path);
|
||||
return has_duplicates ? 1 : 0; // Return 1 if duplicates found, 0 if none
|
||||
} else {
|
||||
DatabaseManager db(db_path);
|
||||
|
||||
const size_t batch_size = 50;
|
||||
size_t current_batch = 0;
|
||||
|
||||
db.beginTransaction();
|
||||
|
||||
for (const auto &filepath : files_to_process) {
|
||||
spdlog::info("=== Processing: {} ===", filepath);
|
||||
bool success = false;
|
||||
if (mode == "functions") {
|
||||
success = processFile(filepath, db);
|
||||
} else if (mode == "globals") {
|
||||
success = processGlobalsFile(filepath, db);
|
||||
}
|
||||
|
||||
if (success)
|
||||
processed_count++;
|
||||
|
||||
if (++current_batch >= batch_size) {
|
||||
db.commitTransaction();
|
||||
spdlog::info("Committed batch of {} files to database",
|
||||
current_batch);
|
||||
db.beginTransaction();
|
||||
current_batch = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (current_batch > 0) {
|
||||
db.commitTransaction();
|
||||
spdlog::info("Committed final batch of {} files to database",
|
||||
current_batch);
|
||||
}
|
||||
}
|
||||
|
||||
spdlog::info("=== Summary ===");
|
||||
spdlog::info("Processed {} files successfully", processed_count);
|
||||
spdlog::info("Mode: {}", mode);
|
||||
if (mode != "dump-tree") {
|
||||
spdlog::info("Database saved to: {}", db_path);
|
||||
}
|
||||
|
||||
} catch (const std::exception &e) {
|
||||
spdlog::error("Database error: {}", e.what());
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -1,20 +0,0 @@
|
|||
# Build script for the r3_gh_tool executable and its bundled dependencies.
cmake_minimum_required(VERSION 4.0)
project(ShardsSandbox)

# All bundled libraries are built statically.
set(BUILD_SHARED_LIBS OFF)

# Third-party projects that ship their own CMake build.
add_subdirectory(../third_party/spdlog deps/spdlog)
add_subdirectory(third_party/tree-sitter/lib deps/tree-sitter)
add_subdirectory(third_party/tree-sitter-cpp deps/tree-sitter-cpp)

# SQLite is compiled directly from its single source file.
set(SQLITE_SRC third_party/sqlite)
add_library(sqlite3 ${SQLITE_SRC}/sqlite3.c)
target_include_directories(sqlite3 PUBLIC ${SQLITE_SRC})

# CLI11 is exposed as an include-only interface target.
add_library(CLI11 INTERFACE)
target_include_directories(CLI11 INTERFACE third_party/CLI11)

# The tool itself.
add_executable(r3_gh_tool tool.cpp)
target_link_libraries(r3_gh_tool
  PRIVATE
    spdlog::spdlog
    tree-sitter
    tree-sitter-cpp
    sqlite3
    CLI11
)
target_compile_features(r3_gh_tool PRIVATE cxx_std_23)
|
|
@ -1,21 +0,0 @@
|
|||
#!/bin/bash
# Collects every .cxx file from the generated source directories into a list
# file and runs r3_gh_tool on that list.
set -e
tool=build/clang-x86_64-pc-windows-msvc/Release/r3_gh_tool

file_list=files.txt

# Start from an empty list so stale entries from a previous run are dropped.
rm -f "$file_list"
: > "$file_list"

for dir in tmps/gh_auto tmps/gh_stub tmps/gh_fix; do
  for file in "$dir"/*.cxx; do
    # An unmatched glob is left as the literal pattern; do not list it.
    [ -e "$file" ] || continue
    echo "$file" >> "$file_list"
  done
done

"$tool" "@$file_list"
|
|
@ -1,407 +0,0 @@
|
|||
#include <iostream>
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <regex>
|
||||
#include <sqlite3.h>
|
||||
#include <filesystem>
|
||||
#include <memory>
|
||||
#include <tree_sitter/api.h>
|
||||
#include <CLI11.hpp>
|
||||
|
||||
extern "C" TSLanguage *tree_sitter_cpp();
|
||||
|
||||
/// One function or import discovered in a source file.
struct FunctionInfo {
  std::string name;     ///< Identifier of the function.
  std::string address;  ///< Address text taken from the source comment.
  std::string filepath; ///< File the entry was found in.
  /// Selects the Imports table instead of Functions when stored. Initialized
  /// so a default-constructed object never holds an indeterminate value.
  bool is_import = false;
};
|
||||
|
||||
class PreparedStatements {
|
||||
private:
|
||||
sqlite3 *db;
|
||||
sqlite3_stmt *delete_functions_stmt;
|
||||
sqlite3_stmt *delete_imports_stmt;
|
||||
sqlite3_stmt *insert_functions_stmt;
|
||||
sqlite3_stmt *insert_imports_stmt;
|
||||
|
||||
void prepareStatement(const char *sql, sqlite3_stmt **stmt,
|
||||
const std::string &error_msg) {
|
||||
if (sqlite3_prepare_v2(db, sql, -1, stmt, nullptr) != SQLITE_OK) {
|
||||
throw std::runtime_error(error_msg + ": " + sqlite3_errmsg(db));
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
PreparedStatements(sqlite3 *database) : db(database) {
|
||||
prepareStatement("DELETE FROM Functions WHERE filepath = ?",
|
||||
&delete_functions_stmt,
|
||||
"Failed to prepare delete functions statement");
|
||||
prepareStatement("DELETE FROM Imports WHERE filepath = ?",
|
||||
&delete_imports_stmt,
|
||||
"Failed to prepare delete imports statement");
|
||||
prepareStatement("INSERT OR REPLACE INTO Functions (filepath, name, "
|
||||
"address) VALUES (?, ?, ?)",
|
||||
&insert_functions_stmt,
|
||||
"Failed to prepare insert functions statement");
|
||||
prepareStatement("INSERT OR REPLACE INTO Imports (filepath, name, address) "
|
||||
"VALUES (?, ?, ?)",
|
||||
&insert_imports_stmt,
|
||||
"Failed to prepare insert imports statement");
|
||||
}
|
||||
|
||||
~PreparedStatements() {
|
||||
sqlite3_finalize(delete_functions_stmt);
|
||||
sqlite3_finalize(delete_imports_stmt);
|
||||
sqlite3_finalize(insert_functions_stmt);
|
||||
sqlite3_finalize(insert_imports_stmt);
|
||||
}
|
||||
|
||||
void clearEntriesForFile(const std::string &filepath) {
|
||||
for (auto stmt : {delete_functions_stmt, delete_imports_stmt}) {
|
||||
sqlite3_reset(stmt);
|
||||
sqlite3_bind_text(stmt, 1, filepath.c_str(), -1, SQLITE_STATIC);
|
||||
sqlite3_step(stmt);
|
||||
}
|
||||
}
|
||||
|
||||
void insertFunction(const FunctionInfo &func) {
|
||||
sqlite3_stmt *stmt =
|
||||
func.is_import ? insert_imports_stmt : insert_functions_stmt;
|
||||
sqlite3_reset(stmt);
|
||||
sqlite3_bind_text(stmt, 1, func.filepath.c_str(), -1, SQLITE_STATIC);
|
||||
sqlite3_bind_text(stmt, 2, func.name.c_str(), -1, SQLITE_STATIC);
|
||||
sqlite3_bind_text(stmt, 3, func.address.c_str(), -1, SQLITE_STATIC);
|
||||
sqlite3_step(stmt);
|
||||
}
|
||||
};
|
||||
|
||||
class DatabaseManager {
|
||||
private:
|
||||
sqlite3 *db;
|
||||
std::unique_ptr<PreparedStatements> prepared_stmts;
|
||||
|
||||
public:
|
||||
DatabaseManager(const std::string &db_path) : db(nullptr) {
|
||||
if (sqlite3_open(db_path.c_str(), &db) != SQLITE_OK) {
|
||||
std::cerr << "Can't open database: " << sqlite3_errmsg(db) << std::endl;
|
||||
sqlite3_close(db);
|
||||
throw std::runtime_error("Failed to open database");
|
||||
}
|
||||
|
||||
const char *create_tables = R"(
|
||||
CREATE TABLE IF NOT EXISTS Functions (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath));
|
||||
CREATE TABLE IF NOT EXISTS Imports (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath));
|
||||
)";
|
||||
|
||||
sqlite3_exec(db, create_tables, nullptr, nullptr, nullptr);
|
||||
prepared_stmts = std::make_unique<PreparedStatements>(db);
|
||||
}
|
||||
|
||||
~DatabaseManager() {
|
||||
if (db)
|
||||
sqlite3_close(db);
|
||||
}
|
||||
|
||||
void clearEntriesForFile(const std::string &filepath) {
|
||||
prepared_stmts->clearEntriesForFile(filepath);
|
||||
}
|
||||
void insertFunction(const FunctionInfo &func) {
|
||||
prepared_stmts->insertFunction(func);
|
||||
}
|
||||
void beginTransaction() {
|
||||
sqlite3_exec(db, "BEGIN TRANSACTION", nullptr, nullptr, nullptr);
|
||||
}
|
||||
void commitTransaction() {
|
||||
sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr);
|
||||
}
|
||||
void rollbackTransaction() {
|
||||
sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr);
|
||||
}
|
||||
};
|
||||
|
||||
/// Extracts an address from a `//` comment.
///
/// Looks for `//`, optional whitespace, then eight hexadecimal digits, and
/// returns those eight digits.
///
/// @param comment Text to search.
/// @return The eight hex digits, or an empty string when there is no match.
std::string extractAddress(const std::string &comment) {
  // Compiled once: building a std::regex is far more expensive than running
  // it, and this function is called for every candidate declaration.
  static const std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))");
  std::smatch match;
  return std::regex_search(comment, match, addr_regex) ? match[1].str() : "";
}
|
||||
|
||||
/// Returns the name of the function declared or defined by `node`.
///
/// Scans the direct children of `node` in order. For a `function_declarator`
/// child, the first `identifier` directly beneath it is returned; a direct
/// `identifier` child is returned as-is.
///
/// @param node        Declaration or definition node to inspect.
/// @param source_code Buffer the tree was parsed from.
/// @return The identifier text, or an empty string when none is found.
std::string getFunctionName(TSNode node, const char *source_code) {
  // Copies the bytes covered by `n` out of the source buffer.
  const auto text_of = [source_code](TSNode n) {
    const uint32_t first = ts_node_start_byte(n);
    const uint32_t last = ts_node_end_byte(n);
    return std::string(source_code + first, last - first);
  };

  const uint32_t outer_count = ts_node_child_count(node);
  for (uint32_t outer = 0; outer < outer_count; ++outer) {
    TSNode candidate = ts_node_child(node, outer);
    const char *kind = ts_node_type(candidate);

    if (strcmp(kind, "identifier") == 0) {
      return text_of(candidate);
    }
    if (strcmp(kind, "function_declarator") != 0) {
      continue;
    }

    const uint32_t inner_count = ts_node_child_count(candidate);
    for (uint32_t inner = 0; inner < inner_count; ++inner) {
      TSNode part = ts_node_child(candidate, inner);
      if (strcmp(ts_node_type(part), "identifier") == 0) {
        return text_of(part);
      }
    }
  }
  return "";
}
|
||||
|
||||
/// Finds a `//` comment line near `node`.
///
/// With `search_before` false: returns the first `//`-to-end-of-line span
/// found anywhere after the node's end.
///
/// With `search_before` true: walks backwards through the `//` occurrences
/// that precede the node and returns the nearest one whose line contains an
/// eight-hex-digit address and which lies at most 20 newlines before the
/// node.
///
/// @param node          Node the search is anchored to.
/// @param source_code   Buffer the tree was parsed from.
/// @param source_length Length of `source_code` in bytes.
/// @param search_before Direction of the search, as described above.
/// @return The comment text starting at `//`, or an empty string.
std::string getComment(TSNode node, const char *source_code,
                       uint32_t source_length, bool search_before) {
  if (!search_before) {
    const uint32_t node_end = ts_node_end_byte(node);
    const std::string tail(source_code + node_end, source_length - node_end);

    const size_t slash_pos = tail.find("//");
    if (slash_pos == std::string::npos) {
      return "";
    }
    size_t eol = tail.find('\n', slash_pos);
    if (eol == std::string::npos) {
      eol = tail.length();
    }
    return tail.substr(slash_pos, eol - slash_pos);
  }

  const uint32_t node_start = ts_node_start_byte(node);
  if (node_start == 0) {
    return "";
  }

  const std::string head(source_code, node_start);
  std::regex addr_pattern(R"(//\s*([0-9a-fA-F]{8}))");
  std::smatch hit;

  // `cursor` is the exclusive upper bound for the next backwards search.
  for (size_t cursor = head.length(); cursor > 0;) {
    const size_t slash_pos = head.rfind("//", cursor - 1);
    if (slash_pos == std::string::npos) {
      break;
    }

    size_t eol = head.find('\n', slash_pos);
    if (eol == std::string::npos) {
      eol = head.length();
    }
    const std::string line = head.substr(slash_pos, eol - slash_pos);

    if (std::regex_search(line, hit, addr_pattern)) {
      const size_t gap = std::count(head.begin() + slash_pos,
                                    head.begin() + node_start, '\n');
      if (gap <= 20) {
        return line;
      }
    }
    cursor = slash_pos;
  }
  return "";
}
|
||||
|
||||
/// Reports whether `node` is a `function_definition` that has a
/// `compound_statement` among its direct children.
///
/// @param node Node to inspect.
/// @return true only for a function definition with such a child.
bool hasFunctionBody(TSNode node) {
  const bool is_definition =
      strcmp(ts_node_type(node), "function_definition") == 0;
  if (!is_definition) {
    return false;
  }

  const uint32_t total = ts_node_child_count(node);
  uint32_t idx = 0;
  while (idx < total) {
    TSNode part = ts_node_child(node, idx);
    if (strcmp(ts_node_type(part), "compound_statement") == 0) {
      return true;
    }
    ++idx;
  }
  return false;
}
|
||||
|
||||
/// Walks the syntax tree rooted at `node` and appends a FunctionInfo to
/// `functions` for every `function_definition` or `declaration` that has a
/// name and an address comment.
///
/// The address is read from the comment after the node; for function
/// definitions only, the comment before the node is tried as a fallback.
/// Declarations are always recorded as imports; definitions are imports only
/// when they have no body.
///
/// @param node          Subtree to scan (visited recursively).
/// @param source_code   Buffer the tree was parsed from.
/// @param source_length Length of `source_code` in bytes.
/// @param functions     Output list; entries are appended with an empty
///                      filepath field.
void findFunctions(TSNode node, const char *source_code, uint32_t source_length,
                   std::vector<FunctionInfo> &functions) {
  const char *kind = ts_node_type(node);
  const bool is_definition = strcmp(kind, "function_definition") == 0;

  if (is_definition || strcmp(kind, "declaration") == 0) {
    const std::string name = getFunctionName(node, source_code);
    if (!name.empty()) {
      std::string addr =
          extractAddress(getComment(node, source_code, source_length, false));
      if (addr.empty() && is_definition) {
        addr =
            extractAddress(getComment(node, source_code, source_length, true));
      }

      if (!addr.empty()) {
        const bool as_import = is_definition ? !hasFunctionBody(node) : true;
        functions.push_back(FunctionInfo{name, addr, "", as_import});
      }
    }
  }

  // Descend into every child regardless of what this node was.
  const uint32_t total = ts_node_child_count(node);
  for (uint32_t idx = 0; idx < total; ++idx) {
    findFunctions(ts_node_child(node, idx), source_code, source_length,
                  functions);
  }
}
|
||||
|
||||
/// Reads a list file containing one path per line.
///
/// Blank lines and lines starting with `#` are ignored. Lines containing `*`
/// are reported and skipped. Paths that do not exist are reported and
/// skipped. Trailing carriage returns, spaces and tabs are removed from each
/// line first, so list files with CRLF line endings work.
///
/// @param list_file Path of the list file.
/// @return The existing paths in file order; empty when the list file cannot
///         be opened.
std::vector<std::string> readFileList(const std::string &list_file) {
  std::vector<std::string> files;
  std::ifstream file(list_file);
  if (!file.is_open()) {
    std::cerr << "Error: Could not open list file " << list_file << std::endl;
    return files;
  }

  std::string line;
  while (std::getline(file, line)) {
    // A CRLF file leaves '\r' at the end of every line, which would make
    // each path fail the existence check below.
    const size_t last = line.find_last_not_of(" \t\r");
    line.erase(last == std::string::npos ? 0 : last + 1);

    if (line.empty() || line[0] == '#')
      continue;

    if (line.find('*') != std::string::npos) {
      std::cout << "Skipping wildcard pattern: " << line << std::endl;
      continue;
    }

    if (std::filesystem::exists(line)) {
      files.push_back(line);
    } else {
      std::cout << "Warning: File not found: " << line << std::endl;
    }
  }
  return files;
}
|
||||
|
||||
bool processFile(const std::string &filepath, DatabaseManager &db) {
|
||||
std::ifstream file(filepath);
|
||||
if (!file.is_open()) {
|
||||
std::cerr << "Error: Could not open file " << filepath << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string file_content((std::istreambuf_iterator<char>(file)),
|
||||
std::istreambuf_iterator<char>());
|
||||
|
||||
TSParser *parser = ts_parser_new();
|
||||
ts_parser_set_language(parser, tree_sitter_cpp());
|
||||
|
||||
TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(),
|
||||
file_content.length());
|
||||
TSNode root_node = ts_tree_root_node(tree);
|
||||
|
||||
if (ts_node_is_null(root_node)) {
|
||||
std::cerr << "Error: Failed to parse file " << filepath << std::endl;
|
||||
ts_tree_delete(tree);
|
||||
ts_parser_delete(parser);
|
||||
return false;
|
||||
}
|
||||
|
||||
db.clearEntriesForFile(filepath);
|
||||
|
||||
std::vector<FunctionInfo> functions;
|
||||
findFunctions(root_node, file_content.c_str(), file_content.length(),
|
||||
functions);
|
||||
|
||||
for (auto &func : functions) {
|
||||
func.filepath = filepath;
|
||||
db.insertFunction(func);
|
||||
std::cout << (func.is_import ? "Import: " : "Function: ") << func.name
|
||||
<< " @ " << func.address << " in " << filepath << std::endl;
|
||||
}
|
||||
|
||||
std::cout << "Processed " << functions.size() << " functions/imports from "
|
||||
<< filepath << std::endl;
|
||||
|
||||
ts_tree_delete(tree);
|
||||
ts_parser_delete(parser);
|
||||
return true;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
CLI::App app{
|
||||
"C++ Function Parser - Extracts function addresses from C++ files"};
|
||||
|
||||
std::vector<std::string> input_files;
|
||||
std::string list_file;
|
||||
std::string db_path = "functions.db";
|
||||
|
||||
app.add_option("files", input_files,
|
||||
"Input C++ files to parse (supports @listfile.txt syntax)");
|
||||
app.add_option("-l,--list", list_file,
|
||||
"File containing list of files to process");
|
||||
app.add_option("-d,--database", db_path, "SQLite database path")
|
||||
->default_val("functions.db");
|
||||
|
||||
CLI11_PARSE(app, argc, argv);
|
||||
|
||||
std::vector<std::string> files_to_process;
|
||||
|
||||
if (!list_file.empty()) {
|
||||
auto list_files = readFileList(list_file);
|
||||
files_to_process.insert(files_to_process.end(), list_files.begin(),
|
||||
list_files.end());
|
||||
}
|
||||
|
||||
for (const auto &input : input_files) {
|
||||
if (input.starts_with("@")) {
|
||||
auto list_files = readFileList(input.substr(1));
|
||||
files_to_process.insert(files_to_process.end(), list_files.begin(),
|
||||
list_files.end());
|
||||
} else if (std::filesystem::exists(input)) {
|
||||
files_to_process.push_back(input);
|
||||
} else {
|
||||
std::cout << "Warning: File not found: " << input << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
if (files_to_process.empty()) {
|
||||
std::cerr << "No files to process. Use --help for usage information."
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
try {
|
||||
DatabaseManager db(db_path);
|
||||
int processed_count = 0;
|
||||
const size_t batch_size = 50;
|
||||
size_t current_batch = 0;
|
||||
|
||||
db.beginTransaction();
|
||||
|
||||
for (const auto &filepath : files_to_process) {
|
||||
std::cout << "\n=== Processing: " << filepath << " ===" << std::endl;
|
||||
if (processFile(filepath, db))
|
||||
processed_count++;
|
||||
|
||||
if (++current_batch >= batch_size) {
|
||||
db.commitTransaction();
|
||||
std::cout << "Committed batch of " << current_batch
|
||||
<< " files to database" << std::endl;
|
||||
db.beginTransaction();
|
||||
current_batch = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (current_batch > 0) {
|
||||
db.commitTransaction();
|
||||
std::cout << "Committed final batch of " << current_batch
|
||||
<< " files to database" << std::endl;
|
||||
}
|
||||
|
||||
std::cout << "\n=== Summary ===" << std::endl;
|
||||
std::cout << "Processed " << processed_count << " files successfully"
|
||||
<< std::endl;
|
||||
std::cout << "Database saved to: " << db_path << std::endl;
|
||||
|
||||
} catch (const std::exception &e) {
|
||||
std::cerr << "Database error: " << e.what() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue