Compare commits

...

7 Commits

Author SHA1 Message Date
Guus Waals d7de3deb59 Update tooling ignores 2025-05-29 15:57:35 +08:00
Guus Waals 7c18d04724 Add notes 2025-05-29 15:57:22 +08:00
Guus Waals 3d40dc7e80 Update ignores 2025-05-29 15:57:01 +08:00
Guus Waals 0383ef8f13 WIP 2025-05-29 15:56:11 +08:00
Guus Waals 58397127e7 Fix setup script 2025-05-29 15:52:26 +08:00
Guus Waals 560fbe70ce Ignore refs from tooling duplicate check 2025-05-29 15:46:15 +08:00
Guus Waals db228e64ec Fix tool file categorization 2025-05-29 15:29:07 +08:00
11 changed files with 359 additions and 109 deletions

2
game_re/.gitignore vendored
View File

@ -1,5 +1,5 @@
gh_auto/*
gh_ref/*
old/
*.bak
gh_cmake_timestamp
functions.dat

View File

@ -1,64 +1,71 @@
add_executable(game_re
r3/main.cpp
r3/binders/static_mem.cxx
gh_global.cxx
)
target_compile_features(game_re PUBLIC cxx_std_20)
target_include_directories(game_re PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
set_target_properties(
game_re PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
)
if(WIN32 AND R3_32BIT)
target_link_libraries(game_re PRIVATE
binkw32
d3d8
dinput8
function(setup_target TARGET)
add_executable(${TARGET}
r3/main.cpp
r3/binders/static_mem.cxx
gh_global.cxx
)
endif()
target_compile_definitions(game_re PRIVATE
R3_GAME_DATA_DIR=\"${GAME_DATA_DIR}\"
)
target_compile_features(${TARGET} PUBLIC cxx_std_20)
target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
get_filename_component(R3_DATA_SEGMENT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/gh_datasegment.bin" ABSOLUTE)
target_compile_definitions(game_re PRIVATE
R3_DATA_SEGMENT_FILE=\"${R3_DATA_SEGMENT_FILE}\"
)
set_target_properties(
${TARGET} PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
)
target_compile_definitions(game_re PRIVATE
_CRT_SECURE_NO_WARNINGS=1
_CRT_NONSTDC_NO_WARNINGS=1)
if(WIN32 AND R3_32BIT)
target_link_libraries(${TARGET} PRIVATE
binkw32
d3d8
dinput8
)
endif()
target_link_libraries(game_re PRIVATE spdlog)
target_compile_definitions(${TARGET} PRIVATE
R3_GAME_DATA_DIR=\"${GAME_DATA_DIR}\"
)
file(GLOB GH_AUTO_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gh_auto/*.cxx)
file(GLOB GH_FIX_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gh_fix/*.cxx)
file(GLOB GH_STUB_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gh_stub/*.cxx)
get_filename_component(R3_DATA_SEGMENT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/gh_datasegment.bin" ABSOLUTE)
target_compile_definitions(${TARGET} PRIVATE
R3_DATA_SEGMENT_FILE=\"${R3_DATA_SEGMENT_FILE}\"
)
target_sources(game_re PRIVATE
${GH_AUTO_SOURCES}
${GH_FIX_SOURCES}
${GH_STUB_SOURCES}
)
target_compile_definitions(${TARGET} PRIVATE
_CRT_SECURE_NO_WARNINGS=1
_CRT_NONSTDC_NO_WARNINGS=1)
# Ignore -Wformat-security
target_compile_options(game_re PRIVATE -Wno-format-security)
target_link_libraries(${TARGET} PRIVATE spdlog)
# Ignore -Wmicrosoft-cast
target_compile_options(game_re PRIVATE -Wno-microsoft-cast)
file(GLOB GH_AUTO_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gh_auto/*.cxx)
file(GLOB GH_FIX_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gh_fix/*.cxx)
file(GLOB GH_STUB_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gh_stub/*.cxx)
# Automatically re-run CMake if any gh_*.cxx files change
# due to ghidra script runs
set_property(
DIRECTORY
APPEND
PROPERTY CMAKE_CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/gh_cmake_timestamp
)
target_sources(${TARGET} PRIVATE
${GH_AUTO_SOURCES}
${GH_FIX_SOURCES}
${GH_STUB_SOURCES}
)
target_precompile_headers(game_re PRIVATE
"$<$<COMPILE_LANGUAGE:CXX>:${CMAKE_CURRENT_SOURCE_DIR}/r3/binders/auto_pch.cxx>"
)
# Ignore -Wformat-security
target_compile_options(${TARGET} PRIVATE -Wno-format-security)
# Ignore -Wmicrosoft-cast
target_compile_options(${TARGET} PRIVATE -Wno-microsoft-cast)
# Automatically re-run CMake if any gh_*.cxx files change
# due to ghidra script runs
set_property(
DIRECTORY
APPEND
PROPERTY CMAKE_CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/gh_cmake_timestamp
)
target_precompile_headers(${TARGET} PRIVATE
"$<$<COMPILE_LANGUAGE:CXX>:${CMAKE_CURRENT_SOURCE_DIR}/r3/binders/auto_pch.cxx>"
)
endfunction()
setup_target(game_re)
setup_target(game_dbg)
target_compile_definitions(game_dbg PRIVATE RE_DBG_INJECTED=1)

View File

@ -6,14 +6,21 @@ set -e
pushd $script_dir
file_list=files.txt
rm -f $file_list
for dir in gh_auto gh_stub gh_fix; do
for file in $dir/*.cxx; do
echo $file >>$file_list
done
mkdir -p logs
types=(auto ref fix stub)
for type in "${types[@]}"; do
file_list="logs/files_${type}.txt"
rm -f "$file_list"
if [ -d "gh_${type}" ]; then
for file in "gh_${type}"/*.cxx; do
echo "$file" >>"$file_list"
done
$tool "@$file_list" -v --type=$type --log-file=logs/log-functions-${type}.txt
fi
done
$tool @$file_list
$tool gh_global.h -mglobals -v --log-file=logs/log-globals.txt
$tool -mduplicates -v --log-file=logs/log-duplicates.txt
popd

View File

@ -1,5 +1,6 @@
// @category _Reman3
// @menupath Reman3.Test
// @importpackage org.sqlite
import ghidra.app.script.GhidraScript;
import ghidra.program.model.address.Address;
@ -26,15 +27,15 @@ import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
public class Test extends GhidraScript {
// Will rebuild all functions
public boolean rebuildAllGlobals = true;
FunctionDatabase functionDB;
import org.sqlite.JDBC;
public class Test extends GhidraScript {
@Override
public void run() throws Exception {
RecompileConfig.INSTANCE = new RecompileConfig(this);
java.sql.DriverManager.registerDriver(new JDBC());
// Example SQLite usage
testSQLite();
}
@ -80,14 +81,4 @@ public class Test extends GhidraScript {
throw e;
}
}
private void scanDirectory(File directory, FunctionDatabase.Type type) throws Exception {
File[] files = directory.listFiles((dir, name) -> name.endsWith(".cxx"));
if (files == null)
return;
for (File file : files) {
scanFile(file, type);
}
}
}

View File

@ -16,7 +16,8 @@ public class FunctionDatabase {
public enum Type {
Auto,
Fix,
Stub
Stub,
Ref
}
public class Dependency implements java.io.Serializable {

View File

@ -1,3 +1,13 @@
# Ghidra Scripts
Add this to your scripts folder and run to generate c code for all functions in the project
Add this to your ghidra scripts folder to add the reman decompile scripts to ghidra
NOTE: Make sure to also add the included `sqlite-jdbc-3.49.1.0.jar` to the Ghidra scripts folder; the Ghidra scripts need it to read the decompile database.
## Decompile database
The decompile database is a sqlite database that contains a list of all files that implement decompiled functions and their source address. It is used to regenerate files, check for duplicates, and more.
To generate the database from the current set of files, run the scan_sources script in the /game_re folder.
Make sure you have set up the tooling by running the /tooling/setup script.

1
tooling/.gitignore vendored
View File

@ -1,5 +1,6 @@
build/
.claude/
tmps/
bin/
*.db
*.txt

177
tooling/Notes.md Normal file
View File

@ -0,0 +1,177 @@
# C++ Function/Global Parser Tool - Database Output Summary
## Overview
This tool parses C++ source files using Tree-sitter to extract function and global variable information along with their memory addresses from comments. The extracted data is stored in an SQLite database for analysis and lookup purposes.
## Database Schema
The tool creates an SQLite database (default: `gh.db`) with three main tables:
### 1. Functions Table
```sql
CREATE TABLE Functions (
filepath TEXT,
name TEXT,
address TEXT,
type INTEGER DEFAULT 0,
PRIMARY KEY (name, filepath)
);
```
**Purpose**: Stores function definitions that have function bodies (actual implementations)
- `filepath`: Source file path where the function is defined
- `name`: Function name (identifier)
- `address`: 8-character hexadecimal memory address extracted from comments
- `type`: File category passed via `--type`, stored as an integer (0 = auto, 1 = fix, 2 = stub, 3 = ref)
- **Primary Key**: Combination of name and filepath (allows same function name in different files)
### 2. Imports Table
```sql
CREATE TABLE Imports (
filepath TEXT,
name TEXT,
address TEXT,
type INTEGER DEFAULT 0,
PRIMARY KEY (name, filepath)
);
```
```
**Purpose**: Stores function declarations without bodies (imports/forward declarations)
- Same schema as Functions table
- Distinguishes between function definitions and declarations
- Useful for tracking external function references
### 3. Globals Table
```sql
CREATE TABLE Globals (
filepath TEXT,
name TEXT,
address TEXT
);
```
**Purpose**: Stores global variable declarations marked with `extern`
- `filepath`: Source file path where the global is declared
- `name`: Global variable name (identifier)
- `address`: 8-character hexadecimal memory address from comments
- **No Primary Key**: Allows duplicate global names across files
## Address Format
The tool extracts addresses from C++ comments using this regex pattern:
```regex
//\s*([0-9a-fA-F]{8})
```
**Expected Comment Format**:
```cpp
void myFunction(); // 12345678
extern int globalVar; // ABCDEF00
```
- Addresses must be exactly 8 hexadecimal characters
- Can be uppercase or lowercase
- Must be in a C++ line comment (`//`)
- Whitespace after `//` is optional
## Tool Modes
### 1. Functions Mode (`-m functions`)
- **Default mode**
- Parses C++ files for function definitions and declarations
- Populates `Functions` and `Imports` tables
- Distinguishes between functions with bodies vs. declarations only
### 2. Globals Mode (`-m globals`)
- Parses C++ files for `extern` global variable declarations
- Populates `Globals` table
- Only processes variables marked with `extern` storage class
### 3. Duplicates Mode (`-m duplicates`)
- **Analysis mode** - doesn't process files
- Checks existing database for duplicate addresses and names
- Reports conflicts across all tables
- Returns exit code 1 if duplicates found, 0 if clean
### 4. Dump-Tree Mode (`-m dump-tree`)
- **Debug mode** - doesn't use database
- Outputs Tree-sitter AST for debugging parsing issues
- Useful for understanding how the parser interprets source code
## Data Quality Checks
The tool includes built-in validation:
### Duplicate Address Detection
- Scans all tables for addresses used multiple times
- Reports format: `"DUPLICATE ADDRESS: {address} appears {count} times in: {entries}"`
- Cross-references the Functions and Globals tables (Imports are not part of this check, and functions scanned with `--type=ref` are skipped)
### Duplicate Name Detection
- Checks for function names appearing in multiple files (functions scanned with `--type=ref` are ignored)
- Checks for global names appearing in multiple files
- Helps identify naming conflicts and potential issues
## Usage Examples
### Basic Function Extraction
```bash
./tool file1.cpp file2.cpp -d output.db -m functions
```
### Global Variable Extraction
```bash
./tool globals.h -d output.db -m globals
```
### Batch Processing with File List
```bash
./tool -l filelist.txt -d output.db -m functions
```
### Quality Assurance Check
```bash
./tool -d output.db -m duplicates
```
## Database Queries for Users
### Find Function by Name
```sql
SELECT * FROM Functions WHERE name = 'functionName';
SELECT * FROM Imports WHERE name = 'functionName';
```
### Find All Symbols at Address
```sql
SELECT 'Function' as type, name, filepath FROM Functions WHERE address = '12345678'
UNION ALL
SELECT 'Import' as type, name, filepath FROM Imports WHERE address = '12345678'
UNION ALL
SELECT 'Global' as type, name, filepath FROM Globals WHERE address = '12345678';
```
### List All Functions in File
```sql
SELECT name, address FROM Functions WHERE filepath = 'path/to/file.cpp'
ORDER BY name;
```
### Find Functions Without Addresses
```sql
SELECT name, filepath FROM Functions WHERE address = '' OR address IS NULL;
```
### Address Range Analysis
```sql
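-- Addresses are stored as fixed-width 8-character hex text, so compare them as
-- strings; CAST(address AS INTEGER) would not interpret the text as hexadecimal.
SELECT name, address, filepath FROM Functions
WHERE upper(address) BETWEEN '10000000' AND '20000000'
ORDER BY upper(address);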
```
## Integration Considerations
- **Database Format**: Standard SQLite3 - compatible with most tools and languages
- **File Paths**: Relative to the `game_re` folder in the repository root, so every path starts with a category subfolder such as `gh_auto/` or `gh_fix/`
- **Address Format**: Always 8-character hex strings (32 bit addresses) - pad with leading zeros if needed
- **Case Sensitivity**: Function/global names are case-sensitive as per C++ standards
- **Unicode Support**: Handles UTF-8 encoded source files
This database serves as a comprehensive symbol table for reverse engineering, debugging, and code analysis workflows.

View File

@ -3,19 +3,18 @@ set -e
tool=build/clang-x86_64-pc-windows-msvc/Release/r3_gh_tool
cmake --build build/clang-x86_64-pc-windows-msvc/Release --target r3_gh_tool
scan_dir=tmps/gh_auto
file_list=files.txt
rm -f $file_list
for file in tmps/gh_auto/*.cxx; do
echo $file >>$file_list
done
for file in tmps/gh_stub/*.cxx; do
echo $file >>$file_list
done
for file in tmps/gh_fix/*.cxx; do
echo $file >>$file_list
types=(auto ref fix stub)
for type in "${types[@]}"; do
file_list="files_${type}.txt"
rm -f "$file_list"
if [ -d "tmps/gh_${type}" ]; then
for file in "tmps/gh_${type}"/*.cxx; do
echo "$file" >>"$file_list"
done
$tool "@$file_list" -v --type=$type --log-file=log-functions.txt
fi
done
$tool @$file_list -v --log-file=log-functions.txt
$tool tmps/gh_global.h -mglobals -v --log-file=log-globals.txt
$tool -mduplicates -v --log-file=log-duplicates.txt

View File

@ -1,11 +1,13 @@
#!/bin/bash
script_dir=$(readlink -f $(dirname "$0"))
pushd $script_dir
# Create build directory if it doesn't exist
mkdir -p build/tmp
if [ ! -d "build/tmp" ]; then
# Configure project using CMake with Ninja generator for Release build
cmake -S . -B build/tmp -G Ninja -DCMAKE_BUILD_TYPE=Release
cmake -B build/tmp -G Ninja -DCMAKE_BUILD_TYPE=Release
fi
# Build project using Ninja
@ -21,3 +23,5 @@ if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "win32" ]]; then
else
cp build/tmp/r3_gh_tool bin/
fi
popd

View File

@ -18,6 +18,38 @@ extern "C" TSLanguage *tree_sitter_cpp();
// Global address regex pattern
const std::regex ADDRESS_REGEX(R"(//\s*([0-9a-fA-F]{8}))");
// Category of a scanned source file. The enumerator's integer value is what
// insertFunction binds into the database `type` column, and the duplicate
// checks compare that column against the literal 3 (Ref), so keep the
// enumerator order in sync with those queries.
enum class FileType { Auto, Fix, Stub, Ref };
// Maps a type name ("auto", "fix", "stub" or "ref") to its FileType value.
// Matching is exact and case-sensitive.
// Throws std::invalid_argument for any other string.
FileType stringToFileType(const std::string &type_str) {
  FileType parsed;
  if (type_str == "auto") {
    parsed = FileType::Auto;
  } else if (type_str == "fix") {
    parsed = FileType::Fix;
  } else if (type_str == "stub") {
    parsed = FileType::Stub;
  } else if (type_str == "ref") {
    parsed = FileType::Ref;
  } else {
    throw std::invalid_argument("Invalid file type: " + type_str);
  }
  return parsed;
}
// Returns the lowercase name of a FileType ("auto", "fix", "stub" or "ref").
// Throws std::logic_error if the value is not one of the four enumerators.
std::string fileTypeToString(FileType type) {
  const char *label = nullptr;
  switch (type) {
  case FileType::Auto:
    label = "auto";
    break;
  case FileType::Fix:
    label = "fix";
    break;
  case FileType::Stub:
    label = "stub";
    break;
  case FileType::Ref:
    label = "ref";
    break;
  }
  // Reached with a null label only when `type` holds an out-of-range value.
  if (label == nullptr) {
    throw std::logic_error(
        fmt::format("Invalid file type: {}", static_cast<int>(type)));
  }
  return label;
}
// Helper function to check if a comment contains an address
bool hasAddressPattern(const std::string &comment) {
return std::regex_search(comment, ADDRESS_REGEX);
@ -47,6 +79,7 @@ struct FunctionInfo {
std::string address;
std::string filepath;
bool is_import;
FileType type; // Add type field
};
struct GlobalInfo {
@ -81,11 +114,11 @@ public:
&delete_imports_stmt,
"Failed to prepare delete imports statement");
prepareStatement("INSERT OR REPLACE INTO Functions (filepath, name, "
"address) VALUES (?, ?, ?)",
"address, type) VALUES (?, ?, ?, ?)",
&insert_functions_stmt,
"Failed to prepare insert functions statement");
prepareStatement("INSERT OR REPLACE INTO Imports (filepath, name, address) "
"VALUES (?, ?, ?)",
prepareStatement("INSERT OR REPLACE INTO Imports (filepath, name, address, "
"type) VALUES (?, ?, ?, ?)",
&insert_imports_stmt,
"Failed to prepare insert imports statement");
prepareStatement("DELETE FROM Globals WHERE filepath = ?",
@ -128,6 +161,7 @@ public:
sqlite3_bind_text(stmt, 1, func.filepath.c_str(), -1, SQLITE_STATIC);
sqlite3_bind_text(stmt, 2, func.name.c_str(), -1, SQLITE_STATIC);
sqlite3_bind_text(stmt, 3, func.address.c_str(), -1, SQLITE_STATIC);
sqlite3_bind_int(stmt, 4, static_cast<int>(func.type));
sqlite3_step(stmt);
}
@ -157,8 +191,8 @@ public:
}
const char *create_tables = R"(
CREATE TABLE IF NOT EXISTS Functions (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath));
CREATE TABLE IF NOT EXISTS Imports (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath));
CREATE TABLE IF NOT EXISTS Functions (filepath TEXT, name TEXT, address TEXT, type INTEGER DEFAULT 0, PRIMARY KEY (name, filepath));
CREATE TABLE IF NOT EXISTS Imports (filepath TEXT, name TEXT, address TEXT, type INTEGER DEFAULT 0, PRIMARY KEY (name, filepath));
CREATE TABLE IF NOT EXISTS Globals (filepath TEXT, name TEXT, address TEXT);
)";
@ -197,7 +231,7 @@ public:
bool checkDuplicateAddresses() {
const char *sql = R"(
WITH all_addresses AS (
SELECT 'Functions' as table_name, name, address, filepath FROM Functions WHERE address != ''
SELECT 'Functions' as table_name, name, address, filepath FROM Functions WHERE address != '' AND type != 3
UNION ALL
SELECT 'Globals' as table_name, name, address, filepath FROM Globals WHERE address != ''
)
@ -239,6 +273,7 @@ public:
SELECT name, COUNT(*) as count,
GROUP_CONCAT(filepath, '; ') as filepaths
FROM Functions
WHERE type != 3
GROUP BY name
HAVING COUNT(*) > 1
ORDER BY name;
@ -390,7 +425,7 @@ bool hasFunctionBody(TSNode node) {
}
void findFunctions(TSNode node, const char *source_code, uint32_t source_length,
std::vector<FunctionInfo> &functions) {
std::vector<FunctionInfo> &functions, FileType file_type) {
const char *type = ts_node_type(node);
if (strcmp(type, "function_definition") == 0 ||
@ -409,7 +444,8 @@ void findFunctions(TSNode node, const char *source_code, uint32_t source_length,
FunctionInfo func{func_name, address, "",
strcmp(type, "function_definition") == 0
? !hasFunctionBody(node)
: true};
: true,
file_type}; // Add file_type parameter
functions.push_back(func);
}
// We'll never nest function declarations
@ -422,8 +458,8 @@ void findFunctions(TSNode node, const char *source_code, uint32_t source_length,
uint32_t child_count = ts_node_child_count(node);
for (uint32_t i = 0; i < child_count; i++) {
findFunctions(ts_node_child(node, i), source_code, source_length,
functions);
findFunctions(ts_node_child(node, i), source_code, source_length, functions,
file_type);
}
}
@ -454,7 +490,8 @@ std::vector<std::string> readFileList(const std::string &list_file) {
return files;
}
bool processFile(const std::string &filepath, DatabaseManager &db) {
bool processFile(const std::string &filepath, DatabaseManager &db,
FileType file_type) {
std::ifstream file(filepath);
if (!file.is_open()) {
spdlog::error("Could not open file {}", filepath);
@ -482,17 +519,18 @@ bool processFile(const std::string &filepath, DatabaseManager &db) {
std::vector<FunctionInfo> functions;
findFunctions(root_node, file_content.c_str(), file_content.length(),
functions);
functions, file_type);
for (auto &func : functions) {
func.filepath = filepath;
db.insertFunction(func);
spdlog::debug("{}: {} @ {} in {}", func.is_import ? "Import" : "Function",
func.name, func.address, filepath);
spdlog::debug("{}: {} @ {} in {} (type: {})",
func.is_import ? "Import" : "Function", func.name,
func.address, filepath, fileTypeToString(func.type));
}
spdlog::info("Processed {} functions/imports from {}", functions.size(),
filepath);
spdlog::info("Processed {} functions/imports from {} (type: {})",
functions.size(), filepath, fileTypeToString(file_type));
ts_tree_delete(tree);
ts_parser_delete(parser);
@ -741,6 +779,7 @@ int main(int argc, char *argv[]) {
std::string db_path = "gh.db";
std::string mode = "functions";
std::string log_file = "";
std::string type_str = "auto"; // Add type string variable
bool verbose = false;
app.add_option("files", input_files,
@ -755,11 +794,24 @@ int main(int argc, char *argv[]) {
->default_val("functions")
->check(
CLI::IsMember({"functions", "globals", "duplicates", "dump-tree"}));
app.add_option("-t,--type", type_str,
"File type: 'auto', 'fix', 'stub', or 'ref'")
->default_val("auto")
->check(CLI::IsMember({"auto", "fix", "stub", "ref"}));
app.add_flag("-v,--verbose", verbose, "Enable verbose logging (debug level)");
app.add_flag("--log-file", log_file, "Enable logging to file");
CLI11_PARSE(app, argc, argv);
// Convert string to FileType enum
FileType file_type;
try {
file_type = stringToFileType(type_str);
} catch (const std::invalid_argument &e) {
spdlog::error("Invalid file type: {}", type_str);
return 1;
}
// Set log level based on verbose flag
if (verbose) {
spdlog::set_level(spdlog::level::debug);
@ -830,10 +882,10 @@ int main(int argc, char *argv[]) {
db.beginTransaction();
for (const auto &filepath : files_to_process) {
spdlog::info("=== Processing: {} ===", filepath);
spdlog::info("=== Processing: {} (type: {}) ===", filepath, type_str);
bool success = false;
if (mode == "functions") {
success = processFile(filepath, db);
success = processFile(filepath, db, file_type);
} else if (mode == "globals") {
success = processGlobalsFile(filepath, db);
}
@ -860,6 +912,7 @@ int main(int argc, char *argv[]) {
spdlog::info("=== Summary ===");
spdlog::info("Processed {} files successfully", processed_count);
spdlog::info("Mode: {}", mode);
spdlog::info("File type: {}", type_str);
if (mode != "dump-tree") {
spdlog::info("Database saved to: {}", db_path);
}