Compare commits
7 Commits
836e281b80
...
d7de3deb59
Author | SHA1 | Date |
---|---|---|
|
d7de3deb59 | |
|
7c18d04724 | |
|
3d40dc7e80 | |
|
0383ef8f13 | |
|
58397127e7 | |
|
560fbe70ce | |
|
db228e64ec |
|
@ -1,5 +1,5 @@
|
||||||
gh_auto/*
|
|
||||||
gh_ref/*
|
gh_ref/*
|
||||||
|
old/
|
||||||
*.bak
|
*.bak
|
||||||
gh_cmake_timestamp
|
gh_cmake_timestamp
|
||||||
functions.dat
|
functions.dat
|
||||||
|
|
|
@ -1,64 +1,71 @@
|
||||||
add_executable(game_re
|
function(setup_target TARGET)
|
||||||
r3/main.cpp
|
add_executable(${TARGET}
|
||||||
r3/binders/static_mem.cxx
|
r3/main.cpp
|
||||||
gh_global.cxx
|
r3/binders/static_mem.cxx
|
||||||
)
|
gh_global.cxx
|
||||||
|
|
||||||
target_compile_features(game_re PUBLIC cxx_std_20)
|
|
||||||
target_include_directories(game_re PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
|
||||||
|
|
||||||
set_target_properties(
|
|
||||||
game_re PROPERTIES
|
|
||||||
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
|
|
||||||
)
|
|
||||||
|
|
||||||
if(WIN32 AND R3_32BIT)
|
|
||||||
target_link_libraries(game_re PRIVATE
|
|
||||||
binkw32
|
|
||||||
d3d8
|
|
||||||
dinput8
|
|
||||||
)
|
)
|
||||||
endif()
|
|
||||||
|
|
||||||
target_compile_definitions(game_re PRIVATE
|
target_compile_features(${TARGET} PUBLIC cxx_std_20)
|
||||||
R3_GAME_DATA_DIR=\"${GAME_DATA_DIR}\"
|
target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
)
|
|
||||||
|
|
||||||
get_filename_component(R3_DATA_SEGMENT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/gh_datasegment.bin" ABSOLUTE)
|
set_target_properties(
|
||||||
target_compile_definitions(game_re PRIVATE
|
${TARGET} PROPERTIES
|
||||||
R3_DATA_SEGMENT_FILE=\"${R3_DATA_SEGMENT_FILE}\"
|
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
|
||||||
)
|
)
|
||||||
|
|
||||||
target_compile_definitions(game_re PRIVATE
|
if(WIN32 AND R3_32BIT)
|
||||||
_CRT_SECURE_NO_WARNINGS=1
|
target_link_libraries(${TARGET} PRIVATE
|
||||||
_CRT_NONSTDC_NO_WARNINGS=1)
|
binkw32
|
||||||
|
d3d8
|
||||||
|
dinput8
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
target_link_libraries(game_re PRIVATE spdlog)
|
target_compile_definitions(${TARGET} PRIVATE
|
||||||
|
R3_GAME_DATA_DIR=\"${GAME_DATA_DIR}\"
|
||||||
|
)
|
||||||
|
|
||||||
file(GLOB GH_AUTO_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gh_auto/*.cxx)
|
get_filename_component(R3_DATA_SEGMENT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/gh_datasegment.bin" ABSOLUTE)
|
||||||
file(GLOB GH_FIX_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gh_fix/*.cxx)
|
target_compile_definitions(${TARGET} PRIVATE
|
||||||
file(GLOB GH_STUB_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gh_stub/*.cxx)
|
R3_DATA_SEGMENT_FILE=\"${R3_DATA_SEGMENT_FILE}\"
|
||||||
|
)
|
||||||
|
|
||||||
target_sources(game_re PRIVATE
|
target_compile_definitions(${TARGET} PRIVATE
|
||||||
${GH_AUTO_SOURCES}
|
_CRT_SECURE_NO_WARNINGS=1
|
||||||
${GH_FIX_SOURCES}
|
_CRT_NONSTDC_NO_WARNINGS=1)
|
||||||
${GH_STUB_SOURCES}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Ignore -Wformat-security
|
target_link_libraries(${TARGET} PRIVATE spdlog)
|
||||||
target_compile_options(game_re PRIVATE -Wno-format-security)
|
|
||||||
|
|
||||||
# Ignore -Wmicrosoft-cast
|
file(GLOB GH_AUTO_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gh_auto/*.cxx)
|
||||||
target_compile_options(game_re PRIVATE -Wno-microsoft-cast)
|
file(GLOB GH_FIX_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gh_fix/*.cxx)
|
||||||
|
file(GLOB GH_STUB_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gh_stub/*.cxx)
|
||||||
|
|
||||||
# Automatically re-run CMake if any gh_*.cxx files change
|
target_sources(${TARGET} PRIVATE
|
||||||
# due to ghidra script runs
|
${GH_AUTO_SOURCES}
|
||||||
set_property(
|
${GH_FIX_SOURCES}
|
||||||
DIRECTORY
|
${GH_STUB_SOURCES}
|
||||||
APPEND
|
)
|
||||||
PROPERTY CMAKE_CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/gh_cmake_timestamp
|
|
||||||
)
|
|
||||||
|
|
||||||
target_precompile_headers(game_re PRIVATE
|
# Ignore -Wformat-security
|
||||||
"$<$<COMPILE_LANGUAGE:CXX>:${CMAKE_CURRENT_SOURCE_DIR}/r3/binders/auto_pch.cxx>"
|
target_compile_options(${TARGET} PRIVATE -Wno-format-security)
|
||||||
)
|
|
||||||
|
# Ignore -Wmicrosoft-cast
|
||||||
|
target_compile_options(${TARGET} PRIVATE -Wno-microsoft-cast)
|
||||||
|
|
||||||
|
# Automatically re-run CMake if any gh_*.cxx files change
|
||||||
|
# due to ghidra script runs
|
||||||
|
set_property(
|
||||||
|
DIRECTORY
|
||||||
|
APPEND
|
||||||
|
PROPERTY CMAKE_CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/gh_cmake_timestamp
|
||||||
|
)
|
||||||
|
|
||||||
|
target_precompile_headers(${TARGET} PRIVATE
|
||||||
|
"$<$<COMPILE_LANGUAGE:CXX>:${CMAKE_CURRENT_SOURCE_DIR}/r3/binders/auto_pch.cxx>"
|
||||||
|
)
|
||||||
|
endfunction()
|
||||||
|
|
||||||
|
setup_target(game_re)
|
||||||
|
|
||||||
|
setup_target(game_dbg)
|
||||||
|
target_compile_definitions(game_dbg PRIVATE RE_DBG_INJECTED=1)
|
||||||
|
|
|
@ -6,14 +6,21 @@ set -e
|
||||||
|
|
||||||
pushd $script_dir
|
pushd $script_dir
|
||||||
|
|
||||||
file_list=files.txt
|
mkdir -p logs
|
||||||
rm -f $file_list
|
types=(auto ref fix stub)
|
||||||
for dir in gh_auto gh_stub gh_fix; do
|
for type in "${types[@]}"; do
|
||||||
for file in $dir/*.cxx; do
|
file_list="logs/files_${type}.txt"
|
||||||
echo $file >>$file_list
|
rm -f "$file_list"
|
||||||
done
|
|
||||||
|
if [ -d "gh_${type}" ]; then
|
||||||
|
for file in "gh_${type}"/*.cxx; do
|
||||||
|
echo "$file" >>"$file_list"
|
||||||
|
done
|
||||||
|
$tool "@$file_list" -v --type=$type --log-file=logs/log-functions-${type}.txt
|
||||||
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
$tool @$file_list
|
$tool gh_global.h -mglobals -v --log-file=logs/log-globals.txt
|
||||||
|
$tool -mduplicates -v --log-file=logs/log-duplicates.txt
|
||||||
|
|
||||||
popd
|
popd
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
// @category _Reman3
|
// @category _Reman3
|
||||||
// @menupath Reman3.Test
|
// @menupath Reman3.Test
|
||||||
|
// @importpackage org.sqlite
|
||||||
|
|
||||||
import ghidra.app.script.GhidraScript;
|
import ghidra.app.script.GhidraScript;
|
||||||
import ghidra.program.model.address.Address;
|
import ghidra.program.model.address.Address;
|
||||||
|
@ -26,15 +27,15 @@ import java.sql.ResultSet;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.sql.Statement;
|
import java.sql.Statement;
|
||||||
|
|
||||||
public class Test extends GhidraScript {
|
import org.sqlite.JDBC;
|
||||||
// Will rebuild all functions
|
|
||||||
public boolean rebuildAllGlobals = true;
|
|
||||||
FunctionDatabase functionDB;
|
|
||||||
|
|
||||||
|
public class Test extends GhidraScript {
|
||||||
@Override
|
@Override
|
||||||
public void run() throws Exception {
|
public void run() throws Exception {
|
||||||
RecompileConfig.INSTANCE = new RecompileConfig(this);
|
RecompileConfig.INSTANCE = new RecompileConfig(this);
|
||||||
|
|
||||||
|
java.sql.DriverManager.registerDriver(new JDBC());
|
||||||
|
|
||||||
// Example SQLite usage
|
// Example SQLite usage
|
||||||
testSQLite();
|
testSQLite();
|
||||||
}
|
}
|
||||||
|
@ -80,14 +81,4 @@ public class Test extends GhidraScript {
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void scanDirectory(File directory, FunctionDatabase.Type type) throws Exception {
|
|
||||||
File[] files = directory.listFiles((dir, name) -> name.endsWith(".cxx"));
|
|
||||||
if (files == null)
|
|
||||||
return;
|
|
||||||
|
|
||||||
for (File file : files) {
|
|
||||||
scanFile(file, type);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
|
@ -16,7 +16,8 @@ public class FunctionDatabase {
|
||||||
public enum Type {
|
public enum Type {
|
||||||
Auto,
|
Auto,
|
||||||
Fix,
|
Fix,
|
||||||
Stub
|
Stub,
|
||||||
|
Ref
|
||||||
}
|
}
|
||||||
|
|
||||||
public class Dependency implements java.io.Serializable {
|
public class Dependency implements java.io.Serializable {
|
||||||
|
|
|
@ -1,3 +1,13 @@
|
||||||
# Ghidra Scripts
|
# Ghidra Scripts
|
||||||
|
|
||||||
Add this to your scripts folder and run to generate c code for all functions in the project
|
Add this to your Ghidra scripts folder to make the reman decompile scripts available in Ghidra.
|
||||||
|
|
||||||
|
NOTE: Make sure to also add the included `sqlite-jdbc-3.49.1.0.jar` to the Ghidra scripts folder, as it is required for the Ghidra scripts to read the decompile database.
|
||||||
|
|
||||||
|
## Decompile database
|
||||||
|
|
||||||
|
The decompile database is a sqlite database that contains a list of all files that implement decompiled functions and their source address. It is used to regenerate files, check for duplicates, and more.
|
||||||
|
|
||||||
|
To generate the database from the current set of files, run the scan_sources script in the /game_re folder.
|
||||||
|
|
||||||
|
Make sure you have set up the tooling by running the /tooling/setup script.
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
build/
|
build/
|
||||||
.claude/
|
.claude/
|
||||||
|
tmps/
|
||||||
bin/
|
bin/
|
||||||
*.db
|
*.db
|
||||||
*.txt
|
*.txt
|
||||||
|
|
|
@ -0,0 +1,177 @@
|
||||||
|
# C++ Function/Global Parser Tool - Database Output Summary
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
This tool parses C++ source files using Tree-sitter to extract function and global variable information along with their memory addresses from comments. The extracted data is stored in an SQLite database for analysis and lookup purposes.
|
||||||
|
|
||||||
|
## Database Schema
|
||||||
|
|
||||||
|
The tool creates an SQLite database (default: `gh.db`) with three main tables:
|
||||||
|
|
||||||
|
### 1. Functions Table
|
||||||
|
```sql
|
||||||
|
CREATE TABLE Functions (
|
||||||
|
filepath TEXT,
|
||||||
|
name TEXT,
|
||||||
|
address TEXT,
|
||||||
|
PRIMARY KEY (name, filepath)
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
**Purpose**: Stores function definitions that have function bodies (actual implementations)
|
||||||
|
- `filepath`: Source file path where the function is defined
|
||||||
|
- `name`: Function name (identifier)
|
||||||
|
- `address`: 8-character hexadecimal memory address extracted from comments
|
||||||
|
- **Primary Key**: Combination of name and filepath (allows same function name in different files)
|
||||||
|
|
||||||
|
### 2. Imports Table
|
||||||
|
```sql
|
||||||
|
CREATE TABLE Imports (
|
||||||
|
filepath TEXT,
|
||||||
|
name TEXT,
|
||||||
|
address TEXT,
|
||||||
|
PRIMARY KEY (name, filepath)
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
**Purpose**: Stores function declarations without bodies (imports/forward declarations)
|
||||||
|
- Same schema as Functions table
|
||||||
|
- Distinguishes between function definitions and declarations
|
||||||
|
- Useful for tracking external function references
|
||||||
|
|
||||||
|
### 3. Globals Table
|
||||||
|
```sql
|
||||||
|
CREATE TABLE Globals (
|
||||||
|
filepath TEXT,
|
||||||
|
name TEXT,
|
||||||
|
address TEXT
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
**Purpose**: Stores global variable declarations marked with `extern`
|
||||||
|
- `filepath`: Source file path where the global is declared
|
||||||
|
- `name`: Global variable name (identifier)
|
||||||
|
- `address`: 8-character hexadecimal memory address from comments
|
||||||
|
- **No Primary Key**: Allows duplicate global names across files
|
||||||
|
|
||||||
|
## Address Format
|
||||||
|
|
||||||
|
The tool extracts addresses from C++ comments using this regex pattern:
|
||||||
|
```regex
|
||||||
|
//\s*([0-9a-fA-F]{8})
|
||||||
|
```
|
||||||
|
|
||||||
|
**Expected Comment Format**:
|
||||||
|
```cpp
|
||||||
|
void myFunction(); // 12345678
|
||||||
|
extern int globalVar; // ABCDEF00
|
||||||
|
```
|
||||||
|
|
||||||
|
- Addresses must be exactly 8 hexadecimal characters
|
||||||
|
- Can be uppercase or lowercase
|
||||||
|
- Must be in a C++ line comment (`//`)
|
||||||
|
- Whitespace after `//` is optional
|
||||||
|
|
||||||
|
## Tool Modes
|
||||||
|
|
||||||
|
### 1. Functions Mode (`-m functions`)
|
||||||
|
- **Default mode**
|
||||||
|
- Parses C++ files for function definitions and declarations
|
||||||
|
- Populates `Functions` and `Imports` tables
|
||||||
|
- Distinguishes between functions with bodies vs. declarations only
|
||||||
|
|
||||||
|
### 2. Globals Mode (`-m globals`)
|
||||||
|
- Parses C++ files for `extern` global variable declarations
|
||||||
|
- Populates `Globals` table
|
||||||
|
- Only processes variables marked with `extern` storage class
|
||||||
|
|
||||||
|
### 3. Duplicates Mode (`-m duplicates`)
|
||||||
|
- **Analysis mode** - doesn't process files
|
||||||
|
- Checks existing database for duplicate addresses and names
|
||||||
|
- Reports conflicts across all tables
|
||||||
|
- Returns exit code 1 if duplicates found, 0 if clean
|
||||||
|
|
||||||
|
### 4. Dump-Tree Mode (`-m dump-tree`)
|
||||||
|
- **Debug mode** - doesn't use database
|
||||||
|
- Outputs Tree-sitter AST for debugging parsing issues
|
||||||
|
- Useful for understanding how the parser interprets source code
|
||||||
|
|
||||||
|
## Data Quality Checks
|
||||||
|
|
||||||
|
The tool includes built-in validation:
|
||||||
|
|
||||||
|
### Duplicate Address Detection
|
||||||
|
- Scans all tables for addresses used multiple times
|
||||||
|
- Reports format: `"DUPLICATE ADDRESS: {address} appears {count} times in: {entries}"`
|
||||||
|
- Cross-references Functions, Imports, and Globals tables
|
||||||
|
|
||||||
|
### Duplicate Name Detection
|
||||||
|
- Checks for function names appearing in multiple files
|
||||||
|
- Checks for global names appearing in multiple files
|
||||||
|
- Helps identify naming conflicts and potential issues
|
||||||
|
|
||||||
|
## Usage Examples
|
||||||
|
|
||||||
|
### Basic Function Extraction
|
||||||
|
```bash
|
||||||
|
./tool file1.cpp file2.cpp -d output.db -m functions
|
||||||
|
```
|
||||||
|
|
||||||
|
### Global Variable Extraction
|
||||||
|
```bash
|
||||||
|
./tool globals.h -d output.db -m globals
|
||||||
|
```
|
||||||
|
|
||||||
|
### Batch Processing with File List
|
||||||
|
```bash
|
||||||
|
./tool -l filelist.txt -d output.db -m functions
|
||||||
|
```
|
||||||
|
|
||||||
|
### Quality Assurance Check
|
||||||
|
```bash
|
||||||
|
./tool -d output.db -m duplicates
|
||||||
|
```
|
||||||
|
|
||||||
|
## Database Queries for Users
|
||||||
|
|
||||||
|
### Find Function by Name
|
||||||
|
```sql
|
||||||
|
SELECT * FROM Functions WHERE name = 'functionName';
|
||||||
|
SELECT * FROM Imports WHERE name = 'functionName';
|
||||||
|
```
|
||||||
|
|
||||||
|
### Find All Symbols at Address
|
||||||
|
```sql
|
||||||
|
SELECT 'Function' as type, name, filepath FROM Functions WHERE address = '12345678'
|
||||||
|
UNION ALL
|
||||||
|
SELECT 'Import' as type, name, filepath FROM Imports WHERE address = '12345678'
|
||||||
|
UNION ALL
|
||||||
|
SELECT 'Global' as type, name, filepath FROM Globals WHERE address = '12345678';
|
||||||
|
```
|
||||||
|
|
||||||
|
### List All Functions in File
|
||||||
|
```sql
|
||||||
|
SELECT name, address FROM Functions WHERE filepath = 'path/to/file.cpp'
|
||||||
|
ORDER BY name;
|
||||||
|
```
|
||||||
|
|
||||||
|
### Find Functions Without Addresses
|
||||||
|
```sql
|
||||||
|
SELECT name, filepath FROM Functions WHERE address = '' OR address IS NULL;
|
||||||
|
```
|
||||||
|
|
||||||
|
### Address Range Analysis
|
||||||
|
```sql
|
||||||
|
SELECT name, address, filepath FROM Functions
|
||||||
|
WHERE upper(address) BETWEEN '10000000' AND '20000000' -- addresses are fixed-width 8-character hex text, so text comparison matches numeric order
|
||||||
|
ORDER BY upper(address);
|
||||||
|
```
|
||||||
|
|
||||||
|
## Integration Considerations
|
||||||
|
|
||||||
|
- **Database Format**: Standard SQLite3 - compatible with most tools and languages
|
||||||
|
- **File Paths**: Relative to the game source directory (the `game_re` folder in the repo root), so paths start with a subfolder such as `gh_auto` or `gh_fix`.
|
||||||
|
- **Address Format**: Always 8-character hex strings (32 bit addresses) - pad with leading zeros if needed
|
||||||
|
- **Case Sensitivity**: Function/global names are case-sensitive as per C++ standards
|
||||||
|
- **Unicode Support**: Handles UTF-8 encoded source files
|
||||||
|
|
||||||
|
This database serves as a comprehensive symbol table for reverse engineering, debugging, and code analysis workflows.
|
|
@ -3,19 +3,18 @@ set -e
|
||||||
tool=build/clang-x86_64-pc-windows-msvc/Release/r3_gh_tool
|
tool=build/clang-x86_64-pc-windows-msvc/Release/r3_gh_tool
|
||||||
cmake --build build/clang-x86_64-pc-windows-msvc/Release --target r3_gh_tool
|
cmake --build build/clang-x86_64-pc-windows-msvc/Release --target r3_gh_tool
|
||||||
|
|
||||||
scan_dir=tmps/gh_auto
|
types=(auto ref fix stub)
|
||||||
file_list=files.txt
|
for type in "${types[@]}"; do
|
||||||
rm -f $file_list
|
file_list="files_${type}.txt"
|
||||||
for file in tmps/gh_auto/*.cxx; do
|
rm -f "$file_list"
|
||||||
echo $file >>$file_list
|
|
||||||
done
|
if [ -d "tmps/gh_${type}" ]; then
|
||||||
for file in tmps/gh_stub/*.cxx; do
|
for file in "tmps/gh_${type}"/*.cxx; do
|
||||||
echo $file >>$file_list
|
echo "$file" >>"$file_list"
|
||||||
done
|
done
|
||||||
for file in tmps/gh_fix/*.cxx; do
|
$tool "@$file_list" -v --type=$type --log-file=log-functions.txt
|
||||||
echo $file >>$file_list
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
$tool @$file_list -v --log-file=log-functions.txt
|
|
||||||
$tool tmps/gh_global.h -mglobals -v --log-file=log-globals.txt
|
$tool tmps/gh_global.h -mglobals -v --log-file=log-globals.txt
|
||||||
$tool -mduplicates -v --log-file=log-duplicates.txt
|
$tool -mduplicates -v --log-file=log-duplicates.txt
|
||||||
|
|
|
@ -1,11 +1,13 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
|
script_dir=$(readlink -f $(dirname "$0"))
|
||||||
|
pushd $script_dir
|
||||||
|
|
||||||
# Create build directory if it doesn't exist
|
# Create build directory if it doesn't exist
|
||||||
mkdir -p build/tmp
|
|
||||||
|
|
||||||
if [ ! -d "build/tmp" ]; then
|
if [ ! -d "build/tmp" ]; then
|
||||||
# Configure project using CMake with Ninja generator for Release build
|
# Configure project using CMake with Ninja generator for Release build
|
||||||
cmake -S . -B build/tmp -G Ninja -DCMAKE_BUILD_TYPE=Release
|
cmake -B build/tmp -G Ninja -DCMAKE_BUILD_TYPE=Release
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Build project using Ninja
|
# Build project using Ninja
|
||||||
|
@ -21,3 +23,5 @@ if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "win32" ]]; then
|
||||||
else
|
else
|
||||||
cp build/tmp/r3_gh_tool bin/
|
cp build/tmp/r3_gh_tool bin/
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
popd
|
||||||
|
|
|
@ -18,6 +18,38 @@ extern "C" TSLanguage *tree_sitter_cpp();
|
||||||
// Global address regex pattern
|
// Global address regex pattern
|
||||||
const std::regex ADDRESS_REGEX(R"(//\s*([0-9a-fA-F]{8}))");
|
const std::regex ADDRESS_REGEX(R"(//\s*([0-9a-fA-F]{8}))");
|
||||||
|
|
||||||
|
// Add enum for file types
|
||||||
|
enum class FileType { Auto, Fix, Stub, Ref };
|
||||||
|
|
||||||
|
// Helper function to convert string to FileType
|
||||||
|
FileType stringToFileType(const std::string &type_str) {
|
||||||
|
if (type_str == "auto")
|
||||||
|
return FileType::Auto;
|
||||||
|
if (type_str == "fix")
|
||||||
|
return FileType::Fix;
|
||||||
|
if (type_str == "stub")
|
||||||
|
return FileType::Stub;
|
||||||
|
if (type_str == "ref")
|
||||||
|
return FileType::Ref;
|
||||||
|
throw std::invalid_argument("Invalid file type: " + type_str);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper function to convert FileType to string
|
||||||
|
std::string fileTypeToString(FileType type) {
|
||||||
|
switch (type) {
|
||||||
|
case FileType::Auto:
|
||||||
|
return "auto";
|
||||||
|
case FileType::Fix:
|
||||||
|
return "fix";
|
||||||
|
case FileType::Stub:
|
||||||
|
return "stub";
|
||||||
|
case FileType::Ref:
|
||||||
|
return "ref";
|
||||||
|
default:
|
||||||
|
throw std::logic_error(fmt::format("Invalid file type: {}", (int)type));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Helper function to check if a comment contains an address
|
// Helper function to check if a comment contains an address
|
||||||
bool hasAddressPattern(const std::string &comment) {
|
bool hasAddressPattern(const std::string &comment) {
|
||||||
return std::regex_search(comment, ADDRESS_REGEX);
|
return std::regex_search(comment, ADDRESS_REGEX);
|
||||||
|
@ -47,6 +79,7 @@ struct FunctionInfo {
|
||||||
std::string address;
|
std::string address;
|
||||||
std::string filepath;
|
std::string filepath;
|
||||||
bool is_import;
|
bool is_import;
|
||||||
|
FileType type; // Add type field
|
||||||
};
|
};
|
||||||
|
|
||||||
struct GlobalInfo {
|
struct GlobalInfo {
|
||||||
|
@ -81,11 +114,11 @@ public:
|
||||||
&delete_imports_stmt,
|
&delete_imports_stmt,
|
||||||
"Failed to prepare delete imports statement");
|
"Failed to prepare delete imports statement");
|
||||||
prepareStatement("INSERT OR REPLACE INTO Functions (filepath, name, "
|
prepareStatement("INSERT OR REPLACE INTO Functions (filepath, name, "
|
||||||
"address) VALUES (?, ?, ?)",
|
"address, type) VALUES (?, ?, ?, ?)",
|
||||||
&insert_functions_stmt,
|
&insert_functions_stmt,
|
||||||
"Failed to prepare insert functions statement");
|
"Failed to prepare insert functions statement");
|
||||||
prepareStatement("INSERT OR REPLACE INTO Imports (filepath, name, address) "
|
prepareStatement("INSERT OR REPLACE INTO Imports (filepath, name, address, "
|
||||||
"VALUES (?, ?, ?)",
|
"type) VALUES (?, ?, ?, ?)",
|
||||||
&insert_imports_stmt,
|
&insert_imports_stmt,
|
||||||
"Failed to prepare insert imports statement");
|
"Failed to prepare insert imports statement");
|
||||||
prepareStatement("DELETE FROM Globals WHERE filepath = ?",
|
prepareStatement("DELETE FROM Globals WHERE filepath = ?",
|
||||||
|
@ -128,6 +161,7 @@ public:
|
||||||
sqlite3_bind_text(stmt, 1, func.filepath.c_str(), -1, SQLITE_STATIC);
|
sqlite3_bind_text(stmt, 1, func.filepath.c_str(), -1, SQLITE_STATIC);
|
||||||
sqlite3_bind_text(stmt, 2, func.name.c_str(), -1, SQLITE_STATIC);
|
sqlite3_bind_text(stmt, 2, func.name.c_str(), -1, SQLITE_STATIC);
|
||||||
sqlite3_bind_text(stmt, 3, func.address.c_str(), -1, SQLITE_STATIC);
|
sqlite3_bind_text(stmt, 3, func.address.c_str(), -1, SQLITE_STATIC);
|
||||||
|
sqlite3_bind_int(stmt, 4, static_cast<int>(func.type));
|
||||||
sqlite3_step(stmt);
|
sqlite3_step(stmt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -157,8 +191,8 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *create_tables = R"(
|
const char *create_tables = R"(
|
||||||
CREATE TABLE IF NOT EXISTS Functions (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath));
|
CREATE TABLE IF NOT EXISTS Functions (filepath TEXT, name TEXT, address TEXT, type INTEGER DEFAULT 0, PRIMARY KEY (name, filepath));
|
||||||
CREATE TABLE IF NOT EXISTS Imports (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath));
|
CREATE TABLE IF NOT EXISTS Imports (filepath TEXT, name TEXT, address TEXT, type INTEGER DEFAULT 0, PRIMARY KEY (name, filepath));
|
||||||
CREATE TABLE IF NOT EXISTS Globals (filepath TEXT, name TEXT, address TEXT);
|
CREATE TABLE IF NOT EXISTS Globals (filepath TEXT, name TEXT, address TEXT);
|
||||||
)";
|
)";
|
||||||
|
|
||||||
|
@ -197,7 +231,7 @@ public:
|
||||||
bool checkDuplicateAddresses() {
|
bool checkDuplicateAddresses() {
|
||||||
const char *sql = R"(
|
const char *sql = R"(
|
||||||
WITH all_addresses AS (
|
WITH all_addresses AS (
|
||||||
SELECT 'Functions' as table_name, name, address, filepath FROM Functions WHERE address != ''
|
SELECT 'Functions' as table_name, name, address, filepath FROM Functions WHERE address != '' AND type != 3
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT 'Globals' as table_name, name, address, filepath FROM Globals WHERE address != ''
|
SELECT 'Globals' as table_name, name, address, filepath FROM Globals WHERE address != ''
|
||||||
)
|
)
|
||||||
|
@ -239,6 +273,7 @@ public:
|
||||||
SELECT name, COUNT(*) as count,
|
SELECT name, COUNT(*) as count,
|
||||||
GROUP_CONCAT(filepath, '; ') as filepaths
|
GROUP_CONCAT(filepath, '; ') as filepaths
|
||||||
FROM Functions
|
FROM Functions
|
||||||
|
WHERE type != 3
|
||||||
GROUP BY name
|
GROUP BY name
|
||||||
HAVING COUNT(*) > 1
|
HAVING COUNT(*) > 1
|
||||||
ORDER BY name;
|
ORDER BY name;
|
||||||
|
@ -390,7 +425,7 @@ bool hasFunctionBody(TSNode node) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void findFunctions(TSNode node, const char *source_code, uint32_t source_length,
|
void findFunctions(TSNode node, const char *source_code, uint32_t source_length,
|
||||||
std::vector<FunctionInfo> &functions) {
|
std::vector<FunctionInfo> &functions, FileType file_type) {
|
||||||
const char *type = ts_node_type(node);
|
const char *type = ts_node_type(node);
|
||||||
|
|
||||||
if (strcmp(type, "function_definition") == 0 ||
|
if (strcmp(type, "function_definition") == 0 ||
|
||||||
|
@ -409,7 +444,8 @@ void findFunctions(TSNode node, const char *source_code, uint32_t source_length,
|
||||||
FunctionInfo func{func_name, address, "",
|
FunctionInfo func{func_name, address, "",
|
||||||
strcmp(type, "function_definition") == 0
|
strcmp(type, "function_definition") == 0
|
||||||
? !hasFunctionBody(node)
|
? !hasFunctionBody(node)
|
||||||
: true};
|
: true,
|
||||||
|
file_type}; // Add file_type parameter
|
||||||
functions.push_back(func);
|
functions.push_back(func);
|
||||||
}
|
}
|
||||||
// We'll never nest function declarations
|
// We'll never nest function declarations
|
||||||
|
@ -422,8 +458,8 @@ void findFunctions(TSNode node, const char *source_code, uint32_t source_length,
|
||||||
|
|
||||||
uint32_t child_count = ts_node_child_count(node);
|
uint32_t child_count = ts_node_child_count(node);
|
||||||
for (uint32_t i = 0; i < child_count; i++) {
|
for (uint32_t i = 0; i < child_count; i++) {
|
||||||
findFunctions(ts_node_child(node, i), source_code, source_length,
|
findFunctions(ts_node_child(node, i), source_code, source_length, functions,
|
||||||
functions);
|
file_type);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -454,7 +490,8 @@ std::vector<std::string> readFileList(const std::string &list_file) {
|
||||||
return files;
|
return files;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool processFile(const std::string &filepath, DatabaseManager &db) {
|
bool processFile(const std::string &filepath, DatabaseManager &db,
|
||||||
|
FileType file_type) {
|
||||||
std::ifstream file(filepath);
|
std::ifstream file(filepath);
|
||||||
if (!file.is_open()) {
|
if (!file.is_open()) {
|
||||||
spdlog::error("Could not open file {}", filepath);
|
spdlog::error("Could not open file {}", filepath);
|
||||||
|
@ -482,17 +519,18 @@ bool processFile(const std::string &filepath, DatabaseManager &db) {
|
||||||
|
|
||||||
std::vector<FunctionInfo> functions;
|
std::vector<FunctionInfo> functions;
|
||||||
findFunctions(root_node, file_content.c_str(), file_content.length(),
|
findFunctions(root_node, file_content.c_str(), file_content.length(),
|
||||||
functions);
|
functions, file_type);
|
||||||
|
|
||||||
for (auto &func : functions) {
|
for (auto &func : functions) {
|
||||||
func.filepath = filepath;
|
func.filepath = filepath;
|
||||||
db.insertFunction(func);
|
db.insertFunction(func);
|
||||||
spdlog::debug("{}: {} @ {} in {}", func.is_import ? "Import" : "Function",
|
spdlog::debug("{}: {} @ {} in {} (type: {})",
|
||||||
func.name, func.address, filepath);
|
func.is_import ? "Import" : "Function", func.name,
|
||||||
|
func.address, filepath, fileTypeToString(func.type));
|
||||||
}
|
}
|
||||||
|
|
||||||
spdlog::info("Processed {} functions/imports from {}", functions.size(),
|
spdlog::info("Processed {} functions/imports from {} (type: {})",
|
||||||
filepath);
|
functions.size(), filepath, fileTypeToString(file_type));
|
||||||
|
|
||||||
ts_tree_delete(tree);
|
ts_tree_delete(tree);
|
||||||
ts_parser_delete(parser);
|
ts_parser_delete(parser);
|
||||||
|
@ -741,6 +779,7 @@ int main(int argc, char *argv[]) {
|
||||||
std::string db_path = "gh.db";
|
std::string db_path = "gh.db";
|
||||||
std::string mode = "functions";
|
std::string mode = "functions";
|
||||||
std::string log_file = "";
|
std::string log_file = "";
|
||||||
|
std::string type_str = "auto"; // Add type string variable
|
||||||
bool verbose = false;
|
bool verbose = false;
|
||||||
|
|
||||||
app.add_option("files", input_files,
|
app.add_option("files", input_files,
|
||||||
|
@ -755,11 +794,24 @@ int main(int argc, char *argv[]) {
|
||||||
->default_val("functions")
|
->default_val("functions")
|
||||||
->check(
|
->check(
|
||||||
CLI::IsMember({"functions", "globals", "duplicates", "dump-tree"}));
|
CLI::IsMember({"functions", "globals", "duplicates", "dump-tree"}));
|
||||||
|
app.add_option("-t,--type", type_str,
|
||||||
|
"File type: 'auto', 'fix', 'stub', or 'ref'")
|
||||||
|
->default_val("auto")
|
||||||
|
->check(CLI::IsMember({"auto", "fix", "stub", "ref"}));
|
||||||
app.add_flag("-v,--verbose", verbose, "Enable verbose logging (debug level)");
|
app.add_flag("-v,--verbose", verbose, "Enable verbose logging (debug level)");
|
||||||
app.add_flag("--log-file", log_file, "Enable logging to file");
|
app.add_flag("--log-file", log_file, "Enable logging to file");
|
||||||
|
|
||||||
CLI11_PARSE(app, argc, argv);
|
CLI11_PARSE(app, argc, argv);
|
||||||
|
|
||||||
|
// Convert string to FileType enum
|
||||||
|
FileType file_type;
|
||||||
|
try {
|
||||||
|
file_type = stringToFileType(type_str);
|
||||||
|
} catch (const std::invalid_argument &e) {
|
||||||
|
spdlog::error("Invalid file type: {}", type_str);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
// Set log level based on verbose flag
|
// Set log level based on verbose flag
|
||||||
if (verbose) {
|
if (verbose) {
|
||||||
spdlog::set_level(spdlog::level::debug);
|
spdlog::set_level(spdlog::level::debug);
|
||||||
|
@ -830,10 +882,10 @@ int main(int argc, char *argv[]) {
|
||||||
db.beginTransaction();
|
db.beginTransaction();
|
||||||
|
|
||||||
for (const auto &filepath : files_to_process) {
|
for (const auto &filepath : files_to_process) {
|
||||||
spdlog::info("=== Processing: {} ===", filepath);
|
spdlog::info("=== Processing: {} (type: {}) ===", filepath, type_str);
|
||||||
bool success = false;
|
bool success = false;
|
||||||
if (mode == "functions") {
|
if (mode == "functions") {
|
||||||
success = processFile(filepath, db);
|
success = processFile(filepath, db, file_type);
|
||||||
} else if (mode == "globals") {
|
} else if (mode == "globals") {
|
||||||
success = processGlobalsFile(filepath, db);
|
success = processGlobalsFile(filepath, db);
|
||||||
}
|
}
|
||||||
|
@ -860,6 +912,7 @@ int main(int argc, char *argv[]) {
|
||||||
spdlog::info("=== Summary ===");
|
spdlog::info("=== Summary ===");
|
||||||
spdlog::info("Processed {} files successfully", processed_count);
|
spdlog::info("Processed {} files successfully", processed_count);
|
||||||
spdlog::info("Mode: {}", mode);
|
spdlog::info("Mode: {}", mode);
|
||||||
|
spdlog::info("File type: {}", type_str);
|
||||||
if (mode != "dump-tree") {
|
if (mode != "dump-tree") {
|
||||||
spdlog::info("Database saved to: {}", db_path);
|
spdlog::info("Database saved to: {}", db_path);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue