Compare commits
7 Commits
836e281b80
...
d7de3deb59
Author | SHA1 | Date |
---|---|---|
|
d7de3deb59 | |
|
7c18d04724 | |
|
3d40dc7e80 | |
|
0383ef8f13 | |
|
58397127e7 | |
|
560fbe70ce | |
|
db228e64ec |
|
@ -1,5 +1,5 @@
|
|||
gh_auto/*
|
||||
gh_ref/*
|
||||
old/
|
||||
*.bak
|
||||
gh_cmake_timestamp
|
||||
functions.dat
|
||||
|
|
|
@ -1,64 +1,71 @@
|
|||
add_executable(game_re
|
||||
r3/main.cpp
|
||||
r3/binders/static_mem.cxx
|
||||
gh_global.cxx
|
||||
)
|
||||
|
||||
target_compile_features(game_re PUBLIC cxx_std_20)
|
||||
target_include_directories(game_re PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
set_target_properties(
|
||||
game_re PROPERTIES
|
||||
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
|
||||
)
|
||||
|
||||
if(WIN32 AND R3_32BIT)
|
||||
target_link_libraries(game_re PRIVATE
|
||||
binkw32
|
||||
d3d8
|
||||
dinput8
|
||||
function(setup_target TARGET)
|
||||
add_executable(${TARGET}
|
||||
r3/main.cpp
|
||||
r3/binders/static_mem.cxx
|
||||
gh_global.cxx
|
||||
)
|
||||
endif()
|
||||
|
||||
target_compile_definitions(game_re PRIVATE
|
||||
R3_GAME_DATA_DIR=\"${GAME_DATA_DIR}\"
|
||||
)
|
||||
target_compile_features(${TARGET} PUBLIC cxx_std_20)
|
||||
target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
get_filename_component(R3_DATA_SEGMENT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/gh_datasegment.bin" ABSOLUTE)
|
||||
target_compile_definitions(game_re PRIVATE
|
||||
R3_DATA_SEGMENT_FILE=\"${R3_DATA_SEGMENT_FILE}\"
|
||||
)
|
||||
set_target_properties(
|
||||
${TARGET} PROPERTIES
|
||||
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
|
||||
)
|
||||
|
||||
target_compile_definitions(game_re PRIVATE
|
||||
_CRT_SECURE_NO_WARNINGS=1
|
||||
_CRT_NONSTDC_NO_WARNINGS=1)
|
||||
if(WIN32 AND R3_32BIT)
|
||||
target_link_libraries(${TARGET} PRIVATE
|
||||
binkw32
|
||||
d3d8
|
||||
dinput8
|
||||
)
|
||||
endif()
|
||||
|
||||
target_link_libraries(game_re PRIVATE spdlog)
|
||||
target_compile_definitions(${TARGET} PRIVATE
|
||||
R3_GAME_DATA_DIR=\"${GAME_DATA_DIR}\"
|
||||
)
|
||||
|
||||
file(GLOB GH_AUTO_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gh_auto/*.cxx)
|
||||
file(GLOB GH_FIX_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gh_fix/*.cxx)
|
||||
file(GLOB GH_STUB_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gh_stub/*.cxx)
|
||||
get_filename_component(R3_DATA_SEGMENT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/gh_datasegment.bin" ABSOLUTE)
|
||||
target_compile_definitions(${TARGET} PRIVATE
|
||||
R3_DATA_SEGMENT_FILE=\"${R3_DATA_SEGMENT_FILE}\"
|
||||
)
|
||||
|
||||
target_sources(game_re PRIVATE
|
||||
${GH_AUTO_SOURCES}
|
||||
${GH_FIX_SOURCES}
|
||||
${GH_STUB_SOURCES}
|
||||
)
|
||||
target_compile_definitions(${TARGET} PRIVATE
|
||||
_CRT_SECURE_NO_WARNINGS=1
|
||||
_CRT_NONSTDC_NO_WARNINGS=1)
|
||||
|
||||
# Ignore -Wformat-security
|
||||
target_compile_options(game_re PRIVATE -Wno-format-security)
|
||||
target_link_libraries(${TARGET} PRIVATE spdlog)
|
||||
|
||||
# Ignore -Wmicrosoft-cast
|
||||
target_compile_options(game_re PRIVATE -Wno-microsoft-cast)
|
||||
file(GLOB GH_AUTO_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gh_auto/*.cxx)
|
||||
file(GLOB GH_FIX_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gh_fix/*.cxx)
|
||||
file(GLOB GH_STUB_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gh_stub/*.cxx)
|
||||
|
||||
# Automatically re-run CMake if any gh_*.cxx files change
|
||||
# due to ghidra script runs
|
||||
set_property(
|
||||
DIRECTORY
|
||||
APPEND
|
||||
PROPERTY CMAKE_CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/gh_cmake_timestamp
|
||||
)
|
||||
target_sources(${TARGET} PRIVATE
|
||||
${GH_AUTO_SOURCES}
|
||||
${GH_FIX_SOURCES}
|
||||
${GH_STUB_SOURCES}
|
||||
)
|
||||
|
||||
target_precompile_headers(game_re PRIVATE
|
||||
"$<$<COMPILE_LANGUAGE:CXX>:${CMAKE_CURRENT_SOURCE_DIR}/r3/binders/auto_pch.cxx>"
|
||||
)
|
||||
# Ignore -Wformat-security
|
||||
target_compile_options(${TARGET} PRIVATE -Wno-format-security)
|
||||
|
||||
# Ignore -Wmicrosoft-cast
|
||||
target_compile_options(${TARGET} PRIVATE -Wno-microsoft-cast)
|
||||
|
||||
# Automatically re-run CMake if any gh_*.cxx files change
|
||||
# due to ghidra script runs
|
||||
set_property(
|
||||
DIRECTORY
|
||||
APPEND
|
||||
PROPERTY CMAKE_CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/gh_cmake_timestamp
|
||||
)
|
||||
|
||||
target_precompile_headers(${TARGET} PRIVATE
|
||||
"$<$<COMPILE_LANGUAGE:CXX>:${CMAKE_CURRENT_SOURCE_DIR}/r3/binders/auto_pch.cxx>"
|
||||
)
|
||||
endfunction()
|
||||
|
||||
setup_target(game_re)
|
||||
|
||||
setup_target(game_dbg)
|
||||
target_compile_definitions(game_dbg PRIVATE RE_DBG_INJECTED=1)
|
||||
|
|
|
@ -6,14 +6,21 @@ set -e
|
|||
|
||||
pushd $script_dir
|
||||
|
||||
file_list=files.txt
|
||||
rm -f $file_list
|
||||
for dir in gh_auto gh_stub gh_fix; do
|
||||
for file in $dir/*.cxx; do
|
||||
echo $file >>$file_list
|
||||
done
|
||||
mkdir -p logs
|
||||
types=(auto ref fix stub)
|
||||
for type in "${types[@]}"; do
|
||||
file_list="logs/files_${type}.txt"
|
||||
rm -f "$file_list"
|
||||
|
||||
if [ -d "gh_${type}" ]; then
|
||||
for file in "gh_${type}"/*.cxx; do
|
||||
echo "$file" >>"$file_list"
|
||||
done
|
||||
$tool "@$file_list" -v --type=$type --log-file=logs/log-functions-${type}.txt
|
||||
fi
|
||||
done
|
||||
|
||||
$tool @$file_list
|
||||
$tool gh_global.h -mglobals -v --log-file=logs/log-globals.txt
|
||||
$tool -mduplicates -v --log-file=logs/log-duplicates.txt
|
||||
|
||||
popd
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
// @category _Reman3
|
||||
// @menupath Reman3.Test
|
||||
// @importpackage org.sqlite
|
||||
|
||||
import ghidra.app.script.GhidraScript;
|
||||
import ghidra.program.model.address.Address;
|
||||
|
@ -26,15 +27,15 @@ import java.sql.ResultSet;
|
|||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
|
||||
public class Test extends GhidraScript {
|
||||
// Will rebuild all functions
|
||||
public boolean rebuildAllGlobals = true;
|
||||
FunctionDatabase functionDB;
|
||||
import org.sqlite.JDBC;
|
||||
|
||||
public class Test extends GhidraScript {
|
||||
@Override
|
||||
public void run() throws Exception {
|
||||
RecompileConfig.INSTANCE = new RecompileConfig(this);
|
||||
|
||||
java.sql.DriverManager.registerDriver(new JDBC());
|
||||
|
||||
// Example SQLite usage
|
||||
testSQLite();
|
||||
}
|
||||
|
@ -80,14 +81,4 @@ public class Test extends GhidraScript {
|
|||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
private void scanDirectory(File directory, FunctionDatabase.Type type) throws Exception {
|
||||
File[] files = directory.listFiles((dir, name) -> name.endsWith(".cxx"));
|
||||
if (files == null)
|
||||
return;
|
||||
|
||||
for (File file : files) {
|
||||
scanFile(file, type);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -16,7 +16,8 @@ public class FunctionDatabase {
|
|||
public enum Type {
|
||||
Auto,
|
||||
Fix,
|
||||
Stub
|
||||
Stub,
|
||||
Ref
|
||||
}
|
||||
|
||||
public class Dependency implements java.io.Serializable {
|
||||
|
|
|
@ -1,3 +1,13 @@
|
|||
# Ghidra Scripts
|
||||
|
||||
Add this to your scripts folder and run to generate c code for all functions in the project
|
||||
Add this to your ghidra scripts folder to add the reman decompile scripts to ghidra
|
||||
|
||||
NOTE: Make sure to also add the included `sqlite-jdbc-3.49.1.0.jar` to the Ghidra scripts folder, as it is required for the Ghidra scripts to read the decompile database.
|
||||
|
||||
## Decompile database
|
||||
|
||||
The decompile database is a sqlite database that contains a list of all files that implement decompiled functions and their source address. It is used to regenerate files, check for duplicates, and more.
|
||||
|
||||
To generate the database from the current set of files, run the scan_sources script in the /game_re folder.
|
||||
|
||||
Make sure you have set up the tooling by running the /tooling/setup script.
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
build/
|
||||
.claude/
|
||||
tmps/
|
||||
bin/
|
||||
*.db
|
||||
*.txt
|
||||
|
|
|
@ -0,0 +1,177 @@
|
|||
# C++ Function/Global Parser Tool - Database Output Summary
|
||||
|
||||
## Overview
|
||||
This tool parses C++ source files using Tree-sitter to extract function and global variable information along with their memory addresses from comments. The extracted data is stored in an SQLite database for analysis and lookup purposes.
|
||||
|
||||
## Database Schema
|
||||
|
||||
The tool creates an SQLite database (default: `gh.db`) with three main tables:
|
||||
|
||||
### 1. Functions Table
|
||||
```sql
|
||||
CREATE TABLE Functions (
|
||||
filepath TEXT,
|
||||
name TEXT,
|
||||
address TEXT,
|
||||
PRIMARY KEY (name, filepath)
|
||||
);
|
||||
```
|
||||
|
||||
**Purpose**: Stores function definitions that have function bodies (actual implementations)
|
||||
- `filepath`: Source file path where the function is defined
|
||||
- `name`: Function name (identifier)
|
||||
- `address`: 8-character hexadecimal memory address extracted from comments
|
||||
- **Primary Key**: Combination of name and filepath (allows same function name in different files)
|
||||
|
||||
### 2. Imports Table
|
||||
```sql
|
||||
CREATE TABLE Imports (
|
||||
filepath TEXT,
|
||||
name TEXT,
|
||||
address TEXT,
|
||||
PRIMARY KEY (name, filepath)
|
||||
);
|
||||
```
|
||||
|
||||
**Purpose**: Stores function declarations without bodies (imports/forward declarations)
|
||||
- Same schema as Functions table
|
||||
- Distinguishes between function definitions and declarations
|
||||
- Useful for tracking external function references
|
||||
|
||||
### 3. Globals Table
|
||||
```sql
|
||||
CREATE TABLE Globals (
|
||||
filepath TEXT,
|
||||
name TEXT,
|
||||
address TEXT
|
||||
);
|
||||
```
|
||||
|
||||
**Purpose**: Stores global variable declarations marked with `extern`
|
||||
- `filepath`: Source file path where the global is declared
|
||||
- `name`: Global variable name (identifier)
|
||||
- `address`: 8-character hexadecimal memory address from comments
|
||||
- **No Primary Key**: Allows duplicate global names across files
|
||||
|
||||
## Address Format
|
||||
|
||||
The tool extracts addresses from C++ comments using this regex pattern:
|
||||
```regex
|
||||
//\s*([0-9a-fA-F]{8})
|
||||
```
|
||||
|
||||
**Expected Comment Format**:
|
||||
```cpp
|
||||
void myFunction(); // 12345678
|
||||
extern int globalVar; // ABCDEF00
|
||||
```
|
||||
|
||||
- Addresses must be exactly 8 hexadecimal characters
|
||||
- Can be uppercase or lowercase
|
||||
- Must be in a C++ line comment (`//`)
|
||||
- Whitespace after `//` is optional
|
||||
|
||||
## Tool Modes
|
||||
|
||||
### 1. Functions Mode (`-m functions`)
|
||||
- **Default mode**
|
||||
- Parses C++ files for function definitions and declarations
|
||||
- Populates `Functions` and `Imports` tables
|
||||
- Distinguishes between functions with bodies vs. declarations only
|
||||
|
||||
### 2. Globals Mode (`-m globals`)
|
||||
- Parses C++ files for `extern` global variable declarations
|
||||
- Populates `Globals` table
|
||||
- Only processes variables marked with `extern` storage class
|
||||
|
||||
### 3. Duplicates Mode (`-m duplicates`)
|
||||
- **Analysis mode** - doesn't process files
|
||||
- Checks existing database for duplicate addresses and names
|
||||
- Reports conflicts across all tables
|
||||
- Returns exit code 1 if duplicates found, 0 if clean
|
||||
|
||||
### 4. Dump-Tree Mode (`-m dump-tree`)
|
||||
- **Debug mode** - doesn't use database
|
||||
- Outputs Tree-sitter AST for debugging parsing issues
|
||||
- Useful for understanding how the parser interprets source code
|
||||
|
||||
## Data Quality Checks
|
||||
|
||||
The tool includes built-in validation:
|
||||
|
||||
### Duplicate Address Detection
|
||||
- Scans all tables for addresses used multiple times
|
||||
- Reports format: `"DUPLICATE ADDRESS: {address} appears {count} times in: {entries}"`
|
||||
- Cross-references Functions, Imports, and Globals tables
|
||||
|
||||
### Duplicate Name Detection
|
||||
- Checks for function names appearing in multiple files
|
||||
- Checks for global names appearing in multiple files
|
||||
- Helps identify naming conflicts and potential issues
|
||||
|
||||
## Usage Examples
|
||||
|
||||
### Basic Function Extraction
|
||||
```bash
|
||||
./tool file1.cpp file2.cpp -d output.db -m functions
|
||||
```
|
||||
|
||||
### Global Variable Extraction
|
||||
```bash
|
||||
./tool globals.h -d output.db -m globals
|
||||
```
|
||||
|
||||
### Batch Processing with File List
|
||||
```bash
|
||||
./tool -l filelist.txt -d output.db -m functions
|
||||
```
|
||||
|
||||
### Quality Assurance Check
|
||||
```bash
|
||||
./tool -d output.db -m duplicates
|
||||
```
|
||||
|
||||
## Database Queries for Users
|
||||
|
||||
### Find Function by Name
|
||||
```sql
|
||||
SELECT * FROM Functions WHERE name = 'functionName';
|
||||
SELECT * FROM Imports WHERE name = 'functionName';
|
||||
```
|
||||
|
||||
### Find All Symbols at Address
|
||||
```sql
|
||||
SELECT 'Function' as type, name, filepath FROM Functions WHERE address = '12345678'
|
||||
UNION ALL
|
||||
SELECT 'Import' as type, name, filepath FROM Imports WHERE address = '12345678'
|
||||
UNION ALL
|
||||
SELECT 'Global' as type, name, filepath FROM Globals WHERE address = '12345678';
|
||||
```
|
||||
|
||||
### List All Functions in File
|
||||
```sql
|
||||
SELECT name, address FROM Functions WHERE filepath = 'path/to/file.cpp'
|
||||
ORDER BY name;
|
||||
```
|
||||
|
||||
### Find Functions Without Addresses
|
||||
```sql
|
||||
SELECT name, filepath FROM Functions WHERE address = '' OR address IS NULL;
|
||||
```
|
||||
|
||||
### Address Range Analysis
|
||||
```sql
|
||||
SELECT name, address, filepath FROM Functions
|
||||
WHERE UPPER(address) BETWEEN '10000000' AND '20000000'
|
||||
ORDER BY UPPER(address); -- addresses are fixed-width hex text, so text order equals numeric order
|
||||
```
|
||||
|
||||
## Integration Considerations
|
||||
|
||||
- **Database Format**: Standard SQLite3 - compatible with most tools and languages
|
||||
- **File Paths**: Relative to the game source directory (the `game_re` folder in the repo root), so stored paths begin with a subfolder such as `gh_auto` or `gh_fix`.
|
||||
- **Address Format**: Always 8-character hex strings (32 bit addresses) - pad with leading zeros if needed
|
||||
- **Case Sensitivity**: Function/global names are case-sensitive as per C++ standards
|
||||
- **Unicode Support**: Handles UTF-8 encoded source files
|
||||
|
||||
This database serves as a comprehensive symbol table for reverse engineering, debugging, and code analysis workflows.
|
|
@ -3,19 +3,18 @@ set -e
|
|||
tool=build/clang-x86_64-pc-windows-msvc/Release/r3_gh_tool
|
||||
cmake --build build/clang-x86_64-pc-windows-msvc/Release --target r3_gh_tool
|
||||
|
||||
scan_dir=tmps/gh_auto
|
||||
file_list=files.txt
|
||||
rm -f $file_list
|
||||
for file in tmps/gh_auto/*.cxx; do
|
||||
echo $file >>$file_list
|
||||
done
|
||||
for file in tmps/gh_stub/*.cxx; do
|
||||
echo $file >>$file_list
|
||||
done
|
||||
for file in tmps/gh_fix/*.cxx; do
|
||||
echo $file >>$file_list
|
||||
types=(auto ref fix stub)
|
||||
for type in "${types[@]}"; do
|
||||
file_list="files_${type}.txt"
|
||||
rm -f "$file_list"
|
||||
|
||||
if [ -d "tmps/gh_${type}" ]; then
|
||||
for file in "tmps/gh_${type}"/*.cxx; do
|
||||
echo "$file" >>"$file_list"
|
||||
done
|
||||
$tool "@$file_list" -v --type=$type --log-file=log-functions.txt
|
||||
fi
|
||||
done
|
||||
|
||||
$tool @$file_list -v --log-file=log-functions.txt
|
||||
$tool tmps/gh_global.h -mglobals -v --log-file=log-globals.txt
|
||||
$tool -mduplicates -v --log-file=log-duplicates.txt
|
||||
|
|
|
@ -1,11 +1,13 @@
|
|||
#!/bin/bash
|
||||
|
||||
script_dir=$(readlink -f $(dirname "$0"))
|
||||
pushd $script_dir
|
||||
|
||||
# Create build directory if it doesn't exist
|
||||
mkdir -p build/tmp
|
||||
|
||||
if [ ! -d "build/tmp" ]; then
|
||||
# Configure project using CMake with Ninja generator for Release build
|
||||
cmake -S . -B build/tmp -G Ninja -DCMAKE_BUILD_TYPE=Release
|
||||
cmake -B build/tmp -G Ninja -DCMAKE_BUILD_TYPE=Release
|
||||
fi
|
||||
|
||||
# Build project using Ninja
|
||||
|
@ -21,3 +23,5 @@ if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "win32" ]]; then
|
|||
else
|
||||
cp build/tmp/r3_gh_tool bin/
|
||||
fi
|
||||
|
||||
popd
|
||||
|
|
|
@ -18,6 +18,38 @@ extern "C" TSLanguage *tree_sitter_cpp();
|
|||
// Global address regex pattern
|
||||
const std::regex ADDRESS_REGEX(R"(//\s*([0-9a-fA-F]{8}))");
|
||||
|
||||
// Add enum for file types
|
||||
enum class FileType { Auto, Fix, Stub, Ref };
|
||||
|
||||
// Helper function to convert string to FileType
|
||||
// Translates a textual type name into the corresponding FileType value.
//
// Recognised names are exactly "auto", "fix", "stub" and "ref"; the match is
// case-sensitive. Any other string raises std::invalid_argument with the
// message "Invalid file type: " followed by the rejected input.
FileType stringToFileType(const std::string &type_str) {
  struct NamedType {
    const char *label;
    FileType value;
  };

  // Lookup table pairing every accepted name with its enumerator.
  static const NamedType known_types[] = {
      {"auto", FileType::Auto},
      {"fix", FileType::Fix},
      {"stub", FileType::Stub},
      {"ref", FileType::Ref},
  };

  for (const NamedType &entry : known_types) {
    if (type_str == entry.label) {
      return entry.value;
    }
  }

  // No table entry matched: report the offending string to the caller.
  throw std::invalid_argument("Invalid file type: " + type_str);
}
|
||||
|
||||
// Helper function to convert FileType to string
|
||||
// Produces the lowercase textual name of a FileType value.
//
// Returns "auto", "fix", "stub" or "ref" for the four known enumerators.
// A value outside that set raises std::logic_error whose message contains the
// numeric value of the enumerator.
std::string fileTypeToString(FileType type) {
  if (type == FileType::Auto) {
    return "auto";
  }
  if (type == FileType::Fix) {
    return "fix";
  }
  if (type == FileType::Stub) {
    return "stub";
  }
  if (type == FileType::Ref) {
    return "ref";
  }

  // Reached only for a value that is not one of the declared enumerators.
  throw std::logic_error(
      fmt::format("Invalid file type: {}", static_cast<int>(type)));
}
|
||||
|
||||
// Helper function to check if a comment contains an address
|
||||
bool hasAddressPattern(const std::string &comment) {
|
||||
return std::regex_search(comment, ADDRESS_REGEX);
|
||||
|
@ -47,6 +79,7 @@ struct FunctionInfo {
|
|||
std::string address;
|
||||
std::string filepath;
|
||||
bool is_import;
|
||||
FileType type; // Add type field
|
||||
};
|
||||
|
||||
struct GlobalInfo {
|
||||
|
@ -81,11 +114,11 @@ public:
|
|||
&delete_imports_stmt,
|
||||
"Failed to prepare delete imports statement");
|
||||
prepareStatement("INSERT OR REPLACE INTO Functions (filepath, name, "
|
||||
"address) VALUES (?, ?, ?)",
|
||||
"address, type) VALUES (?, ?, ?, ?)",
|
||||
&insert_functions_stmt,
|
||||
"Failed to prepare insert functions statement");
|
||||
prepareStatement("INSERT OR REPLACE INTO Imports (filepath, name, address) "
|
||||
"VALUES (?, ?, ?)",
|
||||
prepareStatement("INSERT OR REPLACE INTO Imports (filepath, name, address, "
|
||||
"type) VALUES (?, ?, ?, ?)",
|
||||
&insert_imports_stmt,
|
||||
"Failed to prepare insert imports statement");
|
||||
prepareStatement("DELETE FROM Globals WHERE filepath = ?",
|
||||
|
@ -128,6 +161,7 @@ public:
|
|||
sqlite3_bind_text(stmt, 1, func.filepath.c_str(), -1, SQLITE_STATIC);
|
||||
sqlite3_bind_text(stmt, 2, func.name.c_str(), -1, SQLITE_STATIC);
|
||||
sqlite3_bind_text(stmt, 3, func.address.c_str(), -1, SQLITE_STATIC);
|
||||
sqlite3_bind_int(stmt, 4, static_cast<int>(func.type));
|
||||
sqlite3_step(stmt);
|
||||
}
|
||||
|
||||
|
@ -157,8 +191,8 @@ public:
|
|||
}
|
||||
|
||||
const char *create_tables = R"(
|
||||
CREATE TABLE IF NOT EXISTS Functions (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath));
|
||||
CREATE TABLE IF NOT EXISTS Imports (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath));
|
||||
CREATE TABLE IF NOT EXISTS Functions (filepath TEXT, name TEXT, address TEXT, type INTEGER DEFAULT 0, PRIMARY KEY (name, filepath));
|
||||
CREATE TABLE IF NOT EXISTS Imports (filepath TEXT, name TEXT, address TEXT, type INTEGER DEFAULT 0, PRIMARY KEY (name, filepath));
|
||||
CREATE TABLE IF NOT EXISTS Globals (filepath TEXT, name TEXT, address TEXT);
|
||||
)";
|
||||
|
||||
|
@ -197,7 +231,7 @@ public:
|
|||
bool checkDuplicateAddresses() {
|
||||
const char *sql = R"(
|
||||
WITH all_addresses AS (
|
||||
SELECT 'Functions' as table_name, name, address, filepath FROM Functions WHERE address != ''
|
||||
SELECT 'Functions' as table_name, name, address, filepath FROM Functions WHERE address != '' AND type != 3
|
||||
UNION ALL
|
||||
SELECT 'Globals' as table_name, name, address, filepath FROM Globals WHERE address != ''
|
||||
)
|
||||
|
@ -239,6 +273,7 @@ public:
|
|||
SELECT name, COUNT(*) as count,
|
||||
GROUP_CONCAT(filepath, '; ') as filepaths
|
||||
FROM Functions
|
||||
WHERE type != 3
|
||||
GROUP BY name
|
||||
HAVING COUNT(*) > 1
|
||||
ORDER BY name;
|
||||
|
@ -390,7 +425,7 @@ bool hasFunctionBody(TSNode node) {
|
|||
}
|
||||
|
||||
void findFunctions(TSNode node, const char *source_code, uint32_t source_length,
|
||||
std::vector<FunctionInfo> &functions) {
|
||||
std::vector<FunctionInfo> &functions, FileType file_type) {
|
||||
const char *type = ts_node_type(node);
|
||||
|
||||
if (strcmp(type, "function_definition") == 0 ||
|
||||
|
@ -409,7 +444,8 @@ void findFunctions(TSNode node, const char *source_code, uint32_t source_length,
|
|||
FunctionInfo func{func_name, address, "",
|
||||
strcmp(type, "function_definition") == 0
|
||||
? !hasFunctionBody(node)
|
||||
: true};
|
||||
: true,
|
||||
file_type}; // Add file_type parameter
|
||||
functions.push_back(func);
|
||||
}
|
||||
// We'll never nest function declarations
|
||||
|
@ -422,8 +458,8 @@ void findFunctions(TSNode node, const char *source_code, uint32_t source_length,
|
|||
|
||||
uint32_t child_count = ts_node_child_count(node);
|
||||
for (uint32_t i = 0; i < child_count; i++) {
|
||||
findFunctions(ts_node_child(node, i), source_code, source_length,
|
||||
functions);
|
||||
findFunctions(ts_node_child(node, i), source_code, source_length, functions,
|
||||
file_type);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -454,7 +490,8 @@ std::vector<std::string> readFileList(const std::string &list_file) {
|
|||
return files;
|
||||
}
|
||||
|
||||
bool processFile(const std::string &filepath, DatabaseManager &db) {
|
||||
bool processFile(const std::string &filepath, DatabaseManager &db,
|
||||
FileType file_type) {
|
||||
std::ifstream file(filepath);
|
||||
if (!file.is_open()) {
|
||||
spdlog::error("Could not open file {}", filepath);
|
||||
|
@ -482,17 +519,18 @@ bool processFile(const std::string &filepath, DatabaseManager &db) {
|
|||
|
||||
std::vector<FunctionInfo> functions;
|
||||
findFunctions(root_node, file_content.c_str(), file_content.length(),
|
||||
functions);
|
||||
functions, file_type);
|
||||
|
||||
for (auto &func : functions) {
|
||||
func.filepath = filepath;
|
||||
db.insertFunction(func);
|
||||
spdlog::debug("{}: {} @ {} in {}", func.is_import ? "Import" : "Function",
|
||||
func.name, func.address, filepath);
|
||||
spdlog::debug("{}: {} @ {} in {} (type: {})",
|
||||
func.is_import ? "Import" : "Function", func.name,
|
||||
func.address, filepath, fileTypeToString(func.type));
|
||||
}
|
||||
|
||||
spdlog::info("Processed {} functions/imports from {}", functions.size(),
|
||||
filepath);
|
||||
spdlog::info("Processed {} functions/imports from {} (type: {})",
|
||||
functions.size(), filepath, fileTypeToString(file_type));
|
||||
|
||||
ts_tree_delete(tree);
|
||||
ts_parser_delete(parser);
|
||||
|
@ -741,6 +779,7 @@ int main(int argc, char *argv[]) {
|
|||
std::string db_path = "gh.db";
|
||||
std::string mode = "functions";
|
||||
std::string log_file = "";
|
||||
std::string type_str = "auto"; // Add type string variable
|
||||
bool verbose = false;
|
||||
|
||||
app.add_option("files", input_files,
|
||||
|
@ -755,11 +794,24 @@ int main(int argc, char *argv[]) {
|
|||
->default_val("functions")
|
||||
->check(
|
||||
CLI::IsMember({"functions", "globals", "duplicates", "dump-tree"}));
|
||||
app.add_option("-t,--type", type_str,
|
||||
"File type: 'auto', 'fix', 'stub', or 'ref'")
|
||||
->default_val("auto")
|
||||
->check(CLI::IsMember({"auto", "fix", "stub", "ref"}));
|
||||
app.add_flag("-v,--verbose", verbose, "Enable verbose logging (debug level)");
|
||||
app.add_flag("--log-file", log_file, "Enable logging to file");
|
||||
|
||||
CLI11_PARSE(app, argc, argv);
|
||||
|
||||
// Convert string to FileType enum
|
||||
FileType file_type;
|
||||
try {
|
||||
file_type = stringToFileType(type_str);
|
||||
} catch (const std::invalid_argument &e) {
|
||||
spdlog::error("Invalid file type: {}", type_str);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Set log level based on verbose flag
|
||||
if (verbose) {
|
||||
spdlog::set_level(spdlog::level::debug);
|
||||
|
@ -830,10 +882,10 @@ int main(int argc, char *argv[]) {
|
|||
db.beginTransaction();
|
||||
|
||||
for (const auto &filepath : files_to_process) {
|
||||
spdlog::info("=== Processing: {} ===", filepath);
|
||||
spdlog::info("=== Processing: {} (type: {}) ===", filepath, type_str);
|
||||
bool success = false;
|
||||
if (mode == "functions") {
|
||||
success = processFile(filepath, db);
|
||||
success = processFile(filepath, db, file_type);
|
||||
} else if (mode == "globals") {
|
||||
success = processGlobalsFile(filepath, db);
|
||||
}
|
||||
|
@ -860,6 +912,7 @@ int main(int argc, char *argv[]) {
|
|||
spdlog::info("=== Summary ===");
|
||||
spdlog::info("Processed {} files successfully", processed_count);
|
||||
spdlog::info("Mode: {}", mode);
|
||||
spdlog::info("File type: {}", type_str);
|
||||
if (mode != "dump-tree") {
|
||||
spdlog::info("Database saved to: {}", db_path);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue