359 lines
12 KiB
C++
359 lines
12 KiB
C++
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <sqlite3.h>

#include "tree_sitter/api.h"
|
|
extern "C" TSLanguage *tree_sitter_cpp();
|
|
|
|
// One function (or import) discovered in a source file, together with the
// address recovered from a nearby "// xxxxxxxx" comment.
struct FunctionInfo {
    std::string name;      // identifier text taken from the syntax tree
    std::string address;   // hex digits captured from the address comment
    std::string filepath;  // source file the entry belongs to (filled in by the caller)
    // true for extern declarations, false for definitions.
    // Initialized so a default-constructed object never holds an indeterminate value.
    bool is_import = false;
};
|
|
|
|
class DatabaseManager {
|
|
private:
|
|
sqlite3* db;
|
|
|
|
public:
|
|
DatabaseManager(const std::string& db_path) : db(nullptr) {
|
|
int rc = sqlite3_open(db_path.c_str(), &db);
|
|
if (rc) {
|
|
std::cerr << "Can't open database: " << sqlite3_errmsg(db) << std::endl;
|
|
sqlite3_close(db);
|
|
db = nullptr;
|
|
throw std::runtime_error("Failed to open database");
|
|
}
|
|
|
|
// Create tables if they don't exist
|
|
const char* create_functions_table = R"(
|
|
CREATE TABLE IF NOT EXISTS Functions (
|
|
filepath TEXT,
|
|
name TEXT,
|
|
address TEXT,
|
|
PRIMARY KEY (name, filepath)
|
|
)
|
|
)";
|
|
|
|
const char* create_imports_table = R"(
|
|
CREATE TABLE IF NOT EXISTS Imports (
|
|
filepath TEXT,
|
|
name TEXT,
|
|
address TEXT,
|
|
PRIMARY KEY (name, filepath)
|
|
)
|
|
)";
|
|
|
|
sqlite3_exec(db, create_functions_table, nullptr, nullptr, nullptr);
|
|
sqlite3_exec(db, create_imports_table, nullptr, nullptr, nullptr);
|
|
}
|
|
|
|
~DatabaseManager() {
|
|
if (db) {
|
|
sqlite3_close(db);
|
|
}
|
|
}
|
|
|
|
void clearEntriesForFile(const std::string& filepath) {
|
|
const char* delete_functions = "DELETE FROM Functions WHERE filepath = ?";
|
|
const char* delete_imports = "DELETE FROM Imports WHERE filepath = ?";
|
|
|
|
sqlite3_stmt* stmt;
|
|
|
|
// Clear functions
|
|
sqlite3_prepare_v2(db, delete_functions, -1, &stmt, nullptr);
|
|
sqlite3_bind_text(stmt, 1, filepath.c_str(), -1, SQLITE_STATIC);
|
|
sqlite3_step(stmt);
|
|
sqlite3_finalize(stmt);
|
|
|
|
// Clear imports
|
|
sqlite3_prepare_v2(db, delete_imports, -1, &stmt, nullptr);
|
|
sqlite3_bind_text(stmt, 1, filepath.c_str(), -1, SQLITE_STATIC);
|
|
sqlite3_step(stmt);
|
|
sqlite3_finalize(stmt);
|
|
}
|
|
|
|
void insertFunction(const FunctionInfo& func) {
|
|
const char* table = func.is_import ? "Imports" : "Functions";
|
|
std::string sql = "INSERT OR REPLACE INTO " + std::string(table) +
|
|
" (filepath, name, address) VALUES (?, ?, ?)";
|
|
|
|
sqlite3_stmt* stmt;
|
|
sqlite3_prepare_v2(db, sql.c_str(), -1, &stmt, nullptr);
|
|
sqlite3_bind_text(stmt, 1, func.filepath.c_str(), -1, SQLITE_STATIC);
|
|
sqlite3_bind_text(stmt, 2, func.name.c_str(), -1, SQLITE_STATIC);
|
|
sqlite3_bind_text(stmt, 3, func.address.c_str(), -1, SQLITE_STATIC);
|
|
sqlite3_step(stmt);
|
|
sqlite3_finalize(stmt);
|
|
}
|
|
};
|
|
|
|
// Extracts an address from a comment such as "// 0043e4f0" or
// "// 0043e4f0 // FUN_0043e4f0".
//
// Returns the first run of exactly eight hex digits that follows "//" and
// optional whitespace, or an empty string when `comment` has no such run.
std::string extractAddress(const std::string& comment) {
    // Compiling a std::regex is expensive and this function runs once per
    // candidate function, so the pattern is built a single time.
    static const std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))");

    std::smatch match;
    if (std::regex_search(comment, match, addr_regex)) {
        return match[1].str();
    }
    return "";
}
|
|
|
|
std::string getFunctionName(TSNode node, const char* source_code) {
|
|
// For function declarations/definitions, find the function name
|
|
uint32_t child_count = ts_node_child_count(node);
|
|
|
|
for (uint32_t i = 0; i < child_count; i++) {
|
|
TSNode child = ts_node_child(node, i);
|
|
const char* type = ts_node_type(child);
|
|
|
|
if (strcmp(type, "function_declarator") == 0) {
|
|
// Find the identifier within the function_declarator
|
|
uint32_t declarator_children = ts_node_child_count(child);
|
|
for (uint32_t j = 0; j < declarator_children; j++) {
|
|
TSNode declarator_child = ts_node_child(child, j);
|
|
if (strcmp(ts_node_type(declarator_child), "identifier") == 0) {
|
|
uint32_t start = ts_node_start_byte(declarator_child);
|
|
uint32_t end = ts_node_end_byte(declarator_child);
|
|
return std::string(source_code + start, end - start);
|
|
}
|
|
}
|
|
}
|
|
else if (strcmp(type, "identifier") == 0) {
|
|
// Direct identifier (simpler cases)
|
|
uint32_t start = ts_node_start_byte(child);
|
|
uint32_t end = ts_node_end_byte(child);
|
|
return std::string(source_code + start, end - start);
|
|
}
|
|
}
|
|
return "";
|
|
}
|
|
|
|
// Returns the closest "//" comment preceding `node` that contains an
// eight-hex-digit address, provided it starts no more than 20 newlines
// before the node. Returns an empty string when there is no such comment.
//
// The returned text runs from the "//" marker to the end of that line (or
// to the start of the node if the comment shares the node's line).
//
// `source_code` must be the buffer the tree was parsed from.
std::string getCommentBeforeNode(TSNode node, const char* source_code) {
    // Built once: std::regex construction is costly and this runs per function.
    static const std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))");
    const size_t kMaxLinesBack = 20;

    const uint32_t start_byte = ts_node_start_byte(node);
    if (start_byte == 0) return "";

    // A comment further back than kMaxLinesBack newlines can never qualify,
    // so only the text after the (kMaxLinesBack + 1)-th preceding newline is
    // inspected. This avoids copying and rescanning the whole file prefix for
    // every function.
    size_t window_begin = start_byte;
    size_t newlines_seen = 0;
    while (window_begin > 0) {
        if (source_code[window_begin - 1] == '\n') {
            if (newlines_seen == kMaxLinesBack) {
                break;
            }
            ++newlines_seen;
        }
        --window_begin;
    }

    const std::string window(source_code + window_begin, start_byte - window_begin);

    // Walk the comment markers from the one nearest the node backwards.
    size_t search_pos = window.length();
    while (search_pos > 0) {
        const size_t comment_pos = window.rfind("//", search_pos - 1);
        if (comment_pos == std::string::npos) {
            break;
        }

        // Find the end of this comment line
        size_t line_end = window.find('\n', comment_pos);
        if (line_end == std::string::npos) {
            line_end = window.length();
        }

        // The nearest comment carrying an address wins.
        if (std::regex_search(window.begin() + comment_pos, window.begin() + line_end,
                              addr_regex)) {
            return window.substr(comment_pos, line_end - comment_pos);
        }

        search_pos = comment_pos;
    }

    return "";
}
|
|
|
|
// Returns the "//" comment that trails `node`, looking only at:
//   1. the remainder of the line on which the node ends, and
//   2. the following line, when that line consists solely of a comment.
// The result runs from the "//" marker to the end of its line. An empty
// string is returned when neither location holds a comment.
//
// Restricting the search to these two lines keeps a node from picking up an
// unrelated comment that appears further down the file.
//
// `source_code` / `source_length` describe the buffer the tree was parsed from.
std::string getCommentAfterNode(TSNode node, const char* source_code, uint32_t source_length) {
    const uint32_t end_byte = ts_node_end_byte(node);
    // Nothing follows the node (also guards the length subtraction below).
    if (end_byte >= source_length) return "";

    const char* const text_end = source_code + source_length;
    const char* const rest_begin = source_code + end_byte;

    // Remainder of the node's own line.
    const char* const first_newline = static_cast<const char*>(
        memchr(rest_begin, '\n', static_cast<size_t>(text_end - rest_begin)));
    const char* const rest_end = first_newline ? first_newline : text_end;
    const std::string same_line(rest_begin, rest_end);

    const size_t comment_pos = same_line.find("//");
    if (comment_pos != std::string::npos) {
        return same_line.substr(comment_pos);
    }

    if (!first_newline) return "";

    // Next line: accepted only if the comment is the first thing on it, so a
    // trailing comment of the *next* statement is not mistaken for ours.
    const char* const next_begin = first_newline + 1;
    const char* const second_newline = static_cast<const char*>(
        memchr(next_begin, '\n', static_cast<size_t>(text_end - next_begin)));
    const char* const next_end = second_newline ? second_newline : text_end;
    const std::string next_line(next_begin, next_end);

    const size_t first_char = next_line.find_first_not_of(" \t\r");
    if (first_char != std::string::npos && next_line.compare(first_char, 2, "//") == 0) {
        return next_line.substr(first_char);
    }

    return "";
}
|
|
|
|
// Reports whether `node` is an extern declaration.
//
// True when any ancestor is a `linkage_specification` (e.g. an extern "C"
// block), or when `node` is a `declaration` that has a
// `storage_class_specifier` child whose text is exactly "extern".
// `source_code` is the buffer the tree was parsed from.
bool isExternDeclaration(TSNode node, const char* source_code) {
    // Walk up the tree looking for an enclosing linkage specification.
    for (TSNode ancestor = ts_node_parent(node); !ts_node_is_null(ancestor);
         ancestor = ts_node_parent(ancestor)) {
        if (strcmp(ts_node_type(ancestor), "linkage_specification") == 0) {
            return true;
        }
    }

    // Only plain declarations can carry an explicit extern keyword here.
    if (strcmp(ts_node_type(node), "declaration") != 0) {
        return false;
    }

    const uint32_t total = ts_node_child_count(node);
    for (uint32_t idx = 0; idx < total; ++idx) {
        TSNode part = ts_node_child(node, idx);
        if (strcmp(ts_node_type(part), "storage_class_specifier") != 0) {
            continue;
        }

        const uint32_t from = ts_node_start_byte(part);
        const uint32_t to = ts_node_end_byte(part);
        if (std::string(source_code + from, to - from) == "extern") {
            return true;
        }
    }

    return false;
}
|
|
|
|
// Reports whether `node` is a `function_definition` that has a
// `compound_statement` (a braced body) among its direct children.
// Any other node type yields false.
bool hasFunctionBody(TSNode node) {
    if (strcmp(ts_node_type(node), "function_definition") != 0) {
        return false;
    }

    uint32_t remaining = ts_node_child_count(node);
    uint32_t idx = 0;
    while (remaining-- > 0) {
        TSNode part = ts_node_child(node, idx++);
        if (strcmp(ts_node_type(part), "compound_statement") == 0) {
            return true;
        }
    }
    return false;
}
|
|
|
|
// Walks the syntax tree rooted at `node` in pre-order and appends one
// FunctionInfo to `functions` for every `function_definition` or
// `declaration` that has both a name and an address comment.
//
// The address is taken from the comment after the node; for function
// definitions only, the comment before the node is tried as a fallback.
// Definitions are imports only when they lack a body; declarations are
// always imports. The `filepath` field of appended entries is left empty.
void findFunctions(TSNode node, const char* source_code, uint32_t source_length,
                   std::vector<FunctionInfo>& functions) {
    const char* kind = ts_node_type(node);
    const bool is_definition = strcmp(kind, "function_definition") == 0;
    const bool is_declaration = !is_definition && strcmp(kind, "declaration") == 0;

    if (is_definition || is_declaration) {
        const std::string name = getFunctionName(node, source_code);
        if (!name.empty()) {
            // Trailing comment first ...
            std::string address =
                extractAddress(getCommentAfterNode(node, source_code, source_length));

            // ... then, for definitions only, the comment that precedes the node.
            if (address.empty() && is_definition) {
                address = extractAddress(getCommentBeforeNode(node, source_code));
            }

            if (!address.empty()) {
                FunctionInfo info;
                info.name = name;
                info.address = address;
                // A definition with a body is a real function; everything
                // else (bodiless definition, pure declaration) is an import.
                info.is_import = is_definition ? !hasFunctionBody(node) : true;
                functions.push_back(info);
            }
        }
    }

    // Descend into every child regardless of whether this node matched.
    const uint32_t total = ts_node_child_count(node);
    for (uint32_t idx = 0; idx < total; ++idx) {
        findFunctions(ts_node_child(node, idx), source_code, source_length, functions);
    }
}
|
|
|
|
int main(int argc, char* argv[]) {
|
|
if (argc != 2) {
|
|
std::cerr << "Usage: " << argv[0] << " <input_file>" << std::endl;
|
|
return 1;
|
|
}
|
|
|
|
std::string filepath = argv[1];
|
|
std::ifstream file(filepath);
|
|
if (!file.is_open()) {
|
|
std::cerr << "Error: Could not open file " << filepath << std::endl;
|
|
return 1;
|
|
}
|
|
|
|
std::stringstream buffer;
|
|
buffer << file.rdbuf();
|
|
std::string file_content = buffer.str();
|
|
const char *source_code = file_content.c_str();
|
|
|
|
TSParser *parser = ts_parser_new();
|
|
ts_parser_set_language(parser, tree_sitter_cpp());
|
|
|
|
TSTree *tree = ts_parser_parse_string(parser, nullptr, source_code, file_content.length());
|
|
TSNode root_node = ts_tree_root_node(tree);
|
|
|
|
if (ts_node_is_null(root_node)) {
|
|
std::cerr << "Error: Failed to parse file" << std::endl;
|
|
return 1;
|
|
}
|
|
|
|
try {
|
|
DatabaseManager db("functions.db");
|
|
|
|
// Clear existing entries for this file
|
|
db.clearEntriesForFile(filepath);
|
|
|
|
// Find all functions with addresses
|
|
std::vector<FunctionInfo> functions;
|
|
findFunctions(root_node, source_code, file_content.length(), functions);
|
|
|
|
// Insert into database
|
|
for (auto& func : functions) {
|
|
func.filepath = filepath;
|
|
db.insertFunction(func);
|
|
|
|
std::cout << (func.is_import ? "Import: " : "Function: ")
|
|
<< func.name << " @ " << func.address << std::endl;
|
|
}
|
|
|
|
std::cout << "Processed " << functions.size() << " functions/imports from " << filepath << std::endl;
|
|
|
|
} catch (const std::exception& e) {
|
|
std::cerr << "Database error: " << e.what() << std::endl;
|
|
return 1;
|
|
}
|
|
|
|
ts_tree_delete(tree);
|
|
ts_parser_delete(parser);
|
|
|
|
return 0;
|
|
}
|