408 lines
13 KiB
C++
408 lines
13 KiB
C++
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>

#include <sqlite3.h>
#include <tree_sitter/api.h>

#include <CLI11.hpp>
|
|
|
|
// Grammar entry point handed to ts_parser_set_language() in processFile().
// Only declared here (with C linkage); the definition is not in this file.
extern "C" TSLanguage *tree_sitter_cpp();
|
|
|
|
/// One symbol found in a parsed source file, paired with the address read
/// from its `// XXXXXXXX` comment.
///
/// Member order is part of the interface: findFunctions() aggregate-initializes
/// this struct positionally.
struct FunctionInfo {
  std::string name;     ///< Identifier text returned by getFunctionName().
  std::string address;  ///< Eight hex digits returned by extractAddress().
  std::string filepath; ///< Path of the source file; set by processFile().
  /// true -> row goes to the Imports table, false -> the Functions table.
  /// Defaulted so a default-constructed instance never holds an
  /// indeterminate value.
  bool is_import = false;
};
|
|
|
|
class PreparedStatements {
|
|
private:
|
|
sqlite3 *db;
|
|
sqlite3_stmt *delete_functions_stmt;
|
|
sqlite3_stmt *delete_imports_stmt;
|
|
sqlite3_stmt *insert_functions_stmt;
|
|
sqlite3_stmt *insert_imports_stmt;
|
|
|
|
void prepareStatement(const char *sql, sqlite3_stmt **stmt,
|
|
const std::string &error_msg) {
|
|
if (sqlite3_prepare_v2(db, sql, -1, stmt, nullptr) != SQLITE_OK) {
|
|
throw std::runtime_error(error_msg + ": " + sqlite3_errmsg(db));
|
|
}
|
|
}
|
|
|
|
public:
|
|
PreparedStatements(sqlite3 *database) : db(database) {
|
|
prepareStatement("DELETE FROM Functions WHERE filepath = ?",
|
|
&delete_functions_stmt,
|
|
"Failed to prepare delete functions statement");
|
|
prepareStatement("DELETE FROM Imports WHERE filepath = ?",
|
|
&delete_imports_stmt,
|
|
"Failed to prepare delete imports statement");
|
|
prepareStatement("INSERT OR REPLACE INTO Functions (filepath, name, "
|
|
"address) VALUES (?, ?, ?)",
|
|
&insert_functions_stmt,
|
|
"Failed to prepare insert functions statement");
|
|
prepareStatement("INSERT OR REPLACE INTO Imports (filepath, name, address) "
|
|
"VALUES (?, ?, ?)",
|
|
&insert_imports_stmt,
|
|
"Failed to prepare insert imports statement");
|
|
}
|
|
|
|
~PreparedStatements() {
|
|
sqlite3_finalize(delete_functions_stmt);
|
|
sqlite3_finalize(delete_imports_stmt);
|
|
sqlite3_finalize(insert_functions_stmt);
|
|
sqlite3_finalize(insert_imports_stmt);
|
|
}
|
|
|
|
void clearEntriesForFile(const std::string &filepath) {
|
|
for (auto stmt : {delete_functions_stmt, delete_imports_stmt}) {
|
|
sqlite3_reset(stmt);
|
|
sqlite3_bind_text(stmt, 1, filepath.c_str(), -1, SQLITE_STATIC);
|
|
sqlite3_step(stmt);
|
|
}
|
|
}
|
|
|
|
void insertFunction(const FunctionInfo &func) {
|
|
sqlite3_stmt *stmt =
|
|
func.is_import ? insert_imports_stmt : insert_functions_stmt;
|
|
sqlite3_reset(stmt);
|
|
sqlite3_bind_text(stmt, 1, func.filepath.c_str(), -1, SQLITE_STATIC);
|
|
sqlite3_bind_text(stmt, 2, func.name.c_str(), -1, SQLITE_STATIC);
|
|
sqlite3_bind_text(stmt, 3, func.address.c_str(), -1, SQLITE_STATIC);
|
|
sqlite3_step(stmt);
|
|
}
|
|
};
|
|
|
|
class DatabaseManager {
|
|
private:
|
|
sqlite3 *db;
|
|
std::unique_ptr<PreparedStatements> prepared_stmts;
|
|
|
|
public:
|
|
DatabaseManager(const std::string &db_path) : db(nullptr) {
|
|
if (sqlite3_open(db_path.c_str(), &db) != SQLITE_OK) {
|
|
std::cerr << "Can't open database: " << sqlite3_errmsg(db) << std::endl;
|
|
sqlite3_close(db);
|
|
throw std::runtime_error("Failed to open database");
|
|
}
|
|
|
|
const char *create_tables = R"(
|
|
CREATE TABLE IF NOT EXISTS Functions (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath));
|
|
CREATE TABLE IF NOT EXISTS Imports (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath));
|
|
)";
|
|
|
|
sqlite3_exec(db, create_tables, nullptr, nullptr, nullptr);
|
|
prepared_stmts = std::make_unique<PreparedStatements>(db);
|
|
}
|
|
|
|
~DatabaseManager() {
|
|
if (db)
|
|
sqlite3_close(db);
|
|
}
|
|
|
|
void clearEntriesForFile(const std::string &filepath) {
|
|
prepared_stmts->clearEntriesForFile(filepath);
|
|
}
|
|
void insertFunction(const FunctionInfo &func) {
|
|
prepared_stmts->insertFunction(func);
|
|
}
|
|
void beginTransaction() {
|
|
sqlite3_exec(db, "BEGIN TRANSACTION", nullptr, nullptr, nullptr);
|
|
}
|
|
void commitTransaction() {
|
|
sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr);
|
|
}
|
|
void rollbackTransaction() {
|
|
sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr);
|
|
}
|
|
};
|
|
|
|
/// Extracts the address from a `// XXXXXXXX` style comment.
///
/// @param comment Text to scan; it may contain code before the comment.
/// @return The eight hex digits that directly follow the first `//` (after
///         optional whitespace) for which such digits exist, or an empty
///         string when there is no match. Longer hex runs are truncated to
///         their first eight digits.
std::string extractAddress(const std::string &comment) {
  // Compiling a std::regex is expensive and the pattern never changes, so it
  // is built once instead of on every call.
  static const std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))");

  std::smatch match;
  if (!std::regex_search(comment, match, addr_regex))
    return "";
  return match[1].str();
}
|
|
|
|
/// Returns the identifier that names `node`.
///
/// Direct children are visited in order. An `identifier` child is returned as
/// is; for a `function_declarator` child its first `identifier` child is
/// returned. Deeper nesting is not searched.
///
/// @param node        Node to inspect.
/// @param source_code Buffer the node's byte offsets refer to.
/// @return The identifier text, or an empty string if none was found.
std::string getFunctionName(TSNode node, const char *source_code) {
  // Copies the bytes of the source spanned by `n`.
  const auto text_of = [source_code](TSNode n) {
    const uint32_t first = ts_node_start_byte(n);
    const uint32_t last = ts_node_end_byte(n);
    return std::string(source_code + first, last - first);
  };
  const auto has_type = [](TSNode n, const char *wanted) {
    return strcmp(ts_node_type(n), wanted) == 0;
  };

  const uint32_t total = ts_node_child_count(node);
  for (uint32_t idx = 0; idx != total; ++idx) {
    const TSNode candidate = ts_node_child(node, idx);

    if (has_type(candidate, "identifier"))
      return text_of(candidate);

    if (!has_type(candidate, "function_declarator"))
      continue;

    const uint32_t parts = ts_node_child_count(candidate);
    for (uint32_t k = 0; k != parts; ++k) {
      const TSNode part = ts_node_child(candidate, k);
      if (has_type(part, "identifier"))
        return text_of(part);
    }
    // A declarator without an identifier child: keep scanning the siblings.
  }
  return "";
}
|
|
|
|
/// Scans backwards from `start_byte` for the nearest `//` comment that carries
/// an eight-hex-digit address and starts at most 20 newlines before
/// `start_byte`. Returns that comment up to its line end, or "" if none.
static std::string findAddressCommentBefore(const char *source_code,
                                            uint32_t start_byte) {
  // Built once; compiling a std::regex per call is costly.
  static const std::regex addr_regex(R"(//\s*([0-9a-fA-F]{8}))");
  constexpr std::ptrdiff_t kMaxNewlinesBack = 20;

  // A view instead of a std::string: no copy of the file prefix per node.
  const std::string_view before_text(source_code, start_byte);

  size_t search_pos = before_text.length();
  while (search_pos > 0) {
    const size_t comment_pos = before_text.rfind("//", search_pos - 1);
    if (comment_pos == std::string_view::npos)
      break;

    size_t line_end = before_text.find('\n', comment_pos);
    if (line_end == std::string_view::npos)
      line_end = before_text.length();

    const std::string_view comment_line =
        before_text.substr(comment_pos, line_end - comment_pos);

    if (std::regex_search(comment_line.data(),
                          comment_line.data() + comment_line.size(),
                          addr_regex)) {
      const auto newlines_between = std::count(
          before_text.begin() + comment_pos, before_text.end(), '\n');
      if (newlines_between <= kMaxNewlinesBack)
        return std::string(comment_line);
      // Every earlier comment is at least this far away, so none can qualify.
      break;
    }
    search_pos = comment_pos;
  }
  return "";
}

/// Returns the first `//` comment found anywhere at or after `end_byte`, up to
/// its line end, or "" if there is none.
static std::string findCommentAfter(const char *source_code,
                                    uint32_t source_length, uint32_t end_byte) {
  // Guards the unsigned subtraction below against wrapping around.
  if (end_byte >= source_length)
    return "";

  const std::string_view remaining(source_code + end_byte,
                                   source_length - end_byte);
  const size_t comment_pos = remaining.find("//");
  if (comment_pos == std::string_view::npos)
    return "";

  size_t line_end = remaining.find('\n', comment_pos);
  if (line_end == std::string_view::npos)
    line_end = remaining.length();
  return std::string(remaining.substr(comment_pos, line_end - comment_pos));
}

/// Finds the `//` comment associated with `node`.
///
/// @param node          Node whose surroundings are searched.
/// @param source_code   Buffer the node's byte offsets refer to.
/// @param source_length Number of bytes in `source_code`.
/// @param search_before true: look before the node for an address comment
///                      (see findAddressCommentBefore); false: take the first
///                      `//` comment after the node.
/// @return The comment text without its newline, or "" if none was found.
///
/// NOTE(review): the forward search is not limited to the node's own line, so
/// it can return a comment that belongs to a later declaration. Kept as is
/// because the intended input layout is not visible from this file.
std::string getComment(TSNode node, const char *source_code,
                       uint32_t source_length, bool search_before) {
  if (search_before) {
    const uint32_t start_byte = ts_node_start_byte(node);
    if (start_byte == 0)
      return "";
    return findAddressCommentBefore(source_code, start_byte);
  }
  return findCommentAfter(source_code, source_length, ts_node_end_byte(node));
}
|
|
|
|
/// Tells whether `node` is a `function_definition` that has a
/// `compound_statement` among its direct children.
///
/// @return false for any other node type, or when no such child exists.
bool hasFunctionBody(TSNode node) {
  if (strcmp(ts_node_type(node), "function_definition") == 0) {
    for (uint32_t idx = 0, total = ts_node_child_count(node); idx < total;
         ++idx) {
      const TSNode child = ts_node_child(node, idx);
      if (strcmp(ts_node_type(child), "compound_statement") == 0)
        return true;
    }
  }
  return false;
}
|
|
|
|
/// Walks the subtree rooted at `node` depth-first and appends every
/// `function_definition` / `declaration` that has both a name and an address
/// comment to `functions`.
///
/// The comment after the node is tried first; for definitions only, the
/// comment before the node is used as a fallback. Declarations are always
/// recorded as imports; definitions are imports only when they have no body.
/// `filepath` of each record is left empty.
///
/// @param node          Subtree root.
/// @param source_code   Buffer the node offsets refer to.
/// @param source_length Number of bytes in `source_code`.
/// @param functions     Output list; results are appended.
void findFunctions(TSNode node, const char *source_code, uint32_t source_length,
                   std::vector<FunctionInfo> &functions) {
  const char *kind = ts_node_type(node);
  const bool is_definition = strcmp(kind, "function_definition") == 0;
  const bool is_candidate = is_definition || strcmp(kind, "declaration") == 0;

  if (is_candidate) {
    const std::string symbol = getFunctionName(node, source_code);
    if (!symbol.empty()) {
      std::string addr =
          extractAddress(getComment(node, source_code, source_length, false));
      if (is_definition && addr.empty()) {
        addr =
            extractAddress(getComment(node, source_code, source_length, true));
      }

      if (!addr.empty()) {
        const bool imported = !is_definition || !hasFunctionBody(node);
        functions.push_back(FunctionInfo{symbol, addr, "", imported});
      }
    }
  }

  // Descend regardless of whether this node itself produced a record.
  for (uint32_t idx = 0, total = ts_node_child_count(node); idx < total;
       ++idx) {
    findFunctions(ts_node_child(node, idx), source_code, source_length,
                  functions);
  }
}
|
|
|
|
/// Reads a list file with one path per line and returns the paths that exist.
///
/// Each line is stripped of surrounding spaces, tabs and carriage returns, so
/// list files with CRLF line endings work. Blank lines and lines starting with
/// `#` are ignored. Lines containing `*` are skipped with a notice; missing
/// files are skipped with a warning.
///
/// @param list_file Path of the list file.
/// @return Existing paths in file order; empty if the list cannot be opened
///         (an error is printed to stderr in that case).
std::vector<std::string> readFileList(const std::string &list_file) {
  std::vector<std::string> files;
  std::ifstream file(list_file);
  if (!file.is_open()) {
    std::cerr << "Error: Could not open list file " << list_file << std::endl;
    return files;
  }

  static const char *const kBlanks = " \t\r";

  std::string line;
  while (std::getline(file, line)) {
    // Without this trim a CRLF file leaves '\r' on every path and each entry
    // is reported as "not found".
    const size_t first = line.find_first_not_of(kBlanks);
    if (first == std::string::npos)
      continue; // blank line
    const size_t last = line.find_last_not_of(kBlanks);
    line = line.substr(first, last - first + 1);

    if (line[0] == '#')
      continue;

    if (line.find('*') != std::string::npos) {
      std::cout << "Skipping wildcard pattern: " << line << std::endl;
      continue;
    }

    if (std::filesystem::exists(line)) {
      files.push_back(line);
    } else {
      std::cout << "Warning: File not found: " << line << std::endl;
    }
  }
  return files;
}
|
|
|
|
bool processFile(const std::string &filepath, DatabaseManager &db) {
|
|
std::ifstream file(filepath);
|
|
if (!file.is_open()) {
|
|
std::cerr << "Error: Could not open file " << filepath << std::endl;
|
|
return false;
|
|
}
|
|
|
|
std::string file_content((std::istreambuf_iterator<char>(file)),
|
|
std::istreambuf_iterator<char>());
|
|
|
|
TSParser *parser = ts_parser_new();
|
|
ts_parser_set_language(parser, tree_sitter_cpp());
|
|
|
|
TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(),
|
|
file_content.length());
|
|
TSNode root_node = ts_tree_root_node(tree);
|
|
|
|
if (ts_node_is_null(root_node)) {
|
|
std::cerr << "Error: Failed to parse file " << filepath << std::endl;
|
|
ts_tree_delete(tree);
|
|
ts_parser_delete(parser);
|
|
return false;
|
|
}
|
|
|
|
db.clearEntriesForFile(filepath);
|
|
|
|
std::vector<FunctionInfo> functions;
|
|
findFunctions(root_node, file_content.c_str(), file_content.length(),
|
|
functions);
|
|
|
|
for (auto &func : functions) {
|
|
func.filepath = filepath;
|
|
db.insertFunction(func);
|
|
std::cout << (func.is_import ? "Import: " : "Function: ") << func.name
|
|
<< " @ " << func.address << " in " << filepath << std::endl;
|
|
}
|
|
|
|
std::cout << "Processed " << functions.size() << " functions/imports from "
|
|
<< filepath << std::endl;
|
|
|
|
ts_tree_delete(tree);
|
|
ts_parser_delete(parser);
|
|
return true;
|
|
}
|
|
|
|
int main(int argc, char *argv[]) {
|
|
CLI::App app{
|
|
"C++ Function Parser - Extracts function addresses from C++ files"};
|
|
|
|
std::vector<std::string> input_files;
|
|
std::string list_file;
|
|
std::string db_path = "functions.db";
|
|
|
|
app.add_option("files", input_files,
|
|
"Input C++ files to parse (supports @listfile.txt syntax)");
|
|
app.add_option("-l,--list", list_file,
|
|
"File containing list of files to process");
|
|
app.add_option("-d,--database", db_path, "SQLite database path")
|
|
->default_val("functions.db");
|
|
|
|
CLI11_PARSE(app, argc, argv);
|
|
|
|
std::vector<std::string> files_to_process;
|
|
|
|
if (!list_file.empty()) {
|
|
auto list_files = readFileList(list_file);
|
|
files_to_process.insert(files_to_process.end(), list_files.begin(),
|
|
list_files.end());
|
|
}
|
|
|
|
for (const auto &input : input_files) {
|
|
if (input.starts_with("@")) {
|
|
auto list_files = readFileList(input.substr(1));
|
|
files_to_process.insert(files_to_process.end(), list_files.begin(),
|
|
list_files.end());
|
|
} else if (std::filesystem::exists(input)) {
|
|
files_to_process.push_back(input);
|
|
} else {
|
|
std::cout << "Warning: File not found: " << input << std::endl;
|
|
}
|
|
}
|
|
|
|
if (files_to_process.empty()) {
|
|
std::cerr << "No files to process. Use --help for usage information."
|
|
<< std::endl;
|
|
return 1;
|
|
}
|
|
|
|
try {
|
|
DatabaseManager db(db_path);
|
|
int processed_count = 0;
|
|
const size_t batch_size = 50;
|
|
size_t current_batch = 0;
|
|
|
|
db.beginTransaction();
|
|
|
|
for (const auto &filepath : files_to_process) {
|
|
std::cout << "\n=== Processing: " << filepath << " ===" << std::endl;
|
|
if (processFile(filepath, db))
|
|
processed_count++;
|
|
|
|
if (++current_batch >= batch_size) {
|
|
db.commitTransaction();
|
|
std::cout << "Committed batch of " << current_batch
|
|
<< " files to database" << std::endl;
|
|
db.beginTransaction();
|
|
current_batch = 0;
|
|
}
|
|
}
|
|
|
|
if (current_batch > 0) {
|
|
db.commitTransaction();
|
|
std::cout << "Committed final batch of " << current_batch
|
|
<< " files to database" << std::endl;
|
|
}
|
|
|
|
std::cout << "\n=== Summary ===" << std::endl;
|
|
std::cout << "Processed " << processed_count << " files successfully"
|
|
<< std::endl;
|
|
std::cout << "Database saved to: " << db_path << std::endl;
|
|
|
|
} catch (const std::exception &e) {
|
|
std::cerr << "Database error: " << e.what() << std::endl;
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|