This commit is contained in:
Guus Waals 2025-05-28 01:16:13 +08:00
parent 99aaebba82
commit ec5d40be0c
1 changed files with 111 additions and 29 deletions

View File

@ -159,7 +159,7 @@ public:
const char *create_tables = R"(
CREATE TABLE IF NOT EXISTS Functions (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath));
CREATE TABLE IF NOT EXISTS Imports (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath));
CREATE TABLE IF NOT EXISTS Globals (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath));
CREATE TABLE IF NOT EXISTS Globals (filepath TEXT, name TEXT, address TEXT);
)";
sqlite3_exec(db, create_tables, nullptr, nullptr, nullptr);
@ -439,7 +439,8 @@ std::string getGlobalName(TSNode node, const char *source_code) {
strcmp(type, "declarator") == 0 ||
strcmp(type, "reference_declarator") == 0 ||
strcmp(type, "pointer_declarator") == 0 ||
strcmp(type, "parenthesized_declarator") == 0) {
strcmp(type, "parenthesized_declarator") == 0 ||
strcmp(type, "array_declarator") == 0) {
std::string name = findIdentifierInDeclarator(child, source_code);
if (!name.empty()) {
return name;
@ -542,6 +543,74 @@ bool processGlobalsFile(const std::string &filepath, DatabaseManager &db) {
return true;
}
// Helper function to dump Tree-sitter AST.
//
// Logs one line per node through spdlog::info, visiting the subtree rooted at
// `node` in pre-order. Every line carries the node type and its
// [start_byte:end_byte] range, indented two spaces per nesting level. Nodes
// spanning fewer than 100 bytes additionally show the text returned by
// extractNodeText, with newlines escaped as "\n" and capped at 50 characters.
//
// node        - root of the subtree to dump.
// source_code - source buffer handed to extractNodeText.
// depth       - nesting level of `node`; only affects indentation.
void dumpTreeSitterAST(TSNode node, const char *source_code, int depth = 0) {
  const std::string indent(depth * 2, ' ');
  const char *type = ts_node_type(node);
  const uint32_t start = ts_node_start_byte(node);
  const uint32_t end = ts_node_end_byte(node);

  // Get the text content for leaf nodes or small nodes
  std::string content;
  if (end - start < 100) { // Only show content for small nodes
    content = extractNodeText(node, source_code);

    // Replace newlines with \n for better readability. A plain find/replace
    // loop is used rather than std::regex: this function runs once per AST
    // node and compiling a regex on each call is needlessly expensive.
    for (std::string::size_type pos = content.find('\n');
         pos != std::string::npos; pos = content.find('\n', pos + 2)) {
      // The two-character escape now occupies [pos, pos + 2), so the next
      // search resumes right after it.
      content.replace(pos, 1, "\\n");
    }

    // Truncate if still too long
    if (content.length() > 50) {
      content = content.substr(0, 47) + "...";
    }
  }

  if (!content.empty()) {
    spdlog::info("{}{}[{}:{}] \"{}\"", indent, type, start, end, content);
  } else {
    spdlog::info("{}{}[{}:{}]", indent, type, start, end);
  }

  // Recursively dump children
  const uint32_t child_count = ts_node_child_count(node);
  for (uint32_t i = 0; i < child_count; i++) {
    dumpTreeSitterAST(ts_node_child(node, i), source_code, depth + 1);
  }
}
bool dumpTreeFile(const std::string &filepath) {
std::ifstream file(filepath);
if (!file.is_open()) {
spdlog::error("Could not open file {}", filepath);
return false;
}
std::string file_content((std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
TSParser *parser = ts_parser_new();
ts_parser_set_language(parser, tree_sitter_cpp());
TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(),
file_content.length());
TSNode root_node = ts_tree_root_node(tree);
if (ts_node_is_null(root_node)) {
spdlog::error("Failed to parse file {}", filepath);
ts_tree_delete(tree);
ts_parser_delete(parser);
return false;
}
spdlog::info("=== Tree-sitter AST for {} ===", filepath);
dumpTreeSitterAST(root_node, file_content.c_str());
spdlog::info("=== End of AST dump ===");
ts_tree_delete(tree);
ts_parser_delete(parser);
return true;
}
int main(int argc, char *argv[]) {
// Initialize spdlog
auto console = spdlog::stdout_color_mt("console");
@ -565,9 +634,9 @@ int main(int argc, char *argv[]) {
"File containing list of files to process");
app.add_option("-d,--database", db_path, "SQLite database path")
->default_val("gh.db");
app.add_option("-m,--mode", mode, "Processing mode: 'functions' or 'globals'")
app.add_option("-m,--mode", mode, "Processing mode: 'functions', 'globals', or 'dump-tree'")
->default_val("functions")
->check(CLI::IsMember({"functions", "globals"}));
->check(CLI::IsMember({"functions", "globals", "dump-tree"}));
app.add_flag("-v,--verbose", verbose, "Enable verbose logging (debug level)");
app.add_flag("--log-file", log_file, "Enable logging to file");
@ -610,43 +679,56 @@ int main(int argc, char *argv[]) {
}
try {
DatabaseManager db(db_path);
int processed_count = 0;
const size_t batch_size = 50;
size_t current_batch = 0;
db.beginTransaction();
// For dump-tree mode, we don't need database operations
if (mode == "dump-tree") {
for (const auto &filepath : files_to_process) {
spdlog::info("=== Processing: {} ===", filepath);
if (dumpTreeFile(filepath)) {
processed_count++;
}
}
} else {
DatabaseManager db(db_path);
const size_t batch_size = 50;
size_t current_batch = 0;
for (const auto &filepath : files_to_process) {
spdlog::info("=== Processing: {} ===", filepath);
bool success = false;
if (mode == "functions") {
success = processFile(filepath, db);
} else if (mode == "globals") {
success = processGlobalsFile(filepath, db);
db.beginTransaction();
for (const auto &filepath : files_to_process) {
spdlog::info("=== Processing: {} ===", filepath);
bool success = false;
if (mode == "functions") {
success = processFile(filepath, db);
} else if (mode == "globals") {
success = processGlobalsFile(filepath, db);
}
if (success)
processed_count++;
if (++current_batch >= batch_size) {
db.commitTransaction();
spdlog::info("Committed batch of {} files to database", current_batch);
db.beginTransaction();
current_batch = 0;
}
}
if (success)
processed_count++;
if (++current_batch >= batch_size) {
if (current_batch > 0) {
db.commitTransaction();
spdlog::info("Committed batch of {} files to database", current_batch);
db.beginTransaction();
current_batch = 0;
spdlog::info("Committed final batch of {} files to database",
current_batch);
}
}
if (current_batch > 0) {
db.commitTransaction();
spdlog::info("Committed final batch of {} files to database",
current_batch);
}
spdlog::info("=== Summary ===");
spdlog::info("Processed {} files successfully", processed_count);
spdlog::info("Mode: {}", mode);
spdlog::info("Database saved to: {}", db_path);
if (mode != "dump-tree") {
spdlog::info("Database saved to: {}", db_path);
}
} catch (const std::exception &e) {
spdlog::error("Database error: {}", e.what());