From ec5d40be0cbbf4e3f67a0dfdd9c4b7b7de6fd991 Mon Sep 17 00:00:00 2001
From: Guus Waals <_@guusw.nl>
Date: Wed, 28 May 2025 01:16:13 +0800
Subject: [PATCH] WIP

---
 tooling/tool.cpp | 162 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 133 insertions(+), 29 deletions(-)

diff --git a/tooling/tool.cpp b/tooling/tool.cpp
index 35255ebe..a6378546 100644
--- a/tooling/tool.cpp
+++ b/tooling/tool.cpp
@@ -159,7 +159,7 @@ public:
     const char *create_tables = R"(
       CREATE TABLE IF NOT EXISTS Functions (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath));
       CREATE TABLE IF NOT EXISTS Imports (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath));
-      CREATE TABLE IF NOT EXISTS Globals (filepath TEXT, name TEXT, address TEXT, PRIMARY KEY (name, filepath));
+      CREATE TABLE IF NOT EXISTS Globals (filepath TEXT, name TEXT, address TEXT);
     )";
     sqlite3_exec(db, create_tables, nullptr, nullptr, nullptr);
 
@@ -439,7 +439,8 @@ std::string getGlobalName(TSNode node, const char *source_code) {
         strcmp(type, "declarator") == 0 ||
         strcmp(type, "reference_declarator") == 0 ||
         strcmp(type, "pointer_declarator") == 0 ||
-        strcmp(type, "parenthesized_declarator") == 0) {
+        strcmp(type, "parenthesized_declarator") == 0 ||
+        strcmp(type, "array_declarator") == 0) {
       std::string name = findIdentifierInDeclarator(child, source_code);
       if (!name.empty()) {
         return name;
@@ -542,6 +543,96 @@ bool processGlobalsFile(const std::string &filepath, DatabaseManager &db) {
   return true;
 }
 
+// Recursively logs the Tree-sitter subtree rooted at `node`, one node per line.
+//
+// Every line shows the node type and its [start:end] byte range, indented two
+// spaces per nesting level. Nodes spanning fewer than 100 bytes also show
+// their text, with newlines escaped as \n and the result capped at 50 chars.
+//
+// `source_code` is forwarded to extractNodeText to recover node text; `depth`
+// is the current nesting level and is normally left at its default of 0.
+void dumpTreeSitterAST(TSNode node, const char *source_code, int depth = 0) {
+  std::string indent(depth * 2, ' ');
+  const char *type = ts_node_type(node);
+
+  uint32_t start = ts_node_start_byte(node);
+  uint32_t end = ts_node_end_byte(node);
+
+  // Get the text content for leaf nodes or small nodes
+  std::string content;
+  if (end - start < 100) { // Only show content for small nodes
+    content = extractNodeText(node, source_code);
+    // Replace newlines with \n for better readability. A plain find/replace
+    // scan avoids compiling a std::regex for every node in the tree.
+    for (std::string::size_type pos = content.find('\n');
+         pos != std::string::npos; pos = content.find('\n', pos + 2)) {
+      content.replace(pos, 1, "\\n");
+    }
+    // Truncate if still too long
+    if (content.length() > 50) {
+      content = content.substr(0, 47) + "...";
+    }
+  }
+
+  if (!content.empty()) {
+    spdlog::info("{}{}[{}:{}] \"{}\"", indent, type, start, end, content);
+  } else {
+    spdlog::info("{}{}[{}:{}]", indent, type, start, end);
+  }
+
+  // Recursively dump children
+  uint32_t child_count = ts_node_child_count(node);
+  for (uint32_t i = 0; i < child_count; i++) {
+    TSNode child = ts_node_child(node, i);
+    dumpTreeSitterAST(child, source_code, depth + 1);
+  }
+}
+
+// Parses the file at `filepath` with the Tree-sitter C++ grammar and logs the
+// resulting syntax tree via dumpTreeSitterAST.
+//
+// Returns true once the tree has been dumped; returns false after logging an
+// error when the file cannot be opened or no usable tree is produced.
+bool dumpTreeFile(const std::string &filepath) {
+  std::ifstream file(filepath);
+  if (!file.is_open()) {
+    spdlog::error("Could not open file {}", filepath);
+    return false;
+  }
+
+  std::string file_content((std::istreambuf_iterator<char>(file)),
+                           std::istreambuf_iterator<char>());
+
+  TSParser *parser = ts_parser_new();
+  ts_parser_set_language(parser, tree_sitter_cpp());
+
+  TSTree *tree = ts_parser_parse_string(parser, nullptr, file_content.c_str(),
+                                        file_content.length());
+  // The parser hands back a null tree when it cannot parse at all (e.g. no
+  // language was accepted); bail out before the tree API dereferences it.
+  if (tree == nullptr) {
+    spdlog::error("Failed to parse file {}", filepath);
+    ts_parser_delete(parser);
+    return false;
+  }
+
+  TSNode root_node = ts_tree_root_node(tree);
+  if (ts_node_is_null(root_node)) {
+    spdlog::error("Failed to parse file {}", filepath);
+    ts_tree_delete(tree);
+    ts_parser_delete(parser);
+    return false;
+  }
+
+  spdlog::info("=== Tree-sitter AST for {} ===", filepath);
+  dumpTreeSitterAST(root_node, file_content.c_str());
+  spdlog::info("=== End of AST dump ===");
+
+  ts_tree_delete(tree);
+  ts_parser_delete(parser);
+  return true;
+}
+
 int main(int argc, char *argv[]) {
   // Initialize spdlog
   auto console = spdlog::stdout_color_mt("console");
@@ -565,9 +656,9 @@ int main(int argc, char *argv[]) {
                  "File containing list of files to process");
   app.add_option("-d,--database", db_path, "SQLite database path")
       ->default_val("gh.db");
-  app.add_option("-m,--mode", mode, "Processing mode: 'functions' or 'globals'")
+  app.add_option("-m,--mode", mode, "Processing mode: 'functions', 'globals', or 'dump-tree'")
       ->default_val("functions")
-      ->check(CLI::IsMember({"functions", "globals"}));
+      ->check(CLI::IsMember({"functions", "globals", "dump-tree"}));
   app.add_flag("-v,--verbose", verbose, "Enable verbose logging (debug level)");
   app.add_flag("--log-file", log_file, "Enable logging to file");
 
@@ -610,43 +701,56 @@ int main(int argc, char *argv[]) {
   }
 
   try {
-    DatabaseManager db(db_path);
     int processed_count = 0;
-    const size_t batch_size = 50;
-    size_t current_batch = 0;
 
-    db.beginTransaction();
+    // For dump-tree mode, we don't need database operations
+    if (mode == "dump-tree") {
+      for (const auto &filepath : files_to_process) {
+        spdlog::info("=== Processing: {} ===", filepath);
+        if (dumpTreeFile(filepath)) {
+          processed_count++;
+        }
+      }
+    } else {
+      DatabaseManager db(db_path);
+      const size_t batch_size = 50;
+      size_t current_batch = 0;
 
-    for (const auto &filepath : files_to_process) {
-      spdlog::info("=== Processing: {} ===", filepath);
-      bool success = false;
-      if (mode == "functions") {
-        success = processFile(filepath, db);
-      } else if (mode == "globals") {
-        success = processGlobalsFile(filepath, db);
+      db.beginTransaction();
+
+      for (const auto &filepath : files_to_process) {
+        spdlog::info("=== Processing: {} ===", filepath);
+        bool success = false;
+        if (mode == "functions") {
+          success = processFile(filepath, db);
+        } else if (mode == "globals") {
+          success = processGlobalsFile(filepath, db);
+        }
+
+        if (success)
+          processed_count++;
+
+        if (++current_batch >= batch_size) {
+          db.commitTransaction();
+          spdlog::info("Committed batch of {} files to database", current_batch);
+          db.beginTransaction();
+          current_batch = 0;
+        }
       }
 
-      if (success)
-        processed_count++;
-
-      if (++current_batch >= batch_size) {
+      if (current_batch > 0) {
         db.commitTransaction();
-        spdlog::info("Committed batch of {} files to database", current_batch);
-        db.beginTransaction();
-        current_batch = 0;
+        spdlog::info("Committed final batch of {} files to database",
+                     current_batch);
       }
     }
 
-    if (current_batch > 0) {
-      db.commitTransaction();
-      spdlog::info("Committed final batch of {} files to database",
-                   current_batch);
-    }
-
     spdlog::info("=== Summary ===");
     spdlog::info("Processed {} files successfully", processed_count);
     spdlog::info("Mode: {}", mode);
-    spdlog::info("Database saved to: {}", db_path);
+    if (mode != "dump-tree") {
+      spdlog::info("Database saved to: {}", db_path);
+    }
 
   } catch (const std::exception &e) {
     spdlog::error("Database error: {}", e.what());