reman3/tooling/parser.cpp

297 lines
9.2 KiB
C++

#include "tool.hpp"
#include <cstring>
#include <stdexcept>
#include <spdlog/spdlog.h>
// Global address regex pattern
const std::regex ADDRESS_REGEX(R"(//\s*([0-9a-fA-F]{8}))");
// Helper function to convert string to FileType
FileType stringToFileType(const std::string &type_str) {
if (type_str == "auto")
return FileType::Auto;
if (type_str == "fix")
return FileType::Fix;
if (type_str == "stub")
return FileType::Stub;
if (type_str == "ref")
return FileType::Ref;
throw std::invalid_argument("Invalid file type: " + type_str);
}
// Helper function to convert FileType to string
std::string fileTypeToString(FileType type) {
switch (type) {
case FileType::Auto:
return "auto";
case FileType::Fix:
return "fix";
case FileType::Stub:
return "stub";
case FileType::Ref:
return "ref";
default:
throw std::logic_error("Invalid file type: " + std::to_string((int)type));
}
}
// Helper function to check if a comment contains an address
bool hasAddressPattern(const std::string &comment) {
return std::regex_search(comment, ADDRESS_REGEX);
}
std::string extractAddress(const std::string &comment) {
std::smatch match;
return std::regex_search(comment, match, ADDRESS_REGEX) ? match[1].str() : "";
}
// Helper function to extract text from a TSNode
std::string extractNodeText(TSNode node, const char *source_code) {
uint32_t start = ts_node_start_byte(node);
uint32_t end = ts_node_end_byte(node);
return std::string(source_code + start, end - start);
}
// Helper function to find first identifier in a node
std::string findIdentifierInNode(TSNode node, const char *source_code) {
uint32_t child_count = ts_node_child_count(node);
for (uint32_t i = 0; i < child_count; i++) {
TSNode child = ts_node_child(node, i);
if (strcmp(ts_node_type(child), "identifier") == 0) {
return extractNodeText(child, source_code);
}
}
return "";
}
// Helper function to recursively find identifier in any declarator
std::string findIdentifierInDeclarator(TSNode node, const char *source_code) {
const char *type = ts_node_type(node);
// If this is an identifier, return it
if (strcmp(type, "identifier") == 0) {
return extractNodeText(node, source_code);
}
// Recursively search all children
uint32_t child_count = ts_node_child_count(node);
for (uint32_t i = 0; i < child_count; i++) {
TSNode child = ts_node_child(node, i);
std::string result = findIdentifierInDeclarator(child, source_code);
if (!result.empty()) {
return result;
}
}
return "";
}
std::string getFunctionName(TSNode node, const char *source_code) {
uint32_t child_count = ts_node_child_count(node);
for (uint32_t i = 0; i < child_count; i++) {
TSNode child = ts_node_child(node, i);
const char *type = ts_node_type(child);
if (strcmp(type, "function_declarator") == 0) {
std::string name = findIdentifierInNode(child, source_code);
if (!name.empty())
return name;
} else if (strcmp(type, "identifier") == 0) {
return extractNodeText(child, source_code);
} else if (strcmp(type, "pointer_declarator") == 0) {
std::string name = getFunctionName(child, source_code);
if (!name.empty())
return name;
}
}
return "";
}
std::string getGlobalName(TSNode node, const char *source_code) {
uint32_t child_count = ts_node_child_count(node);
for (uint32_t i = 0; i < child_count; i++) {
TSNode child = ts_node_child(node, i);
const char *type = ts_node_type(child);
// Look for any kind of declarator and recursively search for identifier
if (strcmp(type, "init_declarator") == 0 ||
strcmp(type, "declarator") == 0 ||
strcmp(type, "reference_declarator") == 0 ||
strcmp(type, "pointer_declarator") == 0 ||
strcmp(type, "parenthesized_declarator") == 0 ||
strcmp(type, "array_declarator") == 0) {
std::string name = findIdentifierInDeclarator(child, source_code);
if (!name.empty()) {
return name;
}
}
// Direct identifier child
else if (strcmp(type, "identifier") == 0) {
return extractNodeText(child, source_code);
}
}
return "";
}
std::string getComment(TSNode node, const char *source_code,
uint32_t source_length, bool search_before) {
TSNode current = node;
if (search_before) {
// Look for comments before the current node
while (!ts_node_is_null(current)) {
TSNode prev_sibling = ts_node_prev_sibling(current);
while (!ts_node_is_null(prev_sibling)) {
const char *type = ts_node_type(prev_sibling);
if (strcmp(type, "comment") == 0) {
std::string comment_text = extractNodeText(prev_sibling, source_code);
// Check if it contains an address pattern
if (hasAddressPattern(comment_text)) {
return comment_text;
}
}
// Skip whitespace and continue looking
else if (strcmp(type, "ERROR") != 0) {
// If we hit non-comment, non-whitespace content, stop searching
break;
}
prev_sibling = ts_node_prev_sibling(prev_sibling);
}
// Move up to parent and continue searching
current = ts_node_parent(current);
}
} else {
// Look for comments after the current node
TSNode next_sibling = ts_node_next_sibling(node);
while (!ts_node_is_null(next_sibling)) {
const char *type = ts_node_type(next_sibling);
if (strcmp(type, "comment") == 0) {
std::string comment_text = extractNodeText(next_sibling, source_code);
// Check if it contains an address pattern
if (hasAddressPattern(comment_text)) {
return comment_text;
}
}
// Skip whitespace and continue looking
else if (strcmp(type, "ERROR") != 0) {
// If we hit non-comment, non-whitespace content, stop searching
break;
}
next_sibling = ts_node_next_sibling(next_sibling);
}
}
return "";
}
bool hasFunctionBody(TSNode node) {
if (strcmp(ts_node_type(node), "function_definition") != 0)
return false;
uint32_t child_count = ts_node_child_count(node);
for (uint32_t i = 0; i < child_count; i++) {
if (strcmp(ts_node_type(ts_node_child(node, i)), "compound_statement") ==
0) {
return true;
}
}
return false;
}
void findFunctions(TSNode node, const char *source_code, uint32_t source_length,
std::vector<FunctionInfo> &functions, FileType file_type) {
const char *type = ts_node_type(node);
if (strcmp(type, "function_definition") == 0 ||
strcmp(type, "declaration") == 0) {
std::string func_name = getFunctionName(node, source_code);
if (!func_name.empty()) {
std::string address =
extractAddress(getComment(node, source_code, source_length, false));
if (address.empty() && strcmp(type, "function_definition") == 0) {
address =
extractAddress(getComment(node, source_code, source_length, true));
}
if (!address.empty()) {
FunctionInfo func{func_name, address, "",
strcmp(type, "function_definition") == 0
? !hasFunctionBody(node)
: true,
file_type}; // Add file_type parameter
functions.push_back(func);
}
// We'll never nest function declarations
return;
} else {
spdlog::error("Failed to get function name for {}",
extractNodeText(node, source_code));
}
}
uint32_t child_count = ts_node_child_count(node);
for (uint32_t i = 0; i < child_count; i++) {
findFunctions(ts_node_child(node, i), source_code, source_length, functions,
file_type);
}
}
void findGlobals(TSNode node, const char *source_code, uint32_t source_length,
std::vector<GlobalInfo> &globals) {
const char *type = ts_node_type(node);
// Look for extern declarations
if (strcmp(type, "declaration") == 0) {
// Check if this is an extern declaration
uint32_t child_count = ts_node_child_count(node);
bool is_extern = false;
for (uint32_t i = 0; i < child_count; i++) {
TSNode child = ts_node_child(node, i);
if (strcmp(ts_node_type(child), "storage_class_specifier") == 0) {
std::string storage_class = extractNodeText(child, source_code);
if (storage_class == "extern") {
is_extern = true;
break;
}
}
}
if (is_extern) {
std::string global_name = getGlobalName(node, source_code);
if (!global_name.empty()) {
// Look for address comment after the declaration
std::string address =
extractAddress(getComment(node, source_code, source_length, false));
if (!address.empty()) {
GlobalInfo global{global_name, address, ""};
globals.push_back(global);
}
} else {
std::string src = extractNodeText(node, source_code);
spdlog::error("Failed to get global name for {}", src);
}
return;
}
}
// Recursively search child nodes
uint32_t child_count = ts_node_child_count(node);
for (uint32_t i = 0; i < child_count; i++) {
findGlobals(ts_node_child(node, i), source_code, source_length, globals);
}
}