Maybe add tests for cparser

This commit is contained in:
Guus Waals 2024-10-07 22:08:33 +08:00
parent 2da111a348
commit 8f0e8f68bb
10 changed files with 323 additions and 38 deletions

View File

@ -7,8 +7,6 @@ import ghidra.program.model.data.DataType;
import ghidra.program.model.data.StandAloneDataTypeManager; import ghidra.program.model.data.StandAloneDataTypeManager;
import re3lib.FunctionDatabase; import re3lib.FunctionDatabase;
import re3lib.RecompileConfig; import re3lib.RecompileConfig;
import re3lib.CParser;
import re3lib.CTokenizer;
import java.io.File; import java.io.File;
import java.io.BufferedReader; import java.io.BufferedReader;
@ -21,6 +19,9 @@ import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import cparser.Parser;
import cparser.Tokenizer;
public class RebuildFunctionDatabase extends GhidraScript { public class RebuildFunctionDatabase extends GhidraScript {
// Will rebuild all functions // Will rebuild all functions
public boolean rebuildAllGlobals = true; public boolean rebuildAllGlobals = true;
@ -125,8 +126,8 @@ public class RebuildFunctionDatabase extends GhidraScript {
println("Scanning " + file); println("Scanning " + file);
String text = new String(Files.readAllBytes(file.toPath())); String text = new String(Files.readAllBytes(file.toPath()));
CTokenizer.TokenSet tokens = new CTokenizer(text).parse(); Tokenizer.TokenSet tokens = new Tokenizer(text).parse();
CParser parser = new CParser(tokens); Parser parser = new Parser(tokens);
parser.parse(); parser.parse();
// for (CTokenizer.Token token : tokens.getTokens()) { // for (CTokenizer.Token token : tokens.getTokens()) {
@ -134,13 +135,13 @@ public class RebuildFunctionDatabase extends GhidraScript {
// println("Line " + line + ": " + token.ofs + " " + token.len + " " + token.type + " - " // println("Line " + line + ": " + token.ofs + " " + token.len + " " + token.type + " - "
// + tokens.getTextNoNewlines(token)); // + tokens.getTextNoNewlines(token));
// } // }
for (CParser.Function function : parser.getFunctions()) { for (Parser.Function function : parser.getFunctions()) {
println("Function: " + function.name + " " + function.startOffset + " " + function.endOffset); println("Function: " + function.name + " " + function.startOffset + " " + function.endOffset);
} }
for (CParser.FunctionCall functionCall : parser.getFunctionCalls()) { for (Parser.FunctionCall functionCall : parser.getFunctionCalls()) {
println("FunctionCall: " + functionCall.name + " " + functionCall.startOffset + " " + functionCall.endOffset); println("FunctionCall: " + functionCall.name + " " + functionCall.startOffset + " " + functionCall.endOffset);
} }
for (CParser.Variable variable : parser.getVariables()) { for (Parser.Variable variable : parser.getVariables()) {
println("Variable: " + variable.name + " " + variable.startOffset + " " + variable.endOffset); println("Variable: " + variable.name + " " + variable.startOffset + " " + variable.endOffset);
} }
} }

5
scripts/cparser/Log.java Normal file
View File

@ -0,0 +1,5 @@
package cparser;
/**
 * Minimal logging sink for the cparser package.
 *
 * <p>Implementations decide where a message goes (console, script output, nowhere).
 */
public interface Log {
  /**
   * Receives one log message.
   *
   * @param msg the message text to record
   */
  void log(String msg);
}

View File

@ -1,37 +1,45 @@
package re3lib; package cparser;
import java.util.*; import java.util.*;
import re3lib.CTokenizer.Token; import cparser.Tokenizer.Token;
import cparser.Log;
public class CParser { public class Parser {
private CTokenizer.TokenSet tokenSet; private Tokenizer.TokenSet tokenSet;
private List<Variable> variables; private List<Variable> variables;
private List<Function> functions; private List<Function> functions;
private List<FunctionCall> functionCalls; private List<FunctionCall> functionCalls;
private Log log;
/**
 * Creates a parser without a log sink; {@link #log(String)} calls are then ignored.
 *
 * <p>Kept so that callers using the single-argument form continue to compile.
 *
 * @param tokenSet tokens produced by {@link Tokenizer#parse()}
 */
public Parser(Tokenizer.TokenSet tokenSet) {
  this(tokenSet, (Log) null);
}

/**
 * Creates a parser over an already tokenized source.
 *
 * @param tokenSet tokens produced by {@link Tokenizer#parse()}
 * @param log      sink for diagnostic messages; may be {@code null} to disable logging
 */
public Parser(Tokenizer.TokenSet tokenSet, Log log) {
  this.tokenSet = tokenSet;
  // The sink must be stored here, otherwise log(String) never has anything to forward to.
  this.log = log;
  this.variables = new ArrayList<>();
  this.functions = new ArrayList<>();
  this.functionCalls = new ArrayList<>();
}
/**
 * Forwards {@code msg} to the configured {@link Log}; does nothing when no sink is set.
 *
 * @param msg the message to forward
 */
void log(String msg) {
  if (log == null) {
    return;
  }
  log.log(msg);
}
int index = 0; int index = 0;
public void parse() { public void parse() {
CTokenizer.Token[] tokens = tokenSet.getTokens(); Tokenizer.Token[] tokens = tokenSet.getTokens();
for (index = 0; index < tokens.length; index++) { for (index = 0; index < tokens.length; index++) {
CTokenizer.Token token = tokens[index]; Tokenizer.Token token = tokens[index];
if (token.type == CTokenizer.TokenType.BLOCK_COMMENT || token.type == CTokenizer.TokenType.COMMENT) { if (token.type == Tokenizer.TokenType.BLOCK_COMMENT || token.type == Tokenizer.TokenType.COMMENT) {
continue; continue;
} else if (token.type == CTokenizer.TokenType.HASH) { } else if (token.type == Tokenizer.TokenType.HASH) {
index = parsePreprocessorExpression(); index = parsePreprocessorExpression();
} else if (tokens[index].type == CTokenizer.TokenType.IDENTIFIER) { } else if (tokens[index].type == Tokenizer.TokenType.IDENTIFIER) {
if (index + 1 < tokens.length && tokens[index + 1].type == CTokenizer.TokenType.L_PAREN) { if (index + 1 < tokens.length && tokens[index + 1].type == Tokenizer.TokenType.L_PAREN) {
// Function call or declaration/definition // Function call or declaration/definition
if (index > 0 && (tokens[index - 1].type == CTokenizer.TokenType.IDENTIFIER || if (index > 0 && (tokens[index - 1].type == Tokenizer.TokenType.IDENTIFIER ||
tokens[index - 1].type == CTokenizer.TokenType.OTHER)) { tokens[index - 1].type == Tokenizer.TokenType.OTHER)) {
// Function declaration or definition // Function declaration or definition
index = parseFunctionDeclaration(); index = parseFunctionDeclaration();
} else { } else {
@ -49,7 +57,7 @@ public class CParser {
// Try to parse prep expression // Try to parse prep expression
private int parsePreprocessorExpression() { private int parsePreprocessorExpression() {
int index = this.index; int index = this.index;
if (tokenSet.tokens[index].type == CTokenizer.TokenType.HASH) { if (tokenSet.tokens[index].type == Tokenizer.TokenType.HASH) {
int startLine = tokenSet.getLine(index); int startLine = tokenSet.getLine(index);
while (index < tokenSet.tokens.length) { while (index < tokenSet.tokens.length) {
if (tokenSet.getLine(index) > startLine) { if (tokenSet.getLine(index) > startLine) {
@ -65,7 +73,7 @@ public class CParser {
// Try to parse function declaration and return the ending token index // Try to parse function declaration and return the ending token index
private int parseFunctionDeclaration() { private int parseFunctionDeclaration() {
CTokenizer.Token[] tokens = tokenSet.getTokens(); Tokenizer.Token[] tokens = tokenSet.getTokens();
String name = tokenSet.getTextNoNewlines(tokens[index]); String name = tokenSet.getTextNoNewlines(tokens[index]);
int endIndex = findClosingParenthesis(index + 1); int endIndex = findClosingParenthesis(index + 1);
@ -73,7 +81,7 @@ public class CParser {
return index; return index;
boolean isDefinition = false; boolean isDefinition = false;
if (endIndex + 1 < tokens.length && tokens[endIndex + 1].type == CTokenizer.TokenType.L_BRACE) { if (endIndex + 1 < tokens.length && tokens[endIndex + 1].type == Tokenizer.TokenType.L_BRACE) {
isDefinition = true; isDefinition = true;
endIndex = findClosingBrace(endIndex + 1); endIndex = findClosingBrace(endIndex + 1);
} }
@ -89,7 +97,7 @@ public class CParser {
// Try to parse function call and return the ending token index // Try to parse function call and return the ending token index
private int parseFunctionCall() { private int parseFunctionCall() {
CTokenizer.Token[] tokens = tokenSet.getTokens(); Tokenizer.Token[] tokens = tokenSet.getTokens();
String name = tokenSet.getTextNoNewlines(tokens[index]); String name = tokenSet.getTextNoNewlines(tokens[index]);
int endIndex = findClosingParenthesis(index + 1); int endIndex = findClosingParenthesis(index + 1);
if (endIndex == -1) if (endIndex == -1)
@ -103,7 +111,7 @@ public class CParser {
// Try to parse variable reference and add it to the list // Try to parse variable reference and add it to the list
private int parseVariableReference() { private int parseVariableReference() {
CTokenizer.Token token = tokenSet.getTokens()[index]; Tokenizer.Token token = tokenSet.getTokens()[index];
String name = tokenSet.getTextNoNewlines(token); String name = tokenSet.getTextNoNewlines(token);
Variable variable = new Variable(name, token.ofs, token.ofs + token.len); Variable variable = new Variable(name, token.ofs, token.ofs + token.len);
variables.add(variable); variables.add(variable);
@ -111,12 +119,12 @@ public class CParser {
} }
private int findClosingParenthesis(int startIndex) { private int findClosingParenthesis(int startIndex) {
CTokenizer.Token[] tokens = tokenSet.getTokens(); Tokenizer.Token[] tokens = tokenSet.getTokens();
int parenCount = 1; int parenCount = 1;
for (int i = startIndex + 1; i < tokens.length; i++) { for (int i = startIndex + 1; i < tokens.length; i++) {
if (tokens[i].type == CTokenizer.TokenType.L_PAREN) { if (tokens[i].type == Tokenizer.TokenType.L_PAREN) {
parenCount++; parenCount++;
} else if (tokens[i].type == CTokenizer.TokenType.R_PAREN) { } else if (tokens[i].type == Tokenizer.TokenType.R_PAREN) {
parenCount--; parenCount--;
if (parenCount == 0) { if (parenCount == 0) {
return i; return i;
@ -127,12 +135,12 @@ public class CParser {
} }
private int findClosingBrace(int startIndex) { private int findClosingBrace(int startIndex) {
CTokenizer.Token[] tokens = tokenSet.getTokens(); Tokenizer.Token[] tokens = tokenSet.getTokens();
int braceCount = 1; int braceCount = 1;
for (int i = startIndex + 1; i < tokens.length; i++) { for (int i = startIndex + 1; i < tokens.length; i++) {
if (tokens[i].type == CTokenizer.TokenType.L_BRACE) { if (tokens[i].type == Tokenizer.TokenType.L_BRACE) {
braceCount++; braceCount++;
} else if (tokens[i].type == CTokenizer.TokenType.R_BRACE) { } else if (tokens[i].type == Tokenizer.TokenType.R_BRACE) {
braceCount--; braceCount--;
if (braceCount == 0) { if (braceCount == 0) {
return i; return i;

View File

@ -1,13 +1,11 @@
package re3lib; package cparser;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.TreeMap; import java.util.TreeMap;
import ghidra.app.script.GhidraScript; public class Tokenizer {
public class CTokenizer {
public enum TokenType { public enum TokenType {
UNDEFINED, UNDEFINED,
HASH, HASH,
@ -61,17 +59,23 @@ public class CTokenizer {
private final String text; private final String text;
private TreeMap<Integer, Integer> lineNumberTable; private TreeMap<Integer, Integer> lineNumberTable;
public GhidraScript log; private Log log;
/**
 * Creates a tokenizer over {@code text} with no log sink attached.
 *
 * @param text the source text to tokenize
 */
public Tokenizer(String text) {
  this(text, (Log) null);
}
/**
 * Creates a tokenizer over {@code text} that reports through {@code log}.
 *
 * @param text the source text to tokenize
 * @param log  sink for diagnostic messages; may be {@code null} to disable logging
 */
public Tokenizer(String text, Log log) {
  this.log = log;
  this.text = text;
}
/**
 * Forwards {@code msg} to the configured {@link Log}; does nothing when no sink is set.
 *
 * @param msg the message to forward
 */
void log(String msg) {
  if (log == null) {
    return;
  }
  log.log(msg);
}
String getText(Token token) { String getText(Token token) {
return getText(token.ofs, token.len); return getText(token.ofs, token.len);
} }

View File

@ -0,0 +1,126 @@
package cparser.tests;
import static org.junit.Assert.*;
import org.junit.Before;
import org.junit.Test;
import java.lang.String;
import cparser.Parser;
import cparser.Tokenizer;
import cparser.Log;
import java.util.List;
public class ParserTests {
private Parser parser;
private Tokenizer.TokenSet tokenSet;
private Log mockLog;
@Before
public void setUp() {
mockLog = new Log() {
@Override
public void log(String msg) {
// Do nothing for tests
}
};
}
@Test
public void testParseVariableReference() {
String code = "int x = 5;";
tokenSet = new Tokenizer(code).parse();
parser = new Parser(tokenSet, mockLog);
parser.parse();
List<Parser.Variable> variables = parser.getVariables();
assertEquals(1, variables.size());
assertEquals("x", variables.get(0).name);
}
@Test
public void testParseFunctionDeclaration() {
String code = "void foo(int a, int b);";
tokenSet = new Tokenizer(code).parse();
parser = new Parser(tokenSet, mockLog);
parser.parse();
List<Parser.Function> functions = parser.getFunctions();
assertEquals(1, functions.size());
assertEquals("foo", functions.get(0).name);
assertFalse(functions.get(0).isDefinition);
}
@Test
public void testParseFunctionDefinition() {
String code = "int bar(int x) { return x + 1; }";
tokenSet = new Tokenizer(code).parse();
parser = new Parser(tokenSet, mockLog);
parser.parse();
List<Parser.Function> functions = parser.getFunctions();
assertEquals(1, functions.size());
assertEquals("bar", functions.get(0).name);
assertTrue(functions.get(0).isDefinition);
}
@Test
public void testParseFunctionCall() {
String code = "result = calculate(5, 10);";
tokenSet = new Tokenizer(code).parse();
parser = new Parser(tokenSet, mockLog);
parser.parse();
List<Parser.FunctionCall> functionCalls = parser.getFunctionCalls();
assertEquals(1, functionCalls.size());
assertEquals("calculate", functionCalls.get(0).name);
}
@Test
public void testParsePreprocessorDirective() {
String code = "#include <stdio.h>\nint main() { return 0; }";
tokenSet = new Tokenizer(code).parse();
parser = new Parser(tokenSet, mockLog);
parser.parse();
List<Parser.Function> functions = parser.getFunctions();
assertEquals(1, functions.size());
assertEquals("main", functions.get(0).name);
}
@Test
public void testParseComplexCode() {
String code =
"#include <stdio.h>\n" +
"int globalVar = 10;\n" +
"void helper(int x);\n" +
"int main() {\n" +
" int localVar = 5;\n" +
" helper(localVar);\n" +
" return 0;\n" +
"}\n" +
"void helper(int x) {\n" +
" printf(\"%d\", x);\n" +
"}";
tokenSet = new Tokenizer(code).parse();
parser = new Parser(tokenSet, mockLog);
parser.parse();
List<Parser.Variable> variables = parser.getVariables();
List<Parser.Function> functions = parser.getFunctions();
List<Parser.FunctionCall> functionCalls = parser.getFunctionCalls();
assertEquals(2, variables.size());
assertEquals(2, functions.size());
assertEquals(2, functionCalls.size());
assertTrue(variables.stream().anyMatch(v -> v.name.equals("globalVar")));
assertTrue(variables.stream().anyMatch(v -> v.name.equals("localVar")));
assertTrue(functions.stream().anyMatch(f -> f.name.equals("main")));
assertTrue(functions.stream().anyMatch(f -> f.name.equals("helper")));
assertTrue(functionCalls.stream().anyMatch(fc -> fc.name.equals("helper")));
assertTrue(functionCalls.stream().anyMatch(fc -> fc.name.equals("printf")));
}
}

View File

@ -0,0 +1,23 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>cparser-tests</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
</projectDescription>

View File

@ -0,0 +1,4 @@
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1

Binary file not shown.

View File

@ -0,0 +1,57 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.yourname.scripts</groupId>
<artifactId>cparser-tests</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<!-- JUnit for testing -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<!--
Configure Maven to recognize the main source directory outside the default
src/main/java by setting the sourceDirectory to ../../cparser relative to pom.xml.
Similarly, set the testSourceDirectory to the current directory where ParserTests.java resides.
-->
<sourceDirectory>../../cparser</sourceDirectory>
<testSourceDirectory>.</testSourceDirectory>
<plugins>
<!-- Compiler Plugin to specify Java version -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<!-- Surefire Plugin to run JUnit tests -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.2</version>
<configuration>
<includes>
<include>**/ParserTests.java</include>
</includes>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,57 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.yourname.scripts</groupId>
<artifactId>cparser-tests</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<!-- JUnit for testing -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<!--
Configure Maven to recognize the main source directory outside the default
src/main/java by setting the sourceDirectory to ../../cparser relative to pom.xml.
Similarly, set the testSourceDirectory to the current directory where ParserTests.java resides.
-->
<sourceDirectory>../../cparser</sourceDirectory>
<testSourceDirectory>.</testSourceDirectory>
<plugins>
<!-- Compiler Plugin to specify Java version -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<!-- Surefire Plugin to run JUnit tests -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.2</version>
<configuration>
<includes>
<include>**/ParserTests.java</include>
</includes>
</configuration>
</plugin>
</plugins>
</build>
</project>