Maybe add tests for cparser
This commit is contained in:
parent
2da111a348
commit
8f0e8f68bb
|
@ -7,8 +7,6 @@ import ghidra.program.model.data.DataType;
|
|||
import ghidra.program.model.data.StandAloneDataTypeManager;
|
||||
import re3lib.FunctionDatabase;
|
||||
import re3lib.RecompileConfig;
|
||||
import re3lib.CParser;
|
||||
import re3lib.CTokenizer;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.BufferedReader;
|
||||
|
@ -21,6 +19,9 @@ import java.util.Map;
|
|||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import cparser.Parser;
|
||||
import cparser.Tokenizer;
|
||||
|
||||
public class RebuildFunctionDatabase extends GhidraScript {
|
||||
// Will rebuild all functions
|
||||
public boolean rebuildAllGlobals = true;
|
||||
|
@ -125,8 +126,8 @@ public class RebuildFunctionDatabase extends GhidraScript {
|
|||
println("Scanning " + file);
|
||||
|
||||
String text = new String(Files.readAllBytes(file.toPath()));
|
||||
CTokenizer.TokenSet tokens = new CTokenizer(text).parse();
|
||||
CParser parser = new CParser(tokens);
|
||||
Tokenizer.TokenSet tokens = new Tokenizer(text).parse();
|
||||
Parser parser = new Parser(tokens);
|
||||
parser.parse();
|
||||
|
||||
// for (CTokenizer.Token token : tokens.getTokens()) {
|
||||
|
@ -134,13 +135,13 @@ public class RebuildFunctionDatabase extends GhidraScript {
|
|||
// println("Line " + line + ": " + token.ofs + " " + token.len + " " + token.type + " - "
|
||||
// + tokens.getTextNoNewlines(token));
|
||||
// }
|
||||
for (CParser.Function function : parser.getFunctions()) {
|
||||
for (Parser.Function function : parser.getFunctions()) {
|
||||
println("Function: " + function.name + " " + function.startOffset + " " + function.endOffset);
|
||||
}
|
||||
for (CParser.FunctionCall functionCall : parser.getFunctionCalls()) {
|
||||
for (Parser.FunctionCall functionCall : parser.getFunctionCalls()) {
|
||||
println("FunctionCall: " + functionCall.name + " " + functionCall.startOffset + " " + functionCall.endOffset);
|
||||
}
|
||||
for (CParser.Variable variable : parser.getVariables()) {
|
||||
for (Parser.Variable variable : parser.getVariables()) {
|
||||
println("Variable: " + variable.name + " " + variable.startOffset + " " + variable.endOffset);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
package cparser;
|
||||
|
||||
/**
 * Callback through which the tokenizer and parser in this package emit
 * diagnostic messages.
 *
 * <p>The interface has a single abstract method, so a lambda or method
 * reference can be supplied wherever a {@code Log} is expected.
 */
@FunctionalInterface
public interface Log {
  /**
   * Receives one diagnostic message.
   *
   * @param msg the message text to record
   */
  void log(String msg);
}
|
|
@ -1,37 +1,45 @@
|
|||
package re3lib;
|
||||
package cparser;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import re3lib.CTokenizer.Token;
|
||||
import cparser.Tokenizer.Token;
|
||||
import cparser.Log;
|
||||
|
||||
public class CParser {
|
||||
private CTokenizer.TokenSet tokenSet;
|
||||
public class Parser {
|
||||
private Tokenizer.TokenSet tokenSet;
|
||||
private List<Variable> variables;
|
||||
private List<Function> functions;
|
||||
private List<FunctionCall> functionCalls;
|
||||
private Log log;
|
||||
|
||||
public CParser(CTokenizer.TokenSet tokenSet) {
|
||||
/**
 * Creates a parser over {@code tokenSet} with no log sink attached.
 *
 * <p>Kept so that callers using the single-argument form
 * ({@code new Parser(tokens)}) continue to compile.
 *
 * @param tokenSet the tokens to parse
 */
public Parser(Tokenizer.TokenSet tokenSet) {
  this(tokenSet, null);
}

/**
 * Creates a parser over {@code tokenSet} that reports through {@code log}.
 *
 * @param tokenSet the tokens to parse
 * @param log      sink for diagnostic messages; may be {@code null}, in which
 *                 case {@link #log(String)} does nothing
 */
public Parser(Tokenizer.TokenSet tokenSet, Log log) {
  this.tokenSet = tokenSet;
  // Store the sink; without this assignment the field stays null and every
  // log(...) call is silently dropped.
  this.log = log;
  this.variables = new ArrayList<>();
  this.functions = new ArrayList<>();
  this.functionCalls = new ArrayList<>();
}
|
||||
|
||||
void log(String msg) {
|
||||
if (log != null) {
|
||||
log.log(msg);
|
||||
}
|
||||
}
|
||||
|
||||
int index = 0;
|
||||
|
||||
public void parse() {
|
||||
CTokenizer.Token[] tokens = tokenSet.getTokens();
|
||||
Tokenizer.Token[] tokens = tokenSet.getTokens();
|
||||
for (index = 0; index < tokens.length; index++) {
|
||||
CTokenizer.Token token = tokens[index];
|
||||
if (token.type == CTokenizer.TokenType.BLOCK_COMMENT || token.type == CTokenizer.TokenType.COMMENT) {
|
||||
Tokenizer.Token token = tokens[index];
|
||||
if (token.type == Tokenizer.TokenType.BLOCK_COMMENT || token.type == Tokenizer.TokenType.COMMENT) {
|
||||
continue;
|
||||
} else if (token.type == CTokenizer.TokenType.HASH) {
|
||||
} else if (token.type == Tokenizer.TokenType.HASH) {
|
||||
index = parsePreprocessorExpression();
|
||||
} else if (tokens[index].type == CTokenizer.TokenType.IDENTIFIER) {
|
||||
if (index + 1 < tokens.length && tokens[index + 1].type == CTokenizer.TokenType.L_PAREN) {
|
||||
} else if (tokens[index].type == Tokenizer.TokenType.IDENTIFIER) {
|
||||
if (index + 1 < tokens.length && tokens[index + 1].type == Tokenizer.TokenType.L_PAREN) {
|
||||
// Function call or declaration/definition
|
||||
if (index > 0 && (tokens[index - 1].type == CTokenizer.TokenType.IDENTIFIER ||
|
||||
tokens[index - 1].type == CTokenizer.TokenType.OTHER)) {
|
||||
if (index > 0 && (tokens[index - 1].type == Tokenizer.TokenType.IDENTIFIER ||
|
||||
tokens[index - 1].type == Tokenizer.TokenType.OTHER)) {
|
||||
// Function declaration or definition
|
||||
index = parseFunctionDeclaration();
|
||||
} else {
|
||||
|
@ -49,7 +57,7 @@ public class CParser {
|
|||
// Try to parse prep expression
|
||||
private int parsePreprocessorExpression() {
|
||||
int index = this.index;
|
||||
if (tokenSet.tokens[index].type == CTokenizer.TokenType.HASH) {
|
||||
if (tokenSet.tokens[index].type == Tokenizer.TokenType.HASH) {
|
||||
int startLine = tokenSet.getLine(index);
|
||||
while (index < tokenSet.tokens.length) {
|
||||
if (tokenSet.getLine(index) > startLine) {
|
||||
|
@ -65,7 +73,7 @@ public class CParser {
|
|||
|
||||
// Try to parse function declaration and return the ending token index
|
||||
private int parseFunctionDeclaration() {
|
||||
CTokenizer.Token[] tokens = tokenSet.getTokens();
|
||||
Tokenizer.Token[] tokens = tokenSet.getTokens();
|
||||
String name = tokenSet.getTextNoNewlines(tokens[index]);
|
||||
int endIndex = findClosingParenthesis(index + 1);
|
||||
|
||||
|
@ -73,7 +81,7 @@ public class CParser {
|
|||
return index;
|
||||
|
||||
boolean isDefinition = false;
|
||||
if (endIndex + 1 < tokens.length && tokens[endIndex + 1].type == CTokenizer.TokenType.L_BRACE) {
|
||||
if (endIndex + 1 < tokens.length && tokens[endIndex + 1].type == Tokenizer.TokenType.L_BRACE) {
|
||||
isDefinition = true;
|
||||
endIndex = findClosingBrace(endIndex + 1);
|
||||
}
|
||||
|
@ -89,7 +97,7 @@ public class CParser {
|
|||
|
||||
// Try to parse function call and return the ending token index
|
||||
private int parseFunctionCall() {
|
||||
CTokenizer.Token[] tokens = tokenSet.getTokens();
|
||||
Tokenizer.Token[] tokens = tokenSet.getTokens();
|
||||
String name = tokenSet.getTextNoNewlines(tokens[index]);
|
||||
int endIndex = findClosingParenthesis(index + 1);
|
||||
if (endIndex == -1)
|
||||
|
@ -103,7 +111,7 @@ public class CParser {
|
|||
|
||||
// Try to parse variable reference and add it to the list
|
||||
private int parseVariableReference() {
|
||||
CTokenizer.Token token = tokenSet.getTokens()[index];
|
||||
Tokenizer.Token token = tokenSet.getTokens()[index];
|
||||
String name = tokenSet.getTextNoNewlines(token);
|
||||
Variable variable = new Variable(name, token.ofs, token.ofs + token.len);
|
||||
variables.add(variable);
|
||||
|
@ -111,12 +119,12 @@ public class CParser {
|
|||
}
|
||||
|
||||
private int findClosingParenthesis(int startIndex) {
|
||||
CTokenizer.Token[] tokens = tokenSet.getTokens();
|
||||
Tokenizer.Token[] tokens = tokenSet.getTokens();
|
||||
int parenCount = 1;
|
||||
for (int i = startIndex + 1; i < tokens.length; i++) {
|
||||
if (tokens[i].type == CTokenizer.TokenType.L_PAREN) {
|
||||
if (tokens[i].type == Tokenizer.TokenType.L_PAREN) {
|
||||
parenCount++;
|
||||
} else if (tokens[i].type == CTokenizer.TokenType.R_PAREN) {
|
||||
} else if (tokens[i].type == Tokenizer.TokenType.R_PAREN) {
|
||||
parenCount--;
|
||||
if (parenCount == 0) {
|
||||
return i;
|
||||
|
@ -127,12 +135,12 @@ public class CParser {
|
|||
}
|
||||
|
||||
private int findClosingBrace(int startIndex) {
|
||||
CTokenizer.Token[] tokens = tokenSet.getTokens();
|
||||
Tokenizer.Token[] tokens = tokenSet.getTokens();
|
||||
int braceCount = 1;
|
||||
for (int i = startIndex + 1; i < tokens.length; i++) {
|
||||
if (tokens[i].type == CTokenizer.TokenType.L_BRACE) {
|
||||
if (tokens[i].type == Tokenizer.TokenType.L_BRACE) {
|
||||
braceCount++;
|
||||
} else if (tokens[i].type == CTokenizer.TokenType.R_BRACE) {
|
||||
} else if (tokens[i].type == Tokenizer.TokenType.R_BRACE) {
|
||||
braceCount--;
|
||||
if (braceCount == 0) {
|
||||
return i;
|
|
@ -1,13 +1,11 @@
|
|||
package re3lib;
|
||||
package cparser;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import ghidra.app.script.GhidraScript;
|
||||
|
||||
public class CTokenizer {
|
||||
public class Tokenizer {
|
||||
public enum TokenType {
|
||||
UNDEFINED,
|
||||
HASH,
|
||||
|
@ -61,17 +59,23 @@ public class CTokenizer {
|
|||
|
||||
private final String text;
|
||||
private TreeMap<Integer, Integer> lineNumberTable;
|
||||
public GhidraScript log;
|
||||
private Log log;
|
||||
|
||||
public CTokenizer(String text) {
|
||||
/**
 * Creates a tokenizer for {@code text} with no log sink attached.
 *
 * @param text the source text to tokenize
 */
public Tokenizer(String text) {
  this(text, null);
}
|
||||
|
||||
public CTokenizer(String text, GhidraScript script) {
|
||||
/**
 * Creates a tokenizer for {@code text} that reports through {@code log}.
 *
 * @param text the source text to tokenize
 * @param log  sink for diagnostic messages; when {@code null},
 *             {@link #log(String)} does nothing
 */
public Tokenizer(String text, Log log) {
  this.log = log;
  this.text = text;
}
|
||||
|
||||
void log(String msg) {
|
||||
if (log != null) {
|
||||
log.log(msg);
|
||||
}
|
||||
}
|
||||
|
||||
String getText(Token token) {
|
||||
return getText(token.ofs, token.len);
|
||||
}
|
|
@ -0,0 +1,126 @@
|
|||
package cparser.tests;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import java.lang.String;
|
||||
|
||||
import cparser.Parser;
|
||||
import cparser.Tokenizer;
|
||||
import cparser.Log;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class ParserTests {
|
||||
|
||||
private Parser parser;
|
||||
private Tokenizer.TokenSet tokenSet;
|
||||
private Log mockLog;
|
||||
|
||||
@Before
|
||||
public void setUp() {
|
||||
mockLog = new Log() {
|
||||
@Override
|
||||
public void log(String msg) {
|
||||
// Do nothing for tests
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseVariableReference() {
|
||||
String code = "int x = 5;";
|
||||
tokenSet = new Tokenizer(code).parse();
|
||||
parser = new Parser(tokenSet, mockLog);
|
||||
parser.parse();
|
||||
|
||||
List<Parser.Variable> variables = parser.getVariables();
|
||||
assertEquals(1, variables.size());
|
||||
assertEquals("x", variables.get(0).name);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseFunctionDeclaration() {
|
||||
String code = "void foo(int a, int b);";
|
||||
tokenSet = new Tokenizer(code).parse();
|
||||
parser = new Parser(tokenSet, mockLog);
|
||||
parser.parse();
|
||||
|
||||
List<Parser.Function> functions = parser.getFunctions();
|
||||
assertEquals(1, functions.size());
|
||||
assertEquals("foo", functions.get(0).name);
|
||||
assertFalse(functions.get(0).isDefinition);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseFunctionDefinition() {
|
||||
String code = "int bar(int x) { return x + 1; }";
|
||||
tokenSet = new Tokenizer(code).parse();
|
||||
parser = new Parser(tokenSet, mockLog);
|
||||
parser.parse();
|
||||
|
||||
List<Parser.Function> functions = parser.getFunctions();
|
||||
assertEquals(1, functions.size());
|
||||
assertEquals("bar", functions.get(0).name);
|
||||
assertTrue(functions.get(0).isDefinition);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseFunctionCall() {
|
||||
String code = "result = calculate(5, 10);";
|
||||
tokenSet = new Tokenizer(code).parse();
|
||||
parser = new Parser(tokenSet, mockLog);
|
||||
parser.parse();
|
||||
|
||||
List<Parser.FunctionCall> functionCalls = parser.getFunctionCalls();
|
||||
assertEquals(1, functionCalls.size());
|
||||
assertEquals("calculate", functionCalls.get(0).name);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParsePreprocessorDirective() {
|
||||
String code = "#include <stdio.h>\nint main() { return 0; }";
|
||||
tokenSet = new Tokenizer(code).parse();
|
||||
parser = new Parser(tokenSet, mockLog);
|
||||
parser.parse();
|
||||
|
||||
List<Parser.Function> functions = parser.getFunctions();
|
||||
assertEquals(1, functions.size());
|
||||
assertEquals("main", functions.get(0).name);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseComplexCode() {
|
||||
String code =
|
||||
"#include <stdio.h>\n" +
|
||||
"int globalVar = 10;\n" +
|
||||
"void helper(int x);\n" +
|
||||
"int main() {\n" +
|
||||
" int localVar = 5;\n" +
|
||||
" helper(localVar);\n" +
|
||||
" return 0;\n" +
|
||||
"}\n" +
|
||||
"void helper(int x) {\n" +
|
||||
" printf(\"%d\", x);\n" +
|
||||
"}";
|
||||
|
||||
tokenSet = new Tokenizer(code).parse();
|
||||
parser = new Parser(tokenSet, mockLog);
|
||||
parser.parse();
|
||||
|
||||
List<Parser.Variable> variables = parser.getVariables();
|
||||
List<Parser.Function> functions = parser.getFunctions();
|
||||
List<Parser.FunctionCall> functionCalls = parser.getFunctionCalls();
|
||||
|
||||
assertEquals(2, variables.size());
|
||||
assertEquals(2, functions.size());
|
||||
assertEquals(2, functionCalls.size());
|
||||
|
||||
assertTrue(variables.stream().anyMatch(v -> v.name.equals("globalVar")));
|
||||
assertTrue(variables.stream().anyMatch(v -> v.name.equals("localVar")));
|
||||
assertTrue(functions.stream().anyMatch(f -> f.name.equals("main")));
|
||||
assertTrue(functions.stream().anyMatch(f -> f.name.equals("helper")));
|
||||
assertTrue(functionCalls.stream().anyMatch(fc -> fc.name.equals("helper")));
|
||||
assertTrue(functionCalls.stream().anyMatch(fc -> fc.name.equals("printf")));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>cparser-tests</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.jdt.core.javabuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.m2e.core.maven2Builder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.jdt.core.javanature</nature>
|
||||
<nature>org.eclipse.m2e.core.maven2Nature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
|
@ -0,0 +1,4 @@
|
|||
activeProfiles=
|
||||
eclipse.preferences.version=1
|
||||
resolveWorkspaceProjects=true
|
||||
version=1
|
Binary file not shown.
|
@ -0,0 +1,57 @@
|
|||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
|
||||
http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>com.yourname.scripts</groupId>
|
||||
<artifactId>cparser-tests</artifactId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
|
||||
<dependencies>
|
||||
<!-- JUnit for testing -->
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.13.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<!--
|
||||
Configure Maven to recognize the main source directory outside the default
|
||||
src/main/java by setting the sourceDirectory to ../../cparser relative to pom.xml.
|
||||
Similarly, set the testSourceDirectory to the current directory where ParserTests.java resides.
|
||||
-->
|
||||
<sourceDirectory>../../cparser</sourceDirectory>
|
||||
<testSourceDirectory>.</testSourceDirectory>
|
||||
|
||||
<plugins>
|
||||
<!-- Compiler Plugin to specify Java version -->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.8.1</version>
|
||||
<configuration>
|
||||
<source>1.8</source>
|
||||
<target>1.8</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<!-- Surefire Plugin to run JUnit tests -->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<version>2.22.2</version>
|
||||
<configuration>
|
||||
<includes>
|
||||
<include>**/ParserTests.java</include>
|
||||
</includes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
</project>
|
|
@ -0,0 +1,57 @@
|
|||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
|
||||
http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>com.yourname.scripts</groupId>
|
||||
<artifactId>cparser-tests</artifactId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
|
||||
<dependencies>
|
||||
<!-- JUnit for testing -->
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.13.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<!--
|
||||
Configure Maven to recognize the main source directory outside the default
|
||||
src/main/java by setting the sourceDirectory to ../../cparser relative to pom.xml.
|
||||
Similarly, set the testSourceDirectory to the current directory where ParserTests.java resides.
|
||||
-->
|
||||
<sourceDirectory>../../cparser</sourceDirectory>
|
||||
<testSourceDirectory>.</testSourceDirectory>
|
||||
|
||||
<plugins>
|
||||
<!-- Compiler Plugin to specify Java version -->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.8.1</version>
|
||||
<configuration>
|
||||
<source>1.8</source>
|
||||
<target>1.8</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<!-- Surefire Plugin to run JUnit tests -->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<version>2.22.2</version>
|
||||
<configuration>
|
||||
<includes>
|
||||
<include>**/ParserTests.java</include>
|
||||
</includes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
</project>
|
Loading…
Reference in New Issue