Add cparser java project

This commit is contained in:
Guus Waals 2024-10-08 00:58:50 +08:00
parent 8f0e8f68bb
commit 0133a237ac
31 changed files with 299 additions and 324 deletions

2
java/cparser/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*.class
target/

View File

@ -5,8 +5,8 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.yourname.scripts</groupId>
<artifactId>cparser-tests</artifactId>
<groupId>cparser</groupId>
<artifactId>cparser</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
@ -20,13 +20,8 @@
</dependencies>
<build>
<!--
Configure Maven to recognize the main source directory outside the default
src/main/java by setting the sourceDirectory to ../../cparser relative to pom.xml.
Similarly, set the testSourceDirectory to the current directory where ParserTests.java resides.
-->
<sourceDirectory>../../cparser</sourceDirectory>
<testSourceDirectory>.</testSourceDirectory>
<!-- Specify the custom test source directory -->
<testSourceDirectory>./src/test/java</testSourceDirectory>
<plugins>
<!-- Compiler Plugin to specify Java version -->

View File

@ -0,0 +1,264 @@
package cparser;
import java.util.*;
import cparser.Tokenizer.Token;
import cparser.Log;
/**
 * Statement-level scanner over the tokens of a {@link Tokenizer.TokenSet}.
 *
 * <p>{@link #parse()} walks the token array once. Comment tokens are skipped,
 * a {@code HASH} token consumes the rest of its source line, and everything
 * else is handed to {@link #parseStmt()}, which consumes one statement.
 *
 * <p>Statement classification (variable declarations, function declarations
 * and calls) is not implemented yet; the nested node classes at the bottom of
 * this file describe the intended output model.
 */
public class Parser {
    private final Tokenizer.TokenSet tokenSet;
    // Intended to collect parsed statement nodes; nothing is added to it yet.
    private final List<Object> statements;
    // Optional sink for diagnostics; may be null, in which case logging is a no-op.
    private final Log log;
    private final Tokenizer.Token[] tokens;

    /** Index of the token currently being examined by {@link #parse()}. */
    int index = 0;

    /**
     * Creates a parser over the given token set.
     *
     * @param tokenSet tokens to parse
     * @param log      receiver for diagnostic messages, or {@code null} to disable logging
     */
    public Parser(Tokenizer.TokenSet tokenSet, Log log) {
        this.tokenSet = tokenSet;
        // The log argument used to be dropped here, which silently disabled log(String).
        this.log = log;
        this.statements = new ArrayList<>();
        this.tokens = tokenSet.getTokens();
    }

    /** Forwards {@code msg} to the configured log, if there is one. */
    void log(String msg) {
        if (log != null) {
            log.log(msg);
        }
    }

    /**
     * Walks all tokens from the start. Each helper returns the index of the
     * last token it consumed; the loop increment then moves to the next one.
     */
    public void parse() {
        for (index = 0; index < tokens.length; index++) {
            Tokenizer.Token token = tokens[index];
            if (token.type == Tokenizer.TokenType.BLOCK_COMMENT || token.type == Tokenizer.TokenType.COMMENT) {
                continue;
            } else if (token.type == Tokenizer.TokenType.HASH) {
                index = parsePreprocessorExpression();
            } else {
                index = parseStmt();
            }
        }
    }

    /**
     * Consumes one statement starting at {@link #index}.
     *
     * <p>A statement ends at the first {@code SEMICOLON}, or at the brace that
     * matches the first {@code L_BRACE} encountered. If neither terminator is
     * found (or the brace is unbalanced) the rest of the input is consumed.
     *
     * @return index of the last token belonging to the statement; always
     *         {@code >= index}, so the caller's loop makes progress
     */
    private int parseStmt() {
        final int startIndex = index;
        final int lastIndex = tokens.length - 1;
        for (int i = startIndex; i < tokens.length; i++) {
            Tokenizer.TokenType type = tokens[i].type;
            if (type == Tokenizer.TokenType.SEMICOLON) {
                // TODO: classify tokens [startIndex, i) and record the result in
                // `statements` (an EQUALS token in that range indicates an assignment).
                return i;
            }
            if (type == Tokenizer.TokenType.L_BRACE) {
                // TODO: a parenthesised group in [startIndex, i) indicates a function
                // definition; parse its header with findClosingParenthesis.
                int closingBrace = findClosingBrace(i);
                return closingBrace != -1 ? closingBrace : lastIndex;
            }
        }
        return lastIndex;
    }

    // TODO: function declaration, function call and variable reference parsing
    // from the previous implementation has not been ported to the new node model.

    /**
     * Placeholder for variable declaration parsing.
     *
     * @return {@code startIndex} when that token is an {@code R_PAREN},
     *         otherwise {@code startIndex + 1}
     */
    private int parseVarDecl(int startIndex, int endIndex) {
        if (tokens[startIndex].type == Tokenizer.TokenType.R_PAREN) {
            return startIndex;
        }
        return startIndex + 1;
    }

    /**
     * Placeholder for argument list parsing over tokens
     * {@code [startIndex, endIndex)}. Currently always yields an empty list.
     */
    private ArgumentList parseArgumentList(int startIndex, int endIndex) {
        List<Object> arguments = new ArrayList<>();
        for (int i = startIndex; i < endIndex; i++) {
            if (tokens[i].type == Tokenizer.TokenType.COMMA) {
                // TODO: split arguments at commas.
            }
        }
        return new ArgumentList(arguments);
    }

    /**
     * Skips a preprocessor directive that starts at {@link #index}.
     *
     * @return index of the last token on the directive's line, or the current
     *         index unchanged when the token there is not a {@code HASH}
     */
    private int parsePreprocessorExpression() {
        int index = this.index;
        if (tokenSet.tokens[index].type == Tokenizer.TokenType.HASH) {
            int startLine = tokenSet.getLine(index);
            while (index < tokenSet.tokens.length) {
                if (tokenSet.getLine(index) > startLine) {
                    break;
                }
                index++;
            }
            // Step back from the first token of the next line (or from the end
            // of input) to the last token of the directive.
            index--;
        }
        return index;
    }

    /**
     * Finds the {@code R_PAREN} matching the opening parenthesis at {@code startIndex}.
     *
     * @return index of the matching token, or {@code -1} if it is never closed
     */
    private int findClosingParenthesis(int startIndex) {
        return findClosing(startIndex, Tokenizer.TokenType.L_PAREN, Tokenizer.TokenType.R_PAREN);
    }

    /**
     * Finds the {@code R_BRACE} matching the opening brace at {@code startIndex}.
     *
     * @return index of the matching token, or {@code -1} if it is never closed
     */
    private int findClosingBrace(int startIndex) {
        return findClosing(startIndex, Tokenizer.TokenType.L_BRACE, Tokenizer.TokenType.R_BRACE);
    }

    /**
     * Shared bracket matcher: scans forward from the token after
     * {@code startIndex}, tracking nesting depth of {@code open}/{@code close}.
     */
    private int findClosing(int startIndex, Tokenizer.TokenType open, Tokenizer.TokenType close) {
        int depth = 1;
        for (int i = startIndex + 1; i < tokens.length; i++) {
            Tokenizer.TokenType type = tokens[i].type;
            if (type == open) {
                depth++;
            } else if (type == close) {
                depth--;
                if (depth == 0) {
                    return i;
                }
            }
        }
        return -1;
    }

    /** A pair of start and end offsets. */
    public static class Span {
        public final int startOffset;
        public final int endOffset;

        public Span(int startOffset, int endOffset) {
            this.startOffset = startOffset;
            this.endOffset = endOffset;
        }
    }

    /** A type reference, identified by its span. */
    public static class Type {
        public final Span span;

        public Type(Span span) {
            this.span = span;
        }
    }

    /** An identifier, identified by its span. */
    public static class Identifier {
        public final Span span;

        public Identifier(Span span) {
            this.span = span;
        }
    }

    /** An ordered list of argument nodes. */
    public static class ArgumentList {
        public final List<Object> arguments;

        public ArgumentList(List<Object> arguments) {
            this.arguments = arguments;
        }
    }

    /** A variable declaration: a type followed by a name. */
    public static class VariableDeclaration {
        public final Type type;
        public final Identifier name;

        public VariableDeclaration(Type type, Identifier name) {
            this.type = type;
            this.name = name;
        }
    }

    /** A function declaration: name, return type and argument list. */
    public static class FunctionDecl {
        public final Identifier name;
        public final Type returnValue;
        public final ArgumentList args;

        public FunctionDecl(Identifier name, Type returnValue, ArgumentList args) {
            this.name = name;
            this.returnValue = returnValue;
            this.args = args;
        }
    }

    /** A function call: callee name and argument list. */
    public static class FunctionCall {
        public final Identifier name;
        public final ArgumentList args;

        public FunctionCall(Identifier name, ArgumentList args) {
            this.name = name;
            this.args = args;
        }
    }
}

View File

@ -13,7 +13,12 @@ public class Tokenizer {
R_PAREN,
L_BRACE,
R_BRACE,
L_IDX,
R_IDX,
SEMICOLON,
EQUALS,
ARROW,
STAR,
COMMA,
COMMENT,
BLOCK_COMMENT,
@ -325,6 +330,18 @@ public class Tokenizer {
newType = TokenType.COMMA;
} else if (currentChar == '#') {
newType = TokenType.HASH;
} else if (currentChar == '[') {
newType = TokenType.L_IDX;
} else if (currentChar == ']') {
newType = TokenType.R_IDX;
} else if (currentChar == '=') {
newType = TokenType.EQUALS;
} else if (currentChar == '>' && index > 0 && text.charAt(index - 1) == '-') {
newType = TokenType.ARROW;
currentType = TokenType.ARROW;
tokenStart = index - 1;
} else if (currentChar == '*') {
newType = TokenType.STAR;
}
// Handle other characters
else {

View File

@ -1,5 +1,4 @@
package cparser.tests;
package cparser_tests;
import static org.junit.Assert.*;
import org.junit.Before;
import org.junit.Test;
@ -12,17 +11,16 @@ import cparser.Log;
import java.util.List;
public class ParserTests {
private Parser parser;
private Tokenizer.TokenSet tokenSet;
private Log mockLog;
private Log testLog;
@Before
public void setUp() {
mockLog = new Log() {
testLog = new Log() {
@Override
public void log(String msg) {
// Do nothing for tests
public void log(String msg) {
System.out.println(msg);
}
};
}
@ -31,7 +29,7 @@ public class ParserTests {
public void testParseVariableReference() {
String code = "int x = 5;";
tokenSet = new Tokenizer(code).parse();
parser = new Parser(tokenSet, mockLog);
parser = new Parser(tokenSet, testLog);
parser.parse();
List<Parser.Variable> variables = parser.getVariables();
@ -43,7 +41,7 @@ public class ParserTests {
public void testParseFunctionDeclaration() {
String code = "void foo(int a, int b);";
tokenSet = new Tokenizer(code).parse();
parser = new Parser(tokenSet, mockLog);
parser = new Parser(tokenSet, testLog);
parser.parse();
List<Parser.Function> functions = parser.getFunctions();
@ -56,7 +54,7 @@ public class ParserTests {
public void testParseFunctionDefinition() {
String code = "int bar(int x) { return x + 1; }";
tokenSet = new Tokenizer(code).parse();
parser = new Parser(tokenSet, mockLog);
parser = new Parser(tokenSet, testLog);
parser.parse();
List<Parser.Function> functions = parser.getFunctions();
@ -69,7 +67,7 @@ public class ParserTests {
public void testParseFunctionCall() {
String code = "result = calculate(5, 10);";
tokenSet = new Tokenizer(code).parse();
parser = new Parser(tokenSet, mockLog);
parser = new Parser(tokenSet, testLog);
parser.parse();
List<Parser.FunctionCall> functionCalls = parser.getFunctionCalls();
@ -81,7 +79,7 @@ public class ParserTests {
public void testParsePreprocessorDirective() {
String code = "#include <stdio.h>\nint main() { return 0; }";
tokenSet = new Tokenizer(code).parse();
parser = new Parser(tokenSet, mockLog);
parser = new Parser(tokenSet, testLog);
parser.parse();
List<Parser.Function> functions = parser.getFunctions();
@ -105,7 +103,7 @@ public class ParserTests {
"}";
tokenSet = new Tokenizer(code).parse();
parser = new Parser(tokenSet, mockLog);
parser = new Parser(tokenSet, testLog);
parser.parse();
List<Parser.Variable> variables = parser.getVariables();

View File

@ -1,15 +0,0 @@
// Script to categorize Stuff
// @menupath File.Run.ScriptName
// @category _Reman3
import ghidra.app.script.GhidraScript;
import re3lib.*;
/**
 * Placeholder Ghidra script: prints a fixed greeting when run.
 */
public class NewScript extends GhidraScript {

    @Override
    protected void run() throws Exception {
        // TODO: Add your script logic here
        final String greeting = "Hello from NewScript!";
        println(greeting);
    }
}

View File

@ -1,202 +0,0 @@
package cparser;
import java.util.*;
import cparser.Tokenizer.Token;
import cparser.Log;
/**
 * Heuristic scanner that collects variable references, function
 * declarations/definitions and function calls from a
 * {@link Tokenizer.TokenSet}.
 *
 * <p>Call {@link #parse()} once, then read the results through
 * {@link #getVariables()}, {@link #getFunctions()} and
 * {@link #getFunctionCalls()}.
 */
public class Parser {
    private final Tokenizer.TokenSet tokenSet;
    private final List<Variable> variables;
    private final List<Function> functions;
    private final List<FunctionCall> functionCalls;
    // Optional sink for diagnostics; may be null, in which case logging is a no-op.
    private final Log log;

    /** Index of the token currently being examined by {@link #parse()}. */
    int index = 0;

    /**
     * Creates a parser over the given token set.
     *
     * @param tokenSet tokens to parse
     * @param log      receiver for diagnostic messages, or {@code null} to disable logging
     */
    public Parser(Tokenizer.TokenSet tokenSet, Log log) {
        this.tokenSet = tokenSet;
        // The log argument used to be dropped here, which silently disabled log(String).
        this.log = log;
        this.variables = new ArrayList<>();
        this.functions = new ArrayList<>();
        this.functionCalls = new ArrayList<>();
    }

    /** Forwards {@code msg} to the configured log, if there is one. */
    void log(String msg) {
        if (log != null) {
            log.log(msg);
        }
    }

    /**
     * Walks all tokens from the start. Comments are skipped, a {@code HASH}
     * token consumes the rest of its line, and every {@code IDENTIFIER} is
     * classified by its neighbours:
     * <ul>
     *   <li>followed by {@code L_PAREN} and preceded by an {@code IDENTIFIER}
     *       or {@code OTHER} token: function declaration or definition;</li>
     *   <li>followed by {@code L_PAREN} otherwise: function call;</li>
     *   <li>anything else: variable reference.</li>
     * </ul>
     * Each helper returns the index to resume from; the loop increment is
     * applied on top of that value.
     */
    public void parse() {
        Tokenizer.Token[] tokens = tokenSet.getTokens();
        for (index = 0; index < tokens.length; index++) {
            Tokenizer.Token token = tokens[index];
            if (token.type == Tokenizer.TokenType.BLOCK_COMMENT || token.type == Tokenizer.TokenType.COMMENT) {
                continue;
            } else if (token.type == Tokenizer.TokenType.HASH) {
                index = parsePreprocessorExpression();
            } else if (token.type == Tokenizer.TokenType.IDENTIFIER) {
                boolean followedByParen = index + 1 < tokens.length
                        && tokens[index + 1].type == Tokenizer.TokenType.L_PAREN;
                if (followedByParen) {
                    boolean precededByName = index > 0
                            && (tokens[index - 1].type == Tokenizer.TokenType.IDENTIFIER
                                    || tokens[index - 1].type == Tokenizer.TokenType.OTHER);
                    if (precededByName) {
                        index = parseFunctionDeclaration();
                    } else {
                        index = parseFunctionCall();
                    }
                } else {
                    index = parseVariableReference();
                }
            }
        }
    }

    /**
     * Skips a preprocessor directive that starts at {@link #index}.
     *
     * @return index of the last token on the directive's line, or the current
     *         index unchanged when the token there is not a {@code HASH}
     */
    private int parsePreprocessorExpression() {
        int index = this.index;
        if (tokenSet.tokens[index].type == Tokenizer.TokenType.HASH) {
            int startLine = tokenSet.getLine(index);
            while (index < tokenSet.tokens.length) {
                if (tokenSet.getLine(index) > startLine) {
                    break;
                }
                index++;
            }
            // Step back from the first token of the next line (or from the end
            // of input) to the last token of the directive.
            index--;
        }
        return index;
    }

    /**
     * Records the function whose name token is at {@link #index}. When the
     * closing parenthesis is directly followed by {@code L_BRACE}, the entry
     * is marked as a definition and extends to the matching closing brace.
     *
     * @return one less than the index of the function's last token, or the
     *         current index unchanged when the brackets are unbalanced
     */
    private int parseFunctionDeclaration() {
        Tokenizer.Token[] tokens = tokenSet.getTokens();
        String name = tokenSet.getTextNoNewlines(tokens[index]);

        int endIndex = findClosingParenthesis(index + 1);
        if (endIndex == -1) {
            return index;
        }

        boolean isDefinition = false;
        if (endIndex + 1 < tokens.length && tokens[endIndex + 1].type == Tokenizer.TokenType.L_BRACE) {
            isDefinition = true;
            endIndex = findClosingBrace(endIndex + 1);
        }
        if (endIndex == -1) {
            return index;
        }

        Tokenizer.Token last = tokens[endIndex];
        functions.add(new Function(name, tokens[index].ofs, last.ofs + last.len, isDefinition));
        return endIndex - 1;
    }

    /**
     * Records a call to the function whose name token is at {@link #index}.
     *
     * @return one less than the index of the closing parenthesis, or the
     *         current index unchanged when the parentheses are unbalanced
     */
    private int parseFunctionCall() {
        Tokenizer.Token[] tokens = tokenSet.getTokens();
        String name = tokenSet.getTextNoNewlines(tokens[index]);

        int endIndex = findClosingParenthesis(index + 1);
        if (endIndex == -1) {
            return index;
        }

        Tokenizer.Token last = tokens[endIndex];
        functionCalls.add(new FunctionCall(name, tokens[index].ofs, last.ofs + last.len));
        return endIndex - 1;
    }

    /**
     * Records the identifier at {@link #index} as a variable reference.
     *
     * @return {@code index + 1}
     */
    private int parseVariableReference() {
        Tokenizer.Token token = tokenSet.getTokens()[index];
        String name = tokenSet.getTextNoNewlines(token);
        variables.add(new Variable(name, token.ofs, token.ofs + token.len));
        // NOTE(review): parse() increments again after this returns, so the token
        // directly after a variable reference is never examined. Confirm whether
        // that skip is intended before changing it.
        return index + 1;
    }

    /**
     * Finds the {@code R_PAREN} matching the opening parenthesis at {@code startIndex}.
     *
     * @return index of the matching token, or {@code -1} if it is never closed
     */
    private int findClosingParenthesis(int startIndex) {
        Tokenizer.Token[] tokens = tokenSet.getTokens();
        int parenCount = 1;
        for (int i = startIndex + 1; i < tokens.length; i++) {
            if (tokens[i].type == Tokenizer.TokenType.L_PAREN) {
                parenCount++;
            } else if (tokens[i].type == Tokenizer.TokenType.R_PAREN) {
                parenCount--;
                if (parenCount == 0) {
                    return i;
                }
            }
        }
        return -1;
    }

    /**
     * Finds the {@code R_BRACE} matching the opening brace at {@code startIndex}.
     *
     * @return index of the matching token, or {@code -1} if it is never closed
     */
    private int findClosingBrace(int startIndex) {
        Tokenizer.Token[] tokens = tokenSet.getTokens();
        int braceCount = 1;
        for (int i = startIndex + 1; i < tokens.length; i++) {
            if (tokens[i].type == Tokenizer.TokenType.L_BRACE) {
                braceCount++;
            } else if (tokens[i].type == Tokenizer.TokenType.R_BRACE) {
                braceCount--;
                if (braceCount == 0) {
                    return i;
                }
            }
        }
        return -1;
    }

    /** Returns the variable references collected so far (the live internal list). */
    public List<Variable> getVariables() {
        return variables;
    }

    /** Returns the function declarations/definitions collected so far (the live internal list). */
    public List<Function> getFunctions() {
        return functions;
    }

    /** Returns the function calls collected so far (the live internal list). */
    public List<FunctionCall> getFunctionCalls() {
        return functionCalls;
    }

    /** A variable reference: its name and the offsets it spans. */
    public static class Variable {
        public final String name;
        public final int startOffset;
        public final int endOffset;

        public Variable(String name, int startOffset, int endOffset) {
            this.name = name;
            this.startOffset = startOffset;
            this.endOffset = endOffset;
        }
    }

    /** A function declaration or definition: its name and the offsets it spans. */
    public static class Function {
        public final String name;
        public final int startOffset;
        public final int endOffset;
        /** {@code true} when the parameter list was directly followed by a brace block. */
        public final boolean isDefinition;

        public Function(String name, int startOffset, int endOffset, boolean isDefinition) {
            this.name = name;
            this.startOffset = startOffset;
            this.endOffset = endOffset;
            this.isDefinition = isDefinition;
        }
    }

    /** A function call: the callee name and the offsets it spans. */
    public static class FunctionCall {
        public final String name;
        public final int startOffset;
        public final int endOffset;

        public FunctionCall(String name, int startOffset, int endOffset) {
            this.name = name;
            this.startOffset = startOffset;
            this.endOffset = endOffset;
        }
    }
}

View File

@ -1,23 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>cparser-tests</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
</projectDescription>

View File

@ -1,4 +0,0 @@
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1

View File

@ -1,57 +0,0 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.yourname.scripts</groupId>
<artifactId>cparser-tests</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<!-- JUnit for testing -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<!--
Configure Maven to recognize the main source directory outside the default
src/main/java by setting the sourceDirectory to ../../cparser relative to pom.xml.
Similarly, set the testSourceDirectory to the current directory where ParserTests.java resides.
-->
<sourceDirectory>../../cparser</sourceDirectory>
<testSourceDirectory>.</testSourceDirectory>
<plugins>
<!-- Compiler Plugin to specify Java version -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<!-- Surefire Plugin to run JUnit tests -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.2</version>
<configuration>
<includes>
<include>**/ParserTests.java</include>
</includes>
</configuration>
</plugin>
</plugins>
</build>
</project>