diff --git a/java/cparser/src/main/java/cparser/AST.java b/java/cparser/src/main/java/cparser/AST.java new file mode 100644 index 00000000..1890d894 --- /dev/null +++ b/java/cparser/src/main/java/cparser/AST.java @@ -0,0 +1,124 @@ +package cparser; + +import java.util.List; + +public class AST { + + public static class Error { + public final Span span; + public final String message; + + public Error(Span span, String message) { + this.span = span; + this.message = message; + } + } + + public static class Span { + public final int startOffset; + public final int endOffset; + + public Span(int startOffset, int endOffset) { + this.startOffset = startOffset; + this.endOffset = endOffset; + } + + public Span() { + this.startOffset = -1; + this.endOffset = -1; + } + + public boolean isValid() { + return endOffset > startOffset; + } + } + + public static class PreprocessorExpression { + public final Span span; + + public PreprocessorExpression(Span span) { + this.span = span; + } + } + + public static class Type { + // The entire type definition, e.g. "byte**" + public final Span span; + // the base type name, e.g. byte in "byte* (&value)[20]" + public final Span baseSpan; + + public Type(Span span, Span baseSpan) { + this.span = span; + this.baseSpan = baseSpan; + } + } + + public static class Identifier { + public final Span span; + + public Identifier(Span span) { + this.span = span; + } + } + + public static class ArgumentList { + public final List arguments; + + public ArgumentList(List arguments) { + this.arguments = arguments; + } + } + + private static class Expr { + public final Span span; + + public Expr(Span span) { + this.span = span; + } + }; + + public static class VariableDeclaration { + public final String type; + public final String identifier; + public final boolean isPointer; + public final boolean isReference; + public final boolean isArray; + public final int arraySize; + public final boolean isFunction; + public final Span span; + + public VariableDeclaration(String type, String identifier, boolean isPointer, boolean isReference, + boolean isArray, int arraySize, boolean isFunction, Span span) { + this.type = type; + this.identifier = identifier; + this.isPointer = isPointer; + this.isReference = isReference; + this.isArray = isArray; + this.arraySize = arraySize; + this.isFunction = isFunction; + this.span = span; + } + } + + public static class FunctionDecl { + public final Identifier name; + public final Type returnValue; + public final ArgumentList args; + + public FunctionDecl(Identifier name, Type returnValue, ArgumentList args) { + this.name = name; + this.returnValue = returnValue; + this.args = args; + } + } + + public static class FunctionCall { + public final Identifier name; + public final ArgumentList args; + + public FunctionCall(Identifier name, ArgumentList args) { + this.name = name; + this.args = args; + } + } +} diff --git a/java/cparser/src/main/java/cparser/Parser.java b/java/cparser/src/main/java/cparser/Parser.java index a871c702..c733e85b 100644 --- a/java/cparser/src/main/java/cparser/Parser.java +++ b/java/cparser/src/main/java/cparser/Parser.java @@ -4,13 +4,18 @@ import java.util.*; import cparser.Tokenizer.Token; import cparser.Log; +import cparser.AST.VariableDeclaration; public class Parser { private Tokenizer.TokenSet tokenSet; - private List statements; private Log log; private Tokenizer.Token[] tokens; + private List statements = new ArrayList<>(); + private List variableDeclarations = new ArrayList<>(); + private List commentStack = new ArrayList<>(); + private List errors = new ArrayList<>(); + public Parser(Tokenizer.TokenSet tokenSet, Log log) { this.tokenSet = tokenSet; this.statements = new ArrayList<>(); @@ -25,45 +30,197 @@ public class Parser { int index = 0; + public List getVariableDeclarations() { + return variableDeclarations; + } + + private static class Result { + public final T value; + public final AST.Span span; + + public Result(T value, AST.Span span) { + this.value = value; + this.span = span; + } + + public Result() { + this.value = null; + this.span = new AST.Span(); + } + + public boolean isValid() { + return span.isValid(); + } + + public static Result none() { + return new Result(); + } + + public Result toGeneric() { + return new Result(value, span); + } + } + + boolean applyResult(Result result) { + if (result.span.isValid()) { + index = result.span.endOffset + 1; + + if (result.value instanceof VariableDeclaration) { + variableDeclarations.add((VariableDeclaration) result.value); + } + + return true; + } + return false; + } + + private void addError(AST.Span span, String message) { + errors.add(new AST.Error(span, message)); + } + + private void addError(Tokenizer.Token token, String message) { + addError(token.getSpan(), message); + } + public void parse() { - for (index = 0; index < tokens.length; index++) { + for (index = 0; index < tokens.length;) { Tokenizer.Token token = tokens[index]; if (token.type == Tokenizer.TokenType.BLOCK_COMMENT || token.type == Tokenizer.TokenType.COMMENT) { + commentStack.add(token.getSpan()); + index++; continue; } else if (token.type == Tokenizer.TokenType.HASH) { - index = parsePreprocessorExpression(); + Result result = parsePreprocessorExpression(); + if (result.span.isValid()) { + index = result.span.endOffset + 1; + } else { + addError(token, "Invalid preprocessor expression"); + index++; + } } else { - index = parseStmt(); + Result stmt = parseStmt(); + if (applyResult(stmt)) + continue; + addError(token, "Invalid statement"); + index++; } } } - private int parseStmt() { + private Result parseVarDecl() { int startIndex = index; - List idStack = new ArrayList<>(); - for (int i = startIndex; i < tokens.length; i++) { - Tokenizer.Token token = tokens[i]; - if (token.type == Tokenizer.TokenType.L_PAREN && idStack.size() > 0) { - // Function call? - } else if (token.type == Tokenizer.TokenType.SEMICOLON) { - boolean isVarAssign = false; - for (int j = startIndex; j < i; j++) { - if (tokens[j].type == Tokenizer.TokenType.EQUALS) { - isVarAssign = true; - } + StringBuilder type = new StringBuilder(); + String identifier = null; + boolean isPointer = false; + boolean isReference = false; + boolean isArray = false; + boolean isFunction = false; + int arraySize = -1; + + boolean haveTypeIdentifier = false; + boolean haveIdentifier = false; + + int parenDepth = 0; + int idxStart = 0; + int idxDepth = 0; + + // Parse type + while (index < tokens.length) { + Token token = tokens[index]; + if (token.type == Tokenizer.TokenType.IDENTIFIER || token.type == Tokenizer.TokenType.OTHER) { + // Already have a type identifier, so this is most likely a variable name + if (!haveTypeIdentifier) { + type.append(tokenSet.getTextNoNewlines(token)); + haveTypeIdentifier = true; + } else if (!haveIdentifier) { + identifier = tokenSet.getTextNoNewlines(token); + haveIdentifier = true; + if (parenDepth == 0) + break; + } else { + // Unknown + addError(token, "Unknown token after identifier"); } - } else if (token.type == Tokenizer.TokenType.L_BRACE) { - boolean isVarAssign = false; - for (int j = startIndex; j < i; j++) { - if (tokens[j].type == Tokenizer.TokenType.L_PAREN) { - int endIndex = findClosingParenthesis(j); - if (endIndex != -1) { - index = endIndex; - } - } + index++; + } else if (token.type == Tokenizer.TokenType.STAR) { + isPointer = true; + type.append("*"); + index++; + } else if (token.type == Tokenizer.TokenType.AMPERSAND) { + isReference = true; + type.append("&"); + index++; + } else if (token.type == Tokenizer.TokenType.L_PAREN) { + parenDepth++; + index++; + } else if (token.type == Tokenizer.TokenType.R_PAREN) { + parenDepth--; + index++; + } else if (token.type == Tokenizer.TokenType.L_IDX) { + idxDepth++; + idxStart = token.ofs + 1; + if (haveIdentifier) { + // Parse function parameters? } + break; + } else if (token.type == Tokenizer.TokenType.R_IDX) { + idxDepth--; + if (idxDepth == 0) { + String idxVal = tokenSet.getTextNoNewlines(idxStart, (token.ofs - 1) - idxStart); + type.append("[" + idxVal + "]"); + } + break; + } else { + break; } } + + AST.Span span = new AST.Span(tokens[startIndex].ofs, tokens[index].getEnd()); + VariableDeclaration varDecl = new VariableDeclaration( + type.toString().trim(), + identifier, + isPointer, + isReference, + isArray, + arraySize, + isFunction, + span); + + return new Result<>(varDecl, span); + } + + private Result parseStmt() { + int startIndex = index; + for (int i = startIndex; i < tokens.length; i++) { + Tokenizer.Token token = tokens[i]; + + Result varDeclResult = parseVarDecl(); + if (varDeclResult.isValid()) + return varDeclResult; + + // if (token.type == Tokenizer.TokenType.L_PAREN && idStack.size() > 0) { + // // Function call? + // } else if (token.type == Tokenizer.TokenType.SEMICOLON) { + // boolean isVarAssign = false; + // for (int j = startIndex; j < i; j++) { + // if (tokens[j].type == Tokenizer.TokenType.EQUALS) { + // isVarAssign = true; + // } + // } + // } else if (token.type == Tokenizer.TokenType.L_BRACE) { + // boolean isVarAssign = false; + // for (int j = startIndex; j < i; j++) { + // if (tokens[j].type == Tokenizer.TokenType.L_PAREN) { + // int endIndex = findClosingParenthesis(j); + // if (endIndex != -1) { + // index = endIndex; + // } + // } + // } + // } + } + + return Result.none(); // if (index + 1 < tokens.length && tokens[index + 1].type == // Tokenizer.TokenType.L_PAREN) { // // Function call or declaration/definition @@ -88,19 +245,10 @@ public class Parser { return startIndex + 1; } - private ArgumentList parseArgumentList(int startIndex, int endIndex) { - List arguments = new ArrayList<>(); - for (int i = startIndex; i < endIndex; i++) { - if (tokens[i].type == Tokenizer.TokenType.COMMA) { - - } - } - return new ArgumentList(arguments); - } - // Try to parse prep expression - private int parsePreprocessorExpression() { - int index = this.index; + private Result parsePreprocessorExpression() { + int startIndex = index; + int index = startIndex; if (tokenSet.tokens[index].type == Tokenizer.TokenType.HASH) { int startLine = tokenSet.getLine(index); while (index < tokenSet.tokens.length) { @@ -112,9 +260,21 @@ public class Parser { // Find first next line token index--; } - return index; + + AST.Span span = new AST.Span(startIndex, index); + return new Result(new AST.PreprocessorExpression(span), span); } + // private ArgumentList parseArgumentList(int startIndex, int endIndex) { + // List arguments = new ArrayList<>(); + // for (int i = startIndex; i < endIndex; i++) { + // if (tokens[i].type == Tokenizer.TokenType.COMMA) { + + // } + // } + // return new ArgumentList(arguments); + // } + // // Try to parse function declaration and return the ending token index // private int parseFunctionDeclaration() { // Tokenizer.Token[] tokens = tokenSet.getTokens(); @@ -195,70 +355,4 @@ public class Parser { } return -1; } - - public static class Span { - public final int startOffset; - public final int endOffset; - - public Span(int startOffset, int endOffset) { - this.startOffset = startOffset; - this.endOffset = endOffset; - } - } - - public static class Type { - public final Span span; - - public Type(Span span) { - this.span = span; - } - } - - public static class Identifier { - public final Span span; - - public Identifier(Span span) { - this.span = span; - } - } - - public static class ArgumentList { - public final List arguments; - - public ArgumentList(List arguments) { - this.arguments = arguments; - } - } - - public static class VariableDeclaration { - public final Type type; - public final Identifier name; - - public VariableDeclaration(Type type, Identifier name) { - this.type = type; - this.name = name; - } - } - - public static class FunctionDecl { - public final Identifier name; - public final Type returnValue; - public final ArgumentList args; - - public FunctionDecl(Identifier name, Type returnValue, ArgumentList args) { - this.name = name; - this.returnValue = returnValue; - this.args = args; - } - } - - public static class FunctionCall { - public final Identifier name; - public final ArgumentList args; - - public FunctionCall(Identifier name, ArgumentList args) { - this.name = name; - this.args = args; - } - } } diff --git a/java/cparser/src/main/java/cparser/Tokenizer.java b/java/cparser/src/main/java/cparser/Tokenizer.java index 968ca858..6df24880 100644 --- a/java/cparser/src/main/java/cparser/Tokenizer.java +++ b/java/cparser/src/main/java/cparser/Tokenizer.java @@ -19,6 +19,7 @@ public class Tokenizer { EQUALS, ARROW, STAR, + AMPERSAND, COMMA, COMMENT, BLOCK_COMMENT, @@ -34,9 +35,17 @@ public class Tokenizer { public int ofs; public int len; public TokenType type; + + public AST.Span getSpan() { + return new AST.Span(ofs, getEnd()); + } + + public int getEnd() { + return ofs + len; + } } - public class TokenSet { + public static class TokenSet { public final Token[] tokens; public final String text; private final TreeMap lineNumberTable; @@ -56,10 +65,24 @@ public class Tokenizer { return entry != null ? entry.getValue() : -1; } + public String getText(Token token) { + return getText(token.ofs, token.len); + } + + public String getText(int ofs, int len) { + return text.substring(ofs, ofs + len); // Fixed recursion issue + } + public String getTextNoNewlines(Token token) { String text = getText(token); return text.replace("\n", ""); } + + + public String getTextNoNewlines(int ofs, int len) { + String text = getText(ofs, len); + return text.replace("\n", ""); + } }; private final String text; @@ -330,6 +353,8 @@ public class Tokenizer { newType = TokenType.COMMA; } else if (currentChar == '#') { newType = TokenType.HASH; + } else if (currentChar == '&') { + newType = TokenType.AMPERSAND; } else if (currentChar == '[') { newType = TokenType.L_IDX; } else if (currentChar == ']') { diff --git a/java/cparser/src/test/java/cparser_tests/ParserTests.java b/java/cparser/src/test/java/cparser_tests/ParserTests.java index 287db162..51766c25 100644 --- a/java/cparser/src/test/java/cparser_tests/ParserTests.java +++ b/java/cparser/src/test/java/cparser_tests/ParserTests.java @@ -1,4 +1,5 @@ package cparser_tests; + import static org.junit.Assert.*; import org.junit.Before; import org.junit.Test; @@ -6,119 +7,144 @@ import java.lang.String; import cparser.Parser; import cparser.Tokenizer; +import cparser.AST; import cparser.Log; import java.util.List; public class ParserTests { - private Parser parser; - private Tokenizer.TokenSet tokenSet; - private Log testLog; + private Parser parser; + private Tokenizer.TokenSet tokenSet; + private Log testLog; - @Before - public void setUp() { - testLog = new Log() { - @Override - public void log(String msg) { - System.out.println(msg); - } - }; + @Before + public void setUp() { + testLog = new Log() { + @Override + public void log(String msg) { + System.out.println(msg); + } + }; + } + + @Test + public void testParseVariableDecl() { + String[] testCases = { + "byte RVar1;", + "tdstLastErrorInfo *pdVar2;", + "undefined4 *puVar2;", + "CHAR pathToUbi_ini[260];", + "undefined& DAT_005a9ed4;", + "char(&s_Identifier_005b6420)[16];", + "void (*fnType)(int j, char, bool);", + "void** (*fnType)(int j, char, bool);" + }; + + for (String code : testCases) { + tokenSet = new Tokenizer(code).parse(); + parser = new Parser(tokenSet, testLog); + parser.parse(); + + List declarations = parser.getVariableDeclarations(); + assertEquals("Failed for case: " + code, 1, declarations.size()); + AST.VariableDeclaration decl = declarations.get(0); + assertNotNull(decl); + System.out.println("Parsed: " + code); + System.out.println(" Type: " + decl.type); + System.out.println(" Identifier: " + decl.identifier); + System.out.println(" Is Pointer: " + decl.isPointer); + System.out.println(" Is Reference: " + decl.isReference); + System.out.println(" Is Array: " + decl.isArray); + System.out.println(" Array Size: " + decl.arraySize); + System.out.println(" Is Function: " + decl.isFunction); + System.out.println(); } + } - @Test - public void testParseVariableReference() { - String code = "int x = 5;"; - tokenSet = new Tokenizer(code).parse(); - parser = new Parser(tokenSet, testLog); - parser.parse(); - - List variables = parser.getVariables(); - assertEquals(1, variables.size()); - assertEquals("x", variables.get(0).name); - } - - @Test - public void testParseFunctionDeclaration() { - String code = "void foo(int a, int b);"; - tokenSet = new Tokenizer(code).parse(); - parser = new Parser(tokenSet, testLog); - parser.parse(); - - List functions = parser.getFunctions(); - assertEquals(1, functions.size()); - assertEquals("foo", functions.get(0).name); - assertFalse(functions.get(0).isDefinition); - } - - @Test - public void testParseFunctionDefinition() { - String code = "int bar(int x) { return x + 1; }"; - tokenSet = new Tokenizer(code).parse(); - parser = new Parser(tokenSet, testLog); - parser.parse(); - - List functions = parser.getFunctions(); - assertEquals(1, functions.size()); - assertEquals("bar", functions.get(0).name); - assertTrue(functions.get(0).isDefinition); - } - - @Test - public void testParseFunctionCall() { - String code = "result = calculate(5, 10);"; - tokenSet = new Tokenizer(code).parse(); - parser = new Parser(tokenSet, testLog); - parser.parse(); - - List functionCalls = parser.getFunctionCalls(); - assertEquals(1, functionCalls.size()); - assertEquals("calculate", functionCalls.get(0).name); - } - - @Test - public void testParsePreprocessorDirective() { - String code = "#include \nint main() { return 0; }"; - tokenSet = new Tokenizer(code).parse(); - parser = new Parser(tokenSet, testLog); - parser.parse(); - - List functions = parser.getFunctions(); - assertEquals(1, functions.size()); - assertEquals("main", functions.get(0).name); - } - - @Test - public void testParseComplexCode() { - String code = - "#include \n" + - "int globalVar = 10;\n" + - "void helper(int x);\n" + - "int main() {\n" + - " int localVar = 5;\n" + - " helper(localVar);\n" + - " return 0;\n" + - "}\n" + - "void helper(int x) {\n" + - " printf(\"%d\", x);\n" + - "}"; - - tokenSet = new Tokenizer(code).parse(); - parser = new Parser(tokenSet, testLog); - parser.parse(); - - List variables = parser.getVariables(); - List functions = parser.getFunctions(); - List functionCalls = parser.getFunctionCalls(); - - assertEquals(2, variables.size()); - assertEquals(2, functions.size()); - assertEquals(2, functionCalls.size()); - - assertTrue(variables.stream().anyMatch(v -> v.name.equals("globalVar"))); - assertTrue(variables.stream().anyMatch(v -> v.name.equals("localVar"))); - assertTrue(functions.stream().anyMatch(f -> f.name.equals("main"))); - assertTrue(functions.stream().anyMatch(f -> f.name.equals("helper"))); - assertTrue(functionCalls.stream().anyMatch(fc -> fc.name.equals("helper"))); - assertTrue(functionCalls.stream().anyMatch(fc -> fc.name.equals("printf"))); - } + /* + * @Test + * public void testParseFunctionDeclaration() { + * String code = "void foo(int a, int b);"; + * tokenSet = new Tokenizer(code).parse(); + * parser = new Parser(tokenSet, testLog); + * parser.parse(); + * + * List functions = parser.getFunctions(); + * assertEquals(1, functions.size()); + * assertEquals("foo", functions.get(0).name); + * assertFalse(functions.get(0).isDefinition); + * } + * + * @Test + * public void testParseFunctionDefinition() { + * String code = "int bar(int x) { return x + 1; }"; + * tokenSet = new Tokenizer(code).parse(); + * parser = new Parser(tokenSet, testLog); + * parser.parse(); + * + * List functions = parser.getFunctions(); + * assertEquals(1, functions.size()); + * assertEquals("bar", functions.get(0).name); + * assertTrue(functions.get(0).isDefinition); + * } + * + * @Test + * public void testParseFunctionCall() { + * String code = "result = calculate(5, 10);"; + * tokenSet = new Tokenizer(code).parse(); + * parser = new Parser(tokenSet, testLog); + * parser.parse(); + * + * List functionCalls = parser.getFunctionCalls(); + * assertEquals(1, functionCalls.size()); + * assertEquals("calculate", functionCalls.get(0).name); + * } + * + * @Test + * public void testParsePreprocessorDirective() { + * String code = "#include \nint main() { return 0; }"; + * tokenSet = new Tokenizer(code).parse(); + * parser = new Parser(tokenSet, testLog); + * parser.parse(); + * + * List functions = parser.getFunctions(); + * assertEquals(1, functions.size()); + * assertEquals("main", functions.get(0).name); + * } + * + * @Test + * public void testParseComplexCode() { + * String code = + * "#include \n" + + * "int globalVar = 10;\n" + + * "void helper(int x);\n" + + * "int main() {\n" + + * " int localVar = 5;\n" + + * " helper(localVar);\n" + + * " return 0;\n" + + * "}\n" + + * "void helper(int x) {\n" + + * " printf(\"%d\", x);\n" + + * "}"; + * + * tokenSet = new Tokenizer(code).parse(); + * parser = new Parser(tokenSet, testLog); + * parser.parse(); + * + * List variables = parser.getVariables(); + * List functions = parser.getFunctions(); + * List functionCalls = parser.getFunctionCalls(); + * + * assertEquals(2, variables.size()); + * assertEquals(2, functions.size()); + * assertEquals(2, functionCalls.size()); + * + * assertTrue(variables.stream().anyMatch(v -> v.name.equals("globalVar"))); + * assertTrue(variables.stream().anyMatch(v -> v.name.equals("localVar"))); + * assertTrue(functions.stream().anyMatch(f -> f.name.equals("main"))); + * assertTrue(functions.stream().anyMatch(f -> f.name.equals("helper"))); + * assertTrue(functionCalls.stream().anyMatch(fc -> fc.name.equals("helper"))); + * assertTrue(functionCalls.stream().anyMatch(fc -> fc.name.equals("printf"))); + * } + */ }