From 8f0e8f68bbfa6a90ed36d5263f134f4fb1f955c4 Mon Sep 17 00:00:00 2001 From: Guus Waals <_@guusw.nl> Date: Mon, 7 Oct 2024 22:08:33 +0800 Subject: [PATCH] Maybe add tests for cparser --- scripts/RebuildFunctionDatabase.java | 15 ++- scripts/cparser/Log.java | 5 + .../CParser.java => cparser/Parser.java} | 56 ++++---- .../Tokenizer.java} | 18 ++- scripts/cparser/tests/ParserTests.java | 126 ++++++++++++++++++ scripts/cparser/tests/bin/.project | 23 ++++ .../bin/.settings/org.eclipse.m2e.core.prefs | 4 + scripts/cparser/tests/bin/ParserTests.class | Bin 0 -> 6601 bytes scripts/cparser/tests/bin/pom.xml | 57 ++++++++ scripts/cparser/tests/pom.xml | 57 ++++++++ 10 files changed, 323 insertions(+), 38 deletions(-) create mode 100644 scripts/cparser/Log.java rename scripts/{re3lib/CParser.java => cparser/Parser.java} (75%) rename scripts/{re3lib/CTokenizer.java => cparser/Tokenizer.java} (97%) create mode 100644 scripts/cparser/tests/ParserTests.java create mode 100644 scripts/cparser/tests/bin/.project create mode 100644 scripts/cparser/tests/bin/.settings/org.eclipse.m2e.core.prefs create mode 100644 scripts/cparser/tests/bin/ParserTests.class create mode 100644 scripts/cparser/tests/bin/pom.xml create mode 100644 scripts/cparser/tests/pom.xml diff --git a/scripts/RebuildFunctionDatabase.java b/scripts/RebuildFunctionDatabase.java index a12aaae4..70bb2088 100644 --- a/scripts/RebuildFunctionDatabase.java +++ b/scripts/RebuildFunctionDatabase.java @@ -7,8 +7,6 @@ import ghidra.program.model.data.DataType; import ghidra.program.model.data.StandAloneDataTypeManager; import re3lib.FunctionDatabase; import re3lib.RecompileConfig; -import re3lib.CParser; -import re3lib.CTokenizer; import java.io.File; import java.io.BufferedReader; @@ -21,6 +19,9 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import cparser.Parser; +import cparser.Tokenizer; + public class RebuildFunctionDatabase extends GhidraScript { // Will rebuild all functions public boolean rebuildAllGlobals = true; @@ -125,8 +126,8 @@ public class RebuildFunctionDatabase extends GhidraScript { println("Scanning " + file); String text = new String(Files.readAllBytes(file.toPath())); - CTokenizer.TokenSet tokens = new CTokenizer(text).parse(); - CParser parser = new CParser(tokens); + Tokenizer.TokenSet tokens = new Tokenizer(text).parse(); + Parser parser = new Parser(tokens); parser.parse(); // for (CTokenizer.Token token : tokens.getTokens()) { @@ -134,13 +135,13 @@ public class RebuildFunctionDatabase extends GhidraScript { // println("Line " + line + ": " + token.ofs + " " + token.len + " " + token.type + " - " // + tokens.getTextNoNewlines(token)); // } - for (CParser.Function function : parser.getFunctions()) { + for (Parser.Function function : parser.getFunctions()) { println("Function: " + function.name + " " + function.startOffset + " " + function.endOffset); } - for (CParser.FunctionCall functionCall : parser.getFunctionCalls()) { + for (Parser.FunctionCall functionCall : parser.getFunctionCalls()) { println("FunctionCall: " + functionCall.name + " " + functionCall.startOffset + " " + functionCall.endOffset); } - for (CParser.Variable variable : parser.getVariables()) { + for (Parser.Variable variable : parser.getVariables()) { println("Variable: " + variable.name + " " + variable.startOffset + " " + variable.endOffset); } } diff --git a/scripts/cparser/Log.java b/scripts/cparser/Log.java new file mode 100644 index 00000000..a1abdf33 --- /dev/null +++ b/scripts/cparser/Log.java @@ -0,0 +1,5 @@ +package cparser; + +public interface Log { + public void log(String msg); +} diff --git a/scripts/re3lib/CParser.java b/scripts/cparser/Parser.java similarity index 75% rename from scripts/re3lib/CParser.java rename to scripts/cparser/Parser.java index 8ae54451..36baa2a5 100644 --- a/scripts/re3lib/CParser.java +++ b/scripts/cparser/Parser.java @@ -1,37 +1,45 @@ -package re3lib; +package cparser; import java.util.*; -import re3lib.CTokenizer.Token; +import cparser.Tokenizer.Token; +import cparser.Log; -public class CParser { - private CTokenizer.TokenSet tokenSet; +public class Parser { + private Tokenizer.TokenSet tokenSet; private List variables; private List functions; private List functionCalls; + private Log log; - public CParser(CTokenizer.TokenSet tokenSet) { + public Parser(Tokenizer.TokenSet tokenSet, Log log) { this.tokenSet = tokenSet; this.variables = new ArrayList<>(); this.functions = new ArrayList<>(); this.functionCalls = new ArrayList<>(); } + void log(String msg) { + if (log != null) { + log.log(msg); + } + } + int index = 0; public void parse() { - CTokenizer.Token[] tokens = tokenSet.getTokens(); + Tokenizer.Token[] tokens = tokenSet.getTokens(); for (index = 0; index < tokens.length; index++) { - CTokenizer.Token token = tokens[index]; - if (token.type == CTokenizer.TokenType.BLOCK_COMMENT || token.type == CTokenizer.TokenType.COMMENT) { + Tokenizer.Token token = tokens[index]; + if (token.type == Tokenizer.TokenType.BLOCK_COMMENT || token.type == Tokenizer.TokenType.COMMENT) { continue; - } else if (token.type == CTokenizer.TokenType.HASH) { + } else if (token.type == Tokenizer.TokenType.HASH) { index = parsePreprocessorExpression(); - } else if (tokens[index].type == CTokenizer.TokenType.IDENTIFIER) { - if (index + 1 < tokens.length && tokens[index + 1].type == CTokenizer.TokenType.L_PAREN) { + } else if (tokens[index].type == Tokenizer.TokenType.IDENTIFIER) { + if (index + 1 < tokens.length && tokens[index + 1].type == Tokenizer.TokenType.L_PAREN) { // Function call or declaration/definition - if (index > 0 && (tokens[index - 1].type == CTokenizer.TokenType.IDENTIFIER || - tokens[index - 1].type == CTokenizer.TokenType.OTHER)) { + if (index > 0 && (tokens[index - 1].type == Tokenizer.TokenType.IDENTIFIER || + tokens[index - 1].type == Tokenizer.TokenType.OTHER)) { // Function declaration or definition index = parseFunctionDeclaration(); } else { @@ -49,7 +57,7 @@ public class CParser { // Try to parse prep expression private int parsePreprocessorExpression() { int index = this.index; - if (tokenSet.tokens[index].type == CTokenizer.TokenType.HASH) { + if (tokenSet.tokens[index].type == Tokenizer.TokenType.HASH) { int startLine = tokenSet.getLine(index); while (index < tokenSet.tokens.length) { if (tokenSet.getLine(index) > startLine) { @@ -65,7 +73,7 @@ public class CParser { // Try to parse function declaration and return the ending token index private int parseFunctionDeclaration() { - CTokenizer.Token[] tokens = tokenSet.getTokens(); + Tokenizer.Token[] tokens = tokenSet.getTokens(); String name = tokenSet.getTextNoNewlines(tokens[index]); int endIndex = findClosingParenthesis(index + 1); @@ -73,7 +81,7 @@ public class CParser { return index; boolean isDefinition = false; - if (endIndex + 1 < tokens.length && tokens[endIndex + 1].type == CTokenizer.TokenType.L_BRACE) { + if (endIndex + 1 < tokens.length && tokens[endIndex + 1].type == Tokenizer.TokenType.L_BRACE) { isDefinition = true; endIndex = findClosingBrace(endIndex + 1); } @@ -89,7 +97,7 @@ public class CParser { // Try to parse function call and return the ending token index private int parseFunctionCall() { - CTokenizer.Token[] tokens = tokenSet.getTokens(); + Tokenizer.Token[] tokens = tokenSet.getTokens(); String name = tokenSet.getTextNoNewlines(tokens[index]); int endIndex = findClosingParenthesis(index + 1); if (endIndex == -1) @@ -103,7 +111,7 @@ public class CParser { // Try to parse variable reference and add it to the list private int parseVariableReference() { - CTokenizer.Token token = tokenSet.getTokens()[index]; + Tokenizer.Token token = tokenSet.getTokens()[index]; String name = tokenSet.getTextNoNewlines(token); Variable variable = new Variable(name, token.ofs, token.ofs + token.len); variables.add(variable); @@ -111,12 +119,12 @@ public class CParser { } private int findClosingParenthesis(int startIndex) { - CTokenizer.Token[] tokens = tokenSet.getTokens(); + Tokenizer.Token[] tokens = tokenSet.getTokens(); int parenCount = 1; for (int i = startIndex + 1; i < tokens.length; i++) { - if (tokens[i].type == CTokenizer.TokenType.L_PAREN) { + if (tokens[i].type == Tokenizer.TokenType.L_PAREN) { parenCount++; - } else if (tokens[i].type == CTokenizer.TokenType.R_PAREN) { + } else if (tokens[i].type == Tokenizer.TokenType.R_PAREN) { parenCount--; if (parenCount == 0) { return i; @@ -127,12 +135,12 @@ public class CParser { } private int findClosingBrace(int startIndex) { - CTokenizer.Token[] tokens = tokenSet.getTokens(); + Tokenizer.Token[] tokens = tokenSet.getTokens(); int braceCount = 1; for (int i = startIndex + 1; i < tokens.length; i++) { - if (tokens[i].type == CTokenizer.TokenType.L_BRACE) { + if (tokens[i].type == Tokenizer.TokenType.L_BRACE) { braceCount++; - } else if (tokens[i].type == CTokenizer.TokenType.R_BRACE) { + } else if (tokens[i].type == Tokenizer.TokenType.R_BRACE) { braceCount--; if (braceCount == 0) { return i; diff --git a/scripts/re3lib/CTokenizer.java b/scripts/cparser/Tokenizer.java similarity index 97% rename from scripts/re3lib/CTokenizer.java rename to scripts/cparser/Tokenizer.java index 16e25ed7..b1871718 100644 --- a/scripts/re3lib/CTokenizer.java +++ b/scripts/cparser/Tokenizer.java @@ -1,13 +1,11 @@ -package re3lib; +package cparser; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.TreeMap; -import ghidra.app.script.GhidraScript; - -public class CTokenizer { +public class Tokenizer { public enum TokenType { UNDEFINED, HASH, @@ -61,17 +59,23 @@ public class CTokenizer { private final String text; private TreeMap lineNumberTable; - public GhidraScript log; + private Log log; - public CTokenizer(String text) { + public Tokenizer(String text) { this.text = text; } - public CTokenizer(String text, GhidraScript script) { + public Tokenizer(String text, Log log) { this.text = text; this.log = log; } + void log(String msg) { + if (log != null) { + log.log(msg); + } + } + String getText(Token token) { return getText(token.ofs, token.len); } diff --git a/scripts/cparser/tests/ParserTests.java b/scripts/cparser/tests/ParserTests.java new file mode 100644 index 00000000..c78de600 --- /dev/null +++ b/scripts/cparser/tests/ParserTests.java @@ -0,0 +1,126 @@ +package cparser.tests; + +import static org.junit.Assert.*; +import org.junit.Before; +import org.junit.Test; +import java.lang.String; + +import cparser.Parser; +import cparser.Tokenizer; +import cparser.Log; + +import java.util.List; + +public class ParserTests { + + private Parser parser; + private Tokenizer.TokenSet tokenSet; + private Log mockLog; + + @Before + public void setUp() { + mockLog = new Log() { + @Override + public void log(String msg) { + // Do nothing for tests + } + }; + } + + @Test + public void testParseVariableReference() { + String code = "int x = 5;"; + tokenSet = new Tokenizer(code).parse(); + parser = new Parser(tokenSet, mockLog); + parser.parse(); + + List variables = parser.getVariables(); + assertEquals(1, variables.size()); + assertEquals("x", variables.get(0).name); + } + + @Test + public void testParseFunctionDeclaration() { + String code = "void foo(int a, int b);"; + tokenSet = new Tokenizer(code).parse(); + parser = new Parser(tokenSet, mockLog); + parser.parse(); + + List functions = parser.getFunctions(); + assertEquals(1, functions.size()); + assertEquals("foo", functions.get(0).name); + assertFalse(functions.get(0).isDefinition); + } + + @Test + public void testParseFunctionDefinition() { + String code = "int bar(int x) { return x + 1; }"; + tokenSet = new Tokenizer(code).parse(); + parser = new Parser(tokenSet, mockLog); + parser.parse(); + + List functions = parser.getFunctions(); + assertEquals(1, functions.size()); + assertEquals("bar", functions.get(0).name); + assertTrue(functions.get(0).isDefinition); + } + + @Test + public void testParseFunctionCall() { + String code = "result = calculate(5, 10);"; + tokenSet = new Tokenizer(code).parse(); + parser = new Parser(tokenSet, mockLog); + parser.parse(); + + List functionCalls = parser.getFunctionCalls(); + assertEquals(1, functionCalls.size()); + assertEquals("calculate", functionCalls.get(0).name); + } + + @Test + public void testParsePreprocessorDirective() { + String code = "#include \nint main() { return 0; }"; + tokenSet = new Tokenizer(code).parse(); + parser = new Parser(tokenSet, mockLog); + parser.parse(); + + List functions = parser.getFunctions(); + assertEquals(1, functions.size()); + assertEquals("main", functions.get(0).name); + } + + @Test + public void testParseComplexCode() { + String code = + "#include \n" + + "int globalVar = 10;\n" + + "void helper(int x);\n" + + "int main() {\n" + + " int localVar = 5;\n" + + " helper(localVar);\n" + + " return 0;\n" + + "}\n" + + "void helper(int x) {\n" + + " printf(\"%d\", x);\n" + + "}"; + + tokenSet = new Tokenizer(code).parse(); + parser = new Parser(tokenSet, mockLog); + parser.parse(); + + List variables = parser.getVariables(); + List functions = parser.getFunctions(); + List functionCalls = parser.getFunctionCalls(); + + assertEquals(2, variables.size()); + assertEquals(2, functions.size()); + assertEquals(2, functionCalls.size()); + + assertTrue(variables.stream().anyMatch(v -> v.name.equals("globalVar"))); + assertTrue(variables.stream().anyMatch(v -> v.name.equals("localVar"))); + assertTrue(functions.stream().anyMatch(f -> f.name.equals("main"))); + assertTrue(functions.stream().anyMatch(f -> f.name.equals("helper"))); + assertTrue(functionCalls.stream().anyMatch(fc -> fc.name.equals("helper"))); + assertTrue(functionCalls.stream().anyMatch(fc -> fc.name.equals("printf"))); + } +} diff --git a/scripts/cparser/tests/bin/.project b/scripts/cparser/tests/bin/.project new file mode 100644 index 00000000..d48d61dd --- /dev/null +++ b/scripts/cparser/tests/bin/.project @@ -0,0 +1,23 @@ + + + cparser-tests + + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.m2e.core.maven2Builder + + + + + + org.eclipse.jdt.core.javanature + org.eclipse.m2e.core.maven2Nature + + diff --git a/scripts/cparser/tests/bin/.settings/org.eclipse.m2e.core.prefs b/scripts/cparser/tests/bin/.settings/org.eclipse.m2e.core.prefs new file mode 100644 index 00000000..f897a7f1 --- /dev/null +++ b/scripts/cparser/tests/bin/.settings/org.eclipse.m2e.core.prefs @@ -0,0 +1,4 @@ +activeProfiles= +eclipse.preferences.version=1 +resolveWorkspaceProjects=true +version=1 diff --git a/scripts/cparser/tests/bin/ParserTests.class b/scripts/cparser/tests/bin/ParserTests.class new file mode 100644 index 0000000000000000000000000000000000000000..cd8d07c2dae798e53e0d00bf89faed21985502c1 GIT binary patch literal 6601 zcmeHL-EI^&82!8?yPHiQG!6X$nwqqQgwjo0AysM+5+p^6G=YdLNR@iy%sPq5j6K+% z74&KP7L{75z2^mZn~KjfyURwn8aIMUL31=EbPz>f%?f*NPf` zMMNuA@lm7>2uw#<;FsntA%(q7;LOVEV**nfs?S?6PhkG_m_KQ)bQ?Y(^S?cknj4i2 zc%Q~9%|ybiP?Ba^?Ik=lKhn-j?~qgbJmb_6bls%8yH*Q%Kw~DQvUHEr5lNSBSt<+_ zvM)5QD|wDh`&@Gw;|Tj&rTG3d$F@H0#tAcq?u&#Q?O8he)6&%|t1fnO+2{L0x_H4Btg}3^ljttp&W&{~ zh@wn7@07$7l%1I}jXX1U#41u#l zeW6ezP%xM_+&;<(TzZ&GD^k8C2f_&KcE?@r3poILwWIQWv)+c!2)qoC4Nl{Ih`>ds z0ThlIS?}RUzd&szaz;ZFTp^8SfC=h9@5x&>DpnQOkW-lH1TK#w=3Xvir=D#V0#lJW zm{y)<)GdSqVoJZ!y1|l!z|#2pj^-%F#oQRBHw#To3^2nN$AAr#?GpaX>3nTiA&s3n zFj1`hFY?cO8JJ=6SI7}WptGxT9rJsr)S`A>_UYkHXu9q^YQYUun=yA=(8WEnY;M7~ zcnpp2e`C#>z}&Vl2G!#pdda{Qz`iJzpMeHULlbKaXhFLuXP|>+8xgZGhtIS4b`y~V z%g�?&pQ`zr(`c1@;0yn?(zOkFf6Wh=Pyd6D$dQieDOVafrHv<(q7mJ#33!wxwfi zpL^M;hi%ErwtPai>t41mPRRCym+i|FvTb_VzB(b>j+gCf0NXW>7k=@uHDDFJ*nsQs O9hPm3vajJASbGHx^DVvr literal 0 HcmV?d00001 diff --git a/scripts/cparser/tests/bin/pom.xml b/scripts/cparser/tests/bin/pom.xml new file mode 100644 index 00000000..de0db96f --- /dev/null +++ b/scripts/cparser/tests/bin/pom.xml @@ -0,0 +1,57 @@ + + + 4.0.0 + + com.yourname.scripts + cparser-tests + 1.0-SNAPSHOT + + + + + junit + junit + 4.13.2 + test + + + + + + ../../cparser + . + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + 1.8 + 1.8 + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.22.2 + + + **/ParserTests.java + + + + + + + \ No newline at end of file diff --git a/scripts/cparser/tests/pom.xml b/scripts/cparser/tests/pom.xml new file mode 100644 index 00000000..de0db96f --- /dev/null +++ b/scripts/cparser/tests/pom.xml @@ -0,0 +1,57 @@ + + + 4.0.0 + + com.yourname.scripts + cparser-tests + 1.0-SNAPSHOT + + + + + junit + junit + 4.13.2 + test + + + + + + ../../cparser + . + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + 1.8 + 1.8 + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.22.2 + + + **/ParserTests.java + + + + + + + \ No newline at end of file