Add cparser java project

This commit is contained in:
2024-10-08 00:58:50 +08:00
parent 8f0e8f68bb
commit 0133a237ac
31 changed files with 299 additions and 324 deletions

2
java/cparser/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
*.class
target/

52
java/cparser/pom.xml Normal file
View File

@@ -0,0 +1,52 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Project coordinates for the standalone C-parser module -->
<groupId>cparser</groupId>
<artifactId>cparser</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<!-- JUnit for testing (test scope only; not shipped) -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<!-- Specify the custom test source directory.
     NOTE(review): ./src/test/java is already Maven's default test source
     directory, so this element is redundant but harmless. -->
<testSourceDirectory>./src/test/java</testSourceDirectory>
<plugins>
<!-- Compiler Plugin to specify Java version (compiles to Java 8) -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<!-- Surefire Plugin to run JUnit tests; only ParserTests is picked up -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.2</version>
<configuration>
<includes>
<include>**/ParserTests.java</include>
</includes>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,5 @@
package cparser;
/**
 * Minimal logging callback used to surface diagnostic messages from the
 * tokenizer and parser. Implementations decide where messages go (stdout,
 * a test buffer, a real logger, ...).
 *
 * Marked {@code @FunctionalInterface} so it can be implemented with a
 * lambda or method reference; the redundant {@code public} modifier on the
 * method (implicit in an interface) was removed.
 */
@FunctionalInterface
public interface Log {
    /**
     * Records a single diagnostic message.
     *
     * @param msg the message to record; callers pass human-readable text
     */
    void log(String msg);
}

View File

@@ -0,0 +1,264 @@
package cparser;
import java.util.*;
import cparser.Tokenizer.Token;
import cparser.Log;
/**
 * Work-in-progress recursive-descent style parser over the token stream
 * produced by {@link Tokenizer}. Currently it recognizes preprocessor
 * directives and scans statement boundaries; function/variable extraction
 * is still TODO (the previous draft helpers existed only as commented-out
 * code and were removed).
 */
public class Parser {
    /** Token stream being parsed. */
    private Tokenizer.TokenSet tokenSet;
    /** Parsed top-level statements; population is still TODO. */
    private List<Object> statements;
    /** Optional diagnostic sink; null disables logging. */
    private Log log;
    /** Token array cached from {@link #tokenSet}. */
    private Tokenizer.Token[] tokens;
    /** Current scan position in {@link #tokens}, shared by the parse helpers. */
    int index = 0;

    /**
     * @param tokenSet tokens produced by {@code Tokenizer.parse()}
     * @param log      optional diagnostic sink; may be null
     */
    public Parser(Tokenizer.TokenSet tokenSet, Log log) {
        this.tokenSet = tokenSet;
        this.statements = new ArrayList<>();
        this.tokens = tokenSet.getTokens();
        // Bug fix: 'log' was accepted but never stored, so log() could never emit.
        this.log = log;
    }

    /** Emits a diagnostic message if a sink was supplied. */
    void log(String msg) {
        if (log != null) {
            log.log(msg);
        }
    }

    /**
     * Walks the whole token stream once: comment tokens are skipped, '#'
     * starts a preprocessor directive, and anything else is handed to
     * {@link #parseStmt()}. Helpers return the index of the last token they
     * consumed; the loop's index++ then advances past it.
     */
    public void parse() {
        for (index = 0; index < tokens.length; index++) {
            Tokenizer.Token token = tokens[index];
            if (token.type == Tokenizer.TokenType.BLOCK_COMMENT || token.type == Tokenizer.TokenType.COMMENT) {
                continue;
            } else if (token.type == Tokenizer.TokenType.HASH) {
                index = parsePreprocessorExpression();
            } else {
                index = parseStmt();
            }
        }
    }

    /**
     * Scans forward looking for statement boundaries (';' and '{').
     * Still incomplete: function call/declaration recognition is TODO and
     * the computed isVarAssign flag is not yet consumed.
     *
     * Bug fix: the original was a non-void method with no return statement
     * at all (a compile error). It now returns {@link #index}, which the
     * '{'-branch may have advanced past a parenthesized parameter list.
     *
     * @return index of the last token consumed by this statement
     */
    private int parseStmt() {
        int startIndex = index;
        List<Tokenizer.Token> idStack = new ArrayList<>();
        for (int i = startIndex; i < tokens.length; i++) {
            Tokenizer.Token token = tokens[i];
            if (token.type == Tokenizer.TokenType.L_PAREN && idStack.size() > 0) {
                // TODO: distinguish function call vs. declaration here.
            } else if (token.type == Tokenizer.TokenType.SEMICOLON) {
                boolean isVarAssign = false;
                for (int j = startIndex; j < i; j++) {
                    if (tokens[j].type == Tokenizer.TokenType.EQUALS) {
                        isVarAssign = true;
                    }
                }
                // TODO: build a variable-assignment node from isVarAssign.
            } else if (token.type == Tokenizer.TokenType.L_BRACE) {
                // A '(' before a '{' suggests a function definition header;
                // skip past the parameter list if it is balanced.
                for (int j = startIndex; j < i; j++) {
                    if (tokens[j].type == Tokenizer.TokenType.L_PAREN) {
                        int endIndex = findClosingParenthesis(j);
                        if (endIndex != -1) {
                            index = endIndex;
                        }
                    }
                }
            }
        }
        return index;
    }

    /**
     * Placeholder for variable-declaration parsing over
     * tokens[startIndex..endIndex). TODO: currently only skips one token.
     */
    private int parseVarDecl(int startIndex, int endIndex) {
        if (tokens[startIndex].type == Tokenizer.TokenType.R_PAREN) {
            return startIndex;
        }
        return startIndex + 1;
    }

    /**
     * Placeholder for argument-list parsing; commas are recognized but no
     * argument nodes are built yet.
     */
    private ArgumentList parseArgumentList(int startIndex, int endIndex) {
        List<Object> arguments = new ArrayList<>();
        for (int i = startIndex; i < endIndex; i++) {
            if (tokens[i].type == Tokenizer.TokenType.COMMA) {
                // TODO: close the current argument at each comma.
            }
        }
        return new ArgumentList(arguments);
    }

    /**
     * Consumes a preprocessor directive: every token that sits on the same
     * source line as the leading '#'.
     *
     * Bug fix: {@code TokenSet.getLine()} expects a character offset (it
     * floors into a table keyed by offsets), but the original passed a token
     * index. The token's character offset is used now.
     *
     * @return index of the last token belonging to the directive
     */
    private int parsePreprocessorExpression() {
        int index = this.index;
        if (tokenSet.tokens[index].type == Tokenizer.TokenType.HASH) {
            int startLine = tokenSet.getLine(tokenSet.tokens[index].ofs);
            while (index < tokenSet.tokens.length) {
                if (tokenSet.getLine(tokenSet.tokens[index].ofs) > startLine) {
                    break;
                }
                index++;
            }
            // Step back to the last token still on the directive's line.
            index--;
        }
        return index;
    }

    /**
     * @return index of the ')' matching the '(' at startIndex, or -1 if the
     *         stream ends before the parentheses balance
     */
    private int findClosingParenthesis(int startIndex) {
        Tokenizer.Token[] tokens = tokenSet.getTokens();
        int parenCount = 1;
        for (int i = startIndex + 1; i < tokens.length; i++) {
            if (tokens[i].type == Tokenizer.TokenType.L_PAREN) {
                parenCount++;
            } else if (tokens[i].type == Tokenizer.TokenType.R_PAREN) {
                parenCount--;
                if (parenCount == 0) {
                    return i;
                }
            }
        }
        return -1;
    }

    /**
     * @return index of the '}' matching the '{' at startIndex, or -1 if the
     *         stream ends before the braces balance
     */
    private int findClosingBrace(int startIndex) {
        Tokenizer.Token[] tokens = tokenSet.getTokens();
        int braceCount = 1;
        for (int i = startIndex + 1; i < tokens.length; i++) {
            if (tokens[i].type == Tokenizer.TokenType.L_BRACE) {
                braceCount++;
            } else if (tokens[i].type == Tokenizer.TokenType.R_BRACE) {
                braceCount--;
                if (braceCount == 0) {
                    return i;
                }
            }
        }
        return -1;
    }

    /** Half-open character range [startOffset, endOffset) in the source text. */
    public static class Span {
        public final int startOffset;
        public final int endOffset;
        public Span(int startOffset, int endOffset) {
            this.startOffset = startOffset;
            this.endOffset = endOffset;
        }
    }

    /** A type name, identified only by its source span for now. */
    public static class Type {
        public final Span span;
        public Type(Span span) {
            this.span = span;
        }
    }

    /** An identifier occurrence, identified by its source span. */
    public static class Identifier {
        public final Span span;
        public Identifier(Span span) {
            this.span = span;
        }
    }

    /** Ordered argument nodes of a call or declaration. */
    public static class ArgumentList {
        public final List<Object> arguments;
        public ArgumentList(List<Object> arguments) {
            this.arguments = arguments;
        }
    }

    /** A 'type name' variable declaration. */
    public static class VariableDeclaration {
        public final Type type;
        public final Identifier name;
        public VariableDeclaration(Type type, Identifier name) {
            this.type = type;
            this.name = name;
        }
    }

    /** A function declaration: name, return type, and parameter list. */
    public static class FunctionDecl {
        public final Identifier name;
        public final Type returnValue;
        public final ArgumentList args;
        public FunctionDecl(Identifier name, Type returnValue, ArgumentList args) {
            this.name = name;
            this.returnValue = returnValue;
            this.args = args;
        }
    }

    /** A call site: callee name and the arguments passed. */
    public static class FunctionCall {
        public final Identifier name;
        public final ArgumentList args;
        public FunctionCall(Identifier name, ArgumentList args) {
            this.name = name;
            this.args = args;
        }
    }
}

View File

@@ -0,0 +1,367 @@
package cparser;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
/**
 * Single-pass character-level lexer for C-like source text. {@link #parse()}
 * runs a state machine that accumulates runs of same-typed characters and
 * emits a token whenever the type changes, with lookahead scans for
 * identifiers and hexadecimal literals.
 *
 * NOTE(review): tokenization state (tokens, tokenStart, currentType, the
 * in* flags) lives in instance fields, so parse() is effectively single-use
 * per Tokenizer instance — a second call would append into the same list.
 * Confirm whether reuse is intended.
 */
public class Tokenizer {
    /** Classification assigned to each token produced by {@link #parse()}. */
    public enum TokenType {
        UNDEFINED,
        HASH,
        L_PAREN,
        R_PAREN,
        L_BRACE,
        R_BRACE,
        L_IDX,
        R_IDX,
        SEMICOLON,
        EQUALS,
        ARROW,
        STAR,
        COMMA,
        COMMENT,
        BLOCK_COMMENT,
        IDENTIFIER,
        STRING_LITERAL,
        NUMERIC_LITERAL,
        NUMERIC_LITERAL_HEX,
        OTHER,
        KEYWORD,
    }

    /**
     * One token: the [ofs, ofs+len) slice of the source text plus its type.
     * NOTE(review): non-static inner class — every Token carries a hidden
     * reference to its enclosing Tokenizer; consider making it static nested.
     */
    public class Token {
        public int ofs;
        public int len;
        public TokenType type;
    }

    /**
     * Result of a parse: the token array, the original text, and a table for
     * mapping character offsets to line numbers.
     */
    public class TokenSet {
        public final Token[] tokens;
        public final String text;
        // Maps each line's starting character offset to its 1-based line number.
        private final TreeMap<Integer, Integer> lineNumberTable;

        TokenSet(Token[] tokens, String text, TreeMap<Integer, Integer> lineNumberTable) {
            this.tokens = tokens;
            this.text = text;
            this.lineNumberTable = lineNumberTable;
        }

        public Token[] getTokens() {
            return this.tokens;
        }

        /**
         * Maps a character offset (NOT a token index) to a 1-based line
         * number via floorEntry, or -1 if the offset precedes every entry.
         */
        public int getLine(int offset) {
            Map.Entry<Integer, Integer> entry = lineNumberTable.floorEntry(offset);
            return entry != null ? entry.getValue() : -1;
        }

        /** Returns the token's text with newline characters removed. */
        public String getTextNoNewlines(Token token) {
            String text = getText(token);
            return text.replace("\n", "");
        }
    };

    /** Source text being tokenized. */
    private final String text;
    /** Built by buildLineNumberTable(); keyed by line-start character offsets. */
    private TreeMap<Integer, Integer> lineNumberTable;
    /** Optional diagnostic sink; null disables logging. */
    private Log log;

    public Tokenizer(String text) {
        this.text = text;
    }

    public Tokenizer(String text, Log log) {
        this.text = text;
        this.log = log;
    }

    /** Emits a diagnostic message if a sink was supplied. */
    void log(String msg) {
        if (log != null) {
            log.log(msg);
        }
    }

    /** Returns the source text covered by the given token. */
    String getText(Token token) {
        return getText(token.ofs, token.len);
    }

    /** Returns the source substring [ofs, ofs+len). */
    String getText(int ofs, int len) {
        return text.substring(ofs, ofs + len); // Fixed recursion issue
    }

    // NOTE(review): appears unused within this class — candidate for removal.
    TokenType lastTokenType = TokenType.UNDEFINED;

    /**
     * Inserts a new token into the tokens list after trimming whitespace from
     * both ends of the [tokenStart, tokenEnd) range. Identifier tokens whose
     * text matches a reserved word are promoted to KEYWORD.
     *
     * @param tokens      The list of tokens.
     * @param tokenStart  The starting index of the token.
     * @param tokenEnd    The current index in the text (exclusive end).
     * @param currentType The type of the current token.
     */
    private void insertToken(List<Token> tokens, int tokenStart, int tokenEnd, TokenType currentType) {
        if (currentType != TokenType.UNDEFINED && tokenStart < tokenEnd) {
            // Strip whitespace from the start of the range.
            for (int i = tokenStart; i < tokenEnd; i++) {
                if (Character.isWhitespace(text.charAt(i))) {
                    tokenStart = i + 1;
                } else {
                    break;
                }
            }
            // Strip whitespace from the end of the range.
            for (int i = tokenEnd - 1; i >= tokenStart; i--) {
                if (Character.isWhitespace(text.charAt(i))) {
                    tokenEnd = i;
                } else {
                    break;
                }
            }
            if (tokenEnd - tokenStart > 0) {
                Token token = new Token();
                token.ofs = tokenStart;
                token.len = tokenEnd - tokenStart;
                token.type = currentType;
                // Promote identifiers that match a reserved word.
                if (currentType == TokenType.IDENTIFIER && isKeyword(getText(token))) {
                    token.type = TokenType.KEYWORD;
                }
                tokens.add(token);
            }
            // Consume the token.
            // NOTE(review): this assigns the local parameter only; it has no
            // effect on the caller's currentType.
            currentType = TokenType.UNDEFINED;
        }
    }

    /**
     * Handles the insertion of the last token after parsing is complete.
     *
     * @param tokens      The list of tokens.
     * @param tokenStart  The starting index of the last token.
     * @param currentType The type of the last token.
     */
    private void handleLastToken(List<Token> tokens, int tokenStart, TokenType currentType) {
        insertToken(tokens, tokenStart, text.length(), currentType);
    }

    /** Builds the offset-to-line table used by {@code TokenSet.getLine()}. */
    void buildLineNumberTable() {
        this.lineNumberTable = new TreeMap<>();
        int lineNumber = 1;
        lineNumberTable.put(0, 1);
        for (int i = 0; i < text.length(); i++) {
            if (text.charAt(i) == '\n') {
                lineNumber++;
                lineNumberTable.put(i + 1, lineNumber);
            }
        }
    }

    // Mutable tokenization state shared between parse() and its helpers.
    List<Token> tokens = new ArrayList<>();
    // Initialize tokenization state
    int tokenStart = 0;
    TokenType currentType = TokenType.UNDEFINED;
    boolean inComment = false;
    boolean inBlockComment = false;
    boolean inString = false;

    /** Result of a lookahead scan: a [start, end) range plus its token type. */
    class ScanRange {
        int start;
        int end;
        TokenType type;

        ScanRange(int start, int end, TokenType type) {
            this.start = start;
            this.end = end;
            this.type = type;
        }

        // Invalid constructor: produces a sentinel "no match" range.
        ScanRange() {
            this.type = TokenType.UNDEFINED;
        }

        boolean isValid() {
            return this.type != TokenType.UNDEFINED;
        }
    };

    /**
     * Scans a hexadecimal literal ("0x..."/"0X...") starting at currentIndex;
     * returns an invalid ScanRange when there is no such literal.
     */
    private ScanRange tryParseHexadecimal(int currentIndex) {
        if (text.charAt(currentIndex) == '0' && currentIndex + 1 < text.length()) {
            char nextChar = text.charAt(currentIndex + 1);
            if (nextChar == 'x' || nextChar == 'X') {
                int tempIndex = currentIndex + 2;
                while (tempIndex < text.length()) {
                    char c = text.charAt(tempIndex);
                    if (Character.digit(c, 16) == -1) {
                        break;
                    }
                    tempIndex++;
                }
                // Require at least one hex digit after the "0x" prefix.
                if (tempIndex > currentIndex + 2) {
                    return new ScanRange(currentIndex, tempIndex, TokenType.NUMERIC_LITERAL_HEX);
                }
            }
        }
        return new ScanRange();
    }

    // Identifier that starts with a letter or underscore, and can contain letters,
    // digits, and underscores
    private ScanRange tryParseIdentifier(int currentIndex) {
        if (Character.isLetter(text.charAt(currentIndex)) || text.charAt(currentIndex) == '_') {
            int tempIndex = currentIndex + 1;
            while (tempIndex < text.length()) {
                char c = text.charAt(tempIndex);
                if (!(Character.isLetter(c) || Character.isDigit(c) || c == '_')) {
                    break;
                }
                tempIndex++;
            }
            return new ScanRange(currentIndex, tempIndex, TokenType.IDENTIFIER);
        }
        return new ScanRange();
    }

    /** Tries the lookahead scanners in order: hex literal first, then identifier. */
    private ScanRange tryParseWithLookahead(int currentIndex) {
        ScanRange sr = tryParseHexadecimal(currentIndex);
        if (!sr.isValid()) {
            sr = tryParseIdentifier(currentIndex);
        }
        return sr;
    }

    /** Returns true for the subset of C reserved words this lexer recognizes. */
    public boolean isKeyword(String text) {
        return text.equals("while") || text.equals("for") || text.equals("if") || text.equals("else") ||
                text.equals("return") || text.equals("struct") || text.equals("typedef") ||
                text.equals("enum") || text.equals("union") || text.equals("const") || text.equals("static");
    }

    /**
     * Tokenizes the entire source text and returns a TokenSet. A token is
     * emitted each time the type computed for the current character differs
     * from the type of the run being accumulated.
     */
    public TokenSet parse() {
        this.buildLineNumberTable();
        int index = 0;
        while (index < text.length()) {
            char currentChar = text.charAt(index);
            TokenType newType = TokenType.OTHER;
            // Handle comments
            if (inBlockComment) {
                newType = TokenType.BLOCK_COMMENT;
                if (currentChar == '*') {
                    // "*/" ends the block comment; also consume the '/'.
                    if (index + 1 < text.length() && text.charAt(index + 1) == '/') {
                        inBlockComment = false;
                        index++;
                    }
                }
            } else if (inComment) {
                newType = TokenType.COMMENT;
                if (currentChar == '\n') {
                    inComment = false;
                }
            }
            // Handle string literals
            else if (inString) {
                if (currentChar == '"') {
                    // Closing quote stays part of the literal; the token ends
                    // when the next character produces a different type.
                    inString = false;
                    newType = TokenType.STRING_LITERAL;
                } else {
                    newType = TokenType.STRING_LITERAL;
                }
            }
            // Detect start of comments
            else if (currentChar == '/' && index + 1 < text.length() && text.charAt(index + 1) == '*') {
                inBlockComment = true;
                newType = TokenType.BLOCK_COMMENT;
            } else if (currentChar == '/' && index + 1 < text.length() && text.charAt(index + 1) == '/') {
                inComment = true;
                newType = TokenType.COMMENT;
            }
            // Detect start of string literals
            else if (currentChar == '"') {
                inString = true;
                newType = TokenType.STRING_LITERAL;
            } else {
                // Lookahead scans (hex literals, identifiers) emit their token
                // immediately instead of going through run accumulation.
                ScanRange range = tryParseWithLookahead(index);
                if (range.isValid()) {
                    // Flush whatever run was accumulating before the range.
                    insertToken(tokens, tokenStart, range.start, currentType);
                    // Insert the ranged token itself.
                    insertToken(tokens, range.start, range.end, range.type);
                    // Restart accumulation after the ranged token.
                    currentType = TokenType.UNDEFINED;
                    tokenStart = range.end;
                    index = range.end;
                }
                // Detect numeric literals
                else if (Character.isDigit(currentChar)) {
                    newType = TokenType.NUMERIC_LITERAL;
                }
                // Detect identifiers
                else if (Character.isLetter(currentChar) || currentChar == '_') {
                    newType = TokenType.IDENTIFIER;
                }
                // Detect parentheses
                else if (currentChar == '(') {
                    newType = TokenType.L_PAREN;
                } else if (currentChar == ')') {
                    newType = TokenType.R_PAREN;
                }
                // Detect braces
                else if (currentChar == '{') {
                    newType = TokenType.L_BRACE;
                } else if (currentChar == '}') {
                    newType = TokenType.R_BRACE;
                }
                // Detect semicolon
                else if (currentChar == ';') {
                    newType = TokenType.SEMICOLON;
                }
                // Detect comma
                else if (currentChar == ',') {
                    newType = TokenType.COMMA;
                } else if (currentChar == '#') {
                    newType = TokenType.HASH;
                } else if (currentChar == '[') {
                    newType = TokenType.L_IDX;
                } else if (currentChar == ']') {
                    newType = TokenType.R_IDX;
                } else if (currentChar == '=') {
                    newType = TokenType.EQUALS;
                } else if (currentChar == '>' && index > 0 && text.charAt(index - 1) == '-') {
                    // "->": retroactively fold the preceding '-' into this token.
                    newType = TokenType.ARROW;
                    currentType = TokenType.ARROW;
                    tokenStart = index - 1;
                } else if (currentChar == '*') {
                    newType = TokenType.STAR;
                }
                // Handle other characters
                else {
                    newType = TokenType.OTHER;
                }
            }
            // Insert a new token whenever the character's type differs from the run's.
            if (newType != currentType) {
                insertToken(tokens, tokenStart, index, currentType);
                tokenStart = index;
                currentType = newType;
            }
            index++;
        }
        // Handle the last token
        handleLastToken(tokens, tokenStart, currentType);
        return new TokenSet(tokens.toArray(new Token[0]), text, lineNumberTable);
    }
}

View File

@@ -0,0 +1,124 @@
package cparser_tests;
import static org.junit.Assert.*;
import org.junit.Before;
import org.junit.Test;
import java.lang.String;
import cparser.Parser;
import cparser.Tokenizer;
import cparser.Log;
import java.util.List;
/**
 * JUnit 4 tests describing the intended Parser API: variable, function, and
 * function-call extraction from C snippets.
 *
 * NOTE(review): these tests call parser.getVariables(), getFunctions() and
 * getFunctionCalls(), and reference Parser.Variable / Parser.Function — none
 * of which exist in the Parser class in this snapshot (it exposes only
 * parse() plus nested Span/Type/Identifier/FunctionDecl/FunctionCall data
 * classes). Confirm the accessors are implemented, or these tests will not
 * compile.
 */
public class ParserTests {
    private Parser parser;
    private Tokenizer.TokenSet tokenSet;
    private Log testLog;

    @Before
    public void setUp() {
        // Route parser diagnostics to stdout so failures are easier to debug.
        testLog = new Log() {
            @Override
            public void log(String msg) {
                System.out.println(msg);
            }
        };
    }

    /** A simple declaration-with-initializer should surface one variable. */
    @Test
    public void testParseVariableReference() {
        String code = "int x = 5;";
        tokenSet = new Tokenizer(code).parse();
        parser = new Parser(tokenSet, testLog);
        parser.parse();
        List<Parser.Variable> variables = parser.getVariables();
        assertEquals(1, variables.size());
        assertEquals("x", variables.get(0).name);
    }

    /** A prototype (no body) should be recorded as a non-definition function. */
    @Test
    public void testParseFunctionDeclaration() {
        String code = "void foo(int a, int b);";
        tokenSet = new Tokenizer(code).parse();
        parser = new Parser(tokenSet, testLog);
        parser.parse();
        List<Parser.Function> functions = parser.getFunctions();
        assertEquals(1, functions.size());
        assertEquals("foo", functions.get(0).name);
        assertFalse(functions.get(0).isDefinition);
    }

    /** A function with a body should be recorded as a definition. */
    @Test
    public void testParseFunctionDefinition() {
        String code = "int bar(int x) { return x + 1; }";
        tokenSet = new Tokenizer(code).parse();
        parser = new Parser(tokenSet, testLog);
        parser.parse();
        List<Parser.Function> functions = parser.getFunctions();
        assertEquals(1, functions.size());
        assertEquals("bar", functions.get(0).name);
        assertTrue(functions.get(0).isDefinition);
    }

    /** An assignment whose RHS is a call should surface one function call. */
    @Test
    public void testParseFunctionCall() {
        String code = "result = calculate(5, 10);";
        tokenSet = new Tokenizer(code).parse();
        parser = new Parser(tokenSet, testLog);
        parser.parse();
        List<Parser.FunctionCall> functionCalls = parser.getFunctionCalls();
        assertEquals(1, functionCalls.size());
        assertEquals("calculate", functionCalls.get(0).name);
    }

    /** A leading #include must be skipped without hiding the following function. */
    @Test
    public void testParsePreprocessorDirective() {
        String code = "#include <stdio.h>\nint main() { return 0; }";
        tokenSet = new Tokenizer(code).parse();
        parser = new Parser(tokenSet, testLog);
        parser.parse();
        List<Parser.Function> functions = parser.getFunctions();
        assertEquals(1, functions.size());
        assertEquals("main", functions.get(0).name);
    }

    /** End-to-end sample mixing globals, locals, prototypes, definitions, and calls. */
    @Test
    public void testParseComplexCode() {
        String code =
                "#include <stdio.h>\n" +
                "int globalVar = 10;\n" +
                "void helper(int x);\n" +
                "int main() {\n" +
                "    int localVar = 5;\n" +
                "    helper(localVar);\n" +
                "    return 0;\n" +
                "}\n" +
                "void helper(int x) {\n" +
                "    printf(\"%d\", x);\n" +
                "}";
        tokenSet = new Tokenizer(code).parse();
        parser = new Parser(tokenSet, testLog);
        parser.parse();
        List<Parser.Variable> variables = parser.getVariables();
        List<Parser.Function> functions = parser.getFunctions();
        List<Parser.FunctionCall> functionCalls = parser.getFunctionCalls();
        assertEquals(2, variables.size());
        assertEquals(2, functions.size());
        assertEquals(2, functionCalls.size());
        assertTrue(variables.stream().anyMatch(v -> v.name.equals("globalVar")));
        assertTrue(variables.stream().anyMatch(v -> v.name.equals("localVar")));
        assertTrue(functions.stream().anyMatch(f -> f.name.equals("main")));
        assertTrue(functions.stream().anyMatch(f -> f.name.equals("helper")));
        assertTrue(functionCalls.stream().anyMatch(fc -> fc.name.equals("helper")));
        assertTrue(functionCalls.stream().anyMatch(fc -> fc.name.equals("printf")));
    }
}