WIP C parser

2024-10-06 19:44:32 +08:00
parent 7090abf5a2
commit 2da111a348
4 changed files with 684 additions and 115 deletions
--- a/scripts/DumpCurrentFunctionN.java
+++ b/scripts/DumpCurrentFunctionN.java
@@ -18,28 +18,28 @@ import re3lib.TypeDumper;
 public class DumpCurrentFunctionN extends GhidraScript {
  final int NumFunctions = 8;
-  class Entry {
+  // class Entry {
-    Function function;
+  //   Function function;
-  }
+  // }
-  class QueueEntry {
+  // class QueueEntry {
-    Function function;
+  //   Function function;
-    List<Function> callees;
+  //   List<Function> callees;
-  }
+  // }
-  HashSet<Address> visited = new HashSet<>();
+  // HashSet<Address> visited = new HashSet<>();
-  QueueEntry enter(Function function) {
+  // QueueEntry enter(Function function) {
-    if (visited.contains(function.getEntryPoint()))
+  //   if (visited.contains(function.getEntryPoint()))
-      return null;
+  //     return null;
-    visited.add(function.getEntryPoint());
+  //   visited.add(function.getEntryPoint());
-    QueueEntry entry = new QueueEntry();
+  //   QueueEntry entry = new QueueEntry();
-    entry.function = function;
+  //   entry.function = function;
-    function.getCalledFunctions(monitor);
+  //   function.getCalledFunctions(monitor);
-  }
+  // }
  @Override
  public void run() throws Exception {
@@ -51,57 +51,57 @@ public class DumpCurrentFunctionN extends GhidraScript {
    FunctionDumper functionDumper = new FunctionDumper(this, globalDumper);
-    // PCallTracer tracer = new PCallTracer();
+    PCallTracer tracer = new PCallTracer();
-    // tracer.setBlacklist(functionDumper.functionAddrBlackList);
+    tracer.setBlacklist(functionDumper.functionAddrBlackList);
-    // tracer.traceCalls(getFunctionContaining(currentAddress));
+    tracer.traceCalls(getFunctionContaining(currentAddress));
    List<Address> queue = new ArrayList<>();
-    List<Function> functionsToDump = new ArrayList<>();
+    // List<Function> functionsToDump = new ArrayList<>();
-    List<Function> functionsToDumpNew = new ArrayList<>();
+    // List<Function> functionsToDumpNew = new ArrayList<>();
-    for (Function func : tracer.out) {
+    // for (Function func : tracer.out) {
-      if (FunctionDumper.isDumpedFix(func))
+    //   if (FunctionDumper.isDumpedFix(func))
-        continue;
+    //     continue;
-      println("Dump: " + func.getName());
+    //   println("Dump: " + func.getName());
-      functionsToDump.add(func);
+    //   functionsToDump.add(func);
-      if (!FunctionDumper.isDumpedAuto(func))
+    //   if (!FunctionDumper.isDumpedAuto(func))
-        functionsToDumpNew.add(func);
+    //     functionsToDumpNew.add(func);
-    }
+    // }
-    if (!functionsToDump.isEmpty()) {
+    // if (!functionsToDump.isEmpty()) {
-      String newOpt = "Only new (" + functionsToDumpNew.size() + ")";
+    //   String newOpt = "Only new (" + functionsToDumpNew.size() + ")";
-      String okOpt = "Yes (" + functionsToDump.size() + ")";
+    //   String okOpt = "Yes (" + functionsToDump.size() + ")";
-      String choice = askChoice("Confirmation", "About to generate " + functionsToDump.size() + " functions ("
+    //   String choice = askChoice("Confirmation", "About to generate " + functionsToDump.size() + " functions ("
-          + functionsToDumpNew.size() + " new), continue?",
+    //       + functionsToDumpNew.size() + " new), continue?",
-          new ArrayList<String>() {
+    //       new ArrayList<String>() {
-            {
+    //         {
-              add(okOpt);
+    //           add(okOpt);
-              add(newOpt);
+    //           add(newOpt);
-              add("No");
+    //           add("No");
-            }
+    //         }
-          }, okOpt);
+    //       }, okOpt);
-      if (choice == okOpt) {
+    //   if (choice == okOpt) {
-      } else if (choice == newOpt) {
+    //   } else if (choice == newOpt) {
-        functionsToDump = functionsToDumpNew;
+    //     functionsToDump = functionsToDumpNew;
-      } else {
+    //   } else {
-        return;
+    //     return;
-      }
+    //   }
-      for (Function func : functionsToDump) {
+    //   for (Function func : functionsToDump) {
-        functionDumper.dump(func);
+    //     functionDumper.dump(func);
-      }
+    //   }
-      if (functionDumper.createdFile)
+    //   if (functionDumper.createdFile)
-        RecompileConfig.INSTANCE.touchCMakeTimestamp();
+    //     RecompileConfig.INSTANCE.touchCMakeTimestamp();
-      globalDumper.dumpGlobals();
+    //   globalDumper.dumpGlobals();
-      globalDumper.saveGlobalManifest();
+    //   globalDumper.saveGlobalManifest();
-    }
+    // }
-    // Dump types
+    // // Dump types
-    TypeDumper dumper = new TypeDumper(this);
+    // TypeDumper dumper = new TypeDumper(this);
-    dumper.run();
+    // dumper.run();
  }
 }
--- a/scripts/RebuildFunctionDatabase.java
+++ b/scripts/RebuildFunctionDatabase.java
@@ -3,14 +3,21 @@
 import ghidra.app.script.GhidraScript;
 import ghidra.program.model.address.Address;
 import ghidra.program.model.data.DataType;
 import ghidra.program.model.data.StandAloneDataTypeManager;
 import re3lib.FunctionDatabase;
 import re3lib.RecompileConfig;
 import re3lib.CParser;
 import re3lib.CTokenizer;
 import java.io.File;
 import java.io.BufferedReader;
 import java.io.FileReader;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -25,9 +32,10 @@ public class RebuildFunctionDatabase extends GhidraScript {
    functionDB = new FunctionDatabase(this);
-    scanDirectory(RecompileConfig.INSTANCE.dirDecompAuto, FunctionDatabase.Type.Auto);
+    scanFile(new File(RecompileConfig.INSTANCE.outputDir, "gh_auto/r3_engineLoop.cxx"), FunctionDatabase.Type.Auto);
-    scanDirectory(RecompileConfig.INSTANCE.dirDecompFix, FunctionDatabase.Type.Fix);
+    // scanDirectory(RecompileConfig.INSTANCE.dirDecompAuto, FunctionDatabase.Type.Auto);
-    scanDirectory(RecompileConfig.INSTANCE.dirDecompStub, FunctionDatabase.Type.Stub);
+    // scanDirectory(RecompileConfig.INSTANCE.dirDecompFix, FunctionDatabase.Type.Fix);
    // scanDirectory(RecompileConfig.INSTANCE.dirDecompStub, FunctionDatabase.Type.Stub);
    println("Applying default filters...");
    functionDB.applyDefaultFilters(rebuildAllGlobals);
@@ -38,10 +46,10 @@ public class RebuildFunctionDatabase extends GhidraScript {
    println("Function database rebuilt successfully.");
    // for (FunctionDatabase.Entry entry : functionDB.entries) {
-    //   println(entry.address + " " + entry.name + " " + entry.file.getName());
+    // println(entry.address + " " + entry.name + " " + entry.file.getName());
-    //   for (FunctionDatabase.Dependency dependency : entry.dependencies) {
+    // for (FunctionDatabase.Dependency dependency : entry.dependencies) {
-    //     println("  " + dependency.address + " " + dependency.name);
+    // println(" " + dependency.address + " " + dependency.name);
-    //   }
+    // }
    // }
  }
@@ -55,64 +63,85 @@ public class RebuildFunctionDatabase extends GhidraScript {
    }
  }
-  private void scanFile(File file, FunctionDatabase.Type type) throws Exception {
+  private void parseOld(BufferedReader reader, File file, FunctionDatabase.Type type) throws Exception {
-    println("Scanning " + file);
+    String line;
-    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
+    Pattern dependencyPattern = Pattern.compile("(\\w+)\\s+(\\w+)\\(.*\\);\\s*//\\s*([0-9A-Fa-f]{8})\\s*//\\s*(.*)");
-      String line;
+    Pattern addressPattern = Pattern.compile("//\\s*([0-9A-Fa-f]{8})");
-      Pattern dependencyPattern = Pattern.compile("(\\w+)\\s+(\\w+)\\(.*\\);\\s*//\\s*([0-9A-Fa-f]{8})\\s*//\\s*(.*)");
+    Pattern functionNamePattern = Pattern.compile("(\\S+)\\s+(\\S+)\\s*\\(");
      Pattern addressPattern = Pattern.compile("//\\s*([0-9A-Fa-f]{8})");
      Pattern functionNamePattern = Pattern.compile("(\\S+)\\s+(\\S+)\\s*\\(");
-      List<FunctionDatabase.Dependency> dependencies = new ArrayList<>();
+    List<FunctionDatabase.Dependency> dependencies = new ArrayList<>();
-      String address = null;
+    String address = null;
-      String functionName = null;
+    String functionName = null;
-      while ((line = reader.readLine()) != null) {
+    while ((line = reader.readLine()) != null) {
-        Matcher dependencyMatcher = dependencyPattern.matcher(line);
+      Matcher dependencyMatcher = dependencyPattern.matcher(line);
-        if (dependencyMatcher.find()) {
+      if (dependencyMatcher.find()) {
-          // println("Found dependency: " + dependencyMatcher.group(3));
+        // println("Found dependency: " + dependencyMatcher.group(3));
-          Address depAddress = currentProgram.getAddressFactory().getAddress(dependencyMatcher.group(3));
+        Address depAddress = currentProgram.getAddressFactory().getAddress(dependencyMatcher.group(3));
-          String name = dependencyMatcher.group(2);
+        String name = dependencyMatcher.group(2);
-          FunctionDatabase.Dependency dependency = functionDB.new Dependency(depAddress, name);
+        FunctionDatabase.Dependency dependency = functionDB.new Dependency(depAddress, name);
-          dependencies.add(dependency);
+        dependencies.add(dependency);
-          continue;
+        continue;
-        }
+      }
-        Matcher addressMatcher = addressPattern.matcher(line);
+      Matcher addressMatcher = addressPattern.matcher(line);
-        if (addressMatcher.find()) {
+      if (addressMatcher.find()) {
-          // println("Found address: " + addressMatcher.group(1));
+        // println("Found address: " + addressMatcher.group(1));
-          address = addressMatcher.group(1);
+        address = addressMatcher.group(1);
-          // Skip any comments or newlines between address and function definition
+        // Skip any comments or newlines between address and function definition
-          while ((line = reader.readLine()) != null) {
+        while ((line = reader.readLine()) != null) {
-            line = line.trim();
+          line = line.trim();
-            // println("Line: " + line);
+          // println("Line: " + line);
-            if (!line.isEmpty()) {
+          if (!line.isEmpty()) {
-              Matcher functionNameMatcher = functionNamePattern.matcher(line);
+            Matcher functionNameMatcher = functionNamePattern.matcher(line);
-              if (functionNameMatcher.find()) {
+            if (functionNameMatcher.find()) {
-                functionName = functionNameMatcher.group(2).trim();
+              functionName = functionNameMatcher.group(2).trim();
-                break;
+              break;
              }
            }
          }
-          if (functionName != null) {
+        }
-            break;
+        if (functionName != null) {
-          }
+          break;
        }
      }
    }
-      if (address != null && functionName != null) {
+    if (address != null && functionName != null) {
-        Address functionAddress = currentProgram.getAddressFactory().getAddress(address);
+      Address functionAddress = currentProgram.getAddressFactory().getAddress(address);
-        FunctionDatabase.Entry entry = functionDB.new Entry();
+      FunctionDatabase.Entry entry = functionDB.new Entry();
-        entry.address = functionAddress;
+      entry.address = functionAddress;
-        entry.name = functionName;
+      entry.name = functionName;
-        entry.file = file;
+      entry.file = file;
-        entry.type = type;
+      entry.type = type;
-        entry.dependencies = dependencies;
+      entry.dependencies = dependencies;
-        functionDB.entries.add(entry);
+      functionDB.entries.add(entry);
-      } else {
+    } else {
-        // throw new Exception("Failed to parse function at " + file.getName());
+      // throw new Exception("Failed to parse function at " + file.getName());
-        println("Failed to parse function at " + file.getName());
+      println("Failed to parse function at " + file.getName());
-      }
+    }
  }
  /**
   * Reads one source file, runs it through {@link CTokenizer} and
   * {@link CParser}, and prints every function, function call and variable the
   * parser reported together with its start and end offsets.
   *
   * @param file the source file to read and parse
   * @param type database entry type for the file; not used by this method yet
   * @throws Exception if the file cannot be read
   */
  private void scanFile(File file, FunctionDatabase.Type type) throws Exception {
    println("Scanning " + file);

    byte[] rawBytes = Files.readAllBytes(file.toPath());
    CTokenizer tokenizer = new CTokenizer(new String(rawBytes));
    CTokenizer.TokenSet tokens = tokenizer.parse();

    CParser parser = new CParser(tokens);
    parser.parse();

    // Token dump, kept for debugging the tokenizer:
    // for (CTokenizer.Token token : tokens.getTokens()) {
    //   int line = tokens.getLine(token.ofs);
    //   println("Line " + line + ": " + token.ofs + " " + token.len + " " + token.type + " - "
    //       + tokens.getTextNoNewlines(token));
    // }

    parser.getFunctions().forEach(
        fn -> println("Function: " + fn.name + " " + fn.startOffset + " " + fn.endOffset));
    parser.getFunctionCalls().forEach(
        call -> println("FunctionCall: " + call.name + " " + call.startOffset + " " + call.endOffset));
    parser.getVariables().forEach(
        ref -> println("Variable: " + ref.name + " " + ref.startOffset + " " + ref.endOffset));
  }
 }
--- a/scripts/re3lib/CParser.java
+++ b/scripts/re3lib/CParser.java
@@ -0,0 +1,194 @@
 package re3lib;
 import java.util.*;
 import re3lib.CTokenizer.Token;
 /**
  * Heuristic scanner over a {@link CTokenizer.TokenSet} that collects function
  * declarations/definitions, function calls and plain identifier references.
  *
  * <p>This is not a real C parser. Classification is based only on the types
  * of neighbouring tokens:
  * <ul>
  * <li>an identifier followed by {@code (} and preceded by an identifier or an
  * {@code OTHER} token is recorded as a function declaration or definition;</li>
  * <li>any other identifier followed by {@code (} is recorded as a call;</li>
  * <li>every remaining identifier is recorded as a variable reference.</li>
  * </ul>
  *
  * <p>NOTE: after a declaration, definition or call is recorded, scanning
  * resumes behind its closing parenthesis/brace, so identifiers inside argument
  * lists and function bodies are not visited.
  */
 public class CParser {
  private final CTokenizer.TokenSet tokenSet;
  private final List<Variable> variables;
  private final List<Function> functions;
  private final List<FunctionCall> functionCalls;

  /**
   * @param tokenSet the tokens to scan, as produced by {@link CTokenizer#parse()}
   */
  public CParser(CTokenizer.TokenSet tokenSet) {
    this.tokenSet = tokenSet;
    this.variables = new ArrayList<>();
    this.functions = new ArrayList<>();
    this.functionCalls = new ArrayList<>();
  }

  // Index of the token currently examined by parse(); the parseXxx helpers read
  // it and return the index of the last token they consumed.
  int index = 0;

  /**
   * Scans all tokens and fills the variable, function and function-call lists.
   * Results of a previous call are discarded first, so calling this method
   * again does not duplicate entries.
   */
  public void parse() {
    CTokenizer.Token[] tokens = tokenSet.getTokens();
    variables.clear();
    functions.clear();
    functionCalls.clear();

    // Every helper returns the index of the last token it consumed; the loop
    // increment then moves on to the first unconsumed token.
    for (index = 0; index < tokens.length; index++) {
      CTokenizer.TokenType type = tokens[index].type;
      if (type == CTokenizer.TokenType.HASH) {
        index = parsePreprocessorExpression();
      } else if (type == CTokenizer.TokenType.IDENTIFIER) {
        boolean followedByParen = index + 1 < tokens.length
            && tokens[index + 1].type == CTokenizer.TokenType.L_PAREN;
        if (!followedByParen) {
          // Variable reference
          index = parseVariableReference();
        } else if (index > 0 && (tokens[index - 1].type == CTokenizer.TokenType.IDENTIFIER ||
            tokens[index - 1].type == CTokenizer.TokenType.OTHER)) {
          // Function declaration or definition
          index = parseFunctionDeclaration();
        } else {
          // Function call
          index = parseFunctionCall();
        }
      }
      // Comments and all other token types carry no information for this scan.
    }
  }

  /**
   * Skips a preprocessor directive.
   *
   * @return the index of the last token that starts on the same line as the
   *         {@code #} token at the current index (line continuations are not
   *         followed)
   */
  private int parsePreprocessorExpression() {
    CTokenizer.Token[] tokens = tokenSet.getTokens();
    int index = this.index;
    if (tokens[index].type == CTokenizer.TokenType.HASH) {
      // getLine() expects a character offset, not a token index.
      int startLine = tokenSet.getLine(tokens[index].ofs);
      while (index < tokens.length && tokenSet.getLine(tokens[index].ofs) <= startLine) {
        index++;
      }
      // index now points at the first token of a later line (or past the end);
      // step back to the last token of the directive.
      index--;
    }
    return index;
  }

  /**
   * Records the function declaration or definition whose name is the current
   * token. When the parameter list is directly followed by {@code {}, the
   * entry is marked as a definition and extends to the matching {@code }}.
   *
   * @return the index of the last consumed token, or the current index if no
   *         matching closing parenthesis/brace exists
   */
  private int parseFunctionDeclaration() {
    CTokenizer.Token[] tokens = tokenSet.getTokens();
    String name = tokenSet.getTextNoNewlines(tokens[index]);

    int endIndex = findClosingParenthesis(index + 1);
    if (endIndex == -1) {
      return index;
    }

    boolean isDefinition = false;
    if (endIndex + 1 < tokens.length && tokens[endIndex + 1].type == CTokenizer.TokenType.L_BRACE) {
      isDefinition = true;
      endIndex = findClosingBrace(endIndex + 1);
      if (endIndex == -1) {
        return index;
      }
    }

    CTokenizer.Token last = tokens[endIndex];
    functions.add(new Function(name, tokens[index].ofs, last.ofs + last.len, isDefinition));
    return endIndex;
  }

  /**
   * Records the function call whose name is the current token.
   *
   * @return the index of the call's closing parenthesis, or the current index
   *         if the parenthesis is never closed
   */
  private int parseFunctionCall() {
    CTokenizer.Token[] tokens = tokenSet.getTokens();
    String name = tokenSet.getTextNoNewlines(tokens[index]);

    int endIndex = findClosingParenthesis(index + 1);
    if (endIndex == -1) {
      return index;
    }

    CTokenizer.Token last = tokens[endIndex];
    functionCalls.add(new FunctionCall(name, tokens[index].ofs, last.ofs + last.len));
    return endIndex;
  }

  /**
   * Records the current identifier token as a variable reference.
   *
   * @return the current index; only this one token is consumed, the loop in
   *         {@link #parse()} performs the advance
   */
  private int parseVariableReference() {
    CTokenizer.Token token = tokenSet.getTokens()[index];
    String name = tokenSet.getTextNoNewlines(token);
    variables.add(new Variable(name, token.ofs, token.ofs + token.len));
    return index;
  }

  /**
   * @param startIndex index of an opening parenthesis token
   * @return index of the matching closing parenthesis, or -1 if there is none
   */
  private int findClosingParenthesis(int startIndex) {
    CTokenizer.Token[] tokens = tokenSet.getTokens();
    int parenCount = 1;
    for (int i = startIndex + 1; i < tokens.length; i++) {
      if (tokens[i].type == CTokenizer.TokenType.L_PAREN) {
        parenCount++;
      } else if (tokens[i].type == CTokenizer.TokenType.R_PAREN) {
        parenCount--;
        if (parenCount == 0) {
          return i;
        }
      }
    }
    return -1;
  }

  /**
   * @param startIndex index of an opening brace token
   * @return index of the matching closing brace, or -1 if there is none
   */
  private int findClosingBrace(int startIndex) {
    CTokenizer.Token[] tokens = tokenSet.getTokens();
    int braceCount = 1;
    for (int i = startIndex + 1; i < tokens.length; i++) {
      if (tokens[i].type == CTokenizer.TokenType.L_BRACE) {
        braceCount++;
      } else if (tokens[i].type == CTokenizer.TokenType.R_BRACE) {
        braceCount--;
        if (braceCount == 0) {
          return i;
        }
      }
    }
    return -1;
  }

  /** @return the identifier references collected by the last {@link #parse()} */
  public List<Variable> getVariables() {
    return variables;
  }

  /** @return the declarations/definitions collected by the last {@link #parse()} */
  public List<Function> getFunctions() {
    return functions;
  }

  /** @return the function calls collected by the last {@link #parse()} */
  public List<FunctionCall> getFunctionCalls() {
    return functionCalls;
  }

  /** An identifier that is not directly followed by an opening parenthesis. */
  public static class Variable {
    public final String name;
    /** Character offset of the first character of the identifier. */
    public final int startOffset;
    /** Character offset one past the last character of the identifier. */
    public final int endOffset;

    public Variable(String name, int startOffset, int endOffset) {
      this.name = name;
      this.startOffset = startOffset;
      this.endOffset = endOffset;
    }
  }

  /** A function declaration or definition. */
  public static class Function {
    public final String name;
    /** Character offset of the first character of the function name. */
    public final int startOffset;
    /** Character offset one past the closing parenthesis (or brace for a definition). */
    public final int endOffset;
    /** True when the parameter list is directly followed by a braced body. */
    public final boolean isDefinition;

    public Function(String name, int startOffset, int endOffset, boolean isDefinition) {
      this.name = name;
      this.startOffset = startOffset;
      this.endOffset = endOffset;
      this.isDefinition = isDefinition;
    }
  }

  /** A function call expression. */
  public static class FunctionCall {
    public final String name;
    /** Character offset of the first character of the callee name. */
    public final int startOffset;
    /** Character offset one past the call's closing parenthesis. */
    public final int endOffset;

    public FunctionCall(String name, int startOffset, int endOffset) {
      this.name = name;
      this.startOffset = startOffset;
      this.endOffset = endOffset;
    }
  }
 }
--- a/scripts/re3lib/CTokenizer.java
+++ b/scripts/re3lib/CTokenizer.java
@@ -0,0 +1,346 @@
 package re3lib;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 import ghidra.app.script.GhidraScript;
 /**
  * Minimal single-pass tokenizer for C-like source text.
  *
  * <p>Tokens are character ranges into the input text. Identifiers and
  * hexadecimal literals are recognized by lookahead; everything else is
  * classified character by character and consecutive characters of the same
  * class are merged into one token. Punctuation tokens (parentheses, braces,
  * semicolon, comma, hash) are always emitted one character at a time.
  *
  * <p>Known limitations: whitespace is classified as {@code OTHER} and only
  * trimmed from the edges of a token, so an operator run such as {@code = *}
  * becomes a single token; character literals are not recognized, so a
  * {@code '"'} literal opens a string.
  */
 public class CTokenizer {
  /** Token categories produced by {@link #parse()}. */
  public enum TokenType {
    UNDEFINED,
    HASH,
    L_PAREN,
    R_PAREN,
    L_BRACE,
    R_BRACE,
    SEMICOLON,
    COMMA,
    COMMENT,
    BLOCK_COMMENT,
    IDENTIFIER,
    STRING_LITERAL,
    NUMERIC_LITERAL,
    NUMERIC_LITERAL_HEX,
    OTHER,
    KEYWORD,
  }

  /** A token, described as a character range within the tokenized text. */
  public class Token {
    /** Offset of the token's first character in the text. */
    public int ofs;
    /** Number of characters in the token. */
    public int len;
    public TokenType type;
  }

  /** Result of {@link #parse()}: the tokens plus the text and line table they refer to. */
  public class TokenSet {
    public final Token[] tokens;
    public final String text;
    private final TreeMap<Integer, Integer> lineNumberTable;

    TokenSet(Token[] tokens, String text, TreeMap<Integer, Integer> lineNumberTable) {
      this.tokens = tokens;
      this.text = text;
      this.lineNumberTable = lineNumberTable;
    }

    public Token[] getTokens() {
      return this.tokens;
    }

    /**
     * @param offset a character offset into the text (not a token index)
     * @return the 1-based line number containing that offset, or -1 if the
     *         offset lies before the start of the text
     */
    public int getLine(int offset) {
      Map.Entry<Integer, Integer> entry = lineNumberTable.floorEntry(offset);
      return entry != null ? entry.getValue() : -1;
    }

    /**
     * @return the token's text with all line-break characters
     *         ({@code \n} and {@code \r}) removed
     */
    public String getTextNoNewlines(Token token) {
      return getText(token).replace("\n", "").replace("\r", "");
    }
  }

  private final String text;
  private TreeMap<Integer, Integer> lineNumberTable;
  public GhidraScript log;

  /**
   * @param text the source text to tokenize
   */
  public CTokenizer(String text) {
    this.text = text;
  }

  /**
   * @param text   the source text to tokenize
   * @param script script stored in {@link #log}
   */
  public CTokenizer(String text, GhidraScript script) {
    this.text = text;
    // Was "this.log = log", a self-assignment that discarded the argument.
    this.log = script;
  }

  String getText(Token token) {
    return getText(token.ofs, token.len);
  }

  String getText(int ofs, int len) {
    return text.substring(ofs, ofs + len);
  }

  TokenType lastTokenType = TokenType.UNDEFINED;

  /**
   * Appends the range {@code [tokenStart, tokenEnd)} as a token after trimming
   * whitespace from both ends. Nothing is added when the type is
   * {@code UNDEFINED} or the trimmed range is empty. Identifiers whose text is
   * a keyword are stored as {@code KEYWORD}.
   *
   * @param tokens      the list to append to
   * @param tokenStart  offset of the first character of the range
   * @param tokenEnd    offset one past the last character of the range
   * @param currentType the type to assign to the token
   */
  private void insertToken(List<Token> tokens, int tokenStart, int tokenEnd, TokenType currentType) {
    if (currentType == TokenType.UNDEFINED || tokenStart >= tokenEnd) {
      return;
    }
    while (tokenStart < tokenEnd && Character.isWhitespace(text.charAt(tokenStart))) {
      tokenStart++;
    }
    while (tokenEnd > tokenStart && Character.isWhitespace(text.charAt(tokenEnd - 1))) {
      tokenEnd--;
    }
    if (tokenStart == tokenEnd) {
      return;
    }

    Token token = new Token();
    token.ofs = tokenStart;
    token.len = tokenEnd - tokenStart;
    token.type = currentType;
    if (currentType == TokenType.IDENTIFIER && isKeyword(getText(token))) {
      token.type = TokenType.KEYWORD;
    }
    tokens.add(token);
  }

  /**
   * Inserts the token that is still pending when the end of the text is reached.
   *
   * @param tokens      the list to append to
   * @param tokenStart  offset of the first character of the pending token
   * @param currentType the type of the pending token
   */
  private void handleLastToken(List<Token> tokens, int tokenStart, TokenType currentType) {
    insertToken(tokens, tokenStart, text.length(), currentType);
  }

  /** Builds the map from "offset of the first character of a line" to its 1-based line number. */
  void buildLineNumberTable() {
    this.lineNumberTable = new TreeMap<>();
    int lineNumber = 1;
    lineNumberTable.put(0, 1);
    for (int i = 0; i < text.length(); i++) {
      if (text.charAt(i) == '\n') {
        lineNumber++;
        lineNumberTable.put(i + 1, lineNumber);
      }
    }
  }

  // Tokenization state; reset at the start of every parse().
  List<Token> tokens = new ArrayList<>();
  int tokenStart = 0;
  TokenType currentType = TokenType.UNDEFINED;
  boolean inComment = false;
  boolean inBlockComment = false;
  boolean inString = false;

  /** A typed character range found by lookahead; the type is UNDEFINED when nothing matched. */
  class ScanRange {
    int start;
    int end;
    TokenType type;

    ScanRange(int start, int end, TokenType type) {
      this.start = start;
      this.end = end;
      this.type = type;
    }

    // "No match" result
    ScanRange() {
      this.type = TokenType.UNDEFINED;
    }

    boolean isValid() {
      return this.type != TokenType.UNDEFINED;
    }
  }

  // Matches "0x"/"0X" followed by at least one hexadecimal digit.
  private ScanRange tryParseHexadecimal(int currentIndex) {
    if (text.charAt(currentIndex) == '0' && currentIndex + 1 < text.length()) {
      char nextChar = text.charAt(currentIndex + 1);
      if (nextChar == 'x' || nextChar == 'X') {
        int tempIndex = currentIndex + 2;
        while (tempIndex < text.length() && Character.digit(text.charAt(tempIndex), 16) != -1) {
          tempIndex++;
        }
        if (tempIndex > currentIndex + 2) {
          return new ScanRange(currentIndex, tempIndex, TokenType.NUMERIC_LITERAL_HEX);
        }
      }
    }
    return new ScanRange();
  }

  // Identifier that starts with a letter or underscore, and can contain letters,
  // digits, and underscores
  private ScanRange tryParseIdentifier(int currentIndex) {
    char first = text.charAt(currentIndex);
    if (Character.isLetter(first) || first == '_') {
      int tempIndex = currentIndex + 1;
      while (tempIndex < text.length()) {
        char c = text.charAt(tempIndex);
        if (!(Character.isLetter(c) || Character.isDigit(c) || c == '_')) {
          break;
        }
        tempIndex++;
      }
      return new ScanRange(currentIndex, tempIndex, TokenType.IDENTIFIER);
    }
    return new ScanRange();
  }

  private ScanRange tryParseWithLookahead(int currentIndex) {
    ScanRange sr = tryParseHexadecimal(currentIndex);
    if (!sr.isValid()) {
      sr = tryParseIdentifier(currentIndex);
    }
    return sr;
  }

  /**
   * @return true if {@code text} is one of the keywords this tokenizer
   *         distinguishes from ordinary identifiers
   */
  public boolean isKeyword(String text) {
    switch (text) {
      case "while":
      case "for":
      case "if":
      case "else":
      case "return":
      case "struct":
      case "typedef":
      case "enum":
      case "union":
      case "const":
      case "static":
        return true;
      default:
        return false;
    }
  }

  // Classifies a character that is outside comments/strings and did not start
  // an identifier or hexadecimal literal.
  private static TokenType classifyChar(char c) {
    if (Character.isDigit(c)) {
      return TokenType.NUMERIC_LITERAL;
    }
    switch (c) {
      case '(':
        return TokenType.L_PAREN;
      case ')':
        return TokenType.R_PAREN;
      case '{':
        return TokenType.L_BRACE;
      case '}':
        return TokenType.R_BRACE;
      case ';':
        return TokenType.SEMICOLON;
      case ',':
        return TokenType.COMMA;
      case '#':
        return TokenType.HASH;
      default:
        return TokenType.OTHER;
    }
  }

  // Token types that always consist of exactly one character.
  private static boolean isSingleCharToken(TokenType type) {
    switch (type) {
      case HASH:
      case L_PAREN:
      case R_PAREN:
      case L_BRACE:
      case R_BRACE:
      case SEMICOLON:
      case COMMA:
        return true;
      default:
        return false;
    }
  }

  /**
   * Tokenizes the text passed to the constructor.
   *
   * <p>All tokenization state is reset first, so the method can be called
   * repeatedly and returns an equivalent result each time.
   *
   * @return the tokens in source order together with the text and line table
   */
  public TokenSet parse() {
    this.buildLineNumberTable();

    tokens.clear();
    tokenStart = 0;
    currentType = TokenType.UNDEFINED;
    inComment = false;
    inBlockComment = false;
    inString = false;

    int length = text.length();
    int index = 0;
    while (index < length) {
      char currentChar = text.charAt(index);
      boolean hasNext = index + 1 < length;
      TokenType newType;
      // True when this character must begin a new token even if the pending
      // token has the same type (punctuation, start of a comment or string).
      boolean startsNewToken = false;
      // Number of characters consumed by this iteration.
      int advance = 1;

      if (inBlockComment) {
        newType = TokenType.BLOCK_COMMENT;
        if (currentChar == '*' && hasNext && text.charAt(index + 1) == '/') {
          inBlockComment = false;
          advance = 2;
        }
      } else if (inComment) {
        newType = TokenType.COMMENT;
        if (currentChar == '\n') {
          inComment = false;
        }
      } else if (inString) {
        newType = TokenType.STRING_LITERAL;
        if (currentChar == '\\' && hasNext) {
          // Skip the escaped character so that \" does not end the literal.
          advance = 2;
        } else if (currentChar == '"') {
          inString = false;
        }
      } else if (currentChar == '/' && hasNext && text.charAt(index + 1) == '*') {
        inBlockComment = true;
        newType = TokenType.BLOCK_COMMENT;
        startsNewToken = true;
        // Consume the opener as a unit so that "/*/" is not taken as a closed comment.
        advance = 2;
      } else if (currentChar == '/' && hasNext && text.charAt(index + 1) == '/') {
        inComment = true;
        newType = TokenType.COMMENT;
        startsNewToken = true;
      } else if (currentChar == '"') {
        inString = true;
        newType = TokenType.STRING_LITERAL;
        startsNewToken = true;
      } else {
        ScanRange range = tryParseWithLookahead(index);
        if (range.isValid()) {
          // Flush the pending token, then emit the identifier/hex literal.
          insertToken(tokens, tokenStart, range.start, currentType);
          insertToken(tokens, range.start, range.end, range.type);
          currentType = TokenType.UNDEFINED;
          tokenStart = range.end;
          index = range.end;
          // Resume exactly at range.end; falling through to the generic
          // advance would skip classification of the following character.
          continue;
        }
        newType = classifyChar(currentChar);
        startsNewToken = isSingleCharToken(newType);
      }

      if (startsNewToken || newType != currentType) {
        insertToken(tokens, tokenStart, index, currentType);
        tokenStart = index;
        currentType = newType;
      }
      index += advance;
    }

    // Handle the last token
    handleLastToken(tokens, tokenStart, currentType);
    return new TokenSet(tokens.toArray(new Token[0]), text, lineNumberTable);
  }
 }