WIP C parser

This commit is contained in:
Guus Waals 2024-10-06 19:44:32 +08:00
parent 7090abf5a2
commit 2da111a348
4 changed files with 684 additions and 115 deletions

View File

@@ -18,28 +18,28 @@ import re3lib.TypeDumper;
public class DumpCurrentFunctionN extends GhidraScript {
final int NumFunctions = 8;
class Entry {
Function function;
}
class QueueEntry {
Function function;
List<Function> callees;
}
// class Entry {
// Function function;
// }
// class QueueEntry {
// Function function;
// List<Function> callees;
// }
HashSet<Address> visited = new HashSet<>();
// HashSet<Address> visited = new HashSet<>();
QueueEntry enter(Function function) {
if (visited.contains(function.getEntryPoint()))
return null;
// QueueEntry enter(Function function) {
// if (visited.contains(function.getEntryPoint()))
// return null;
visited.add(function.getEntryPoint());
// visited.add(function.getEntryPoint());
QueueEntry entry = new QueueEntry();
entry.function = function;
// QueueEntry entry = new QueueEntry();
// entry.function = function;
function.getCalledFunctions(monitor);
// function.getCalledFunctions(monitor);
}
// }
@Override
public void run() throws Exception {
@@ -51,57 +51,57 @@ public class DumpCurrentFunctionN extends GhidraScript {
FunctionDumper functionDumper = new FunctionDumper(this, globalDumper);
// PCallTracer tracer = new PCallTracer();
// tracer.setBlacklist(functionDumper.functionAddrBlackList);
// tracer.traceCalls(getFunctionContaining(currentAddress));
PCallTracer tracer = new PCallTracer();
tracer.setBlacklist(functionDumper.functionAddrBlackList);
tracer.traceCalls(getFunctionContaining(currentAddress));
List<Address> queue = new ArrayList<>();
List<Function> functionsToDump = new ArrayList<>();
List<Function> functionsToDumpNew = new ArrayList<>();
for (Function func : tracer.out) {
if (FunctionDumper.isDumpedFix(func))
continue;
// List<Function> functionsToDump = new ArrayList<>();
// List<Function> functionsToDumpNew = new ArrayList<>();
// for (Function func : tracer.out) {
// if (FunctionDumper.isDumpedFix(func))
// continue;
println("Dump: " + func.getName());
functionsToDump.add(func);
// println("Dump: " + func.getName());
// functionsToDump.add(func);
if (!FunctionDumper.isDumpedAuto(func))
functionsToDumpNew.add(func);
}
// if (!FunctionDumper.isDumpedAuto(func))
// functionsToDumpNew.add(func);
// }
if (!functionsToDump.isEmpty()) {
String newOpt = "Only new (" + functionsToDumpNew.size() + ")";
String okOpt = "Yes (" + functionsToDump.size() + ")";
String choice = askChoice("Confirmation", "About to generate " + functionsToDump.size() + " functions ("
+ functionsToDumpNew.size() + " new), continue?",
new ArrayList<String>() {
{
add(okOpt);
add(newOpt);
add("No");
}
}, okOpt);
if (choice == okOpt) {
} else if (choice == newOpt) {
functionsToDump = functionsToDumpNew;
} else {
return;
}
// if (!functionsToDump.isEmpty()) {
// String newOpt = "Only new (" + functionsToDumpNew.size() + ")";
// String okOpt = "Yes (" + functionsToDump.size() + ")";
// String choice = askChoice("Confirmation", "About to generate " + functionsToDump.size() + " functions ("
// + functionsToDumpNew.size() + " new), continue?",
// new ArrayList<String>() {
// {
// add(okOpt);
// add(newOpt);
// add("No");
// }
// }, okOpt);
// if (choice == okOpt) {
// } else if (choice == newOpt) {
// functionsToDump = functionsToDumpNew;
// } else {
// return;
// }
for (Function func : functionsToDump) {
functionDumper.dump(func);
}
// for (Function func : functionsToDump) {
// functionDumper.dump(func);
// }
if (functionDumper.createdFile)
RecompileConfig.INSTANCE.touchCMakeTimestamp();
// if (functionDumper.createdFile)
// RecompileConfig.INSTANCE.touchCMakeTimestamp();
globalDumper.dumpGlobals();
globalDumper.saveGlobalManifest();
}
// globalDumper.dumpGlobals();
// globalDumper.saveGlobalManifest();
// }
// Dump types
TypeDumper dumper = new TypeDumper(this);
dumper.run();
// // Dump types
// TypeDumper dumper = new TypeDumper(this);
// dumper.run();
}
}

View File

@@ -3,14 +3,21 @@
import ghidra.app.script.GhidraScript;
import ghidra.program.model.address.Address;
import ghidra.program.model.data.DataType;
import ghidra.program.model.data.StandAloneDataTypeManager;
import re3lib.FunctionDatabase;
import re3lib.RecompileConfig;
import re3lib.CParser;
import re3lib.CTokenizer;
import java.io.File;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -25,9 +32,10 @@ public class RebuildFunctionDatabase extends GhidraScript {
functionDB = new FunctionDatabase(this);
scanDirectory(RecompileConfig.INSTANCE.dirDecompAuto, FunctionDatabase.Type.Auto);
scanDirectory(RecompileConfig.INSTANCE.dirDecompFix, FunctionDatabase.Type.Fix);
scanDirectory(RecompileConfig.INSTANCE.dirDecompStub, FunctionDatabase.Type.Stub);
scanFile(new File(RecompileConfig.INSTANCE.outputDir, "gh_auto/r3_engineLoop.cxx"), FunctionDatabase.Type.Auto);
// scanDirectory(RecompileConfig.INSTANCE.dirDecompAuto, FunctionDatabase.Type.Auto);
// scanDirectory(RecompileConfig.INSTANCE.dirDecompFix, FunctionDatabase.Type.Fix);
// scanDirectory(RecompileConfig.INSTANCE.dirDecompStub, FunctionDatabase.Type.Stub);
println("Applying default filters...");
functionDB.applyDefaultFilters(rebuildAllGlobals);
@@ -38,10 +46,10 @@ public class RebuildFunctionDatabase extends GhidraScript {
println("Function database rebuilt successfully.");
// for (FunctionDatabase.Entry entry : functionDB.entries) {
// println(entry.address + " " + entry.name + " " + entry.file.getName());
// for (FunctionDatabase.Dependency dependency : entry.dependencies) {
// println(" " + dependency.address + " " + dependency.name);
// }
// println(entry.address + " " + entry.name + " " + entry.file.getName());
// for (FunctionDatabase.Dependency dependency : entry.dependencies) {
// println(" " + dependency.address + " " + dependency.name);
// }
// }
}
@@ -55,64 +63,85 @@ public class RebuildFunctionDatabase extends GhidraScript {
}
}
private void scanFile(File file, FunctionDatabase.Type type) throws Exception {
println("Scanning " + file);
try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
String line;
Pattern dependencyPattern = Pattern.compile("(\\w+)\\s+(\\w+)\\(.*\\);\\s*//\\s*([0-9A-Fa-f]{8})\\s*//\\s*(.*)");
Pattern addressPattern = Pattern.compile("//\\s*([0-9A-Fa-f]{8})");
Pattern functionNamePattern = Pattern.compile("(\\S+)\\s+(\\S+)\\s*\\(");
private void parseOld(BufferedReader reader, File file, FunctionDatabase.Type type) throws Exception {
String line;
Pattern dependencyPattern = Pattern.compile("(\\w+)\\s+(\\w+)\\(.*\\);\\s*//\\s*([0-9A-Fa-f]{8})\\s*//\\s*(.*)");
Pattern addressPattern = Pattern.compile("//\\s*([0-9A-Fa-f]{8})");
Pattern functionNamePattern = Pattern.compile("(\\S+)\\s+(\\S+)\\s*\\(");
List<FunctionDatabase.Dependency> dependencies = new ArrayList<>();
String address = null;
String functionName = null;
List<FunctionDatabase.Dependency> dependencies = new ArrayList<>();
String address = null;
String functionName = null;
while ((line = reader.readLine()) != null) {
Matcher dependencyMatcher = dependencyPattern.matcher(line);
if (dependencyMatcher.find()) {
// println("Found dependency: " + dependencyMatcher.group(3));
Address depAddress = currentProgram.getAddressFactory().getAddress(dependencyMatcher.group(3));
String name = dependencyMatcher.group(2);
FunctionDatabase.Dependency dependency = functionDB.new Dependency(depAddress, name);
dependencies.add(dependency);
continue;
}
while ((line = reader.readLine()) != null) {
Matcher dependencyMatcher = dependencyPattern.matcher(line);
if (dependencyMatcher.find()) {
// println("Found dependency: " + dependencyMatcher.group(3));
Address depAddress = currentProgram.getAddressFactory().getAddress(dependencyMatcher.group(3));
String name = dependencyMatcher.group(2);
FunctionDatabase.Dependency dependency = functionDB.new Dependency(depAddress, name);
dependencies.add(dependency);
continue;
}
Matcher addressMatcher = addressPattern.matcher(line);
if (addressMatcher.find()) {
// println("Found address: " + addressMatcher.group(1));
address = addressMatcher.group(1);
// Skip any comments or newlines between address and function definition
while ((line = reader.readLine()) != null) {
line = line.trim();
// println("Line: " + line);
if (!line.isEmpty()) {
Matcher functionNameMatcher = functionNamePattern.matcher(line);
if (functionNameMatcher.find()) {
functionName = functionNameMatcher.group(2).trim();
break;
}
Matcher addressMatcher = addressPattern.matcher(line);
if (addressMatcher.find()) {
// println("Found address: " + addressMatcher.group(1));
address = addressMatcher.group(1);
// Skip any comments or newlines between address and function definition
while ((line = reader.readLine()) != null) {
line = line.trim();
// println("Line: " + line);
if (!line.isEmpty()) {
Matcher functionNameMatcher = functionNamePattern.matcher(line);
if (functionNameMatcher.find()) {
functionName = functionNameMatcher.group(2).trim();
break;
}
}
if (functionName != null) {
break;
}
}
if (functionName != null) {
break;
}
}
}
if (address != null && functionName != null) {
Address functionAddress = currentProgram.getAddressFactory().getAddress(address);
FunctionDatabase.Entry entry = functionDB.new Entry();
entry.address = functionAddress;
entry.name = functionName;
entry.file = file;
entry.type = type;
entry.dependencies = dependencies;
functionDB.entries.add(entry);
} else {
// throw new Exception("Failed to parse function at " + file.getName());
println("Failed to parse function at " + file.getName());
}
if (address != null && functionName != null) {
Address functionAddress = currentProgram.getAddressFactory().getAddress(address);
FunctionDatabase.Entry entry = functionDB.new Entry();
entry.address = functionAddress;
entry.name = functionName;
entry.file = file;
entry.type = type;
entry.dependencies = dependencies;
functionDB.entries.add(entry);
} else {
// throw new Exception("Failed to parse function at " + file.getName());
println("Failed to parse function at " + file.getName());
}
}
private void scanFile(File file, FunctionDatabase.Type type) throws Exception {
println("Scanning " + file);
String text = new String(Files.readAllBytes(file.toPath()));
CTokenizer.TokenSet tokens = new CTokenizer(text).parse();
CParser parser = new CParser(tokens);
parser.parse();
// for (CTokenizer.Token token : tokens.getTokens()) {
// int line = tokens.getLine(token.ofs);
// println("Line " + line + ": " + token.ofs + " " + token.len + " " + token.type + " - "
// + tokens.getTextNoNewlines(token));
// }
for (CParser.Function function : parser.getFunctions()) {
println("Function: " + function.name + " " + function.startOffset + " " + function.endOffset);
}
for (CParser.FunctionCall functionCall : parser.getFunctionCalls()) {
println("FunctionCall: " + functionCall.name + " " + functionCall.startOffset + " " + functionCall.endOffset);
}
for (CParser.Variable variable : parser.getVariables()) {
println("Variable: " + variable.name + " " + variable.startOffset + " " + variable.endOffset);
}
}
}

194
scripts/re3lib/CParser.java Normal file
View File

@@ -0,0 +1,194 @@
package re3lib;
import java.util.*;
import re3lib.CTokenizer.Token;
/**
 * Very small, heuristic C parser that walks a CTokenizer token stream once and
 * collects function declarations/definitions, function calls, and variable
 * references together with their text offsets.
 *
 * <p>Not a real grammar: detection is based on local token patterns
 * (IDENTIFIER followed by L_PAREN, etc.), which is sufficient for scanning the
 * decompiler-generated sources this project produces.
 */
public class CParser {
  // Token stream being parsed; produced by CTokenizer.parse().
  private final CTokenizer.TokenSet tokenSet;
  // Results collected by parse(); exposed through the getters below.
  private final List<Variable> variables;
  private final List<Function> functions;
  private final List<FunctionCall> functionCalls;

  public CParser(CTokenizer.TokenSet tokenSet) {
    this.tokenSet = tokenSet;
    this.variables = new ArrayList<>();
    this.functions = new ArrayList<>();
    this.functionCalls = new ArrayList<>();
  }

  // Cursor into the token array, shared with the parse* helpers.
  int index = 0;

  /**
   * Single forward pass over the tokens. Comments are skipped, '#' starts a
   * preprocessor directive (skipped to end of line), and identifiers are
   * classified as declarations, calls, or variable references.
   */
  public void parse() {
    CTokenizer.Token[] tokens = tokenSet.getTokens();
    for (index = 0; index < tokens.length; index++) {
      CTokenizer.Token token = tokens[index];
      if (token.type == CTokenizer.TokenType.BLOCK_COMMENT || token.type == CTokenizer.TokenType.COMMENT) {
        continue;
      } else if (token.type == CTokenizer.TokenType.HASH) {
        index = parsePreprocessorExpression();
      } else if (tokens[index].type == CTokenizer.TokenType.IDENTIFIER) {
        if (index + 1 < tokens.length && tokens[index + 1].type == CTokenizer.TokenType.L_PAREN) {
          // IDENTIFIER '(' is a call or a declaration/definition; a preceding
          // IDENTIFIER/OTHER token is assumed to be a return type, marking a
          // declaration or definition.
          if (index > 0 && (tokens[index - 1].type == CTokenizer.TokenType.IDENTIFIER ||
              tokens[index - 1].type == CTokenizer.TokenType.OTHER)) {
            index = parseFunctionDeclaration();
          } else {
            index = parseFunctionCall();
          }
        } else {
          // Bare identifier: variable reference.
          index = parseVariableReference();
        }
      }
    }
  }

  /**
   * Skips a preprocessor directive: consumes every token on the directive's
   * line and returns the index of the last such token, so the caller's loop
   * increment resumes on the next line.
   */
  private int parsePreprocessorExpression() {
    int index = this.index;
    if (tokenSet.tokens[index].type == CTokenizer.TokenType.HASH) {
      // BUGFIX: TokenSet.getLine() expects a TEXT OFFSET (its table is keyed
      // by line-start offsets); it was being passed the token index, which
      // made the line comparison meaningless.
      int startLine = tokenSet.getLine(tokenSet.tokens[index].ofs);
      while (index < tokenSet.tokens.length) {
        if (tokenSet.getLine(tokenSet.tokens[index].ofs) > startLine) {
          break;
        }
        index++;
      }
      // Step back onto the directive's last token (also keeps the index in
      // bounds when the directive ends the file).
      index--;
    }
    return index;
  }

  /**
   * Parses a function declaration or definition whose name is the current
   * token. A '{' after the parameter list upgrades it to a definition and
   * extends the recorded range to the matching closing brace.
   *
   * @return the token index to resume the caller's loop from
   */
  private int parseFunctionDeclaration() {
    CTokenizer.Token[] tokens = tokenSet.getTokens();
    String name = tokenSet.getTextNoNewlines(tokens[index]);
    int endIndex = findClosingParenthesis(index + 1);
    if (endIndex == -1)
      return index; // unbalanced parens: give up on this identifier
    boolean isDefinition = false;
    if (endIndex + 1 < tokens.length && tokens[endIndex + 1].type == CTokenizer.TokenType.L_BRACE) {
      isDefinition = true;
      endIndex = findClosingBrace(endIndex + 1);
    }
    if (endIndex == -1)
      return index; // unbalanced braces
    Function function = new Function(name, tokens[index].ofs, tokens[endIndex].ofs + tokens[endIndex].len,
        isDefinition);
    functions.add(function);
    // The caller's loop increment lands on the closing token, which the main
    // dispatch then ignores.
    return endIndex - 1;
  }

  /**
   * Parses a function call whose name is the current token and records it with
   * the offsets of the full "name(...)" span.
   */
  private int parseFunctionCall() {
    CTokenizer.Token[] tokens = tokenSet.getTokens();
    String name = tokenSet.getTextNoNewlines(tokens[index]);
    int endIndex = findClosingParenthesis(index + 1);
    if (endIndex == -1)
      return index;
    FunctionCall functionCall = new FunctionCall(name, tokens[index].ofs,
        tokens[endIndex].ofs + tokens[endIndex].len);
    functionCalls.add(functionCall);
    return endIndex - 1;
  }

  /**
   * Records the current identifier as a variable reference.
   *
   * <p>BUGFIX: returns the CURRENT index; the caller's loop increment advances
   * past the identifier. The previous {@code return index + 1} combined with
   * that increment skipped the token immediately following every variable
   * reference.
   */
  private int parseVariableReference() {
    CTokenizer.Token token = tokenSet.getTokens()[index];
    String name = tokenSet.getTextNoNewlines(token);
    Variable variable = new Variable(name, token.ofs, token.ofs + token.len);
    variables.add(variable);
    return index;
  }

  /**
   * Returns the index of the R_PAREN matching the L_PAREN at startIndex, or -1
   * if the parentheses are unbalanced.
   */
  private int findClosingParenthesis(int startIndex) {
    CTokenizer.Token[] tokens = tokenSet.getTokens();
    int parenCount = 1;
    for (int i = startIndex + 1; i < tokens.length; i++) {
      if (tokens[i].type == CTokenizer.TokenType.L_PAREN) {
        parenCount++;
      } else if (tokens[i].type == CTokenizer.TokenType.R_PAREN) {
        parenCount--;
        if (parenCount == 0) {
          return i;
        }
      }
    }
    return -1;
  }

  /**
   * Returns the index of the R_BRACE matching the L_BRACE at startIndex, or -1
   * if the braces are unbalanced.
   */
  private int findClosingBrace(int startIndex) {
    CTokenizer.Token[] tokens = tokenSet.getTokens();
    int braceCount = 1;
    for (int i = startIndex + 1; i < tokens.length; i++) {
      if (tokens[i].type == CTokenizer.TokenType.L_BRACE) {
        braceCount++;
      } else if (tokens[i].type == CTokenizer.TokenType.R_BRACE) {
        braceCount--;
        if (braceCount == 0) {
          return i;
        }
      }
    }
    return -1;
  }

  public List<Variable> getVariables() {
    return variables;
  }

  public List<Function> getFunctions() {
    return functions;
  }

  public List<FunctionCall> getFunctionCalls() {
    return functionCalls;
  }

  /** A variable reference: name plus [startOffset, endOffset) in the text. */
  public static class Variable {
    public final String name;
    public final int startOffset;
    public final int endOffset;

    public Variable(String name, int startOffset, int endOffset) {
      this.name = name;
      this.startOffset = startOffset;
      this.endOffset = endOffset;
    }
  }

  /**
   * A function declaration or definition; the offsets cover the whole span
   * (through ';' for declarations is NOT included — range ends at the closing
   * ')' or '}').
   */
  public static class Function {
    public final String name;
    public final int startOffset;
    public final int endOffset;
    public final boolean isDefinition;

    public Function(String name, int startOffset, int endOffset, boolean isDefinition) {
      this.name = name;
      this.startOffset = startOffset;
      this.endOffset = endOffset;
      this.isDefinition = isDefinition;
    }
  }

  /** A function call: name plus the offsets of the full "name(...)" span. */
  public static class FunctionCall {
    public final String name;
    public final int startOffset;
    public final int endOffset;

    public FunctionCall(String name, int startOffset, int endOffset) {
      this.name = name;
      this.startOffset = startOffset;
      this.endOffset = endOffset;
    }
  }
}

View File

@@ -0,0 +1,346 @@
package re3lib;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import ghidra.app.script.GhidraScript;
/**
 * Lightweight single-pass tokenizer for C-like source text. Produces a
 * {@link TokenSet} holding the token array, the original text, and a
 * line-number lookup table.
 */
public class CTokenizer {
  /** Lexical categories produced by the tokenizer. */
  public enum TokenType {
    UNDEFINED,
    HASH,
    L_PAREN,
    R_PAREN,
    L_BRACE,
    R_BRACE,
    SEMICOLON,
    COMMA,
    COMMENT,
    BLOCK_COMMENT,
    IDENTIFIER,
    STRING_LITERAL,
    NUMERIC_LITERAL,
    NUMERIC_LITERAL_HEX,
    OTHER,
    KEYWORD,
  }

  /** A single token: the [ofs, ofs + len) slice of the source text. */
  public class Token {
    public int ofs;
    public int len;
    public TokenType type;
  }

  /** Result of {@link #parse()}: tokens, source text, and line lookup. */
  public class TokenSet {
    public final Token[] tokens;
    public final String text;
    // Maps the text offset of each line start -> 1-based line number.
    private final TreeMap<Integer, Integer> lineNumberTable;

    TokenSet(Token[] tokens, String text, TreeMap<Integer, Integer> lineNumberTable) {
      this.tokens = tokens;
      this.text = text;
      this.lineNumberTable = lineNumberTable;
    }

    public Token[] getTokens() {
      return this.tokens;
    }

    /**
     * Returns the 1-based line number containing the given TEXT OFFSET
     * (not a token index), or -1 if the offset precedes the table.
     */
    public int getLine(int offset) {
      Map.Entry<Integer, Integer> entry = lineNumberTable.floorEntry(offset);
      return entry != null ? entry.getValue() : -1;
    }

    /** Token text with any embedded newlines removed. */
    public String getTextNoNewlines(Token token) {
      String text = getText(token);
      return text.replace("\n", "");
    }
  };

  private final String text;
  private TreeMap<Integer, Integer> lineNumberTable;
  // Optional script used for debug logging; may be null.
  public GhidraScript log;

  public CTokenizer(String text) {
    this.text = text;
  }

  public CTokenizer(String text, GhidraScript script) {
    this.text = text;
    // BUGFIX: was `this.log = log;`, a self-assignment that left the field
    // null and discarded the script argument.
    this.log = script;
  }

  String getText(Token token) {
    return getText(token.ofs, token.len);
  }

  String getText(int ofs, int len) {
    return text.substring(ofs, ofs + len);
  }

  // NOTE(review): never read or written by this class; kept only so any
  // external reader of the field keeps compiling.
  TokenType lastTokenType = TokenType.UNDEFINED;

  /**
   * Appends a token covering [tokenStart, tokenEnd) after trimming leading and
   * trailing whitespace. IDENTIFIER tokens whose text is a keyword are
   * re-typed as KEYWORD. UNDEFINED or empty spans are dropped.
   *
   * @param tokens      output list the token is appended to
   * @param tokenStart  inclusive start offset into {@link #text}
   * @param tokenEnd    exclusive end offset into {@link #text}
   * @param currentType type of the token being flushed
   */
  private void insertToken(List<Token> tokens, int tokenStart, int tokenEnd, TokenType currentType) {
    if (currentType != TokenType.UNDEFINED && tokenStart < tokenEnd) {
      // Trim leading whitespace.
      for (int i = tokenStart; i < tokenEnd; i++) {
        if (Character.isWhitespace(text.charAt(i))) {
          tokenStart = i + 1;
        } else {
          break;
        }
      }
      // Trim trailing whitespace.
      for (int i = tokenEnd - 1; i >= tokenStart; i--) {
        if (Character.isWhitespace(text.charAt(i))) {
          tokenEnd = i;
        } else {
          break;
        }
      }
      if (tokenEnd - tokenStart > 0) {
        Token token = new Token();
        token.ofs = tokenStart;
        token.len = tokenEnd - tokenStart;
        token.type = currentType;
        if (currentType == TokenType.IDENTIFIER && isKeyword(getText(token))) {
          token.type = TokenType.KEYWORD;
        }
        tokens.add(token);
      }
      // (Removed a dead `currentType = TokenType.UNDEFINED;` write here: Java
      // parameters are pass-by-value, so it had no effect on the caller.)
    }
  }

  /** Flushes the in-progress token once the end of the text is reached. */
  private void handleLastToken(List<Token> tokens, int tokenStart, TokenType currentType) {
    insertToken(tokens, tokenStart, text.length(), currentType);
  }

  /** Builds the line-start-offset -> line-number table used by TokenSet.getLine(). */
  void buildLineNumberTable() {
    this.lineNumberTable = new TreeMap<>();
    int lineNumber = 1;
    lineNumberTable.put(0, 1);
    for (int i = 0; i < text.length(); i++) {
      if (text.charAt(i) == '\n') {
        lineNumber++;
        lineNumberTable.put(i + 1, lineNumber);
      }
    }
  }

  // Mutable tokenization state. Reset at the top of parse() so the tokenizer
  // can safely be run more than once on the same instance.
  List<Token> tokens = new ArrayList<>();
  int tokenStart = 0;
  TokenType currentType = TokenType.UNDEFINED;
  boolean inComment = false;
  boolean inBlockComment = false;
  boolean inString = false;

  /** Half-open text range [start, end) recognized by a lookahead scanner. */
  class ScanRange {
    int start;
    int end;
    TokenType type;

    ScanRange(int start, int end, TokenType type) {
      this.start = start;
      this.end = end;
      this.type = type;
    }

    // "No match" sentinel.
    ScanRange() {
      this.type = TokenType.UNDEFINED;
    }

    boolean isValid() {
      return this.type != TokenType.UNDEFINED;
    }
  };

  /**
   * Scans a 0x/0X-prefixed hexadecimal literal starting at currentIndex;
   * returns an invalid range if there is none (at least one hex digit is
   * required after the prefix).
   */
  private ScanRange tryParseHexadecimal(int currentIndex) {
    if (text.charAt(currentIndex) == '0' && currentIndex + 1 < text.length()) {
      char nextChar = text.charAt(currentIndex + 1);
      if (nextChar == 'x' || nextChar == 'X') {
        int tempIndex = currentIndex + 2;
        while (tempIndex < text.length()) {
          char c = text.charAt(tempIndex);
          if (Character.digit(c, 16) == -1) {
            break;
          }
          tempIndex++;
        }
        if (tempIndex > currentIndex + 2) {
          return new ScanRange(currentIndex, tempIndex, TokenType.NUMERIC_LITERAL_HEX);
        }
      }
    }
    return new ScanRange();
  }

  /**
   * Scans an identifier ([A-Za-z_][A-Za-z0-9_]*) starting at currentIndex;
   * returns an invalid range if the character cannot start an identifier.
   */
  private ScanRange tryParseIdentifier(int currentIndex) {
    if (Character.isLetter(text.charAt(currentIndex)) || text.charAt(currentIndex) == '_') {
      int tempIndex = currentIndex + 1;
      while (tempIndex < text.length()) {
        char c = text.charAt(tempIndex);
        if (!(Character.isLetter(c) || Character.isDigit(c) || c == '_')) {
          break;
        }
        tempIndex++;
      }
      return new ScanRange(currentIndex, tempIndex, TokenType.IDENTIFIER);
    }
    return new ScanRange();
  }

  /** Tries the multi-character scanners in priority order: hex literal, then identifier. */
  private ScanRange tryParseWithLookahead(int currentIndex) {
    ScanRange sr = tryParseHexadecimal(currentIndex);
    if (!sr.isValid()) {
      sr = tryParseIdentifier(currentIndex);
    }
    return sr;
  }

  /** Returns true for the small set of C keywords this tokenizer distinguishes. */
  public boolean isKeyword(String text) {
    return text.equals("while") || text.equals("for") || text.equals("if") || text.equals("else") ||
        text.equals("return") || text.equals("struct") || text.equals("typedef") ||
        text.equals("enum") || text.equals("union") || text.equals("const") || text.equals("static");
  }

  /**
   * Tokenizes the full text. Runs a character-at-a-time state machine:
   * comment/string states take priority, then multi-character lookahead
   * (hex literals, identifiers), then single-character punctuation. A token is
   * flushed whenever the computed type changes.
   */
  public TokenSet parse() {
    this.buildLineNumberTable();
    // Reset mutable state so parse() can be called more than once without
    // duplicating tokens from a previous run.
    tokens = new ArrayList<>();
    tokenStart = 0;
    currentType = TokenType.UNDEFINED;
    inComment = false;
    inBlockComment = false;
    inString = false;
    int index = 0;
    while (index < text.length()) {
      char currentChar = text.charAt(index);
      TokenType newType = TokenType.OTHER;
      // Handle comments
      if (inBlockComment) {
        newType = TokenType.BLOCK_COMMENT;
        if (currentChar == '*') {
          if (index + 1 < text.length() && text.charAt(index + 1) == '/') {
            inBlockComment = false;
            index++; // consume the '/' as part of the comment
          }
        }
      } else if (inComment) {
        newType = TokenType.COMMENT;
        if (currentChar == '\n') {
          inComment = false;
        }
      }
      // Handle string literals
      else if (inString) {
        newType = TokenType.STRING_LITERAL;
        if (currentChar == '\\' && index + 1 < text.length()) {
          // BUGFIX: skip the escaped character so \" does not terminate the
          // literal early.
          index++;
        } else if (currentChar == '"') {
          inString = false;
        }
      }
      // Detect start of comments
      else if (currentChar == '/' && index + 1 < text.length() && text.charAt(index + 1) == '*') {
        inBlockComment = true;
        newType = TokenType.BLOCK_COMMENT;
      } else if (currentChar == '/' && index + 1 < text.length() && text.charAt(index + 1) == '/') {
        inComment = true;
        newType = TokenType.COMMENT;
      }
      // Detect start of string literals
      else if (currentChar == '"') {
        inString = true;
        newType = TokenType.STRING_LITERAL;
      } else {
        ScanRange range = tryParseWithLookahead(index);
        if (range.isValid()) {
          // Flush whatever token was in progress before the lookahead match,
          // then emit the matched token as-is.
          insertToken(tokens, tokenStart, range.start, currentType);
          insertToken(tokens, range.start, range.end, range.type);
          currentType = TokenType.UNDEFINED;
          tokenStart = range.end;
          index = range.end;
          // BUGFIX: re-examine the character at range.end on the next
          // iteration. Previously execution fell through to the bottom
          // `index++`, so the character right after an identifier (e.g. '(')
          // was never classified and got lumped into an OTHER token.
          continue;
        }
        // Detect numeric literals
        else if (Character.isDigit(currentChar)) {
          newType = TokenType.NUMERIC_LITERAL;
        }
        // Detect identifiers (unreachable after a valid lookahead, kept as a
        // safety net)
        else if (Character.isLetter(currentChar) || currentChar == '_') {
          newType = TokenType.IDENTIFIER;
        }
        // Detect parentheses
        else if (currentChar == '(') {
          newType = TokenType.L_PAREN;
        } else if (currentChar == ')') {
          newType = TokenType.R_PAREN;
        }
        // Detect braces
        else if (currentChar == '{') {
          newType = TokenType.L_BRACE;
        } else if (currentChar == '}') {
          newType = TokenType.R_BRACE;
        }
        // Detect semicolon
        else if (currentChar == ';') {
          newType = TokenType.SEMICOLON;
        }
        // Detect comma
        else if (currentChar == ',') {
          newType = TokenType.COMMA;
        } else if (currentChar == '#') {
          newType = TokenType.HASH;
        }
        // Handle other characters
        else {
          newType = TokenType.OTHER;
        }
      }
      // Flush the in-progress token whenever the computed type changes.
      if (newType != currentType) {
        insertToken(tokens, tokenStart, index, currentType);
        tokenStart = index;
        currentType = newType;
      }
      index++;
    }
    // Handle the last token
    handleLastToken(tokens, tokenStart, currentType);
    return new TokenSet(tokens.toArray(new Token[0]), text, lineNumberTable);
  }
}