From a4b12ba2f46ab5a844ea30922606ac00435cdd54 Mon Sep 17 00:00:00 2001 From: Guus Waals <_@guusw.nl> Date: Fri, 20 Sep 2024 18:48:03 +0800 Subject: [PATCH] Fixup _0_0_ like access --- game_re/binders/gh_auto_binder.h | 26 + game_re/binders/gh_base.h | 12 + scripts/Decompile.java | 1011 ++++++++++++++++-------------- 3 files changed, 564 insertions(+), 485 deletions(-) diff --git a/game_re/binders/gh_auto_binder.h b/game_re/binders/gh_auto_binder.h index 2c46e18e..4ab1983b 100644 --- a/game_re/binders/gh_auto_binder.h +++ b/game_re/binders/gh_auto_binder.h @@ -6,4 +6,30 @@ #include "gh_structs.h" #include "gh_global.h" +template +struct Field { +}; + +template +struct _FieldBinderSizeTraits { + using type = undefined; +}; +template<> +struct _FieldBinderSizeTraits<4> { + using type = undefined4; +}; +template<> +struct _FieldBinderSizeTraits<2> { + using type = undefined2; +}; +template<> +struct _FieldBinderSizeTraits<3> { + using type = undefined3; +}; + +template +inline uint32_t& operator+(const char* str, Field offset) { + return *(_FieldBinderSizeTraits*)(str + offset); +} + #endif /* B8D59B54_1674_4C0F_AA2C_611385FF5D03 */ diff --git a/game_re/binders/gh_base.h b/game_re/binders/gh_base.h index b059fc66..9fe1225b 100644 --- a/game_re/binders/gh_base.h +++ b/game_re/binders/gh_base.h @@ -1,14 +1,26 @@ #ifndef A523F6DB_0645_4DEB_8AEB_3792CB732B49 #define A523F6DB_0645_4DEB_8AEB_3792CB732B49 +#if _WIN32 #include +#else +#include "../win32_shim.h" +#include +#include +#endif #include #include +#include // Fallback to int if type is not annotated // typedef int64_t undefined; typedef uint32_t undefined4; typedef uint16_t undefined2; +struct undefined3 { + uint8_t _0; + uint8_t _1; + uint8_t _2; +}; typedef unsigned char byte; typedef byte undefined1; // typedef uint64_t uint; diff --git a/scripts/Decompile.java b/scripts/Decompile.java index 9664bafc..2ecd102e 100644 --- a/scripts/Decompile.java +++ b/scripts/Decompile.java @@ -10,10 +10,14 @@ import 
java.util.HashSet; import java.util.Hashtable; import java.util.Iterator; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.Arrays; import java.util.Dictionary; +import ghidra.app.decompiler.ClangFieldToken; import ghidra.app.decompiler.ClangLine; +import ghidra.app.decompiler.ClangSyntaxToken; import ghidra.app.decompiler.ClangToken; import ghidra.app.decompiler.DecompileResults; import ghidra.app.decompiler.PrettyPrinter; @@ -24,7 +28,6 @@ import ghidra.program.model.data.BitFieldDataType; import ghidra.program.model.data.DataType; import ghidra.program.model.data.DataTypeComponent; import ghidra.program.model.data.DataTypeManager; -import re3lib.DataTypeWriter; import ghidra.program.model.data.PointerDataType; import ghidra.program.model.data.ProgramBasedDataTypeManager; import ghidra.program.model.data.Structure; @@ -40,488 +43,526 @@ import ghidra.util.task.TaskMonitor; import re3lib.*; public class Decompile extends GhidraScript { - // Auto rename invalid symbols - private static final boolean AUTO_RENAME_SYMBOLS = true; - - private static final HashSet ONLY_SYMBOLS = new HashSet<>(Arrays.asList( - "r3_main", // - "_strrchr")); - - static final boolean BUILD_BLACKLIST = true; - - HashSet
functionAddrBlackList = new HashSet<>(); - - boolean shouldDecompileFunction(Function function) { - if (ONLY_SYMBOLS != null && !ONLY_SYMBOLS.contains(function.getName())) { - return false; - } - return !functionAddrBlackList.contains(function.getEntryPoint()); - } - - void buildFunctionBlacklist() { - functionAddrBlackList = Utils.loadFunctionBlacklist(RecompileConfig.INSTANCE.functionBlacklistPath); - - if (BUILD_BLACKLIST) { - boolean modified = false; - - Iterator functionsIt = currentProgram.getFunctionManager().getFunctions(true).iterator(); - while (functionsIt.hasNext()) { - Function function = functionsIt.next(); - if (functionAddrBlackList.contains(function.getEntryPoint())) { - continue; - } - - String comment = function.getComment(); - boolean isIgnoredFunction = false; - if (comment != null && comment.contains("Library Function")) { - println("Adding library function " + function.getName() + " to blacklist"); - println("ac:" + functionAddrBlackList.size() + " jj:" - + functionAddrBlackList.contains(function.getEntryPoint()) + " " + function.getEntryPoint()); - isIgnoredFunction = true; - } - - if (function.getName().startsWith("crt_")) { - println("Adding crt function " + function.getName() + " to blacklist"); - isIgnoredFunction = true; - } - - if (isIgnoredFunction) { - // Decompile and trace - PCallTracer tracer = new PCallTracer(); - tracer.setBlacklist(functionAddrBlackList); - tracer.traceCalls(function); - for (Function f : tracer.out) { - println(" Adding " + f.getName() + " to blacklist"); - functionAddrBlackList.add(f.getEntryPoint()); - modified = true; - } - } - } - - if (modified) { - Utils.saveFunctionBlacklist(functionAddrBlackList, RecompileConfig.INSTANCE.functionBlacklistPath); - } - } - } - - String escapeCString(String str) { - str = str.replace("\\", "\\\\"); - str = str.replace("\"", "\\\""); - return str; - } - - String readCString(Address addr, int maxLen) throws Exception { - StringBuilder sb = new StringBuilder(); - int ofs = 
0; - while (true) { - Address read = addr.add(ofs++); - // println("Reading: " + read); - byte b = currentProgram.getMemory().getByte(read); - // println("Read: " + b); - if (b == 0 || ofs >= maxLen) { - break; - } - sb.append((char) b); - } - if (sb.length() > 0) { - // println("STR \"" + sb.toString() + "\""); - } - return sb.toString(); - } - - public class GlobalRec { - public HighSymbol highSymbol; - public Address address; - // public boolean isFullyDefined; - - public GlobalRec(HighSymbol highSymbol, Address address) { - this.highSymbol = highSymbol; - this.address = address; - // this.isFullyDefined = isFullyDefined; - } - }; - - void decompileFunction(Hashtable outGlobalSymbols, Function function) - throws Exception { - String fileName = sanitizeFunctionName(function.getName()) + ".cxx"; - - File f1 = new File(RecompileConfig.INSTANCE.dirDecompFix, fileName); - if (f1.exists()) { - println("Func " + function.getName() + " skipped (gh_fix)"); - return; - } - - File f0 = new File(RecompileConfig.INSTANCE.dirDecompAuto, fileName); - if (f0.exists()) { - f0.delete(); - } - - println("Processing " + function.getName() + " => " + f0.toString()); - - DecompileResults decompRes = RecompileConfig.INSTANCE.decompCache.getOrInsert(function); - try (PrintWriter writer2 = new PrintWriter(f0, "UTF-8")) { - writer2.println("// AUTO-GENERATED FILE, MOVE TO 'gh_fix' FOLDER PREVENT OVERWRITING!!!!! 
"); - writer2.println(); - writer2.println("#include "); - writer2.println("#include \"../gh_global.h\""); - writer2.println(); - - // decompRes.get - HighFunction highFunction = decompRes.getHighFunction(); - - // Remap for dynamic symbols - // Dictionary symbolRemap = new Hashtable<>(); - - HashSet headers = new HashSet<>(); - StringWriter codeWriter = new StringWriter(); - - PrettyPrinter pp = new PrettyPrinter(decompRes.getFunction(), decompRes.getCCodeMarkup(), null); - Iterator lines = pp.getLines().iterator(); - while (lines.hasNext()) { - ClangLine line = lines.next(); - for (int i = 0; i < line.getIndent(); i++) { - codeWriter.write(' '); - } - for (int t = 0; t < line.getNumTokens(); t++) { - ClangToken token = line.getToken(t); - HighSymbol gsym = token.getHighSymbol(highFunction); - if (gsym != null) { - var symStorage = gsym.getStorage(); - var sym = gsym.getSymbol(); - - Address address; - if (symStorage.isUnassignedStorage()) { - address = sym.getAddress(); - } else { - address = gsym.getStorage().getMinAddress(); - } - - if (address.isMemoryAddress()) { - // println("Memory: " + token.toString() + " - " + gsym.getName() + " - " + - // address); - // boolean defined = address.isConstantAddress(); - // Symbol symAtAddr = getSymbolAt(address); - // if (defined && symAtAddr != null) { - // } else { - // // println("NOT FULLY DEFINED: " + address + " - " + symAtAddr); - // // defined = false; - // } - - outGlobalSymbols.put(gsym.getName(), new GlobalRec(gsym, address)); - } else { - // println("Unknown: " + token.toString() + " - " + gsym.getName() + " - " + - // address); - } - - // println("CONST: " + token.toString() + " - " + gsym.getName()); - // println( - // "Token: " + token.toString() + " - " + gsym.getName() + " - " + - // symStorage.getSerializationString()); - } - PcodeOp op = token.getPcodeOp(); - if (op != null && op.getOpcode() == PcodeOp.CALL) { - // println("PcodeOp: " + op.toString() + " - " + op.getInput(0).toString()); - Varnode 
target = op.getInput(0); - if (target.isAddress()) { - Address callAddr = target.getAddress(); - Function calledFunction = getFunctionAt(callAddr); - if (calledFunction != null) { - if (!functionAddrBlackList.contains(calledFunction.getEntryPoint())) { - // println("Adding header: " + calledFunction + " / " + - // calledFunction.getSignature().getPrototypeString(true)); - headers.add("extern " + calledFunction.getSignature().getPrototypeString(true) - + "; // " + calledFunction.getEntryPoint() + " // " + calledFunction.getName()); - } - } - } - } - codeWriter.write(token.toString()); - } - codeWriter.write('\n'); - } - - for (String header : headers) { - writer2.println(header); - } - writer2.println(); - writer2.println("// " + function.getEntryPoint()); - writer2.print(codeWriter.toString()); - writer2.println(); - - // Iterator it = decompRes.getCCodeMarkup().tokenIterator(true); - // int ln = 0; - // while(it.hasNext()) { - // ClangToken token = it.next(); - // ClangLine line = token.getLineParent(); - // while (line != null && ln < line.getLineNumber()) { - // writer2.println(); - // ln++; - // } - // writer2.print(token.toString()); - // } - - // Collect referenced global symbols - // Iterator smyIt = highFunction.getGlobalSymbolMap().getSymbols(); - // while (smyIt.hasNext()) { - // HighSymbol gsym = smyIt.next(); - - // Address addr = gsym.getSymbol().getAddress(); - // println("FunctionSym " + addr + " " + gsym.getName() + " " + - // gsym.getStorage().getMinAddress()); - // println(" IsMem: " + gsym.getStorage().isMemoryStorage() + " " + - // gsym.getStorage().getSerializationString()); - - // if (outGlobalSymbols.containsKey(gsym.getName())) - // continue; - // outGlobalSymbols.put(gsym.getName(), gsym); - // } - } - } - - void headerGuardPre(PrintWriter writer, String tag) { - writer.println("#ifndef GH_GENERATED_" + tag + "_H"); - writer.println("#define GH_GENERATED_" + tag + "_H"); - writer.println(); - } - - void headerGuardPost(PrintWriter writer, 
String tag) { - writer.println("#endif // GH_GENERATED_" + tag + "_H"); - } - - class DependencyTypeDumper { - HashSet types = new HashSet<>(); - DataTypeManager dtm; - TaskMonitor taskMonitor; - - public DependencyTypeDumper(DataTypeManager dtm) { - this.dtm = dtm; - this.taskMonitor = monitor; - } - - void visit(DataType dataType, PrintWriter writer) throws Exception { - // If not already written - if (types.contains(dataType)) { - println("Visiting: " + dataType.getName()); - types.remove(dataType); - - // Write dependencies, and then write self - if (dataType instanceof Structure) { - Structure struct = (Structure) dataType; - for (DataTypeComponent component : struct.getComponents()) { - DataType dt = component.getDataType(); - if (dt instanceof Structure) { - println("Dependency: " + dt.getName()); - visit((Structure) dt, writer); - } - } - } - writeNoDeps(dataType, writer); - } - } - - void writeNoDeps(DataType dt, PrintWriter writer) throws Exception { - // Check - // https://github.com/NationalSecurityAgency/ghidra/blob/17c93909bbf99f7f98dbf5737b38d8dd2c01bef0/Ghidra/Features/Decompiler/src/main/java/ghidra/app/util/exporter/CppExporter.java#L401 - - // DataTypeWriter dtw = new DataTypeWriter(dtm, writer); - // dtw.blacklistedTypes = - // dtw.write(new DataType[] { dt }, taskMonitor); - - // if (dt instanceof Structure) { - // Structure struct = (Structure) dt; - // writer.append("typedef struct " + struct.getName() + " {\n"); - // for (DataTypeComponent component : struct.getComponents()) { - // if (component.isBitFieldComponent()) { - // BitFieldDataType bfdt = (BitFieldDataType) component.getDataType(); - // writer.append( - // " " + bfdt.getDisplayName() + " " + component.getDefaultFieldName() + " : " + - // bfdt.getBitSize() + ";\n"); - // } else { - // writer.append( - // " " + component.getDataType().getDisplayName() + " " + - // component.getDefaultFieldName() + ";\n"); - // } - // writer.append("} " + struct.getDisplayName() + " ;\n"); - // 
writer.append("\n"); - // } else if (dt instanceof TypedefDataType) { - // TypedefDataType typedef = (TypedefDataType) dt; - // writer.append("typedef " + typedef.getDataType().getDisplayName() + " " + - // typedef.getName() + ";\n"); - // writer.append("\n"); - // } else { - // throw new Exception("Unsupported type: " + dt.getDisplayName()); - // } - } - - void writeAll(PrintWriter writer) throws Exception { - while (types.size() > 0) { - DataType first = types.iterator().next(); - visit(first, writer); - } - } - }; - - void dumpStructureTypes() throws Exception { - ProgramBasedDataTypeManager dtm = currentProgram.getDataTypeManager(); - - HashSet typeBlacklist = Utils.loadStructBlacklist(RecompileConfig.INSTANCE.typeBlacklistPath); - - if (typeBlacklist == null) { - println("Building struct blacklist from existing data types"); - typeBlacklist = new HashSet<>(); - Iterator it = dtm.getAllDataTypes(); - while (it.hasNext()) { - DataType dt = it.next(); - if (dt instanceof Structure || dt instanceof TypedefDataType) { - typeBlacklist.add(dt.getDisplayName()); - } - } - Utils.saveStructBlacklist(typeBlacklist, RecompileConfig.INSTANCE.typeBlacklistPath); - } - - List filteredTypes = new ArrayList<>(); - Iterator it = dtm.getAllDataTypes(); - // DependencyTypeDumper dumper = new DependencyTypeDumper(dtm); - while (it.hasNext()) { - DataType dt = it.next(); - if (dt instanceof Structure || dt instanceof TypedefDataType) { - if (typeBlacklist.contains(dt.getDisplayName())) - continue; - println("Adding: " + dt.getDisplayName() + " - " + dt.getClass().getSimpleName()); - filteredTypes.add(dt); - } - - // Structure struct = (Structure) dt; - // dumper.types.add(struct); - // } else if (dt instanceof TypedefDataType) { - // TypedefDataType typedef = (TypedefDataType) dt; - // dumper.types.add(typedef); - // } - } - - try (PrintWriter writer = new PrintWriter(new File(RecompileConfig.INSTANCE.outputDir, "gh_structs.h"), "UTF-8")) { - headerGuardPre(writer, "STRUCTS"); - 
writer.println("// AUTO-GENERATED FILE "); - writer.println("#include "); - // dumper.writeAll(writer); - - DataTypeWriter dtw = new DataTypeWriter(dtm, writer); - dtw.blacklistedTypes = typeBlacklist; - dtw.write(filteredTypes, monitor); - - headerGuardPost(writer, "STRUCTS"); - } - } - - void dumpGlobals(Hashtable globalSymbols) throws Exception { - File globalSymbolsListH = new File(RecompileConfig.INSTANCE.outputDir, "gh_global.h"); - PrintWriter hwriter = new PrintWriter(globalSymbolsListH, "UTF-8"); - hwriter.println("// AUTO-GENERATED FILE "); - headerGuardPre(hwriter, "GLOBALS"); - hwriter.println("#include "); - hwriter.println(); - - File globalSymbolsListC = new File(RecompileConfig.INSTANCE.outputDir, "gh_global.cxx"); - PrintWriter cwriter = new PrintWriter(globalSymbolsListC, "UTF-8"); - cwriter.println("// AUTO-GENERATED FILE "); - cwriter.println("#include "); - hwriter.println(); - - for (GlobalRec sym : globalSymbols.values()) { - HighSymbol highSym = sym.highSymbol; - DataType dt = highSym.getDataType(); - String dataType = dt.getDisplayName(); - String name = highSym.getName(); - String sanitizedName = sanitizeFunctionName(highSym.getName()); - if (!sanitizedName.equals(highSym.getName())) { - println("Invalid global symbol name: " + highSym.getName() + " - " - + highSym.getHighFunction().getFunction().getName()); - } else { - Address addr = sym.address; - // println("Symbol: " + symbol + " Addr: " + addr + " Size:" + symSize + " " + - // storage.getSerializationString()); - try { - String initBlk = " = "; - boolean fullyDefinedType = false; - if (dt instanceof AbstractStringDataType) { - AbstractStringDataType sdt = (AbstractStringDataType) dt; - dataType = "const char*"; - // String type - initBlk += "\"" + escapeCString(readCString(addr, 2048)) + "\""; - fullyDefinedType = true; - } else if (dt instanceof PointerDataType) { - PointerDataType pdt = (PointerDataType) dt; - DataType baseType = pdt.getDataType(); - dataType = 
baseType.getDisplayName() + "*"; - initBlk += "(" + dataType + ")&GH_MEM(0x" + addr + ")"; - fullyDefinedType = true; - } - if (fullyDefinedType) { - hwriter.println("extern " + dataType + " " + name + "; // " + addr); - cwriter.println(dataType + " " + name + initBlk + "; // " + addr); - } else { - String refTypeStr = dt.getDisplayName() + "&"; - hwriter.println("extern " + refTypeStr + " " + name + "; // " + addr); - cwriter.println(dataType + " " + name + "= (" + refTypeStr + ") GH_MEM(0x" + addr + ");"); - } - } catch (Exception e) { - println("Error processing global symbol: " + e); - println("Symbol: " + highSym.getName() + " - " + addr + " - " - + highSym.getHighFunction().getFunction().getName()); - } - } - } - - headerGuardPost(hwriter, "GLOBALS"); - hwriter.close(); - cwriter.close(); - } - - void decompileAll(List functions) throws Exception { - Hashtable globalSymbols = new Hashtable<>(); - - for (Function function : functions) { - decompileFunction(globalSymbols, function); - } - - dumpStructureTypes(); - dumpGlobals(globalSymbols); - } - - @Override - public void run() throws Exception { - if (currentProgram == null) { - return; - } - - RecompileConfig.INSTANCE = new RecompileConfig(this); - - if (!new File(RecompileConfig.INSTANCE.outputDir).exists()) { - throw new Exception("Output directory does not exist: " + RecompileConfig.INSTANCE.outputDir); - } - - // Make sure to create output folders - RecompileConfig.INSTANCE.dirDecompFix.mkdirs(); - RecompileConfig.INSTANCE.dirDecompAuto.mkdirs(); - - buildFunctionBlacklist(); - - List functions = new ArrayList<>(); - - Iterator functionsIt = currentProgram.getFunctionManager().getFunctions(true).iterator(); - while (functionsIt.hasNext()) { - Function function = functionsIt.next(); - if (!shouldDecompileFunction(function)) { - continue; - } - - functions.add(function); - } - - decompileAll(functions); - } - - String sanitizeFunctionName(String name) { - return name.replaceAll("[^a-zA-Z0-9_]", "_"); - } + 
// Auto rename invalid symbols
+    private static final boolean AUTO_RENAME_SYMBOLS = true;
+
+    // When non-null, only functions whose name is in this set are decompiled.
+    private static final HashSet<String> ONLY_SYMBOLS = new HashSet<>(Arrays.asList(
+            "r3_main", //
+            "_strrchr"));
+
+    // When true, buildFunctionBlacklist() extends the loaded blacklist by tracing calls.
+    static final boolean BUILD_BLACKLIST = true;
+
+    // Entry points of functions that must not be decompiled or forward-declared.
+    HashSet<Address> functionAddrBlackList = new HashSet<>();
+
+    /**
+     * Decides whether a function is emitted: its name must pass the ONLY_SYMBOLS
+     * filter (when that filter is non-null) and its entry point must not be
+     * blacklisted.
+     */
+    boolean shouldDecompileFunction(Function function) {
+        if (ONLY_SYMBOLS != null && !ONLY_SYMBOLS.contains(function.getName())) {
+            return false;
+        }
+        return !functionAddrBlackList.contains(function.getEntryPoint());
+    }
+
+    /**
+     * Loads the function blacklist from disk and, when BUILD_BLACKLIST is set,
+     * extends it with every function reported by PCallTracer for functions whose
+     * comment contains "Library Function" or whose name starts with "crt_".
+     * The blacklist file is rewritten only when something was added.
+     */
+    void buildFunctionBlacklist() {
+        functionAddrBlackList = Utils.loadFunctionBlacklist(RecompileConfig.INSTANCE.functionBlacklistPath);
+
+        if (!BUILD_BLACKLIST) {
+            return;
+        }
+
+        boolean modified = false;
+
+        Iterator<Function> functionsIt = currentProgram.getFunctionManager().getFunctions(true).iterator();
+        while (functionsIt.hasNext()) {
+            Function function = functionsIt.next();
+            if (functionAddrBlackList.contains(function.getEntryPoint())) {
+                continue;
+            }
+
+            String comment = function.getComment();
+            boolean isIgnoredFunction = false;
+            if (comment != null && comment.contains("Library Function")) {
+                println("Adding library function " + function.getName() + " to blacklist");
+                println("ac:" + functionAddrBlackList.size() + " jj:"
+                        + functionAddrBlackList.contains(function.getEntryPoint()) + " "
+                        + function.getEntryPoint());
+                isIgnoredFunction = true;
+            }
+
+            if (function.getName().startsWith("crt_")) {
+                println("Adding crt function " + function.getName() + " to blacklist");
+                isIgnoredFunction = true;
+            }
+
+            if (isIgnoredFunction) {
+                // Decompile and trace; everything the tracer reports is blacklisted.
+                PCallTracer tracer = new PCallTracer();
+                tracer.setBlacklist(functionAddrBlackList);
+                tracer.traceCalls(function);
+                for (Function f : tracer.out) {
+                    println(" Adding " + f.getName() + " to blacklist");
+                    functionAddrBlackList.add(f.getEntryPoint());
+                    modified = true;
+                }
+            }
+        }
+
+        if (modified) {
+            Utils.saveFunctionBlacklist(functionAddrBlackList, RecompileConfig.INSTANCE.functionBlacklistPath);
+        }
+    }
+
+    /**
+     * Escapes text so it can be placed between double quotes in generated C/C++.
+     * Backslash and double quote get a backslash prefix; newline, carriage return
+     * and tab use their mnemonic escapes; any other character below 0x20 or in
+     * 0x7F..0xFF is written as a three-digit octal escape. Octal is used instead
+     * of \x because a hex escape would also swallow following hex digits.
+     */
+    String escapeCString(String str) {
+        StringBuilder sb = new StringBuilder(str.length() + 8);
+        for (int i = 0; i < str.length(); i++) {
+            char c = str.charAt(i);
+            switch (c) {
+                case '\\':
+                    sb.append("\\\\");
+                    break;
+                case '"':
+                    sb.append("\\\"");
+                    break;
+                case '\n':
+                    sb.append("\\n");
+                    break;
+                case '\r':
+                    sb.append("\\r");
+                    break;
+                case '\t':
+                    sb.append("\\t");
+                    break;
+                default:
+                    if (c < 0x20 || (c >= 0x7F && c <= 0xFF)) {
+                        sb.append(String.format("\\%03o", (int) c));
+                    } else {
+                        sb.append(c);
+                    }
+                    break;
+            }
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Reads a NUL-terminated byte string from program memory.
+     *
+     * @param addr   address of the first byte
+     * @param maxLen limit including the terminator; at most maxLen - 1 characters
+     *               are returned
+     * @return the bytes before the first NUL (or before the limit), one char per byte
+     * @throws Exception if the memory read fails
+     */
+    String readCString(Address addr, int maxLen) throws Exception {
+        StringBuilder sb = new StringBuilder();
+        for (int ofs = 0; ofs < maxLen - 1; ofs++) {
+            byte b = currentProgram.getMemory().getByte(addr.add(ofs));
+            if (b == 0) {
+                break;
+            }
+            // Mask before widening: a plain (char) cast sign-extends bytes >= 0x80.
+            sb.append((char) (b & 0xFF));
+        }
+        return sb.toString();
+    }
+
+    /** A global referenced by decompiled code, paired with the address it resolves to. */
+    public class GlobalRec {
+        public HighSymbol highSymbol;
+        public Address address;
+
+        public GlobalRec(HighSymbol highSymbol, Address address) {
+            this.highSymbol = highSymbol;
+            this.address = address;
+        }
+    }
+
+    // Matches decompiler sub-field names such as _4_4_; group 1 and group 2 are the two numbers.
+    static final Pattern fieldAccessRegex = Pattern.compile("^_([0-9]+)_([0-9]+)_$");
+
+    /**
+     * Decompiles one function into "<sanitized name>.cxx" in the auto output
+     * directory, unless a file of the same name exists in the fix directory.
+     *
+     * While printing, a "." token followed by a syntax token of the form _A_B_ is
+     * replaced by " + Field<A, B>()". Memory-backed symbols referenced by tokens
+     * are recorded in outGlobalSymbols, and an extern prototype is emitted for
+     * every directly called function that is not blacklisted.
+     *
+     * @param outGlobalSymbols receives referenced globals keyed by symbol name
+     * @param function         the function to decompile
+     * @throws Exception if decompilation or file output fails
+     */
+    void decompileFunction(Hashtable<String, GlobalRec> outGlobalSymbols, Function function)
+            throws Exception {
+        String fileName = sanitizeFunctionName(function.getName()) + ".cxx";
+
+        File f1 = new File(RecompileConfig.INSTANCE.dirDecompFix, fileName);
+        if (f1.exists()) {
+            println("Func " + function.getName() + " skipped (gh_fix)");
+            return;
+        }
+
+        File f0 = new File(RecompileConfig.INSTANCE.dirDecompAuto, fileName);
+        if (f0.exists()) {
+            f0.delete();
+        }
+
+        println("Processing " + function.getName() + " => " + f0.toString());
+
+        DecompileResults decompRes = RecompileConfig.INSTANCE.decompCache.getOrInsert(function);
+        try (PrintWriter writer2 = new PrintWriter(f0, "UTF-8")) {
+            writer2.println("// AUTO-GENERATED FILE, MOVE TO 'gh_fix' FOLDER PREVENT OVERWRITING!!!!! ");
+            writer2.println();
+            // NOTE(review): the include target looks like it was lost from this copy of the
+            // patch (angle-bracket text is missing throughout) - confirm against the repository.
+            writer2.println("#include ");
+            writer2.println("#include \"../gh_global.h\"");
+            writer2.println();
+
+            HighFunction highFunction = decompRes.getHighFunction();
+
+            HashSet<String> headers = new HashSet<>();
+            StringWriter codeWriter = new StringWriter();
+
+            PrettyPrinter pp = new PrettyPrinter(decompRes.getFunction(), decompRes.getCCodeMarkup(), null);
+            Iterator<ClangLine> lines = pp.getLines().iterator();
+            while (lines.hasNext()) {
+                ClangLine line = lines.next();
+                for (int i = 0; i < line.getIndent(); i++) {
+                    codeWriter.write(' ');
+                }
+
+                // Drop empty tokens so that "previous token" below means the previous visible one.
+                List<ClangToken> tokens = new ArrayList<>();
+                for (int i = 0; i < line.getNumTokens(); i++) {
+                    ClangToken token = line.getToken(i);
+                    if (!token.getText().isEmpty()) {
+                        tokens.add(token);
+                    }
+                }
+
+                boolean prevDot = false;
+                for (int t = 0; t < tokens.size(); t++) {
+                    ClangToken token = tokens.get(t);
+                    boolean thisDot = token.toString().equals(".");
+
+                    if (prevDot && token instanceof ClangSyntaxToken) {
+                        Matcher matcher = fieldAccessRegex.matcher(token.getText());
+                        if (matcher.matches()) {
+                            int offset = Integer.parseInt(matcher.group(1));
+                            int size = Integer.parseInt(matcher.group(2));
+                            ClangToken replacement = new ClangToken(token.Parent(),
+                                    " + Field<" + offset + ", " + size + ">()");
+                            // Collapse the "." at t - 1 and the field token at t into one token,
+                            // then step back so the loop resumes right after the replacement.
+                            tokens.set(t - 1, replacement);
+                            tokens.remove(t);
+                            t--;
+                        }
+                    }
+
+                    // The original token is still inspected even when it was replaced above.
+                    recordGlobalSymbol(outGlobalSymbols, token.getHighSymbol(highFunction));
+                    recordCalledFunction(headers, token.getPcodeOp());
+                    prevDot = thisDot;
+                }
+
+                for (ClangToken token : tokens) {
+                    codeWriter.write(token.toString());
+                }
+                codeWriter.write('\n');
+            }
+
+            for (String header : headers) {
+                writer2.println(header);
+            }
+            writer2.println();
+            writer2.println("// " + function.getEntryPoint());
+            writer2.print(codeWriter.toString());
+            writer2.println();
+        }
+    }
+
+    /**
+     * Records gsym in outGlobalSymbols when it resolves to a memory address.
+     * The symbol's own address is used when its storage is unassigned; otherwise
+     * the minimum address of its storage. A null gsym is ignored.
+     */
+    private void recordGlobalSymbol(Hashtable<String, GlobalRec> outGlobalSymbols, HighSymbol gsym) {
+        if (gsym == null) {
+            return;
+        }
+        var symStorage = gsym.getStorage();
+        Address address = symStorage.isUnassignedStorage()
+                ? gsym.getSymbol().getAddress()
+                : symStorage.getMinAddress();
+        if (address.isMemoryAddress()) {
+            outGlobalSymbols.put(gsym.getName(), new GlobalRec(gsym, address));
+        }
+    }
+
+    /**
+     * Adds an extern prototype line to headers when op is a CALL whose first input
+     * is an address with a function defined at it that is not blacklisted.
+     */
+    private void recordCalledFunction(HashSet<String> headers, PcodeOp op) {
+        if (op == null || op.getOpcode() != PcodeOp.CALL) {
+            return;
+        }
+        Varnode target = op.getInput(0);
+        if (!target.isAddress()) {
+            return;
+        }
+        Function calledFunction = getFunctionAt(target.getAddress());
+        if (calledFunction == null || functionAddrBlackList.contains(calledFunction.getEntryPoint())) {
+            return;
+        }
+        headers.add("extern " + calledFunction.getSignature().getPrototypeString(true)
+                + "; // " + calledFunction.getEntryPoint() + " // "
+                + calledFunction.getName());
+    }
+
+    /** Writes the opening #ifndef/#define pair of a GH_GENERATED_<tag>_H include guard. */
+    void headerGuardPre(PrintWriter writer, String tag) {
+        writer.println("#ifndef GH_GENERATED_" + tag + "_H");
+        writer.println("#define GH_GENERATED_" + tag + "_H");
+        writer.println();
+    }
+
+    /** Writes the closing #endif of a GH_GENERATED_<tag>_H include guard. */
+    void headerGuardPost(PrintWriter writer, String tag) {
+        writer.println("#endif // GH_GENERATED_" + tag + "_H");
+    }
+
+    /**
+     * Visits a set of data types so that structure members that are themselves
+     * pending structures are visited before the structure containing them.
+     * Not referenced by the rest of this class.
+     */
+    class DependencyTypeDumper {
+        // Types still waiting to be written; visit() removes a type before recursing.
+        HashSet<DataType> types = new HashSet<>();
+        DataTypeManager dtm;
+        TaskMonitor taskMonitor;
+
+        public DependencyTypeDumper(DataTypeManager dtm) {
+            this.dtm = dtm;
+            this.taskMonitor = monitor;
+        }
+
+        /** Writes dataType after its pending structure members; no-op if it is not pending. */
+        void visit(DataType dataType, PrintWriter writer) throws Exception {
+            if (!types.contains(dataType)) {
+                return;
+            }
+            println("Visiting: " + dataType.getName());
+            types.remove(dataType);
+
+            // Write dependencies, and then write self
+            if (dataType instanceof Structure) {
+                Structure struct = (Structure) dataType;
+                for (DataTypeComponent component : struct.getComponents()) {
+                    DataType dt = component.getDataType();
+                    if (dt instanceof Structure) {
+                        println("Dependency: " + dt.getName());
+                        visit((Structure) dt, writer);
+                    }
+                }
+            }
+            writeNoDeps(dataType, writer);
+        }
+
+        /**
+         * Intentionally writes nothing at the moment. For a reference implementation see
+         * https://github.com/NationalSecurityAgency/ghidra/blob/17c93909bbf99f7f98dbf5737b38d8dd2c01bef0/Ghidra/Features/Decompiler/src/main/java/ghidra/app/util/exporter/CppExporter.java#L401
+         */
+        void writeNoDeps(DataType dt, PrintWriter writer) throws Exception {
+        }
+
+        /** Visits pending types until none are left. */
+        void writeAll(PrintWriter writer) throws Exception {
+            while (types.size() > 0) {
+                DataType first = types.iterator().next();
+                visit(first, writer);
+            }
+        }
+    }
+
+    /**
+     * Writes gh_structs.h with every Structure and TypedefDataType whose display
+     * name is not in the type blacklist. When no blacklist could be loaded, one is
+     * built from all such types currently in the program and saved first.
+     */
+    void dumpStructureTypes() throws Exception {
+        ProgramBasedDataTypeManager dtm = currentProgram.getDataTypeManager();
+
+        HashSet<String> typeBlacklist = Utils.loadStructBlacklist(RecompileConfig.INSTANCE.typeBlacklistPath);
+
+        if (typeBlacklist == null) {
+            println("Building struct blacklist from existing data types");
+            typeBlacklist = new HashSet<>();
+            Iterator<DataType> existing = dtm.getAllDataTypes();
+            while (existing.hasNext()) {
+                DataType dt = existing.next();
+                if (dt instanceof Structure || dt instanceof TypedefDataType) {
+                    typeBlacklist.add(dt.getDisplayName());
+                }
+            }
+            Utils.saveStructBlacklist(typeBlacklist, RecompileConfig.INSTANCE.typeBlacklistPath);
+        }
+
+        List<DataType> filteredTypes = new ArrayList<>();
+        Iterator<DataType> it = dtm.getAllDataTypes();
+        while (it.hasNext()) {
+            DataType dt = it.next();
+            if (!(dt instanceof Structure || dt instanceof TypedefDataType)) {
+                continue;
+            }
+            if (typeBlacklist.contains(dt.getDisplayName())) {
+                continue;
+            }
+            filteredTypes.add(dt);
+        }
+
+        try (PrintWriter writer = new PrintWriter(new File(RecompileConfig.INSTANCE.outputDir, "gh_structs.h"),
+                "UTF-8")) {
+            headerGuardPre(writer, "STRUCTS");
+            writer.println("// AUTO-GENERATED FILE ");
+            // NOTE(review): include target appears to be missing from this copy - confirm.
+            writer.println("#include ");
+
+            DataTypeWriter dtw = new DataTypeWriter(dtm, writer);
+            dtw.blacklistedTypes = typeBlacklist;
+            dtw.write(filteredTypes, monitor);
+
+            headerGuardPost(writer, "STRUCTS");
+        }
+    }
+
+    /**
+     * Writes gh_global.h (extern declarations) and gh_global.cxx (definitions) for
+     * the collected globals. String-typed symbols become const char* initialised
+     * with the bytes read from memory, pointer-typed symbols are initialised with
+     * the address of GH_MEM(addr), and every other symbol is a reference bound to
+     * GH_MEM(addr). Symbols whose name is not already a valid identifier are
+     * reported and skipped; a failure on one symbol is logged and does not stop
+     * the others.
+     */
+    void dumpGlobals(Hashtable<String, GlobalRec> globalSymbols) throws Exception {
+        File globalSymbolsListH = new File(RecompileConfig.INSTANCE.outputDir, "gh_global.h");
+        File globalSymbolsListC = new File(RecompileConfig.INSTANCE.outputDir, "gh_global.cxx");
+
+        // try-with-resources: both files are closed even if writing throws.
+        try (PrintWriter hwriter = new PrintWriter(globalSymbolsListH, "UTF-8");
+                PrintWriter cwriter = new PrintWriter(globalSymbolsListC, "UTF-8")) {
+            hwriter.println("// AUTO-GENERATED FILE ");
+            headerGuardPre(hwriter, "GLOBALS");
+            // NOTE(review): include targets appear to be missing from this copy - confirm.
+            hwriter.println("#include ");
+            hwriter.println();
+
+            cwriter.println("// AUTO-GENERATED FILE ");
+            cwriter.println("#include ");
+            // Blank line belongs to the .cxx preamble (was written to the header by mistake).
+            cwriter.println();
+
+            for (GlobalRec sym : globalSymbols.values()) {
+                HighSymbol highSym = sym.highSymbol;
+                DataType dt = highSym.getDataType();
+                String name = highSym.getName();
+                if (!sanitizeFunctionName(name).equals(name)) {
+                    println("Invalid global symbol name: " + name + " - "
+                            + highSym.getHighFunction().getFunction().getName());
+                    continue;
+                }
+
+                Address addr = sym.address;
+                try {
+                    if (dt instanceof AbstractStringDataType) {
+                        String dataType = "const char*";
+                        String initBlk = " = \"" + escapeCString(readCString(addr, 2048)) + "\"";
+                        hwriter.println("extern " + dataType + " " + name + "; // " + addr);
+                        cwriter.println(dataType + " " + name + initBlk + "; // " + addr);
+                    } else if (dt instanceof PointerDataType) {
+                        PointerDataType pdt = (PointerDataType) dt;
+                        String dataType = pdt.getDataType().getDisplayName() + "*";
+                        String initBlk = " = (" + dataType + ")&GH_MEM(0x" + addr + ")";
+                        hwriter.println("extern " + dataType + " " + name + "; // " + addr);
+                        cwriter.println(dataType + " " + name + initBlk + "; // " + addr);
+                    } else {
+                        // The definition must be a reference as well, matching the extern
+                        // declaration in the header.
+                        String refTypeStr = dt.getDisplayName() + "&";
+                        hwriter.println("extern " + refTypeStr + " " + name + "; // " + addr);
+                        cwriter.println(refTypeStr + " " + name + " = (" + refTypeStr + ") GH_MEM(0x" + addr + ");");
+                    }
+                } catch (Exception e) {
+                    // Best effort: report the symbol and continue with the next one.
+                    println("Error processing global symbol: " + e);
+                    println("Symbol: " + highSym.getName() + " - " + addr + " - "
+                            + highSym.getHighFunction().getFunction().getName());
+                }
+            }
+
+            headerGuardPost(hwriter, "GLOBALS");
+        }
+    }
+
+    /** Decompiles every given function, then writes the struct and global headers. */
+    void decompileAll(List<Function> functions) throws Exception {
+        Hashtable<String, GlobalRec> globalSymbols = new Hashtable<>();
+
+        for (Function function : functions) {
+            decompileFunction(globalSymbols, function);
+        }
+
+        dumpStructureTypes();
+        dumpGlobals(globalSymbols);
+    }
+
+    /**
+     * Script entry point: sets up RecompileConfig, requires the output directory
+     * to exist, creates the fix/auto directories, builds the blacklist and
+     * decompiles every function accepted by shouldDecompileFunction.
+     */
+    @Override
+    public void run() throws Exception {
+        if (currentProgram == null) {
+            return;
+        }
+
+        RecompileConfig.INSTANCE = new RecompileConfig(this);
+
+        if (!new File(RecompileConfig.INSTANCE.outputDir).exists()) {
+            throw new Exception("Output directory does not exist: " + RecompileConfig.INSTANCE.outputDir);
+        }
+
+        // Make sure to create output folders
+        RecompileConfig.INSTANCE.dirDecompFix.mkdirs();
+        RecompileConfig.INSTANCE.dirDecompAuto.mkdirs();
+
+        buildFunctionBlacklist();
+
+        List<Function> functions = new ArrayList<>();
+
+        Iterator<Function> functionsIt = currentProgram.getFunctionManager().getFunctions(true).iterator();
+        while (functionsIt.hasNext()) {
+            Function function = functionsIt.next();
+            if (shouldDecompileFunction(function)) {
+                functions.add(function);
+            }
+        }
+
+        decompileAll(functions);
+    }
+
+    /** Replaces every character outside [a-zA-Z0-9_] with an underscore. */
+    String sanitizeFunctionName(String name) {
+        return name.replaceAll("[^a-zA-Z0-9_]", "_");
+    }
+}