/* ### */ // Script to export decompiled C code from Ghidra // //@category Examples.Demangler import java.io.File; import java.io.FileNotFoundException; import java.io.PrintWriter; import java.io.StringWriter; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashSet; import java.util.Hashtable; import java.util.Iterator; import java.util.List; import java.util.Scanner; import java.util.Arrays; import java.util.Dictionary; import ghidra.app.cmd.label.AddLabelCmd; import ghidra.app.decompiler.ClangLine; import ghidra.app.decompiler.ClangMarkup; import ghidra.app.decompiler.ClangNode; import ghidra.app.decompiler.ClangToken; import ghidra.app.decompiler.ClangTokenGroup; import ghidra.app.decompiler.DecompInterface; import ghidra.app.decompiler.DecompileResults; import ghidra.app.decompiler.DecompiledFunction; import ghidra.app.decompiler.PrettyPrinter; import ghidra.app.script.GhidraScript; import ghidra.docking.settings.Settings; import ghidra.program.model.address.Address; import ghidra.program.model.address.AddressFactory; import ghidra.program.model.data.AbstractStringDataType; import ghidra.program.model.data.BuiltInDataType; import ghidra.program.model.data.CategoryPath; import ghidra.program.model.data.DataOrganization; import ghidra.program.model.data.DataOrganizationImpl; import ghidra.program.model.data.DataType; import ghidra.program.model.data.DataTypeComponent; import ghidra.program.model.data.PointerDataType; import ghidra.program.model.data.ProgramBasedDataTypeManager; import ghidra.program.model.data.SourceArchive; import ghidra.program.model.data.StringDataInstance; import ghidra.program.model.data.Structure; import ghidra.program.model.listing.Function; import ghidra.program.model.listing.Variable; import ghidra.program.model.listing.VariableStorage; import ghidra.program.model.mem.MemoryAccessException; import ghidra.program.model.mem.MemoryBlock; import ghidra.program.model.mem.MemoryBufferImpl; import ghidra.program.model.pcode.HighFunction; import ghidra.program.model.pcode.HighSymbol; import ghidra.program.model.pcode.PcodeOp; import ghidra.program.model.pcode.PcodeOpAST; import ghidra.program.model.pcode.Varnode; import ghidra.program.model.symbol.NameTransformer; import ghidra.program.model.symbol.SourceType; import ghidra.program.model.symbol.Symbol; import ghidra.program.model.symbol.SymbolTable; import ghidra.program.model.symbol.SymbolType; public class DecompileC extends GhidraScript { public class DecompileCache { private static final int TIMEOUT = 10000; Hashtable cache = new Hashtable<>(); DecompInterface decomp; public DecompileCache(DecompInterface decomp) { this.decomp = decomp; } public DecompileResults get(Function function) { return cache.get(function); } public DecompileResults getOrInsert(Function function) { DecompileResults res = cache.get(function); if (res == null) { res = decomp.decompileFunction(function, TIMEOUT, monitor); cache.put(function, res); } return res; } } public class PCallTracer { public class QueueItem { Function function; int depth; QueueItem(Function function, int depth) { this.function = function; this.depth = depth; } } public List out = new ArrayList<>(); public boolean trace = false; List queue = new ArrayList<>(); HashSet
visited = new HashSet<>(); DecompileCache decomp; PCallTracer(DecompileCache decomp) { this.decomp = decomp; } public void setBlacklist(HashSet
blacklist) { this.visited = new HashSet<>(blacklist); } void visit(HighFunction highFunction, int depth) { Iterator opIter = highFunction.getPcodeOps(); while (opIter.hasNext()) { PcodeOpAST op = opIter.next(); if (op.getOpcode() == PcodeOp.CALL) { Varnode target = op.getInput(0); if (target.isAddress()) { Address callAddr = target.getAddress(); Function calledFunction = getFunctionAt(callAddr); if (calledFunction == null) { println("PCallTracer, called function not found: " + op.toString() + " - " + highFunction.getFunction().getName()); continue; } if (!visited.contains(calledFunction.getEntryPoint())) { queue.add(new QueueItem(calledFunction, depth + 1)); } } } } } void visit(Function function, int depth) { if (!visited.contains(function.getEntryPoint())) { visited.add(function.getEntryPoint()); if (trace) { println("PCallTracer, visiting " + function.getName() + " (depth:" + depth + ")"); } DecompileResults decompRes = decomp.getOrInsert(function); visit(decompRes.getHighFunction(), depth); out.add(function); } } public void traceCalls(Function inFunction) { queue.add(new QueueItem(inFunction, 0)); while (queue.size() > 0) { QueueItem item = queue.remove(0); visit(item.function, item.depth); } } } private static final String OUTPUT_DIR = "game_re"; // The static memory block private Address staticMemoryBlockStart; private Address staticMemoryBlockEnd; private DecompileCache decompCache; // Auto rename invalid symbols private static final boolean AUTO_RENAME_SYMBOLS = true; private static final HashSet ONLY_SYMBOLS = new HashSet<>(Arrays.asList( "r3_main", // "_strrchr")); File rootDir; File outputDir; File dirDecompAuto; File dirDecompFix; HashSet
functionAddrBlackList = new HashSet<>(); void loadFunctionBlacklist() { functionAddrBlackList.clear(); File blacklistFile = new File(outputDir, "blacklist.txt"); try (Scanner scanner = new Scanner(blacklistFile)) { while (scanner.hasNextLine()) { String line = scanner.nextLine(); // Strip comment String line1 = line.split("//")[0].trim(); // Deserialize address Address addr = currentProgram.getAddressFactory().getAddress(line1); functionAddrBlackList.add(addr); } println("Loaded blacklist with " + functionAddrBlackList.size() + " entries"); } catch (FileNotFoundException e) { println("No blacklist found"); return; } // for (Address a : functionAddrBlackList) { // Function fn = getFunctionAt(a); // println("In blacklist: " + a + " (" + (fn != null ? fn.getName() : "unknown") // + ")"); // } } void saveFunctionBlacklist() { println("Saving blacklist"); File blacklistFile = new File(outputDir, "blacklist.txt"); try (PrintWriter writer = new PrintWriter(blacklistFile)) { for (Address addr : functionAddrBlackList) { writer.println(addr.toString() + " // " + getFunctionAt(addr).getName()); } } catch (FileNotFoundException e) { println("Error saving blacklist: " + e.getMessage()); } } boolean shouldDecompileFunction(Function function) { if (ONLY_SYMBOLS != null && !ONLY_SYMBOLS.contains(function.getName())) { return false; } return !functionAddrBlackList.contains(function.getEntryPoint()); } static final boolean BUILD_BLACKLIST = true; void buildFunctionBlacklist() { loadFunctionBlacklist(); if (BUILD_BLACKLIST) { boolean modified = false; Iterator functionsIt = currentProgram.getFunctionManager().getFunctions(true).iterator(); while (functionsIt.hasNext()) { Function function = functionsIt.next(); if (functionAddrBlackList.contains(function.getEntryPoint())) { continue; } String comment = function.getComment(); boolean isIgnoredFunction = false; if (comment != null && comment.contains("Library Function")) { println("Adding library function " + function.getName() + " to blacklist"); println("ac:" + functionAddrBlackList.size() + " jj:" + functionAddrBlackList.contains(function.getEntryPoint()) + " " + function.getEntryPoint()); isIgnoredFunction = true; } if (function.getName().startsWith("crt_")) { println("Adding crt function " + function.getName() + " to blacklist"); isIgnoredFunction = true; } if (isIgnoredFunction) { // Decompile and trace PCallTracer tracer = new PCallTracer(decompCache); tracer.setBlacklist(functionAddrBlackList); tracer.traceCalls(function); for (Function f : tracer.out) { println(" Adding " + f.getName() + " to blacklist"); functionAddrBlackList.add(f.getEntryPoint()); modified = true; } } } if (modified) { saveFunctionBlacklist(); } } } void sanitizeGlobalSymbolsPass(List functions) { Hashtable globalSymbols = new Hashtable<>(); for (Function function : functions) { println("Processing global symbols for " + function.getName()); DecompileResults decompRes = decompCache.getOrInsert(function); Iterator smyIt = decompRes.getHighFunction().getGlobalSymbolMap().getSymbols(); HighSymbol gsym = smyIt.next(); if (globalSymbols.containsKey(gsym.getName())) continue; println("GLOBAL: " + gsym.getName()); String sanitizedName = sanitizeFunctionName(gsym.getName()); if (!sanitizedName.equals(gsym.getName())) { if (AUTO_RENAME_SYMBOLS) { Symbol symbol = gsym.getSymbol(); VariableStorage storage = gsym.getStorage(); Address addr = storage.getMinAddress(); println("Renaming global symbol: " + gsym.getName() + " (" + addr + ") -> " + sanitizedName); if (symbol != null) { AddLabelCmd cmd = new AddLabelCmd(addr, sanitizedName, symbol.getParentNamespace(), SourceType.USER_DEFINED); if (cmd.applyTo(currentProgram)) { println("Renamed global symbol: " + gsym.getName() + " -> " + sanitizedName); } else { println("Error renaming symbol: " + cmd.getStatusMsg()); } } else { println("Symbol is null: " + gsym.getName() + " - " + function.getName()); } } else { println("Invalid global symbol name: " + gsym.getName() + " - " + function.getName()); } } } } String escapeCString(String str) { str = str.replace("\\", "\\\\"); str = str.replace("\"", "\\\""); // str = str.replaceAll("\n", "\\n"); // str = str.replaceAll("\r", "\\r"); // str = str.replaceAll("\t", "\\t"); // str = str.replaceAll("\b", "\\b"); // str = str.replaceAll("\f", "\\f"); // str = str.replaceAll("\0", "\\0"); return str; } String readCString(Address addr, int maxLen) throws Exception { StringBuilder sb = new StringBuilder(); int ofs = 0; while (true) { Address read = addr.add(ofs++); // println("Reading: " + read); byte b = currentProgram.getMemory().getByte(read); // println("Read: " + b); if (b == 0 || ofs >= maxLen) { break; } sb.append((char) b); } if (sb.length() > 0) { println("STR \"" + sb.toString() + "\""); } return sb.toString(); } void decompileFunction(Hashtable outGlobalSymbols, Function function) throws Exception { String fileName = sanitizeFunctionName(function.getName()) + ".cxx"; File f1 = new File(dirDecompFix, fileName); if (f1.exists()) { println("Func " + function.getName() + " skipped (gh_fix)"); return; } File f0 = new File(dirDecompAuto, fileName); if (f0.exists()) { f0.delete(); } println("Processing " + function.getName() + " => " + f0.toString()); DecompileResults decompRes = decompCache.getOrInsert(function); try (PrintWriter writer2 = new PrintWriter(f0, "UTF-8")) { writer2.println("// AUTO-GENERATED FILE, MOVE TO 'gh_fix' FOLDER PREVENT OVERWRITING!!!!! "); writer2.println(); writer2.println("#include "); writer2.println("#include \"../gh_global.h\""); writer2.println(); // decompRes.get HighFunction highFunction = decompRes.getHighFunction(); // Remap for dynamic symbols // Dictionary symbolRemap = new Hashtable<>(); HashSet headers = new HashSet<>(); StringWriter codeWriter = new StringWriter(); PrettyPrinter pp = new PrettyPrinter(decompRes.getFunction(), decompRes.getCCodeMarkup(), null); Iterator lines = pp.getLines().iterator(); while (lines.hasNext()) { ClangLine line = lines.next(); for (int i = 0; i < line.getIndent(); i++) { codeWriter.write(' '); } for (int t = 0; t < line.getNumTokens(); t++) { ClangToken token = line.getToken(t); HighSymbol gsym = token.getHighSymbol(highFunction); if (gsym != null) { var symStorage = gsym.getStorage(); if (symStorage.isMemoryStorage() || symStorage.isConstantStorage()) { // println("Token: " + token.toString() + " - " + gsym.getName()); outGlobalSymbols.put(gsym.getName(), gsym); } } PcodeOp op = token.getPcodeOp(); if (op != null && op.getOpcode() == PcodeOp.CALL) { println("PcodeOp: " + op.toString() + " - " + op.getInput(0).toString()); Varnode target = op.getInput(0); if (target.isAddress()) { Address callAddr = target.getAddress(); Function calledFunction = getFunctionAt(callAddr); if (calledFunction != null) { if (functionAddrBlackList.contains(calledFunction.getEntryPoint())) { println("Adding header: " + calledFunction + " / " + calledFunction.getSignature().getPrototypeString(true)); headers.add("extern " + calledFunction.getSignature().getPrototypeString(true)); } } } } codeWriter.write(token.toString()); } codeWriter.write('\n'); } for (String header : headers) { writer2.println(header + ";"); } writer2.println(); writer2.println("// " + function.getEntryPoint()); writer2.print(codeWriter.toString()); writer2.println(); // Iterator it = decompRes.getCCodeMarkup().tokenIterator(true); // int ln = 0; // while(it.hasNext()) { // ClangToken token = it.next(); // ClangLine line = token.getLineParent(); // while (line != null && ln < line.getLineNumber()) { // writer2.println(); // ln++; // } // writer2.print(token.toString()); // } // Collect referenced global symbols // Iterator smyIt = highFunction.getGlobalSymbolMap().getSymbols(); // while (smyIt.hasNext()) { // HighSymbol gsym = smyIt.next(); // Address addr = gsym.getSymbol().getAddress(); // println("FunctionSym " + addr + " " + gsym.getName() + " " + // gsym.getStorage().getMinAddress()); // println(" IsMem: " + gsym.getStorage().isMemoryStorage() + " " + // gsym.getStorage().getSerializationString()); // if (outGlobalSymbols.containsKey(gsym.getName())) // continue; // outGlobalSymbols.put(gsym.getName(), gsym); // } } } HashSet loadStructBlacklist() { File file = new File(outputDir, "struct_blacklist.txt"); HashSet structBlacklist = new HashSet<>(); try (Scanner scanner = new Scanner(file)) { while (scanner.hasNextLine()) { String line = scanner.nextLine(); structBlacklist.add(line.trim()); } } catch (FileNotFoundException e) { return null; } return structBlacklist; } void saveStructBlacklist(HashSet structBlacklist) throws Exception { String[] arr = structBlacklist.toArray(new String[0]); Arrays.sort(arr); File file = new File(outputDir, "struct_blacklist.txt"); try (PrintWriter writer = new PrintWriter(file)) { for (String structName : arr) { writer.println(structName); } } } void headerGuardPre(PrintWriter writer, String tag) { writer.println("#ifndef GH_GENERATED_" + tag + "_H"); writer.println("#define GH_GENERATED_" + tag + "_H"); writer.println(); } void headerGuardPost(PrintWriter writer, String tag) { writer.println("#endif // GH_GENERATED_" + tag + "_H"); } void dumpStructureTypes() throws Exception { try (PrintWriter writer = new PrintWriter(new File(outputDir, "gh_structs.h"), "UTF-8")) { headerGuardPre(writer, "STRUCTS"); writer.println("// AUTO-GENERATED FILE "); writer.println("#include "); ProgramBasedDataTypeManager dtm = currentProgram.getDataTypeManager(); HashSet structBlacklist = loadStructBlacklist(); if (structBlacklist == null) { println("Building struct blacklist from existing data types"); structBlacklist = new HashSet<>(); Iterator it = dtm.getAllDataTypes(); while (it.hasNext()) { DataType dt = it.next(); if (dt instanceof Structure) { structBlacklist.add(dt.getName()); } } saveStructBlacklist(structBlacklist); } Iterator it = dtm.getAllDataTypes(); while (it.hasNext()) { DataType dt = it.next(); if (dt instanceof Structure) { Structure struct = (Structure) dt; if (structBlacklist.contains(struct.getName())) continue; writer.println("struct " + struct.getName() + " {"); for (DataTypeComponent component : struct.getComponents()) { writer.println( " " + component.getDataType().getDisplayName() + " " + component.getDefaultFieldName() + ";"); } writer.println("};"); writer.println(); } } headerGuardPost(writer, "STRUCTS"); } } void dumpGlobals(Hashtable globalSymbols) throws Exception { File globalSymbolsListH = new File(outputDir, "gh_global.h"); PrintWriter hwriter = new PrintWriter(globalSymbolsListH, "UTF-8"); hwriter.println("// AUTO-GENERATED FILE "); headerGuardPre(hwriter, "GLOBALS"); hwriter.println("#include "); hwriter.println("#include \"gh_structs.h\""); hwriter.println(); File globalSymbolsListC = new File(outputDir, "gh_global.cxx"); PrintWriter cwriter = new PrintWriter(globalSymbolsListC, "UTF-8"); cwriter.println("// AUTO-GENERATED FILE "); cwriter.println("#include "); cwriter.println("#include \"gh_structs.h\""); hwriter.println(); for (HighSymbol highSym : globalSymbols.values()) { DataType dt = highSym.getDataType(); String dataType = dt.getDisplayName(); String name = highSym.getName(); String sanitizedName = sanitizeFunctionName(highSym.getName()); if (!sanitizedName.equals(highSym.getName())) { println("Invalid global symbol name: " + highSym.getName() + " - " + highSym.getHighFunction().getFunction().getName()); } else { Symbol symbol = highSym.getSymbol(); VariableStorage storage = highSym.getStorage(); Address addr = storage.getMinAddress(); int symSize = highSym.getSize(); if (addr == null) { // Not sure why this is sometimes null // also when it is not null, Symbol.getAddress() is not correct but very small // like 00000056 // Not that storage will be so maybe can check that addr = symbol.getAddress(); } println("Symbol: " + symbol + " Addr: " + addr + " Size:" + symSize + " " + storage.getSerializationString()); try { String initBlk = " = "; if (dt instanceof AbstractStringDataType) { AbstractStringDataType sdt = (AbstractStringDataType) dt; dataType = "const char*"; // String type initBlk += "\"" + escapeCString(readCString(addr, 2048)) + "\""; } else if (dt instanceof PointerDataType) { PointerDataType pdt = (PointerDataType) dt; DataType baseType = pdt.getDataType(); dataType = baseType.getDisplayName() + "*"; initBlk += "gh_ptr(0x" + addr + ")"; } else { initBlk = " {}"; } cwriter.println(dataType + " " + name + initBlk + "; // " + addr); } catch (Exception e) { println("Error processing global symbol: " + e); println("Symbol: " + highSym.getName() + " - " + addr + " - " + highSym.getHighFunction().getFunction().getName()); } hwriter.println("extern " + dataType + " " + name + "; // " + addr); } } headerGuardPost(hwriter, "GLOBALS"); hwriter.close(); cwriter.close(); } void decompileAll(List functions) throws Exception { Hashtable globalSymbols = new Hashtable<>(); for (Function function : functions) { decompileFunction(globalSymbols, function); } dumpStructureTypes(); dumpGlobals(globalSymbols); } @Override public void run() throws Exception { if (currentProgram == null) { return; } DecompInterface decomp = new DecompInterface(); decomp.openProgram(currentProgram); decompCache = new DecompileCache(decomp); staticMemoryBlockStart = currentProgram.getAddressFactory().getAddress("005b6400"); staticMemoryBlockEnd = currentProgram.getAddressFactory().getAddress("00843fff"); // Make sure to create OUTPUT_PATH rootDir = new File(sourceFile.getAbsolutePath()).getParentFile().getParentFile(); outputDir = new File(rootDir, OUTPUT_DIR); if (!outputDir.exists()) { throw new Exception("Output directory does not exist: " + outputDir.getCanonicalPath()); } dirDecompAuto = new File(outputDir, "gh_auto"); dirDecompFix = new File(outputDir, "gh_fix"); println("Output path: " + outputDir.getCanonicalPath()); buildFunctionBlacklist(); List functions = new ArrayList<>(); Iterator functionsIt = currentProgram.getFunctionManager().getFunctions(true).iterator(); while (functionsIt.hasNext()) { Function function = functionsIt.next(); if (!shouldDecompileFunction(function)) { continue; } functions.add(function); } int mode = 1; if (mode == 0) { // Sanitize symbols sanitizeGlobalSymbolsPass(functions); } else if (mode == 1) { // Decompile all functions decompileAll(functions); } } String sanitizeFunctionName(String name) { return name.replaceAll("[^a-zA-Z0-9_]", "_"); } }