// Script to export decompiled C code from Ghidra // @category _Reman3 // @menupath Reman3.Decompile All import java.io.File; import java.io.PrintWriter; import java.io.StringWriter; import java.util.ArrayList; import java.util.HashSet; import java.util.Hashtable; import java.util.Iterator; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.Arrays; import java.util.Dictionary; import ghidra.app.decompiler.ClangFieldToken; import ghidra.app.decompiler.ClangLine; import ghidra.app.decompiler.ClangSyntaxToken; import ghidra.app.decompiler.ClangToken; import ghidra.app.decompiler.DecompileResults; import ghidra.app.decompiler.PrettyPrinter; import ghidra.app.script.GhidraScript; import ghidra.program.model.address.Address; import ghidra.program.model.data.AbstractStringDataType; import ghidra.program.model.data.BitFieldDataType; import ghidra.program.model.data.DataType; import ghidra.program.model.data.DataTypeComponent; import ghidra.program.model.data.DataTypeManager; import ghidra.program.model.data.PointerDataType; import ghidra.program.model.data.ProgramBasedDataTypeManager; import ghidra.program.model.data.Structure; import ghidra.program.model.data.TypedefDataType; import ghidra.program.model.listing.Function; import ghidra.program.model.listing.VariableStorage; import ghidra.program.model.pcode.HighFunction; import ghidra.program.model.pcode.HighSymbol; import ghidra.program.model.pcode.PcodeOp; import ghidra.program.model.pcode.Varnode; import ghidra.program.model.symbol.Symbol; import ghidra.util.task.TaskMonitor; import re3lib.*; public class Decompile extends GhidraScript { // Auto rename invalid symbols private static final boolean AUTO_RENAME_SYMBOLS = true; private static final HashSet ONLY_SYMBOLS = new HashSet<>(Arrays.asList( "r3_main", // "_strrchr")); static final boolean BUILD_BLACKLIST = true; HashSet
functionAddrBlackList = new HashSet<>(); boolean shouldDecompileFunction(Function function) { if (ONLY_SYMBOLS != null && !ONLY_SYMBOLS.contains(function.getName())) { return false; } return !functionAddrBlackList.contains(function.getEntryPoint()); } void buildFunctionBlacklist() { functionAddrBlackList = Utils.loadFunctionBlacklist(RecompileConfig.INSTANCE.functionBlacklistPath); if (BUILD_BLACKLIST) { boolean modified = false; Iterator functionsIt = currentProgram.getFunctionManager().getFunctions(true).iterator(); while (functionsIt.hasNext()) { Function function = functionsIt.next(); if (functionAddrBlackList.contains(function.getEntryPoint())) { continue; } String comment = function.getComment(); boolean isIgnoredFunction = false; if (comment != null && comment.contains("Library Function")) { println("Adding library function " + function.getName() + " to blacklist"); println("ac:" + functionAddrBlackList.size() + " jj:" + functionAddrBlackList.contains(function.getEntryPoint()) + " " + function.getEntryPoint()); isIgnoredFunction = true; } if (function.getName().startsWith("crt_")) { println("Adding crt function " + function.getName() + " to blacklist"); isIgnoredFunction = true; } if (isIgnoredFunction) { // Decompile and trace PCallTracer tracer = new PCallTracer(); tracer.setBlacklist(functionAddrBlackList); tracer.traceCalls(function); for (Function f : tracer.out) { println(" Adding " + f.getName() + " to blacklist"); functionAddrBlackList.add(f.getEntryPoint()); modified = true; } } } if (modified) { Utils.saveFunctionBlacklist(functionAddrBlackList, RecompileConfig.INSTANCE.functionBlacklistPath); } } } String escapeCString(String str) { str = str.replace("\\", "\\\\"); str = str.replace("\"", "\\\""); return str; } String readCString(Address addr, int maxLen) throws Exception { StringBuilder sb = new StringBuilder(); int ofs = 0; while (true) { Address read = addr.add(ofs++); // println("Reading: " + read); byte b = currentProgram.getMemory().getByte(read); // println("Read: " + b); if (b == 0 || ofs >= maxLen) { break; } sb.append((char) b); } if (sb.length() > 0) { // println("STR \"" + sb.toString() + "\""); } return sb.toString(); } public class GlobalRec { public HighSymbol highSymbol; public Address address; // public boolean isFullyDefined; public GlobalRec(HighSymbol highSymbol, Address address) { this.highSymbol = highSymbol; this.address = address; // this.isFullyDefined = isFullyDefined; } }; static final Pattern fieldAccessRegex = Pattern.compile("^_([0-9]+)_([0-9]+)_$"); void decompileFunction(Hashtable outGlobalSymbols, Function function) throws Exception { String fileName = sanitizeFunctionName(function.getName()) + ".cxx"; File f1 = new File(RecompileConfig.INSTANCE.dirDecompFix, fileName); if (f1.exists()) { println("Func " + function.getName() + " skipped (gh_fix)"); return; } File f0 = new File(RecompileConfig.INSTANCE.dirDecompAuto, fileName); if (f0.exists()) { f0.delete(); } println("Processing " + function.getName() + " => " + f0.toString()); DecompileResults decompRes = RecompileConfig.INSTANCE.decompCache.getOrInsert(function); try (PrintWriter writer2 = new PrintWriter(f0, "UTF-8")) { writer2.println("// AUTO-GENERATED FILE, MOVE TO 'gh_fix' FOLDER PREVENT OVERWRITING!!!!! "); writer2.println(); writer2.println("#include "); writer2.println("#include \"../gh_global.h\""); writer2.println(); // decompRes.get HighFunction highFunction = decompRes.getHighFunction(); // Remap for dynamic symbols // Dictionary symbolRemap = new Hashtable<>(); HashSet headers = new HashSet<>(); StringWriter codeWriter = new StringWriter(); PrettyPrinter pp = new PrettyPrinter(decompRes.getFunction(), decompRes.getCCodeMarkup(), null); Iterator lines = pp.getLines().iterator(); while (lines.hasNext()) { ClangLine line = lines.next(); for (int i = 0; i < line.getIndent(); i++) { codeWriter.write(' '); } List tokens = new ArrayList<>(); // Parse preliminary line tokens for (int i = 0; i < line.getNumTokens(); i++) { ClangToken token = line.getToken(i); if (!token.getText().isEmpty()) tokens.add(token); } // Preprocess tokens boolean prevDot = false; for (int t = 0; t < tokens.size(); t++) { ClangToken token = tokens.get(t); boolean thisDot = false; // println("Token: " + token.toString()); if (token.toString().equals(".")) { // println("Found dot: " + token.toString() + " - " + token.getClass()); thisDot = true; } if (prevDot) { // println("Possible field access: " + token.getText()); if (token instanceof ClangSyntaxToken) { // Parse _4_4_ sub-access using regex String text = token.getText(); Matcher matcher = fieldAccessRegex.matcher(text); if (matcher.matches()) { int offset = Integer.parseInt(matcher.group(1)); int size = Integer.parseInt(matcher.group(2)); // println("MATCHED: " + token.getText() + " - " + token.getSyntaxType() + " - " + token.getVarnode() + " - " // + token.getPcodeOp()); // Replace tokens with + Field ClangToken replacement = new ClangToken(token.Parent(), " + Field<" + offset + ", " + size + ">()"); tokens.remove(t); tokens.remove(t - 1); tokens.add(t - 1, replacement); t--; } } } // Extract memory references HighSymbol gsym = token.getHighSymbol(highFunction); if (gsym != null) { var symStorage = gsym.getStorage(); var sym = gsym.getSymbol(); Address address; if (symStorage.isUnassignedStorage()) { address = sym.getAddress(); } else { address = gsym.getStorage().getMinAddress(); } if (address.isMemoryAddress()) { outGlobalSymbols.put(gsym.getName(), new GlobalRec(gsym, address)); } } // Extract external function calls PcodeOp op = token.getPcodeOp(); if (op != null && op.getOpcode() == PcodeOp.CALL) { // println("PcodeOp: " + op.toString() + " - " + op.getInput(0).toString()); Varnode target = op.getInput(0); if (target.isAddress()) { Address callAddr = target.getAddress(); Function calledFunction = getFunctionAt(callAddr); if (calledFunction != null) { if (!functionAddrBlackList.contains(calledFunction.getEntryPoint())) { // println("Adding header: " + calledFunction + " / " + // calledFunction.getSignature().getPrototypeString(true)); headers.add("extern " + calledFunction.getSignature().getPrototypeString(true) + "; // " + calledFunction.getEntryPoint() + " // " + calledFunction.getName()); } } } } prevDot = thisDot; } // Print tokens for (int t = 0; t < tokens.size(); t++) { ClangToken token = tokens.get(t); codeWriter.write(token.toString()); } codeWriter.write('\n'); } for (String header : headers) { writer2.println(header); } writer2.println(); writer2.println("// " + function.getEntryPoint()); writer2.print(codeWriter.toString()); writer2.println(); // Iterator it = decompRes.getCCodeMarkup().tokenIterator(true); // int ln = 0; // while(it.hasNext()) { // ClangToken token = it.next(); // ClangLine line = token.getLineParent(); // while (line != null && ln < line.getLineNumber()) { // writer2.println(); // ln++; // } // writer2.print(token.toString()); // } // Collect referenced global symbols // Iterator smyIt = highFunction.getGlobalSymbolMap().getSymbols(); // while (smyIt.hasNext()) { // HighSymbol gsym = smyIt.next(); // Address addr = gsym.getSymbol().getAddress(); // println("FunctionSym " + addr + " " + gsym.getName() + " " + // gsym.getStorage().getMinAddress()); // println(" IsMem: " + gsym.getStorage().isMemoryStorage() + " " + // gsym.getStorage().getSerializationString()); // if (outGlobalSymbols.containsKey(gsym.getName())) // continue; // outGlobalSymbols.put(gsym.getName(), gsym); // } } } void headerGuardPre(PrintWriter writer, String tag) { writer.println("#ifndef GH_GENERATED_" + tag + "_H"); writer.println("#define GH_GENERATED_" + tag + "_H"); writer.println(); } void headerGuardPost(PrintWriter writer, String tag) { writer.println("#endif // GH_GENERATED_" + tag + "_H"); } class DependencyTypeDumper { HashSet types = new HashSet<>(); DataTypeManager dtm; TaskMonitor taskMonitor; public DependencyTypeDumper(DataTypeManager dtm) { this.dtm = dtm; this.taskMonitor = monitor; } void visit(DataType dataType, PrintWriter writer) throws Exception { // If not already written if (types.contains(dataType)) { println("Visiting: " + dataType.getName()); types.remove(dataType); // Write dependencies, and then write self if (dataType instanceof Structure) { Structure struct = (Structure) dataType; for (DataTypeComponent component : struct.getComponents()) { DataType dt = component.getDataType(); if (dt instanceof Structure) { println("Dependency: " + dt.getName()); visit((Structure) dt, writer); } } } writeNoDeps(dataType, writer); } } void writeNoDeps(DataType dt, PrintWriter writer) throws Exception { // Check // https://github.com/NationalSecurityAgency/ghidra/blob/17c93909bbf99f7f98dbf5737b38d8dd2c01bef0/Ghidra/Features/Decompiler/src/main/java/ghidra/app/util/exporter/CppExporter.java#L401 // DataTypeWriter dtw = new DataTypeWriter(dtm, writer); // dtw.blacklistedTypes = // dtw.write(new DataType[] { dt }, taskMonitor); // if (dt instanceof Structure) { // Structure struct = (Structure) dt; // writer.append("typedef struct " + struct.getName() + " {\n"); // for (DataTypeComponent component : struct.getComponents()) { // if (component.isBitFieldComponent()) { // BitFieldDataType bfdt = (BitFieldDataType) component.getDataType(); // writer.append( // " " + bfdt.getDisplayName() + " " + component.getDefaultFieldName() + " : " + // bfdt.getBitSize() + ";\n"); // } else { // writer.append( // " " + component.getDataType().getDisplayName() + " " + // component.getDefaultFieldName() + ";\n"); // } // writer.append("} " + struct.getDisplayName() + " ;\n"); // writer.append("\n"); // } else if (dt instanceof TypedefDataType) { // TypedefDataType typedef = (TypedefDataType) dt; // writer.append("typedef " + typedef.getDataType().getDisplayName() + " " + // typedef.getName() + ";\n"); // writer.append("\n"); // } else { // throw new Exception("Unsupported type: " + dt.getDisplayName()); // } } void writeAll(PrintWriter writer) throws Exception { while (types.size() > 0) { DataType first = types.iterator().next(); visit(first, writer); } } }; void dumpStructureTypes() throws Exception { ProgramBasedDataTypeManager dtm = currentProgram.getDataTypeManager(); HashSet typeBlacklist = Utils.loadStructBlacklist(RecompileConfig.INSTANCE.typeBlacklistPath); if (typeBlacklist == null) { println("Building struct blacklist from existing data types"); typeBlacklist = new HashSet<>(); Iterator it = dtm.getAllDataTypes(); while (it.hasNext()) { DataType dt = it.next(); if (dt instanceof Structure || dt instanceof TypedefDataType) { typeBlacklist.add(dt.getDisplayName()); } } Utils.saveStructBlacklist(typeBlacklist, RecompileConfig.INSTANCE.typeBlacklistPath); } List filteredTypes = new ArrayList<>(); Iterator it = dtm.getAllDataTypes(); // DependencyTypeDumper dumper = new DependencyTypeDumper(dtm); while (it.hasNext()) { DataType dt = it.next(); if (dt instanceof Structure || dt instanceof TypedefDataType) { if (typeBlacklist.contains(dt.getDisplayName())) continue; // println("Adding: " + dt.getDisplayName() + " - " + // dt.getClass().getSimpleName()); filteredTypes.add(dt); } // Structure struct = (Structure) dt; // dumper.types.add(struct); // } else if (dt instanceof TypedefDataType) { // TypedefDataType typedef = (TypedefDataType) dt; // dumper.types.add(typedef); // } } try (PrintWriter writer = new PrintWriter(new File(RecompileConfig.INSTANCE.outputDir, "gh_structs.h"), "UTF-8")) { headerGuardPre(writer, "STRUCTS"); writer.println("// AUTO-GENERATED FILE "); writer.println("#include "); // dumper.writeAll(writer); DataTypeWriter dtw = new DataTypeWriter(dtm, writer); dtw.blacklistedTypes = typeBlacklist; dtw.write(filteredTypes, monitor); headerGuardPost(writer, "STRUCTS"); } } void dumpGlobals(Hashtable globalSymbols) throws Exception { File globalSymbolsListH = new File(RecompileConfig.INSTANCE.outputDir, "gh_global.h"); PrintWriter hwriter = new PrintWriter(globalSymbolsListH, "UTF-8"); hwriter.println("// AUTO-GENERATED FILE "); headerGuardPre(hwriter, "GLOBALS"); hwriter.println("#include "); hwriter.println(); File globalSymbolsListC = new File(RecompileConfig.INSTANCE.outputDir, "gh_global.cxx"); PrintWriter cwriter = new PrintWriter(globalSymbolsListC, "UTF-8"); cwriter.println("// AUTO-GENERATED FILE "); cwriter.println("#include "); hwriter.println(); for (GlobalRec sym : globalSymbols.values()) { HighSymbol highSym = sym.highSymbol; DataType dt = highSym.getDataType(); String dataType = dt.getDisplayName(); String name = highSym.getName(); String sanitizedName = sanitizeFunctionName(highSym.getName()); if (!sanitizedName.equals(highSym.getName())) { println("Invalid global symbol name: " + highSym.getName() + " - " + highSym.getHighFunction().getFunction().getName()); } else { Address addr = sym.address; // println("Symbol: " + symbol + " Addr: " + addr + " Size:" + symSize + " " + // storage.getSerializationString()); try { String initBlk = " = "; boolean fullyDefinedType = false; if (dt instanceof AbstractStringDataType) { AbstractStringDataType sdt = (AbstractStringDataType) dt; dataType = "const char*"; // String type initBlk += "\"" + escapeCString(readCString(addr, 2048)) + "\""; fullyDefinedType = true; } else if (dt instanceof PointerDataType) { PointerDataType pdt = (PointerDataType) dt; DataType baseType = pdt.getDataType(); dataType = baseType.getDisplayName() + "*"; initBlk += "(" + dataType + ")&GH_MEM(0x" + addr + ")"; fullyDefinedType = true; } if (fullyDefinedType) { hwriter.println("extern " + dataType + " " + name + "; // " + addr); cwriter.println(dataType + " " + name + initBlk + "; // " + addr); } else { String refTypeStr = dt.getDisplayName() + "&"; hwriter.println("extern " + refTypeStr + " " + name + "; // " + addr); cwriter.println(dataType + " " + name + "= (" + refTypeStr + ") GH_MEM(0x" + addr + ");"); } } catch (Exception e) { println("Error processing global symbol: " + e); println("Symbol: " + highSym.getName() + " - " + addr + " - " + highSym.getHighFunction().getFunction().getName()); } } } headerGuardPost(hwriter, "GLOBALS"); hwriter.close(); cwriter.close(); } void decompileAll(List functions) throws Exception { Hashtable globalSymbols = new Hashtable<>(); for (Function function : functions) { decompileFunction(globalSymbols, function); } dumpStructureTypes(); dumpGlobals(globalSymbols); } @Override public void run() throws Exception { if (currentProgram == null) { return; } RecompileConfig.INSTANCE = new RecompileConfig(this); if (!new File(RecompileConfig.INSTANCE.outputDir).exists()) { throw new Exception("Output directory does not exist: " + RecompileConfig.INSTANCE.outputDir); } // Make sure to create output folders RecompileConfig.INSTANCE.dirDecompFix.mkdirs(); RecompileConfig.INSTANCE.dirDecompAuto.mkdirs(); buildFunctionBlacklist(); List functions = new ArrayList<>(); Iterator functionsIt = currentProgram.getFunctionManager().getFunctions(true).iterator(); while (functionsIt.hasNext()) { Function function = functionsIt.next(); if (!shouldDecompileFunction(function)) { continue; } functions.add(function); } decompileAll(functions); } String sanitizeFunctionName(String name) { return name.replaceAll("[^a-zA-Z0-9_]", "_"); } }