From 0dced8f5a6404f14c12107ecef404e4408f20714 Mon Sep 17 00:00:00 2001 From: Guus Waals <_@guusw.nl> Date: Sun, 15 Sep 2024 23:07:59 +0800 Subject: [PATCH] WIP Decompile script --- scripts/DecompileC.java | 397 +++++++++++++++++++++++++++++++++------- 1 file changed, 330 insertions(+), 67 deletions(-) diff --git a/scripts/DecompileC.java b/scripts/DecompileC.java index 098ba63c..bdbb5f26 100644 --- a/scripts/DecompileC.java +++ b/scripts/DecompileC.java @@ -1,76 +1,279 @@ +/* ### + */ +// Script to export decompiled C code from Ghidra -// Test script +// +//@category Examples.Demangler import java.io.File; +import java.io.FileNotFoundException; import java.io.PrintWriter; -import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; -import java.util.Dictionary; +import java.util.HashSet; import java.util.Hashtable; import java.util.Iterator; import java.util.List; - +import java.util.Scanner; +import java.util.Arrays; +import ghidra.app.cmd.label.AddLabelCmd; import ghidra.app.decompiler.DecompInterface; import ghidra.app.decompiler.DecompileResults; +import ghidra.app.decompiler.DecompiledFunction; import ghidra.app.script.GhidraScript; -import ghidra.app.script.GhidraScriptUtil; -import ghidra.program.model.address.GlobalSymbol; +import ghidra.docking.settings.Settings; +import ghidra.program.model.address.Address; +import ghidra.program.model.data.AbstractStringDataType; import ghidra.program.model.data.DataType; +import ghidra.program.model.data.StringDataInstance; import ghidra.program.model.listing.Function; import ghidra.program.model.listing.Variable; +import ghidra.program.model.listing.VariableStorage; +import ghidra.program.model.mem.MemoryAccessException; +import ghidra.program.model.mem.MemoryBlock; +import ghidra.program.model.mem.MemoryBufferImpl; +import ghidra.program.model.pcode.HighFunction; import ghidra.program.model.pcode.HighSymbol; +import ghidra.program.model.pcode.PcodeOp; +import ghidra.program.model.pcode.PcodeOpAST; +import ghidra.program.model.pcode.Varnode; import ghidra.program.model.symbol.SourceType; import ghidra.program.model.symbol.Symbol; +import ghidra.program.model.symbol.SymbolTable; public class DecompileC extends GhidraScript { + public class PCallTracer { + public class QueueItem { + Function function; + int depth; + + QueueItem(Function function, int depth) { + this.function = function; + this.depth = depth; + } + } + + public List out = new ArrayList<>(); + public boolean trace = false; + List queue = new ArrayList<>(); + HashSet
visited = new HashSet<>(); + DecompInterface decomp; + + PCallTracer(DecompInterface decomp) { + this.decomp = decomp; + } + + public void setBlacklist(HashSet
blacklist) { + this.visited = new HashSet<>(blacklist); + } + + void visit(HighFunction highFunction, int depth) { + Iterator opIter = highFunction.getPcodeOps(); + while (opIter.hasNext()) { + PcodeOpAST op = opIter.next(); + if (op.getOpcode() == PcodeOp.CALL) { + Varnode target = op.getInput(0); + if (target.isAddress()) { + Address callAddr = target.getAddress(); + Function calledFunction = getFunctionAt(callAddr); + if (calledFunction == null) { + + println("PCallTracer, called function not found: " + op.toString() + " - " + + highFunction.getFunction().getName()); + continue; + } + if (!visited.contains(calledFunction.getEntryPoint())) { + queue.add(new QueueItem(calledFunction, depth + 1)); + } + } + } + } + } + + void visit(Function function, int depth) { + if (!visited.contains(function.getEntryPoint())) { + visited.add(function.getEntryPoint()); + if (trace) { + println("PCallTracer, visiting " + function.getName() + " (depth:" + depth + ")"); + } + DecompileResults decompRes = decomp.decompileFunction(function, TIMEOUT, monitor); + visit(decompRes.getHighFunction(), depth); + out.add(function); + } + } + + public void traceCalls(Function inFunction) { + queue.add(new QueueItem(inFunction, 0)); + while (queue.size() > 0) { + QueueItem item = queue.remove(0); + visit(item.function, item.depth); + } + } + } + private static final String OUTPUT_DIR = "game_re"; private static final int TIMEOUT = 10000; // Auto rename invalid symbols private static final boolean AUTO_RENAME_SYMBOLS = true; - @Override - public void run() throws Exception { - if (currentProgram == null) { + private static final HashSet ONLY_SYMBOLS = new HashSet<>(Arrays.asList( + "r3_main", // + "_strrchr")); + + File rootDir; + File outputDir; + File dirDecompAuto; + File dirDecompFix; + + HashSet
functionAddrBlackList = new HashSet<>(); + + void loadFunctionBlacklist() { + functionAddrBlackList.clear(); + + File blacklistFile = new File(outputDir, "blacklist.txt"); + try (Scanner scanner = new Scanner(blacklistFile)) { + while (scanner.hasNextLine()) { + String line = scanner.nextLine(); + // Strip comment + String line1 = line.split("//")[0].trim(); + // Deserialize address + Address addr = currentProgram.getAddressFactory().getAddress(line1); + functionAddrBlackList.add(addr); + } + println("Loaded blacklist with " + functionAddrBlackList.size() + " entries"); + } catch (FileNotFoundException e) { + println("No blacklist found"); return; } - // Make sure to create OUTPUT_PATH - File rootDir = new File(sourceFile.getAbsolutePath()).getParentFile().getParentFile(); - File outputDir = new File(rootDir, OUTPUT_DIR); + // for (Address a : functionAddrBlackList) { + // Function fn = getFunctionAt(a); + // println("In blacklist: " + a + " (" + (fn != null ? fn.getName() : "unknown") + // + ")"); + // } + } - if (!outputDir.exists()) { - throw new Exception("Output directory does not exist: " + outputDir.getCanonicalPath()); - } + void saveFunctionBlacklist() { + println("Saving blacklist"); - File dirDecompAuto = new File(outputDir, "gh_auto"); - File dirDecompFix = new File(outputDir, "gh_fix"); - - println("Output path: " + outputDir.getCanonicalPath()); - - DecompInterface decomp = new DecompInterface(); - decomp.openProgram(currentProgram); - - List functions = new ArrayList<>(); - List uniqueVars = new ArrayList<>(); - - Iterator functionsIt = currentProgram.getFunctionManager().getFunctions(true).iterator(); - while (functionsIt.hasNext()) { - Function function = functionsIt.next(); - String comment = function.getComment(); - if (comment != null && comment.contains("TODO")) { - println("Function: " + function.getName() + " - " + comment); + File blacklistFile = new File(outputDir, "blacklist.txt"); + try (PrintWriter writer = new PrintWriter(blacklistFile)) { + for (Address addr : functionAddrBlackList) { + writer.println(addr.toString() + " // " + getFunctionAt(addr).getName()); } - functions.add(function); + } catch (FileNotFoundException e) { + println("Error saving blacklist: " + e.getMessage()); + } + } - for (Variable var : function.getAllVariables()) { - uniqueVars.add(var); + boolean shouldDecompileFunction(Function function) { + if (ONLY_SYMBOLS != null && !ONLY_SYMBOLS.contains(function.getName())) { + return false; + } + return !functionAddrBlackList.contains(function.getEntryPoint()); + } + + static final boolean BUILD_BLACKLIST = true; + + void buildFunctionBlacklist(DecompInterface decomp) { + loadFunctionBlacklist(); + + if (BUILD_BLACKLIST) { + boolean modified = false; + + Iterator functionsIt = currentProgram.getFunctionManager().getFunctions(true).iterator(); + while (functionsIt.hasNext()) { + Function function = functionsIt.next(); + if (functionAddrBlackList.contains(function.getEntryPoint())) { + continue; + } + + String comment = function.getComment(); + boolean isIgnoredFunction = false; + if (comment != null && comment.contains("Library Function")) { + println("Adding library function " + function.getName() + " to blacklist"); + println("ac:" + functionAddrBlackList.size() + " jj:" + + functionAddrBlackList.contains(function.getEntryPoint()) + " " + function.getEntryPoint()); + isIgnoredFunction = true; + } + + if (function.getName().startsWith("crt_")) { + println("Adding crt function " + function.getName() + " to blacklist"); + isIgnoredFunction = true; + } + + if (isIgnoredFunction) { + // Decompile and trace + PCallTracer tracer = new PCallTracer(decomp); + tracer.setBlacklist(functionAddrBlackList); + tracer.traceCalls(function); + for (Function f : tracer.out) { + println(" Adding " + f.getName() + " to blacklist"); + functionAddrBlackList.add(f.getEntryPoint()); + modified = true; + } + } + } + + if (modified) { + saveFunctionBlacklist(); } } + } + void sanitizeGlobalSymbolsPass(DecompInterface decomp, List functions) { + Hashtable globalSymbols = new Hashtable<>(); + + for (Function function : functions) { + println("Processing global symbols for " + function.getName()); + + DecompileResults decompRes = decomp.decompileFunction(function, TIMEOUT, monitor); + Iterator smyIt = decompRes.getHighFunction().getGlobalSymbolMap().getSymbols(); + + HighSymbol gsym = smyIt.next(); + if (globalSymbols.containsKey(gsym.getName())) + continue; + println("GLOBAL: " + gsym.getName()); + String sanitizedName = sanitizeFunctionName(gsym.getName()); + if (!sanitizedName.equals(gsym.getName())) { + if (AUTO_RENAME_SYMBOLS) { + Symbol symbol = gsym.getSymbol(); + VariableStorage storage = gsym.getStorage(); + Address addr = storage.getMinAddress(); + println("Renaming global symbol: " + gsym.getName() + " (" + addr + + ") -> " + sanitizedName); + if (symbol != null) { + AddLabelCmd cmd = new AddLabelCmd(addr, sanitizedName, symbol.getParentNamespace(), + SourceType.USER_DEFINED); + if (cmd.applyTo(currentProgram)) { + println("Renamed global symbol: " + gsym.getName() + " -> " + sanitizedName); + } else { + println("Error renaming symbol: " + cmd.getStatusMsg()); + } + } else { + println("Symbol is null: " + gsym.getName() + " - " + function.getName()); + } + } else { + println("Invalid global symbol name: " + gsym.getName() + " - " + function.getName()); + } + } + } + } + + String escapeCString(String str) { + str = str.replaceAll("\"", "\\\""); + str = str.replaceAll("\n", "\\n"); + str = str.replaceAll("\r", "\\r"); + str = str.replaceAll("\t", "\\t"); + str = str.replaceAll("\b", "\\b"); + str = str.replaceAll("\f", "\\f"); + str = str.replaceAll("\0", "\\0"); + return str; + } + + void decompileAll(DecompInterface decomp, List functions) throws Exception { Hashtable globalSymbols = new Hashtable<>(); - File functionList = new File(outputDir, "functions.txt"); - PrintWriter writer = new PrintWriter(functionList, "UTF-8"); for (Function function : functions) { String fileName = sanitizeFunctionName(function.getName()) + ".c"; @@ -85,60 +288,120 @@ public class DecompileC extends GhidraScript { f0.delete(); } + println("Processing " + function.getName() + " => " + f0.toString()); + DecompileResults decompRes = decomp.decompileFunction(function, TIMEOUT, monitor); PrintWriter writer2 = new PrintWriter(f0, "UTF-8"); + writer2.println("// AUTO-GENERATED FILE, MOVE TO 'gh_fix' FOLDER PREVENT OVERWRITING!!!!! "); + writer2.println("// " + function.getEntryPoint()); + writer2.println(); + writer2.println("#include "); + writer2.println(); writer2.println(decompRes.getDecompiledFunction().getC()); + writer2.close(); + // Collect referenced global symbols Iterator smyIt = decompRes.getHighFunction().getGlobalSymbolMap().getSymbols(); while (smyIt.hasNext()) { HighSymbol gsym = smyIt.next(); if (globalSymbols.containsKey(gsym.getName())) continue; - println("GLOBAL: " + gsym.getName()); - String sanitizedName = sanitizeFunctionName(gsym.getName()); - if (!sanitizedName.equals(gsym.getName())) { - if (AUTO_RENAME_SYMBOLS) { - println("Renaming global symbol: " + gsym.getName() + " -> " + sanitizedName); - Symbol symbol = gsym.getSymbol();// currentProgram.getSymbolTable().getSymbol(gsym.getName()); - if (symbol != null) { - try { - symbol.setName(sanitizedName, SourceType.USER_DEFINED); - println("Renamed global symbol: " + gsym.getName() + " -> " + sanitizedName); - } catch (Exception e) { - println("Error renaming symbol: " + e.getMessage()); - } - } else { - println("Could not find symbol to rename: " + gsym.getName()); - } - } - println("Invalid global symbol name: " + gsym.getName() + " - " + function.getName()); - } globalSymbols.put(gsym.getName(), gsym); } } writer.close(); - File uniqueVarsList = new File(outputDir, "unique_vars.txt"); - writer = new PrintWriter(uniqueVarsList, "UTF-8"); - for (Variable var : uniqueVars) { - writer.println(var.getName()); - } - writer.close(); - - File globalSymbolsListH = new File(outputDir, "global.h"); - File globalSymbolsListC = new File(outputDir, "global.c"); + File globalSymbolsListH = new File(outputDir, "gh_global.h"); + File globalSymbolsListC = new File(outputDir, "gh_global.c"); PrintWriter hwriter = new PrintWriter(globalSymbolsListH, "UTF-8"); PrintWriter cwriter = new PrintWriter(globalSymbolsListC, "UTF-8"); for (HighSymbol sym : globalSymbols.values()) { DataType dt = sym.getDataType(); - hwriter.println("extern " + dt.toString() + " " + sym.getName() + ";"); - cwriter.println(sym.getDataType().toString() + " " + sym.getName() + " {}"); + String sanitizedName = sanitizeFunctionName(sym.getName()); + if (!sanitizedName.equals(sym.getName())) { + println("Invalid global symbol name: " + sym.getName() + " - " + sym.getHighFunction().getFunction().getName()); + } else { + Address address = sym.getStorage().getMinAddress(); + MemoryBlock block = currentProgram.getMemory().getBlock(address); + String dataType = dt.toString(); + String name = sym.getName(); + + if (block == null) { + println("Can not read variable " + name + " (" + dataType + ") at " + address); + continue; + } + + if (dt instanceof AbstractStringDataType) { + // String type + hwriter.println("extern " + dataType + " " + name + "; // " + address); + + String srcBlock = ""; + // Read the actual string data from Ghidra + if (block != null && block.isInitialized()) { + byte[] bytes = new byte[dt.getLength()]; + block.getBytes(address, bytes); + // Parse from UTF-8 + String stringValue = new String(bytes, StandardCharsets.UTF_8); + srcBlock = dataType + " " + name + " = \"" + escapeCString(stringValue) + "\";"; + } else { + } + cwriter.println(srcBlock + " // " + address); + } + } } hwriter.close(); cwriter.close(); } + @Override + public void run() throws Exception { + if (currentProgram == null) { + return; + } + + // Make sure to create OUTPUT_PATH + rootDir = new File(sourceFile.getAbsolutePath()).getParentFile().getParentFile(); + outputDir = new File(rootDir, OUTPUT_DIR); + + if (!outputDir.exists()) { + throw new Exception("Output directory does not exist: " + outputDir.getCanonicalPath()); + } + + dirDecompAuto = new File(outputDir, "gh_auto"); + dirDecompFix = new File(outputDir, "gh_fix"); + + println("Output path: " + outputDir.getCanonicalPath()); + + DecompInterface decomp = new DecompInterface(); + decomp.openProgram(currentProgram); + + buildFunctionBlacklist(decomp); + + List functions = new ArrayList<>(); + + Iterator functionsIt = currentProgram.getFunctionManager().getFunctions(true).iterator(); + while (functionsIt.hasNext()) { + Function function = functionsIt.next(); + if (!shouldDecompileFunction(function)) { + continue; + } + + functions.add(function); + } + + // File functionList = new File(outputDir, "functions.txt"); + // PrintWriter writer = new PrintWriter(functionList, "UTF-8"); + + int mode = 1; + if (mode == 0) { // Sanitize symbols + sanitizeGlobalSymbolsPass(decomp, functions); + } else if (mode == 1) { // Decompile all functions + decompileAll(decomp, functions); + + } + } + String sanitizeFunctionName(String name) { return name.replaceAll("[^a-zA-Z0-9_]", "_"); }