reman3/scripts/Decompile.java

471 lines
17 KiB
Java

// Script to export decompiled C code from Ghidra
// @category _Reman3
// @menupath Reman3.Decompile All
import java.io.File;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.Arrays;
import java.util.Dictionary;
import ghidra.app.decompiler.ClangFieldToken;
import ghidra.app.decompiler.ClangLine;
import ghidra.app.decompiler.ClangSyntaxToken;
import ghidra.app.decompiler.ClangToken;
import ghidra.app.decompiler.DecompileResults;
import ghidra.app.decompiler.PrettyPrinter;
import ghidra.app.script.GhidraScript;
import ghidra.program.model.address.Address;
import ghidra.program.model.data.AbstractStringDataType;
import ghidra.program.model.data.Array;
import ghidra.program.model.data.ArrayDataType;
import ghidra.program.model.data.BitFieldDataType;
import ghidra.program.model.data.DataType;
import ghidra.program.model.data.DataTypeComponent;
import ghidra.program.model.data.DataTypeManager;
import ghidra.program.model.data.EnumDataType;
import ghidra.program.model.data.PointerDataType;
import ghidra.program.model.data.ProgramBasedDataTypeManager;
import ghidra.program.model.data.Structure;
import ghidra.program.model.data.TypedefDataType;
import ghidra.program.model.listing.Function;
import ghidra.program.model.listing.VariableStorage;
import ghidra.program.model.pcode.HighFunction;
import ghidra.program.model.pcode.HighSymbol;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.Varnode;
import ghidra.program.model.symbol.Symbol;
import ghidra.util.task.TaskMonitor;
import re3lib.*;
public class Decompile extends GhidraScript {
// Flag intended to enable automatic renaming of invalid symbols.
// NOTE(review): not referenced anywhere in the visible part of this file - confirm it is still used.
private static final boolean AUTO_RENAME_SYMBOLS = true;
// Allow-list of function names. While this set is non-null, shouldDecompileFunction()
// rejects every function whose name is not listed here.
private static final HashSet<String> ONLY_SYMBOLS = new HashSet<>(Arrays.asList(
"r3_main",
"r3_get_gli_width",
"r3_get_gli_height",
"r3_setup_dx8",
"r3_setup_window",
"r3_contains_cmd_line",
"r3_module0_init",
"r3_noop",
"gfx_init2",
"spawn_thread",
"FUN_004464f0",
"FUN_00401320",
"thunk_FUN_00401410"
));
// Alternative definition that disables the allow-list (a null set means "no name filter"):
// private static final HashSet<String> ONLY_SYMBOLS = null;
// When true, buildFunctionBlacklist() scans all functions of the program and extends
// the loaded blacklist; when false only the previously saved blacklist is used.
static final boolean BUILD_BLACKLIST = true;
// Entry points of functions that must not be decompiled. Replaced by the set loaded
// from RecompileConfig.INSTANCE.functionBlacklistPath in buildFunctionBlacklist().
HashSet<Address> functionAddrBlackList = new HashSet<>();
/**
 * Decides whether a function takes part in the export.
 *
 * A function is accepted when it passes the optional name allow-list
 * (ONLY_SYMBOLS, skipped when that set is null) and its entry point is not
 * contained in the function blacklist.
 *
 * @param function the candidate function
 * @return true if the function should be decompiled
 */
boolean shouldDecompileFunction(Function function) {
    boolean passesNameFilter = ONLY_SYMBOLS == null || ONLY_SYMBOLS.contains(function.getName());
    return passesNameFilter && !functionAddrBlackList.contains(function.getEntryPoint());
}
/**
 * Loads the persisted function blacklist and, when BUILD_BLACKLIST is set,
 * extends it.
 *
 * Every function that is not yet blacklisted and is either commented with
 * "Library Function" or named with the "crt_" prefix is handed to a
 * PCallTracer; all functions the tracer reports in its "out" collection are
 * added to the blacklist. The blacklist file is rewritten only if at least
 * one entry was added.
 */
void buildFunctionBlacklist() {
    functionAddrBlackList = Utils.loadFunctionBlacklist(RecompileConfig.INSTANCE.functionBlacklistPath);
    if (!BUILD_BLACKLIST) {
        return;
    }

    boolean blacklistChanged = false;
    for (Function candidate : currentProgram.getFunctionManager().getFunctions(true)) {
        // Already known, nothing to trace.
        if (functionAddrBlackList.contains(candidate.getEntryPoint())) {
            continue;
        }

        String comment = candidate.getComment();
        boolean isLibrary = comment != null && comment.contains("Library Function");
        if (isLibrary) {
            println("Adding library function " + candidate.getName() + " to blacklist");
            println("ac:" + functionAddrBlackList.size() + " jj:"
                    + functionAddrBlackList.contains(candidate.getEntryPoint()) + " "
                    + candidate.getEntryPoint());
        }

        boolean isCrt = candidate.getName().startsWith("crt_");
        if (isCrt) {
            println("Adding crt function " + candidate.getName() + " to blacklist");
        }

        if (!isLibrary && !isCrt) {
            continue;
        }

        // Trace the calls made by the ignored function; the tracer is given the
        // current blacklist and reports the functions to add in tracer.out.
        PCallTracer tracer = new PCallTracer();
        tracer.setBlacklist(functionAddrBlackList);
        tracer.traceCalls(candidate);
        for (Function traced : tracer.out) {
            println(" Adding " + traced.getName() + " to blacklist");
            functionAddrBlackList.add(traced.getEntryPoint());
            blacklistChanged = true;
        }
    }

    if (blacklistChanged) {
        Utils.saveFunctionBlacklist(functionAddrBlackList, RecompileConfig.INSTANCE.functionBlacklistPath);
    }
}
/**
 * Escapes a string so it can be emitted between double quotes as a C/C++
 * string literal.
 *
 * Backslashes and double quotes are backslash-escaped. Newline, carriage
 * return and tab use their mnemonic escapes, and every other ASCII control
 * character (below 0x20, and 0x7F) is written as a three-digit octal escape.
 * Three digits are always emitted so that a following digit character cannot
 * be absorbed into the escape. All other characters are copied unchanged.
 *
 * @param str the raw string content (must not be null)
 * @return the escaped content, without surrounding quotes
 */
String escapeCString(String str) {
    StringBuilder out = new StringBuilder(str.length() + 8);
    for (int i = 0; i < str.length(); i++) {
        char c = str.charAt(i);
        switch (c) {
            case '\\':
                out.append("\\\\");
                break;
            case '"':
                out.append("\\\"");
                break;
            case '\n':
                out.append("\\n");
                break;
            case '\r':
                out.append("\\r");
                break;
            case '\t':
                out.append("\\t");
                break;
            default:
                if (c < 0x20 || c == 0x7F) {
                    // A raw control character would break the generated literal.
                    out.append('\\');
                    out.append((char) ('0' + ((c >> 6) & 7)));
                    out.append((char) ('0' + ((c >> 3) & 7)));
                    out.append((char) ('0' + (c & 7)));
                } else {
                    out.append(c);
                }
                break;
        }
    }
    return out.toString();
}
/**
 * Reads a NUL-terminated byte string from the current program's memory.
 *
 * Reading stops at the first zero byte or once the limit is reached; because
 * the offset is advanced before the limit check, at most {@code maxLen - 1}
 * characters are returned. Each byte is mapped to the character with the
 * same unsigned value (0..255); masking with 0xFF prevents bytes of 0x80 and
 * above from being sign-extended into U+FF80..U+FFFF.
 *
 * @param addr   address of the first byte of the string
 * @param maxLen read limit, counted in bytes including the terminator slot
 * @return the decoded string, possibly empty
 * @throws Exception if the memory at the requested address cannot be read
 */
String readCString(Address addr, int maxLen) throws Exception {
    StringBuilder sb = new StringBuilder();
    int ofs = 0;
    while (true) {
        Address read = addr.add(ofs++);
        byte b = currentProgram.getMemory().getByte(read);
        if (b == 0 || ofs >= maxLen) {
            break;
        }
        sb.append((char) (b & 0xFF));
    }
    return sb.toString();
}
/**
 * Record of one global symbol referenced by decompiled code: the decompiler's
 * high-level symbol together with the memory address chosen for it.
 * Instances are created in decompileFunction() and consumed by dumpGlobals().
 */
public class GlobalRec {
    /** Decompiler symbol describing the global (name, data type, storage). */
    public HighSymbol highSymbol;
    /** Memory address associated with the symbol. */
    public Address address;

    /**
     * @param symbol   the decompiler symbol
     * @param location the memory address associated with the symbol
     */
    public GlobalRec(HighSymbol symbol, Address location) {
        highSymbol = symbol;
        address = location;
    }
}
// Matches a whole token text of the form "_<digits>_<digits>_" (for example "_4_4_").
// decompileFunction() parses the first group as an offset and the second as a size
// when such a token directly follows a "." token.
static final Pattern fieldAccessRegex = Pattern.compile("^_([0-9]+)_([0-9]+)_$");
/**
 * Decompiles one function and writes the result to a ".cxx" file.
 *
 * The file name is the sanitized function name. If a file with that name
 * exists in the "fix" directory, the output goes to the "ref" directory,
 * otherwise to the "auto" directory. While the decompiler output is walked
 * token by token, this method
 * <ul>
 * <li>rewrites sub-field accesses of the form {@code ._<offset>_<size>_} into
 * {@code  + Field<offset, size>()},</li>
 * <li>records every referenced symbol that lives at a memory address in
 * {@code outGlobalSymbols} (keyed by symbol name),</li>
 * <li>collects an {@code extern} prototype for every directly called function
 * that is not blacklisted.</li>
 * </ul>
 * The output file is opened only after the whole body has been generated, so
 * a failure while processing tokens does not leave a truncated file behind.
 *
 * @param outGlobalSymbols receives the global symbols referenced by the function
 * @param function         the function to decompile
 * @throws IllegalStateException if the decompiler produced no C markup
 * @throws Exception             if decompilation or writing the file fails
 */
void decompileFunction(Hashtable<String, GlobalRec> outGlobalSymbols, Function function)
        throws Exception {
    File outFile = selectDecompOutputFile(function);
    println("Processing " + function.getName() + " => " + outFile.toString());

    DecompileResults decompRes = RecompileConfig.INSTANCE.decompCache.getOrInsert(function);
    if (decompRes.getCCodeMarkup() == null) {
        // Without markup PrettyPrinter cannot work; fail with a readable message.
        throw new IllegalStateException("Decompilation of " + function.getName() + " at "
                + function.getEntryPoint() + " produced no C markup: " + decompRes.getErrorMessage());
    }
    HighFunction highFunction = decompRes.getHighFunction();

    HashSet<String> headers = new HashSet<>();
    StringWriter codeWriter = new StringWriter();
    PrettyPrinter pp = new PrettyPrinter(decompRes.getFunction(), decompRes.getCCodeMarkup(), null);
    for (ClangLine line : pp.getLines()) {
        for (int i = 0; i < line.getIndent(); i++) {
            codeWriter.write(' ');
        }

        List<ClangToken> tokens = collectNonEmptyTokens(line);

        boolean prevDot = false;
        for (int t = 0; t < tokens.size(); t++) {
            ClangToken token = tokens.get(t);
            boolean thisDot = token.toString().equals(".");

            if (prevDot) {
                ClangToken replacement = buildFieldAccessToken(token);
                if (replacement != null) {
                    // Collapse the "." token and the "_off_size_" token into one token.
                    tokens.remove(t);
                    tokens.set(t - 1, replacement);
                    // The list shrank by one; step back so the next iteration
                    // visits the token that followed the replaced pair.
                    t--;
                }
            }

            // Symbol and call extraction still look at the original token.
            recordGlobalSymbol(outGlobalSymbols, highFunction, token);
            recordCalledFunctionHeader(headers, token);

            prevDot = thisDot;
        }

        for (ClangToken token : tokens) {
            codeWriter.write(token.toString());
        }
        codeWriter.write('\n');
    }

    // PrintWriter truncates an existing file, so no explicit delete is needed.
    try (PrintWriter writer = new PrintWriter(outFile, "UTF-8")) {
        writer.println("// AUTO-GENERATED FILE, MOVE TO 'gh_fix' FOLDER PREVENT OVERWRITING!!!!! ");
        writer.println();
        writer.println("#include <gh_auto_binder.h>");
        writer.println("#include \"../gh_global.h\"");
        writer.println();
        for (String header : headers) {
            writer.println(header);
        }
        writer.println();
        writer.println("// " + function.getEntryPoint());
        writer.print(codeWriter.toString());
        writer.println();
    }
}

/** Picks the output file: the "ref" directory if a hand-fixed version exists, else the "auto" directory. */
private File selectDecompOutputFile(Function function) {
    String fileName = sanitizeFunctionName(function.getName()) + ".cxx";
    File fixedFile = new File(RecompileConfig.INSTANCE.dirDecompFix, fileName);
    if (fixedFile.exists()) {
        println("Func " + function.getName() + " skipped (gh_fix)");
        return new File(RecompileConfig.INSTANCE.dirDecompRef, fileName);
    }
    return new File(RecompileConfig.INSTANCE.dirDecompAuto, fileName);
}

/** Returns the tokens of a line whose text is not empty, in their original order. */
private List<ClangToken> collectNonEmptyTokens(ClangLine line) {
    List<ClangToken> tokens = new ArrayList<>();
    for (int i = 0; i < line.getNumTokens(); i++) {
        ClangToken token = line.getToken(i);
        if (!token.getText().isEmpty()) {
            tokens.add(token);
        }
    }
    return tokens;
}

/** Builds the " + Field<offset, size>()" token for a "_offset_size_" syntax token, or returns null if the token is not one. */
private ClangToken buildFieldAccessToken(ClangToken token) {
    if (!(token instanceof ClangSyntaxToken)) {
        return null;
    }
    Matcher matcher = fieldAccessRegex.matcher(token.getText());
    if (!matcher.matches()) {
        return null;
    }
    int offset = Integer.parseInt(matcher.group(1));
    int size = Integer.parseInt(matcher.group(2));
    return new ClangToken(token.Parent(), " + Field<" + offset + ", " + size + ">()");
}

/** Records the symbol behind a token in outGlobalSymbols when it resolves to a memory address. */
private void recordGlobalSymbol(Hashtable<String, GlobalRec> outGlobalSymbols, HighFunction highFunction,
        ClangToken token) {
    HighSymbol gsym = token.getHighSymbol(highFunction);
    if (gsym == null) {
        return;
    }
    var symStorage = gsym.getStorage();
    Address address;
    if (symStorage.isUnassignedStorage()) {
        // Fall back to the database symbol; it may be absent, in which case
        // there is no address to record.
        var sym = gsym.getSymbol();
        address = (sym != null) ? sym.getAddress() : null;
    } else {
        address = symStorage.getMinAddress();
    }
    if (address != null && address.isMemoryAddress()) {
        outGlobalSymbols.put(gsym.getName(), new GlobalRec(gsym, address));
    }
}

/** Adds an extern prototype to headers when the token belongs to a direct CALL of a non-blacklisted function. */
private void recordCalledFunctionHeader(HashSet<String> headers, ClangToken token) {
    PcodeOp op = token.getPcodeOp();
    if (op == null || op.getOpcode() != PcodeOp.CALL) {
        return;
    }
    Varnode target = op.getInput(0);
    if (!target.isAddress()) {
        return;
    }
    Function calledFunction = getFunctionAt(target.getAddress());
    if (calledFunction == null || functionAddrBlackList.contains(calledFunction.getEntryPoint())) {
        return;
    }
    headers.add("extern " + calledFunction.getSignature().getPrototypeString(true)
            + "; // " + calledFunction.getEntryPoint() + " // "
            + calledFunction.getName());
}
/**
 * Writes the opening half of a C include guard followed by a blank line.
 * The guard macro is "GH_GENERATED_" + tag + "_H".
 *
 * @param writer destination of the generated header text
 * @param tag    middle part of the guard macro name
 */
void headerGuardPre(PrintWriter writer, String tag) {
    String guardMacro = "GH_GENERATED_" + tag + "_H";
    writer.println("#ifndef " + guardMacro);
    writer.println("#define " + guardMacro);
    writer.println();
}
/**
 * Writes the closing "#endif" line of the include guard, annotated with the
 * guard macro name "GH_GENERATED_" + tag + "_H".
 *
 * @param writer destination of the generated header text
 * @param tag    middle part of the guard macro name
 */
void headerGuardPost(PrintWriter writer, String tag) {
    StringBuilder closingLine = new StringBuilder("#endif // GH_GENERATED_");
    closingLine.append(tag).append("_H");
    writer.println(closingLine.toString());
}
/**
 * Writes "gh_global.h" (extern declarations) and "gh_global.cxx"
 * (definitions) for all collected global symbols into the output directory.
 *
 * Both writers are managed by try-with-resources so they are closed even if
 * writing fails. A failure while processing a single symbol is logged and
 * that symbol is skipped; the remaining symbols are still written.
 *
 * @param globalSymbols the globals collected while decompiling, keyed by name
 * @throws Exception if one of the output files cannot be created
 */
void dumpGlobals(Hashtable<String, GlobalRec> globalSymbols) throws Exception {
    File globalSymbolsListH = new File(RecompileConfig.INSTANCE.outputDir, "gh_global.h");
    File globalSymbolsListC = new File(RecompileConfig.INSTANCE.outputDir, "gh_global.cxx");
    try (PrintWriter hwriter = new PrintWriter(globalSymbolsListH, "UTF-8");
            PrintWriter cwriter = new PrintWriter(globalSymbolsListC, "UTF-8")) {
        hwriter.println("// AUTO-GENERATED FILE ");
        headerGuardPre(hwriter, "GLOBALS");
        hwriter.println("#include <gh_global_binder.h>");
        hwriter.println();

        cwriter.println("// AUTO-GENERATED FILE ");
        cwriter.println("#include <gh_global_binder.h>");
        // The separating blank line belongs to the .cxx file, not to the header.
        cwriter.println();

        for (GlobalRec sym : globalSymbols.values()) {
            writeGlobalSymbol(hwriter, cwriter, sym);
        }

        headerGuardPost(hwriter, "GLOBALS");
    }
}

/** Emits the extern declaration (header) and the matching definition (.cxx) for one global symbol. */
private void writeGlobalSymbol(PrintWriter hwriter, PrintWriter cwriter, GlobalRec sym) {
    HighSymbol highSym = sym.highSymbol;
    DataType dt = highSym.getDataType();
    String name = highSym.getName();
    String sanitizedName = sanitizeFunctionName(name);
    if (!sanitizedName.equals(name)) {
        println("Invalid global symbol name: " + name);
        name = sanitizedName;
    }
    Address addr = sym.address;
    try {
        if (dt instanceof AbstractStringDataType) {
            // Strings become a const char* initialized with the bytes read from memory.
            String literal = "\"" + escapeCString(readCString(addr, 2048)) + "\"";
            hwriter.println("extern const char* " + name + "; // " + addr);
            cwriter.println("const char* " + name + " = " + literal + "; // " + addr);
        } else if (dt instanceof PointerDataType) {
            DataType baseType = ((PointerDataType) dt).getDataType();
            String pointerType = baseType.getDisplayName() + "*";
            hwriter.println("extern " + pointerType + " " + name + "; // " + addr);
            cwriter.println(pointerType + " " + name + " = (" + pointerType + ")&GH_MEM(0x" + addr + ")"
                    + "; // " + addr);
        } else if (dt instanceof Array) {
            // Arrays are exposed as a reference to an array of the element type.
            Array adt = (Array) dt;
            String elementType = adt.getDataType().getDisplayName();
            int count = adt.getNumElements();
            hwriter.println("extern " + elementType + "(&" + name + ")[" + count + "]; // " + addr);
            cwriter.println(elementType + "(&" + name + ")[" + count + "] = *reinterpret_cast<"
                    + elementType + "(*)[" + count + "]>(GH_MEM(0x" + addr + "));");
        } else {
            // Everything else is a reference bound to the original memory. The
            // definition must use the same reference type as the extern
            // declaration, otherwise the two do not describe the same object.
            String refTypeStr = dt.getDisplayName() + "&";
            hwriter.println("extern " + refTypeStr + " " + name + "; // " + addr);
            cwriter.println(refTypeStr + " " + name + "= (" + refTypeStr + ") GH_MEM(0x" + addr + ");");
        }
    } catch (Exception e) {
        println("Error processing global symbol: " + e);
        // Resolve the owning function defensively so the handler itself cannot throw.
        HighFunction owner = highSym.getHighFunction();
        String ownerName = (owner != null && owner.getFunction() != null)
                ? owner.getFunction().getName()
                : "<unknown>";
        println("Symbol: " + highSym.getName() + " - " + addr + " - " + ownerName);
    }
}
/**
 * Decompiles every function of the list in order, then calls
 * dumpStructureTypes() and finally writes the globals that were collected
 * during decompilation via dumpGlobals().
 *
 * @param functions the functions to decompile
 * @throws Exception if any of the steps fails
 */
void decompileAll(List<Function> functions) throws Exception {
    Hashtable<String, GlobalRec> referencedGlobals = new Hashtable<>();
    Iterator<Function> pending = functions.iterator();
    while (pending.hasNext()) {
        decompileFunction(referencedGlobals, pending.next());
    }
    dumpStructureTypes();
    dumpGlobals(referencedGlobals);
}
/**
 * Script entry point. Does nothing when no program is open. Otherwise it
 * creates the RecompileConfig, verifies that the output directory exists,
 * creates the fix/auto/ref folders, builds the function blacklist and
 * decompiles every function accepted by shouldDecompileFunction().
 *
 * @throws Exception if the output directory is missing or the export fails
 */
@Override
public void run() throws Exception {
    if (currentProgram == null) {
        return;
    }

    RecompileConfig.INSTANCE = new RecompileConfig(this);
    File outputRoot = new File(RecompileConfig.INSTANCE.outputDir);
    if (!outputRoot.exists()) {
        throw new Exception("Output directory does not exist: " + RecompileConfig.INSTANCE.outputDir);
    }

    // Make sure the per-category output folders exist.
    RecompileConfig.INSTANCE.dirDecompFix.mkdirs();
    RecompileConfig.INSTANCE.dirDecompAuto.mkdirs();
    RecompileConfig.INSTANCE.dirDecompRef.mkdirs();

    buildFunctionBlacklist();

    List<Function> selected = new ArrayList<>();
    for (Function candidate : currentProgram.getFunctionManager().getFunctions(true)) {
        if (shouldDecompileFunction(candidate)) {
            selected.add(candidate);
        }
    }
    decompileAll(selected);
}
/**
 * Turns a symbol name into an identifier-safe string: every code point that
 * is not an ASCII letter, an ASCII digit or an underscore is replaced by a
 * single underscore.
 *
 * @param name the original name (must not be null)
 * @return the sanitized name
 */
String sanitizeFunctionName(String name) {
    StringBuilder cleaned = new StringBuilder(name.length());
    int pos = 0;
    while (pos < name.length()) {
        // Walk by code point so a surrogate pair yields one underscore.
        int cp = name.codePointAt(pos);
        boolean allowed = (cp >= 'a' && cp <= 'z')
                || (cp >= 'A' && cp <= 'Z')
                || (cp >= '0' && cp <= '9')
                || cp == '_';
        cleaned.append(allowed ? (char) cp : '_');
        pos += Character.charCount(cp);
    }
    return cleaned.toString();
}
}