//Generates a PDB containing public symbols and type information derived from ghidra's database //@author Brett Wandel //@category Windows //@keybinding ctrl G //@menupath Tools.Generate PDB //@toolbar import java.io.BufferedReader; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.PrintWriter; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Iterator; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.UUID; import java.util.concurrent.TimeUnit; import org.apache.commons.io.FilenameUtils; import com.google.gson.JsonArray; import com.google.gson.JsonObject; import generic.util.Path; import ghidra.app.script.GhidraScript; import ghidra.app.services.ConsoleService; import ghidra.app.util.bin.ByteProvider; import ghidra.app.util.bin.MemoryByteProvider; import ghidra.app.util.bin.format.pdb.PdbParserConstants; import ghidra.app.util.bin.format.pe.FileHeader; import ghidra.app.util.bin.format.pe.NTHeader; import ghidra.app.util.bin.format.pe.OptionalHeader; import ghidra.app.util.bin.format.pe.PortableExecutable; import ghidra.app.util.pdb.PdbProgramAttributes; import ghidra.framework.options.Options; import ghidra.framework.preferences.Preferences; import ghidra.program.model.address.Address; import ghidra.program.model.data.*; import ghidra.program.model.data.Enum; import ghidra.program.model.listing.Function; import ghidra.program.model.listing.FunctionManager; import ghidra.program.model.listing.FunctionSignature; import ghidra.program.model.listing.Program; import ghidra.program.model.mem.Memory; import ghidra.program.model.symbol.Symbol; import ghidra.program.model.symbol.SymbolType; import ghidra.util.exception.CancelledException; import pdb.PdbPlugin; import pdb.symbolserver.SymbolFileInfo; import pdb.symbolserver.SymbolServerInstanceCreatorContext; import pdb.symbolserver.SymbolServerInstanceCreatorRegistry; import pdb.symbolserver.SymbolServerService; import pdb.symbolserver.SymbolStore; import re3lib.RemanConfig; public class PdbGen extends GhidraScript { public final static String PREFERENCE_SYMBOL_OUTPUT_PATH = "PDBGEN_SYMBOL_OUTPUT_PATH"; public String OVERRIDE_SYMBOL_OUTPUT_PATH = null; // Note: we are manually serializing json here, this is just to avoid any // dependencies. // this means it will break if we have any fields that need escaping. Map typedefs = new HashMap(); List serialized = new ArrayList(); Map forwardDeclared = new HashMap(); Map entrypoints = new HashMap(); private boolean isSerialized(DataType dt) { String id = GetId(dt); return isSerialized(id); } private boolean isSerialized(String id) { return serialized.contains(id); } private void setSerialized(DataType dt) { String id = GetId(dt); serialized.add(id); } private String GetIdUnmapped(DataType dt) { if (dt == null) { // Not sure if this should be LF_NULLLEAF (0x0009) // using no type (0x0000) first, this might need to change return "0x0000"; // uncharacterized type (no type) } // FML... this needs to be fixed at some point. String name = dt.getPathName(); // if (name.contains("-")) { // name = name.split("-")[0]; // } // // if (name == "/undefined") { // // this should be done as a typedef, but we can't get "/undefined" by path // for some reason. // return "0x0003"; // } // some BitFieldDataTypes do not have a source archive... no idea why SourceArchive source = dt.getSourceArchive(); if (source != null) { name = String.format("%s:%s", dt.getSourceArchive().getName(), name); } // a BitField does not have a unique name, so we create one // The hashCode is based on basetype.hashcode, bitOffset and bitSize... // so basetype.name:bitSize:bitOffset should be unique. if (dt instanceof BitFieldDataType) { name = String.format("%s:%d", name, ((BitFieldDataType) dt).getBitOffset()); } // some types don't have UniversalIDs, so we use the name instead return name; } private String GetId(DataType dt) { String key = GetIdUnmapped(dt); // follow the typedefs to the original type. while (typedefs.containsKey(key)) { assert key != typedefs.get(key); key = typedefs.get(key); } return key; } // Get the new ID for a type that has been forward declared. private String GetFwdId(DataType dt) { String id = GetId(dt); if (!forwardDeclared.containsKey(id)) { String alias = UUID.randomUUID().toString(); forwardDeclared.put(id, alias); } return forwardDeclared.get(id); } private JsonObject dump(Pointer x) { if (!isSerialized(x.getDataType())) return null; JsonObject json = new JsonObject(); json.addProperty("id", GetId(x)); json.addProperty("type", "LF_POINTER"); json.addProperty("referent_type", GetId(x.getDataType())); return json; } private JsonObject dump(Array x) { if (!isSerialized(x.getDataType())) return null; JsonObject json = new JsonObject(); json.addProperty("id", GetId(x)); json.addProperty("type", "LF_ARRAY"); // TODO currently this is set to QWORD, is this different for x86/x64? json.addProperty("index_type", "0x0077"); json.addProperty("element_type", GetId(x.getDataType())); json.addProperty("size", x.getLength()); return json; } private JsonObject dump(Union x) { JsonArray members = new JsonArray(); for (DataTypeComponent dt : x.getComponents()) { if (!isSerialized(dt.getDataType())) return null; JsonObject json = new JsonObject(); json.addProperty("type", "LF_MEMBER"); // TODO currently this is set to QWORD, is this different for x86/x64? json.addProperty("name", dt.getFieldName()); json.addProperty("type_id", GetId(dt.getDataType())); json.addProperty("offset", dt.getOffset()); json.add("attributes", new JsonArray()); members.add(json); } JsonObject json = new JsonObject(); json.addProperty("id", GetFwdId(x)); json.addProperty("type", "LF_UNION"); json.addProperty("name", x.getName()); json.addProperty("unique_name", GetFwdId(x)); json.addProperty("size", x.getLength()); json.add("fields", members); json.add("options", new JsonArray()); return json; } private JsonObject dump(Enum x) { JsonArray fields = new JsonArray(); for (long value : x.getValues()) { JsonObject json = new JsonObject(); json.addProperty("name", x.getName(value)); json.addProperty("value", value); fields.add(json); } JsonObject json = new JsonObject(); json.addProperty("id", GetFwdId(x)); json.addProperty("type", "LF_ENUM"); json.addProperty("size", x.getLength()); json.addProperty("underlying_type", "0x0074"); json.addProperty("name", x.getName()); json.addProperty("unique_name", GetFwdId(x)); json.add("fields", fields); json.add("options", new JsonArray()); return json; } private JsonObject dump(Structure x) { JsonArray fields = new JsonArray(); for (DataTypeComponent dt : x.getComponents()) { if (!isSerialized(dt.getDataType())) { return null; } JsonObject json = new JsonObject(); json.addProperty("type", "LF_MEMBER"); json.addProperty("type_id", GetId(dt.getDataType())); json.addProperty("offset", dt.getOffset()); json.add("attributes", new JsonArray()); if (dt.getFieldName() == null) { json.addProperty("name", dt.getDefaultFieldName()); } else { json.addProperty("name", dt.getFieldName()); } if (dt.isBitFieldComponent()) { // TODO implement this // BitFieldDataType bfdt = (BitFieldDataType) dt.getDataType(); } fields.add(json); } JsonObject json = new JsonObject(); json.addProperty("id", GetFwdId(x)); json.addProperty("type", "LF_STRUCTURE"); json.addProperty("name", x.getName()); json.addProperty("size", x.getLength()); json.addProperty("unique_name", GetFwdId(x)); json.add("options", new JsonArray()); json.add("fields", fields); return json; } private JsonObject dump(BitFieldDataType x) { if (!isSerialized(x.getBaseDataType())) return null; JsonObject json = new JsonObject(); json.addProperty("id", GetId(x)); json.addProperty("type", "LF_BITFIELD"); json.addProperty("type_id", GetId(x.getBaseDataType())); json.addProperty("bit_offset", x.getBitOffset()); json.addProperty("bit_size", x.getBitSize()); return json; } private List dump(FunctionDefinition x) { // // There should be a good way of determining class, but I haven't found it // yet // // So instead I'm just gonna check calling convention and lookup the type // manually. // if (x.getGenericCallingConvention() == GenericCallingConvention.thiscall) { // DataType clz = x.getArguments()[0].getDataType(); // if (clz instanceof Pointer) { // clz = ((Pointer) clz).getDataType(); // } // printf("%s::%s()\n", clz.getName(), x.getName()); // } // printf("function [%s] %s %s", x.getName(), GetId(x), x.getClass().getName()); // we wait (return null) until we have dumped all the dependant types if (!isSerialized(x.getReturnType())) return null; JsonArray parameters = new JsonArray(); for (ParameterDefinition p : x.getArguments()) { if (!isSerialized(p.getDataType())) return null; parameters.add(GetId(p.getDataType())); } List entries = new ArrayList(); JsonObject json = new JsonObject(); json.addProperty("type", "LF_PROCEDURE"); json.addProperty("id", GetId(x)); json.addProperty("name", x.getName()); json.addProperty("return_type", GetId(x.getReturnType())); String callingConvention = x.getCallingConventionName(); json.addProperty("calling_convention", callingConvention); json.add("options", new JsonArray()); json.add("parameters", parameters); entries.add(json); if (!(callingConvention.equals("thiscall") || callingConvention.equals("cdecl") || callingConvention.equals("fastcall") || callingConvention.equals("stdcall"))) { return null; } json = new JsonObject(); // We are creating a new id here just so it flows through our pipeline correctly // with the rest of the types. json.addProperty("type", "LF_FUNC_ID"); json.addProperty("id", UUID.randomUUID().toString()); json.addProperty("name", x.getName()); json.addProperty("function_type", GetId(x)); json.addProperty("parent_scope", "0x0000"); // placeholder entries.add(json); return entries; } private JsonObject dump(TypeDef dt) { DataType base = dt.getBaseDataType(); if (!isSerialized(base)) { return null; } typedefs.put(GetIdUnmapped(dt), GetIdUnmapped(dt.getBaseDataType())); JsonObject json = new JsonObject(); // json.addProperty("", null); return json; } private List toJson(DataType dt) { if (dt instanceof FunctionDefinition) { return dump((FunctionDefinition) dt); } List entries = new ArrayList(); JsonObject json = null; if (dt instanceof Pointer) { json = dump((Pointer) dt); } else if (dt instanceof BitFieldDataType) { json = dump((BitFieldDataType) dt); } else if (dt instanceof Array) { json = dump((Array) dt); } else if (dt instanceof Union) { json = dump((Union) dt); } else if (dt instanceof Enum) { json = dump((Enum) dt); } else if (dt instanceof Structure) { json = dump((Structure) dt); } else if (dt instanceof DefaultDataType) { // this is "undefined" which is predefined by codeview, so we will skip it here. return entries; } else if (dt instanceof TypeDef) { json = dump((TypeDef) dt); // Not required... we map typedefs to their underlying type before processing // the rest of the types // I have not found any CodeView type for typedefs, so we map the types (AFAIK // like the linker does). // implementing this *might* cleanup the output a little, but not sure if the // juice is worth the squeeze if (json == null) { return null; } else { return entries; } } else { printf("[PDBGEN] Unknown Type: id=%s, name=%s, class=%s\n", GetId(dt), dt.getName(), dt.getClass().getName()); } if (json == null) { return null; } entries.add(json); return entries; } public void printMissing(DataType dt) { if (dt instanceof BuiltIn) { if (dt instanceof PointerDataType) { printMissing((Pointer) dt); } else { printf("[PDBGEN] missing: BuiltIn '%s' missing, size=%d\n", GetIdUnmapped(dt), dt.getLength()); } } else if (dt instanceof FunctionDefinition) { printMissing((FunctionDefinition) dt); } else if (dt instanceof Pointer) { printMissing((Pointer) dt); } else if (dt instanceof Array) { printMissing((Array) dt); } else if (dt instanceof Structure) { printMissing((Structure) dt); } else if (dt instanceof Union) { printMissing((Union) dt); } else if (dt instanceof DefaultDataType) { printMissing((DefaultDataType) dt); } else if (dt instanceof TypeDef) { printMissing((TypeDef) dt); } else if (dt instanceof Enum) { printMissing((Enum) dt); } else if (dt instanceof BitFieldDataType) { printMissing((BitFieldDataType) dt); } else { printf("[PDBGEN] missing: Unknown data type id='%s', type=%s\n", GetIdUnmapped(dt), dt.getClass().getName()); } } public void printMissing(FunctionDefinition dt) { if (!isSerialized(dt.getReturnType())) { printf("[PDBGEN] missing: FunctionDefinition '%s' missing return type '%s'\n", GetIdUnmapped(dt), GetIdUnmapped(dt.getReturnType())); } for (ParameterDefinition argument : dt.getArguments()) { if (!isSerialized(argument.getDataType())) { printf("[PDBGEN] missing: FunctionDefinition '%s' missing argument type '%s' for '%s'\n", GetIdUnmapped(dt), GetIdUnmapped(argument.getDataType()), argument.getName()); } } } public void printMissing(Pointer dt) { if (!isSerialized(dt.getDataType())) { printf("[PDBGEN] missing: Pointer '%s' missing base type '%s'\n", GetIdUnmapped(dt), GetIdUnmapped(dt.getDataType())); } } public void printMissing(Array dt) { if (!isSerialized(dt.getDataType())) { printf("[PDBGEN] missing: Array '%s' missing base type '%s'\n", GetIdUnmapped(dt), GetIdUnmapped(dt.getDataType())); } } public void printMissing(Structure dt) { for (DataTypeComponent component : dt.getComponents()) { if (!isSerialized(component.getDataType())) { printf("[PDBGEN] missing: Structure '%s' missing component type '%s' for field '%s'\n", GetIdUnmapped(dt), GetIdUnmapped(component.getDataType()), component.getFieldName()); } } } public void printMissing(Union dt) { for (DataTypeComponent component : dt.getComponents()) { if (!isSerialized(component.getDataType())) { printf("[PDBGEN] missing: Union '%s' missing component type '%s'\n", GetIdUnmapped(dt), GetIdUnmapped(component.getDataType())); } } } public void printMissing(Enum dt) { printf("[PDBGEN] missing: Enum missing '%s'\n", GetIdUnmapped(dt)); } public void printMissing(DefaultDataType dt) { printf("[PDBGEN] missing: DefaultDataType missing '%s'\n", GetIdUnmapped(dt)); } public void printMissing(TypeDef dt) { if (!isSerialized(dt.getBaseDataType())) { printf("[PDBGEN] missing: TypeDef '%s' missing base type '%s'\n", GetIdUnmapped(dt), GetIdUnmapped(dt.getBaseDataType())); } } public void printMissing(BitFieldDataType dt) { if (!isSerialized(dt.getBaseDataType())) { printf("[PDBGEN] missing: BitField '%s' missing base type '%s'\n", GetIdUnmapped(dt), GetIdUnmapped(dt.getBaseDataType())); } } public JsonArray toJson(List datatypes) throws CancelledException { monitor.setMessage("Extracting DataTypes"); monitor.initialize(datatypes.size()); monitor.setIndeterminate(false); monitor.setShowProgressValue(true); monitor.setCancelEnabled(true); // Build forward declarations for everything, basically because I'm lazy. // We should only need to add forward declarations for data types that have // cyclic dependencies. JsonArray json = buildForwardDeclarations(datatypes); // A naive ordered serialization. We continually iterate through the list, // serializing data types only once they have had all their dependencies // serialized. // we stop looping over the list once we fail to serialize at least one data // type. // Any data types that are missing dependent types will be left in the input // list. while (!datatypes.isEmpty()) { boolean changed = false; Iterator itr = datatypes.iterator(); while (itr.hasNext()) { monitor.checkCanceled(); DataType dt = itr.next(); List entries = toJson(dt); if (entries == null) { // printf("skipped: %s (%s)\n", dt.getName(), GetId(dt)); continue; // waiting for dependencies to added first } printf("[PDBGEN] dumped: id=%s, original=%s\n", GetId(dt), GetIdUnmapped(dt)); itr.remove(); for (JsonObject entry : entries) { json.add(entry); } setSerialized(dt); changed = true; monitor.incrementProgress(1); } if (!changed) { break; // we failed to remove any data types. } } for (DataType dt : datatypes) { printMissing(dt); } printf("[PDBGEN] missing: %d\n", datatypes.size()); return json; } public JsonArray buildForwardDeclarations(List datatypes) { // some data that is common to all forward declarations JsonArray fields = new JsonArray(); JsonArray options = new JsonArray(); options.add("forwardref"); JsonArray objs = new JsonArray(); for (DataType dt : datatypes) { JsonObject json = new JsonObject(); // the forward declared type and the actual type need different IDs // to make things easy, we use the original id in the forward declaration // so we do not need to rewrite the all the references. // We create a new Id for the actual type, because nothing else references it. json.addProperty("id", GetId(dt)); if (dt instanceof Enum) { json.addProperty("type", "LF_ENUM"); json.addProperty("underlying_type", "0x0000"); } else if (dt instanceof Union) { json.addProperty("type", "LF_UNION"); } else if (dt instanceof Structure) { json.addProperty("type", "LF_STRUCTURE"); } else { continue; // we do not need to forward declare this type } // PDB resolves forward declarations by looking for other types with the same // unique name, // if it does not find one, it will match on name instead. // I'm not sure if this can cause inconsistency if unique_name is not used... // To avoid issues, we use a uuid for the unique name to consistently match // correctly. json.addProperty("name", dt.getName()); json.addProperty("unique_name", GetFwdId(dt)); json.addProperty("size", 0); json.add("options", options); json.add("fields", fields); objs.add(json); setSerialized(dt); } return objs; } public List getAllDataTypes() { List datatypes = new ArrayList(); // this function, despite its name, does not return all datatypes :( // we are going to have to go find the missing ones. currentProgram.getDataTypeManager().getAllDataTypes(datatypes); // for some reason, Ghidra does not include BitField DataTypes in // getAllDataTypes, so we manually add them here. Iterator composites = currentProgram.getDataTypeManager().getAllComposites(); while (composites.hasNext()) { Composite composite = composites.next(); for (DataTypeComponent component : composite.getComponents()) { datatypes.add(component.getDataType()); } } // functions are not apart of the data type manager apparently. Iterator functions = currentProgram.getFunctionManager().getFunctionsNoStubs(true); while (functions.hasNext()) { Function function = functions.next(); if (function.isThunk()) continue; if (function.isExternal()) continue; FunctionSignature signature = function.getSignature(); if (signature instanceof FunctionDefinition) { datatypes.add((FunctionDefinition) signature); entrypoints.put(function.getEntryPoint(), (FunctionDefinition) signature); for (ParameterDefinition argument : signature.getArguments()) { datatypes.add(argument.getDataType()); } } } // remove data types that we do not need to serialize for the pdb Iterator itr = datatypes.iterator(); while (itr.hasNext()) { DataType dt = itr.next(); if (dt instanceof PointerDataType) { // technically a BuiltInDataType, however some thiscall "this" parameters are // defined like this :( continue; } else if (dt instanceof BuiltIn) { if (typedefs.containsKey(dt.getName())) { String value = typedefs.get(dt.getName()); typedefs.put(GetIdUnmapped(dt), value); } itr.remove(); if (isSerialized(dt)) { // normal built in (int, bool, char*, etc) continue; } // printf("[PDBGEN] removed: %s (%s)\n", dt.getName(), dt.getClass().getName()); } else if (dt instanceof TypeDef) { // any other typedefs that are not explictly defined by codeview // DataType basetype = ((TypeDef) dt).getBaseDataType(); // typedefs.put(GetIdUnmapped(dt), GetIdUnmapped(basetype)); // typedefs.put(dt.getName(), GetIdUnmapped(basetype)); // itr.remove(); } } return datatypes; } public List getAllSymbols() { List symbols = new ArrayList(); for (Symbol symbol : currentProgram.getSymbolTable().getAllSymbols(false)) { if (symbol.isExternal()) continue; symbols.add(symbol); } return symbols; } public JsonArray toJsonSymbols(List symbols) throws CancelledException { monitor.setMessage("Extracting Symbols"); monitor.initialize(symbols.size()); monitor.setShowProgressValue(true); monitor.setIndeterminate(false); monitor.setCancelEnabled(true); JsonArray objs = new JsonArray(); FunctionManager manager = currentProgram.getFunctionManager(); for (Symbol symbol : symbols) { monitor.checkCanceled(); monitor.incrementProgress(1); SymbolType stype = symbol.getSymbolType(); // SourceType source = symbol.getSource(); Address address = symbol.getAddress(); // // We can do some interesting filtering based on where the symbol came from. // if (source == SourceType.ANALYSIS) { // } else if (source == SourceType.DEFAULT) { // } else if (source == SourceType.IMPORTED) { // } else if (source == SourceType.USER_DEFINED) { // } String name = symbol.getName(true); if (stype == SymbolType.CLASS) { } else if (stype == SymbolType.FUNCTION) { Function function = manager.getFunctionAt(address); // we rename any thunks to easily distinguish them from the actual functions if (function.isThunk() && !name.startsWith("thunk_")) { name = "thunk_" + name; } JsonObject json = new JsonObject(); json.addProperty("type", "S_PUB32"); json.addProperty("name", name); json.addProperty("address", address.getUnsignedOffset()); json.addProperty("function", true); objs.add(json); if (function.isThunk()) continue; // for what ever reason, the ID of the FunctionSignature is different from when // we dumps the types, // so we cache the original type, and use the function's address to find it now. FunctionDefinition definition = entrypoints.get(address); String id = GetId(definition); // printf("signature [%s] %s %s", definition.getName(), id, // definition.getClass().getName()); // // I dont have a good way of looking up the FunctionDefinition id from here. // will probably need a refactor. Address start = function.getBody().getMinAddress(); Address end = function.getBody().getMaxAddress(); if (!start.hasSameAddressSpace(end)) { // TODO: Generate symbols in a sane way when there are multiple "address ranges" // for a function. // The above functions will return the start of the lowest range, and the end of // the highest range // which is absolutely not what we want, so we are gonna skip them for now. continue; } // S_GPROC32 json = new JsonObject(); json.addProperty("type", "S_GPROC32"); json.addProperty("name", name); json.addProperty("address", start.getUnsignedOffset()); json.addProperty("code_size", end.subtract(start) + 1); json.addProperty("end", 0); json.addProperty("function_type", id); json.addProperty("debug_start", 0); json.addProperty("debug_end", 0); json.addProperty("parent", "0x0000"); json.add("flags", new JsonArray()); objs.add(json); json = new JsonObject(); json.addProperty("type", "S_END"); objs.add(json); // // S_PROCREF // fmt = "{\"type\": \"S_PROCREF\", \"name\": \"%s\", \"address\": %d, // \"code_size\": \"%d\", \"function_type\": \"%s\", \"debug_start\": %d, // \"debug_end\": %d, \"parent\": \"%s\", \"flags\": []}"; // lines.add(String.format(fmt, name, start.getUnsignedOffset(), // end.subtract(start)+1, id, 0, 0, "0x0000")); } else if (stype == SymbolType.GLOBAL || stype == SymbolType.GLOBAL_VAR) { JsonObject json = new JsonObject(); json.addProperty("type", "S_PUB32"); json.addProperty("name", name); json.addProperty("address", address.getUnsignedOffset()); json.addProperty("function", false); objs.add(json); } else if (stype == SymbolType.LABEL) { } else if (stype == SymbolType.CLASS) { } else if (stype == SymbolType.LIBRARY) { } else if (stype == SymbolType.LOCAL_VAR) { } else if (stype == SymbolType.NAMESPACE) { } else if (stype == SymbolType.PARAMETER) { } else { // unknown symbol type } } return objs; } public void initializeTypeDefs() { // map Ghidra built-in types that are predefined by CodeView // these do not have a UniversalID so we reference them by their name instead. // note: name may not be unique, but its all i have found so far. Map aliases = new HashMap(); aliases.put("/undefined", "0x0003"); // we have to do this manually in GetIdUnmapped aliases.put("BuiltInTypes:/null", "0x0000"); aliases.put("BuiltInTypes:/void", "0x0003"); aliases.put("BuiltInTypes:/bool", "0x0030"); aliases.put("BuiltInTypes:/byte", "0x0069"); aliases.put("BuiltInTypes:/sbyte", "0x0068"); aliases.put("BuiltInTypes:/char", "0x0070"); aliases.put("BuiltInTypes:/wchar_t", "0x0071"); aliases.put("BuiltInTypes:/char16_t", "0x007A"); aliases.put("BuiltInTypes:/char32_t", "0x007B"); aliases.put("BuiltInTypes:/uchar", "0x0020"); aliases.put("BuiltInTypes:/wchar16", "0x007A"); aliases.put("BuiltInTypes:/wchar32", "0x007B"); aliases.put("BuiltInTypes:/short", "0x0011"); aliases.put("BuiltInTypes:/ushort", "0x0021"); aliases.put("BuiltInTypes:/int", "0x0074"); aliases.put("BuiltInTypes:/uint", "0x0075"); aliases.put("BuiltInTypes:/long", "0x0012"); aliases.put("BuiltInTypes:/ulong", "0x0022"); aliases.put("BuiltInTypes:/longlong", "0x0076"); aliases.put("BuiltInTypes:/ulonglong", "0x0077"); aliases.put("BuiltInTypes:/uint128_t", "0x0079"); aliases.put("BuiltInTypes:/word", "0x0073"); aliases.put("BuiltInTypes:/dword", "0x0075"); aliases.put("BuiltInTypes:/qword", "0x0077"); aliases.put("BuiltInTypes:/float", "0x0040"); aliases.put("BuiltInTypes:/double", "0x0041"); aliases.put("BuiltInTypes:/float10", "0x0042"); aliases.put("BuiltInTypes:/string", "0x0670"); aliases.put("BuiltInTypes:/string-utf8", "0x0670"); aliases.put("BuiltInTypes:/unicode", "0x067A"); aliases.put("BuiltInTypes:/unicode32", "0x067B"); aliases.put("BuiltInTypes:/TerminatedCString", "0x0670"); aliases.put("BuiltInTypes:/ImageBaseOffset32", "0x0075"); aliases.put("BuiltInTypes:/ImageBaseOffset64", "0x0076"); aliases.put("BuiltInTypes:/uint3", "0x0075"); aliases.put("BuiltInTypes:/longdouble", "0x0042"); aliases.put("BuiltInTypes:/undefined1", "0x0069"); aliases.put("BuiltInTypes:/undefined2", "0x0021"); aliases.put("BuiltInTypes:/undefined3", "0x0022"); aliases.put("BuiltInTypes:/undefined4", "0x0022"); aliases.put("BuiltInTypes:/undefined5", "0x0077"); aliases.put("BuiltInTypes:/undefined6", "0x0077"); aliases.put("BuiltInTypes:/undefined7", "0x0077"); aliases.put("BuiltInTypes:/undefined8", "0x0077"); aliases.put("BuiltInTypes:/GUID", "0x0079"); aliases.put("BuiltInTypes:/IMAGE_RICH_HEADER", "0x0069"); aliases.put("BuiltInTypes:/PEx64_UnwindInfo", "0x069"); for (String key : aliases.keySet()) { String value = aliases.get(key); printf("alias: %s -> %s\n", key, value); DataType dt = currentProgram.getDataTypeManager().getDataType(key); String typeid = GetIdUnmapped(dt); typedefs.put(key, value); typedefs.put(typeid, value); serialized.add(key); serialized.add(typeid); } for (String value : aliases.values()) { if (value.startsWith("0x")) { serialized.add(value); } } } public static List readAll(InputStream in) throws IOException { BufferedReader reader = new BufferedReader(new InputStreamReader(in)); List lines = new ArrayList(); while (reader.ready()) { lines.add(reader.readLine()); } return lines; } public String getDbgPath(String base) throws Exception { String exePath = FilenameUtils.normalize(currentProgram.getExecutablePath()); String dbgName = FilenameUtils.getBaseName(exePath).concat(".dbg"); // Get PortableExecutable from the current program Memory memory = currentProgram.getMemory(); ByteProvider provider = new MemoryByteProvider(memory, currentProgram.getImageBase()); PortableExecutable pe = new PortableExecutable(provider, PortableExecutable.SectionLayout.MEMORY); // Get NT Header (IMAGE_NT_HEADERS) NTHeader ntHeader = pe.getNTHeader(); if (ntHeader == null) { println("NT header not found."); } // Get the Optional Header (IMAGE_OPTIONAL_HEADER) OptionalHeader optionalHeader = ntHeader.getOptionalHeader(); if (optionalHeader == null) { println("Optional header not found."); } FileHeader fileHeader = ntHeader.getFileHeader(); String timeDateStamp = Integer.toHexString(fileHeader.getTimeDateStamp()); String imageSize = Long.toHexString(optionalHeader.getSizeOfImage()); return Paths.get(base, dbgName, timeDateStamp + imageSize, dbgName).toString(); } public String getPdbPath(String base) { String uuid = "7FA5717E-253A-B9B5-4C4C-44205044422E"; SymbolFileInfo info = SymbolFileInfo.fromValues("Rayman3.pdb", uuid, 1); // SymbolFileInfo info = SymbolFileInfo.fromProgramInfo(currentProgram); if (info == null) { return null; } String filename = info.getName(); return Paths.get(base, filename, info.getUniqueDirName(), filename).toString(); } public String getSymbolOutputPath() throws Exception { if (OVERRIDE_SYMBOL_OUTPUT_PATH != null) { return OVERRIDE_SYMBOL_OUTPUT_PATH; } // TODO use the LocalSymbolStore in an intelligent manner // SymbolServerInstanceCreatorContext context = // SymbolServerInstanceCreatorRegistry.getInstance().getContext(currentProgram); // SymbolServerService service = PdbPlugin.getSymbolServerService(context); // SymbolStore store = service.getSymbolStore(); // Get the Symbol Output Path were we will save all our future pdbs. String symbolOutputPath = Preferences.getProperty(PREFERENCE_SYMBOL_OUTPUT_PATH); if (symbolOutputPath == null) { symbolOutputPath = askDirectory("Select Symbol Output Path", "Set Symbol Output Path").getAbsolutePath(); Preferences.setProperty(PREFERENCE_SYMBOL_OUTPUT_PATH, symbolOutputPath); } else { printf("Using saved symbol output directory '%s'\n", symbolOutputPath); printf("modify '%s' in '%s' to change symbol output location\n", PREFERENCE_SYMBOL_OUTPUT_PATH, Preferences.getFilename()); } return symbolOutputPath; } public void run() throws Exception { RemanConfig.INSTANCE = new RemanConfig(this); OVERRIDE_SYMBOL_OUTPUT_PATH = RemanConfig.INSTANCE.originalDir + "/symbols"; if (state.getTool() != null) { ConsoleService console = state.getTool().getService(ConsoleService.class); console.clearMessages(); } String symbolOutputPath = getSymbolOutputPath(); File baseSymbolDir = new File(symbolOutputPath); if (!baseSymbolDir.exists()) { String msg = String.format("The symbol output directory \"%s\" does not exit", baseSymbolDir.getAbsolutePath()); if (askYesNo("create symbol output directory", msg + ", would you like to create it?")) { baseSymbolDir.mkdirs(); } else { printerr(msg); return; } } printf("base symbol path: %s\n", symbolOutputPath); String output = getPdbPath(symbolOutputPath); if (output == null) { // TODO generate .dbg file instead, or ask for a location to save to popup( "Unable to create a PDB!\n\nThe original binary is missing the required PDB signature/guid and age information."); return; } // clear types from the last run typedefs.clear(); serialized.clear(); forwardDeclared.clear(); // setup typedefs so we can map to basic types initializeTypeDefs(); JsonObject json = new JsonObject(); // Now serialize all the data types (in dependency order) json.add("types", toJson(getAllDataTypes())); json.add("symbols", toJsonSymbols(getAllSymbols())); // Ghidra has unhelpfully set the path to \C:\\Something\ this gives as a normal // c:\\Something String exepath = Path.fromPathString(currentProgram.getExecutablePath()).toString(); printf("executable: %s\n", exepath); String jsonpath = FilenameUtils.removeExtension(output).concat(".json"); File pdbfile = new File(output); if (pdbfile.exists()) { if (!askYesNo("overwrite pdb", "are you sure you want to overwrite \"" + pdbfile.getAbsolutePath() + "\"")) { return; } pdbfile.delete(); } else { pdbfile.getParentFile().mkdirs(); } FileWriter w = new FileWriter(jsonpath); w.write(json.toString()); w.close(); monitor.setIndeterminate(true); monitor.setCancelEnabled(true); // ProcessBuilder pdbgen = new ProcessBuilder(); // pdbgen.command("pdbgen.exe", exepath, "-", "--output", output); // Process proc = pdbgen.start(); // PrintWriter stdin = new PrintWriter(proc.getOutputStream()); // stdin.write(json.toString()); // stdin.close(); // while (proc.isAlive()) { // if (monitor.isCancelled()) { // monitor.setMessage("Stopping pdbgen.exe"); // proc.destroy(); // } // for (String line : readAll(proc.getInputStream())) { // println(line); // } // for (String line : readAll(proc.getErrorStream())) { // printerr(line); // } // proc.waitFor(100, TimeUnit.MILLISECONDS); // } return; } }