reman3/java/ghidra/PdbGen.java

1009 lines
36 KiB
Java

//Generates a PDB containing public symbols and type information derived from ghidra's database
//@author Brett Wandel
//@category Windows
//@keybinding ctrl G
//@menupath Tools.Generate PDB
//@toolbar
import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.FilenameUtils;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import generic.util.Path;
import ghidra.app.script.GhidraScript;
import ghidra.app.services.ConsoleService;
import ghidra.app.util.bin.ByteProvider;
import ghidra.app.util.bin.MemoryByteProvider;
import ghidra.app.util.bin.format.pdb.PdbParserConstants;
import ghidra.app.util.bin.format.pe.FileHeader;
import ghidra.app.util.bin.format.pe.NTHeader;
import ghidra.app.util.bin.format.pe.OptionalHeader;
import ghidra.app.util.bin.format.pe.PortableExecutable;
import ghidra.app.util.pdb.PdbProgramAttributes;
import ghidra.framework.options.Options;
import ghidra.framework.preferences.Preferences;
import ghidra.program.model.address.Address;
import ghidra.program.model.data.*;
import ghidra.program.model.data.Enum;
import ghidra.program.model.listing.Function;
import ghidra.program.model.listing.FunctionManager;
import ghidra.program.model.listing.FunctionSignature;
import ghidra.program.model.listing.Program;
import ghidra.program.model.mem.Memory;
import ghidra.program.model.symbol.Symbol;
import ghidra.program.model.symbol.SymbolType;
import ghidra.util.exception.CancelledException;
import pdb.PdbPlugin;
import pdb.symbolserver.SymbolFileInfo;
import pdb.symbolserver.SymbolServerInstanceCreatorContext;
import pdb.symbolserver.SymbolServerInstanceCreatorRegistry;
import pdb.symbolserver.SymbolServerService;
import pdb.symbolserver.SymbolStore;
import re3lib.RemanConfig;
public class PdbGen extends GhidraScript {
/** Preferences key under which the chosen symbol output directory is stored. */
public final static String PREFERENCE_SYMBOL_OUTPUT_PATH = "PDBGEN_SYMBOL_OUTPUT_PATH";
/** When non-null, returned by getSymbolOutputPath() instead of the stored preference. */
public String OVERRIDE_SYMBOL_OUTPUT_PATH = null;
// Alias table: maps a type id to the id it should be replaced with.
// GetId() follows these links until it reaches an id that has no further alias.
Map<String, String> typedefs = new HashMap<String, String>();
// Ids of every type that is considered available for other types to reference.
// This is only ever used for membership tests (add / contains / clear), so a
// hash set keeps each dependency check O(1) instead of scanning a list.
Set<String> serialized = new HashSet<String>();
// Maps a type id to the random UUID used for the full (non forward-declared)
// record of that type; filled lazily by GetFwdId().
Map<String, String> forwardDeclared = new HashMap<String, String>();
// Function entry point -> signature object collected in getAllDataTypes(), so that
// toJsonSymbols() can look up the exact same signature instance by address.
Map<Address, FunctionDefinition> entrypoints = new HashMap<Address, FunctionDefinition>();
/** Reports whether the alias-resolved id of {@code dt} has been recorded as serialized. */
private boolean isSerialized(DataType dt) {
    return isSerialized(GetId(dt));
}
/** Reports whether {@code id} is present in the collection of serialized type ids. */
private boolean isSerialized(String id) {
    final boolean alreadyEmitted = serialized.contains(id);
    return alreadyEmitted;
}
/** Records the alias-resolved id of {@code dt} as serialized. */
private void setSerialized(DataType dt) {
    serialized.add(GetId(dt));
}
/**
 * Builds the raw (not alias-resolved) id of a data type.
 *
 * The id is the type's path name, prefixed with the source archive name when the
 * type has one, and suffixed with the bit offset for bit fields.
 *
 * @param dt the data type, may be null
 * @return the id string; "0x0000" (no type) when {@code dt} is null
 */
private String GetIdUnmapped(DataType dt) {
    if (dt == null) {
        // Uncharacterized type (no type). The original author was unsure whether
        // LF_NULLLEAF (0x0009) would be the better choice here.
        return "0x0000";
    }
    // Types are identified by name because some of them have no UniversalID.
    String id = dt.getPathName();
    // Not every type has a source archive (some BitFieldDataTypes do not).
    SourceArchive archive = dt.getSourceArchive();
    if (archive != null) {
        id = String.format("%s:%s", archive.getName(), id);
    }
    // Bit fields get their bit offset appended so that fields sharing a name but
    // sitting at different offsets receive different ids.
    if (dt instanceof BitFieldDataType) {
        BitFieldDataType bitField = (BitFieldDataType) dt;
        id = String.format("%s:%d", id, bitField.getBitOffset());
    }
    return id;
}
/**
 * Resolves the id of a data type, following the alias table until an id without
 * a further alias is reached.
 *
 * The walk is bounded: it stops when an id maps to itself and after at most
 * {@code typedefs.size()} hops, so a self-mapping or a longer cycle in the table
 * ends the walk instead of looping forever.
 *
 * @param dt the data type, may be null
 * @return the final id in the alias chain
 */
private String GetId(DataType dt) {
    String key = GetIdUnmapped(dt);
    // A chain without a cycle can never have more links than the table has entries.
    int hopsLeft = typedefs.size();
    String target = typedefs.get(key);
    while (target != null && !target.equals(key) && hopsLeft-- > 0) {
        key = target;
        target = typedefs.get(key);
    }
    return key;
}
/**
 * Returns the id used for the full record of a type that has been forward
 * declared. A random UUID is generated the first time a type is seen and reused
 * on every later call.
 */
private String GetFwdId(DataType dt) {
    return forwardDeclared.computeIfAbsent(GetId(dt), unused -> UUID.randomUUID().toString());
}
/**
 * Builds the LF_POINTER record for a pointer type.
 *
 * @return the record, or null while the referent type has not been serialized yet
 */
private JsonObject dump(Pointer x) {
    DataType referent = x.getDataType();
    if (!isSerialized(referent)) {
        return null;
    }
    JsonObject record = new JsonObject();
    record.addProperty("id", GetId(x));
    record.addProperty("type", "LF_POINTER");
    record.addProperty("referent_type", GetId(referent));
    return record;
}
/**
 * Builds the LF_ARRAY record for an array type.
 *
 * @return the record, or null while the element type has not been serialized yet
 */
private JsonObject dump(Array x) {
    DataType element = x.getDataType();
    if (!isSerialized(element)) {
        return null;
    }
    JsonObject record = new JsonObject();
    record.addProperty("id", GetId(x));
    record.addProperty("type", "LF_ARRAY");
    // TODO the index type is fixed (the original note calls it QWORD); it is
    // unclear whether it should differ between x86 and x64.
    record.addProperty("index_type", "0x0077");
    record.addProperty("element_type", GetId(element));
    record.addProperty("size", x.getLength());
    return record;
}
/**
 * Builds the LF_UNION record for a union, including one LF_MEMBER entry per
 * component.
 *
 * Components without an explicit field name are emitted under their default
 * field name, the same way dump(Structure) handles them, so no member ends up
 * with a null name.
 *
 * @return the record, or null while any component type has not been serialized yet
 */
private JsonObject dump(Union x) {
    JsonArray members = new JsonArray();
    for (DataTypeComponent component : x.getComponents()) {
        if (!isSerialized(component.getDataType())) {
            return null; // wait until every member type is available
        }
        String fieldName = component.getFieldName();
        if (fieldName == null) {
            fieldName = component.getDefaultFieldName();
        }
        JsonObject member = new JsonObject();
        member.addProperty("type", "LF_MEMBER");
        member.addProperty("name", fieldName);
        member.addProperty("type_id", GetId(component.getDataType()));
        member.addProperty("offset", component.getOffset());
        member.add("attributes", new JsonArray());
        members.add(member);
    }
    JsonObject json = new JsonObject();
    // The full record uses the forward-declaration alias as both id and unique
    // name; the original id is taken by the forward declaration.
    json.addProperty("id", GetFwdId(x));
    json.addProperty("type", "LF_UNION");
    json.addProperty("name", x.getName());
    json.addProperty("unique_name", GetFwdId(x));
    json.addProperty("size", x.getLength());
    json.add("fields", members);
    json.add("options", new JsonArray());
    return json;
}
/**
 * Builds the LF_ENUM record for an enum, with one name/value entry per
 * enumerator. The underlying type is always written as "0x0074".
 */
private JsonObject dump(Enum x) {
    JsonArray enumerators = new JsonArray();
    long[] values = x.getValues();
    for (int i = 0; i < values.length; i++) {
        long value = values[i];
        JsonObject enumerator = new JsonObject();
        enumerator.addProperty("name", x.getName(value));
        enumerator.addProperty("value", value);
        enumerators.add(enumerator);
    }
    // The full record is published under the forward-declaration alias.
    String fullId = GetFwdId(x);
    JsonObject record = new JsonObject();
    record.addProperty("id", fullId);
    record.addProperty("type", "LF_ENUM");
    record.addProperty("size", x.getLength());
    record.addProperty("underlying_type", "0x0074");
    record.addProperty("name", x.getName());
    record.addProperty("unique_name", fullId);
    record.add("fields", enumerators);
    record.add("options", new JsonArray());
    return record;
}
/**
 * Builds the LF_STRUCTURE record for a structure, including one LF_MEMBER entry
 * per component. Unnamed components are emitted under their default field name.
 *
 * @return the record, or null while any component type has not been serialized yet
 */
private JsonObject dump(Structure x) {
    JsonArray members = new JsonArray();
    for (DataTypeComponent component : x.getComponents()) {
        DataType memberType = component.getDataType();
        if (!isSerialized(memberType)) {
            return null;
        }
        String explicitName = component.getFieldName();
        JsonObject member = new JsonObject();
        member.addProperty("type", "LF_MEMBER");
        member.addProperty("type_id", GetId(memberType));
        member.addProperty("offset", component.getOffset());
        member.add("attributes", new JsonArray());
        member.addProperty("name", explicitName != null ? explicitName : component.getDefaultFieldName());
        // TODO bit field components get no special handling yet.
        members.add(member);
    }
    // The full record is published under the forward-declaration alias.
    String fullId = GetFwdId(x);
    JsonObject record = new JsonObject();
    record.addProperty("id", fullId);
    record.addProperty("type", "LF_STRUCTURE");
    record.addProperty("name", x.getName());
    record.addProperty("size", x.getLength());
    record.addProperty("unique_name", fullId);
    record.add("options", new JsonArray());
    record.add("fields", members);
    return record;
}
/**
 * Builds the LF_BITFIELD record for a bit field.
 *
 * @return the record, or null while the base type has not been serialized yet
 */
private JsonObject dump(BitFieldDataType x) {
    DataType base = x.getBaseDataType();
    if (!isSerialized(base)) {
        return null;
    }
    JsonObject record = new JsonObject();
    record.addProperty("id", GetId(x));
    record.addProperty("type", "LF_BITFIELD");
    record.addProperty("type_id", GetId(base));
    record.addProperty("bit_offset", x.getBitOffset());
    record.addProperty("bit_size", x.getBitSize());
    return record;
}
/**
 * Builds the records for a function signature: an LF_PROCEDURE record followed
 * by an LF_FUNC_ID record that refers to it.
 *
 * @return the two records, or null when the return type or a parameter type has
 *         not been serialized yet, or when the calling convention is not one of
 *         thiscall, cdecl, fastcall or stdcall
 */
private List<JsonObject> dump(FunctionDefinition x) {
    // Wait (return null) until every type the signature depends on is available.
    DataType returnType = x.getReturnType();
    if (!isSerialized(returnType)) {
        return null;
    }
    JsonArray parameterIds = new JsonArray();
    for (ParameterDefinition parameter : x.getArguments()) {
        DataType parameterType = parameter.getDataType();
        if (!isSerialized(parameterType)) {
            return null;
        }
        parameterIds.add(GetId(parameterType));
    }
    // Signatures with any other calling convention are never emitted.
    String callingConvention = x.getCallingConventionName();
    boolean supported = callingConvention.equals("thiscall") || callingConvention.equals("cdecl")
            || callingConvention.equals("fastcall") || callingConvention.equals("stdcall");
    if (!supported) {
        return null;
    }

    JsonObject procedure = new JsonObject();
    procedure.addProperty("type", "LF_PROCEDURE");
    procedure.addProperty("id", GetId(x));
    procedure.addProperty("name", x.getName());
    procedure.addProperty("return_type", GetId(returnType));
    procedure.addProperty("calling_convention", callingConvention);
    procedure.add("options", new JsonArray());
    procedure.add("parameters", parameterIds);

    // The function id record gets a fresh id so that it flows through the same
    // pipeline as the other type records.
    JsonObject functionId = new JsonObject();
    functionId.addProperty("type", "LF_FUNC_ID");
    functionId.addProperty("id", UUID.randomUUID().toString());
    functionId.addProperty("name", x.getName());
    functionId.addProperty("function_type", GetId(x));
    functionId.addProperty("parent_scope", "0x0000"); // placeholder

    List<JsonObject> records = new ArrayList<JsonObject>();
    records.add(procedure);
    records.add(functionId);
    return records;
}
/**
 * Registers a typedef as an alias of its base type instead of emitting a record.
 *
 * @return an empty JSON object once the alias has been registered, or null while
 *         the base type has not been serialized yet
 */
private JsonObject dump(TypeDef dt) {
    DataType target = dt.getBaseDataType();
    if (isSerialized(target)) {
        typedefs.put(GetIdUnmapped(dt), GetIdUnmapped(target));
        return new JsonObject();
    }
    return null;
}
/**
 * Converts one data type into the records that describe it.
 *
 * @return the records for the type (empty for the default "undefined" type and
 *         for typedefs, which are only registered as aliases), or null when the
 *         type cannot be emitted yet or is of an unknown kind
 */
private List<JsonObject> toJson(DataType dt) {
    if (dt instanceof FunctionDefinition) {
        return dump((FunctionDefinition) dt);
    }
    List<JsonObject> records = new ArrayList<JsonObject>();
    JsonObject record;
    if (dt instanceof Pointer) {
        record = dump((Pointer) dt);
    } else if (dt instanceof BitFieldDataType) {
        record = dump((BitFieldDataType) dt);
    } else if (dt instanceof Array) {
        record = dump((Array) dt);
    } else if (dt instanceof Union) {
        record = dump((Union) dt);
    } else if (dt instanceof Enum) {
        record = dump((Enum) dt);
    } else if (dt instanceof Structure) {
        record = dump((Structure) dt);
    } else if (dt instanceof DefaultDataType) {
        // "undefined" is predefined by CodeView, so nothing is emitted for it.
        return records;
    } else if (dt instanceof TypeDef) {
        // Typedefs produce no record of their own; they are mapped onto their
        // underlying type. A null result means the base type is still pending.
        return dump((TypeDef) dt) == null ? null : records;
    } else {
        printf("[PDBGEN] Unknown Type: id=%s, name=%s, class=%s\n", GetId(dt), dt.getName(), dt.getClass().getName());
        return null;
    }
    if (record == null) {
        return null;
    }
    records.add(record);
    return records;
}
/**
 * Prints a diagnostic for a data type that could not be serialized, dispatching
 * to the overload that matches the kind of the type.
 */
public void printMissing(DataType dt) {
    if (dt instanceof BuiltIn) {
        if (dt instanceof PointerDataType) {
            printMissing((Pointer) dt);
            return;
        }
        printf("[PDBGEN] missing: BuiltIn '%s' missing, size=%d\n", GetIdUnmapped(dt), dt.getLength());
        return;
    }
    if (dt instanceof FunctionDefinition) {
        printMissing((FunctionDefinition) dt);
        return;
    }
    if (dt instanceof Pointer) {
        printMissing((Pointer) dt);
        return;
    }
    if (dt instanceof Array) {
        printMissing((Array) dt);
        return;
    }
    if (dt instanceof Structure) {
        printMissing((Structure) dt);
        return;
    }
    if (dt instanceof Union) {
        printMissing((Union) dt);
        return;
    }
    if (dt instanceof DefaultDataType) {
        printMissing((DefaultDataType) dt);
        return;
    }
    if (dt instanceof TypeDef) {
        printMissing((TypeDef) dt);
        return;
    }
    if (dt instanceof Enum) {
        printMissing((Enum) dt);
        return;
    }
    if (dt instanceof BitFieldDataType) {
        printMissing((BitFieldDataType) dt);
        return;
    }
    printf("[PDBGEN] missing: Unknown data type id='%s', type=%s\n", GetIdUnmapped(dt), dt.getClass().getName());
}
/** Prints every return or argument type of a signature that has not been serialized. */
public void printMissing(FunctionDefinition dt) {
    DataType returnType = dt.getReturnType();
    if (!isSerialized(returnType)) {
        printf("[PDBGEN] missing: FunctionDefinition '%s' missing return type '%s'\n", GetIdUnmapped(dt),
                GetIdUnmapped(returnType));
    }
    for (ParameterDefinition parameter : dt.getArguments()) {
        DataType parameterType = parameter.getDataType();
        if (isSerialized(parameterType)) {
            continue;
        }
        printf("[PDBGEN] missing: FunctionDefinition '%s' missing argument type '%s' for '%s'\n", GetIdUnmapped(dt),
                GetIdUnmapped(parameterType), parameter.getName());
    }
}
/** Prints the referent type of a pointer when that type has not been serialized. */
public void printMissing(Pointer dt) {
    DataType referent = dt.getDataType();
    if (isSerialized(referent)) {
        return;
    }
    printf("[PDBGEN] missing: Pointer '%s' missing base type '%s'\n", GetIdUnmapped(dt),
            GetIdUnmapped(referent));
}
/** Prints the element type of an array when that type has not been serialized. */
public void printMissing(Array dt) {
    DataType element = dt.getDataType();
    if (isSerialized(element)) {
        return;
    }
    printf("[PDBGEN] missing: Array '%s' missing base type '%s'\n", GetIdUnmapped(dt),
            GetIdUnmapped(element));
}
/** Prints every structure component whose type has not been serialized. */
public void printMissing(Structure dt) {
    for (DataTypeComponent member : dt.getComponents()) {
        DataType memberType = member.getDataType();
        if (isSerialized(memberType)) {
            continue;
        }
        printf("[PDBGEN] missing: Structure '%s' missing component type '%s' for field '%s'\n", GetIdUnmapped(dt),
                GetIdUnmapped(memberType), member.getFieldName());
    }
}
/** Prints every union component whose type has not been serialized. */
public void printMissing(Union dt) {
    for (DataTypeComponent member : dt.getComponents()) {
        DataType memberType = member.getDataType();
        if (isSerialized(memberType)) {
            continue;
        }
        printf("[PDBGEN] missing: Union '%s' missing component type '%s'\n", GetIdUnmapped(dt),
                GetIdUnmapped(memberType));
    }
}
/** Prints the raw id of an enum that could not be serialized. */
public void printMissing(Enum dt) {
    String rawId = GetIdUnmapped(dt);
    printf("[PDBGEN] missing: Enum missing '%s'\n", rawId);
}
/** Prints the raw id of a default data type that could not be serialized. */
public void printMissing(DefaultDataType dt) {
    String rawId = GetIdUnmapped(dt);
    printf("[PDBGEN] missing: DefaultDataType missing '%s'\n", rawId);
}
/** Prints the base type of a typedef when that type has not been serialized. */
public void printMissing(TypeDef dt) {
    DataType base = dt.getBaseDataType();
    if (isSerialized(base)) {
        return;
    }
    printf("[PDBGEN] missing: TypeDef '%s' missing base type '%s'\n", GetIdUnmapped(dt),
            GetIdUnmapped(base));
}
/** Prints the base type of a bit field when that type has not been serialized. */
public void printMissing(BitFieldDataType dt) {
    DataType base = dt.getBaseDataType();
    if (isSerialized(base)) {
        return;
    }
    printf("[PDBGEN] missing: BitField '%s' missing base type '%s'\n", GetIdUnmapped(dt),
            GetIdUnmapped(base));
}
/**
 * Serializes a list of data types in dependency order.
 *
 * Forward declarations are emitted first. The list is then swept repeatedly; a
 * type is emitted (and removed from the list) only once everything it depends
 * on has been serialized. Sweeping stops when the list is empty or when a full
 * pass emits nothing. Whatever is left in {@code datatypes} afterwards is
 * reported through printMissing.
 *
 * @param datatypes the types to serialize; emitted types are removed from it
 * @return the forward declarations followed by the emitted records
 * @throws CancelledException when the user cancels through the monitor
 */
public JsonArray toJson(List<DataType> datatypes) throws CancelledException {
    monitor.setMessage("Extracting DataTypes");
    monitor.initialize(datatypes.size());
    monitor.setIndeterminate(false);
    monitor.setShowProgressValue(true);
    monitor.setCancelEnabled(true);
    // Everything that can be forward declared is, even though only types with
    // cyclic dependencies strictly need it.
    JsonArray output = buildForwardDeclarations(datatypes);
    boolean progressed = true;
    while (progressed && !datatypes.isEmpty()) {
        progressed = false;
        for (Iterator<DataType> pending = datatypes.iterator(); pending.hasNext();) {
            monitor.checkCanceled();
            DataType candidate = pending.next();
            List<JsonObject> records = toJson(candidate);
            if (records != null) {
                printf("[PDBGEN] dumped: id=%s, original=%s\n", GetId(candidate), GetIdUnmapped(candidate));
                pending.remove();
                for (JsonObject record : records) {
                    output.add(record);
                }
                setSerialized(candidate);
                progressed = true;
                monitor.incrementProgress(1);
            }
            // a null result means the type is still waiting for its dependencies
        }
    }
    for (DataType leftover : datatypes) {
        printMissing(leftover);
    }
    printf("[PDBGEN] missing: %d\n", datatypes.size());
    return output;
}
/**
 * Emits a forward declaration for every enum, union and structure in the list
 * and marks each of those types as serialized.
 *
 * The forward declaration keeps the type's original id, so references to the
 * type need no rewriting; the full record is later published under the alias
 * returned by GetFwdId. The same alias is used as the unique name so that the
 * forward declaration and the full record can be matched up.
 *
 * @param datatypes the candidate types; other kinds of type are ignored
 * @return the forward declaration records
 */
public JsonArray buildForwardDeclarations(List<DataType> datatypes) {
    // These two arrays are shared by every forward declaration.
    JsonArray noFields = new JsonArray();
    JsonArray forwardRefOptions = new JsonArray();
    forwardRefOptions.add("forwardref");
    JsonArray declarations = new JsonArray();
    for (DataType dt : datatypes) {
        final String leaf;
        if (dt instanceof Enum) {
            leaf = "LF_ENUM";
        } else if (dt instanceof Union) {
            leaf = "LF_UNION";
        } else if (dt instanceof Structure) {
            leaf = "LF_STRUCTURE";
        } else {
            continue; // this kind of type needs no forward declaration
        }
        JsonObject declaration = new JsonObject();
        declaration.addProperty("id", GetId(dt));
        declaration.addProperty("type", leaf);
        if ("LF_ENUM".equals(leaf)) {
            declaration.addProperty("underlying_type", "0x0000");
        }
        declaration.addProperty("name", dt.getName());
        declaration.addProperty("unique_name", GetFwdId(dt));
        declaration.addProperty("size", 0);
        declaration.add("options", forwardRefOptions);
        declaration.add("fields", noFields);
        declarations.add(declaration);
        setSerialized(dt);
    }
    return declarations;
}
/**
 * Collects the data types that should be written to the PDB.
 *
 * The result holds everything the data type manager reports, plus the type of
 * every composite component, plus the signature and argument types of every
 * function that is neither a thunk nor external. Built-in types other than
 * PointerDataType are then removed again. Function signatures are also
 * remembered in {@code entrypoints}, keyed by entry point.
 *
 * @return the collected types; the list may contain the same type more than once
 */
public List<DataType> getAllDataTypes() {
    List<DataType> collected = new ArrayList<DataType>();
    // Despite its name, getAllDataTypes does not hand back every type we need.
    currentProgram.getDataTypeManager().getAllDataTypes(collected);

    // Component types are added by hand (the original author found that bit
    // field types are otherwise missing).
    for (Iterator<Composite> all = currentProgram.getDataTypeManager().getAllComposites(); all.hasNext();) {
        for (DataTypeComponent member : all.next().getComponents()) {
            collected.add(member.getDataType());
        }
    }

    // Function signatures do not come from the data type manager either.
    for (Iterator<Function> all = currentProgram.getFunctionManager().getFunctionsNoStubs(true); all.hasNext();) {
        Function function = all.next();
        if (function.isThunk() || function.isExternal()) {
            continue;
        }
        FunctionSignature signature = function.getSignature();
        if (!(signature instanceof FunctionDefinition)) {
            continue;
        }
        FunctionDefinition definition = (FunctionDefinition) signature;
        collected.add(definition);
        entrypoints.put(function.getEntryPoint(), definition);
        for (ParameterDefinition parameter : signature.getArguments()) {
            collected.add(parameter.getDataType());
        }
    }

    // Drop the types that do not need a record of their own.
    for (Iterator<DataType> cursor = collected.iterator(); cursor.hasNext();) {
        DataType candidate = cursor.next();
        // PointerDataType is technically built in, but some thiscall "this"
        // parameters are declared with it, so it stays. Everything that is not
        // built in (typedefs included) stays as well.
        if (candidate instanceof PointerDataType || !(candidate instanceof BuiltIn)) {
            continue;
        }
        String shortName = candidate.getName();
        if (typedefs.containsKey(shortName)) {
            typedefs.put(GetIdUnmapped(candidate), typedefs.get(shortName));
        }
        cursor.remove();
    }
    return collected;
}
/** Returns every symbol of the current program that is not external. */
public List<Symbol> getAllSymbols() {
    List<Symbol> internal = new ArrayList<Symbol>();
    for (Symbol candidate : currentProgram.getSymbolTable().getAllSymbols(false)) {
        if (!candidate.isExternal()) {
            internal.add(candidate);
        }
    }
    return internal;
}
/**
 * Converts symbols into symbol records.
 *
 * Function symbols produce an S_PUB32 record and, unless the function is a
 * thunk, an S_GPROC32 / S_END pair. Global symbols produce an S_PUB32 record.
 * All other symbol types produce nothing.
 *
 * @param symbols the symbols to convert
 * @return the symbol records
 * @throws CancelledException when the user cancels through the monitor
 */
public JsonArray toJsonSymbols(List<Symbol> symbols) throws CancelledException {
    monitor.setMessage("Extracting Symbols");
    monitor.initialize(symbols.size());
    monitor.setShowProgressValue(true);
    monitor.setIndeterminate(false);
    monitor.setCancelEnabled(true);
    JsonArray records = new JsonArray();
    FunctionManager functions = currentProgram.getFunctionManager();
    for (Symbol symbol : symbols) {
        monitor.checkCanceled();
        monitor.incrementProgress(1);
        SymbolType kind = symbol.getSymbolType();
        Address address = symbol.getAddress();
        String name = symbol.getName(true);

        if (kind == SymbolType.GLOBAL || kind == SymbolType.GLOBAL_VAR) {
            records.add(newPublicSymbol(name, address, false));
            continue;
        }
        if (kind != SymbolType.FUNCTION) {
            // classes, labels, libraries, locals, namespaces, parameters and
            // unknown symbol types are not exported
            continue;
        }

        Function function = functions.getFunctionAt(address);
        // Thunks are renamed so they are easy to tell apart from the real function.
        if (function.isThunk() && !name.startsWith("thunk_")) {
            name = "thunk_" + name;
        }
        records.add(newPublicSymbol(name, address, true));
        if (function.isThunk()) {
            continue;
        }
        // The signature id seen here differs from the one seen while dumping
        // types, so the signature cached per entry point is used instead.
        String functionTypeId = GetId(entrypoints.get(address));
        Address start = function.getBody().getMinAddress();
        Address end = function.getBody().getMaxAddress();
        if (!start.hasSameAddressSpace(end)) {
            // TODO: generate symbols in a sane way for functions made of several
            // address ranges. Min/max give the start of the lowest range and the
            // end of the highest one, so such functions are skipped when the two
            // ends are not even in the same address space.
            continue;
        }
        JsonObject procedure = new JsonObject();
        procedure.addProperty("type", "S_GPROC32");
        procedure.addProperty("name", name);
        procedure.addProperty("address", start.getUnsignedOffset());
        procedure.addProperty("code_size", end.subtract(start) + 1);
        procedure.addProperty("end", 0);
        procedure.addProperty("function_type", functionTypeId);
        procedure.addProperty("debug_start", 0);
        procedure.addProperty("debug_end", 0);
        procedure.addProperty("parent", "0x0000");
        procedure.add("flags", new JsonArray());
        records.add(procedure);

        JsonObject scopeEnd = new JsonObject();
        scopeEnd.addProperty("type", "S_END");
        records.add(scopeEnd);
    }
    return records;
}

/** Builds an S_PUB32 record for {@code name} at {@code address}. */
private JsonObject newPublicSymbol(String name, Address address, boolean isFunction) {
    JsonObject record = new JsonObject();
    record.addProperty("type", "S_PUB32");
    record.addProperty("name", name);
    record.addProperty("address", address.getUnsignedOffset());
    record.addProperty("function", isFunction);
    return record;
}
/**
 * Seeds the alias table with the Ghidra built-in types that map onto type ids
 * predefined by CodeView, and marks those aliases and ids as serialized.
 *
 * Built-ins are referenced by name because they do not have a UniversalID.
 * When the data type manager has no type for an alias, only the alias itself
 * is registered: registering the fallback id of a missing type would turn the
 * "no type" id (0x0000) into an alias of whichever built-in happened to be
 * processed last.
 */
public void initializeTypeDefs() {
    // note: the name may not be unique, but it is all that is available.
    Map<String, String> aliases = new HashMap<String, String>();
    aliases.put("/undefined", "0x0003");
    aliases.put("BuiltInTypes:/null", "0x0000");
    aliases.put("BuiltInTypes:/void", "0x0003");
    aliases.put("BuiltInTypes:/bool", "0x0030");
    aliases.put("BuiltInTypes:/byte", "0x0069");
    aliases.put("BuiltInTypes:/sbyte", "0x0068");
    aliases.put("BuiltInTypes:/char", "0x0070");
    aliases.put("BuiltInTypes:/wchar_t", "0x0071");
    aliases.put("BuiltInTypes:/char16_t", "0x007A");
    aliases.put("BuiltInTypes:/char32_t", "0x007B");
    aliases.put("BuiltInTypes:/uchar", "0x0020");
    aliases.put("BuiltInTypes:/wchar16", "0x007A");
    aliases.put("BuiltInTypes:/wchar32", "0x007B");
    aliases.put("BuiltInTypes:/short", "0x0011");
    aliases.put("BuiltInTypes:/ushort", "0x0021");
    aliases.put("BuiltInTypes:/int", "0x0074");
    aliases.put("BuiltInTypes:/uint", "0x0075");
    aliases.put("BuiltInTypes:/long", "0x0012");
    aliases.put("BuiltInTypes:/ulong", "0x0022");
    aliases.put("BuiltInTypes:/longlong", "0x0076");
    aliases.put("BuiltInTypes:/ulonglong", "0x0077");
    aliases.put("BuiltInTypes:/uint128_t", "0x0079");
    aliases.put("BuiltInTypes:/word", "0x0073");
    aliases.put("BuiltInTypes:/dword", "0x0075");
    aliases.put("BuiltInTypes:/qword", "0x0077");
    aliases.put("BuiltInTypes:/float", "0x0040");
    aliases.put("BuiltInTypes:/double", "0x0041");
    aliases.put("BuiltInTypes:/float10", "0x0042");
    aliases.put("BuiltInTypes:/string", "0x0670");
    aliases.put("BuiltInTypes:/string-utf8", "0x0670");
    aliases.put("BuiltInTypes:/unicode", "0x067A");
    aliases.put("BuiltInTypes:/unicode32", "0x067B");
    aliases.put("BuiltInTypes:/TerminatedCString", "0x0670");
    aliases.put("BuiltInTypes:/ImageBaseOffset32", "0x0075");
    aliases.put("BuiltInTypes:/ImageBaseOffset64", "0x0076");
    aliases.put("BuiltInTypes:/uint3", "0x0075");
    aliases.put("BuiltInTypes:/longdouble", "0x0042");
    aliases.put("BuiltInTypes:/undefined1", "0x0069");
    aliases.put("BuiltInTypes:/undefined2", "0x0021");
    aliases.put("BuiltInTypes:/undefined3", "0x0022");
    aliases.put("BuiltInTypes:/undefined4", "0x0022");
    aliases.put("BuiltInTypes:/undefined5", "0x0077");
    aliases.put("BuiltInTypes:/undefined6", "0x0077");
    aliases.put("BuiltInTypes:/undefined7", "0x0077");
    aliases.put("BuiltInTypes:/undefined8", "0x0077");
    aliases.put("BuiltInTypes:/GUID", "0x0079");
    aliases.put("BuiltInTypes:/IMAGE_RICH_HEADER", "0x0069");
    // written with four hex digits like every other id in this table
    aliases.put("BuiltInTypes:/PEx64_UnwindInfo", "0x0069");
    for (Map.Entry<String, String> alias : aliases.entrySet()) {
        String key = alias.getKey();
        String value = alias.getValue();
        printf("alias: %s -> %s\n", key, value);
        typedefs.put(key, value);
        serialized.add(key);
        DataType dt = currentProgram.getDataTypeManager().getDataType(key);
        if (dt != null) {
            // Also register the id the type is known by when it is reached
            // through the data type manager.
            String typeid = GetIdUnmapped(dt);
            typedefs.put(typeid, value);
            serialized.add(typeid);
        }
    }
    // The predefined ids themselves count as serialized, so that types which
    // resolve to them are treated as available.
    for (String value : aliases.values()) {
        if (value.startsWith("0x")) {
            serialized.add(value);
        }
    }
}
/**
 * Reads the lines that are currently available on a stream.
 *
 * Reading stops as soon as the reader reports that it is not ready, so this
 * does not wait for more data to arrive.
 *
 * @param in the stream to read from
 * @return the lines read, in order
 * @throws IOException when reading fails
 */
public static List<String> readAll(InputStream in) throws IOException {
    List<String> collected = new ArrayList<String>();
    BufferedReader source = new BufferedReader(new InputStreamReader(in));
    for (boolean more = source.ready(); more; more = source.ready()) {
        collected.add(source.readLine());
    }
    return collected;
}
/**
 * Builds the symbol-store style path of the .dbg file for the current program:
 * {@code <base>/<name>.dbg/<timestamp><image size>/<name>.dbg}.
 *
 * The index directory is the PE time/date stamp as eight upper-case hex digits
 * followed by the image size in hex without padding, so that short time stamps
 * keep their leading zeros.
 *
 * @param base the root directory of the symbol store
 * @return the path of the .dbg file below {@code base}
 * @throws IllegalStateException when the program has no NT header or no optional header
 * @throws Exception when the PE headers cannot be parsed
 */
public String getDbgPath(String base) throws Exception {
    String exePath = FilenameUtils.normalize(currentProgram.getExecutablePath());
    String dbgName = FilenameUtils.getBaseName(exePath).concat(".dbg");
    // Parse the PE headers straight from the program's memory image.
    Memory memory = currentProgram.getMemory();
    try (ByteProvider provider = new MemoryByteProvider(memory, currentProgram.getImageBase())) {
        PortableExecutable pe = new PortableExecutable(provider, PortableExecutable.SectionLayout.MEMORY);
        // NT header (IMAGE_NT_HEADERS)
        NTHeader ntHeader = pe.getNTHeader();
        if (ntHeader == null) {
            // fail with a clear message instead of a NullPointerException below
            throw new IllegalStateException("NT header not found.");
        }
        // Optional header (IMAGE_OPTIONAL_HEADER)
        OptionalHeader optionalHeader = ntHeader.getOptionalHeader();
        if (optionalHeader == null) {
            throw new IllegalStateException("Optional header not found.");
        }
        FileHeader fileHeader = ntHeader.getFileHeader();
        String index = String.format("%08X%x", fileHeader.getTimeDateStamp(), optionalHeader.getSizeOfImage());
        return Paths.get(base, dbgName, index, dbgName).toString();
    }
}
/**
 * Builds the symbol-store style path of the PDB:
 * {@code <base>/<name>/<unique dir>/<name>}.
 *
 * The file name, GUID and age are fixed values rather than being read from the
 * current program.
 *
 * @param base the root directory of the symbol store
 * @return the PDB path, or null when no symbol file info could be created
 */
public String getPdbPath(String base) {
    // SymbolFileInfo.fromProgramInfo(currentProgram) would be the generic way
    // of getting this information.
    SymbolFileInfo pdbInfo = SymbolFileInfo.fromValues("Rayman3.pdb", "7FA5717E-253A-B9B5-4C4C-44205044422E", 1);
    if (pdbInfo != null) {
        String pdbName = pdbInfo.getName();
        return Paths.get(base, pdbName, pdbInfo.getUniqueDirName(), pdbName).toString();
    }
    return null;
}
/**
 * Determines the directory that generated symbols are written to.
 *
 * The override field wins when it is set. Otherwise the saved preference is
 * used; when there is none, the user is asked for a directory and the answer is
 * saved as the preference.
 *
 * @return the symbol output directory
 * @throws Exception when asking the user for a directory fails
 */
public String getSymbolOutputPath() throws Exception {
    if (OVERRIDE_SYMBOL_OUTPUT_PATH != null) {
        return OVERRIDE_SYMBOL_OUTPUT_PATH;
    }
    // TODO use the LocalSymbolStore in an intelligent manner
    String saved = Preferences.getProperty(PREFERENCE_SYMBOL_OUTPUT_PATH);
    if (saved != null) {
        printf("Using saved symbol output directory '%s'\n", saved);
        printf("modify '%s' in '%s' to change symbol output location\n", PREFERENCE_SYMBOL_OUTPUT_PATH,
                Preferences.getFilename());
        return saved;
    }
    // Nothing saved yet: ask once and remember the answer for future runs.
    File chosen = askDirectory("Select Symbol Output Path", "Set Symbol Output Path");
    String chosenPath = chosen.getAbsolutePath();
    Preferences.setProperty(PREFERENCE_SYMBOL_OUTPUT_PATH, chosenPath);
    return chosenPath;
}
/**
 * Script entry point: extracts the types and symbols of the current program
 * and writes them as JSON next to the location of the PDB.
 *
 * The JSON is written as UTF-8 and the writer is closed even when writing
 * fails. The call to the external pdbgen tool is currently disabled, so no PDB
 * is produced by this method itself.
 *
 * @throws Exception when the user cancels a dialog or the JSON cannot be written
 */
@Override
public void run() throws Exception {
    RemanConfig.INSTANCE = new RemanConfig(this);
    OVERRIDE_SYMBOL_OUTPUT_PATH = RemanConfig.INSTANCE.originalDir + "/symbols";
    if (state.getTool() != null) {
        ConsoleService console = state.getTool().getService(ConsoleService.class);
        // the tool may not provide a console service
        if (console != null) {
            console.clearMessages();
        }
    }
    String symbolOutputPath = getSymbolOutputPath();
    File baseSymbolDir = new File(symbolOutputPath);
    if (!baseSymbolDir.exists()) {
        String msg = String.format("The symbol output directory \"%s\" does not exist", baseSymbolDir.getAbsolutePath());
        if (!askYesNo("create symbol output directory", msg + ", would you like to create it?")) {
            printerr(msg);
            return;
        }
        if (!baseSymbolDir.mkdirs() && !baseSymbolDir.isDirectory()) {
            printerr(String.format("Unable to create the symbol output directory \"%s\"",
                    baseSymbolDir.getAbsolutePath()));
            return;
        }
    }
    printf("base symbol path: %s\n", symbolOutputPath);
    String output = getPdbPath(symbolOutputPath);
    if (output == null) {
        // TODO generate .dbg file instead, or ask for a location to save to
        popup(
                "Unable to create a PDB!\n\nThe original binary is missing the required PDB signature/guid and age information.");
        return;
    }
    // clear everything cached by a previous run
    typedefs.clear();
    serialized.clear();
    forwardDeclared.clear();
    entrypoints.clear();
    // setup typedefs so we can map to basic types
    initializeTypeDefs();
    JsonObject json = new JsonObject();
    // Now serialize all the data types (in dependency order)
    json.add("types", toJson(getAllDataTypes()));
    json.add("symbols", toJsonSymbols(getAllSymbols()));
    // Ghidra reports the path as \C:\\Something\; this turns it into a normal
    // c:\\Something
    String exepath = Path.fromPathString(currentProgram.getExecutablePath()).toString();
    printf("executable: %s\n", exepath);
    String jsonpath = FilenameUtils.removeExtension(output).concat(".json");
    File pdbfile = new File(output);
    if (pdbfile.exists()) {
        if (!askYesNo("overwrite pdb", "are you sure you want to overwrite \"" + pdbfile.getAbsolutePath() + "\"")) {
            return;
        }
        pdbfile.delete();
    } else {
        pdbfile.getParentFile().mkdirs();
    }
    try (Writer w = Files.newBufferedWriter(Paths.get(jsonpath), StandardCharsets.UTF_8)) {
        w.write(json.toString());
    }
    monitor.setIndeterminate(true);
    monitor.setCancelEnabled(true);
    // ProcessBuilder pdbgen = new ProcessBuilder();
    // pdbgen.command("pdbgen.exe", exepath, "-", "--output", output);
    // Process proc = pdbgen.start();
    // PrintWriter stdin = new PrintWriter(proc.getOutputStream());
    // stdin.write(json.toString());
    // stdin.close();
    // while (proc.isAlive()) {
    // if (monitor.isCancelled()) {
    // monitor.setMessage("Stopping pdbgen.exe");
    // proc.destroy();
    // }
    // for (String line : readAll(proc.getInputStream())) {
    // println(line);
    // }
    // for (String line : readAll(proc.getErrorStream())) {
    // printerr(line);
    // }
    // proc.waitFor(100, TimeUnit.MILLISECONDS);
    // }
    return;
}
}