// Mirror of https://github.com/ldc-developers/ldc.git
// (synced 2025-04-30 15:10:59 +03:00; 358 lines, 11 KiB, C++)
//===-- disassembler.cpp --------------------------------------------------===//
|
||
//
|
||
// LDC – the LLVM D compiler
|
||
//
|
||
// This file is distributed under the Boost Software License. See the LICENSE
|
||
// file for details.
|
||
//
|
||
//===----------------------------------------------------------------------===//
|
||
|
||
#include "disassembler.h"
|
||
|
||
#include <algorithm>
|
||
#include <unordered_map>
|
||
|
||
#include "llvm/ADT/Triple.h"
|
||
#include "llvm/MC/MCAsmBackend.h"
|
||
#include "llvm/MC/MCAsmInfo.h"
|
||
#include "llvm/MC/MCCodeEmitter.h"
|
||
#include "llvm/MC/MCContext.h"
|
||
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
|
||
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
|
||
#include "llvm/MC/MCInst.h"
|
||
#include "llvm/MC/MCInstPrinter.h"
|
||
#include "llvm/MC/MCInstrAnalysis.h"
|
||
#include "llvm/MC/MCObjectFileInfo.h"
|
||
#include "llvm/MC/MCRegisterInfo.h"
|
||
#include "llvm/MC/MCStreamer.h"
|
||
#include "llvm/MC/MCSubtargetInfo.h"
|
||
#include "llvm/Object/ObjectFile.h"
|
||
#include "llvm/Support/Error.h"
|
||
#include "llvm/Support/TargetRegistry.h"
|
||
#include "llvm/Target/TargetMachine.h"
|
||
|
||
namespace {
|
||
// Takes ownership of a raw pointer handed out by a factory function and
// returns it wrapped in a std::unique_ptr of the matching element type.
// A null pointer yields an empty unique_ptr.
template <typename T> std::unique_ptr<T> unique(T *ptr) {
  std::unique_ptr<T> owner{ptr};
  return owner;
}
|
||
|
||
// Selects which of the two passes over a function's bytes is in progress.
enum class Stage {
  // First pass: branch/call targets are collected, nothing is symbolized.
  Scan = 0,
  // Second pass: labels and instructions are written to the streamer.
  Emit = 1,
};
|
||
|
||
class SymTable final {
|
||
llvm::MCContext &context;
|
||
Stage stage;
|
||
std::unordered_map<uint64_t, llvm::MCSymbol *> labelsPos;
|
||
std::unordered_map<uint64_t, llvm::MCSymbol *> labelsTargets;
|
||
std::unordered_map<uint64_t, llvm::MCSymbol *> externalSymbols;
|
||
|
||
public:
|
||
SymTable(llvm::MCContext &ctx) : context(ctx) {}
|
||
|
||
llvm::MCContext &getContext() { return context; }
|
||
|
||
void setStage(Stage s) { stage = s; }
|
||
|
||
Stage getStage() const { return stage; }
|
||
|
||
void reset() {
|
||
labelsPos.clear();
|
||
labelsTargets.clear();
|
||
externalSymbols.clear();
|
||
}
|
||
|
||
void addLabel(uint64_t pos, uint64_t target, llvm::StringRef name = {}) {
|
||
if (auto label = getTargetLabel(target)) {
|
||
labelsPos.insert({pos, label});
|
||
return;
|
||
}
|
||
auto sym = name.empty() ? context.createTempSymbol("", false)
|
||
: context.getOrCreateSymbol(name);
|
||
assert(nullptr != sym);
|
||
labelsPos.insert({pos, sym});
|
||
labelsTargets.insert({target, sym});
|
||
}
|
||
|
||
llvm::MCSymbol *getPosLabel(uint64_t pos) const {
|
||
auto it = labelsPos.find(pos);
|
||
if (labelsPos.end() != it) {
|
||
return it->second;
|
||
}
|
||
return nullptr;
|
||
}
|
||
|
||
llvm::MCSymbol *getTargetLabel(uint64_t target) const {
|
||
auto it = labelsTargets.find(target);
|
||
if (labelsTargets.end() != it) {
|
||
return it->second;
|
||
}
|
||
return nullptr;
|
||
}
|
||
|
||
void addExternalSymbolRel(uint64_t pos, llvm::StringRef name) {
|
||
auto sym = context.getOrCreateSymbol(name);
|
||
assert(nullptr != sym);
|
||
externalSymbols.insert({pos, sym});
|
||
}
|
||
|
||
llvm::MCSymbol *getExternalSymbolRel(uint64_t pos) const {
|
||
auto it = externalSymbols.find(pos);
|
||
if (externalSymbols.end() != it) {
|
||
return it->second;
|
||
}
|
||
return nullptr;
|
||
}
|
||
};
|
||
|
||
// Disassembles the bytes in `data` and writes the result to `streamer`.
//
// The bytes are decoded twice:
//  * Stage::Scan - every branch/call whose target mcia can evaluate is
//    registered in `symTable`; nothing is emitted for decoded instructions.
//  * Stage::Emit - labels registered for the current offset are emitted,
//    followed by any comment text the disassembler produced and the
//    instruction itself.
//
// `symTable` is switched to the matching stage before each pass so that the
// Symbolizer attached to `disasm` can tell the passes apart.
//
// If an instruction cannot be decoded, "failed to disassemble" is emitted
// and the function returns immediately.
void printFunction(const llvm::MCDisassembler &disasm,
                   const llvm::MCInstrAnalysis &mcia,
                   llvm::ArrayRef<uint8_t> data, SymTable &symTable,
                   const llvm::MCSubtargetInfo &sti,
                   llvm::MCStreamer &streamer) {
  const Stage stages[] = {Stage::Scan, Stage::Emit};
  for (auto stage : stages) {
    symTable.setStage(stage);
    uint64_t size = 0;
    // `size` is filled in by getInstruction() and advances the cursor.
    for (uint64_t pos = 0; pos < static_cast<uint64_t>(data.size());
         pos += size) {
      llvm::MCInst inst;

      std::string comment;
      llvm::raw_string_ostream commentStream(comment);
      auto status = disasm.getInstruction(inst, size, data.slice(pos), pos,
                                          llvm::nulls(), commentStream);

      switch (status) {
      case llvm::MCDisassembler::Fail:
        streamer.EmitRawText("failed to disassemble");
        return;

      case llvm::MCDisassembler::SoftFail:
        // Only warn while emitting; warning during the scan pass as well
        // would print the text twice, the first time ahead of all output
        // for this function.
        if (Stage::Emit == stage) {
          streamer.EmitRawText("potentially undefined instruction encoding:");
        }
        LLVM_FALLTHROUGH;

      case llvm::MCDisassembler::Success:
        if (Stage::Scan == stage) {
          if (mcia.isBranch(inst) || mcia.isCall(inst)) {
            uint64_t target = 0;
            if (mcia.evaluateBranch(inst, pos, size, target)) {
              symTable.addLabel(pos, target);
            }
          }
        } else if (Stage::Emit == stage) {
          if (auto label = symTable.getTargetLabel(pos)) {
            streamer.EmitLabel(label);
          }
          commentStream.flush();
          if (!comment.empty()) {
            streamer.AddComment(comment);
          }
          streamer.EmitInstruction(inst, sti);
        }
        break;
      }
      assert(0 != size);
    }
  }
}
|
||
|
||
class Symbolizer final : public llvm::MCSymbolizer {
|
||
SymTable &symTable;
|
||
|
||
const llvm::MCExpr *createExpr(llvm::MCSymbol *sym, int64_t offset = 0) {
|
||
assert(nullptr != sym);
|
||
auto &ctx = symTable.getContext();
|
||
auto expr = llvm::MCSymbolRefExpr::create(sym, ctx);
|
||
if (0 == offset) {
|
||
return expr;
|
||
}
|
||
auto off = llvm::MCConstantExpr::create(offset, ctx);
|
||
return llvm::MCBinaryExpr::createAdd(expr, off, ctx);
|
||
}
|
||
|
||
public:
|
||
Symbolizer(llvm::MCContext &Ctx,
|
||
std::unique_ptr<llvm::MCRelocationInfo> RelInfo,
|
||
SymTable &symtable)
|
||
: MCSymbolizer(Ctx, std::move(RelInfo)), symTable(symtable) {}
|
||
|
||
virtual bool tryAddingSymbolicOperand(llvm::MCInst &Inst,
|
||
llvm::raw_ostream & /*cStream*/,
|
||
int64_t Value, uint64_t Address,
|
||
bool IsBranch, uint64_t Offset,
|
||
uint64_t /*InstSize*/) override {
|
||
if (Stage::Emit == symTable.getStage()) {
|
||
if (IsBranch) {
|
||
if (auto label = symTable.getPosLabel(Address)) {
|
||
Inst.addOperand(llvm::MCOperand::createExpr(createExpr(label)));
|
||
return true;
|
||
}
|
||
}
|
||
|
||
if (auto sym = symTable.getExternalSymbolRel(Address + Offset)) {
|
||
Inst.addOperand(llvm::MCOperand::createExpr(createExpr(sym, Value)));
|
||
return true;
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
virtual void tryAddingPcLoadReferenceComment(llvm::raw_ostream &cStream,
|
||
int64_t Value,
|
||
uint64_t /*Address*/) override {
|
||
if (Value >= 0) {
|
||
if (auto sym =
|
||
symTable.getExternalSymbolRel(static_cast<uint64_t>(Value))) {
|
||
cStream << sym->getName();
|
||
}
|
||
}
|
||
}
|
||
};
|
||
|
||
// Registers, in `symTable`, the symbol of every relocation in `sec` that has
// one and whose offset is not below `offset`. The stored position is the
// relocation offset minus `offset`.
void processRelocations(SymTable &symTable, uint64_t offset,
                        const llvm::object::ObjectFile &object,
                        const llvm::object::SectionRef &sec) {
  for (const auto &relocation : sec.relocations()) {
    const auto symbolIt = relocation.getSymbol();
    if (object.symbol_end() == symbolIt) {
      continue; // relocation without an associated symbol
    }

    const auto symbol = *symbolIt;
    const auto relocOffset = relocation.getOffset();
    if (relocOffset < offset) {
      continue; // lies before the region of interest
    }

    symTable.addExternalSymbolRel(relocOffset - offset,
                                  llvm::cantFail(symbol.getName()));
  }
}
|
||
}
|
||
|
||
// Writes a textual disassembly of every function symbol in `object` to `os`.
//
// The MC components (disassembler, instruction analysis, instruction printer,
// asm backend and asm streamer) are created from `tm`. If any of them cannot
// be created the function returns without writing anything.
//
// For each symbol of type ST_Function that belongs to a section:
//  * the symbol table is reset and seeded with the function's own label,
//  * relocations of the containing section, and of every section whose
//    relocated section is the containing one, are registered,
//  * the function's extent is taken to run from the symbol's value to the
//    next higher symbol value in the same section (or the section end),
//  * the bytes are printed via printFunction(), followed by an empty line.
//
// Symbols whose section contents cannot be read, or whose value lies past
// the end of the section contents, are skipped.
void disassemble(const llvm::TargetMachine &tm,
                 const llvm::object::ObjectFile &object,
                 llvm::raw_ostream &os) {
  auto &target = tm.getTarget();

  auto mri = tm.getMCRegisterInfo();
  auto mai = tm.getMCAsmInfo();
  auto sti = tm.getMCSubtargetInfo();
  auto mii = tm.getMCInstrInfo();

  if (nullptr == mri || nullptr == mai || nullptr == sti || nullptr == mii) {
    // TODO: proper error handling
    return;
  }

  llvm::MCObjectFileInfo mofi;
  llvm::MCContext ctx(mai, mri, &mofi);
#if LDC_LLVM_VER >= 600
  mofi.InitMCObjectFileInfo(tm.getTargetTriple(), tm.isPositionIndependent(),
                            ctx, tm.getCodeModel() == llvm::CodeModel::Large);
#else
  mofi.InitMCObjectFileInfo(tm.getTargetTriple(), tm.isPositionIndependent(),
                            tm.getCodeModel(), ctx);
#endif

  auto disasm = unique(target.createMCDisassembler(*sti, ctx));
  if (nullptr == disasm) {
    return;
  }

  SymTable symTable(ctx);
  disasm->setSymbolizer(llvm::make_unique<Symbolizer>(
      ctx, llvm::make_unique<llvm::MCRelocationInfo>(ctx), symTable));

  auto mcia = unique(target.createMCInstrAnalysis(mii));
  if (nullptr == mcia) {
    return;
  }

  auto mip = unique(
      target.createMCInstPrinter(tm.getTargetTriple(), 0, *mai, *mii, *mri));
  if (nullptr == mip) {
    return;
  }

  llvm::MCTargetOptions opts;
  auto mab = unique(target.createMCAsmBackend(
#if LDC_LLVM_VER >= 600
      *sti, *mri, opts)
#else
      *mri, tm.getTargetTriple().getTriple(), tm.getTargetCPU(), opts)
#endif
  );
  if (nullptr == mab) {
    return;
  }

  // Streamer takes ownership of mip mab
  auto asmStreamer = unique(target.createAsmStreamer(
      ctx, llvm::make_unique<llvm::formatted_raw_ostream>(os), true, true,
      mip.release(), nullptr,
#if LDC_LLVM_VER >= 700
      std::move(mab),
#else
      mab.release(),
#endif
      false));
  if (nullptr == asmStreamer) {
    return;
  }

  asmStreamer->InitSections(false);

  // Section index -> values of all symbols in that section. Sorted and
  // de-duplicated below so the next symbol after a function can be found.
  std::unordered_map<uint64_t, std::vector<uint64_t>> sectionsToProcess;
  for (const auto &symbol : object.symbols()) {
    const auto secIt = llvm::cantFail(symbol.getSection());
    if (object.section_end() != secIt) {
      sectionsToProcess[secIt->getIndex()].push_back(symbol.getValue());
    }
  }
  for (auto &sec : sectionsToProcess) {
    auto &vec = sec.second;
    std::sort(vec.begin(), vec.end());
    vec.erase(std::unique(vec.begin(), vec.end()), vec.end());
  }

  for (const auto &symbol : object.symbols()) {
    const auto name = llvm::cantFail(symbol.getName());
    const auto secIt = llvm::cantFail(symbol.getSection());
    if (object.section_end() == secIt) {
      continue;
    }
    if (llvm::object::SymbolRef::ST_Function !=
        llvm::cantFail(symbol.getType())) {
      continue;
    }

    const auto sec = *secIt;
    llvm::StringRef data;
    if (sec.getContents(data)) {
      // Section contents are unavailable; nothing to disassemble.
      continue;
    }

    const uint64_t offset = symbol.getValue();
    if (offset > data.size()) {
      // The subtraction below would wrap around and the buffer handed to
      // printFunction() would extend far past the section contents.
      continue;
    }

    symTable.reset();
    symTable.addLabel(0, 0, name); // Function start
    processRelocations(symTable, offset, object, sec);

    // TODO: something more optimal
    for (const auto &globalSec : object.sections()) {
      if (globalSec.getRelocatedSection() == secIt) {
        processRelocations(symTable, offset, object, globalSec);
      }
    }

    // The function ends at the next higher symbol value in this section, or
    // at the end of the section when there is none.
    uint64_t size = data.size() - offset;
    const auto &starts = sectionsToProcess[sec.getIndex()];
    const auto next = std::upper_bound(starts.begin(), starts.end(), offset);
    if (starts.end() != next) {
      size = std::min<uint64_t>(size, *next - offset);
    }

    llvm::ArrayRef<uint8_t> buff(
        reinterpret_cast<const uint8_t *>(data.data() + offset),
        static_cast<std::size_t>(size));

    printFunction(*disasm, *mcia, buff, symTable, *sti, *asmStreamer);
    asmStreamer->EmitRawText("");
  }
}
|