Drop support for LLVM 14

Mainly because opaque IR pointers appear buggy in LLVM 14 (LLVM
crashes when compiling druntime).
This commit is contained in:
Martin Kinkelin 2024-05-19 14:03:36 +02:00
parent 3067b7b115
commit 2cd14c59dd
30 changed files with 18 additions and 8934 deletions

View file

@ -105,7 +105,7 @@ jobs:
environment:
- PARALLELISM: 4
- CI_OS: linux
- LLVM_MAJOR: 14
- LLVM_MAJOR: 15
- HOST_LDC_VERSION: 1.24.0
- EXTRA_CMAKE_FLAGS: "-DMULTILIB=ON -DRT_SUPPORT_SANITIZERS=ON -DBUILD_LTO_LIBS=ON"
Ubuntu-20.04-sharedLibsOnly-gdmd:
@ -116,7 +116,7 @@ jobs:
environment:
- PARALLELISM: 4
- CI_OS: linux
- LLVM_MAJOR: 14
- LLVM_MAJOR: 15
- EXTRA_APT_PACKAGES: gdmd
- EXTRA_CMAKE_FLAGS: "-DBUILD_SHARED_LIBS=ON -DBUILD_LTO_LIBS=ON -DD_COMPILER=gdmd -DLDC_LINK_MANUALLY=ON"

View file

@ -36,11 +36,6 @@ jobs:
host_dc: dmd-beta
llvm_version: 15.0.6
cmake_flags: -DBUILD_SHARED_LIBS=ON -DRT_SUPPORT_SANITIZERS=ON -DLIB_SUFFIX=64 -DLDC_LINK_MANUALLY=ON
- job_name: macOS 11, LLVM 14, latest DMD beta
os: macos-11
host_dc: dmd-beta
llvm_version: 14.0.6
cmake_flags: -DBUILD_SHARED_LIBS=ON -DRT_SUPPORT_SANITIZERS=ON -DLDC_LINK_MANUALLY=ON -DCMAKE_CXX_COMPILER=/usr/bin/c++ -DCMAKE_C_COMPILER=/usr/bin/cc
name: ${{ matrix.job_name }}
runs-on: ${{ matrix.os }}
env:

View file

@ -3,7 +3,7 @@
#### Big news
#### Platform support
- Supports LLVM 14 - 18. Support for LLVM 11, 12 and 13 was dropped.
- Supports LLVM 15 - 18. Support for LLVM 11 - 14 was dropped. The CLI options `-passmanager` and `-opaque-pointers` were removed.
#### Bug fixes

View file

@ -34,7 +34,7 @@ endfunction()
# Locate LLVM.
#
find_package(LLVM 14.0 REQUIRED
find_package(LLVM 15.0 REQUIRED
all-targets analysis asmparser asmprinter bitreader bitwriter codegen core
debuginfodwarf debuginfomsf debuginfopdb demangle
instcombine ipo instrumentation irreader libdriver linker lto mc
@ -823,13 +823,8 @@ if (LDC_INSTALL_LLVM_RUNTIME_LIBS)
copy_compilerrt_lib("darwin/libclang_rt.xray-fdr_osx.a" "libldc_rt.xray-fdr.a" FALSE)
copy_compilerrt_lib("darwin/libclang_rt.xray-profiling_osx.a" "libldc_rt.xray-profiling.a" FALSE)
elseif(UNIX)
if(LDC_LLVM_VER LESS 1500)
set(LDC_INSTALL_LLVM_RUNTIME_LIBS_OS_DEFAULT "linux")
set(LDC_INSTALL_LLVM_RUNTIME_LIBS_ARCH_DEFAULT "x86_64")
else()
set(LDC_INSTALL_LLVM_RUNTIME_LIBS_OS_DEFAULT "x86_64-unknown-linux-gnu")
set(LDC_INSTALL_LLVM_RUNTIME_LIBS_ARCH_DEFAULT "")
endif()
set(LDC_INSTALL_LLVM_RUNTIME_LIBS_OS "${LDC_INSTALL_LLVM_RUNTIME_LIBS_OS_DEFAULT}" CACHE STRING "Non-Mac Posix: OS used as directory name for the compiler-rt source libraries, e.g., 'freebsd'.")
set(LDC_INSTALL_LLVM_RUNTIME_LIBS_ARCH "${LDC_INSTALL_LLVM_RUNTIME_LIBS_ARCH_DEFAULT}" CACHE STRING "Non-Mac Posix: architecture used as libname suffix for the compiler-rt source libraries, e.g., 'aarch64'.")
if(LDC_INSTALL_LLVM_RUNTIME_LIBS_ARCH STREQUAL "")

View file

@ -36,7 +36,6 @@ set(llvm_config_names llvm-config-18.1 llvm-config181 llvm-config-18
llvm-config-17.0 llvm-config170 llvm-config-17
llvm-config-16.0 llvm-config160 llvm-config-16
llvm-config-15.0 llvm-config150 llvm-config-15
llvm-config-14.0 llvm-config140 llvm-config-14
llvm-config)
find_program(LLVM_CONFIG
NAMES ${llvm_config_names}
@ -49,12 +48,10 @@ if(APPLE)
NAMES ${llvm_config_names}
PATHS /opt/local/libexec/llvm-18/bin /opt/local/libexec/llvm-17/bin
/opt/local/libexec/llvm-16/bin /opt/local/libexec/llvm-15/bin
/opt/local/libexec/llvm-14/bin /opt/local/libexec/llvm-13/bin
/opt/local/libexec/llvm-12/bin /opt/local/libexec/llvm-11/bin
/opt/local/libexec/llvm/bin
/usr/local/opt/llvm@18/bin /usr/local/opt/llvm@17/bin
/usr/local/opt/llvm@16/bin /usr/local/opt/llvm@15/bin
/usr/local/opt/llvm@14/bin /usr/local/opt/llvm/bin
/usr/local/opt/llvm/bin
NO_DEFAULT_PATH)
endif()

View file

@ -561,10 +561,8 @@ void parseCommandLine(Strings &sourceFiles) {
// enforce opaque IR pointers
#if LDC_LLVM_VER >= 1700
// supports opaque IR pointers only
#elif LDC_LLVM_VER >= 1500
#else
getGlobalContext().setOpaquePointers(true);
#else // LLVM 14
getGlobalContext().enableOpaquePointers();
#endif
}

View file

@ -264,13 +264,7 @@ struct X86TargetABI : TargetABI {
// Keep alignment for LLVM 13+, to prevent invalid `movaps` etc.,
// but limit to 4 (required according to runnable/ldc_cabi1.d).
auto align4 = llvm::Align(4);
if (arg->attrs.getAlignment().
#if LDC_LLVM_VER >= 1500
value_or
#else
getValueOr
#endif
(align4) > align4)
if (arg->attrs.getAlignment().value_or(align4) > align4)
arg->attrs.addAlignmentAttr(align4);
}
}

View file

@ -1139,11 +1139,7 @@ void DtoDefineFunction(FuncDeclaration *fd, bool linkageAvailableExternally) {
// function attributes
if (gABI->needsUnwindTables()) {
#if LDC_LLVM_VER >= 1500
func->setUWTableKind(llvm::UWTableKind::Default);
#else
func->addFnAttr(LLAttribute::UWTable);
#endif
}
if (opts::isAnySanitizerEnabled() &&
!opts::functionIsInSanitizerBlacklist(fd)) {

View file

@ -299,11 +299,7 @@ void addCoverageAnalysis(Module *m) {
ctor->setCallingConv(gABI->callingConv(LINK::d));
// Set function attributes. See functions.cpp:DtoDefineFunction()
if (global.params.targetTriple->getArch() == llvm::Triple::x86_64) {
#if LDC_LLVM_VER >= 1500
ctor->setUWTableKind(llvm::UWTableKind::Default);
#else
ctor->addFnAttr(LLAttribute::UWTable);
#endif
}
llvm::BasicBlock *bb = llvm::BasicBlock::Create(gIR->context(), "", ctor);
@ -404,11 +400,7 @@ void registerModuleInfo(Module *m) {
}
void addModuleFlags(llvm::Module &m) {
#if LDC_LLVM_VER >= 1500
const auto ModuleMinFlag = llvm::Module::Min;
#else
const auto ModuleMinFlag = llvm::Module::Warning; // Fallback value
#endif
if (opts::fCFProtection == opts::CFProtectionType::Return ||
opts::fCFProtection == opts::CFProtectionType::Full) {

View file

@ -507,20 +507,11 @@ llvm::CallInst *DtoInlineAsmExpr(const Loc &loc, llvm::StringRef code,
llvm::FunctionType *FT =
llvm::FunctionType::get(returnType, operandTypes, false);
#if LDC_LLVM_VER < 1500
// make sure the constraints are valid
if (!llvm::InlineAsm::Verify(FT, constraints)) {
error(loc, "inline asm constraints are invalid");
fatal();
}
#else
if (auto err = llvm::InlineAsm::verify(FT, constraints)) {
error(loc, "inline asm constraints are invalid");
llvm::errs() << err;
fatal();
}
#endif
// build asm call
bool sideeffect = true;

View file

@ -273,11 +273,7 @@ struct LazyFunctionDeclarer {
// FIXME: Move to better place (abi-x86-64.cpp?)
// NOTE: There are several occurrences of this line.
if (global.params.targetTriple->getArch() == llvm::Triple::x86_64) {
#if LDC_LLVM_VER >= 1500
fn->setUWTableKind(llvm::UWTableKind::Default);
#else
fn->addFnAttr(LLAttribute::UWTable);
#endif
}
fn->setCallingConv(gABI->callingConv(dty, false));

View file

@ -19,8 +19,7 @@ else
static assert(false, "This module is only valid for LDC");
}
version (LDC_LLVM_1400) enum LLVM_version = 1400;
else version (LDC_LLVM_1500) enum LLVM_version = 1500;
version (LDC_LLVM_1500) enum LLVM_version = 1500;
else version (LDC_LLVM_1600) enum LLVM_version = 1600;
else version (LDC_LLVM_1700) enum LLVM_version = 1700;
else version (LDC_LLVM_1800) enum LLVM_version = 1800;

View file

@ -1,7 +1,5 @@
// Test basic use of sample-based PGO profile
// REQUIRES: atleast_llvm1500
// RUN: split-file %s %t
// RUN: %ldc -O2 -c -gline-tables-only -output-ll -of=%t.ll -fprofile-sample-use=%t/pgo-sample.prof %t/testcase.d && FileCheck %s < %t.ll

View file

@ -1,48 +1,44 @@
// https://github.com/ldc-developers/ldc/issues/3692
// REQUIRES: target_X86
// REQUIRES: atmost_llvm1409
// RUN: %ldc -mtriple=x86_64-linux-gnu -output-ll -of=%t.ll %s
// RUN: FileCheck %s < %t.ll
// D `int[3]` rewritten to LL `{ i64, i32 }` for SysV ABI - mismatching size and alignment
// CHECK: define void @_D6gh36924takeFG3iZv({ i64, i32 } %a_arg)
// CHECK-LABEL: define void @_D6gh36924takeFG3iZv({ i64, i32 } %a_arg)
void take(int[3] a)
{
// the `{ i64, i32 }` size is 16 bytes, so we need a padded alloca (with 8-bytes alignment)
// CHECK-NEXT: %.BaseBitcastABIRewrite_param_storage = alloca { i64, i32 }, align 8
// CHECK-NEXT: store { i64, i32 } %a_arg, { i64, i32 }* %.BaseBitcastABIRewrite_param_storage
// CHECK-NEXT: %a = bitcast { i64, i32 }* %.BaseBitcastABIRewrite_param_storage to [3 x i32]*
// CHECK-NEXT: = alloca { i64, i32 }, align 8
}
// CHECK: define void @_D6gh36924passFZv()
// CHECK-LABEL: define void @_D6gh36924passFZv()
void pass()
{
// CHECK-NEXT: %arrayliteral = alloca [3 x i32], align 4
// we need an extra padded alloca with proper alignment
// CHECK-NEXT: %.BaseBitcastABIRewrite_padded_arg_storage = alloca { i64, i32 }, align 8
// CHECK: %.BaseBitcastABIRewrite_arg = load { i64, i32 }, { i64, i32 }* %.BaseBitcastABIRewrite_padded_arg_storage
// CHECK: %.BaseBitcastABIRewrite_arg = load { i64, i32 }, {{\{ i64, i32 \}\*|ptr}} %.BaseBitcastABIRewrite_padded_arg_storage
take([1, 2, 3]);
}
// D `int[4]` rewritten to LL `{ i64, i64 }` for SysV ABI - mismatching alignment only
// CHECK: define void @_D6gh36925take4FG4iZv({ i64, i64 } %a_arg)
// CHECK-LABEL: define void @_D6gh36925take4FG4iZv({ i64, i64 } %a_arg)
void take4(int[4] a)
{
// the alloca should have 8-bytes alignment, even though a.alignof == 4
// CHECK-NEXT: %a = alloca [4 x i32], align 8
// CHECK-NEXT: %1 = bitcast [4 x i32]* %a to { i64, i64 }*
// CHECK-NEXT: store { i64, i64 } %a_arg, { i64, i64 }* %1
// CHECK: store { i64, i64 } %a_arg, {{\{ i64, i64 \}\*|ptr}} %
}
// CHECK: define void @_D6gh36925pass4FZv()
// CHECK-LABEL: define void @_D6gh36925pass4FZv()
void pass4()
{
// CHECK-NEXT: %arrayliteral = alloca [4 x i32], align 4
// we need an extra alloca with 8-bytes alignment
// CHECK-NEXT: %.BaseBitcastABIRewrite_padded_arg_storage = alloca { i64, i64 }, align 8
// CHECK: %.BaseBitcastABIRewrite_arg = load { i64, i64 }, { i64, i64 }* %.BaseBitcastABIRewrite_padded_arg_storage
// CHECK: %.BaseBitcastABIRewrite_arg = load { i64, i64 }, {{\{ i64, i64 \}\*|ptr}} %.BaseBitcastABIRewrite_padded_arg_storage
take4([1, 2, 3, 4]);
}

View file

@ -1,45 +0,0 @@
// https://github.com/ldc-developers/ldc/issues/3692
// REQUIRES: target_X86
// REQUIRES: atleast_llvm1500
// RUN: %ldc -mtriple=x86_64-linux-gnu -output-ll -of=%t.ll %s
// RUN: FileCheck %s < %t.ll
// D `int[3]` rewritten to LL `{ i64, i32 }` for SysV ABI - mismatching size and alignment
// CHECK-LABEL: define void @_D13gh3692_llvm154takeFG3iZv({ i64, i32 } %a_arg)
void take(int[3] a)
{
// the `{ i64, i32 }` size is 16 bytes, so we need a padded alloca (with 8-bytes alignment)
// CHECK-NEXT: = alloca { i64, i32 }, align 8
}
// CHECK-LABEL: define void @_D13gh3692_llvm154passFZv()
void pass()
{
// CHECK-NEXT: %arrayliteral = alloca [3 x i32], align 4
// we need an extra padded alloca with proper alignment
// CHECK-NEXT: %.BaseBitcastABIRewrite_padded_arg_storage = alloca { i64, i32 }, align 8
// CHECK: %.BaseBitcastABIRewrite_arg = load { i64, i32 }, {{\{ i64, i32 \}\*|ptr}} %.BaseBitcastABIRewrite_padded_arg_storage
take([1, 2, 3]);
}
// D `int[4]` rewritten to LL `{ i64, i64 }` for SysV ABI - mismatching alignment only
// CHECK-LABEL: define void @_D13gh3692_llvm155take4FG4iZv({ i64, i64 } %a_arg)
void take4(int[4] a)
{
// the alloca should have 8-bytes alignment, even though a.alignof == 4
// CHECK-NEXT: %a = alloca [4 x i32], align 8
// CHECK: store { i64, i64 } %a_arg, {{\{ i64, i64 \}\*|ptr}} %
}
// CHECK-LABEL: define void @_D13gh3692_llvm155pass4FZv()
void pass4()
{
// CHECK-NEXT: %arrayliteral = alloca [4 x i32], align 4
// we need an extra alloca with 8-bytes alignment
// CHECK-NEXT: %.BaseBitcastABIRewrite_padded_arg_storage = alloca { i64, i64 }, align 8
// CHECK: %.BaseBitcastABIRewrite_arg = load { i64, i64 }, {{\{ i64, i64 \}\*|ptr}} %.BaseBitcastABIRewrite_padded_arg_storage
take4([1, 2, 3, 4]);
}

File diff suppressed because it is too large Load diff

View file

@ -1,23 +0,0 @@
# LLVM components that the llvm-profgen tool is linked against.
set(LLVM_LINK_COMPONENTS
AllTargetsDescs
AllTargetsDisassemblers
AllTargetsInfos
DebugInfoDWARF
Core
MC
IPO
MCDisassembler
Object
ProfileData
Support
Symbolize
)
# Declare the llvm-profgen tool target and the sources it is built from.
add_llvm_tool(llvm-profgen
llvm-profgen.cpp
PerfReader.cpp
CSPreInliner.cpp
ProfiledBinary.cpp
ProfileGenerator.cpp
)

View file

@ -1,285 +0,0 @@
//===-- CSPreInliner.cpp - Profile guided preinliner -------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "CSPreInliner.h"
#include "ProfiledBinary.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/Statistic.h"
#include <cstdint>
#include <queue>
#define DEBUG_TYPE "cs-preinliner"
using namespace llvm;
using namespace sampleprof;
// Counters bumped by CSPreInliner::processFunction: one per inline decision
// (inlined / not inlined) and one per function whose candidate queue was cut
// short by the min, max or growth size limit.
STATISTIC(PreInlNumCSInlined,
"Number of functions inlined with context sensitive profile");
STATISTIC(PreInlNumCSNotInlined,
"Number of functions not inlined with context sensitive profile");
STATISTIC(PreInlNumCSInlinedHitMinLimit,
"Number of functions with FDO inline stopped due to min size limit");
STATISTIC(PreInlNumCSInlinedHitMaxLimit,
"Number of functions with FDO inline stopped due to max size limit");
STATISTIC(
PreInlNumCSInlinedHitGrowthLimit,
"Number of functions with FDO inline stopped due to growth size limit");
// The switches specify inline thresholds used in SampleProfileLoader inlining.
// TODO: the actual threshold to be tuned here because the size here is based
// on machine code not LLVM IR.
// These options are only declared here; their definitions live outside this
// file.
extern cl::opt<int> SampleHotCallSiteThreshold;
extern cl::opt<int> SampleColdCallSiteThreshold;
extern cl::opt<int> ProfileInlineGrowthLimit;
extern cl::opt<int> ProfileInlineLimitMin;
extern cl::opt<int> ProfileInlineLimitMax;
extern cl::opt<bool> SortProfiledSCC;
// Hidden option, on by default. Not read in this file.
cl::opt<bool> EnableCSPreInliner(
"csspgo-preinliner", cl::Hidden, cl::init(true),
cl::desc("Run a global pre-inliner to merge context profile based on "
"estimated global top-down inline decisions"));
// Hidden option, on by default. Copied into CSPreInliner::UseContextCost by
// the constructor and consulted by getFuncSize().
cl::opt<bool> UseContextCostForPreInliner(
"use-context-cost-for-preinliner", cl::Hidden, cl::init(true),
cl::desc("Use context-sensitive byte size cost for preinliner decisions"));
// Hidden option, off by default. When set, shouldInline() follows the
// ContextWasInlined attribute instead of applying size thresholds.
static cl::opt<bool> SamplePreInlineReplay(
"csspgo-replay-preinline", cl::Hidden, cl::init(false),
cl::desc(
"Replay previous inlining and adjust context profile accordingly"));
// Sets up a pre-inliner over `Profiles` for the profiled `Binary`.
// `HotThreshold` / `ColdThreshold` are stored as the call-site counts above /
// below which a candidate is treated as hot / cold by shouldInline().
CSPreInliner::CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary,
                           uint64_t HotThreshold, uint64_t ColdThreshold)
    : UseContextCost(UseContextCostForPreInliner),
      // TODO: Pass in a guid-to-name map in order for
      // ContextTracker.getFuncNameFor to work, if `Profiles` can have md5
      // codes as their profile context.
      ContextTracker(Profiles, nullptr), ProfileMap(Profiles), Binary(Binary),
      HotCountThreshold(HotThreshold), ColdCountThreshold(ColdThreshold) {
  // Install the CSSPGO-tuned call site thresholds, but only for options that
  // have no recorded occurrence, so an explicitly given value is kept.
  if (SampleHotCallSiteThreshold.getNumOccurrences() == 0)
    SampleHotCallSiteThreshold = 1500;
  if (SampleColdCallSiteThreshold.getNumOccurrences() == 0)
    SampleColdCallSiteThreshold = 0;
}
std::vector<StringRef> CSPreInliner::buildTopDownOrder() {
std::vector<StringRef> Order;
ProfiledCallGraph ProfiledCG(ContextTracker);
// Now that we have a profiled call graph, construct top-down order
// by building up SCC and reversing SCC order.
scc_iterator<ProfiledCallGraph *> I = scc_begin(&ProfiledCG);
while (!I.isAtEnd()) {
auto Range = *I;
if (SortProfiledSCC) {
// Sort nodes in one SCC based on callsite hotness.
scc_member_iterator<ProfiledCallGraph *> SI(*I);
Range = *SI;
}
for (auto *Node : Range) {
if (Node != ProfiledCG.getEntryNode())
Order.push_back(Node->Name);
}
++I;
}
std::reverse(Order.begin(), Order.end());
return Order;
}
// Pushes one inline candidate onto `CQueue` for every child context of
// `CallerSamples` that carries function samples. Returns true if at least one
// candidate was added.
bool CSPreInliner::getInlineCandidates(ProfiledCandidateQueue &CQueue,
                                       const FunctionSamples *CallerSamples) {
  assert(CallerSamples && "Expect non-null caller samples");

  // Ideally we want to consider everything a function calls, but as far as
  // context profile is concerned, only those frames that are children of
  // current one in the trie are relevant. So we walk the trie instead of call
  // targets from function profile.
  // NOTE(review): the node is dereferenced unchecked; presumably
  // getContextFor() never returns null for a caller context - confirm.
  ContextTrieNode *CallerNode =
      ContextTracker.getContextFor(CallerSamples->getContext());

  bool AddedAny = false;
  for (auto &ChildEntry : CallerNode->getAllChildContext()) {
    ContextTrieNode &CalleeNode = ChildEntry.second;
    FunctionSamples *CalleeSamples = CalleeNode.getFunctionSamples();
    if (!CalleeSamples)
      continue;

    // Call site count is more reliable, so we look up the corresponding call
    // target profile in caller's context profile to retrieve call site count.
    uint64_t CalleeEntryCount = CalleeSamples->getEntrySamples();
    uint64_t CallsiteCount = 0;
    LineLocation Callsite = CalleeNode.getCallSiteLoc();
    if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) {
      SampleRecord::CallTargetMap &TargetCounts = CallTargets.get();
      auto Found = TargetCounts.find(CalleeSamples->getName());
      if (Found != TargetCounts.end())
        CallsiteCount = Found->second;
    }

    // TODO: call site and callee entry count should be mostly consistent, add
    // check for that.
    AddedAny = true;
    uint32_t CalleeSize = getFuncSize(*CalleeSamples);
    // The larger of the two counts is used as the candidate's weight.
    CQueue.emplace(CalleeSamples, std::max(CallsiteCount, CalleeEntryCount),
                   CalleeSize);
  }

  return AddedAny;
}
// Size proxy for `FSamples`: the binary's size for that context when
// UseContextCost is set, otherwise the number of body sample entries.
uint32_t CSPreInliner::getFuncSize(const FunctionSamples &FSamples) {
  if (!UseContextCost)
    return FSamples.getBodySamples().size();
  return Binary.getFuncSizeForContext(FSamples.getContext());
}
// Decides whether `Candidate` should be pre-inlined.
bool CSPreInliner::shouldInline(ProfiledInlineCandidate &Candidate) {
  // If replay inline is requested, simply follow the inline decision of the
  // profiled binary.
  if (SamplePreInlineReplay) {
    const auto &CalleeContext = Candidate.CalleeSamples->getContext();
    return CalleeContext.hasAttribute(ContextWasInlined);
  }

  // Adjust threshold based on call site hotness, only do this for callsite
  // prioritized inliner because otherwise cost-benefit check is done earlier.
  const bool AboveHot = Candidate.CallsiteCount > HotCountThreshold;
  // TODO: for small cold functions, we may inlined them and we need to keep
  // context profile accordingly.
  const bool BelowCold = Candidate.CallsiteCount < ColdCountThreshold;

  // The cold threshold is the default and also wins when a count is both
  // above the hot limit and below the cold limit.
  unsigned int SampleThreshold = SampleColdCallSiteThreshold;
  if (AboveHot && !BelowCold)
    SampleThreshold = SampleHotCallSiteThreshold;

  return (Candidate.SizeCost < SampleThreshold);
}
void CSPreInliner::processFunction(const StringRef Name) {
FunctionSamples *FSamples = ContextTracker.getBaseSamplesFor(Name);
if (!FSamples)
return;
unsigned FuncSize = getFuncSize(*FSamples);
unsigned FuncFinalSize = FuncSize;
unsigned SizeLimit = FuncSize * ProfileInlineGrowthLimit;
SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax);
SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin);
LLVM_DEBUG(dbgs() << "Process " << Name
<< " for context-sensitive pre-inlining (pre-inline size: "
<< FuncSize << ", size limit: " << SizeLimit << ")\n");
ProfiledCandidateQueue CQueue;
getInlineCandidates(CQueue, FSamples);
while (!CQueue.empty() && FuncFinalSize < SizeLimit) {
ProfiledInlineCandidate Candidate = CQueue.top();
CQueue.pop();
bool ShouldInline = false;
if ((ShouldInline = shouldInline(Candidate))) {
// We mark context as inlined as the corresponding context profile
// won't be merged into that function's base profile.
++PreInlNumCSInlined;
ContextTracker.markContextSamplesInlined(Candidate.CalleeSamples);
Candidate.CalleeSamples->getContext().setAttribute(
ContextShouldBeInlined);
FuncFinalSize += Candidate.SizeCost;
getInlineCandidates(CQueue, Candidate.CalleeSamples);
} else {
++PreInlNumCSNotInlined;
}
LLVM_DEBUG(dbgs() << (ShouldInline ? " Inlined" : " Outlined")
<< " context profile for: "
<< Candidate.CalleeSamples->getContext().toString()
<< " (callee size: " << Candidate.SizeCost
<< ", call count:" << Candidate.CallsiteCount << ")\n");
}
if (!CQueue.empty()) {
if (SizeLimit == (unsigned)ProfileInlineLimitMax)
++PreInlNumCSInlinedHitMaxLimit;
else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
++PreInlNumCSInlinedHitMinLimit;
else
++PreInlNumCSInlinedHitGrowthLimit;
}
LLVM_DEBUG({
if (!CQueue.empty())
dbgs() << " Inline candidates ignored due to size limit (inliner "
"original size: "
<< FuncSize << ", inliner final size: " << FuncFinalSize
<< ", size limit: " << SizeLimit << ")\n";
while (!CQueue.empty()) {
ProfiledInlineCandidate Candidate = CQueue.top();
CQueue.pop();
bool WasInlined =
Candidate.CalleeSamples->getContext().hasAttribute(ContextWasInlined);
dbgs() << " " << Candidate.CalleeSamples->getContext().toString()
<< " (candidate size:" << Candidate.SizeCost
<< ", call count: " << Candidate.CallsiteCount << ", previously "
<< (WasInlined ? "inlined)\n" : "not inlined)\n");
}
});
}
// Entry point: processes every function in top-down order, then drops the
// context profiles that were not inlined and canonicalizes what remains.
void CSPreInliner::run() {
#ifndef NDEBUG
  // Debug-only helper dumping each profile's context, total and head samples.
  auto printProfileNames = [](SampleProfileMap &Profiles, bool IsInput) {
    dbgs() << (IsInput ? "Input" : "Output") << " context-sensitive profiles ("
           << Profiles.size() << " total):\n";
    for (auto &Entry : Profiles) {
      const FunctionSamples &Samples = Entry.second;
      dbgs() << " [" << Samples.getContext().toString() << "] "
             << Samples.getTotalSamples() << ":" << Samples.getHeadSamples()
             << "\n";
    }
  };
#endif

  LLVM_DEBUG(printProfileNames(ProfileMap, true));

  // Execute global pre-inliner to estimate a global top-down inline
  // decision and merge profiles accordingly. This helps with profile
  // merge for ThinLTO otherwise we won't be able to merge profiles back
  // to base profile across module/thin-backend boundaries.
  // It also helps better compress context profile to control profile
  // size, as we now only need context profile for functions going to
  // be inlined.
  for (StringRef FuncName : buildTopDownOrder())
    processFunction(FuncName);

  // Not inlined context profiles are merged into its base, so we can
  // trim out such profiles from the output. Keys are collected first and
  // erased in a second pass, so the map is not modified while iterated.
  std::vector<SampleContext> ProfilesToBeRemoved;
  for (auto &Entry : ProfileMap) {
    SampleContext &Context = Entry.second.getContext();
    if (Context.isBaseContext() || Context.hasState(InlinedContext))
      continue;
    assert(Context.hasState(MergedContext) &&
           "Not inlined context profile should be merged already");
    ProfilesToBeRemoved.push_back(Entry.first);
  }
  for (auto &ContextName : ProfilesToBeRemoved)
    ProfileMap.erase(ContextName);

  // Make sure ProfileMap's key is consistent with FunctionSamples' name.
  SampleContextTrimmer(ProfileMap).canonicalizeContextProfiles();

  LLVM_DEBUG(printProfileNames(ProfileMap, false));
}

View file

@ -1,95 +0,0 @@
//===-- CSPreInliner.h - Profile guided preinliner ---------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_PROFGEN_PGOINLINEADVISOR_H
#define LLVM_TOOLS_LLVM_PROFGEN_PGOINLINEADVISOR_H
#include "ProfiledBinary.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Transforms/IPO/ProfiledCallGraph.h"
#include "llvm/Transforms/IPO/SampleContextTracker.h"
using namespace llvm;
using namespace sampleprof;
namespace llvm {
namespace sampleprof {
// Inline candidate seen from profile: the callee's samples together with the
// weight (call site count) and cost (size) used to rank it.
struct ProfiledInlineCandidate {
// Note that `Size` is passed in as 32 bits but stored in the 64-bit SizeCost.
ProfiledInlineCandidate(const FunctionSamples *Samples, uint64_t Count,
uint32_t Size)
: CalleeSamples(Samples), CallsiteCount(Count), SizeCost(Size) {}
// Context-sensitive function profile for inline candidate
const FunctionSamples *CalleeSamples;
// Call site count for an inline candidate
// TODO: make sure entry count for context profile and call site
// target count for corresponding call are consistent.
uint64_t CallsiteCount;
// Size proxy for function under particular call context.
uint64_t SizeCost;
};
// Inline candidate comparer using call site weight.
//
// Strict "less than" ordering over candidates:
//   1. a smaller call site count orders first;
//   2. on equal counts, a larger size cost orders first;
//   3. on a full tie, the GUID of the callee name decides.
// Presumably PriorityQueue pops the greatest element first (as
// std::priority_queue does), so hot and small candidates are visited first -
// confirm against llvm/ADT/PriorityQueue.h.
struct ProfiledCandidateComparer {
  // Const-qualified so the comparator can also be invoked through a const
  // object or reference; it holds no state.
  bool operator()(const ProfiledInlineCandidate &LHS,
                  const ProfiledInlineCandidate &RHS) const {
    if (LHS.CallsiteCount != RHS.CallsiteCount)
      return LHS.CallsiteCount < RHS.CallsiteCount;

    if (LHS.SizeCost != RHS.SizeCost)
      return LHS.SizeCost > RHS.SizeCost;

    // Tie breaker using GUID so we have stable/deterministic inlining order
    assert(LHS.CalleeSamples && RHS.CalleeSamples &&
           "Expect non-null FunctionSamples");
    return LHS.CalleeSamples->getGUID(LHS.CalleeSamples->getName()) <
           RHS.CalleeSamples->getGUID(RHS.CalleeSamples->getName());
  }
};
// Priority queue of inline candidates, ordered by ProfiledCandidateComparer.
using ProfiledCandidateQueue =
PriorityQueue<ProfiledInlineCandidate, std::vector<ProfiledInlineCandidate>,
ProfiledCandidateComparer>;
// Pre-compilation inliner based on context-sensitive profile.
// The PreInliner estimates inline decisions using hotness from the profile
// and cost estimation from machine code size. It helps merge context
// profiles globally and achieves better post-inline profile quality, which
// otherwise won't be possible for ThinLTO. It also reduces context profile
// size by only keeping contexts that are estimated to be inlined.
class CSPreInliner {
public:
// Binds the pre-inliner to a profile map and a profiled binary, with the
// call-site counts used to classify a candidate as hot or cold.
CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary,
uint64_t HotThreshold, uint64_t ColdThreshold);
// Processes all functions in top-down order and trims the profile map.
void run();
private:
// Adds the child contexts of the given caller to CQueue; returns true if
// any candidate was added.
bool getInlineCandidates(ProfiledCandidateQueue &CQueue,
const FunctionSamples *FCallerContextSamples);
// Function names in top-down order of the profiled call graph.
std::vector<StringRef> buildTopDownOrder();
// Estimates the inline decisions for one function.
void processFunction(StringRef Name);
// Inline decision for a single candidate.
bool shouldInline(ProfiledInlineCandidate &Candidate);
// Size proxy of a function profile (see UseContextCost).
uint32_t getFuncSize(const FunctionSamples &FSamples);
// Whether getFuncSize asks the binary for a context-specific size.
bool UseContextCost;
SampleContextTracker ContextTracker;
SampleProfileMap &ProfileMap;
ProfiledBinary &Binary;
// Count thresholds to answer isHotCount and isColdCount queries.
// Mirrors the threshold in ProfileSummaryInfo.
uint64_t HotCountThreshold;
uint64_t ColdCountThreshold;
};
} // end namespace sampleprof
} // end namespace llvm
#endif

View file

@ -1,59 +0,0 @@
//===-- CallContext.h - Call Context Handler ---------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_PROFGEN_CALLCONTEXT_H
#define LLVM_TOOLS_LLVM_PROFGEN_CALLCONTEXT_H
#include "llvm/ProfileData/SampleProf.h"
#include <sstream>
#include <string>
#include <vector>
namespace llvm {
namespace sampleprof {
// Formats one context frame as "<function>:<line offset>", with
// ".<discriminator>" appended when the discriminator is greater than zero.
inline std::string getCallSite(const SampleContextFrame &Callsite) {
  const auto &Loc = Callsite.Location;

  std::string Text = Callsite.FuncName.str();
  Text += ":";
  Text += Twine(Loc.LineOffset).str();
  if (Loc.Discriminator > 0) {
    Text += ".";
    Text += Twine(Loc.Discriminator).str();
  }
  return Text;
}
// Renders a whole context as its call sites joined by " @ ", in the order
// the frames are stored in `Context`. Returns an empty string for an empty
// context.
// TODO: This operation is expensive. If it ever gets called multiple times we
// may think of making a class wrapper with internal states for it.
inline std::string getLocWithContext(const SampleContextFrameVector &Context) {
  std::ostringstream OContextStr;
  // Track the first frame with a flag rather than testing
  // `OContextStr.str().size()`: str() returns a copy of the whole buffer, so
  // that test made the loop quadratic. The result is the same because
  // getCallSite() never returns an empty string (it always contains ':').
  bool IsFirstFrame = true;
  for (const auto &Callsite : Context) {
    if (!IsFirstFrame)
      OContextStr << " @ ";
    IsFirstFrame = false;
    OContextStr << getCallSite(Callsite);
  }
  return OContextStr.str();
}
// Reverse call context, i.e., in the order of callee frames to caller frames,
// is useful during instruction printing or pseudo probe printing.
// The frames are joined by " @ "; an empty context yields an empty string.
inline std::string
getReversedLocWithContext(const SampleContextFrameVector &Context) {
  std::ostringstream OContextStr;
  // Track the first frame with a flag rather than testing
  // `OContextStr.str().size()`: str() returns a copy of the whole buffer, so
  // that test made the loop quadratic. The result is the same because
  // getCallSite() never returns an empty string (it always contains ':').
  bool IsFirstFrame = true;
  for (const auto &Callsite : reverse(Context)) {
    if (!IsFirstFrame)
      OContextStr << " @ ";
    IsFirstFrame = false;
    OContextStr << getCallSite(Callsite);
  }
  return OContextStr.str();
}
} // end namespace sampleprof
} // end namespace llvm
#endif

View file

@ -1,56 +0,0 @@
//===-- ErrorHandling.h - Error handler -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_PROFGEN_ERRORHANDLING_H
#define LLVM_TOOLS_LLVM_PROFGEN_ERRORHANDLING_H
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/WithColor.h"
#include <system_error>
using namespace llvm;
// Reports `Message` as an llvm-profgen error on errs() and terminates the
// process with EXIT_FAILURE. A non-empty `Whence` is printed as a
// "<whence>: " prefix before the message; a non-empty `Hint` is printed
// afterwards as a note.
[[noreturn]] inline void exitWithError(const Twine &Message,
                                       StringRef Whence = StringRef(),
                                       StringRef Hint = StringRef()) {
  const bool HasWhence = !Whence.empty();
  const bool HasHint = !Hint.empty();

  WithColor::error(errs(), "llvm-profgen");
  if (HasWhence)
    errs() << Whence << ": ";
  errs() << Message << "\n";
  if (HasHint)
    WithColor::note() << Hint << "\n";

  ::exit(EXIT_FAILURE);
}
[[noreturn]] inline void exitWithError(std::error_code EC,
StringRef Whence = StringRef()) {
exitWithError(EC.message(), Whence);
}
// Overload taking an llvm::Error: the error is consumed, converted to an
// error code and reported through the std::error_code overload.
[[noreturn]] inline void exitWithError(Error E, StringRef Whence) {
  std::error_code EC = errorToErrorCode(std::move(E));
  exitWithError(EC, Whence);
}
// Returns the value held by `EO`. If `EO` holds an error instead, the error
// is forwarded with `Args` to exitWithError(), which does not return.
template <typename T, typename... Ts>
T unwrapOrError(Expected<T> EO, Ts &&... Args) {
  if (!EO)
    exitWithError(EO.takeError(), std::forward<Ts>(Args)...);
  return std::move(*EO);
}
// Emits a warning of the form "<pct>%(<Num>/<Total>) <Msg>", where <pct> is
// Num as a percentage of Total with two decimals. Nothing is printed when
// either count is zero.
inline void emitWarningSummary(uint64_t Num, uint64_t Total, StringRef Msg) {
  if (Total == 0 || Num == 0)
    return;

  const double Percent = static_cast<double>(Num) * 100 / Total;
  WithColor::warning() << format("%.2f", Percent) << "%(" << Num << "/"
                       << Total << ") " << Msg << "\n";
}
#endif

File diff suppressed because it is too large Load diff

View file

@ -1,728 +0,0 @@
//===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
#define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
#include "ErrorHandling.h"
#include "ProfiledBinary.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <fstream>
#include <list>
#include <map>
#include <vector>
using namespace llvm;
using namespace sampleprof;
namespace llvm {
namespace sampleprof {
// Stream based trace line iterator.
// Holds one line of the input file at a time; construction already loads the
// first line, and advance() moves on to the next one.
class TraceStream {
  std::string CurrentLine;
  std::ifstream Fin;
  bool IsAtEoF = false;
  uint64_t LineNumber = 0;

public:
  // Opens `Filename`; exits with an error if the stream is not in a good
  // state afterwards.
  TraceStream(StringRef Filename) : Fin(Filename.str()) {
    if (!Fin.good())
      exitWithError("Error read input perf script file", Filename);
    advance();
  }

  // The line most recently read. Must not be called once the end of the
  // file has been reached.
  StringRef getCurrentLine() {
    assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
    return CurrentLine;
  }

  // Number of lines successfully read so far.
  uint64_t getLineNumber() { return LineNumber; }

  bool isAtEoF() { return IsAtEoF; }

  // Read the next line. A failed read marks the end of the file and leaves
  // the line counter untouched.
  void advance() {
    if (std::getline(Fin, CurrentLine)) {
      ++LineNumber;
      return;
    }
    IsAtEoF = true;
  }
};
// The type of input format. Enumerators are numbered 0..3 in declaration
// order.
enum PerfFormat {
  // Format not known.
  UnknownFormat = 0,
  // Raw linux perf.data.
  PerfData,
  // Perf script create by `perf script` command.
  PerfScript,
  // Unsymbolized profile generated by llvm-profgen.
  UnsymbolizedProfile,
};
// The type of perfscript content. Enumerators are numbered 0..2 in
// declaration order.
enum PerfContent {
  // Content not known.
  UnknownContent = 0,
  // Only LBR sample.
  LBR,
  // Hybrid sample including call stack and LBR stack.
  LBRStack,
};
// Describes one input: its path plus the detected format and content kind,
// both of which start out as "unknown".
struct PerfInputFile {
  std::string InputFile;
  PerfFormat Format = UnknownFormat;
  PerfContent Content = UnknownContent;
};
// The parsed LBR sample entry: one branch from `Source` to `Target`.
struct LBREntry {
  uint64_t Source = 0;
  uint64_t Target = 0;
  // An artificial branch stands for a series of consecutive branches starting
  // from the current binary with a transition through external code and
  // eventually landing back in the current binary.
  bool IsArtificial = false;

  LBREntry(uint64_t S, uint64_t T, bool I)
      : Source(S), Target(T), IsArtificial(I) {}

#ifndef NDEBUG
  // Debug-only dump: "from <source> to <target>", followed by " Artificial"
  // for artificial branches.
  // NOTE(review): "%#010x" is applied to 64-bit values - confirm this prints
  // full addresses as intended.
  void print() const {
    auto &OS = dbgs();
    OS << "from " << format("%#010x", Source) << " to "
       << format("%#010x", Target);
    if (IsArtificial)
      OS << " Artificial";
  }
#endif
};
#ifndef NDEBUG
// Debug helper: dump every LBR entry, one per line, prefixed by its index.
static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) {
  size_t Idx = 0;
  while (Idx < LBRStack.size()) {
    dbgs() << "[" << Idx << "] ";
    LBRStack[Idx].print();
    dbgs() << "\n";
    ++Idx;
  }
}
static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) {
for (size_t I = 0; I < CallStack.size(); I++) {
dbgs() << "[" << I << "] " << format("%#010x", CallStack[I]) << "\n";
}
}
#endif
// Adapter that lets shared data of type T be used as an unordered-container
// key. T must provide \fn getHashCode and \fn isEqual.
// getHashCode is deliberately called non-virtually: derived classes compute
// their hash explicitly and store it in the base, which also leaves room for
// incremental (rolling) hashing during frame stack unwinding, where only the
// leaf changes. \fn isEqual may be virtual and therefore carries some
// overhead; with a better hash function it could be skipped or made
// non-virtual (e.g. ignore the comparison if hash collisions are unlikely).
template <class T> class Hashable {
public:
  std::shared_ptr<T> Data;

  Hashable(const std::shared_ptr<T> &D) : Data(D) {}

  // Hashing functor: forwards to the wrapped object's getHashCode.
  struct Hash {
    uint64_t operator()(const Hashable<T> &Key) const {
      // Non-virtual call on purpose.
      const uint64_t Code = Key.Data->getHashCode();
      assert(Code && "Should generate HashCode for it!");
      return Code;
    }
  };

  // Equality functor: forwards to the wrapped object's isEqual.
  struct Equal {
    bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {
      // Exact comparison of the payloads.
      const T *Other = RHS.Data.get();
      return LHS.Data->isEqual(Other);
    }
  };

  // Raw, non-owning pointer to the wrapped object.
  T *getPtr() const { return Data.get(); }
};
struct PerfSample {
// LBR stack recorded in FIFO order.
SmallVector<LBREntry, 16> LBRStack;
// Call stack recorded in FILO(leaf to root) order, it's used for CS-profile
// generation
SmallVector<uint64_t, 16> CallStack;
virtual ~PerfSample() = default;
uint64_t getHashCode() const {
// Use simple DJB2 hash
auto HashCombine = [](uint64_t H, uint64_t V) {
return ((H << 5) + H) + V;
};
uint64_t Hash = 5381;
for (const auto &Value : CallStack) {
Hash = HashCombine(Hash, Value);
}
for (const auto &Entry : LBRStack) {
Hash = HashCombine(Hash, Entry.Source);
Hash = HashCombine(Hash, Entry.Target);
}
return Hash;
}
bool isEqual(const PerfSample *Other) const {
const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
if (CallStack.size() != OtherCallStack.size() ||
LBRStack.size() != OtherLBRStack.size())
return false;
if (!std::equal(CallStack.begin(), CallStack.end(), OtherCallStack.begin()))
return false;
for (size_t I = 0; I < OtherLBRStack.size(); I++) {
if (LBRStack[I].Source != OtherLBRStack[I].Source ||
LBRStack[I].Target != OtherLBRStack[I].Target)
return false;
}
return true;
}
#ifndef NDEBUG
void print() const {
dbgs() << "LBR stack\n";
printLBRStack(LBRStack);
dbgs() << "Call stack\n";
printCallStack(CallStack);
}
#endif
};
// After parsing the sample, we record the samples by aggregating them
// into this counter. The key stores the sample data and the value is
// the number of times the sample was repeated. Keys are hashed and compared
// through PerfSample::getHashCode and PerfSample::isEqual.
using AggregatedCounter =
std::unordered_map<Hashable<PerfSample>, uint64_t,
Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
// List of three-value records. UnwindState::ProfiledFrame stores
// (Start, End, Count) tuples for ranges and (Source, Target, Count) tuples
// for branches in it.
using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
// The state for the unwinder, it doesn't hold the data but only keep the
// pointer/index of the data, While unwinding, the CallStack is changed
// dynamicially and will be recorded as the context of the sample
struct UnwindState {
// Profiled binary that current frame address belongs to
const ProfiledBinary *Binary;
// Call stack trie node
struct ProfiledFrame {
const uint64_t Address = DummyRoot;
ProfiledFrame *Parent;
SampleVector RangeSamples;
SampleVector BranchSamples;
std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
: Address(Addr), Parent(P) {}
ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
assert(Address && "Address can't be zero!");
auto Ret = Children.emplace(
Address, std::make_unique<ProfiledFrame>(Address, this));
return Ret.first->second.get();
}
void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
RangeSamples.emplace_back(std::make_tuple(Start, End, Count));
}
void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
}
bool isDummyRoot() { return Address == DummyRoot; }
bool isExternalFrame() { return Address == ExternalAddr; }
bool isLeafFrame() { return Children.empty(); }
};
ProfiledFrame DummyTrieRoot;
ProfiledFrame *CurrentLeafFrame;
// Used to fall through the LBR stack
uint32_t LBRIndex = 0;
// Reference to PerfSample.LBRStack
const SmallVector<LBREntry, 16> &LBRStack;
// Used to iterate the address range
InstructionPointer InstPtr;
UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
: Binary(Binary), LBRStack(Sample->LBRStack),
InstPtr(Binary, Sample->CallStack.front()) {
initFrameTrie(Sample->CallStack);
}
bool validateInitialState() {
uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
uint64_t LeafAddr = CurrentLeafFrame->Address;
assert((LBRLeaf != ExternalAddr || LBRLeaf == LeafAddr) &&
"External leading LBR should match the leaf frame.");
// When we take a stack sample, ideally the sampling distance between the
// leaf IP of stack and the last LBR target shouldn't be very large.
// Use a heuristic size (0x100) to filter out broken records.
if (LeafAddr < LBRLeaf || LeafAddr >= LBRLeaf + 0x100) {
WithColor::warning() << "Bogus trace: stack tip = "
<< format("%#010x", LeafAddr)
<< ", LBR tip = " << format("%#010x\n", LBRLeaf);
return false;
}
return true;
}
void checkStateConsistency() {
assert(InstPtr.Address == CurrentLeafFrame->Address &&
"IP should align with context leaf");
}
bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
bool IsLastLBR() const { return LBRIndex == 0; }
bool getLBRStackSize() const { return LBRStack.size(); }
void advanceLBR() { LBRIndex++; }
ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
void pushFrame(uint64_t Address) {
CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
}
void switchToFrame(uint64_t Address) {
if (CurrentLeafFrame->Address == Address)
return;
CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
}
void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
void clearCallStack() { CurrentLeafFrame = &DummyTrieRoot; }
void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
ProfiledFrame *Cur = &DummyTrieRoot;
for (auto Address : reverse(CallStack)) {
Cur = Cur->getOrCreateChildFrame(Address);
}
CurrentLeafFrame = Cur;
}
ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
};
// Common base of the keys used to index context-sensitive sample counters.
struct ContextKey {
  // Cached hash; zero means "not generated yet".
  uint64_t HashCode = 0;

  virtual ~ContextKey() = default;

  // Return the cached hash, generating it first when it is still zero.
  uint64_t getHashCode() {
    if (!HashCode)
      genHashCode();
    return HashCode;
  }

  // Compute HashCode; implemented by each concrete key type.
  virtual void genHashCode() = 0;

  // Default comparison: the cached hash codes are equal.
  virtual bool isEqual(const ContextKey *K) const {
    const bool SameHash = (K->HashCode == HashCode);
    return SameHash;
  }

  // Utilities for LLVM-style RTTI
  enum ContextKind { CK_StringBased, CK_ProbeBased };
  const ContextKind Kind;
  ContextKind getKind() const { return Kind; }

  ContextKey(ContextKind K) : Kind(K) {}
};
// String based context id
struct StringBasedCtxKey : public ContextKey {
  SampleContextFrameVector Context;
  bool WasLeafInlined;

  StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false) {}

  // LLVM-style RTTI support.
  static bool classof(const ContextKey *K) {
    return K->getKind() == CK_StringBased;
  }

  // Two string based keys are equal when their contexts are equal. \p K must
  // be a StringBasedCtxKey.
  bool isEqual(const ContextKey *K) const override {
    const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(K);
    // dyn_cast yields null for a key of another kind; fail loudly instead of
    // dereferencing it, as ProbeBasedCtxKey::isEqual does.
    assert(Other != nullptr &&
           "String based key shouldn't be null in isEqual");
    return Context == Other->Context;
  }

  // Hash the context frames.
  void genHashCode() override {
    HashCode = hash_value(SampleContextFrames(Context));
  }
};
// Probe based context key as the intermediate key of context
// String based context key will introduce redundant string handling
// since the callee context is inferred from the context string which
// need to be splitted by '@' to get the last location frame, so we
// can just use probe instead and generate the string in the end.
struct ProbeBasedCtxKey : public ContextKey {
SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
ProbeBasedCtxKey() : ContextKey(CK_ProbeBased) {}
static bool classof(const ContextKey *K) {
return K->getKind() == CK_ProbeBased;
}
bool isEqual(const ContextKey *K) const override {
const ProbeBasedCtxKey *O = dyn_cast<ProbeBasedCtxKey>(K);
assert(O != nullptr && "Probe based key shouldn't be null in isEqual");
return std::equal(Probes.begin(), Probes.end(), O->Probes.begin(),
O->Probes.end());
}
void genHashCode() override {
for (const auto *P : Probes) {
HashCode = hash_combine(HashCode, P);
}
if (HashCode == 0) {
// Avoid zero value of HashCode when it's an empty list
HashCode = 1;
}
}
};
// Per-function branch counter, keyed by the (source, target) offset pair of
// the branch.
using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
// Per-function range counter, keyed by the (start, end) offset pair of the
// range.
using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;

// Bundles the range counter and the branch counter.
struct SampleCounter {
  RangeSample RangeCounter;
  BranchSample BranchCounter;

  // Add \p Repeat to the count of range [Start, End]; Start must not exceed
  // End.
  void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
    assert(Start <= End && "Invalid instruction range");
    const std::pair<uint64_t, uint64_t> Key(Start, End);
    RangeCounter[Key] += Repeat;
  }

  // Add \p Repeat to the count of branch Source -> Target.
  void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
    const std::pair<uint64_t, uint64_t> Key(Source, Target);
    BranchCounter[Key] += Repeat;
  }
};
// Sample counter with context to support context-sensitive profile.
// Keys are hashed and compared through ContextKey::getHashCode and
// ContextKey::isEqual (via the Hashable wrapper).
using ContextSampleCounterMap =
std::unordered_map<Hashable<ContextKey>, SampleCounter,
Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
// Stack of frame addresses from which a string based context key is built.
struct FrameStack {
  SmallVector<uint64_t, 16> Stack;
  ProfiledBinary *Binary;

  FrameStack(ProfiledBinary *B) : Binary(B) {}

  // Push the address of \p Cur; always succeeds.
  bool pushFrame(UnwindState::ProfiledFrame *Cur) {
    assert(!Cur->isExternalFrame() &&
           "External frame's not expected for context stack.");
    const uint64_t FrameAddr = Cur->Address;
    Stack.push_back(FrameAddr);
    return true;
  }

  // Drop the top entry; does nothing on an empty stack.
  void popFrame() {
    if (Stack.empty())
      return;
    Stack.pop_back();
  }

  std::shared_ptr<StringBasedCtxKey> getContextKey();
};
struct ProbeStack {
SmallVector<const MCDecodedPseudoProbe *, 16> Stack;
ProfiledBinary *Binary;
ProbeStack(ProfiledBinary *B) : Binary(B) {}
bool pushFrame(UnwindState::ProfiledFrame *Cur) {
assert(!Cur->isExternalFrame() &&
"External frame's not expected for context stack.");
const MCDecodedPseudoProbe *CallProbe =
Binary->getCallProbeForAddr(Cur->Address);
// We may not find a probe for a merged or external callsite.
// Callsite merging may cause the loss of original probe IDs.
// Cutting off the context from here since the inliner will
// not know how to consume a context with unknown callsites.
if (!CallProbe)
return false;
Stack.push_back(CallProbe);
return true;
}
void popFrame() {
if (!Stack.empty())
Stack.pop_back();
}
// Use pseudo probe based context key to get the sample counter
// A context stands for a call path from 'main' to an uninlined
// callee with all inline frames recovered on that path. The probes
// belonging to that call path is the probes either originated from
// the callee or from any functions inlined into the callee. Since
// pseudo probes are organized in a tri-tree style after decoded,
// the tree path from the tri-tree root (which is the uninlined
// callee) to the probe node forms an inline context.
// Here we use a list of probe(pointer) as the context key to speed up
// aggregation and the final context string will be generate in
// ProfileGenerator
std::shared_ptr<ProbeBasedCtxKey> getContextKey();
};
/*
A hybrid sample carries a group of LBRs plus the most recently sampled call
stack, so walking through the LBRs lets us infer more call stacks to use as
profile context. VirtualUnwinder performs that call stack unwinding based on
the LBR state. Two kinds of unwinding are processed here:
1) LBR unwinding and 2) linear range unwinding.
For each LBR entry (classified as call, return or regular branch), LBR
unwinding replays the operation by pushing, popping or switching the leaf
frame of the call stack. Because the initial call stack is the most recently
sampled one, the replay runs in anti-execution order: in the regular case the
call stack is popped when the LBR is a call and a frame is pushed when the LBR
is a return. After each LBR is processed, the state is aligned with the next
LBR by going through the instructions from the previous LBR's target to the
current LBR's source; this is the linear unwinding. Since instructions of a
linear range can come from different functions due to inlining, linear
unwinding splits the range and records counters per range with the same
inline context. Throughout, each call stack is recorded as a context id and
each LBR/linear range as a sample counter for later CS profile generation.
*/
class VirtualUnwinder {
public:
  VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
      : CtxCounterMap(Counter), Binary(B) {}

  bool unwind(const PerfSample *Sample, uint64_t Repeat);

  std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }

  uint64_t NumTotalBranches = 0;
  uint64_t NumExtCallBranch = 0;
  uint64_t NumMissingExternalFrame = 0;
  uint64_t NumMismatchedProEpiBranch = 0;
  uint64_t NumMismatchedExtCallBranch = 0;

private:
  // True when the source of the current LBR is a call instruction.
  bool isCallState(UnwindState &State) const {
    // The tail call frame is always missing here in stack sample, we will
    // use a specific tail call tracker to infer it.
    const uint64_t BranchSource = State.getCurrentLBRSource();
    return Binary->addressIsCall(BranchSource);
  }

  // True when the current LBR is a return whose target directly follows a
  // call instruction.
  bool isReturnState(UnwindState &State) const {
    // Simply check addressIsReturn, as ret is always reliable, both for
    // regular call and tail call.
    const bool SourceIsReturn =
        Binary->addressIsReturn(State.getCurrentLBRSource());
    if (!SourceIsReturn)
      return false;

    // In a callback case, a return from internal code, say A, to external
    // runtime can happen. The external runtime can then call back to
    // another internal routine, say B. Making an artificial branch that
    // looks like a return from A to B can confuse the unwinder to treat
    // the instruction before B as the call instruction. Here we detect this
    // case if the return target is not the next inst of call inst, then we just
    // do not treat it as a return.
    return Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget()) != 0;
  }

  void unwindCall(UnwindState &State);
  void unwindLinear(UnwindState &State, uint64_t Repeat);
  void unwindReturn(UnwindState &State);
  void unwindBranch(UnwindState &State);

  template <typename T>
  void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
  // Collect the samples of every trie node by DFS traversal
  template <typename T>
  void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
  void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);

  void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
                        uint64_t Repeat);
  void recordBranchCount(const LBREntry &Branch, UnwindState &State,
                         uint64_t Repeat);

  ContextSampleCounterMap *CtxCounterMap;
  // Profiled binary that current frame address belongs to
  ProfiledBinary *Binary;
  // Keep track of all untracked callsites
  std::set<uint64_t> UntrackedCallsites;
};
// Common base of all readers that turn a perf trace into sample counters.
class PerfReaderBase {
public:
  // Remember the binary and trace path, and reset the binary's base address
  // to its preferred address.
  PerfReaderBase(ProfiledBinary *B, StringRef PerfTrace)
      : Binary(B), PerfTraceFile(PerfTrace) {
    const auto PreferredBase = Binary->getPreferredBaseAddress();
    Binary->setBaseAddress(PreferredBase);
  }

  virtual ~PerfReaderBase() = default;

  static std::unique_ptr<PerfReaderBase> create(ProfiledBinary *Binary,
                                                PerfInputFile &PerfInput);

  // Entry of the reader to parse multiple perf traces
  virtual void parsePerfTraces() = 0;

  // Read-only access to the counters collected so far.
  const ContextSampleCounterMap &getSampleCounters() const {
    return SampleCounters;
  }

  bool profileIsCSFlat() { return ProfileIsCSFlat; }

protected:
  ProfiledBinary *Binary = nullptr;
  StringRef PerfTraceFile;
  ContextSampleCounterMap SampleCounters;
  bool ProfileIsCSFlat = false;
  uint64_t NumTotalSample = 0;
  uint64_t NumLeafExternalFrame = 0;
  uint64_t NumLeadingOutgoingLBR = 0;
};
// Read perf script to parse the events and samples.
class PerfScriptReader : public PerfReaderBase {
public:
PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace)
: PerfReaderBase(B, PerfTrace){};
// Entry of the reader to parse multiple perf traces
virtual void parsePerfTraces() override;
// Generate perf script from perf data
static PerfInputFile convertPerfDataToTrace(ProfiledBinary *Binary,
PerfInputFile &File);
// Extract perf script type by peaking at the input
static PerfContent checkPerfScriptType(StringRef FileName);
protected:
// The parsed MMap event
struct MMapEvent {
uint64_t PID = 0;
uint64_t Address = 0;
uint64_t Size = 0;
uint64_t Offset = 0;
StringRef BinaryPath;
};
// Check whether a given line is LBR sample
static bool isLBRSample(StringRef Line);
// Check whether a given line is MMAP event
static bool isMMap2Event(StringRef Line);
// Parse a single line of a PERF_RECORD_MMAP2 event looking for a
// mapping between the binary name and its memory layout.
static bool extractMMap2EventForBinary(ProfiledBinary *Binary, StringRef Line,
MMapEvent &MMap);
// Update base address based on mmap events
void updateBinaryAddress(const MMapEvent &Event);
// Parse mmap event and update binary address
void parseMMap2Event(TraceStream &TraceIt);
// Parse perf events/samples and do aggregation
void parseAndAggregateTrace();
// Parse either an MMAP event or a perf sample
void parseEventOrSample(TraceStream &TraceIt);
// Warn if the relevant mmap event is missing.
void warnIfMissingMMap();
// Emit accumulate warnings.
void warnTruncatedStack();
// Warn if range is invalid.
void warnInvalidRange();
// Extract call stack from the perf trace lines
bool extractCallstack(TraceStream &TraceIt,
SmallVectorImpl<uint64_t> &CallStack);
// Extract LBR stack from one perf trace line
bool extractLBRStack(TraceStream &TraceIt,
SmallVectorImpl<LBREntry> &LBRStack);
uint64_t parseAggregatedCount(TraceStream &TraceIt);
// Parse one sample from multiple perf lines, override this for different
// sample type
void parseSample(TraceStream &TraceIt);
// An aggregated count is given to indicate how many times the sample is
// repeated.
virtual void parseSample(TraceStream &TraceIt, uint64_t Count){};
void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
// Post process the profile after trace aggregation, we will do simple range
// overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
virtual void generateUnsymbolizedProfile();
void writeUnsymbolizedProfile(StringRef Filename);
void writeUnsymbolizedProfile(raw_fd_ostream &OS);
// Samples with the repeating time generated by the perf reader
AggregatedCounter AggregatedSamples;
// Keep track of all invalid return addresses
std::set<uint64_t> InvalidReturnAddresses;
};
/*
The reader of LBR only perf script.
A typical LBR sample is like:
40062f 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
... 0x4005c8/0x4005dc/P/-/-/0
*/
class LBRPerfReader : public PerfScriptReader {
public:
LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace)
: PerfScriptReader(Binary, PerfTrace){};
// Parse the LBR only sample.
virtual void parseSample(TraceStream &TraceIt, uint64_t Count) override;
};
/*
Reader for hybrid perf scripts. A hybrid script is a group of hybrid samples
(LBRs + call stack) and is used to generate CS profile. Example of a hybrid
sample:
4005dc # call stack leaf
400634
400684 # call stack root
0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
*/
class HybridPerfReader : public PerfScriptReader {
public:
  HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace)
      : PerfScriptReader(Binary, PerfTrace) {}

  // Parse the hybrid sample including the call and LBR line
  void parseSample(TraceStream &TraceIt, uint64_t Count) override;
  void generateUnsymbolizedProfile() override;

private:
  // Unwind the hybrid samples after aggregation
  void unwindSamples();
};
/*
Reader for unsymbolized profiles. Layout of an unsymbolized profile:
[frame1 @ frame2 @ ...] # If it's a CS profile
number of entries in RangeCounter
from_1-to_1:count_1
from_2-to_2:count_2
......
from_n-to_n:count_n
number of entries in BranchCounter
src_1->dst_1:count_1
src_2->dst_2:count_2
......
src_n->dst_n:count_n
[frame1 @ frame2 @ ...] # Next context
......
Note that non-CS profile doesn't have the empty `[]` context.
*/
class UnsymbolizedProfileReader : public PerfReaderBase {
public:
  UnsymbolizedProfileReader(ProfiledBinary *Binary, StringRef PerfTrace)
      : PerfReaderBase(Binary, PerfTrace) {}

  void parsePerfTraces() override;

private:
  void readSampleCounters(TraceStream &TraceIt, SampleCounter &SCounters);
  void readUnsymbolizedProfile(StringRef Filename);

  std::unordered_set<std::string> ContextStrSet;
};
} // end namespace sampleprof
} // end namespace llvm
#endif

View file

@ -1,979 +0,0 @@
//===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "ProfileGenerator.h"
#include "ErrorHandling.h"
#include "ProfiledBinary.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include <float.h>
#include <unordered_set>
// --output: path of the profile file to write. The option is required and,
// unlike most options in this file, is not file-local (no `static`).
cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
cl::Required,
cl::desc("Output profile file"));
// -o: short alias of --output.
static cl::alias OutputA("o", cl::desc("Alias for --output"),
cl::aliasopt(OutputFilename));
// --format: encoding of the output profile. The initial value is
// SPF_Ext_Binary, so "extbinary" is the one labelled as the default in the
// help text (the label used to sit on "binary", which is not the default).
static cl::opt<SampleProfileFormat> OutputFormat(
    "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
    cl::values(
        clEnumValN(SPF_Binary, "binary", "Binary encoding"),
        clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
        clEnumValN(SPF_Ext_Binary, "extbinary",
                   "Extensible binary encoding (default)"),
        clEnumValN(SPF_Text, "text", "Text encoding"),
        clEnumValN(SPF_GCC, "gcc",
                   "GCC encoding (only meaningful for -sample)")));
// --use-md5 (hidden, off by default). Not file-local (no `static`).
cl::opt<bool> UseMD5(
"use-md5", cl::init(false), cl::Hidden,
cl::desc("Use md5 to represent function names in the output profile (only "
"meaningful for -extbinary)"));
// --populate-profile-symbol-list (hidden, off by default).
static cl::opt<bool> PopulateProfileSymbolList(
"populate-profile-symbol-list", cl::init(false), cl::Hidden,
cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
// --fill-zero-for-all-funcs (hidden, off by default).
static cl::opt<bool> FillZeroForAllFuncs(
"fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
cl::desc("Attribute all functions' range with zero count "
"even it's not hit by any samples."));
// --compress-recursion (hidden). The value is stored externally in
// CSProfileGenerator::MaxCompressionSize through cl::location.
static cl::opt<int32_t, true> RecursionCompression(
"compress-recursion",
cl::desc("Compressing recursion by deduplicating adjacent frame "
"sequences up to the specified size. -1 means no size limit."),
cl::Hidden,
cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
// --trim-cold-profile (off by default).
static cl::opt<bool>
TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore,
cl::desc("If the total count of the profile is smaller "
"than threshold, it will be trimmed."));
// --csprof-merge-cold-context (on by default).
static cl::opt<bool> CSProfMergeColdContext(
"csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
cl::desc("If the total count of context profile is smaller than "
"the threshold, it will be merged into context-less base "
"profile."));
// --csprof-max-cold-context-depth (defaults to 1).
static cl::opt<uint32_t> CSProfMaxColdContextDepth(
"csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore,
cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
"context-less base profile"));
// --csprof-max-context-depth. The value is stored externally in
// CSProfileGenerator::MaxContextDepth through cl::location.
static cl::opt<int, true> CSProfMaxContextDepth(
"csprof-max-context-depth", cl::ZeroOrMore,
cl::desc("Keep the last K contexts while merging profile. -1 means no "
"depth limit."),
cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
// --hot-function-density-threshold (defaults to 1000).
static cl::opt<double> HotFunctionDensityThreshold(
"hot-function-density-threshold", llvm::cl::init(1000),
llvm::cl::desc(
"specify density threshold for hot functions (default: 1000)"),
llvm::cl::Optional);
// --show-density (off by default).
static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
llvm::cl::desc("show profile density details"),
llvm::cl::Optional);
// --update-total-samples (off by default).
static cl::opt<bool> UpdateTotalSamples(
"update-total-samples", llvm::cl::init(false),
llvm::cl::desc(
"Update total samples by accumulating all its body samples."),
llvm::cl::Optional);
// Declared here, defined in another translation unit.
extern cl::opt<int> ProfileSummaryCutoffHot;
// --gen-cs-nested-profile (hidden, off by default).
static cl::opt<bool> GenCSNestedProfile(
"gen-cs-nested-profile", cl::Hidden, cl::init(false),
cl::desc("Generate nested function profiles for CSSPGO"));
using namespace llvm;
using namespace sampleprof;
namespace llvm {
namespace sampleprof {
// Initialize the MaxCompressionSize to -1 which means no size limit
int32_t CSProfileGenerator::MaxCompressionSize = -1;
// Initial value -1; the --csprof-max-context-depth option describes -1 as
// "no depth limit".
int CSProfileGenerator::MaxContextDepth = -1;
// Starts out false; ProfileGeneratorBase::create overwrites it with
// Binary->useFSDiscriminator().
bool ProfileGeneratorBase::UseFSDiscriminator = false;
// Factory: build a CSProfileGenerator when \p ProfileIsCSFlat is set and a
// ProfileGenerator otherwise. Also publishes the binary's FS discriminator
// setting to ProfileGeneratorBase and FunctionSamples.
std::unique_ptr<ProfileGeneratorBase>
ProfileGeneratorBase::create(ProfiledBinary *Binary,
                             const ContextSampleCounterMap &SampleCounters,
                             bool ProfileIsCSFlat) {
  std::unique_ptr<ProfileGeneratorBase> Generator;
  if (!ProfileIsCSFlat) {
    Generator.reset(new ProfileGenerator(Binary, SampleCounters));
  } else {
    // CS profile and FS discriminators cannot be combined.
    if (Binary->useFSDiscriminator())
      exitWithError("FS discriminator is not supported in CS profile.");
    Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
  }

  ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
  FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
  return Generator;
}
void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
SampleProfileMap &ProfileMap) {
// Populate profile symbol list if extended binary format is used.
ProfileSymbolList SymbolList;
if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
Binary->populateSymbolListFromDWARF(SymbolList);
Writer->setProfileSymbolList(&SymbolList);
}
if (std::error_code EC = Writer->write(ProfileMap))
exitWithError(std::move(EC));
}
void ProfileGeneratorBase::write() {
auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
if (std::error_code EC = WriterOrErr.getError())
exitWithError(EC, OutputFilename);
if (UseMD5) {
if (OutputFormat != SPF_Ext_Binary)
WithColor::warning() << "-use-md5 is ignored. Specify "
"--format=extbinary to enable it\n";
else
WriterOrErr.get()->setUseMD5();
}
write(std::move(WriterOrErr.get()), ProfileMap);
}
// Warn when \p Density is zero or below the hot function density threshold,
// and print the density itself when --show-density is given.
void ProfileGeneratorBase::showDensitySuggestion(double Density) {
  if (Density == 0.0) {
    WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
                            "set too low. Please check your command.\n";
  } else if (Density < HotFunctionDensityThreshold) {
    // Factor by which the sample count falls short of the threshold.
    const double Shortfall = HotFunctionDensityThreshold / Density;
    WithColor::warning()
        << "AutoFDO is estimated to optimize better with "
        << format("%.1f", Shortfall)
        << "x more samples. Please consider increasing sampling rate or "
           "profiling for longer duration to get more samples.\n";
  }

  if (!ShowDensity)
    return;

  const double CutoffPercent =
      static_cast<double>(ProfileSummaryCutoffHot.getValue()) / 10000;
  outs() << "Minimum profile density for hot functions with top "
         << format("%.2f", CutoffPercent)
         << "% total samples: " << format("%.1f", Density) << "\n";
}
// Return the smallest samples-per-size ratio among the hot functions of
// \p Profiles, i.e. those whose total samples reach \p HotCntThreshold.
// Functions unknown to the binary or with size zero are skipped. Returns 0.0
// when no function qualifies.
double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
                                              uint64_t HotCntThreshold) {
  double MinDensity = DBL_MAX;
  for (const auto &Entry : Profiles) {
    const auto &Samples = Entry.second;
    if (Samples.getTotalSamples() < HotCntThreshold)
      continue;

    auto *Func = Binary->getBinaryFunction(Samples.getName());
    if (!Func)
      continue;

    const uint64_t Size = Func->getFuncSize();
    if (Size == 0)
      continue;

    const double FuncDensity =
        static_cast<double>(Samples.getTotalSamples()) / Size;
    MinDensity = std::min(MinDensity, FuncDensity);
  }
  return MinDensity == DBL_MAX ? 0.0 : MinDensity;
}
// Split the possibly overlapping \p Ranges into non-overlapping ranges and
// store them, each with its accumulated sample count, in \p DisjointRanges.
// Works as a sweep over the sorted begin/end boundary points of all ranges.
void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
const RangeSample &Ranges) {
/*
Regions may overlap with each other. Using the boundary info, find all
disjoint ranges and their sample count. BoundaryPoint contains the total
count of the samples that begin/end at this point.
|<--100-->| Sample1
|<------200------>| Sample2
A B C
In the example above,
Sample1 begins at A, ends at B, its value is 100.
Sample2 begins at A, ends at C, its value is 200.
For A, BeginCount is the sum of the samples beginning at A, which is 300,
and no sample ends at A, so EndCount is 0.
Then boundary points A, B, and C with begin/end counts are:
A: (300, 0)
B: (0, 100)
C: (0, 200)
*/
struct BoundaryPoint {
// Sum of sample counts beginning at this point
// (UINT64_MAX means no range begins here).
uint64_t BeginCount = UINT64_MAX;
// Sum of sample counts ending at this point
// (UINT64_MAX means no range ends here).
uint64_t EndCount = UINT64_MAX;
// Is the begin point of a zero range.
bool IsZeroRangeBegin = false;
// Is the end point of a zero range.
bool IsZeroRangeEnd = false;
// Accumulate \p Count, replacing the "unset" marker on first use.
void addBeginCount(uint64_t Count) {
if (BeginCount == UINT64_MAX)
BeginCount = 0;
BeginCount += Count;
}
// Accumulate \p Count, replacing the "unset" marker on first use.
void addEndCount(uint64_t Count) {
if (EndCount == UINT64_MAX)
EndCount = 0;
EndCount += Count;
}
};
/*
For the above example, with boundary points, the following logic finds two
disjoint regions of
[A,B]: 300
[B+1,C]: 200
If there is a boundary point that is both a begin and an end, the point
itself becomes a separate disjoint region. For example, if we have original
ranges of
|<--- 100 --->|
|<--- 200 --->|
A B C
there are three boundary points with their begin/end counts of
A: (100, 0)
B: (200, 100)
C: (0, 200)
the disjoint ranges would be
[A, B-1]: 100
[B, B]: 300
[B+1, C]: 200.
Example for zero value range:
|<--- 100 --->|
|<--- 200 --->|
|<--------------- 0 ----------------->|
A B C D E F
[A, B-1] : 0
[B, C] : 100
[C+1, D-1]: 0
[D, E] : 200
[E+1, F] : 0
*/
// Boundary points keyed (and therefore sorted) by address.
std::map<uint64_t, BoundaryPoint> Boundaries;
for (const auto &Item : Ranges) {
assert(Item.first.first <= Item.first.second &&
"Invalid instruction range");
auto &BeginPoint = Boundaries[Item.first.first];
auto &EndPoint = Boundaries[Item.first.second];
uint64_t Count = Item.second;
BeginPoint.addBeginCount(Count);
EndPoint.addEndCount(Count);
if (Count == 0) {
BeginPoint.IsZeroRangeBegin = true;
EndPoint.IsZeroRangeEnd = true;
}
}
// Use UINT64_MAX to indicate there is no existing range between BeginAddress
// and the next valid address
uint64_t BeginAddress = UINT64_MAX;
// Number of zero-count ranges currently open.
int ZeroRangeDepth = 0;
// Sum of the counts of all ranges currently open.
uint64_t Count = 0;
for (const auto &Item : Boundaries) {
uint64_t Address = Item.first;
const BoundaryPoint &Point = Item.second;
if (Point.BeginCount != UINT64_MAX) {
// Close the range that was open up to just before this begin point.
if (BeginAddress != UINT64_MAX)
DisjointRanges[{BeginAddress, Address - 1}] = Count;
Count += Point.BeginCount;
BeginAddress = Address;
ZeroRangeDepth += Point.IsZeroRangeBegin;
}
if (Point.EndCount != UINT64_MAX) {
assert((BeginAddress != UINT64_MAX) &&
"First boundary point cannot be 'end' point");
// Close the range that ends at this point (inclusive).
DisjointRanges[{BeginAddress, Address}] = Count;
assert(Count >= Point.EndCount && "Mismatched live ranges");
Count -= Point.EndCount;
BeginAddress = Address + 1;
ZeroRangeDepth -= Point.IsZeroRangeEnd;
// If the remaining count is zero and it's no longer in a zero range, this
// means we consume all the ranges before, thus mark BeginAddress as
// UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
// [<---- 10 ---->]
// [<---- 20 ---->]
// A B C D
// The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
// have the [B+1, C-1] zero range.
if (Count == 0 && ZeroRangeDepth == 0)
BeginAddress = UINT64_MAX;
}
}
}
// Record `Count` for the leaf location `LeafLoc` in `FunctionProfile`, keeping
// the maximum count seen for samples that share the same line location.
void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
    FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
    uint64_t Count) {
  const auto &Loc = LeafLoc.Location;
  // Samples are looked up and stored under the base discriminator.
  const uint32_t BaseDiscriminator = getBaseDiscriminator(Loc.Discriminator);
  // Scale by the duplication factor to compensate for loop
  // unroll/vectorization. This is only needed because the counts at a
  // location are combined with MAX instead of SUM.
  const uint64_t ScaledCount = Count * getDuplicationFactor(Loc.Discriminator);

  ErrorOr<uint64_t> Existing =
      FunctionProfile.findSamplesAt(Loc.LineOffset, BaseDiscriminator);
  uint64_t PreviousCount = 0;
  if (Existing)
    PreviousCount = Existing.get();

  // A larger count is already recorded for this location: leave it alone.
  if (PreviousCount > ScaledCount)
    return;
  // Only the difference to the previously found count is added.
  FunctionProfile.addBodySamples(Loc.LineOffset, BaseDiscriminator,
                                 ScaledCount - PreviousCount);
}
// Call updateTotalSamples() on every profile in ProfileMap. Does nothing
// unless UpdateTotalSamples is set.
void ProfileGeneratorBase::updateTotalSamples() {
  if (UpdateTotalSamples) {
    for (auto &Entry : ProfileMap)
      Entry.second.updateTotalSamples();
  }
}
// Return the profile stored in ProfileMap under the context built from
// `FuncName`, creating it on first use.
FunctionSamples &
ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
  SampleContext TopLevelContext(FuncName);
  auto Inserted = ProfileMap.emplace(TopLevelContext, FunctionSamples());
  FunctionSamples &Profile = Inserted.first->second;
  // Only a freshly inserted profile needs its context set.
  if (Inserted.second)
    Profile.setContext(TopLevelContext);
  return Profile;
}
// Entry point for non-context-sensitive profile generation: only line number
// based profiles are supported; post-processing follows generation.
void ProfileGenerator::generateProfile() {
  if (!Binary->usePseudoProbes()) {
    generateLineNumBasedProfile();
  } else {
    // TODO: Support probe based profile generation
    exitWithError("Probe based profile generation not supported for AutoFDO, "
                  "consider dropping `--ignore-stack-samples` or adding `--use-dwarf-correlation`.");
  }
  postProcessProfiles();
}
// Post-process the generated profiles. The order matters:
// computeSummaryAndThreshold() assigns ColdCountThreshold and
// HotCountThreshold, which the trimming and density steps then read.
void ProfileGenerator::postProcessProfiles() {
// Derive the hot/cold thresholds from the current ProfileMap.
computeSummaryAndThreshold();
// Drop profiles whose total samples fall below the cold threshold.
trimColdProfiles(ProfileMap, ColdCountThreshold);
// Compute the density of the remaining profiles and show a suggestion.
calculateAndShowDensity(ProfileMap);
}
void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
uint64_t ColdCntThreshold) {
if (!TrimColdProfile)
return;
// Move cold profiles into a tmp container.
std::vector<SampleContext> ColdProfiles;
for (const auto &I : ProfileMap) {
if (I.second.getTotalSamples() < ColdCntThreshold)
ColdProfiles.emplace_back(I.first);
}
// Remove the cold profile from ProfileMap.
for (const auto &I : ColdProfiles)
ProfileMap.erase(I);
}
void ProfileGenerator::generateLineNumBasedProfile() {
assert(SampleCounters.size() == 1 &&
"Must have one entry for profile generation.");
const SampleCounter &SC = SampleCounters.begin()->second;
// Fill in function body samples
populateBodySamplesForAllFunctions(SC.RangeCounter);
// Fill in boundary sample counts as well as call site samples for calls
populateBoundarySamplesForAllFunctions(SC.BranchCounter);
updateTotalSamples();
}
// Walk the inline stack `FrameVec` from the top-level function down to the
// leaf, creating nested inlinee profiles on demand, and add `Count` total
// samples to every profile visited. Returns the leaf frame's profile.
// `FrameVec` is indexed at 0 unconditionally, so it must not be empty.
FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
    const SampleContextFrameVector &FrameVec, uint64_t Count) {
  // Start from the top-level profile.
  FunctionSamples *Current = &getTopLevelFunctionProfile(FrameVec[0].FuncName);
  Current->addTotalSamples(Count);

  for (size_t Depth = 1, NumFrames = FrameVec.size(); Depth < NumFrames;
       ++Depth) {
    const auto &CallerFrame = FrameVec[Depth - 1];
    const auto &CalleeFrame = FrameVec[Depth];

    // The call site is the caller frame's location with a base discriminator.
    LineLocation Callsite(
        CallerFrame.Location.LineOffset,
        getBaseDiscriminator(CallerFrame.Location.Discriminator));
    FunctionSamplesMap &Inlinees = Current->functionSamplesAt(Callsite);

    auto Inserted =
        Inlinees.emplace(CalleeFrame.FuncName.str(), FunctionSamples());
    FunctionSamples &Inlinee = Inserted.first->second;
    if (Inserted.second) {
      // Newly created inlinee profile: give it its context.
      SampleContext InlineeContext(CalleeFrame.FuncName);
      Inlinee.setContext(InlineeContext);
    }

    Current = &Inlinee;
    Current->addTotalSamples(Count);
  }
  return *Current;
}
// Copy `RangeCounter`, make sure function ranges are present with (at least)
// a zero count, and return the result split into disjoint ranges.
RangeSample
ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
  RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());

  // Ensure an entry keyed by {first, second - 1} exists for the given range;
  // `+= 0` leaves any count that is already there untouched.
  auto EnsureRange = [&Ranges](const auto &FuncRange) {
    Ranges[{FuncRange.first, FuncRange.second - 1}] += 0;
  };

  if (FillZeroForAllFuncs) {
    for (auto &FuncI : Binary->getAllBinaryFunctions())
      for (auto &R : FuncI.second.Ranges)
        EnsureRange(R);
  } else {
    // For each sampled range, look up all ranges of the function it belongs
    // to and initialize them with a zero count, so they stay zero if they are
    // never hit by a sample. This is consistent with the compiler, which
    // interprets a zero count as unexecuted (cold).
    for (const auto &Sample : RangeCounter) {
      const uint64_t StartOffset = Sample.first.first;
      for (const auto &Range : Binary->getRangesForOffset(StartOffset))
        EnsureRange(Range);
    }
  }

  RangeSample DisjointRanges;
  findDisjointRanges(DisjointRanges, Ranges);
  return DisjointRanges;
}
void ProfileGenerator::populateBodySamplesForAllFunctions(
const RangeSample &RangeCounter) {
for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
uint64_t Count = Range.second;
InstructionPointer IP(Binary, RangeBegin, true);
// Disjoint ranges may have range in the middle of two instr,
// e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
// can be Addr1+1 to Addr2-1. We should ignore such range.
if (IP.Address > RangeEnd)
continue;
do {
uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
const SampleContextFrameVector &FrameVec =
Binary->getFrameLocationStack(Offset);
if (!FrameVec.empty()) {
// FIXME: As accumulating total count per instruction caused some
// regression, we changed to accumulate total count per byte as a
// workaround. Tuning hotness threshold on the compiler side might be
// necessary in the future.
FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
FrameVec, Count * Binary->getInstSize(Offset));
updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
Count);
}
} while (IP.advance() && IP.Address <= RangeEnd);
}
}
// Return the canonical name of the function whose range starts at
// `TargetOffset`, or an empty StringRef when no such range exists or the
// range is not a real function entry.
StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) {
  // Get the function range by branch target if it's a call branch.
  auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset);

  // Sample counts are only accumulated for ranges that start at a real
  // function entry, not e.g. outlined functions or inner labels.
  if (FRange && FRange->IsFuncEntry)
    return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
  return StringRef();
}
void ProfileGenerator::populateBoundarySamplesForAllFunctions(
const BranchSample &BranchCounters) {
for (const auto &Entry : BranchCounters) {
uint64_t SourceOffset = Entry.first.first;
uint64_t TargetOffset = Entry.first.second;
uint64_t Count = Entry.second;
assert(Count != 0 && "Unexpected zero weight branch");
StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
if (CalleeName.size() == 0)
continue;
// Record called target sample and its count.
const SampleContextFrameVector &FrameVec =
Binary->getFrameLocationStack(SourceOffset);
if (!FrameVec.empty()) {
FunctionSamples &FunctionProfile =
getLeafProfileAndAddTotalSamples(FrameVec, 0);
FunctionProfile.addCalledTargetSamples(
FrameVec.back().Location.LineOffset,
getBaseDiscriminator(FrameVec.back().Location.Discriminator),
CalleeName, Count);
}
// Add head samples for callee.
FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
CalleeProfile.addHeadSamples(Count);
}
}
// Compute the density of `Profiles` using HotCountThreshold and pass it on to
// showDensitySuggestion().
void ProfileGeneratorBase::calculateAndShowDensity(
    const SampleProfileMap &Profiles) {
  showDensitySuggestion(calculateDensity(Profiles, HotCountThreshold));
}
// Look up the profile for `Context` in ProfileMap, creating it when absent.
// `WasLeafInlined` is only consulted when a new profile is created.
FunctionSamples &CSProfileGenerator::getFunctionProfileForContext(
    const SampleContextFrameVector &Context, bool WasLeafInlined) {
  auto Existing = ProfileMap.find(SampleContext(Context));
  if (Existing != ProfileMap.end())
    return Existing->second;

  // Save the new context in `Contexts` for future references, and build the
  // SampleContext from the saved copy rather than from the argument.
  SampleContextFrames SavedFrames = *Contexts.insert(Context).first;
  SampleContext FContext(SavedFrames, RawContext);
  auto Inserted = ProfileMap.emplace(FContext, FunctionSamples());
  // The attribute is applied after the map key was created, so it is only
  // carried by the context set on the profile below.
  if (WasLeafInlined)
    FContext.setAttribute(ContextWasInlined);
  FunctionSamples &NewProfile = Inserted.first->second;
  NewProfile.setContext(FContext);
  return NewProfile;
}
// Entry point for context-sensitive profile generation: marks the profile as
// flat CS, optionally computes function sizes, generates a probe based or
// line number based profile, then post-processes it.
void CSProfileGenerator::generateProfile() {
  FunctionSamples::ProfileIsCSFlat = true;

  if (Binary->getTrackFuncContextSize())
    computeSizeForProfiledFunctions();

  if (!Binary->usePseudoProbes()) {
    generateLineNumBasedProfile();
  } else {
    // Enable pseudo probe functionalities in SampleProf
    FunctionSamples::ProfileIsProbeBased = true;
    generateProbeBasedProfile();
  }

  postProcessProfiles();
}
// Compute the inlined context size for every function range that contains
// the start of a sampled range.
void CSProfileGenerator::computeSizeForProfiledFunctions() {
  // Deduplicated function ranges: start offset -> end offset.
  std::unordered_map<uint64_t, uint64_t> AggregatedRanges;

  // Go through all the ranges in the CS counters, use the start of each range
  // to look up the function it belongs to and record the function's ranges.
  for (const auto &CI : SampleCounters) {
    for (const auto &Item : CI.second.RangeCounter) {
      // FIXME: Filter the bogus crossing function range.
      const uint64_t StartOffset = Item.first.first;
      // A function can be split into multiple ranges, so record all of them.
      for (const auto &Range : Binary->getRangesForOffset(StartOffset))
        AggregatedRanges[Range.first] = Range.second;
    }
  }

  for (const auto &FuncRange : AggregatedRanges)
    Binary->computeInlinedContextSizeForRange(FuncRange.first,
                                              FuncRange.second);
}
void CSProfileGenerator::generateLineNumBasedProfile() {
for (const auto &CI : SampleCounters) {
const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
// Get or create function profile for the range
FunctionSamples &FunctionProfile =
getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined);
// Fill in function body samples
populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter);
// Fill in boundary sample counts as well as call site samples for calls
populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile,
CI.second.BranchCounter);
}
// Fill in call site value sample for inlined calls and also use context to
// infer missing samples. Since we don't have call count for inlined
// functions, we estimate it from inlinee's profile using the entry of the
// body sample.
populateInferredFunctionSamples();
updateTotalSamples();
}
// Record body and total samples in `FunctionProfile` for every instruction
// covered by a non-zero disjoint range derived from `RangeCounter`.
void CSProfileGenerator::populateBodySamplesForFunction(
    FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
  // Compute disjoint ranges first, so MAX can be used when calculating the
  // count for each location.
  RangeSample DisjointRanges;
  findDisjointRanges(DisjointRanges, RangeCounter);

  for (const auto &Entry : DisjointRanges) {
    const uint64_t FirstAddr = Binary->offsetToVirtualAddr(Entry.first.first);
    const uint64_t LastAddr = Binary->offsetToVirtualAddr(Entry.first.second);
    const uint64_t Count = Entry.second;

    // Disjoint ranges introduce zero-filled gaps that don't belong to the
    // current context; filter them out.
    if (Count == 0)
      continue;

    InstructionPointer IP(Binary, FirstAddr, true);
    // A disjoint range may lie entirely between two instructions, e.g. with
    // Instr1 at Addr1 and Instr2 at Addr2 the range can be Addr1+1 to
    // Addr2-1. Such a range is ignored.
    if (IP.Address > LastAddr)
      continue;

    do {
      const uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
      auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
      if (LeafLoc.hasValue()) {
        // Record the body sample for this specific context.
        updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
        FunctionProfile.addTotalSamples(Count);
      }
    } while (IP.advance() && IP.Address <= LastAddr);
  }
}
// For every branch that targets a function entry and whose source has a leaf
// frame location, record a called-target sample in `FunctionProfile` and head
// samples in the callee profile reached through context `ContextId`.
void CSProfileGenerator::populateBoundarySamplesForFunction(
    SampleContextFrames ContextId, FunctionSamples &FunctionProfile,
    const BranchSample &BranchCounters) {
  for (const auto &Branch : BranchCounters) {
    const uint64_t SourceOffset = Branch.first.first;
    const uint64_t TargetOffset = Branch.first.second;
    const uint64_t Count = Branch.second;
    assert(Count != 0 && "Unexpected zero weight branch");

    StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
    if (CalleeName.empty())
      continue;

    // Record called target sample and its count
    auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
    if (!LeafLoc.hasValue())
      continue;
    FunctionProfile.addCalledTargetSamples(
        LeafLoc->Location.LineOffset,
        getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
        Count);

    // Record head sample for called target(callee): the callee context is
    // the caller context with its leaf replaced by the call site location,
    // followed by a frame for the callee itself.
    SampleContextFrameVector CalleeCtx(ContextId.begin(), ContextId.end());
    assert(CalleeCtx.back().FuncName == LeafLoc->FuncName &&
           "Leaf function name doesn't match");
    CalleeCtx.back() = *LeafLoc;
    CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0));
    getFunctionProfileForContext(CalleeCtx).addHeadSamples(Count);
  }
}
// Derive the caller's context from `CalleeContext` by dropping the callee's
// own (last) frame. `CallerContext` receives the remaining frames with the
// location of its leaf reset to (0, 0); the unmodified leaf frame, which
// still holds the call site location, is returned.
static SampleContextFrame
getCallerContext(SampleContextFrames CalleeContext,
                 SampleContextFrameVector &CallerContext) {
  assert(CalleeContext.size() > 1 && "Unexpected empty context");
  SampleContextFrames CallerFrames = CalleeContext.drop_back();
  CallerContext.assign(CallerFrames.begin(), CallerFrames.end());
  // Copy the leaf before its location is cleared.
  SampleContextFrame CallerLeaf = CallerContext.back();
  CallerContext.back().Location = LineLocation(0, 0);
  return CallerLeaf;
}
// Infer call site samples in caller profiles for callees that have no head
// samples: the call count is estimated from the callee's entry samples and
// added to the caller as called-target, body and total samples.
void CSProfileGenerator::populateInferredFunctionSamples() {
  for (const auto &Item : ProfileMap) {
    const auto &CalleeContext = Item.first;
    const FunctionSamples &CalleeProfile = Item.second;

    // If we already have head sample counts, we must have value profile
    // for call sites added already. Skip to avoid double counting.
    if (CalleeProfile.getHeadSamples())
      continue;
    // If we don't have context, nothing to do for caller's call site.
    // This could happen for entry point function.
    if (CalleeContext.isBaseContext())
      continue;

    // Infer the caller's frame location and context from the callee context.
    SampleContextFrameVector CallerContextId;
    SampleContextFrame CallerLeafFrameLoc =
        getCallerContext(CalleeContext.getContextFrames(), CallerContextId);
    SampleContextFrames CallerContext(CallerContextId);

    // It's possible that we haven't seen any sample directly in the caller,
    // in which case CallerProfile will not exist. But we can't modify
    // ProfileMap while iterating it.
    // TODO: created function profile for those callers too
    // The iterator from find() is reused so the map is searched only once and
    // never through the inserting operator[].
    auto CallerIt = ProfileMap.find(CallerContext);
    if (CallerIt == ProfileMap.end())
      continue;
    FunctionSamples &CallerProfile = CallerIt->second;

    // Since we don't have call count for inlined functions, we
    // estimate it from inlinee's profile using entry body sample.
    uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
    // If we don't have samples with location, use 1 to indicate live.
    if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
      EstimatedCallCount = 1;

    CallerProfile.addCalledTargetSamples(
        CallerLeafFrameLoc.Location.LineOffset,
        CallerLeafFrameLoc.Location.Discriminator,
        CalleeProfile.getContext().getName(), EstimatedCallCount);
    CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset,
                                 CallerLeafFrameLoc.Location.Discriminator,
                                 EstimatedCallCount);
    CallerProfile.addTotalSamples(EstimatedCallCount);
  }
}
// Post-process the context-sensitive profiles: compute thresholds, optionally
// run the pre-inliner, trim/merge cold contexts, report density and
// optionally convert to a nested profile. The steps depend on each other's
// results, so their order must be kept.
void CSProfileGenerator::postProcessProfiles() {
// Compute hot/cold threshold based on profile. This will be used for cold
// context profile merging/trimming.
computeSummaryAndThreshold();
// Run global pre-inliner to adjust/merge context profile based on estimated
// inline decisions.
if (EnableCSPreInliner) {
CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold)
.run();
// Turn off the profile merger by default unless it is explicitly enabled.
// getNumOccurrences() is zero when the option was not given explicitly.
if (!CSProfMergeColdContext.getNumOccurrences())
CSProfMergeColdContext = false;
}
// Trim and merge cold context profile using cold threshold above.
// NOTE(review): the threshold actually passed is HotCountThreshold - confirm
// this is intended.
if (TrimColdProfile || CSProfMergeColdContext) {
SampleContextTrimmer(ProfileMap)
.trimAndMergeColdContextProfiles(
HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
CSProfMaxColdContextDepth, EnableCSPreInliner);
}
// Merge function samples of CS profile to calculate profile density.
// Profiles are keyed by function name here, so all contexts of one function
// are merged into a single entry.
sampleprof::SampleProfileMap ContextLessProfiles;
for (const auto &I : ProfileMap) {
ContextLessProfiles[I.second.getName()].merge(I.second);
}
calculateAndShowDensity(ContextLessProfiles);
// Optionally convert the flat CS profile into a nested one and update the
// global profile kind flags accordingly.
if (GenCSNestedProfile) {
CSProfileConverter CSConverter(ProfileMap);
CSConverter.convertProfiles();
FunctionSamples::ProfileIsCSFlat = false;
FunctionSamples::ProfileIsCSNested = EnableCSPreInliner;
}
}
// Build a profile summary for ProfileMap with the default cutoffs and derive
// HotCountThreshold and ColdCountThreshold from its detailed summary.
void ProfileGeneratorBase::computeSummaryAndThreshold() {
  SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
  auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
  const auto &DetailedSummary = Summary->getDetailedSummary();
  HotCountThreshold =
      ProfileSummaryBuilder::getHotCountThreshold(DetailedSummary);
  ColdCountThreshold =
      ProfileSummaryBuilder::getColdCountThreshold(DetailedSummary);
}
// Helper that extracts the prefix context stack so it can be reused: the
// inline context of every probe in `Probes` is appended to `ContextStack`.
// The leaf context stack is added (and compressed) later, when the function
// profile is looked up.
static void extractPrefixContextStack(
    SampleContextFrameVector &ContextStack,
    const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes,
    ProfiledBinary *Binary) {
  for (const MCDecodedPseudoProbe *Probe : Probes)
    Binary->getInlineContextForProbe(Probe, ContextStack, true);
}
void CSProfileGenerator::generateProbeBasedProfile() {
for (const auto &CI : SampleCounters) {
const ProbeBasedCtxKey *CtxKey =
dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
SampleContextFrameVector ContextStack;
extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary);
// Fill in function body samples from probes, also infer caller's samples
// from callee's probe
populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
// Fill in boundary samples for a call probe
populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
}
}
void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter,
ProbeCounterMap &ProbeCounter) {
RangeSample Ranges;
findDisjointRanges(Ranges, RangeCounter);
for (const auto &Range : Ranges) {
uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
uint64_t Count = Range.second;
// Disjoint ranges have introduce zero-filled gap that
// doesn't belong to current context, filter them out.
if (Count == 0)
continue;
InstructionPointer IP(Binary, RangeBegin, true);
// Disjoint ranges may have range in the middle of two instr,
// e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
// can be Addr1+1 to Addr2-1. We should ignore such range.
if (IP.Address > RangeEnd)
continue;
do {
const AddressProbesMap &Address2ProbesMap =
Binary->getAddress2ProbesMap();
auto It = Address2ProbesMap.find(IP.Address);
if (It != Address2ProbesMap.end()) {
for (const auto &Probe : It->second) {
if (!Probe.isBlock())
continue;
ProbeCounter[&Probe] += Count;
}
}
} while (IP.advance() && IP.Address <= RangeEnd);
}
}
// Fill in body samples from the probes hit by `RangeCounter` under the
// context prefix `ContextStack`. For entry probes of inlined callees the
// caller's samples are inferred too. Finally every probe of each touched
// frame gets a zero count entry.
void CSProfileGenerator::populateBodySamplesWithProbes(
    const RangeSample &RangeCounter, SampleContextFrames ContextStack) {
  ProbeCounterMap ProbeCounter;
  // Extract the top frame probes by looking up each address among the range
  // in the Address2ProbeMap
  extractProbesFromRange(RangeCounter, ProbeCounter);

  // Inline tree node (frame) -> profiles that received samples for it.
  std::unordered_map<MCDecodedPseudoProbeInlineTree *,
                     std::unordered_set<FunctionSamples *>>
      FrameSamples;

  for (const auto &PI : ProbeCounter) {
    const MCDecodedPseudoProbe *Probe = PI.first;
    const uint64_t Count = PI.second;
    FunctionSamples &FunctionProfile =
        getFunctionProfileForLeafProbe(ContextStack, Probe);

    // Remember the frame and its profile whenever samples are collected for
    // a probe, so all zero count probes of the frame can be reported later.
    FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
    FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
    FunctionProfile.addTotalSamples(Count);

    if (!Probe->isEntry())
      continue;
    FunctionProfile.addHeadSamples(Count);

    // Look up the caller's function profile.
    const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
    SampleContextFrames CalleeContextId =
        FunctionProfile.getContext().getContextFrames();
    if (InlinerDesc == nullptr || CalleeContextId.size() <= 1)
      continue;

    // Since the context id will be compressed, the callee's context id has
    // to be used to infer the caller's context id, which ensures they share
    // the same context prefix.
    SampleContextFrameVector CallerContextId;
    SampleContextFrame &&CallerLeafFrameLoc =
        getCallerContext(CalleeContextId, CallerContextId);
    const uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset;
    assert(CallerIndex &&
           "Inferred caller's location index shouldn't be zero!");

    FunctionSamples &CallerProfile =
        getFunctionProfileForContext(CallerContextId);
    CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
    CallerProfile.addBodySamples(CallerIndex, 0, Count);
    CallerProfile.addTotalSamples(Count);
    CallerProfile.addCalledTargetSamples(
        CallerIndex, 0, FunctionProfile.getContext().getName(), Count);
  }

  // Assign zero count for remaining probes without sample hits to
  // differentiate from probes optimized away, of which the counts are unknown
  // and will be inferred by the compiler.
  for (auto &FrameEntry : FrameSamples)
    for (auto *FunctionProfile : FrameEntry.second)
      for (auto *Probe : FrameEntry.first->getProbes())
        FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
}
void CSProfileGenerator::populateBoundarySamplesWithProbes(
const BranchSample &BranchCounter, SampleContextFrames ContextStack) {
for (const auto &BI : BranchCounter) {
uint64_t SourceOffset = BI.first.first;
uint64_t TargetOffset = BI.first.second;
uint64_t Count = BI.second;
uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
const MCDecodedPseudoProbe *CallProbe =
Binary->getCallProbeForAddr(SourceAddress);
if (CallProbe == nullptr)
continue;
FunctionSamples &FunctionProfile =
getFunctionProfileForLeafProbe(ContextStack, CallProbe);
FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
FunctionProfile.addTotalSamples(Count);
StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
if (CalleeName.size() == 0)
continue;
FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
Count);
}
}
// Return (creating it if needed) the profile for `LeafProbe` under the
// context prefix `ContextStack`, and set its function hash from the probe's
// function descriptor.
FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
    SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
  // Work on an explicit copy of the prefix so the leaf context can be
  // appended to it.
  SampleContextFrameVector FullContext(ContextStack.begin(),
                                       ContextStack.end());
  Binary->getInlineContextForProbe(LeafProbe, FullContext, true);

  // For leaf inlined context with the top frame, we should strip off the top
  // frame's probe id, like:
  // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
  auto LeafFrame = FullContext.back();
  LeafFrame.Location = LineLocation(0, 0);
  FullContext.pop_back();

  // Compress and trim everything except the leaf frame, then put the leaf
  // back.
  CSProfileGenerator::compressRecursionContext(FullContext);
  CSProfileGenerator::trimContext(FullContext);
  FullContext.push_back(LeafFrame);

  const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
  const bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
  FunctionSamples &LeafProfile =
      getFunctionProfileForContext(FullContext, WasLeafInlined);
  LeafProfile.setFunctionHash(FuncDesc->FuncHash);
  return LeafProfile;
}
} // end namespace sampleprof
} // end namespace llvm

View file

@ -1,312 +0,0 @@
//===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
#define LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
#include "CSPreInliner.h"
#include "ErrorHandling.h"
#include "PerfReader.h"
#include "ProfiledBinary.h"
#include "llvm/ProfileData/SampleProfWriter.h"
#include <memory>
#include <unordered_set>
using namespace llvm;
using namespace sampleprof;
namespace llvm {
namespace sampleprof {
// This is the base class for profile generation of sample-based PGO. We reuse
// all structures relating to function profiles and profile writers as seen in
// /ProfileData/SampleProf.h.
class ProfileGeneratorBase {
public:
  // `Counters` is stored by reference, so it must outlive the generator.
  ProfileGeneratorBase(ProfiledBinary *Binary,
                       const ContextSampleCounterMap &Counters)
      : Binary(Binary), SampleCounters(Counters) {}
  virtual ~ProfileGeneratorBase() = default;

  // Factory for a concrete generator.
  static std::unique_ptr<ProfileGeneratorBase>
  create(ProfiledBinary *Binary, const ContextSampleCounterMap &SampleCounters,
         bool ProfileIsCSFlat);

  // Generate the profile into ProfileMap; implemented by each generator.
  virtual void generateProfile() = 0;
  void write();

  // Duplication factor encoded in `Discriminator`, or 1 when `UseFSD` is set.
  static uint32_t
  getDuplicationFactor(unsigned Discriminator,
                       bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
    return UseFSD ? 1
                  : llvm::DILocation::getDuplicationFactorFromDiscriminator(
                        Discriminator);
  }

  // Base discriminator encoded in `Discriminator`, or `Discriminator` itself
  // when `UseFSD` is set.
  static uint32_t
  getBaseDiscriminator(unsigned Discriminator,
                       bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
    return UseFSD ? Discriminator
                  : DILocation::getBaseDiscriminatorFromDiscriminator(
                        Discriminator, /* IsFSDiscriminator */ false);
  }

  // Default for the `UseFSD` parameter of the two helpers above.
  static bool UseFSDiscriminator;

protected:
  // Use SampleProfileWriter to serialize profile map
  void write(std::unique_ptr<SampleProfileWriter> Writer,
             SampleProfileMap &ProfileMap);
  /*
  For each region boundary point, mark if it is begin or end (or both) of
  the region. Boundary points are inclusive. Log the sample count as well
  so we can use it when we compute the sample count of each disjoint region
  later. Note that there might be multiple ranges with different sample
  count that share same begin/end point. We need to accumulate the sample
  count for the boundary point for such case, because for the example
  below,
  |<--100-->|
  |<------200------>|
  A B C
  sample count for disjoint region [A,B] would be 300.
  */
  void findDisjointRanges(RangeSample &DisjointRanges,
                          const RangeSample &Ranges);
  // Helper function for updating body sample for a leaf location in
  // FunctionProfile
  void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
                                           const SampleContextFrame &LeafLoc,
                                           uint64_t Count);
  void updateTotalSamples();
  StringRef getCalleeNameForOffset(uint64_t TargetOffset);
  // Computes HotCountThreshold and ColdCountThreshold from ProfileMap.
  void computeSummaryAndThreshold();
  void calculateAndShowDensity(const SampleProfileMap &Profiles);
  double calculateDensity(const SampleProfileMap &Profiles,
                          uint64_t HotCntThreshold);
  void showDensitySuggestion(double Density);

  // Thresholds from profile summary to answer isHotCount/isColdCount queries.
  // Zero-initialized so they never hold an indeterminate value before
  // computeSummaryAndThreshold() has run.
  uint64_t HotCountThreshold = 0;
  uint64_t ColdCountThreshold = 0;
  // Used by SampleProfileWriter
  SampleProfileMap ProfileMap;
  ProfiledBinary *Binary = nullptr;
  const ContextSampleCounterMap &SampleCounters;
};
// Generator whose profiles are keyed by top-level function name, with inlined
// callees nested under their call sites.
class ProfileGenerator : public ProfileGeneratorBase {
public:
ProfileGenerator(ProfiledBinary *Binary,
const ContextSampleCounterMap &Counters)
: ProfileGeneratorBase(Binary, Counters){};
void generateProfile() override;
private:
// Build the profile from the range and branch counters of the single sample
// counter entry.
void generateLineNumBasedProfile();
// Add zero count function ranges to a copy of RangeCounter and return it
// split into disjoint ranges.
RangeSample preprocessRangeCounter(const RangeSample &RangeCounter);
// Look up or create the profile stored under FuncName.
FunctionSamples &getTopLevelFunctionProfile(StringRef FuncName);
// Helper function to get the leaf frame's FunctionProfile by traversing the
// inline stack and meanwhile it adds the total samples for each frame's
// function profile.
FunctionSamples &
getLeafProfileAndAddTotalSamples(const SampleContextFrameVector &FrameVec,
uint64_t Count);
// Fill in body samples for every instruction covered by RangeCounter.
void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter);
// Fill in called target samples and callee head samples from branches.
void
populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
// Compute thresholds, trim cold profiles and report density.
void postProcessProfiles();
// Remove profiles whose total samples are below ColdCntThreshold.
void trimColdProfiles(const SampleProfileMap &Profiles,
uint64_t ColdCntThreshold);
};
// Maps a decoded pseudo probe to the sample count accumulated for it.
using ProbeCounterMap =
std::unordered_map<const MCDecodedPseudoProbe *, uint64_t>;
class CSProfileGenerator : public ProfileGeneratorBase {
public:
CSProfileGenerator(ProfiledBinary *Binary,
const ContextSampleCounterMap &Counters)
: ProfileGeneratorBase(Binary, Counters){};
void generateProfile() override;
// Trim the context stack at a given depth: only the last `Depth` elements of
// `S` are kept. A negative `Depth`, or one that is not smaller than the
// current size, leaves `S` untouched.
template <typename T>
static void trimContext(SmallVectorImpl<T> &S, int Depth = MaxContextDepth) {
  if (Depth < 0)
    return;
  const size_t Keep = static_cast<size_t>(Depth);
  if (Keep >= S.size())
    return;
  // Shift the trailing `Keep` elements to the front, then drop the rest.
  std::copy(S.end() - Keep, S.end(), S.begin());
  S.resize(Keep);
}
// Remove adjacent repeated context sequences up to a given sequence length,
// -1 means no size limit. Note that repeated sequences are identified based
// on the exact call site, this is finer granularity than function recursion.
// `Context` is compressed in place and resized after every pass; `CSize` is
// the maximum length of a repeated sequence to look for and is additionally
// capped at half of the context size.
template <typename T>
static void compressRecursionContext(SmallVectorImpl<T> &Context,
int32_t CSize = MaxCompressionSize) {
// I is the length of the repeated sequence handled by the current pass.
uint32_t I = 1;
// A repeated sequence can be at most half of the context long.
uint32_t HS = static_cast<uint32_t>(Context.size() / 2);
uint32_t MaxDedupSize =
CSize == -1 ? HS : std::min(static_cast<uint32_t>(CSize), HS);
auto BeginIter = Context.begin();
// Use an in-place algorithm to save memory copy
// End indicates the end location of current iteration's data
uint32_t End = 0;
// Deduplicate from length 1 to the max possible size of a repeated
// sequence.
while (I <= MaxDedupSize) {
// This is a linear algorithm that deduplicates adjacent repeated
// sequences of size I. The deduplication detection runs on a sliding
// window whose size is 2*I and it keeps sliding the window to deduplicate
// the data inside. Once duplication is detected, deduplicate it by
// skipping the right half part of the window, otherwise just copy back
// the new one by appending them at the back of End pointer(for the next
// iteration).
//
// For example:
// Input: [a1, a2, b1, b2]
// (Added index to distinguish the same char, the origin is [a, a, b,
// b], the size of the dedup window is 2(I = 1) at the beginning)
//
// 1) The initial status is a dummy window[null, a1], then just copy the
// right half of the window(End = 0), then slide the window.
// Result: [a1], a2, b1, b2 (End points to the element right before ],
// after ] is the data of the previous iteration)
//
// 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of
// the window i.e the duplication happen. Only slide the window.
// Result: [a1], a2, b1, b2
//
// 3) Next window is [a2, b1], copy the right half of the window(b1 is
// new) to the End and slide the window.
// Result: [a1, b1], b1, b2
//
// 4) Next window is [b1, b2], same to 2), skip b2.
// Result: [a1, b1], b1, b2
// After resize, it will be [a, b]
// Use pointers like below to do comparison inside the window
// [a b c a b c]
// | | | | |
// LeftBoundary Left Right Left+I Right+I
// A duplication is found if Left < LeftBoundary.
int32_t Right = I - 1;
// The first I elements are always kept as they are.
End = I;
int32_t LeftBoundary = 0;
while (Right + I < Context.size()) {
// To avoid scanning a part of a sequence repeatedly, it finds out
// the common suffix of the two halves in the window. The common suffix
// will serve as the common prefix of next possible pair of duplicate
// sequences. The non-common part will be ignored and never scanned
// again.
// For example.
// Input: [a, b1], c1, b2, c2
// I = 2
//
// 1) For the window [a, b1, c1, b2], non-common-suffix for the right
// part is 'c1', copy it and only slide the window 1 step.
// Result: [a, b1, c1], b2, c2
//
// 2) Next window is [b1, c1, b2, c2], so duplication happen.
// Result after resize: [a, b, c]
int32_t Left = Right;
while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) {
// Find the longest suffix inside the window. When stops, Left points
// at the diverging point in the current sequence.
Left--;
}
bool DuplicationFound = (Left < LeftBoundary);
// Don't need to recheck the data before Right
LeftBoundary = Right + 1;
if (DuplicationFound) {
// Duplication found, skip right half of the window.
Right += I;
} else {
// Copy the non-common-suffix part of the adjacent sequence.
std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1,
BeginIter + End);
End += Left + I - Right;
// Only slide the window by the size of non-common-suffix
Right = Left + I;
}
}
// Don't forget the remaining part that's not scanned.
std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End);
End += Context.size() - Right - 1;
I++;
// Drop the stale tail and re-cap the sequence length for the shorter
// context.
Context.resize(End);
MaxDedupSize = std::min(static_cast<uint32_t>(End / 2), MaxDedupSize);
}
}
private:
void generateLineNumBasedProfile();
// Lookup or create FunctionSamples for the context
FunctionSamples &
getFunctionProfileForContext(const SampleContextFrameVector &Context,
bool WasLeafInlined = false);
// For profiled only functions, on-demand compute their inline context
// function byte size which is used by the pre-inliner.
void computeSizeForProfiledFunctions();
// Post processing for profiles before writing out, such as merging
// and trimming cold profiles, running preinliner on profiles.
void postProcessProfiles();
void populateBodySamplesForFunction(FunctionSamples &FunctionProfile,
const RangeSample &RangeCounters);
void populateBoundarySamplesForFunction(SampleContextFrames ContextId,
FunctionSamples &FunctionProfile,
const BranchSample &BranchCounters);
void populateInferredFunctionSamples();
void generateProbeBasedProfile();
// Go through each address from range to extract the top frame probe by
// looking up in the Address2ProbeMap
void extractProbesFromRange(const RangeSample &RangeCounter,
ProbeCounterMap &ProbeCounter);
// Fill in function body samples from probes
void populateBodySamplesWithProbes(const RangeSample &RangeCounter,
SampleContextFrames ContextStack);
// Fill in boundary samples for a call probe
void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter,
SampleContextFrames ContextStack);
// Helper function to get FunctionSamples for the leaf probe
FunctionSamples &
getFunctionProfileForLeafProbe(SampleContextFrames ContextStack,
const MCDecodedPseudoProbe *LeafProbe);
// Underlying context table serves for sample profile writer.
std::unordered_set<SampleContextFrameVector, SampleContextFrameHash> Contexts;
public:
// Deduplicate adjacent repeated context sequences up to a given sequence
// length. -1 means no size limit.
static int32_t MaxCompressionSize;
static int MaxContextDepth;
};
} // end namespace sampleprof
} // end namespace llvm
#endif

View file

@ -1,790 +0,0 @@
//===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "ProfiledBinary.h"
#include "ErrorHandling.h"
#include "ProfileGenerator.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/TargetSelect.h"
#define DEBUG_TYPE "load-binary"
using namespace llvm;
using namespace sampleprof;
// Command-line options that control what the binary loader prints and how
// it correlates the profile with the binary.

// Print the disassembly of the text sections while they are decoded (checked
// in disassemble() and dissassembleSymbol()). Not static, so other
// translation units may reference it.
cl::opt<bool> ShowDisassemblyOnly("show-disassembly-only", cl::init(false),
cl::ZeroOrMore,
cl::desc("Print disassembled code."));
// Append the symbolized source location to every printed instruction.
cl::opt<bool> ShowSourceLocations("show-source-locations", cl::init(false),
cl::ZeroOrMore,
cl::desc("Print source locations."));
// Print canonical function names (FunctionSamples::getCanonicalFnName) in the
// disassembly output instead of the raw symbol names.
static cl::opt<bool>
ShowCanonicalFnName("show-canonical-fname", cl::init(false), cl::ZeroOrMore,
cl::desc("Print canonical function name."));
// Dump the decoded pseudo probe descriptors and per-address probes.
static cl::opt<bool> ShowPseudoProbe(
"show-pseudo-probe", cl::init(false), cl::ZeroOrMore,
cl::desc("Print pseudo probe section and disassembled info."));
// When set, decodePseudoProbe() returns early and the pseudo probe sections
// are never decoded.
static cl::opt<bool> UseDwarfCorrelation(
"use-dwarf-correlation", cl::init(false), cl::ZeroOrMore,
cl::desc("Use dwarf for profile correlation even when binary contains "
"pseudo probe."));
// Optional filter restricting the printed disassembly to the listed symbols.
static cl::list<std::string> DisassembleFunctions(
"disassemble-functions", cl::CommaSeparated,
cl::desc("List of functions to print disassembly for. Accept demangled "
"names only. Only work with show-disassembly-only"));
// Defined in another translation unit; enables the per-function warnings
// emitted by warnNoFuncEntry().
extern cl::opt<bool> ShowDetailedWarning;
namespace llvm {
namespace sampleprof {
// Resolve the registered LLVM target for the triple of the given object
// file. Terminates through exitWithError (reporting the registry's message
// together with the file name) when no target can be found.
static const Target *getTarget(const ObjectFile *Obj) {
  // The architecture name is deliberately left empty; only the triple of the
  // object file is supplied to the lookup.
  std::string NoArchName;
  std::string LookupError;
  Triple ObjTriple = Obj->makeTriple();
  const Target *Found =
      TargetRegistry::lookupTarget(NoArchName, ObjTriple, LookupError);
  if (Found == nullptr)
    exitWithError(LookupError, Obj->getFileName());
  return Found;
}
void BinarySizeContextTracker::addInstructionForContext(
const SampleContextFrameVector &Context, uint32_t InstrSize) {
ContextTrieNode *CurNode = &RootContext;
bool IsLeaf = true;
for (const auto &Callsite : reverse(Context)) {
StringRef CallerName = Callsite.FuncName;
LineLocation CallsiteLoc = IsLeaf ? LineLocation(0, 0) : Callsite.Location;
CurNode = CurNode->getOrCreateChildContext(CallsiteLoc, CallerName);
IsLeaf = false;
}
CurNode->addFunctionSize(InstrSize);
}
uint32_t
BinarySizeContextTracker::getFuncSizeForContext(const SampleContext &Context) {
ContextTrieNode *CurrNode = &RootContext;
ContextTrieNode *PrevNode = nullptr;
SampleContextFrames Frames = Context.getContextFrames();
int32_t I = Frames.size() - 1;
Optional<uint32_t> Size;
// Start from top-level context-less function, traverse down the reverse
// context trie to find the best/longest match for given context, then
// retrieve the size.
while (CurrNode && I >= 0) {
// Process from leaf function to callers (added to context).
const auto &ChildFrame = Frames[I--];
PrevNode = CurrNode;
CurrNode =
CurrNode->getChildContext(ChildFrame.Location, ChildFrame.FuncName);
if (CurrNode && CurrNode->getFunctionSize().hasValue())
Size = CurrNode->getFunctionSize().getValue();
}
// If we traversed all nodes along the path of the context and haven't
// found a size yet, pivot to look for size from sibling nodes, i.e size
// of inlinee under different context.
if (!Size.hasValue()) {
if (!CurrNode)
CurrNode = PrevNode;
while (!Size.hasValue() && CurrNode &&
!CurrNode->getAllChildContext().empty()) {
CurrNode = &CurrNode->getAllChildContext().begin()->second;
if (CurrNode->getFunctionSize().hasValue())
Size = CurrNode->getFunctionSize().getValue();
}
}
assert(Size.hasValue() && "We should at least find one context size.");
return Size.getValue();
}
// Entry point: visit every child of the decoder's dummy inline root with a
// fresh, empty probe frame stack.
void BinarySizeContextTracker::trackInlineesOptimizedAway(
    MCPseudoProbeDecoder &ProbeDecoder) {
  ProbeFrameStack Frames;
  const auto &TopLevelNodes = ProbeDecoder.getDummyInlineRoot().getChildren();
  for (const auto &Entry : TopLevelNodes)
    trackInlineesOptimizedAway(ProbeDecoder, *Entry.second, Frames);
}
// Recursive worker: depth-first walk over the probe inline tree.
//
// ProbeContext holds one (function name, call-site probe id) pair per node on
// the path from the top-level function to ProbeNode. For every node that owns
// at least one probe, a zero size is added to the matching node of the size
// trie so that the context is known even when it has no instructions.
void BinarySizeContextTracker::trackInlineesOptimizedAway(
    MCPseudoProbeDecoder &ProbeDecoder,
    MCDecodedPseudoProbeInlineTree &ProbeNode, ProbeFrameStack &ProbeContext) {
  StringRef NodeFuncName =
      ProbeDecoder.getFuncDescForGUID(ProbeNode.Guid)->FuncName;
  ProbeContext.emplace_back(NodeFuncName, 0);

  // A probe means the context had code before inlining and optimization.
  if (!ProbeNode.getProbes().empty()) {
    ContextTrieNode *TrieNode = &RootContext;
    for (auto FrameIt = ProbeContext.rbegin(), FrameEnd = ProbeContext.rend();
         FrameIt != FrameEnd; ++FrameIt) {
      LineLocation CallsiteLoc(FrameIt->second, 0);
      StringRef CallerName = FrameIt->first;
      TrieNode = TrieNode->getOrCreateChildContext(CallsiteLoc, CallerName);
    }
    // Adding zero does not change the size but marks it as known.
    TrieNode->addFunctionSize(0);
  }

  // Recurse into the inlinees; the current frame records the call site that
  // leads to the child being visited.
  for (const auto &Child : ProbeNode.getChildren()) {
    InlineSite Site = Child.first;
    ProbeContext.back().second = std::get<1>(Site);
    trackInlineesOptimizedAway(ProbeDecoder, *Child.second, ProbeContext);
  }

  ProbeContext.pop_back();
}
// Count the functions for which none of the recorded ranges is flagged as the
// function entry, optionally warn about each of them, and emit a summary.
// Functions without any range are ignored.
void ProfiledBinary::warnNoFuncEntry() {
  uint64_t MissingEntryCount = 0;
  for (auto &NameAndFunc : BinaryFunctions) {
    auto &Ranges = NameAndFunc.second.Ranges;
    if (Ranges.empty())
      continue;

    // Stop scanning the ranges as soon as one entry range is seen.
    bool EntrySeen = false;
    for (auto RangeIt = Ranges.begin();
         !EntrySeen && RangeIt != Ranges.end(); ++RangeIt) {
      FuncRange *FR = findFuncRangeForStartOffset(RangeIt->first);
      EntrySeen = FR && FR->IsFuncEntry;
    }
    if (EntrySeen)
      continue;

    ++MissingEntryCount;
    if (ShowDetailedWarning)
      WithColor::warning()
          << "Failed to determine function entry for " << NameAndFunc.first
          << " due to inconsistent name from symbol table and dwarf info.\n";
  }
  emitWarningSummary(MissingEntryCount, BinaryFunctions.size(),
                     "of functions failed to determine function entry due to "
                     "inconsistent name from symbol table and dwarf info.");
}
// Load and decode the profiled binary at Path.
//
// Steps, in order: open the ELF image, validate the target (x86 only),
// compute the preferred text segment addresses, decode pseudo probes, load
// function ranges from DWARF (from DebugBinaryPath when given, otherwise from
// the executable itself), disassemble the text sections, and finally infer
// prolog/epilog offsets. Any unrecoverable problem terminates the tool via
// exitWithError.
void ProfiledBinary::load() {
  // Attempt to open the binary.
  OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
  Binary &ExeBinary = *OBinary.getBinary();

  auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary);
  if (!Obj)
    exitWithError("not a valid Elf image", Path);

  TheTriple = Obj->makeTriple();
  // Current only support X86
  if (!TheTriple.isX86())
    exitWithError("unsupported target", TheTriple.getTriple());
  LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");

  // Find the preferred load address for text sections.
  setPreferredTextSegmentAddresses(Obj);

  // Decode pseudo probe related section
  decodePseudoProbe(Obj);

  // Load debug info of subprograms from DWARF section.
  // If path of debug info binary is specified, use the debug info from it,
  // otherwise use the debug info from the executable binary.
  if (!DebugBinaryPath.empty()) {
    OwningBinary<Binary> DebugPath =
        unwrapOrError(createBinary(DebugBinaryPath), DebugBinaryPath);
    // createBinary() can also succeed for containers that are not object
    // files; report that instead of dereferencing a null dyn_cast result.
    auto *DebugObj = dyn_cast<ObjectFile>(DebugPath.getBinary());
    if (!DebugObj)
      exitWithError("not a valid object file", DebugBinaryPath);
    loadSymbolsFromDWARF(*DebugObj);
  } else {
    // Obj was already verified above, so no second cast is needed.
    loadSymbolsFromDWARF(*Obj);
  }

  // Disassemble the text sections.
  disassemble(Obj);

  // Track size for optimized inlinees when probe is available
  if (UsePseudoProbes && TrackFuncContextSize)
    FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder);

  // Use function start and return address to infer prolog and epilog
  ProEpilogTracker.inferPrologOffsets(StartOffset2FuncRangeMap);
  ProEpilogTracker.inferEpilogOffsets(RetOffsets);

  warnNoFuncEntry();

  // TODO: decode other sections.
}
bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) {
uint64_t Offset1 = virtualAddrToOffset(Address1);
uint64_t Offset2 = virtualAddrToOffset(Address2);
const SampleContextFrameVector &Context1 = getFrameLocationStack(Offset1);
const SampleContextFrameVector &Context2 = getFrameLocationStack(Offset2);
if (Context1.size() != Context2.size())
return false;
if (Context1.empty())
return false;
// The leaf frame contains location within the leaf, and it
// needs to be remove that as it's not part of the calling context
return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1,
Context2.begin(), Context2.begin() + Context2.size() - 1);
}
// Expand a stack of virtual addresses into a full context by concatenating
// the frame location stack of every address, in the order given by Stack.
//
// Returns an empty vector as soon as one address has no frame location
// stack. WasLeafInlined is overwritten for every address, so after the loop
// it tells whether the last address expanded to more than one frame.
// Discriminators are replaced by their base discriminator, and everything
// except the leaf frame goes through recursion compression and trimming.
SampleContextFrameVector
ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
                                   bool &WasLeafInlined) {
  SampleContextFrameVector Expanded;

  for (uint64_t Addr : Stack) {
    uint64_t Offset = virtualAddrToOffset(Addr);
    const SampleContextFrameVector &Frames = getFrameLocationStack(Offset);
    // An instruction without a valid debug line is ignored by sample
    // processing.
    if (Frames.empty())
      return SampleContextFrameVector();
    WasLeafInlined = (Frames.size() > 1);
    Expanded.append(Frames);
  }

  // Replace with decoded base discriminator
  for (auto &Frame : Expanded) {
    auto &Disc = Frame.Location.Discriminator;
    Disc = ProfileGeneratorBase::getBaseDiscriminator(Disc, UseFSDiscriminator);
  }

  assert(Expanded.size() && "Context length should be at least 1");

  // The leaf frame is set aside (with its location cleared) while the rest
  // of the context is compressed and trimmed, then put back at the end.
  auto Leaf = Expanded.back();
  Leaf.Location = LineLocation(0, 0);
  Expanded.pop_back();
  CSProfileGenerator::compressRecursionContext(Expanded);
  CSProfileGenerator::trimContext(Expanded);
  Expanded.push_back(Leaf);
  return Expanded;
}
template <class ELFT>
void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, StringRef FileName) {
const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName);
// FIXME: This should be the page size of the system running profiling.
// However such info isn't available at post-processing time, assuming
// 4K page now. Note that we don't use EXEC_PAGESIZE from <linux/param.h>
// because we may build the tools on non-linux.
uint32_t PageSize = 0x1000;
for (const typename ELFT::Phdr &Phdr : PhdrRange) {
if (Phdr.p_type == ELF::PT_LOAD) {
if (!FirstLoadableAddress)
FirstLoadableAddress = Phdr.p_vaddr & ~(PageSize - 1U);
if (Phdr.p_flags & ELF::PF_X) {
// Segments will always be loaded at a page boundary.
PreferredTextSegmentAddresses.push_back(Phdr.p_vaddr &
~(PageSize - 1U));
TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U));
}
}
}
if (PreferredTextSegmentAddresses.empty())
exitWithError("no executable segment found", FileName);
}
void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFObjectFileBase *Obj) {
if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
else if (const auto *ELFObj = cast<ELF64BEObjectFile>(Obj))
setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
else
llvm_unreachable("invalid ELF object format");
}
void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
if (UseDwarfCorrelation)
return;
StringRef FileName = Obj->getFileName();
for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
SI != SE; ++SI) {
const SectionRef &Section = *SI;
StringRef SectionName = unwrapOrError(Section.getName(), FileName);
if (SectionName == ".pseudo_probe_desc") {
StringRef Contents = unwrapOrError(Section.getContents(), FileName);
if (!ProbeDecoder.buildGUID2FuncDescMap(
reinterpret_cast<const uint8_t *>(Contents.data()),
Contents.size()))
exitWithError("Pseudo Probe decoder fail in .pseudo_probe_desc section");
} else if (SectionName == ".pseudo_probe") {
StringRef Contents = unwrapOrError(Section.getContents(), FileName);
if (!ProbeDecoder.buildAddress2ProbeMap(
reinterpret_cast<const uint8_t *>(Contents.data()),
Contents.size()))
exitWithError("Pseudo Probe decoder fail in .pseudo_probe section");
// set UsePseudoProbes flag, used for PerfReader
UsePseudoProbes = true;
}
}
if (ShowPseudoProbe)
ProbeDecoder.printGUID2FuncDescMap(outs());
}
// Flag the function range containing Offset as a function entry when either
// its function consists of a single range, or the range is not flagged yet
// and its DWARF-based function name equals RangeSymName (the ELF symbol
// name). Offsets that belong to no known range are ignored.
void ProfiledBinary::setIsFuncEntry(uint64_t Offset, StringRef RangeSymName) {
  // Note that the start offset of each ELF section can be a non-function
  // symbol, we need to binary search for the start of a real function range.
  auto *Range = findFuncRangeForOffset(Offset);
  // Skip external function symbol.
  if (!Range)
    return;

  const bool HasSingleRange = Range->Func->Ranges.size() == 1;
  if (HasSingleRange ||
      (!Range->IsFuncEntry && Range->getFuncName() == RangeSymName))
    Range->IsFuncEntry = true;
}
// Disassemble the instructions that belong to symbol number SI of a section.
//
// SI      - index into Symbols of the symbol to process.
// Bytes   - raw contents of the section.
// Symbols - all symbols of the section (the caller sorts them beforehand).
// Section - the section being disassembled.
//
// The decoded range runs from the symbol's offset up to the next symbol's
// offset, or to the end of the section for the last symbol. For every decoded
// instruction its size and offset are recorded, and calls, returns and
// branches are added to the respective offset sets. Optionally prints the
// disassembly. Always returns true.
bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
SectionSymbolsTy &Symbols,
const SectionRef &Section) {
std::size_t SE = Symbols.size();
// All offsets below are relative to the preferred base address.
uint64_t SectionOffset = Section.getAddress() - getPreferredBaseAddress();
uint64_t SectSize = Section.getSize();
uint64_t StartOffset = Symbols[SI].Addr - getPreferredBaseAddress();
// End of the range to decode: start of the next symbol, or end of section.
uint64_t NextStartOffset =
(SI + 1 < SE) ? Symbols[SI + 1].Addr - getPreferredBaseAddress()
: SectionOffset + SectSize;
setIsFuncEntry(StartOffset,
FunctionSamples::getCanonicalFnName(Symbols[SI].Name));
StringRef SymbolName =
ShowCanonicalFnName
? FunctionSamples::getCanonicalFnName(Symbols[SI].Name)
: Symbols[SI].Name;
// Print only when requested and, if a function filter was given, only for
// symbols named in that filter.
bool ShowDisassembly =
ShowDisassemblyOnly && (DisassembleFunctionSet.empty() ||
DisassembleFunctionSet.count(SymbolName));
if (ShowDisassembly)
outs() << '<' << SymbolName << ">:\n";
// Reports one consecutive range of bytes that failed to decode.
auto WarnInvalidInsts = [](uint64_t Start, uint64_t End) {
WithColor::warning() << "Invalid instructions at "
<< format("%8" PRIx64, Start) << " - "
<< format("%8" PRIx64, End) << "\n";
};
uint64_t Offset = StartOffset;
// Size of a consecutive invalid instruction range starting from Offset -1
// backwards.
uint64_t InvalidInstLength = 0;
while (Offset < NextStartOffset) {
MCInst Inst;
uint64_t Size;
// Disassemble an instruction.
bool Disassembled =
DisAsm->getInstruction(Inst, Size, Bytes.slice(Offset - SectionOffset),
Offset + getPreferredBaseAddress(), nulls());
// A reported size of zero would stall the loop; step over one byte instead.
if (Size == 0)
Size = 1;
if (ShowDisassembly) {
if (ShowPseudoProbe) {
ProbeDecoder.printProbeForAddress(outs(),
Offset + getPreferredBaseAddress());
}
outs() << format("%8" PRIx64 ":", Offset + getPreferredBaseAddress());
// Remember the stream position so the source location can be padded to a
// fixed column below.
size_t Start = outs().tell();
if (Disassembled)
IPrinter->printInst(&Inst, Offset + Size, "", *STI.get(), outs());
else
outs() << "\t<unknown>";
if (ShowSourceLocations) {
unsigned Cur = outs().tell() - Start;
if (Cur < 40)
outs().indent(40 - Cur);
InstructionPointer IP(this, Offset);
outs() << getReversedLocWithContext(
symbolize(IP, ShowCanonicalFnName, ShowPseudoProbe));
}
outs() << "\n";
}
if (Disassembled) {
const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());
// Record instruction size.
Offset2InstSizeMap[Offset] = Size;
// Populate address maps.
CodeAddrOffsets.push_back(Offset);
// An instruction is classified as exactly one of call, return or branch;
// the first matching category wins.
if (MCDesc.isCall())
CallOffsets.insert(Offset);
else if (MCDesc.isReturn())
RetOffsets.insert(Offset);
else if (MCDesc.isBranch())
BranchOffsets.insert(Offset);
// A valid instruction ends a pending run of invalid bytes; report it.
if (InvalidInstLength) {
WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);
InvalidInstLength = 0;
}
} else {
InvalidInstLength += Size;
}
Offset += Size;
}
// Report a run of invalid bytes that extends to the end of the symbol.
if (InvalidInstLength)
WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);
if (ShowDisassembly)
outs() << "\n";
return true;
}
// Create the MC-layer components needed for disassembling Obj: register
// info, asm info, subtarget info, instruction info, disassembler,
// instruction analysis and instruction printer. A component that cannot be
// created terminates the tool with an error naming the target triple; the
// instruction analysis is the only one allowed to be absent.
void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) {
  const Target *TheTarget = getTarget(Obj);
  std::string TripleName = TheTriple.getTriple();
  StringRef FileName = Obj->getFileName();

  MRI.reset(TheTarget->createMCRegInfo(TripleName));
  if (!MRI)
    exitWithError("no register info for target " + TripleName, FileName);

  MCTargetOptions MCOptions;
  AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
  if (!AsmInfo)
    exitWithError("no assembly info for target " + TripleName, FileName);

  SubtargetFeatures Features = Obj->getFeatures();
  STI.reset(
      TheTarget->createMCSubtargetInfo(TripleName, "", Features.getString()));
  if (!STI)
    exitWithError("no subtarget info for target " + TripleName, FileName);

  MII.reset(TheTarget->createMCInstrInfo());
  if (!MII)
    exitWithError("no instruction info for target " + TripleName, FileName);

  // NOTE(review): Ctx and MOFI are locals, while the disassembler created
  // from Ctx is kept in DisAsm after this function returns -- confirm that
  // the disassembler never touches its context afterwards.
  MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get());
  std::unique_ptr<MCObjectFileInfo> MOFI(
      TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
  Ctx.setObjectFileInfo(MOFI.get());
  DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx));
  if (!DisAsm)
    exitWithError("no disassembler for target " + TripleName, FileName);

  MIA.reset(TheTarget->createMCInstrAnalysis(MII.get()));

  int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
  IPrinter.reset(TheTarget->createMCInstPrinter(
      Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
  // The printer is dereferenced right below and later while printing the
  // disassembly, so fail cleanly like for the other components.
  if (!IPrinter)
    exitWithError("no instruction printer for target " + TripleName, FileName);
  IPrinter->setPrintBranchImmAsAddress(true);
}
// Disassemble every non-empty text section of Obj symbol by symbol.
//
// Builds a per-section, sorted symbol table first, registers each text
// section in TextSections, and hands every symbol to dissassembleSymbol().
// The ".plt" section is registered but not disassembled. Finally checks
// whether the FS discriminator marker symbol is present.
void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
// Set up disassembler and related components.
setUpDisassembler(Obj);
// Create a mapping from virtual address to symbol name. The symbols in text
// sections are the candidates to dissassemble.
std::map<SectionRef, SectionSymbolsTy> AllSymbols;
StringRef FileName = Obj->getFileName();
for (const SymbolRef &Symbol : Obj->symbols()) {
const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
// Symbols that are not attached to any section are skipped.
if (SecI != Obj->section_end())
AllSymbols[*SecI].push_back(SymbolInfoTy(Addr, Name, ELF::STT_NOTYPE));
}
// Sort all the symbols. Use a stable sort to stabilize the output.
for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
stable_sort(SecSyms.second);
DisassembleFunctionSet.insert(DisassembleFunctions.begin(),
DisassembleFunctions.end());
// The function filter is only honoured in show-disassembly-only mode.
assert((DisassembleFunctionSet.empty() || ShowDisassemblyOnly) &&
"Functions to disassemble should be only specified together with "
"--show-disassembly-only");
if (ShowDisassemblyOnly)
outs() << "\nDisassembly of " << FileName << ":\n";
// Dissassemble a text section.
for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
SI != SE; ++SI) {
const SectionRef &Section = *SI;
if (!Section.isText())
continue;
uint64_t ImageLoadAddr = getPreferredBaseAddress();
// Section start relative to the preferred base address.
uint64_t SectionOffset = Section.getAddress() - ImageLoadAddr;
uint64_t SectSize = Section.getSize();
if (!SectSize)
continue;
// Register the text section.
TextSections.insert({SectionOffset, SectSize});
StringRef SectionName = unwrapOrError(Section.getName(), FileName);
if (ShowDisassemblyOnly) {
outs() << "\nDisassembly of section " << SectionName;
outs() << " [" << format("0x%" PRIx64, Section.getAddress()) << ", "
<< format("0x%" PRIx64, Section.getAddress() + SectSize)
<< "]:\n\n";
}
// The PLT is registered as a text section above, but its contents are not
// disassembled.
if (SectionName == ".plt")
continue;
// Get the section data.
ArrayRef<uint8_t> Bytes =
arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName));
// Get the list of all the symbols in this section.
SectionSymbolsTy &Symbols = AllSymbols[Section];
// Disassemble symbol by symbol.
// Note: SI and SE below shadow the section iterators of the outer loop.
for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
if (!dissassembleSymbol(SI, Bytes, Symbols, Section))
exitWithError("disassembling error", FileName);
}
}
// Dissassemble rodata section to check if FS discriminator symbol exists.
checkUseFSDiscriminator(Obj, AllSymbols);
}
void ProfiledBinary::checkUseFSDiscriminator(
const ELFObjectFileBase *Obj,
std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
const char *FSDiscriminatorVar = "__llvm_fs_discriminator__";
for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
SI != SE; ++SI) {
const SectionRef &Section = *SI;
if (!Section.isData() || Section.getSize() == 0)
continue;
SectionSymbolsTy &Symbols = AllSymbols[Section];
for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
if (Symbols[SI].Name == FSDiscriminatorVar) {
UseFSDiscriminator = true;
return;
}
}
}
}
// Populate BinaryFunctions and StartOffset2FuncRangeMap from the subprogram
// DIEs found in the DWARF info of Obj.
//
// For every subprogram with a name (linkage name preferred, short name as
// fallback) and at least one address range, each non-empty range located at
// or above the preferred base address is recorded, relative to that base
// address. A range whose start offset is already known produces a warning
// and does not replace the existing entry. DIEs whose address ranges cannot
// be read are skipped.
void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) {
  auto DebugContext = llvm::DWARFContext::create(Obj);
  if (!DebugContext)
    exitWithError("Misssing debug info.", Path);

  for (const auto &CompilationUnit : DebugContext->compile_units()) {
    for (const auto &DieInfo : CompilationUnit->dies()) {
      llvm::DWARFDie Die(CompilationUnit.get(), &DieInfo);
      if (!Die.isSubprogramDIE())
        continue;

      auto Name = Die.getName(llvm::DINameKind::LinkageName);
      if (!Name)
        Name = Die.getName(llvm::DINameKind::ShortName);
      if (!Name)
        continue;

      auto RangesOrError = Die.getAddressRanges();
      if (!RangesOrError) {
        // A failed Expected must have its error consumed before it is
        // destroyed; otherwise builds with unchecked-error detection abort.
        consumeError(RangesOrError.takeError());
        continue;
      }
      const DWARFAddressRangesVector &Ranges = RangesOrError.get();
      if (Ranges.empty())
        continue;

      // Different DWARF symbols can have same function name, search or create
      // BinaryFunction indexed by the name.
      auto Ret = BinaryFunctions.emplace(Name, BinaryFunction());
      auto &Func = Ret.first->second;
      // A newly created function refers to the key stored in the map for its
      // name.
      if (Ret.second)
        Func.FuncName = Ret.first->first;

      for (const auto &Range : Ranges) {
        uint64_t FuncStart = Range.LowPC;
        uint64_t FuncSize = Range.HighPC - FuncStart;

        if (FuncSize == 0 || FuncStart < getPreferredBaseAddress())
          continue;

        uint64_t StartOffset = FuncStart - getPreferredBaseAddress();
        uint64_t EndOffset = Range.HighPC - getPreferredBaseAddress();

        // We may want to know all ranges for one function. Here group the
        // ranges and store them into BinaryFunction.
        Func.Ranges.emplace_back(StartOffset, EndOffset);

        auto R = StartOffset2FuncRangeMap.emplace(StartOffset, FuncRange());
        if (R.second) {
          FuncRange &FRange = R.first->second;
          FRange.Func = &Func;
          FRange.StartOffset = StartOffset;
          FRange.EndOffset = EndOffset;
        } else {
          WithColor::warning()
              << "Duplicated symbol start address at "
              << format("%8" PRIx64, StartOffset + getPreferredBaseAddress())
              << " " << R.first->second.getFuncName() << " and " << Name
              << "\n";
        }
      }
    }
  }
  assert(!StartOffset2FuncRangeMap.empty() && "Misssing debug info.");
}
// Add the function name of every known function range to SymbolList.
void ProfiledBinary::populateSymbolListFromDWARF(
    ProfileSymbolList &SymbolList) {
  for (auto &OffsetAndRange : StartOffset2FuncRangeMap) {
    FuncRange &Range = OffsetAndRange.second;
    SymbolList.add(Range.getFuncName());
  }
}
void ProfiledBinary::setupSymbolizer() {
symbolize::LLVMSymbolizer::Options SymbolizerOpts;
SymbolizerOpts.PrintFunctions =
DILineInfoSpecifier::FunctionNameKind::LinkageName;
SymbolizerOpts.Demangle = false;
SymbolizerOpts.DefaultArch = TheTriple.getArchName().str();
SymbolizerOpts.UseSymbolTable = false;
SymbolizerOpts.RelativeAddresses = false;
Symbolizer = std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts);
}
// Symbolize the instruction at IP into a vector of context frames.
//
// The inlined frames reported by the symbolizer are visited from the highest
// frame index down to index 0; the walk stops at the first frame whose
// function name is "<invalid>". Each frame records the function name and a
// LineLocation: by default the line offset from the function start (lower 16
// bits) plus the discriminator, or, when UseProbeDiscriminator is set, the
// probe index extracted from the discriminator and a zero discriminator.
SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP,
                                                   bool UseCanonicalFnName,
                                                   bool UseProbeDiscriminator) {
  assert(this == IP.Binary &&
         "Binary should only symbolize its own instruction");
  auto Addr = object::SectionedAddress{IP.Offset + getPreferredBaseAddress(),
                                       object::SectionedAddress::UndefSection};
  DIInliningInfo InlineStack = unwrapOrError(
      Symbolizer->symbolizeInlinedCode(SymbolizerPath.str(), Addr),
      SymbolizerPath);

  SampleContextFrameVector Frames;
  for (int32_t FrameIdx = InlineStack.getNumberOfFrames() - 1; FrameIdx >= 0;
       FrameIdx--) {
    const auto &Frame = InlineStack.getFrame(FrameIdx);
    if (Frame.FunctionName == "<invalid>")
      break;

    StringRef FunctionName(Frame.FunctionName);
    if (UseCanonicalFnName)
      FunctionName = FunctionSamples::getCanonicalFnName(FunctionName);

    uint32_t Discriminator = UseProbeDiscriminator ? 0 : Frame.Discriminator;
    uint32_t LineOffset =
        UseProbeDiscriminator
            ? PseudoProbeDwarfDiscriminator::extractProbeIndex(
                  Frame.Discriminator)
            : (Frame.Line - Frame.StartLine) & 0xffff;

    LineLocation Line(LineOffset, Discriminator);
    // The name is interned in NameStrings and the frame refers to the
    // interned copy.
    auto Interned = NameStrings.insert(FunctionName.str());
    Frames.emplace_back(*Interned.first, Line);
  }
  return Frames;
}
void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t StartOffset,
uint64_t EndOffset) {
uint64_t RangeBegin = offsetToVirtualAddr(StartOffset);
uint64_t RangeEnd = offsetToVirtualAddr(EndOffset);
InstructionPointer IP(this, RangeBegin, true);
if (IP.Address != RangeBegin)
WithColor::warning() << "Invalid start instruction at "
<< format("%8" PRIx64, RangeBegin) << "\n";
if (IP.Address >= RangeEnd)
return;
do {
uint64_t Offset = virtualAddrToOffset(IP.Address);
const SampleContextFrameVector &SymbolizedCallStack =
getFrameLocationStack(Offset, UsePseudoProbes);
uint64_t Size = Offset2InstSizeMap[Offset];
// Record instruction size for the corresponding context
FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size);
} while (IP.advance() && IP.Address < RangeEnd);
}
// Position the pointer at Address within Binary. With RoundToNext, the
// address is replaced by the address stored at the looked-up index, or by
// UINT64_MAX when that index lies past the last code offset.
InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
                                       uint64_t Address, bool RoundToNext)
    : Binary(Binary), Address(Address) {
  Index = Binary->getIndexForAddr(Address);
  if (!RoundToNext)
    return;

  // we might get address which is not the code
  // it should round to the next valid address
  const bool PastEnd = Index >= Binary->getCodeOffsetsSize();
  this->Address = PastEnd ? UINT64_MAX : Binary->getAddressforIndex(Index);
}
// Move to the next code index. Returns false, with Address set to
// UINT64_MAX, when the new index is past the last code offset.
bool InstructionPointer::advance() {
  ++Index;
  const bool InRange = Index < Binary->getCodeOffsetsSize();
  Address = InRange ? Binary->getAddressforIndex(Index) : UINT64_MAX;
  return InRange;
}
// Move to the previous code index. Returns false, with Address set to 0 and
// Index unchanged, when the pointer is already at index 0.
bool InstructionPointer::backward() {
  if (Index != 0) {
    --Index;
    Address = Binary->getAddressforIndex(Index);
    return true;
  }
  Address = 0;
  return false;
}
// Re-target the pointer at Addr and refresh the matching code index.
void InstructionPointer::update(uint64_t Addr) {
  Index = Binary->getIndexForAddr(Addr);
  Address = Addr;
}
} // end namespace sampleprof
} // end namespace llvm

View file

@ -1,541 +0,0 @@
//===-- ProfiledBinary.h - Binary decoder -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
#define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
#include "CallContext.h"
#include "ErrorHandling.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Path.h"
#include "llvm/Transforms/IPO/SampleContextTracker.h"
#include <list>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
extern cl::opt<bool> EnableCSPreInliner;
extern cl::opt<bool> UseContextCostForPreInliner;
using namespace llvm;
using namespace sampleprof;
using namespace llvm::object;
namespace llvm {
namespace sampleprof {
class ProfiledBinary;
// A cursor over the instructions of a ProfiledBinary. It pairs an
// address/offset with the index of that instruction in the binary's sorted
// code address array, and can step to the neighbouring instructions.
struct InstructionPointer {
// Binary the pointer refers to; all lookups go through it.
const ProfiledBinary *Binary;
// Offset and Address share the same storage: writing one overwrites the
// other.
union {
// Offset of the executable segment of the binary.
uint64_t Offset = 0;
// Also used as address in unwinder
uint64_t Address;
};
// Index to the sorted code address array of the binary.
uint64_t Index = 0;
// Positions the pointer at Address. With RoundToNext, the address is
// replaced by the one stored at the looked-up index (UINT64_MAX if that
// index is past the end).
InstructionPointer(const ProfiledBinary *Binary, uint64_t Address,
bool RoundToNext = false);
// Steps to the next instruction; returns false (Address = UINT64_MAX) when
// there is none.
bool advance();
// Steps to the previous instruction; returns false (Address = 0) when
// already at index 0.
bool backward();
// Re-targets the pointer at Addr and recomputes Index.
void update(uint64_t Addr);
};
// The special frame addresses. These small values are used in place of a
// real instruction address to mark frames with a special meaning.
enum SpecialFrameAddr {
// Dummy root of frame trie.
DummyRoot = 0,
// Represent all the addresses outside of current binary.
// This is also used to indicate the call stack should be truncated since this
// isn't a real call context the compiler will see.
ExternalAddr = 1,
};
using RangesTy = std::vector<std::pair<uint64_t, uint64_t>>;
struct BinaryFunction {
StringRef FuncName;
// End of range is an exclusive bound.
RangesTy Ranges;
uint64_t getFuncSize() {
uint64_t Sum = 0;
for (auto &R : Ranges) {
Sum += R.second - R.first;
}
return Sum;
}
};
// Info about function range. A function can be split into multiple
// non-continuous ranges, each range corresponds to one FuncRange.
//
// All members carry default initializers so that a default-initialized
// FuncRange never holds indeterminate offsets or a garbage Func pointer.
struct FuncRange {
  uint64_t StartOffset = 0;
  // EndOffset is an exclusive bound.
  uint64_t EndOffset = 0;
  // Function the range belongs to
  BinaryFunction *Func = nullptr;
  // Whether the start offset is the real entry of the function.
  bool IsFuncEntry = false;

  // Name of the owning function. Func must have been set before calling.
  StringRef getFuncName() { return Func->FuncName; }
};
// PrologEpilog offset tracker, used to filter out broken stack samples.
// Currently we use a heuristic size (two) to infer prolog and epilog
// based on the start address and return address. In the future,
// we will switch to Dwarf CFI based tracker.
struct PrologEpilogTracker {
  // A set of prolog and epilog offsets. Used by virtual unwinding.
  std::unordered_set<uint64_t> PrologEpilogSet;
  ProfiledBinary *Binary;
  PrologEpilogTracker(ProfiledBinary *Bin) : Binary(Bin){};

  // Take the two addresses from the start of function as prolog: the start
  // offset itself and the instruction following it.
  void inferPrologOffsets(std::map<uint64_t, FuncRange> &FuncStartOffsetMap) {
    // Iterate by reference; copying every (offset, FuncRange) pair is
    // unnecessary.
    for (const auto &I : FuncStartOffsetMap) {
      PrologEpilogSet.insert(I.first);
      InstructionPointer IP(Binary, I.first);
      // NOTE(review): the map is ordered by start offset; the early exit
      // presumably relies on all later starts being past the last
      // instruction as well -- confirm.
      if (!IP.advance())
        break;
      PrologEpilogSet.insert(IP.Offset);
    }
  }

  // Take the last two addresses before the return address as epilog: the
  // return itself and the instruction preceding it.
  void inferEpilogOffsets(std::unordered_set<uint64_t> &RetAddrs) {
    for (auto Addr : RetAddrs) {
      PrologEpilogSet.insert(Addr);
      InstructionPointer IP(Binary, Addr);
      // RetAddrs is unordered, so a return without a predecessor must only
      // skip its own second entry. Stopping the whole loop here would drop
      // an arbitrary subset of the remaining returns.
      if (!IP.backward())
        continue;
      PrologEpilogSet.insert(IP.Offset);
    }
  }
};
// Track function byte size under different context (outlined version as well as
// various inlined versions). It also provides query support to get function
// size with the best matching context, which is used to help pre-inliner use
// accurate post-optimization size to make decisions.
// TODO: If an inlinee is completely optimized away, ideally we should have zero
// for its context size, currently we would miss such context since it doesn't
// have instructions. To fix this, we need to mark all inlinee with entry probe
// but without instructions as having zero size.
class BinarySizeContextTracker {
public:
// Add instruction with given size to a context
void addInstructionForContext(const SampleContextFrameVector &Context,
uint32_t InstrSize);
// Get function size with a specific context. When there's no exact match
// for the given context, try to retrieve the size of that function from
// closest matching context.
uint32_t getFuncSizeForContext(const SampleContext &Context);
// For inlinees that are fully optimized away, we can establish zero size using
// their remaining probes.
void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder);
// Dump the whole context trie, starting from the root node.
void dump() { RootContext.dumpTree(); }
private:
// Stack of (name, 32-bit value) pairs passed down through the private
// trackInlineesOptimizedAway overload below.
using ProbeFrameStack = SmallVector<std::pair<StringRef, uint32_t>>;
// Overload of trackInlineesOptimizedAway that works on one probe inline tree
// node, with Context as the frame stack built up so far.
void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder,
MCDecodedPseudoProbeInlineTree &ProbeNode,
ProbeFrameStack &Context);
// Root node for context trie tree, note that this is a reverse context trie
// with callee as parent and caller as child. This way we can traverse from
// root to find the best/longest matching context if an exact match does not
// exist. It gives us the best possible estimate for function's post-inline,
// post-optimization byte size.
ContextTrieNode RootContext;
};
using OffsetRange = std::pair<uint64_t, uint64_t>;
// Everything llvm-profgen knows about one profiled binary: its paths and load
// addresses, the MC components used for disassembly, and the offset-keyed maps
// and sets declared below. The constructor sets up the symbolizer and calls
// load().
class ProfiledBinary {
  // Absolute path of the executable binary.
  std::string Path;
  // Path of the debug info binary.
  std::string DebugBinaryPath;
  // Path handed to the symbolizer; it should point to the binary with debug
  // info. This is a view into Path or DebugBinaryPath above.
  StringRef SymbolizerPath;
  // The target triple.
  Triple TheTriple;
  // The runtime base address that the first executable segment is loaded at.
  uint64_t BaseAddress = 0;
  // The runtime base address that the first loadable segment is loaded at.
  uint64_t FirstLoadableAddress = 0;
  // The preferred load address of each executable segment.
  std::vector<uint64_t> PreferredTextSegmentAddresses;
  // The file offset of each executable segment.
  std::vector<uint64_t> TextSegmentOffsets;

  // Multiple MC component info
  std::unique_ptr<const MCRegisterInfo> MRI;
  std::unique_ptr<const MCAsmInfo> AsmInfo;
  std::unique_ptr<const MCSubtargetInfo> STI;
  std::unique_ptr<const MCInstrInfo> MII;
  std::unique_ptr<MCDisassembler> DisAsm;
  std::unique_ptr<const MCInstrAnalysis> MIA;
  std::unique_ptr<MCInstPrinter> IPrinter;

  // A list of text sections sorted by start RVA and size. Used to check
  // if a given RVA is a valid code address.
  std::set<std::pair<uint64_t, uint64_t>> TextSections;
  // A map of mapping function name to BinaryFunction info.
  std::unordered_map<std::string, BinaryFunction> BinaryFunctions;
  // An ordered map of mapping function's start offset to function range
  // relevant info. Currently to determine if the offset of ELF is the start of
  // a real function, we leverage the function range info from DWARF.
  std::map<uint64_t, FuncRange> StartOffset2FuncRangeMap;
  // Offset to context location map. Used to expand the context.
  std::unordered_map<uint64_t, SampleContextFrameVector> Offset2LocStackMap;
  // Offset to instruction size map. Also used for quick offset lookup.
  std::unordered_map<uint64_t, uint64_t> Offset2InstSizeMap;
  // An array of offsets of all instructions sorted in increasing order. The
  // sorting is needed to fast advance to the next forward/backward instruction.
  std::vector<uint64_t> CodeAddrOffsets;
  // A set of call instruction offsets. Used by virtual unwinding.
  std::unordered_set<uint64_t> CallOffsets;
  // A set of return instruction offsets. Used by virtual unwinding.
  std::unordered_set<uint64_t> RetOffsets;
  // A set of branch instruction offsets.
  std::unordered_set<uint64_t> BranchOffsets;
  // Estimate and track function prolog and epilog ranges.
  PrologEpilogTracker ProEpilogTracker;
  // Track function sizes under different context
  BinarySizeContextTracker FuncSizeTracker;
  // The symbolizer used to get inline context for an instruction.
  std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;
  // String table owning function name strings created from the symbolizer.
  std::unordered_set<std::string> NameStrings;
  // A collection of functions to print disassembly for.
  StringSet<> DisassembleFunctionSet;
  // Pseudo probe decoder
  MCPseudoProbeDecoder ProbeDecoder;
  bool UsePseudoProbes = false;
  bool UseFSDiscriminator = false;
  // Whether we need to symbolize all instructions to get function context size.
  bool TrackFuncContextSize = false;
  // Indicate if the base loading address is parsed from the mmap event or uses
  // the preferred address
  bool IsLoadedByMMap = false;
  // Use to avoid redundant warning.
  bool MissingMMapWarned = false;

  void setPreferredTextSegmentAddresses(const ELFObjectFileBase *O);
  template <class ELFT>
  void setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
                                        StringRef FileName);
  void decodePseudoProbe(const ELFObjectFileBase *Obj);
  void
  checkUseFSDiscriminator(const ELFObjectFileBase *Obj,
                          std::map<SectionRef, SectionSymbolsTy> &AllSymbols);
  // Set up disassembler and related components.
  void setUpDisassembler(const ELFObjectFileBase *Obj);
  void setupSymbolizer();
  // Load debug info of subprograms from DWARF section.
  void loadSymbolsFromDWARF(ObjectFile &Obj);
  // A function may be split into multiple non-continuous address ranges. We use
  // this to set whether start offset of a function is the real entry of the
  // function and also set false to the non-function label.
  void setIsFuncEntry(uint64_t Offset, StringRef RangeSymName);
  // Warn if no entry range exists in the function.
  void warnNoFuncEntry();
  /// Disassemble the text section and build various address maps.
  void disassemble(const ELFObjectFileBase *O);
  /// Helper function to disassemble the symbol and extract info for unwinding
  bool dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
                          SectionSymbolsTy &Symbols, const SectionRef &Section);
  /// Symbolize a given instruction pointer and return a full call context.
  SampleContextFrameVector symbolize(const InstructionPointer &IP,
                                     bool UseCanonicalFnName = false,
                                     bool UseProbeDiscriminator = false);
  /// Decode the interesting parts of the binary and build internal data
  /// structures. On high level, the parts of interest are:
  ///   1. Text sections, including the main code section and the PLT
  ///   entries that will be used to handle cross-module call transitions.
  ///   2. The .debug_line section, used by Dwarf-based profile generation.
  ///   3. Pseudo probe related sections, used by probe-based profile
  ///   generation.
  void load();

public:
  // Build the model for the executable at ExeBinPath. DebugBinPath names a
  // separate debug info binary and may be empty, in which case the executable
  // itself is handed to the symbolizer.
  ProfiledBinary(const StringRef ExeBinPath, const StringRef DebugBinPath)
      : Path(ExeBinPath), DebugBinaryPath(DebugBinPath), ProEpilogTracker(this),
        TrackFuncContextSize(EnableCSPreInliner &&
                             UseContextCostForPreInliner) {
    // Point to executable binary if debug info binary is not specified.
    // SymbolizerPath is a non-owning view that lives as long as this object,
    // so make it refer to the strings owned by this object instead of the
    // caller's buffers, which are only guaranteed to be valid during this call.
    SymbolizerPath = DebugBinaryPath.empty() ? StringRef(Path)
                                             : StringRef(DebugBinaryPath);
    setupSymbolizer();
    load();
  }

  // Convert between runtime virtual addresses and offsets relative to
  // BaseAddress.
  uint64_t virtualAddrToOffset(uint64_t VirtualAddress) const {
    return VirtualAddress - BaseAddress;
  }
  uint64_t offsetToVirtualAddr(uint64_t Offset) const {
    return Offset + BaseAddress;
  }

  StringRef getPath() const { return Path; }
  StringRef getName() const { return llvm::sys::path::filename(Path); }
  uint64_t getBaseAddress() const { return BaseAddress; }
  void setBaseAddress(uint64_t Address) { BaseAddress = Address; }

  // Return the preferred load address for the first executable segment.
  // NOTE(review): indexes element 0 unchecked; assumes at least one executable
  // segment has been recorded -- confirm.
  uint64_t getPreferredBaseAddress() const {
    return PreferredTextSegmentAddresses[0];
  }
  // Return the preferred load address for the first loadable segment.
  uint64_t getFirstLoadableAddress() const { return FirstLoadableAddress; }
  // Return the file offset for the first executable segment.
  // NOTE(review): indexes element 0 unchecked, same assumption as above.
  uint64_t getTextSegmentOffset() const { return TextSegmentOffsets[0]; }
  const std::vector<uint64_t> &getPreferredTextSegmentAddresses() const {
    return PreferredTextSegmentAddresses;
  }
  const std::vector<uint64_t> &getTextSegmentOffsets() const {
    return TextSegmentOffsets;
  }

  // Return the size of the instruction recorded at Offset, or 0 if no
  // instruction is recorded there.
  uint64_t getInstSize(uint64_t Offset) const {
    auto I = Offset2InstSizeMap.find(Offset);
    if (I == Offset2InstSizeMap.end())
      return 0;
    return I->second;
  }

  // Whether an instruction is recorded at exactly this offset.
  bool offsetIsCode(uint64_t Offset) const {
    return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end();
  }
  bool addressIsCode(uint64_t Address) const {
    uint64_t Offset = virtualAddrToOffset(Address);
    return offsetIsCode(Offset);
  }
  bool addressIsCall(uint64_t Address) const {
    uint64_t Offset = virtualAddrToOffset(Address);
    return CallOffsets.count(Offset);
  }
  bool addressIsReturn(uint64_t Address) const {
    uint64_t Offset = virtualAddrToOffset(Address);
    return RetOffsets.count(Offset);
  }
  bool addressInPrologEpilog(uint64_t Address) const {
    uint64_t Offset = virtualAddrToOffset(Address);
    return ProEpilogTracker.PrologEpilogSet.count(Offset);
  }
  // Whether the offset is a recorded branch, return or call instruction.
  bool offsetIsTransfer(uint64_t Offset) const {
    return BranchOffsets.count(Offset) || RetOffsets.count(Offset) ||
           CallOffsets.count(Offset);
  }

  // Return the virtual address of the Index-th entry of CodeAddrOffsets.
  // Index must be smaller than getCodeOffsetsSize().
  uint64_t getAddressforIndex(uint64_t Index) const {
    return offsetToVirtualAddr(CodeAddrOffsets[Index]);
  }
  size_t getCodeOffsetsSize() const { return CodeAddrOffsets.size(); }

  bool usePseudoProbes() const { return UsePseudoProbes; }
  bool useFSDiscriminator() const { return UseFSDiscriminator; }

  // Get the index in CodeAddrOffsets for the address
  // As we might get an address which is not the code
  // here it would round to the next valid code address by
  // using lower bound operation
  uint32_t getIndexForOffset(uint64_t Offset) const {
    auto Low = llvm::lower_bound(CodeAddrOffsets, Offset);
    return Low - CodeAddrOffsets.begin();
  }
  uint32_t getIndexForAddr(uint64_t Address) const {
    uint64_t Offset = virtualAddrToOffset(Address);
    return getIndexForOffset(Offset);
  }

  // Map a frame address to the address of the instruction recorded right
  // before it, provided that instruction is a call. ExternalAddr is passed
  // through unchanged; 0 is returned when there is no preceding instruction or
  // it is not a call.
  uint64_t getCallAddrFromFrameAddr(uint64_t FrameAddr) const {
    if (FrameAddr == ExternalAddr)
      return ExternalAddr;
    auto I = getIndexForAddr(FrameAddr);
    FrameAddr = I ? getAddressforIndex(I - 1) : 0;
    if (FrameAddr && addressIsCall(FrameAddr))
      return FrameAddr;
    return 0;
  }

  // Return the function range that starts exactly at Offset, or nullptr.
  FuncRange *findFuncRangeForStartOffset(uint64_t Offset) {
    auto I = StartOffset2FuncRangeMap.find(Offset);
    if (I == StartOffset2FuncRangeMap.end())
      return nullptr;
    return &I->second;
  }

  // Binary search the function range which includes the input offset.
  // Returns nullptr when no range contains it.
  FuncRange *findFuncRangeForOffset(uint64_t Offset) {
    auto I = StartOffset2FuncRangeMap.upper_bound(Offset);
    if (I == StartOffset2FuncRangeMap.begin())
      return nullptr;
    // Step back to the last range that starts at or before Offset.
    I--;
    if (Offset >= I->second.EndOffset)
      return nullptr;
    return &I->second;
  }

  // Get all ranges of one function.
  RangesTy getRangesForOffset(uint64_t Offset) {
    auto *FRange = findFuncRangeForOffset(Offset);
    // Ignore the range which falls into plt section or system lib.
    if (!FRange)
      return RangesTy();
    return FRange->Func->Ranges;
  }

  const std::unordered_map<std::string, BinaryFunction> &
  getAllBinaryFunctions() const {
    return BinaryFunctions;
  }

  // Look up a function by name; returns nullptr if it is unknown.
  BinaryFunction *getBinaryFunction(StringRef FName) {
    auto I = BinaryFunctions.find(FName.str());
    if (I == BinaryFunctions.end())
      return nullptr;
    return &I->second;
  }

  uint32_t getFuncSizeForContext(SampleContext &Context) {
    return FuncSizeTracker.getFuncSizeForContext(Context);
  }

  // Load the symbols from debug table and populate into symbol list.
  void populateSymbolListFromDWARF(ProfileSymbolList &SymbolList);

  // Return the frame location stack for Offset. The result is stored in
  // Offset2LocStackMap, so symbolize() only runs the first time an offset is
  // requested; UseProbeDiscriminator only matters on that first request.
  const SampleContextFrameVector &
  getFrameLocationStack(uint64_t Offset, bool UseProbeDiscriminator = false) {
    auto I = Offset2LocStackMap.emplace(Offset, SampleContextFrameVector());
    if (I.second) {
      InstructionPointer IP(this, Offset);
      I.first->second = symbolize(IP, true, UseProbeDiscriminator);
    }
    return I.first->second;
  }

  // Return the last frame of the location stack at Offset, or an empty
  // Optional if the stack is empty.
  Optional<SampleContextFrame> getInlineLeafFrameLoc(uint64_t Offset) {
    const auto &Stack = getFrameLocationStack(Offset);
    if (Stack.empty())
      return {};
    return Stack.back();
  }

  // Compare two addresses' inline context
  bool inlineContextEqual(uint64_t Add1, uint64_t Add2);

  // Get the full context of the current stack with inline context filled in.
  // It will search the disassembling info stored in Offset2LocStackMap. This is
  // used as the key of function sample map
  SampleContextFrameVector
  getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
                     bool &WasLeafInlined);

  // Go through instructions among the given range and record its size for the
  // inline context.
  void computeInlinedContextSizeForRange(uint64_t StartOffset,
                                         uint64_t EndOffset);

  const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
    return ProbeDecoder.getCallProbeForAddr(Address);
  }

  // Append the inline context of Probe to InlineContextStack, one frame per
  // call site reported by the probe decoder.
  void getInlineContextForProbe(const MCDecodedPseudoProbe *Probe,
                                SampleContextFrameVector &InlineContextStack,
                                bool IncludeLeaf = false) const {
    SmallVector<MCPseduoProbeFrameLocation, 16> ProbeInlineContext;
    ProbeDecoder.getInlineContextForProbe(Probe, ProbeInlineContext,
                                          IncludeLeaf);
    for (uint32_t I = 0; I < ProbeInlineContext.size(); I++) {
      auto &Callsite = ProbeInlineContext[I];
      // Clear the current context for an unknown probe.
      if (Callsite.second == 0 && I != ProbeInlineContext.size() - 1) {
        InlineContextStack.clear();
        continue;
      }
      InlineContextStack.emplace_back(Callsite.first,
                                      LineLocation(Callsite.second, 0));
    }
  }

  const AddressProbesMap &getAddress2ProbesMap() const {
    return ProbeDecoder.getAddress2ProbesMap();
  }
  const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) {
    return ProbeDecoder.getFuncDescForGUID(GUID);
  }
  const MCPseudoProbeFuncDesc *
  getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) {
    return ProbeDecoder.getInlinerDescForProbe(Probe);
  }

  bool getTrackFuncContextSize() const { return TrackFuncContextSize; }
  bool getIsLoadedByMMap() const { return IsLoadedByMMap; }
  void setIsLoadedByMMap(bool Value) { IsLoadedByMMap = Value; }
  bool getMissingMMapWarned() const { return MissingMMapWarned; }
  void setMissingMMapWarned(bool Value) { MissingMMapWarned = Value; }
};
} // end namespace sampleprof
} // end namespace llvm
#endif

View file

@ -1,164 +0,0 @@
//===- llvm-profgen.cpp - LLVM SPGO profile generation tool -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// llvm-profgen generates SPGO profiles from perf script output.
//
//===----------------------------------------------------------------------===//
#include "ErrorHandling.h"
#include "PerfReader.h"
#include "ProfileGenerator.h"
#include "ProfiledBinary.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/TargetSelect.h"
// Command-line options of llvm-profgen. All options defined in this file
// belong to ProfGenCategory, which main() keeps visible when it hides
// unrelated options.
static cl::OptionCategory ProfGenCategory("ProfGen Options");
// Input: a perf-script trace (alias: --ps).
static cl::opt<std::string> PerfScriptFilename(
"perfscript", cl::value_desc("perfscript"), cl::ZeroOrMore,
llvm::cl::MiscFlags::CommaSeparated,
cl::desc("Path of perf-script trace created by Linux perf tool with "
"`script` command(the raw perf.data should be profiled with -b)"),
cl::cat(ProfGenCategory));
static cl::alias PSA("ps", cl::desc("Alias for --perfscript"),
cl::aliasopt(PerfScriptFilename));
// Input: raw perf data (alias: --pd).
static cl::opt<std::string> PerfDataFilename(
"perfdata", cl::value_desc("perfdata"), cl::ZeroOrMore,
llvm::cl::MiscFlags::CommaSeparated,
cl::desc("Path of raw perf data created by Linux perf tool (it should be "
"profiled with -b)"),
cl::cat(ProfGenCategory));
static cl::alias PDA("pd", cl::desc("Alias for --perfdata"),
cl::aliasopt(PerfDataFilename));
// Input: an unsymbolized profile written by an earlier llvm-profgen run
// (alias: --up).
static cl::opt<std::string> UnsymbolizedProfFilename(
"unsymbolized-profile", cl::value_desc("unsymbolized profile"),
cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated,
cl::desc("Path of the unsymbolized profile created by "
"`llvm-profgen` with `--skip-symbolization`"),
cl::cat(ProfGenCategory));
static cl::alias UPA("up", cl::desc("Alias for --unsymbolized-profile"),
cl::aliasopt(UnsymbolizedProfFilename));
// The profiled executable; this option is required.
static cl::opt<std::string>
BinaryPath("binary", cl::value_desc("binary"), cl::Required,
cl::desc("Path of profiled executable binary."),
cl::cat(ProfGenCategory));
// Optional separate binary to read DWARF info from.
static cl::opt<std::string> DebugBinPath(
"debug-binary", cl::value_desc("debug-binary"), cl::ZeroOrMore,
cl::desc("Path of debug info binary, llvm-profgen will load the DWARF info "
"from it instead of the executable binary."),
cl::cat(ProfGenCategory));
// Options that are only declared here; their definitions are not in this file.
extern cl::opt<bool> ShowDisassemblyOnly;
extern cl::opt<bool> ShowSourceLocations;
extern cl::opt<bool> SkipSymbolization;
using namespace llvm;
using namespace sampleprof;
// Validate the command line input.
static void validateCommandLine() {
// Allow the missing perfscript if we only use to show binary disassembly.
if (!ShowDisassemblyOnly) {
// Validate input profile is provided only once
uint16_t HasPerfData = PerfDataFilename.getNumOccurrences();
uint16_t HasPerfScript = PerfScriptFilename.getNumOccurrences();
uint16_t HasUnsymbolizedProfile =
UnsymbolizedProfFilename.getNumOccurrences();
uint16_t S = HasPerfData + HasPerfScript + HasUnsymbolizedProfile;
if (S != 1) {
std::string Msg =
S > 1
? "`--perfscript`, `--perfdata` and `--unsymbolized-profile` "
"cannot be used together."
: "Perf input file is missing, please use one of `--perfscript`, "
"`--perfdata` and `--unsymbolized-profile` for the input.";
exitWithError(Msg);
}
auto CheckFileExists = [](bool H, StringRef File) {
if (H && !llvm::sys::fs::exists(File)) {
std::string Msg = "Input perf file(" + File.str() + ") doesn't exist.";
exitWithError(Msg);
}
};
CheckFileExists(HasPerfData, PerfDataFilename);
CheckFileExists(HasPerfScript, PerfScriptFilename);
CheckFileExists(HasUnsymbolizedProfile, UnsymbolizedProfFilename);
}
if (!llvm::sys::fs::exists(BinaryPath)) {
std::string Msg = "Input binary(" + BinaryPath + ") doesn't exist.";
exitWithError(Msg);
}
if (CSProfileGenerator::MaxCompressionSize < -1) {
exitWithError("Value of --compress-recursion should >= -1");
}
if (ShowSourceLocations && !ShowDisassemblyOnly) {
exitWithError("--show-source-locations should work together with "
"--show-disassembly-only!");
}
}
// Describe the input profile selected on the command line: its path and its
// format. The options are checked in the order --perfdata, --perfscript,
// --unsymbolized-profile; if none was given, a default-constructed
// PerfInputFile is returned.
static PerfInputFile getPerfInputFile() {
  PerfInputFile Input;
  if (PerfDataFilename.getNumOccurrences()) {
    Input.InputFile = PerfDataFilename;
    Input.Format = PerfFormat::PerfData;
    return Input;
  }
  if (PerfScriptFilename.getNumOccurrences()) {
    Input.InputFile = PerfScriptFilename;
    Input.Format = PerfFormat::PerfScript;
    return Input;
  }
  if (UnsymbolizedProfFilename.getNumOccurrences()) {
    Input.InputFile = UnsymbolizedProfFilename;
    Input.Format = PerfFormat::UnsymbolizedProfile;
  }
  return Input;
}
int main(int argc, const char *argv[]) {
InitLLVM X(argc, argv);
// Initialize targets and assembly printers/parsers.
InitializeAllTargetInfos();
InitializeAllTargetMCs();
InitializeAllDisassemblers();
cl::HideUnrelatedOptions({&ProfGenCategory, &getColorCategory()});
cl::ParseCommandLineOptions(argc, argv, "llvm SPGO profile generator\n");
validateCommandLine();
// Load symbols and disassemble the code of a given binary.
std::unique_ptr<ProfiledBinary> Binary =
std::make_unique<ProfiledBinary>(BinaryPath, DebugBinPath);
if (ShowDisassemblyOnly)
return EXIT_SUCCESS;
PerfInputFile PerfFile = getPerfInputFile();
std::unique_ptr<PerfReaderBase> Reader =
PerfReaderBase::create(Binary.get(), PerfFile);
// Parse perf events and samples
Reader->parsePerfTraces();
if (SkipSymbolization)
return EXIT_SUCCESS;
std::unique_ptr<ProfileGeneratorBase> Generator =
ProfileGeneratorBase::create(Binary.get(), Reader->getSampleCounters(),
Reader->profileIsCSFlat());
Generator->generateProfile();
Generator->write();
return EXIT_SUCCESS;
}

View file

@ -1,891 +0,0 @@
//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// FileCheck does a line-by-line check of a file that validates whether it
// contains the expected content. This is useful for regression tests etc.
//
// This program exits with an exit status of 2 on error, exit status of 0 if
// the file matched the expected contents, and exit status of 1 if it did not
// contain the expected contents.
//
//===----------------------------------------------------------------------===//
#include "llvm/FileCheck/FileCheck.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <cmath>
#include <map>
using namespace llvm;
// Command-line interface of FileCheck. Each option below carries its own
// user-facing description in cl::desc; the comments only group them.

// Extra help text about the FILECHECK_OPTS environment variable.
static cl::extrahelp FileCheckOptsEnv(
"\nOptions are parsed from the environment variable FILECHECK_OPTS and\n"
"from the command line.\n");
// Input selection: the check file (positional) and the file to check.
static cl::opt<std::string>
CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Optional);
static cl::opt<std::string>
InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
cl::init("-"), cl::value_desc("filename"));
// Directive prefixes: check prefixes (with a comma-separated alias) and
// comment prefixes.
static cl::list<std::string> CheckPrefixes(
"check-prefix",
cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
static cl::alias CheckPrefixesAlias(
"check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
cl::NotHidden,
cl::desc(
"Alias for -check-prefix permitting multiple comma separated values"));
static cl::list<std::string> CommentPrefixes(
"comment-prefixes", cl::CommaSeparated, cl::Hidden,
cl::desc("Comma-separated list of comment prefixes to use from check file\n"
"(defaults to 'COM,RUN'). Please avoid using this feature in\n"
"LLVM's LIT-based test suites, which should be easier to\n"
"maintain if they all follow a consistent comment style. This\n"
"feature is meant for non-LIT test suites using FileCheck."));
// Matching behavior.
static cl::opt<bool> NoCanonicalizeWhiteSpace(
"strict-whitespace",
cl::desc("Do not treat all horizontal whitespace as equivalent"));
static cl::opt<bool> IgnoreCase(
"ignore-case",
cl::desc("Use case-insensitive matching"));
static cl::list<std::string> ImplicitCheckNot(
"implicit-check-not",
cl::desc("Add an implicit negative check with this pattern to every\n"
"positive check. This can be used to ensure that no instances of\n"
"this pattern occur which are not matched by a positive pattern"),
cl::value_desc("pattern"));
static cl::list<std::string>
GlobalDefines("D", cl::AlwaysPrefix,
cl::desc("Define a variable to be used in capture patterns."),
cl::value_desc("VAR=VALUE"));
static cl::opt<bool> AllowEmptyInput(
"allow-empty", cl::init(false),
cl::desc("Allow the input file to be empty. This is useful when making\n"
"checks that some error message does not occur, for example."));
static cl::opt<bool> AllowUnusedPrefixes(
"allow-unused-prefixes", cl::init(false), cl::ZeroOrMore,
cl::desc("Allow prefixes to be specified but not appear in the test."));
static cl::opt<bool> MatchFullLines(
"match-full-lines", cl::init(false),
cl::desc("Require all positive matches to cover an entire input line.\n"
"Allows leading and trailing whitespace if --strict-whitespace\n"
"is not also passed."));
static cl::opt<bool> EnableVarScope(
"enable-var-scope", cl::init(false),
cl::desc("Enables scope for regex variables. Variables with names that\n"
"do not start with '$' will be reset at the beginning of\n"
"each CHECK-LABEL block."));
static cl::opt<bool> AllowDeprecatedDagOverlap(
"allow-deprecated-dag-overlap", cl::init(false),
cl::desc("Enable overlapping among matches in a group of consecutive\n"
"CHECK-DAG directives. This option is deprecated and is only\n"
"provided for convenience as old tests are migrated to the new\n"
"non-overlapping CHECK-DAG implementation.\n"));
// Verbosity levels.
static cl::opt<bool> Verbose(
"v", cl::init(false), cl::ZeroOrMore,
cl::desc("Print directive pattern matches, or add them to the input dump\n"
"if enabled.\n"));
static cl::opt<bool> VerboseVerbose(
"vv", cl::init(false), cl::ZeroOrMore,
cl::desc("Print information helpful in diagnosing internal FileCheck\n"
"issues, or add it to the input dump if enabled. Implies\n"
"-v.\n"));
// The order of DumpInputValue members affects their precedence, as documented
// for -dump-input below.
// Members are declared from 'never' up to 'help', i.e. in the reverse of the
// order in which the option's help text lists them.
enum DumpInputValue {
DumpInputNever,
DumpInputFail,
DumpInputAlways,
DumpInputHelp
};
// The -dump-input option; it may be given several times, so it is a list.
static cl::list<DumpInputValue> DumpInputs(
"dump-input",
cl::desc("Dump input to stderr, adding annotations representing\n"
"currently enabled diagnostics. When there are multiple\n"
"occurrences of this option, the <value> that appears earliest\n"
"in the list below has precedence. The default is 'fail'.\n"),
cl::value_desc("mode"),
cl::values(clEnumValN(DumpInputHelp, "help", "Explain input dump and quit"),
clEnumValN(DumpInputAlways, "always", "Always dump input"),
clEnumValN(DumpInputFail, "fail", "Dump input on failure"),
clEnumValN(DumpInputNever, "never", "Never dump input")));
// The order of DumpInputFilterValue members affects their precedence, as
// documented for -dump-input-filter below.
// Members are declared from 'error' up to 'all', i.e. in the reverse of the
// order in which the option's help text lists them.
enum DumpInputFilterValue {
DumpInputFilterError,
DumpInputFilterAnnotation,
DumpInputFilterAnnotationFull,
DumpInputFilterAll
};
// The -dump-input-filter option; it may be given several times, so it is a
// list.
static cl::list<DumpInputFilterValue> DumpInputFilters(
"dump-input-filter",
cl::desc("In the dump requested by -dump-input, print only input lines of\n"
"kind <value> plus any context specified by -dump-input-context.\n"
"When there are multiple occurrences of this option, the <value>\n"
"that appears earliest in the list below has precedence. The\n"
"default is 'error' when -dump-input=fail, and it's 'all' when\n"
"-dump-input=always.\n"),
cl::values(clEnumValN(DumpInputFilterAll, "all", "All input lines"),
clEnumValN(DumpInputFilterAnnotationFull, "annotation-full",
"Input lines with annotations"),
clEnumValN(DumpInputFilterAnnotation, "annotation",
"Input lines with starting points of annotations"),
clEnumValN(DumpInputFilterError, "error",
"Input lines with starting points of error "
"annotations")));
// The -dump-input-context option; it may be given several times, so it is a
// list.
static cl::list<unsigned> DumpInputContexts(
"dump-input-context", cl::value_desc("N"),
cl::desc("In the dump requested by -dump-input, print <N> input lines\n"
"before and <N> input lines after any lines specified by\n"
"-dump-input-filter. When there are multiple occurrences of\n"
"this option, the largest specified <N> has precedence. The\n"
"default is 5.\n"));
// Iterator over a list of string option values, such as CheckPrefixes.
typedef cl::list<std::string>::const_iterator prefix_iterator;
static void DumpCommandLine(int argc, char **argv) {
errs() << "FileCheck command line: ";
for (int I = 0; I < argc; I++)
errs() << " " << argv[I];
errs() << "\n";
}
/// How one annotation is drawn in the input dump: its leading marker
/// character, its color, an optional note, and whether it counts as an error
/// for -dump-input-filter=error.
struct MarkerStyle {
  /// The starting char (before tildes) for marking the line.
  char Lead = ' ';
  /// What color to use for this annotation.
  raw_ostream::Colors Color = raw_ostream::SAVEDCOLOR;
  /// A note to follow the marker, or empty string if none.
  std::string Note;
  /// Does this marker indicate inclusion by -dump-input-filter=error?
  bool FiltersAsError = false;

  /// Every member has a default initializer above, so a default-constructed
  /// style holds determinate values and is safe to copy before it is assigned.
  MarkerStyle() = default;

  /// Build a fully specified style. A style that filters as an error must
  /// carry a note; this is asserted.
  MarkerStyle(char Lead, raw_ostream::Colors Color,
              const std::string &Note = "", bool FiltersAsError = false)
      : Lead(Lead), Color(Color), Note(Note), FiltersAsError(FiltersAsError) {
    assert((!FiltersAsError || !Note.empty()) &&
           "expected error diagnostic to have note");
  }
};
/// Return the marker style used to annotate a diagnostic of kind \p MatchTy in
/// the input dump. Every style that filters as an error carries a note, as the
/// MarkerStyle constructor requires.
static MarkerStyle GetMarker(FileCheckDiag::MatchType MatchTy) {
  switch (MatchTy) {
  case FileCheckDiag::MatchFoundAndExpected:
    return MarkerStyle('^', raw_ostream::GREEN);
  case FileCheckDiag::MatchFoundButExcluded:
    return MarkerStyle('!', raw_ostream::RED, "error: no match expected",
                       /*FiltersAsError=*/true);
  case FileCheckDiag::MatchFoundButWrongLine:
    return MarkerStyle('!', raw_ostream::RED, "error: match on wrong line",
                       /*FiltersAsError=*/true);
  case FileCheckDiag::MatchFoundButDiscarded:
    return MarkerStyle('!', raw_ostream::CYAN,
                       "discard: overlaps earlier match");
  case FileCheckDiag::MatchFoundErrorNote:
    // Note should always be overridden within the FileCheckDiag.
    return MarkerStyle('!', raw_ostream::RED,
                       "error: unknown error after match",
                       /*FiltersAsError=*/true);
  case FileCheckDiag::MatchNoneAndExcluded:
    return MarkerStyle('X', raw_ostream::GREEN);
  case FileCheckDiag::MatchNoneButExpected:
    return MarkerStyle('X', raw_ostream::RED, "error: no match found",
                       /*FiltersAsError=*/true);
  case FileCheckDiag::MatchNoneForInvalidPattern:
    return MarkerStyle('X', raw_ostream::RED,
                       "error: match failed for invalid pattern",
                       /*FiltersAsError=*/true);
  case FileCheckDiag::MatchFuzzy:
    return MarkerStyle('?', raw_ostream::MAGENTA, "possible intended match",
                       /*FiltersAsError=*/true);
  }
  // Use the llvm_unreachable macro, as GetCheckTypeAbbreviation does, instead
  // of calling its implementation function directly.
  llvm_unreachable("unexpected match type");
}
/// Write the text requested by -dump-input=help to \p OS: the related
/// command-line options, followed by a legend for the labels, markers and
/// colors used in the input dump.
static void DumpInputAnnotationHelp(raw_ostream &OS) {
  // Print one legend key through a bold WithColor wrapper around OS.
  auto Bold = [&OS](const char *Key) {
    WithColor(OS, raw_ostream::SAVEDCOLOR, true) << Key;
  };

  OS << "The following description was requested by -dump-input=help to\n"
     << "explain the input dump printed by FileCheck.\n"
     << "\n"
     << "Related command-line options:\n"
     << "\n"
     << " - -dump-input=<value> enables or disables the input dump\n"
     << " - -dump-input-filter=<value> filters the input lines\n"
     << " - -dump-input-context=<N> adjusts the context of filtered lines\n"
     << " - -v and -vv add more annotations\n"
     << " - -color forces colors to be enabled both in the dump and below\n"
     << " - -help documents the above options in more detail\n"
     << "\n"
     << "These options can also be set via FILECHECK_OPTS. For example, for\n"
     << "maximum debugging output on failures:\n"
     << "\n"
     << " $ FILECHECK_OPTS='-dump-input-filter=all -vv -color' ninja check\n"
     << "\n"
     << "Input dump annotation format:\n"
     << "\n";

  // Labels for input lines.
  OS << " - ";
  Bold("L:");
  OS << " labels line number L of the input file\n"
     << " An extra space is added after each input line to represent"
     << " the\n"
     << " newline character\n";

  // Labels for annotation lines.
  OS << " - ";
  Bold("T:L");
  OS << " labels the only match result for either (1) a pattern of type T"
     << " from\n"
     << " line L of the check file if L is an integer or (2) the"
     << " I-th implicit\n"
     << " pattern if L is \"imp\" followed by an integer "
     << "I (index origin one)\n";
  OS << " - ";
  Bold("T:L'N");
  OS << " labels the Nth match result for such a pattern\n";

  // Markers on annotation lines.
  OS << " - ";
  Bold("^~~");
  OS << " marks good match (reported if -v)\n"
     << " - ";
  Bold("!~~");
  OS << " marks bad match, such as:\n"
     << " - CHECK-NEXT on same line as previous match (error)\n"
     << " - CHECK-NOT found (error)\n"
     << " - CHECK-DAG overlapping match (discarded, reported if "
     << "-vv)\n"
     << " - ";
  Bold("X~~");
  OS << " marks search range when no match is found, such as:\n"
     << " - CHECK-NEXT not found (error)\n"
     << " - CHECK-NOT not found (success, reported if -vv)\n"
     << " - CHECK-DAG not found after discarded matches (error)\n"
     << " - ";
  Bold("?");
  OS << " marks fuzzy match when no match is found\n";

  // Elided lines.
  OS << " - ";
  Bold("...");
  OS << " indicates elided input lines and annotations, as specified by\n"
     << " -dump-input-filter and -dump-input-context\n";

  // Colors.
  OS << " - colors ";
  WithColor(OS, raw_ostream::GREEN, true) << "success";
  OS << ", ";
  WithColor(OS, raw_ostream::RED, true) << "error";
  OS << ", ";
  WithColor(OS, raw_ostream::MAGENTA, true) << "fuzzy match";
  OS << ", ";
  WithColor(OS, raw_ostream::CYAN, true, false) << "discarded match";
  OS << ", ";
  WithColor(OS, raw_ostream::CYAN, true, true) << "unmatched input";
  OS << "\n";
}
/// Describes one marker line rendered beneath a single line of the input dump.
struct InputAnnotation {
  /// Position of the originating match result among all check results.
  unsigned DiagIndex;
  /// Text shown in the label column for this annotation.
  std::string Label;
  /// True for the first piece of a diagnostic; false for the continuation
  /// pieces created when a diagnostic is broken across several input lines.
  bool IsFirstLine;
  /// Input line (one-origin indexing) that this annotation marks.  When
  /// !IsFirstLine this might differ from the line on which the original
  /// diagnostic starts.
  unsigned InputLine;
  /// First column (one-origin indexing) of the marked range.
  unsigned InputStartCol;
  /// Column just past the marked range (one-origin indexing, open end).  A
  /// value of UINT_MAX stands for the last column before the newline.
  unsigned InputEndCol;
  /// How the marker is drawn.
  MarkerStyle Marker;
  /// True when this annotation records a good match for an expected pattern.
  bool FoundAndExpectedMatch;
};
/// Return the short tag that identifies a check directive type in annotation
/// labels.
static std::string GetCheckTypeAbbreviation(Check::FileCheckType Ty) {
  switch (Ty) {
  case Check::CheckPlain:
    // A plain directive whose count exceeds one gets its own tag.
    return Ty.getCount() > 1 ? "count" : "check";
  case Check::CheckNext:
    return "next";
  case Check::CheckSame:
    return "same";
  case Check::CheckNot:
    return "not";
  case Check::CheckDAG:
    return "dag";
  case Check::CheckLabel:
    return "label";
  case Check::CheckEmpty:
    return "empty";
  case Check::CheckComment:
    return "com";
  case Check::CheckEOF:
    return "eof";
  case Check::CheckBadNot:
    return "bad-not";
  case Check::CheckBadCount:
    return "bad-count";
  case Check::CheckNone:
    // CheckNone is never expected here.
    llvm_unreachable("invalid FileCheckType");
  }
  // Every enumerator is handled above; anything else is a corrupted value.
  llvm_unreachable("unknown FileCheckType");
}
static void
BuildInputAnnotations(const SourceMgr &SM, unsigned CheckFileBufferID,
const std::pair<unsigned, unsigned> &ImpPatBufferIDRange,
const std::vector<FileCheckDiag> &Diags,
std::vector<InputAnnotation> &Annotations,
unsigned &LabelWidth) {
struct CompareSMLoc {
bool operator()(const SMLoc &LHS, const SMLoc &RHS) const {
return LHS.getPointer() < RHS.getPointer();
}
};
// How many diagnostics does each pattern have?
std::map<SMLoc, unsigned, CompareSMLoc> DiagCountPerPattern;
for (auto Diag : Diags)
++DiagCountPerPattern[Diag.CheckLoc];
// How many diagnostics have we seen so far per pattern?
std::map<SMLoc, unsigned, CompareSMLoc> DiagIndexPerPattern;
// How many total diagnostics have we seen so far?
unsigned DiagIndex = 0;
// What's the widest label?
LabelWidth = 0;
for (auto DiagItr = Diags.begin(), DiagEnd = Diags.end(); DiagItr != DiagEnd;
++DiagItr) {
InputAnnotation A;
A.DiagIndex = DiagIndex++;
// Build label, which uniquely identifies this check result.
unsigned CheckBufferID = SM.FindBufferContainingLoc(DiagItr->CheckLoc);
auto CheckLineAndCol =
SM.getLineAndColumn(DiagItr->CheckLoc, CheckBufferID);
llvm::raw_string_ostream Label(A.Label);
Label << GetCheckTypeAbbreviation(DiagItr->CheckTy) << ":";
if (CheckBufferID == CheckFileBufferID)
Label << CheckLineAndCol.first;
else if (ImpPatBufferIDRange.first <= CheckBufferID &&
CheckBufferID < ImpPatBufferIDRange.second)
Label << "imp" << (CheckBufferID - ImpPatBufferIDRange.first + 1);
else
llvm_unreachable("expected diagnostic's check location to be either in "
"the check file or for an implicit pattern");
if (DiagCountPerPattern[DiagItr->CheckLoc] > 1)
Label << "'" << DiagIndexPerPattern[DiagItr->CheckLoc]++;
LabelWidth = std::max((std::string::size_type)LabelWidth, A.Label.size());
A.Marker = GetMarker(DiagItr->MatchTy);
if (!DiagItr->Note.empty()) {
A.Marker.Note = DiagItr->Note;
// It's less confusing if notes that don't actually have ranges don't have
// markers. For example, a marker for 'with "VAR" equal to "5"' would
// seem to indicate where "VAR" matches, but the location we actually have
// for the marker simply points to the start of the match/search range for
// the full pattern of which the substitution is potentially just one
// component.
if (DiagItr->InputStartLine == DiagItr->InputEndLine &&
DiagItr->InputStartCol == DiagItr->InputEndCol)
A.Marker.Lead = ' ';
}
if (DiagItr->MatchTy == FileCheckDiag::MatchFoundErrorNote) {
assert(!DiagItr->Note.empty() &&
"expected custom note for MatchFoundErrorNote");
A.Marker.Note = "error: " + A.Marker.Note;
}
A.FoundAndExpectedMatch =
DiagItr->MatchTy == FileCheckDiag::MatchFoundAndExpected;
// Compute the mark location, and break annotation into multiple
// annotations if it spans multiple lines.
A.IsFirstLine = true;
A.InputLine = DiagItr->InputStartLine;
A.InputStartCol = DiagItr->InputStartCol;
if (DiagItr->InputStartLine == DiagItr->InputEndLine) {
// Sometimes ranges are empty in order to indicate a specific point, but
// that would mean nothing would be marked, so adjust the range to
// include the following character.
A.InputEndCol =
std::max(DiagItr->InputStartCol + 1, DiagItr->InputEndCol);
Annotations.push_back(A);
} else {
assert(DiagItr->InputStartLine < DiagItr->InputEndLine &&
"expected input range not to be inverted");
A.InputEndCol = UINT_MAX;
Annotations.push_back(A);
for (unsigned L = DiagItr->InputStartLine + 1, E = DiagItr->InputEndLine;
L <= E; ++L) {
// If a range ends before the first column on a line, then it has no
// characters on that line, so there's nothing to render.
if (DiagItr->InputEndCol == 1 && L == E)
break;
InputAnnotation B;
B.DiagIndex = A.DiagIndex;
B.Label = A.Label;
B.IsFirstLine = false;
B.InputLine = L;
B.Marker = A.Marker;
B.Marker.Lead = '~';
B.Marker.Note = "";
B.InputStartCol = 1;
if (L != E)
B.InputEndCol = UINT_MAX;
else
B.InputEndCol = DiagItr->InputEndCol;
B.FoundAndExpectedMatch = A.FoundAndExpectedMatch;
Annotations.push_back(B);
}
}
}
}
/// Find the input line of the first annotation in [AnnotationBeg,
/// AnnotationEnd) that \p DumpInputFilter includes.
///
/// \returns \p CurInputLine when the filter is DumpInputFilterAll, the
/// InputLine of the first included annotation otherwise, or UINT_MAX when no
/// annotation in the range is included.
static unsigned FindInputLineInFilter(
    DumpInputFilterValue DumpInputFilter, unsigned CurInputLine,
    const std::vector<InputAnnotation>::iterator &AnnotationBeg,
    const std::vector<InputAnnotation>::iterator &AnnotationEnd) {
  // With no filtering, every input line (including the current one) counts.
  if (DumpInputFilter == DumpInputFilterAll)
    return CurInputLine;

  for (auto Itr = AnnotationBeg; Itr != AnnotationEnd; ++Itr) {
    bool Included = false;
    switch (DumpInputFilter) {
    case DumpInputFilterAll:
      llvm_unreachable("unexpected DumpInputFilterAll");
      break;
    case DumpInputFilterAnnotationFull:
      // Every annotation piece counts, continuation lines included.
      Included = true;
      break;
    case DumpInputFilterAnnotation:
      // Only the first line of each annotation counts.
      Included = Itr->IsFirstLine;
      break;
    case DumpInputFilterError:
      // Only the first line of annotations whose marker filters as an error.
      Included = Itr->IsFirstLine && Itr->Marker.FiltersAsError;
      break;
    }
    if (Included)
      return Itr->InputLine;
  }
  return UINT_MAX;
}
/// Flush the buffered elided text to \p OS in whichever form is shorter.
///
/// If \p ElidedLines contains more newlines than the vertical ellipsis has
/// rows, print the ellipsis (each dot right-justified at \p LabelWidth);
/// otherwise print \p ElidedLines verbatim.  \p ElidedLines is cleared either
/// way, so calling this with an empty buffer does nothing.
static void DumpEllipsisOrElidedLines(raw_ostream &OS, std::string &ElidedLines,
                                      unsigned LabelWidth) {
  if (!ElidedLines.empty()) {
    const unsigned EllipsisLines = 3;
    const size_t ElidedLineCount = StringRef(ElidedLines).count('\n');
    if (ElidedLineCount > EllipsisLines) {
      // The ellipsis is the more compact rendering.
      for (unsigned Row = 0; Row != EllipsisLines; ++Row) {
        WithColor(OS, raw_ostream::BLACK, /*Bold=*/true)
            << right_justify(".", LabelWidth);
        OS << '\n';
      }
    } else {
      // Eliding would not save any space, so show the lines themselves.
      OS << ElidedLines;
    }
    ElidedLines.clear();
  }
}
/// Print the input text to \p OS with line numbers, interleaving the
/// annotations that apply to each input line.
///
/// \param OS               stream receiving the dump.
/// \param Req              request settings; only Verbose is consulted here.
/// \param DumpInputFilter  selects which input lines are of interest.
/// \param DumpInputContext number of lines shown around each line of interest;
///                         lines outside that context are elided.
/// \param InputFileText    the input text to print; may be empty.
/// \param Annotations      annotations to render; sorted in place.
/// \param LabelWidth       width of the widest annotation label.
static void DumpAnnotatedInput(raw_ostream &OS, const FileCheckRequest &Req,
                               DumpInputFilterValue DumpInputFilter,
                               unsigned DumpInputContext,
                               StringRef InputFileText,
                               std::vector<InputAnnotation> &Annotations,
                               unsigned LabelWidth) {
  OS << "Input was:\n<<<<<<\n";

  // Sort annotations.
  llvm::sort(Annotations,
             [](const InputAnnotation &A, const InputAnnotation &B) {
               // 1. Sort annotations in the order of the input lines.
               //
               // This makes it easier to find relevant annotations while
               // iterating input lines in the implementation below.  FileCheck
               // does not always produce diagnostics in the order of input
               // lines due to, for example, CHECK-DAG and CHECK-NOT.
               if (A.InputLine != B.InputLine)
                 return A.InputLine < B.InputLine;
               // 2. Sort annotations in the temporal order FileCheck produced
               // their associated diagnostics.
               //
               // This sort offers several benefits:
               //
               // A. On a single input line, the order of annotations reflects
               //    the FileCheck logic for processing directives/patterns.
               //    This can be helpful in understanding cases in which the
               //    order of the associated directives/patterns in the check
               //    file or on the command line either (i) does not match the
               //    temporal order in which FileCheck looks for matches for the
               //    directives/patterns (due to, for example, CHECK-LABEL,
               //    CHECK-NOT, or `--implicit-check-not`) or (ii) does match
               //    that order but does not match the order of those
               //    diagnostics along an input line (due to, for example,
               //    CHECK-DAG).
               //
               //    On the other hand, because our presentation format presents
               //    input lines in order, there's no clear way to offer the
               //    same benefit across input lines.  For consistency, it might
               //    then seem worthwhile to have annotations on a single line
               //    also sorted in input order (that is, by input column).
               //    However, in practice, this appears to be more confusing
               //    than helpful.  Perhaps it's intuitive to expect annotations
               //    to be listed in the temporal order in which they were
               //    produced except in cases the presentation format obviously
               //    and inherently cannot support it (that is, across input
               //    lines).
               //
               // B. When diagnostics' annotations are split among multiple
               //    input lines, the user must track them from one input line
               //    to the next.  One property of the sort chosen here is that
               //    it facilitates the user in this regard by ensuring the
               //    following: when comparing any two input lines, a
               //    diagnostic's annotations are sorted in the same position
               //    relative to all other diagnostics' annotations.
               return A.DiagIndex < B.DiagIndex;
             });

  // Compute the width of the label column.
  const unsigned char *InputFilePtr = InputFileText.bytes_begin(),
                      *InputFileEnd = InputFileText.bytes_end();
  unsigned LineCount = InputFileText.count('\n');
  // Count a final line that has no terminating newline.  Empty input is tested
  // first so that we never read the byte before the start of the buffer, and
  // so that LineCount is at least one when passed to log10 below.
  if (InputFileText.empty() || InputFileText.back() != '\n')
    ++LineCount;
  unsigned LineNoWidth = std::log10(LineCount) + 1;
  // +3 below adds spaces (1) to the left of the (right-aligned) line numbers
  // on input lines and (2) to the right of the (left-aligned) labels on
  // annotation lines so that input lines and annotation lines are more
  // visually distinct.  For example, the spaces on the annotation lines ensure
  // that input line numbers and check directive line numbers never align
  // horizontally.  Those line numbers might not even be for the same file.
  // One space would be enough to achieve that, but more makes it even easier
  // to see.
  LabelWidth = std::max(LabelWidth, LineNoWidth) + 3;

  // Print annotated input lines.
  unsigned PrevLineInFilter = 0; // 0 means none so far
  unsigned NextLineInFilter = 0; // 0 means uncomputed, UINT_MAX means none
  std::string ElidedLines;
  raw_string_ostream ElidedLinesOS(ElidedLines);
  ColorMode TheColorMode =
      WithColor(OS).colorsEnabled() ? ColorMode::Enable : ColorMode::Disable;
  if (TheColorMode == ColorMode::Enable)
    ElidedLinesOS.enable_colors(true);
  auto AnnotationItr = Annotations.begin(), AnnotationEnd = Annotations.end();
  for (unsigned Line = 1;
       InputFilePtr != InputFileEnd || AnnotationItr != AnnotationEnd;
       ++Line) {
    const unsigned char *InputFileLine = InputFilePtr;

    // Compute the previous and next line included by the filter.
    if (NextLineInFilter < Line)
      NextLineInFilter = FindInputLineInFilter(DumpInputFilter, Line,
                                               AnnotationItr, AnnotationEnd);
    assert(NextLineInFilter && "expected NextLineInFilter to be computed");
    if (NextLineInFilter == Line)
      PrevLineInFilter = Line;

    // Elide this input line and its annotations if it's not within the
    // context specified by -dump-input-context of an input line included by
    // -dump-input-filter.  However, in case the resulting ellipsis would occupy
    // more lines than the input lines and annotations it elides, buffer the
    // elided lines and annotations so we can print them instead.
    raw_ostream *LineOS = &OS;
    if ((!PrevLineInFilter || PrevLineInFilter + DumpInputContext < Line) &&
        (NextLineInFilter == UINT_MAX ||
         Line + DumpInputContext < NextLineInFilter))
      LineOS = &ElidedLinesOS;
    else
      // This line is shown, so first flush whatever was elided before it.
      DumpEllipsisOrElidedLines(OS, ElidedLinesOS.str(), LabelWidth);

    // Print right-aligned line number.
    WithColor(*LineOS, raw_ostream::BLACK, /*Bold=*/true, /*BG=*/false,
              TheColorMode)
        << format_decimal(Line, LabelWidth) << ": ";

    // For the case where -v and colors are enabled, find the annotations for
    // good matches for expected patterns in order to highlight everything
    // else in the line.  There are no such annotations if -v is disabled.
    std::vector<InputAnnotation> FoundAndExpectedMatches;
    if (Req.Verbose && TheColorMode == ColorMode::Enable) {
      for (auto I = AnnotationItr; I != AnnotationEnd && I->InputLine == Line;
           ++I) {
        if (I->FoundAndExpectedMatch)
          FoundAndExpectedMatches.push_back(*I);
      }
    }

    // Print numbered line with highlighting where there are no matches for
    // expected patterns.
    bool Newline = false;
    {
      WithColor COS(*LineOS, raw_ostream::SAVEDCOLOR, /*Bold=*/false,
                    /*BG=*/false, TheColorMode);
      bool InMatch = false;
      if (Req.Verbose)
        COS.changeColor(raw_ostream::CYAN, true, true);
      for (unsigned Col = 1; InputFilePtr != InputFileEnd && !Newline; ++Col) {
        bool WasInMatch = InMatch;
        InMatch = false;
        // Iterate by reference: this runs once per input character, so
        // copying each annotation (and its label string) would be costly.
        for (const InputAnnotation &M : FoundAndExpectedMatches) {
          if (M.InputStartCol <= Col && Col < M.InputEndCol) {
            InMatch = true;
            break;
          }
        }
        // Switch highlighting only when crossing a match boundary.
        if (!WasInMatch && InMatch)
          COS.resetColor();
        else if (WasInMatch && !InMatch)
          COS.changeColor(raw_ostream::CYAN, true, true);
        if (*InputFilePtr == '\n') {
          // The newline is rendered as an extra space so it can be marked.
          Newline = true;
          COS << ' ';
        } else
          COS << *InputFilePtr;
        ++InputFilePtr;
      }
    }
    *LineOS << '\n';
    unsigned InputLineWidth = InputFilePtr - InputFileLine;

    // Print any annotations.
    while (AnnotationItr != AnnotationEnd &&
           AnnotationItr->InputLine == Line) {
      WithColor COS(*LineOS, AnnotationItr->Marker.Color, /*Bold=*/true,
                    /*BG=*/false, TheColorMode);
      // The two spaces below are where the ": " appears on input lines.
      COS << left_justify(AnnotationItr->Label, LabelWidth) << "  ";
      unsigned Col;
      for (Col = 1; Col < AnnotationItr->InputStartCol; ++Col)
        COS << ' ';
      COS << AnnotationItr->Marker.Lead;
      // If InputEndCol=UINT_MAX, stop at InputLineWidth.
      for (++Col; Col < AnnotationItr->InputEndCol && Col <= InputLineWidth;
           ++Col)
        COS << '~';
      const std::string &Note = AnnotationItr->Marker.Note;
      if (!Note.empty()) {
        // Put the note at the end of the input line.  If we were to instead
        // put the note right after the marker, subsequent annotations for the
        // same input line might appear to mark this note instead of the input
        // line.
        for (; Col <= InputLineWidth; ++Col)
          COS << ' ';
        COS << ' ' << Note;
      }
      COS << '\n';
      ++AnnotationItr;
    }
  }
  // Flush anything still buffered as elided at the end of the input.
  DumpEllipsisOrElidedLines(OS, ElidedLinesOS.str(), LabelWidth);

  OS << ">>>>>>\n";
}
int main(int argc, char **argv) {
// Enable use of ANSI color codes because FileCheck is using them to
// highlight text.
llvm::sys::Process::UseANSIEscapeCodes(true);
InitLLVM X(argc, argv);
cl::ParseCommandLineOptions(argc, argv, /*Overview*/ "", /*Errs*/ nullptr,
"FILECHECK_OPTS");
// Select -dump-input* values. The -help documentation specifies the default
// value and which value to choose if an option is specified multiple times.
// In the latter case, the general rule of thumb is to choose the value that
// provides the most information.
DumpInputValue DumpInput =
DumpInputs.empty()
? DumpInputFail
: *std::max_element(DumpInputs.begin(), DumpInputs.end());
DumpInputFilterValue DumpInputFilter;
if (DumpInputFilters.empty())
DumpInputFilter = DumpInput == DumpInputAlways ? DumpInputFilterAll
: DumpInputFilterError;
else
DumpInputFilter =
*std::max_element(DumpInputFilters.begin(), DumpInputFilters.end());
unsigned DumpInputContext = DumpInputContexts.empty()
? 5
: *std::max_element(DumpInputContexts.begin(),
DumpInputContexts.end());
if (DumpInput == DumpInputHelp) {
DumpInputAnnotationHelp(outs());
return 0;
}
if (CheckFilename.empty()) {
errs() << "<check-file> not specified\n";
return 2;
}
FileCheckRequest Req;
append_range(Req.CheckPrefixes, CheckPrefixes);
append_range(Req.CommentPrefixes, CommentPrefixes);
append_range(Req.ImplicitCheckNot, ImplicitCheckNot);
bool GlobalDefineError = false;
for (StringRef G : GlobalDefines) {
size_t EqIdx = G.find('=');
if (EqIdx == std::string::npos) {
errs() << "Missing equal sign in command-line definition '-D" << G
<< "'\n";
GlobalDefineError = true;
continue;
}
if (EqIdx == 0) {
errs() << "Missing variable name in command-line definition '-D" << G
<< "'\n";
GlobalDefineError = true;
continue;
}
Req.GlobalDefines.push_back(G);
}
if (GlobalDefineError)
return 2;
Req.AllowEmptyInput = AllowEmptyInput;
Req.AllowUnusedPrefixes = AllowUnusedPrefixes;
Req.EnableVarScope = EnableVarScope;
Req.AllowDeprecatedDagOverlap = AllowDeprecatedDagOverlap;
Req.Verbose = Verbose;
Req.VerboseVerbose = VerboseVerbose;
Req.NoCanonicalizeWhiteSpace = NoCanonicalizeWhiteSpace;
Req.MatchFullLines = MatchFullLines;
Req.IgnoreCase = IgnoreCase;
if (VerboseVerbose)
Req.Verbose = true;
FileCheck FC(Req);
if (!FC.ValidateCheckPrefixes())
return 2;
Regex PrefixRE = FC.buildCheckPrefixRegex();
std::string REError;
if (!PrefixRE.isValid(REError)) {
errs() << "Unable to combine check-prefix strings into a prefix regular "
"expression! This is likely a bug in FileCheck's verification of "
"the check-prefix strings. Regular expression parsing failed "
"with the following error: "
<< REError << "\n";
return 2;
}
SourceMgr SM;
// Read the expected strings from the check file.
ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
MemoryBuffer::getFileOrSTDIN(CheckFilename, /*IsText=*/true);
if (std::error_code EC = CheckFileOrErr.getError()) {
errs() << "Could not open check file '" << CheckFilename
<< "': " << EC.message() << '\n';
return 2;
}
MemoryBuffer &CheckFile = *CheckFileOrErr.get();
SmallString<4096> CheckFileBuffer;
StringRef CheckFileText = FC.CanonicalizeFile(CheckFile, CheckFileBuffer);
unsigned CheckFileBufferID =
SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
CheckFileText, CheckFile.getBufferIdentifier()),
SMLoc());
std::pair<unsigned, unsigned> ImpPatBufferIDRange;
if (FC.readCheckFile(SM, CheckFileText, PrefixRE, &ImpPatBufferIDRange))
return 2;
// Open the file to check and add it to SourceMgr.
ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
MemoryBuffer::getFileOrSTDIN(InputFilename, /*IsText=*/true);
if (InputFilename == "-")
InputFilename = "<stdin>"; // Overwrite for improved diagnostic messages
if (std::error_code EC = InputFileOrErr.getError()) {
errs() << "Could not open input file '" << InputFilename
<< "': " << EC.message() << '\n';
return 2;
}
MemoryBuffer &InputFile = *InputFileOrErr.get();
if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
DumpCommandLine(argc, argv);
return 2;
}
SmallString<4096> InputFileBuffer;
StringRef InputFileText = FC.CanonicalizeFile(InputFile, InputFileBuffer);
SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
InputFileText, InputFile.getBufferIdentifier()),
SMLoc());
std::vector<FileCheckDiag> Diags;
int ExitCode = FC.checkInput(SM, InputFileText,
DumpInput == DumpInputNever ? nullptr : &Diags)
? EXIT_SUCCESS
: 1;
if (DumpInput == DumpInputAlways ||
(ExitCode == 1 && DumpInput == DumpInputFail)) {
errs() << "\n"
<< "Input file: " << InputFilename << "\n"
<< "Check file: " << CheckFilename << "\n"
<< "\n"
<< "-dump-input=help explains the following input dump.\n"
<< "\n";
std::vector<InputAnnotation> Annotations;
unsigned LabelWidth;
BuildInputAnnotations(SM, CheckFileBufferID, ImpPatBufferIDRange, Diags,
Annotations, LabelWidth);
DumpAnnotatedInput(errs(), Req, DumpInputFilter, DumpInputContext,
InputFileText, Annotations, LabelWidth);
}
return ExitCode;
}

View file

@ -29,11 +29,7 @@
using namespace std;
using namespace llvm;
#if LDC_LLVM_VER >= 1500
#define BUILTIN_NAME_STRING "ClangBuiltinName"
#else
#define BUILTIN_NAME_STRING "GCCBuiltinName"
#endif
string dtype(Record* rec, bool readOnlyMem)
{