mirror of
https://github.com/ldc-developers/ldc.git
synced 2025-05-13 22:48:43 +03:00
Drop support for LLVM 14
Mainly because opaque IR pointers appear buggy in LLVM 14 (LLVM crashes when compiling druntime).
This commit is contained in:
parent
3067b7b115
commit
2cd14c59dd
30 changed files with 18 additions and 8934 deletions
|
@ -105,7 +105,7 @@ jobs:
|
|||
environment:
|
||||
- PARALLELISM: 4
|
||||
- CI_OS: linux
|
||||
- LLVM_MAJOR: 14
|
||||
- LLVM_MAJOR: 15
|
||||
- HOST_LDC_VERSION: 1.24.0
|
||||
- EXTRA_CMAKE_FLAGS: "-DMULTILIB=ON -DRT_SUPPORT_SANITIZERS=ON -DBUILD_LTO_LIBS=ON"
|
||||
Ubuntu-20.04-sharedLibsOnly-gdmd:
|
||||
|
@ -116,7 +116,7 @@ jobs:
|
|||
environment:
|
||||
- PARALLELISM: 4
|
||||
- CI_OS: linux
|
||||
- LLVM_MAJOR: 14
|
||||
- LLVM_MAJOR: 15
|
||||
- EXTRA_APT_PACKAGES: gdmd
|
||||
- EXTRA_CMAKE_FLAGS: "-DBUILD_SHARED_LIBS=ON -DBUILD_LTO_LIBS=ON -DD_COMPILER=gdmd -DLDC_LINK_MANUALLY=ON"
|
||||
|
||||
|
|
|
@ -36,11 +36,6 @@ jobs:
|
|||
host_dc: dmd-beta
|
||||
llvm_version: 15.0.6
|
||||
cmake_flags: -DBUILD_SHARED_LIBS=ON -DRT_SUPPORT_SANITIZERS=ON -DLIB_SUFFIX=64 -DLDC_LINK_MANUALLY=ON
|
||||
- job_name: macOS 11, LLVM 14, latest DMD beta
|
||||
os: macos-11
|
||||
host_dc: dmd-beta
|
||||
llvm_version: 14.0.6
|
||||
cmake_flags: -DBUILD_SHARED_LIBS=ON -DRT_SUPPORT_SANITIZERS=ON -DLDC_LINK_MANUALLY=ON -DCMAKE_CXX_COMPILER=/usr/bin/c++ -DCMAKE_C_COMPILER=/usr/bin/cc
|
||||
name: ${{ matrix.job_name }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
env:
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#### Big news
|
||||
|
||||
#### Platform support
|
||||
- Supports LLVM 14 - 18. Support for LLVM 11, 12 and 13 was dropped.
|
||||
- Supports LLVM 15 - 18. Support for LLVM 11 - 14 was dropped. The CLI options `-passmanager` and `-opaque-pointers` were removed.
|
||||
|
||||
#### Bug fixes
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ endfunction()
|
|||
# Locate LLVM.
|
||||
#
|
||||
|
||||
find_package(LLVM 14.0 REQUIRED
|
||||
find_package(LLVM 15.0 REQUIRED
|
||||
all-targets analysis asmparser asmprinter bitreader bitwriter codegen core
|
||||
debuginfodwarf debuginfomsf debuginfopdb demangle
|
||||
instcombine ipo instrumentation irreader libdriver linker lto mc
|
||||
|
@ -823,13 +823,8 @@ if (LDC_INSTALL_LLVM_RUNTIME_LIBS)
|
|||
copy_compilerrt_lib("darwin/libclang_rt.xray-fdr_osx.a" "libldc_rt.xray-fdr.a" FALSE)
|
||||
copy_compilerrt_lib("darwin/libclang_rt.xray-profiling_osx.a" "libldc_rt.xray-profiling.a" FALSE)
|
||||
elseif(UNIX)
|
||||
if(LDC_LLVM_VER LESS 1500)
|
||||
set(LDC_INSTALL_LLVM_RUNTIME_LIBS_OS_DEFAULT "linux")
|
||||
set(LDC_INSTALL_LLVM_RUNTIME_LIBS_ARCH_DEFAULT "x86_64")
|
||||
else()
|
||||
set(LDC_INSTALL_LLVM_RUNTIME_LIBS_OS_DEFAULT "x86_64-unknown-linux-gnu")
|
||||
set(LDC_INSTALL_LLVM_RUNTIME_LIBS_ARCH_DEFAULT "")
|
||||
endif()
|
||||
set(LDC_INSTALL_LLVM_RUNTIME_LIBS_OS "${LDC_INSTALL_LLVM_RUNTIME_LIBS_OS_DEFAULT}" CACHE STRING "Non-Mac Posix: OS used as directory name for the compiler-rt source libraries, e.g., 'freebsd'.")
|
||||
set(LDC_INSTALL_LLVM_RUNTIME_LIBS_ARCH "${LDC_INSTALL_LLVM_RUNTIME_LIBS_ARCH_DEFAULT}" CACHE STRING "Non-Mac Posix: architecture used as libname suffix for the compiler-rt source libraries, e.g., 'aarch64'.")
|
||||
if(LDC_INSTALL_LLVM_RUNTIME_LIBS_ARCH STREQUAL "")
|
||||
|
|
|
@ -36,7 +36,6 @@ set(llvm_config_names llvm-config-18.1 llvm-config181 llvm-config-18
|
|||
llvm-config-17.0 llvm-config170 llvm-config-17
|
||||
llvm-config-16.0 llvm-config160 llvm-config-16
|
||||
llvm-config-15.0 llvm-config150 llvm-config-15
|
||||
llvm-config-14.0 llvm-config140 llvm-config-14
|
||||
llvm-config)
|
||||
find_program(LLVM_CONFIG
|
||||
NAMES ${llvm_config_names}
|
||||
|
@ -49,12 +48,10 @@ if(APPLE)
|
|||
NAMES ${llvm_config_names}
|
||||
PATHS /opt/local/libexec/llvm-18/bin /opt/local/libexec/llvm-17/bin
|
||||
/opt/local/libexec/llvm-16/bin /opt/local/libexec/llvm-15/bin
|
||||
/opt/local/libexec/llvm-14/bin /opt/local/libexec/llvm-13/bin
|
||||
/opt/local/libexec/llvm-12/bin /opt/local/libexec/llvm-11/bin
|
||||
/opt/local/libexec/llvm/bin
|
||||
/usr/local/opt/llvm@18/bin /usr/local/opt/llvm@17/bin
|
||||
/usr/local/opt/llvm@16/bin /usr/local/opt/llvm@15/bin
|
||||
/usr/local/opt/llvm@14/bin /usr/local/opt/llvm/bin
|
||||
/usr/local/opt/llvm/bin
|
||||
NO_DEFAULT_PATH)
|
||||
endif()
|
||||
|
||||
|
|
|
@ -561,10 +561,8 @@ void parseCommandLine(Strings &sourceFiles) {
|
|||
// enforce opaque IR pointers
|
||||
#if LDC_LLVM_VER >= 1700
|
||||
// supports opaque IR pointers only
|
||||
#elif LDC_LLVM_VER >= 1500
|
||||
#else
|
||||
getGlobalContext().setOpaquePointers(true);
|
||||
#else // LLVM 14
|
||||
getGlobalContext().enableOpaquePointers();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -264,13 +264,7 @@ struct X86TargetABI : TargetABI {
|
|||
// Keep alignment for LLVM 13+, to prevent invalid `movaps` etc.,
|
||||
// but limit to 4 (required according to runnable/ldc_cabi1.d).
|
||||
auto align4 = llvm::Align(4);
|
||||
if (arg->attrs.getAlignment().
|
||||
#if LDC_LLVM_VER >= 1500
|
||||
value_or
|
||||
#else
|
||||
getValueOr
|
||||
#endif
|
||||
(align4) > align4)
|
||||
if (arg->attrs.getAlignment().value_or(align4) > align4)
|
||||
arg->attrs.addAlignmentAttr(align4);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1139,11 +1139,7 @@ void DtoDefineFunction(FuncDeclaration *fd, bool linkageAvailableExternally) {
|
|||
|
||||
// function attributes
|
||||
if (gABI->needsUnwindTables()) {
|
||||
#if LDC_LLVM_VER >= 1500
|
||||
func->setUWTableKind(llvm::UWTableKind::Default);
|
||||
#else
|
||||
func->addFnAttr(LLAttribute::UWTable);
|
||||
#endif
|
||||
}
|
||||
if (opts::isAnySanitizerEnabled() &&
|
||||
!opts::functionIsInSanitizerBlacklist(fd)) {
|
||||
|
|
|
@ -299,11 +299,7 @@ void addCoverageAnalysis(Module *m) {
|
|||
ctor->setCallingConv(gABI->callingConv(LINK::d));
|
||||
// Set function attributes. See functions.cpp:DtoDefineFunction()
|
||||
if (global.params.targetTriple->getArch() == llvm::Triple::x86_64) {
|
||||
#if LDC_LLVM_VER >= 1500
|
||||
ctor->setUWTableKind(llvm::UWTableKind::Default);
|
||||
#else
|
||||
ctor->addFnAttr(LLAttribute::UWTable);
|
||||
#endif
|
||||
}
|
||||
|
||||
llvm::BasicBlock *bb = llvm::BasicBlock::Create(gIR->context(), "", ctor);
|
||||
|
@ -404,11 +400,7 @@ void registerModuleInfo(Module *m) {
|
|||
}
|
||||
|
||||
void addModuleFlags(llvm::Module &m) {
|
||||
#if LDC_LLVM_VER >= 1500
|
||||
const auto ModuleMinFlag = llvm::Module::Min;
|
||||
#else
|
||||
const auto ModuleMinFlag = llvm::Module::Warning; // Fallback value
|
||||
#endif
|
||||
|
||||
if (opts::fCFProtection == opts::CFProtectionType::Return ||
|
||||
opts::fCFProtection == opts::CFProtectionType::Full) {
|
||||
|
|
|
@ -507,20 +507,11 @@ llvm::CallInst *DtoInlineAsmExpr(const Loc &loc, llvm::StringRef code,
|
|||
llvm::FunctionType *FT =
|
||||
llvm::FunctionType::get(returnType, operandTypes, false);
|
||||
|
||||
#if LDC_LLVM_VER < 1500
|
||||
// make sure the constraints are valid
|
||||
if (!llvm::InlineAsm::Verify(FT, constraints)) {
|
||||
error(loc, "inline asm constraints are invalid");
|
||||
fatal();
|
||||
}
|
||||
#else
|
||||
if (auto err = llvm::InlineAsm::verify(FT, constraints)) {
|
||||
error(loc, "inline asm constraints are invalid");
|
||||
llvm::errs() << err;
|
||||
fatal();
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// build asm call
|
||||
bool sideeffect = true;
|
||||
|
|
|
@ -273,11 +273,7 @@ struct LazyFunctionDeclarer {
|
|||
// FIXME: Move to better place (abi-x86-64.cpp?)
|
||||
// NOTE: There are several occurances if this line.
|
||||
if (global.params.targetTriple->getArch() == llvm::Triple::x86_64) {
|
||||
#if LDC_LLVM_VER >= 1500
|
||||
fn->setUWTableKind(llvm::UWTableKind::Default);
|
||||
#else
|
||||
fn->addFnAttr(LLAttribute::UWTable);
|
||||
#endif
|
||||
}
|
||||
|
||||
fn->setCallingConv(gABI->callingConv(dty, false));
|
||||
|
|
|
@ -19,8 +19,7 @@ else
|
|||
static assert(false, "This module is only valid for LDC");
|
||||
}
|
||||
|
||||
version (LDC_LLVM_1400) enum LLVM_version = 1400;
|
||||
else version (LDC_LLVM_1500) enum LLVM_version = 1500;
|
||||
version (LDC_LLVM_1500) enum LLVM_version = 1500;
|
||||
else version (LDC_LLVM_1600) enum LLVM_version = 1600;
|
||||
else version (LDC_LLVM_1700) enum LLVM_version = 1700;
|
||||
else version (LDC_LLVM_1800) enum LLVM_version = 1800;
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
// Test basic use of sample-based PGO profile
|
||||
|
||||
// REQUIRES: atleast_llvm1500
|
||||
|
||||
// RUN: split-file %s %t
|
||||
// RUN: %ldc -O2 -c -gline-tables-only -output-ll -of=%t.ll -fprofile-sample-use=%t/pgo-sample.prof %t/testcase.d && FileCheck %s < %t.ll
|
||||
|
||||
|
|
|
@ -1,48 +1,44 @@
|
|||
// https://github.com/ldc-developers/ldc/issues/3692
|
||||
|
||||
// REQUIRES: target_X86
|
||||
// REQUIRES: atmost_llvm1409
|
||||
// RUN: %ldc -mtriple=x86_64-linux-gnu -output-ll -of=%t.ll %s
|
||||
// RUN: FileCheck %s < %t.ll
|
||||
|
||||
|
||||
// D `int[3]` rewritten to LL `{ i64, i32 }` for SysV ABI - mismatching size and alignment
|
||||
// CHECK: define void @_D6gh36924takeFG3iZv({ i64, i32 } %a_arg)
|
||||
// CHECK-LABEL: define void @_D6gh36924takeFG3iZv({ i64, i32 } %a_arg)
|
||||
void take(int[3] a)
|
||||
{
|
||||
// the `{ i64, i32 }` size is 16 bytes, so we need a padded alloca (with 8-bytes alignment)
|
||||
// CHECK-NEXT: %.BaseBitcastABIRewrite_param_storage = alloca { i64, i32 }, align 8
|
||||
// CHECK-NEXT: store { i64, i32 } %a_arg, { i64, i32 }* %.BaseBitcastABIRewrite_param_storage
|
||||
// CHECK-NEXT: %a = bitcast { i64, i32 }* %.BaseBitcastABIRewrite_param_storage to [3 x i32]*
|
||||
// CHECK-NEXT: = alloca { i64, i32 }, align 8
|
||||
}
|
||||
|
||||
// CHECK: define void @_D6gh36924passFZv()
|
||||
// CHECK-LABEL: define void @_D6gh36924passFZv()
|
||||
void pass()
|
||||
{
|
||||
// CHECK-NEXT: %arrayliteral = alloca [3 x i32], align 4
|
||||
// we need an extra padded alloca with proper alignment
|
||||
// CHECK-NEXT: %.BaseBitcastABIRewrite_padded_arg_storage = alloca { i64, i32 }, align 8
|
||||
// CHECK: %.BaseBitcastABIRewrite_arg = load { i64, i32 }, { i64, i32 }* %.BaseBitcastABIRewrite_padded_arg_storage
|
||||
// CHECK: %.BaseBitcastABIRewrite_arg = load { i64, i32 }, {{\{ i64, i32 \}\*|ptr}} %.BaseBitcastABIRewrite_padded_arg_storage
|
||||
take([1, 2, 3]);
|
||||
}
|
||||
|
||||
|
||||
// D `int[4]` rewritten to LL `{ i64, i64 }` for SysV ABI - mismatching alignment only
|
||||
// CHECK: define void @_D6gh36925take4FG4iZv({ i64, i64 } %a_arg)
|
||||
// CHECK-LABEL: define void @_D6gh36925take4FG4iZv({ i64, i64 } %a_arg)
|
||||
void take4(int[4] a)
|
||||
{
|
||||
// the alloca should have 8-bytes alignment, even though a.alignof == 4
|
||||
// CHECK-NEXT: %a = alloca [4 x i32], align 8
|
||||
// CHECK-NEXT: %1 = bitcast [4 x i32]* %a to { i64, i64 }*
|
||||
// CHECK-NEXT: store { i64, i64 } %a_arg, { i64, i64 }* %1
|
||||
// CHECK: store { i64, i64 } %a_arg, {{\{ i64, i64 \}\*|ptr}} %
|
||||
}
|
||||
|
||||
// CHECK: define void @_D6gh36925pass4FZv()
|
||||
// CHECK-LABEL: define void @_D6gh36925pass4FZv()
|
||||
void pass4()
|
||||
{
|
||||
// CHECK-NEXT: %arrayliteral = alloca [4 x i32], align 4
|
||||
// we need an extra alloca with 8-bytes alignment
|
||||
// CHECK-NEXT: %.BaseBitcastABIRewrite_padded_arg_storage = alloca { i64, i64 }, align 8
|
||||
// CHECK: %.BaseBitcastABIRewrite_arg = load { i64, i64 }, { i64, i64 }* %.BaseBitcastABIRewrite_padded_arg_storage
|
||||
// CHECK: %.BaseBitcastABIRewrite_arg = load { i64, i64 }, {{\{ i64, i64 \}\*|ptr}} %.BaseBitcastABIRewrite_padded_arg_storage
|
||||
take4([1, 2, 3, 4]);
|
||||
}
|
||||
|
|
|
@ -1,45 +0,0 @@
|
|||
// https://github.com/ldc-developers/ldc/issues/3692
|
||||
|
||||
// REQUIRES: target_X86
|
||||
// REQUIRES: atleast_llvm1500
|
||||
// RUN: %ldc -mtriple=x86_64-linux-gnu -output-ll -of=%t.ll %s
|
||||
// RUN: FileCheck %s < %t.ll
|
||||
|
||||
|
||||
// D `int[3]` rewritten to LL `{ i64, i32 }` for SysV ABI - mismatching size and alignment
|
||||
// CHECK-LABEL: define void @_D13gh3692_llvm154takeFG3iZv({ i64, i32 } %a_arg)
|
||||
void take(int[3] a)
|
||||
{
|
||||
// the `{ i64, i32 }` size is 16 bytes, so we need a padded alloca (with 8-bytes alignment)
|
||||
// CHECK-NEXT: = alloca { i64, i32 }, align 8
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define void @_D13gh3692_llvm154passFZv()
|
||||
void pass()
|
||||
{
|
||||
// CHECK-NEXT: %arrayliteral = alloca [3 x i32], align 4
|
||||
// we need an extra padded alloca with proper alignment
|
||||
// CHECK-NEXT: %.BaseBitcastABIRewrite_padded_arg_storage = alloca { i64, i32 }, align 8
|
||||
// CHECK: %.BaseBitcastABIRewrite_arg = load { i64, i32 }, {{\{ i64, i32 \}\*|ptr}} %.BaseBitcastABIRewrite_padded_arg_storage
|
||||
take([1, 2, 3]);
|
||||
}
|
||||
|
||||
|
||||
// D `int[4]` rewritten to LL `{ i64, i64 }` for SysV ABI - mismatching alignment only
|
||||
// CHECK-LABEL: define void @_D13gh3692_llvm155take4FG4iZv({ i64, i64 } %a_arg)
|
||||
void take4(int[4] a)
|
||||
{
|
||||
// the alloca should have 8-bytes alignment, even though a.alignof == 4
|
||||
// CHECK-NEXT: %a = alloca [4 x i32], align 8
|
||||
// CHECK: store { i64, i64 } %a_arg, {{\{ i64, i64 \}\*|ptr}} %
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define void @_D13gh3692_llvm155pass4FZv()
|
||||
void pass4()
|
||||
{
|
||||
// CHECK-NEXT: %arrayliteral = alloca [4 x i32], align 4
|
||||
// we need an extra alloca with 8-bytes alignment
|
||||
// CHECK-NEXT: %.BaseBitcastABIRewrite_padded_arg_storage = alloca { i64, i64 }, align 8
|
||||
// CHECK: %.BaseBitcastABIRewrite_arg = load { i64, i64 }, {{\{ i64, i64 \}\*|ptr}} %.BaseBitcastABIRewrite_padded_arg_storage
|
||||
take4([1, 2, 3, 4]);
|
||||
}
|
File diff suppressed because it is too large
Load diff
|
@ -1,23 +0,0 @@
|
|||
|
||||
set(LLVM_LINK_COMPONENTS
|
||||
AllTargetsDescs
|
||||
AllTargetsDisassemblers
|
||||
AllTargetsInfos
|
||||
DebugInfoDWARF
|
||||
Core
|
||||
MC
|
||||
IPO
|
||||
MCDisassembler
|
||||
Object
|
||||
ProfileData
|
||||
Support
|
||||
Symbolize
|
||||
)
|
||||
|
||||
add_llvm_tool(llvm-profgen
|
||||
llvm-profgen.cpp
|
||||
PerfReader.cpp
|
||||
CSPreInliner.cpp
|
||||
ProfiledBinary.cpp
|
||||
ProfileGenerator.cpp
|
||||
)
|
|
@ -1,285 +0,0 @@
|
|||
//===-- CSPreInliner.cpp - Profile guided preinliner -------------- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "CSPreInliner.h"
|
||||
#include "ProfiledBinary.h"
|
||||
#include "llvm/ADT/SCCIterator.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include <cstdint>
|
||||
#include <queue>
|
||||
|
||||
#define DEBUG_TYPE "cs-preinliner"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace sampleprof;
|
||||
|
||||
STATISTIC(PreInlNumCSInlined,
|
||||
"Number of functions inlined with context sensitive profile");
|
||||
STATISTIC(PreInlNumCSNotInlined,
|
||||
"Number of functions not inlined with context sensitive profile");
|
||||
STATISTIC(PreInlNumCSInlinedHitMinLimit,
|
||||
"Number of functions with FDO inline stopped due to min size limit");
|
||||
STATISTIC(PreInlNumCSInlinedHitMaxLimit,
|
||||
"Number of functions with FDO inline stopped due to max size limit");
|
||||
STATISTIC(
|
||||
PreInlNumCSInlinedHitGrowthLimit,
|
||||
"Number of functions with FDO inline stopped due to growth size limit");
|
||||
|
||||
// The switches specify inline thresholds used in SampleProfileLoader inlining.
|
||||
// TODO: the actual threshold to be tuned here because the size here is based
|
||||
// on machine code not LLVM IR.
|
||||
extern cl::opt<int> SampleHotCallSiteThreshold;
|
||||
extern cl::opt<int> SampleColdCallSiteThreshold;
|
||||
extern cl::opt<int> ProfileInlineGrowthLimit;
|
||||
extern cl::opt<int> ProfileInlineLimitMin;
|
||||
extern cl::opt<int> ProfileInlineLimitMax;
|
||||
extern cl::opt<bool> SortProfiledSCC;
|
||||
|
||||
cl::opt<bool> EnableCSPreInliner(
|
||||
"csspgo-preinliner", cl::Hidden, cl::init(true),
|
||||
cl::desc("Run a global pre-inliner to merge context profile based on "
|
||||
"estimated global top-down inline decisions"));
|
||||
|
||||
cl::opt<bool> UseContextCostForPreInliner(
|
||||
"use-context-cost-for-preinliner", cl::Hidden, cl::init(true),
|
||||
cl::desc("Use context-sensitive byte size cost for preinliner decisions"));
|
||||
|
||||
static cl::opt<bool> SamplePreInlineReplay(
|
||||
"csspgo-replay-preinline", cl::Hidden, cl::init(false),
|
||||
cl::desc(
|
||||
"Replay previous inlining and adjust context profile accordingly"));
|
||||
|
||||
CSPreInliner::CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary,
|
||||
uint64_t HotThreshold, uint64_t ColdThreshold)
|
||||
: UseContextCost(UseContextCostForPreInliner),
|
||||
// TODO: Pass in a guid-to-name map in order for
|
||||
// ContextTracker.getFuncNameFor to work, if `Profiles` can have md5 codes
|
||||
// as their profile context.
|
||||
ContextTracker(Profiles, nullptr), ProfileMap(Profiles), Binary(Binary),
|
||||
HotCountThreshold(HotThreshold), ColdCountThreshold(ColdThreshold) {
|
||||
// Set default preinliner hot/cold call site threshold tuned with CSSPGO.
|
||||
// for good performance with reasonable profile size.
|
||||
if (!SampleHotCallSiteThreshold.getNumOccurrences())
|
||||
SampleHotCallSiteThreshold = 1500;
|
||||
if (!SampleColdCallSiteThreshold.getNumOccurrences())
|
||||
SampleColdCallSiteThreshold = 0;
|
||||
}
|
||||
|
||||
std::vector<StringRef> CSPreInliner::buildTopDownOrder() {
|
||||
std::vector<StringRef> Order;
|
||||
ProfiledCallGraph ProfiledCG(ContextTracker);
|
||||
|
||||
// Now that we have a profiled call graph, construct top-down order
|
||||
// by building up SCC and reversing SCC order.
|
||||
scc_iterator<ProfiledCallGraph *> I = scc_begin(&ProfiledCG);
|
||||
while (!I.isAtEnd()) {
|
||||
auto Range = *I;
|
||||
if (SortProfiledSCC) {
|
||||
// Sort nodes in one SCC based on callsite hotness.
|
||||
scc_member_iterator<ProfiledCallGraph *> SI(*I);
|
||||
Range = *SI;
|
||||
}
|
||||
for (auto *Node : Range) {
|
||||
if (Node != ProfiledCG.getEntryNode())
|
||||
Order.push_back(Node->Name);
|
||||
}
|
||||
++I;
|
||||
}
|
||||
std::reverse(Order.begin(), Order.end());
|
||||
|
||||
return Order;
|
||||
}
|
||||
|
||||
bool CSPreInliner::getInlineCandidates(ProfiledCandidateQueue &CQueue,
|
||||
const FunctionSamples *CallerSamples) {
|
||||
assert(CallerSamples && "Expect non-null caller samples");
|
||||
|
||||
// Ideally we want to consider everything a function calls, but as far as
|
||||
// context profile is concerned, only those frames that are children of
|
||||
// current one in the trie is relavent. So we walk the trie instead of call
|
||||
// targets from function profile.
|
||||
ContextTrieNode *CallerNode =
|
||||
ContextTracker.getContextFor(CallerSamples->getContext());
|
||||
|
||||
bool HasNewCandidate = false;
|
||||
for (auto &Child : CallerNode->getAllChildContext()) {
|
||||
ContextTrieNode *CalleeNode = &Child.second;
|
||||
FunctionSamples *CalleeSamples = CalleeNode->getFunctionSamples();
|
||||
if (!CalleeSamples)
|
||||
continue;
|
||||
|
||||
// Call site count is more reliable, so we look up the corresponding call
|
||||
// target profile in caller's context profile to retrieve call site count.
|
||||
uint64_t CalleeEntryCount = CalleeSamples->getEntrySamples();
|
||||
uint64_t CallsiteCount = 0;
|
||||
LineLocation Callsite = CalleeNode->getCallSiteLoc();
|
||||
if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) {
|
||||
SampleRecord::CallTargetMap &TargetCounts = CallTargets.get();
|
||||
auto It = TargetCounts.find(CalleeSamples->getName());
|
||||
if (It != TargetCounts.end())
|
||||
CallsiteCount = It->second;
|
||||
}
|
||||
|
||||
// TODO: call site and callee entry count should be mostly consistent, add
|
||||
// check for that.
|
||||
HasNewCandidate = true;
|
||||
uint32_t CalleeSize = getFuncSize(*CalleeSamples);
|
||||
CQueue.emplace(CalleeSamples, std::max(CallsiteCount, CalleeEntryCount),
|
||||
CalleeSize);
|
||||
}
|
||||
|
||||
return HasNewCandidate;
|
||||
}
|
||||
|
||||
uint32_t CSPreInliner::getFuncSize(const FunctionSamples &FSamples) {
|
||||
if (UseContextCost) {
|
||||
return Binary.getFuncSizeForContext(FSamples.getContext());
|
||||
}
|
||||
|
||||
return FSamples.getBodySamples().size();
|
||||
}
|
||||
|
||||
bool CSPreInliner::shouldInline(ProfiledInlineCandidate &Candidate) {
|
||||
// If replay inline is requested, simply follow the inline decision of the
|
||||
// profiled binary.
|
||||
if (SamplePreInlineReplay)
|
||||
return Candidate.CalleeSamples->getContext().hasAttribute(
|
||||
ContextWasInlined);
|
||||
|
||||
// Adjust threshold based on call site hotness, only do this for callsite
|
||||
// prioritized inliner because otherwise cost-benefit check is done earlier.
|
||||
unsigned int SampleThreshold = SampleColdCallSiteThreshold;
|
||||
if (Candidate.CallsiteCount > HotCountThreshold)
|
||||
SampleThreshold = SampleHotCallSiteThreshold;
|
||||
|
||||
// TODO: for small cold functions, we may inlined them and we need to keep
|
||||
// context profile accordingly.
|
||||
if (Candidate.CallsiteCount < ColdCountThreshold)
|
||||
SampleThreshold = SampleColdCallSiteThreshold;
|
||||
|
||||
return (Candidate.SizeCost < SampleThreshold);
|
||||
}
|
||||
|
||||
void CSPreInliner::processFunction(const StringRef Name) {
|
||||
FunctionSamples *FSamples = ContextTracker.getBaseSamplesFor(Name);
|
||||
if (!FSamples)
|
||||
return;
|
||||
|
||||
unsigned FuncSize = getFuncSize(*FSamples);
|
||||
unsigned FuncFinalSize = FuncSize;
|
||||
unsigned SizeLimit = FuncSize * ProfileInlineGrowthLimit;
|
||||
SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax);
|
||||
SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin);
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Process " << Name
|
||||
<< " for context-sensitive pre-inlining (pre-inline size: "
|
||||
<< FuncSize << ", size limit: " << SizeLimit << ")\n");
|
||||
|
||||
ProfiledCandidateQueue CQueue;
|
||||
getInlineCandidates(CQueue, FSamples);
|
||||
|
||||
while (!CQueue.empty() && FuncFinalSize < SizeLimit) {
|
||||
ProfiledInlineCandidate Candidate = CQueue.top();
|
||||
CQueue.pop();
|
||||
bool ShouldInline = false;
|
||||
if ((ShouldInline = shouldInline(Candidate))) {
|
||||
// We mark context as inlined as the corresponding context profile
|
||||
// won't be merged into that function's base profile.
|
||||
++PreInlNumCSInlined;
|
||||
ContextTracker.markContextSamplesInlined(Candidate.CalleeSamples);
|
||||
Candidate.CalleeSamples->getContext().setAttribute(
|
||||
ContextShouldBeInlined);
|
||||
FuncFinalSize += Candidate.SizeCost;
|
||||
getInlineCandidates(CQueue, Candidate.CalleeSamples);
|
||||
} else {
|
||||
++PreInlNumCSNotInlined;
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << (ShouldInline ? " Inlined" : " Outlined")
|
||||
<< " context profile for: "
|
||||
<< Candidate.CalleeSamples->getContext().toString()
|
||||
<< " (callee size: " << Candidate.SizeCost
|
||||
<< ", call count:" << Candidate.CallsiteCount << ")\n");
|
||||
}
|
||||
|
||||
if (!CQueue.empty()) {
|
||||
if (SizeLimit == (unsigned)ProfileInlineLimitMax)
|
||||
++PreInlNumCSInlinedHitMaxLimit;
|
||||
else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
|
||||
++PreInlNumCSInlinedHitMinLimit;
|
||||
else
|
||||
++PreInlNumCSInlinedHitGrowthLimit;
|
||||
}
|
||||
|
||||
LLVM_DEBUG({
|
||||
if (!CQueue.empty())
|
||||
dbgs() << " Inline candidates ignored due to size limit (inliner "
|
||||
"original size: "
|
||||
<< FuncSize << ", inliner final size: " << FuncFinalSize
|
||||
<< ", size limit: " << SizeLimit << ")\n";
|
||||
|
||||
while (!CQueue.empty()) {
|
||||
ProfiledInlineCandidate Candidate = CQueue.top();
|
||||
CQueue.pop();
|
||||
bool WasInlined =
|
||||
Candidate.CalleeSamples->getContext().hasAttribute(ContextWasInlined);
|
||||
dbgs() << " " << Candidate.CalleeSamples->getContext().toString()
|
||||
<< " (candidate size:" << Candidate.SizeCost
|
||||
<< ", call count: " << Candidate.CallsiteCount << ", previously "
|
||||
<< (WasInlined ? "inlined)\n" : "not inlined)\n");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void CSPreInliner::run() {
|
||||
#ifndef NDEBUG
|
||||
auto printProfileNames = [](SampleProfileMap &Profiles, bool IsInput) {
|
||||
dbgs() << (IsInput ? "Input" : "Output") << " context-sensitive profiles ("
|
||||
<< Profiles.size() << " total):\n";
|
||||
for (auto &It : Profiles) {
|
||||
const FunctionSamples &Samples = It.second;
|
||||
dbgs() << " [" << Samples.getContext().toString() << "] "
|
||||
<< Samples.getTotalSamples() << ":" << Samples.getHeadSamples()
|
||||
<< "\n";
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
LLVM_DEBUG(printProfileNames(ProfileMap, true));
|
||||
|
||||
// Execute global pre-inliner to estimate a global top-down inline
|
||||
// decision and merge profiles accordingly. This helps with profile
|
||||
// merge for ThinLTO otherwise we won't be able to merge profiles back
|
||||
// to base profile across module/thin-backend boundaries.
|
||||
// It also helps better compress context profile to control profile
|
||||
// size, as we now only need context profile for functions going to
|
||||
// be inlined.
|
||||
for (StringRef FuncName : buildTopDownOrder()) {
|
||||
processFunction(FuncName);
|
||||
}
|
||||
|
||||
// Not inlined context profiles are merged into its base, so we can
|
||||
// trim out such profiles from the output.
|
||||
std::vector<SampleContext> ProfilesToBeRemoved;
|
||||
for (auto &It : ProfileMap) {
|
||||
SampleContext &Context = It.second.getContext();
|
||||
if (!Context.isBaseContext() && !Context.hasState(InlinedContext)) {
|
||||
assert(Context.hasState(MergedContext) &&
|
||||
"Not inlined context profile should be merged already");
|
||||
ProfilesToBeRemoved.push_back(It.first);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &ContextName : ProfilesToBeRemoved) {
|
||||
ProfileMap.erase(ContextName);
|
||||
}
|
||||
|
||||
// Make sure ProfileMap's key is consistent with FunctionSamples' name.
|
||||
SampleContextTrimmer(ProfileMap).canonicalizeContextProfiles();
|
||||
|
||||
LLVM_DEBUG(printProfileNames(ProfileMap, false));
|
||||
}
|
|
@ -1,95 +0,0 @@
|
|||
//===-- CSPreInliner.h - Profile guided preinliner ---------------- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TOOLS_LLVM_PROFGEN_PGOINLINEADVISOR_H
|
||||
#define LLVM_TOOLS_LLVM_PROFGEN_PGOINLINEADVISOR_H
|
||||
|
||||
#include "ProfiledBinary.h"
|
||||
#include "llvm/ADT/PriorityQueue.h"
|
||||
#include "llvm/ProfileData/ProfileCommon.h"
|
||||
#include "llvm/ProfileData/SampleProf.h"
|
||||
#include "llvm/Transforms/IPO/ProfiledCallGraph.h"
|
||||
#include "llvm/Transforms/IPO/SampleContextTracker.h"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace sampleprof;
|
||||
|
||||
namespace llvm {
|
||||
namespace sampleprof {
|
||||
|
||||
// Inline candidate seen from profile
|
||||
struct ProfiledInlineCandidate {
|
||||
ProfiledInlineCandidate(const FunctionSamples *Samples, uint64_t Count,
|
||||
uint32_t Size)
|
||||
: CalleeSamples(Samples), CallsiteCount(Count), SizeCost(Size) {}
|
||||
// Context-sensitive function profile for inline candidate
|
||||
const FunctionSamples *CalleeSamples;
|
||||
// Call site count for an inline candidate
|
||||
// TODO: make sure entry count for context profile and call site
|
||||
// target count for corresponding call are consistent.
|
||||
uint64_t CallsiteCount;
|
||||
// Size proxy for function under particular call context.
|
||||
uint64_t SizeCost;
|
||||
};
|
||||
|
||||
// Inline candidate comparer using call site weight
|
||||
struct ProfiledCandidateComparer {
|
||||
bool operator()(const ProfiledInlineCandidate &LHS,
|
||||
const ProfiledInlineCandidate &RHS) {
|
||||
if (LHS.CallsiteCount != RHS.CallsiteCount)
|
||||
return LHS.CallsiteCount < RHS.CallsiteCount;
|
||||
|
||||
if (LHS.SizeCost != RHS.SizeCost)
|
||||
return LHS.SizeCost > RHS.SizeCost;
|
||||
|
||||
// Tie breaker using GUID so we have stable/deterministic inlining order
|
||||
assert(LHS.CalleeSamples && RHS.CalleeSamples &&
|
||||
"Expect non-null FunctionSamples");
|
||||
return LHS.CalleeSamples->getGUID(LHS.CalleeSamples->getName()) <
|
||||
RHS.CalleeSamples->getGUID(RHS.CalleeSamples->getName());
|
||||
}
|
||||
};
|
||||
|
||||
using ProfiledCandidateQueue =
|
||||
PriorityQueue<ProfiledInlineCandidate, std::vector<ProfiledInlineCandidate>,
|
||||
ProfiledCandidateComparer>;
|
||||
|
||||
// Pre-compilation inliner based on context-sensitive profile.
|
||||
// The PreInliner estimates inline decision using hotness from profile
|
||||
// and cost estimation from machine code size. It helps merges context
|
||||
// profile globally and achieves better post-inine profile quality, which
|
||||
// otherwise won't be possible for ThinLTO. It also reduce context profile
|
||||
// size by only keep context that is estimated to be inlined.
|
||||
class CSPreInliner {
|
||||
public:
|
||||
CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary,
|
||||
uint64_t HotThreshold, uint64_t ColdThreshold);
|
||||
void run();
|
||||
|
||||
private:
|
||||
bool getInlineCandidates(ProfiledCandidateQueue &CQueue,
|
||||
const FunctionSamples *FCallerContextSamples);
|
||||
std::vector<StringRef> buildTopDownOrder();
|
||||
void processFunction(StringRef Name);
|
||||
bool shouldInline(ProfiledInlineCandidate &Candidate);
|
||||
uint32_t getFuncSize(const FunctionSamples &FSamples);
|
||||
bool UseContextCost;
|
||||
SampleContextTracker ContextTracker;
|
||||
SampleProfileMap &ProfileMap;
|
||||
ProfiledBinary &Binary;
|
||||
|
||||
// Count thresholds to answer isHotCount and isColdCount queries.
|
||||
// Mirrors the threshold in ProfileSummaryInfo.
|
||||
uint64_t HotCountThreshold;
|
||||
uint64_t ColdCountThreshold;
|
||||
};
|
||||
|
||||
} // end namespace sampleprof
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
|
@ -1,59 +0,0 @@
|
|||
//===-- CallContext.h - Call Context Handler ---------------------*- C++-*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TOOLS_LLVM_PROFGEN_CALLCONTEXT_H
|
||||
#define LLVM_TOOLS_LLVM_PROFGEN_CALLCONTEXT_H
|
||||
|
||||
#include "llvm/ProfileData/SampleProf.h"
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
namespace sampleprof {
|
||||
|
||||
inline std::string getCallSite(const SampleContextFrame &Callsite) {
|
||||
std::string CallsiteStr = Callsite.FuncName.str();
|
||||
CallsiteStr += ":";
|
||||
CallsiteStr += Twine(Callsite.Location.LineOffset).str();
|
||||
if (Callsite.Location.Discriminator > 0) {
|
||||
CallsiteStr += ".";
|
||||
CallsiteStr += Twine(Callsite.Location.Discriminator).str();
|
||||
}
|
||||
return CallsiteStr;
|
||||
}
|
||||
|
||||
// TODO: This operation is expansive. If it ever gets called multiple times we
|
||||
// may think of making a class wrapper with internal states for it.
|
||||
inline std::string getLocWithContext(const SampleContextFrameVector &Context) {
|
||||
std::ostringstream OContextStr;
|
||||
for (const auto &Callsite : Context) {
|
||||
if (OContextStr.str().size())
|
||||
OContextStr << " @ ";
|
||||
OContextStr << getCallSite(Callsite);
|
||||
}
|
||||
return OContextStr.str();
|
||||
}
|
||||
|
||||
// Reverse call context, i.e., in the order of callee frames to caller frames,
|
||||
// is useful during instruction printing or pseudo probe printing.
|
||||
inline std::string
|
||||
getReversedLocWithContext(const SampleContextFrameVector &Context) {
|
||||
std::ostringstream OContextStr;
|
||||
for (const auto &Callsite : reverse(Context)) {
|
||||
if (OContextStr.str().size())
|
||||
OContextStr << " @ ";
|
||||
OContextStr << getCallSite(Callsite);
|
||||
}
|
||||
return OContextStr.str();
|
||||
}
|
||||
|
||||
} // end namespace sampleprof
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
|
@ -1,56 +0,0 @@
|
|||
//===-- ErrorHandling.h - Error handler -------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TOOLS_LLVM_PROFGEN_ERRORHANDLING_H
|
||||
#define LLVM_TOOLS_LLVM_PROFGEN_ERRORHANDLING_H
|
||||
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/Support/Errc.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
#include "llvm/Support/ErrorOr.h"
|
||||
#include "llvm/Support/WithColor.h"
|
||||
#include <system_error>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
[[noreturn]] inline void exitWithError(const Twine &Message,
|
||||
StringRef Whence = StringRef(),
|
||||
StringRef Hint = StringRef()) {
|
||||
WithColor::error(errs(), "llvm-profgen");
|
||||
if (!Whence.empty())
|
||||
errs() << Whence.str() << ": ";
|
||||
errs() << Message << "\n";
|
||||
if (!Hint.empty())
|
||||
WithColor::note() << Hint.str() << "\n";
|
||||
::exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
[[noreturn]] inline void exitWithError(std::error_code EC,
|
||||
StringRef Whence = StringRef()) {
|
||||
exitWithError(EC.message(), Whence);
|
||||
}
|
||||
|
||||
[[noreturn]] inline void exitWithError(Error E, StringRef Whence) {
|
||||
exitWithError(errorToErrorCode(std::move(E)), Whence);
|
||||
}
|
||||
|
||||
template <typename T, typename... Ts>
|
||||
T unwrapOrError(Expected<T> EO, Ts &&... Args) {
|
||||
if (EO)
|
||||
return std::move(*EO);
|
||||
exitWithError(EO.takeError(), std::forward<Ts>(Args)...);
|
||||
}
|
||||
|
||||
inline void emitWarningSummary(uint64_t Num, uint64_t Total, StringRef Msg) {
|
||||
if (!Total || !Num)
|
||||
return;
|
||||
WithColor::warning() << format("%.2f", static_cast<double>(Num) * 100 / Total)
|
||||
<< "%(" << Num << "/" << Total << ") " << Msg << "\n";
|
||||
}
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load diff
|
@ -1,728 +0,0 @@
|
|||
//===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
|
||||
#define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
|
||||
#include "ErrorHandling.h"
|
||||
#include "ProfiledBinary.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Regex.h"
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
using namespace llvm;
|
||||
using namespace sampleprof;
|
||||
|
||||
namespace llvm {
|
||||
namespace sampleprof {
|
||||
|
||||
// Stream based trace line iterator
|
||||
class TraceStream {
|
||||
std::string CurrentLine;
|
||||
std::ifstream Fin;
|
||||
bool IsAtEoF = false;
|
||||
uint64_t LineNumber = 0;
|
||||
|
||||
public:
|
||||
TraceStream(StringRef Filename) : Fin(Filename.str()) {
|
||||
if (!Fin.good())
|
||||
exitWithError("Error read input perf script file", Filename);
|
||||
advance();
|
||||
}
|
||||
|
||||
StringRef getCurrentLine() {
|
||||
assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
|
||||
return CurrentLine;
|
||||
}
|
||||
|
||||
uint64_t getLineNumber() { return LineNumber; }
|
||||
|
||||
bool isAtEoF() { return IsAtEoF; }
|
||||
|
||||
// Read the next line
|
||||
void advance() {
|
||||
if (!std::getline(Fin, CurrentLine)) {
|
||||
IsAtEoF = true;
|
||||
return;
|
||||
}
|
||||
LineNumber++;
|
||||
}
|
||||
};
|
||||
|
||||
// The type of input format.
|
||||
enum PerfFormat {
|
||||
UnknownFormat = 0,
|
||||
PerfData = 1, // Raw linux perf.data.
|
||||
PerfScript = 2, // Perf script create by `perf script` command.
|
||||
UnsymbolizedProfile = 3, // Unsymbolized profile generated by llvm-profgen.
|
||||
|
||||
};
|
||||
|
||||
// The type of perfscript content.
|
||||
enum PerfContent {
|
||||
UnknownContent = 0,
|
||||
LBR = 1, // Only LBR sample.
|
||||
LBRStack = 2, // Hybrid sample including call stack and LBR stack.
|
||||
};
|
||||
|
||||
struct PerfInputFile {
|
||||
std::string InputFile;
|
||||
PerfFormat Format = PerfFormat::UnknownFormat;
|
||||
PerfContent Content = PerfContent::UnknownContent;
|
||||
};
|
||||
|
||||
// The parsed LBR sample entry.
|
||||
struct LBREntry {
|
||||
uint64_t Source = 0;
|
||||
uint64_t Target = 0;
|
||||
// An artificial branch stands for a series of consecutive branches starting
|
||||
// from the current binary with a transition through external code and
|
||||
// eventually landing back in the current binary.
|
||||
bool IsArtificial = false;
|
||||
LBREntry(uint64_t S, uint64_t T, bool I)
|
||||
: Source(S), Target(T), IsArtificial(I) {}
|
||||
|
||||
#ifndef NDEBUG
|
||||
void print() const {
|
||||
dbgs() << "from " << format("%#010x", Source) << " to "
|
||||
<< format("%#010x", Target);
|
||||
if (IsArtificial)
|
||||
dbgs() << " Artificial";
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifndef NDEBUG
|
||||
static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) {
|
||||
for (size_t I = 0; I < LBRStack.size(); I++) {
|
||||
dbgs() << "[" << I << "] ";
|
||||
LBRStack[I].print();
|
||||
dbgs() << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) {
|
||||
for (size_t I = 0; I < CallStack.size(); I++) {
|
||||
dbgs() << "[" << I << "] " << format("%#010x", CallStack[I]) << "\n";
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Hash interface for generic data of type T
|
||||
// Data should implement a \fn getHashCode and a \fn isEqual
|
||||
// Currently getHashCode is non-virtual to avoid the overhead of calling vtable,
|
||||
// i.e we explicitly calculate hash of derived class, assign to base class's
|
||||
// HashCode. This also provides the flexibility for calculating the hash code
|
||||
// incrementally(like rolling hash) during frame stack unwinding since unwinding
|
||||
// only changes the leaf of frame stack. \fn isEqual is a virtual function,
|
||||
// which will have perf overhead. In the future, if we redesign a better hash
|
||||
// function, then we can just skip this or switch to non-virtual function(like
|
||||
// just ignore comparision if hash conflicts probabilities is low)
|
||||
template <class T> class Hashable {
|
||||
public:
|
||||
std::shared_ptr<T> Data;
|
||||
Hashable(const std::shared_ptr<T> &D) : Data(D) {}
|
||||
|
||||
// Hash code generation
|
||||
struct Hash {
|
||||
uint64_t operator()(const Hashable<T> &Key) const {
|
||||
// Don't make it virtual for getHashCode
|
||||
uint64_t Hash = Key.Data->getHashCode();
|
||||
assert(Hash && "Should generate HashCode for it!");
|
||||
return Hash;
|
||||
}
|
||||
};
|
||||
|
||||
// Hash equal
|
||||
struct Equal {
|
||||
bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {
|
||||
// Precisely compare the data, vtable will have overhead.
|
||||
return LHS.Data->isEqual(RHS.Data.get());
|
||||
}
|
||||
};
|
||||
|
||||
T *getPtr() const { return Data.get(); }
|
||||
};
|
||||
|
||||
struct PerfSample {
|
||||
// LBR stack recorded in FIFO order.
|
||||
SmallVector<LBREntry, 16> LBRStack;
|
||||
// Call stack recorded in FILO(leaf to root) order, it's used for CS-profile
|
||||
// generation
|
||||
SmallVector<uint64_t, 16> CallStack;
|
||||
|
||||
virtual ~PerfSample() = default;
|
||||
uint64_t getHashCode() const {
|
||||
// Use simple DJB2 hash
|
||||
auto HashCombine = [](uint64_t H, uint64_t V) {
|
||||
return ((H << 5) + H) + V;
|
||||
};
|
||||
uint64_t Hash = 5381;
|
||||
for (const auto &Value : CallStack) {
|
||||
Hash = HashCombine(Hash, Value);
|
||||
}
|
||||
for (const auto &Entry : LBRStack) {
|
||||
Hash = HashCombine(Hash, Entry.Source);
|
||||
Hash = HashCombine(Hash, Entry.Target);
|
||||
}
|
||||
return Hash;
|
||||
}
|
||||
|
||||
bool isEqual(const PerfSample *Other) const {
|
||||
const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
|
||||
const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
|
||||
|
||||
if (CallStack.size() != OtherCallStack.size() ||
|
||||
LBRStack.size() != OtherLBRStack.size())
|
||||
return false;
|
||||
|
||||
if (!std::equal(CallStack.begin(), CallStack.end(), OtherCallStack.begin()))
|
||||
return false;
|
||||
|
||||
for (size_t I = 0; I < OtherLBRStack.size(); I++) {
|
||||
if (LBRStack[I].Source != OtherLBRStack[I].Source ||
|
||||
LBRStack[I].Target != OtherLBRStack[I].Target)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
void print() const {
|
||||
dbgs() << "LBR stack\n";
|
||||
printLBRStack(LBRStack);
|
||||
dbgs() << "Call stack\n";
|
||||
printCallStack(CallStack);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
// After parsing the sample, we record the samples by aggregating them
|
||||
// into this counter. The key stores the sample data and the value is
|
||||
// the sample repeat times.
|
||||
using AggregatedCounter =
|
||||
std::unordered_map<Hashable<PerfSample>, uint64_t,
|
||||
Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
|
||||
|
||||
using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
|
||||
|
||||
// The state for the unwinder, it doesn't hold the data but only keep the
|
||||
// pointer/index of the data, While unwinding, the CallStack is changed
|
||||
// dynamicially and will be recorded as the context of the sample
|
||||
struct UnwindState {
|
||||
// Profiled binary that current frame address belongs to
|
||||
const ProfiledBinary *Binary;
|
||||
// Call stack trie node
|
||||
struct ProfiledFrame {
|
||||
const uint64_t Address = DummyRoot;
|
||||
ProfiledFrame *Parent;
|
||||
SampleVector RangeSamples;
|
||||
SampleVector BranchSamples;
|
||||
std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
|
||||
|
||||
ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
|
||||
: Address(Addr), Parent(P) {}
|
||||
ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
|
||||
assert(Address && "Address can't be zero!");
|
||||
auto Ret = Children.emplace(
|
||||
Address, std::make_unique<ProfiledFrame>(Address, this));
|
||||
return Ret.first->second.get();
|
||||
}
|
||||
void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
|
||||
RangeSamples.emplace_back(std::make_tuple(Start, End, Count));
|
||||
}
|
||||
void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
|
||||
BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
|
||||
}
|
||||
bool isDummyRoot() { return Address == DummyRoot; }
|
||||
bool isExternalFrame() { return Address == ExternalAddr; }
|
||||
bool isLeafFrame() { return Children.empty(); }
|
||||
};
|
||||
|
||||
ProfiledFrame DummyTrieRoot;
|
||||
ProfiledFrame *CurrentLeafFrame;
|
||||
// Used to fall through the LBR stack
|
||||
uint32_t LBRIndex = 0;
|
||||
// Reference to PerfSample.LBRStack
|
||||
const SmallVector<LBREntry, 16> &LBRStack;
|
||||
// Used to iterate the address range
|
||||
InstructionPointer InstPtr;
|
||||
UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
|
||||
: Binary(Binary), LBRStack(Sample->LBRStack),
|
||||
InstPtr(Binary, Sample->CallStack.front()) {
|
||||
initFrameTrie(Sample->CallStack);
|
||||
}
|
||||
|
||||
bool validateInitialState() {
|
||||
uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
|
||||
uint64_t LeafAddr = CurrentLeafFrame->Address;
|
||||
assert((LBRLeaf != ExternalAddr || LBRLeaf == LeafAddr) &&
|
||||
"External leading LBR should match the leaf frame.");
|
||||
|
||||
// When we take a stack sample, ideally the sampling distance between the
|
||||
// leaf IP of stack and the last LBR target shouldn't be very large.
|
||||
// Use a heuristic size (0x100) to filter out broken records.
|
||||
if (LeafAddr < LBRLeaf || LeafAddr >= LBRLeaf + 0x100) {
|
||||
WithColor::warning() << "Bogus trace: stack tip = "
|
||||
<< format("%#010x", LeafAddr)
|
||||
<< ", LBR tip = " << format("%#010x\n", LBRLeaf);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void checkStateConsistency() {
|
||||
assert(InstPtr.Address == CurrentLeafFrame->Address &&
|
||||
"IP should align with context leaf");
|
||||
}
|
||||
|
||||
bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
|
||||
uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
|
||||
uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
|
||||
const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
|
||||
bool IsLastLBR() const { return LBRIndex == 0; }
|
||||
bool getLBRStackSize() const { return LBRStack.size(); }
|
||||
void advanceLBR() { LBRIndex++; }
|
||||
ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
|
||||
|
||||
void pushFrame(uint64_t Address) {
|
||||
CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
|
||||
}
|
||||
|
||||
void switchToFrame(uint64_t Address) {
|
||||
if (CurrentLeafFrame->Address == Address)
|
||||
return;
|
||||
CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
|
||||
}
|
||||
|
||||
void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
|
||||
|
||||
void clearCallStack() { CurrentLeafFrame = &DummyTrieRoot; }
|
||||
|
||||
void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
|
||||
ProfiledFrame *Cur = &DummyTrieRoot;
|
||||
for (auto Address : reverse(CallStack)) {
|
||||
Cur = Cur->getOrCreateChildFrame(Address);
|
||||
}
|
||||
CurrentLeafFrame = Cur;
|
||||
}
|
||||
|
||||
ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
|
||||
};
|
||||
|
||||
// Base class for sample counter key with context
|
||||
struct ContextKey {
|
||||
uint64_t HashCode = 0;
|
||||
virtual ~ContextKey() = default;
|
||||
uint64_t getHashCode() {
|
||||
if (HashCode == 0)
|
||||
genHashCode();
|
||||
return HashCode;
|
||||
}
|
||||
virtual void genHashCode() = 0;
|
||||
virtual bool isEqual(const ContextKey *K) const {
|
||||
return HashCode == K->HashCode;
|
||||
};
|
||||
|
||||
// Utilities for LLVM-style RTTI
|
||||
enum ContextKind { CK_StringBased, CK_ProbeBased };
|
||||
const ContextKind Kind;
|
||||
ContextKind getKind() const { return Kind; }
|
||||
ContextKey(ContextKind K) : Kind(K){};
|
||||
};
|
||||
|
||||
// String based context id
|
||||
struct StringBasedCtxKey : public ContextKey {
|
||||
SampleContextFrameVector Context;
|
||||
|
||||
bool WasLeafInlined;
|
||||
StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){};
|
||||
static bool classof(const ContextKey *K) {
|
||||
return K->getKind() == CK_StringBased;
|
||||
}
|
||||
|
||||
bool isEqual(const ContextKey *K) const override {
|
||||
const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(K);
|
||||
return Context == Other->Context;
|
||||
}
|
||||
|
||||
void genHashCode() override {
|
||||
HashCode = hash_value(SampleContextFrames(Context));
|
||||
}
|
||||
};
|
||||
|
||||
// Probe based context key as the intermediate key of context
|
||||
// String based context key will introduce redundant string handling
|
||||
// since the callee context is inferred from the context string which
|
||||
// need to be splitted by '@' to get the last location frame, so we
|
||||
// can just use probe instead and generate the string in the end.
|
||||
struct ProbeBasedCtxKey : public ContextKey {
|
||||
SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
|
||||
|
||||
ProbeBasedCtxKey() : ContextKey(CK_ProbeBased) {}
|
||||
static bool classof(const ContextKey *K) {
|
||||
return K->getKind() == CK_ProbeBased;
|
||||
}
|
||||
|
||||
bool isEqual(const ContextKey *K) const override {
|
||||
const ProbeBasedCtxKey *O = dyn_cast<ProbeBasedCtxKey>(K);
|
||||
assert(O != nullptr && "Probe based key shouldn't be null in isEqual");
|
||||
return std::equal(Probes.begin(), Probes.end(), O->Probes.begin(),
|
||||
O->Probes.end());
|
||||
}
|
||||
|
||||
void genHashCode() override {
|
||||
for (const auto *P : Probes) {
|
||||
HashCode = hash_combine(HashCode, P);
|
||||
}
|
||||
if (HashCode == 0) {
|
||||
// Avoid zero value of HashCode when it's an empty list
|
||||
HashCode = 1;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// The counter of branch samples for one function indexed by the branch,
|
||||
// which is represented as the source and target offset pair.
|
||||
using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
|
||||
// The counter of range samples for one function indexed by the range,
|
||||
// which is represented as the start and end offset pair.
|
||||
using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
|
||||
// Wrapper for sample counters including range counter and branch counter
|
||||
struct SampleCounter {
|
||||
RangeSample RangeCounter;
|
||||
BranchSample BranchCounter;
|
||||
|
||||
void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
|
||||
assert(Start <= End && "Invalid instruction range");
|
||||
RangeCounter[{Start, End}] += Repeat;
|
||||
}
|
||||
void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
|
||||
BranchCounter[{Source, Target}] += Repeat;
|
||||
}
|
||||
};
|
||||
|
||||
// Sample counter with context to support context-sensitive profile
|
||||
using ContextSampleCounterMap =
|
||||
std::unordered_map<Hashable<ContextKey>, SampleCounter,
|
||||
Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
|
||||
|
||||
struct FrameStack {
|
||||
SmallVector<uint64_t, 16> Stack;
|
||||
ProfiledBinary *Binary;
|
||||
FrameStack(ProfiledBinary *B) : Binary(B) {}
|
||||
bool pushFrame(UnwindState::ProfiledFrame *Cur) {
|
||||
assert(!Cur->isExternalFrame() &&
|
||||
"External frame's not expected for context stack.");
|
||||
Stack.push_back(Cur->Address);
|
||||
return true;
|
||||
}
|
||||
|
||||
void popFrame() {
|
||||
if (!Stack.empty())
|
||||
Stack.pop_back();
|
||||
}
|
||||
std::shared_ptr<StringBasedCtxKey> getContextKey();
|
||||
};
|
||||
|
||||
struct ProbeStack {
|
||||
SmallVector<const MCDecodedPseudoProbe *, 16> Stack;
|
||||
ProfiledBinary *Binary;
|
||||
ProbeStack(ProfiledBinary *B) : Binary(B) {}
|
||||
bool pushFrame(UnwindState::ProfiledFrame *Cur) {
|
||||
assert(!Cur->isExternalFrame() &&
|
||||
"External frame's not expected for context stack.");
|
||||
const MCDecodedPseudoProbe *CallProbe =
|
||||
Binary->getCallProbeForAddr(Cur->Address);
|
||||
// We may not find a probe for a merged or external callsite.
|
||||
// Callsite merging may cause the loss of original probe IDs.
|
||||
// Cutting off the context from here since the inliner will
|
||||
// not know how to consume a context with unknown callsites.
|
||||
if (!CallProbe)
|
||||
return false;
|
||||
Stack.push_back(CallProbe);
|
||||
return true;
|
||||
}
|
||||
|
||||
void popFrame() {
|
||||
if (!Stack.empty())
|
||||
Stack.pop_back();
|
||||
}
|
||||
// Use pseudo probe based context key to get the sample counter
|
||||
// A context stands for a call path from 'main' to an uninlined
|
||||
// callee with all inline frames recovered on that path. The probes
|
||||
// belonging to that call path is the probes either originated from
|
||||
// the callee or from any functions inlined into the callee. Since
|
||||
// pseudo probes are organized in a tri-tree style after decoded,
|
||||
// the tree path from the tri-tree root (which is the uninlined
|
||||
// callee) to the probe node forms an inline context.
|
||||
// Here we use a list of probe(pointer) as the context key to speed up
|
||||
// aggregation and the final context string will be generate in
|
||||
// ProfileGenerator
|
||||
std::shared_ptr<ProbeBasedCtxKey> getContextKey();
|
||||
};
|
||||
|
||||
/*
|
||||
As in hybrid sample we have a group of LBRs and the most recent sampling call
|
||||
stack, we can walk through those LBRs to infer more call stacks which would be
|
||||
used as context for profile. VirtualUnwinder is the class to do the call stack
|
||||
unwinding based on LBR state. Two types of unwinding are processd here:
|
||||
1) LBR unwinding and 2) linear range unwinding.
|
||||
Specifically, for each LBR entry(can be classified into call, return, regular
|
||||
branch), LBR unwinding will replay the operation by pushing, popping or
|
||||
switching leaf frame towards the call stack and since the initial call stack
|
||||
is most recently sampled, the replay should be in anti-execution order, i.e. for
|
||||
the regular case, pop the call stack when LBR is call, push frame on call stack
|
||||
when LBR is return. After each LBR processed, it also needs to align with the
|
||||
next LBR by going through instructions from previous LBR's target to current
|
||||
LBR's source, which is the linear unwinding. As instruction from linear range
|
||||
can come from different function by inlining, linear unwinding will do the range
|
||||
splitting and record counters by the range with same inline context. Over those
|
||||
unwinding process we will record each call stack as context id and LBR/linear
|
||||
range as sample counter for further CS profile generation.
|
||||
*/
|
||||
class VirtualUnwinder {
|
||||
public:
|
||||
VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
|
||||
: CtxCounterMap(Counter), Binary(B) {}
|
||||
bool unwind(const PerfSample *Sample, uint64_t Repeat);
|
||||
std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
|
||||
|
||||
uint64_t NumTotalBranches = 0;
|
||||
uint64_t NumExtCallBranch = 0;
|
||||
uint64_t NumMissingExternalFrame = 0;
|
||||
uint64_t NumMismatchedProEpiBranch = 0;
|
||||
uint64_t NumMismatchedExtCallBranch = 0;
|
||||
|
||||
private:
|
||||
bool isCallState(UnwindState &State) const {
|
||||
// The tail call frame is always missing here in stack sample, we will
|
||||
// use a specific tail call tracker to infer it.
|
||||
return Binary->addressIsCall(State.getCurrentLBRSource());
|
||||
}
|
||||
|
||||
bool isReturnState(UnwindState &State) const {
|
||||
// Simply check addressIsReturn, as ret is always reliable, both for
|
||||
// regular call and tail call.
|
||||
if (!Binary->addressIsReturn(State.getCurrentLBRSource()))
|
||||
return false;
|
||||
|
||||
// In a callback case, a return from internal code, say A, to external
|
||||
// runtime can happen. The external runtime can then call back to
|
||||
// another internal routine, say B. Making an artificial branch that
|
||||
// looks like a return from A to B can confuse the unwinder to treat
|
||||
// the instruction before B as the call instruction. Here we detect this
|
||||
// case if the return target is not the next inst of call inst, then we just
|
||||
// do not treat it as a return.
|
||||
uint64_t CallAddr =
|
||||
Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget());
|
||||
return (CallAddr != 0);
|
||||
}
|
||||
|
||||
void unwindCall(UnwindState &State);
|
||||
void unwindLinear(UnwindState &State, uint64_t Repeat);
|
||||
void unwindReturn(UnwindState &State);
|
||||
void unwindBranch(UnwindState &State);
|
||||
|
||||
template <typename T>
|
||||
void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
|
||||
// Collect each samples on trie node by DFS traversal
|
||||
template <typename T>
|
||||
void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
|
||||
void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
|
||||
|
||||
void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
|
||||
uint64_t Repeat);
|
||||
void recordBranchCount(const LBREntry &Branch, UnwindState &State,
|
||||
uint64_t Repeat);
|
||||
|
||||
ContextSampleCounterMap *CtxCounterMap;
|
||||
// Profiled binary that current frame address belongs to
|
||||
ProfiledBinary *Binary;
|
||||
// Keep track of all untracked callsites
|
||||
std::set<uint64_t> UntrackedCallsites;
|
||||
};
|
||||
|
||||
// Read perf trace to parse the events and samples.
|
||||
class PerfReaderBase {
|
||||
public:
|
||||
PerfReaderBase(ProfiledBinary *B, StringRef PerfTrace)
|
||||
: Binary(B), PerfTraceFile(PerfTrace) {
|
||||
// Initialize the base address to preferred address.
|
||||
Binary->setBaseAddress(Binary->getPreferredBaseAddress());
|
||||
};
|
||||
virtual ~PerfReaderBase() = default;
|
||||
static std::unique_ptr<PerfReaderBase> create(ProfiledBinary *Binary,
|
||||
PerfInputFile &PerfInput);
|
||||
|
||||
// Entry of the reader to parse multiple perf traces
|
||||
virtual void parsePerfTraces() = 0;
|
||||
const ContextSampleCounterMap &getSampleCounters() const {
|
||||
return SampleCounters;
|
||||
}
|
||||
bool profileIsCSFlat() { return ProfileIsCSFlat; }
|
||||
|
||||
protected:
|
||||
ProfiledBinary *Binary = nullptr;
|
||||
StringRef PerfTraceFile;
|
||||
|
||||
ContextSampleCounterMap SampleCounters;
|
||||
bool ProfileIsCSFlat = false;
|
||||
|
||||
uint64_t NumTotalSample = 0;
|
||||
uint64_t NumLeafExternalFrame = 0;
|
||||
uint64_t NumLeadingOutgoingLBR = 0;
|
||||
};
|
||||
|
||||
// Read perf script to parse the events and samples.
|
||||
class PerfScriptReader : public PerfReaderBase {
|
||||
public:
|
||||
PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace)
|
||||
: PerfReaderBase(B, PerfTrace){};
|
||||
|
||||
// Entry of the reader to parse multiple perf traces
|
||||
virtual void parsePerfTraces() override;
|
||||
// Generate perf script from perf data
|
||||
static PerfInputFile convertPerfDataToTrace(ProfiledBinary *Binary,
|
||||
PerfInputFile &File);
|
||||
// Extract perf script type by peaking at the input
|
||||
static PerfContent checkPerfScriptType(StringRef FileName);
|
||||
|
||||
protected:
|
||||
// The parsed MMap event
|
||||
struct MMapEvent {
|
||||
uint64_t PID = 0;
|
||||
uint64_t Address = 0;
|
||||
uint64_t Size = 0;
|
||||
uint64_t Offset = 0;
|
||||
StringRef BinaryPath;
|
||||
};
|
||||
|
||||
// Check whether a given line is LBR sample
|
||||
static bool isLBRSample(StringRef Line);
|
||||
// Check whether a given line is MMAP event
|
||||
static bool isMMap2Event(StringRef Line);
|
||||
// Parse a single line of a PERF_RECORD_MMAP2 event looking for a
|
||||
// mapping between the binary name and its memory layout.
|
||||
static bool extractMMap2EventForBinary(ProfiledBinary *Binary, StringRef Line,
|
||||
MMapEvent &MMap);
|
||||
// Update base address based on mmap events
|
||||
void updateBinaryAddress(const MMapEvent &Event);
|
||||
// Parse mmap event and update binary address
|
||||
void parseMMap2Event(TraceStream &TraceIt);
|
||||
// Parse perf events/samples and do aggregation
|
||||
void parseAndAggregateTrace();
|
||||
// Parse either an MMAP event or a perf sample
|
||||
void parseEventOrSample(TraceStream &TraceIt);
|
||||
// Warn if the relevant mmap event is missing.
|
||||
void warnIfMissingMMap();
|
||||
// Emit accumulate warnings.
|
||||
void warnTruncatedStack();
|
||||
// Warn if range is invalid.
|
||||
void warnInvalidRange();
|
||||
// Extract call stack from the perf trace lines
|
||||
bool extractCallstack(TraceStream &TraceIt,
|
||||
SmallVectorImpl<uint64_t> &CallStack);
|
||||
// Extract LBR stack from one perf trace line
|
||||
bool extractLBRStack(TraceStream &TraceIt,
|
||||
SmallVectorImpl<LBREntry> &LBRStack);
|
||||
uint64_t parseAggregatedCount(TraceStream &TraceIt);
|
||||
// Parse one sample from multiple perf lines, override this for different
|
||||
// sample type
|
||||
void parseSample(TraceStream &TraceIt);
|
||||
// An aggregated count is given to indicate how many times the sample is
|
||||
// repeated.
|
||||
virtual void parseSample(TraceStream &TraceIt, uint64_t Count){};
|
||||
void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
|
||||
// Post process the profile after trace aggregation, we will do simple range
|
||||
// overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
|
||||
virtual void generateUnsymbolizedProfile();
|
||||
void writeUnsymbolizedProfile(StringRef Filename);
|
||||
void writeUnsymbolizedProfile(raw_fd_ostream &OS);
|
||||
|
||||
// Samples with the repeating time generated by the perf reader
|
||||
AggregatedCounter AggregatedSamples;
|
||||
// Keep track of all invalid return addresses
|
||||
std::set<uint64_t> InvalidReturnAddresses;
|
||||
};
|
||||
|
||||
/*
|
||||
The reader of LBR only perf script.
|
||||
A typical LBR sample is like:
|
||||
40062f 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
|
||||
... 0x4005c8/0x4005dc/P/-/-/0
|
||||
*/
|
||||
class LBRPerfReader : public PerfScriptReader {
|
||||
public:
|
||||
LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace)
|
||||
: PerfScriptReader(Binary, PerfTrace){};
|
||||
// Parse the LBR only sample.
|
||||
virtual void parseSample(TraceStream &TraceIt, uint64_t Count) override;
|
||||
};
|
||||
|
||||
/*
|
||||
Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
|
||||
which is used to generate CS profile. An example of hybrid sample:
|
||||
4005dc # call stack leaf
|
||||
400634
|
||||
400684 # call stack root
|
||||
0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
|
||||
... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
|
||||
*/
|
||||
class HybridPerfReader : public PerfScriptReader {
|
||||
public:
|
||||
HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace)
|
||||
: PerfScriptReader(Binary, PerfTrace){};
|
||||
// Parse the hybrid sample including the call and LBR line
|
||||
void parseSample(TraceStream &TraceIt, uint64_t Count) override;
|
||||
void generateUnsymbolizedProfile() override;
|
||||
|
||||
private:
|
||||
// Unwind the hybrid samples after aggregration
|
||||
void unwindSamples();
|
||||
};
|
||||
|
||||
/*
|
||||
Format of unsymbolized profile:
|
||||
|
||||
[frame1 @ frame2 @ ...] # If it's a CS profile
|
||||
number of entries in RangeCounter
|
||||
from_1-to_1:count_1
|
||||
from_2-to_2:count_2
|
||||
......
|
||||
from_n-to_n:count_n
|
||||
number of entries in BranchCounter
|
||||
src_1->dst_1:count_1
|
||||
src_2->dst_2:count_2
|
||||
......
|
||||
src_n->dst_n:count_n
|
||||
[frame1 @ frame2 @ ...] # Next context
|
||||
......
|
||||
|
||||
Note that non-CS profile doesn't have the empty `[]` context.
|
||||
*/
|
||||
// Reads a previously-dumped unsymbolized profile (range/branch counters,
// optionally keyed by a context line) instead of a raw perf script.
class UnsymbolizedProfileReader : public PerfReaderBase {
public:
  UnsymbolizedProfileReader(ProfiledBinary *Binary, StringRef PerfTrace)
      : PerfReaderBase(Binary, PerfTrace){};
  void parsePerfTraces() override;

private:
  void readSampleCounters(TraceStream &TraceIt, SampleCounter &SCounters);
  void readUnsymbolizedProfile(StringRef Filename);

  // Owns the context strings referenced by the parsed counters.
  std::unordered_set<std::string> ContextStrSet;
};
|
||||
|
||||
} // end namespace sampleprof
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
|
@@ -1,979 +0,0 @@
|
|||
//===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ProfileGenerator.h"
|
||||
#include "ErrorHandling.h"
|
||||
#include "ProfiledBinary.h"
|
||||
#include "llvm/ProfileData/ProfileCommon.h"
|
||||
#include <float.h>
|
||||
#include <unordered_set>
|
||||
|
||||
// Command-line options controlling profile output format and the
// trimming/merging/density heuristics of profile generation.
cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
                                    cl::Required,
                                    cl::desc("Output profile file"));
static cl::alias OutputA("o", cl::desc("Alias for --output"),
                         cl::aliasopt(OutputFilename));

static cl::opt<SampleProfileFormat> OutputFormat(
    "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
    cl::values(
        clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
        clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
        clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
        clEnumValN(SPF_Text, "text", "Text encoding"),
        clEnumValN(SPF_GCC, "gcc",
                   "GCC encoding (only meaningful for -sample)")));

cl::opt<bool> UseMD5(
    "use-md5", cl::init(false), cl::Hidden,
    cl::desc("Use md5 to represent function names in the output profile (only "
             "meaningful for -extbinary)"));

static cl::opt<bool> PopulateProfileSymbolList(
    "populate-profile-symbol-list", cl::init(false), cl::Hidden,
    cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));

static cl::opt<bool> FillZeroForAllFuncs(
    "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
    cl::desc("Attribute all functions' range with zero count "
             "even it's not hit by any samples."));

// Writes through cl::location into CSProfileGenerator::MaxCompressionSize.
static cl::opt<int32_t, true> RecursionCompression(
    "compress-recursion",
    cl::desc("Compressing recursion by deduplicating adjacent frame "
             "sequences up to the specified size. -1 means no size limit."),
    cl::Hidden,
    cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));

static cl::opt<bool>
    TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore,
                    cl::desc("If the total count of the profile is smaller "
                             "than threshold, it will be trimmed."));

static cl::opt<bool> CSProfMergeColdContext(
    "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
    cl::desc("If the total count of context profile is smaller than "
             "the threshold, it will be merged into context-less base "
             "profile."));

static cl::opt<uint32_t> CSProfMaxColdContextDepth(
    "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore,
    cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
             "context-less base profile"));

// Writes through cl::location into CSProfileGenerator::MaxContextDepth.
static cl::opt<int, true> CSProfMaxContextDepth(
    "csprof-max-context-depth", cl::ZeroOrMore,
    cl::desc("Keep the last K contexts while merging profile. -1 means no "
             "depth limit."),
    cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));

static cl::opt<double> HotFunctionDensityThreshold(
    "hot-function-density-threshold", llvm::cl::init(1000),
    llvm::cl::desc(
        "specify density threshold for hot functions (default: 1000)"),
    llvm::cl::Optional);
static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
                                 llvm::cl::desc("show profile density details"),
                                 llvm::cl::Optional);

static cl::opt<bool> UpdateTotalSamples(
    "update-total-samples", llvm::cl::init(false),
    llvm::cl::desc(
        "Update total samples by accumulating all its body samples."),
    llvm::cl::Optional);

// Defined in ProfileData; reused here for the density report.
extern cl::opt<int> ProfileSummaryCutoffHot;

static cl::opt<bool> GenCSNestedProfile(
    "gen-cs-nested-profile", cl::Hidden, cl::init(false),
    cl::desc("Generate nested function profiles for CSSPGO"));
|
||||
|
||||
using namespace llvm;
|
||||
using namespace sampleprof;
|
||||
|
||||
namespace llvm {
|
||||
namespace sampleprof {
|
||||
|
||||
// Initialize the MaxCompressionSize to -1 which means no size limit
int32_t CSProfileGenerator::MaxCompressionSize = -1;

// -1 means no limit on the number of context frames kept while merging.
int CSProfileGenerator::MaxContextDepth = -1;

// Whether FS (flow-sensitive) discriminators are in use; set in create().
bool ProfileGeneratorBase::UseFSDiscriminator = false;
|
||||
|
||||
// Factory: pick the CS or non-CS generator based on \p ProfileIsCSFlat and
// record whether the binary uses FS discriminators (rejected for CS mode).
std::unique_ptr<ProfileGeneratorBase>
ProfileGeneratorBase::create(ProfiledBinary *Binary,
                             const ContextSampleCounterMap &SampleCounters,
                             bool ProfileIsCSFlat) {
  const bool BinaryUsesFS = Binary->useFSDiscriminator();
  std::unique_ptr<ProfileGeneratorBase> Result;
  if (!ProfileIsCSFlat) {
    Result.reset(new ProfileGenerator(Binary, SampleCounters));
  } else {
    if (BinaryUsesFS)
      exitWithError("FS discriminator is not supported in CS profile.");
    Result.reset(new CSProfileGenerator(Binary, SampleCounters));
  }
  ProfileGeneratorBase::UseFSDiscriminator = BinaryUsesFS;
  FunctionSamples::ProfileIsFS = BinaryUsesFS;

  return Result;
}
|
||||
|
||||
// Write \p ProfileMap out through \p Writer, optionally attaching a profile
// symbol list gathered from the binary's DWARF info.
void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
                                 SampleProfileMap &ProfileMap) {
  // Populate profile symbol list if extended binary format is used.
  ProfileSymbolList SymbolList;

  if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
    Binary->populateSymbolListFromDWARF(SymbolList);
    // SymbolList outlives the write() call below, so passing its address is
    // safe here.
    Writer->setProfileSymbolList(&SymbolList);
  }

  if (std::error_code EC = Writer->write(ProfileMap))
    exitWithError(std::move(EC));
}
|
||||
|
||||
void ProfileGeneratorBase::write() {
|
||||
auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
|
||||
if (std::error_code EC = WriterOrErr.getError())
|
||||
exitWithError(EC, OutputFilename);
|
||||
|
||||
if (UseMD5) {
|
||||
if (OutputFormat != SPF_Ext_Binary)
|
||||
WithColor::warning() << "-use-md5 is ignored. Specify "
|
||||
"--format=extbinary to enable it\n";
|
||||
else
|
||||
WriterOrErr.get()->setUseMD5();
|
||||
}
|
||||
|
||||
write(std::move(WriterOrErr.get()), ProfileMap);
|
||||
}
|
||||
|
||||
// Warn when the measured profile density suggests the collected sample count
// is too low for good AutoFDO optimization; with -show-density, also print
// the density value itself.
void ProfileGeneratorBase::showDensitySuggestion(double Density) {
  if (Density == 0.0)
    WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
                            "set too low. Please check your command.\n";
  else if (Density < HotFunctionDensityThreshold)
    WithColor::warning()
        << "AutoFDO is estimated to optimize better with "
        << format("%.1f", HotFunctionDensityThreshold / Density)
        << "x more samples. Please consider increasing sampling rate or "
           "profiling for longer duration to get more samples.\n";

  if (ShowDensity)
    outs() << "Minimum profile density for hot functions with top "
           << format("%.2f",
                     static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
                         10000)
           << "% total samples: " << format("%.1f", Density) << "\n";
}
|
||||
|
||||
double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
|
||||
uint64_t HotCntThreshold) {
|
||||
double Density = DBL_MAX;
|
||||
std::vector<const FunctionSamples *> HotFuncs;
|
||||
for (auto &I : Profiles) {
|
||||
auto &FuncSamples = I.second;
|
||||
if (FuncSamples.getTotalSamples() < HotCntThreshold)
|
||||
continue;
|
||||
HotFuncs.emplace_back(&FuncSamples);
|
||||
}
|
||||
|
||||
for (auto *FuncSamples : HotFuncs) {
|
||||
auto *Func = Binary->getBinaryFunction(FuncSamples->getName());
|
||||
if (!Func)
|
||||
continue;
|
||||
uint64_t FuncSize = Func->getFuncSize();
|
||||
if (FuncSize == 0)
|
||||
continue;
|
||||
Density =
|
||||
std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
|
||||
FuncSize);
|
||||
}
|
||||
|
||||
return Density == DBL_MAX ? 0.0 : Density;
|
||||
}
|
||||
|
||||
// Split a set of possibly-overlapping [begin, end] sample ranges into
// disjoint ranges whose count is the sum of all ranges covering them.
void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
                                              const RangeSample &Ranges) {

  /*
  Regions may overlap with each other. Using the boundary info, find all
  disjoint ranges and their sample count. BoundaryPoint contains the count
  multiple samples begin/end at this points.

  |<--100-->|           Sample1
  |<------200------>|   Sample2
  A         B        C

  In the example above,
  Sample1 begins at A, ends at B, its value is 100.
  Sample2 beings at A, ends at C, its value is 200.
  For A, BeginCount is the sum of sample begins at A, which is 300 and no
  samples ends at A, so EndCount is 0.
  Then boundary points A, B, and C with begin/end counts are:
  A: (300, 0)
  B: (0, 100)
  C: (0, 200)
  */
  struct BoundaryPoint {
    // Sum of sample counts beginning at this point
    uint64_t BeginCount = UINT64_MAX;
    // Sum of sample counts ending at this point
    uint64_t EndCount = UINT64_MAX;
    // Is the begin point of a zero range.
    bool IsZeroRangeBegin = false;
    // Is the end point of a zero range.
    bool IsZeroRangeEnd = false;

    // UINT64_MAX doubles as "no sample begins here"; lazily switch to 0
    // before accumulating.
    void addBeginCount(uint64_t Count) {
      if (BeginCount == UINT64_MAX)
        BeginCount = 0;
      BeginCount += Count;
    }

    void addEndCount(uint64_t Count) {
      if (EndCount == UINT64_MAX)
        EndCount = 0;
      EndCount += Count;
    }
  };

  /*
  For the above example. With boundary points, follwing logic finds two
  disjoint region of

  [A,B]:   300
  [B+1,C]: 200

  If there is a boundary point that both begin and end, the point itself
  becomes a separate disjoint region. For example, if we have original
  ranges of

  |<--- 100 --->|
                |<--- 200 --->|
  A             B             C

  there are three boundary points with their begin/end counts of

  A: (100, 0)
  B: (200, 100)
  C: (0, 200)

  the disjoint ranges would be

  [A, B-1]: 100
  [B, B]:   300
  [B+1, C]: 200.

  Example for zero value range:

    |<--- 100 --->|
                       |<--- 200 --->|
  |<---------------  0 ----------------->|
  A  B            C    D             E   F

  [A, B-1]  : 0
  [B, C]    : 100
  [C+1, D-1]: 0
  [D, E]    : 200
  [E+1, F]  : 0
  */
  // std::map keeps the boundary points sorted by address for the sweep below.
  std::map<uint64_t, BoundaryPoint> Boundaries;

  for (const auto &Item : Ranges) {
    assert(Item.first.first <= Item.first.second &&
           "Invalid instruction range");
    auto &BeginPoint = Boundaries[Item.first.first];
    auto &EndPoint = Boundaries[Item.first.second];
    uint64_t Count = Item.second;

    BeginPoint.addBeginCount(Count);
    EndPoint.addEndCount(Count);
    if (Count == 0) {
      BeginPoint.IsZeroRangeBegin = true;
      EndPoint.IsZeroRangeEnd = true;
    }
  }

  // Use UINT64_MAX to indicate there is no existing range between BeginAddress
  // and the next valid address
  uint64_t BeginAddress = UINT64_MAX;
  int ZeroRangeDepth = 0;
  uint64_t Count = 0;
  // Sweep the sorted boundary points, emitting a disjoint range each time
  // the running count changes.
  for (const auto &Item : Boundaries) {
    uint64_t Address = Item.first;
    const BoundaryPoint &Point = Item.second;
    if (Point.BeginCount != UINT64_MAX) {
      if (BeginAddress != UINT64_MAX)
        DisjointRanges[{BeginAddress, Address - 1}] = Count;
      Count += Point.BeginCount;
      BeginAddress = Address;
      ZeroRangeDepth += Point.IsZeroRangeBegin;
    }
    if (Point.EndCount != UINT64_MAX) {
      assert((BeginAddress != UINT64_MAX) &&
             "First boundary point cannot be 'end' point");
      DisjointRanges[{BeginAddress, Address}] = Count;
      assert(Count >= Point.EndCount && "Mismatched live ranges");
      Count -= Point.EndCount;
      BeginAddress = Address + 1;
      ZeroRangeDepth -= Point.IsZeroRangeEnd;
      // If the remaining count is zero and it's no longer in a zero range, this
      // means we consume all the ranges before, thus mark BeginAddress as
      // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
      //  [<---- 10 ---->]
      //                       [<---- 20 ---->]
      //   A             B     C              D
      // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
      // have the [B+1, C-1] zero range.
      if (Count == 0 && ZeroRangeDepth == 0)
        BeginAddress = UINT64_MAX;
    }
  }
}
|
||||
|
||||
// Attribute \p Count body samples to \p LeafLoc's line/discriminator in
// \p FunctionProfile, keeping the MAX count observed for that location.
void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
    FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
    uint64_t Count) {
  // Use the maximum count of samples with same line location
  uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);

  // Use duplication factor to compensated for loop unroll/vectorization.
  // Note that this is only needed when we're taking MAX of the counts at
  // the location instead of SUM.
  Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);

  ErrorOr<uint64_t> R =
      FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);

  uint64_t PreviousCount = R ? R.get() : 0;
  // Bump by the delta only, so the stored value ends up being the MAX of
  // PreviousCount and Count rather than their sum.
  if (PreviousCount <= Count) {
    FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
                                   Count - PreviousCount);
  }
}
|
||||
|
||||
void ProfileGeneratorBase::updateTotalSamples() {
|
||||
if (!UpdateTotalSamples)
|
||||
return;
|
||||
|
||||
for (auto &Item : ProfileMap) {
|
||||
FunctionSamples &FunctionProfile = Item.second;
|
||||
FunctionProfile.updateTotalSamples();
|
||||
}
|
||||
}
|
||||
|
||||
// Find or create the top-level (context-less) profile for \p FuncName.
FunctionSamples &
ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
  SampleContext Context(FuncName);
  auto Inserted = ProfileMap.emplace(Context, FunctionSamples());
  FunctionSamples &Profile = Inserted.first->second;
  // Only a freshly created profile needs its context stamped.
  if (Inserted.second)
    Profile.setContext(Context);
  return Profile;
}
|
||||
|
||||
void ProfileGenerator::generateProfile() {
|
||||
if (Binary->usePseudoProbes()) {
|
||||
// TODO: Support probe based profile generation
|
||||
exitWithError("Probe based profile generation not supported for AutoFDO, "
|
||||
"consider dropping `--ignore-stack-samples` or adding `--use-dwarf-correlation`.");
|
||||
} else {
|
||||
generateLineNumBasedProfile();
|
||||
}
|
||||
postProcessProfiles();
|
||||
}
|
||||
|
||||
// Post-process the non-CS profile: derive hot/cold thresholds, drop
// profiles below the cold threshold, then report profile density.
void ProfileGenerator::postProcessProfiles() {
  computeSummaryAndThreshold();
  trimColdProfiles(ProfileMap, ColdCountThreshold);
  calculateAndShowDensity(ProfileMap);
}
|
||||
|
||||
void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
|
||||
uint64_t ColdCntThreshold) {
|
||||
if (!TrimColdProfile)
|
||||
return;
|
||||
|
||||
// Move cold profiles into a tmp container.
|
||||
std::vector<SampleContext> ColdProfiles;
|
||||
for (const auto &I : ProfileMap) {
|
||||
if (I.second.getTotalSamples() < ColdCntThreshold)
|
||||
ColdProfiles.emplace_back(I.first);
|
||||
}
|
||||
|
||||
// Remove the cold profile from ProfileMap.
|
||||
for (const auto &I : ColdProfiles)
|
||||
ProfileMap.erase(I);
|
||||
}
|
||||
|
||||
// Build the non-CS profile from the single aggregated sample counter:
// body samples from address ranges, then boundary/call samples from branches.
void ProfileGenerator::generateLineNumBasedProfile() {
  assert(SampleCounters.size() == 1 &&
         "Must have one entry for profile generation.");
  const SampleCounter &SC = SampleCounters.begin()->second;
  // Fill in function body samples
  populateBodySamplesForAllFunctions(SC.RangeCounter);
  // Fill in boundary sample counts as well as call site samples for calls
  populateBoundarySamplesForAllFunctions(SC.BranchCounter);

  updateTotalSamples();
}
|
||||
|
||||
// Walk the inline frame stack in \p FrameVec (outermost frame first),
// creating/looking up the nested function profile at each callsite and
// adding \p Count total samples at every level. Returns the leaf
// (innermost) profile.
FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
    const SampleContextFrameVector &FrameVec, uint64_t Count) {
  // Get top level profile
  FunctionSamples *FunctionProfile =
      &getTopLevelFunctionProfile(FrameVec[0].FuncName);
  FunctionProfile->addTotalSamples(Count);

  for (size_t I = 1; I < FrameVec.size(); I++) {
    // The callsite is identified by the caller frame's line/discriminator.
    LineLocation Callsite(
        FrameVec[I - 1].Location.LineOffset,
        getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
    FunctionSamplesMap &SamplesMap =
        FunctionProfile->functionSamplesAt(Callsite);
    auto Ret =
        SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples());
    if (Ret.second) {
      SampleContext Context(FrameVec[I].FuncName);
      Ret.first->second.setContext(Context);
    }
    FunctionProfile = &Ret.first->second;
    FunctionProfile->addTotalSamples(Count);
  }

  return *FunctionProfile;
}
|
||||
|
||||
// Seed the raw range counter with zero-count entries for all (or only
// sampled) function ranges, then split everything into disjoint ranges.
RangeSample
ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
  RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
  if (FillZeroForAllFuncs) {
    // Zero-fill every function range in the binary, sampled or not.
    for (auto &FuncI : Binary->getAllBinaryFunctions()) {
      for (auto &R : FuncI.second.Ranges) {
        // `+= 0` creates the entry with count 0 if absent, leaves it if not.
        Ranges[{R.first, R.second - 1}] += 0;
      }
    }
  } else {
    // For each range, we search for all ranges of the function it belongs to
    // and initialize it with zero count, so it remains zero if doesn't hit any
    // samples. This is to be consistent with compiler that interpret zero count
    // as unexecuted(cold).
    for (const auto &I : RangeCounter) {
      uint64_t StartOffset = I.first.first;
      for (const auto &Range : Binary->getRangesForOffset(StartOffset))
        Ranges[{Range.first, Range.second - 1}] += 0;
    }
  }
  RangeSample DisjointRanges;
  findDisjointRanges(DisjointRanges, Ranges);
  return DisjointRanges;
}
|
||||
|
||||
// For each disjoint range, walk its instructions and attribute body samples
// to the profiles of the inline frames covering each instruction.
void ProfileGenerator::populateBodySamplesForAllFunctions(
    const RangeSample &RangeCounter) {
  for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
    uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
    uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
    uint64_t Count = Range.second;

    InstructionPointer IP(Binary, RangeBegin, true);
    // Disjoint ranges may have range in the middle of two instr,
    // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
    // can be Addr1+1 to Addr2-1. We should ignore such range.
    if (IP.Address > RangeEnd)
      continue;

    do {
      uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
      const SampleContextFrameVector &FrameVec =
          Binary->getFrameLocationStack(Offset);
      if (!FrameVec.empty()) {
        // FIXME: As accumulating total count per instruction caused some
        // regression, we changed to accumulate total count per byte as a
        // workaround. Tuning hotness threshold on the compiler side might be
        // necessary in the future.
        FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
            FrameVec, Count * Binary->getInstSize(Offset));
        updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
                                            Count);
      }
    } while (IP.advance() && IP.Address <= RangeEnd);
  }
}
|
||||
|
||||
// Resolve a branch-target offset to the canonical name of the function it
// enters; returns an empty StringRef when the target is not a real
// function entry (e.g. an outlined function or inner label).
StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) {
  // Get the function range by branch target if it's a call branch.
  auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset);

  // We won't accumulate sample count for a range whose start is not the real
  // function entry such as outlined function or inner labels.
  if (FRange && FRange->IsFuncEntry)
    return FunctionSamples::getCanonicalFnName(FRange->getFuncName());

  return StringRef();
}
|
||||
|
||||
// For each branch counter, record a called-target sample at the call site
// and head samples on the callee's top-level profile.
void ProfileGenerator::populateBoundarySamplesForAllFunctions(
    const BranchSample &BranchCounters) {
  for (const auto &Entry : BranchCounters) {
    uint64_t SourceOffset = Entry.first.first;
    uint64_t TargetOffset = Entry.first.second;
    uint64_t Count = Entry.second;
    assert(Count != 0 && "Unexpected zero weight branch");

    // Skip branches that don't land on a real function entry.
    StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
    if (CalleeName.size() == 0)
      continue;
    // Record called target sample and its count.
    const SampleContextFrameVector &FrameVec =
        Binary->getFrameLocationStack(SourceOffset);
    if (!FrameVec.empty()) {
      // Count 0: the branch only contributes call-target info here; body
      // samples were added by the range walk.
      FunctionSamples &FunctionProfile =
          getLeafProfileAndAddTotalSamples(FrameVec, 0);
      FunctionProfile.addCalledTargetSamples(
          FrameVec.back().Location.LineOffset,
          getBaseDiscriminator(FrameVec.back().Location.Discriminator),
          CalleeName, Count);
    }
    // Add head samples for callee.
    FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
    CalleeProfile.addHeadSamples(Count);
  }
}
|
||||
|
||||
// Compute the minimum hot-function density for \p Profiles and emit the
// corresponding warning/suggestion output.
void ProfileGeneratorBase::calculateAndShowDensity(
    const SampleProfileMap &Profiles) {
  double Density = calculateDensity(Profiles, HotCountThreshold);
  showDensitySuggestion(Density);
}
|
||||
|
||||
// Look up the profile for \p Context, creating it (and interning the
// context frame vector) on first use. \p WasLeafInlined marks contexts
// whose leaf frame was inlined at sample-collection time.
FunctionSamples &CSProfileGenerator::getFunctionProfileForContext(
    const SampleContextFrameVector &Context, bool WasLeafInlined) {
  auto I = ProfileMap.find(SampleContext(Context));
  if (I == ProfileMap.end()) {
    // Save the new context for future references.
    SampleContextFrames NewContext = *Contexts.insert(Context).first;
    SampleContext FContext(NewContext, RawContext);
    auto Ret = ProfileMap.emplace(FContext, FunctionSamples());
    if (WasLeafInlined)
      FContext.setAttribute(ContextWasInlined);
    // The (possibly attributed) context is propagated via setContext below.
    FunctionSamples &FProfile = Ret.first->second;
    FProfile.setContext(FContext);
    return Ret.first->second;
  }
  return I->second;
}
|
||||
|
||||
// Drive context-sensitive profile generation: pick probe-based or
// line-number-based generation depending on the binary, then post-process.
void CSProfileGenerator::generateProfile() {
  FunctionSamples::ProfileIsCSFlat = true;

  if (Binary->getTrackFuncContextSize())
    computeSizeForProfiledFunctions();

  if (Binary->usePseudoProbes()) {
    // Enable pseudo probe functionalities in SampleProf
    FunctionSamples::ProfileIsProbeBased = true;
    generateProbeBasedProfile();
  } else {
    generateLineNumBasedProfile();
  }
  postProcessProfiles();
}
|
||||
|
||||
// Compute inlined-context sizes for every function touched by the CS
// counters, deduplicating function ranges first.
void CSProfileGenerator::computeSizeForProfiledFunctions() {
  // Hash map to deduplicate the function range and the item is a pair of
  // function start and end offset.
  std::unordered_map<uint64_t, uint64_t> AggregatedRanges;
  // Go through all the ranges in the CS counters, use the start of the range to
  // look up the function it belongs and record the function range.
  for (const auto &CI : SampleCounters) {
    for (const auto &Item : CI.second.RangeCounter) {
      // FIXME: Filter the bogus crossing function range.
      uint64_t StartOffset = Item.first.first;
      // Note that a function can be spilt into multiple ranges, so get all
      // ranges of the function.
      for (const auto &Range : Binary->getRangesForOffset(StartOffset))
        AggregatedRanges[Range.first] = Range.second;
    }
  }

  for (const auto &I : AggregatedRanges) {
    uint64_t StartOffset = I.first;
    uint64_t EndOffset = I.second;
    Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset);
  }
}
|
||||
|
||||
// Build the CS profile: per-context body and boundary samples from each
// counter, then infer samples missing from callers.
void CSProfileGenerator::generateLineNumBasedProfile() {
  for (const auto &CI : SampleCounters) {
    const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());

    // Get or create function profile for the range
    FunctionSamples &FunctionProfile =
        getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined);

    // Fill in function body samples
    populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter);
    // Fill in boundary sample counts as well as call site samples for calls
    populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile,
                                       CI.second.BranchCounter);
  }
  // Fill in call site value sample for inlined calls and also use context to
  // infer missing samples. Since we don't have call count for inlined
  // functions, we estimate it from inlinee's profile using the entry of the
  // body sample.
  populateInferredFunctionSamples();

  updateTotalSamples();
}
|
||||
|
||||
// Attribute the range counter of a single context to body samples of
// \p FunctionProfile, walking each disjoint range instruction by instruction.
void CSProfileGenerator::populateBodySamplesForFunction(
    FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
  // Compute disjoint ranges first, so we can use MAX
  // for calculating count for each location.
  RangeSample Ranges;
  findDisjointRanges(Ranges, RangeCounter);
  for (const auto &Range : Ranges) {
    uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
    uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
    uint64_t Count = Range.second;
    // Disjoint ranges have introduce zero-filled gap that
    // doesn't belong to current context, filter them out.
    if (Count == 0)
      continue;

    InstructionPointer IP(Binary, RangeBegin, true);
    // Disjoint ranges may have range in the middle of two instr,
    // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
    // can be Addr1+1 to Addr2-1. We should ignore such range.
    if (IP.Address > RangeEnd)
      continue;

    do {
      uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
      auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
      if (LeafLoc.hasValue()) {
        // Recording body sample for this specific context
        updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
        FunctionProfile.addTotalSamples(Count);
      }
    } while (IP.advance() && IP.Address <= RangeEnd);
  }
}
|
||||
|
||||
// Attribute the branch counter of a single context: called-target samples on
// the caller's profile and head samples on the callee's context profile.
void CSProfileGenerator::populateBoundarySamplesForFunction(
    SampleContextFrames ContextId, FunctionSamples &FunctionProfile,
    const BranchSample &BranchCounters) {

  for (const auto &Entry : BranchCounters) {
    uint64_t SourceOffset = Entry.first.first;
    uint64_t TargetOffset = Entry.first.second;
    uint64_t Count = Entry.second;
    assert(Count != 0 && "Unexpected zero weight branch");

    // Skip branches that don't land on a real function entry.
    StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
    if (CalleeName.size() == 0)
      continue;

    // Record called target sample and its count
    auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
    if (!LeafLoc.hasValue())
      continue;
    FunctionProfile.addCalledTargetSamples(
        LeafLoc->Location.LineOffset,
        getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
        Count);

    // Record head sample for called target(callee)
    SampleContextFrameVector CalleeCtx(ContextId.begin(), ContextId.end());
    assert(CalleeCtx.back().FuncName == LeafLoc->FuncName &&
           "Leaf function name doesn't match");
    // Replace the leaf frame with the precise call-site location, then
    // append the callee as the new leaf.
    CalleeCtx.back() = *LeafLoc;
    CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0));
    FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx);
    CalleeProfile.addHeadSamples(Count);
  }
}
|
||||
|
||||
// Derive the caller's context from \p CalleeContext by dropping the leaf
// frame. Returns the caller's leaf frame with its original call-site
// location; \p CallerContext receives the caller context with its leaf
// location zeroed out.
static SampleContextFrame
getCallerContext(SampleContextFrames CalleeContext,
                 SampleContextFrameVector &CallerContext) {
  assert(CalleeContext.size() > 1 && "Unexpected empty context");
  CalleeContext = CalleeContext.drop_back();
  CallerContext.assign(CalleeContext.begin(), CalleeContext.end());
  SampleContextFrame CallerFrame = CallerContext.back();
  CallerContext.back().Location = LineLocation(0, 0);
  return CallerFrame;
}
|
||||
|
||||
void CSProfileGenerator::populateInferredFunctionSamples() {
|
||||
for (const auto &Item : ProfileMap) {
|
||||
const auto &CalleeContext = Item.first;
|
||||
const FunctionSamples &CalleeProfile = Item.second;
|
||||
|
||||
// If we already have head sample counts, we must have value profile
|
||||
// for call sites added already. Skip to avoid double counting.
|
||||
if (CalleeProfile.getHeadSamples())
|
||||
continue;
|
||||
// If we don't have context, nothing to do for caller's call site.
|
||||
// This could happen for entry point function.
|
||||
if (CalleeContext.isBaseContext())
|
||||
continue;
|
||||
|
||||
// Infer Caller's frame loc and context ID through string splitting
|
||||
SampleContextFrameVector CallerContextId;
|
||||
SampleContextFrame &&CallerLeafFrameLoc =
|
||||
getCallerContext(CalleeContext.getContextFrames(), CallerContextId);
|
||||
SampleContextFrames CallerContext(CallerContextId);
|
||||
|
||||
// It's possible that we haven't seen any sample directly in the caller,
|
||||
// in which case CallerProfile will not exist. But we can't modify
|
||||
// ProfileMap while iterating it.
|
||||
// TODO: created function profile for those callers too
|
||||
if (ProfileMap.find(CallerContext) == ProfileMap.end())
|
||||
continue;
|
||||
FunctionSamples &CallerProfile = ProfileMap[CallerContext];
|
||||
|
||||
// Since we don't have call count for inlined functions, we
|
||||
// estimate it from inlinee's profile using entry body sample.
|
||||
uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
|
||||
// If we don't have samples with location, use 1 to indicate live.
|
||||
if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
|
||||
EstimatedCallCount = 1;
|
||||
CallerProfile.addCalledTargetSamples(
|
||||
CallerLeafFrameLoc.Location.LineOffset,
|
||||
CallerLeafFrameLoc.Location.Discriminator,
|
||||
CalleeProfile.getContext().getName(), EstimatedCallCount);
|
||||
CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset,
|
||||
CallerLeafFrameLoc.Location.Discriminator,
|
||||
EstimatedCallCount);
|
||||
CallerProfile.addTotalSamples(EstimatedCallCount);
|
||||
}
|
||||
}
|
||||
|
||||
// Post-process the CS profile: thresholds, optional pre-inlining,
// cold-context trimming/merging, density reporting, and optional conversion
// to nested (non-flat) CS profiles.
void CSProfileGenerator::postProcessProfiles() {
  // Compute hot/cold threshold based on profile. This will be used for cold
  // context profile merging/trimming.
  computeSummaryAndThreshold();

  // Run global pre-inliner to adjust/merge context profile based on estimated
  // inline decisions.
  if (EnableCSPreInliner) {
    CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold)
        .run();
    // Turn off the profile merger by default unless it is explicitly enabled.
    if (!CSProfMergeColdContext.getNumOccurrences())
      CSProfMergeColdContext = false;
  }

  // Trim and merge cold context profile using cold threshold above.
  if (TrimColdProfile || CSProfMergeColdContext) {
    SampleContextTrimmer(ProfileMap)
        .trimAndMergeColdContextProfiles(
            HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
            CSProfMaxColdContextDepth, EnableCSPreInliner);
  }

  // Merge function samples of CS profile to calculate profile density.
  sampleprof::SampleProfileMap ContextLessProfiles;
  for (const auto &I : ProfileMap) {
    ContextLessProfiles[I.second.getName()].merge(I.second);
  }

  calculateAndShowDensity(ContextLessProfiles);
  if (GenCSNestedProfile) {
    CSProfileConverter CSConverter(ProfileMap);
    CSConverter.convertProfiles();
    FunctionSamples::ProfileIsCSFlat = false;
    FunctionSamples::ProfileIsCSNested = EnableCSPreInliner;
  }
}
|
||||
|
||||
void ProfileGeneratorBase::computeSummaryAndThreshold() {
|
||||
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
|
||||
auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
|
||||
HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
|
||||
(Summary->getDetailedSummary()));
|
||||
ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
|
||||
(Summary->getDetailedSummary()));
|
||||
}
|
||||
|
||||
// Helper function to extract context prefix string stack
|
||||
// Extract context stack for reusing, leaf context stack will
|
||||
// be added compressed while looking up function profile
|
||||
static void extractPrefixContextStack(
|
||||
SampleContextFrameVector &ContextStack,
|
||||
const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes,
|
||||
ProfiledBinary *Binary) {
|
||||
for (const auto *P : Probes) {
|
||||
Binary->getInlineContextForProbe(P, ContextStack, true);
|
||||
}
|
||||
}
|
||||
|
||||
void CSProfileGenerator::generateProbeBasedProfile() {
|
||||
for (const auto &CI : SampleCounters) {
|
||||
const ProbeBasedCtxKey *CtxKey =
|
||||
dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
|
||||
SampleContextFrameVector ContextStack;
|
||||
extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary);
|
||||
// Fill in function body samples from probes, also infer caller's samples
|
||||
// from callee's probe
|
||||
populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
|
||||
// Fill in boundary samples for a call probe
|
||||
populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
|
||||
}
|
||||
}
|
||||
|
||||
void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter,
|
||||
ProbeCounterMap &ProbeCounter) {
|
||||
RangeSample Ranges;
|
||||
findDisjointRanges(Ranges, RangeCounter);
|
||||
for (const auto &Range : Ranges) {
|
||||
uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
|
||||
uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
|
||||
uint64_t Count = Range.second;
|
||||
// Disjoint ranges have introduce zero-filled gap that
|
||||
// doesn't belong to current context, filter them out.
|
||||
if (Count == 0)
|
||||
continue;
|
||||
|
||||
InstructionPointer IP(Binary, RangeBegin, true);
|
||||
// Disjoint ranges may have range in the middle of two instr,
|
||||
// e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
|
||||
// can be Addr1+1 to Addr2-1. We should ignore such range.
|
||||
if (IP.Address > RangeEnd)
|
||||
continue;
|
||||
|
||||
do {
|
||||
const AddressProbesMap &Address2ProbesMap =
|
||||
Binary->getAddress2ProbesMap();
|
||||
auto It = Address2ProbesMap.find(IP.Address);
|
||||
if (It != Address2ProbesMap.end()) {
|
||||
for (const auto &Probe : It->second) {
|
||||
if (!Probe.isBlock())
|
||||
continue;
|
||||
ProbeCounter[&Probe] += Count;
|
||||
}
|
||||
}
|
||||
} while (IP.advance() && IP.Address <= RangeEnd);
|
||||
}
|
||||
}
|
||||
|
||||
void CSProfileGenerator::populateBodySamplesWithProbes(
|
||||
const RangeSample &RangeCounter, SampleContextFrames ContextStack) {
|
||||
ProbeCounterMap ProbeCounter;
|
||||
// Extract the top frame probes by looking up each address among the range in
|
||||
// the Address2ProbeMap
|
||||
extractProbesFromRange(RangeCounter, ProbeCounter);
|
||||
std::unordered_map<MCDecodedPseudoProbeInlineTree *,
|
||||
std::unordered_set<FunctionSamples *>>
|
||||
FrameSamples;
|
||||
for (const auto &PI : ProbeCounter) {
|
||||
const MCDecodedPseudoProbe *Probe = PI.first;
|
||||
uint64_t Count = PI.second;
|
||||
FunctionSamples &FunctionProfile =
|
||||
getFunctionProfileForLeafProbe(ContextStack, Probe);
|
||||
// Record the current frame and FunctionProfile whenever samples are
|
||||
// collected for non-danglie probes. This is for reporting all of the
|
||||
// zero count probes of the frame later.
|
||||
FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
|
||||
FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
|
||||
FunctionProfile.addTotalSamples(Count);
|
||||
if (Probe->isEntry()) {
|
||||
FunctionProfile.addHeadSamples(Count);
|
||||
// Look up for the caller's function profile
|
||||
const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
|
||||
SampleContextFrames CalleeContextId =
|
||||
FunctionProfile.getContext().getContextFrames();
|
||||
if (InlinerDesc != nullptr && CalleeContextId.size() > 1) {
|
||||
// Since the context id will be compressed, we have to use callee's
|
||||
// context id to infer caller's context id to ensure they share the
|
||||
// same context prefix.
|
||||
SampleContextFrameVector CallerContextId;
|
||||
SampleContextFrame &&CallerLeafFrameLoc =
|
||||
getCallerContext(CalleeContextId, CallerContextId);
|
||||
uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset;
|
||||
assert(CallerIndex &&
|
||||
"Inferred caller's location index shouldn't be zero!");
|
||||
FunctionSamples &CallerProfile =
|
||||
getFunctionProfileForContext(CallerContextId);
|
||||
CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
|
||||
CallerProfile.addBodySamples(CallerIndex, 0, Count);
|
||||
CallerProfile.addTotalSamples(Count);
|
||||
CallerProfile.addCalledTargetSamples(
|
||||
CallerIndex, 0, FunctionProfile.getContext().getName(), Count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Assign zero count for remaining probes without sample hits to
|
||||
// differentiate from probes optimized away, of which the counts are unknown
|
||||
// and will be inferred by the compiler.
|
||||
for (auto &I : FrameSamples) {
|
||||
for (auto *FunctionProfile : I.second) {
|
||||
for (auto *Probe : I.first->getProbes()) {
|
||||
FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CSProfileGenerator::populateBoundarySamplesWithProbes(
|
||||
const BranchSample &BranchCounter, SampleContextFrames ContextStack) {
|
||||
for (const auto &BI : BranchCounter) {
|
||||
uint64_t SourceOffset = BI.first.first;
|
||||
uint64_t TargetOffset = BI.first.second;
|
||||
uint64_t Count = BI.second;
|
||||
uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
|
||||
const MCDecodedPseudoProbe *CallProbe =
|
||||
Binary->getCallProbeForAddr(SourceAddress);
|
||||
if (CallProbe == nullptr)
|
||||
continue;
|
||||
FunctionSamples &FunctionProfile =
|
||||
getFunctionProfileForLeafProbe(ContextStack, CallProbe);
|
||||
FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
|
||||
FunctionProfile.addTotalSamples(Count);
|
||||
StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
|
||||
if (CalleeName.size() == 0)
|
||||
continue;
|
||||
FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
|
||||
Count);
|
||||
}
|
||||
}
|
||||
|
||||
FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
|
||||
SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
|
||||
|
||||
// Explicitly copy the context for appending the leaf context
|
||||
SampleContextFrameVector NewContextStack(ContextStack.begin(),
|
||||
ContextStack.end());
|
||||
Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
|
||||
// For leaf inlined context with the top frame, we should strip off the top
|
||||
// frame's probe id, like:
|
||||
// Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
|
||||
auto LeafFrame = NewContextStack.back();
|
||||
LeafFrame.Location = LineLocation(0, 0);
|
||||
NewContextStack.pop_back();
|
||||
// Compress the context string except for the leaf frame
|
||||
CSProfileGenerator::compressRecursionContext(NewContextStack);
|
||||
CSProfileGenerator::trimContext(NewContextStack);
|
||||
NewContextStack.push_back(LeafFrame);
|
||||
|
||||
const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
|
||||
bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
|
||||
FunctionSamples &FunctionProile =
|
||||
getFunctionProfileForContext(NewContextStack, WasLeafInlined);
|
||||
FunctionProile.setFunctionHash(FuncDesc->FuncHash);
|
||||
return FunctionProile;
|
||||
}
|
||||
|
||||
} // end namespace sampleprof
|
||||
} // end namespace llvm
|
|
@ -1,312 +0,0 @@
|
|||
//===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
|
||||
#define LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
|
||||
#include "CSPreInliner.h"
|
||||
#include "ErrorHandling.h"
|
||||
#include "PerfReader.h"
|
||||
#include "ProfiledBinary.h"
|
||||
#include "llvm/ProfileData/SampleProfWriter.h"
|
||||
#include <memory>
|
||||
#include <unordered_set>
|
||||
|
||||
using namespace llvm;
|
||||
using namespace sampleprof;
|
||||
|
||||
namespace llvm {
|
||||
namespace sampleprof {
|
||||
|
||||
// This base class for profile generation of sample-based PGO. We reuse all
|
||||
// structures relating to function profiles and profile writers as seen in
|
||||
// /ProfileData/SampleProf.h.
|
||||
class ProfileGeneratorBase {
|
||||
|
||||
public:
|
||||
ProfileGeneratorBase(ProfiledBinary *Binary,
|
||||
const ContextSampleCounterMap &Counters)
|
||||
: Binary(Binary), SampleCounters(Counters){};
|
||||
virtual ~ProfileGeneratorBase() = default;
|
||||
static std::unique_ptr<ProfileGeneratorBase>
|
||||
create(ProfiledBinary *Binary, const ContextSampleCounterMap &SampleCounters,
|
||||
bool ProfileIsCSFlat);
|
||||
virtual void generateProfile() = 0;
|
||||
void write();
|
||||
|
||||
static uint32_t
|
||||
getDuplicationFactor(unsigned Discriminator,
|
||||
bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
|
||||
return UseFSD ? 1
|
||||
: llvm::DILocation::getDuplicationFactorFromDiscriminator(
|
||||
Discriminator);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
getBaseDiscriminator(unsigned Discriminator,
|
||||
bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
|
||||
return UseFSD ? Discriminator
|
||||
: DILocation::getBaseDiscriminatorFromDiscriminator(
|
||||
Discriminator, /* IsFSDiscriminator */ false);
|
||||
}
|
||||
|
||||
static bool UseFSDiscriminator;
|
||||
|
||||
protected:
|
||||
// Use SampleProfileWriter to serialize profile map
|
||||
void write(std::unique_ptr<SampleProfileWriter> Writer,
|
||||
SampleProfileMap &ProfileMap);
|
||||
/*
|
||||
For each region boundary point, mark if it is begin or end (or both) of
|
||||
the region. Boundary points are inclusive. Log the sample count as well
|
||||
so we can use it when we compute the sample count of each disjoint region
|
||||
later. Note that there might be multiple ranges with different sample
|
||||
count that share same begin/end point. We need to accumulate the sample
|
||||
count for the boundary point for such case, because for the example
|
||||
below,
|
||||
|
||||
|<--100-->|
|
||||
|<------200------>|
|
||||
A B C
|
||||
|
||||
sample count for disjoint region [A,B] would be 300.
|
||||
*/
|
||||
void findDisjointRanges(RangeSample &DisjointRanges,
|
||||
const RangeSample &Ranges);
|
||||
// Helper function for updating body sample for a leaf location in
|
||||
// FunctionProfile
|
||||
void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
|
||||
const SampleContextFrame &LeafLoc,
|
||||
uint64_t Count);
|
||||
void updateTotalSamples();
|
||||
|
||||
StringRef getCalleeNameForOffset(uint64_t TargetOffset);
|
||||
|
||||
void computeSummaryAndThreshold();
|
||||
|
||||
void calculateAndShowDensity(const SampleProfileMap &Profiles);
|
||||
|
||||
double calculateDensity(const SampleProfileMap &Profiles,
|
||||
uint64_t HotCntThreshold);
|
||||
|
||||
void showDensitySuggestion(double Density);
|
||||
|
||||
// Thresholds from profile summary to answer isHotCount/isColdCount queries.
|
||||
uint64_t HotCountThreshold;
|
||||
|
||||
uint64_t ColdCountThreshold;
|
||||
|
||||
// Used by SampleProfileWriter
|
||||
SampleProfileMap ProfileMap;
|
||||
|
||||
ProfiledBinary *Binary = nullptr;
|
||||
|
||||
const ContextSampleCounterMap &SampleCounters;
|
||||
};
|
||||
|
||||
class ProfileGenerator : public ProfileGeneratorBase {
|
||||
|
||||
public:
|
||||
ProfileGenerator(ProfiledBinary *Binary,
|
||||
const ContextSampleCounterMap &Counters)
|
||||
: ProfileGeneratorBase(Binary, Counters){};
|
||||
void generateProfile() override;
|
||||
|
||||
private:
|
||||
void generateLineNumBasedProfile();
|
||||
RangeSample preprocessRangeCounter(const RangeSample &RangeCounter);
|
||||
FunctionSamples &getTopLevelFunctionProfile(StringRef FuncName);
|
||||
// Helper function to get the leaf frame's FunctionProfile by traversing the
|
||||
// inline stack and meanwhile it adds the total samples for each frame's
|
||||
// function profile.
|
||||
FunctionSamples &
|
||||
getLeafProfileAndAddTotalSamples(const SampleContextFrameVector &FrameVec,
|
||||
uint64_t Count);
|
||||
void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter);
|
||||
void
|
||||
populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
|
||||
void postProcessProfiles();
|
||||
void trimColdProfiles(const SampleProfileMap &Profiles,
|
||||
uint64_t ColdCntThreshold);
|
||||
};
|
||||
|
||||
using ProbeCounterMap =
|
||||
std::unordered_map<const MCDecodedPseudoProbe *, uint64_t>;
|
||||
|
||||
class CSProfileGenerator : public ProfileGeneratorBase {
|
||||
public:
|
||||
CSProfileGenerator(ProfiledBinary *Binary,
|
||||
const ContextSampleCounterMap &Counters)
|
||||
: ProfileGeneratorBase(Binary, Counters){};
|
||||
|
||||
void generateProfile() override;
|
||||
|
||||
// Trim the context stack at a given depth.
|
||||
template <typename T>
|
||||
static void trimContext(SmallVectorImpl<T> &S, int Depth = MaxContextDepth) {
|
||||
if (Depth < 0 || static_cast<size_t>(Depth) >= S.size())
|
||||
return;
|
||||
std::copy(S.begin() + S.size() - static_cast<size_t>(Depth), S.end(),
|
||||
S.begin());
|
||||
S.resize(Depth);
|
||||
}
|
||||
|
||||
// Remove adjacent repeated context sequences up to a given sequence length,
|
||||
// -1 means no size limit. Note that repeated sequences are identified based
|
||||
// on the exact call site, this is finer granularity than function recursion.
|
||||
template <typename T>
|
||||
static void compressRecursionContext(SmallVectorImpl<T> &Context,
|
||||
int32_t CSize = MaxCompressionSize) {
|
||||
uint32_t I = 1;
|
||||
uint32_t HS = static_cast<uint32_t>(Context.size() / 2);
|
||||
uint32_t MaxDedupSize =
|
||||
CSize == -1 ? HS : std::min(static_cast<uint32_t>(CSize), HS);
|
||||
auto BeginIter = Context.begin();
|
||||
// Use an in-place algorithm to save memory copy
|
||||
// End indicates the end location of current iteration's data
|
||||
uint32_t End = 0;
|
||||
// Deduplicate from length 1 to the max possible size of a repeated
|
||||
// sequence.
|
||||
while (I <= MaxDedupSize) {
|
||||
// This is a linear algorithm that deduplicates adjacent repeated
|
||||
// sequences of size I. The deduplication detection runs on a sliding
|
||||
// window whose size is 2*I and it keeps sliding the window to deduplicate
|
||||
// the data inside. Once duplication is detected, deduplicate it by
|
||||
// skipping the right half part of the window, otherwise just copy back
|
||||
// the new one by appending them at the back of End pointer(for the next
|
||||
// iteration).
|
||||
//
|
||||
// For example:
|
||||
// Input: [a1, a2, b1, b2]
|
||||
// (Added index to distinguish the same char, the origin is [a, a, b,
|
||||
// b], the size of the dedup window is 2(I = 1) at the beginning)
|
||||
//
|
||||
// 1) The initial status is a dummy window[null, a1], then just copy the
|
||||
// right half of the window(End = 0), then slide the window.
|
||||
// Result: [a1], a2, b1, b2 (End points to the element right before ],
|
||||
// after ] is the data of the previous iteration)
|
||||
//
|
||||
// 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of
|
||||
// the window i.e the duplication happen. Only slide the window.
|
||||
// Result: [a1], a2, b1, b2
|
||||
//
|
||||
// 3) Next window is [a2, b1], copy the right half of the window(b1 is
|
||||
// new) to the End and slide the window.
|
||||
// Result: [a1, b1], b1, b2
|
||||
//
|
||||
// 4) Next window is [b1, b2], same to 2), skip b2.
|
||||
// Result: [a1, b1], b1, b2
|
||||
// After resize, it will be [a, b]
|
||||
|
||||
// Use pointers like below to do comparison inside the window
|
||||
// [a b c a b c]
|
||||
// | | | | |
|
||||
// LeftBoundary Left Right Left+I Right+I
|
||||
// A duplication found if Left < LeftBoundry.
|
||||
|
||||
int32_t Right = I - 1;
|
||||
End = I;
|
||||
int32_t LeftBoundary = 0;
|
||||
while (Right + I < Context.size()) {
|
||||
// To avoids scanning a part of a sequence repeatedly, it finds out
|
||||
// the common suffix of two hald in the window. The common suffix will
|
||||
// serve as the common prefix of next possible pair of duplicate
|
||||
// sequences. The non-common part will be ignored and never scanned
|
||||
// again.
|
||||
|
||||
// For example.
|
||||
// Input: [a, b1], c1, b2, c2
|
||||
// I = 2
|
||||
//
|
||||
// 1) For the window [a, b1, c1, b2], non-common-suffix for the right
|
||||
// part is 'c1', copy it and only slide the window 1 step.
|
||||
// Result: [a, b1, c1], b2, c2
|
||||
//
|
||||
// 2) Next window is [b1, c1, b2, c2], so duplication happen.
|
||||
// Result after resize: [a, b, c]
|
||||
|
||||
int32_t Left = Right;
|
||||
while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) {
|
||||
// Find the longest suffix inside the window. When stops, Left points
|
||||
// at the diverging point in the current sequence.
|
||||
Left--;
|
||||
}
|
||||
|
||||
bool DuplicationFound = (Left < LeftBoundary);
|
||||
// Don't need to recheck the data before Right
|
||||
LeftBoundary = Right + 1;
|
||||
if (DuplicationFound) {
|
||||
// Duplication found, skip right half of the window.
|
||||
Right += I;
|
||||
} else {
|
||||
// Copy the non-common-suffix part of the adjacent sequence.
|
||||
std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1,
|
||||
BeginIter + End);
|
||||
End += Left + I - Right;
|
||||
// Only slide the window by the size of non-common-suffix
|
||||
Right = Left + I;
|
||||
}
|
||||
}
|
||||
// Don't forget the remaining part that's not scanned.
|
||||
std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End);
|
||||
End += Context.size() - Right - 1;
|
||||
I++;
|
||||
Context.resize(End);
|
||||
MaxDedupSize = std::min(static_cast<uint32_t>(End / 2), MaxDedupSize);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void generateLineNumBasedProfile();
|
||||
// Lookup or create FunctionSamples for the context
|
||||
FunctionSamples &
|
||||
getFunctionProfileForContext(const SampleContextFrameVector &Context,
|
||||
bool WasLeafInlined = false);
|
||||
// For profiled only functions, on-demand compute their inline context
|
||||
// function byte size which is used by the pre-inliner.
|
||||
void computeSizeForProfiledFunctions();
|
||||
// Post processing for profiles before writing out, such as mermining
|
||||
// and trimming cold profiles, running preinliner on profiles.
|
||||
void postProcessProfiles();
|
||||
|
||||
void populateBodySamplesForFunction(FunctionSamples &FunctionProfile,
|
||||
const RangeSample &RangeCounters);
|
||||
void populateBoundarySamplesForFunction(SampleContextFrames ContextId,
|
||||
FunctionSamples &FunctionProfile,
|
||||
const BranchSample &BranchCounters);
|
||||
void populateInferredFunctionSamples();
|
||||
|
||||
void generateProbeBasedProfile();
|
||||
// Go through each address from range to extract the top frame probe by
|
||||
// looking up in the Address2ProbeMap
|
||||
void extractProbesFromRange(const RangeSample &RangeCounter,
|
||||
ProbeCounterMap &ProbeCounter);
|
||||
// Fill in function body samples from probes
|
||||
void populateBodySamplesWithProbes(const RangeSample &RangeCounter,
|
||||
SampleContextFrames ContextStack);
|
||||
// Fill in boundary samples for a call probe
|
||||
void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter,
|
||||
SampleContextFrames ContextStack);
|
||||
// Helper function to get FunctionSamples for the leaf probe
|
||||
FunctionSamples &
|
||||
getFunctionProfileForLeafProbe(SampleContextFrames ContextStack,
|
||||
const MCDecodedPseudoProbe *LeafProbe);
|
||||
|
||||
// Underlying context table serves for sample profile writer.
|
||||
std::unordered_set<SampleContextFrameVector, SampleContextFrameHash> Contexts;
|
||||
|
||||
public:
|
||||
// Deduplicate adjacent repeated context sequences up to a given sequence
|
||||
// length. -1 means no size limit.
|
||||
static int32_t MaxCompressionSize;
|
||||
static int MaxContextDepth;
|
||||
};
|
||||
|
||||
} // end namespace sampleprof
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
|
@ -1,790 +0,0 @@
|
|||
//===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ProfiledBinary.h"
|
||||
#include "ErrorHandling.h"
|
||||
#include "ProfileGenerator.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/Demangle/Demangle.h"
|
||||
#include "llvm/IR/DebugInfoMetadata.h"
|
||||
#include "llvm/MC/TargetRegistry.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Format.h"
|
||||
#include "llvm/Support/TargetSelect.h"
|
||||
|
||||
#define DEBUG_TYPE "load-binary"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace sampleprof;
|
||||
|
||||
cl::opt<bool> ShowDisassemblyOnly("show-disassembly-only", cl::init(false),
|
||||
cl::ZeroOrMore,
|
||||
cl::desc("Print disassembled code."));
|
||||
|
||||
cl::opt<bool> ShowSourceLocations("show-source-locations", cl::init(false),
|
||||
cl::ZeroOrMore,
|
||||
cl::desc("Print source locations."));
|
||||
|
||||
static cl::opt<bool>
|
||||
ShowCanonicalFnName("show-canonical-fname", cl::init(false), cl::ZeroOrMore,
|
||||
cl::desc("Print canonical function name."));
|
||||
|
||||
static cl::opt<bool> ShowPseudoProbe(
|
||||
"show-pseudo-probe", cl::init(false), cl::ZeroOrMore,
|
||||
cl::desc("Print pseudo probe section and disassembled info."));
|
||||
|
||||
static cl::opt<bool> UseDwarfCorrelation(
|
||||
"use-dwarf-correlation", cl::init(false), cl::ZeroOrMore,
|
||||
cl::desc("Use dwarf for profile correlation even when binary contains "
|
||||
"pseudo probe."));
|
||||
|
||||
static cl::list<std::string> DisassembleFunctions(
|
||||
"disassemble-functions", cl::CommaSeparated,
|
||||
cl::desc("List of functions to print disassembly for. Accept demangled "
|
||||
"names only. Only work with show-disassembly-only"));
|
||||
|
||||
extern cl::opt<bool> ShowDetailedWarning;
|
||||
|
||||
namespace llvm {
|
||||
namespace sampleprof {
|
||||
|
||||
static const Target *getTarget(const ObjectFile *Obj) {
|
||||
Triple TheTriple = Obj->makeTriple();
|
||||
std::string Error;
|
||||
std::string ArchName;
|
||||
const Target *TheTarget =
|
||||
TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
|
||||
if (!TheTarget)
|
||||
exitWithError(Error, Obj->getFileName());
|
||||
return TheTarget;
|
||||
}
|
||||
|
||||
void BinarySizeContextTracker::addInstructionForContext(
|
||||
const SampleContextFrameVector &Context, uint32_t InstrSize) {
|
||||
ContextTrieNode *CurNode = &RootContext;
|
||||
bool IsLeaf = true;
|
||||
for (const auto &Callsite : reverse(Context)) {
|
||||
StringRef CallerName = Callsite.FuncName;
|
||||
LineLocation CallsiteLoc = IsLeaf ? LineLocation(0, 0) : Callsite.Location;
|
||||
CurNode = CurNode->getOrCreateChildContext(CallsiteLoc, CallerName);
|
||||
IsLeaf = false;
|
||||
}
|
||||
|
||||
CurNode->addFunctionSize(InstrSize);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
BinarySizeContextTracker::getFuncSizeForContext(const SampleContext &Context) {
|
||||
ContextTrieNode *CurrNode = &RootContext;
|
||||
ContextTrieNode *PrevNode = nullptr;
|
||||
SampleContextFrames Frames = Context.getContextFrames();
|
||||
int32_t I = Frames.size() - 1;
|
||||
Optional<uint32_t> Size;
|
||||
|
||||
// Start from top-level context-less function, traverse down the reverse
|
||||
// context trie to find the best/longest match for given context, then
|
||||
// retrieve the size.
|
||||
|
||||
while (CurrNode && I >= 0) {
|
||||
// Process from leaf function to callers (added to context).
|
||||
const auto &ChildFrame = Frames[I--];
|
||||
PrevNode = CurrNode;
|
||||
CurrNode =
|
||||
CurrNode->getChildContext(ChildFrame.Location, ChildFrame.FuncName);
|
||||
if (CurrNode && CurrNode->getFunctionSize().hasValue())
|
||||
Size = CurrNode->getFunctionSize().getValue();
|
||||
}
|
||||
|
||||
// If we traversed all nodes along the path of the context and haven't
|
||||
// found a size yet, pivot to look for size from sibling nodes, i.e size
|
||||
// of inlinee under different context.
|
||||
if (!Size.hasValue()) {
|
||||
if (!CurrNode)
|
||||
CurrNode = PrevNode;
|
||||
while (!Size.hasValue() && CurrNode &&
|
||||
!CurrNode->getAllChildContext().empty()) {
|
||||
CurrNode = &CurrNode->getAllChildContext().begin()->second;
|
||||
if (CurrNode->getFunctionSize().hasValue())
|
||||
Size = CurrNode->getFunctionSize().getValue();
|
||||
}
|
||||
}
|
||||
|
||||
assert(Size.hasValue() && "We should at least find one context size.");
|
||||
return Size.getValue();
|
||||
}
|
||||
|
||||
void BinarySizeContextTracker::trackInlineesOptimizedAway(
|
||||
MCPseudoProbeDecoder &ProbeDecoder) {
|
||||
ProbeFrameStack ProbeContext;
|
||||
for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren())
|
||||
trackInlineesOptimizedAway(ProbeDecoder, *Child.second.get(), ProbeContext);
|
||||
}
|
||||
|
||||
void BinarySizeContextTracker::trackInlineesOptimizedAway(
|
||||
MCPseudoProbeDecoder &ProbeDecoder,
|
||||
MCDecodedPseudoProbeInlineTree &ProbeNode, ProbeFrameStack &ProbeContext) {
|
||||
StringRef FuncName =
|
||||
ProbeDecoder.getFuncDescForGUID(ProbeNode.Guid)->FuncName;
|
||||
ProbeContext.emplace_back(FuncName, 0);
|
||||
|
||||
// This ProbeContext has a probe, so it has code before inlining and
|
||||
// optimization. Make sure we mark its size as known.
|
||||
if (!ProbeNode.getProbes().empty()) {
|
||||
ContextTrieNode *SizeContext = &RootContext;
|
||||
for (auto &ProbeFrame : reverse(ProbeContext)) {
|
||||
StringRef CallerName = ProbeFrame.first;
|
||||
LineLocation CallsiteLoc(ProbeFrame.second, 0);
|
||||
SizeContext =
|
||||
SizeContext->getOrCreateChildContext(CallsiteLoc, CallerName);
|
||||
}
|
||||
// Add 0 size to make known.
|
||||
SizeContext->addFunctionSize(0);
|
||||
}
|
||||
|
||||
// DFS down the probe inline tree
|
||||
for (const auto &ChildNode : ProbeNode.getChildren()) {
|
||||
InlineSite Location = ChildNode.first;
|
||||
ProbeContext.back().second = std::get<1>(Location);
|
||||
trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second.get(), ProbeContext);
|
||||
}
|
||||
|
||||
ProbeContext.pop_back();
|
||||
}
|
||||
|
||||
void ProfiledBinary::warnNoFuncEntry() {
|
||||
uint64_t NoFuncEntryNum = 0;
|
||||
for (auto &F : BinaryFunctions) {
|
||||
if (F.second.Ranges.empty())
|
||||
continue;
|
||||
bool hasFuncEntry = false;
|
||||
for (auto &R : F.second.Ranges) {
|
||||
if (FuncRange *FR = findFuncRangeForStartOffset(R.first)) {
|
||||
if (FR->IsFuncEntry) {
|
||||
hasFuncEntry = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!hasFuncEntry) {
|
||||
NoFuncEntryNum++;
|
||||
if (ShowDetailedWarning)
|
||||
WithColor::warning()
|
||||
<< "Failed to determine function entry for " << F.first
|
||||
<< " due to inconsistent name from symbol table and dwarf info.\n";
|
||||
}
|
||||
}
|
||||
emitWarningSummary(NoFuncEntryNum, BinaryFunctions.size(),
|
||||
"of functions failed to determine function entry due to "
|
||||
"inconsistent name from symbol table and dwarf info.");
|
||||
}
|
||||
|
||||
void ProfiledBinary::load() {
|
||||
// Attempt to open the binary.
|
||||
OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
|
||||
Binary &ExeBinary = *OBinary.getBinary();
|
||||
|
||||
auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary);
|
||||
if (!Obj)
|
||||
exitWithError("not a valid Elf image", Path);
|
||||
|
||||
TheTriple = Obj->makeTriple();
|
||||
// Current only support X86
|
||||
if (!TheTriple.isX86())
|
||||
exitWithError("unsupported target", TheTriple.getTriple());
|
||||
LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");
|
||||
|
||||
// Find the preferred load address for text sections.
|
||||
setPreferredTextSegmentAddresses(Obj);
|
||||
|
||||
// Decode pseudo probe related section
|
||||
decodePseudoProbe(Obj);
|
||||
|
||||
// Load debug info of subprograms from DWARF section.
|
||||
// If path of debug info binary is specified, use the debug info from it,
|
||||
// otherwise use the debug info from the executable binary.
|
||||
if (!DebugBinaryPath.empty()) {
|
||||
OwningBinary<Binary> DebugPath =
|
||||
unwrapOrError(createBinary(DebugBinaryPath), DebugBinaryPath);
|
||||
loadSymbolsFromDWARF(*dyn_cast<ObjectFile>(DebugPath.getBinary()));
|
||||
} else {
|
||||
loadSymbolsFromDWARF(*dyn_cast<ObjectFile>(&ExeBinary));
|
||||
}
|
||||
|
||||
// Disassemble the text sections.
|
||||
disassemble(Obj);
|
||||
|
||||
// Track size for optimized inlinees when probe is available
|
||||
if (UsePseudoProbes && TrackFuncContextSize)
|
||||
FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder);
|
||||
|
||||
// Use function start and return address to infer prolog and epilog
|
||||
ProEpilogTracker.inferPrologOffsets(StartOffset2FuncRangeMap);
|
||||
ProEpilogTracker.inferEpilogOffsets(RetOffsets);
|
||||
|
||||
warnNoFuncEntry();
|
||||
|
||||
// TODO: decode other sections.
|
||||
}
|
||||
|
||||
bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) {
|
||||
uint64_t Offset1 = virtualAddrToOffset(Address1);
|
||||
uint64_t Offset2 = virtualAddrToOffset(Address2);
|
||||
const SampleContextFrameVector &Context1 = getFrameLocationStack(Offset1);
|
||||
const SampleContextFrameVector &Context2 = getFrameLocationStack(Offset2);
|
||||
if (Context1.size() != Context2.size())
|
||||
return false;
|
||||
if (Context1.empty())
|
||||
return false;
|
||||
// The leaf frame contains location within the leaf, and it
|
||||
// needs to be remove that as it's not part of the calling context
|
||||
return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1,
|
||||
Context2.begin(), Context2.begin() + Context2.size() - 1);
|
||||
}
|
||||
|
||||
// Expand a physical frame stack (root first, leaf last) into a full
// symbolized context by splicing in the inline frames of every address.
// Returns an empty vector when any frame lacks valid debug line info, which
// causes the whole sample to be dropped by downstream processing.
// WasLeafInlined is set to whether the leaf address carries inline frames.
SampleContextFrameVector
ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
                                   bool &WasLeafInlined) {
  SampleContextFrameVector ContextVec;
  // Process from frame root to leaf
  for (auto Address : Stack) {
    uint64_t Offset = virtualAddrToOffset(Address);
    const SampleContextFrameVector &ExpandedContext =
        getFrameLocationStack(Offset);
    // An instruction without a valid debug line will be ignored by sample
    // processing
    if (ExpandedContext.empty())
      return SampleContextFrameVector();
    // Set WasLeafInlined to the size of inlined frame count for the last
    // address which is leaf
    WasLeafInlined = (ExpandedContext.size() > 1);
    ContextVec.append(ExpandedContext);
  }

  // Replace with decoded base discriminator
  for (auto &Frame : ContextVec) {
    Frame.Location.Discriminator = ProfileGeneratorBase::getBaseDiscriminator(
        Frame.Location.Discriminator, UseFSDiscriminator);
  }

  assert(ContextVec.size() && "Context length should be at least 1");

  // Compress the context string except for the leaf frame. The leaf frame's
  // line location is zeroed because it identifies a point within the leaf
  // function rather than a call site.
  auto LeafFrame = ContextVec.back();
  LeafFrame.Location = LineLocation(0, 0);
  ContextVec.pop_back();
  CSProfileGenerator::compressRecursionContext(ContextVec);
  CSProfileGenerator::trimContext(ContextVec);
  ContextVec.push_back(LeafFrame);
  return ContextVec;
}
|
||||
|
||||
// Record the preferred (link-time) load addresses and file offsets of all
// executable PT_LOAD segments, plus the page-aligned address of the first
// loadable segment. These are later used to translate between runtime
// addresses and file offsets. Exits with an error if the binary has no
// executable segment.
template <class ELFT>
void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, StringRef FileName) {
  const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName);
  // FIXME: This should be the page size of the system running profiling.
  // However such info isn't available at post-processing time, assuming
  // 4K page now. Note that we don't use EXEC_PAGESIZE from <linux/param.h>
  // because we may build the tools on non-linux.
  uint32_t PageSize = 0x1000;
  for (const typename ELFT::Phdr &Phdr : PhdrRange) {
    if (Phdr.p_type == ELF::PT_LOAD) {
      // Remember the first loadable segment, aligned down to a page.
      if (!FirstLoadableAddress)
        FirstLoadableAddress = Phdr.p_vaddr & ~(PageSize - 1U);
      if (Phdr.p_flags & ELF::PF_X) {
        // Segments will always be loaded at a page boundary.
        PreferredTextSegmentAddresses.push_back(Phdr.p_vaddr &
                                                ~(PageSize - 1U));
        TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U));
      }
    }
  }

  if (PreferredTextSegmentAddresses.empty())
    exitWithError("no executable segment found", FileName);
}
|
||||
|
||||
void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFObjectFileBase *Obj) {
|
||||
if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
|
||||
setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
|
||||
else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
|
||||
setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
|
||||
else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
|
||||
setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
|
||||
else if (const auto *ELFObj = cast<ELF64BEObjectFile>(Obj))
|
||||
setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
|
||||
else
|
||||
llvm_unreachable("invalid ELF object format");
|
||||
}
|
||||
|
||||
// Decode pseudo probe metadata: .pseudo_probe_desc holds per-function GUID
// descriptors and .pseudo_probe holds the address-to-probe mapping. Sets
// UsePseudoProbes once probe data is successfully decoded. Skipped entirely
// when DWARF-based correlation was requested.
void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
  if (UseDwarfCorrelation)
    return;

  StringRef FileName = Obj->getFileName();
  for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
       SI != SE; ++SI) {
    const SectionRef &Section = *SI;
    StringRef SectionName = unwrapOrError(Section.getName(), FileName);

    if (SectionName == ".pseudo_probe_desc") {
      StringRef Contents = unwrapOrError(Section.getContents(), FileName);
      if (!ProbeDecoder.buildGUID2FuncDescMap(
              reinterpret_cast<const uint8_t *>(Contents.data()),
              Contents.size()))
        exitWithError("Pseudo Probe decoder fail in .pseudo_probe_desc section");
    } else if (SectionName == ".pseudo_probe") {
      StringRef Contents = unwrapOrError(Section.getContents(), FileName);
      if (!ProbeDecoder.buildAddress2ProbeMap(
              reinterpret_cast<const uint8_t *>(Contents.data()),
              Contents.size()))
        exitWithError("Pseudo Probe decoder fail in .pseudo_probe section");
      // set UsePseudoProbes flag, used for PerfReader
      UsePseudoProbes = true;
    }
  }

  if (ShowPseudoProbe)
    ProbeDecoder.printGUID2FuncDescMap(outs());
}
|
||||
|
||||
// Mark the DWARF function range containing Offset as a real function entry
// when the ELF symbol at that offset can be confirmed to be one.
void ProfiledBinary::setIsFuncEntry(uint64_t Offset, StringRef RangeSymName) {
  // Note that the start offset of each ELF section can be a non-function
  // symbol, we need to binary search for the start of a real function range.
  auto *FuncRange = findFuncRangeForOffset(Offset);
  // Skip external function symbol.
  if (!FuncRange)
    return;

  // Set IsFuncEntry to true if there is only one range in the function or the
  // RangeSymName from ELF is equal to its DWARF-based function name.
  if (FuncRange->Func->Ranges.size() == 1 ||
      (!FuncRange->IsFuncEntry && FuncRange->getFuncName() == RangeSymName))
    FuncRange->IsFuncEntry = true;
}
|
||||
|
||||
// Disassemble the byte range belonging to the SI-th symbol of Section,
// recording each instruction's size and classifying its offset as a call,
// return or branch. Optionally prints the disassembly (with pseudo probes
// and source locations). Returns true on success; runs of undecodable bytes
// are only warned about, not treated as failure.
bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
                                        SectionSymbolsTy &Symbols,
                                        const SectionRef &Section) {
  std::size_t SE = Symbols.size();
  uint64_t SectionOffset = Section.getAddress() - getPreferredBaseAddress();
  uint64_t SectSize = Section.getSize();
  uint64_t StartOffset = Symbols[SI].Addr - getPreferredBaseAddress();
  // The symbol's range ends at the next symbol, or at the section end for
  // the last symbol.
  uint64_t NextStartOffset =
      (SI + 1 < SE) ? Symbols[SI + 1].Addr - getPreferredBaseAddress()
                    : SectionOffset + SectSize;
  setIsFuncEntry(StartOffset,
                 FunctionSamples::getCanonicalFnName(Symbols[SI].Name));

  StringRef SymbolName =
      ShowCanonicalFnName
          ? FunctionSamples::getCanonicalFnName(Symbols[SI].Name)
          : Symbols[SI].Name;
  // Print only when requested, and only for the selected functions (an
  // empty set means all functions).
  bool ShowDisassembly =
      ShowDisassemblyOnly && (DisassembleFunctionSet.empty() ||
                              DisassembleFunctionSet.count(SymbolName));
  if (ShowDisassembly)
    outs() << '<' << SymbolName << ">:\n";

  auto WarnInvalidInsts = [](uint64_t Start, uint64_t End) {
    WithColor::warning() << "Invalid instructions at "
                         << format("%8" PRIx64, Start) << " - "
                         << format("%8" PRIx64, End) << "\n";
  };

  uint64_t Offset = StartOffset;
  // Size of a consecutive invalid instruction range starting from Offset -1
  // backwards.
  uint64_t InvalidInstLength = 0;
  while (Offset < NextStartOffset) {
    MCInst Inst;
    uint64_t Size;
    // Disassemble an instruction.
    bool Disassembled =
        DisAsm->getInstruction(Inst, Size, Bytes.slice(Offset - SectionOffset),
                               Offset + getPreferredBaseAddress(), nulls());
    // On failure Size may be zero; advance one byte to make progress.
    if (Size == 0)
      Size = 1;

    if (ShowDisassembly) {
      if (ShowPseudoProbe) {
        ProbeDecoder.printProbeForAddress(outs(),
                                          Offset + getPreferredBaseAddress());
      }
      outs() << format("%8" PRIx64 ":", Offset + getPreferredBaseAddress());
      size_t Start = outs().tell();
      if (Disassembled)
        IPrinter->printInst(&Inst, Offset + Size, "", *STI.get(), outs());
      else
        outs() << "\t<unknown>";
      if (ShowSourceLocations) {
        // Pad to column 40 so the source locations line up.
        unsigned Cur = outs().tell() - Start;
        if (Cur < 40)
          outs().indent(40 - Cur);
        InstructionPointer IP(this, Offset);
        outs() << getReversedLocWithContext(
            symbolize(IP, ShowCanonicalFnName, ShowPseudoProbe));
      }
      outs() << "\n";
    }

    if (Disassembled) {
      const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());

      // Record instruction size.
      Offset2InstSizeMap[Offset] = Size;

      // Populate address maps.
      CodeAddrOffsets.push_back(Offset);
      if (MCDesc.isCall())
        CallOffsets.insert(Offset);
      else if (MCDesc.isReturn())
        RetOffsets.insert(Offset);
      else if (MCDesc.isBranch())
        BranchOffsets.insert(Offset);

      // A valid instruction terminates any pending invalid-byte run.
      if (InvalidInstLength) {
        WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);
        InvalidInstLength = 0;
      }
    } else {
      InvalidInstLength += Size;
    }

    Offset += Size;
  }

  // Flush a trailing invalid-byte run at the end of the symbol.
  if (InvalidInstLength)
    WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);

  if (ShowDisassembly)
    outs() << "\n";

  return true;
}
|
||||
|
||||
// Construct the MC-layer components (register/asm/subtarget/instruction
// info, the disassembler, instruction analysis, and instruction printer)
// needed to disassemble the binary. Exits with an error if any component
// is unavailable for the target triple.
void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) {
  const Target *TheTarget = getTarget(Obj);
  std::string TripleName = TheTriple.getTriple();
  StringRef FileName = Obj->getFileName();

  MRI.reset(TheTarget->createMCRegInfo(TripleName));
  if (!MRI)
    exitWithError("no register info for target " + TripleName, FileName);

  MCTargetOptions MCOptions;
  AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
  if (!AsmInfo)
    exitWithError("no assembly info for target " + TripleName, FileName);

  // Use the feature set recorded in the object file itself.
  SubtargetFeatures Features = Obj->getFeatures();
  STI.reset(
      TheTarget->createMCSubtargetInfo(TripleName, "", Features.getString()));
  if (!STI)
    exitWithError("no subtarget info for target " + TripleName, FileName);

  MII.reset(TheTarget->createMCInstrInfo());
  if (!MII)
    exitWithError("no instruction info for target " + TripleName, FileName);

  MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get());
  std::unique_ptr<MCObjectFileInfo> MOFI(
      TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
  Ctx.setObjectFileInfo(MOFI.get());
  DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx));
  if (!DisAsm)
    exitWithError("no disassembler for target " + TripleName, FileName);

  MIA.reset(TheTarget->createMCInstrAnalysis(MII.get()));

  int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
  IPrinter.reset(TheTarget->createMCInstPrinter(
      Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
  // Print branch targets as absolute addresses so the output matches the
  // address maps built during disassembly.
  IPrinter->setPrintBranchImmAsAddress(true);
}
|
||||
|
||||
// Disassemble all text sections of the binary: collect and sort the symbols
// of each section, disassemble every symbol's byte range to populate the
// instruction address maps, and finally look for the FS-discriminator
// marker symbol in the data sections.
void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
  // Set up disassembler and related components.
  setUpDisassembler(Obj);

  // Create a mapping from virtual address to symbol name. The symbols in text
  // sections are the candidates to disassemble.
  std::map<SectionRef, SectionSymbolsTy> AllSymbols;
  StringRef FileName = Obj->getFileName();
  for (const SymbolRef &Symbol : Obj->symbols()) {
    const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
    const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
    section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
    if (SecI != Obj->section_end())
      AllSymbols[*SecI].push_back(SymbolInfoTy(Addr, Name, ELF::STT_NOTYPE));
  }

  // Sort all the symbols. Use a stable sort to stabilize the output.
  for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
    stable_sort(SecSyms.second);

  DisassembleFunctionSet.insert(DisassembleFunctions.begin(),
                                DisassembleFunctions.end());
  assert((DisassembleFunctionSet.empty() || ShowDisassemblyOnly) &&
         "Functions to disassemble should be only specified together with "
         "--show-disassembly-only");

  if (ShowDisassemblyOnly)
    outs() << "\nDisassembly of " << FileName << ":\n";

  // Disassemble a text section.
  for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
       SI != SE; ++SI) {
    const SectionRef &Section = *SI;
    if (!Section.isText())
      continue;

    uint64_t ImageLoadAddr = getPreferredBaseAddress();
    uint64_t SectionOffset = Section.getAddress() - ImageLoadAddr;
    uint64_t SectSize = Section.getSize();
    if (!SectSize)
      continue;

    // Register the text section.
    TextSections.insert({SectionOffset, SectSize});

    StringRef SectionName = unwrapOrError(Section.getName(), FileName);

    if (ShowDisassemblyOnly) {
      outs() << "\nDisassembly of section " << SectionName;
      outs() << " [" << format("0x%" PRIx64, Section.getAddress()) << ", "
             << format("0x%" PRIx64, Section.getAddress() + SectSize)
             << "]:\n\n";
    }

    // PLT stubs carry no useful profile info; skip them.
    if (SectionName == ".plt")
      continue;

    // Get the section data.
    ArrayRef<uint8_t> Bytes =
        arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName));

    // Get the list of all the symbols in this section.
    SectionSymbolsTy &Symbols = AllSymbols[Section];

    // Disassemble symbol by symbol.
    for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
      if (!dissassembleSymbol(SI, Bytes, Symbols, Section))
        exitWithError("disassembling error", FileName);
    }
  }

  // Disassemble rodata section to check if FS discriminator symbol exists.
  checkUseFSDiscriminator(Obj, AllSymbols);
}
|
||||
|
||||
void ProfiledBinary::checkUseFSDiscriminator(
|
||||
const ELFObjectFileBase *Obj,
|
||||
std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
|
||||
const char *FSDiscriminatorVar = "__llvm_fs_discriminator__";
|
||||
for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
|
||||
SI != SE; ++SI) {
|
||||
const SectionRef &Section = *SI;
|
||||
if (!Section.isData() || Section.getSize() == 0)
|
||||
continue;
|
||||
SectionSymbolsTy &Symbols = AllSymbols[Section];
|
||||
|
||||
for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
|
||||
if (Symbols[SI].Name == FSDiscriminatorVar) {
|
||||
UseFSDiscriminator = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build BinaryFunctions and StartOffset2FuncRangeMap from the subprogram
// DIEs of all compile units. A subprogram may cover several disjoint address
// ranges; all of them are grouped under one BinaryFunction keyed by name,
// while each individual range is also indexed by its start offset.
// Exits with an error when the binary carries no DWARF debug info.
void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) {
  auto DebugContext = llvm::DWARFContext::create(Obj);
  if (!DebugContext)
    // Fix: diagnostic previously read "Misssing debug info."
    exitWithError("Missing debug info.", Path);

  for (const auto &CompilationUnit : DebugContext->compile_units()) {
    for (const auto &DieInfo : CompilationUnit->dies()) {
      llvm::DWARFDie Die(CompilationUnit.get(), &DieInfo);

      if (!Die.isSubprogramDIE())
        continue;
      // Prefer the mangled linkage name; fall back to the short name.
      auto Name = Die.getName(llvm::DINameKind::LinkageName);
      if (!Name)
        Name = Die.getName(llvm::DINameKind::ShortName);
      if (!Name)
        continue;

      auto RangesOrError = Die.getAddressRanges();
      if (!RangesOrError)
        continue;
      const DWARFAddressRangesVector &Ranges = RangesOrError.get();

      if (Ranges.empty())
        continue;

      // Different DWARF symbols can have same function name, search or create
      // BinaryFunction indexed by the name.
      auto Ret = BinaryFunctions.emplace(Name, BinaryFunction());
      auto &Func = Ret.first->second;
      if (Ret.second)
        Func.FuncName = Ret.first->first;

      for (const auto &Range : Ranges) {
        uint64_t FuncStart = Range.LowPC;
        uint64_t FuncSize = Range.HighPC - FuncStart;

        // Skip empty ranges and ranges below the preferred image base.
        if (FuncSize == 0 || FuncStart < getPreferredBaseAddress())
          continue;

        uint64_t StartOffset = FuncStart - getPreferredBaseAddress();
        uint64_t EndOffset = Range.HighPC - getPreferredBaseAddress();

        // We may want to know all ranges for one function. Here group the
        // ranges and store them into BinaryFunction.
        Func.Ranges.emplace_back(StartOffset, EndOffset);

        auto R = StartOffset2FuncRangeMap.emplace(StartOffset, FuncRange());
        if (R.second) {
          FuncRange &FRange = R.first->second;
          FRange.Func = &Func;
          FRange.StartOffset = StartOffset;
          FRange.EndOffset = EndOffset;
        } else {
          WithColor::warning()
              << "Duplicated symbol start address at "
              << format("%8" PRIx64, StartOffset + getPreferredBaseAddress())
              << " " << R.first->second.getFuncName() << " and " << Name
              << "\n";
        }
      }
    }
  }
  assert(!StartOffset2FuncRangeMap.empty() && "Missing debug info.");
}
|
||||
|
||||
// Register every DWARF-discovered function name with the given profile
// symbol list.
void ProfiledBinary::populateSymbolListFromDWARF(
    ProfileSymbolList &SymbolList) {
  for (auto &Entry : StartOffset2FuncRangeMap)
    SymbolList.add(Entry.second.getFuncName());
}
|
||||
|
||||
void ProfiledBinary::setupSymbolizer() {
|
||||
symbolize::LLVMSymbolizer::Options SymbolizerOpts;
|
||||
SymbolizerOpts.PrintFunctions =
|
||||
DILineInfoSpecifier::FunctionNameKind::LinkageName;
|
||||
SymbolizerOpts.Demangle = false;
|
||||
SymbolizerOpts.DefaultArch = TheTriple.getArchName().str();
|
||||
SymbolizerOpts.UseSymbolTable = false;
|
||||
SymbolizerOpts.RelativeAddresses = false;
|
||||
Symbolizer = std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts);
|
||||
}
|
||||
|
||||
// Symbolize the instruction at IP into its full inline call stack, ordered
// outermost caller first and leaf frame last. Function name strings are
// interned in NameStrings so the StringRefs in the result remain valid.
SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP,
                                                   bool UseCanonicalFnName,
                                                   bool UseProbeDiscriminator) {
  assert(this == IP.Binary &&
         "Binary should only symbolize its own instruction");
  auto Addr = object::SectionedAddress{IP.Offset + getPreferredBaseAddress(),
                                       object::SectionedAddress::UndefSection};
  DIInliningInfo InlineStack = unwrapOrError(
      Symbolizer->symbolizeInlinedCode(SymbolizerPath.str(), Addr),
      SymbolizerPath);

  SampleContextFrameVector CallStack;
  // Walk the inline stack from the outermost frame down to the leaf.
  for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) {
    const auto &CallerFrame = InlineStack.getFrame(I);
    // Stop as soon as a frame could not be resolved by the symbolizer.
    if (CallerFrame.FunctionName == "<invalid>")
      break;

    StringRef FunctionName(CallerFrame.FunctionName);
    if (UseCanonicalFnName)
      FunctionName = FunctionSamples::getCanonicalFnName(FunctionName);

    uint32_t Discriminator = CallerFrame.Discriminator;
    // Line offsets are relative to the function start, truncated to 16 bits.
    uint32_t LineOffset = (CallerFrame.Line - CallerFrame.StartLine) & 0xffff;
    if (UseProbeDiscriminator) {
      // With pseudo probes, the probe index encoded in the discriminator
      // takes the place of the line offset.
      LineOffset =
          PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator);
      Discriminator = 0;
    }

    LineLocation Line(LineOffset, Discriminator);
    // Intern the name; *It.first outlives this call.
    auto It = NameStrings.insert(FunctionName.str());
    CallStack.emplace_back(*It.first, Line);
  }

  return CallStack;
}
|
||||
|
||||
// Accumulate instruction sizes into FuncSizeTracker for every instruction in
// [StartOffset, EndOffset), attributed to each instruction's symbolized
// inline context. Used by the pre-inliner to estimate post-optimization
// function sizes.
void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t StartOffset,
                                                       uint64_t EndOffset) {
  uint64_t RangeBegin = offsetToVirtualAddr(StartOffset);
  uint64_t RangeEnd = offsetToVirtualAddr(EndOffset);
  // Round the start up to the next known instruction address if needed.
  InstructionPointer IP(this, RangeBegin, true);

  if (IP.Address != RangeBegin)
    WithColor::warning() << "Invalid start instruction at "
                         << format("%8" PRIx64, RangeBegin) << "\n";

  // Nothing to do when rounding moved us past the end of the range.
  if (IP.Address >= RangeEnd)
    return;

  do {
    uint64_t Offset = virtualAddrToOffset(IP.Address);
    const SampleContextFrameVector &SymbolizedCallStack =
        getFrameLocationStack(Offset, UsePseudoProbes);
    uint64_t Size = Offset2InstSizeMap[Offset];

    // Record instruction size for the corresponding context
    FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size);

  } while (IP.advance() && IP.Address < RangeEnd);
}
|
||||
|
||||
// Position a cursor at Address within the binary's sorted instruction
// array. With RoundToNext set, an address that does not land on a known
// instruction is snapped forward to the next valid instruction address, or
// to UINT64_MAX when no such instruction exists.
InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
                                       uint64_t Address, bool RoundToNext)
    : Binary(Binary), Address(Address) {
  Index = Binary->getIndexForAddr(Address);
  if (RoundToNext)
    this->Address = (Index < Binary->getCodeOffsetsSize())
                        ? Binary->getAddressforIndex(Index)
                        : UINT64_MAX;
}
|
||||
|
||||
// Step forward to the next instruction. Returns false — with Address set
// to UINT64_MAX — once the end of the code-address array is passed.
bool InstructionPointer::advance() {
  ++Index;
  if (Index < Binary->getCodeOffsetsSize()) {
    Address = Binary->getAddressforIndex(Index);
    return true;
  }
  // Ran off the end: mark the cursor invalid.
  Address = UINT64_MAX;
  return false;
}
|
||||
|
||||
// Step back to the previous instruction. Returns false — with Address
// reset to 0 — when already at the first instruction.
bool InstructionPointer::backward() {
  if (Index > 0) {
    --Index;
    Address = Binary->getAddressforIndex(Index);
    return true;
  }
  // Already at the beginning.
  Address = 0;
  return false;
}
|
||||
|
||||
// Reposition the cursor at the given address and refresh the index into
// the binary's sorted code-address array accordingly.
void InstructionPointer::update(uint64_t Addr) {
  Address = Addr;
  Index = Binary->getIndexForAddr(Addr);
}
|
||||
|
||||
} // end namespace sampleprof
|
||||
} // end namespace llvm
|
|
@ -1,541 +0,0 @@
|
|||
//===-- ProfiledBinary.h - Binary decoder -----------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
|
||||
#define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
|
||||
|
||||
#include "CallContext.h"
|
||||
#include "ErrorHandling.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
|
||||
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCInstPrinter.h"
|
||||
#include "llvm/MC/MCInstrAnalysis.h"
|
||||
#include "llvm/MC/MCInstrInfo.h"
|
||||
#include "llvm/MC/MCObjectFileInfo.h"
|
||||
#include "llvm/MC/MCPseudoProbe.h"
|
||||
#include "llvm/MC/MCRegisterInfo.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/MC/MCTargetOptions.h"
|
||||
#include "llvm/Object/ELFObjectFile.h"
|
||||
#include "llvm/ProfileData/SampleProf.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Path.h"
|
||||
#include "llvm/Transforms/IPO/SampleContextTracker.h"
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
extern cl::opt<bool> EnableCSPreInliner;
|
||||
extern cl::opt<bool> UseContextCostForPreInliner;
|
||||
|
||||
using namespace llvm;
|
||||
using namespace sampleprof;
|
||||
using namespace llvm::object;
|
||||
|
||||
namespace llvm {
|
||||
namespace sampleprof {
|
||||
|
||||
class ProfiledBinary;
|
||||
|
||||
// A cursor over a binary's sorted array of instruction addresses, allowing
// forward/backward stepping between known instruction boundaries.
struct InstructionPointer {
  const ProfiledBinary *Binary;
  union {
    // Offset of the executable segment of the binary.
    uint64_t Offset = 0;
    // Also used as address in unwinder
    uint64_t Address;
  };
  // Index to the sorted code address array of the binary.
  uint64_t Index = 0;
  // When RoundToNext is set, an address that is not a known instruction is
  // rounded up to the next valid instruction address.
  InstructionPointer(const ProfiledBinary *Binary, uint64_t Address,
                     bool RoundToNext = false);
  // Step to the next instruction; returns false at the end of the array.
  bool advance();
  // Step to the previous instruction; returns false at the beginning.
  bool backward();
  // Reposition the cursor at the given address.
  void update(uint64_t Addr);
};
|
||||
|
||||
// The special frame addresses.
enum SpecialFrameAddr {
  // Dummy root of frame trie.
  DummyRoot = 0,
  // Represent all the addresses outside of current binary.
  // This is also used to indicate the call stack should be truncated since
  // this isn't a real call context the compiler will see.
  ExternalAddr = 1,
};
|
||||
|
||||
using RangesTy = std::vector<std::pair<uint64_t, uint64_t>>;
|
||||
|
||||
struct BinaryFunction {
|
||||
StringRef FuncName;
|
||||
// End of range is an exclusive bound.
|
||||
RangesTy Ranges;
|
||||
|
||||
uint64_t getFuncSize() {
|
||||
uint64_t Sum = 0;
|
||||
for (auto &R : Ranges) {
|
||||
Sum += R.second - R.first;
|
||||
}
|
||||
return Sum;
|
||||
}
|
||||
};
|
||||
|
||||
// Info about function range. A function can be split into multiple
// non-continuous ranges, each range corresponds to one FuncRange.
struct FuncRange {
  uint64_t StartOffset;
  // EndOffset is an exclusive bound.
  uint64_t EndOffset;
  // Function the range belongs to
  BinaryFunction *Func;
  // Whether the start offset is the real entry of the function.
  bool IsFuncEntry = false;

  StringRef getFuncName() { return Func->FuncName; }
};
|
||||
|
||||
// PrologEpilog offset tracker, used to filter out broken stack samples
|
||||
// Currently we use a heuristic size (two) to infer prolog and epilog
|
||||
// based on the start address and return address. In the future,
|
||||
// we will switch to Dwarf CFI based tracker
|
||||
struct PrologEpilogTracker {
|
||||
// A set of prolog and epilog offsets. Used by virtual unwinding.
|
||||
std::unordered_set<uint64_t> PrologEpilogSet;
|
||||
ProfiledBinary *Binary;
|
||||
PrologEpilogTracker(ProfiledBinary *Bin) : Binary(Bin){};
|
||||
|
||||
// Take the two addresses from the start of function as prolog
|
||||
void inferPrologOffsets(std::map<uint64_t, FuncRange> &FuncStartOffsetMap) {
|
||||
for (auto I : FuncStartOffsetMap) {
|
||||
PrologEpilogSet.insert(I.first);
|
||||
InstructionPointer IP(Binary, I.first);
|
||||
if (!IP.advance())
|
||||
break;
|
||||
PrologEpilogSet.insert(IP.Offset);
|
||||
}
|
||||
}
|
||||
|
||||
// Take the last two addresses before the return address as epilog
|
||||
void inferEpilogOffsets(std::unordered_set<uint64_t> &RetAddrs) {
|
||||
for (auto Addr : RetAddrs) {
|
||||
PrologEpilogSet.insert(Addr);
|
||||
InstructionPointer IP(Binary, Addr);
|
||||
if (!IP.backward())
|
||||
break;
|
||||
PrologEpilogSet.insert(IP.Offset);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Track function byte size under different context (outlined version as well as
// various inlined versions). It also provides query support to get function
// size with the best matching context, which is used to help pre-inliner use
// accurate post-optimization size to make decisions.
// TODO: If an inlinee is completely optimized away, ideally we should have zero
// for its context size, currently we would miss such context since it doesn't
// have instructions. To fix this, we need to mark all inlinees with entry probe
// but without instructions as having zero size.
class BinarySizeContextTracker {
public:
  // Add instruction with given size to a context
  void addInstructionForContext(const SampleContextFrameVector &Context,
                                uint32_t InstrSize);

  // Get function size with a specific context. When there's no exact match
  // for the given context, try to retrieve the size of that function from
  // closest matching context.
  uint32_t getFuncSizeForContext(const SampleContext &Context);

  // For inlinees that are fully optimized away, we can establish zero size
  // using their remaining probes.
  void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder);

  void dump() { RootContext.dumpTree(); }

private:
  using ProbeFrameStack = SmallVector<std::pair<StringRef, uint32_t>>;
  // Recursive helper that walks the decoded probe inline tree.
  void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder,
                                  MCDecodedPseudoProbeInlineTree &ProbeNode,
                                  ProbeFrameStack &Context);

  // Root node for context trie tree, note that this is a reverse context trie
  // with callee as parent and caller as child. This way we can traverse from
  // root to find the best/longest matching context if an exact match does not
  // exist. It gives us the best possible estimate for function's post-inline,
  // post-optimization byte size.
  ContextTrieNode RootContext;
};
|
||||
|
||||
using OffsetRange = std::pair<uint64_t, uint64_t>;
|
||||
|
||||
class ProfiledBinary {
|
||||
// Absolute path of the executable binary.
|
||||
std::string Path;
|
||||
// Path of the debug info binary.
|
||||
std::string DebugBinaryPath;
|
||||
// Path of symbolizer path which should be pointed to binary with debug info.
|
||||
StringRef SymbolizerPath;
|
||||
// The target triple.
|
||||
Triple TheTriple;
|
||||
// The runtime base address that the first executable segment is loaded at.
|
||||
uint64_t BaseAddress = 0;
|
||||
// The runtime base address that the first loadable segment is loaded at.
|
||||
uint64_t FirstLoadableAddress = 0;
|
||||
// The preferred load address of each executable segment.
|
||||
std::vector<uint64_t> PreferredTextSegmentAddresses;
|
||||
// The file offset of each executable segment.
|
||||
std::vector<uint64_t> TextSegmentOffsets;
|
||||
|
||||
// Multiple MC component info
|
||||
std::unique_ptr<const MCRegisterInfo> MRI;
|
||||
std::unique_ptr<const MCAsmInfo> AsmInfo;
|
||||
std::unique_ptr<const MCSubtargetInfo> STI;
|
||||
std::unique_ptr<const MCInstrInfo> MII;
|
||||
std::unique_ptr<MCDisassembler> DisAsm;
|
||||
std::unique_ptr<const MCInstrAnalysis> MIA;
|
||||
std::unique_ptr<MCInstPrinter> IPrinter;
|
||||
// A list of text sections sorted by start RVA and size. Used to check
|
||||
// if a given RVA is a valid code address.
|
||||
std::set<std::pair<uint64_t, uint64_t>> TextSections;
|
||||
|
||||
// A map of mapping function name to BinaryFunction info.
|
||||
std::unordered_map<std::string, BinaryFunction> BinaryFunctions;
|
||||
|
||||
// An ordered map of mapping function's start offset to function range
|
||||
// relevant info. Currently to determine if the offset of ELF is the start of
|
||||
// a real function, we leverage the function range info from DWARF.
|
||||
std::map<uint64_t, FuncRange> StartOffset2FuncRangeMap;
|
||||
|
||||
// Offset to context location map. Used to expand the context.
|
||||
std::unordered_map<uint64_t, SampleContextFrameVector> Offset2LocStackMap;
|
||||
|
||||
// Offset to instruction size map. Also used for quick offset lookup.
|
||||
std::unordered_map<uint64_t, uint64_t> Offset2InstSizeMap;
|
||||
|
||||
// An array of offsets of all instructions sorted in increasing order. The
|
||||
// sorting is needed to fast advance to the next forward/backward instruction.
|
||||
std::vector<uint64_t> CodeAddrOffsets;
|
||||
// A set of call instruction offsets. Used by virtual unwinding.
|
||||
std::unordered_set<uint64_t> CallOffsets;
|
||||
// A set of return instruction offsets. Used by virtual unwinding.
|
||||
std::unordered_set<uint64_t> RetOffsets;
|
||||
// A set of branch instruction offsets.
|
||||
std::unordered_set<uint64_t> BranchOffsets;
|
||||
|
||||
// Estimate and track function prolog and epilog ranges.
|
||||
PrologEpilogTracker ProEpilogTracker;
|
||||
|
||||
// Track function sizes under different context
|
||||
BinarySizeContextTracker FuncSizeTracker;
|
||||
|
||||
// The symbolizer used to get inline context for an instruction.
|
||||
std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;
|
||||
|
||||
// String table owning function name strings created from the symbolizer.
|
||||
std::unordered_set<std::string> NameStrings;
|
||||
|
||||
// A collection of functions to print disassembly for.
|
||||
StringSet<> DisassembleFunctionSet;
|
||||
|
||||
// Pseudo probe decoder
|
||||
MCPseudoProbeDecoder ProbeDecoder;
|
||||
|
||||
bool UsePseudoProbes = false;
|
||||
|
||||
bool UseFSDiscriminator = false;
|
||||
|
||||
// Whether we need to symbolize all instructions to get function context size.
|
||||
bool TrackFuncContextSize = false;
|
||||
|
||||
// Indicate if the base loading address is parsed from the mmap event or uses
|
||||
// the preferred address
|
||||
bool IsLoadedByMMap = false;
|
||||
// Use to avoid redundant warning.
|
||||
bool MissingMMapWarned = false;
|
||||
|
||||
void setPreferredTextSegmentAddresses(const ELFObjectFileBase *O);
|
||||
|
||||
template <class ELFT>
|
||||
void setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, StringRef FileName);
|
||||
|
||||
void decodePseudoProbe(const ELFObjectFileBase *Obj);
|
||||
|
||||
void
|
||||
checkUseFSDiscriminator(const ELFObjectFileBase *Obj,
|
||||
std::map<SectionRef, SectionSymbolsTy> &AllSymbols);
|
||||
|
||||
// Set up disassembler and related components.
|
||||
void setUpDisassembler(const ELFObjectFileBase *Obj);
|
||||
void setupSymbolizer();
|
||||
|
||||
// Load debug info of subprograms from DWARF section.
|
||||
void loadSymbolsFromDWARF(ObjectFile &Obj);
|
||||
|
||||
// A function may be split into multiple non-contiguous address ranges. We use
// this to record whether the start offset of a function is the real entry of
// the function, and to mark non-function labels as such.
|
||||
void setIsFuncEntry(uint64_t Offset, StringRef RangeSymName);
|
||||
|
||||
// Warn if no entry range exists in the function.
|
||||
void warnNoFuncEntry();
|
||||
|
||||
/// Disassemble the text section and build various address maps.
|
||||
void disassemble(const ELFObjectFileBase *O);
|
||||
|
||||
/// Helper function to disassemble the symbol and extract info for unwinding.
|
||||
bool dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
|
||||
SectionSymbolsTy &Symbols, const SectionRef &Section);
|
||||
/// Symbolize a given instruction pointer and return a full call context.
|
||||
SampleContextFrameVector symbolize(const InstructionPointer &IP,
|
||||
bool UseCanonicalFnName = false,
|
||||
bool UseProbeDiscriminator = false);
|
||||
/// Decode the interesting parts of the binary and build internal data
|
||||
/// structures. On high level, the parts of interest are:
|
||||
/// 1. Text sections, including the main code section and the PLT
|
||||
/// entries that will be used to handle cross-module call transitions.
|
||||
/// 2. The .debug_line section, used by Dwarf-based profile generation.
|
||||
/// 3. Pseudo probe related sections, used by probe-based profile
|
||||
/// generation.
|
||||
void load();
|
||||
|
||||
public:
|
||||
ProfiledBinary(const StringRef ExeBinPath, const StringRef DebugBinPath)
|
||||
: Path(ExeBinPath), DebugBinaryPath(DebugBinPath), ProEpilogTracker(this),
|
||||
TrackFuncContextSize(EnableCSPreInliner &&
|
||||
UseContextCostForPreInliner) {
|
||||
// Point to executable binary if debug info binary is not specified.
|
||||
SymbolizerPath = DebugBinPath.empty() ? ExeBinPath : DebugBinPath;
|
||||
setupSymbolizer();
|
||||
load();
|
||||
}
|
||||
uint64_t virtualAddrToOffset(uint64_t VirtualAddress) const {
|
||||
return VirtualAddress - BaseAddress;
|
||||
}
|
||||
uint64_t offsetToVirtualAddr(uint64_t Offset) const {
|
||||
return Offset + BaseAddress;
|
||||
}
|
||||
StringRef getPath() const { return Path; }
|
||||
StringRef getName() const { return llvm::sys::path::filename(Path); }
|
||||
uint64_t getBaseAddress() const { return BaseAddress; }
|
||||
void setBaseAddress(uint64_t Address) { BaseAddress = Address; }
|
||||
|
||||
// Return the preferred load address for the first executable segment.
|
||||
uint64_t getPreferredBaseAddress() const { return PreferredTextSegmentAddresses[0]; }
|
||||
// Return the preferred load address for the first loadable segment.
|
||||
uint64_t getFirstLoadableAddress() const { return FirstLoadableAddress; }
|
||||
// Return the file offset for the first executable segment.
|
||||
uint64_t getTextSegmentOffset() const { return TextSegmentOffsets[0]; }
|
||||
const std::vector<uint64_t> &getPreferredTextSegmentAddresses() const {
|
||||
return PreferredTextSegmentAddresses;
|
||||
}
|
||||
const std::vector<uint64_t> &getTextSegmentOffsets() const {
|
||||
return TextSegmentOffsets;
|
||||
}
|
||||
|
||||
uint64_t getInstSize(uint64_t Offset) const {
|
||||
auto I = Offset2InstSizeMap.find(Offset);
|
||||
if (I == Offset2InstSizeMap.end())
|
||||
return 0;
|
||||
return I->second;
|
||||
}
|
||||
|
||||
bool offsetIsCode(uint64_t Offset) const {
|
||||
return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end();
|
||||
}
|
||||
bool addressIsCode(uint64_t Address) const {
|
||||
uint64_t Offset = virtualAddrToOffset(Address);
|
||||
return offsetIsCode(Offset);
|
||||
}
|
||||
bool addressIsCall(uint64_t Address) const {
|
||||
uint64_t Offset = virtualAddrToOffset(Address);
|
||||
return CallOffsets.count(Offset);
|
||||
}
|
||||
bool addressIsReturn(uint64_t Address) const {
|
||||
uint64_t Offset = virtualAddrToOffset(Address);
|
||||
return RetOffsets.count(Offset);
|
||||
}
|
||||
bool addressInPrologEpilog(uint64_t Address) const {
|
||||
uint64_t Offset = virtualAddrToOffset(Address);
|
||||
return ProEpilogTracker.PrologEpilogSet.count(Offset);
|
||||
}
|
||||
|
||||
bool offsetIsTransfer(uint64_t Offset) {
|
||||
return BranchOffsets.count(Offset) || RetOffsets.count(Offset) ||
|
||||
CallOffsets.count(Offset);
|
||||
}
|
||||
|
||||
uint64_t getAddressforIndex(uint64_t Index) const {
|
||||
return offsetToVirtualAddr(CodeAddrOffsets[Index]);
|
||||
}
|
||||
|
||||
size_t getCodeOffsetsSize() const { return CodeAddrOffsets.size(); }
|
||||
|
||||
bool usePseudoProbes() const { return UsePseudoProbes; }
|
||||
bool useFSDiscriminator() const { return UseFSDiscriminator; }
|
||||
// Get the index in CodeAddrOffsets for the address
|
||||
// As we might get an address which is not the code
|
||||
// here it would round to the next valid code address by
|
||||
// using lower bound operation
|
||||
uint32_t getIndexForOffset(uint64_t Offset) const {
|
||||
auto Low = llvm::lower_bound(CodeAddrOffsets, Offset);
|
||||
return Low - CodeAddrOffsets.begin();
|
||||
}
|
||||
uint32_t getIndexForAddr(uint64_t Address) const {
|
||||
uint64_t Offset = virtualAddrToOffset(Address);
|
||||
return getIndexForOffset(Offset);
|
||||
}
|
||||
|
||||
uint64_t getCallAddrFromFrameAddr(uint64_t FrameAddr) const {
|
||||
if (FrameAddr == ExternalAddr)
|
||||
return ExternalAddr;
|
||||
auto I = getIndexForAddr(FrameAddr);
|
||||
FrameAddr = I ? getAddressforIndex(I - 1) : 0;
|
||||
if (FrameAddr && addressIsCall(FrameAddr))
|
||||
return FrameAddr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
FuncRange *findFuncRangeForStartOffset(uint64_t Offset) {
|
||||
auto I = StartOffset2FuncRangeMap.find(Offset);
|
||||
if (I == StartOffset2FuncRangeMap.end())
|
||||
return nullptr;
|
||||
return &I->second;
|
||||
}
|
||||
|
||||
// Binary search the function range which includes the input offset.
|
||||
FuncRange *findFuncRangeForOffset(uint64_t Offset) {
|
||||
auto I = StartOffset2FuncRangeMap.upper_bound(Offset);
|
||||
if (I == StartOffset2FuncRangeMap.begin())
|
||||
return nullptr;
|
||||
I--;
|
||||
|
||||
if (Offset >= I->second.EndOffset)
|
||||
return nullptr;
|
||||
|
||||
return &I->second;
|
||||
}
|
||||
|
||||
// Get all ranges of one function.
|
||||
RangesTy getRangesForOffset(uint64_t Offset) {
|
||||
auto *FRange = findFuncRangeForOffset(Offset);
|
||||
// Ignore the range which falls into plt section or system lib.
|
||||
if (!FRange)
|
||||
return RangesTy();
|
||||
|
||||
return FRange->Func->Ranges;
|
||||
}
|
||||
|
||||
const std::unordered_map<std::string, BinaryFunction> &
|
||||
getAllBinaryFunctions() {
|
||||
return BinaryFunctions;
|
||||
}
|
||||
|
||||
BinaryFunction *getBinaryFunction(StringRef FName) {
|
||||
auto I = BinaryFunctions.find(FName.str());
|
||||
if (I == BinaryFunctions.end())
|
||||
return nullptr;
|
||||
return &I->second;
|
||||
}
|
||||
|
||||
uint32_t getFuncSizeForContext(SampleContext &Context) {
|
||||
return FuncSizeTracker.getFuncSizeForContext(Context);
|
||||
}
|
||||
|
||||
// Load the symbols from debug table and populate into symbol list.
|
||||
void populateSymbolListFromDWARF(ProfileSymbolList &SymbolList);
|
||||
|
||||
const SampleContextFrameVector &
|
||||
getFrameLocationStack(uint64_t Offset, bool UseProbeDiscriminator = false) {
|
||||
auto I = Offset2LocStackMap.emplace(Offset, SampleContextFrameVector());
|
||||
if (I.second) {
|
||||
InstructionPointer IP(this, Offset);
|
||||
I.first->second = symbolize(IP, true, UseProbeDiscriminator);
|
||||
}
|
||||
return I.first->second;
|
||||
}
|
||||
|
||||
Optional<SampleContextFrame> getInlineLeafFrameLoc(uint64_t Offset) {
|
||||
const auto &Stack = getFrameLocationStack(Offset);
|
||||
if (Stack.empty())
|
||||
return {};
|
||||
return Stack.back();
|
||||
}
|
||||
|
||||
// Compare two addresses' inline context
|
||||
bool inlineContextEqual(uint64_t Add1, uint64_t Add2);
|
||||
|
||||
// Get the full context of the current stack with inline context filled in.
|
||||
// It will search the disassembling info stored in Offset2LocStackMap. This is
|
||||
// used as the key of function sample map
|
||||
SampleContextFrameVector
|
||||
getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
|
||||
bool &WasLeafInlined);
|
||||
// Go through instructions among the given range and record its size for the
|
||||
// inline context.
|
||||
void computeInlinedContextSizeForRange(uint64_t StartOffset,
|
||||
uint64_t EndOffset);
|
||||
|
||||
const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
|
||||
return ProbeDecoder.getCallProbeForAddr(Address);
|
||||
}
|
||||
|
||||
void getInlineContextForProbe(const MCDecodedPseudoProbe *Probe,
|
||||
SampleContextFrameVector &InlineContextStack,
|
||||
bool IncludeLeaf = false) const {
|
||||
SmallVector<MCPseduoProbeFrameLocation, 16> ProbeInlineContext;
|
||||
ProbeDecoder.getInlineContextForProbe(Probe, ProbeInlineContext,
|
||||
IncludeLeaf);
|
||||
for (uint32_t I = 0; I < ProbeInlineContext.size(); I++) {
|
||||
auto &Callsite = ProbeInlineContext[I];
|
||||
// Clear the current context for an unknown probe.
|
||||
if (Callsite.second == 0 && I != ProbeInlineContext.size() - 1) {
|
||||
InlineContextStack.clear();
|
||||
continue;
|
||||
}
|
||||
InlineContextStack.emplace_back(Callsite.first,
|
||||
LineLocation(Callsite.second, 0));
|
||||
}
|
||||
}
|
||||
const AddressProbesMap &getAddress2ProbesMap() const {
|
||||
return ProbeDecoder.getAddress2ProbesMap();
|
||||
}
|
||||
const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) {
|
||||
return ProbeDecoder.getFuncDescForGUID(GUID);
|
||||
}
|
||||
|
||||
const MCPseudoProbeFuncDesc *
|
||||
getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) {
|
||||
return ProbeDecoder.getInlinerDescForProbe(Probe);
|
||||
}
|
||||
|
||||
bool getTrackFuncContextSize() { return TrackFuncContextSize; }
|
||||
|
||||
bool getIsLoadedByMMap() { return IsLoadedByMMap; }
|
||||
|
||||
void setIsLoadedByMMap(bool Value) { IsLoadedByMMap = Value; }
|
||||
|
||||
bool getMissingMMapWarned() { return MissingMMapWarned; }
|
||||
|
||||
void setMissingMMapWarned(bool Value) { MissingMMapWarned = Value; }
|
||||
};
|
||||
|
||||
} // end namespace sampleprof
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
|
@ -1,164 +0,0 @@
|
|||
//===- llvm-profgen.cpp - LLVM SPGO profile generation tool -----*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// llvm-profgen generates SPGO profiles from perf script output.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ErrorHandling.h"
|
||||
#include "PerfReader.h"
|
||||
#include "ProfileGenerator.h"
|
||||
#include "ProfiledBinary.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/FileSystem.h"
|
||||
#include "llvm/Support/InitLLVM.h"
|
||||
#include "llvm/Support/TargetSelect.h"
|
||||
|
||||
static cl::OptionCategory ProfGenCategory("ProfGen Options");
|
||||
|
||||
static cl::opt<std::string> PerfScriptFilename(
|
||||
"perfscript", cl::value_desc("perfscript"), cl::ZeroOrMore,
|
||||
llvm::cl::MiscFlags::CommaSeparated,
|
||||
cl::desc("Path of perf-script trace created by Linux perf tool with "
|
||||
"`script` command(the raw perf.data should be profiled with -b)"),
|
||||
cl::cat(ProfGenCategory));
|
||||
static cl::alias PSA("ps", cl::desc("Alias for --perfscript"),
|
||||
cl::aliasopt(PerfScriptFilename));
|
||||
|
||||
static cl::opt<std::string> PerfDataFilename(
|
||||
"perfdata", cl::value_desc("perfdata"), cl::ZeroOrMore,
|
||||
llvm::cl::MiscFlags::CommaSeparated,
|
||||
cl::desc("Path of raw perf data created by Linux perf tool (it should be "
|
||||
"profiled with -b)"),
|
||||
cl::cat(ProfGenCategory));
|
||||
static cl::alias PDA("pd", cl::desc("Alias for --perfdata"),
|
||||
cl::aliasopt(PerfDataFilename));
|
||||
|
||||
static cl::opt<std::string> UnsymbolizedProfFilename(
|
||||
"unsymbolized-profile", cl::value_desc("unsymbolized profile"),
|
||||
cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated,
|
||||
cl::desc("Path of the unsymbolized profile created by "
|
||||
"`llvm-profgen` with `--skip-symbolization`"),
|
||||
cl::cat(ProfGenCategory));
|
||||
static cl::alias UPA("up", cl::desc("Alias for --unsymbolized-profile"),
|
||||
cl::aliasopt(UnsymbolizedProfFilename));
|
||||
|
||||
static cl::opt<std::string>
|
||||
BinaryPath("binary", cl::value_desc("binary"), cl::Required,
|
||||
cl::desc("Path of profiled executable binary."),
|
||||
cl::cat(ProfGenCategory));
|
||||
|
||||
static cl::opt<std::string> DebugBinPath(
|
||||
"debug-binary", cl::value_desc("debug-binary"), cl::ZeroOrMore,
|
||||
cl::desc("Path of debug info binary, llvm-profgen will load the DWARF info "
|
||||
"from it instead of the executable binary."),
|
||||
cl::cat(ProfGenCategory));
|
||||
|
||||
extern cl::opt<bool> ShowDisassemblyOnly;
|
||||
extern cl::opt<bool> ShowSourceLocations;
|
||||
extern cl::opt<bool> SkipSymbolization;
|
||||
|
||||
using namespace llvm;
|
||||
using namespace sampleprof;
|
||||
|
||||
// Validate the command line input.
|
||||
static void validateCommandLine() {
|
||||
// Allow the missing perfscript if we only use to show binary disassembly.
|
||||
if (!ShowDisassemblyOnly) {
|
||||
// Validate input profile is provided only once
|
||||
uint16_t HasPerfData = PerfDataFilename.getNumOccurrences();
|
||||
uint16_t HasPerfScript = PerfScriptFilename.getNumOccurrences();
|
||||
uint16_t HasUnsymbolizedProfile =
|
||||
UnsymbolizedProfFilename.getNumOccurrences();
|
||||
uint16_t S = HasPerfData + HasPerfScript + HasUnsymbolizedProfile;
|
||||
if (S != 1) {
|
||||
std::string Msg =
|
||||
S > 1
|
||||
? "`--perfscript`, `--perfdata` and `--unsymbolized-profile` "
|
||||
"cannot be used together."
|
||||
: "Perf input file is missing, please use one of `--perfscript`, "
|
||||
"`--perfdata` and `--unsymbolized-profile` for the input.";
|
||||
exitWithError(Msg);
|
||||
}
|
||||
|
||||
auto CheckFileExists = [](bool H, StringRef File) {
|
||||
if (H && !llvm::sys::fs::exists(File)) {
|
||||
std::string Msg = "Input perf file(" + File.str() + ") doesn't exist.";
|
||||
exitWithError(Msg);
|
||||
}
|
||||
};
|
||||
|
||||
CheckFileExists(HasPerfData, PerfDataFilename);
|
||||
CheckFileExists(HasPerfScript, PerfScriptFilename);
|
||||
CheckFileExists(HasUnsymbolizedProfile, UnsymbolizedProfFilename);
|
||||
}
|
||||
|
||||
if (!llvm::sys::fs::exists(BinaryPath)) {
|
||||
std::string Msg = "Input binary(" + BinaryPath + ") doesn't exist.";
|
||||
exitWithError(Msg);
|
||||
}
|
||||
|
||||
if (CSProfileGenerator::MaxCompressionSize < -1) {
|
||||
exitWithError("Value of --compress-recursion should >= -1");
|
||||
}
|
||||
if (ShowSourceLocations && !ShowDisassemblyOnly) {
|
||||
exitWithError("--show-source-locations should work together with "
|
||||
"--show-disassembly-only!");
|
||||
}
|
||||
}
|
||||
|
||||
static PerfInputFile getPerfInputFile() {
|
||||
PerfInputFile File;
|
||||
if (PerfDataFilename.getNumOccurrences()) {
|
||||
File.InputFile = PerfDataFilename;
|
||||
File.Format = PerfFormat::PerfData;
|
||||
} else if (PerfScriptFilename.getNumOccurrences()) {
|
||||
File.InputFile = PerfScriptFilename;
|
||||
File.Format = PerfFormat::PerfScript;
|
||||
} else if (UnsymbolizedProfFilename.getNumOccurrences()) {
|
||||
File.InputFile = UnsymbolizedProfFilename;
|
||||
File.Format = PerfFormat::UnsymbolizedProfile;
|
||||
}
|
||||
return File;
|
||||
}
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
InitLLVM X(argc, argv);
|
||||
|
||||
// Initialize targets and assembly printers/parsers.
|
||||
InitializeAllTargetInfos();
|
||||
InitializeAllTargetMCs();
|
||||
InitializeAllDisassemblers();
|
||||
|
||||
cl::HideUnrelatedOptions({&ProfGenCategory, &getColorCategory()});
|
||||
cl::ParseCommandLineOptions(argc, argv, "llvm SPGO profile generator\n");
|
||||
validateCommandLine();
|
||||
|
||||
// Load symbols and disassemble the code of a given binary.
|
||||
std::unique_ptr<ProfiledBinary> Binary =
|
||||
std::make_unique<ProfiledBinary>(BinaryPath, DebugBinPath);
|
||||
if (ShowDisassemblyOnly)
|
||||
return EXIT_SUCCESS;
|
||||
|
||||
PerfInputFile PerfFile = getPerfInputFile();
|
||||
std::unique_ptr<PerfReaderBase> Reader =
|
||||
PerfReaderBase::create(Binary.get(), PerfFile);
|
||||
// Parse perf events and samples
|
||||
Reader->parsePerfTraces();
|
||||
|
||||
if (SkipSymbolization)
|
||||
return EXIT_SUCCESS;
|
||||
|
||||
std::unique_ptr<ProfileGeneratorBase> Generator =
|
||||
ProfileGeneratorBase::create(Binary.get(), Reader->getSampleCounters(),
|
||||
Reader->profileIsCSFlat());
|
||||
Generator->generateProfile();
|
||||
Generator->write();
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
|
@ -1,891 +0,0 @@
|
|||
//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// FileCheck does a line-by line check of a file that validates whether it
|
||||
// contains the expected content. This is useful for regression tests etc.
|
||||
//
|
||||
// This program exits with an exit status of 2 on error, exit status of 0 if
|
||||
// the file matched the expected contents, and exit status of 1 if it did not
|
||||
// contain the expected contents.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/FileCheck/FileCheck.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/InitLLVM.h"
|
||||
#include "llvm/Support/Process.h"
|
||||
#include "llvm/Support/WithColor.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <cmath>
|
||||
#include <map>
|
||||
using namespace llvm;
|
||||
|
||||
static cl::extrahelp FileCheckOptsEnv(
|
||||
"\nOptions are parsed from the environment variable FILECHECK_OPTS and\n"
|
||||
"from the command line.\n");
|
||||
|
||||
static cl::opt<std::string>
|
||||
CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Optional);
|
||||
|
||||
static cl::opt<std::string>
|
||||
InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
|
||||
cl::init("-"), cl::value_desc("filename"));
|
||||
|
||||
static cl::list<std::string> CheckPrefixes(
|
||||
"check-prefix",
|
||||
cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
|
||||
static cl::alias CheckPrefixesAlias(
|
||||
"check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
|
||||
cl::NotHidden,
|
||||
cl::desc(
|
||||
"Alias for -check-prefix permitting multiple comma separated values"));
|
||||
|
||||
static cl::list<std::string> CommentPrefixes(
|
||||
"comment-prefixes", cl::CommaSeparated, cl::Hidden,
|
||||
cl::desc("Comma-separated list of comment prefixes to use from check file\n"
|
||||
"(defaults to 'COM,RUN'). Please avoid using this feature in\n"
|
||||
"LLVM's LIT-based test suites, which should be easier to\n"
|
||||
"maintain if they all follow a consistent comment style. This\n"
|
||||
"feature is meant for non-LIT test suites using FileCheck."));
|
||||
|
||||
static cl::opt<bool> NoCanonicalizeWhiteSpace(
|
||||
"strict-whitespace",
|
||||
cl::desc("Do not treat all horizontal whitespace as equivalent"));
|
||||
|
||||
static cl::opt<bool> IgnoreCase(
|
||||
"ignore-case",
|
||||
cl::desc("Use case-insensitive matching"));
|
||||
|
||||
static cl::list<std::string> ImplicitCheckNot(
|
||||
"implicit-check-not",
|
||||
cl::desc("Add an implicit negative check with this pattern to every\n"
|
||||
"positive check. This can be used to ensure that no instances of\n"
|
||||
"this pattern occur which are not matched by a positive pattern"),
|
||||
cl::value_desc("pattern"));
|
||||
|
||||
static cl::list<std::string>
|
||||
GlobalDefines("D", cl::AlwaysPrefix,
|
||||
cl::desc("Define a variable to be used in capture patterns."),
|
||||
cl::value_desc("VAR=VALUE"));
|
||||
|
||||
static cl::opt<bool> AllowEmptyInput(
|
||||
"allow-empty", cl::init(false),
|
||||
cl::desc("Allow the input file to be empty. This is useful when making\n"
|
||||
"checks that some error message does not occur, for example."));
|
||||
|
||||
static cl::opt<bool> AllowUnusedPrefixes(
|
||||
"allow-unused-prefixes", cl::init(false), cl::ZeroOrMore,
|
||||
cl::desc("Allow prefixes to be specified but not appear in the test."));
|
||||
|
||||
static cl::opt<bool> MatchFullLines(
|
||||
"match-full-lines", cl::init(false),
|
||||
cl::desc("Require all positive matches to cover an entire input line.\n"
|
||||
"Allows leading and trailing whitespace if --strict-whitespace\n"
|
||||
"is not also passed."));
|
||||
|
||||
static cl::opt<bool> EnableVarScope(
|
||||
"enable-var-scope", cl::init(false),
|
||||
cl::desc("Enables scope for regex variables. Variables with names that\n"
|
||||
"do not start with '$' will be reset at the beginning of\n"
|
||||
"each CHECK-LABEL block."));
|
||||
|
||||
static cl::opt<bool> AllowDeprecatedDagOverlap(
|
||||
"allow-deprecated-dag-overlap", cl::init(false),
|
||||
cl::desc("Enable overlapping among matches in a group of consecutive\n"
|
||||
"CHECK-DAG directives. This option is deprecated and is only\n"
|
||||
"provided for convenience as old tests are migrated to the new\n"
|
||||
"non-overlapping CHECK-DAG implementation.\n"));
|
||||
|
||||
static cl::opt<bool> Verbose(
|
||||
"v", cl::init(false), cl::ZeroOrMore,
|
||||
cl::desc("Print directive pattern matches, or add them to the input dump\n"
|
||||
"if enabled.\n"));
|
||||
|
||||
static cl::opt<bool> VerboseVerbose(
|
||||
"vv", cl::init(false), cl::ZeroOrMore,
|
||||
cl::desc("Print information helpful in diagnosing internal FileCheck\n"
|
||||
"issues, or add it to the input dump if enabled. Implies\n"
|
||||
"-v.\n"));
|
||||
|
||||
// The order of DumpInputValue members affects their precedence, as documented
|
||||
// for -dump-input below.
|
||||
enum DumpInputValue {
|
||||
DumpInputNever,
|
||||
DumpInputFail,
|
||||
DumpInputAlways,
|
||||
DumpInputHelp
|
||||
};
|
||||
|
||||
static cl::list<DumpInputValue> DumpInputs(
|
||||
"dump-input",
|
||||
cl::desc("Dump input to stderr, adding annotations representing\n"
|
||||
"currently enabled diagnostics. When there are multiple\n"
|
||||
"occurrences of this option, the <value> that appears earliest\n"
|
||||
"in the list below has precedence. The default is 'fail'.\n"),
|
||||
cl::value_desc("mode"),
|
||||
cl::values(clEnumValN(DumpInputHelp, "help", "Explain input dump and quit"),
|
||||
clEnumValN(DumpInputAlways, "always", "Always dump input"),
|
||||
clEnumValN(DumpInputFail, "fail", "Dump input on failure"),
|
||||
clEnumValN(DumpInputNever, "never", "Never dump input")));
|
||||
|
||||
// The order of DumpInputFilterValue members affects their precedence, as
|
||||
// documented for -dump-input-filter below.
|
||||
enum DumpInputFilterValue {
|
||||
DumpInputFilterError,
|
||||
DumpInputFilterAnnotation,
|
||||
DumpInputFilterAnnotationFull,
|
||||
DumpInputFilterAll
|
||||
};
|
||||
|
||||
static cl::list<DumpInputFilterValue> DumpInputFilters(
|
||||
"dump-input-filter",
|
||||
cl::desc("In the dump requested by -dump-input, print only input lines of\n"
|
||||
"kind <value> plus any context specified by -dump-input-context.\n"
|
||||
"When there are multiple occurrences of this option, the <value>\n"
|
||||
"that appears earliest in the list below has precedence. The\n"
|
||||
"default is 'error' when -dump-input=fail, and it's 'all' when\n"
|
||||
"-dump-input=always.\n"),
|
||||
cl::values(clEnumValN(DumpInputFilterAll, "all", "All input lines"),
|
||||
clEnumValN(DumpInputFilterAnnotationFull, "annotation-full",
|
||||
"Input lines with annotations"),
|
||||
clEnumValN(DumpInputFilterAnnotation, "annotation",
|
||||
"Input lines with starting points of annotations"),
|
||||
clEnumValN(DumpInputFilterError, "error",
|
||||
"Input lines with starting points of error "
|
||||
"annotations")));
|
||||
|
||||
static cl::list<unsigned> DumpInputContexts(
|
||||
"dump-input-context", cl::value_desc("N"),
|
||||
cl::desc("In the dump requested by -dump-input, print <N> input lines\n"
|
||||
"before and <N> input lines after any lines specified by\n"
|
||||
"-dump-input-filter. When there are multiple occurrences of\n"
|
||||
"this option, the largest specified <N> has precedence. The\n"
|
||||
"default is 5.\n"));
|
||||
|
||||
typedef cl::list<std::string>::const_iterator prefix_iterator;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static void DumpCommandLine(int argc, char **argv) {
|
||||
errs() << "FileCheck command line: ";
|
||||
for (int I = 0; I < argc; I++)
|
||||
errs() << " " << argv[I];
|
||||
errs() << "\n";
|
||||
}
|
||||
|
||||
struct MarkerStyle {
|
||||
/// The starting char (before tildes) for marking the line.
|
||||
char Lead;
|
||||
/// What color to use for this annotation.
|
||||
raw_ostream::Colors Color;
|
||||
/// A note to follow the marker, or empty string if none.
|
||||
std::string Note;
|
||||
/// Does this marker indicate inclusion by -dump-input-filter=error?
|
||||
bool FiltersAsError;
|
||||
MarkerStyle() {}
|
||||
MarkerStyle(char Lead, raw_ostream::Colors Color,
|
||||
const std::string &Note = "", bool FiltersAsError = false)
|
||||
: Lead(Lead), Color(Color), Note(Note), FiltersAsError(FiltersAsError) {
|
||||
assert((!FiltersAsError || !Note.empty()) &&
|
||||
"expected error diagnostic to have note");
|
||||
}
|
||||
};
|
||||
|
||||
static MarkerStyle GetMarker(FileCheckDiag::MatchType MatchTy) {
|
||||
switch (MatchTy) {
|
||||
case FileCheckDiag::MatchFoundAndExpected:
|
||||
return MarkerStyle('^', raw_ostream::GREEN);
|
||||
case FileCheckDiag::MatchFoundButExcluded:
|
||||
return MarkerStyle('!', raw_ostream::RED, "error: no match expected",
|
||||
/*FiltersAsError=*/true);
|
||||
case FileCheckDiag::MatchFoundButWrongLine:
|
||||
return MarkerStyle('!', raw_ostream::RED, "error: match on wrong line",
|
||||
/*FiltersAsError=*/true);
|
||||
case FileCheckDiag::MatchFoundButDiscarded:
|
||||
return MarkerStyle('!', raw_ostream::CYAN,
|
||||
"discard: overlaps earlier match");
|
||||
case FileCheckDiag::MatchFoundErrorNote:
|
||||
// Note should always be overridden within the FileCheckDiag.
|
||||
return MarkerStyle('!', raw_ostream::RED,
|
||||
"error: unknown error after match",
|
||||
/*FiltersAsError=*/true);
|
||||
case FileCheckDiag::MatchNoneAndExcluded:
|
||||
return MarkerStyle('X', raw_ostream::GREEN);
|
||||
case FileCheckDiag::MatchNoneButExpected:
|
||||
return MarkerStyle('X', raw_ostream::RED, "error: no match found",
|
||||
/*FiltersAsError=*/true);
|
||||
case FileCheckDiag::MatchNoneForInvalidPattern:
|
||||
return MarkerStyle('X', raw_ostream::RED,
|
||||
"error: match failed for invalid pattern",
|
||||
/*FiltersAsError=*/true);
|
||||
case FileCheckDiag::MatchFuzzy:
|
||||
return MarkerStyle('?', raw_ostream::MAGENTA, "possible intended match",
|
||||
/*FiltersAsError=*/true);
|
||||
}
|
||||
llvm_unreachable_internal("unexpected match type");
|
||||
}
|
||||
|
||||
static void DumpInputAnnotationHelp(raw_ostream &OS) {
|
||||
OS << "The following description was requested by -dump-input=help to\n"
|
||||
<< "explain the input dump printed by FileCheck.\n"
|
||||
<< "\n"
|
||||
<< "Related command-line options:\n"
|
||||
<< "\n"
|
||||
<< " - -dump-input=<value> enables or disables the input dump\n"
|
||||
<< " - -dump-input-filter=<value> filters the input lines\n"
|
||||
<< " - -dump-input-context=<N> adjusts the context of filtered lines\n"
|
||||
<< " - -v and -vv add more annotations\n"
|
||||
<< " - -color forces colors to be enabled both in the dump and below\n"
|
||||
<< " - -help documents the above options in more detail\n"
|
||||
<< "\n"
|
||||
<< "These options can also be set via FILECHECK_OPTS. For example, for\n"
|
||||
<< "maximum debugging output on failures:\n"
|
||||
<< "\n"
|
||||
<< " $ FILECHECK_OPTS='-dump-input-filter=all -vv -color' ninja check\n"
|
||||
<< "\n"
|
||||
<< "Input dump annotation format:\n"
|
||||
<< "\n";
|
||||
|
||||
// Labels for input lines.
|
||||
OS << " - ";
|
||||
WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "L:";
|
||||
OS << " labels line number L of the input file\n"
|
||||
<< " An extra space is added after each input line to represent"
|
||||
<< " the\n"
|
||||
<< " newline character\n";
|
||||
|
||||
// Labels for annotation lines.
|
||||
OS << " - ";
|
||||
WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "T:L";
|
||||
OS << " labels the only match result for either (1) a pattern of type T"
|
||||
<< " from\n"
|
||||
<< " line L of the check file if L is an integer or (2) the"
|
||||
<< " I-th implicit\n"
|
||||
<< " pattern if L is \"imp\" followed by an integer "
|
||||
<< "I (index origin one)\n";
|
||||
OS << " - ";
|
||||
WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "T:L'N";
|
||||
OS << " labels the Nth match result for such a pattern\n";
|
||||
|
||||
// Markers on annotation lines.
|
||||
OS << " - ";
|
||||
WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "^~~";
|
||||
OS << " marks good match (reported if -v)\n"
|
||||
<< " - ";
|
||||
WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "!~~";
|
||||
OS << " marks bad match, such as:\n"
|
||||
<< " - CHECK-NEXT on same line as previous match (error)\n"
|
||||
<< " - CHECK-NOT found (error)\n"
|
||||
<< " - CHECK-DAG overlapping match (discarded, reported if "
|
||||
<< "-vv)\n"
|
||||
<< " - ";
|
||||
WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "X~~";
|
||||
OS << " marks search range when no match is found, such as:\n"
|
||||
<< " - CHECK-NEXT not found (error)\n"
|
||||
<< " - CHECK-NOT not found (success, reported if -vv)\n"
|
||||
<< " - CHECK-DAG not found after discarded matches (error)\n"
|
||||
<< " - ";
|
||||
WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "?";
|
||||
OS << " marks fuzzy match when no match is found\n";
|
||||
|
||||
// Elided lines.
|
||||
OS << " - ";
|
||||
WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "...";
|
||||
OS << " indicates elided input lines and annotations, as specified by\n"
|
||||
<< " -dump-input-filter and -dump-input-context\n";
|
||||
|
||||
// Colors.
|
||||
OS << " - colors ";
|
||||
WithColor(OS, raw_ostream::GREEN, true) << "success";
|
||||
OS << ", ";
|
||||
WithColor(OS, raw_ostream::RED, true) << "error";
|
||||
OS << ", ";
|
||||
WithColor(OS, raw_ostream::MAGENTA, true) << "fuzzy match";
|
||||
OS << ", ";
|
||||
WithColor(OS, raw_ostream::CYAN, true, false) << "discarded match";
|
||||
OS << ", ";
|
||||
WithColor(OS, raw_ostream::CYAN, true, true) << "unmatched input";
|
||||
OS << "\n";
|
||||
}
|
||||
|
||||
/// An annotation for a single input line.
///
/// One FileCheck diagnostic produces one annotation per input line it spans;
/// BuildInputAnnotations() creates these and DumpAnnotatedInput() renders
/// them under the corresponding input lines.
struct InputAnnotation {
  /// The index of the match result across all checks, i.e. the temporal order
  /// in which FileCheck produced the associated diagnostic.
  unsigned DiagIndex;
  /// The label for this annotation, e.g. "check:12" or "not:imp1".
  std::string Label;
  /// Is this the initial fragment of a diagnostic that has been broken across
  /// multiple lines?
  bool IsFirstLine;
  /// What input line (one-origin indexing) this annotation marks.  This might
  /// be different from the starting line of the original diagnostic if
  /// !IsFirstLine.
  unsigned InputLine;
  /// The column range (one-origin indexing, open end) in which to mark the
  /// input line.  If InputEndCol is UINT_MAX, treat it as the last column
  /// before the newline.
  unsigned InputStartCol, InputEndCol;
  /// The marker (lead character, tilde extent, color, optional note) to use.
  MarkerStyle Marker;
  /// Whether this annotation represents a good match for an expected pattern.
  bool FoundAndExpectedMatch;
};
|
||||
|
||||
/// Get an abbreviation for the check type.
|
||||
static std::string GetCheckTypeAbbreviation(Check::FileCheckType Ty) {
|
||||
switch (Ty) {
|
||||
case Check::CheckPlain:
|
||||
if (Ty.getCount() > 1)
|
||||
return "count";
|
||||
return "check";
|
||||
case Check::CheckNext:
|
||||
return "next";
|
||||
case Check::CheckSame:
|
||||
return "same";
|
||||
case Check::CheckNot:
|
||||
return "not";
|
||||
case Check::CheckDAG:
|
||||
return "dag";
|
||||
case Check::CheckLabel:
|
||||
return "label";
|
||||
case Check::CheckEmpty:
|
||||
return "empty";
|
||||
case Check::CheckComment:
|
||||
return "com";
|
||||
case Check::CheckEOF:
|
||||
return "eof";
|
||||
case Check::CheckBadNot:
|
||||
return "bad-not";
|
||||
case Check::CheckBadCount:
|
||||
return "bad-count";
|
||||
case Check::CheckNone:
|
||||
llvm_unreachable("invalid FileCheckType");
|
||||
}
|
||||
llvm_unreachable("unknown FileCheckType");
|
||||
}
|
||||
|
||||
/// Build the list of InputAnnotations (Annotations) for the FileCheck
/// diagnostics in Diags, and compute the width of the widest annotation
/// label (LabelWidth, an out-parameter that is unconditionally overwritten).
///
/// CheckFileBufferID identifies the check file's buffer in SM, and
/// ImpPatBufferIDRange is the half-open range of buffer IDs holding implicit
/// patterns (e.g. from --implicit-check-not); together they determine whether
/// a label reads "type:line" or "type:impN".
static void
BuildInputAnnotations(const SourceMgr &SM, unsigned CheckFileBufferID,
                      const std::pair<unsigned, unsigned> &ImpPatBufferIDRange,
                      const std::vector<FileCheckDiag> &Diags,
                      std::vector<InputAnnotation> &Annotations,
                      unsigned &LabelWidth) {
  // Order SMLocs by their underlying buffer pointer so they can key a map.
  struct CompareSMLoc {
    bool operator()(const SMLoc &LHS, const SMLoc &RHS) const {
      return LHS.getPointer() < RHS.getPointer();
    }
  };
  // How many diagnostics does each pattern have?
  std::map<SMLoc, unsigned, CompareSMLoc> DiagCountPerPattern;
  for (auto Diag : Diags)
    ++DiagCountPerPattern[Diag.CheckLoc];
  // How many diagnostics have we seen so far per pattern?
  std::map<SMLoc, unsigned, CompareSMLoc> DiagIndexPerPattern;
  // How many total diagnostics have we seen so far?
  unsigned DiagIndex = 0;
  // What's the widest label?
  LabelWidth = 0;
  for (auto DiagItr = Diags.begin(), DiagEnd = Diags.end(); DiagItr != DiagEnd;
       ++DiagItr) {
    InputAnnotation A;
    A.DiagIndex = DiagIndex++;

    // Build label, which uniquely identifies this check result.
    unsigned CheckBufferID = SM.FindBufferContainingLoc(DiagItr->CheckLoc);
    auto CheckLineAndCol =
        SM.getLineAndColumn(DiagItr->CheckLoc, CheckBufferID);
    llvm::raw_string_ostream Label(A.Label);
    Label << GetCheckTypeAbbreviation(DiagItr->CheckTy) << ":";
    if (CheckBufferID == CheckFileBufferID)
      Label << CheckLineAndCol.first;
    else if (ImpPatBufferIDRange.first <= CheckBufferID &&
             CheckBufferID < ImpPatBufferIDRange.second)
      // Implicit patterns are numbered with index origin one.
      Label << "imp" << (CheckBufferID - ImpPatBufferIDRange.first + 1);
    else
      llvm_unreachable("expected diagnostic's check location to be either in "
                       "the check file or for an implicit pattern");
    // Disambiguate multiple results for one pattern with a "'N" suffix.
    if (DiagCountPerPattern[DiagItr->CheckLoc] > 1)
      Label << "'" << DiagIndexPerPattern[DiagItr->CheckLoc]++;
    LabelWidth = std::max((std::string::size_type)LabelWidth, A.Label.size());

    A.Marker = GetMarker(DiagItr->MatchTy);
    if (!DiagItr->Note.empty()) {
      A.Marker.Note = DiagItr->Note;
      // It's less confusing if notes that don't actually have ranges don't have
      // markers.  For example, a marker for 'with "VAR" equal to "5"' would
      // seem to indicate where "VAR" matches, but the location we actually have
      // for the marker simply points to the start of the match/search range for
      // the full pattern of which the substitution is potentially just one
      // component.
      if (DiagItr->InputStartLine == DiagItr->InputEndLine &&
          DiagItr->InputStartCol == DiagItr->InputEndCol)
        A.Marker.Lead = ' ';
    }
    if (DiagItr->MatchTy == FileCheckDiag::MatchFoundErrorNote) {
      assert(!DiagItr->Note.empty() &&
             "expected custom note for MatchFoundErrorNote");
      A.Marker.Note = "error: " + A.Marker.Note;
    }
    A.FoundAndExpectedMatch =
        DiagItr->MatchTy == FileCheckDiag::MatchFoundAndExpected;

    // Compute the mark location, and break annotation into multiple
    // annotations if it spans multiple lines.
    A.IsFirstLine = true;
    A.InputLine = DiagItr->InputStartLine;
    A.InputStartCol = DiagItr->InputStartCol;
    if (DiagItr->InputStartLine == DiagItr->InputEndLine) {
      // Sometimes ranges are empty in order to indicate a specific point, but
      // that would mean nothing would be marked, so adjust the range to
      // include the following character.
      A.InputEndCol =
          std::max(DiagItr->InputStartCol + 1, DiagItr->InputEndCol);
      Annotations.push_back(A);
    } else {
      assert(DiagItr->InputStartLine < DiagItr->InputEndLine &&
             "expected input range not to be inverted");
      A.InputEndCol = UINT_MAX;
      Annotations.push_back(A);
      // Emit one continuation annotation per additional input line spanned.
      for (unsigned L = DiagItr->InputStartLine + 1, E = DiagItr->InputEndLine;
           L <= E; ++L) {
        // If a range ends before the first column on a line, then it has no
        // characters on that line, so there's nothing to render.
        if (DiagItr->InputEndCol == 1 && L == E)
          break;
        InputAnnotation B;
        B.DiagIndex = A.DiagIndex;
        B.Label = A.Label;
        B.IsFirstLine = false;
        B.InputLine = L;
        B.Marker = A.Marker;
        // Continuation lines have no lead character or note of their own.
        B.Marker.Lead = '~';
        B.Marker.Note = "";
        B.InputStartCol = 1;
        if (L != E)
          B.InputEndCol = UINT_MAX;
        else
          B.InputEndCol = DiagItr->InputEndCol;
        B.FoundAndExpectedMatch = A.FoundAndExpectedMatch;
        Annotations.push_back(B);
      }
    }
  }
}
|
||||
|
||||
/// Starting from CurInputLine, return the number of the next input line that
/// the -dump-input-filter setting selects, given the (sorted) annotations not
/// yet consumed in [AnnotationBeg, AnnotationEnd).  Returns UINT_MAX if no
/// remaining line is selected.
static unsigned FindInputLineInFilter(
    DumpInputFilterValue DumpInputFilter, unsigned CurInputLine,
    const std::vector<InputAnnotation>::iterator &AnnotationBeg,
    const std::vector<InputAnnotation>::iterator &AnnotationEnd) {
  // With -dump-input-filter=all, every line (including the current one) is
  // in the filter.
  if (DumpInputFilter == DumpInputFilterAll)
    return CurInputLine;
  // Otherwise, scan forward for the first annotation the filter accepts.
  for (auto It = AnnotationBeg; It != AnnotationEnd; ++It) {
    if (DumpInputFilter == DumpInputFilterAnnotationFull)
      return It->InputLine;
    if (DumpInputFilter == DumpInputFilterAnnotation && It->IsFirstLine)
      return It->InputLine;
    if (DumpInputFilter == DumpInputFilterError && It->IsFirstLine &&
        It->Marker.FiltersAsError)
      return It->InputLine;
  }
  return UINT_MAX;
}
|
||||
|
||||
/// To OS, print a vertical ellipsis (right-justified at LabelWidth) if it would
|
||||
/// occupy less lines than ElidedLines, but print ElidedLines otherwise. Either
|
||||
/// way, clear ElidedLines. Thus, if ElidedLines is empty, do nothing.
|
||||
static void DumpEllipsisOrElidedLines(raw_ostream &OS, std::string &ElidedLines,
|
||||
unsigned LabelWidth) {
|
||||
if (ElidedLines.empty())
|
||||
return;
|
||||
unsigned EllipsisLines = 3;
|
||||
if (EllipsisLines < StringRef(ElidedLines).count('\n')) {
|
||||
for (unsigned i = 0; i < EllipsisLines; ++i) {
|
||||
WithColor(OS, raw_ostream::BLACK, /*Bold=*/true)
|
||||
<< right_justify(".", LabelWidth);
|
||||
OS << '\n';
|
||||
}
|
||||
} else
|
||||
OS << ElidedLines;
|
||||
ElidedLines.clear();
|
||||
}
|
||||
|
||||
/// Print the annotated input dump to OS: each input line of InputFileText
/// prefixed with a right-justified line number, followed by one annotation
/// line per diagnostic marking it, honoring -dump-input-filter /
/// -dump-input-context elision.  Annotations is sorted in place; LabelWidth
/// is the widest annotation label (from BuildInputAnnotations).
static void DumpAnnotatedInput(raw_ostream &OS, const FileCheckRequest &Req,
                               DumpInputFilterValue DumpInputFilter,
                               unsigned DumpInputContext,
                               StringRef InputFileText,
                               std::vector<InputAnnotation> &Annotations,
                               unsigned LabelWidth) {
  OS << "Input was:\n<<<<<<\n";

  // Sort annotations.
  llvm::sort(Annotations,
             [](const InputAnnotation &A, const InputAnnotation &B) {
               // 1. Sort annotations in the order of the input lines.
               //
               // This makes it easier to find relevant annotations while
               // iterating input lines in the implementation below.  FileCheck
               // does not always produce diagnostics in the order of input
               // lines due to, for example, CHECK-DAG and CHECK-NOT.
               if (A.InputLine != B.InputLine)
                 return A.InputLine < B.InputLine;
               // 2. Sort annotations in the temporal order FileCheck produced
               // their associated diagnostics.
               //
               // This sort offers several benefits:
               //
               // A. On a single input line, the order of annotations reflects
               //    the FileCheck logic for processing directives/patterns.
               //    This can be helpful in understanding cases in which the
               //    order of the associated directives/patterns in the check
               //    file or on the command line either (i) does not match the
               //    temporal order in which FileCheck looks for matches for the
               //    directives/patterns (due to, for example, CHECK-LABEL,
               //    CHECK-NOT, or `--implicit-check-not`) or (ii) does match
               //    that order but does not match the order of those
               //    diagnostics along an input line (due to, for example,
               //    CHECK-DAG).
               //
               //    On the other hand, because our presentation format presents
               //    input lines in order, there's no clear way to offer the
               //    same benefit across input lines.  For consistency, it might
               //    then seem worthwhile to have annotations on a single line
               //    also sorted in input order (that is, by input column).
               //    However, in practice, this appears to be more confusing
               //    than helpful.  Perhaps it's intuitive to expect annotations
               //    to be listed in the temporal order in which they were
               //    produced except in cases the presentation format obviously
               //    and inherently cannot support it (that is, across input
               //    lines).
               //
               // B. When diagnostics' annotations are split among multiple
               //    input lines, the user must track them from one input line
               //    to the next.  One property of the sort chosen here is that
               //    it facilitates the user in this regard by ensuring the
               //    following: when comparing any two input lines, a
               //    diagnostic's annotations are sorted in the same position
               //    relative to all other diagnostics' annotations.
               return A.DiagIndex < B.DiagIndex;
             });

  // Compute the width of the label column.
  const unsigned char *InputFilePtr = InputFileText.bytes_begin(),
                      *InputFileEnd = InputFileText.bytes_end();
  unsigned LineCount = InputFileText.count('\n');
  // Guard the last-byte check: with --allow-empty and -dump-input=always the
  // canonicalized input can be empty, in which case InputFileEnd[-1] would
  // read out of bounds and std::log10(0) below would be undefined.
  if (InputFileText.empty() || InputFileEnd[-1] != '\n')
    ++LineCount;
  unsigned LineNoWidth = std::log10(LineCount) + 1;
  // +3 below adds spaces (1) to the left of the (right-aligned) line numbers
  // on input lines and (2) to the right of the (left-aligned) labels on
  // annotation lines so that input lines and annotation lines are more
  // visually distinct.  For example, the spaces on the annotation lines ensure
  // that input line numbers and check directive line numbers never align
  // horizontally.  Those line numbers might not even be for the same file.
  // One space would be enough to achieve that, but more makes it even easier
  // to see.
  LabelWidth = std::max(LabelWidth, LineNoWidth) + 3;

  // Print annotated input lines.
  unsigned PrevLineInFilter = 0; // 0 means none so far
  unsigned NextLineInFilter = 0; // 0 means uncomputed, UINT_MAX means none
  std::string ElidedLines;
  raw_string_ostream ElidedLinesOS(ElidedLines);
  ColorMode TheColorMode =
      WithColor(OS).colorsEnabled() ? ColorMode::Enable : ColorMode::Disable;
  if (TheColorMode == ColorMode::Enable)
    ElidedLinesOS.enable_colors(true);
  auto AnnotationItr = Annotations.begin(), AnnotationEnd = Annotations.end();
  for (unsigned Line = 1;
       InputFilePtr != InputFileEnd || AnnotationItr != AnnotationEnd;
       ++Line) {
    const unsigned char *InputFileLine = InputFilePtr;

    // Compute the previous and next line included by the filter.
    if (NextLineInFilter < Line)
      NextLineInFilter = FindInputLineInFilter(DumpInputFilter, Line,
                                               AnnotationItr, AnnotationEnd);
    assert(NextLineInFilter && "expected NextLineInFilter to be computed");
    if (NextLineInFilter == Line)
      PrevLineInFilter = Line;

    // Elide this input line and its annotations if it's not within the
    // context specified by -dump-input-context of an input line included by
    // -dump-input-filter.  However, in case the resulting ellipsis would occupy
    // more lines than the input lines and annotations it elides, buffer the
    // elided lines and annotations so we can print them instead.
    raw_ostream *LineOS = &OS;
    if ((!PrevLineInFilter || PrevLineInFilter + DumpInputContext < Line) &&
        (NextLineInFilter == UINT_MAX ||
         Line + DumpInputContext < NextLineInFilter))
      LineOS = &ElidedLinesOS;
    else {
      LineOS = &OS;
      DumpEllipsisOrElidedLines(OS, ElidedLinesOS.str(), LabelWidth);
    }

    // Print right-aligned line number.
    WithColor(*LineOS, raw_ostream::BLACK, /*Bold=*/true, /*BF=*/false,
              TheColorMode)
        << format_decimal(Line, LabelWidth) << ": ";

    // For the case where -v and colors are enabled, find the annotations for
    // good matches for expected patterns in order to highlight everything
    // else in the line.  There are no such annotations if -v is disabled.
    std::vector<InputAnnotation> FoundAndExpectedMatches;
    if (Req.Verbose && TheColorMode == ColorMode::Enable) {
      for (auto I = AnnotationItr; I != AnnotationEnd && I->InputLine == Line;
           ++I) {
        if (I->FoundAndExpectedMatch)
          FoundAndExpectedMatches.push_back(*I);
      }
    }

    // Print numbered line with highlighting where there are no matches for
    // expected patterns.
    bool Newline = false;
    {
      WithColor COS(*LineOS, raw_ostream::SAVEDCOLOR, /*Bold=*/false,
                    /*BG=*/false, TheColorMode);
      bool InMatch = false;
      if (Req.Verbose)
        COS.changeColor(raw_ostream::CYAN, true, true);
      for (unsigned Col = 1; InputFilePtr != InputFileEnd && !Newline; ++Col) {
        bool WasInMatch = InMatch;
        InMatch = false;
        for (auto M : FoundAndExpectedMatches) {
          if (M.InputStartCol <= Col && Col < M.InputEndCol) {
            InMatch = true;
            break;
          }
        }
        if (!WasInMatch && InMatch)
          COS.resetColor();
        else if (WasInMatch && !InMatch)
          COS.changeColor(raw_ostream::CYAN, true, true);
        if (*InputFilePtr == '\n') {
          Newline = true;
          // Render the newline as a trailing space so the "unmatched input"
          // highlighting covers it visibly.
          COS << ' ';
        } else
          COS << *InputFilePtr;
        ++InputFilePtr;
      }
    }
    *LineOS << '\n';
    unsigned InputLineWidth = InputFilePtr - InputFileLine;

    // Print any annotations.
    while (AnnotationItr != AnnotationEnd &&
           AnnotationItr->InputLine == Line) {
      WithColor COS(*LineOS, AnnotationItr->Marker.Color, /*Bold=*/true,
                    /*BG=*/false, TheColorMode);
      // The two spaces below are where the ": " appears on input lines.
      COS << left_justify(AnnotationItr->Label, LabelWidth) << "  ";
      unsigned Col;
      for (Col = 1; Col < AnnotationItr->InputStartCol; ++Col)
        COS << ' ';
      COS << AnnotationItr->Marker.Lead;
      // If InputEndCol=UINT_MAX, stop at InputLineWidth.
      for (++Col; Col < AnnotationItr->InputEndCol && Col <= InputLineWidth;
           ++Col)
        COS << '~';
      const std::string &Note = AnnotationItr->Marker.Note;
      if (!Note.empty()) {
        // Put the note at the end of the input line.  If we were to instead
        // put the note right after the marker, subsequent annotations for the
        // same input line might appear to mark this note instead of the input
        // line.
        for (; Col <= InputLineWidth; ++Col)
          COS << ' ';
        COS << ' ' << Note;
      }
      COS << '\n';
      ++AnnotationItr;
    }
  }
  DumpEllipsisOrElidedLines(OS, ElidedLinesOS.str(), LabelWidth);

  OS << ">>>>>>\n";
}
|
||||
|
||||
/// FileCheck driver: parse options, read the check file, canonicalize and
/// verify the input file, and (optionally) print the annotated input dump.
/// Exit codes: 0 on success, 1 on match failure, 2 on usage/IO errors.
int main(int argc, char **argv) {
  // Enable use of ANSI color codes because FileCheck is using them to
  // highlight text.
  llvm::sys::Process::UseANSIEscapeCodes(true);

  InitLLVM X(argc, argv);
  cl::ParseCommandLineOptions(argc, argv, /*Overview*/ "", /*Errs*/ nullptr,
                              "FILECHECK_OPTS");

  // Select -dump-input* values.  The -help documentation specifies the default
  // value and which value to choose if an option is specified multiple times.
  // In the latter case, the general rule of thumb is to choose the value that
  // provides the most information.
  DumpInputValue DumpInput =
      DumpInputs.empty()
          ? DumpInputFail
          : *std::max_element(DumpInputs.begin(), DumpInputs.end());
  DumpInputFilterValue DumpInputFilter;
  if (DumpInputFilters.empty())
    DumpInputFilter = DumpInput == DumpInputAlways ? DumpInputFilterAll
                                                   : DumpInputFilterError;
  else
    DumpInputFilter =
        *std::max_element(DumpInputFilters.begin(), DumpInputFilters.end());
  unsigned DumpInputContext = DumpInputContexts.empty()
                                  ? 5
                                  : *std::max_element(DumpInputContexts.begin(),
                                                      DumpInputContexts.end());

  if (DumpInput == DumpInputHelp) {
    DumpInputAnnotationHelp(outs());
    return 0;
  }
  if (CheckFilename.empty()) {
    errs() << "<check-file> not specified\n";
    return 2;
  }

  // Translate command-line options into the FileCheck library's request.
  FileCheckRequest Req;
  append_range(Req.CheckPrefixes, CheckPrefixes);

  append_range(Req.CommentPrefixes, CommentPrefixes);

  append_range(Req.ImplicitCheckNot, ImplicitCheckNot);

  // Validate -D definitions; report all malformed ones before giving up.
  bool GlobalDefineError = false;
  for (StringRef G : GlobalDefines) {
    size_t EqIdx = G.find('=');
    if (EqIdx == std::string::npos) {
      errs() << "Missing equal sign in command-line definition '-D" << G
             << "'\n";
      GlobalDefineError = true;
      continue;
    }
    if (EqIdx == 0) {
      errs() << "Missing variable name in command-line definition '-D" << G
             << "'\n";
      GlobalDefineError = true;
      continue;
    }
    Req.GlobalDefines.push_back(G);
  }
  if (GlobalDefineError)
    return 2;

  Req.AllowEmptyInput = AllowEmptyInput;
  Req.AllowUnusedPrefixes = AllowUnusedPrefixes;
  Req.EnableVarScope = EnableVarScope;
  Req.AllowDeprecatedDagOverlap = AllowDeprecatedDagOverlap;
  Req.Verbose = Verbose;
  Req.VerboseVerbose = VerboseVerbose;
  Req.NoCanonicalizeWhiteSpace = NoCanonicalizeWhiteSpace;
  Req.MatchFullLines = MatchFullLines;
  Req.IgnoreCase = IgnoreCase;

  // -vv implies -v.
  if (VerboseVerbose)
    Req.Verbose = true;

  FileCheck FC(Req);
  if (!FC.ValidateCheckPrefixes())
    return 2;

  Regex PrefixRE = FC.buildCheckPrefixRegex();
  std::string REError;
  if (!PrefixRE.isValid(REError)) {
    errs() << "Unable to combine check-prefix strings into a prefix regular "
              "expression! This is likely a bug in FileCheck's verification of "
              "the check-prefix strings. Regular expression parsing failed "
              "with the following error: "
           << REError << "\n";
    return 2;
  }

  SourceMgr SM;

  // Read the expected strings from the check file.
  ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
      MemoryBuffer::getFileOrSTDIN(CheckFilename, /*IsText=*/true);
  if (std::error_code EC = CheckFileOrErr.getError()) {
    errs() << "Could not open check file '" << CheckFilename
           << "': " << EC.message() << '\n';
    return 2;
  }
  MemoryBuffer &CheckFile = *CheckFileOrErr.get();

  SmallString<4096> CheckFileBuffer;
  StringRef CheckFileText = FC.CanonicalizeFile(CheckFile, CheckFileBuffer);

  // Register the (canonicalized) check file so diagnostics can point into it.
  unsigned CheckFileBufferID =
      SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
                                CheckFileText, CheckFile.getBufferIdentifier()),
                            SMLoc());

  std::pair<unsigned, unsigned> ImpPatBufferIDRange;
  if (FC.readCheckFile(SM, CheckFileText, PrefixRE, &ImpPatBufferIDRange))
    return 2;

  // Open the file to check and add it to SourceMgr.
  ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
      MemoryBuffer::getFileOrSTDIN(InputFilename, /*IsText=*/true);
  if (InputFilename == "-")
    InputFilename = "<stdin>"; // Overwrite for improved diagnostic messages
  if (std::error_code EC = InputFileOrErr.getError()) {
    errs() << "Could not open input file '" << InputFilename
           << "': " << EC.message() << '\n';
    return 2;
  }
  MemoryBuffer &InputFile = *InputFileOrErr.get();

  if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
    errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
    DumpCommandLine(argc, argv);
    return 2;
  }

  SmallString<4096> InputFileBuffer;
  StringRef InputFileText = FC.CanonicalizeFile(InputFile, InputFileBuffer);

  SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
                            InputFileText, InputFile.getBufferIdentifier()),
                        SMLoc());

  // Collect diagnostics only if some input dump might be printed.
  std::vector<FileCheckDiag> Diags;
  int ExitCode = FC.checkInput(SM, InputFileText,
                               DumpInput == DumpInputNever ? nullptr : &Diags)
                     ? EXIT_SUCCESS
                     : 1;
  if (DumpInput == DumpInputAlways ||
      (ExitCode == 1 && DumpInput == DumpInputFail)) {
    errs() << "\n"
           << "Input file: " << InputFilename << "\n"
           << "Check file: " << CheckFilename << "\n"
           << "\n"
           << "-dump-input=help explains the following input dump.\n"
           << "\n";
    std::vector<InputAnnotation> Annotations;
    unsigned LabelWidth;
    BuildInputAnnotations(SM, CheckFileBufferID, ImpPatBufferIDRange, Diags,
                          Annotations, LabelWidth);
    DumpAnnotatedInput(errs(), Req, DumpInputFilter, DumpInputContext,
                       InputFileText, Annotations, LabelWidth);
  }

  return ExitCode;
}
|
|
@ -29,11 +29,7 @@
|
|||
using namespace std;
|
||||
using namespace llvm;
|
||||
|
||||
#if LDC_LLVM_VER >= 1500
|
||||
#define BUILTIN_NAME_STRING "ClangBuiltinName"
|
||||
#else
|
||||
#define BUILTIN_NAME_STRING "GCCBuiltinName"
|
||||
#endif
|
||||
|
||||
string dtype(Record* rec, bool readOnlyMem)
|
||||
{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue