Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "Lots of tooling updates - too many to list, here's a few highlights:

   - Various subcommand updates to 'perf trace', 'perf report', 'perf
     record', 'perf annotate', 'perf script', 'perf test', etc.

   - CPU and NUMA topology and affinity handling improvements.

   - HW tracing and HW support updates:
      - Intel PT updates
      - ARM CoreSight updates
      - vendor HW event updates

   - BPF updates

   - Tons of infrastructure updates, both on the build system and the
     library support side

   - Documentation updates.

   - ... and lots of other changes, see the changelog for details.

  Kernel side updates:

   - Tighten up kprobes blacklist handling, reducing the number of places
     where developers can install a kprobe and hang/crash the system.

   - Fix/enhance vma address filter handling.

   - Various PMU driver updates, small fixes and additions.

   - refcount_t conversions

   - BPF updates

   - error code propagation enhancements

   - misc other changes"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (238 commits)
  perf script python: Add Python3 support to syscall-counts-by-pid.py
  perf script python: Add Python3 support to syscall-counts.py
  perf script python: Add Python3 support to stat-cpi.py
  perf script python: Add Python3 support to stackcollapse.py
  perf script python: Add Python3 support to sctop.py
  perf script python: Add Python3 support to powerpc-hcalls.py
  perf script python: Add Python3 support to net_dropmonitor.py
  perf script python: Add Python3 support to mem-phys-addr.py
  perf script python: Add Python3 support to failed-syscalls-by-pid.py
  perf script python: Add Python3 support to netdev-times.py
  perf tools: Add perf_exe() helper to find perf binary
  perf script: Handle missing fields with -F +..
  perf data: Add perf_data__open_dir_data function
  perf data: Add perf_data__(create_dir|close_dir) functions
  perf data: Fail check_backup in case of error
  perf data: Make check_backup work over directories
  perf tools: Add rm_rf_perf_data function
  perf tools: Add pattern name checking to rm_rf
  perf tools: Add depth checking to rm_rf
  perf data: Add global path holder
  ...
Linus Torvalds 2019-03-06 07:59:36 -08:00
commit 203b6609e0
315 changed files with 10259 additions and 2608 deletions


@ -630,12 +630,6 @@ static int __hw_perf_event_init(struct perf_event *event)
return ev;
}
/* The EV67 does not support mode exclusion */
if (attr->exclude_kernel || attr->exclude_user
|| attr->exclude_hv || attr->exclude_idle) {
return -EPERM;
}
/*
* We place the event type in event_base here and leave calculation
* of the codes to programme the PMU for alpha_pmu_enable() because
@ -771,6 +765,7 @@ static struct pmu pmu = {
.start = alpha_pmu_start,
.stop = alpha_pmu_stop,
.read = alpha_pmu_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};


@ -294,13 +294,7 @@ static int mmdc_pmu_event_init(struct perf_event *event)
return -EOPNOTSUPP;
}
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
event->attr.exclude_guest ||
event->attr.sample_period)
if (event->attr.sample_period)
return -EINVAL;
if (cfg < 0 || cfg >= MMDC_NUM_COUNTERS)
@ -456,6 +450,7 @@ static int mmdc_pmu_init(struct mmdc_pmu *pmu_mmdc,
.start = mmdc_pmu_event_start,
.stop = mmdc_pmu_event_stop,
.read = mmdc_pmu_event_update,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
},
.mmdc_base = mmdc_base,
.dev = dev,


@ -314,14 +314,6 @@ static int l2x0_pmu_event_init(struct perf_event *event)
event->attach_state & PERF_ATTACH_TASK)
return -EINVAL;
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
event->attr.exclude_guest)
return -EINVAL;
if (event->cpu < 0)
return -EINVAL;
@ -544,6 +536,7 @@ static __init int l2x0_pmu_init(void)
.del = l2x0_pmu_event_del,
.event_init = l2x0_pmu_event_init,
.attr_groups = l2x0_pmu_attr_groups,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
l2x0_pmu_reset();


@ -1306,15 +1306,6 @@ static int h_24x7_event_init(struct perf_event *event)
return -EINVAL;
}
/* unsupported modes and filters */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
event->attr.exclude_guest)
return -EINVAL;
/* no branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
@ -1577,6 +1568,7 @@ static struct pmu h_24x7_pmu = {
.start_txn = h_24x7_event_start_txn,
.commit_txn = h_24x7_event_commit_txn,
.cancel_txn = h_24x7_event_cancel_txn,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
static int hv_24x7_init(void)


@ -232,15 +232,6 @@ static int h_gpci_event_init(struct perf_event *event)
return -EINVAL;
}
/* unsupported modes and filters */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
event->attr.exclude_guest)
return -EINVAL;
/* no branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
@ -285,6 +276,7 @@ static struct pmu h_gpci_pmu = {
.start = h_gpci_event_start,
.stop = h_gpci_event_stop,
.read = h_gpci_event_update,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
static int hv_gpci_init(void)


@ -473,15 +473,6 @@ static int nest_imc_event_init(struct perf_event *event)
if (event->hw.sample_period)
return -EINVAL;
/* unsupported modes and filters */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
event->attr.exclude_guest)
return -EINVAL;
if (event->cpu < 0)
return -EINVAL;
@ -748,15 +739,6 @@ static int core_imc_event_init(struct perf_event *event)
if (event->hw.sample_period)
return -EINVAL;
/* unsupported modes and filters */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
event->attr.exclude_guest)
return -EINVAL;
if (event->cpu < 0)
return -EINVAL;
@ -1069,6 +1051,7 @@ static int update_pmu_ops(struct imc_pmu *pmu)
pmu->pmu.stop = imc_event_stop;
pmu->pmu.read = imc_event_update;
pmu->pmu.attr_groups = pmu->attr_groups;
pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
switch (pmu->domain) {


@ -1600,7 +1600,7 @@ static void aux_sdb_init(unsigned long sdb)
/*
* aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling
* @cpu: On which to allocate, -1 means current
* @event: Event the buffer is setup for, event->cpu == -1 means current
* @pages: Array of pointers to buffer pages passed from perf core
* @nr_pages: Total pages
* @snapshot: Flag for snapshot mode
@ -1612,8 +1612,8 @@ static void aux_sdb_init(unsigned long sdb)
*
* Return the private AUX buffer structure if success or NULL if fails.
*/
static void *aux_buffer_setup(int cpu, void **pages, int nr_pages,
bool snapshot)
static void *aux_buffer_setup(struct perf_event *event, void **pages,
int nr_pages, bool snapshot)
{
struct sf_buffer *sfb;
struct aux_buffer *aux;


@ -253,15 +253,6 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
return -EOPNOTSUPP;
}
static const struct perf_event_attr ibs_notsupp = {
.exclude_user = 1,
.exclude_kernel = 1,
.exclude_hv = 1,
.exclude_idle = 1,
.exclude_host = 1,
.exclude_guest = 1,
};
static int perf_ibs_init(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@ -282,9 +273,6 @@ static int perf_ibs_init(struct perf_event *event)
if (event->pmu != &perf_ibs->pmu)
return -ENOENT;
if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
return -EINVAL;
if (config & ~perf_ibs->config_mask)
return -EINVAL;
@ -537,6 +525,7 @@ static struct perf_ibs perf_ibs_fetch = {
.start = perf_ibs_start,
.stop = perf_ibs_stop,
.read = perf_ibs_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
},
.msr = MSR_AMD64_IBSFETCHCTL,
.config_mask = IBS_FETCH_CONFIG_MASK,


@ -223,11 +223,6 @@ static int perf_iommu_event_init(struct perf_event *event)
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
return -EINVAL;
/* IOMMU counters do not have usr/os/guest/host bits */
if (event->attr.exclude_user || event->attr.exclude_kernel ||
event->attr.exclude_host || event->attr.exclude_guest)
return -EINVAL;
if (event->cpu < 0)
return -EINVAL;
@ -414,6 +409,7 @@ static const struct pmu iommu_pmu __initconst = {
.read = perf_iommu_read,
.task_ctx_nr = perf_invalid_context,
.attr_groups = amd_iommu_attr_groups,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
static __init int init_one_iommu(unsigned int idx)


@ -136,14 +136,7 @@ static int pmu_event_init(struct perf_event *event)
return -ENOENT;
/* Unsupported modes and filters. */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
event->attr.exclude_guest ||
/* no sampling */
event->attr.sample_period)
if (event->attr.sample_period)
return -EINVAL;
if (cfg != AMD_POWER_EVENTSEL_PKG)
@ -226,6 +219,7 @@ static struct pmu pmu_class = {
.start = pmu_event_start,
.stop = pmu_event_stop,
.read = pmu_event_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
static int power_cpu_exit(unsigned int cpu)


@ -201,11 +201,6 @@ static int amd_uncore_event_init(struct perf_event *event)
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
return -EINVAL;
/* NB and Last level cache counters do not have usr/os/guest/host bits */
if (event->attr.exclude_user || event->attr.exclude_kernel ||
event->attr.exclude_host || event->attr.exclude_guest)
return -EINVAL;
/* and we do not enable counter overflow interrupts */
hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
hwc->idx = -1;
@ -307,6 +302,7 @@ static struct pmu amd_nb_pmu = {
.start = amd_uncore_start,
.stop = amd_uncore_stop,
.read = amd_uncore_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
static struct pmu amd_llc_pmu = {
@ -317,6 +313,7 @@ static struct pmu amd_llc_pmu = {
.start = amd_uncore_start,
.stop = amd_uncore_stop,
.read = amd_uncore_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)


@ -77,10 +77,12 @@ static size_t buf_size(struct page *page)
}
static void *
bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
bts_buffer_setup_aux(struct perf_event *event, void **pages,
int nr_pages, bool overwrite)
{
struct bts_buffer *buf;
struct page *page;
int cpu = event->cpu;
int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
unsigned long offset;
size_t size = nr_pages << PAGE_SHIFT;


@ -18,6 +18,7 @@
#include <asm/hardirq.h>
#include <asm/intel-family.h>
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
#include "../perf_event.h"
@ -3206,16 +3207,27 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
/*
* If PMU counter has PEBS enabled it is not enough to disable counter
* on a guest entry since PEBS memory write can overshoot guest entry
* and corrupt guest memory. Disabling PEBS solves the problem.
*/
arr[1].msr = MSR_IA32_PEBS_ENABLE;
arr[1].host = cpuc->pebs_enabled;
arr[1].guest = 0;
if (x86_pmu.flags & PMU_FL_PEBS_ALL)
arr[0].guest &= ~cpuc->pebs_enabled;
else
arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
*nr = 1;
if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) {
/*
* If PMU counter has PEBS enabled it is not enough to
* disable counter on a guest entry since PEBS memory
* write can overshoot guest entry and corrupt guest
* memory. Disabling PEBS solves the problem.
*
* Don't do this if the CPU already enforces it.
*/
arr[1].msr = MSR_IA32_PEBS_ENABLE;
arr[1].host = cpuc->pebs_enabled;
arr[1].guest = 0;
*nr = 2;
}
*nr = 2;
return arr;
}
@ -3748,36 +3760,62 @@ static __init void intel_clovertown_quirk(void)
x86_pmu.pebs_constraints = NULL;
}
static int intel_snb_pebs_broken(int cpu)
static const struct x86_cpu_desc isolation_ucodes[] = {
INTEL_CPU_DESC(INTEL_FAM6_HASWELL_CORE, 3, 0x0000001f),
INTEL_CPU_DESC(INTEL_FAM6_HASWELL_ULT, 1, 0x0000001e),
INTEL_CPU_DESC(INTEL_FAM6_HASWELL_GT3E, 1, 0x00000015),
INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 2, 0x00000037),
INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 4, 0x0000000a),
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_CORE, 4, 0x00000023),
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_GT3E, 1, 0x00000014),
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 2, 0x00000010),
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 3, 0x07000009),
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 4, 0x0f000009),
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 5, 0x0e000002),
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 2, 0x0b000014),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_MOBILE, 3, 0x0000007c),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_DESKTOP, 3, 0x0000007c),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 9, 0x0000004e),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 9, 0x0000004e),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 10, 0x0000004e),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 11, 0x0000004e),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 12, 0x0000004e),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 10, 0x0000004e),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 11, 0x0000004e),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 12, 0x0000004e),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 13, 0x0000004e),
{}
};
static void intel_check_pebs_isolation(void)
{
u32 rev = UINT_MAX; /* default to broken for unknown models */
x86_pmu.pebs_no_isolation = !x86_cpu_has_min_microcode_rev(isolation_ucodes);
}
switch (cpu_data(cpu).x86_model) {
case INTEL_FAM6_SANDYBRIDGE:
rev = 0x28;
break;
static __init void intel_pebs_isolation_quirk(void)
{
WARN_ON_ONCE(x86_pmu.check_microcode);
x86_pmu.check_microcode = intel_check_pebs_isolation;
intel_check_pebs_isolation();
}
case INTEL_FAM6_SANDYBRIDGE_X:
switch (cpu_data(cpu).x86_stepping) {
case 6: rev = 0x618; break;
case 7: rev = 0x70c; break;
}
}
static const struct x86_cpu_desc pebs_ucodes[] = {
INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE, 7, 0x00000028),
INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 6, 0x00000618),
INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 7, 0x0000070c),
{}
};
return (cpu_data(cpu).microcode < rev);
static bool intel_snb_pebs_broken(void)
{
return !x86_cpu_has_min_microcode_rev(pebs_ucodes);
}
static void intel_snb_check_microcode(void)
{
int pebs_broken = 0;
int cpu;
for_each_online_cpu(cpu) {
if ((pebs_broken = intel_snb_pebs_broken(cpu)))
break;
}
if (pebs_broken == x86_pmu.pebs_broken)
if (intel_snb_pebs_broken() == x86_pmu.pebs_broken)
return;
/*
@ -3894,23 +3932,22 @@ static __init void intel_nehalem_quirk(void)
}
}
static bool intel_glp_counter_freezing_broken(int cpu)
static const struct x86_cpu_desc counter_freezing_ucodes[] = {
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 2, 0x0000000e),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 9, 0x0000002e),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 10, 0x00000008),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_X, 1, 0x00000028),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 1, 0x00000028),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 8, 0x00000006),
{}
};
static bool intel_counter_freezing_broken(void)
{
u32 rev = UINT_MAX; /* default to broken for unknown stepping */
switch (cpu_data(cpu).x86_stepping) {
case 1:
rev = 0x28;
break;
case 8:
rev = 0x6;
break;
}
return (cpu_data(cpu).microcode < rev);
return !x86_cpu_has_min_microcode_rev(counter_freezing_ucodes);
}
static __init void intel_glp_counter_freezing_quirk(void)
static __init void intel_counter_freezing_quirk(void)
{
/* Check if it's already disabled */
if (disable_counter_freezing)
@ -3920,7 +3957,7 @@ static __init void intel_glp_counter_freezing_quirk(void)
* If the system starts with the wrong ucode, leave the
* counter-freezing feature permanently disabled.
*/
if (intel_glp_counter_freezing_broken(raw_smp_processor_id())) {
if (intel_counter_freezing_broken()) {
pr_info("PMU counter freezing disabled due to CPU errata,"
"please upgrade microcode\n");
x86_pmu.counter_freezing = false;
@ -4271,6 +4308,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_ATOM_GOLDMONT_X:
x86_add_quirk(intel_counter_freezing_quirk);
memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
@ -4297,7 +4335,7 @@ __init int intel_pmu_init(void)
break;
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
x86_add_quirk(intel_glp_counter_freezing_quirk);
x86_add_quirk(intel_counter_freezing_quirk);
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
@ -4440,6 +4478,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_HASWELL_ULT:
case INTEL_FAM6_HASWELL_GT3E:
x86_add_quirk(intel_ht_bug);
x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@ -4471,6 +4510,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_BROADWELL_XEON_D:
case INTEL_FAM6_BROADWELL_GT3E:
case INTEL_FAM6_BROADWELL_X:
x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@ -4533,6 +4573,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_KABYLAKE_MOBILE:
case INTEL_FAM6_KABYLAKE_DESKTOP:
x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));


@ -280,13 +280,7 @@ static int cstate_pmu_event_init(struct perf_event *event)
return -ENOENT;
/* unsupported modes and filters */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
event->attr.exclude_guest ||
event->attr.sample_period) /* no sampling */
if (event->attr.sample_period) /* no sampling */
return -EINVAL;
if (event->cpu < 0)
@ -437,7 +431,7 @@ static struct pmu cstate_core_pmu = {
.start = cstate_pmu_event_start,
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
.module = THIS_MODULE,
};
@ -451,7 +445,7 @@ static struct pmu cstate_pkg_pmu = {
.start = cstate_pmu_event_start,
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
.module = THIS_MODULE,
};


@ -1628,6 +1628,8 @@ void __init intel_ds_init(void)
x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
if (x86_pmu.version <= 4)
x86_pmu.pebs_no_isolation = 1;
if (x86_pmu.pebs) {
char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
int format = x86_pmu.intel_cap.pebs_format;


@ -1114,10 +1114,11 @@ static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
* Return: Our private PT buffer structure.
*/
static void *
pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot)
pt_buffer_setup_aux(struct perf_event *event, void **pages,
int nr_pages, bool snapshot)
{
struct pt_buffer *buf;
int node, ret;
int node, ret, cpu = event->cpu;
if (!nr_pages)
return NULL;
@ -1222,7 +1223,8 @@ static int pt_event_addr_filters_validate(struct list_head *filters)
static void pt_event_addr_filters_sync(struct perf_event *event)
{
struct perf_addr_filters_head *head = perf_event_addr_filters(event);
unsigned long msr_a, msr_b, *offs = event->addr_filters_offs;
unsigned long msr_a, msr_b;
struct perf_addr_filter_range *fr = event->addr_filter_ranges;
struct pt_filters *filters = event->hw.addr_filters;
struct perf_addr_filter *filter;
int range = 0;
@ -1231,12 +1233,12 @@ static void pt_event_addr_filters_sync(struct perf_event *event)
return;
list_for_each_entry(filter, &head->list, entry) {
if (filter->path.dentry && !offs[range]) {
if (filter->path.dentry && !fr[range].start) {
msr_a = msr_b = 0;
} else {
/* apply the offset */
msr_a = filter->offset + offs[range];
msr_b = filter->size + msr_a - 1;
msr_a = fr[range].start;
msr_b = msr_a + fr[range].size - 1;
}
filters->filter[range].msr_a = msr_a;


@ -397,13 +397,7 @@ static int rapl_pmu_event_init(struct perf_event *event)
return -EINVAL;
/* unsupported modes and filters */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
event->attr.exclude_guest ||
event->attr.sample_period) /* no sampling */
if (event->attr.sample_period) /* no sampling */
return -EINVAL;
/* must be done before validate_group */
@ -699,6 +693,7 @@ static int __init init_rapl_pmus(void)
rapl_pmus->pmu.stop = rapl_pmu_event_stop;
rapl_pmus->pmu.read = rapl_pmu_event_read;
rapl_pmus->pmu.module = THIS_MODULE;
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
return 0;
}


@ -695,14 +695,6 @@ static int uncore_pmu_event_init(struct perf_event *event)
if (pmu->func_id < 0)
return -ENOENT;
/*
* Uncore PMU does measure at all privilege level all the time.
* So it doesn't make sense to specify any exclude bits.
*/
if (event->attr.exclude_user || event->attr.exclude_kernel ||
event->attr.exclude_hv || event->attr.exclude_idle)
return -EINVAL;
/* Sampling not supported yet */
if (hwc->sample_period)
return -EINVAL;
@ -800,6 +792,7 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
.stop = uncore_pmu_event_stop,
.read = uncore_pmu_event_read,
.module = THIS_MODULE,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
} else {
pmu->pmu = *pmu->type->pmu;


@ -397,13 +397,7 @@ static int snb_uncore_imc_event_init(struct perf_event *event)
return -EINVAL;
/* unsupported modes and filters */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
event->attr.exclude_guest ||
event->attr.sample_period) /* no sampling */
if (event->attr.sample_period) /* no sampling */
return -EINVAL;
/*
@ -497,6 +491,7 @@ static struct pmu snb_uncore_imc_pmu = {
.start = uncore_pmu_event_start,
.stop = uncore_pmu_event_stop,
.read = uncore_pmu_event_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
static struct intel_uncore_ops snb_uncore_imc_ops = {


@ -160,13 +160,7 @@ static int msr_event_init(struct perf_event *event)
return -ENOENT;
/* unsupported modes and filters */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
event->attr.exclude_guest ||
event->attr.sample_period) /* no sampling */
if (event->attr.sample_period) /* no sampling */
return -EINVAL;
if (cfg >= PERF_MSR_EVENT_MAX)
@ -256,7 +250,7 @@ static struct pmu pmu_msr = {
.start = msr_event_start,
.stop = msr_event_stop,
.read = msr_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
};
static int __init msr_init(void)


@ -601,13 +601,14 @@ struct x86_pmu {
/*
* Intel DebugStore bits
*/
unsigned int bts :1,
bts_active :1,
pebs :1,
pebs_active :1,
pebs_broken :1,
pebs_prec_dist :1,
pebs_no_tlb :1;
unsigned int bts :1,
bts_active :1,
pebs :1,
pebs_active :1,
pebs_broken :1,
pebs_prec_dist :1,
pebs_no_tlb :1,
pebs_no_isolation :1;
int pebs_record_size;
int pebs_buffer_size;
void (*drain_pebs)(struct pt_regs *regs);


@ -11,4 +11,32 @@
extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
/*
* Match specific microcode revisions.
*
* vendor/family/model/stepping must be all set.
*
* Only checks against the boot CPU. When mixed-stepping configs are
* valid for a CPU model, add a quirk for every valid stepping and
* do the fine-tuning in the quirk handler.
*/
struct x86_cpu_desc {
__u8 x86_family;
__u8 x86_vendor;
__u8 x86_model;
__u8 x86_stepping;
__u32 x86_microcode_rev;
};
#define INTEL_CPU_DESC(mod, step, rev) { \
.x86_family = 6, \
.x86_vendor = X86_VENDOR_INTEL, \
.x86_model = mod, \
.x86_stepping = step, \
.x86_microcode_rev = rev, \
}
extern bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table);
#endif
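
The comment above documents the new table-driven microcode check that replaces the open-coded model/stepping switches in the Intel PMU quirks. A minimal, hedged sketch of a caller follows; the table and function names here are ours, and the entries simply mirror two rows of the pebs_ucodes table shown earlier in the events/intel/core.c hunks.

/*
 * Illustrative sketch (not from the patch set): a stepping/microcode table
 * and the check that consumes it, following the pattern of pebs_ucodes.
 */
static const struct x86_cpu_desc example_min_ucodes[] = {
	INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE,   7, 0x00000028),
	INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 6, 0x00000618),
	{}	/* empty entry terminates the table */
};

static bool example_feature_usable(void)
{
	/*
	 * True only when the boot CPU matches an entry and already runs at
	 * least the listed microcode revision.
	 */
	return x86_cpu_has_min_microcode_rev(example_min_ucodes);
}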


@ -11,6 +11,7 @@
#include <linux/stop_machine.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <linux/kprobes.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
@ -764,8 +765,8 @@ int poke_int3_handler(struct pt_regs *regs)
regs->ip = (unsigned long) bp_int3_handler;
return 1;
}
NOKPROBE_SYMBOL(poke_int3_handler);
/**
* text_poke_bp() -- update instructions on live kernel on SMP


@ -819,11 +819,9 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
static void init_amd_zn(struct cpuinfo_x86 *c)
{
set_cpu_cap(c, X86_FEATURE_ZEN);
/*
* Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
* all up to and including B1.
*/
if (c->x86_model <= 1 && c->x86_stepping <= 1)
/* Fix erratum 1076: CPB feature bit not being set in CPUID. */
if (!cpu_has(c, X86_FEATURE_CPB))
set_cpu_cap(c, X86_FEATURE_CPB);
}


@ -48,3 +48,34 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
return NULL;
}
EXPORT_SYMBOL(x86_match_cpu);
static const struct x86_cpu_desc *
x86_match_cpu_with_stepping(const struct x86_cpu_desc *match)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
const struct x86_cpu_desc *m;
for (m = match; m->x86_family | m->x86_model; m++) {
if (c->x86_vendor != m->x86_vendor)
continue;
if (c->x86 != m->x86_family)
continue;
if (c->x86_model != m->x86_model)
continue;
if (c->x86_stepping != m->x86_stepping)
continue;
return m;
}
return NULL;
}
bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table)
{
const struct x86_cpu_desc *res = x86_match_cpu_with_stepping(table);
if (!res || res->x86_microcode_rev > boot_cpu_data.microcode)
return false;
return true;
}
EXPORT_SYMBOL_GPL(x86_cpu_has_min_microcode_rev);


@ -269,7 +269,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
return ret;
}
static int is_ftrace_caller(unsigned long ip)
static nokprobe_inline int is_ftrace_caller(unsigned long ip)
{
if (ip == ftrace_update_func)
return 1;
@ -299,6 +299,7 @@ int ftrace_int3_handler(struct pt_regs *regs)
return 1;
}
NOKPROBE_SYMBOL(ftrace_int3_handler);
static int ftrace_write(unsigned long ip, const char *val, int size)
{


@ -1028,6 +1028,13 @@ NOKPROBE_SYMBOL(kprobe_fault_handler);
int __init arch_populate_kprobe_blacklist(void)
{
int ret;
ret = kprobe_add_area_blacklist((unsigned long)__irqentry_text_start,
(unsigned long)__irqentry_text_end);
if (ret)
return ret;
return kprobe_add_area_blacklist((unsigned long)__entry_text_start,
(unsigned long)__entry_text_end);
}


@ -97,6 +97,7 @@ static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
}
asm (
".pushsection .rodata\n"
"optprobe_template_func:\n"
".global optprobe_template_entry\n"
"optprobe_template_entry:\n"
@ -136,8 +137,7 @@ asm (
#endif
".global optprobe_template_end\n"
"optprobe_template_end:\n"
".type optprobe_template_func, @function\n"
".size optprobe_template_func, .-optprobe_template_func\n");
".popsection\n");
void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);


@ -111,6 +111,7 @@ void ist_enter(struct pt_regs *regs)
/* This code is a bit fragile. Test it. */
RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work");
}
NOKPROBE_SYMBOL(ist_enter);
void ist_exit(struct pt_regs *regs)
{


@ -14,6 +14,7 @@
#include <linux/perf_event.h>
#include <linux/percpu-defs.h>
#include <linux/slab.h>
#include <linux/stringhash.h>
#include <linux/types.h>
#include <linux/workqueue.h>
@ -30,11 +31,14 @@ static DEFINE_PER_CPU(struct coresight_device *, csdev_src);
PMU_FORMAT_ATTR(cycacc, "config:" __stringify(ETM_OPT_CYCACC));
PMU_FORMAT_ATTR(timestamp, "config:" __stringify(ETM_OPT_TS));
PMU_FORMAT_ATTR(retstack, "config:" __stringify(ETM_OPT_RETSTK));
/* Sink ID - same for all ETMs */
PMU_FORMAT_ATTR(sinkid, "config2:0-31");
static struct attribute *etm_config_formats_attr[] = {
&format_attr_cycacc.attr,
&format_attr_timestamp.attr,
&format_attr_retstack.attr,
&format_attr_sinkid.attr,
NULL,
};
@ -43,8 +47,18 @@ static const struct attribute_group etm_pmu_format_group = {
.attrs = etm_config_formats_attr,
};
static struct attribute *etm_config_sinks_attr[] = {
NULL,
};
static const struct attribute_group etm_pmu_sinks_group = {
.name = "sinks",
.attrs = etm_config_sinks_attr,
};
static const struct attribute_group *etm_pmu_attr_groups[] = {
&etm_pmu_format_group,
&etm_pmu_sinks_group,
NULL,
};
@ -177,31 +191,28 @@ static void etm_free_aux(void *data)
schedule_work(&event_data->work);
}
static void *etm_setup_aux(int event_cpu, void **pages,
static void *etm_setup_aux(struct perf_event *event, void **pages,
int nr_pages, bool overwrite)
{
int cpu;
u32 id;
int cpu = event->cpu;
cpumask_t *mask;
struct coresight_device *sink;
struct etm_event_data *event_data = NULL;
event_data = alloc_event_data(event_cpu);
event_data = alloc_event_data(cpu);
if (!event_data)
return NULL;
INIT_WORK(&event_data->work, free_event_data);
/*
* In theory nothing prevent tracers in a trace session from being
* associated with different sinks, nor having a sink per tracer. But
* until we have HW with this kind of topology we need to assume tracers
* in a trace session are using the same sink. Therefore go through
* the coresight bus and pick the first enabled sink.
*
* When operated from sysFS users are responsible to enable the sink
* while from perf, the perf tools will do it based on the choice made
* on the cmd line. As such the "enable_sink" flag in sysFS is reset.
*/
sink = coresight_get_enabled_sink(true);
/* First get the selected sink from user space. */
if (event->attr.config2) {
id = (u32)event->attr.config2;
sink = coresight_get_sink_by_id(id);
} else {
sink = coresight_get_enabled_sink(true);
}
if (!sink || !sink_ops(sink)->alloc_buffer)
goto err;
@ -422,15 +433,16 @@ static int etm_addr_filters_validate(struct list_head *filters)
static void etm_addr_filters_sync(struct perf_event *event)
{
struct perf_addr_filters_head *head = perf_event_addr_filters(event);
unsigned long start, stop, *offs = event->addr_filters_offs;
unsigned long start, stop;
struct perf_addr_filter_range *fr = event->addr_filter_ranges;
struct etm_filters *filters = event->hw.addr_filters;
struct etm_filter *etm_filter;
struct perf_addr_filter *filter;
int i = 0;
list_for_each_entry(filter, &head->list, entry) {
start = filter->offset + offs[i];
stop = start + filter->size;
start = fr[i].start;
stop = start + fr[i].size;
etm_filter = &filters->etm_filter[i];
switch (filter->action) {
@ -479,6 +491,77 @@ int etm_perf_symlink(struct coresight_device *csdev, bool link)
return 0;
}
static ssize_t etm_perf_sink_name_show(struct device *dev,
struct device_attribute *dattr,
char *buf)
{
struct dev_ext_attribute *ea;
ea = container_of(dattr, struct dev_ext_attribute, attr);
return scnprintf(buf, PAGE_SIZE, "0x%lx\n", (unsigned long)(ea->var));
}
int etm_perf_add_symlink_sink(struct coresight_device *csdev)
{
int ret;
unsigned long hash;
const char *name;
struct device *pmu_dev = etm_pmu.dev;
struct device *pdev = csdev->dev.parent;
struct dev_ext_attribute *ea;
if (csdev->type != CORESIGHT_DEV_TYPE_SINK &&
csdev->type != CORESIGHT_DEV_TYPE_LINKSINK)
return -EINVAL;
if (csdev->ea != NULL)
return -EINVAL;
if (!etm_perf_up)
return -EPROBE_DEFER;
ea = devm_kzalloc(pdev, sizeof(*ea), GFP_KERNEL);
if (!ea)
return -ENOMEM;
name = dev_name(pdev);
/* See function coresight_get_sink_by_id() to know where this is used */
hash = hashlen_hash(hashlen_string(NULL, name));
ea->attr.attr.name = devm_kstrdup(pdev, name, GFP_KERNEL);
if (!ea->attr.attr.name)
return -ENOMEM;
ea->attr.attr.mode = 0444;
ea->attr.show = etm_perf_sink_name_show;
ea->var = (unsigned long *)hash;
ret = sysfs_add_file_to_group(&pmu_dev->kobj,
&ea->attr.attr, "sinks");
if (!ret)
csdev->ea = ea;
return ret;
}
void etm_perf_del_symlink_sink(struct coresight_device *csdev)
{
struct device *pmu_dev = etm_pmu.dev;
struct dev_ext_attribute *ea = csdev->ea;
if (csdev->type != CORESIGHT_DEV_TYPE_SINK &&
csdev->type != CORESIGHT_DEV_TYPE_LINKSINK)
return;
if (!ea)
return;
sysfs_remove_file_from_group(&pmu_dev->kobj,
&ea->attr.attr, "sinks");
csdev->ea = NULL;
}
static int __init etm_perf_init(void)
{
int ret;


@ -59,6 +59,8 @@ struct etm_event_data {
#ifdef CONFIG_CORESIGHT
int etm_perf_symlink(struct coresight_device *csdev, bool link);
int etm_perf_add_symlink_sink(struct coresight_device *csdev);
void etm_perf_del_symlink_sink(struct coresight_device *csdev);
static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
{
struct etm_event_data *data = perf_get_aux(handle);
@ -70,7 +72,9 @@ static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
#else
static inline int etm_perf_symlink(struct coresight_device *csdev, bool link)
{ return -EINVAL; }
int etm_perf_add_symlink_sink(struct coresight_device *csdev)
{ return -EINVAL; }
void etm_perf_del_symlink_sink(struct coresight_device *csdev) {}
static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
{
return NULL;


@ -147,6 +147,7 @@ void coresight_disable_path(struct list_head *path);
int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data);
struct coresight_device *coresight_get_sink(struct list_head *path);
struct coresight_device *coresight_get_enabled_sink(bool reset);
struct coresight_device *coresight_get_sink_by_id(u32 id);
struct list_head *coresight_build_path(struct coresight_device *csdev,
struct coresight_device *sink);
void coresight_release_path(struct list_head *path);


@ -11,6 +11,7 @@
#include <linux/err.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/stringhash.h>
#include <linux/mutex.h>
#include <linux/clk.h>
#include <linux/coresight.h>
@ -18,6 +19,7 @@
#include <linux/delay.h>
#include <linux/pm_runtime.h>
#include "coresight-etm-perf.h"
#include "coresight-priv.h"
static DEFINE_MUTEX(coresight_mutex);
@ -540,6 +542,47 @@ struct coresight_device *coresight_get_enabled_sink(bool deactivate)
return dev ? to_coresight_device(dev) : NULL;
}
static int coresight_sink_by_id(struct device *dev, void *data)
{
struct coresight_device *csdev = to_coresight_device(dev);
unsigned long hash;
if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
if (!csdev->ea)
return 0;
/*
* See function etm_perf_add_symlink_sink() to know where
* this comes from.
*/
hash = (unsigned long)csdev->ea->var;
if ((u32)hash == *(u32 *)data)
return 1;
}
return 0;
}
/**
* coresight_get_sink_by_id - returns the sink that matches the id
* @id: Id of the sink to match
*
* The name of a sink is unique, whether it is found on the AMBA bus or
* otherwise. As such the hash of that name can easily be used to identify
* a sink.
*/
struct coresight_device *coresight_get_sink_by_id(u32 id)
{
struct device *dev = NULL;
dev = bus_find_device(&coresight_bustype, NULL, &id,
coresight_sink_by_id);
return dev ? to_coresight_device(dev) : NULL;
}
/*
* coresight_grab_device - Power up this device and any of the helper
* devices connected to it for trace operation. Since the helper devices
@ -1167,6 +1210,22 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
goto err_out;
}
if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
ret = etm_perf_add_symlink_sink(csdev);
if (ret) {
device_unregister(&csdev->dev);
/*
* As with the above, all resources are free'd
* explicitly via coresight_device_release() triggered
* from put_device(), which is in turn called from
* function device_unregister().
*/
goto err_out;
}
}
mutex_lock(&coresight_mutex);
coresight_fixup_device_conns(csdev);
@ -1185,6 +1244,7 @@ EXPORT_SYMBOL_GPL(coresight_register);
void coresight_unregister(struct coresight_device *csdev)
{
etm_perf_del_symlink_sink(csdev);
/* Remove references of that device in the topology */
coresight_remove_conns(csdev);
device_unregister(&csdev->dev);


@ -1327,15 +1327,6 @@ static int cci_pmu_event_init(struct perf_event *event)
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
return -EOPNOTSUPP;
/* We have no filtering of any kind */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
event->attr.exclude_guest)
return -EINVAL;
/*
* Following the example set by other "uncore" PMUs, we accept any CPU
* and rewrite its affinity dynamically rather than having perf core
@ -1433,6 +1424,7 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
.stop = cci_pmu_stop,
.read = pmu_read,
.attr_groups = pmu_attr_groups,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
cci_pmu->plat_device = pdev;


@ -741,10 +741,7 @@ static int arm_ccn_pmu_event_init(struct perf_event *event)
return -EOPNOTSUPP;
}
if (has_branch_stack(event) || event->attr.exclude_user ||
event->attr.exclude_kernel || event->attr.exclude_hv ||
event->attr.exclude_idle || event->attr.exclude_host ||
event->attr.exclude_guest) {
if (has_branch_stack(event)) {
dev_dbg(ccn->dev, "Can't exclude execution levels!\n");
return -EINVAL;
}
@ -1290,6 +1287,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn)
.read = arm_ccn_pmu_event_read,
.pmu_enable = arm_ccn_pmu_enable,
.pmu_disable = arm_ccn_pmu_disable,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
/* No overflow interrupt? Have to use a timer instead. */


@ -562,13 +562,7 @@ static int dsu_pmu_event_init(struct perf_event *event)
return -EINVAL;
}
if (has_branch_stack(event) ||
event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
event->attr.exclude_guest) {
if (has_branch_stack(event)) {
dev_dbg(dsu_pmu->pmu.dev, "Can't support filtering\n");
return -EINVAL;
}
@ -735,6 +729,7 @@ static int dsu_pmu_device_probe(struct platform_device *pdev)
.read = dsu_pmu_read,
.attr_groups = dsu_pmu_attr_groups,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
rc = perf_pmu_register(&dsu_pmu->pmu, name, -1);


@ -356,13 +356,6 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
return ret;
}
static int
event_requires_mode_exclusion(struct perf_event_attr *attr)
{
return attr->exclude_idle || attr->exclude_user ||
attr->exclude_kernel || attr->exclude_hv;
}
static int
__hw_perf_event_init(struct perf_event *event)
{
@ -393,9 +386,8 @@ __hw_perf_event_init(struct perf_event *event)
/*
* Check whether we need to exclude the counter from certain modes.
*/
if ((!armpmu->set_event_filter ||
armpmu->set_event_filter(hwc, &event->attr)) &&
event_requires_mode_exclusion(&event->attr)) {
if (armpmu->set_event_filter &&
armpmu->set_event_filter(hwc, &event->attr)) {
pr_debug("ARM performance counters do not support "
"mode exclusion\n");
return -EOPNOTSUPP;
@ -867,6 +859,9 @@ int armpmu_register(struct arm_pmu *pmu)
if (ret)
return ret;
if (!pmu->set_event_filter)
pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
if (ret)
goto out_destroy;


@ -824,10 +824,10 @@ static void arm_spe_pmu_read(struct perf_event *event)
{
}
static void *arm_spe_pmu_setup_aux(int cpu, void **pages, int nr_pages,
bool snapshot)
static void *arm_spe_pmu_setup_aux(struct perf_event *event, void **pages,
int nr_pages, bool snapshot)
{
int i;
int i, cpu = event->cpu;
struct page **pglist;
struct arm_spe_pmu_buf *buf;


@ -396,6 +396,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev)
.stop = hisi_uncore_pmu_stop,
.read = hisi_uncore_pmu_read,
.attr_groups = hisi_ddrc_pmu_attr_groups,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1);


@ -407,6 +407,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev)
.stop = hisi_uncore_pmu_stop,
.read = hisi_uncore_pmu_read,
.attr_groups = hisi_hha_pmu_attr_groups,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
ret = perf_pmu_register(&hha_pmu->pmu, name, -1);


@ -397,6 +397,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
.stop = hisi_uncore_pmu_stop,
.read = hisi_uncore_pmu_read,
.attr_groups = hisi_l3c_pmu_attr_groups,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
ret = perf_pmu_register(&l3c_pmu->pmu, name, -1);


@ -142,15 +142,6 @@ int hisi_uncore_pmu_event_init(struct perf_event *event)
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
return -EOPNOTSUPP;
/* counters do not have these bits */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_host ||
event->attr.exclude_guest ||
event->attr.exclude_hv ||
event->attr.exclude_idle)
return -EINVAL;
/*
* The uncore counters not specific to any CPU, so cannot
* support per-task


@ -509,14 +509,6 @@ static int l2_cache_event_init(struct perf_event *event)
return -EOPNOTSUPP;
}
/* We cannot filter accurately so we just don't allow it. */
if (event->attr.exclude_user || event->attr.exclude_kernel ||
event->attr.exclude_hv || event->attr.exclude_idle) {
dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
"Can't exclude execution levels\n");
return -EOPNOTSUPP;
}
if (((L2_EVT_GROUP(event->attr.config) > L2_EVT_GROUP_MAX) ||
((event->attr.config & ~L2_EVT_MASK) != 0)) &&
(event->attr.config != L2CYCLE_CTR_RAW_CODE)) {
@ -982,6 +974,7 @@ static int l2_cache_pmu_probe(struct platform_device *pdev)
.stop = l2_cache_event_stop,
.read = l2_cache_event_read,
.attr_groups = l2_cache_pmu_attr_grps,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
l2cache_pmu->num_counters = get_num_counters();


@ -494,13 +494,6 @@ static int qcom_l3_cache__event_init(struct perf_event *event)
if (event->attr.type != event->pmu->type)
return -ENOENT;
/*
* There are no per-counter mode filters in the PMU.
*/
if (event->attr.exclude_user || event->attr.exclude_kernel ||
event->attr.exclude_hv || event->attr.exclude_idle)
return -EINVAL;
/*
* Sampling not supported since these events are not core-attributable.
*/
@ -777,6 +770,7 @@ static int qcom_l3_cache_pmu_probe(struct platform_device *pdev)
.read = qcom_l3_cache__event_read,
.attr_groups = qcom_l3_cache_pmu_attr_grps,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
memrc = platform_get_resource(pdev, IORESOURCE_MEM, 0);


@ -424,15 +424,6 @@ static int tx2_uncore_event_init(struct perf_event *event)
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
return -EINVAL;
/* We have no filtering of any kind */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
event->attr.exclude_guest)
return -EINVAL;
if (event->cpu < 0)
return -EINVAL;
@ -572,6 +563,7 @@ static int tx2_uncore_pmu_register(
.start = tx2_uncore_event_start,
.stop = tx2_uncore_event_stop,
.read = tx2_uncore_event_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
tx2_pmu->pmu.name = devm_kasprintf(dev, GFP_KERNEL,


@ -917,11 +917,6 @@ static int xgene_perf_event_init(struct perf_event *event)
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
return -EINVAL;
/* SOC counters do not have usr/os/guest/host bits */
if (event->attr.exclude_user || event->attr.exclude_kernel ||
event->attr.exclude_host || event->attr.exclude_guest)
return -EINVAL;
if (event->cpu < 0)
return -EINVAL;
/*
@ -1136,6 +1131,7 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name)
.start = xgene_perf_start,
.stop = xgene_perf_stop,
.read = xgene_perf_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
/* Hardware counter init */


@ -154,8 +154,9 @@ struct coresight_connection {
* @orphan: true if the component has connections that haven't been linked.
* @enable: 'true' if component is currently part of an active path.
* @activated: 'true' only if a _sink_ has been activated. A sink can be
activated but not yet enabled. Enabling for a _sink_
happens when a source has been selected for that it.
* activated but not yet enabled. Enabling for a _sink_
* appens when a source has been selected for that it.
* @ea: Device attribute for sink representation under PMU directory.
*/
struct coresight_device {
struct coresight_connection *conns;
@ -168,7 +169,9 @@ struct coresight_device {
atomic_t *refcnt;
bool orphan;
bool enable; /* true only if configured as part of a path */
/* sink specific fields */
bool activated; /* true only if a sink is part of a path */
struct dev_ext_attribute *ea;
};
#define to_coresight_device(d) container_of(d, struct coresight_device, dev)


@ -1007,6 +1007,7 @@ bpf_address_lookup(unsigned long addr, unsigned long *size,
void bpf_prog_kallsyms_add(struct bpf_prog *fp);
void bpf_prog_kallsyms_del(struct bpf_prog *fp);
void bpf_get_prog_name(const struct bpf_prog *prog, char *sym);
#else /* CONFIG_BPF_JIT */
@ -1062,6 +1063,12 @@ static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp)
static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp)
{
}
static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
{
sym[0] = '\0';
}
#endif /* CONFIG_BPF_JIT */
void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp);


@ -53,8 +53,8 @@ struct perf_guest_info_callbacks {
#include <linux/atomic.h>
#include <linux/sysfs.h>
#include <linux/perf_regs.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
#include <linux/refcount.h>
#include <asm/local.h>
struct perf_callchain_entry {
@ -244,6 +244,7 @@ struct perf_event;
#define PERF_PMU_CAP_EXCLUSIVE 0x10
#define PERF_PMU_CAP_ITRACE 0x20
#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40
#define PERF_PMU_CAP_NO_EXCLUDE 0x80
/**
* struct pmu - generic performance monitoring unit
@ -409,7 +410,7 @@ struct pmu {
/*
* Set up pmu-private data structures for an AUX area
*/
void *(*setup_aux) (int cpu, void **pages,
void *(*setup_aux) (struct perf_event *event, void **pages,
int nr_pages, bool overwrite);
/* optional */
@ -494,6 +495,11 @@ struct perf_addr_filters_head {
unsigned int nr_file_filters;
};
struct perf_addr_filter_range {
unsigned long start;
unsigned long size;
};
/**
* enum perf_event_state - the states of an event:
*/
@ -670,7 +676,7 @@ struct perf_event {
/* address range filters */
struct perf_addr_filters_head addr_filters;
/* vma address array for file-based filders */
unsigned long *addr_filters_offs;
struct perf_addr_filter_range *addr_filter_ranges;
unsigned long addr_filters_gen;
void (*destroy)(struct perf_event *);
@ -742,7 +748,7 @@ struct perf_event_context {
int nr_stat;
int nr_freq;
int rotate_disable;
atomic_t refcount;
refcount_t refcount;
struct task_struct *task;
/*
@ -983,9 +989,9 @@ extern void perf_event_output_forward(struct perf_event *event,
extern void perf_event_output_backward(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs);
extern void perf_event_output(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs);
extern int perf_event_output(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs);
static inline bool
is_default_overflow_handler(struct perf_event *event)
@ -1009,6 +1015,15 @@ perf_event__output_id_sample(struct perf_event *event,
extern void
perf_log_lost_samples(struct perf_event *event, u64 lost);
static inline bool event_has_any_exclude_flag(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
return attr->exclude_idle || attr->exclude_user ||
attr->exclude_kernel || attr->exclude_hv ||
attr->exclude_guest || attr->exclude_host;
}
static inline bool is_sampling_event(struct perf_event *event)
{
return event->attr.sample_period != 0;
@ -1118,6 +1133,13 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
}
extern void perf_event_mmap(struct vm_area_struct *vma);
extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
bool unregister, const char *sym);
extern void perf_event_bpf_event(struct bpf_prog *prog,
enum perf_bpf_event_type type,
u16 flags);
extern struct perf_guest_info_callbacks *perf_guest_cbs;
extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@ -1338,6 +1360,13 @@ static inline int perf_unregister_guest_info_callbacks
(struct perf_guest_info_callbacks *callbacks) { return 0; }
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);
static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
bool unregister, const char *sym) { }
static inline void perf_event_bpf_event(struct bpf_prog *prog,
enum perf_bpf_event_type type,
u16 flags) { }
static inline void perf_event_exec(void) { }
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
static inline void perf_event_namespaces(struct task_struct *tsk) { }


@ -372,7 +372,9 @@ struct perf_event_attr {
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
namespaces : 1, /* include namespaces data */
__reserved_1 : 35;
ksymbol : 1, /* include ksymbol events */
bpf_event : 1, /* include bpf events */
__reserved_1 : 33;
union {
__u32 wakeup_events; /* wakeup every n events */
@ -445,8 +447,6 @@ struct perf_event_query_bpf {
__u32 ids[0];
};
#define perf_flags(attr) (*(&(attr)->read_format + 1))
/*
* Ioctls that can be done on a perf event fd:
*/
@ -965,9 +965,58 @@ enum perf_event_type {
*/
PERF_RECORD_NAMESPACES = 16,
/*
* Record ksymbol register/unregister events:
*
* struct {
* struct perf_event_header header;
* u64 addr;
* u32 len;
* u16 ksym_type;
* u16 flags;
* char name[];
* struct sample_id sample_id;
* };
*/
PERF_RECORD_KSYMBOL = 17,
/*
* Record bpf events:
* enum perf_bpf_event_type {
* PERF_BPF_EVENT_UNKNOWN = 0,
* PERF_BPF_EVENT_PROG_LOAD = 1,
* PERF_BPF_EVENT_PROG_UNLOAD = 2,
* };
*
* struct {
* struct perf_event_header header;
* u16 type;
* u16 flags;
* u32 id;
* u8 tag[BPF_TAG_SIZE];
* struct sample_id sample_id;
* };
*/
PERF_RECORD_BPF_EVENT = 18,
PERF_RECORD_MAX, /* non-ABI */
};
enum perf_record_ksymbol_type {
PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0,
PERF_RECORD_KSYMBOL_TYPE_BPF = 1,
PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */
};
#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0)
enum perf_bpf_event_type {
PERF_BPF_EVENT_UNKNOWN = 0,
PERF_BPF_EVENT_PROG_LOAD = 1,
PERF_BPF_EVENT_PROG_UNLOAD = 2,
PERF_BPF_EVENT_MAX, /* non-ABI */
};
#define PERF_MAX_STACK_DEPTH 127
#define PERF_MAX_CONTEXTS_PER_STACK 8
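
As a reading aid for the PERF_RECORD_KSYMBOL layout documented in the comment above, here is an illustrative user-space struct following that field order; the struct name is ours and is not part of the ABI, and the trailing sample_id data depends on the event's sample_type/sample_id_all configuration. Tools typically request these records (and PERF_RECORD_BPF_EVENT) on a side-band event by setting the new attr.ksymbol and attr.bpf_event bits shown earlier in this header.

/* Illustrative only: user-space view of PERF_RECORD_KSYMBOL. */
struct ksymbol_record {
	struct perf_event_header header;  /* header.type == PERF_RECORD_KSYMBOL */
	__u64 addr;                       /* start address of the symbol */
	__u32 len;                        /* length of the covered range */
	__u16 ksym_type;                  /* enum perf_record_ksymbol_type */
	__u16 flags;                      /* e.g. PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER */
	char  name[];                     /* symbol name; sample_id data follows */
};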


@ -539,7 +539,7 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog,
*symbol_end = addr + hdr->pages * PAGE_SIZE;
}
static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
{
const char *end = sym + KSYM_NAME_LEN;
const struct btf_type *type;


@ -1258,6 +1258,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
if (atomic_dec_and_test(&prog->aux->refcnt)) {
perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
/* bpf_prog_free_id() must be called first */
bpf_prog_free_id(prog, do_idr_lock);
bpf_prog_kallsyms_del_all(prog);
@ -1631,6 +1632,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
}
bpf_prog_kallsyms_add(prog);
perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
return err;
free_used_maps:


@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Performance events callchain code, extracted from core.c:
*
@ -5,8 +6,6 @@
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*
* For licensing details see kernel-base/COPYING
*/
#include <linux/perf_event.h>


@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Performance events core code:
*
@ -5,8 +6,6 @@
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*
* For licensing details see kernel-base/COPYING
*/
#include <linux/fs.h>
@ -385,6 +384,8 @@ static atomic_t nr_namespaces_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
static atomic_t nr_ksymbol_events __read_mostly;
static atomic_t nr_bpf_events __read_mostly;
static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
@ -1171,7 +1172,7 @@ static void perf_event_ctx_deactivate(struct perf_event_context *ctx)
static void get_ctx(struct perf_event_context *ctx)
{
WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
refcount_inc(&ctx->refcount);
}
static void free_ctx(struct rcu_head *head)
@ -1185,7 +1186,7 @@ static void free_ctx(struct rcu_head *head)
static void put_ctx(struct perf_event_context *ctx)
{
if (atomic_dec_and_test(&ctx->refcount)) {
if (refcount_dec_and_test(&ctx->refcount)) {
if (ctx->parent_ctx)
put_ctx(ctx->parent_ctx);
if (ctx->task && ctx->task != TASK_TOMBSTONE)
@ -1254,6 +1255,7 @@ static void put_ctx(struct perf_event_context *ctx)
* perf_event_context::lock
* perf_event::mmap_mutex
* mmap_sem
* perf_addr_filters_head::lock
*
* cpu_hotplug_lock
* pmus_lock
@ -1267,7 +1269,7 @@ perf_event_ctx_lock_nested(struct perf_event *event, int nesting)
again:
rcu_read_lock();
ctx = READ_ONCE(event->ctx);
if (!atomic_inc_not_zero(&ctx->refcount)) {
if (!refcount_inc_not_zero(&ctx->refcount)) {
rcu_read_unlock();
goto again;
}
@ -1400,7 +1402,7 @@ retry:
}
if (ctx->task == TASK_TOMBSTONE ||
!atomic_inc_not_zero(&ctx->refcount)) {
!refcount_inc_not_zero(&ctx->refcount)) {
raw_spin_unlock(&ctx->lock);
ctx = NULL;
} else {
@ -2797,7 +2799,7 @@ static int perf_event_stop(struct perf_event *event, int restart)
*
* (p1) when userspace mappings change as a result of (1) or (2) or (3) below,
* we update the addresses of corresponding vmas in
* event::addr_filters_offs array and bump the event::addr_filters_gen;
* event::addr_filter_ranges array and bump the event::addr_filters_gen;
* (p2) when an event is scheduled in (pmu::add), it calls
* perf_event_addr_filters_sync() which calls pmu::addr_filters_sync()
* if the generation has changed since the previous call.
@ -4056,7 +4058,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
INIT_LIST_HEAD(&ctx->event_list);
INIT_LIST_HEAD(&ctx->pinned_active);
INIT_LIST_HEAD(&ctx->flexible_active);
atomic_set(&ctx->refcount, 1);
refcount_set(&ctx->refcount, 1);
}
static struct perf_event_context *
@ -4235,7 +4237,7 @@ static bool is_sb_event(struct perf_event *event)
if (attr->mmap || attr->mmap_data || attr->mmap2 ||
attr->comm || attr->comm_exec ||
attr->task ||
attr->task || attr->ksymbol ||
attr->context_switch)
return true;
return false;
@ -4305,6 +4307,10 @@ static void unaccount_event(struct perf_event *event)
dec = true;
if (has_branch_stack(event))
dec = true;
if (event->attr.ksymbol)
atomic_dec(&nr_ksymbol_events);
if (event->attr.bpf_event)
atomic_dec(&nr_bpf_events);
if (dec) {
if (!atomic_add_unless(&perf_sched_count, -1, 1))
@ -4440,7 +4446,7 @@ static void _free_event(struct perf_event *event)
perf_event_free_bpf_prog(event);
perf_addr_filters_splice(event, NULL);
kfree(event->addr_filters_offs);
kfree(event->addr_filter_ranges);
if (event->destroy)
event->destroy(event);
@ -5396,7 +5402,7 @@ struct ring_buffer *ring_buffer_get(struct perf_event *event)
rcu_read_lock();
rb = rcu_dereference(event->rb);
if (rb) {
if (!atomic_inc_not_zero(&rb->refcount))
if (!refcount_inc_not_zero(&rb->refcount))
rb = NULL;
}
rcu_read_unlock();
@ -5406,7 +5412,7 @@ struct ring_buffer *ring_buffer_get(struct perf_event *event)
void ring_buffer_put(struct ring_buffer *rb)
{
if (!atomic_dec_and_test(&rb->refcount))
if (!refcount_dec_and_test(&rb->refcount))
return;
WARN_ON_ONCE(!list_empty(&rb->event_list));
@ -5471,7 +5477,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
/* this has to be the last one */
rb_free_aux(rb);
WARN_ON_ONCE(atomic_read(&rb->aux_refcount));
WARN_ON_ONCE(refcount_read(&rb->aux_refcount));
mutex_unlock(&event->mmap_mutex);
}
@ -6497,7 +6503,7 @@ void perf_prepare_sample(struct perf_event_header *header,
data->phys_addr = perf_virt_to_phys(data->addr);
}
static __always_inline void
static __always_inline int
__perf_event_output(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs,
@ -6507,13 +6513,15 @@ __perf_event_output(struct perf_event *event,
{
struct perf_output_handle handle;
struct perf_event_header header;
int err;
/* protect the callchain buffers */
rcu_read_lock();
perf_prepare_sample(&header, data, event, regs);
if (output_begin(&handle, event, header.size))
err = output_begin(&handle, event, header.size);
if (err)
goto exit;
perf_output_sample(&handle, &header, data, event);
@ -6522,6 +6530,7 @@ __perf_event_output(struct perf_event *event,
exit:
rcu_read_unlock();
return err;
}
void
@ -6540,12 +6549,12 @@ perf_event_output_backward(struct perf_event *event,
__perf_event_output(event, data, regs, perf_output_begin_backward);
}
void
int
perf_event_output(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
__perf_event_output(event, data, regs, perf_output_begin);
return __perf_event_output(event, data, regs, perf_output_begin);
}
/*
@ -6686,7 +6695,8 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data)
raw_spin_lock_irqsave(&ifh->lock, flags);
list_for_each_entry(filter, &ifh->list, entry) {
if (filter->path.dentry) {
event->addr_filters_offs[count] = 0;
event->addr_filter_ranges[count].start = 0;
event->addr_filter_ranges[count].size = 0;
restart++;
}
@ -7366,28 +7376,47 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter,
return true;
}
static bool perf_addr_filter_vma_adjust(struct perf_addr_filter *filter,
struct vm_area_struct *vma,
struct perf_addr_filter_range *fr)
{
unsigned long vma_size = vma->vm_end - vma->vm_start;
unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
struct file *file = vma->vm_file;
if (!perf_addr_filter_match(filter, file, off, vma_size))
return false;
if (filter->offset < off) {
fr->start = vma->vm_start;
fr->size = min(vma_size, filter->size - (off - filter->offset));
} else {
fr->start = vma->vm_start + filter->offset - off;
fr->size = min(vma->vm_end - fr->start, filter->size);
}
return true;
}
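	/*
	 * Worked example for perf_addr_filter_vma_adjust(), with purely
	 * hypothetical numbers: a filter spanning file offsets
	 * [0x2000, 0x2000 + 0x3000) against a vma that maps file offset
	 * 0x1000 at vm_start == 0x400000, vm_end == 0x404000.
	 * filter->offset (0x2000) >= off (0x1000), so the else branch gives
	 * fr->start = 0x400000 + 0x2000 - 0x1000 = 0x401000 and
	 * fr->size  = min(0x404000 - 0x401000, 0x3000) = 0x3000.
	 */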
static void __perf_addr_filters_adjust(struct perf_event *event, void *data)
{
struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
struct vm_area_struct *vma = data;
unsigned long off = vma->vm_pgoff << PAGE_SHIFT, flags;
struct file *file = vma->vm_file;
struct perf_addr_filter *filter;
unsigned int restart = 0, count = 0;
unsigned long flags;
if (!has_addr_filter(event))
return;
if (!file)
if (!vma->vm_file)
return;
raw_spin_lock_irqsave(&ifh->lock, flags);
list_for_each_entry(filter, &ifh->list, entry) {
if (perf_addr_filter_match(filter, file, off,
vma->vm_end - vma->vm_start)) {
event->addr_filters_offs[count] = vma->vm_start;
if (perf_addr_filter_vma_adjust(filter, vma,
&event->addr_filter_ranges[count]))
restart++;
}
count++;
}
@ -7658,6 +7687,207 @@ static void perf_log_throttle(struct perf_event *event, int enable)
perf_output_end(&handle);
}
/*
* ksymbol register/unregister tracking
*/
struct perf_ksymbol_event {
const char *name;
int name_len;
struct {
struct perf_event_header header;
u64 addr;
u32 len;
u16 ksym_type;
u16 flags;
} event_id;
};
static int perf_event_ksymbol_match(struct perf_event *event)
{
return event->attr.ksymbol;
}
static void perf_event_ksymbol_output(struct perf_event *event, void *data)
{
struct perf_ksymbol_event *ksymbol_event = data;
struct perf_output_handle handle;
struct perf_sample_data sample;
int ret;
if (!perf_event_ksymbol_match(event))
return;
perf_event_header__init_id(&ksymbol_event->event_id.header,
&sample, event);
ret = perf_output_begin(&handle, event,
ksymbol_event->event_id.header.size);
if (ret)
return;
perf_output_put(&handle, ksymbol_event->event_id);
__output_copy(&handle, ksymbol_event->name, ksymbol_event->name_len);
perf_event__output_id_sample(event, &handle, &sample);
perf_output_end(&handle);
}
void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
const char *sym)
{
struct perf_ksymbol_event ksymbol_event;
char name[KSYM_NAME_LEN];
u16 flags = 0;
int name_len;
if (!atomic_read(&nr_ksymbol_events))
return;
if (ksym_type >= PERF_RECORD_KSYMBOL_TYPE_MAX ||
ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN)
goto err;
strlcpy(name, sym, KSYM_NAME_LEN);
name_len = strlen(name) + 1;
while (!IS_ALIGNED(name_len, sizeof(u64)))
name[name_len++] = '\0';
BUILD_BUG_ON(KSYM_NAME_LEN % sizeof(u64));
if (unregister)
flags |= PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER;
ksymbol_event = (struct perf_ksymbol_event){
.name = name,
.name_len = name_len,
.event_id = {
.header = {
.type = PERF_RECORD_KSYMBOL,
.size = sizeof(ksymbol_event.event_id) +
name_len,
},
.addr = addr,
.len = len,
.ksym_type = ksym_type,
.flags = flags,
},
};
perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL);
return;
err:
WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type);
}
/*
* bpf program load/unload tracking
*/
struct perf_bpf_event {
struct bpf_prog *prog;
struct {
struct perf_event_header header;
u16 type;
u16 flags;
u32 id;
u8 tag[BPF_TAG_SIZE];
} event_id;
};
static int perf_event_bpf_match(struct perf_event *event)
{
return event->attr.bpf_event;
}
static void perf_event_bpf_output(struct perf_event *event, void *data)
{
struct perf_bpf_event *bpf_event = data;
struct perf_output_handle handle;
struct perf_sample_data sample;
int ret;
if (!perf_event_bpf_match(event))
return;
perf_event_header__init_id(&bpf_event->event_id.header,
&sample, event);
ret = perf_output_begin(&handle, event,
bpf_event->event_id.header.size);
if (ret)
return;
perf_output_put(&handle, bpf_event->event_id);
perf_event__output_id_sample(event, &handle, &sample);
perf_output_end(&handle);
}
static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
enum perf_bpf_event_type type)
{
bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD;
char sym[KSYM_NAME_LEN];
int i;
if (prog->aux->func_cnt == 0) {
bpf_get_prog_name(prog, sym);
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
(u64)(unsigned long)prog->bpf_func,
prog->jited_len, unregister, sym);
} else {
for (i = 0; i < prog->aux->func_cnt; i++) {
struct bpf_prog *subprog = prog->aux->func[i];
bpf_get_prog_name(subprog, sym);
perf_event_ksymbol(
PERF_RECORD_KSYMBOL_TYPE_BPF,
(u64)(unsigned long)subprog->bpf_func,
subprog->jited_len, unregister, sym);
}
}
}
void perf_event_bpf_event(struct bpf_prog *prog,
enum perf_bpf_event_type type,
u16 flags)
{
struct perf_bpf_event bpf_event;
if (type <= PERF_BPF_EVENT_UNKNOWN ||
type >= PERF_BPF_EVENT_MAX)
return;
switch (type) {
case PERF_BPF_EVENT_PROG_LOAD:
case PERF_BPF_EVENT_PROG_UNLOAD:
if (atomic_read(&nr_ksymbol_events))
perf_event_bpf_emit_ksymbols(prog, type);
break;
default:
break;
}
if (!atomic_read(&nr_bpf_events))
return;
bpf_event = (struct perf_bpf_event){
.prog = prog,
.event_id = {
.header = {
.type = PERF_RECORD_BPF_EVENT,
.size = sizeof(bpf_event.event_id),
},
.type = type,
.flags = flags,
.id = prog->aux->id,
},
};
BUILD_BUG_ON(BPF_TAG_SIZE % sizeof(u64));
memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE);
perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
}
void perf_event_itrace_started(struct perf_event *event)
{
event->attach_state |= PERF_ATTACH_ITRACE;
@ -8776,26 +9006,19 @@ static void perf_addr_filters_splice(struct perf_event *event,
* @filter; if so, adjust filter's address range.
* Called with mm::mmap_sem down for reading.
*/
static unsigned long perf_addr_filter_apply(struct perf_addr_filter *filter,
struct mm_struct *mm)
static void perf_addr_filter_apply(struct perf_addr_filter *filter,
struct mm_struct *mm,
struct perf_addr_filter_range *fr)
{
struct vm_area_struct *vma;
for (vma = mm->mmap; vma; vma = vma->vm_next) {
struct file *file = vma->vm_file;
unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
unsigned long vma_size = vma->vm_end - vma->vm_start;
if (!file)
if (!vma->vm_file)
continue;
if (!perf_addr_filter_match(filter, file, off, vma_size))
continue;
return vma->vm_start;
if (perf_addr_filter_vma_adjust(filter, vma, fr))
return;
}
return 0;
}
/*
@ -8829,15 +9052,15 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
raw_spin_lock_irqsave(&ifh->lock, flags);
list_for_each_entry(filter, &ifh->list, entry) {
event->addr_filters_offs[count] = 0;
event->addr_filter_ranges[count].start = 0;
event->addr_filter_ranges[count].size = 0;
/*
* Adjust base offset if the filter is associated to a binary
* that needs to be mapped:
*/
if (filter->path.dentry)
event->addr_filters_offs[count] =
perf_addr_filter_apply(filter, mm);
perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]);
count++;
}
@ -9788,6 +10011,15 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
if (ctx)
perf_event_ctx_unlock(event->group_leader, ctx);
if (!ret) {
if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE &&
event_has_any_exclude_flag(event)) {
if (event->destroy)
event->destroy(event);
ret = -EINVAL;
}
}
if (ret)
module_put(pmu->module);
@ -9916,6 +10148,10 @@ static void account_event(struct perf_event *event)
inc = true;
if (is_cgroup_event(event))
inc = true;
if (event->attr.ksymbol)
atomic_inc(&nr_ksymbol_events);
if (event->attr.bpf_event)
atomic_inc(&nr_bpf_events);
if (inc) {
/*
@ -10098,14 +10334,28 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
goto err_pmu;
if (has_addr_filter(event)) {
event->addr_filters_offs = kcalloc(pmu->nr_addr_filters,
sizeof(unsigned long),
GFP_KERNEL);
if (!event->addr_filters_offs) {
event->addr_filter_ranges = kcalloc(pmu->nr_addr_filters,
sizeof(struct perf_addr_filter_range),
GFP_KERNEL);
if (!event->addr_filter_ranges) {
err = -ENOMEM;
goto err_per_task;
}
/*
* Clone the parent's vma offsets: they are valid until exec()
* even if the mm is not shared with the parent.
*/
if (event->parent) {
struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
raw_spin_lock_irq(&ifh->lock);
memcpy(event->addr_filter_ranges,
event->parent->addr_filter_ranges,
pmu->nr_addr_filters * sizeof(struct perf_addr_filter_range));
raw_spin_unlock_irq(&ifh->lock);
}
/* force hw sync on the address filters */
event->addr_filters_gen = 1;
}
@ -10124,7 +10374,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
return event;
err_addr_filters:
kfree(event->addr_filters_offs);
kfree(event->addr_filter_ranges);
err_per_task:
exclusive_event_destroy(event);
@ -10407,7 +10657,7 @@ __perf_event_ctx_lock_double(struct perf_event *group_leader,
again:
rcu_read_lock();
gctx = READ_ONCE(group_leader->ctx);
if (!atomic_inc_not_zero(&gctx->refcount)) {
if (!refcount_inc_not_zero(&gctx->refcount)) {
rcu_read_unlock();
goto again;
}

View File

@ -1,18 +1,5 @@
// SPDX-License-Identifier: GPL-2.0+
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) 2007 Alan Stern
* Copyright (C) IBM Corporation, 2009
* Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>

View File

@ -4,13 +4,14 @@
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/refcount.h>
/* Buffer handling */
#define RING_BUFFER_WRITABLE 0x01
struct ring_buffer {
atomic_t refcount;
refcount_t refcount;
struct rcu_head rcu_head;
#ifdef CONFIG_PERF_USE_VMALLOC
struct work_struct work;
@ -48,7 +49,7 @@ struct ring_buffer {
atomic_t aux_mmap_count;
unsigned long aux_mmap_locked;
void (*free_aux)(void *);
atomic_t aux_refcount;
refcount_t aux_refcount;
void **aux_pages;
void *aux_priv;

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Performance events ring-buffer code:
*
@ -5,8 +6,6 @@
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*
* For licensing details see kernel-base/COPYING
*/
#include <linux/perf_event.h>
@ -285,7 +284,7 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
else
rb->overwrite = 1;
atomic_set(&rb->refcount, 1);
refcount_set(&rb->refcount, 1);
INIT_LIST_HEAD(&rb->event_list);
spin_lock_init(&rb->event_lock);
@ -358,7 +357,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
if (!atomic_read(&rb->aux_mmap_count))
goto err;
if (!atomic_inc_not_zero(&rb->aux_refcount))
if (!refcount_inc_not_zero(&rb->aux_refcount))
goto err;
/*
@ -658,7 +657,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
goto out;
}
rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages,
rb->aux_priv = event->pmu->setup_aux(event, rb->aux_pages, nr_pages,
overwrite);
if (!rb->aux_priv)
goto out;
@ -671,7 +670,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
* we keep a refcount here to make sure either of the two can
* reference them safely.
*/
atomic_set(&rb->aux_refcount, 1);
refcount_set(&rb->aux_refcount, 1);
rb->aux_overwrite = overwrite;
rb->aux_watermark = watermark;
@ -690,7 +689,7 @@ out:
void rb_free_aux(struct ring_buffer *rb)
{
if (atomic_dec_and_test(&rb->aux_refcount))
if (refcount_dec_and_test(&rb->aux_refcount))
__rb_free_aux(rb);
}

View File

@ -1,20 +1,7 @@
// SPDX-License-Identifier: GPL-2.0+
/*
* User-space Probes (UProbes)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2008-2012
* Authors:
* Srikar Dronamraju

View File

@ -494,7 +494,7 @@ static int get_ksymbol_ftrace_mod(struct kallsym_iter *iter)
static int get_ksymbol_bpf(struct kallsym_iter *iter)
{
iter->module_name[0] = '\0';
strlcpy(iter->module_name, "bpf", MODULE_NAME_LEN);
iter->exported = 0;
return bpf_get_kallsym(iter->pos - iter->pos_ftrace_mod_end,
&iter->value, &iter->type,

View File

@ -1396,7 +1396,7 @@ bool __weak arch_within_kprobe_blacklist(unsigned long addr)
addr < (unsigned long)__kprobes_text_end;
}
bool within_kprobe_blacklist(unsigned long addr)
static bool __within_kprobe_blacklist(unsigned long addr)
{
struct kprobe_blacklist_entry *ent;
@ -1410,7 +1410,26 @@ bool within_kprobe_blacklist(unsigned long addr)
if (addr >= ent->start_addr && addr < ent->end_addr)
return true;
}
return false;
}
bool within_kprobe_blacklist(unsigned long addr)
{
char symname[KSYM_NAME_LEN], *p;
if (__within_kprobe_blacklist(addr))
return true;
/* Check if the address is on a suffixed-symbol */
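	/*
	 * For example (symbol names are illustrative): a probe address that
	 * resolves to the gcc-generated clone "foo.isra.1" must be rejected
	 * whenever "foo" itself is blacklisted, so strip everything from the
	 * first '.' and repeat the lookup on the base symbol.
	 */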
if (!lookup_symbol_name(addr, symname)) {
p = strchr(symname, '.');
if (!p)
return false;
*p = '\0';
addr = (unsigned long)kprobe_lookup_name(symname, 0);
if (addr)
return __within_kprobe_blacklist(addr);
}
return false;
}

View File

@ -52,6 +52,7 @@
#include <linux/jhash.h>
#include <linux/nmi.h>
#include <linux/rcupdate.h>
#include <linux/kprobes.h>
#include <asm/sections.h>
@ -3161,6 +3162,7 @@ void lockdep_hardirqs_on(unsigned long ip)
__trace_hardirqs_on_caller(ip);
current->lockdep_recursion = 0;
}
NOKPROBE_SYMBOL(lockdep_hardirqs_on);
/*
* Hardirqs were disabled:
@ -3190,6 +3192,7 @@ void lockdep_hardirqs_off(unsigned long ip)
} else
debug_atomic_inc(redundant_hardirqs_off);
}
NOKPROBE_SYMBOL(lockdep_hardirqs_off);
/*
* Softirqs will be enabled:
@ -4007,7 +4010,8 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
return 0;
}
static int __lock_is_held(const struct lockdep_map *lock, int read)
static nokprobe_inline
int __lock_is_held(const struct lockdep_map *lock, int read)
{
struct task_struct *curr = current;
int i;
@ -4240,6 +4244,7 @@ int lock_is_held_type(const struct lockdep_map *lock, int read)
return ret;
}
EXPORT_SYMBOL_GPL(lock_is_held_type);
NOKPROBE_SYMBOL(lock_is_held_type);
struct pin_cookie lock_pin_lock(struct lockdep_map *lock)
{

View File

@ -50,6 +50,7 @@
#include <linux/ftrace.h>
#include <linux/tick.h>
#include <linux/sysrq.h>
#include <linux/kprobes.h>
#include "tree.h"
#include "rcu.h"
@ -882,6 +883,7 @@ void rcu_nmi_enter(void)
{
rcu_nmi_enter_common(false);
}
NOKPROBE_SYMBOL(rcu_nmi_enter);
/**
* rcu_irq_enter - inform RCU that current CPU is entering irq away from idle

View File

@ -39,6 +39,7 @@
#include <linux/tick.h>
#include <linux/rcupdate_wait.h>
#include <linux/sched/isolation.h>
#include <linux/kprobes.h>
#define CREATE_TRACE_POINTS
@ -236,6 +237,7 @@ int notrace debug_lockdep_rcu_enabled(void)
current->lockdep_recursion == 0;
}
EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
NOKPROBE_SYMBOL(debug_lockdep_rcu_enabled);
/**
* rcu_read_lock_held() - might we be in RCU read-side critical section?

View File

@ -431,8 +431,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
if (unlikely(event->oncpu != cpu))
return -EOPNOTSUPP;
perf_event_output(event, sd, regs);
return 0;
return perf_event_output(event, sd, regs);
}
BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,

View File

@ -14,6 +14,7 @@
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/kprobes.h>
#include "trace.h"
@ -365,7 +366,7 @@ out:
__trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
}
static inline void
static nokprobe_inline void
start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc)
{
int cpu;
@ -401,7 +402,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc)
atomic_dec(&data->disabled);
}
static inline void
static nokprobe_inline void
stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc)
{
int cpu;
@ -443,6 +444,7 @@ void start_critical_timings(void)
start_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc);
}
EXPORT_SYMBOL_GPL(start_critical_timings);
NOKPROBE_SYMBOL(start_critical_timings);
void stop_critical_timings(void)
{
@ -452,6 +454,7 @@ void stop_critical_timings(void)
stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc);
}
EXPORT_SYMBOL_GPL(stop_critical_timings);
NOKPROBE_SYMBOL(stop_critical_timings);
#ifdef CONFIG_FUNCTION_TRACER
static bool function_enabled;
@ -611,6 +614,7 @@ void tracer_hardirqs_on(unsigned long a0, unsigned long a1)
if (!preempt_trace(pc) && irq_trace())
stop_critical_timing(a0, a1, pc);
}
NOKPROBE_SYMBOL(tracer_hardirqs_on);
void tracer_hardirqs_off(unsigned long a0, unsigned long a1)
{
@ -619,6 +623,7 @@ void tracer_hardirqs_off(unsigned long a0, unsigned long a1)
if (!preempt_trace(pc) && irq_trace())
start_critical_timing(a0, a1, pc);
}
NOKPROBE_SYMBOL(tracer_hardirqs_off);
static int irqsoff_tracer_init(struct trace_array *tr)
{

View File

@ -9,6 +9,7 @@
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/kprobes.h>
#include "trace.h"
#define CREATE_TRACE_POINTS
@ -30,6 +31,7 @@ void trace_hardirqs_on(void)
lockdep_hardirqs_on(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_on);
NOKPROBE_SYMBOL(trace_hardirqs_on);
void trace_hardirqs_off(void)
{
@ -43,6 +45,7 @@ void trace_hardirqs_off(void)
lockdep_hardirqs_off(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_off);
NOKPROBE_SYMBOL(trace_hardirqs_off);
__visible void trace_hardirqs_on_caller(unsigned long caller_addr)
{
@ -56,6 +59,7 @@ __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
lockdep_hardirqs_on(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_on_caller);
NOKPROBE_SYMBOL(trace_hardirqs_on_caller);
__visible void trace_hardirqs_off_caller(unsigned long caller_addr)
{
@ -69,6 +73,7 @@ __visible void trace_hardirqs_off_caller(unsigned long caller_addr)
lockdep_hardirqs_off(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_off_caller);
NOKPROBE_SYMBOL(trace_hardirqs_off_caller);
#endif /* CONFIG_TRACE_IRQFLAGS */
#ifdef CONFIG_TRACE_PREEMPT_TOGGLE

View File

@ -11,6 +11,7 @@
#include <linux/export.h>
#include <linux/bsearch.h>
#include <linux/kprobes.h>
/*
* bsearch - binary search an array of elements
@ -53,3 +54,4 @@ void *bsearch(const void *key, const void *base, size_t num, size_t size,
return NULL;
}
EXPORT_SYMBOL(bsearch);
NOKPROBE_SYMBOL(bsearch);

View File

@ -5,10 +5,11 @@
* DEBUG_PREEMPT variant of smp_processor_id().
*/
#include <linux/export.h>
#include <linux/kprobes.h>
#include <linux/sched.h>
notrace static unsigned int check_preemption_disabled(const char *what1,
const char *what2)
notrace static nokprobe_inline
unsigned int check_preemption_disabled(const char *what1, const char *what2)
{
int this_cpu = raw_smp_processor_id();
@ -56,9 +57,11 @@ notrace unsigned int debug_smp_processor_id(void)
return check_preemption_disabled("smp_processor_id", "");
}
EXPORT_SYMBOL(debug_smp_processor_id);
NOKPROBE_SYMBOL(debug_smp_processor_id);
notrace void __this_cpu_preempt_check(const char *op)
{
check_preemption_disabled("__this_cpu_", op);
}
EXPORT_SYMBOL(__this_cpu_preempt_check);
NOKPROBE_SYMBOL(__this_cpu_preempt_check);

View File

@ -53,10 +53,6 @@ FEATURE_TESTS_BASIC := \
libslang \
libcrypto \
libunwind \
libunwind-x86 \
libunwind-x86_64 \
libunwind-arm \
libunwind-aarch64 \
pthread-attr-setaffinity-np \
pthread-barrier \
reallocarray \
@ -70,7 +66,6 @@ FEATURE_TESTS_BASIC := \
sched_getcpu \
sdt \
setns \
libopencsd \
libaio
# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
@ -84,6 +79,11 @@ FEATURE_TESTS_EXTRA := \
libbabeltrace \
libbfd-liberty \
libbfd-liberty-z \
libopencsd \
libunwind-x86 \
libunwind-x86_64 \
libunwind-arm \
libunwind-aarch64 \
libunwind-debug-frame \
libunwind-debug-frame-arm \
libunwind-debug-frame-aarch64 \

View File

@ -170,14 +170,14 @@
# include "test-setns.c"
#undef main
#define main main_test_libopencsd
# include "test-libopencsd.c"
#undef main
#define main main_test_libaio
# include "test-libaio.c"
#undef main
#define main main_test_reallocarray
# include "test-reallocarray.c"
#undef main
int main(int argc, char *argv[])
{
main_test_libpython();
@ -217,8 +217,8 @@ int main(int argc, char *argv[])
main_test_sched_getcpu();
main_test_sdt();
main_test_setns();
main_test_libopencsd();
main_test_libaio();
main_test_reallocarray();
return 0;
}

View File

@ -8,3 +8,4 @@ int main(void)
free(get_current_dir_name());
return 0;
}
#undef _GNU_SOURCE

View File

@ -7,3 +7,4 @@ int main(void)
return 0;
}
#undef _GNU_SOURCE

View File

@ -6,3 +6,5 @@ int main(void)
{
return !!reallocarray(NULL, 1, 1);
}
#undef _GNU_SOURCE

View File

@ -8,3 +8,5 @@ int main(void)
{
return sched_getcpu();
}
#undef _GNU_SOURCE

View File

@ -5,3 +5,4 @@ int main(void)
{
return setns(0, 0);
}
#undef _GNU_SOURCE

View File

@ -43,13 +43,28 @@ struct rb_root {
struct rb_node *rb_node;
};
/*
* Leftmost-cached rbtrees.
*
* We do not cache the rightmost node based on footprint
* size vs number of potential users that could benefit
* from O(1) rb_last(). Just not worth it, users that want
* this feature can always implement the logic explicitly.
* Furthermore, users that want to cache both pointers may
* find it a bit asymmetric, but that's ok.
*/
struct rb_root_cached {
struct rb_root rb_root;
struct rb_node *rb_leftmost;
};
#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3))
#define RB_ROOT (struct rb_root) { NULL, }
#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }
#define rb_entry(ptr, type, member) container_of(ptr, type, member)
#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
#define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL)
/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */
#define RB_EMPTY_NODE(node) \
@ -68,6 +83,12 @@ extern struct rb_node *rb_prev(const struct rb_node *);
extern struct rb_node *rb_first(const struct rb_root *);
extern struct rb_node *rb_last(const struct rb_root *);
extern void rb_insert_color_cached(struct rb_node *,
struct rb_root_cached *, bool);
extern void rb_erase_cached(struct rb_node *node, struct rb_root_cached *);
/* Same as rb_first(), but O(1) */
#define rb_first_cached(root) (root)->rb_leftmost
/* Postorder iteration - always visit the parent after its children */
extern struct rb_node *rb_first_postorder(const struct rb_root *);
extern struct rb_node *rb_next_postorder(const struct rb_node *);
@ -75,6 +96,8 @@ extern struct rb_node *rb_next_postorder(const struct rb_node *);
/* Fast replacement of a single node without remove/rebalance/add/rebalance */
extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
struct rb_root *root);
extern void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new,
struct rb_root_cached *root);
static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
struct rb_node **rb_link)
@ -90,12 +113,29 @@ static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
____ptr ? rb_entry(____ptr, type, member) : NULL; \
})
/*
* Handy for checking that we are not deleting an entry that is
* already in a list, found in block/{blk-throttle,cfq-iosched}.c,
* probably should be moved to lib/rbtree.c...
/**
* rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of
* given type allowing the backing memory of @pos to be invalidated
*
* @pos: the 'type *' to use as a loop cursor.
* @n: another 'type *' to use as temporary storage
* @root: 'rb_root *' of the rbtree.
* @field: the name of the rb_node field within 'type'.
*
* rbtree_postorder_for_each_entry_safe() provides a similar guarantee as
* list_for_each_entry_safe() and allows the iteration to continue independent
* of changes to @pos by the body of the loop.
*
* Note, however, that it cannot handle other modifications that re-order the
* rbtree it is iterating over. This includes calling rb_erase() on @pos, as
* rb_erase() may rebalance the tree, causing us to miss some nodes.
*/
#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \
for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \
pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \
typeof(*pos), field); 1; }); \
pos = n)
static inline void rb_erase_init(struct rb_node *n, struct rb_root *root)
{
rb_erase(n, root);

View File

@ -44,7 +44,9 @@ struct rb_augment_callbacks {
void (*rotate)(struct rb_node *old, struct rb_node *new);
};
extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
extern void __rb_insert_augmented(struct rb_node *node,
struct rb_root *root,
bool newleft, struct rb_node **leftmost,
void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
/*
* Fixup the rbtree and update the augmented information when rebalancing.
@ -60,7 +62,16 @@ static inline void
rb_insert_augmented(struct rb_node *node, struct rb_root *root,
const struct rb_augment_callbacks *augment)
{
__rb_insert_augmented(node, root, augment->rotate);
__rb_insert_augmented(node, root, false, NULL, augment->rotate);
}
static inline void
rb_insert_augmented_cached(struct rb_node *node,
struct rb_root_cached *root, bool newleft,
const struct rb_augment_callbacks *augment)
{
__rb_insert_augmented(node, &root->rb_root,
newleft, &root->rb_leftmost, augment->rotate);
}
#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \
@ -93,7 +104,9 @@ rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \
old->rbaugmented = rbcompute(old); \
} \
rbstatic const struct rb_augment_callbacks rbname = { \
rbname ## _propagate, rbname ## _copy, rbname ## _rotate \
.propagate = rbname ## _propagate, \
.copy = rbname ## _copy, \
.rotate = rbname ## _rotate \
};
@ -126,11 +139,11 @@ __rb_change_child(struct rb_node *old, struct rb_node *new,
{
if (parent) {
if (parent->rb_left == old)
parent->rb_left = new;
WRITE_ONCE(parent->rb_left, new);
else
parent->rb_right = new;
WRITE_ONCE(parent->rb_right, new);
} else
root->rb_node = new;
WRITE_ONCE(root->rb_node, new);
}
extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
@ -138,12 +151,17 @@ extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
static __always_inline struct rb_node *
__rb_erase_augmented(struct rb_node *node, struct rb_root *root,
struct rb_node **leftmost,
const struct rb_augment_callbacks *augment)
{
struct rb_node *child = node->rb_right, *tmp = node->rb_left;
struct rb_node *child = node->rb_right;
struct rb_node *tmp = node->rb_left;
struct rb_node *parent, *rebalance;
unsigned long pc;
if (leftmost && node == *leftmost)
*leftmost = rb_next(node);
if (!tmp) {
/*
* Case 1: node to erase has no more than 1 child (easy!)
@ -170,6 +188,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
tmp = parent;
} else {
struct rb_node *successor = child, *child2;
tmp = child->rb_left;
if (!tmp) {
/*
@ -183,6 +202,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
*/
parent = successor;
child2 = successor->rb_right;
augment->copy(node, successor);
} else {
/*
@ -204,19 +224,23 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
successor = tmp;
tmp = tmp->rb_left;
} while (tmp);
parent->rb_left = child2 = successor->rb_right;
successor->rb_right = child;
child2 = successor->rb_right;
WRITE_ONCE(parent->rb_left, child2);
WRITE_ONCE(successor->rb_right, child);
rb_set_parent(child, successor);
augment->copy(node, successor);
augment->propagate(parent, successor);
}
successor->rb_left = tmp = node->rb_left;
tmp = node->rb_left;
WRITE_ONCE(successor->rb_left, tmp);
rb_set_parent(tmp, successor);
pc = node->__rb_parent_color;
tmp = __rb_parent(pc);
__rb_change_child(node, successor, tmp, root);
if (child2) {
successor->__rb_parent_color = pc;
rb_set_parent_color(child2, parent, RB_BLACK);
@ -237,9 +261,21 @@ static __always_inline void
rb_erase_augmented(struct rb_node *node, struct rb_root *root,
const struct rb_augment_callbacks *augment)
{
struct rb_node *rebalance = __rb_erase_augmented(node, root, augment);
struct rb_node *rebalance = __rb_erase_augmented(node, root,
NULL, augment);
if (rebalance)
__rb_erase_color(rebalance, root, augment->rotate);
}
#endif /* _TOOLS_LINUX_RBTREE_AUGMENTED_H */
static __always_inline void
rb_erase_augmented_cached(struct rb_node *node, struct rb_root_cached *root,
const struct rb_augment_callbacks *augment)
{
struct rb_node *rebalance = __rb_erase_augmented(node, &root->rb_root,
&root->rb_leftmost,
augment);
if (rebalance)
__rb_erase_color(rebalance, &root->rb_root, augment->rotate);
}
#endif /* _TOOLS_LINUX_RBTREE_AUGMENTED_H */

View File

@ -372,7 +372,9 @@ struct perf_event_attr {
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
namespaces : 1, /* include namespaces data */
__reserved_1 : 35;
ksymbol : 1, /* include ksymbol events */
bpf_event : 1, /* include bpf events */
__reserved_1 : 33;
union {
__u32 wakeup_events; /* wakeup every n events */
@ -445,8 +447,6 @@ struct perf_event_query_bpf {
__u32 ids[0];
};
#define perf_flags(attr) (*(&(attr)->read_format + 1))
/*
* Ioctls that can be done on a perf event fd:
*/
@ -965,9 +965,58 @@ enum perf_event_type {
*/
PERF_RECORD_NAMESPACES = 16,
/*
* Record ksymbol register/unregister events:
*
* struct {
* struct perf_event_header header;
* u64 addr;
* u32 len;
* u16 ksym_type;
* u16 flags;
* char name[];
* struct sample_id sample_id;
* };
*/
PERF_RECORD_KSYMBOL = 17,
/*
* Record bpf events:
* enum perf_bpf_event_type {
* PERF_BPF_EVENT_UNKNOWN = 0,
* PERF_BPF_EVENT_PROG_LOAD = 1,
* PERF_BPF_EVENT_PROG_UNLOAD = 2,
* };
*
* struct {
* struct perf_event_header header;
* u16 type;
* u16 flags;
* u32 id;
* u8 tag[BPF_TAG_SIZE];
* struct sample_id sample_id;
* };
*/
PERF_RECORD_BPF_EVENT = 18,
PERF_RECORD_MAX, /* non-ABI */
};
enum perf_record_ksymbol_type {
PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0,
PERF_RECORD_KSYMBOL_TYPE_BPF = 1,
PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */
};
#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0)
enum perf_bpf_event_type {
PERF_BPF_EVENT_UNKNOWN = 0,
PERF_BPF_EVENT_PROG_LOAD = 1,
PERF_BPF_EVENT_PROG_UNLOAD = 2,
PERF_BPF_EVENT_MAX, /* non-ABI */
};
#define PERF_MAX_STACK_DEPTH 127
#define PERF_MAX_CONTEXTS_PER_STACK 8
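As a rough illustration (not part of this patch) of how user space can ask for the two new record types, a tool might open a dummy software event with the new attribute bits set, then mmap() the returned fd and parse PERF_RECORD_KSYMBOL / PERF_RECORD_BPF_EVENT records out of the ring buffer; privileges, error handling and the parsing loop are omitted here:

#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
			       int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int open_ksym_bpf_events(int cpu)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_DUMMY;
	attr.sample_id_all = 1;	/* append struct sample_id to the records */
	attr.ksymbol = 1;	/* deliver PERF_RECORD_KSYMBOL */
	attr.bpf_event = 1;	/* deliver PERF_RECORD_BPF_EVENT */

	/* System-wide on one CPU; the caller mmap()s the fd to read records. */
	return sys_perf_event_open(&attr, -1, cpu, -1, 0);
}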

View File

@ -22,6 +22,7 @@
*/
#include <linux/rbtree_augmented.h>
#include <linux/export.h>
/*
* red-black trees properties: http://en.wikipedia.org/wiki/Rbtree
@ -43,6 +44,30 @@
* parentheses and have some accompanying text comment.
*/
/*
* Notes on lockless lookups:
*
* All stores to the tree structure (rb_left and rb_right) must be done using
* WRITE_ONCE(). And we must not inadvertently cause (temporary) loops in the
* tree structure as seen in program order.
*
* These two requirements will allow lockless iteration of the tree -- not
* correct iteration mind you, tree rotations are not atomic so a lookup might
* miss entire subtrees.
*
* But they do guarantee that any such traversal will only see valid elements
* and that it will indeed complete -- does not get stuck in a loop.
*
* It also guarantees that if the lookup returns an element it is the 'correct'
* one. But not returning an element does _NOT_ mean it's not present.
*
* NOTE:
*
* Stores to __rb_parent_color are not important for simple lookups so those
* are left undone as of now. Nor did I check for loops involving parent
* pointers.
*/
static inline void rb_set_black(struct rb_node *rb)
{
rb->__rb_parent_color |= RB_BLACK;
@ -70,22 +95,35 @@ __rb_rotate_set_parents(struct rb_node *old, struct rb_node *new,
static __always_inline void
__rb_insert(struct rb_node *node, struct rb_root *root,
bool newleft, struct rb_node **leftmost,
void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
{
struct rb_node *parent = rb_red_parent(node), *gparent, *tmp;
if (newleft)
*leftmost = node;
while (true) {
/*
* Loop invariant: node is red
*
* If there is a black parent, we are done.
* Otherwise, take some corrective action as we don't
* want a red root or two consecutive red nodes.
* Loop invariant: node is red.
*/
if (!parent) {
if (unlikely(!parent)) {
/*
* The inserted node is root. Either this is the
* first node, or we recursed at Case 1 below and
* are no longer violating 4).
*/
rb_set_parent_color(node, NULL, RB_BLACK);
break;
} else if (rb_is_black(parent))
}
/*
* If there is a black parent, we are done.
* Otherwise, take some corrective action as,
* per 4), we don't want a red root or two
* consecutive red nodes.
*/
if (rb_is_black(parent))
break;
gparent = rb_red_parent(parent);
@ -94,7 +132,7 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
if (parent != tmp) { /* parent == gparent->rb_left */
if (tmp && rb_is_red(tmp)) {
/*
* Case 1 - color flips
* Case 1 - node's uncle is red (color flips).
*
* G g
* / \ / \
@ -117,7 +155,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
tmp = parent->rb_right;
if (node == tmp) {
/*
* Case 2 - left rotate at parent
* Case 2 - node's uncle is black and node is
* the parent's right child (left rotate at parent).
*
* G G
* / \ / \
@ -128,8 +167,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
* This still leaves us in violation of 4), the
* continuation into Case 3 will fix that.
*/
parent->rb_right = tmp = node->rb_left;
node->rb_left = parent;
tmp = node->rb_left;
WRITE_ONCE(parent->rb_right, tmp);
WRITE_ONCE(node->rb_left, parent);
if (tmp)
rb_set_parent_color(tmp, parent,
RB_BLACK);
@ -140,7 +180,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
}
/*
* Case 3 - right rotate at gparent
* Case 3 - node's uncle is black and node is
* the parent's left child (right rotate at gparent).
*
* G P
* / \ / \
@ -148,8 +189,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
* / \
* n U
*/
gparent->rb_left = tmp; /* == parent->rb_right */
parent->rb_right = gparent;
WRITE_ONCE(gparent->rb_left, tmp); /* == parent->rb_right */
WRITE_ONCE(parent->rb_right, gparent);
if (tmp)
rb_set_parent_color(tmp, gparent, RB_BLACK);
__rb_rotate_set_parents(gparent, parent, root, RB_RED);
@ -170,8 +211,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
tmp = parent->rb_left;
if (node == tmp) {
/* Case 2 - right rotate at parent */
parent->rb_left = tmp = node->rb_right;
node->rb_right = parent;
tmp = node->rb_right;
WRITE_ONCE(parent->rb_left, tmp);
WRITE_ONCE(node->rb_right, parent);
if (tmp)
rb_set_parent_color(tmp, parent,
RB_BLACK);
@ -182,8 +224,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
}
/* Case 3 - left rotate at gparent */
gparent->rb_right = tmp; /* == parent->rb_left */
parent->rb_left = gparent;
WRITE_ONCE(gparent->rb_right, tmp); /* == parent->rb_left */
WRITE_ONCE(parent->rb_left, gparent);
if (tmp)
rb_set_parent_color(tmp, gparent, RB_BLACK);
__rb_rotate_set_parents(gparent, parent, root, RB_RED);
@ -223,8 +265,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
* / \ / \
* Sl Sr N Sl
*/
parent->rb_right = tmp1 = sibling->rb_left;
sibling->rb_left = parent;
tmp1 = sibling->rb_left;
WRITE_ONCE(parent->rb_right, tmp1);
WRITE_ONCE(sibling->rb_left, parent);
rb_set_parent_color(tmp1, parent, RB_BLACK);
__rb_rotate_set_parents(parent, sibling, root,
RB_RED);
@ -268,15 +311,31 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
*
* (p) (p)
* / \ / \
* N S --> N Sl
* N S --> N sl
* / \ \
* sl Sr s
* sl Sr S
* \
* Sr
*
* Note: p might be red, and then both
* p and sl are red after rotation (which
* breaks property 4). This is fixed in
* Case 4 (in __rb_rotate_set_parents()
* which sets sl to the color of p
* and sets p to RB_BLACK)
*
* (p) (sl)
* / \ / \
* N sl --> P S
* \ / \
* S N Sr
* \
* Sr
*/
sibling->rb_left = tmp1 = tmp2->rb_right;
tmp2->rb_right = sibling;
parent->rb_right = tmp2;
tmp1 = tmp2->rb_right;
WRITE_ONCE(sibling->rb_left, tmp1);
WRITE_ONCE(tmp2->rb_right, sibling);
WRITE_ONCE(parent->rb_right, tmp2);
if (tmp1)
rb_set_parent_color(tmp1, sibling,
RB_BLACK);
@ -296,8 +355,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
* / \ / \
* (sl) sr N (sl)
*/
parent->rb_right = tmp2 = sibling->rb_left;
sibling->rb_left = parent;
tmp2 = sibling->rb_left;
WRITE_ONCE(parent->rb_right, tmp2);
WRITE_ONCE(sibling->rb_left, parent);
rb_set_parent_color(tmp1, sibling, RB_BLACK);
if (tmp2)
rb_set_parent(tmp2, parent);
@ -309,8 +369,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
sibling = parent->rb_left;
if (rb_is_red(sibling)) {
/* Case 1 - right rotate at parent */
parent->rb_left = tmp1 = sibling->rb_right;
sibling->rb_right = parent;
tmp1 = sibling->rb_right;
WRITE_ONCE(parent->rb_left, tmp1);
WRITE_ONCE(sibling->rb_right, parent);
rb_set_parent_color(tmp1, parent, RB_BLACK);
__rb_rotate_set_parents(parent, sibling, root,
RB_RED);
@ -334,10 +395,11 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
}
break;
}
/* Case 3 - right rotate at sibling */
sibling->rb_right = tmp1 = tmp2->rb_left;
tmp2->rb_left = sibling;
parent->rb_left = tmp2;
/* Case 3 - left rotate at sibling */
tmp1 = tmp2->rb_left;
WRITE_ONCE(sibling->rb_right, tmp1);
WRITE_ONCE(tmp2->rb_left, sibling);
WRITE_ONCE(parent->rb_left, tmp2);
if (tmp1)
rb_set_parent_color(tmp1, sibling,
RB_BLACK);
@ -345,9 +407,10 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
tmp1 = sibling;
sibling = tmp2;
}
/* Case 4 - left rotate at parent + color flips */
parent->rb_left = tmp2 = sibling->rb_right;
sibling->rb_right = parent;
/* Case 4 - right rotate at parent + color flips */
tmp2 = sibling->rb_right;
WRITE_ONCE(parent->rb_left, tmp2);
WRITE_ONCE(sibling->rb_right, parent);
rb_set_parent_color(tmp1, sibling, RB_BLACK);
if (tmp2)
rb_set_parent(tmp2, parent);
@ -378,22 +441,41 @@ static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {}
static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {}
static const struct rb_augment_callbacks dummy_callbacks = {
dummy_propagate, dummy_copy, dummy_rotate
.propagate = dummy_propagate,
.copy = dummy_copy,
.rotate = dummy_rotate
};
void rb_insert_color(struct rb_node *node, struct rb_root *root)
{
__rb_insert(node, root, dummy_rotate);
__rb_insert(node, root, false, NULL, dummy_rotate);
}
void rb_erase(struct rb_node *node, struct rb_root *root)
{
struct rb_node *rebalance;
rebalance = __rb_erase_augmented(node, root, &dummy_callbacks);
rebalance = __rb_erase_augmented(node, root,
NULL, &dummy_callbacks);
if (rebalance)
____rb_erase_color(rebalance, root, dummy_rotate);
}
void rb_insert_color_cached(struct rb_node *node,
struct rb_root_cached *root, bool leftmost)
{
__rb_insert(node, &root->rb_root, leftmost,
&root->rb_leftmost, dummy_rotate);
}
void rb_erase_cached(struct rb_node *node, struct rb_root_cached *root)
{
struct rb_node *rebalance;
rebalance = __rb_erase_augmented(node, &root->rb_root,
&root->rb_leftmost, &dummy_callbacks);
if (rebalance)
____rb_erase_color(rebalance, &root->rb_root, dummy_rotate);
}
/*
* Augmented rbtree manipulation functions.
*
@ -402,9 +484,10 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
*/
void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
bool newleft, struct rb_node **leftmost,
void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
{
__rb_insert(node, root, augment_rotate);
__rb_insert(node, root, newleft, leftmost, augment_rotate);
}
/*
@ -498,15 +581,24 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new,
{
struct rb_node *parent = rb_parent(victim);
/* Copy the pointers/colour from the victim to the replacement */
*new = *victim;
/* Set the surrounding nodes to point to the replacement */
__rb_change_child(victim, new, parent, root);
if (victim->rb_left)
rb_set_parent(victim->rb_left, new);
if (victim->rb_right)
rb_set_parent(victim->rb_right, new);
__rb_change_child(victim, new, parent, root);
}
/* Copy the pointers/colour from the victim to the replacement */
*new = *victim;
void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new,
struct rb_root_cached *root)
{
rb_replace_node(victim, new, &root->rb_root);
if (root->rb_leftmost == victim)
root->rb_leftmost = new;
}
static struct rb_node *rb_left_deepest_node(const struct rb_node *node)

View File

@ -46,10 +46,10 @@ CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_
CFLAGS_builtin-report.o += -DTIPDIR="BUILD_STR($(tipdir_SQ))"
CFLAGS_builtin-report.o += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)"
libperf-y += util/
libperf-y += arch/
libperf-y += ui/
libperf-y += scripts/
libperf-$(CONFIG_TRACE) += trace/beauty/
perf-y += util/
perf-y += arch/
perf-y += ui/
perf-y += scripts/
perf-$(CONFIG_TRACE) += trace/beauty/
gtk-y += ui/gtk/

View File

@ -120,6 +120,10 @@ Given a $HOME/.perfconfig like this:
children = true
group = true
[llvm]
dump-obj = true
clang-opt = -g
You can hide the source code of the annotate feature by setting the config to false with
% perf config annotate.hide_src_code=true
@ -553,6 +557,33 @@ trace.*::
trace.show_zeros::
Do not suppress syscall arguments that are equal to zero.
llvm.*::
llvm.clang-path::
Path to clang. If omitted, it is searched for in $PATH.
llvm.clang-bpf-cmd-template::
Command line template. The lines below show its default value. Environment
variables are used to pass options.
"$CLANG_EXEC -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS \
-Wno-unused-value -Wno-pointer-sign -working-directory \
$WORKING_DIR -c $CLANG_SOURCE -target bpf -O2 -o -"
llvm.clang-opt::
Options passed to clang.
llvm.kbuild-dir::
kbuild directory. If not set, /lib/modules/`uname -r`/build is used.
If deliberately set to "", kernel header auto-detection is skipped.
llvm.kbuild-opts::
Options passed to 'make' when detecting kernel header options.
llvm.dump-obj::
Enable dumping of the BPF object files compiled by LLVM.
llvm.opts::
Options passed to llc.
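As an example, a $HOME/.perfconfig stanza combining several of these options could look like this (the paths and kernel version are purely illustrative):

	[llvm]
		clang-path = /usr/local/bin/clang
		kbuild-dir = /lib/modules/5.0.0/build
		dump-obj = true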
SEE ALSO
--------
linkperf:perf[1]

View File

@ -88,6 +88,20 @@ OPTIONS
If you want to profile write accesses in [0x1000~1008), just set
'mem:0x1000/8:w'.
- a BPF source file (ending in .c) or a precompiled object file (ending
in .o) selects one or more BPF events.
The BPF program can attach to various perf events based on the ELF section
names.
When processing a '.c' file, perf searches for an installed LLVM to compile it
into an object file first. Optional clang options can be passed via the
'--clang-opt' command line option, e.g.:
perf record --clang-opt "-DLINUX_VERSION_CODE=0x50000" \
-e tests/bpf-script-example.c
Note: '--clang-opt' must be placed before '--event/-e'.
- a group of events surrounded by a pair of brace ("{event1,event2,...}").
Each event is separated by commas and the group should be quoted to
prevent the shell interpretation. You also need to use --group on
@ -440,6 +454,11 @@ Use <n> control blocks in asynchronous (Posix AIO) trace writing mode (default:
Asynchronous mode is supported only when linking Perf tool with libc library
providing implementation for Posix AIO API.
--affinity=mode::
Set affinity mask of trace reading thread according to the policy defined by 'mode' value:
node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer
cpu - thread affinity mask is set to cpu of the processed mmap buffer
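For example (an illustrative invocation), to bind the reading thread to the NUMA node of each processed mmap buffer during a system-wide session:

  perf record --affinity=node -a -- sleep 10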
--all-kernel::
Configure all used events to run in kernel space.

View File

@ -159,6 +159,12 @@ OPTIONS
the override, and the result of the above is that only S/W and H/W
events are displayed with the given fields.
It's possible to add/remove fields only for a specific event type:
-Fsw:-cpu,-period
removes cpu and period from software events.
For the 'wildcard' option if a user selected field is invalid for an
event type, a message is displayed to the user that the option is
ignored for that type. For example:

View File

@ -210,6 +210,14 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
may happen, for instance, when a thread gets migrated to a different CPU
while processing a syscall.
--map-dump::
Dump BPF maps set up by events passed via -e, for instance the augmented_raw_syscalls
living in tools/perf/examples/bpf/augmented_raw_syscalls.c. For now this
dumps just boolean map values and integer keys, in time this will print in hex
by default and use BTF when available, as well as use functions to do pretty
printing using the existing 'perf trace' syscall arg beautifiers to map integer
arguments to strings (pid to comm, syscall id to syscall name, etc).
PAGEFAULTS
----------

View File

@ -43,11 +43,10 @@ struct perf_file_section {
Flags section:
The header is followed by different optional headers, described by the bits set
in flags. Only headers for which the bit is set are included. Each header
consists of a perf_file_section located after the initial header.
The respective perf_file_section points to the data of the additional
header and defines its size.
For each of the optional features a perf_file_section is placed after the data
section if the feature bit is set in the perf_header flags bitset. The
respective perf_file_section points to the data of the additional header and
defines its size.
Some headers consist of strings, which are defined like this:
@ -131,7 +130,7 @@ An uint64_t with the total memory in bytes.
HEADER_CMDLINE = 11,
A perf_header_string with the perf command line used to collect the data.
A perf_header_string_list with the perf arg-vector used to collect the data.
HEADER_EVENT_DESC = 12,

View File

@ -109,6 +109,13 @@ FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
FEATURE_CHECK_LDFLAGS-libunwind-arm = -lunwind -lunwind-arm
FEATURE_CHECK_LDFLAGS-libunwind-aarch64 = -lunwind -lunwind-aarch64
FEATURE_CHECK_LDFLAGS-libunwind-x86 = -lunwind -llzma -lunwind-x86
FEATURE_CHECK_LDFLAGS-libunwind-x86_64 = -lunwind -llzma -lunwind-x86_64
FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto
ifdef CSINCLUDES
LIBOPENCSD_CFLAGS := -I$(CSINCLUDES)
endif
@ -218,6 +225,8 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS)
FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS)
FEATURE_CHECK_LDFLAGS-libaio = -lrt
CFLAGS += -fno-omit-frame-pointer
CFLAGS += -ggdb3
CFLAGS += -funwind-tables
@ -386,7 +395,8 @@ ifeq ($(feature-setns), 1)
$(call detected,CONFIG_SETNS)
endif
ifndef NO_CORESIGHT
ifdef CORESIGHT
$(call feature_check,libopencsd)
ifeq ($(feature-libopencsd), 1)
CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS)
LDFLAGS += $(LIBOPENCSD_LDFLAGS)
@ -482,6 +492,7 @@ endif
ifndef NO_LIBUNWIND
have_libunwind :=
$(call feature_check,libunwind-x86)
ifeq ($(feature-libunwind-x86), 1)
$(call detected,CONFIG_LIBUNWIND_X86)
CFLAGS += -DHAVE_LIBUNWIND_X86_SUPPORT
@ -490,6 +501,7 @@ ifndef NO_LIBUNWIND
have_libunwind = 1
endif
$(call feature_check,libunwind-aarch64)
ifeq ($(feature-libunwind-aarch64), 1)
$(call detected,CONFIG_LIBUNWIND_AARCH64)
CFLAGS += -DHAVE_LIBUNWIND_AARCH64_SUPPORT

View File

@ -102,7 +102,7 @@ include ../scripts/utilities.mak
# When selected, pass LLVM_CONFIG=/path/to/llvm-config to `make' if
# llvm-config is not in $PATH.
#
# Define NO_CORESIGHT if you do not want support for CoreSight trace decoding.
# Define CORESIGHT if you DO WANT support for CoreSight trace decoding.
#
# Define NO_AIO if you do not want support of Posix AIO based trace
# streaming for record mode. Currently Posix AIO trace streaming is
@ -344,9 +344,9 @@ endif
export PERL_PATH
LIB_FILE=$(OUTPUT)libperf.a
LIBPERF_A=$(OUTPUT)libperf.a
PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD)
PERFLIBS = $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD)
ifndef NO_LIBBPF
PERFLIBS += $(LIBBPF)
endif
@ -549,6 +549,8 @@ JEVENTS_IN := $(OUTPUT)pmu-events/jevents-in.o
PMU_EVENTS_IN := $(OUTPUT)pmu-events/pmu-events-in.o
LIBPERF_IN := $(OUTPUT)libperf-in.o
export JEVENTS
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
@ -565,9 +567,12 @@ $(JEVENTS): $(JEVENTS_IN)
$(PMU_EVENTS_IN): $(JEVENTS) FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events
$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
$(LIBPERF_IN): prepare FORCE
$(Q)$(MAKE) $(build)=libperf
$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBPERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
$(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@
$(PERF_IN) $(PMU_EVENTS_IN) $(LIBPERF_IN) $(LIBS) -o $@
$(GTK_IN): FORCE
$(Q)$(MAKE) $(build)=gtk
@ -683,12 +688,7 @@ endif
$(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
LIBPERF_IN := $(OUTPUT)libperf-in.o
$(LIBPERF_IN): prepare FORCE
$(Q)$(MAKE) $(build)=libperf
$(LIB_FILE): $(LIBPERF_IN)
$(LIBPERF_A): $(LIBPERF_IN)
$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS)
LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)'
@ -863,8 +863,8 @@ ifndef NO_LIBPYTHON
$(call QUIET_INSTALL, python-scripts) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'; \
$(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
$(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \
$(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -m 644 -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
$(INSTALL) scripts/python/*.py -m 644 -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \
$(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
endif
$(call QUIET_INSTALL, perf_completion-script) \
@ -910,7 +910,7 @@ python-clean:
$(python-clean)
clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean python-clean
$(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
$(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)$(RM) $(OUTPUT).config-detected
$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so

View File

@ -1,2 +1,2 @@
libperf-y += common.o
libperf-y += $(SRCARCH)/
perf-y += common.o
perf-y += $(SRCARCH)/

View File

@ -1,2 +1,2 @@
libperf-y += util/
libperf-$(CONFIG_DWARF_UNWIND) += tests/
perf-y += util/
perf-$(CONFIG_DWARF_UNWIND) += tests/

View File

@ -1,5 +1,5 @@
libperf-y += regs_load.o
libperf-y += dwarf-unwind.o
libperf-y += vectors-page.o
perf-y += regs_load.o
perf-y += dwarf-unwind.o
perf-y += vectors-page.o
libperf-y += arch-tests.o
perf-y += arch-tests.o

View File

@ -3,6 +3,7 @@
#include "perf_regs.h"
#include "thread.h"
#include "map.h"
#include "map_groups.h"
#include "event.h"
#include "debug.h"
#include "tests/tests.h"

View File

@ -1,6 +1,6 @@
libperf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o
perf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o

View File

@ -5,6 +5,7 @@
*/
#include <api/fs/fs.h>
#include <linux/bits.h>
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/coresight-pmu.h>
@ -22,12 +23,10 @@
#include "../../util/thread_map.h"
#include "../../util/cs-etm.h"
#include <errno.h>
#include <stdlib.h>
#include <sys/stat.h>
#define ENABLE_SINK_MAX 128
#define CS_BUS_DEVICE_PATH "/bus/coresight/devices/"
struct cs_etm_recording {
struct auxtrace_record itr;
struct perf_pmu *cs_etm_pmu;
@ -60,10 +59,48 @@ static int cs_etm_parse_snapshot_options(struct auxtrace_record *itr,
return 0;
}
static int cs_etm_set_sink_attr(struct perf_pmu *pmu,
struct perf_evsel *evsel)
{
char msg[BUFSIZ], path[PATH_MAX], *sink;
struct perf_evsel_config_term *term;
int ret = -EINVAL;
u32 hash;
if (evsel->attr.config2 & GENMASK(31, 0))
return 0;
list_for_each_entry(term, &evsel->config_terms, list) {
if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG)
continue;
sink = term->val.drv_cfg;
snprintf(path, PATH_MAX, "sinks/%s", sink);
ret = perf_pmu__scan_file(pmu, path, "%x", &hash);
if (ret != 1) {
pr_err("failed to set sink \"%s\" on event %s with %d (%s)\n",
sink, perf_evsel__name(evsel), errno,
str_error_r(errno, msg, sizeof(msg)));
return ret;
}
evsel->attr.config2 |= hash;
return 0;
}
/*
* No sink was provided on the command line - for _now_ treat
* this as an error.
*/
return ret;
}
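/*
 * The sink itself is selected on the command line through the @ config
 * term, e.g. (the sink name is board specific and purely illustrative):
 *
 *   perf record -e cs_etm/@tmc_etr0/u --per-thread -- ls
 *
 * which reaches this function as a PERF_EVSEL__CONFIG_TERM_DRV_CFG term
 * whose hash is read from the PMU's sinks/ directory in sysfs.
 */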
static int cs_etm_recording_options(struct auxtrace_record *itr,
struct perf_evlist *evlist,
struct record_opts *opts)
{
int ret;
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
@ -92,6 +129,10 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
if (!cs_etm_evsel)
return 0;
ret = cs_etm_set_sink_attr(cs_etm_pmu, cs_etm_evsel);
if (ret)
return ret;
if (opts->use_clockid) {
pr_err("Cannot use clockid (-k option) with %s\n",
CORESIGHT_ETM_PMU_NAME);
@ -598,54 +639,3 @@ struct auxtrace_record *cs_etm_record_init(int *err)
out:
return NULL;
}
static FILE *cs_device__open_file(const char *name)
{
struct stat st;
char path[PATH_MAX];
const char *sysfs;
sysfs = sysfs__mountpoint();
if (!sysfs)
return NULL;
snprintf(path, PATH_MAX,
"%s" CS_BUS_DEVICE_PATH "%s", sysfs, name);
if (stat(path, &st) < 0)
return NULL;
return fopen(path, "w");
}
static int __printf(2, 3) cs_device__print_file(const char *name, const char *fmt, ...)
{
va_list args;
FILE *file;
int ret = -EINVAL;
va_start(args, fmt);
file = cs_device__open_file(name);
if (file) {
ret = vfprintf(file, fmt, args);
fclose(file);
}
va_end(args);
return ret;
}
int cs_etm_set_drv_config(struct perf_evsel_config_term *term)
{
int ret;
char enable_sink[ENABLE_SINK_MAX];
snprintf(enable_sink, ENABLE_SINK_MAX, "%s/%s",
term->val.drv_cfg, "enable_sink");
ret = cs_device__print_file(enable_sink, "%d", 1);
if (ret < 0)
return ret;
return 0;
}

View File

@ -7,9 +7,6 @@
#ifndef INCLUDE__PERF_CS_ETM_H__
#define INCLUDE__PERF_CS_ETM_H__
#include "../../util/evsel.h"
struct auxtrace_record *cs_etm_record_init(int *err);
int cs_etm_set_drv_config(struct perf_evsel_config_term *term);
#endif

View File

@ -7,8 +7,8 @@
#include <string.h>
#include <linux/coresight-pmu.h>
#include <linux/perf_event.h>
#include <linux/string.h>
#include "cs-etm.h"
#include "arm-spe.h"
#include "../../util/pmu.h"
@ -19,7 +19,6 @@ struct perf_event_attr
if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
/* add ETM default config here */
pmu->selectable = true;
pmu->set_drv_config = cs_etm_set_drv_config;
#if defined(__aarch64__)
} else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
return arm_spe_pmu_default_config(pmu);

View File

@ -1,2 +1,2 @@
libperf-y += util/
libperf-$(CONFIG_DWARF_UNWIND) += tests/
perf-y += util/
perf-$(CONFIG_DWARF_UNWIND) += tests/

View File

@ -1,4 +1,4 @@
libperf-y += regs_load.o
libperf-y += dwarf-unwind.o
perf-y += regs_load.o
perf-y += dwarf-unwind.o
libperf-y += arch-tests.o
perf-y += arch-tests.o

View File

@ -3,6 +3,7 @@
#include "perf_regs.h"
#include "thread.h"
#include "map.h"
#include "map_groups.h"
#include "event.h"
#include "debug.h"
#include "tests/tests.h"

View File

@ -1,10 +1,10 @@
libperf-y += header.o
libperf-y += sym-handling.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
perf-y += header.o
perf-y += sym-handling.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
perf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
../../arm/util/auxtrace.o \
../../arm/util/cs-etm.o \
arm-spe.o

View File

@ -1 +1 @@
libperf-y += util/
perf-y += util/

Some files were not shown because too many files have changed in this diff.