Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "The main changes in this cycle on the kernel side:

   - rework kprobes blacklist handling (Masami Hiramatsu)

   - misc cleanups

  on the tooling side these areas were the main focus:

   - 'perf trace'     enhancements (Arnaldo Carvalho de Melo)

   - 'perf bench'     enhancements (Davidlohr Bueso)

   - 'perf record'    enhancements (Alexey Budankov)

   - 'perf annotate'  enhancements (Jin Yao)

   - 'perf top'       enhancements (Jiri Olsa)

   - Intel hw tracing enhancements (Adrian Hunter)

   - ARM hw tracing   enhancements (Leo Yan, Mathieu Poirier)

   - ... plus lots of other enhancements, cleanups and fixes"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (171 commits)
  tools uapi asm: Update asm-generic/unistd.h copy
  perf symbols: Relax checks on perf-PID.map ownership
  perf trace: Wire up the fadvise 'advice' table generator
  perf beauty: Add generator for fadvise64's 'advice' arg constants
  tools headers uapi: Grab a copy of fadvise.h
  perf beauty mmap: Print mmap's 'offset' arg in hexadecimal
  perf beauty mmap: Print PROT_READ before PROT_EXEC to match strace output
  perf trace beauty: Beautify arch_prctl()'s arguments
  perf trace: When showing string prefixes show prefix + ??? for unknown entries
  perf trace: Move strarrays to beauty.h for further reuse
  perf beauty: Wire up the x86_arch prctl code table generator
  perf beauty: Add a string table generator for x86's 'arch_prctl' codes
  tools include arch: Grab a copy of x86's prctl.h
  perf trace: Show NULL when syscall pointer args are 0
  perf trace: Enclose the errno strings with ()
  perf augmented_raw_syscalls: Copy 'access' arg as well
  perf trace: Add alignment spaces after the closing parens
  perf trace beauty: Print O_RDONLY when (flags & O_ACCMODE) == 0
  perf trace: Allow asking for not suppressing common string prefixes
  perf trace: Add a prefix member to the strarray class
  ...
This commit is contained in:
Linus Torvalds 2018-12-26 14:45:18 -08:00
commit 116b081c28
204 changed files with 35660 additions and 952 deletions

View File

@ -1026,12 +1026,10 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
}
NOKPROBE_SYMBOL(kprobe_fault_handler);
bool arch_within_kprobe_blacklist(unsigned long addr)
int __init arch_populate_kprobe_blacklist(void)
{
return (addr >= (unsigned long)__kprobes_text_start &&
addr < (unsigned long)__kprobes_text_end) ||
(addr >= (unsigned long)__entry_text_start &&
addr < (unsigned long)__entry_text_end);
return kprobe_add_area_blacklist((unsigned long)__entry_text_start,
(unsigned long)__entry_text_end);
}
int __init arch_init_kprobes(void)

View File

@ -242,10 +242,13 @@ extern int arch_init_kprobes(void);
extern void show_registers(struct pt_regs *regs);
extern void kprobes_inc_nmissed_count(struct kprobe *p);
extern bool arch_within_kprobe_blacklist(unsigned long addr);
extern int arch_populate_kprobe_blacklist(void);
extern bool arch_kprobe_on_func_entry(unsigned long offset);
extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset);
extern bool within_kprobe_blacklist(unsigned long addr);
extern int kprobe_add_ksym_blacklist(unsigned long entry);
extern int kprobe_add_area_blacklist(unsigned long start, unsigned long end);
struct kprobe_insn_cache {
struct mutex mutex;

View File

@ -262,8 +262,8 @@ struct pmu {
*/
int capabilities;
int * __percpu pmu_disable_count;
struct perf_cpu_context * __percpu pmu_cpu_context;
int __percpu *pmu_disable_count;
struct perf_cpu_context __percpu *pmu_cpu_context;
atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
int task_ctx_nr;
int hrtimer_interval_ms;

View File

@ -5541,7 +5541,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
static const struct vm_operations_struct perf_mmap_vmops = {
.open = perf_mmap_open,
.close = perf_mmap_close, /* non mergable */
.close = perf_mmap_close, /* non mergeable */
.fault = perf_mmap_fault,
.page_mkwrite = perf_mmap_fault,
};

View File

@ -238,7 +238,7 @@ __weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
}
/*
* Contraints to check before allowing this new breakpoint counter:
* Constraints to check before allowing this new breakpoint counter:
*
* == Non-pinned counter == (Considered as pinned for now)
*

View File

@ -2093,6 +2093,47 @@ void dump_kprobe(struct kprobe *kp)
}
NOKPROBE_SYMBOL(dump_kprobe);
int kprobe_add_ksym_blacklist(unsigned long entry)
{
struct kprobe_blacklist_entry *ent;
unsigned long offset = 0, size = 0;
if (!kernel_text_address(entry) ||
!kallsyms_lookup_size_offset(entry, &size, &offset))
return -EINVAL;
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
if (!ent)
return -ENOMEM;
ent->start_addr = entry;
ent->end_addr = entry + size;
INIT_LIST_HEAD(&ent->list);
list_add_tail(&ent->list, &kprobe_blacklist);
return (int)size;
}
/* Add all symbols in given area into kprobe blacklist */
int kprobe_add_area_blacklist(unsigned long start, unsigned long end)
{
unsigned long entry;
int ret = 0;
for (entry = start; entry < end; entry += ret) {
ret = kprobe_add_ksym_blacklist(entry);
if (ret < 0)
return ret;
if (ret == 0) /* In case of alias symbol */
ret = 1;
}
return 0;
}
int __init __weak arch_populate_kprobe_blacklist(void)
{
return 0;
}
/*
* Lookup and populate the kprobe_blacklist.
*
@ -2104,26 +2145,24 @@ NOKPROBE_SYMBOL(dump_kprobe);
static int __init populate_kprobe_blacklist(unsigned long *start,
unsigned long *end)
{
unsigned long entry;
unsigned long *iter;
struct kprobe_blacklist_entry *ent;
unsigned long entry, offset = 0, size = 0;
int ret;
for (iter = start; iter < end; iter++) {
entry = arch_deref_entry_point((void *)*iter);
if (!kernel_text_address(entry) ||
!kallsyms_lookup_size_offset(entry, &size, &offset))
ret = kprobe_add_ksym_blacklist(entry);
if (ret == -EINVAL)
continue;
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
if (!ent)
return -ENOMEM;
ent->start_addr = entry;
ent->end_addr = entry + size;
INIT_LIST_HEAD(&ent->list);
list_add_tail(&ent->list, &kprobe_blacklist);
if (ret < 0)
return ret;
}
return 0;
/* Symbols in __kprobes_text are blacklisted */
ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start,
(unsigned long)__kprobes_text_end);
return ret ? : arch_populate_kprobe_blacklist();
}
/* Module notifier call back, checking kprobes on the module */

View File

@ -0,0 +1,17 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_X86_PRCTL_H
#define _ASM_X86_PRCTL_H
#define ARCH_SET_GS 0x1001
#define ARCH_SET_FS 0x1002
#define ARCH_GET_FS 0x1003
#define ARCH_GET_GS 0x1004
#define ARCH_GET_CPUID 0x1011
#define ARCH_SET_CPUID 0x1012
#define ARCH_MAP_VDSO_X32 0x2001
#define ARCH_MAP_VDSO_32 0x2002
#define ARCH_MAP_VDSO_64 0x2003
#endif /* _ASM_X86_PRCTL_H */

View File

@ -31,6 +31,7 @@ FEATURE_TESTS_BASIC := \
backtrace \
dwarf \
dwarf_getlocations \
eventfd \
fortify-source \
sync-compare-and-swap \
get_current_dir_name \
@ -69,7 +70,8 @@ FEATURE_TESTS_BASIC := \
sched_getcpu \
sdt \
setns \
libopencsd
libopencsd \
libaio
# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
# of all feature tests
@ -115,7 +117,8 @@ FEATURE_DISPLAY ?= \
zlib \
lzma \
get_cpuid \
bpf
bpf \
libaio
# Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features.
# If in the future we need per-feature checks/flags for features not

View File

@ -5,6 +5,7 @@ FILES= \
test-bionic.bin \
test-dwarf.bin \
test-dwarf_getlocations.bin \
test-eventfd.bin \
test-fortify-source.bin \
test-sync-compare-and-swap.bin \
test-get_current_dir_name.bin \
@ -54,12 +55,14 @@ FILES= \
test-sdt.bin \
test-cxx.bin \
test-jvmti.bin \
test-jvmti-cmlr.bin \
test-sched_getcpu.bin \
test-setns.bin \
test-libopencsd.bin \
test-clang.bin \
test-llvm.bin \
test-llvm-version.bin
test-llvm-version.bin \
test-libaio.bin
FILES := $(addprefix $(OUTPUT),$(FILES))
@ -102,6 +105,9 @@ $(OUTPUT)test-bionic.bin:
$(OUTPUT)test-libelf.bin:
$(BUILD) -lelf
$(OUTPUT)test-eventfd.bin:
$(BUILD)
$(OUTPUT)test-get_current_dir_name.bin:
$(BUILD)
@ -263,6 +269,9 @@ $(OUTPUT)test-cxx.bin:
$(OUTPUT)test-jvmti.bin:
$(BUILD)
$(OUTPUT)test-jvmti-cmlr.bin:
$(BUILD)
$(OUTPUT)test-llvm.bin:
$(BUILDXX) -std=gnu++11 \
-I$(shell $(LLVM_CONFIG) --includedir) \
@ -289,6 +298,9 @@ $(OUTPUT)test-clang.bin:
-include $(OUTPUT)*.d
$(OUTPUT)test-libaio.bin:
$(BUILD) -lrt
###############################
clean:

View File

@ -50,6 +50,10 @@
# include "test-dwarf_getlocations.c"
#undef main
#define main main_test_eventfd
# include "test-eventfd.c"
#undef main
#define main main_test_libelf_getphdrnum
# include "test-libelf-getphdrnum.c"
#undef main
@ -170,6 +174,10 @@
# include "test-libopencsd.c"
#undef main
#define main main_test_libaio
# include "test-libaio.c"
#undef main
int main(int argc, char *argv[])
{
main_test_libpython();
@ -182,6 +190,7 @@ int main(int argc, char *argv[])
main_test_glibc();
main_test_dwarf();
main_test_dwarf_getlocations();
main_test_eventfd();
main_test_libelf_getphdrnum();
main_test_libelf_gelf_getnote();
main_test_libelf_getshdrstrndx();
@ -209,6 +218,7 @@ int main(int argc, char *argv[])
main_test_sdt();
main_test_setns();
main_test_libopencsd();
main_test_libaio();
return 0;
}

View File

@ -0,0 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
#include <sys/eventfd.h>
int main(void)
{
return eventfd(0, EFD_NONBLOCK);
}

View File

@ -0,0 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
#include <jvmti.h>
#include <jvmticmlr.h>
int main(void)
{
jvmtiCompiledMethodLoadInlineRecord rec __attribute__((unused));
jvmtiCompiledMethodLoadRecordHeader hdr __attribute__((unused));
PCStackInfo p __attribute__((unused));
return 0;
}

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
#include <jvmti.h>
#include <jvmticmlr.h>
int main(void)
{

View File

@ -0,0 +1,16 @@
// SPDX-License-Identifier: GPL-2.0
#include <aio.h>
int main(void)
{
struct aiocb aiocb;
aiocb.aio_fildes = 0;
aiocb.aio_offset = 0;
aiocb.aio_buf = 0;
aiocb.aio_nbytes = 0;
aiocb.aio_reqprio = 0;
aiocb.aio_sigevent.sigev_notify = 1 /*SIGEV_NONE*/;
return (int)aio_return(&aiocb);
}

View File

@ -1,6 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#include <opencsd/c_api/opencsd_c_api.h>
/*
* Check OpenCSD library version is sufficient to provide required features
*/
#define OCSD_MIN_VER ((0 << 16) | (10 << 8) | (0))
#if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
#error "OpenCSD >= 0.10.0 is required"
#endif
int main(void)
{
(void)ocsd_get_version();

View File

@ -59,4 +59,17 @@ static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr)
else
return 0;
}
/**
* ERR_CAST - Explicitly cast an error-valued pointer to another pointer type
* @ptr: The pointer to cast.
*
* Explicitly cast an error-valued pointer to another pointer type in such a
* way as to make it clear that's what's going on.
*/
static inline void * __must_check ERR_CAST(__force const void *ptr)
{
/* cast away the const */
return (void *) ptr;
}
#endif /* _LINUX_ERR_H */

View File

@ -760,8 +760,10 @@ __SYSCALL(__NR_rseq, sys_rseq)
#define __NR_ftruncate __NR3264_ftruncate
#define __NR_lseek __NR3264_lseek
#define __NR_sendfile __NR3264_sendfile
#if defined(__ARCH_WANT_NEW_STAT) || defined(__ARCH_WANT_STAT64)
#define __NR_newfstatat __NR3264_fstatat
#define __NR_fstat __NR3264_fstat
#endif
#define __NR_mmap __NR3264_mmap
#define __NR_fadvise64 __NR3264_fadvise64
#ifdef __NR3264_stat
@ -776,8 +778,10 @@ __SYSCALL(__NR_rseq, sys_rseq)
#define __NR_ftruncate64 __NR3264_ftruncate
#define __NR_llseek __NR3264_lseek
#define __NR_sendfile64 __NR3264_sendfile
#if defined(__ARCH_WANT_NEW_STAT) || defined(__ARCH_WANT_STAT64)
#define __NR_fstatat64 __NR3264_fstatat
#define __NR_fstat64 __NR3264_fstat
#endif
#define __NR_mmap2 __NR3264_mmap
#define __NR_fadvise64_64 __NR3264_fadvise64
#ifdef __NR3264_stat

View File

@ -0,0 +1,22 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef FADVISE_H_INCLUDED
#define FADVISE_H_INCLUDED
#define POSIX_FADV_NORMAL 0 /* No further special treatment. */
#define POSIX_FADV_RANDOM 1 /* Expect random page references. */
#define POSIX_FADV_SEQUENTIAL 2 /* Expect sequential page references. */
#define POSIX_FADV_WILLNEED 3 /* Will need these pages. */
/*
* The advise values for POSIX_FADV_DONTNEED and POSIX_ADV_NOREUSE
* for s390-64 differ from the values for the rest of the world.
*/
#if defined(__s390x__)
#define POSIX_FADV_DONTNEED 6 /* Don't need these pages. */
#define POSIX_FADV_NOREUSE 7 /* Data will be accessed once. */
#else
#define POSIX_FADV_DONTNEED 4 /* Don't need these pages. */
#define POSIX_FADV_NOREUSE 5 /* Data will be accessed once. */
#endif
#endif /* FADVISE_H_INCLUDED */

View File

@ -36,8 +36,6 @@ endif
CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
CFLAGS += -I$(srctree)/tools/include/
CFLAGS += -I$(srctree)/include/uapi
CFLAGS += -I$(srctree)/include
SUBCMD_IN := $(OUTPUT)libsubcmd-in.o

View File

@ -71,7 +71,7 @@ typedef int parse_opt_cb(const struct option *, const char *arg, int unset);
*
* `argh`::
* token to explain the kind of argument this option wants. Keep it
* homogenous across the repository.
* homogeneous across the repository.
*
* `help`::
* the short help associated to what the option does.
@ -80,7 +80,7 @@ typedef int parse_opt_cb(const struct option *, const char *arg, int unset);
*
* `flags`::
* mask of parse_opt_option_flags.
* PARSE_OPT_OPTARG: says that the argument is optionnal (not for BOOLEANs)
* PARSE_OPT_OPTARG: says that the argument is optional (not for BOOLEANs)
* PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs
* PARSE_OPT_NONEG: says that this option cannot be negated
* PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in

View File

@ -25,6 +25,7 @@ endef
$(call allow-override,CC,$(CROSS_COMPILE)gcc)
$(call allow-override,AR,$(CROSS_COMPILE)ar)
$(call allow-override,NM,$(CROSS_COMPILE)nm)
$(call allow-override,PKG_CONFIG,pkg-config)
EXT = -std=gnu99
INSTALL = install
@ -47,6 +48,8 @@ prefix ?= /usr/local
libdir = $(prefix)/$(libdir_relative)
man_dir = $(prefix)/share/man
man_dir_SQ = '$(subst ','\'',$(man_dir))'
pkgconfig_dir ?= $(word 1,$(shell $(PKG_CONFIG) \
--variable pc_path pkg-config | tr ":" " "))
export man_dir man_dir_SQ INSTALL
export DESTDIR DESTDIR_SQ
@ -174,7 +177,7 @@ $(TE_IN): force
$(Q)$(MAKE) $(build)=libtraceevent
$(OUTPUT)libtraceevent.so.$(EVENT_PARSE_VERSION): $(TE_IN)
$(QUIET_LINK)$(CC) --shared $^ -Wl,-soname,libtraceevent.so.$(EP_VERSION) -o $@
$(QUIET_LINK)$(CC) --shared $(LDFLAGS) $^ -Wl,-soname,libtraceevent.so.$(EP_VERSION) -o $@
@ln -sf $(@F) $(OUTPUT)libtraceevent.so
@ln -sf $(@F) $(OUTPUT)libtraceevent.so.$(EP_VERSION)
@ -193,7 +196,7 @@ $(PLUGINS_IN): force
$(Q)$(MAKE) $(build)=$(plugin_obj)
$(OUTPUT)%.so: $(OUTPUT)%-in.o
$(QUIET_LINK)$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $^
$(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^
define make_version.h
(echo '/* This file is automatically generated. Do not modify. */'; \
@ -270,7 +273,19 @@ define do_generate_dynamic_list_file
fi
endef
install_lib: all_cmd install_plugins
PKG_CONFIG_FILE = libtraceevent.pc
define do_install_pkgconfig_file
if [ -n "${pkgconfig_dir}" ]; then \
cp -f ${PKG_CONFIG_FILE}.template ${PKG_CONFIG_FILE}; \
sed -i "s|INSTALL_PREFIX|${1}|g" ${PKG_CONFIG_FILE}; \
sed -i "s|LIB_VERSION|${EVENT_PARSE_VERSION}|g" ${PKG_CONFIG_FILE}; \
$(call do_install,$(PKG_CONFIG_FILE),$(pkgconfig_dir),644); \
else \
(echo Failed to locate pkg-config directory) 1>&2; \
fi
endef
install_lib: all_cmd install_plugins install_headers install_pkgconfig
$(call QUIET_INSTALL, $(LIB_TARGET)) \
$(call do_install_mkdir,$(libdir_SQ)); \
cp -fpR $(LIB_INSTALL) $(DESTDIR)$(libdir_SQ)
@ -279,18 +294,24 @@ install_plugins: $(PLUGINS)
$(call QUIET_INSTALL, trace_plugins) \
$(call do_install_plugins, $(PLUGINS))
install_pkgconfig:
$(call QUIET_INSTALL, $(PKG_CONFIG_FILE)) \
$(call do_install_pkgconfig_file,$(prefix))
install_headers:
$(call QUIET_INSTALL, headers) \
$(call do_install,event-parse.h,$(prefix)/include/traceevent,644); \
$(call do_install,event-utils.h,$(prefix)/include/traceevent,644); \
$(call do_install,trace-seq.h,$(prefix)/include/traceevent,644); \
$(call do_install,kbuffer.h,$(prefix)/include/traceevent,644)
install: install_lib
clean:
$(call QUIET_CLEAN, libtraceevent) \
$(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd \
$(RM) TRACEEVENT-CFLAGS tags TAGS
$(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \
$(RM) TRACEEVENT-CFLAGS tags TAGS; \
$(RM) $(PKG_CONFIG_FILE)
PHONY += force plugins
force:

View File

@ -15,7 +15,7 @@
* This returns pointer to the first element of the events array
* If @tep is NULL, NULL is returned.
*/
struct tep_event_format *tep_get_first_event(struct tep_handle *tep)
struct tep_event *tep_get_first_event(struct tep_handle *tep)
{
if (tep && tep->events)
return tep->events[0];
@ -51,7 +51,7 @@ void tep_set_flag(struct tep_handle *tep, int flag)
tep->flags |= flag;
}
unsigned short __tep_data2host2(struct tep_handle *pevent, unsigned short data)
unsigned short tep_data2host2(struct tep_handle *pevent, unsigned short data)
{
unsigned short swap;
@ -64,7 +64,7 @@ unsigned short __tep_data2host2(struct tep_handle *pevent, unsigned short data)
return swap;
}
unsigned int __tep_data2host4(struct tep_handle *pevent, unsigned int data)
unsigned int tep_data2host4(struct tep_handle *pevent, unsigned int data)
{
unsigned int swap;
@ -80,7 +80,7 @@ unsigned int __tep_data2host4(struct tep_handle *pevent, unsigned int data)
}
unsigned long long
__tep_data2host8(struct tep_handle *pevent, unsigned long long data)
tep_data2host8(struct tep_handle *pevent, unsigned long long data)
{
unsigned long long swap;

View File

@ -50,9 +50,9 @@ struct tep_handle {
unsigned int printk_count;
struct tep_event_format **events;
struct tep_event **events;
int nr_events;
struct tep_event_format **sort_events;
struct tep_event **sort_events;
enum tep_event_sort_type last_type;
int type_offset;
@ -84,9 +84,16 @@ struct tep_handle {
struct tep_function_handler *func_handlers;
/* cache */
struct tep_event_format *last_event;
struct tep_event *last_event;
char *trace_clock;
};
void tep_free_event(struct tep_event *event);
void tep_free_format_field(struct tep_format_field *field);
unsigned short tep_data2host2(struct tep_handle *pevent, unsigned short data);
unsigned int tep_data2host4(struct tep_handle *pevent, unsigned int data);
unsigned long long tep_data2host8(struct tep_handle *pevent, unsigned long long data);
#endif /* _PARSE_EVENTS_INT_H */

View File

@ -96,7 +96,7 @@ struct tep_function_handler {
static unsigned long long
process_defined_func(struct trace_seq *s, void *data, int size,
struct tep_event_format *event, struct tep_print_arg *arg);
struct tep_event *event, struct tep_print_arg *arg);
static void free_func_handle(struct tep_function_handler *func);
@ -739,16 +739,16 @@ void tep_print_printk(struct tep_handle *pevent)
}
}
static struct tep_event_format *alloc_event(void)
static struct tep_event *alloc_event(void)
{
return calloc(1, sizeof(struct tep_event_format));
return calloc(1, sizeof(struct tep_event));
}
static int add_event(struct tep_handle *pevent, struct tep_event_format *event)
static int add_event(struct tep_handle *pevent, struct tep_event *event)
{
int i;
struct tep_event_format **events = realloc(pevent->events, sizeof(event) *
(pevent->nr_events + 1));
struct tep_event **events = realloc(pevent->events, sizeof(event) *
(pevent->nr_events + 1));
if (!events)
return -1;
@ -1145,7 +1145,7 @@ static enum tep_event_type read_token(char **tok)
}
/**
* tep_read_token - access to utilites to use the pevent parser
* tep_read_token - access to utilities to use the pevent parser
* @tok: The token to return
*
* This will parse tokens from the string given by
@ -1355,7 +1355,7 @@ static unsigned int type_size(const char *name)
return 0;
}
static int event_read_fields(struct tep_event_format *event, struct tep_format_field **fields)
static int event_read_fields(struct tep_event *event, struct tep_format_field **fields)
{
struct tep_format_field *field = NULL;
enum tep_event_type type;
@ -1642,7 +1642,7 @@ static int event_read_fields(struct tep_event_format *event, struct tep_format_f
return -1;
}
static int event_read_format(struct tep_event_format *event)
static int event_read_format(struct tep_event *event)
{
char *token;
int ret;
@ -1675,11 +1675,11 @@ static int event_read_format(struct tep_event_format *event)
}
static enum tep_event_type
process_arg_token(struct tep_event_format *event, struct tep_print_arg *arg,
process_arg_token(struct tep_event *event, struct tep_print_arg *arg,
char **tok, enum tep_event_type type);
static enum tep_event_type
process_arg(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
process_arg(struct tep_event *event, struct tep_print_arg *arg, char **tok)
{
enum tep_event_type type;
char *token;
@ -1691,14 +1691,14 @@ process_arg(struct tep_event_format *event, struct tep_print_arg *arg, char **to
}
static enum tep_event_type
process_op(struct tep_event_format *event, struct tep_print_arg *arg, char **tok);
process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok);
/*
* For __print_symbolic() and __print_flags, we need to completely
* evaluate the first argument, which defines what to print next.
*/
static enum tep_event_type
process_field_arg(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
process_field_arg(struct tep_event *event, struct tep_print_arg *arg, char **tok)
{
enum tep_event_type type;
@ -1712,7 +1712,7 @@ process_field_arg(struct tep_event_format *event, struct tep_print_arg *arg, cha
}
static enum tep_event_type
process_cond(struct tep_event_format *event, struct tep_print_arg *top, char **tok)
process_cond(struct tep_event *event, struct tep_print_arg *top, char **tok)
{
struct tep_print_arg *arg, *left, *right;
enum tep_event_type type;
@ -1768,7 +1768,7 @@ process_cond(struct tep_event_format *event, struct tep_print_arg *top, char **t
}
static enum tep_event_type
process_array(struct tep_event_format *event, struct tep_print_arg *top, char **tok)
process_array(struct tep_event *event, struct tep_print_arg *top, char **tok)
{
struct tep_print_arg *arg;
enum tep_event_type type;
@ -1870,7 +1870,7 @@ static int set_op_prio(struct tep_print_arg *arg)
/* Note, *tok does not get freed, but will most likely be saved */
static enum tep_event_type
process_op(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok)
{
struct tep_print_arg *left, *right = NULL;
enum tep_event_type type;
@ -2071,7 +2071,7 @@ process_op(struct tep_event_format *event, struct tep_print_arg *arg, char **tok
}
static enum tep_event_type
process_entry(struct tep_event_format *event __maybe_unused, struct tep_print_arg *arg,
process_entry(struct tep_event *event __maybe_unused, struct tep_print_arg *arg,
char **tok)
{
enum tep_event_type type;
@ -2110,7 +2110,7 @@ process_entry(struct tep_event_format *event __maybe_unused, struct tep_print_ar
return TEP_EVENT_ERROR;
}
static int alloc_and_process_delim(struct tep_event_format *event, char *next_token,
static int alloc_and_process_delim(struct tep_event *event, char *next_token,
struct tep_print_arg **print_arg)
{
struct tep_print_arg *field;
@ -2445,7 +2445,7 @@ static char *arg_eval (struct tep_print_arg *arg)
}
static enum tep_event_type
process_fields(struct tep_event_format *event, struct tep_print_flag_sym **list, char **tok)
process_fields(struct tep_event *event, struct tep_print_flag_sym **list, char **tok)
{
enum tep_event_type type;
struct tep_print_arg *arg = NULL;
@ -2526,7 +2526,7 @@ process_fields(struct tep_event_format *event, struct tep_print_flag_sym **list,
}
static enum tep_event_type
process_flags(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
process_flags(struct tep_event *event, struct tep_print_arg *arg, char **tok)
{
struct tep_print_arg *field;
enum tep_event_type type;
@ -2579,7 +2579,7 @@ process_flags(struct tep_event_format *event, struct tep_print_arg *arg, char **
}
static enum tep_event_type
process_symbols(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
process_symbols(struct tep_event *event, struct tep_print_arg *arg, char **tok)
{
struct tep_print_arg *field;
enum tep_event_type type;
@ -2618,7 +2618,7 @@ process_symbols(struct tep_event_format *event, struct tep_print_arg *arg, char
}
static enum tep_event_type
process_hex_common(struct tep_event_format *event, struct tep_print_arg *arg,
process_hex_common(struct tep_event *event, struct tep_print_arg *arg,
char **tok, enum tep_print_arg_type type)
{
memset(arg, 0, sizeof(*arg));
@ -2641,20 +2641,20 @@ process_hex_common(struct tep_event_format *event, struct tep_print_arg *arg,
}
static enum tep_event_type
process_hex(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
process_hex(struct tep_event *event, struct tep_print_arg *arg, char **tok)
{
return process_hex_common(event, arg, tok, TEP_PRINT_HEX);
}
static enum tep_event_type
process_hex_str(struct tep_event_format *event, struct tep_print_arg *arg,
process_hex_str(struct tep_event *event, struct tep_print_arg *arg,
char **tok)
{
return process_hex_common(event, arg, tok, TEP_PRINT_HEX_STR);
}
static enum tep_event_type
process_int_array(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
process_int_array(struct tep_event *event, struct tep_print_arg *arg, char **tok)
{
memset(arg, 0, sizeof(*arg));
arg->type = TEP_PRINT_INT_ARRAY;
@ -2682,7 +2682,7 @@ process_int_array(struct tep_event_format *event, struct tep_print_arg *arg, cha
}
static enum tep_event_type
process_dynamic_array(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
process_dynamic_array(struct tep_event *event, struct tep_print_arg *arg, char **tok)
{
struct tep_format_field *field;
enum tep_event_type type;
@ -2746,7 +2746,7 @@ process_dynamic_array(struct tep_event_format *event, struct tep_print_arg *arg,
}
static enum tep_event_type
process_dynamic_array_len(struct tep_event_format *event, struct tep_print_arg *arg,
process_dynamic_array_len(struct tep_event *event, struct tep_print_arg *arg,
char **tok)
{
struct tep_format_field *field;
@ -2782,7 +2782,7 @@ process_dynamic_array_len(struct tep_event_format *event, struct tep_print_arg *
}
static enum tep_event_type
process_paren(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
process_paren(struct tep_event *event, struct tep_print_arg *arg, char **tok)
{
struct tep_print_arg *item_arg;
enum tep_event_type type;
@ -2845,7 +2845,7 @@ process_paren(struct tep_event_format *event, struct tep_print_arg *arg, char **
static enum tep_event_type
process_str(struct tep_event_format *event __maybe_unused, struct tep_print_arg *arg,
process_str(struct tep_event *event __maybe_unused, struct tep_print_arg *arg,
char **tok)
{
enum tep_event_type type;
@ -2874,7 +2874,7 @@ process_str(struct tep_event_format *event __maybe_unused, struct tep_print_arg
}
static enum tep_event_type
process_bitmask(struct tep_event_format *event __maybe_unused, struct tep_print_arg *arg,
process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *arg,
char **tok)
{
enum tep_event_type type;
@ -2935,7 +2935,7 @@ static void remove_func_handler(struct tep_handle *pevent, char *func_name)
}
static enum tep_event_type
process_func_handler(struct tep_event_format *event, struct tep_function_handler *func,
process_func_handler(struct tep_event *event, struct tep_function_handler *func,
struct tep_print_arg *arg, char **tok)
{
struct tep_print_arg **next_arg;
@ -2993,7 +2993,7 @@ process_func_handler(struct tep_event_format *event, struct tep_function_handler
}
static enum tep_event_type
process_function(struct tep_event_format *event, struct tep_print_arg *arg,
process_function(struct tep_event *event, struct tep_print_arg *arg,
char *token, char **tok)
{
struct tep_function_handler *func;
@ -3049,7 +3049,7 @@ process_function(struct tep_event_format *event, struct tep_print_arg *arg,
}
static enum tep_event_type
process_arg_token(struct tep_event_format *event, struct tep_print_arg *arg,
process_arg_token(struct tep_event *event, struct tep_print_arg *arg,
char **tok, enum tep_event_type type)
{
char *token;
@ -3137,7 +3137,7 @@ process_arg_token(struct tep_event_format *event, struct tep_print_arg *arg,
return type;
}
static int event_read_print_args(struct tep_event_format *event, struct tep_print_arg **list)
static int event_read_print_args(struct tep_event *event, struct tep_print_arg **list)
{
enum tep_event_type type = TEP_EVENT_ERROR;
struct tep_print_arg *arg;
@ -3195,7 +3195,7 @@ static int event_read_print_args(struct tep_event_format *event, struct tep_prin
return args;
}
static int event_read_print(struct tep_event_format *event)
static int event_read_print(struct tep_event *event)
{
enum tep_event_type type;
char *token;
@ -3258,10 +3258,10 @@ static int event_read_print(struct tep_event_format *event)
* @name: the name of the common field to return
*
* Returns a common field from the event by the given @name.
* This only searchs the common fields and not all field.
* This only searches the common fields and not all field.
*/
struct tep_format_field *
tep_find_common_field(struct tep_event_format *event, const char *name)
tep_find_common_field(struct tep_event *event, const char *name)
{
struct tep_format_field *format;
@ -3283,7 +3283,7 @@ tep_find_common_field(struct tep_event_format *event, const char *name)
* This does not search common fields.
*/
struct tep_format_field *
tep_find_field(struct tep_event_format *event, const char *name)
tep_find_field(struct tep_event *event, const char *name)
{
struct tep_format_field *format;
@ -3302,11 +3302,11 @@ tep_find_field(struct tep_event_format *event, const char *name)
* @name: the name of the field
*
* Returns a field by the given @name.
* This searchs the common field names first, then
* This searches the common field names first, then
* the non-common ones if a common one was not found.
*/
struct tep_format_field *
tep_find_any_field(struct tep_event_format *event, const char *name)
tep_find_any_field(struct tep_event *event, const char *name)
{
struct tep_format_field *format;
@ -3328,15 +3328,18 @@ tep_find_any_field(struct tep_event_format *event, const char *name)
unsigned long long tep_read_number(struct tep_handle *pevent,
const void *ptr, int size)
{
unsigned long long val;
switch (size) {
case 1:
return *(unsigned char *)ptr;
case 2:
return tep_data2host2(pevent, ptr);
return tep_data2host2(pevent, *(unsigned short *)ptr);
case 4:
return tep_data2host4(pevent, ptr);
return tep_data2host4(pevent, *(unsigned int *)ptr);
case 8:
return tep_data2host8(pevent, ptr);
memcpy(&val, (ptr), sizeof(unsigned long long));
return tep_data2host8(pevent, val);
default:
/* BUG! */
return 0;
@ -3375,7 +3378,7 @@ int tep_read_number_field(struct tep_format_field *field, const void *data,
static int get_common_info(struct tep_handle *pevent,
const char *type, int *offset, int *size)
{
struct tep_event_format *event;
struct tep_event *event;
struct tep_format_field *field;
/*
@ -3462,11 +3465,11 @@ static int events_id_cmp(const void *a, const void *b);
*
* Returns an event that has a given @id.
*/
struct tep_event_format *tep_find_event(struct tep_handle *pevent, int id)
struct tep_event *tep_find_event(struct tep_handle *pevent, int id)
{
struct tep_event_format **eventptr;
struct tep_event_format key;
struct tep_event_format *pkey = &key;
struct tep_event **eventptr;
struct tep_event key;
struct tep_event *pkey = &key;
/* Check cache first */
if (pevent->last_event && pevent->last_event->id == id)
@ -3494,11 +3497,11 @@ struct tep_event_format *tep_find_event(struct tep_handle *pevent, int id)
* This returns an event with a given @name and under the system
* @sys. If @sys is NULL the first event with @name is returned.
*/
struct tep_event_format *
struct tep_event *
tep_find_event_by_name(struct tep_handle *pevent,
const char *sys, const char *name)
{
struct tep_event_format *event;
struct tep_event *event = NULL;
int i;
if (pevent->last_event &&
@ -3523,7 +3526,7 @@ tep_find_event_by_name(struct tep_handle *pevent,
}
static unsigned long long
eval_num_arg(void *data, int size, struct tep_event_format *event, struct tep_print_arg *arg)
eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg *arg)
{
struct tep_handle *pevent = event->pevent;
unsigned long long val = 0;
@ -3838,7 +3841,7 @@ static void print_bitmask_to_seq(struct tep_handle *pevent,
/*
* data points to a bit mask of size bytes.
* In the kernel, this is an array of long words, thus
* endianess is very important.
* endianness is very important.
*/
if (pevent->file_bigendian)
index = size - (len + 1);
@ -3863,7 +3866,7 @@ static void print_bitmask_to_seq(struct tep_handle *pevent,
}
static void print_str_arg(struct trace_seq *s, void *data, int size,
struct tep_event_format *event, const char *format,
struct tep_event *event, const char *format,
int len_arg, struct tep_print_arg *arg)
{
struct tep_handle *pevent = event->pevent;
@ -4062,7 +4065,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
f = tep_find_any_field(event, arg->string.string);
arg->string.offset = f->offset;
}
str_offset = tep_data2host4(pevent, data + arg->string.offset);
str_offset = tep_data2host4(pevent, *(unsigned int *)(data + arg->string.offset));
str_offset &= 0xffff;
print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset);
break;
@ -4080,7 +4083,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
f = tep_find_any_field(event, arg->bitmask.bitmask);
arg->bitmask.offset = f->offset;
}
bitmask_offset = tep_data2host4(pevent, data + arg->bitmask.offset);
bitmask_offset = tep_data2host4(pevent, *(unsigned int *)(data + arg->bitmask.offset));
bitmask_size = bitmask_offset >> 16;
bitmask_offset &= 0xffff;
print_bitmask_to_seq(pevent, s, format, len_arg,
@ -4118,7 +4121,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
static unsigned long long
process_defined_func(struct trace_seq *s, void *data, int size,
struct tep_event_format *event, struct tep_print_arg *arg)
struct tep_event *event, struct tep_print_arg *arg)
{
struct tep_function_handler *func_handle = arg->func.func;
struct func_params *param;
@ -4213,7 +4216,7 @@ static void free_args(struct tep_print_arg *args)
}
}
static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, struct tep_event_format *event)
static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, struct tep_event *event)
{
struct tep_handle *pevent = event->pevent;
struct tep_format_field *field, *ip_field;
@ -4221,7 +4224,7 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s
unsigned long long ip, val;
char *ptr;
void *bptr;
int vsize;
int vsize = 0;
field = pevent->bprint_buf_field;
ip_field = pevent->bprint_ip_field;
@ -4390,7 +4393,7 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s
static char *
get_bprint_format(void *data, int size __maybe_unused,
struct tep_event_format *event)
struct tep_event *event)
{
struct tep_handle *pevent = event->pevent;
unsigned long long addr;
@ -4425,7 +4428,7 @@ get_bprint_format(void *data, int size __maybe_unused,
}
static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size,
struct tep_event_format *event, struct tep_print_arg *arg)
struct tep_event *event, struct tep_print_arg *arg)
{
unsigned char *buf;
const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x";
@ -4578,7 +4581,7 @@ static void print_ip6_addr(struct trace_seq *s, char i, unsigned char *buf)
* %pISpc print an IP address based on sockaddr; p adds port.
*/
static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
void *data, int size, struct tep_event_format *event,
void *data, int size, struct tep_event *event,
struct tep_print_arg *arg)
{
unsigned char *buf;
@ -4615,7 +4618,7 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
}
static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i,
void *data, int size, struct tep_event_format *event,
void *data, int size, struct tep_event *event,
struct tep_print_arg *arg)
{
char have_c = 0;
@ -4665,7 +4668,7 @@ static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i,
}
static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
void *data, int size, struct tep_event_format *event,
void *data, int size, struct tep_event *event,
struct tep_print_arg *arg)
{
char have_c = 0, have_p = 0;
@ -4747,7 +4750,7 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
}
static int print_ip_arg(struct trace_seq *s, const char *ptr,
void *data, int size, struct tep_event_format *event,
void *data, int size, struct tep_event *event,
struct tep_print_arg *arg)
{
char i = *ptr; /* 'i' or 'I' */
@ -4854,7 +4857,7 @@ void tep_print_field(struct trace_seq *s, void *data,
}
void tep_print_fields(struct trace_seq *s, void *data,
int size __maybe_unused, struct tep_event_format *event)
int size __maybe_unused, struct tep_event *event)
{
struct tep_format_field *field;
@ -4866,7 +4869,7 @@ void tep_print_fields(struct trace_seq *s, void *data,
}
}
static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event_format *event)
static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event)
{
struct tep_handle *pevent = event->pevent;
struct tep_print_fmt *print_fmt = &event->print_fmt;
@ -4881,7 +4884,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_e
char format[32];
int show_func;
int len_as_arg;
int len_arg;
int len_arg = 0;
int len;
int ls;
@ -4970,6 +4973,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_e
if (arg->type == TEP_PRINT_BSTRING) {
trace_seq_puts(s, arg->string.string);
arg = arg->next;
break;
}
@ -5146,8 +5150,8 @@ void tep_data_lat_fmt(struct tep_handle *pevent,
static int migrate_disable_exists;
unsigned int lat_flags;
unsigned int pc;
int lock_depth;
int migrate_disable;
int lock_depth = 0;
int migrate_disable = 0;
int hardirq;
int softirq;
void *data = record->data;
@ -5229,7 +5233,7 @@ int tep_data_type(struct tep_handle *pevent, struct tep_record *rec)
*
* This returns the event form a given @type;
*/
struct tep_event_format *tep_data_event_from_type(struct tep_handle *pevent, int type)
struct tep_event *tep_data_event_from_type(struct tep_handle *pevent, int type)
{
return tep_find_event(pevent, type);
}
@ -5313,9 +5317,9 @@ pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct cmdline *ne
* This returns the cmdline structure that holds a pid for a given
* comm, or NULL if none found. As there may be more than one pid for
* a given comm, the result of this call can be passed back into
* a recurring call in the @next paramater, and then it will find the
* a recurring call in the @next parameter, and then it will find the
* next pid.
* Also, it does a linear seach, so it may be slow.
* Also, it does a linear search, so it may be slow.
*/
struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm,
struct cmdline *next)
@ -5387,7 +5391,7 @@ int tep_cmdline_pid(struct tep_handle *pevent, struct cmdline *cmdline)
* This parses the raw @data using the given @event information and
* writes the print format into the trace_seq.
*/
void tep_event_info(struct trace_seq *s, struct tep_event_format *event,
void tep_event_info(struct trace_seq *s, struct tep_event *event,
struct tep_record *record)
{
int print_pretty = 1;
@ -5409,7 +5413,7 @@ void tep_event_info(struct trace_seq *s, struct tep_event_format *event,
static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock)
{
if (!use_trace_clock)
if (!trace_clock || !use_trace_clock)
return true;
if (!strcmp(trace_clock, "local") || !strcmp(trace_clock, "global")
@ -5428,7 +5432,7 @@ static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock)
* Returns the associated event for a given record, or NULL if non is
* is found.
*/
struct tep_event_format *
struct tep_event *
tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record)
{
int type;
@ -5453,7 +5457,7 @@ tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record)
* Writes the tasks comm, pid and CPU to @s.
*/
void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s,
struct tep_event_format *event,
struct tep_event *event,
struct tep_record *record)
{
void *data = record->data;
@ -5481,7 +5485,7 @@ void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s,
* Writes the timestamp of the record into @s.
*/
void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s,
struct tep_event_format *event,
struct tep_event *event,
struct tep_record *record,
bool use_trace_clock)
{
@ -5531,7 +5535,7 @@ void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s,
* Writes the parsing of the record's data to @s.
*/
void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s,
struct tep_event_format *event,
struct tep_event *event,
struct tep_record *record)
{
static const char *spaces = " "; /* 20 spaces */
@ -5550,7 +5554,7 @@ void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s,
void tep_print_event(struct tep_handle *pevent, struct trace_seq *s,
struct tep_record *record, bool use_trace_clock)
{
struct tep_event_format *event;
struct tep_event *event;
event = tep_find_event_by_record(pevent, record);
if (!event) {
@ -5572,8 +5576,8 @@ void tep_print_event(struct tep_handle *pevent, struct trace_seq *s,
static int events_id_cmp(const void *a, const void *b)
{
struct tep_event_format * const * ea = a;
struct tep_event_format * const * eb = b;
struct tep_event * const * ea = a;
struct tep_event * const * eb = b;
if ((*ea)->id < (*eb)->id)
return -1;
@ -5586,8 +5590,8 @@ static int events_id_cmp(const void *a, const void *b)
static int events_name_cmp(const void *a, const void *b)
{
struct tep_event_format * const * ea = a;
struct tep_event_format * const * eb = b;
struct tep_event * const * ea = a;
struct tep_event * const * eb = b;
int res;
res = strcmp((*ea)->name, (*eb)->name);
@ -5603,8 +5607,8 @@ static int events_name_cmp(const void *a, const void *b)
static int events_system_cmp(const void *a, const void *b)
{
struct tep_event_format * const * ea = a;
struct tep_event_format * const * eb = b;
struct tep_event * const * ea = a;
struct tep_event * const * eb = b;
int res;
res = strcmp((*ea)->system, (*eb)->system);
@ -5618,9 +5622,9 @@ static int events_system_cmp(const void *a, const void *b)
return events_id_cmp(a, b);
}
struct tep_event_format **tep_list_events(struct tep_handle *pevent, enum tep_event_sort_type sort_type)
struct tep_event **tep_list_events(struct tep_handle *pevent, enum tep_event_sort_type sort_type)
{
struct tep_event_format **events;
struct tep_event **events;
int (*sort)(const void *a, const void *b);
events = pevent->sort_events;
@ -5703,7 +5707,7 @@ get_event_fields(const char *type, const char *name,
* Returns an allocated array of fields. The last item in the array is NULL.
* The array must be freed with free().
*/
struct tep_format_field **tep_event_common_fields(struct tep_event_format *event)
struct tep_format_field **tep_event_common_fields(struct tep_event *event)
{
return get_event_fields("common", event->name,
event->format.nr_common,
@ -5717,7 +5721,7 @@ struct tep_format_field **tep_event_common_fields(struct tep_event_format *event
* Returns an allocated array of fields. The last item in the array is NULL.
* The array must be freed with free().
*/
struct tep_format_field **tep_event_fields(struct tep_event_format *event)
struct tep_format_field **tep_event_fields(struct tep_event *event)
{
return get_event_fields("event", event->name,
event->format.nr_fields,
@ -5959,7 +5963,7 @@ int tep_parse_header_page(struct tep_handle *pevent, char *buf, unsigned long si
return 0;
}
static int event_matches(struct tep_event_format *event,
static int event_matches(struct tep_event *event,
int id, const char *sys_name,
const char *event_name)
{
@ -5982,7 +5986,7 @@ static void free_handler(struct event_handler *handle)
free(handle);
}
static int find_event_handle(struct tep_handle *pevent, struct tep_event_format *event)
static int find_event_handle(struct tep_handle *pevent, struct tep_event *event)
{
struct event_handler *handle, **next;
@ -6023,11 +6027,11 @@ static int find_event_handle(struct tep_handle *pevent, struct tep_event_format
*
* /sys/kernel/debug/tracing/events/.../.../format
*/
enum tep_errno __tep_parse_format(struct tep_event_format **eventp,
enum tep_errno __tep_parse_format(struct tep_event **eventp,
struct tep_handle *pevent, const char *buf,
unsigned long size, const char *sys)
{
struct tep_event_format *event;
struct tep_event *event;
int ret;
init_input_buf(buf, size);
@ -6132,12 +6136,12 @@ enum tep_errno __tep_parse_format(struct tep_event_format **eventp,
static enum tep_errno
__parse_event(struct tep_handle *pevent,
struct tep_event_format **eventp,
struct tep_event **eventp,
const char *buf, unsigned long size,
const char *sys)
{
int ret = __tep_parse_format(eventp, pevent, buf, size, sys);
struct tep_event_format *event = *eventp;
struct tep_event *event = *eventp;
if (event == NULL)
return ret;
@ -6154,7 +6158,7 @@ __parse_event(struct tep_handle *pevent,
return 0;
event_add_failed:
tep_free_format(event);
tep_free_event(event);
return ret;
}
@ -6174,7 +6178,7 @@ __parse_event(struct tep_handle *pevent,
* /sys/kernel/debug/tracing/events/.../.../format
*/
enum tep_errno tep_parse_format(struct tep_handle *pevent,
struct tep_event_format **eventp,
struct tep_event **eventp,
const char *buf,
unsigned long size, const char *sys)
{
@ -6198,7 +6202,7 @@ enum tep_errno tep_parse_format(struct tep_handle *pevent,
enum tep_errno tep_parse_event(struct tep_handle *pevent, const char *buf,
unsigned long size, const char *sys)
{
struct tep_event_format *event = NULL;
struct tep_event *event = NULL;
return __parse_event(pevent, &event, buf, size, sys);
}
@ -6235,7 +6239,7 @@ int get_field_val(struct trace_seq *s, struct tep_format_field *field,
*
* On failure, it returns NULL.
*/
void *tep_get_field_raw(struct trace_seq *s, struct tep_event_format *event,
void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event,
const char *name, struct tep_record *record,
int *len, int err)
{
@ -6282,7 +6286,7 @@ void *tep_get_field_raw(struct trace_seq *s, struct tep_event_format *event,
*
* Returns 0 on success -1 on field not found.
*/
int tep_get_field_val(struct trace_seq *s, struct tep_event_format *event,
int tep_get_field_val(struct trace_seq *s, struct tep_event *event,
const char *name, struct tep_record *record,
unsigned long long *val, int err)
{
@ -6307,7 +6311,7 @@ int tep_get_field_val(struct trace_seq *s, struct tep_event_format *event,
*
* Returns 0 on success -1 on field not found.
*/
int tep_get_common_field_val(struct trace_seq *s, struct tep_event_format *event,
int tep_get_common_field_val(struct trace_seq *s, struct tep_event *event,
const char *name, struct tep_record *record,
unsigned long long *val, int err)
{
@ -6332,7 +6336,7 @@ int tep_get_common_field_val(struct trace_seq *s, struct tep_event_format *event
*
* Returns 0 on success -1 on field not found.
*/
int tep_get_any_field_val(struct trace_seq *s, struct tep_event_format *event,
int tep_get_any_field_val(struct trace_seq *s, struct tep_event *event,
const char *name, struct tep_record *record,
unsigned long long *val, int err)
{
@ -6358,7 +6362,7 @@ int tep_get_any_field_val(struct trace_seq *s, struct tep_event_format *event,
* Returns: 0 on success, -1 field not found, or 1 if buffer is full.
*/
int tep_print_num_field(struct trace_seq *s, const char *fmt,
struct tep_event_format *event, const char *name,
struct tep_event *event, const char *name,
struct tep_record *record, int err)
{
struct tep_format_field *field = tep_find_field(event, name);
@ -6390,7 +6394,7 @@ int tep_print_num_field(struct trace_seq *s, const char *fmt,
* Returns: 0 on success, -1 field not found, or 1 if buffer is full.
*/
int tep_print_func_field(struct trace_seq *s, const char *fmt,
struct tep_event_format *event, const char *name,
struct tep_event *event, const char *name,
struct tep_record *record, int err)
{
struct tep_format_field *field = tep_find_field(event, name);
@ -6550,11 +6554,11 @@ int tep_unregister_print_function(struct tep_handle *pevent,
return -1;
}
static struct tep_event_format *search_event(struct tep_handle *pevent, int id,
const char *sys_name,
const char *event_name)
static struct tep_event *search_event(struct tep_handle *pevent, int id,
const char *sys_name,
const char *event_name)
{
struct tep_event_format *event;
struct tep_event *event;
if (id >= 0) {
/* search by id */
@ -6594,7 +6598,7 @@ int tep_register_event_handler(struct tep_handle *pevent, int id,
const char *sys_name, const char *event_name,
tep_event_handler_func func, void *context)
{
struct tep_event_format *event;
struct tep_event *event;
struct event_handler *handle;
event = search_event(pevent, id, sys_name, event_name);
@ -6678,7 +6682,7 @@ int tep_unregister_event_handler(struct tep_handle *pevent, int id,
const char *sys_name, const char *event_name,
tep_event_handler_func func, void *context)
{
struct tep_event_format *event;
struct tep_event *event;
struct event_handler *handle;
struct event_handler **next;
@ -6730,6 +6734,13 @@ void tep_ref(struct tep_handle *pevent)
pevent->ref_count++;
}
int tep_get_ref(struct tep_handle *tep)
{
if (tep)
return tep->ref_count;
return 0;
}
void tep_free_format_field(struct tep_format_field *field)
{
free(field->type);
@ -6756,7 +6767,7 @@ static void free_formats(struct tep_format *format)
free_format_fields(format->fields);
}
void tep_free_format(struct tep_event_format *event)
void tep_free_event(struct tep_event *event)
{
free(event->name);
free(event->system);
@ -6842,7 +6853,7 @@ void tep_free(struct tep_handle *pevent)
}
for (i = 0; i < pevent->nr_events; i++)
tep_free_format(pevent->events[i]);
tep_free_event(pevent->events[i]);
while (pevent->handlers) {
handle = pevent->handlers;

View File

@ -57,11 +57,11 @@ struct tep_record {
/* ----------------------- tep ----------------------- */
struct tep_handle;
struct tep_event_format;
struct tep_event;
typedef int (*tep_event_handler_func)(struct trace_seq *s,
struct tep_record *record,
struct tep_event_format *event,
struct tep_event *event,
void *context);
typedef int (*tep_plugin_load_func)(struct tep_handle *pevent);
@ -143,7 +143,7 @@ enum tep_format_flags {
struct tep_format_field {
struct tep_format_field *next;
struct tep_event_format *event;
struct tep_event *event;
char *type;
char *name;
char *alias;
@ -277,7 +277,7 @@ struct tep_print_fmt {
struct tep_print_arg *args;
};
struct tep_event_format {
struct tep_event {
struct tep_handle *pevent;
char *name;
int id;
@ -409,20 +409,6 @@ void tep_print_plugins(struct trace_seq *s,
typedef char *(tep_func_resolver_t)(void *priv,
unsigned long long *addrp, char **modp);
void tep_set_flag(struct tep_handle *tep, int flag);
unsigned short __tep_data2host2(struct tep_handle *pevent, unsigned short data);
unsigned int __tep_data2host4(struct tep_handle *pevent, unsigned int data);
unsigned long long
__tep_data2host8(struct tep_handle *pevent, unsigned long long data);
#define tep_data2host2(pevent, ptr) __tep_data2host2(pevent, *(unsigned short *)(ptr))
#define tep_data2host4(pevent, ptr) __tep_data2host4(pevent, *(unsigned int *)(ptr))
#define tep_data2host8(pevent, ptr) \
({ \
unsigned long long __val; \
\
memcpy(&__val, (ptr), sizeof(unsigned long long)); \
__tep_data2host8(pevent, __val); \
})
static inline int tep_host_bigendian(void)
{
@ -454,14 +440,14 @@ int tep_register_print_string(struct tep_handle *pevent, const char *fmt,
int tep_pid_is_registered(struct tep_handle *pevent, int pid);
void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s,
struct tep_event_format *event,
struct tep_event *event,
struct tep_record *record);
void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s,
struct tep_event_format *event,
struct tep_event *event,
struct tep_record *record,
bool use_trace_clock);
void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s,
struct tep_event_format *event,
struct tep_event *event,
struct tep_record *record);
void tep_print_event(struct tep_handle *pevent, struct trace_seq *s,
struct tep_record *record, bool use_trace_clock);
@ -472,32 +458,30 @@ int tep_parse_header_page(struct tep_handle *pevent, char *buf, unsigned long si
enum tep_errno tep_parse_event(struct tep_handle *pevent, const char *buf,
unsigned long size, const char *sys);
enum tep_errno tep_parse_format(struct tep_handle *pevent,
struct tep_event_format **eventp,
struct tep_event **eventp,
const char *buf,
unsigned long size, const char *sys);
void tep_free_format(struct tep_event_format *event);
void tep_free_format_field(struct tep_format_field *field);
void *tep_get_field_raw(struct trace_seq *s, struct tep_event_format *event,
void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event,
const char *name, struct tep_record *record,
int *len, int err);
int tep_get_field_val(struct trace_seq *s, struct tep_event_format *event,
int tep_get_field_val(struct trace_seq *s, struct tep_event *event,
const char *name, struct tep_record *record,
unsigned long long *val, int err);
int tep_get_common_field_val(struct trace_seq *s, struct tep_event_format *event,
int tep_get_common_field_val(struct trace_seq *s, struct tep_event *event,
const char *name, struct tep_record *record,
unsigned long long *val, int err);
int tep_get_any_field_val(struct trace_seq *s, struct tep_event_format *event,
int tep_get_any_field_val(struct trace_seq *s, struct tep_event *event,
const char *name, struct tep_record *record,
unsigned long long *val, int err);
int tep_print_num_field(struct trace_seq *s, const char *fmt,
struct tep_event_format *event, const char *name,
struct tep_event *event, const char *name,
struct tep_record *record, int err);
int tep_print_func_field(struct trace_seq *s, const char *fmt,
struct tep_event_format *event, const char *name,
struct tep_event *event, const char *name,
struct tep_record *record, int err);
int tep_register_event_handler(struct tep_handle *pevent, int id,
@ -513,9 +497,9 @@ int tep_register_print_function(struct tep_handle *pevent,
int tep_unregister_print_function(struct tep_handle *pevent,
tep_func_handler func, char *name);
struct tep_format_field *tep_find_common_field(struct tep_event_format *event, const char *name);
struct tep_format_field *tep_find_field(struct tep_event_format *event, const char *name);
struct tep_format_field *tep_find_any_field(struct tep_event_format *event, const char *name);
struct tep_format_field *tep_find_common_field(struct tep_event *event, const char *name);
struct tep_format_field *tep_find_field(struct tep_event *event, const char *name);
struct tep_format_field *tep_find_any_field(struct tep_event *event, const char *name);
const char *tep_find_function(struct tep_handle *pevent, unsigned long long addr);
unsigned long long
@ -524,19 +508,19 @@ unsigned long long tep_read_number(struct tep_handle *pevent, const void *ptr, i
int tep_read_number_field(struct tep_format_field *field, const void *data,
unsigned long long *value);
struct tep_event_format *tep_get_first_event(struct tep_handle *tep);
struct tep_event *tep_get_first_event(struct tep_handle *tep);
int tep_get_events_count(struct tep_handle *tep);
struct tep_event_format *tep_find_event(struct tep_handle *pevent, int id);
struct tep_event *tep_find_event(struct tep_handle *pevent, int id);
struct tep_event_format *
struct tep_event *
tep_find_event_by_name(struct tep_handle *pevent, const char *sys, const char *name);
struct tep_event_format *
struct tep_event *
tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record);
void tep_data_lat_fmt(struct tep_handle *pevent,
struct trace_seq *s, struct tep_record *record);
int tep_data_type(struct tep_handle *pevent, struct tep_record *rec);
struct tep_event_format *tep_data_event_from_type(struct tep_handle *pevent, int type);
struct tep_event *tep_data_event_from_type(struct tep_handle *pevent, int type);
int tep_data_pid(struct tep_handle *pevent, struct tep_record *rec);
int tep_data_preempt_count(struct tep_handle *pevent, struct tep_record *rec);
int tep_data_flags(struct tep_handle *pevent, struct tep_record *rec);
@ -549,15 +533,15 @@ int tep_cmdline_pid(struct tep_handle *pevent, struct cmdline *cmdline);
void tep_print_field(struct trace_seq *s, void *data,
struct tep_format_field *field);
void tep_print_fields(struct trace_seq *s, void *data,
int size __maybe_unused, struct tep_event_format *event);
void tep_event_info(struct trace_seq *s, struct tep_event_format *event,
struct tep_record *record);
int size __maybe_unused, struct tep_event *event);
void tep_event_info(struct trace_seq *s, struct tep_event *event,
struct tep_record *record);
int tep_strerror(struct tep_handle *pevent, enum tep_errno errnum,
char *buf, size_t buflen);
char *buf, size_t buflen);
struct tep_event_format **tep_list_events(struct tep_handle *pevent, enum tep_event_sort_type);
struct tep_format_field **tep_event_common_fields(struct tep_event_format *event);
struct tep_format_field **tep_event_fields(struct tep_event_format *event);
struct tep_event **tep_list_events(struct tep_handle *pevent, enum tep_event_sort_type);
struct tep_format_field **tep_event_common_fields(struct tep_event *event);
struct tep_format_field **tep_event_fields(struct tep_event *event);
enum tep_endian {
TEP_LITTLE_ENDIAN = 0,
@ -581,6 +565,7 @@ struct tep_handle *tep_alloc(void);
void tep_free(struct tep_handle *pevent);
void tep_ref(struct tep_handle *pevent);
void tep_unref(struct tep_handle *pevent);
int tep_get_ref(struct tep_handle *tep);
/* access to the internal parser */
void tep_buffer_init(const char *buf, unsigned long long size);
@ -712,7 +697,7 @@ struct tep_filter_arg {
struct tep_filter_type {
int event_id;
struct tep_event_format *event;
struct tep_event *event;
struct tep_filter_arg *filter;
};

View File

@ -0,0 +1,10 @@
prefix=INSTALL_PREFIX
libdir=${prefix}/lib64
includedir=${prefix}/include/traceevent
Name: libtraceevent
URL: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
Description: Linux kernel trace event library
Version: LIB_VERSION
Cflags: -I${includedir}
Libs: -L${libdir} -ltraceevent

View File

@ -27,7 +27,7 @@ static struct tep_format_field cpu = {
struct event_list {
struct event_list *next;
struct tep_event_format *event;
struct tep_event *event;
};
static void show_error(char *error_buf, const char *fmt, ...)
@ -229,7 +229,7 @@ static void free_arg(struct tep_filter_arg *arg)
}
static int add_event(struct event_list **events,
struct tep_event_format *event)
struct tep_event *event)
{
struct event_list *list;
@ -243,7 +243,7 @@ static int add_event(struct event_list **events,
return 0;
}
static int event_match(struct tep_event_format *event,
static int event_match(struct tep_event *event,
regex_t *sreg, regex_t *ereg)
{
if (sreg) {
@ -259,7 +259,7 @@ static enum tep_errno
find_event(struct tep_handle *pevent, struct event_list **events,
char *sys_name, char *event_name)
{
struct tep_event_format *event;
struct tep_event *event;
regex_t ereg;
regex_t sreg;
int match = 0;
@ -334,7 +334,7 @@ static void free_events(struct event_list *events)
}
static enum tep_errno
create_arg_item(struct tep_event_format *event, const char *token,
create_arg_item(struct tep_event *event, const char *token,
enum tep_event_type type, struct tep_filter_arg **parg, char *error_str)
{
struct tep_format_field *field;
@ -940,7 +940,7 @@ static int collapse_tree(struct tep_filter_arg *arg,
}
static enum tep_errno
process_filter(struct tep_event_format *event, struct tep_filter_arg **parg,
process_filter(struct tep_event *event, struct tep_filter_arg **parg,
char *error_str, int not)
{
enum tep_event_type type;
@ -1180,7 +1180,7 @@ process_filter(struct tep_event_format *event, struct tep_filter_arg **parg,
}
static enum tep_errno
process_event(struct tep_event_format *event, const char *filter_str,
process_event(struct tep_event *event, const char *filter_str,
struct tep_filter_arg **parg, char *error_str)
{
int ret;
@ -1205,7 +1205,7 @@ process_event(struct tep_event_format *event, const char *filter_str,
}
static enum tep_errno
filter_event(struct tep_event_filter *filter, struct tep_event_format *event,
filter_event(struct tep_event_filter *filter, struct tep_event *event,
const char *filter_str, char *error_str)
{
struct tep_filter_type *filter_type;
@ -1457,7 +1457,7 @@ static int copy_filter_type(struct tep_event_filter *filter,
struct tep_filter_type *filter_type)
{
struct tep_filter_arg *arg;
struct tep_event_format *event;
struct tep_event *event;
const char *sys;
const char *name;
char *str;
@ -1539,7 +1539,7 @@ int tep_update_trivial(struct tep_event_filter *dest, struct tep_event_filter *s
{
struct tep_handle *src_pevent;
struct tep_handle *dest_pevent;
struct tep_event_format *event;
struct tep_event *event;
struct tep_filter_type *filter_type;
struct tep_filter_arg *arg;
char *str;
@ -1683,11 +1683,11 @@ int tep_filter_event_has_trivial(struct tep_event_filter *filter,
}
}
static int test_filter(struct tep_event_format *event, struct tep_filter_arg *arg,
static int test_filter(struct tep_event *event, struct tep_filter_arg *arg,
struct tep_record *record, enum tep_errno *err);
static const char *
get_comm(struct tep_event_format *event, struct tep_record *record)
get_comm(struct tep_event *event, struct tep_record *record)
{
const char *comm;
int pid;
@ -1698,7 +1698,7 @@ get_comm(struct tep_event_format *event, struct tep_record *record)
}
static unsigned long long
get_value(struct tep_event_format *event,
get_value(struct tep_event *event,
struct tep_format_field *field, struct tep_record *record)
{
unsigned long long val;
@ -1734,11 +1734,11 @@ get_value(struct tep_event_format *event,
}
static unsigned long long
get_arg_value(struct tep_event_format *event, struct tep_filter_arg *arg,
get_arg_value(struct tep_event *event, struct tep_filter_arg *arg,
struct tep_record *record, enum tep_errno *err);
static unsigned long long
get_exp_value(struct tep_event_format *event, struct tep_filter_arg *arg,
get_exp_value(struct tep_event *event, struct tep_filter_arg *arg,
struct tep_record *record, enum tep_errno *err)
{
unsigned long long lval, rval;
@ -1793,7 +1793,7 @@ get_exp_value(struct tep_event_format *event, struct tep_filter_arg *arg,
}
static unsigned long long
get_arg_value(struct tep_event_format *event, struct tep_filter_arg *arg,
get_arg_value(struct tep_event *event, struct tep_filter_arg *arg,
struct tep_record *record, enum tep_errno *err)
{
switch (arg->type) {
@ -1817,7 +1817,7 @@ get_arg_value(struct tep_event_format *event, struct tep_filter_arg *arg,
return 0;
}
static int test_num(struct tep_event_format *event, struct tep_filter_arg *arg,
static int test_num(struct tep_event *event, struct tep_filter_arg *arg,
struct tep_record *record, enum tep_errno *err)
{
unsigned long long lval, rval;
@ -1860,7 +1860,7 @@ static int test_num(struct tep_event_format *event, struct tep_filter_arg *arg,
static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record *record)
{
struct tep_event_format *event;
struct tep_event *event;
struct tep_handle *pevent;
unsigned long long addr;
const char *val = NULL;
@ -1908,7 +1908,7 @@ static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record *
return val;
}
static int test_str(struct tep_event_format *event, struct tep_filter_arg *arg,
static int test_str(struct tep_event *event, struct tep_filter_arg *arg,
struct tep_record *record, enum tep_errno *err)
{
const char *val;
@ -1939,7 +1939,7 @@ static int test_str(struct tep_event_format *event, struct tep_filter_arg *arg,
}
}
static int test_op(struct tep_event_format *event, struct tep_filter_arg *arg,
static int test_op(struct tep_event *event, struct tep_filter_arg *arg,
struct tep_record *record, enum tep_errno *err)
{
switch (arg->op.type) {
@ -1961,7 +1961,7 @@ static int test_op(struct tep_event_format *event, struct tep_filter_arg *arg,
}
}
static int test_filter(struct tep_event_format *event, struct tep_filter_arg *arg,
static int test_filter(struct tep_event *event, struct tep_filter_arg *arg,
struct tep_record *record, enum tep_errno *err)
{
if (*err) {

View File

@ -124,7 +124,7 @@ static int add_and_get_index(const char *parent, const char *child, int cpu)
}
static int function_handler(struct trace_seq *s, struct tep_record *record,
struct tep_event_format *event, void *context)
struct tep_event *event, void *context)
{
struct tep_handle *pevent = event->pevent;
unsigned long long function;

View File

@ -27,7 +27,7 @@
static int timer_expire_handler(struct trace_seq *s,
struct tep_record *record,
struct tep_event_format *event, void *context)
struct tep_event *event, void *context)
{
trace_seq_printf(s, "hrtimer=");
@ -47,7 +47,7 @@ static int timer_expire_handler(struct trace_seq *s,
static int timer_start_handler(struct trace_seq *s,
struct tep_record *record,
struct tep_event_format *event, void *context)
struct tep_event *event, void *context)
{
trace_seq_printf(s, "hrtimer=");

View File

@ -25,7 +25,7 @@
#include "trace-seq.h"
static int call_site_handler(struct trace_seq *s, struct tep_record *record,
struct tep_event_format *event, void *context)
struct tep_event *event, void *context)
{
struct tep_format_field *field;
unsigned long long val, addr;

View File

@ -249,7 +249,7 @@ static const char *find_exit_reason(unsigned isa, int val)
}
static int print_exit_reason(struct trace_seq *s, struct tep_record *record,
struct tep_event_format *event, const char *field)
struct tep_event *event, const char *field)
{
unsigned long long isa;
unsigned long long val;
@ -270,7 +270,7 @@ static int print_exit_reason(struct trace_seq *s, struct tep_record *record,
}
static int kvm_exit_handler(struct trace_seq *s, struct tep_record *record,
struct tep_event_format *event, void *context)
struct tep_event *event, void *context)
{
unsigned long long info1 = 0, info2 = 0;
@ -293,7 +293,7 @@ static int kvm_exit_handler(struct trace_seq *s, struct tep_record *record,
static int kvm_emulate_insn_handler(struct trace_seq *s,
struct tep_record *record,
struct tep_event_format *event, void *context)
struct tep_event *event, void *context)
{
unsigned long long rip, csbase, len, flags, failed;
int llen;
@ -332,7 +332,7 @@ static int kvm_emulate_insn_handler(struct trace_seq *s,
static int kvm_nested_vmexit_inject_handler(struct trace_seq *s, struct tep_record *record,
struct tep_event_format *event, void *context)
struct tep_event *event, void *context)
{
if (print_exit_reason(s, record, event, "exit_code") < 0)
return -1;
@ -346,7 +346,7 @@ static int kvm_nested_vmexit_inject_handler(struct trace_seq *s, struct tep_reco
}
static int kvm_nested_vmexit_handler(struct trace_seq *s, struct tep_record *record,
struct tep_event_format *event, void *context)
struct tep_event *event, void *context)
{
tep_print_num_field(s, "rip %llx ", event, "rip", record, 1);
@ -372,7 +372,7 @@ union kvm_mmu_page_role {
};
static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
struct tep_event_format *event, void *context)
struct tep_event *event, void *context)
{
unsigned long long val;
static const char *access_str[] = {
@ -387,7 +387,7 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
/*
* We can only use the structure if file is of the same
* endianess.
* endianness.
*/
if (tep_is_file_bigendian(event->pevent) ==
tep_is_host_bigendian(event->pevent)) {
@ -419,7 +419,7 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
static int kvm_mmu_get_page_handler(struct trace_seq *s,
struct tep_record *record,
struct tep_event_format *event, void *context)
struct tep_event *event, void *context)
{
unsigned long long val;

View File

@ -26,7 +26,7 @@
#define INDENT 65
static void print_string(struct trace_seq *s, struct tep_event_format *event,
static void print_string(struct trace_seq *s, struct tep_event *event,
const char *name, const void *data)
{
struct tep_format_field *f = tep_find_field(event, name);
@ -60,7 +60,7 @@ static void print_string(struct trace_seq *s, struct tep_event_format *event,
static int drv_bss_info_changed(struct trace_seq *s,
struct tep_record *record,
struct tep_event_format *event, void *context)
struct tep_event *event, void *context)
{
void *data = record->data;

View File

@ -67,7 +67,7 @@ static void write_and_save_comm(struct tep_format_field *field,
static int sched_wakeup_handler(struct trace_seq *s,
struct tep_record *record,
struct tep_event_format *event, void *context)
struct tep_event *event, void *context)
{
struct tep_format_field *field;
unsigned long long val;
@ -96,7 +96,7 @@ static int sched_wakeup_handler(struct trace_seq *s,
static int sched_switch_handler(struct trace_seq *s,
struct tep_record *record,
struct tep_event_format *event, void *context)
struct tep_event *event, void *context)
{
struct tep_format_field *field;
unsigned long long val;

View File

@ -58,6 +58,9 @@ SUBSYSTEM
'futex'::
Futex stressing benchmarks.
'epoll'::
Eventpoll (epoll) stressing benchmarks.
'all'::
All benchmark subsystems.
@ -203,6 +206,13 @@ Suite for evaluating requeue calls.
*lock-pi*::
Suite for evaluating futex lock_pi calls.
SUITES FOR 'epoll'
~~~~~~~~~~~~~~~~~~
*wait*::
Suite for evaluating concurrent epoll_wait calls.
*ctl*::
Suite for evaluating multiple epoll_ctl calls.
SEE ALSO
--------

View File

@ -199,6 +199,12 @@ colors.*::
Colors for headers in the output of a sub-commands (top, report).
Default values are 'white', 'blue'.
core.*::
core.proc-map-timeout::
Sets a timeout (in milliseconds) for parsing /proc/<pid>/maps files.
Can be overridden by the --proc-map-timeout option on supported
subcommands. The default timeout is 500ms.
tui.*, gtk.*::
Subcommands that can be configured here are 'top', 'report' and 'annotate'.
These values are booleans, for example:
@ -515,6 +521,38 @@ diff.*::
Possible values are 'delta', 'delta-abs', 'ratio' and
'wdiff'. Default is 'delta'.
trace.*::
trace.add_events::
Allows adding a set of events to add to the ones specified
by the user, or use as a default one if none was specified.
The initial use case is to add augmented_raw_syscalls.o to
activate the 'perf trace' logic that looks for syscall
pointer contents after the normal tracepoint payload.
trace.args_alignment::
Number of columns to align the argument list, default is 70,
use 40 for the strace default, zero to no alignment.
trace.no_inherit::
Do not follow children threads.
trace.show_arg_names::
Should syscall argument names be printed? If not then trace.show_zeros
will be set.
trace.show_duration::
Show syscall duration.
trace.show_prefix::
If set to 'yes' will show common string prefixes in tables. The default
is to remove the common prefix in things like "MAP_SHARED", showing just "SHARED".
trace.show_timestamp::
Show syscall start timestamp.
trace.show_zeros::
Do not suppress syscall arguments that are equal to zero.
SEE ALSO
--------
linkperf:perf[1]

View File

@ -172,7 +172,7 @@ like cycles and instructions and some software events.
Other PMUs and global measurements are normally root only.
Some event qualifiers, such as "any", are also root only.
This can be overriden by setting the kernel.perf_event_paranoid
This can be overridden by setting the kernel.perf_event_paranoid
sysctl to -1, which allows non root to use these events.
For accessing trace point events perf needs to have read access to

View File

@ -435,6 +435,11 @@ Specify vmlinux path which has debuginfo.
--buildid-all::
Record build-id of all DSOs regardless whether it's actually hit or not.
--aio[=n]::
Use <n> control blocks in asynchronous (Posix AIO) trace writing mode (default: 1, max: 4).
Asynchronous mode is supported only when linking Perf tool with libc library
providing implementation for Posix AIO API.
--all-kernel::
Configure all used events to run in kernel space.

View File

@ -126,6 +126,14 @@ OPTIONS
And default sort keys are changed to comm, dso_from, symbol_from, dso_to
and symbol_to, see '--branch-stack'.
When the sort key symbol is specified, columns "IPC" and "IPC Coverage"
are enabled automatically. Column "IPC" reports the average IPC per function
and column "IPC coverage" reports the percentage of instructions with
sampled IPC in this function. IPC means Instruction Per Cycle. If it's low,
it indicates there may be a performance bottleneck when the function is
executed, such as a memory access bottleneck. If a function has high overhead
and low IPC, it's worth further analyzing it to optimize its performance.
If the --mem-mode option is used, the following sort keys are also available
(incompatible with --branch-stack):
symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
@ -244,7 +252,7 @@ OPTIONS
Usually more convenient to use --branch-history for this.
value can be:
- percent: diplay overhead percent (default)
- percent: display overhead percent (default)
- period: display event period
- count: display event count

View File

@ -117,7 +117,7 @@ OPTIONS
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc.
brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace

View File

@ -50,7 +50,7 @@ report::
/sys/bus/event_source/devices/<pmu>/format/*
Note that the last two syntaxes support prefix and glob matching in
the PMU name to simplify creation of events accross multiple instances
the PMU name to simplify creation of events across multiple instances
of the same type of PMU in large systems (e.g. memory controller PMUs).
Multiple PMU instances are typical for uncore PMUs, so the prefix
'uncore_' is also ignored when performing this match.
@ -277,7 +277,7 @@ echo 0 > /proc/sys/kernel/nmi_watchdog
for best results. Otherwise the bottlenecks may be inconsistent
on workload with changing phases.
This enables --metric-only, unless overriden with --no-metric-only.
This enables --metric-only, unless overridden with --no-metric-only.
To interpret the results it is usually needed to know on which
CPUs the workload runs on. If needed the CPUs can be forced using

View File

@ -70,6 +70,9 @@ Default is to monitor all CPUS.
--ignore-vmlinux::
Ignore vmlinux files.
--kallsyms=<file>::
kallsyms pathname
-m <pages>::
--mmap-pages=<pages>::
Number of mmap data pages (must be a power of two) or size

View File

@ -205,6 +205,12 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
because the file may be huge. A time out is needed in such cases.
This option sets the time out limit. The default value is 500 ms.
--sort-events::
Do sorting on batches of events, use when noticing out of order events that
may happen, for instance, when a thread gets migrated to a different CPU
while processing a syscall.
PAGEFAULTS
----------

View File

@ -294,16 +294,21 @@ ifndef NO_BIONIC
$(call feature_check,bionic)
ifeq ($(feature-bionic), 1)
BIONIC := 1
CFLAGS += -DLACKS_SIGQUEUE_PROTOTYPE
CFLAGS += -DLACKS_OPEN_MEMSTREAM_PROTOTYPE
EXTLIBS := $(filter-out -lrt,$(EXTLIBS))
EXTLIBS := $(filter-out -lpthread,$(EXTLIBS))
endif
endif
ifeq ($(feature-eventfd), 1)
CFLAGS += -DHAVE_EVENTFD
endif
ifeq ($(feature-get_current_dir_name), 1)
CFLAGS += -DHAVE_GET_CURRENT_DIR_NAME
endif
ifdef NO_LIBELF
NO_DWARF := 1
NO_DEMANGLE := 1
@ -362,6 +367,12 @@ ifeq ($(feature-glibc), 1)
CFLAGS += -DHAVE_GLIBC_SUPPORT
endif
ifeq ($(feature-libaio), 1)
ifndef NO_AIO
CFLAGS += -DHAVE_AIO_SUPPORT
endif
endif
ifdef NO_DWARF
NO_LIBDW_DWARF_UNWIND := 1
endif
@ -585,7 +596,7 @@ endif
ifndef NO_LIBCRYPTO
ifneq ($(feature-libcrypto), 1)
msg := $(warning No libcrypto.h found, disables jitted code injection, please install libssl-devel or libssl-dev);
msg := $(warning No libcrypto.h found, disables jitted code injection, please install openssl-devel or libssl-dev);
NO_LIBCRYPTO := 1
else
CFLAGS += -DHAVE_LIBCRYPTO_SUPPORT
@ -852,6 +863,13 @@ ifndef NO_JVMTI
$(call feature_check,jvmti)
ifeq ($(feature-jvmti), 1)
$(call detected_var,JDIR)
ifndef NO_JVMTI_CMLR
FEATURE_CHECK_CFLAGS-jvmti-cmlr := $(FEATURE_CHECK_CFLAGS-jvmti)
$(call feature_check,jvmti-cmlr)
ifeq ($(feature-jvmti-cmlr), 1)
CFLAGS += -DHAVE_JVMTI_CMLR
endif
endif # NO_JVMTI_CMLR
else
$(warning No openjdk development package found, please install JDK package, e.g. openjdk-8-jdk, java-1.8.0-openjdk-devel)
NO_JVMTI := 1

View File

@ -95,11 +95,19 @@ include ../scripts/utilities.mak
#
# Define NO_JVMTI if you do not want jvmti agent built
#
# Define NO_JVMTI_CMLR (debug only) if you do not want to process CMLR
# data for java source lines.
#
# Define LIBCLANGLLVM if you DO want builtin clang and llvm support.
# When selected, pass LLVM_CONFIG=/path/to/llvm-config to `make' if
# llvm-config is not in $PATH.
#
# Define NO_CORESIGHT if you do not want support for CoreSight trace decoding.
#
# Define NO_AIO if you do not want support of Posix AIO based trace
# streaming for record mode. Currently Posix AIO trace streaming is
# supported only when linking with glibc.
#
# As per kernel Makefile, avoid funny character set dependencies
unexport LC_ALL
@ -388,6 +396,7 @@ SHELL = $(SHELL_PATH)
linux_uapi_dir := $(srctree)/tools/include/uapi/linux
asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/
x86_arch_asm_uapi_dir := $(srctree)/tools/arch/x86/include/uapi/asm/
beauty_outdir := $(OUTPUT)trace/beauty/generated
beauty_ioctl_outdir := $(beauty_outdir)/ioctl
@ -401,6 +410,12 @@ _dummy := $(shell [ -d '$(beauty_ioctl_outdir)' ] || mkdir -p '$(beauty_ioctl_ou
$(drm_ioctl_array): $(drm_hdr_dir)/drm.h $(drm_hdr_dir)/i915_drm.h $(drm_ioctl_tbl)
$(Q)$(SHELL) '$(drm_ioctl_tbl)' $(drm_hdr_dir) > $@
fadvise_advice_array := $(beauty_outdir)/fadvise_advice_array.c
fadvise_advice_tbl := $(srctree)/tools/perf/trace/beauty/fadvise.sh
$(fadvise_advice_array): $(linux_uapi_dir)/in.h $(fadvise_advice_tbl)
$(Q)$(SHELL) '$(fadvise_advice_tbl)' $(linux_uapi_dir) > $@
pkey_alloc_access_rights_array := $(beauty_outdir)/pkey_alloc_access_rights_array.c
asm_generic_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/
pkey_alloc_access_rights_tbl := $(srctree)/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
@ -466,7 +481,7 @@ $(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_t
mmap_flags_array := $(beauty_outdir)/mmap_flags_array.c
mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh
$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(arch_asm_uapi_dir)/mman.h $(mmap_flags_tbl)
$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_flags_tbl)
$(Q)$(SHELL) '$(mmap_flags_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@
mount_flags_array := $(beauty_outdir)/mount_flags_array.c
@ -482,6 +497,18 @@ prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh
$(prctl_option_array): $(prctl_hdr_dir)/prctl.h $(prctl_option_tbl)
$(Q)$(SHELL) '$(prctl_option_tbl)' $(prctl_hdr_dir) > $@
x86_arch_prctl_code_array := $(beauty_outdir)/x86_arch_prctl_code_array.c
x86_arch_prctl_code_tbl := $(srctree)/tools/perf/trace/beauty/x86_arch_prctl.sh
$(x86_arch_prctl_code_array): $(x86_arch_asm_uapi_dir)/prctl.h $(x86_arch_prctl_code_tbl)
$(Q)$(SHELL) '$(x86_arch_prctl_code_tbl)' $(x86_arch_asm_uapi_dir) > $@
rename_flags_array := $(beauty_outdir)/rename_flags_array.c
rename_flags_tbl := $(srctree)/tools/perf/trace/beauty/rename_flags.sh
$(rename_flags_array): $(linux_uapi_dir)/fs.h $(rename_flags_tbl)
$(Q)$(SHELL) '$(rename_flags_tbl)' $(linux_uapi_dir) > $@
arch_errno_name_array := $(beauty_outdir)/arch_errno_name_array.c
arch_errno_hdr_dir := $(srctree)/tools
arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh
@ -584,6 +611,7 @@ __build-dir = $(subst $(OUTPUT),,$(dir $@))
build-dir = $(if $(__build-dir),$(__build-dir),.)
prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioctl_array) \
$(fadvise_advice_array) \
$(pkey_alloc_access_rights_array) \
$(sndrv_pcm_ioctl_array) \
$(sndrv_ctl_ioctl_array) \
@ -596,6 +624,8 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
$(mount_flags_array) \
$(perf_ioctl_array) \
$(prctl_option_array) \
$(x86_arch_prctl_code_array) \
$(rename_flags_array) \
$(arch_errno_name_array)
$(OUTPUT)%.o: %.c prepare FORCE
@ -639,7 +669,7 @@ $(LIBJVMTI_IN): FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=jvmti obj=jvmti
$(OUTPUT)$(LIBJVMTI): $(LIBJVMTI_IN)
$(QUIET_LINK)$(CC) -shared -Wl,-soname -Wl,$(LIBJVMTI) -o $@ $<
$(QUIET_LINK)$(CC) $(LDFLAGS) -shared -Wl,-soname -Wl,$(LIBJVMTI) -o $@ $<
endif
$(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
@ -879,6 +909,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)util/intel-pt-decoder/inat-tables.c \
$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
$(OUTPUT)pmu-events/pmu-events.c \
$(OUTPUT)$(fadvise_advice_array) \
$(OUTPUT)$(madvise_behavior_array) \
$(OUTPUT)$(mmap_flags_array) \
$(OUTPUT)$(mount_flags_array) \
@ -892,6 +923,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)$(vhost_virtio_ioctl_array) \
$(OUTPUT)$(perf_ioctl_array) \
$(OUTPUT)$(prctl_option_array) \
$(OUTPUT)$(x86_arch_prctl_code_array) \
$(OUTPUT)$(rename_flags_array) \
$(OUTPUT)$(arch_errno_name_array)
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean

View File

@ -0,0 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/compiler.h>
static int arc__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
{
arch->initialized = true;
arch->objdump.comment_char = ';';
return 0;
}

View File

@ -5,6 +5,13 @@
#include "../util/util.h"
#include "../util/debug.h"
const char *const arc_triplets[] = {
"arc-linux-",
"arc-snps-linux-uclibc-",
"arc-snps-linux-gnu-",
NULL
};
const char *const arm_triplets[] = {
"arm-eabi-",
"arm-linux-androideabi-",
@ -147,7 +154,9 @@ static int perf_env__lookup_binutils_path(struct perf_env *env,
zfree(&buf);
}
if (!strcmp(arch, "arm"))
if (!strcmp(arch, "arc"))
path_list = arc_triplets;
else if (!strcmp(arch, "arm"))
path_list = arm_triplets;
else if (!strcmp(arch, "arm64"))
path_list = arm64_triplets;
@ -200,3 +209,13 @@ int perf_env__lookup_objdump(struct perf_env *env, const char **path)
return perf_env__lookup_binutils_path(env, "objdump", path);
}
/*
* Some architectures have a single address space for kernel and user addresses,
* which makes it possible to determine if an address is in kernel space or user
* space.
*/
bool perf_env__single_address_space(struct perf_env *env)
{
return strcmp(perf_env__arch(env), "sparc");
}

View File

@ -5,5 +5,6 @@
#include "../util/env.h"
int perf_env__lookup_objdump(struct perf_env *env, const char **path);
bool perf_env__single_address_space(struct perf_env *env);
#endif /* ARCH_PERF_COMMON_H */

View File

@ -170,7 +170,7 @@ static int test_data_set(struct test_data *dat_set, int x86_64)
*
* If the test passes %0 is returned, otherwise %-1 is returned. Use the
* verbose (-v) option to see all the instructions and whether or not they
* decoded successfuly.
* decoded successfully.
*/
int test__insn_x86(struct test *test __maybe_unused, int subtest __maybe_unused)
{

View File

@ -4,6 +4,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
#include "../../util/header.h"
@ -70,9 +71,72 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
{
char *buf = malloc(128);
if (buf && __get_cpuid(buf, 128, "%s-%u-%X$") < 0) {
if (buf && __get_cpuid(buf, 128, "%s-%u-%X-%X$") < 0) {
free(buf);
return NULL;
}
return buf;
}
/* Full CPUID format for x86 is vendor-family-model-stepping */
static bool is_full_cpuid(const char *id)
{
const char *tmp = id;
int count = 0;
while ((tmp = strchr(tmp, '-')) != NULL) {
count++;
tmp++;
}
if (count == 3)
return true;
return false;
}
int strcmp_cpuid_str(const char *mapcpuid, const char *id)
{
regex_t re;
regmatch_t pmatch[1];
int match;
bool full_mapcpuid = is_full_cpuid(mapcpuid);
bool full_cpuid = is_full_cpuid(id);
/*
* Full CPUID format is required to identify a platform.
* Error out if the cpuid string is incomplete.
*/
if (full_mapcpuid && !full_cpuid) {
pr_info("Invalid CPUID %s. Full CPUID is required, "
"vendor-family-model-stepping\n", id);
return 1;
}
if (regcomp(&re, mapcpuid, REG_EXTENDED) != 0) {
/* Warn unable to generate match particular string. */
pr_info("Invalid regular expression %s\n", mapcpuid);
return 1;
}
match = !regexec(&re, id, 1, pmatch, 0);
regfree(&re);
if (match) {
size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so);
size_t cpuid_len;
/* If the full CPUID format isn't required,
* ignoring the stepping.
*/
if (!full_mapcpuid && full_cpuid)
cpuid_len = strrchr(id, '-') - id;
else
cpuid_len = strlen(id);
/* Verify the entire string matched. */
if (match_len == cpuid_len)
return 0;
}
return 1;
}

View File

@ -524,10 +524,21 @@ static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
struct perf_evsel *evsel)
{
int err;
char c;
if (!evsel)
return 0;
/*
* If supported, force pass-through config term (pt=1) even if user
* sets pt=0, which avoids senseless kernel errors.
*/
if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
!(evsel->attr.config & 1)) {
pr_warning("pt=0 doesn't make sense, forcing pt=1\n");
evsel->attr.config |= 1;
}
err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds",
"cyc_thresh", "caps/psb_cyc",
evsel->attr.config);

View File

@ -156,7 +156,7 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid)
if (strstr(cpuid, "Intel")) {
kvm->exit_reasons = vmx_exit_reasons;
kvm->exit_reasons_isa = "VMX";
} else if (strstr(cpuid, "AMD")) {
} else if (strstr(cpuid, "AMD") || strstr(cpuid, "Hygon")) {
kvm->exit_reasons = svm_exit_reasons;
kvm->exit_reasons_isa = "SVM";
} else

View File

@ -7,6 +7,9 @@ perf-y += futex-wake-parallel.o
perf-y += futex-requeue.o
perf-y += futex-lock-pi.o
perf-y += epoll-wait.o
perf-y += epoll-ctl.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o

View File

@ -38,6 +38,9 @@ int bench_futex_requeue(int argc, const char **argv);
/* pi futexes */
int bench_futex_lock_pi(int argc, const char **argv);
int bench_epoll_wait(int argc, const char **argv);
int bench_epoll_ctl(int argc, const char **argv);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
#define BENCH_FORMAT_SIMPLE_STR "simple"
@ -48,4 +51,15 @@ int bench_futex_lock_pi(int argc, const char **argv);
extern int bench_format;
extern unsigned int bench_repeat;
#ifndef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
#include <pthread.h>
#include <linux/compiler.h>
static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr __maybe_unused,
size_t cpusetsize __maybe_unused,
cpu_set_t *cpuset __maybe_unused)
{
return 0;
}
#endif
#endif

View File

@ -0,0 +1,413 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2018 Davidlohr Bueso.
*
* Benchmark the various operations allowed for epoll_ctl(2).
* The idea is to concurrently stress a single epoll instance
*/
#ifdef HAVE_EVENTFD
/* For the CLR_() macros */
#include <string.h>
#include <pthread.h>
#include <errno.h>
#include <inttypes.h>
#include <signal.h>
#include <stdlib.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include "../util/stat.h"
#include <subcmd/parse-options.h>
#include "bench.h"
#include "cpumap.h"
#include <err.h>
#define printinfo(fmt, arg...) \
do { if (__verbose) printf(fmt, ## arg); } while (0)
static unsigned int nthreads = 0;
static unsigned int nsecs = 8;
struct timeval start, end, runtime;
static bool done, __verbose, randomize;
/*
* epoll related shared variables.
*/
/* Maximum number of nesting allowed inside epoll sets */
#define EPOLL_MAXNESTS 4
enum {
OP_EPOLL_ADD,
OP_EPOLL_MOD,
OP_EPOLL_DEL,
EPOLL_NR_OPS,
};
static int epollfd;
static int *epollfdp;
static bool noaffinity;
static unsigned int nested = 0;
/* amount of fds to monitor, per thread */
static unsigned int nfds = 64;
static pthread_mutex_t thread_lock;
static unsigned int threads_starting;
static struct stats all_stats[EPOLL_NR_OPS];
static pthread_cond_t thread_parent, thread_worker;
struct worker {
int tid;
pthread_t thread;
unsigned long ops[EPOLL_NR_OPS];
int *fdmap;
};
static const struct option options[] = {
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
OPT_UINTEGER('f', "nfds", &nfds, "Specify amount of file descriptors to monitor for each thread"),
OPT_BOOLEAN( 'n', "noaffinity", &noaffinity, "Disables CPU affinity"),
OPT_UINTEGER( 'N', "nested", &nested, "Nesting level epoll hierarchy (default is 0, no nesting)"),
OPT_BOOLEAN( 'R', "randomize", &randomize, "Perform random operations on random fds"),
OPT_BOOLEAN( 'v', "verbose", &__verbose, "Verbose mode"),
OPT_END()
};
static const char * const bench_epoll_ctl_usage[] = {
"perf bench epoll ctl <options>",
NULL
};
static void toggle_done(int sig __maybe_unused,
siginfo_t *info __maybe_unused,
void *uc __maybe_unused)
{
/* inform all threads that we're done for the day */
done = true;
gettimeofday(&end, NULL);
timersub(&end, &start, &runtime);
}
static void nest_epollfd(void)
{
unsigned int i;
struct epoll_event ev;
if (nested > EPOLL_MAXNESTS)
nested = EPOLL_MAXNESTS;
printinfo("Nesting level(s): %d\n", nested);
epollfdp = calloc(nested, sizeof(int));
if (!epollfd)
err(EXIT_FAILURE, "calloc");
for (i = 0; i < nested; i++) {
epollfdp[i] = epoll_create(1);
if (epollfd < 0)
err(EXIT_FAILURE, "epoll_create");
}
ev.events = EPOLLHUP; /* anything */
ev.data.u64 = i; /* any number */
for (i = nested - 1; i; i--) {
if (epoll_ctl(epollfdp[i - 1], EPOLL_CTL_ADD,
epollfdp[i], &ev) < 0)
err(EXIT_FAILURE, "epoll_ctl");
}
if (epoll_ctl(epollfd, EPOLL_CTL_ADD, *epollfdp, &ev) < 0)
err(EXIT_FAILURE, "epoll_ctl");
}
static inline void do_epoll_op(struct worker *w, int op, int fd)
{
int error;
struct epoll_event ev;
ev.events = EPOLLIN;
ev.data.u64 = fd;
switch (op) {
case OP_EPOLL_ADD:
error = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev);
break;
case OP_EPOLL_MOD:
ev.events = EPOLLOUT;
error = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &ev);
break;
case OP_EPOLL_DEL:
error = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
break;
default:
error = 1;
break;
}
if (!error)
w->ops[op]++;
}
static inline void do_random_epoll_op(struct worker *w)
{
unsigned long rnd1 = random(), rnd2 = random();
int op, fd;
fd = w->fdmap[rnd1 % nfds];
op = rnd2 % EPOLL_NR_OPS;
do_epoll_op(w, op, fd);
}
static void *workerfn(void *arg)
{
unsigned int i;
struct worker *w = (struct worker *) arg;
struct timespec ts = { .tv_sec = 0,
.tv_nsec = 250 };
pthread_mutex_lock(&thread_lock);
threads_starting--;
if (!threads_starting)
pthread_cond_signal(&thread_parent);
pthread_cond_wait(&thread_worker, &thread_lock);
pthread_mutex_unlock(&thread_lock);
/* Let 'em loose */
do {
/* random */
if (randomize) {
do_random_epoll_op(w);
} else {
for (i = 0; i < nfds; i++) {
do_epoll_op(w, OP_EPOLL_ADD, w->fdmap[i]);
do_epoll_op(w, OP_EPOLL_MOD, w->fdmap[i]);
do_epoll_op(w, OP_EPOLL_DEL, w->fdmap[i]);
}
}
nanosleep(&ts, NULL);
} while (!done);
return NULL;
}
static void init_fdmaps(struct worker *w, int pct)
{
unsigned int i;
int inc;
struct epoll_event ev;
if (!pct)
return;
inc = 100/pct;
for (i = 0; i < nfds; i+=inc) {
ev.data.fd = w->fdmap[i];
ev.events = EPOLLIN;
if (epoll_ctl(epollfd, EPOLL_CTL_ADD, w->fdmap[i], &ev) < 0)
err(EXIT_FAILURE, "epoll_ct");
}
}
static int do_threads(struct worker *worker, struct cpu_map *cpu)
{
pthread_attr_t thread_attr, *attrp = NULL;
cpu_set_t cpuset;
unsigned int i, j;
int ret;
if (!noaffinity)
pthread_attr_init(&thread_attr);
for (i = 0; i < nthreads; i++) {
struct worker *w = &worker[i];
w->tid = i;
w->fdmap = calloc(nfds, sizeof(int));
if (!w->fdmap)
return 1;
for (j = 0; j < nfds; j++) {
w->fdmap[j] = eventfd(0, EFD_NONBLOCK);
if (w->fdmap[j] < 0)
err(EXIT_FAILURE, "eventfd");
}
/*
* Lets add 50% of the fdmap to the epoll instance, and
* do it before any threads are started; otherwise there is
* an initial bias of the call failing (mod and del ops).
*/
if (randomize)
init_fdmaps(w, 50);
if (!noaffinity) {
CPU_ZERO(&cpuset);
CPU_SET(cpu->map[i % cpu->nr], &cpuset);
ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
if (ret)
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
attrp = &thread_attr;
}
ret = pthread_create(&w->thread, attrp, workerfn,
(void *)(struct worker *) w);
if (ret)
err(EXIT_FAILURE, "pthread_create");
}
if (!noaffinity)
pthread_attr_destroy(&thread_attr);
return ret;
}
static void print_summary(void)
{
int i;
unsigned long avg[EPOLL_NR_OPS];
double stddev[EPOLL_NR_OPS];
for (i = 0; i < EPOLL_NR_OPS; i++) {
avg[i] = avg_stats(&all_stats[i]);
stddev[i] = stddev_stats(&all_stats[i]);
}
printf("\nAveraged %ld ADD operations (+- %.2f%%)\n",
avg[OP_EPOLL_ADD], rel_stddev_stats(stddev[OP_EPOLL_ADD],
avg[OP_EPOLL_ADD]));
printf("Averaged %ld MOD operations (+- %.2f%%)\n",
avg[OP_EPOLL_MOD], rel_stddev_stats(stddev[OP_EPOLL_MOD],
avg[OP_EPOLL_MOD]));
printf("Averaged %ld DEL operations (+- %.2f%%)\n",
avg[OP_EPOLL_DEL], rel_stddev_stats(stddev[OP_EPOLL_DEL],
avg[OP_EPOLL_DEL]));
}
int bench_epoll_ctl(int argc, const char **argv)
{
int j, ret = 0;
struct sigaction act;
struct worker *worker = NULL;
struct cpu_map *cpu;
struct rlimit rl, prevrl;
unsigned int i;
argc = parse_options(argc, argv, options, bench_epoll_ctl_usage, 0);
if (argc) {
usage_with_options(bench_epoll_ctl_usage, options);
exit(EXIT_FAILURE);
}
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
cpu = cpu_map__new(NULL);
if (!cpu)
goto errmem;
/* a single, main epoll instance */
epollfd = epoll_create(1);
if (epollfd < 0)
err(EXIT_FAILURE, "epoll_create");
/*
* Deal with nested epolls, if any.
*/
if (nested)
nest_epollfd();
/* default to the number of CPUs */
if (!nthreads)
nthreads = cpu->nr;
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
goto errmem;
if (getrlimit(RLIMIT_NOFILE, &prevrl))
err(EXIT_FAILURE, "getrlimit");
rl.rlim_cur = rl.rlim_max = nfds * nthreads * 2 + 50;
printinfo("Setting RLIMIT_NOFILE rlimit from %" PRIu64 " to: %" PRIu64 "\n",
(uint64_t)prevrl.rlim_max, (uint64_t)rl.rlim_max);
if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
err(EXIT_FAILURE, "setrlimit");
printf("Run summary [PID %d]: %d threads doing epoll_ctl ops "
"%d file-descriptors for %d secs.\n\n",
getpid(), nthreads, nfds, nsecs);
for (i = 0; i < EPOLL_NR_OPS; i++)
init_stats(&all_stats[i]);
pthread_mutex_init(&thread_lock, NULL);
pthread_cond_init(&thread_parent, NULL);
pthread_cond_init(&thread_worker, NULL);
threads_starting = nthreads;
gettimeofday(&start, NULL);
do_threads(worker, cpu);
pthread_mutex_lock(&thread_lock);
while (threads_starting)
pthread_cond_wait(&thread_parent, &thread_lock);
pthread_cond_broadcast(&thread_worker);
pthread_mutex_unlock(&thread_lock);
sleep(nsecs);
toggle_done(0, NULL, NULL);
printinfo("main thread: toggling done\n");
for (i = 0; i < nthreads; i++) {
ret = pthread_join(worker[i].thread, NULL);
if (ret)
err(EXIT_FAILURE, "pthread_join");
}
/* cleanup & report results */
pthread_cond_destroy(&thread_parent);
pthread_cond_destroy(&thread_worker);
pthread_mutex_destroy(&thread_lock);
for (i = 0; i < nthreads; i++) {
unsigned long t[EPOLL_NR_OPS];
for (j = 0; j < EPOLL_NR_OPS; j++) {
t[j] = worker[i].ops[j];
update_stats(&all_stats[j], t[j]);
}
if (nfds == 1)
printf("[thread %2d] fdmap: %p [ add: %04ld; mod: %04ld; del: %04lds ops ]\n",
worker[i].tid, &worker[i].fdmap[0],
t[OP_EPOLL_ADD], t[OP_EPOLL_MOD], t[OP_EPOLL_DEL]);
else
printf("[thread %2d] fdmap: %p ... %p [ add: %04ld ops; mod: %04ld ops; del: %04ld ops ]\n",
worker[i].tid, &worker[i].fdmap[0],
&worker[i].fdmap[nfds-1],
t[OP_EPOLL_ADD], t[OP_EPOLL_MOD], t[OP_EPOLL_DEL]);
}
print_summary();
close(epollfd);
return ret;
errmem:
err(EXIT_FAILURE, "calloc");
}
#endif // HAVE_EVENTFD

View File

@ -0,0 +1,540 @@
// SPDX-License-Identifier: GPL-2.0
#ifdef HAVE_EVENTFD
/*
* Copyright (C) 2018 Davidlohr Bueso.
*
* This program benchmarks concurrent epoll_wait(2) monitoring multiple
* file descriptors under one or two load balancing models. The first,
* and default, is the single/combined queueing (which refers to a single
* epoll instance for N worker threads):
*
* |---> [worker A]
* |---> [worker B]
* [combined queue] .---> [worker C]
* |---> [worker D]
* |---> [worker E]
*
* While the second model, enabled via --multiq option, uses multiple
* queueing (which refers to one epoll instance per worker). For example,
* short lived tcp connections in a high throughput httpd server will
* ditribute the accept()'ing connections across CPUs. In this case each
* worker does a limited amount of processing.
*
* [queue A] ---> [worker]
* [queue B] ---> [worker]
* [queue C] ---> [worker]
* [queue D] ---> [worker]
* [queue E] ---> [worker]
*
* Naturally, the single queue will enforce more concurrency on the epoll
* instance, and can therefore scale poorly compared to multiple queues.
* However, this is a benchmark raw data and must be taken with a grain of
* salt when choosing how to make use of sys_epoll.
* Each thread has a number of private, nonblocking file descriptors,
* referred to as fdmap. A writer thread will constantly be writing to
* the fdmaps of all threads, minimizing each threads's chances of
* epoll_wait not finding any ready read events and blocking as this
* is not what we want to stress. The size of the fdmap can be adjusted
* by the user; enlarging the value will increase the chances of
* epoll_wait(2) blocking as the lineal writer thread will take "longer",
* at least at a high level.
*
* Note that because fds are private to each thread, this workload does
* not stress scenarios where multiple tasks are awoken per ready IO; ie:
* EPOLLEXCLUSIVE semantics.
*
* The end result/metric is throughput: number of ops/second where an
* operation consists of:
*
* epoll_wait(2) + [others]
*
* ... where [others] is the cost of re-adding the fd (EPOLLET),
* or rearming it (EPOLLONESHOT).
*
*
* The purpose of this is program is that it be useful for measuring
* kernel related changes to the sys_epoll, and not comparing different
* IO polling methods, for example. Hence everything is very adhoc and
* outputs raw microbenchmark numbers. Also this uses eventfd, similar
* tools tend to use pipes or sockets, but the result is the same.
*/
/* For the CLR_() macros */
#include <string.h>
#include <pthread.h>
#include <errno.h>
#include <inttypes.h>
#include <signal.h>
#include <stdlib.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <sys/types.h>
#include "../util/stat.h"
#include <subcmd/parse-options.h>
#include "bench.h"
#include "cpumap.h"
#include <err.h>
#define printinfo(fmt, arg...) \
do { if (__verbose) { printf(fmt, ## arg); fflush(stdout); } } while (0)
static unsigned int nthreads = 0;
static unsigned int nsecs = 8;
struct timeval start, end, runtime;
static bool wdone, done, __verbose, randomize, nonblocking;
/*
* epoll related shared variables.
*/
/* Maximum number of nesting allowed inside epoll sets */
#define EPOLL_MAXNESTS 4
static int epollfd;
static int *epollfdp;
static bool noaffinity;
static unsigned int nested = 0;
static bool et; /* edge-trigger */
static bool oneshot;
static bool multiq; /* use an epoll instance per thread */
/* amount of fds to monitor, per thread */
static unsigned int nfds = 64;
static pthread_mutex_t thread_lock;
static unsigned int threads_starting;
static struct stats throughput_stats;
static pthread_cond_t thread_parent, thread_worker;
struct worker {
int tid;
int epollfd; /* for --multiq */
pthread_t thread;
unsigned long ops;
int *fdmap;
};
static const struct option options[] = {
/* general benchmark options */
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
OPT_UINTEGER('f', "nfds", &nfds, "Specify amount of file descriptors to monitor for each thread"),
OPT_BOOLEAN( 'n', "noaffinity", &noaffinity, "Disables CPU affinity"),
OPT_BOOLEAN('R', "randomize", &randomize, "Enable random write behaviour (default is lineal)"),
OPT_BOOLEAN( 'v', "verbose", &__verbose, "Verbose mode"),
/* epoll specific options */
OPT_BOOLEAN( 'm', "multiq", &multiq, "Use multiple epoll instances (one per thread)"),
OPT_BOOLEAN( 'B', "nonblocking", &nonblocking, "Nonblocking epoll_wait(2) behaviour"),
OPT_UINTEGER( 'N', "nested", &nested, "Nesting level epoll hierarchy (default is 0, no nesting)"),
OPT_BOOLEAN( 'S', "oneshot", &oneshot, "Use EPOLLONESHOT semantics"),
OPT_BOOLEAN( 'E', "edge", &et, "Use Edge-triggered interface (default is LT)"),
OPT_END()
};
static const char * const bench_epoll_wait_usage[] = {
"perf bench epoll wait <options>",
NULL
};
/*
* Arrange the N elements of ARRAY in random order.
* Only effective if N is much smaller than RAND_MAX;
* if this may not be the case, use a better random
* number generator. -- Ben Pfaff.
*/
static void shuffle(void *array, size_t n, size_t size)
{
char *carray = array;
void *aux;
size_t i;
if (n <= 1)
return;
aux = calloc(1, size);
if (!aux)
err(EXIT_FAILURE, "calloc");
for (i = 1; i < n; ++i) {
size_t j = i + rand() / (RAND_MAX / (n - i) + 1);
j *= size;
memcpy(aux, &carray[j], size);
memcpy(&carray[j], &carray[i*size], size);
memcpy(&carray[i*size], aux, size);
}
free(aux);
}
static void *workerfn(void *arg)
{
int fd, ret, r;
struct worker *w = (struct worker *) arg;
unsigned long ops = w->ops;
struct epoll_event ev;
uint64_t val;
int to = nonblocking? 0 : -1;
int efd = multiq ? w->epollfd : epollfd;
pthread_mutex_lock(&thread_lock);
threads_starting--;
if (!threads_starting)
pthread_cond_signal(&thread_parent);
pthread_cond_wait(&thread_worker, &thread_lock);
pthread_mutex_unlock(&thread_lock);
do {
/*
* Block undefinitely waiting for the IN event.
* In order to stress the epoll_wait(2) syscall,
* call it event per event, instead of a larger
* batch (max)limit.
*/
do {
ret = epoll_wait(efd, &ev, 1, to);
} while (ret < 0 && errno == EINTR);
if (ret < 0)
err(EXIT_FAILURE, "epoll_wait");
fd = ev.data.fd;
do {
r = read(fd, &val, sizeof(val));
} while (!done && (r < 0 && errno == EAGAIN));
if (et) {
ev.events = EPOLLIN | EPOLLET;
ret = epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev);
}
if (oneshot) {
/* rearm the file descriptor with a new event mask */
ev.events |= EPOLLIN | EPOLLONESHOT;
ret = epoll_ctl(efd, EPOLL_CTL_MOD, fd, &ev);
}
ops++;
} while (!done);
if (multiq)
close(w->epollfd);
w->ops = ops;
return NULL;
}
static void nest_epollfd(struct worker *w)
{
unsigned int i;
struct epoll_event ev;
int efd = multiq ? w->epollfd : epollfd;
if (nested > EPOLL_MAXNESTS)
nested = EPOLL_MAXNESTS;
epollfdp = calloc(nested, sizeof(*epollfdp));
if (!epollfdp)
err(EXIT_FAILURE, "calloc");
for (i = 0; i < nested; i++) {
epollfdp[i] = epoll_create(1);
if (epollfdp[i] < 0)
err(EXIT_FAILURE, "epoll_create");
}
ev.events = EPOLLHUP; /* anything */
ev.data.u64 = i; /* any number */
for (i = nested - 1; i; i--) {
if (epoll_ctl(epollfdp[i - 1], EPOLL_CTL_ADD,
epollfdp[i], &ev) < 0)
err(EXIT_FAILURE, "epoll_ctl");
}
if (epoll_ctl(efd, EPOLL_CTL_ADD, *epollfdp, &ev) < 0)
err(EXIT_FAILURE, "epoll_ctl");
}
static void toggle_done(int sig __maybe_unused,
siginfo_t *info __maybe_unused,
void *uc __maybe_unused)
{
/* inform all threads that we're done for the day */
done = true;
gettimeofday(&end, NULL);
timersub(&end, &start, &runtime);
}
static void print_summary(void)
{
unsigned long avg = avg_stats(&throughput_stats);
double stddev = stddev_stats(&throughput_stats);
printf("\nAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
avg, rel_stddev_stats(stddev, avg),
(int) runtime.tv_sec);
}
static int do_threads(struct worker *worker, struct cpu_map *cpu)
{
pthread_attr_t thread_attr, *attrp = NULL;
cpu_set_t cpuset;
unsigned int i, j;
int ret, events = EPOLLIN;
if (oneshot)
events |= EPOLLONESHOT;
if (et)
events |= EPOLLET;
printinfo("starting worker/consumer %sthreads%s\n",
noaffinity ? "":"CPU affinity ",
nonblocking ? " (nonblocking)":"");
if (!noaffinity)
pthread_attr_init(&thread_attr);
for (i = 0; i < nthreads; i++) {
struct worker *w = &worker[i];
if (multiq) {
w->epollfd = epoll_create(1);
if (w->epollfd < 0)
err(EXIT_FAILURE, "epoll_create");
if (nested)
nest_epollfd(w);
}
w->tid = i;
w->fdmap = calloc(nfds, sizeof(int));
if (!w->fdmap)
return 1;
for (j = 0; j < nfds; j++) {
int efd = multiq ? w->epollfd : epollfd;
struct epoll_event ev;
w->fdmap[j] = eventfd(0, EFD_NONBLOCK);
if (w->fdmap[j] < 0)
err(EXIT_FAILURE, "eventfd");
ev.data.fd = w->fdmap[j];
ev.events = events;
ret = epoll_ctl(efd, EPOLL_CTL_ADD,
w->fdmap[j], &ev);
if (ret < 0)
err(EXIT_FAILURE, "epoll_ctl");
}
if (!noaffinity) {
CPU_ZERO(&cpuset);
CPU_SET(cpu->map[i % cpu->nr], &cpuset);
ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
if (ret)
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
attrp = &thread_attr;
}
ret = pthread_create(&w->thread, attrp, workerfn,
(void *)(struct worker *) w);
if (ret)
err(EXIT_FAILURE, "pthread_create");
}
if (!noaffinity)
pthread_attr_destroy(&thread_attr);
return ret;
}
static void *writerfn(void *p)
{
struct worker *worker = p;
size_t i, j, iter;
const uint64_t val = 1;
ssize_t sz;
struct timespec ts = { .tv_sec = 0,
.tv_nsec = 500 };
printinfo("starting writer-thread: doing %s writes ...\n",
randomize? "random":"lineal");
for (iter = 0; !wdone; iter++) {
if (randomize) {
shuffle((void *)worker, nthreads, sizeof(*worker));
}
for (i = 0; i < nthreads; i++) {
struct worker *w = &worker[i];
if (randomize) {
shuffle((void *)w->fdmap, nfds, sizeof(int));
}
for (j = 0; j < nfds; j++) {
do {
sz = write(w->fdmap[j], &val, sizeof(val));
} while (!wdone && (sz < 0 && errno == EAGAIN));
}
}
nanosleep(&ts, NULL);
}
printinfo("exiting writer-thread (total full-loops: %zd)\n", iter);
return NULL;
}
static int cmpworker(const void *p1, const void *p2)
{
struct worker *w1 = (struct worker *) p1;
struct worker *w2 = (struct worker *) p2;
return w1->tid > w2->tid;
}
int bench_epoll_wait(int argc, const char **argv)
{
int ret = 0;
struct sigaction act;
unsigned int i;
struct worker *worker = NULL;
struct cpu_map *cpu;
pthread_t wthread;
struct rlimit rl, prevrl;
argc = parse_options(argc, argv, options, bench_epoll_wait_usage, 0);
if (argc) {
usage_with_options(bench_epoll_wait_usage, options);
exit(EXIT_FAILURE);
}
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
cpu = cpu_map__new(NULL);
if (!cpu)
goto errmem;
/* a single, main epoll instance */
if (!multiq) {
epollfd = epoll_create(1);
if (epollfd < 0)
err(EXIT_FAILURE, "epoll_create");
/*
* Deal with nested epolls, if any.
*/
if (nested)
nest_epollfd(NULL);
}
printinfo("Using %s queue model\n", multiq ? "multi" : "single");
printinfo("Nesting level(s): %d\n", nested);
/* default to the number of CPUs and leave one for the writer pthread */
if (!nthreads)
nthreads = cpu->nr - 1;
worker = calloc(nthreads, sizeof(*worker));
if (!worker) {
goto errmem;
}
if (getrlimit(RLIMIT_NOFILE, &prevrl))
err(EXIT_FAILURE, "getrlimit");
rl.rlim_cur = rl.rlim_max = nfds * nthreads * 2 + 50;
printinfo("Setting RLIMIT_NOFILE rlimit from %" PRIu64 " to: %" PRIu64 "\n",
(uint64_t)prevrl.rlim_max, (uint64_t)rl.rlim_max);
if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
err(EXIT_FAILURE, "setrlimit");
printf("Run summary [PID %d]: %d threads monitoring%s on "
"%d file-descriptors for %d secs.\n\n",
getpid(), nthreads, oneshot ? " (EPOLLONESHOT semantics)": "", nfds, nsecs);
init_stats(&throughput_stats);
pthread_mutex_init(&thread_lock, NULL);
pthread_cond_init(&thread_parent, NULL);
pthread_cond_init(&thread_worker, NULL);
threads_starting = nthreads;
gettimeofday(&start, NULL);
do_threads(worker, cpu);
pthread_mutex_lock(&thread_lock);
while (threads_starting)
pthread_cond_wait(&thread_parent, &thread_lock);
pthread_cond_broadcast(&thread_worker);
pthread_mutex_unlock(&thread_lock);
/*
* At this point the workers should be blocked waiting for read events
* to become ready. Launch the writer which will constantly be writing
* to each thread's fdmap.
*/
ret = pthread_create(&wthread, NULL, writerfn,
(void *)(struct worker *) worker);
if (ret)
err(EXIT_FAILURE, "pthread_create");
sleep(nsecs);
toggle_done(0, NULL, NULL);
printinfo("main thread: toggling done\n");
sleep(1); /* meh */
wdone = true;
ret = pthread_join(wthread, NULL);
if (ret)
err(EXIT_FAILURE, "pthread_join");
/* cleanup & report results */
pthread_cond_destroy(&thread_parent);
pthread_cond_destroy(&thread_worker);
pthread_mutex_destroy(&thread_lock);
/* sort the array back before reporting */
if (randomize)
qsort(worker, nthreads, sizeof(struct worker), cmpworker);
for (i = 0; i < nthreads; i++) {
unsigned long t = worker[i].ops/runtime.tv_sec;
update_stats(&throughput_stats, t);
if (nfds == 1)
printf("[thread %2d] fdmap: %p [ %04ld ops/sec ]\n",
worker[i].tid, &worker[i].fdmap[0], t);
else
printf("[thread %2d] fdmap: %p ... %p [ %04ld ops/sec ]\n",
worker[i].tid, &worker[i].fdmap[0],
&worker[i].fdmap[nfds-1], t);
}
print_summary();
close(epollfd);
return ret;
errmem:
err(EXIT_FAILURE, "calloc");
}
#endif // HAVE_EVENTFD

View File

@ -86,16 +86,4 @@ futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wak
return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
val, opflags);
}
#ifndef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
#include <pthread.h>
#include <linux/compiler.h>
static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr __maybe_unused,
size_t cpusetsize __maybe_unused,
cpu_set_t *cpuset __maybe_unused)
{
return 0;
}
#endif
#endif /* _FUTEX_H */

View File

@ -14,6 +14,7 @@
* mem ... memory access performance
* numa ... NUMA scheduling and MM performance
* futex ... Futex performance
* epoll ... Event poll performance
*/
#include "perf.h"
#include "util/util.h"
@ -67,6 +68,15 @@ static struct bench futex_benchmarks[] = {
{ NULL, NULL, NULL }
};
#ifdef HAVE_EVENTFD
static struct bench epoll_benchmarks[] = {
{ "wait", "Benchmark epoll concurrent epoll_waits", bench_epoll_wait },
{ "ctl", "Benchmark epoll concurrent epoll_ctls", bench_epoll_ctl },
{ "all", "Run all futex benchmarks", NULL },
{ NULL, NULL, NULL }
};
#endif // HAVE_EVENTFD
struct collection {
const char *name;
const char *summary;
@ -80,6 +90,9 @@ static struct collection collections[] = {
{ "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks },
#endif
{"futex", "Futex stressing benchmarks", futex_benchmarks },
#ifdef HAVE_EVENTFD
{"epoll", "Epoll stressing benchmarks", epoll_benchmarks },
#endif
{ "all", "All benchmarks", NULL },
{ NULL, NULL, NULL }
};

View File

@ -196,6 +196,7 @@ int cmd_config(int argc, const char **argv)
pr_err("Error: takes no arguments\n");
parse_options_usage(config_usage, config_options, "l", 1);
} else {
do_action_list:
if (show_config(set) < 0) {
pr_err("Nothing configured, "
"please check your %s \n", config_filename);
@ -204,10 +205,8 @@ int cmd_config(int argc, const char **argv)
}
break;
default:
if (!argc) {
usage_with_options(config_usage, config_options);
break;
}
if (!argc)
goto do_action_list;
for (i = 0; argv[i]; i++) {
char *var, *value;

View File

@ -189,7 +189,7 @@ static void add_man_viewer(const char *name)
while (*p)
p = &((*p)->next);
*p = zalloc(sizeof(**p) + len + 1);
strncpy((*p)->name, name, len);
strcpy((*p)->name, name);
}
static int supported_man_viewer(const char *name, size_t len)

View File

@ -1364,7 +1364,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
"show events other than"
" HLT (x86 only) or Wait state (s390 only)"
" that take longer than duration usecs"),
OPT_UINTEGER(0, "proc-map-timeout", &kvm->opts.proc_map_timeout,
OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
OPT_END()
};
@ -1394,7 +1394,6 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
kvm->opts.target.uses_mmap = false;
kvm->opts.target.uid_str = NULL;
kvm->opts.target.uid = UINT_MAX;
kvm->opts.proc_map_timeout = 500;
symbol__init(NULL);
disable_buildid_cache();
@ -1453,8 +1452,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
perf_session__set_id_hdr_size(kvm->session);
ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true);
machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
kvm->evlist->threads, false,
kvm->opts.proc_map_timeout, 1);
kvm->evlist->threads, false, 1);
err = kvm_live_open_events(kvm);
if (err)
goto out;

View File

@ -124,6 +124,210 @@ static int record__write(struct record *rec, struct perf_mmap *map __maybe_unuse
return 0;
}
#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
void *buf, size_t size, off_t off)
{
int rc;
cblock->aio_fildes = trace_fd;
cblock->aio_buf = buf;
cblock->aio_nbytes = size;
cblock->aio_offset = off;
cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
do {
rc = aio_write(cblock);
if (rc == 0) {
break;
} else if (errno != EAGAIN) {
cblock->aio_fildes = -1;
pr_err("failed to queue perf data, error: %m\n");
break;
}
} while (1);
return rc;
}
static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
{
void *rem_buf;
off_t rem_off;
size_t rem_size;
int rc, aio_errno;
ssize_t aio_ret, written;
aio_errno = aio_error(cblock);
if (aio_errno == EINPROGRESS)
return 0;
written = aio_ret = aio_return(cblock);
if (aio_ret < 0) {
if (aio_errno != EINTR)
pr_err("failed to write perf data, error: %m\n");
written = 0;
}
rem_size = cblock->aio_nbytes - written;
if (rem_size == 0) {
cblock->aio_fildes = -1;
/*
* md->refcount is incremented in perf_mmap__push() for
* every enqueued aio write request so decrement it because
* the request is now complete.
*/
perf_mmap__put(md);
rc = 1;
} else {
/*
* aio write request may require restart with the
* reminder if the kernel didn't write whole
* chunk at once.
*/
rem_off = cblock->aio_offset + written;
rem_buf = (void *)(cblock->aio_buf + written);
record__aio_write(cblock, cblock->aio_fildes,
rem_buf, rem_size, rem_off);
rc = 0;
}
return rc;
}
static int record__aio_sync(struct perf_mmap *md, bool sync_all)
{
struct aiocb **aiocb = md->aio.aiocb;
struct aiocb *cblocks = md->aio.cblocks;
struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
int i, do_suspend;
do {
do_suspend = 0;
for (i = 0; i < md->aio.nr_cblocks; ++i) {
if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
if (sync_all)
aiocb[i] = NULL;
else
return i;
} else {
/*
* Started aio write is not complete yet
* so it has to be waited before the
* next allocation.
*/
aiocb[i] = &cblocks[i];
do_suspend = 1;
}
}
if (!do_suspend)
return -1;
while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
if (!(errno == EAGAIN || errno == EINTR))
pr_err("failed to sync perf data, error: %m\n");
}
} while (1);
}
static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
{
struct record *rec = to;
int ret, trace_fd = rec->session->data->file.fd;
rec->samples++;
ret = record__aio_write(cblock, trace_fd, bf, size, off);
if (!ret) {
rec->bytes_written += size;
if (switch_output_size(rec))
trigger_hit(&switch_output_trigger);
}
return ret;
}
static off_t record__aio_get_pos(int trace_fd)
{
return lseek(trace_fd, 0, SEEK_CUR);
}
static void record__aio_set_pos(int trace_fd, off_t pos)
{
lseek(trace_fd, pos, SEEK_SET);
}
static void record__aio_mmap_read_sync(struct record *rec)
{
int i;
struct perf_evlist *evlist = rec->evlist;
struct perf_mmap *maps = evlist->mmap;
if (!rec->opts.nr_cblocks)
return;
for (i = 0; i < evlist->nr_mmaps; i++) {
struct perf_mmap *map = &maps[i];
if (map->base)
record__aio_sync(map, true);
}
}
static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;
static int record__aio_parse(const struct option *opt,
const char *str,
int unset)
{
struct record_opts *opts = (struct record_opts *)opt->value;
if (unset) {
opts->nr_cblocks = 0;
} else {
if (str)
opts->nr_cblocks = strtol(str, NULL, 0);
if (!opts->nr_cblocks)
opts->nr_cblocks = nr_cblocks_default;
}
return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;
static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
{
return -1;
}
static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
{
return -1;
}
static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
return -1;
}
static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}
static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif
static int record__aio_enabled(struct record *rec)
{
return rec->opts.nr_cblocks > 0;
}
static int process_synthesized_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
@ -329,7 +533,7 @@ static int record__mmap_evlist(struct record *rec,
if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
opts->auxtrace_mmap_pages,
opts->auxtrace_snapshot_mode) < 0) {
opts->auxtrace_snapshot_mode, opts->nr_cblocks) < 0) {
if (errno == EPERM) {
pr_err("Permission error mapping pages.\n"
"Consider increasing "
@ -525,6 +729,8 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
int i;
int rc = 0;
struct perf_mmap *maps;
int trace_fd = rec->data.file.fd;
off_t off;
if (!evlist)
return 0;
@ -536,13 +742,30 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
return 0;
if (record__aio_enabled(rec))
off = record__aio_get_pos(trace_fd);
for (i = 0; i < evlist->nr_mmaps; i++) {
struct perf_mmap *map = &maps[i];
if (map->base) {
if (perf_mmap__push(map, rec, record__pushfn) != 0) {
rc = -1;
goto out;
if (!record__aio_enabled(rec)) {
if (perf_mmap__push(map, rec, record__pushfn) != 0) {
rc = -1;
goto out;
}
} else {
int idx;
/*
* Call record__aio_sync() to wait till map->data buffer
* becomes available after previous aio write request.
*/
idx = record__aio_sync(map, false);
if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
record__aio_set_pos(trace_fd, off);
rc = -1;
goto out;
}
}
}
@ -553,6 +776,9 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
}
}
if (record__aio_enabled(rec))
record__aio_set_pos(trace_fd, off);
/*
* Mark the round finished in case we wrote
* at least one event.
@ -641,8 +867,7 @@ static int record__synthesize_workload(struct record *rec, bool tail)
err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
process_synthesized_event,
&rec->session->machines.host,
rec->opts.sample_address,
rec->opts.proc_map_timeout);
rec->opts.sample_address);
thread_map__put(thread_map);
return err;
}
@ -658,6 +883,8 @@ record__switch_output(struct record *rec, bool at_exit)
/* Same Size: "2015122520103046"*/
char timestamp[] = "InvalidTimestamp";
record__aio_mmap_read_sync(rec);
record__synthesize(rec, true);
if (target__none(&rec->opts.target))
record__synthesize_workload(rec, true);
@ -857,7 +1084,7 @@ static int record__synthesize(struct record *rec, bool tail)
err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
process_synthesized_event, opts->sample_address,
opts->proc_map_timeout, 1);
1);
out:
return err;
}
@ -1168,6 +1395,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
record__synthesize_workload(rec, true);
out_child:
record__aio_mmap_read_sync(rec);
if (forks) {
int exit_status;
@ -1301,6 +1530,13 @@ static int perf_record_config(const char *var, const char *value, void *cb)
var = "call-graph.record-mode";
return perf_default_config(var, value, cb);
}
#ifdef HAVE_AIO_SUPPORT
if (!strcmp(var, "record.aio")) {
rec->opts.nr_cblocks = strtol(value, NULL, 0);
if (!rec->opts.nr_cblocks)
rec->opts.nr_cblocks = nr_cblocks_default;
}
#endif
return 0;
}
@ -1546,7 +1782,6 @@ static struct record record = {
.uses_mmap = true,
.default_per_cpu = true,
},
.proc_map_timeout = 500,
},
.tool = {
.sample = process_sample_event,
@ -1676,7 +1911,7 @@ static struct option __record_options[] = {
parse_clockid),
OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
"opts", "AUX area tracing Snapshot Mode", ""),
OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
"Record namespaces events"),
@ -1706,6 +1941,11 @@ static struct option __record_options[] = {
"signal"),
OPT_BOOLEAN(0, "dry-run", &dry_run,
"Parse options then exit"),
#ifdef HAVE_AIO_SUPPORT
OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
&nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
record__aio_parse),
#endif
OPT_END()
};
@ -1898,6 +2138,11 @@ int cmd_record(int argc, const char **argv)
goto out;
}
if (rec->opts.nr_cblocks > nr_cblocks_max)
rec->opts.nr_cblocks = nr_cblocks_max;
if (verbose > 0)
pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
err = __cmd_record(&record, argc, argv);
out:
perf_evlist__delete(rec->evlist);

View File

@ -85,6 +85,7 @@ struct report {
int socket_filter;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
struct branch_type_stat brtype_stat;
bool symbol_ipc;
};
static int report__config(const char *var, const char *value, void *cb)
@ -129,7 +130,7 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
struct mem_info *mi;
struct branch_info *bi;
if (!ui__has_annotation())
if (!ui__has_annotation() && !rep->symbol_ipc)
return 0;
hist__account_cycles(sample->branch_stack, al, sample,
@ -174,7 +175,7 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter,
struct perf_evsel *evsel = iter->evsel;
int err;
if (!ui__has_annotation())
if (!ui__has_annotation() && !rep->symbol_ipc)
return 0;
hist__account_cycles(sample->branch_stack, al, sample,
@ -1133,6 +1134,7 @@ int cmd_report(int argc, const char **argv)
.mode = PERF_DATA_MODE_READ,
};
int ret = hists__init();
char sort_tmp[128];
if (ret < 0)
return ret;
@ -1284,6 +1286,24 @@ int cmd_report(int argc, const char **argv)
else
use_browser = 0;
if (sort_order && strstr(sort_order, "ipc")) {
parse_options_usage(report_usage, options, "s", 1);
goto error;
}
if (sort_order && strstr(sort_order, "symbol")) {
if (sort__mode == SORT_MODE__BRANCH) {
snprintf(sort_tmp, sizeof(sort_tmp), "%s,%s",
sort_order, "ipc_lbr");
report.symbol_ipc = true;
} else {
snprintf(sort_tmp, sizeof(sort_tmp), "%s,%s",
sort_order, "ipc_null");
}
sort_order = sort_tmp;
}
if (setup_sorting(session->evlist) < 0) {
if (sort_order)
parse_options_usage(report_usage, options, "s", 1);
@ -1311,7 +1331,7 @@ int cmd_report(int argc, const char **argv)
* so don't allocate extra space that won't be used in the stdio
* implementation.
*/
if (ui__has_annotation()) {
if (ui__has_annotation() || report.symbol_ipc) {
ret = symbol__annotation_init();
if (ret < 0)
goto error;

View File

@ -96,6 +96,7 @@ enum perf_output_field {
PERF_OUTPUT_UREGS = 1U << 27,
PERF_OUTPUT_METRIC = 1U << 28,
PERF_OUTPUT_MISC = 1U << 29,
PERF_OUTPUT_SRCCODE = 1U << 30,
};
struct output_option {
@ -132,6 +133,7 @@ struct output_option {
{.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR},
{.str = "metric", .field = PERF_OUTPUT_METRIC},
{.str = "misc", .field = PERF_OUTPUT_MISC},
{.str = "srccode", .field = PERF_OUTPUT_SRCCODE},
};
enum {
@ -424,7 +426,7 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
pr_err("Display of DSO requested but no address to convert.\n");
return -EINVAL;
}
if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) {
if ((PRINT_FIELD(SRCLINE) || PRINT_FIELD(SRCCODE)) && !PRINT_FIELD(IP)) {
pr_err("Display of source line number requested but sample IP is not\n"
"selected. Hence, no address to lookup the source line number.\n");
return -EINVAL;
@ -566,30 +568,10 @@ static int perf_session__check_output_opt(struct perf_session *session)
return 0;
}
static int perf_sample__fprintf_iregs(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
FILE *fp
)
{
struct regs_dump *regs = &sample->intr_regs;
uint64_t mask = attr->sample_regs_intr;
unsigned i = 0, r;
int printed = 0;
if (!regs)
return 0;
for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
u64 val = regs->regs[i++];
printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val);
}
return printed;
}
static int perf_sample__fprintf_uregs(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
{
struct regs_dump *regs = &sample->user_regs;
uint64_t mask = attr->sample_regs_user;
unsigned i = 0, r;
int printed = 0;
@ -603,9 +585,25 @@ static int perf_sample__fprintf_uregs(struct perf_sample *sample,
printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val);
}
fprintf(fp, "\n");
return printed;
}
static int perf_sample__fprintf_iregs(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
{
return perf_sample__fprintf_regs(&sample->intr_regs,
attr->sample_regs_intr, fp);
}
static int perf_sample__fprintf_uregs(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
{
return perf_sample__fprintf_regs(&sample->user_regs,
attr->sample_regs_user, fp);
}
static int perf_sample__fprintf_start(struct perf_sample *sample,
struct thread *thread,
struct perf_evsel *evsel,
@ -728,8 +726,8 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
if (PRINT_FIELD(DSO)) {
memset(&alf, 0, sizeof(alf));
memset(&alt, 0, sizeof(alt));
thread__find_map(thread, sample->cpumode, from, &alf);
thread__find_map(thread, sample->cpumode, to, &alt);
thread__find_map_fb(thread, sample->cpumode, from, &alf);
thread__find_map_fb(thread, sample->cpumode, to, &alt);
}
printed += fprintf(fp, " 0x%"PRIx64, from);
@ -775,8 +773,8 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
from = br->entries[i].from;
to = br->entries[i].to;
thread__find_symbol(thread, sample->cpumode, from, &alf);
thread__find_symbol(thread, sample->cpumode, to, &alt);
thread__find_symbol_fb(thread, sample->cpumode, from, &alf);
thread__find_symbol_fb(thread, sample->cpumode, to, &alt);
printed += symbol__fprintf_symname_offs(alf.sym, &alf, fp);
if (PRINT_FIELD(DSO)) {
@ -820,11 +818,11 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
from = br->entries[i].from;
to = br->entries[i].to;
if (thread__find_map(thread, sample->cpumode, from, &alf) &&
if (thread__find_map_fb(thread, sample->cpumode, from, &alf) &&
!alf.map->dso->adjust_symbols)
from = map__map_ip(alf.map, from);
if (thread__find_map(thread, sample->cpumode, to, &alt) &&
if (thread__find_map_fb(thread, sample->cpumode, to, &alt) &&
!alt.map->dso->adjust_symbols)
to = map__map_ip(alt.map, to);
@ -911,6 +909,22 @@ static int grab_bb(u8 *buffer, u64 start, u64 end,
return len;
}
static int print_srccode(struct thread *thread, u8 cpumode, uint64_t addr)
{
struct addr_location al;
int ret = 0;
memset(&al, 0, sizeof(al));
thread__find_map(thread, cpumode, addr, &al);
if (!al.map)
return 0;
ret = map__fprintf_srccode(al.map, al.addr, stdout,
&thread->srccode_state);
if (ret)
ret += printf("\n");
return ret;
}
static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
struct perf_insn *x, u8 *inbuf, int len,
int insn, FILE *fp, int *total_cycles)
@ -1002,6 +1016,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
x.cpumode, x.cpu, &lastsym, attr, fp);
printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
&x, buffer, len, 0, fp, &total_cycles);
if (PRINT_FIELD(SRCCODE))
printed += print_srccode(thread, x.cpumode, br->entries[nr - 1].from);
}
/* Print all blocks */
@ -1031,12 +1047,16 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
if (ip == end) {
printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp,
&total_cycles);
if (PRINT_FIELD(SRCCODE))
printed += print_srccode(thread, x.cpumode, ip);
break;
} else {
printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip,
dump_insn(&x, ip, buffer + off, len - off, &ilen));
if (ilen == 0)
break;
if (PRINT_FIELD(SRCCODE))
print_srccode(thread, x.cpumode, ip);
insn++;
}
}
@ -1067,6 +1087,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", sample->ip,
dump_insn(&x, sample->ip, buffer, len, NULL));
if (PRINT_FIELD(SRCCODE))
print_srccode(thread, x.cpumode, sample->ip);
goto out;
}
for (off = 0; off <= end - start; off += ilen) {
@ -1074,6 +1096,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
dump_insn(&x, start + off, buffer + off, len - off, &ilen));
if (ilen == 0)
break;
if (PRINT_FIELD(SRCCODE))
print_srccode(thread, x.cpumode, start + off);
}
out:
return printed;
@ -1256,7 +1280,16 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp);
printed += perf_sample__fprintf_insn(sample, attr, thread, machine, fp);
return printed + fprintf(fp, "\n");
printed += fprintf(fp, "\n");
if (PRINT_FIELD(SRCCODE)) {
int ret = map__fprintf_srccode(al->map, al->addr, stdout,
&thread->srccode_state);
if (ret) {
printed += ret;
printed += printf("\n");
}
}
return printed;
}
static struct {
@ -1796,6 +1829,12 @@ static void process_event(struct perf_script *script,
fprintf(fp, "%16" PRIx64, sample->phys_addr);
fprintf(fp, "\n");
if (PRINT_FIELD(SRCCODE)) {
if (map__fprintf_srccode(al->map, al->addr, stdout,
&thread->srccode_state))
printf("\n");
}
if (PRINT_FIELD(METRIC))
perf_sample__fprint_metric(script, thread, evsel, sample, fp);

View File

@ -709,7 +709,7 @@ static int parse_metric_groups(const struct option *opt,
return metricgroup__parse_groups(opt, str, &stat_config.metric_events);
}
static const struct option stat_options[] = {
static struct option stat_options[] = {
OPT_BOOLEAN('T', "transaction", &transaction_run,
"hardware transaction statistics"),
OPT_CALLBACK('e', "event", &evsel_list, "event",
@ -1599,6 +1599,12 @@ int cmd_stat(int argc, const char **argv)
return -ENOMEM;
parse_events__shrink_config_terms();
/* String-parsing callback-based options would segfault when negated */
set_option_flag(stat_options, 'e', "event", PARSE_OPT_NONEG);
set_option_flag(stat_options, 'M', "metrics", PARSE_OPT_NONEG);
set_option_flag(stat_options, 'G', "cgroup", PARSE_OPT_NONEG);
argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
(const char **) stat_usage,
PARSE_OPT_STOP_AT_NON_OPTION);

View File

@ -43,6 +43,10 @@
#include "util/data.h"
#include "util/debug.h"
#ifdef LACKS_OPEN_MEMSTREAM_PROTOTYPE
FILE *open_memstream(char **ptr, size_t *sizeloc);
#endif
#define SUPPORT_OLD_POWER_EVENTS 1
#define PWR_EVENT_EXIT -1

View File

@ -46,6 +46,7 @@
#include "arch/common.h"
#include "util/debug.h"
#include "util/ordered-events.h"
#include <assert.h>
#include <elf.h>
@ -272,8 +273,6 @@ static void perf_top__print_sym_table(struct perf_top *top)
perf_top__header_snprintf(top, bf, sizeof(bf));
printf("%s\n", bf);
perf_top__reset_sample_counters(top);
printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
if (!top->record_opts.overwrite &&
@ -553,8 +552,6 @@ static void perf_top__sort_new_samples(void *arg)
struct perf_evsel *evsel = t->sym_evsel;
struct hists *hists;
perf_top__reset_sample_counters(t);
if (t->evlist->selected != NULL)
t->sym_evsel = t->evlist->selected;
@ -571,6 +568,15 @@ static void perf_top__sort_new_samples(void *arg)
hists__collapse_resort(hists, NULL);
perf_evsel__output_resort(evsel, NULL);
if (t->lost || t->drop)
pr_warning("Too slow to read ring buffer (change period (-c/-F) or limit CPUs (-C)\n");
}
static void stop_top(void)
{
session_done = 1;
done = 1;
}
static void *display_thread_tui(void *arg)
@ -595,7 +601,7 @@ static void *display_thread_tui(void *arg)
/*
* Initialize the uid_filter_str, in the future the TUI will allow
* Zooming in/out UIDs. For now juse use whatever the user passed
* Zooming in/out UIDs. For now just use whatever the user passed
* via --uid.
*/
evlist__for_each_entry(top->evlist, pos) {
@ -609,13 +615,13 @@ static void *display_thread_tui(void *arg)
!top->record_opts.overwrite,
&top->annotation_opts);
done = 1;
stop_top();
return NULL;
}
static void display_sig(int sig __maybe_unused)
{
done = 1;
stop_top();
}
static void display_setup_sig(void)
@ -668,7 +674,7 @@ static void *display_thread(void *arg)
if (perf_top__handle_keypress(top, c))
goto repeat;
done = 1;
stop_top();
}
}
@ -800,78 +806,61 @@ static void perf_event__process_sample(struct perf_tool *tool,
addr_location__put(&al);
}
static void
perf_top__process_lost(struct perf_top *top, union perf_event *event,
struct perf_evsel *evsel)
{
struct hists *hists = evsel__hists(evsel);
top->lost += event->lost.lost;
top->lost_total += event->lost.lost;
hists->stats.total_lost += event->lost.lost;
}
static void
perf_top__process_lost_samples(struct perf_top *top,
union perf_event *event,
struct perf_evsel *evsel)
{
struct hists *hists = evsel__hists(evsel);
top->lost += event->lost_samples.lost;
top->lost_total += event->lost_samples.lost;
hists->stats.total_lost_samples += event->lost_samples.lost;
}
static u64 last_timestamp;
static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
{
struct record_opts *opts = &top->record_opts;
struct perf_evlist *evlist = top->evlist;
struct perf_sample sample;
struct perf_evsel *evsel;
struct perf_mmap *md;
struct perf_session *session = top->session;
union perf_event *event;
struct machine *machine;
int ret;
md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
if (perf_mmap__read_init(md) < 0)
return;
while ((event = perf_mmap__read_event(md)) != NULL) {
ret = perf_evlist__parse_sample(evlist, event, &sample);
if (ret) {
pr_err("Can't parse sample, err = %d\n", ret);
goto next_event;
}
int ret;
evsel = perf_evlist__id2evsel(session->evlist, sample.id);
assert(evsel != NULL);
if (event->header.type == PERF_RECORD_SAMPLE)
++top->samples;
switch (sample.cpumode) {
case PERF_RECORD_MISC_USER:
++top->us_samples;
if (top->hide_user_symbols)
goto next_event;
machine = &session->machines.host;
ret = perf_evlist__parse_sample_timestamp(evlist, event, &last_timestamp);
if (ret && ret != -1)
break;
case PERF_RECORD_MISC_KERNEL:
++top->kernel_samples;
if (top->hide_kernel_symbols)
goto next_event;
machine = &session->machines.host;
break;
case PERF_RECORD_MISC_GUEST_KERNEL:
++top->guest_kernel_samples;
machine = perf_session__find_machine(session,
sample.pid);
break;
case PERF_RECORD_MISC_GUEST_USER:
++top->guest_us_samples;
/*
* TODO: we don't process guest user from host side
* except simple counting.
*/
goto next_event;
default:
if (event->header.type == PERF_RECORD_SAMPLE)
goto next_event;
machine = &session->machines.host;
break;
}
ret = ordered_events__queue(top->qe.in, event, last_timestamp, 0);
if (ret)
break;
if (event->header.type == PERF_RECORD_SAMPLE) {
perf_event__process_sample(&top->tool, event, evsel,
&sample, machine);
} else if (event->header.type < PERF_RECORD_MAX) {
hists__inc_nr_events(evsel__hists(evsel), event->header.type);
machine__process_event(machine, event, &sample);
} else
++session->evlist->stats.nr_unknown_events;
next_event:
perf_mmap__consume(md);
if (top->qe.rotate) {
pthread_mutex_lock(&top->qe.mutex);
top->qe.rotate = false;
pthread_cond_signal(&top->qe.cond);
pthread_mutex_unlock(&top->qe.mutex);
}
}
perf_mmap__read_done(md);
@ -881,10 +870,8 @@ static void perf_top__mmap_read(struct perf_top *top)
{
bool overwrite = top->record_opts.overwrite;
struct perf_evlist *evlist = top->evlist;
unsigned long long start, end;
int i;
start = rdclock();
if (overwrite)
perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
@ -895,13 +882,6 @@ static void perf_top__mmap_read(struct perf_top *top)
perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
}
end = rdclock();
if ((end - start) > (unsigned long long)top->delay_secs * NSEC_PER_SEC)
ui__warning("Too slow to read ring buffer.\n"
"Please try increasing the period (-c) or\n"
"decreasing the freq (-F) or\n"
"limiting the number of CPUs (-C)\n");
}
/*
@ -1063,6 +1043,150 @@ static int callchain_param__setup_sample_type(struct callchain_param *callchain)
return 0;
}
static struct ordered_events *rotate_queues(struct perf_top *top)
{
struct ordered_events *in = top->qe.in;
if (top->qe.in == &top->qe.data[1])
top->qe.in = &top->qe.data[0];
else
top->qe.in = &top->qe.data[1];
return in;
}
static void *process_thread(void *arg)
{
struct perf_top *top = arg;
while (!done) {
struct ordered_events *out, *in = top->qe.in;
if (!in->nr_events) {
usleep(100);
continue;
}
out = rotate_queues(top);
pthread_mutex_lock(&top->qe.mutex);
top->qe.rotate = true;
pthread_cond_wait(&top->qe.cond, &top->qe.mutex);
pthread_mutex_unlock(&top->qe.mutex);
if (ordered_events__flush(out, OE_FLUSH__TOP))
pr_err("failed to process events\n");
}
return NULL;
}
/*
* Allow only 'top->delay_secs' seconds behind samples.
*/
static int should_drop(struct ordered_event *qevent, struct perf_top *top)
{
union perf_event *event = qevent->event;
u64 delay_timestamp;
if (event->header.type != PERF_RECORD_SAMPLE)
return false;
delay_timestamp = qevent->timestamp + top->delay_secs * NSEC_PER_SEC;
return delay_timestamp < last_timestamp;
}
static int deliver_event(struct ordered_events *qe,
struct ordered_event *qevent)
{
struct perf_top *top = qe->data;
struct perf_evlist *evlist = top->evlist;
struct perf_session *session = top->session;
union perf_event *event = qevent->event;
struct perf_sample sample;
struct perf_evsel *evsel;
struct machine *machine;
int ret = -1;
if (should_drop(qevent, top)) {
top->drop++;
top->drop_total++;
return 0;
}
ret = perf_evlist__parse_sample(evlist, event, &sample);
if (ret) {
pr_err("Can't parse sample, err = %d\n", ret);
goto next_event;
}
evsel = perf_evlist__id2evsel(session->evlist, sample.id);
assert(evsel != NULL);
if (event->header.type == PERF_RECORD_SAMPLE)
++top->samples;
switch (sample.cpumode) {
case PERF_RECORD_MISC_USER:
++top->us_samples;
if (top->hide_user_symbols)
goto next_event;
machine = &session->machines.host;
break;
case PERF_RECORD_MISC_KERNEL:
++top->kernel_samples;
if (top->hide_kernel_symbols)
goto next_event;
machine = &session->machines.host;
break;
case PERF_RECORD_MISC_GUEST_KERNEL:
++top->guest_kernel_samples;
machine = perf_session__find_machine(session,
sample.pid);
break;
case PERF_RECORD_MISC_GUEST_USER:
++top->guest_us_samples;
/*
* TODO: we don't process guest user from host side
* except simple counting.
*/
goto next_event;
default:
if (event->header.type == PERF_RECORD_SAMPLE)
goto next_event;
machine = &session->machines.host;
break;
}
if (event->header.type == PERF_RECORD_SAMPLE) {
perf_event__process_sample(&top->tool, event, evsel,
&sample, machine);
} else if (event->header.type == PERF_RECORD_LOST) {
perf_top__process_lost(top, event, evsel);
} else if (event->header.type == PERF_RECORD_LOST_SAMPLES) {
perf_top__process_lost_samples(top, event, evsel);
} else if (event->header.type < PERF_RECORD_MAX) {
hists__inc_nr_events(evsel__hists(evsel), event->header.type);
machine__process_event(machine, event, &sample);
} else
++session->evlist->stats.nr_unknown_events;
ret = 0;
next_event:
return ret;
}
static void init_process_thread(struct perf_top *top)
{
ordered_events__init(&top->qe.data[0], deliver_event, top);
ordered_events__init(&top->qe.data[1], deliver_event, top);
ordered_events__set_copy_on_queue(&top->qe.data[0], true);
ordered_events__set_copy_on_queue(&top->qe.data[1], true);
top->qe.in = &top->qe.data[0];
pthread_mutex_init(&top->qe.mutex, NULL);
pthread_cond_init(&top->qe.cond, NULL);
}
static int __cmd_top(struct perf_top *top)
{
char msg[512];
@ -1070,7 +1194,7 @@ static int __cmd_top(struct perf_top *top)
struct perf_evsel_config_term *err_term;
struct perf_evlist *evlist = top->evlist;
struct record_opts *opts = &top->record_opts;
pthread_t thread;
pthread_t thread, thread_process;
int ret;
top->session = perf_session__new(NULL, false, NULL);
@ -1094,9 +1218,10 @@ static int __cmd_top(struct perf_top *top)
if (top->nr_threads_synthesize > 1)
perf_set_multithreaded();
init_process_thread(top);
machine__synthesize_threads(&top->session->machines.host, &opts->target,
top->evlist->threads, false,
opts->proc_map_timeout,
top->nr_threads_synthesize);
if (top->nr_threads_synthesize > 1)
@ -1135,10 +1260,15 @@ static int __cmd_top(struct perf_top *top)
perf_evlist__enable(top->evlist);
ret = -1;
if (pthread_create(&thread_process, NULL, process_thread, top)) {
ui__error("Could not create process thread.\n");
goto out_delete;
}
if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
display_thread), top)) {
ui__error("Could not create display thread.\n");
goto out_delete;
goto out_join_thread;
}
if (top->realtime_prio) {
@ -1173,6 +1303,9 @@ static int __cmd_top(struct perf_top *top)
ret = 0;
out_join:
pthread_join(thread, NULL);
out_join_thread:
pthread_cond_signal(&top->qe.cond);
pthread_join(thread_process, NULL);
out_delete:
perf_session__delete(top->session);
top->session = NULL;
@ -1256,7 +1389,6 @@ int cmd_top(int argc, const char **argv)
.target = {
.uses_mmap = true,
},
.proc_map_timeout = 500,
/*
* FIXME: This will lose PERF_RECORD_MMAP and other metadata
* when we pause, fix that and reenable. Probably using a
@ -1265,6 +1397,7 @@ int cmd_top(int argc, const char **argv)
* stays in overwrite mode. -acme
* */
.overwrite = 0,
.sample_time = true,
},
.max_stack = sysctl__max_stack(),
.annotation_opts = annotation__default_options,
@ -1289,6 +1422,8 @@ int cmd_top(int argc, const char **argv)
"file", "vmlinux pathname"),
OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
"don't load vmlinux even if found"),
OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
"file", "kallsyms pathname"),
OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
"hide kernel symbols"),
OPT_CALLBACK('m', "mmap-pages", &opts->mmap_pages, "pages",
@ -1367,7 +1502,7 @@ int cmd_top(int argc, const char **argv)
OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str,
"width[,width...]",
"don't try to adjust column width, use these fixed values"),
OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout,
OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack,
"branch any", "sample any taken branches",

View File

@ -18,9 +18,11 @@
#include <traceevent/event-parse.h>
#include <api/fs/tracing_path.h>
#include <bpf/bpf.h>
#include "builtin.h"
#include "util/cgroup.h"
#include "util/color.h"
#include "util/config.h"
#include "util/debug.h"
#include "util/env.h"
#include "util/event.h"
@ -75,6 +77,7 @@ struct trace {
struct {
int max;
struct syscall *table;
struct bpf_map *map;
struct {
struct perf_evsel *sys_enter,
*sys_exit,
@ -99,6 +102,7 @@ struct trace {
struct {
size_t nr;
pid_t *entries;
struct bpf_map *map;
} filter_pids;
double duration_filter;
double runtime_ms;
@ -108,6 +112,7 @@ struct trace {
} stats;
unsigned int max_stack;
unsigned int min_stack;
bool sort_events;
bool raw_augmented_syscalls;
bool not_ev_qualifier;
bool live;
@ -122,9 +127,19 @@ struct trace {
bool show_tool_stats;
bool trace_syscalls;
bool kernel_syscallchains;
s16 args_alignment;
bool show_tstamp;
bool show_duration;
bool show_zeros;
bool show_arg_names;
bool show_string_prefix;
bool force;
bool vfs_getname;
int trace_pgfaults;
struct {
struct ordered_events data;
u64 last;
} oe;
};
struct tp_field {
@ -256,7 +271,8 @@ static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel)
struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
if (evsel->priv != NULL) {
if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr"))
if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
perf_evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
goto out_delete;
return 0;
}
@ -345,21 +361,25 @@ static struct perf_evsel *perf_evsel__raw_syscall_newtp(const char *direction, v
({ struct syscall_tp *fields = evsel->priv; \
fields->name.pointer(&fields->name, sample); })
size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
{
int idx = val - sa->offset;
if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL)
return scnprintf(bf, size, intfmt, val);
if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
size_t printed = scnprintf(bf, size, intfmt, val);
if (show_prefix)
printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
return printed;
}
return scnprintf(bf, size, "%s", sa->entries[idx]);
return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
}
static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
const char *intfmt,
struct syscall_arg *arg)
{
return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val);
return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->show_string_prefix, arg->val);
}
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
@ -370,34 +390,32 @@ static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
#define SCA_STRARRAY syscall_arg__scnprintf_strarray
struct strarrays {
int nr_entries;
struct strarray **entries;
};
size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
{
size_t printed;
int i;
#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
.nr_entries = ARRAY_SIZE(array), \
.entries = array, \
for (i = 0; i < sas->nr_entries; ++i) {
struct strarray *sa = sas->entries[i];
int idx = val - sa->offset;
if (idx >= 0 && idx < sa->nr_entries) {
if (sa->entries[idx] == NULL)
break;
return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
}
}
printed = scnprintf(bf, size, intfmt, val);
if (show_prefix)
printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sas->entries[0]->prefix);
return printed;
}
size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
struct syscall_arg *arg)
{
struct strarrays *sas = arg->parm;
int i;
for (i = 0; i < sas->nr_entries; ++i) {
struct strarray *sa = sas->entries[i];
int idx = arg->val - sa->offset;
if (idx >= 0 && idx < sa->nr_entries) {
if (sa->entries[idx] == NULL)
break;
return scnprintf(bf, size, "%s", sa->entries[idx]);
}
}
return scnprintf(bf, size, "%d", arg->val);
return strarrays__scnprintf(arg->parm, bf, size, "%d", arg->show_string_prefix, arg->val);
}
#ifndef AT_FDCWD
@ -408,9 +426,10 @@ static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
struct syscall_arg *arg)
{
int fd = arg->val;
const char *prefix = "AT_FD";
if (fd == AT_FDCWD)
return scnprintf(bf, size, "CWD");
return scnprintf(bf, size, "%s%s", arg->show_string_prefix ? prefix : "", "CWD");
return syscall_arg__scnprintf_fd(bf, size, arg);
}
@ -427,6 +446,13 @@ size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg
return scnprintf(bf, size, "%#lx", arg->val);
}
size_t syscall_arg__scnprintf_ptr(char *bf, size_t size, struct syscall_arg *arg)
{
if (arg->val == 0)
return scnprintf(bf, size, "NULL");
return syscall_arg__scnprintf_hex(bf, size, arg);
}
size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
{
return scnprintf(bf, size, "%d", arg->val);
@ -441,13 +467,13 @@ static const char *bpf_cmd[] = {
"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
"MAP_GET_NEXT_KEY", "PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);
static DEFINE_STRARRAY(bpf_cmd, "BPF_");
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, "EPOLL_CTL_", 1);
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);
static DEFINE_STRARRAY(itimers, "ITIMER_");
static const char *keyctl_options[] = {
"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
@ -456,7 +482,7 @@ static const char *keyctl_options[] = {
"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);
static DEFINE_STRARRAY(keyctl_options, "KEYCTL_");
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
@ -466,7 +492,7 @@ static const char *whences[] = { "SET", "CUR", "END",
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
static DEFINE_STRARRAY(whences, "SEEK_");
static const char *fcntl_cmds[] = {
"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
@ -474,7 +500,7 @@ static const char *fcntl_cmds[] = {
"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
"GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
static DEFINE_STRARRAY(fcntl_cmds, "F_");
static const char *fcntl_linux_specific_cmds[] = {
"SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
@ -482,7 +508,7 @@ static const char *fcntl_linux_specific_cmds[] = {
"GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
};
static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);
static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, "F_", F_LINUX_SPECIFIC_BASE);
static struct strarray *fcntl_cmds_arrays[] = {
&strarray__fcntl_cmds,
@ -496,29 +522,31 @@ static const char *rlimit_resources[] = {
"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
static DEFINE_STRARRAY(rlimit_resources, "RLIMIT_");
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);
static DEFINE_STRARRAY(sighow, "SIG_");
static const char *clockid[] = {
"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);
static DEFINE_STRARRAY(clockid, "CLOCK_");
static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
struct syscall_arg *arg)
{
bool show_prefix = arg->show_string_prefix;
const char *suffix = "_OK";
size_t printed = 0;
int mode = arg->val;
if (mode == F_OK) /* 0 */
return scnprintf(bf, size, "F");
return scnprintf(bf, size, "F%s", show_prefix ? suffix : "");
#define P_MODE(n) \
if (mode & n##_OK) { \
printed += scnprintf(bf + printed, size - printed, "%s", #n); \
printed += scnprintf(bf + printed, size - printed, "%s%s", #n, show_prefix ? suffix : ""); \
mode &= ~n##_OK; \
}
@ -543,11 +571,13 @@ static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
struct syscall_arg *arg)
{
bool show_prefix = arg->show_string_prefix;
const char *prefix = "O_";
int printed = 0, flags = arg->val;
#define P_FLAG(n) \
if (flags & O_##n) { \
printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
flags &= ~O_##n; \
}
@ -573,11 +603,13 @@ static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
struct syscall_arg *arg)
{
bool show_prefix = arg->show_string_prefix;
const char *prefix = "GRND_";
int printed = 0, flags = arg->val;
#define P_FLAG(n) \
if (flags & GRND_##n) { \
printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
flags &= ~GRND_##n; \
}
@ -632,12 +664,15 @@ static struct syscall_fmt {
} syscall_fmts[] = {
{ .name = "access",
.arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
{ .name = "arch_prctl",
.arg = { [0] = { .scnprintf = SCA_X86_ARCH_PRCTL_CODE, /* code */ },
[1] = { .scnprintf = SCA_PTR, /* arg2 */ }, }, },
{ .name = "bind",
.arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ }, }, },
{ .name = "bpf",
.arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
{ .name = "brk", .hexret = true,
.arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
.arg = { [0] = { .scnprintf = SCA_PTR, /* brk */ }, }, },
{ .name = "clock_gettime",
.arg = { [0] = STRARRAY(clk_id, clockid), }, },
{ .name = "clone", .errpid = true, .nr_args = 5,
@ -715,18 +750,14 @@ static struct syscall_fmt {
.arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
{ .name = "mknodat",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
{ .name = "mlock",
.arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
{ .name = "mlockall",
.arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
{ .name = "mmap", .hexret = true,
/* The standard mmap maps to old_mmap on s390x */
#if defined(__s390x__)
.alias = "old_mmap",
#endif
.arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
[2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
[3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, },
.arg = { [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
[3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ },
[5] = { .scnprintf = SCA_HEX, /* offset */ }, }, },
{ .name = "mount",
.arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ },
[3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
@ -737,13 +768,7 @@ static struct syscall_fmt {
{ .name = "mq_unlink",
.arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
{ .name = "mremap", .hexret = true,
.arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
[3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
[4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, },
{ .name = "munlock",
.arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
{ .name = "munmap",
.arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
.arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, }, },
{ .name = "name_to_handle_at",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
{ .name = "newfstatat",
@ -772,7 +797,7 @@ static struct syscall_fmt {
[3] = { .scnprintf = SCA_INT, /* pkey */ }, }, },
{ .name = "poll", .timeout = true, },
{ .name = "ppoll", .timeout = true, },
{ .name = "prctl", .alias = "arch_prctl",
{ .name = "prctl",
.arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ },
[1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
[2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
@ -790,7 +815,12 @@ static struct syscall_fmt {
{ .name = "recvmsg",
.arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
{ .name = "renameat",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
.arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
[2] = { .scnprintf = SCA_FDAT, /* newdirfd */ }, }, },
{ .name = "renameat2",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
[2] = { .scnprintf = SCA_FDAT, /* newdirfd */ },
[4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, /* flags */ }, }, },
{ .name = "rt_sigaction",
.arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
{ .name = "rt_sigprocmask",
@ -883,7 +913,7 @@ static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
* args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc.
*/
struct syscall {
struct tep_event_format *tp_format;
struct tep_event *tp_format;
int nr_args;
int args_size;
bool is_exit;
@ -894,6 +924,10 @@ struct syscall {
struct syscall_arg_fmt *arg_fmt;
};
struct bpf_map_syscall_entry {
bool enabled;
};
/*
* We need to have this 'calculated' boolean because in some cases we really
* don't know what is the duration of a syscall, for instance, when we start
@ -1125,7 +1159,7 @@ static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, c
{
struct augmented_arg *augmented_arg = arg->augmented.args;
return scnprintf(bf, size, "%.*s", augmented_arg->size, augmented_arg->value);
return scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value);
}
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
@ -1194,8 +1228,12 @@ static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread
static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
{
size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
printed += fprintf_duration(duration, duration_calculated, fp);
size_t printed = 0;
if (trace->show_tstamp)
printed = trace__fprintf_tstamp(trace, tstamp, fp);
if (trace->show_duration)
printed += fprintf_duration(duration, duration_calculated, fp);
return printed + trace__fprintf_comm_tid(trace, thread, fp);
}
@ -1262,7 +1300,7 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
evlist->threads, trace__tool_process, false,
trace->opts.proc_map_timeout, 1);
1);
out:
if (err)
symbol__exit();
@ -1314,8 +1352,8 @@ static int syscall__set_arg_fmts(struct syscall *sc)
strcmp(field->name, "path") == 0 ||
strcmp(field->name, "pathname") == 0))
sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
else if (field->flags & TEP_FIELD_IS_POINTER)
sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
sc->arg_fmt[idx].scnprintf = SCA_PTR;
else if (strcmp(field->type, "pid_t") == 0)
sc->arg_fmt[idx].scnprintf = SCA_PID;
else if (strcmp(field->type, "umode_t") == 0)
@ -1548,6 +1586,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
.mask = 0,
.trace = trace,
.thread = thread,
.show_string_prefix = trace->show_string_prefix,
};
struct thread_trace *ttrace = thread__priv(thread);
@ -1579,6 +1618,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
* strarray for it.
*/
if (val == 0 &&
!trace->show_zeros &&
!(sc->arg_fmt &&
(sc->arg_fmt[arg.idx].show_zero ||
sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
@ -1586,8 +1626,11 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
sc->arg_fmt[arg.idx].parm))
continue;
printed += scnprintf(bf + printed, size - printed,
"%s%s: ", printed ? ", " : "", field->name);
printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");
if (trace->show_arg_names)
printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
}
} else if (IS_ERR(sc->tp_format)) {
@ -1700,7 +1743,7 @@ static int trace__printf_interrupted_entry(struct trace *trace)
return 0;
printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
printed += fprintf(trace->output, ")%-*s ...\n", trace->args_alignment, ttrace->entry_str);
ttrace->entry_pending = false;
++trace->nr_events_printed;
@ -1757,7 +1800,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
{
char *msg;
void *args;
size_t printed = 0;
int printed = 0;
struct thread *thread;
int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
int augmented_args_size = 0;
@ -1806,8 +1849,13 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
if (sc->is_exit) {
if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
int alignment = 0;
trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
printed = fprintf(trace->output, "%s)", ttrace->entry_str);
if (trace->args_alignment > printed)
alignment = trace->args_alignment - printed;
fprintf(trace->output, "%*s= ?\n", alignment, " ");
}
} else {
ttrace->entry_pending = true;
@ -1902,7 +1950,8 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
u64 duration = 0;
bool duration_calculated = false;
struct thread *thread;
int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0, printed = 0;
int alignment = trace->args_alignment;
struct syscall *sc = trace__syscall_info(trace, evsel, id);
struct thread_trace *ttrace;
@ -1950,28 +1999,37 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
if (ttrace->entry_pending) {
fprintf(trace->output, "%-70s", ttrace->entry_str);
printed = fprintf(trace->output, "%s", ttrace->entry_str);
} else {
fprintf(trace->output, " ... [");
color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
fprintf(trace->output, "]: %s()", sc->name);
}
printed++; /* the closing ')' */
if (alignment > printed)
alignment -= printed;
else
alignment = 0;
fprintf(trace->output, ")%*s= ", alignment, " ");
if (sc->fmt == NULL) {
if (ret < 0)
goto errno_print;
signed_print:
fprintf(trace->output, ") = %ld", ret);
fprintf(trace->output, "%ld", ret);
} else if (ret < 0) {
errno_print: {
char bf[STRERR_BUFSIZE];
const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
*e = errno_to_name(evsel, -ret);
fprintf(trace->output, ") = -1 %s %s", e, emsg);
fprintf(trace->output, "-1 %s (%s)", e, emsg);
}
} else if (ret == 0 && sc->fmt->timeout)
fprintf(trace->output, ") = 0 Timeout");
fprintf(trace->output, "0 (Timeout)");
else if (ttrace->ret_scnprintf) {
char bf[1024];
struct syscall_arg arg = {
@ -1981,14 +2039,14 @@ errno_print: {
};
ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
ttrace->ret_scnprintf = NULL;
fprintf(trace->output, ") = %s", bf);
fprintf(trace->output, "%s", bf);
} else if (sc->fmt->hexret)
fprintf(trace->output, ") = %#lx", ret);
fprintf(trace->output, "%#lx", ret);
else if (sc->fmt->errpid) {
struct thread *child = machine__find_thread(trace->host, ret, ret);
if (child != NULL) {
fprintf(trace->output, ") = %ld", ret);
fprintf(trace->output, "%ld", ret);
if (child->comm_set)
fprintf(trace->output, " (%s)", thread__comm_str(child));
thread__put(child);
@ -2169,7 +2227,7 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
trace__printf_interrupted_entry(trace);
trace__fprintf_tstamp(trace, sample->time, trace->output);
if (trace->trace_syscalls)
if (trace->trace_syscalls && trace->show_duration)
fprintf(trace->output, "( ): ");
if (thread)
@ -2540,7 +2598,7 @@ static int trace__add_syscall_newtp(struct trace *trace)
goto out;
}
static int trace__set_ev_qualifier_filter(struct trace *trace)
static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
{
int err = -1;
struct perf_evsel *sys_exit;
@ -2565,9 +2623,93 @@ static int trace__set_ev_qualifier_filter(struct trace *trace)
goto out;
}
#ifdef HAVE_LIBBPF_SUPPORT
static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
{
int fd = bpf_map__fd(trace->syscalls.map);
struct bpf_map_syscall_entry value = {
.enabled = !trace->not_ev_qualifier,
};
int err = 0;
size_t i;
for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) {
int key = trace->ev_qualifier_ids.entries[i];
err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
if (err)
break;
}
return err;
}
static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled)
{
int fd = bpf_map__fd(trace->syscalls.map);
struct bpf_map_syscall_entry value = {
.enabled = enabled,
};
int err = 0, key;
for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
if (err)
break;
}
return err;
}
static int trace__init_syscalls_bpf_map(struct trace *trace)
{
bool enabled = true;
if (trace->ev_qualifier_ids.nr)
enabled = trace->not_ev_qualifier;
return __trace__init_syscalls_bpf_map(trace, enabled);
}
#else
static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
{
return 0;
}
static int trace__init_syscalls_bpf_map(struct trace *trace __maybe_unused)
{
return 0;
}
#endif // HAVE_LIBBPF_SUPPORT
static int trace__set_ev_qualifier_filter(struct trace *trace)
{
if (trace->syscalls.map)
return trace__set_ev_qualifier_bpf_filter(trace);
return trace__set_ev_qualifier_tp_filter(trace);
}
static int bpf_map__set_filter_pids(struct bpf_map *map __maybe_unused,
size_t npids __maybe_unused, pid_t *pids __maybe_unused)
{
int err = 0;
#ifdef HAVE_LIBBPF_SUPPORT
bool value = true;
int map_fd = bpf_map__fd(map);
size_t i;
for (i = 0; i < npids; ++i) {
err = bpf_map_update_elem(map_fd, &pids[i], &value, BPF_ANY);
if (err)
break;
}
#endif
return err;
}
static int trace__set_filter_loop_pids(struct trace *trace)
{
unsigned int nr = 1;
unsigned int nr = 1, err;
pid_t pids[32] = {
getpid(),
};
@ -2586,7 +2728,92 @@ static int trace__set_filter_loop_pids(struct trace *trace)
thread = parent;
}
return perf_evlist__set_filter_pids(trace->evlist, nr, pids);
err = perf_evlist__set_tp_filter_pids(trace->evlist, nr, pids);
if (!err && trace->filter_pids.map)
err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids);
return err;
}
static int trace__set_filter_pids(struct trace *trace)
{
int err = 0;
/*
* Better not use !target__has_task() here because we need to cover the
* case where no threads were specified in the command line, but a
* workload was, and in that case we will fill in the thread_map when
* we fork the workload in perf_evlist__prepare_workload.
*/
if (trace->filter_pids.nr > 0) {
err = perf_evlist__set_tp_filter_pids(trace->evlist, trace->filter_pids.nr,
trace->filter_pids.entries);
if (!err && trace->filter_pids.map) {
err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr,
trace->filter_pids.entries);
}
} else if (thread_map__pid(trace->evlist->threads, 0) == -1) {
err = trace__set_filter_loop_pids(trace);
}
return err;
}
static int __trace__deliver_event(struct trace *trace, union perf_event *event)
{
struct perf_evlist *evlist = trace->evlist;
struct perf_sample sample;
int err;
err = perf_evlist__parse_sample(evlist, event, &sample);
if (err)
fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
else
trace__handle_event(trace, event, &sample);
return 0;
}
static int __trace__flush_events(struct trace *trace)
{
u64 first = ordered_events__first_time(&trace->oe.data);
u64 flush = trace->oe.last - NSEC_PER_SEC;
/* Is there some thing to flush.. */
if (first && first < flush)
return ordered_events__flush_time(&trace->oe.data, flush);
return 0;
}
static int trace__flush_events(struct trace *trace)
{
return !trace->sort_events ? 0 : __trace__flush_events(trace);
}
static int trace__deliver_event(struct trace *trace, union perf_event *event)
{
int err;
if (!trace->sort_events)
return __trace__deliver_event(trace, event);
err = perf_evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last);
if (err && err != -1)
return err;
err = ordered_events__queue(&trace->oe.data, event, trace->oe.last, 0);
if (err)
return err;
return trace__flush_events(trace);
}
static int ordered_events__deliver_event(struct ordered_events *oe,
struct ordered_event *event)
{
struct trace *trace = container_of(oe, struct trace, oe.data);
return __trace__deliver_event(trace, event->event);
}
static int trace__run(struct trace *trace, int argc, const char **argv)
@ -2600,11 +2827,13 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
trace->live = true;
if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
goto out_error_raw_syscalls;
if (!trace->raw_augmented_syscalls) {
if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
goto out_error_raw_syscalls;
if (trace->trace_syscalls)
trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
if (trace->trace_syscalls)
trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
}
if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
@ -2695,27 +2924,22 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
goto out_error_open;
}
/*
* Better not use !target__has_task() here because we need to cover the
* case where no threads were specified in the command line, but a
* workload was, and in that case we will fill in the thread_map when
* we fork the workload in perf_evlist__prepare_workload.
*/
if (trace->filter_pids.nr > 0)
err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
else if (thread_map__pid(evlist->threads, 0) == -1)
err = trace__set_filter_loop_pids(trace);
err = trace__set_filter_pids(trace);
if (err < 0)
goto out_error_mem;
if (trace->syscalls.map)
trace__init_syscalls_bpf_map(trace);
if (trace->ev_qualifier_ids.nr > 0) {
err = trace__set_ev_qualifier_filter(trace);
if (err < 0)
goto out_errno;
pr_debug("event qualifier tracepoint filter: %s\n",
trace->syscalls.events.sys_exit->filter);
if (trace->syscalls.events.sys_exit) {
pr_debug("event qualifier tracepoint filter: %s\n",
trace->syscalls.events.sys_exit->filter);
}
}
err = perf_evlist__apply_filters(evlist, &evsel);
@ -2745,7 +2969,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
* Now that we already used evsel->attr to ask the kernel to setup the
* events, lets reuse evsel->attr.sample_max_stack as the limit in
* trace__resolve_callchain(), allowing per-event max-stack settings
* to override an explicitely set --max-stack global setting.
* to override an explicitly set --max-stack global setting.
*/
evlist__for_each_entry(evlist, evsel) {
if (evsel__has_callchain(evsel) &&
@ -2764,18 +2988,12 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
continue;
while ((event = perf_mmap__read_event(md)) != NULL) {
struct perf_sample sample;
++trace->nr_events;
err = perf_evlist__parse_sample(evlist, event, &sample);
if (err) {
fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
goto next_event;
}
err = trace__deliver_event(trace, event);
if (err)
goto out_disable;
trace__handle_event(trace, event, &sample);
next_event:
perf_mmap__consume(md);
if (interrupted)
@ -2797,6 +3015,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
draining = true;
goto again;
} else {
if (trace__flush_events(trace))
goto out_disable;
}
} else {
goto again;
@ -2807,6 +3028,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
perf_evlist__disable(evlist);
if (trace->sort_events)
ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);
if (!err) {
if (trace->summary)
trace__fprintf_thread_summary(trace, trace->output);
@ -3104,8 +3328,8 @@ static int trace__set_duration(const struct option *opt, const char *str,
return 0;
}
static int trace__set_filter_pids(const struct option *opt, const char *str,
int unset __maybe_unused)
static int trace__set_filter_pids_from_option(const struct option *opt, const char *str,
int unset __maybe_unused)
{
int ret = -1;
size_t i;
@ -3315,6 +3539,68 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u
return 0;
}
static struct bpf_map *bpf__find_map_by_name(const char *name)
{
struct bpf_object *obj, *tmp;
bpf_object__for_each_safe(obj, tmp) {
struct bpf_map *map = bpf_object__find_map_by_name(obj, name);
if (map)
return map;
}
return NULL;
}
static void trace__set_bpf_map_filtered_pids(struct trace *trace)
{
trace->filter_pids.map = bpf__find_map_by_name("pids_filtered");
}
static void trace__set_bpf_map_syscalls(struct trace *trace)
{
trace->syscalls.map = bpf__find_map_by_name("syscalls");
}
static int trace__config(const char *var, const char *value, void *arg)
{
struct trace *trace = arg;
int err = 0;
if (!strcmp(var, "trace.add_events")) {
struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
"event selector. use 'perf list' to list available events",
parse_events_option);
err = parse_events_option(&o, value, 0);
} else if (!strcmp(var, "trace.show_timestamp")) {
trace->show_tstamp = perf_config_bool(var, value);
} else if (!strcmp(var, "trace.show_duration")) {
trace->show_duration = perf_config_bool(var, value);
} else if (!strcmp(var, "trace.show_arg_names")) {
trace->show_arg_names = perf_config_bool(var, value);
if (!trace->show_arg_names)
trace->show_zeros = true;
} else if (!strcmp(var, "trace.show_zeros")) {
bool new_show_zeros = perf_config_bool(var, value);
if (!trace->show_arg_names && !new_show_zeros) {
pr_warning("trace.show_zeros has to be set when trace.show_arg_names=no\n");
goto out;
}
trace->show_zeros = new_show_zeros;
} else if (!strcmp(var, "trace.show_prefix")) {
trace->show_string_prefix = perf_config_bool(var, value);
} else if (!strcmp(var, "trace.no_inherit")) {
trace->opts.no_inherit = perf_config_bool(var, value);
} else if (!strcmp(var, "trace.args_alignment")) {
int args_alignment = 0;
if (perf_config_int(&args_alignment, var, value) == 0)
trace->args_alignment = args_alignment;
}
out:
return err;
}
int cmd_trace(int argc, const char **argv)
{
const char *trace_usage[] = {
@ -3337,10 +3623,13 @@ int cmd_trace(int argc, const char **argv)
.user_interval = ULLONG_MAX,
.no_buffering = true,
.mmap_pages = UINT_MAX,
.proc_map_timeout = 500,
},
.output = stderr,
.show_comm = true,
.show_tstamp = true,
.show_duration = true,
.show_arg_names = true,
.args_alignment = 70,
.trace_syscalls = false,
.kernel_syscallchains = false,
.max_stack = UINT_MAX,
@ -3363,7 +3652,7 @@ int cmd_trace(int argc, const char **argv)
OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
"trace events on existing thread id"),
OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
"pids to filter (by the kernel)", trace__set_filter_pids),
"pids to filter (by the kernel)", trace__set_filter_pids_from_option),
OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
"system-wide collection from all CPUs"),
OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
@ -3406,9 +3695,11 @@ int cmd_trace(int argc, const char **argv)
"Set the maximum stack depth when parsing the callchain, "
"anything beyond the specified depth will be ignored. "
"Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
OPT_BOOLEAN(0, "sort-events", &trace.sort_events,
"Sort batch of events before processing, use if getting out of order events"),
OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
"print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
trace__parse_cgroups),
@ -3436,6 +3727,10 @@ int cmd_trace(int argc, const char **argv)
goto out;
}
err = perf_config(trace__config, &trace);
if (err)
goto out;
argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
@ -3451,8 +3746,11 @@ int cmd_trace(int argc, const char **argv)
goto out;
}
if (evsel)
if (evsel) {
trace.syscalls.events.augmented = evsel;
trace__set_bpf_map_filtered_pids(&trace);
trace__set_bpf_map_syscalls(&trace);
}
err = bpf__setup_stdout(trace.evlist);
if (err) {
@ -3497,6 +3795,11 @@ int cmd_trace(int argc, const char **argv)
}
}
if (trace.sort_events) {
ordered_events__init(&trace.oe.data, ordered_events__deliver_event, &trace);
ordered_events__set_copy_on_queue(&trace.oe.data, true);
}
/*
* If we are augmenting syscalls, then combine what we put in the
* __augmented_syscalls__ BPF map with what is in the

View File

@ -4,6 +4,7 @@
HEADERS='
include/uapi/drm/drm.h
include/uapi/drm/i915_drm.h
include/uapi/linux/fadvise.h
include/uapi/linux/fcntl.h
include/uapi/linux/fs.h
include/uapi/linux/kcmp.h
@ -21,6 +22,7 @@ include/uapi/linux/hw_breakpoint.h
arch/x86/include/asm/disabled-features.h
arch/x86/include/asm/required-features.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/uapi/asm/prctl.h
arch/arm/include/uapi/asm/perf_regs.h
arch/arm64/include/uapi/asm/perf_regs.h
arch/powerpc/include/uapi/asm/perf_regs.h

View File

@ -14,8 +14,8 @@
* code that will combine entry/exit in a strace like way.
*/
#include <stdio.h>
#include <linux/socket.h>
#include <unistd.h>
#include <pid_filter.h>
/* bpf-output associated map */
struct bpf_map SEC("maps") __augmented_syscalls__ = {
@ -25,6 +25,17 @@ struct bpf_map SEC("maps") __augmented_syscalls__ = {
.max_entries = __NR_CPUS__,
};
struct syscall {
bool enabled;
};
struct bpf_map SEC("maps") syscalls = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(struct syscall),
.max_entries = 512,
};
struct syscall_enter_args {
unsigned long long common_tp_fields;
long syscall_nr;
@ -44,8 +55,11 @@ struct augmented_filename {
};
#define SYS_OPEN 2
#define SYS_ACCESS 21
#define SYS_OPENAT 257
pid_filter(pids_filtered);
SEC("raw_syscalls:sys_enter")
int sys_enter(struct syscall_enter_args *args)
{
@ -53,10 +67,18 @@ int sys_enter(struct syscall_enter_args *args)
struct syscall_enter_args args;
struct augmented_filename filename;
} augmented_args;
struct syscall *syscall;
unsigned int len = sizeof(augmented_args);
const void *filename_arg = NULL;
if (pid_filter__has(&pids_filtered, getpid()))
return 0;
probe_read(&augmented_args.args, sizeof(augmented_args.args), args);
syscall = bpf_map_lookup_elem(&syscalls, &augmented_args.args.syscall_nr);
if (syscall == NULL || !syscall->enabled)
return 0;
/*
* Yonghong and Edward Cree sayz:
*
@ -98,6 +120,7 @@ int sys_enter(struct syscall_enter_args *args)
* after the ctx memory access to prevent their down stream merging.
*/
switch (augmented_args.args.syscall_nr) {
case SYS_ACCESS:
case SYS_OPEN: filename_arg = (const void *)args->args[0];
__asm__ __volatile__("": : :"memory");
break;
@ -125,7 +148,19 @@ int sys_enter(struct syscall_enter_args *args)
SEC("raw_syscalls:sys_exit")
int sys_exit(struct syscall_exit_args *args)
{
return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */
struct syscall_exit_args exit_args;
struct syscall *syscall;
if (pid_filter__has(&pids_filtered, getpid()))
return 0;
probe_read(&exit_args, sizeof(exit_args), args);
syscall = bpf_map_lookup_elem(&syscalls, &exit_args.syscall_nr);
if (syscall == NULL || !syscall->enabled)
return 0;
return 1;
}
license(GPL);

View File

@ -18,6 +18,25 @@ struct bpf_map {
unsigned int numa_node;
};
/*
* FIXME: this should receive .max_entries as a parameter, as careful
* tuning of these limits is needed to avoid hitting limits that
* prevents other BPF constructs, such as tracepoint handlers,
* to get installed, with cryptic messages from libbpf, etc.
* For the current need, 'perf trace --filter-pids', 64 should
* be good enough, but this surely needs to be revisited.
*/
#define pid_map(name, value_type) \
struct bpf_map SEC("maps") name = { \
.type = BPF_MAP_TYPE_HASH, \
.key_size = sizeof(pid_t), \
.value_size = sizeof(value_type), \
.max_entries = 64, \
}
static int (*bpf_map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags) = (void *)BPF_FUNC_map_update_elem;
static void *(*bpf_map_lookup_elem)(struct bpf_map *map, void *key) = (void *)BPF_FUNC_map_lookup_elem;
#define SEC(NAME) __attribute__((section(NAME), used))
#define probe(function, vars) \
@ -36,4 +55,6 @@ int _version SEC("version") = LINUX_VERSION_CODE;
static int (*probe_read)(void *dst, int size, const void *unsafe_addr) = (void *)BPF_FUNC_probe_read;
static int (*probe_read_str)(void *dst, int size, const void *unsafe_addr) = (void *)BPF_FUNC_probe_read_str;
static int (*perf_event_output)(void *, struct bpf_map *, int, void *, unsigned long) = (void *)BPF_FUNC_perf_event_output;
#endif /* _PERF_BPF_H */

View File

@ -0,0 +1,21 @@
// SPDX-License-Identifier: LGPL-2.1
#ifndef _PERF_BPF_PID_FILTER_
#define _PERF_BPF_PID_FILTER_
#include <bpf.h>
#define pid_filter(name) pid_map(name, bool)
static int pid_filter__add(struct bpf_map *pids, pid_t pid)
{
bool value = true;
return bpf_map_update_elem(pids, &pid, &value, BPF_NOEXIST);
}
static bool pid_filter__has(struct bpf_map *pids, pid_t pid)
{
return bpf_map_lookup_elem(pids, &pid) != NULL;
}
#endif // _PERF_BPF_PID_FILTER_

View File

@ -9,9 +9,6 @@ struct bpf_map SEC("maps") __bpf_stdout__ = {
.max_entries = __NR_CPUS__,
};
static int (*perf_event_output)(void *, struct bpf_map *, int, void *, unsigned long) =
(void *)BPF_FUNC_perf_event_output;
#define puts(from) \
({ const int __len = sizeof(from); \
char __from[__len] = from; \

View File

@ -0,0 +1,10 @@
// SPDX-License-Identifier: LGPL-2.1
#include <bpf.h>
static int (*bpf_get_current_pid_tgid)(void) = (void *)BPF_FUNC_get_current_pid_tgid;
static pid_t getpid(void)
{
return bpf_get_current_pid_tgid();
}

View File

@ -6,7 +6,9 @@
#include <stdlib.h>
#include <err.h>
#include <jvmti.h>
#ifdef HAVE_JVMTI_CMLR
#include <jvmticmlr.h>
#endif
#include <limits.h>
#include "jvmti_agent.h"
@ -27,6 +29,7 @@ static void print_error(jvmtiEnv *jvmti, const char *msg, jvmtiError ret)
}
}
#ifdef HAVE_JVMTI_CMLR
static jvmtiError
do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
jvmti_line_info_t *tab, jint *nr)
@ -125,6 +128,15 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **
*nr_lines = lines_total;
return JVMTI_ERROR_NONE;
}
#else /* HAVE_JVMTI_CMLR */
static jvmtiError
get_line_numbers(jvmtiEnv *jvmti __maybe_unused, const void *compile_info __maybe_unused,
jvmti_line_info_t **tab __maybe_unused, int *nr_lines __maybe_unused)
{
return JVMTI_ERROR_NONE;
}
#endif /* HAVE_JVMTI_CMLR */
static void
copy_class_filename(const char * class_sign, const char * file_name, char * result, size_t max_length)

View File

@ -82,7 +82,7 @@ struct record_opts {
bool use_clockid;
clockid_t clockid;
u64 clockid_res_ns;
unsigned int proc_map_timeout;
int nr_cblocks;
};
struct option;

View File

@ -433,7 +433,7 @@
},
{
"PEBS": "1",
"PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"EventCode": "0xD0",
"Counter": "0,1,2,3",
"UMask": "0x41",
@ -445,7 +445,7 @@
},
{
"PEBS": "1",
"PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"EventCode": "0xD0",
"Counter": "0,1,2,3",
"UMask": "0x42",

View File

@ -317,7 +317,7 @@
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"PublicDescription": "This event counts stalls occured due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.",
"PublicDescription": "This event counts stalls occurred due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.",
"EventCode": "0x87",
"Counter": "0,1,2,3",
"UMask": "0x1",

View File

@ -439,7 +439,7 @@
"PEBS": "1",
"Counter": "0,1,2,3",
"EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
"PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3"
},
@ -451,7 +451,7 @@
"PEBS": "1",
"Counter": "0,1,2,3",
"EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
"PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"SampleAfterValue": "100003",
"L1_Hit_Indication": "1",
"CounterHTOff": "0,1,2,3"

View File

@ -322,7 +322,7 @@
"BriefDescription": "Stalls caused by changing prefix length of the instruction.",
"Counter": "0,1,2,3",
"EventName": "ILD_STALL.LCP",
"PublicDescription": "This event counts stalls occured due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.",
"PublicDescription": "This event counts stalls occurred due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},

View File

@ -439,7 +439,7 @@
"PEBS": "1",
"Counter": "0,1,2,3",
"EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
"PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3"
},
@ -451,7 +451,7 @@
"PEBS": "1",
"Counter": "0,1,2,3",
"EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
"PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"SampleAfterValue": "100003",
"L1_Hit_Indication": "1",
"CounterHTOff": "0,1,2,3"

View File

@ -322,7 +322,7 @@
"BriefDescription": "Stalls caused by changing prefix length of the instruction.",
"Counter": "0,1,2,3",
"EventName": "ILD_STALL.LCP",
"PublicDescription": "This event counts stalls occured due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.",
"PublicDescription": "This event counts stalls occurred due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,164 @@
[
{
"BriefDescription": "Instructions Per Cycle (per logical thread)",
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "TopDownL1",
"MetricName": "IPC"
},
{
"BriefDescription": "Uops Per Instruction",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
"MetricGroup": "Pipeline",
"MetricName": "UPI"
},
{
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
"MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ((UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )",
"MetricGroup": "Frontend",
"MetricName": "IFetch_Line_Utilization"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
"MetricGroup": "DSB; Frontend_Bandwidth",
"MetricName": "DSB_Coverage"
},
{
"BriefDescription": "Cycles Per Instruction (threaded)",
"MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
"MetricGroup": "Pipeline;Summary",
"MetricName": "CPI"
},
{
"BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "CLKS"
},
{
"BriefDescription": "Total issue-pipeline slots",
"MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "TopDownL1",
"MetricName": "SLOTS"
},
{
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary",
"MetricName": "Instructions"
},
{
"BriefDescription": "Instructions Per Cycle (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "SMT",
"MetricName": "CoreIPC"
},
{
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
"MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
"MetricGroup": "Pipeline;Ports_Utilization",
"MetricName": "ILP"
},
{
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
"MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END)",
"MetricGroup": "Unknown_Branches",
"MetricName": "BAClear_Cost"
},
{
"BriefDescription": "Core actual clocks when any thread is active on the physical core",
"MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "SMT",
"MetricName": "CORE_CLKS"
},
{
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )",
"MetricGroup": "Memory_Bound;Memory_Lat",
"MetricName": "Load_Miss_Real_Latency"
},
{
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
"MetricExpr": "L1D_PEND_MISS.PENDING / (( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
"MetricGroup": "Memory_Bound;Memory_BW",
"MetricName": "MLP"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles) )",
"MetricGroup": "TLB",
"MetricName": "Page_Walks_Utilization"
},
{
"BriefDescription": "Average CPU Utilization",
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "CPU_Utilization"
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
"MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time",
"MetricGroup": "FLOPS;Summary",
"MetricName": "GFLOPs"
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Power",
"MetricName": "Turbo_Utilization"
},
{
"BriefDescription": "Fraction of cycles where both hardware threads were active",
"MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
"MetricGroup": "SMT;Summary",
"MetricName": "SMT_2T_Utilization"
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
{
"BriefDescription": "C3 residency percent per core",
"MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency"
},
{
"BriefDescription": "C6 residency percent per core",
"MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency"
},
{
"BriefDescription": "C7 residency percent per core",
"MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency"
},
{
"BriefDescription": "C2 residency percent per package",
"MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency"
},
{
"BriefDescription": "C3 residency percent per package",
"MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency"
},
{
"BriefDescription": "C6 residency percent per package",
"MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency"
},
{
"BriefDescription": "C7 residency percent per package",
"MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency"
}
]

View File

@ -0,0 +1,85 @@
[
{
"EventCode": "0xC7",
"UMask": "0x1",
"BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
"Counter": "0,1,2,3",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC7",
"UMask": "0x2",
"BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
"Counter": "0,1,2,3",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC7",
"UMask": "0x4",
"BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT14 RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"Counter": "0,1,2,3",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC7",
"UMask": "0x8",
"BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 4 calculations per element.",
"Counter": "0,1,2,3",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC7",
"UMask": "0x10",
"BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 4 calculations per element.",
"Counter": "0,1,2,3",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC7",
"UMask": "0x20",
"BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 8 calculations per element.",
"Counter": "0,1,2,3",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC7",
"UMask": "0x40",
"BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 8 calculations per element.",
"Counter": "0,1,2,3",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC7",
"UMask": "0x80",
"BriefDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 16 calculations per element.",
"Counter": "0,1,2,3",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xCA",
"UMask": "0x1e",
"BriefDescription": "Cycles with any input/output SSE or FP assist",
"Counter": "0,1,2,3",
"EventName": "FP_ASSIST.ANY",
"CounterMask": "1",
"PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
}
]

View File

@ -0,0 +1,482 @@
[
{
"EventCode": "0x79",
"UMask": "0x4",
"BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
"Counter": "0,1,2,3",
"EventName": "IDQ.MITE_CYCLES",
"CounterMask": "1",
"PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x79",
"UMask": "0x4",
"BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
"Counter": "0,1,2,3",
"EventName": "IDQ.MITE_UOPS",
"PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x79",
"UMask": "0x8",
"BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
"Counter": "0,1,2,3",
"EventName": "IDQ.DSB_CYCLES",
"CounterMask": "1",
"PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x79",
"UMask": "0x8",
"BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
"Counter": "0,1,2,3",
"EventName": "IDQ.DSB_UOPS",
"PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x79",
"UMask": "0x10",
"BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
"Counter": "0,1,2,3",
"EventName": "IDQ.MS_DSB_CYCLES",
"CounterMask": "1",
"PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x79",
"UMask": "0x18",
"BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
"Counter": "0,1,2,3",
"EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
"CounterMask": "1",
"PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x79",
"UMask": "0x18",
"BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
"Counter": "0,1,2,3",
"EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
"CounterMask": "4",
"PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x79",
"UMask": "0x20",
"BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
"Counter": "0,1,2,3",
"EventName": "IDQ.MS_MITE_UOPS",
"PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x79",
"UMask": "0x24",
"BriefDescription": "Cycles MITE is delivering any Uop",
"Counter": "0,1,2,3",
"EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
"CounterMask": "1",
"PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x79",
"UMask": "0x24",
"BriefDescription": "Cycles MITE is delivering 4 Uops",
"Counter": "0,1,2,3",
"EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
"CounterMask": "4",
"PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x79",
"UMask": "0x30",
"BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
"Counter": "0,1,2,3",
"EventName": "IDQ.MS_CYCLES",
"CounterMask": "1",
"PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x79",
"UMask": "0x30",
"BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
"Counter": "0,1,2,3",
"EventName": "IDQ.MS_UOPS",
"PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EdgeDetect": "1",
"EventCode": "0x79",
"UMask": "0x30",
"BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer",
"Counter": "0,1,2,3",
"EventName": "IDQ.MS_SWITCHES",
"CounterMask": "1",
"PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x80",
"UMask": "0x4",
"BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
"Counter": "0,1,2,3",
"EventName": "ICACHE_16B.IFDATA_STALL",
"PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x83",
"UMask": "0x1",
"BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
"Counter": "0,1,2,3",
"EventName": "ICACHE_64B.IFTAG_HIT",
"SampleAfterValue": "200003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x83",
"UMask": "0x2",
"BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
"Counter": "0,1,2,3",
"EventName": "ICACHE_64B.IFTAG_MISS",
"SampleAfterValue": "200003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x83",
"UMask": "0x4",
"BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
"Counter": "0,1,2,3",
"EventName": "ICACHE_64B.IFTAG_STALL",
"SampleAfterValue": "200003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"Invert": "1",
"EventCode": "0x9C",
"UMask": "0x1",
"BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
"Counter": "0,1,2,3",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
"CounterMask": "1",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x9C",
"UMask": "0x1",
"BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
"Counter": "0,1,2,3",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
"CounterMask": "1",
"PublicDescription": "Cycles with less than 3 uops delivered by the front-end.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x9C",
"UMask": "0x1",
"BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
"Counter": "0,1,2,3",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
"CounterMask": "2",
"PublicDescription": "Cycles with less than 2 uops delivered by the front-end.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x9C",
"UMask": "0x1",
"BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
"Counter": "0,1,2,3",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
"CounterMask": "3",
"PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x9C",
"UMask": "0x1",
"BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
"Counter": "0,1,2,3",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
"CounterMask": "4",
"PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x9C",
"UMask": "0x1",
"BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
"Counter": "0,1,2,3",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
"PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xAB",
"UMask": "0x2",
"BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
"Counter": "0,1,2,3",
"EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
"PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC6",
"UMask": "0x1",
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
"PEBS": "1",
"MSRValue": "0x400406",
"Counter": "0,1,2,3",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
"MSRIndex": "0x3F7",
"TakenAlone": "1",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xC6",
"UMask": "0x1",
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.",
"PEBS": "1",
"MSRValue": "0x200206",
"Counter": "0,1,2,3",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2",
"MSRIndex": "0x3F7",
"TakenAlone": "1",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xC6",
"UMask": "0x1",
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall.",
"PEBS": "1",
"MSRValue": "0x400206",
"Counter": "0,1,2,3",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
"MSRIndex": "0x3F7",
"TakenAlone": "1",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xC6",
"UMask": "0x1",
"BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
"PEBS": "1",
"MSRValue": "0x15",
"Counter": "0,1,2,3",
"EventName": "FRONTEND_RETIRED.STLB_MISS",
"MSRIndex": "0x3F7",
"PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss. ",
"TakenAlone": "1",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xC6",
"UMask": "0x1",
"BriefDescription": "Retired Instructions who experienced iTLB true miss.",
"PEBS": "1",
"MSRValue": "0x14",
"Counter": "0,1,2,3",
"EventName": "FRONTEND_RETIRED.ITLB_MISS",
"MSRIndex": "0x3F7",
"PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
"TakenAlone": "1",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xC6",
"UMask": "0x1",
"BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
"PEBS": "1",
"MSRValue": "0x13",
"Counter": "0,1,2,3",
"EventName": "FRONTEND_RETIRED.L2_MISS",
"MSRIndex": "0x3F7",
"TakenAlone": "1",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xC6",
"UMask": "0x1",
"BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.",
"PEBS": "1",
"MSRValue": "0x12",
"Counter": "0,1,2,3",
"EventName": "FRONTEND_RETIRED.L1I_MISS",
"MSRIndex": "0x3F7",
"TakenAlone": "1",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xC6",
"UMask": "0x1",
"BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.",
"PEBS": "1",
"MSRValue": "0x11",
"Counter": "0,1,2,3",
"EventName": "FRONTEND_RETIRED.DSB_MISS",
"MSRIndex": "0x3F7",
"PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. ",
"TakenAlone": "1",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xC6",
"UMask": "0x1",
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.",
"PEBS": "1",
"MSRValue": "0x300206",
"Counter": "0,1,2,3",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3",
"MSRIndex": "0x3F7",
"TakenAlone": "1",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xC6",
"UMask": "0x1",
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.",
"PEBS": "1",
"MSRValue": "0x100206",
"Counter": "0,1,2,3",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
"MSRIndex": "0x3F7",
"PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
"TakenAlone": "1",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xC6",
"UMask": "0x1",
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
"PEBS": "1",
"MSRValue": "0x420006",
"Counter": "0,1,2,3",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
"MSRIndex": "0x3F7",
"TakenAlone": "1",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xC6",
"UMask": "0x1",
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
"PEBS": "1",
"MSRValue": "0x410006",
"Counter": "0,1,2,3",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
"MSRIndex": "0x3F7",
"TakenAlone": "1",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xC6",
"UMask": "0x1",
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
"PEBS": "1",
"MSRValue": "0x408006",
"Counter": "0,1,2,3",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
"MSRIndex": "0x3F7",
"TakenAlone": "1",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xC6",
"UMask": "0x1",
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
"PEBS": "1",
"MSRValue": "0x404006",
"Counter": "0,1,2,3",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
"MSRIndex": "0x3F7",
"TakenAlone": "1",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xC6",
"UMask": "0x1",
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.",
"PEBS": "1",
"MSRValue": "0x402006",
"Counter": "0,1,2,3",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
"MSRIndex": "0x3F7",
"PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
"TakenAlone": "1",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xC6",
"UMask": "0x1",
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
"PEBS": "1",
"MSRValue": "0x401006",
"Counter": "0,1,2,3",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
"MSRIndex": "0x3F7",
"PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
"TakenAlone": "1",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xC6",
"UMask": "0x1",
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
"PEBS": "1",
"MSRValue": "0x400806",
"Counter": "0,1,2,3",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
"MSRIndex": "0x3F7",
"PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
"TakenAlone": "1",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3"
}
]

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,969 @@
[
{
"EventCode": "0x00",
"UMask": "0x1",
"BriefDescription": "Instructions retired from execution.",
"Counter": "Fixed counter 0",
"EventName": "INST_RETIRED.ANY",
"PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
"SampleAfterValue": "2000003",
"CounterHTOff": "Fixed counter 0"
},
{
"EventCode": "0x00",
"UMask": "0x2",
"BriefDescription": "Core cycles when the thread is not in halt state",
"Counter": "Fixed counter 1",
"EventName": "CPU_CLK_UNHALTED.THREAD",
"PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
"SampleAfterValue": "2000003",
"CounterHTOff": "Fixed counter 1"
},
{
"EventCode": "0x00",
"UMask": "0x2",
"BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
"Counter": "Fixed counter 1",
"EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
"AnyThread": "1",
"SampleAfterValue": "2000003",
"CounterHTOff": "Fixed counter 1"
},
{
"EventCode": "0x00",
"UMask": "0x3",
"BriefDescription": "Reference cycles when the core is not in halt state.",
"Counter": "Fixed counter 2",
"EventName": "CPU_CLK_UNHALTED.REF_TSC",
"PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
"SampleAfterValue": "2000003",
"CounterHTOff": "Fixed counter 2"
},
{
"EventCode": "0x03",
"UMask": "0x2",
"BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded .",
"Counter": "0,1,2,3",
"EventName": "LD_BLOCKS.STORE_FORWARD",
"PublicDescription": "Counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:a. preceding store conflicts with the load (incomplete overlap),b. store forwarding is impossible due to u-arch limitations,c. preceding lock RMW operations are not forwarded,d. store has the no-forward bit set (uncacheable/page-split/masked stores),e. all-blocking stores are used (mostly, fences and port I/O), and others.The most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events. See the table of not supported store forwards in the Optimization Guide.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x03",
"UMask": "0x8",
"BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
"Counter": "0,1,2,3",
"EventName": "LD_BLOCKS.NO_SR",
"PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x07",
"UMask": "0x1",
"BriefDescription": "False dependencies in MOB due to partial compare on address.",
"Counter": "0,1,2,3",
"EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
"PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x0D",
"UMask": "0x1",
"BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
"Counter": "0,1,2,3",
"EventName": "INT_MISC.RECOVERY_CYCLES",
"PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x0D",
"UMask": "0x1",
"BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
"Counter": "0,1,2,3",
"EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
"AnyThread": "1",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x0D",
"UMask": "0x80",
"BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
"Counter": "0,1,2,3",
"EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"Invert": "1",
"EventCode": "0x0E",
"UMask": "0x1",
"BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
"Counter": "0,1,2,3",
"EventName": "UOPS_ISSUED.STALL_CYCLES",
"CounterMask": "1",
"PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x0E",
"UMask": "0x1",
"BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
"Counter": "0,1,2,3",
"EventName": "UOPS_ISSUED.ANY",
"PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x0E",
"UMask": "0x2",
"BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
"Counter": "0,1,2,3",
"EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
"PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x0E",
"UMask": "0x20",
"BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
"Counter": "0,1,2,3",
"EventName": "UOPS_ISSUED.SLOW_LEA",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x14",
"UMask": "0x1",
"BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
"Counter": "0,1,2,3",
"EventName": "ARITH.DIVIDER_ACTIVE",
"CounterMask": "1",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x3C",
"UMask": "0x0",
"BriefDescription": "Thread cycles when thread is not in halt state",
"Counter": "0,1,2,3",
"EventName": "CPU_CLK_UNHALTED.THREAD_P",
"PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x3C",
"UMask": "0x0",
"BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
"Counter": "0,1,2,3",
"EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
"AnyThread": "1",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EdgeDetect": "1",
"EventCode": "0x3C",
"UMask": "0x0",
"BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.",
"Counter": "0,1,2,3",
"EventName": "CPU_CLK_UNHALTED.RING0_TRANS",
"CounterMask": "1",
"PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x3C",
"UMask": "0x1",
"BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
"Counter": "0,1,2,3",
"EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
"SampleAfterValue": "25003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x3C",
"UMask": "0x1",
"BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
"Counter": "0,1,2,3",
"EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
"AnyThread": "1",
"SampleAfterValue": "25003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x3C",
"UMask": "0x1",
"BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
"Counter": "0,1,2,3",
"EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
"AnyThread": "1",
"SampleAfterValue": "25003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x3C",
"UMask": "0x1",
"BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
"Counter": "0,1,2,3",
"EventName": "CPU_CLK_UNHALTED.REF_XCLK",
"SampleAfterValue": "25003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x3C",
"UMask": "0x2",
"BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
"Counter": "0,1,2,3",
"EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
"SampleAfterValue": "25003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x3C",
"UMask": "0x2",
"BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
"Counter": "0,1,2,3",
"EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
"SampleAfterValue": "25003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x4C",
"UMask": "0x1",
"BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
"Counter": "0,1,2,3",
"EventName": "LOAD_HIT_PRE.SW_PF",
"PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x59",
"UMask": "0x1",
"BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.",
"Counter": "0,1,2,3",
"EventName": "PARTIAL_RAT_STALLS.SCOREBOARD",
"PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EdgeDetect": "1",
"Invert": "1",
"EventCode": "0x5E",
"UMask": "0x1",
"BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
"Counter": "0,1,2,3",
"EventName": "RS_EVENTS.EMPTY_END",
"CounterMask": "1",
"PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x5E",
"UMask": "0x1",
"BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
"Counter": "0,1,2,3",
"EventName": "RS_EVENTS.EMPTY_CYCLES",
"PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x87",
"UMask": "0x1",
"BriefDescription": "Stalls caused by changing prefix length of the instruction.",
"Counter": "0,1,2,3",
"EventName": "ILD_STALL.LCP",
"PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA1",
"UMask": "0x1",
"BriefDescription": "Cycles per thread when uops are executed in port 0",
"Counter": "0,1,2,3",
"EventName": "UOPS_DISPATCHED_PORT.PORT_0",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA1",
"UMask": "0x2",
"BriefDescription": "Cycles per thread when uops are executed in port 1",
"Counter": "0,1,2,3",
"EventName": "UOPS_DISPATCHED_PORT.PORT_1",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA1",
"UMask": "0x4",
"BriefDescription": "Cycles per thread when uops are executed in port 2",
"Counter": "0,1,2,3",
"EventName": "UOPS_DISPATCHED_PORT.PORT_2",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA1",
"UMask": "0x8",
"BriefDescription": "Cycles per thread when uops are executed in port 3",
"Counter": "0,1,2,3",
"EventName": "UOPS_DISPATCHED_PORT.PORT_3",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA1",
"UMask": "0x10",
"BriefDescription": "Cycles per thread when uops are executed in port 4",
"Counter": "0,1,2,3",
"EventName": "UOPS_DISPATCHED_PORT.PORT_4",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA1",
"UMask": "0x20",
"BriefDescription": "Cycles per thread when uops are executed in port 5",
"Counter": "0,1,2,3",
"EventName": "UOPS_DISPATCHED_PORT.PORT_5",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA1",
"UMask": "0x40",
"BriefDescription": "Cycles per thread when uops are executed in port 6",
"Counter": "0,1,2,3",
"EventName": "UOPS_DISPATCHED_PORT.PORT_6",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA1",
"UMask": "0x80",
"BriefDescription": "Cycles per thread when uops are executed in port 7",
"Counter": "0,1,2,3",
"EventName": "UOPS_DISPATCHED_PORT.PORT_7",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xa2",
"UMask": "0x1",
"BriefDescription": "Resource-related stall cycles",
"Counter": "0,1,2,3",
"EventName": "RESOURCE_STALLS.ANY",
"PublicDescription": "Counts resource-related stall cycles.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA2",
"UMask": "0x8",
"BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
"Counter": "0,1,2,3",
"EventName": "RESOURCE_STALLS.SB",
"PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA3",
"UMask": "0x1",
"BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
"Counter": "0,1,2,3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
"CounterMask": "1",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA3",
"UMask": "0x4",
"BriefDescription": "Total execution stalls.",
"Counter": "0,1,2,3",
"EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
"CounterMask": "4",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA3",
"UMask": "0x5",
"BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
"Counter": "0,1,2,3",
"EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
"CounterMask": "5",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA3",
"UMask": "0x8",
"BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
"Counter": "0,1,2,3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
"CounterMask": "8",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA3",
"UMask": "0xc",
"BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
"Counter": "0,1,2,3",
"EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
"CounterMask": "12",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA3",
"UMask": "0x10",
"BriefDescription": "Cycles while memory subsystem has an outstanding load.",
"Counter": "0,1,2,3",
"EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
"CounterMask": "16",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA3",
"UMask": "0x14",
"BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
"Counter": "0,1,2,3",
"EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
"CounterMask": "20",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xA6",
"UMask": "0x1",
"BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.",
"Counter": "0,1,2,3",
"EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
"PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA6",
"UMask": "0x2",
"BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
"Counter": "0,1,2,3",
"EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
"PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA6",
"UMask": "0x4",
"BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
"Counter": "0,1,2,3",
"EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
"PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA6",
"UMask": "0x8",
"BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
"Counter": "0,1,2,3",
"EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
"PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA6",
"UMask": "0x10",
"BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
"Counter": "0,1,2,3",
"EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
"PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA6",
"UMask": "0x40",
"BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.",
"Counter": "0,1,2,3",
"EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA8",
"UMask": "0x1",
"BriefDescription": "Number of Uops delivered by the LSD.",
"Counter": "0,1,2,3",
"EventName": "LSD.UOPS",
"PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA8",
"UMask": "0x1",
"BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
"MSRValue": "0x00",
"Counter": "0,1,2,3",
"EventName": "LSD.CYCLES_4_UOPS",
"CounterMask": "4",
"PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xA8",
"UMask": "0x1",
"BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
"Counter": "0,1,2,3",
"EventName": "LSD.CYCLES_ACTIVE",
"CounterMask": "1",
"PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xB1",
"UMask": "0x1",
"BriefDescription": "Cycles where at least 4 uops were executed per-thread",
"Counter": "0,1,2,3",
"EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
"CounterMask": "4",
"PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xB1",
"UMask": "0x1",
"BriefDescription": "Cycles where at least 3 uops were executed per-thread",
"Counter": "0,1,2,3",
"EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
"CounterMask": "3",
"PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xB1",
"UMask": "0x1",
"BriefDescription": "Cycles where at least 2 uops were executed per-thread",
"Counter": "0,1,2,3",
"EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
"CounterMask": "2",
"PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xB1",
"UMask": "0x1",
"BriefDescription": "Cycles where at least 1 uop was executed per-thread",
"Counter": "0,1,2,3",
"EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
"CounterMask": "1",
"PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"Invert": "1",
"EventCode": "0xB1",
"UMask": "0x1",
"BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
"Counter": "0,1,2,3",
"EventName": "UOPS_EXECUTED.STALL_CYCLES",
"CounterMask": "1",
"PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xB1",
"UMask": "0x1",
"BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
"Counter": "0,1,2,3",
"EventName": "UOPS_EXECUTED.THREAD",
"PublicDescription": "Number of uops to be executed per-thread each cycle.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xB1",
"UMask": "0x2",
"BriefDescription": "Number of uops executed on the core.",
"Counter": "0,1,2,3",
"EventName": "UOPS_EXECUTED.CORE",
"PublicDescription": "Number of uops executed from any thread.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"Invert": "1",
"EventCode": "0xB1",
"UMask": "0x2",
"BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
"Counter": "0,1,2,3",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
"CounterMask": "1",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xB1",
"UMask": "0x2",
"BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
"Counter": "0,1,2,3",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
"CounterMask": "4",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xB1",
"UMask": "0x2",
"BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
"Counter": "0,1,2,3",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
"CounterMask": "3",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xB1",
"UMask": "0x2",
"BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
"Counter": "0,1,2,3",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
"CounterMask": "2",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xB1",
"UMask": "0x2",
"BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
"Counter": "0,1,2,3",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
"CounterMask": "1",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xB1",
"UMask": "0x10",
"BriefDescription": "Counts the number of x87 uops dispatched.",
"Counter": "0,1,2,3",
"EventName": "UOPS_EXECUTED.X87",
"PublicDescription": "Counts the number of x87 uops executed.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC0",
"UMask": "0x0",
"BriefDescription": "Number of instructions retired. General Counter - architectural event",
"Counter": "0,1,2,3",
"EventName": "INST_RETIRED.ANY_P",
"Errata": "SKL091, SKL044",
"PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC0",
"UMask": "0x1",
"BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
"PEBS": "2",
"Counter": "1",
"EventName": "INST_RETIRED.PREC_DIST",
"Errata": "SKL091, SKL044",
"PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.",
"SampleAfterValue": "2000003",
"CounterHTOff": "1"
},
{
"Invert": "1",
"EventCode": "0xC0",
"UMask": "0x1",
"BriefDescription": "Number of cycles using always true condition applied to PEBS instructions retired event.",
"PEBS": "2",
"Counter": "0,2,3",
"EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
"CounterMask": "10",
"Errata": "SKL091, SKL044",
"PublicDescription": "Number of cycles using an always true condition applied to PEBS instructions retired event. (inst_ret< 16)",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,2,3"
},
{
"EventCode": "0xC1",
"UMask": "0x3f",
"BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.",
"Counter": "0,1,2,3",
"EventName": "OTHER_ASSISTS.ANY",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"Invert": "1",
"EventCode": "0xC2",
"UMask": "0x2",
"BriefDescription": "Cycles with less than 10 actually retired uops.",
"Counter": "0,1,2,3",
"EventName": "UOPS_RETIRED.TOTAL_CYCLES",
"CounterMask": "10",
"PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"Invert": "1",
"EventCode": "0xC2",
"UMask": "0x2",
"BriefDescription": "Cycles without actually retired uops.",
"Counter": "0,1,2,3",
"EventName": "UOPS_RETIRED.STALL_CYCLES",
"CounterMask": "1",
"PublicDescription": "This event counts cycles without actually retired uops.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC2",
"UMask": "0x2",
"BriefDescription": "Retirement slots used.",
"Counter": "0,1,2,3",
"EventName": "UOPS_RETIRED.RETIRE_SLOTS",
"PublicDescription": "Counts the retirement slots used.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EdgeDetect": "1",
"EventCode": "0xC3",
"UMask": "0x1",
"BriefDescription": "Number of machine clears (nukes) of any type.",
"Counter": "0,1,2,3",
"EventName": "MACHINE_CLEARS.COUNT",
"CounterMask": "1",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC3",
"UMask": "0x4",
"BriefDescription": "Self-modifying code (SMC) detected.",
"Counter": "0,1,2,3",
"EventName": "MACHINE_CLEARS.SMC",
"PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC4",
"UMask": "0x0",
"BriefDescription": "All (macro) branch instructions retired.",
"Counter": "0,1,2,3",
"EventName": "BR_INST_RETIRED.ALL_BRANCHES",
"Errata": "SKL091",
"PublicDescription": "Counts all (macro) branch instructions retired.",
"SampleAfterValue": "400009",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC4",
"UMask": "0x1",
"BriefDescription": "Conditional branch instructions retired.",
"PEBS": "1",
"Counter": "0,1,2,3",
"EventName": "BR_INST_RETIRED.CONDITIONAL",
"Errata": "SKL091",
"PublicDescription": "This event counts conditional branch instructions retired.",
"SampleAfterValue": "400009",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC4",
"UMask": "0x2",
"BriefDescription": "Direct and indirect near call instructions retired.",
"PEBS": "1",
"Counter": "0,1,2,3",
"EventName": "BR_INST_RETIRED.NEAR_CALL",
"Errata": "SKL091",
"PublicDescription": "This event counts both direct and indirect near call instructions retired.",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC4",
"UMask": "0x4",
"BriefDescription": "All (macro) branch instructions retired.",
"PEBS": "2",
"Counter": "0,1,2,3",
"EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
"Errata": "SKL091",
"PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
"SampleAfterValue": "400009",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xC4",
"UMask": "0x8",
"BriefDescription": "Return instructions retired.",
"PEBS": "1",
"Counter": "0,1,2,3",
"EventName": "BR_INST_RETIRED.NEAR_RETURN",
"Errata": "SKL091",
"PublicDescription": "This event counts return instructions retired.",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC4",
"UMask": "0x10",
"BriefDescription": "Not taken branch instructions retired.",
"Counter": "0,1,2,3",
"EventName": "BR_INST_RETIRED.NOT_TAKEN",
"Errata": "SKL091",
"PublicDescription": "This event counts not taken branch instructions retired.",
"SampleAfterValue": "400009",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC4",
"UMask": "0x20",
"BriefDescription": "Taken branch instructions retired.",
"PEBS": "1",
"Counter": "0,1,2,3",
"EventName": "BR_INST_RETIRED.NEAR_TAKEN",
"Errata": "SKL091",
"PublicDescription": "This event counts taken branch instructions retired.",
"SampleAfterValue": "400009",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC4",
"UMask": "0x40",
"BriefDescription": "Far branch instructions retired.",
"PEBS": "1",
"Counter": "0,1,2,3",
"EventName": "BR_INST_RETIRED.FAR_BRANCH",
"Errata": "SKL091",
"PublicDescription": "This event counts far branch instructions retired.",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC5",
"UMask": "0x0",
"BriefDescription": "All mispredicted macro branch instructions retired.",
"Counter": "0,1,2,3",
"EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
"PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
"SampleAfterValue": "400009",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC5",
"UMask": "0x1",
"BriefDescription": "Mispredicted conditional branch instructions retired.",
"PEBS": "1",
"Counter": "0,1,2,3",
"EventName": "BR_MISP_RETIRED.CONDITIONAL",
"PublicDescription": "This event counts mispredicted conditional branch instructions retired.",
"SampleAfterValue": "400009",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC5",
"UMask": "0x2",
"BriefDescription": "Mispredicted direct and indirect near call instructions retired.",
"PEBS": "1",
"Counter": "0,1,2,3",
"EventName": "BR_MISP_RETIRED.NEAR_CALL",
"PublicDescription": "Counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.",
"SampleAfterValue": "400009",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC5",
"UMask": "0x4",
"BriefDescription": "Mispredicted macro branch instructions retired.",
"PEBS": "2",
"Counter": "0,1,2,3",
"EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
"PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
"SampleAfterValue": "400009",
"CounterHTOff": "0,1,2,3"
},
{
"EventCode": "0xC5",
"UMask": "0x20",
"BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
"PEBS": "1",
"Counter": "0,1,2,3",
"EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
"SampleAfterValue": "400009",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xCC",
"UMask": "0x20",
"BriefDescription": "Increments whenever there is an update to the LBR array.",
"Counter": "0,1,2,3",
"EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
"PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xCC",
"UMask": "0x40",
"BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.",
"Counter": "0,1,2,3",
"EventName": "ROB_MISC_EVENTS.PAUSE_INST",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xE6",
"UMask": "0x1",
"BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
"Counter": "0,1,2,3",
"EventName": "BACLEARS.ANY",
"PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
}
]

View File

@ -0,0 +1,117 @@
[
{
"BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd",
"Counter": "0,1,2,3",
"EventCode": "0x4",
"EventName": "LLC_MISSES.MEM_READ",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "iMC"
},
{
"BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr",
"Counter": "0,1,2,3",
"EventCode": "0x4",
"EventName": "LLC_MISSES.MEM_WRITE",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0xC",
"Unit": "iMC"
},
{
"BriefDescription": "Memory controller clock ticks",
"Counter": "0,1,2,3",
"EventName": "UNC_M_CLOCKTICKS",
"PerPkg": "1",
"Unit": "iMC"
},
{
"BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode+C37",
"Counter": "0,1,2,3",
"EventCode": "0x85",
"EventName": "UNC_M_POWER_CHANNEL_PPD",
"MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.",
"MetricName": "power_channel_ppd %",
"PerPkg": "1",
"Unit": "iMC"
},
{
"BriefDescription": "Cycles Memory is in self refresh power mode",
"Counter": "0,1,2,3",
"EventCode": "0x43",
"EventName": "UNC_M_POWER_SELF_REFRESH",
"MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.",
"MetricName": "power_self_refresh %",
"PerPkg": "1",
"Unit": "iMC"
},
{
"BriefDescription": "Pre-charges due to page misses",
"Counter": "0,1,2,3",
"EventCode": "0x2",
"EventName": "UNC_M_PRE_COUNT.PAGE_MISS",
"PerPkg": "1",
"UMask": "0x1",
"Unit": "iMC"
},
{
"BriefDescription": "Pre-charge for reads",
"Counter": "0,1,2,3",
"EventCode": "0x2",
"EventName": "UNC_M_PRE_COUNT.RD",
"PerPkg": "1",
"UMask": "0x4",
"Unit": "iMC"
},
{
"BriefDescription": "Pre-charge for writes",
"Counter": "0,1,2,3",
"EventCode": "0x2",
"EventName": "UNC_M_PRE_COUNT.WR",
"PerPkg": "1",
"UMask": "0x8",
"Unit": "iMC"
},
{
"BriefDescription": "Intel Optane DC persistent memory bandwidth read (MB/sec). Derived from unc_m_pmm_rpq_inserts",
"Counter": "0,1,2,3",
"EventCode": "0xE3",
"EventName": "UNC_M_PMM_BANDWIDTH.READ",
"PerPkg": "1",
"ScaleUnit": "6.103515625E-5MB/sec",
"Unit": "iMC"
},
{
"BriefDescription": "Intel Optane DC persistent memory bandwidth write (MB/sec). Derived from unc_m_pmm_wpq_inserts",
"Counter": "0,1,2,3",
"EventCode": "0xE7",
"EventName": "UNC_M_PMM_BANDWIDTH.WRITE",
"PerPkg": "1",
"ScaleUnit": "6.103515625E-5MB/sec",
"Unit": "iMC"
},
{
"BriefDescription": "Intel Optane DC persistent memory bandwidth total (MB/sec). Derived from unc_m_pmm_rpq_inserts",
"Counter": "0,1,2,3",
"EventCode": "0xE3",
"EventName": "UNC_M_PMM_BANDWIDTH.TOTAL",
"MetricExpr": "UNC_M_PMM_RPQ_INSERTS + UNC_M_PMM_WPQ_INSERTS",
"MetricName": "UNC_M_PMM_BANDWIDTH.TOTAL",
"PerPkg": "1",
"ScaleUnit": "6.103515625E-5MB/sec",
"Unit": "iMC"
},
{
"BriefDescription": "Intel Optane DC persistent memory read latency (ns). Derived from unc_m_pmm_rpq_occupancy.all",
"Counter": "0,1,2,3",
"EventCode": "0xE0",
"EventName": "UNC_M_PMM_READ_LATENCY",
"MetricExpr": "UNC_M_PMM_RPQ_OCCUPANCY.ALL / UNC_M_PMM_RPQ_INSERTS / UNC_M_CLOCKTICKS",
"MetricName": "UNC_M_PMM_READ_LATENCY",
"PerPkg": "1",
"ScaleUnit": "6000000000ns",
"UMask": "0x1",
"Unit": "iMC"
}
]

View File

@ -0,0 +1,255 @@
[
{
"BriefDescription": "Uncore cache clock ticks",
"Counter": "0,1,2,3",
"EventName": "UNC_CHA_CLOCKTICKS",
"PerPkg": "1",
"Unit": "CHA"
},
{
"BriefDescription": "LLC misses - Uncacheable reads (from cpu) . Derived from unc_cha_tor_inserts.ia_miss",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "LLC_MISSES.UNCACHEABLE",
"Filter": "config1=0x40e33",
"PerPkg": "1",
"UMask": "0x21",
"Unit": "CHA"
},
{
"BriefDescription": "MMIO reads. Derived from unc_cha_tor_inserts.ia_miss",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "LLC_MISSES.MMIO_READ",
"Filter": "config1=0x40040e33",
"PerPkg": "1",
"UMask": "0x21",
"Unit": "CHA"
},
{
"BriefDescription": "MMIO writes. Derived from unc_cha_tor_inserts.ia_miss",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "LLC_MISSES.MMIO_WRITE",
"Filter": "config1=0x40041e33",
"PerPkg": "1",
"UMask": "0x21",
"Unit": "CHA"
},
{
"BriefDescription": "Streaming stores (full cache line). Derived from unc_cha_tor_inserts.ia_miss",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "LLC_REFERENCES.STREAMING_FULL",
"Filter": "config1=0x41833",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x21",
"Unit": "CHA"
},
{
"BriefDescription": "Streaming stores (partial cache line). Derived from unc_cha_tor_inserts.ia_miss",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "LLC_REFERENCES.STREAMING_PARTIAL",
"Filter": "config1=0x41a33",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x21",
"Unit": "CHA"
},
{
"BriefDescription": "read requests from home agent",
"Counter": "0,1,2,3",
"EventCode": "0x50",
"EventName": "UNC_CHA_REQUESTS.READS",
"PerPkg": "1",
"UMask": "0x03",
"Unit": "CHA"
},
{
"BriefDescription": "read requests from local home agent",
"Counter": "0,1,2,3",
"EventCode": "0x50",
"EventName": "UNC_CHA_REQUESTS.READS_LOCAL",
"PerPkg": "1",
"UMask": "0x01",
"Unit": "CHA"
},
{
"BriefDescription": "read requests from remote home agent",
"Counter": "0,1,2,3",
"EventCode": "0x50",
"EventName": "UNC_CHA_REQUESTS.READS_REMOTE",
"PerPkg": "1",
"UMask": "0x02",
"Unit": "CHA"
},
{
"BriefDescription": "write requests from home agent",
"Counter": "0,1,2,3",
"EventCode": "0x50",
"EventName": "UNC_CHA_REQUESTS.WRITES",
"PerPkg": "1",
"UMask": "0x0C",
"Unit": "CHA"
},
{
"BriefDescription": "write requests from local home agent",
"Counter": "0,1,2,3",
"EventCode": "0x50",
"EventName": "UNC_CHA_REQUESTS.WRITES_LOCAL",
"PerPkg": "1",
"UMask": "0x04",
"Unit": "CHA"
},
{
"BriefDescription": "write requests from remote home agent",
"Counter": "0,1,2,3",
"EventCode": "0x50",
"EventName": "UNC_CHA_REQUESTS.WRITES_REMOTE",
"PerPkg": "1",
"UMask": "0x08",
"Unit": "CHA"
},
{
"BriefDescription": "UPI interconnect send bandwidth for payload. Derived from unc_upi_txl_flits.all_data",
"Counter": "0,1,2,3",
"EventCode": "0x2",
"EventName": "UPI_DATA_BANDWIDTH_TX",
"PerPkg": "1",
"ScaleUnit": "7.11E-06Bytes",
"UMask": "0x0F",
"Unit": "UPI LL"
},
{
"BriefDescription": "PCI Express bandwidth reading at IIO. Derived from unc_iio_data_req_of_cpu.mem_read.part0",
"Counter": "0,1",
"EventCode": "0x83",
"EventName": "LLC_MISSES.PCIE_READ",
"FCMask": "0x07",
"Filter": "ch_mask=0x1f",
"MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
"MetricName": "LLC_MISSES.PCIE_READ",
"PerPkg": "1",
"PortMask": "0x01",
"ScaleUnit": "4Bytes",
"UMask": "0x04",
"Unit": "IIO"
},
{
"BriefDescription": "PCI Express bandwidth writing at IIO. Derived from unc_iio_data_req_of_cpu.mem_write.part0",
"Counter": "0,1",
"EventCode": "0x83",
"EventName": "LLC_MISSES.PCIE_WRITE",
"FCMask": "0x07",
"Filter": "ch_mask=0x1f",
"MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
"MetricName": "LLC_MISSES.PCIE_WRITE",
"PerPkg": "1",
"PortMask": "0x01",
"ScaleUnit": "4Bytes",
"UMask": "0x01",
"Unit": "IIO"
},
{
"BriefDescription": "PCI Express bandwidth writing at IIO, part 0",
"Counter": "0,1",
"EventCode": "0x83",
"EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0",
"FCMask": "0x07",
"MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
"MetricName": "LLC_MISSES.PCIE_WRITE",
"PerPkg": "1",
"PortMask": "0x01",
"ScaleUnit": "4Bytes",
"UMask": "0x01",
"Unit": "IIO"
},
{
"BriefDescription": "PCI Express bandwidth writing at IIO, part 1",
"Counter": "0,1",
"EventCode": "0x83",
"EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1",
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x02",
"ScaleUnit": "4Bytes",
"UMask": "0x01",
"Unit": "IIO"
},
{
"BriefDescription": "PCI Express bandwidth writing at IIO, part 2",
"Counter": "0,1",
"EventCode": "0x83",
"EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2",
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x04",
"ScaleUnit": "4Bytes",
"UMask": "0x01",
"Unit": "IIO"
},
{
"BriefDescription": "PCI Express bandwidth writing at IIO, part 3",
"Counter": "0,1",
"EventCode": "0x83",
"EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x08",
"ScaleUnit": "4Bytes",
"UMask": "0x01",
"Unit": "IIO"
},
{
"BriefDescription": "PCI Express bandwidth reading at IIO, part 0",
"Counter": "0,1",
"EventCode": "0x83",
"EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0",
"FCMask": "0x07",
"MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
"MetricName": "LLC_MISSES.PCIE_READ",
"PerPkg": "1",
"PortMask": "0x01",
"ScaleUnit": "4Bytes",
"UMask": "0x04",
"Unit": "IIO"
},
{
"BriefDescription": "PCI Express bandwidth reading at IIO, part 1",
"Counter": "0,1",
"EventCode": "0x83",
"EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1",
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x02",
"ScaleUnit": "4Bytes",
"UMask": "0x04",
"Unit": "IIO"
},
{
"BriefDescription": "PCI Express bandwidth reading at IIO, part 2",
"Counter": "0,1",
"EventCode": "0x83",
"EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2",
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x04",
"ScaleUnit": "4Bytes",
"UMask": "0x04",
"Unit": "IIO"
},
{
"BriefDescription": "PCI Express bandwidth reading at IIO, part 3",
"Counter": "0,1",
"EventCode": "0x83",
"EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x08",
"ScaleUnit": "4Bytes",
"UMask": "0x04",
"Unit": "IIO"
}
]

View File

@ -0,0 +1,285 @@
[
{
"EventCode": "0x08",
"UMask": "0x1",
"BriefDescription": "Load misses in all DTLB levels that cause page walks",
"Counter": "0,1,2,3",
"EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
"PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x08",
"UMask": "0x2",
"BriefDescription": "Page walk completed due to a demand data load to a 4K page",
"Counter": "0,1,2,3",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
"PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x08",
"UMask": "0x4",
"BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page",
"Counter": "0,1,2,3",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
"PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x08",
"UMask": "0x8",
"BriefDescription": "Page walk completed due to a demand data load to a 1G page",
"Counter": "0,1,2,3",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
"PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x08",
"UMask": "0xe",
"BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
"Counter": "0,1,2,3",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
"PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x08",
"UMask": "0x10",
"BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
"Counter": "0,1,2,3",
"EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
"PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x08",
"UMask": "0x10",
"BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
"Counter": "0,1,2,3",
"EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
"CounterMask": "1",
"PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x08",
"UMask": "0x20",
"BriefDescription": "Loads that miss the DTLB and hit the STLB.",
"Counter": "0,1,2,3",
"EventName": "DTLB_LOAD_MISSES.STLB_HIT",
"PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x49",
"UMask": "0x1",
"BriefDescription": "Store misses in all DTLB levels that cause page walks",
"Counter": "0,1,2,3",
"EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
"PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x49",
"UMask": "0x2",
"BriefDescription": "Page walk completed due to a demand data store to a 4K page",
"Counter": "0,1,2,3",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
"PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x49",
"UMask": "0x4",
"BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page",
"Counter": "0,1,2,3",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
"PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x49",
"UMask": "0x8",
"BriefDescription": "Page walk completed due to a demand data store to a 1G page",
"Counter": "0,1,2,3",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
"PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x49",
"UMask": "0xe",
"BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
"Counter": "0,1,2,3",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
"PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x49",
"UMask": "0x10",
"BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
"Counter": "0,1,2,3",
"EventName": "DTLB_STORE_MISSES.WALK_PENDING",
"PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x49",
"UMask": "0x10",
"BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
"Counter": "0,1,2,3",
"EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
"CounterMask": "1",
"PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x49",
"UMask": "0x20",
"BriefDescription": "Stores that miss the DTLB and hit the STLB.",
"Counter": "0,1,2,3",
"EventName": "DTLB_STORE_MISSES.STLB_HIT",
"PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x4F",
"UMask": "0x10",
"BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.",
"Counter": "0,1,2,3",
"EventName": "EPT.WALK_PENDING",
"PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x85",
"UMask": "0x1",
"BriefDescription": "Misses at all ITLB levels that cause page walks",
"Counter": "0,1,2,3",
"EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
"PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x85",
"UMask": "0x2",
"BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
"Counter": "0,1,2,3",
"EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
"PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x85",
"UMask": "0x4",
"BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
"Counter": "0,1,2,3",
"EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
"PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x85",
"UMask": "0x8",
"BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
"Counter": "0,1,2,3",
"EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
"PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x85",
"UMask": "0xe",
"BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
"Counter": "0,1,2,3",
"EventName": "ITLB_MISSES.WALK_COMPLETED",
"PublicDescription": "Counts completed page walks (2M and 4M page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x85",
"UMask": "0x10",
"BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
"Counter": "0,1,2,3",
"EventName": "ITLB_MISSES.WALK_PENDING",
"PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x85",
"UMask": "0x10",
"BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.",
"MSRValue": "0x00",
"Counter": "0,1,2,3",
"EventName": "ITLB_MISSES.WALK_ACTIVE",
"CounterMask": "1",
"PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x85",
"UMask": "0x20",
"BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
"Counter": "0,1,2,3",
"EventName": "ITLB_MISSES.STLB_HIT",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xAE",
"UMask": "0x1",
"BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
"Counter": "0,1,2,3",
"EventName": "ITLB.ITLB_FLUSH",
"PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xBD",
"UMask": "0x1",
"BriefDescription": "DTLB flush attempts of the thread-specific entries",
"Counter": "0,1,2,3",
"EventName": "TLB_FLUSH.DTLB_THREAD",
"PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xBD",
"UMask": "0x20",
"BriefDescription": "STLB flush attempts",
"Counter": "0,1,2,3",
"EventName": "TLB_FLUSH.STLB_ANY",
"PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
"SampleAfterValue": "100007",
"CounterHTOff": "0,1,2,3,4,5,6,7"
}
]

View File

@ -31,7 +31,7 @@
},
{
"PEBS": "1",
"PublicDescription": "This event counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"PublicDescription": "This event counts line-split load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"EventCode": "0xD0",
"Counter": "0,1,2,3",
"UMask": "0x41",
@ -42,7 +42,7 @@
},
{
"PEBS": "1",
"PublicDescription": "This event counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"PublicDescription": "This event counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"EventCode": "0xD0",
"Counter": "0,1,2,3",
"UMask": "0x42",

View File

@ -778,7 +778,7 @@
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceeding smaller uncompleted store. See the table of not supported store forwards in the Intel? 64 and IA-32 Architectures Optimization Reference Manual. The penalty for blocked store forwarding is that the load must wait for the store to complete before it can be issued.",
"PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceding smaller uncompleted store. See the table of not supported store forwards in the Intel? 64 and IA-32 Architectures Optimization Reference Manual. The penalty for blocked store forwarding is that the load must wait for the store to complete before it can be issued.",
"EventCode": "0x03",
"Counter": "0,1,2,3",
"UMask": "0x2",

View File

@ -121,7 +121,7 @@
"EventName": "OFFCORE_RESPONSE.ANY_PF_L2.OUTSTANDING",
"MSRIndex": "0x1a6",
"SampleAfterValue": "100007",
"BriefDescription": "Counts any Prefetch requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
"BriefDescription": "Counts any Prefetch requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
"Offcore": "1"
},
{
@ -187,7 +187,7 @@
"EventName": "OFFCORE_RESPONSE.ANY_READ.OUTSTANDING",
"MSRIndex": "0x1a6",
"SampleAfterValue": "100007",
"BriefDescription": "Counts any Read request that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
"BriefDescription": "Counts any Read request that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
"Offcore": "1"
},
{
@ -253,7 +253,7 @@
"EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.OUTSTANDING",
"MSRIndex": "0x1a6",
"SampleAfterValue": "100007",
"BriefDescription": "Counts Demand code reads and prefetch code read requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
"BriefDescription": "Counts Demand code reads and prefetch code read requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
"Offcore": "1"
},
{
@ -319,7 +319,7 @@
"EventName": "OFFCORE_RESPONSE.ANY_RFO.OUTSTANDING",
"MSRIndex": "0x1a6",
"SampleAfterValue": "100007",
"BriefDescription": "Counts Demand cacheable data write requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
"BriefDescription": "Counts Demand cacheable data write requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
"Offcore": "1"
},
{
@ -385,7 +385,7 @@
"EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.OUTSTANDING",
"MSRIndex": "0x1a6",
"SampleAfterValue": "100007",
"BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
"BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
"Offcore": "1"
},
{
@ -451,7 +451,7 @@
"EventName": "OFFCORE_RESPONSE.ANY_REQUEST.OUTSTANDING",
"MSRIndex": "0x1a6",
"SampleAfterValue": "100007",
"BriefDescription": "Counts any request that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
"BriefDescription": "Counts any request that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
"Offcore": "1"
},
{
@ -539,7 +539,7 @@
"EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.OUTSTANDING",
"MSRIndex": "0x1a6",
"SampleAfterValue": "100007",
"BriefDescription": "Counts L1 data HW prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
"BriefDescription": "Counts L1 data HW prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
"Offcore": "1"
},
{
@ -605,7 +605,7 @@
"EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.OUTSTANDING",
"MSRIndex": "0x1a6",
"SampleAfterValue": "100007",
"BriefDescription": "Counts Software Prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
"BriefDescription": "Counts Software Prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
"Offcore": "1"
},
{
@ -682,7 +682,7 @@
"EventName": "OFFCORE_RESPONSE.BUS_LOCKS.OUTSTANDING",
"MSRIndex": "0x1a6",
"SampleAfterValue": "100007",
"BriefDescription": "Counts Bus locks and split lock requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
"BriefDescription": "Counts Bus locks and split lock requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
"Offcore": "1"
},
{
@ -748,7 +748,7 @@
"EventName": "OFFCORE_RESPONSE.UC_CODE_READS.OUTSTANDING",
"MSRIndex": "0x1a6",
"SampleAfterValue": "100007",
"BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
"BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
"Offcore": "1"
},
{
@ -869,7 +869,7 @@
"EventName": "OFFCORE_RESPONSE.PARTIAL_READS.OUTSTANDING",
"MSRIndex": "0x1a6",
"SampleAfterValue": "100007",
"BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
"BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
"Offcore": "1"
},
{
@ -935,7 +935,7 @@
"EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.OUTSTANDING",
"MSRIndex": "0x1a6",
"SampleAfterValue": "100007",
"BriefDescription": "Counts L2 code HW prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
"BriefDescription": "Counts L2 code HW prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
"Offcore": "1"
},
{
@ -1067,7 +1067,7 @@
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.OUTSTANDING",
"MSRIndex": "0x1a6",
"SampleAfterValue": "100007",
"BriefDescription": "Counts demand code reads and prefetch code reads that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
"BriefDescription": "Counts demand code reads and prefetch code reads that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
"Offcore": "1"
},
{
@ -1133,7 +1133,7 @@
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.OUTSTANDING",
"MSRIndex": "0x1a6",
"SampleAfterValue": "100007",
"BriefDescription": "Counts Demand cacheable data writes that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
"BriefDescription": "Counts Demand cacheable data writes that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
"Offcore": "1"
},
{
@ -1199,7 +1199,7 @@
"EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.OUTSTANDING",
"MSRIndex": "0x1a6",
"SampleAfterValue": "100007",
"BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
"BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
"Offcore": "1"
},
{

View File

@ -31,4 +31,5 @@ GenuineIntel-6-2A,v15,sandybridge,core
GenuineIntel-6-2C,v2,westmereep-dp,core
GenuineIntel-6-25,v2,westmereep-sp,core
GenuineIntel-6-2F,v2,westmereex,core
GenuineIntel-6-55,v1,skylakex,core
GenuineIntel-6-55-[01234],v1,skylakex,core
GenuineIntel-6-55-[56789ABCDEF],v1,cascadelakex,core

1 Family-model Version Filename EventType
31 GenuineIntel-6-2C v2 westmereep-dp core
32 GenuineIntel-6-25 v2 westmereep-sp core
33 GenuineIntel-6-2F v2 westmereex core
34 GenuineIntel-6-55 GenuineIntel-6-55-[01234] v1 skylakex core
35 GenuineIntel-6-55-[56789ABCDEF] v1 cascadelakex core

View File

@ -31,7 +31,7 @@
},
{
"PEBS": "1",
"PublicDescription": "This event counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"PublicDescription": "This event counts line-split load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"EventCode": "0xD0",
"Counter": "0,1,2,3",
"UMask": "0x41",
@ -42,7 +42,7 @@
},
{
"PEBS": "1",
"PublicDescription": "This event counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"PublicDescription": "This event counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"EventCode": "0xD0",
"Counter": "0,1,2,3",
"UMask": "0x42",

View File

@ -778,7 +778,7 @@
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceeding smaller uncompleted store. See the table of not supported store forwards in the Intel? 64 and IA-32 Architectures Optimization Reference Manual. The penalty for blocked store forwarding is that the load must wait for the store to complete before it can be issued.",
"PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceding smaller uncompleted store. See the table of not supported store forwards in the Intel? 64 and IA-32 Architectures Optimization Reference Manual. The penalty for blocked store forwarding is that the load must wait for the store to complete before it can be issued.",
"EventCode": "0x03",
"Counter": "0,1,2,3",
"UMask": "0x2",

View File

@ -73,7 +73,7 @@
},
{
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
"MetricGroup": "Memory_Bound;Memory_Lat",
"MetricName": "Load_Miss_Real_Latency"
},

View File

@ -73,7 +73,7 @@
},
{
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
"MetricGroup": "Memory_Bound;Memory_Lat",
"MetricName": "Load_Miss_Real_Latency"
},

View File

@ -428,7 +428,7 @@
"EventCode": "0x5C",
"EventName": "UNC_CHA_SNOOP_RESP.RSP_WBWB",
"PerPkg": "1",
"PublicDescription": "Counts when a transaction with the opcode type Rsp*WB Snoop Response was received which indicates which indicates the data was written back to it's home. This is returned when a non-RFO request hits a cacheline in the Modified state. The Cache can either downgrade the cacheline to a S (Shared) or I (Invalid) state depending on how the system has been configured. This reponse will also be sent when a cache requests E (Exclusive) ownership of a cache line without receiving data, because the cache must acquire ownership.",
"PublicDescription": "Counts when a transaction with the opcode type Rsp*WB Snoop Response was received which indicates which indicates the data was written back to it's home. This is returned when a non-RFO request hits a cacheline in the Modified state. The Cache can either downgrade the cacheline to a S (Shared) or I (Invalid) state depending on how the system has been configured. This response will also be sent when a cache requests E (Exclusive) ownership of a cache line without receiving data, because the cache must acquire ownership.",
"UMask": "0x10",
"Unit": "CHA"
},
@ -967,7 +967,7 @@
"EventCode": "0x57",
"EventName": "UNC_M2M_PREFCAM_INSERTS",
"PerPkg": "1",
"PublicDescription": "Counts when the M2M (Mesh to Memory) recieves a prefetch request and inserts it into its outstanding prefetch queue. Explanatory Side Note: the prefect queue is made from CAM: Content Addressable Memory",
"PublicDescription": "Counts when the M2M (Mesh to Memory) receives a prefetch request and inserts it into its outstanding prefetch queue. Explanatory Side Note: the prefect queue is made from CAM: Content Addressable Memory",
"Unit": "M2M"
},
{
@ -1041,7 +1041,7 @@
"EventCode": "0x31",
"EventName": "UNC_UPI_RxL_BYPASSED.SLOT0",
"PerPkg": "1",
"PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the slot0 RxQ buffer (Receive Queue) and passed directly to the Egress. This is a latency optimization, and should generally be the common case. If this value is less than the number of FLITs transfered, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.",
"PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the slot0 RxQ buffer (Receive Queue) and passed directly to the Egress. This is a latency optimization, and should generally be the common case. If this value is less than the number of FLITs transferred, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.",
"UMask": "0x1",
"Unit": "UPI LL"
},
@ -1051,17 +1051,17 @@
"EventCode": "0x31",
"EventName": "UNC_UPI_RxL_BYPASSED.SLOT1",
"PerPkg": "1",
"PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the slot1 RxQ buffer (Receive Queue) and passed directly across the BGF and into the Egress. This is a latency optimization, and should generally be the common case. If this value is less than the number of FLITs transfered, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.",
"PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the slot1 RxQ buffer (Receive Queue) and passed directly across the BGF and into the Egress. This is a latency optimization, and should generally be the common case. If this value is less than the number of FLITs transferred, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.",
"UMask": "0x2",
"Unit": "UPI LL"
},
{
"BriefDescription": "FLITs received which bypassed the Slot0 Recieve Buffer",
"BriefDescription": "FLITs received which bypassed the Slot0 Receive Buffer",
"Counter": "0,1,2,3",
"EventCode": "0x31",
"EventName": "UNC_UPI_RxL_BYPASSED.SLOT2",
"PerPkg": "1",
"PublicDescription": "Counts incoming FLITs (FLow control unITs) whcih bypassed the slot2 RxQ buffer (Receive Queue) and passed directly to the Egress. This is a latency optimization, and should generally be the common case. If this value is less than the number of FLITs transfered, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.",
"PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the slot2 RxQ buffer (Receive Queue) and passed directly to the Egress. This is a latency optimization, and should generally be the common case. If this value is less than the number of FLITs transferred, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.",
"UMask": "0x4",
"Unit": "UPI LL"
},

Some files were not shown because too many files have changed in this diff Show More