habanalabs: add uapi to retrieve aggregate H/W events

Add a new opcode to INFO IOCTL to retrieve aggregate H/W events. i.e. the
events counters are NOT cleared upon device reset, but count from the
loading of the driver.

Add the code to support it in the device event handling function.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Reviewed-by: Omer Shpigelman <oshpigelman@habana.ai>
This commit is contained in:
Oded Gabbay 2019-08-28 21:51:52 +03:00
parent 75b3cb2bb0
commit e9730763a2
5 changed files with 22 additions and 7 deletions

View File

@ -4469,6 +4469,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
struct goya_device *goya = hdev->asic_specific;
goya->events_stat[event_type]++;
goya->events_stat_aggregate[event_type]++;
switch (event_type) {
case GOYA_ASYNC_EVENT_ID_PCIE_IF:
@ -4550,12 +4551,16 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
}
}
void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
{
struct goya_device *goya = hdev->asic_specific;
*size = (u32) sizeof(goya->events_stat);
if (aggregate) {
*size = (u32) sizeof(goya->events_stat_aggregate);
return goya->events_stat_aggregate;
}
*size = (u32) sizeof(goya->events_stat);
return goya->events_stat;
}

View File

@ -162,6 +162,7 @@ struct goya_device {
u64 ddr_bar_cur_addr;
u32 events_stat[GOYA_ASYNC_EVENT_ID_SIZE];
u32 events_stat_aggregate[GOYA_ASYNC_EVENT_ID_SIZE];
u32 hw_cap_initialized;
u8 device_cpu_mmu_mappings_done;
};
@ -215,7 +216,7 @@ int goya_suspend(struct hl_device *hdev);
int goya_resume(struct hl_device *hdev);
void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry);
void *goya_get_events_stat(struct hl_device *hdev, u32 *size);
void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size);
void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec);

View File

@ -558,7 +558,8 @@ struct hl_asic_funcs {
struct hl_eq_entry *eq_entry);
void (*set_pll_profile)(struct hl_device *hdev,
enum hl_pll_frequency freq);
void* (*get_events_stat)(struct hl_device *hdev, u32 *size);
void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
u32 *size);
u64 (*read_pte)(struct hl_device *hdev, u64 addr);
void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val);
void (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard);

View File

@ -75,7 +75,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0;
}
static int hw_events_info(struct hl_device *hdev, struct hl_info_args *args)
static int hw_events_info(struct hl_device *hdev, bool aggregate,
struct hl_info_args *args)
{
u32 size, max_size = args->return_size;
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
@ -84,7 +85,7 @@ static int hw_events_info(struct hl_device *hdev, struct hl_info_args *args)
if ((!max_size) || (!out))
return -EINVAL;
arr = hdev->asic_funcs->get_events_stat(hdev, &size);
arr = hdev->asic_funcs->get_events_stat(hdev, aggregate, &size);
return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0;
}
@ -251,7 +252,7 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
switch (args->op) {
case HL_INFO_HW_EVENTS:
rc = hw_events_info(hdev, args);
rc = hw_events_info(hdev, false, args);
break;
case HL_INFO_DRAM_USAGE:
@ -266,6 +267,10 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
rc = device_utilization(hdev, args);
break;
case HL_INFO_HW_EVENTS_AGGREGATE:
rc = hw_events_info(hdev, true, args);
break;
default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -ENOTTY;

View File

@ -93,6 +93,8 @@ enum hl_device_status {
* The period can be between 100ms to 1s, in
* resolution of 100ms. The return value is a
* percentage of the utilization rate.
* HL_INFO_HW_EVENTS_AGGREGATE - Receive an array describing how many times each
* event occurred since the driver was loaded.
*/
#define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1
@ -100,6 +102,7 @@ enum hl_device_status {
#define HL_INFO_HW_IDLE 3
#define HL_INFO_DEVICE_STATUS 4
#define HL_INFO_DEVICE_UTILIZATION 6
#define HL_INFO_HW_EVENTS_AGGREGATE 7
#define HL_INFO_VERSION_MAX_LEN 128