New feature to add support for NTB virtual MSI interrupts, along with the
ability to test and use this feature in the NTB transport layer.  Also, bug
 fixes for the AMD and Switchtec drivers, as well as some general
 patches.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEoE9b9c3U2JxX98mqbmZLrHqL0iMFAl0w0EwACgkQbmZLrHqL
 0iNt4RAAiw2Cb2y5D36jI+lPl8xN2REk4x7b/FKFfLSzM/7Bu1okprrElKO6735r
 0wRpyZS5lbYYr0QybzMrNfDMLYywFmA+C7GY7gQpR7GLYLb9chmvOmubfEbakHYI
 4qKAWgaZt6PaiLQp76y3bFjUiBrd8KzVi5J891lDKvjzsGUjdNeFSlPlDbzTP8H9
 +pBxnYn4G7MxVTebxjpOs6bQPO1sJZl4cyW40avTC1x+6bMlU1B361IhwtuabQH8
 6rq6MQbaDzHTpuBZqZR8O+JxFzpffnGRu+smWPNQS2p95AewGIaDUsjZ6ph81rF1
 mUxt/UJYP0RlIY+RQQ0Ie3cBUZ/I7wQzpFAXnqb8Nzba4tpc9ITX04hfnWKg6Kr0
 sgj2Ro6I4TS3TSw6vquwvR2hn6+RZi06z/D0AXdUl+R3QxJXMyZygDDPDpTImZNy
 cnu+1Y7pdgjASXBvHKhM39+ynGLGze7xBIs6GO6TjJoVgZdMaW1/N/wsldYRc3Uf
 K0VSHowGHOyIV8d33oYWK2oSgWFrRCCanrCXIltNwl6NM05/b474L/8fiSnJ37PZ
 dJspdPdrut9W3018cRVHBhAX9csucbSa46FKz1Z0FEFjDJJzqy0isKDsoExBZqBH
 AqCnP548sbJF9+yOC475W36KMHBhyNvLS8XxD2J3DQpPDMRj//Y=
 =P1lR
 -----END PGP SIGNATURE-----

Merge tag 'ntb-5.3' of git://github.com/jonmason/ntb

Pull NTB updates from Jon Mason:
 "New feature to add support for NTB virtual MSI interrupts, the ability
  to test and use this feature in the NTB transport layer.

  Also, bug fixes for the AMD and Switchtec drivers, as well as some
  general patches"

* tag 'ntb-5.3' of git://github.com/jonmason/ntb: (22 commits)
  NTB: Describe the ntb_msi_test client in the documentation.
  NTB: Add MSI interrupt support to ntb_transport
  NTB: Add ntb_msi_test support to ntb_test
  NTB: Introduce NTB MSI Test Client
  NTB: Introduce MSI library
  NTB: Rename ntb.c to support multiple source files in the module
  NTB: Introduce functions to calculate multi-port resource index
  NTB: Introduce helper functions to calculate logical port number
  PCI/switchtec: Add module parameter to request more interrupts
  PCI/MSI: Support allocating virtual MSI interrupts
  ntb_hw_switchtec: Fix setup MW with failure bug
  ntb_hw_switchtec: Skip unnecessary re-setup of shared memory window for crosslink case
  ntb_hw_switchtec: Remove redundant steps of switchtec_ntb_reinit_peer() function
  NTB: correct ntb_dev_ops and ntb_dev comment typos
  NTB: amd: Silence shift wrapping warning in amd_ntb_db_vector_mask()
  ntb_hw_switchtec: potential shift wrapping bug in switchtec_ntb_init_sndev()
  NTB: ntb_transport: Ensure qp->tx_mw_dma_addr is initaliazed
  NTB: ntb_hw_amd: set peer limit register
  NTB: ntb_perf: Clear stale values in doorbell and command SPAD register
  NTB: ntb_perf: Disable NTB link after clearing peer XLAT registers
  ...
This commit is contained in:
Linus Torvalds 2019-07-21 09:46:59 -07:00
commit bec5545ede
19 changed files with 1458 additions and 60 deletions

View File

@ -200,6 +200,33 @@ Debugfs Files:
This file is used to read and write peer scratchpads. See
*spad* for details.
NTB MSI Test Client (ntb\_msi\_test)
------------------------------------
The MSI test client serves to test and debug the MSI library which
allows for passing MSI interrupts across NTB memory windows. The
test client is interacted with through the debugfs filesystem:
* *debugfs*/ntb\_tool/*hw*/
A directory in debugfs will be created for each
NTB device probed by the tool. This directory is shortened to *hw*
below.
* *hw*/port
This file describes the local port number
* *hw*/irq*_occurrences
One occurrences file exists for each interrupt and, when read,
returns the number of times the interrupt has been triggered.
* *hw*/peer*/port
This file describes the port number for each peer
* *hw*/peer*/count
This file describes the number of interrupts that can be
triggered on each peer
* *hw*/peer*/trigger
Writing an interrupt number (any number less than the value
specified in count) will trigger the interrupt on the
specified peer. The corresponding occurrences file on that
peer should then be incremented.
NTB Hardware Drivers
====================

View File

@ -13,6 +13,17 @@ menuconfig NTB
if NTB
config NTB_MSI
bool "MSI Interrupt Support"
depends on PCI_MSI
help
Support using MSI interrupt forwarding instead of (or in addition to)
hardware doorbells. MSI interrupts typically offer lower latency
than doorbells and more MSI interrupts can be made available to
clients. However this requires an extra memory window and support
in the hardware driver for creating the MSI interrupts.
If unsure, say N.
source "drivers/ntb/hw/Kconfig"
source "drivers/ntb/test/Kconfig"

View File

@ -1,3 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_NTB) += ntb.o hw/ test/
obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o
ntb-y := core.o
ntb-$(CONFIG_NTB_MSI) += msi.o

View File

@ -160,8 +160,8 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
}
/* set and verify setting the limit */
write64(limit, mmio + limit_reg);
reg_val = read64(mmio + limit_reg);
write64(limit, peer_mmio + limit_reg);
reg_val = read64(peer_mmio + limit_reg);
if (reg_val != limit) {
write64(base_addr, mmio + limit_reg);
write64(0, peer_mmio + xlat_reg);
@ -183,8 +183,8 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
}
/* set and verify setting the limit */
writel(limit, mmio + limit_reg);
reg_val = readl(mmio + limit_reg);
writel(limit, peer_mmio + limit_reg);
reg_val = readl(peer_mmio + limit_reg);
if (reg_val != limit) {
writel(base_addr, mmio + limit_reg);
writel(0, peer_mmio + xlat_reg);
@ -333,7 +333,7 @@ static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
if (db_vector < 0 || db_vector > ndev->db_count)
return 0;
return ntb_ndev(ntb)->db_valid_mask & (1 << db_vector);
return ntb_ndev(ntb)->db_valid_mask & (1ULL << db_vector);
}
static u64 amd_ntb_db_read(struct ntb_dev *ntb)

View File

@ -532,9 +532,9 @@ static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
return 0;
}
int intel_ntb3_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr,
resource_size_t *db_size,
u64 *db_data, int db_bit)
static int intel_ntb3_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr,
resource_size_t *db_size,
u64 *db_data, int db_bit)
{
phys_addr_t db_addr_base;
struct intel_ntb_dev *ndev = ntb_ndev(ntb);

View File

@ -86,7 +86,8 @@ struct switchtec_ntb {
bool link_is_up;
enum ntb_speed link_speed;
enum ntb_width link_width;
struct work_struct link_reinit_work;
struct work_struct check_link_status_work;
bool link_force_down;
};
static struct switchtec_ntb *ntb_sndev(struct ntb_dev *ntb)
@ -485,33 +486,11 @@ enum switchtec_msg {
static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev);
static void link_reinit_work(struct work_struct *work)
{
struct switchtec_ntb *sndev;
sndev = container_of(work, struct switchtec_ntb, link_reinit_work);
switchtec_ntb_reinit_peer(sndev);
}
static void switchtec_ntb_check_link(struct switchtec_ntb *sndev,
enum switchtec_msg msg)
static void switchtec_ntb_link_status_update(struct switchtec_ntb *sndev)
{
int link_sta;
int old = sndev->link_is_up;
if (msg == MSG_LINK_FORCE_DOWN) {
schedule_work(&sndev->link_reinit_work);
if (sndev->link_is_up) {
sndev->link_is_up = 0;
ntb_link_event(&sndev->ntb);
dev_info(&sndev->stdev->dev, "ntb link forced down\n");
}
return;
}
link_sta = sndev->self_shared->link_sta;
if (link_sta) {
u64 peer = ioread64(&sndev->peer_shared->magic);
@ -536,6 +515,38 @@ static void switchtec_ntb_check_link(struct switchtec_ntb *sndev,
}
}
/*
 * Work handler behind switchtec_ntb_check_link().
 *
 * If a forced link-down was flagged, consume the flag, reinitialize the
 * peer and, when the link was previously up, mark it down and notify
 * clients. Otherwise just refresh the link status.
 */
static void check_link_status_work(struct work_struct *work)
{
	struct switchtec_ntb *sndev = container_of(work, struct switchtec_ntb,
						   check_link_status_work);

	if (!sndev->link_force_down) {
		switchtec_ntb_link_status_update(sndev);
		return;
	}

	sndev->link_force_down = false;
	switchtec_ntb_reinit_peer(sndev);

	if (!sndev->link_is_up)
		return;

	sndev->link_is_up = 0;
	ntb_link_event(&sndev->ntb);
	dev_info(&sndev->stdev->dev, "ntb link forced down\n");
}
/*
 * Queue a link status check. A MSG_LINK_FORCE_DOWN message additionally
 * sets link_force_down so the work handler takes the forced-down path.
 */
static void switchtec_ntb_check_link(struct switchtec_ntb *sndev,
				     enum switchtec_msg msg)
{
	switch (msg) {
	case MSG_LINK_FORCE_DOWN:
		sndev->link_force_down = true;
		break;
	default:
		break;
	}

	schedule_work(&sndev->check_link_status_work);
}
static void switchtec_ntb_link_notification(struct switchtec_dev *stdev)
{
struct switchtec_ntb *sndev = stdev->sndev;
@ -568,7 +579,7 @@ static int switchtec_ntb_link_enable(struct ntb_dev *ntb,
sndev->self_shared->link_sta = 1;
switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_UP);
switchtec_ntb_check_link(sndev, MSG_CHECK_LINK);
switchtec_ntb_link_status_update(sndev);
return 0;
}
@ -582,7 +593,7 @@ static int switchtec_ntb_link_disable(struct ntb_dev *ntb)
sndev->self_shared->link_sta = 0;
switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_DOWN);
switchtec_ntb_check_link(sndev, MSG_CHECK_LINK);
switchtec_ntb_link_status_update(sndev);
return 0;
}
@ -835,7 +846,8 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev)
sndev->ntb.topo = NTB_TOPO_SWITCH;
sndev->ntb.ops = &switchtec_ntb_ops;
INIT_WORK(&sndev->link_reinit_work, link_reinit_work);
INIT_WORK(&sndev->check_link_status_work, check_link_status_work);
sndev->link_force_down = false;
sndev->self_partition = sndev->stdev->partition;
@ -872,7 +884,7 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev)
}
sndev->peer_partition = ffs(tpart_vec) - 1;
if (!(part_map & (1 << sndev->peer_partition))) {
if (!(part_map & (1ULL << sndev->peer_partition))) {
dev_err(&sndev->stdev->dev,
"ntb target partition is not NT partition\n");
return -ENODEV;
@ -1448,10 +1460,16 @@ static void switchtec_ntb_deinit_db_msg_irq(struct switchtec_ntb *sndev)
static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev)
{
dev_info(&sndev->stdev->dev, "peer reinitialized\n");
switchtec_ntb_deinit_shared_mw(sndev);
switchtec_ntb_init_mw(sndev);
return switchtec_ntb_init_shared_mw(sndev);
int rc;
if (crosslink_is_enabled(sndev))
return 0;
dev_info(&sndev->stdev->dev, "reinitialize shared memory window\n");
rc = config_rsvd_lut_win(sndev, sndev->mmio_peer_ctrl, 0,
sndev->self_partition,
sndev->self_shared_dma);
return rc;
}
static int switchtec_ntb_add(struct device *dev,

415
drivers/ntb/msi.c Normal file
View File

@ -0,0 +1,415 @@
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/ntb.h>
#include <linux/msi.h>
#include <linux/pci.h>
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION("0.1");
MODULE_AUTHOR("Logan Gunthorpe <logang@deltatee.com>");
MODULE_DESCRIPTION("NTB MSI Interrupt Library");
/*
 * Per-device state of the NTB MSI library; allocated by ntb_msi_init()
 * and stored in ntb->msi.
 */
struct ntb_msi {
/* Start of the MSI address window; written by ntb_msi_setup_mws() */
u64 base_addr;
/* Exclusive end of the MSI address window (base_addr + smallest window size) */
u64 end_addr;
/* Optional client callback, invoked by ntb_msi_write_msg() with ntb->ctx */
void (*desc_changed)(void *ctx);
/* One ioremapped peer memory window per peer, indexed by peer index */
u32 __iomem *peer_mws[];
};
/**
 * ntb_msi_init() - Initialize the MSI context
 * @ntb: NTB device context
 * @desc_changed: Callback stored in the MSI context; ntb_msi_write_msg()
 *	invokes it with the NTB client context when it is non-NULL
 *
 * Must be called before any other ntb_msi function. Allocates the MSI
 * context and maps one peer memory window per peer.
 *
 * The last N outbound memory windows are reserved for this purpose
 * (where N is the number of peers).
 *
 * Return: Zero on success, otherwise a negative error number.
 */
int ntb_msi_init(struct ntb_dev *ntb,
		 void (*desc_changed)(void *ctx))
{
	phys_addr_t win_phys;
	resource_size_t win_len;
	size_t alloc_len;
	int nr_peers;
	int widx;
	int idx;
	int rc;

	nr_peers = ntb_peer_port_count(ntb);
	if (nr_peers <= 0)
		return -EINVAL;

	/* Header plus one mapping slot per peer */
	alloc_len = sizeof(*ntb->msi) +
		    sizeof(*ntb->msi->peer_mws) * nr_peers;

	ntb->msi = devm_kzalloc(&ntb->dev, alloc_len, GFP_KERNEL);
	if (!ntb->msi)
		return -ENOMEM;

	ntb->msi->desc_changed = desc_changed;

	for (idx = 0; idx < nr_peers; idx++) {
		/* Windows are taken from the top of the range downwards */
		widx = ntb_peer_mw_count(ntb) - 1 - idx;

		rc = ntb_peer_mw_get_addr(ntb, widx, &win_phys, &win_len);
		if (rc)
			goto unroll;

		ntb->msi->peer_mws[idx] = devm_ioremap(&ntb->dev, win_phys,
						       win_len);
		if (!ntb->msi->peer_mws[idx]) {
			rc = -EFAULT;
			goto unroll;
		}
	}

	return 0;

unroll:
	/* The context was zero-allocated, so unmapped slots are still NULL */
	for (idx = 0; idx < nr_peers; idx++) {
		if (!ntb->msi->peer_mws[idx])
			continue;
		devm_iounmap(&ntb->dev, ntb->msi->peer_mws[idx]);
	}

	devm_kfree(&ntb->dev, ntb->msi);
	ntb->msi = NULL;
	return rc;
}
EXPORT_SYMBOL(ntb_msi_init);
/**
 * ntb_msi_setup_mws() - Initialize the MSI inbound memory windows
 * @ntb: NTB device context
 *
 * This function sets up the required inbound memory windows. It should be
 * called from a work function after a link up event.
 *
 * Over the entire network, this function reserves the last N
 * inbound memory windows for each peer (where N is the number of peers).
 *
 * ntb_msi_init() must be called before this function.
 *
 * Return: Zero on success, otherwise a negative error number.
 */
int ntb_msi_setup_mws(struct ntb_dev *ntb)
{
	struct msi_desc *desc;
	u64 addr;
	int peer, peer_widx;
	resource_size_t addr_align, size_align, size_max;
	resource_size_t mw_size = SZ_32K;
	resource_size_t mw_min_size = mw_size;
	int i;
	int ret;

	if (!ntb->msi)
		return -EINVAL;

	/* Use the first MSI entry's message address as the window base */
	desc = first_msi_entry(&ntb->pdev->dev);
	addr = desc->msg.address_lo + ((uint64_t)desc->msg.address_hi << 32);

	/* Pass 1: align the base address down to satisfy every peer */
	for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) {
		peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
		if (peer_widx < 0)
			return peer_widx;

		ret = ntb_mw_get_align(ntb, peer, peer_widx, &addr_align,
				       NULL, NULL);
		if (ret)
			return ret;

		addr &= ~(addr_align - 1);
	}

	/* Pass 2: program the translation for each peer's highest window */
	for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) {
		peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
		if (peer_widx < 0) {
			ret = peer_widx;
			goto error_out;
		}

		ret = ntb_mw_get_align(ntb, peer, peer_widx, NULL,
				       &size_align, &size_max);
		if (ret)
			goto error_out;

		mw_size = round_up(mw_size, size_align);
		mw_size = max(mw_size, size_max);
		if (mw_size < mw_min_size)
			mw_min_size = mw_size;

		ret = ntb_mw_set_trans(ntb, peer, peer_widx,
				       addr, mw_size);
		if (ret)
			goto error_out;
	}

	ntb->msi->base_addr = addr;
	ntb->msi->end_addr = addr + mw_min_size;

	return 0;

error_out:
	/*
	 * Undo the translations set for peers [0, peer). The window index
	 * must be looked up for the peer being unwound (i), not for the
	 * peer that failed.
	 */
	for (i = 0; i < peer; i++) {
		peer_widx = ntb_peer_highest_mw_idx(ntb, i);
		if (peer_widx < 0)
			continue;

		ntb_mw_clear_trans(ntb, i, peer_widx);
	}

	return ret;
}
EXPORT_SYMBOL(ntb_msi_setup_mws);
/**
 * ntb_msi_clear_mws() - Clear all inbound memory windows
 * @ntb: NTB device context
 *
 * Clears the translation of each peer's highest memory window, i.e. the
 * windows programmed by ntb_msi_setup_mws(). Peers for which no window
 * index can be obtained are skipped.
 */
void ntb_msi_clear_mws(struct ntb_dev *ntb)
{
	int pidx;
	int widx;

	for (pidx = 0; pidx < ntb_peer_port_count(ntb); pidx++) {
		widx = ntb_peer_highest_mw_idx(ntb, pidx);
		if (widx >= 0)
			ntb_mw_clear_trans(ntb, pidx, widx);
	}
}
EXPORT_SYMBOL(ntb_msi_clear_mws);
/*
 * devres payload tying an MSI entry's write_msi_msg callback to the
 * client's descriptor; filled in by ntbm_msi_setup_callback().
 */
struct ntb_msi_devres {
/* NTB device that owns the interrupt */
struct ntb_dev *ntb;
/* MSI entry whose write_msi_msg hook was installed */
struct msi_desc *entry;
/* Client descriptor refreshed by ntb_msi_write_msg() */
struct ntb_msi_desc *msi_desc;
};
/*
 * Translate an MSI entry's message into an NTB MSI descriptor.
 *
 * The entry's 64-bit message address must lie inside
 * [base_addr, end_addr); the descriptor then receives the offset from
 * base_addr and the message data. Returns 0 on success or -EFAULT (with
 * a one-time warning) when the address is outside the window.
 */
static int ntb_msi_set_desc(struct ntb_dev *ntb, struct msi_desc *entry,
			    struct ntb_msi_desc *msi_desc)
{
	u64 msg_addr;

	msg_addr = ((uint64_t)entry->msg.address_hi << 32) +
		   entry->msg.address_lo;

	if (msg_addr >= ntb->msi->base_addr &&
	    msg_addr < ntb->msi->end_addr) {
		msi_desc->addr_offset = msg_addr - ntb->msi->base_addr;
		msi_desc->data = entry->msg.data;
		return 0;
	}

	dev_warn_once(&ntb->dev,
		      "IRQ %d: MSI Address not within the memory window (%llx, [%llx %llx])\n",
		      entry->irq, msg_addr, ntb->msi->base_addr,
		      ntb->msi->end_addr);
	return -EFAULT;
}
static void ntb_msi_write_msg(struct msi_desc *entry, void *data)
{
struct ntb_msi_devres *dr = data;
WARN_ON(ntb_msi_set_desc(dr->ntb, entry, dr->msi_desc));
if (dr->ntb->msi->desc_changed)
dr->ntb->msi->desc_changed(dr->ntb->ctx);
}
static void ntbm_msi_callback_release(struct device *dev, void *res)
{
struct ntb_msi_devres *dr = res;
dr->entry->write_msi_msg = NULL;
dr->entry->write_msi_msg_data = NULL;
}
static int ntbm_msi_setup_callback(struct ntb_dev *ntb, struct msi_desc *entry,
struct ntb_msi_desc *msi_desc)
{
struct ntb_msi_devres *dr;
dr = devres_alloc(ntbm_msi_callback_release,
sizeof(struct ntb_msi_devres), GFP_KERNEL);
if (!dr)
return -ENOMEM;
dr->ntb = ntb;
dr->entry = entry;
dr->msi_desc = msi_desc;
devres_add(&ntb->dev, dr);
dr->entry->write_msi_msg = ntb_msi_write_msg;
dr->entry->write_msi_msg_data = dr;
return 0;
}
/**
 * ntbm_msi_request_threaded_irq() - allocate an MSI interrupt
 * @ntb: NTB device context
 * @handler: Function to be called when the IRQ occurs
 * @thread_fn: Function to be called in a threaded interrupt context. NULL
 *	for clients which handle everything in @handler
 * @name: An ascii name for the claiming device, dev_name(dev) if NULL
 * @dev_id: A cookie passed back to the handler function
 * @msi_desc: Filled in with the address offset and data that trigger the
 *	interrupt. The pointer is retained and the descriptor is updated
 *	again whenever the MSI message is rewritten.
 *
 * Walks the device's MSI entries, claims the first one that has no action
 * attached, and returns the descriptor used to trigger it. The descriptor
 * can then be sent to a peer to trigger the interrupt.
 *
 * The interrupt resource is managed with devres so it will
 * be automatically freed when the NTB device is torn down.
 *
 * If an IRQ allocated with this function needs to be freed
 * separately, ntbm_msi_free_irq() must be used.
 *
 * Return: IRQ number assigned on success, otherwise a negative error number.
 */
int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler,
				  irq_handler_t thread_fn,
				  const char *name, void *dev_id,
				  struct ntb_msi_desc *msi_desc)
{
	struct msi_desc *entry;
	struct irq_desc *irqd;
	int rc;

	if (!ntb->msi)
		return -EINVAL;

	for_each_pci_msi_entry(entry, ntb->pdev) {
		/* Skip vectors that already have a handler attached */
		irqd = irq_to_desc(entry->irq);
		if (irqd->action)
			continue;

		rc = devm_request_threaded_irq(&ntb->dev, entry->irq, handler,
					       thread_fn, 0, name, dev_id);
		if (rc)
			continue;

		if (!ntb_msi_set_desc(ntb, entry, msi_desc)) {
			rc = ntbm_msi_setup_callback(ntb, entry, msi_desc);
			if (!rc)
				return entry->irq;

			devm_free_irq(&ntb->dev, entry->irq, dev_id);
			return rc;
		}

		/* Address outside the MSI window: release and try the next */
		devm_free_irq(&ntb->dev, entry->irq, dev_id);
	}

	return -ENODEV;
}
EXPORT_SYMBOL(ntbm_msi_request_threaded_irq);
/*
 * devres match helper: true when the record belongs to this NTB device
 * and refers to the MSI entry passed in @data.
 */
static int ntbm_msi_callback_match(struct device *dev, void *res, void *data)
{
	struct ntb_msi_devres *payload = res;

	if (payload->ntb != dev_ntb(dev))
		return 0;

	return payload->entry == data;
}
/**
 * ntbm_msi_free_irq() - free an interrupt
 * @ntb: NTB device context
 * @irq: Interrupt line to free
 * @dev_id: Device identity to free
 *
 * This function should be used to manually free IRQs allocated with
 * ntbm_msi_request_[threaded_]irq(). It detaches the write_msi_msg hook,
 * destroys the matching devres record and releases the IRQ.
 */
void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id)
{
	struct msi_desc *msi_entry = irq_get_msi_desc(irq);

	/* Stop descriptor updates before tearing anything down */
	msi_entry->write_msi_msg = NULL;
	msi_entry->write_msi_msg_data = NULL;

	WARN_ON(devres_destroy(&ntb->dev, ntbm_msi_callback_release,
			       ntbm_msi_callback_match, msi_entry));

	devm_free_irq(&ntb->dev, irq, dev_id);
}
EXPORT_SYMBOL(ntbm_msi_free_irq);
/**
* ntb_msi_peer_trigger() - Trigger an interrupt handler on a peer
* @ntb: NTB device context
* @peer: Peer index
* @desc: MSI descriptor data which triggers the interrupt
*
* This function triggers an interrupt on a peer. It requires
* the descriptor structure to have been passed from that peer
* by some other means.
*
* Return: Zero on success, otherwise a negative error number.
*/
int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer,
struct ntb_msi_desc *desc)
{
int idx;
if (!ntb->msi)
return -EINVAL;
idx = desc->addr_offset / sizeof(*ntb->msi->peer_mws[peer]);
iowrite32(desc->data, &ntb->msi->peer_mws[peer][idx]);
return 0;
}
EXPORT_SYMBOL(ntb_msi_peer_trigger);
/**
 * ntb_msi_peer_addr() - Get the DMA address to trigger a peer's MSI interrupt
 * @ntb: NTB device context
 * @peer: Peer index
 * @desc: MSI descriptor data which triggers the interrupt
 * @msi_addr: Physical address to trigger the interrupt; may be NULL, in
 *	which case nothing is stored
 *
 * This function allows using DMA engines to trigger an interrupt
 * (for example, trigger an interrupt to process the data after
 * sending it). To trigger the interrupt, write @desc.data to the address
 * returned in @msi_addr
 *
 * Return: Zero on success, otherwise a negative error number.
 */
int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer,
		      struct ntb_msi_desc *desc,
		      phys_addr_t *msi_addr)
{
	phys_addr_t window_base;
	int widx;
	int rc;

	/* Same window selection as ntb_msi_init(): counted from the top */
	widx = ntb_peer_mw_count(ntb) - 1 - peer;

	rc = ntb_peer_mw_get_addr(ntb, widx, &window_base, NULL);
	if (rc)
		return rc;

	if (!msi_addr)
		return 0;

	*msi_addr = window_base + desc->addr_offset;
	return 0;
}
EXPORT_SYMBOL(ntb_msi_peer_addr);

View File

@ -93,6 +93,12 @@ static bool use_dma;
module_param(use_dma, bool, 0644);
MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy");
static bool use_msi;
#ifdef CONFIG_NTB_MSI
module_param(use_msi, bool, 0644);
MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells");
#endif
static struct dentry *nt_debugfs_dir;
/* Only two-ports NTB devices are supported */
@ -188,6 +194,11 @@ struct ntb_transport_qp {
u64 tx_err_no_buf;
u64 tx_memcpy;
u64 tx_async;
bool use_msi;
int msi_irq;
struct ntb_msi_desc msi_desc;
struct ntb_msi_desc peer_msi_desc;
};
struct ntb_transport_mw {
@ -221,6 +232,10 @@ struct ntb_transport_ctx {
u64 qp_bitmap;
u64 qp_bitmap_free;
bool use_msi;
unsigned int msi_spad_offset;
u64 msi_db_mask;
bool link_is_up;
struct delayed_work link_work;
struct work_struct link_cleanup;
@ -667,6 +682,114 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
return 0;
}
/* MSI handler for a queue pair: defer receive processing to its tasklet. */
static irqreturn_t ntb_transport_isr(int irq, void *dev)
{
	struct ntb_transport_qp *queue = dev;

	tasklet_schedule(&queue->rxc_db_work);

	return IRQ_HANDLED;
}
static void ntb_transport_setup_qp_peer_msi(struct ntb_transport_ctx *nt,
unsigned int qp_num)
{
struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
int spad = qp_num * 2 + nt->msi_spad_offset;
if (!nt->use_msi)
return;
if (spad >= ntb_spad_count(nt->ndev))
return;
qp->peer_msi_desc.addr_offset =
ntb_peer_spad_read(qp->ndev, PIDX, spad);
qp->peer_msi_desc.data =
ntb_peer_spad_read(qp->ndev, PIDX, spad + 1);
dev_dbg(&qp->ndev->pdev->dev, "QP%d Peer MSI addr=%x data=%x\n",
qp_num, qp->peer_msi_desc.addr_offset, qp->peer_msi_desc.data);
if (qp->peer_msi_desc.addr_offset) {
qp->use_msi = true;
dev_info(&qp->ndev->pdev->dev,
"Using MSI interrupts for QP%d\n", qp_num);
}
}
/*
 * Allocate (once) an MSI interrupt for queue pair @qp_num and publish its
 * descriptor to the peer through two scratchpads: the address offset at
 * the QP's scratchpad and the data at the next one.
 *
 * qp->msi_irq == 0 means "no interrupt held", so every failure path
 * leaves it at 0; a later call can then retry instead of publishing a
 * descriptor for an interrupt that does not exist.
 */
static void ntb_transport_setup_qp_msi(struct ntb_transport_ctx *nt,
				       unsigned int qp_num)
{
	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
	int spad = qp_num * 2 + nt->msi_spad_offset;
	int rc;

	if (!nt->use_msi)
		return;

	if (spad >= ntb_spad_count(nt->ndev)) {
		dev_warn_once(&qp->ndev->pdev->dev,
			      "Not enough SPADS to use MSI interrupts\n");
		return;
	}

	/* Zero both scratchpads before (re)publishing the descriptor */
	ntb_spad_write(qp->ndev, spad, 0);
	ntb_spad_write(qp->ndev, spad + 1, 0);

	if (!qp->msi_irq) {
		qp->msi_irq = ntbm_msi_request_irq(qp->ndev, ntb_transport_isr,
						   KBUILD_MODNAME, qp,
						   &qp->msi_desc);
		if (qp->msi_irq < 0) {
			dev_warn(&qp->ndev->pdev->dev,
				 "Unable to allocate MSI interrupt for qp%d\n",
				 qp_num);
			/* Don't keep the errno around as if it were an IRQ */
			qp->msi_irq = 0;
			return;
		}
	}

	rc = ntb_spad_write(qp->ndev, spad, qp->msi_desc.addr_offset);
	if (rc)
		goto err_free_interrupt;

	rc = ntb_spad_write(qp->ndev, spad + 1, qp->msi_desc.data);
	if (rc)
		goto err_free_interrupt;

	dev_dbg(&qp->ndev->pdev->dev, "QP%d MSI %d addr=%x data=%x\n",
		qp_num, qp->msi_irq, qp->msi_desc.addr_offset,
		qp->msi_desc.data);

	return;

err_free_interrupt:
	/* Best effort: a zero address offset tells the peer not to use MSI */
	ntb_spad_write(qp->ndev, spad, 0);

	/*
	 * The IRQ came from the ntbm_msi request helper, so release it with
	 * ntbm_msi_free_irq(), which also removes the descriptor-update
	 * callback, and forget the number.
	 */
	ntbm_msi_free_irq(nt->ndev, qp->msi_irq, qp);
	qp->msi_irq = 0;
}
/* Re-read the peer's MSI descriptor for every queue pair. */
static void ntb_transport_msi_peer_desc_changed(struct ntb_transport_ctx *nt)
{
	int qp_num = 0;

	dev_dbg(&nt->ndev->pdev->dev, "Peer MSI descriptors changed");

	while (qp_num < nt->qp_count) {
		ntb_transport_setup_qp_peer_msi(nt, qp_num);
		qp_num++;
	}
}
/*
 * desc_changed callback handed to ntb_msi_init(): republish the local MSI
 * descriptor of every queue pair, then set the MSI doorbell bit on the
 * peer.
 */
static void ntb_transport_msi_desc_changed(void *data)
{
	struct ntb_transport_ctx *nt = data;
	int qp_num = 0;

	dev_dbg(&nt->ndev->pdev->dev, "MSI descriptors changed");

	while (qp_num < nt->qp_count) {
		ntb_transport_setup_qp_msi(nt, qp_num);
		qp_num++;
	}

	ntb_peer_db_set(nt->ndev, nt->msi_db_mask);
}
static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
{
struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
@ -905,6 +1028,20 @@ static void ntb_transport_link_work(struct work_struct *work)
int rc = 0, i, spad;
/* send the local info, in the opposite order of the way we read it */
if (nt->use_msi) {
rc = ntb_msi_setup_mws(ndev);
if (rc) {
dev_warn(&pdev->dev,
"Failed to register MSI memory window: %d\n",
rc);
nt->use_msi = false;
}
}
for (i = 0; i < nt->qp_count; i++)
ntb_transport_setup_qp_msi(nt, i);
for (i = 0; i < nt->mw_count; i++) {
size = nt->mw_vec[i].phys_size;
@ -962,6 +1099,7 @@ static void ntb_transport_link_work(struct work_struct *work)
struct ntb_transport_qp *qp = &nt->qp_vec[i];
ntb_transport_setup_qp_mw(nt, i);
ntb_transport_setup_qp_peer_msi(nt, i);
if (qp->client_ready)
schedule_delayed_work(&qp->link_work, 0);
@ -1135,6 +1273,19 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
return -ENOMEM;
nt->ndev = ndev;
/*
* If we are using MSI, and have at least one extra memory window,
* we will reserve the last MW for the MSI window.
*/
if (use_msi && mw_count > 1) {
rc = ntb_msi_init(ndev, ntb_transport_msi_desc_changed);
if (!rc) {
mw_count -= 1;
nt->use_msi = true;
}
}
spad_count = ntb_spad_count(ndev);
/* Limit the MW's based on the availability of scratchpads */
@ -1148,6 +1299,8 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
max_mw_count_for_spads = (spad_count - MW0_SZ_HIGH) / 2;
nt->mw_count = min(mw_count, max_mw_count_for_spads);
nt->msi_spad_offset = nt->mw_count * 2 + MW0_SZ_HIGH;
nt->mw_vec = kcalloc_node(mw_count, sizeof(*nt->mw_vec),
GFP_KERNEL, node);
if (!nt->mw_vec) {
@ -1178,6 +1331,12 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
qp_bitmap = ntb_db_valid_mask(ndev);
qp_count = ilog2(qp_bitmap);
if (nt->use_msi) {
qp_count -= 1;
nt->msi_db_mask = 1 << qp_count;
ntb_db_clear_mask(ndev, nt->msi_db_mask);
}
if (max_num_clients && max_num_clients < qp_count)
qp_count = max_num_clients;
else if (nt->mw_count < qp_count)
@ -1601,7 +1760,10 @@ static void ntb_tx_copy_callback(void *data,
iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags);
ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num));
if (qp->use_msi)
ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc);
else
ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num));
/* The entry length can only be zero if the packet is intended to be a
* "link down" or similar. Since no payload is being sent in these
@ -1869,6 +2031,7 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
qp->rx_dma_chan = NULL;
}
qp->tx_mw_dma_addr = 0;
if (qp->tx_dma_chan) {
qp->tx_mw_dma_addr =
dma_map_resource(qp->tx_dma_chan->device->dev,
@ -2268,6 +2431,11 @@ static void ntb_transport_doorbell_callback(void *data, int vector)
u64 db_bits;
unsigned int qp_num;
if (ntb_db_read(nt->ndev) & nt->msi_db_mask) {
ntb_transport_msi_peer_desc_changed(nt);
ntb_db_clear(nt->ndev, nt->msi_db_mask);
}
db_bits = (nt->qp_bitmap & ~nt->qp_bitmap_free &
ntb_db_vector_mask(nt->ndev, vector));

View File

@ -26,3 +26,12 @@ config NTB_PERF
to and from the window without additional software interaction.
If unsure, say N.
config NTB_MSI_TEST
tristate "NTB MSI Test Client"
depends on NTB_MSI
help
This tool demonstrates the use of the NTB MSI library to
send MSI interrupts between peers.
If unsure, say N.

View File

@ -2,3 +2,4 @@
obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o
obj-$(CONFIG_NTB_TOOL) += ntb_tool.o
obj-$(CONFIG_NTB_PERF) += ntb_perf.o
obj-$(CONFIG_NTB_MSI_TEST) += ntb_msi_test.o

View File

@ -0,0 +1,433 @@
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/ntb.h>
#include <linux/pci.h>
#include <linux/radix-tree.h>
#include <linux/workqueue.h>
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION("0.1");
MODULE_AUTHOR("Logan Gunthorpe <logang@deltatee.com>");
MODULE_DESCRIPTION("Test for sending MSI interrupts over an NTB memory window");
static int num_irqs = 4;
module_param(num_irqs, int, 0644);
MODULE_PARM_DESC(num_irqs, "number of irqs to use");
/* Per-device state of the NTB MSI test client. */
struct ntb_msit_ctx {
/* NTB device under test */
struct ntb_dev *ntb;
/* Per-device debugfs directory, named after the PCI device */
struct dentry *dbgfs_dir;
/* Scheduled from the link event handler; runs ntb_msit_setup_work() */
struct work_struct setup_work;
/* One entry per local test interrupt, indexed 0..num_irqs-1 */
struct ntb_msit_isr_ctx {
/* Index of this entry in the isr_ctx array */
int irq_idx;
/* IRQ number returned by ntbm_msi_request_irq(); 0 while unallocated */
int irq_num;
/* Incremented by ntb_msit_isr() each time the interrupt fires */
int occurrences;
/* Back-pointer to the owning context */
struct ntb_msit_ctx *nm;
/* Local MSI descriptor published to peers via the scratchpads */
struct ntb_msi_desc desc;
} *isr_ctx;
/* Per-peer state, indexed by peer index */
struct ntb_msit_peer {
/* Back-pointer to the owning context */
struct ntb_msit_ctx *nm;
/* Peer index */
int pidx;
/* Number of descriptors in msi_desc, as read from the peer */
int num_irqs;
/* Completed once the peer's descriptors have been copied */
struct completion init_comp;
/* Descriptors read from the peer's scratchpads */
struct ntb_msi_desc *msi_desc;
} peers[];
};
static struct dentry *ntb_msit_dbgfs_topdir;
/* Test interrupt handler: log the interrupt index and count the hit. */
static irqreturn_t ntb_msit_isr(int irq, void *dev)
{
	struct ntb_msit_isr_ctx *ictx = dev;

	dev_dbg(&ictx->nm->ntb->dev, "Interrupt Occurred: %d",
		ictx->irq_idx);

	ictx->occurrences++;

	return IRQ_HANDLED;
}
static void ntb_msit_setup_work(struct work_struct *work)
{
struct ntb_msit_ctx *nm = container_of(work, struct ntb_msit_ctx,
setup_work);
int irq_count = 0;
int irq;
int ret;
uintptr_t i;
ret = ntb_msi_setup_mws(nm->ntb);
if (ret) {
dev_err(&nm->ntb->dev, "Unable to setup MSI windows: %d\n",
ret);
return;
}
for (i = 0; i < num_irqs; i++) {
nm->isr_ctx[i].irq_idx = i;
nm->isr_ctx[i].nm = nm;
if (!nm->isr_ctx[i].irq_num) {
irq = ntbm_msi_request_irq(nm->ntb, ntb_msit_isr,
KBUILD_MODNAME,
&nm->isr_ctx[i],
&nm->isr_ctx[i].desc);
if (irq < 0)
break;
nm->isr_ctx[i].irq_num = irq;
}
ret = ntb_spad_write(nm->ntb, 2 * i + 1,
nm->isr_ctx[i].desc.addr_offset);
if (ret)
break;
ret = ntb_spad_write(nm->ntb, 2 * i + 2,
nm->isr_ctx[i].desc.data);
if (ret)
break;
irq_count++;
}
ntb_spad_write(nm->ntb, 0, irq_count);
ntb_peer_db_set(nm->ntb, BIT(ntb_port_number(nm->ntb)));
}
static void ntb_msit_desc_changed(void *ctx)
{
struct ntb_msit_ctx *nm = ctx;
int i;
dev_dbg(&nm->ntb->dev, "MSI Descriptors Changed\n");
for (i = 0; i < num_irqs; i++) {
ntb_spad_write(nm->ntb, 2 * i + 1,
nm->isr_ctx[i].desc.addr_offset);
ntb_spad_write(nm->ntb, 2 * i + 2,
nm->isr_ctx[i].desc.data);
}
ntb_peer_db_set(nm->ntb, BIT(ntb_port_number(nm->ntb)));
}
/* Link event handler: schedule the setup work when the link is up. */
static void ntb_msit_link_event(void *ctx)
{
	struct ntb_msit_ctx *nm = ctx;

	if (ntb_link_is_up(nm->ntb, NULL, NULL))
		schedule_work(&nm->setup_work);
}
static void ntb_msit_copy_peer_desc(struct ntb_msit_ctx *nm, int peer)
{
int i;
struct ntb_msi_desc *desc = nm->peers[peer].msi_desc;
int irq_count = nm->peers[peer].num_irqs;
for (i = 0; i < irq_count; i++) {
desc[i].addr_offset = ntb_peer_spad_read(nm->ntb, peer,
2 * i + 1);
desc[i].data = ntb_peer_spad_read(nm->ntb, peer, 2 * i + 2);
}
dev_info(&nm->ntb->dev, "Found %d interrupts on peer %d\n",
irq_count, peer);
complete_all(&nm->peers[peer].init_comp);
}
/*
 * Doorbell handler: each set doorbell bit is treated as a peer index
 * whose descriptors changed. For every such peer, read its interrupt
 * count from scratchpad 0, replace the local descriptor table and copy
 * the descriptors over.
 */
static void ntb_msit_db_event(void *ctx, int vec)
{
	struct ntb_msit_ctx *nm = ctx;
	struct ntb_msi_desc *desc;
	u64 peer_mask = ntb_db_read(nm->ntb);
	u32 irq_count;
	int peer;

	ntb_db_clear(nm->ntb, peer_mask);

	/*
	 * NOTE(review): peer indexes nm->peers[] directly; this presumably
	 * relies on only valid peer bits being set in the doorbell - confirm.
	 */
	for (peer = 0; peer < sizeof(peer_mask) * 8; peer++) {
		/* 64-bit mask: BIT() would shift an unsigned long too far */
		if (!(peer_mask & BIT_ULL(peer)))
			continue;

		/* A count of -1 (all ones) means the peer is not ready yet */
		irq_count = ntb_peer_spad_read(nm->ntb, peer, 0);
		if (irq_count == -1)
			continue;

		desc = kcalloc(irq_count, sizeof(*desc), GFP_ATOMIC);
		if (!desc)
			continue;

		kfree(nm->peers[peer].msi_desc);
		nm->peers[peer].msi_desc = desc;
		nm->peers[peer].num_irqs = irq_count;

		ntb_msit_copy_peer_desc(nm, peer);
	}
}
/* NTB context callbacks for the MSI test client: link and doorbell events. */
static const struct ntb_ctx_ops ntb_msit_ops = {
.link_event = ntb_msit_link_event,
.db_event = ntb_msit_db_event,
};
/*
 * debugfs "trigger" write handler: fire interrupt @idx on the peer.
 * Returns -EINVAL when @idx is not below the peer's interrupt count.
 */
static int ntb_msit_dbgfs_trigger(void *data, u64 idx)
{
	struct ntb_msit_peer *target = data;
	struct ntb_dev *ntb;

	if (idx >= target->num_irqs)
		return -EINVAL;

	ntb = target->nm->ntb;

	dev_dbg(&ntb->dev, "trigger irq %llu on peer %u\n",
		idx, target->pidx);

	return ntb_msi_peer_trigger(ntb, target->pidx,
				    &target->msi_desc[idx]);
}

DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_trigger_fops, NULL,
			 ntb_msit_dbgfs_trigger, "%llu\n");
static int ntb_msit_dbgfs_port_get(void *data, u64 *port)
{
struct ntb_msit_peer *peer = data;
*port = ntb_peer_port_number(peer->nm->ntb, peer->pidx);
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_port_fops, ntb_msit_dbgfs_port_get,
NULL, "%llu\n");
/* debugfs "count" read handler: report the peer's interrupt count. */
static int ntb_msit_dbgfs_count_get(void *data, u64 *count)
{
	struct ntb_msit_peer *p = data;

	*count = p->num_irqs;

	return 0;
}

DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_count_fops, ntb_msit_dbgfs_count_get,
			 NULL, "%llu\n");
/*
 * debugfs "ready" read handler: report, without blocking, whether the
 * peer's init completion has been signalled.
 */
static int ntb_msit_dbgfs_ready_get(void *data, u64 *ready)
{
	struct ntb_msit_peer *p = data;
	bool done = try_wait_for_completion(&p->init_comp);

	*ready = done;

	return 0;
}
/*
 * debugfs "ready" write handler: block (interruptibly) until the peer's
 * init completion is signalled. The written value is ignored.
 */
static int ntb_msit_dbgfs_ready_set(void *data, u64 ready)
{
	struct ntb_msit_peer *p = data;
	struct completion *done = &p->init_comp;

	return wait_for_completion_interruptible(done);
}

DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_ready_fops, ntb_msit_dbgfs_ready_get,
			 ntb_msit_dbgfs_ready_set, "%llu\n");
/* debugfs "irq*_occurrences" read handler: report the interrupt's hit count. */
static int ntb_msit_dbgfs_occurrences_get(void *data, u64 *occurrences)
{
	struct ntb_msit_isr_ctx *ictx = data;

	*occurrences = ictx->occurrences;

	return 0;
}

DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_occurrences_fops,
			 ntb_msit_dbgfs_occurrences_get,
			 NULL, "%llu\n");
/*
 * debugfs read handler behind the per-device "port" file.
 *
 * @data: the struct ntb_msit_ctx the file was created for.
 * @port: receives ntb_port_number() of the context's NTB device.
 *
 * Always returns 0.
 */
static int ntb_msit_dbgfs_local_port_get(void *data, u64 *port)
{
	struct ntb_msit_ctx *ctx = data;
	struct ntb_dev *ntb = ctx->ntb;

	*port = ntb_port_number(ntb);

	return 0;
}

/* Read-only attribute: getter only, no setter. */
DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_local_port_fops,
			 ntb_msit_dbgfs_local_port_get,
			 NULL, "%llu\n");
static void ntb_msit_create_dbgfs(struct ntb_msit_ctx *nm)
{
struct pci_dev *pdev = nm->ntb->pdev;
char buf[32];
int i;
struct dentry *peer_dir;
nm->dbgfs_dir = debugfs_create_dir(pci_name(pdev),
ntb_msit_dbgfs_topdir);
debugfs_create_file("port", 0400, nm->dbgfs_dir, nm,
&ntb_msit_local_port_fops);
for (i = 0; i < ntb_peer_port_count(nm->ntb); i++) {
nm->peers[i].pidx = i;
nm->peers[i].nm = nm;
init_completion(&nm->peers[i].init_comp);
snprintf(buf, sizeof(buf), "peer%d", i);
peer_dir = debugfs_create_dir(buf, nm->dbgfs_dir);
debugfs_create_file_unsafe("trigger", 0200, peer_dir,
&nm->peers[i],
&ntb_msit_trigger_fops);
debugfs_create_file_unsafe("port", 0400, peer_dir,
&nm->peers[i], &ntb_msit_port_fops);
debugfs_create_file_unsafe("count", 0400, peer_dir,
&nm->peers[i],
&ntb_msit_count_fops);
debugfs_create_file_unsafe("ready", 0600, peer_dir,
&nm->peers[i],
&ntb_msit_ready_fops);
}
for (i = 0; i < num_irqs; i++) {
snprintf(buf, sizeof(buf), "irq%d_occurrences", i);
debugfs_create_file_unsafe(buf, 0400, nm->dbgfs_dir,
&nm->isr_ctx[i],
&ntb_msit_occurrences_fops);
}
}
/*
 * Remove the per-device debugfs directory stored in nm->dbgfs_dir
 * (created by ntb_msit_create_dbgfs()) together with everything below it.
 */
static void ntb_msit_remove_dbgfs(struct ntb_msit_ctx *nm)
{
debugfs_remove_recursive(nm->dbgfs_dir);
}
/*
 * Bind the MSI test client to an NTB device.
 *
 * Checks that the device has peers and enough usable scratchpads
 * (2 * num_irqs + 1), writes -1 to scratchpad 0, unmasks one doorbell
 * bit per peer, initialises the NTB MSI library, allocates the client
 * context (with one trailing peers[] slot per peer) plus the per-irq
 * ISR contexts, creates the debugfs tree, registers the context
 * callbacks and finally enables the link.
 *
 * @client: the NTB client being probed (unused).
 * @ntb:    the NTB device to bind to.
 *
 * Returns 0 on success, -EINVAL when there are no peers, -EFAULT when
 * the scratchpads are unsafe or too few, -ENOMEM on allocation failure,
 * or the error returned by the failing NTB call.
 */
static int ntb_msit_probe(struct ntb_client *client, struct ntb_dev *ntb)
{
	struct ntb_msit_ctx *nm;
	size_t struct_size;
	int peers;
	int ret;

	peers = ntb_peer_port_count(ntb);
	if (peers <= 0)
		return -EINVAL;

	if (ntb_spad_is_unsafe(ntb) || ntb_spad_count(ntb) < 2 * num_irqs + 1) {
		dev_err(&ntb->dev, "NTB MSI test requires at least %d spads for %d irqs\n",
			2 * num_irqs + 1, num_irqs);
		return -EFAULT;
	}

	ret = ntb_spad_write(ntb, 0, -1);
	if (ret) {
		dev_err(&ntb->dev, "Unable to write spads: %d\n", ret);
		return ret;
	}

	ret = ntb_db_clear_mask(ntb, GENMASK(peers - 1, 0));
	if (ret) {
		dev_err(&ntb->dev, "Unable to clear doorbell mask: %d\n", ret);
		return ret;
	}

	ret = ntb_msi_init(ntb, ntb_msit_desc_changed);
	if (ret) {
		dev_err(&ntb->dev, "Unable to initialize MSI library: %d\n",
			ret);
		return ret;
	}

	/* The context is followed in the same allocation by peers[]. */
	struct_size = sizeof(*nm) + sizeof(*nm->peers) * peers;

	nm = devm_kzalloc(&ntb->dev, struct_size, GFP_KERNEL);
	if (!nm)
		return -ENOMEM;

	nm->isr_ctx = devm_kcalloc(&ntb->dev, num_irqs, sizeof(*nm->isr_ctx),
				   GFP_KERNEL);
	if (!nm->isr_ctx)
		return -ENOMEM;

	INIT_WORK(&nm->setup_work, ntb_msit_setup_work);
	nm->ntb = ntb;

	ntb_msit_create_dbgfs(nm);

	/*
	 * nm->isr_ctx was already validated above, so the only failure
	 * left is ntb_set_ctx(); the former second NULL check here was
	 * dead code that would have returned 0 after tearing down.
	 */
	ret = ntb_set_ctx(ntb, nm, &ntb_msit_ops);
	if (ret)
		goto remove_dbgfs;

	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);

	return 0;

remove_dbgfs:
	ntb_msit_remove_dbgfs(nm);
	devm_kfree(&ntb->dev, nm->isr_ctx);
	devm_kfree(&ntb->dev, nm);
	return ret;
}
static void ntb_msit_remove(struct ntb_client *client, struct ntb_dev *ntb)
{
struct ntb_msit_ctx *nm = ntb->ctx;
int i;
ntb_link_disable(ntb);
ntb_db_set_mask(ntb, ntb_db_valid_mask(ntb));
ntb_msi_clear_mws(ntb);
for (i = 0; i < ntb_peer_port_count(ntb); i++)
kfree(nm->peers[i].msi_desc);
ntb_clear_ctx(ntb);
ntb_msit_remove_dbgfs(nm);
}
/*
 * NTB client descriptor for this module; registered in ntb_msit_init()
 * and unregistered in ntb_msit_exit().
 */
static struct ntb_client ntb_msit_client = {
.ops = {
.probe = ntb_msit_probe,
.remove = ntb_msit_remove
}
};
/*
 * Module entry point: create the top-level debugfs directory (only when
 * debugfs is initialised) and register the NTB client. If registration
 * fails the directory is removed again.
 *
 * Returns the result of ntb_register_client().
 */
static int __init ntb_msit_init(void)
{
	int err;

	if (debugfs_initialized())
		ntb_msit_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME,
							   NULL);

	err = ntb_register_client(&ntb_msit_client);
	if (!err)
		return 0;

	debugfs_remove_recursive(ntb_msit_dbgfs_topdir);
	return err;
}
module_init(ntb_msit_init);
/*
 * Module exit point: unregister the NTB client first, then remove the
 * top-level debugfs directory and everything beneath it.
 */
static void __exit ntb_msit_exit(void)
{
ntb_unregister_client(&ntb_msit_client);
debugfs_remove_recursive(ntb_msit_dbgfs_topdir);
}
module_exit(ntb_msit_exit);

View File

@ -100,7 +100,7 @@ MODULE_DESCRIPTION("PCIe NTB Performance Measurement Tool");
#define DMA_TRIES		100
#define DMA_MDELAY		10
/* Single definition: the stale pre-change value (500) is dropped. */
#define MSG_TRIES		1000
#define MSG_UDELAY_LOW		1000
#define MSG_UDELAY_HIGH		2000
@ -734,8 +734,6 @@ static void perf_disable_service(struct perf_ctx *perf)
{
int pidx;
ntb_link_disable(perf->ntb);
if (perf->cmd_send == perf_msg_cmd_send) {
u64 inbits;
@ -752,6 +750,16 @@ static void perf_disable_service(struct perf_ctx *perf)
for (pidx = 0; pidx < perf->pcnt; pidx++)
flush_work(&perf->peers[pidx].service);
for (pidx = 0; pidx < perf->pcnt; pidx++) {
struct perf_peer *peer = &perf->peers[pidx];
ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx), 0);
}
ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));
ntb_link_disable(perf->ntb);
}
/*==============================================================================

View File

@ -192,6 +192,9 @@ static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
/*
 * Return the address of @desc's entry in the MSI-X table: mask_base plus
 * entry_nr entries of PCI_MSIX_ENTRY_SIZE bytes. Descriptors flagged
 * is_virtual have no table entry, so NULL is returned for them.
 */
static void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
{
	if (!desc->msi_attrib.is_virtual)
		return desc->mask_base +
			desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

	return NULL;
}
@ -206,14 +209,19 @@ static void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
/*
 * Set or clear the mask bit of an MSI-X vector.
 *
 * @desc: MSI descriptor of the vector.
 * @flag: non-zero to set PCI_MSIX_ENTRY_CTRL_MASKBIT, zero to clear it.
 *
 * Returns the value written to the entry's vector control register, or 0
 * without touching the device when pci_msi_ignore_mask is set or when the
 * descriptor has no table entry (pci_msix_desc_addr() returned NULL).
 */
u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag)
{
	u32 mask_bits = desc->masked;
	void __iomem *desc_addr;

	if (pci_msi_ignore_mask)
		return 0;

	desc_addr = pci_msix_desc_addr(desc);
	if (!desc_addr)
		return 0;

	mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
	if (flag)
		mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;

	/* Write once, through the address that was already NULL-checked. */
	writel(mask_bits, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);

	return mask_bits;
}
@ -273,6 +281,11 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
if (entry->msi_attrib.is_msix) {
void __iomem *base = pci_msix_desc_addr(entry);
if (!base) {
WARN_ON(1);
return;
}
msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
@ -303,6 +316,9 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
} else if (entry->msi_attrib.is_msix) {
void __iomem *base = pci_msix_desc_addr(entry);
if (!base)
goto skip;
writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
@ -327,7 +343,13 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
msg->data);
}
}
skip:
entry->msg = *msg;
if (entry->write_msi_msg)
entry->write_msi_msg(entry, entry->write_msi_msg_data);
}
void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
@ -550,6 +572,7 @@ msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
entry->msi_attrib.is_msix = 0;
entry->msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT);
entry->msi_attrib.is_virtual = 0;
entry->msi_attrib.entry_nr = 0;
entry->msi_attrib.maskbit = !!(control & PCI_MSI_FLAGS_MASKBIT);
entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */
@ -674,6 +697,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
struct irq_affinity_desc *curmsk, *masks = NULL;
struct msi_desc *entry;
int ret, i;
int vec_count = pci_msix_vec_count(dev);
if (affd)
masks = irq_create_affinity_masks(nvec, affd);
@ -696,6 +720,10 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
entry->msi_attrib.entry_nr = entries[i].entry;
else
entry->msi_attrib.entry_nr = i;
entry->msi_attrib.is_virtual =
entry->msi_attrib.entry_nr >= vec_count;
entry->msi_attrib.default_irq = dev->irq;
entry->mask_base = base;
@ -714,12 +742,19 @@ static void msix_program_entries(struct pci_dev *dev,
{
struct msi_desc *entry;
int i = 0;
void __iomem *desc_addr;
for_each_pci_msi_entry(entry, dev) {
if (entries)
entries[i++].vector = entry->irq;
entry->masked = readl(pci_msix_desc_addr(entry) +
PCI_MSIX_ENTRY_VECTOR_CTRL);
desc_addr = pci_msix_desc_addr(entry);
if (desc_addr)
entry->masked = readl(desc_addr +
PCI_MSIX_ENTRY_VECTOR_CTRL);
else
entry->masked = 0;
msix_mask_irq(entry, 1);
}
}
@ -932,7 +967,7 @@ int pci_msix_vec_count(struct pci_dev *dev)
EXPORT_SYMBOL(pci_msix_vec_count);
static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
int nvec, struct irq_affinity *affd)
int nvec, struct irq_affinity *affd, int flags)
{
int nr_entries;
int i, j;
@ -943,7 +978,7 @@ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
nr_entries = pci_msix_vec_count(dev);
if (nr_entries < 0)
return nr_entries;
if (nvec > nr_entries)
if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL))
return nr_entries;
if (entries) {
@ -1079,7 +1114,8 @@ EXPORT_SYMBOL(pci_enable_msi);
static int __pci_enable_msix_range(struct pci_dev *dev,
struct msix_entry *entries, int minvec,
int maxvec, struct irq_affinity *affd)
int maxvec, struct irq_affinity *affd,
int flags)
{
int rc, nvec = maxvec;
@ -1096,7 +1132,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
return -ENOSPC;
}
rc = __pci_enable_msix(dev, entries, nvec, affd);
rc = __pci_enable_msix(dev, entries, nvec, affd, flags);
if (rc == 0)
return nvec;
@ -1127,7 +1163,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
int minvec, int maxvec)
{
return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL);
return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0);
}
EXPORT_SYMBOL(pci_enable_msix_range);
@ -1167,7 +1203,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
if (flags & PCI_IRQ_MSIX) {
msix_vecs = __pci_enable_msix_range(dev, NULL, min_vecs,
max_vecs, affd);
max_vecs, affd, flags);
if (msix_vecs > 0)
return msix_vecs;
}

View File

@ -30,6 +30,10 @@ module_param(use_dma_mrpc, bool, 0644);
MODULE_PARM_DESC(use_dma_mrpc,
"Enable the use of the DMA MRPC feature");
static int nirqs = 32;
module_param(nirqs, int, 0644);
MODULE_PARM_DESC(nirqs, "number of interrupts to allocate (more may be useful for NTB applications)");
static dev_t switchtec_devt;
static DEFINE_IDA(switchtec_minor_ida);
@ -1263,8 +1267,12 @@ static int switchtec_init_isr(struct switchtec_dev *stdev)
int dma_mrpc_irq;
int rc;
nvecs = pci_alloc_irq_vectors(stdev->pdev, 1, 4,
PCI_IRQ_MSIX | PCI_IRQ_MSI);
if (nirqs < 4)
nirqs = 4;
nvecs = pci_alloc_irq_vectors(stdev->pdev, 1, nirqs,
PCI_IRQ_MSIX | PCI_IRQ_MSI |
PCI_IRQ_VIRTUAL);
if (nvecs < 0)
return nvecs;

View File

@ -64,6 +64,10 @@ struct ti_sci_inta_msi_desc {
* @msg: The last set MSI message cached for reuse
* @affinity: Optional pointer to a cpu affinity mask for this descriptor
*
* @write_msi_msg: Callback that may be called when the MSI message
* address or data changes
* @write_msi_msg_data: Data parameter for the callback.
*
* @masked: [PCI MSI/X] Mask bits
* @is_msix: [PCI MSI/X] True if MSI-X
* @multiple: [PCI MSI/X] log2 num of messages allocated
@ -90,6 +94,9 @@ struct msi_desc {
const void *iommu_cookie;
#endif
void (*write_msi_msg)(struct msi_desc *entry, void *data);
void *write_msi_msg_data;
union {
/* PCI MSI/X specific data */
struct {
@ -100,6 +107,7 @@ struct msi_desc {
u8 multi_cap : 3;
u8 maskbit : 1;
u8 is_64 : 1;
u8 is_virtual : 1;
u16 entry_nr;
unsigned default_irq;
} msi_attrib;

View File

@ -58,9 +58,11 @@
#include <linux/completion.h>
#include <linux/device.h>
#include <linux/interrupt.h>
struct ntb_client;
struct ntb_dev;
struct ntb_msi;
struct pci_dev;
/**
@ -205,7 +207,7 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
}
/**
* struct ntb_ctx_ops - ntb device operations
* struct ntb_dev_ops - ntb device operations
* @port_number: See ntb_port_number().
* @peer_port_count: See ntb_peer_port_count().
* @peer_port_number: See ntb_peer_port_number().
@ -404,7 +406,7 @@ struct ntb_client {
#define drv_ntb_client(__drv) container_of((__drv), struct ntb_client, drv)
/**
* struct ntb_device - ntb device
* struct ntb_dev - ntb device
* @dev: Linux device object.
* @pdev: PCI device entry of the ntb.
* @topo: Detected topology of the ntb.
@ -426,6 +428,10 @@ struct ntb_dev {
spinlock_t ctx_lock;
/* block unregister until device is fully released */
struct completion released;
#ifdef CONFIG_NTB_MSI
struct ntb_msi *msi;
#endif
};
#define dev_ntb(__dev) container_of((__dev), struct ntb_dev, dev)
@ -616,7 +622,6 @@ static inline int ntb_port_number(struct ntb_dev *ntb)
return ntb->ops->port_number(ntb);
}
/**
* ntb_peer_port_count() - get the number of peer device ports
* @ntb: NTB device context.
@ -653,6 +658,58 @@ static inline int ntb_peer_port_number(struct ntb_dev *ntb, int pidx)
return ntb->ops->peer_port_number(ntb, pidx);
}
/**
 * ntb_logical_port_number() - get the logical port number of the local port
 * @ntb:	NTB device context.
 *
 * Logical port numbers are unique and run from zero up to the number of
 * ports minus one, whereas the hardware may assign any unique physical
 * value as the Port Number.
 *
 * The local logical number is the index of the first peer whose port
 * number is not smaller than the local port number, or the peer count
 * when no such peer exists. It is useful for calculating the resource
 * indexes used by peers.
 *
 * Return: the logical port number or negative value indicating an error
 */
static inline int ntb_logical_port_number(struct ntb_dev *ntb)
{
	int self = ntb_port_number(ntb);
	int idx = 0;

	if (self < 0)
		return self;

	while (idx < ntb_peer_port_count(ntb) &&
	       self > ntb_peer_port_number(ntb, idx))
		idx++;

	return idx;
}
/**
 * ntb_peer_logical_port_number() - get the logical peer port by given index
 * @ntb:	NTB device context.
 * @pidx:	Peer port index.
 *
 * Logical port numbers are unique and run from zero up to the number of
 * ports minus one, whereas the hardware may assign any unique physical
 * value as the Port Number.
 *
 * A peer whose port number is below the local port number keeps @pidx as
 * its logical number; any other peer is shifted up by one. The result is
 * useful for calculating the resource indexes used by peers.
 *
 * Return: the peer's logical port number
 */
static inline int ntb_peer_logical_port_number(struct ntb_dev *ntb, int pidx)
{
	int peer_port = ntb_peer_port_number(ntb, pidx);
	int local_port = ntb_port_number(ntb);

	return peer_port < local_port ? pidx : pidx + 1;
}
/**
* ntb_peer_port_idx() - get the peer device port index by given port number
* @ntb: NTB device context.
@ -1506,4 +1563,141 @@ static inline int ntb_peer_msg_write(struct ntb_dev *ntb, int pidx, int midx,
return ntb->ops->peer_msg_write(ntb, pidx, midx, msg);
}
/**
 * ntb_peer_resource_idx() - get a resource index for a given peer idx
 * @ntb:	NTB device context.
 * @pidx:	Peer port index.
 *
 * When constructing a graph of peers, each remote peer must use a different
 * resource index (mw, doorbell, etc) to communicate with each other
 * peer.
 *
 * In a two peer system, this function should always return 0 such that
 * resource 0 points to the remote peer on both ports.
 *
 * In a 5 peer system, this function will return the following matrix
 *
 * pidx \ port    0    1    2    3    4
 * 0              0    0    1    2    3
 * 1              0    1    1    2    3
 * 2              0    1    2    2    3
 * 3              0    1    2    3    3
 *
 * For example, if this function is used to program peer's memory
 * windows, port 0 will program MW 0 on all its peers to point to itself.
 * port 1 will program MW 0 in port 0 to point to itself and MW 1 on all
 * other ports. etc.
 *
 * For the legacy two host case, ntb_port_number() and ntb_peer_port_number()
 * both return zero and therefore this function will always return zero.
 * So MW 0 on each host would be programmed to point to the other host.
 *
 * Return: the resource index to use for that peer, or -EINVAL when @pidx
 * is not below ntb_peer_port_count().
 */
static inline int ntb_peer_resource_idx(struct ntb_dev *ntb, int pidx)
{
	int self, peer;

	if (pidx >= ntb_peer_port_count(ntb))
		return -EINVAL;

	self = ntb_logical_port_number(ntb);
	peer = ntb_peer_logical_port_number(ntb, pidx);

	/* Peers logically below us take the slot just under our own. */
	return peer < self ? self - 1 : self;
}
/**
 * ntb_peer_highest_mw_idx() - get a memory window index for a given peer idx
 *	using the highest index memory windows first
 *
 * @ntb:	NTB device context.
 * @pidx:	Peer port index.
 *
 * Mirrors ntb_peer_resource_idx(), but counts down from the last memory
 * window index reported by ntb_mw_count() instead of up from zero.
 *
 * Return: the resource index to use for that peer, or the negative error
 * from ntb_peer_resource_idx().
 */
static inline int ntb_peer_highest_mw_idx(struct ntb_dev *ntb, int pidx)
{
	int res_idx = ntb_peer_resource_idx(ntb, pidx);

	if (res_idx < 0)
		return res_idx;

	return ntb_mw_count(ntb, pidx) - res_idx - 1;
}
/*
 * Descriptor exchanged with the NTB MSI library: it is filled in through
 * ntbm_msi_request_threaded_irq() and consumed by ntb_msi_peer_trigger()
 * and ntb_msi_peer_addr().
 * NOTE(review): presumably addr_offset is the MSI address offset and data
 * the MSI payload - confirm against the MSI library implementation.
 */
struct ntb_msi_desc {
u32 addr_offset;
u32 data;
};
/*
 * NTB MSI library interface. With CONFIG_NTB_MSI set, the functions are
 * only declared here; their definitions are not part of this header.
 */
#ifdef CONFIG_NTB_MSI
int ntb_msi_init(struct ntb_dev *ntb, void (*desc_changed)(void *ctx));
int ntb_msi_setup_mws(struct ntb_dev *ntb);
void ntb_msi_clear_mws(struct ntb_dev *ntb);
int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler,
irq_handler_t thread_fn,
const char *name, void *dev_id,
struct ntb_msi_desc *msi_desc);
void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id);
int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer,
struct ntb_msi_desc *desc);
int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer,
struct ntb_msi_desc *desc,
phys_addr_t *msi_addr);
#else /* not CONFIG_NTB_MSI */
/*
 * Stubs used when CONFIG_NTB_MSI is not set: every int-returning call
 * reports -EOPNOTSUPP and the void calls do nothing, so callers build
 * unchanged without the library.
 */
static inline int ntb_msi_init(struct ntb_dev *ntb,
void (*desc_changed)(void *ctx))
{
return -EOPNOTSUPP;
}
static inline int ntb_msi_setup_mws(struct ntb_dev *ntb)
{
return -EOPNOTSUPP;
}
static inline void ntb_msi_clear_mws(struct ntb_dev *ntb) {}
static inline int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb,
irq_handler_t handler,
irq_handler_t thread_fn,
const char *name, void *dev_id,
struct ntb_msi_desc *msi_desc)
{
return -EOPNOTSUPP;
}
static inline void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq,
void *dev_id) {}
static inline int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer,
struct ntb_msi_desc *desc)
{
return -EOPNOTSUPP;
}
static inline int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer,
struct ntb_msi_desc *desc,
phys_addr_t *msi_addr)
{
return -EOPNOTSUPP;
}
#endif /* CONFIG_NTB_MSI */
/*
 * Convenience wrapper around ntbm_msi_request_threaded_irq() for callers
 * that have no threaded handler: it passes NULL as thread_fn and forwards
 * every other argument, returning the wrapped call's result.
 */
static inline int ntbm_msi_request_irq(struct ntb_dev *ntb,
				       irq_handler_t handler,
				       const char *name, void *dev_id,
				       struct ntb_msi_desc *msi_desc)
{
	irq_handler_t no_thread_fn = NULL;

	return ntbm_msi_request_threaded_irq(ntb, handler, no_thread_fn,
					     name, dev_id, msi_desc);
}
#endif

View File

@ -1412,6 +1412,15 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode,
#define PCI_IRQ_MSI (1 << 1) /* Allow MSI interrupts */
#define PCI_IRQ_MSIX (1 << 2) /* Allow MSI-X interrupts */
#define PCI_IRQ_AFFINITY (1 << 3) /* Auto-assign affinity */
/*
* Virtual interrupts allow for more interrupts to be allocated
* than the device has interrupts for. These are not programmed
* into the device's MSI-X table and must be handled by some
* other driver means.
*/
#define PCI_IRQ_VIRTUAL (1 << 4)
#define PCI_IRQ_ALL_TYPES \
(PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX)

View File

@ -78,10 +78,10 @@ set -e
function _modprobe()
{
modprobe "$@"
modprobe "$@" || return 1
if [[ "$REMOTE_HOST" != "" ]]; then
ssh "$REMOTE_HOST" modprobe "$@"
ssh "$REMOTE_HOST" modprobe "$@" || return 1
fi
}
@ -442,6 +442,30 @@ function pingpong_test()
echo " Passed"
}
# msi_test LOC REM
#
# Trigger every MSI interrupt advertised in LOC/count through LOC/trigger
# and check that the matching irqN_occurrences counter one level above REM
# goes up by exactly one. Writes to LOC/ready first. Exits the script with
# status 1 on the first interrupt that was not counted.
#
# Path expansions are quoted so they are not word-split or globbed.
function msi_test()
{
	LOC=$1
	REM=$2

	write_file 1 "$LOC/ready"

	echo "Running MSI interrupt tests on: $(subdirname "$LOC") / $(subdirname "$REM")"

	CNT=$(read_file "$LOC/count")
	for ((i = 0; i < $CNT; i++)); do
		START=$(read_file "$REM/../irq${i}_occurrences")
		write_file "$i" "$LOC/trigger"
		END=$(read_file "$REM/../irq${i}_occurrences")

		if [[ $(($END - $START)) != 1 ]]; then
			echo "MSI did not trigger the interrupt on the remote side!" >&2
			exit 1
		fi
	done

	echo " Passed"
}
function perf_test()
{
USE_DMA=$1
@ -520,6 +544,29 @@ function ntb_pingpong_tests()
_modprobe -r ntb_pingpong
}
# Run the ntb_msi_test suite: load the module on both sides via _modprobe,
# run the port test, then the MSI trigger test in each direction, and
# unload the module again. If the module cannot be loaded the suite is
# skipped with a message instead of failing.
#
# Path expansions are quoted so they are not word-split or globbed.
function ntb_msi_tests()
{
	LOCAL_MSI="$DEBUGFS/ntb_msi_test/$LOCAL_DEV"
	REMOTE_MSI="$REMOTE_HOST:$DEBUGFS/ntb_msi_test/$REMOTE_DEV"

	echo "Starting ntb_msi_test tests..."

	if ! _modprobe ntb_msi_test 2> /dev/null; then
		echo " Not doing MSI tests seeing the module is not available."
		return
	fi

	port_test "$LOCAL_MSI" "$REMOTE_MSI"

	LOCAL_PEER="$LOCAL_MSI/peer$LOCAL_PIDX"
	REMOTE_PEER="$REMOTE_MSI/peer$REMOTE_PIDX"

	msi_test "$LOCAL_PEER" "$REMOTE_PEER"
	msi_test "$REMOTE_PEER" "$LOCAL_PEER"

	_modprobe -r ntb_msi_test
}
function ntb_perf_tests()
{
LOCAL_PERF="$DEBUGFS/ntb_perf/$LOCAL_DEV"
@ -541,6 +588,7 @@ function cleanup()
_modprobe -r ntb_perf 2> /dev/null
_modprobe -r ntb_pingpong 2> /dev/null
_modprobe -r ntb_transport 2> /dev/null
_modprobe -r ntb_msi_test 2> /dev/null
set -e
}
@ -577,5 +625,7 @@ ntb_tool_tests
echo
ntb_pingpong_tests
echo
ntb_msi_tests
echo
ntb_perf_tests
echo