IB/hfi1: add driver files

Signed-off-by: Andrew Friedley <andrew.friedley@intel.com>
Signed-off-by: Arthur Kepner <arthur.kepner@intel.com>
Signed-off-by: Brendan Cunningham <brendan.cunningham@intel.com>
Signed-off-by: Brian Welty <brian.welty@intel.com>
Signed-off-by: Caz Yokoyama <caz.yokoyama@intel.com>
Signed-off-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Easwar Hariharan <easwar.hariharan@intel.com>
Signed-off-by: Harish Chegondi <harish.chegondi@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Jim Snow <jim.m.snow@intel.com>
Signed-off-by: John Gregor <john.a.gregor@intel.com>
Signed-off-by: Jubin John <jubin.john@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Kevin Pine <kevin.pine@intel.com>
Signed-off-by: Kyle Liddell <kyle.liddell@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Ravi Krishnaswamy <ravi.krishnaswamy@intel.com>
Signed-off-by: Sadanand Warrier <sadanand.warrier@intel.com>
Signed-off-by: Sanath Kumar <sanath.s.kumar@intel.com>
Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com>
Signed-off-by: Vlad Danushevsky <vladimir.danusevsky@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Authored by Mike Marciniszyn on 2015-07-30 15:17:43 -04:00; committed by Doug Ledford
parent d4ab347005
commit 7724105686
60 changed files with 57480 additions and 0 deletions

@@ -64,3 +64,23 @@ MTHCA
fw_ver - Firmware version
hca_type - HCA type: "MT23108", "MT25208 (MT23108 compat mode)",
or "MT25208"
HFI1

 The hfi1 driver also creates these additional files:

   hw_rev - hardware revision
   board_id - manufacturing board id
   tempsense - thermal sense information
   serial - board serial number
   nfreectxts - number of free user contexts
   nctxts - number of allowed contexts (PSM2)
   chip_reset - diagnostic (root only)
   boardversion - board version
   ports/1/
          CCMgtA/
                 cc_settings_bin - CCA tables used by PSM2
                 cc_table_bin
          sc2vl/ - 32 files (0 - 31) used to translate sc->vl
          sl2sc/ - 32 files (0 - 31) used to translate sl->sc
          vl2mtu/ - 16 files (0 - 15) used to determine MTU for each vl
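As a quick illustration of the attributes listed above: once the driver is loaded they show up under the device's InfiniBand class directory in sysfs. The sketch below assumes the first adapter registers as hfi1_0 (a path this document does not spell out) and simply reads and prints a few of the text attributes.

#include <stdio.h>

/* Illustrative only: dump a few hfi1 sysfs attributes. The
 * /sys/class/infiniband/hfi1_0 path is an assumption based on the usual
 * IB device naming; adjust to the device name actually present. */
int main(void)
{
	static const char *attrs[] = {
		"hw_rev", "board_id", "serial", "nfreectxts", "nctxts"
	};
	char path[128], value[256];
	unsigned int i;

	for (i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++) {
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/class/infiniband/hfi1_0/%s", attrs[i]);
		f = fopen(path, "r");
		if (!f)
			continue;
		if (fgets(value, sizeof(value), f))
			printf("%s: %s", attrs[i], value);
		fclose(f);
	}
	return 0;
}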

@@ -9809,6 +9809,12 @@ M: Arnaud Patard <arnaud.patard@rtp-net.org>
S: Odd Fixes
F: drivers/staging/xgifb/
HFI1 DRIVER
M: Mike Marciniszyn <infinipath@intel.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/staging/rdma/hfi1
STARFIRE/DURALAN NETWORK DRIVER
M: Ion Badulescu <ionut@badula.org>
S: Odd Fixes

@@ -24,6 +24,8 @@ if STAGING_RDMA
source "drivers/staging/rdma/amso1100/Kconfig"
source "drivers/staging/rdma/hfi1/Kconfig"
source "drivers/staging/rdma/ipath/Kconfig"
endif

@@ -1,3 +1,4 @@
# Entries for RDMA_STAGING tree
obj-$(CONFIG_INFINIBAND_AMSO1100) += amso1100/
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
obj-$(CONFIG_INFINIBAND_IPATH) += ipath/

@@ -0,0 +1,37 @@
config INFINIBAND_HFI1
	tristate "Intel OPA Gen1 support"
	depends on X86_64
	default m
	---help---
	This is a low-level driver for Intel OPA Gen1 adapters.

config HFI1_DEBUG_SDMA_ORDER
	bool "HFI1 SDMA Order debug"
	depends on INFINIBAND_HFI1
	default n
	---help---
	This is a debug flag to test for out-of-order SDMA completions
	for unit testing.

config HFI1_VERBS_31BIT_PSN
	bool "HFI1 enable 31 bit PSN"
	depends on INFINIBAND_HFI1
	default y
	---help---
	Setting this enables 31-bit PSNs for verbs RC/UC.

config SDMA_VERBOSITY
	bool "Config SDMA Verbosity"
	depends on INFINIBAND_HFI1
	default n
	---help---
	This is a configuration flag to enable verbose SDMA debug.

config PRESCAN_RXQ
	bool "Enable prescanning of the RX queue for ECNs"
	depends on INFINIBAND_HFI1
	default n
	---help---
	This option toggles prescanning of the receive queue for
	Explicit Congestion Notifications. If an ECN is detected, it
	is processed as quickly as possible and the ECN bit is toggled
	off. After the prescanning step, the receive queue is processed
	as usual.

@@ -0,0 +1,19 @@
#
# HFI driver
#
#
#
# Called from the kernel module build system.
#
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
hfi1-y := chip.o cq.o device.o diag.o dma.o driver.o eprom.o file_ops.o firmware.o \
init.o intr.o keys.o mad.o mmap.o mr.o pcie.o pio.o pio_copy.o \
qp.o qsfp.o rc.o ruc.o sdma.o srq.o sysfs.o trace.o twsi.o \
uc.o ud.o user_pages.o user_sdma.o verbs_mcast.o verbs.o
hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
CFLAGS_trace.o = -I$(src)
ifdef MVERSION
CFLAGS_driver.o = -DHFI_DRIVER_VERSION_BASE=\"$(MVERSION)\"
endif

@@ -0,0 +1,6 @@
July, 2015
- Remove unneeded file entries in sysfs
- Remove software processing of IB protocol and place in library for use
by qib, ipath (if still present), hfi1, and eventually soft-roce

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -0,0 +1,415 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef _COMMON_H
#define _COMMON_H
#include <rdma/hfi/hfi1_user.h>
/*
* This file contains defines, structures, etc. that are used
* to communicate between kernel and user code.
*/
/* version of protocol header (known to chip also). In the long run,
* we should be able to generate and accept a range of version numbers;
* for now we only accept one, and it's compiled in.
*/
#define IPS_PROTO_VERSION 2
/*
* These are compile time constants that you may want to enable or disable
* if you are trying to debug problems with code or performance.
* HFI1_VERBOSE_TRACING define as 1 if you want additional tracing in
* fast path code
* HFI1_TRACE_REGWRITES define as 1 if you want register writes to be
* traced in fast path code
* _HFI1_TRACING define as 0 if you want to remove all tracing in a
* compilation unit
*/
/*
* If a packet's QP[23:16] bits match this value, then it is
* a PSM packet and the hardware will expect a KDETH header
* following the BTH.
*/
#define DEFAULT_KDETH_QP 0x80
/* driver/hw feature set bitmask */
#define HFI1_CAP_USER_SHIFT 24
#define HFI1_CAP_MASK ((1UL << HFI1_CAP_USER_SHIFT) - 1)
/* locked flag - if set, only HFI1_CAP_WRITABLE_MASK bits can be set */
#define HFI1_CAP_LOCKED_SHIFT 63
#define HFI1_CAP_LOCKED_MASK 0x1ULL
#define HFI1_CAP_LOCKED_SMASK (HFI1_CAP_LOCKED_MASK << HFI1_CAP_LOCKED_SHIFT)
/* extra bits used between kernel and user processes */
#define HFI1_CAP_MISC_SHIFT (HFI1_CAP_USER_SHIFT * 2)
#define HFI1_CAP_MISC_MASK ((1ULL << (HFI1_CAP_LOCKED_SHIFT - \
HFI1_CAP_MISC_SHIFT)) - 1)
#define HFI1_CAP_KSET(cap) ({ hfi1_cap_mask |= HFI1_CAP_##cap; hfi1_cap_mask; })
#define HFI1_CAP_KCLEAR(cap) \
({ \
hfi1_cap_mask &= ~HFI1_CAP_##cap; \
hfi1_cap_mask; \
})
#define HFI1_CAP_USET(cap) \
({ \
hfi1_cap_mask |= (HFI1_CAP_##cap << HFI1_CAP_USER_SHIFT); \
hfi1_cap_mask; \
})
#define HFI1_CAP_UCLEAR(cap) \
({ \
hfi1_cap_mask &= ~(HFI1_CAP_##cap << HFI1_CAP_USER_SHIFT); \
hfi1_cap_mask; \
})
#define HFI1_CAP_SET(cap) \
({ \
hfi1_cap_mask |= (HFI1_CAP_##cap | (HFI1_CAP_##cap << \
HFI1_CAP_USER_SHIFT)); \
hfi1_cap_mask; \
})
#define HFI1_CAP_CLEAR(cap) \
({ \
hfi1_cap_mask &= ~(HFI1_CAP_##cap | \
(HFI1_CAP_##cap << HFI1_CAP_USER_SHIFT)); \
hfi1_cap_mask; \
})
#define HFI1_CAP_LOCK() \
({ hfi1_cap_mask |= HFI1_CAP_LOCKED_SMASK; hfi1_cap_mask; })
#define HFI1_CAP_LOCKED() (!!(hfi1_cap_mask & HFI1_CAP_LOCKED_SMASK))
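/*
 * For reference, the 64-bit hfi1_cap_mask manipulated by the macros above
 * is laid out as
 *
 *   bits  0..23  kernel capability bits
 *   bits 24..47  user capability bits (the kernel bits shifted left by
 *                HFI1_CAP_USER_SHIFT)
 *   bits 48..62  misc bits shared between kernel and user processes
 *   bit  63      locked flag
 *
 * So, for example, HFI1_CAP_KSET(SDMA) ORs HFI1_CAP_SDMA into the kernel
 * half, HFI1_CAP_USET(SDMA) ORs (HFI1_CAP_SDMA << HFI1_CAP_USER_SHIFT)
 * into the user half, and HFI1_CAP_SET(SDMA) does both at once.
 */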
/*
* The set of capability bits that can be changed after initial load
* This set is the same for kernel and user contexts. However, for
* user contexts, the set can be further filtered by using the
* HFI1_CAP_RESERVED_MASK bits.
*/
#define HFI1_CAP_WRITABLE_MASK (HFI1_CAP_SDMA_AHG | \
HFI1_CAP_HDRSUPP | \
HFI1_CAP_MULTI_PKT_EGR | \
HFI1_CAP_NODROP_RHQ_FULL | \
HFI1_CAP_NODROP_EGR_FULL | \
HFI1_CAP_ALLOW_PERM_JKEY | \
HFI1_CAP_STATIC_RATE_CTRL | \
HFI1_CAP_PRINT_UNIMPL)
/*
* A set of capability bits that are "global" and are not allowed to be
* set in the user bitmask.
*/
#define HFI1_CAP_RESERVED_MASK ((HFI1_CAP_SDMA | \
HFI1_CAP_USE_SDMA_HEAD | \
HFI1_CAP_EXTENDED_PSN | \
HFI1_CAP_PRINT_UNIMPL | \
HFI1_CAP_QSFP_ENABLED | \
HFI1_CAP_NO_INTEGRITY | \
HFI1_CAP_PKEY_CHECK) << \
HFI1_CAP_USER_SHIFT)
/*
* Set of capabilities that need to be enabled for kernel context in
* order to be allowed for user contexts, as well.
*/
#define HFI1_CAP_MUST_HAVE_KERN (HFI1_CAP_STATIC_RATE_CTRL)
/* Default enabled capabilities (both kernel and user) */
#define HFI1_CAP_MASK_DEFAULT (HFI1_CAP_HDRSUPP | \
HFI1_CAP_NODROP_RHQ_FULL | \
HFI1_CAP_NODROP_EGR_FULL | \
HFI1_CAP_SDMA | \
HFI1_CAP_PRINT_UNIMPL | \
HFI1_CAP_STATIC_RATE_CTRL | \
HFI1_CAP_QSFP_ENABLED | \
HFI1_CAP_PKEY_CHECK | \
HFI1_CAP_MULTI_PKT_EGR | \
HFI1_CAP_EXTENDED_PSN | \
((HFI1_CAP_HDRSUPP | \
HFI1_CAP_MULTI_PKT_EGR | \
HFI1_CAP_STATIC_RATE_CTRL | \
HFI1_CAP_PKEY_CHECK | \
HFI1_CAP_EARLY_CREDIT_RETURN) << \
HFI1_CAP_USER_SHIFT))
/*
* A bitmask of kernel/global capabilities that should be communicated
* to user level processes.
*/
#define HFI1_CAP_K2U (HFI1_CAP_SDMA | \
HFI1_CAP_EXTENDED_PSN | \
HFI1_CAP_PKEY_CHECK | \
HFI1_CAP_NO_INTEGRITY)
#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << 16) | HFI1_USER_SWMINOR)
#ifndef HFI1_KERN_TYPE
#define HFI1_KERN_TYPE 0
#endif
/*
* Similarly, this is the kernel version going back to the user. It's
* slightly different, in that we want to tell if the driver was built as
* part of an Intel release, or from the driver from openfabrics.org,
* kernel.org, or a standard distribution, for support reasons.
* The high bit is 0 for non-Intel and 1 for Intel-built/supplied.
*
* It's returned by the driver to the user code during initialization in the
* spi_sw_version field of hfi1_base_info, so the user code can in turn
* check for compatibility with the kernel.
*/
#define HFI1_KERN_SWVERSION ((HFI1_KERN_TYPE << 31) | HFI1_USER_SWVERSION)
/*
* Define the driver version number. This is something that refers only
* to the driver itself, not the software interfaces it supports.
*/
#ifndef HFI1_DRIVER_VERSION_BASE
#define HFI1_DRIVER_VERSION_BASE "0.9-248"
#endif
/* create the final driver version string */
#ifdef HFI1_IDSTR
#define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE " " HFI1_IDSTR
#else
#define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE
#endif
/*
* Diagnostics can send a packet by writing the following
* struct to the diag packet special file.
*
* This allows a custom PBC qword, so that special modes and deliberate
* changes to CRCs can be used.
*/
#define _DIAG_PKT_VERS 1
struct diag_pkt {
__u16 version; /* structure version */
__u16 unit; /* which device */
__u16 sw_index; /* send sw index to use */
__u16 len; /* data length, in bytes */
__u16 port; /* port number */
__u16 unused;
__u32 flags; /* call flags */
__u64 data; /* user data pointer */
__u64 pbc; /* PBC for the packet */
};
/* diag_pkt flags */
#define F_DIAGPKT_WAIT 0x1 /* wait until packet is sent */
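/*
 * A minimal userspace sketch of the above (assuming a /dev/hfi1_diagpkt
 * special file; the node name is not defined in this header):
 *
 *	struct diag_pkt dp = {
 *		.version = _DIAG_PKT_VERS,
 *		.unit = 0,
 *		.sw_index = 0,
 *		.len = sizeof(payload),
 *		.data = (__u64)(uintptr_t)payload,
 *		.pbc = pbc_qword,
 *		.flags = F_DIAGPKT_WAIT,
 *	};
 *
 *	if (write(fd, &dp, sizeof(dp)) != sizeof(dp))
 *		perror("diag_pkt write");
 *
 * where payload, pbc_qword and fd stand in for the caller's packet buffer,
 * custom PBC qword and an open descriptor on the special file.
 */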
/*
* The next set of defines are for packet headers, and chip register
* and memory bits that are visible to and/or used by user-mode software.
*/
/*
* Receive Header Flags
*/
#define RHF_PKT_LEN_SHIFT 0
#define RHF_PKT_LEN_MASK 0xfffull
#define RHF_PKT_LEN_SMASK (RHF_PKT_LEN_MASK << RHF_PKT_LEN_SHIFT)
#define RHF_RCV_TYPE_SHIFT 12
#define RHF_RCV_TYPE_MASK 0x7ull
#define RHF_RCV_TYPE_SMASK (RHF_RCV_TYPE_MASK << RHF_RCV_TYPE_SHIFT)
#define RHF_USE_EGR_BFR_SHIFT 15
#define RHF_USE_EGR_BFR_MASK 0x1ull
#define RHF_USE_EGR_BFR_SMASK (RHF_USE_EGR_BFR_MASK << RHF_USE_EGR_BFR_SHIFT)
#define RHF_EGR_INDEX_SHIFT 16
#define RHF_EGR_INDEX_MASK 0x7ffull
#define RHF_EGR_INDEX_SMASK (RHF_EGR_INDEX_MASK << RHF_EGR_INDEX_SHIFT)
#define RHF_DC_INFO_SHIFT 27
#define RHF_DC_INFO_MASK 0x1ull
#define RHF_DC_INFO_SMASK (RHF_DC_INFO_MASK << RHF_DC_INFO_SHIFT)
#define RHF_RCV_SEQ_SHIFT 28
#define RHF_RCV_SEQ_MASK 0xfull
#define RHF_RCV_SEQ_SMASK (RHF_RCV_SEQ_MASK << RHF_RCV_SEQ_SHIFT)
#define RHF_EGR_OFFSET_SHIFT 32
#define RHF_EGR_OFFSET_MASK 0xfffull
#define RHF_EGR_OFFSET_SMASK (RHF_EGR_OFFSET_MASK << RHF_EGR_OFFSET_SHIFT)
#define RHF_HDRQ_OFFSET_SHIFT 44
#define RHF_HDRQ_OFFSET_MASK 0x1ffull
#define RHF_HDRQ_OFFSET_SMASK (RHF_HDRQ_OFFSET_MASK << RHF_HDRQ_OFFSET_SHIFT)
#define RHF_K_HDR_LEN_ERR (0x1ull << 53)
#define RHF_DC_UNC_ERR (0x1ull << 54)
#define RHF_DC_ERR (0x1ull << 55)
#define RHF_RCV_TYPE_ERR_SHIFT 56
#define RHF_RCV_TYPE_ERR_MASK 0x7ul
#define RHF_RCV_TYPE_ERR_SMASK (RHF_RCV_TYPE_ERR_MASK << RHF_RCV_TYPE_ERR_SHIFT)
#define RHF_TID_ERR (0x1ull << 59)
#define RHF_LEN_ERR (0x1ull << 60)
#define RHF_ECC_ERR (0x1ull << 61)
#define RHF_VCRC_ERR (0x1ull << 62)
#define RHF_ICRC_ERR (0x1ull << 63)
#define RHF_ERROR_SMASK 0xffe0000000000000ull /* bits 63:53 */
/* RHF receive types */
#define RHF_RCV_TYPE_EXPECTED 0
#define RHF_RCV_TYPE_EAGER 1
#define RHF_RCV_TYPE_IB 2 /* normal IB, IB Raw, or IPv6 */
#define RHF_RCV_TYPE_ERROR 3
#define RHF_RCV_TYPE_BYPASS 4
#define RHF_RCV_TYPE_INVALID5 5
#define RHF_RCV_TYPE_INVALID6 6
#define RHF_RCV_TYPE_INVALID7 7
/* RHF receive type error - expected packet errors */
#define RHF_RTE_EXPECTED_FLOW_SEQ_ERR 0x2
#define RHF_RTE_EXPECTED_FLOW_GEN_ERR 0x4
/* RHF receive type error - eager packet errors */
#define RHF_RTE_EAGER_NO_ERR 0x0
/* RHF receive type error - IB packet errors */
#define RHF_RTE_IB_NO_ERR 0x0
/* RHF receive type error - error packet errors */
#define RHF_RTE_ERROR_NO_ERR 0x0
#define RHF_RTE_ERROR_OP_CODE_ERR 0x1
#define RHF_RTE_ERROR_KHDR_MIN_LEN_ERR 0x2
#define RHF_RTE_ERROR_KHDR_HCRC_ERR 0x3
#define RHF_RTE_ERROR_KHDR_KVER_ERR 0x4
#define RHF_RTE_ERROR_CONTEXT_ERR 0x5
#define RHF_RTE_ERROR_KHDR_TID_ERR 0x6
/* RHF receive type error - bypass packet errors */
#define RHF_RTE_BYPASS_NO_ERR 0x0
/*
* This structure contains the first field common to all protocols
* that employ this chip.
*/
struct hfi1_message_header {
__be16 lrh[4];
};
/* IB - LRH header constants */
#define HFI1_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */
#define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
/* misc. */
#define SIZE_OF_CRC 1
#define LIM_MGMT_P_KEY 0x7FFF
#define FULL_MGMT_P_KEY 0xFFFF
#define DEFAULT_P_KEY LIM_MGMT_P_KEY
#define HFI1_PERMISSIVE_LID 0xFFFF
#define HFI1_AETH_CREDIT_SHIFT 24
#define HFI1_AETH_CREDIT_MASK 0x1F
#define HFI1_AETH_CREDIT_INVAL 0x1F
#define HFI1_MSN_MASK 0xFFFFFF
#define HFI1_QPN_MASK 0xFFFFFF
#define HFI1_FECN_SHIFT 31
#define HFI1_FECN_MASK 1
#define HFI1_FECN_SMASK (1 << HFI1_FECN_SHIFT)
#define HFI1_BECN_SHIFT 30
#define HFI1_BECN_MASK 1
#define HFI1_BECN_SMASK (1 << HFI1_BECN_SHIFT)
#define HFI1_MULTICAST_LID_BASE 0xC000
static inline __u64 rhf_to_cpu(const __le32 *rbuf)
{
return __le64_to_cpu(*((__le64 *)rbuf));
}
static inline u64 rhf_err_flags(u64 rhf)
{
return rhf & RHF_ERROR_SMASK;
}
static inline u32 rhf_rcv_type(u64 rhf)
{
return (rhf >> RHF_RCV_TYPE_SHIFT) & RHF_RCV_TYPE_MASK;
}
static inline u32 rhf_rcv_type_err(u64 rhf)
{
return (rhf >> RHF_RCV_TYPE_ERR_SHIFT) & RHF_RCV_TYPE_ERR_MASK;
}
/* return size is in bytes, not DWORDs */
static inline u32 rhf_pkt_len(u64 rhf)
{
return ((rhf & RHF_PKT_LEN_SMASK) >> RHF_PKT_LEN_SHIFT) << 2;
}
static inline u32 rhf_egr_index(u64 rhf)
{
return (rhf >> RHF_EGR_INDEX_SHIFT) & RHF_EGR_INDEX_MASK;
}
static inline u32 rhf_rcv_seq(u64 rhf)
{
return (rhf >> RHF_RCV_SEQ_SHIFT) & RHF_RCV_SEQ_MASK;
}
/* returned offset is in DWORDS */
static inline u32 rhf_hdrq_offset(u64 rhf)
{
return (rhf >> RHF_HDRQ_OFFSET_SHIFT) & RHF_HDRQ_OFFSET_MASK;
}
static inline u64 rhf_use_egr_bfr(u64 rhf)
{
return rhf & RHF_USE_EGR_BFR_SMASK;
}
static inline u64 rhf_dc_info(u64 rhf)
{
return rhf & RHF_DC_INFO_SMASK;
}
static inline u32 rhf_egr_buf_offset(u64 rhf)
{
return (rhf >> RHF_EGR_OFFSET_SHIFT) & RHF_EGR_OFFSET_MASK;
}
#endif /* _COMMON_H */
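As a brief illustration of how the RHF accessors above fit together, a kernel-side sketch that decodes one receive header flags qword might look like this (decode_rhf_example() and its caller are hypothetical, and a kernel compilation unit that already includes this header and linux/printk.h is assumed; only the helpers and masks come from the header):

static void decode_rhf_example(const __le32 *rhf_addr)
{
	u64 rhf = rhf_to_cpu(rhf_addr);

	if (rhf_err_flags(rhf)) {
		pr_debug("RHF error bits 0x%llx, type error code %u\n",
			 (unsigned long long)rhf_err_flags(rhf),
			 rhf_rcv_type_err(rhf));
		return;
	}
	pr_debug("type %u, len %u bytes, seq %u, hdrq offset %u dwords\n",
		 rhf_rcv_type(rhf), rhf_pkt_len(rhf),
		 rhf_rcv_seq(rhf), rhf_hdrq_offset(rhf));
	if (rhf_use_egr_bfr(rhf))
		pr_debug("eager index %u, eager offset %u\n",
			 rhf_egr_index(rhf), rhf_egr_buf_offset(rhf));
}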

@@ -0,0 +1,558 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/kthread.h>
#include "verbs.h"
#include "hfi.h"
/**
* hfi1_cq_enter - add a new entry to the completion queue
* @cq: completion queue
* @entry: work completion entry to add
* @solicited: true if @entry is a solicited entry
*
* This may be called with qp->s_lock held.
*/
void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
{
struct hfi1_cq_wc *wc;
unsigned long flags;
u32 head;
u32 next;
spin_lock_irqsave(&cq->lock, flags);
/*
* Note that the head pointer might be writable by user processes.
* Take care to verify it is a sane value.
*/
wc = cq->queue;
head = wc->head;
if (head >= (unsigned) cq->ibcq.cqe) {
head = cq->ibcq.cqe;
next = 0;
} else
next = head + 1;
if (unlikely(next == wc->tail)) {
spin_unlock_irqrestore(&cq->lock, flags);
if (cq->ibcq.event_handler) {
struct ib_event ev;
ev.device = cq->ibcq.device;
ev.element.cq = &cq->ibcq;
ev.event = IB_EVENT_CQ_ERR;
cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
}
return;
}
if (cq->ip) {
wc->uqueue[head].wr_id = entry->wr_id;
wc->uqueue[head].status = entry->status;
wc->uqueue[head].opcode = entry->opcode;
wc->uqueue[head].vendor_err = entry->vendor_err;
wc->uqueue[head].byte_len = entry->byte_len;
wc->uqueue[head].ex.imm_data =
(__u32 __force)entry->ex.imm_data;
wc->uqueue[head].qp_num = entry->qp->qp_num;
wc->uqueue[head].src_qp = entry->src_qp;
wc->uqueue[head].wc_flags = entry->wc_flags;
wc->uqueue[head].pkey_index = entry->pkey_index;
wc->uqueue[head].slid = entry->slid;
wc->uqueue[head].sl = entry->sl;
wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
wc->uqueue[head].port_num = entry->port_num;
/* Make sure entry is written before the head index. */
smp_wmb();
} else
wc->kqueue[head] = *entry;
wc->head = next;
if (cq->notify == IB_CQ_NEXT_COMP ||
(cq->notify == IB_CQ_SOLICITED &&
(solicited || entry->status != IB_WC_SUCCESS))) {
struct kthread_worker *worker;
/*
* This will cause send_complete() to be called in
* another thread.
*/
smp_read_barrier_depends(); /* see hfi1_cq_exit */
worker = cq->dd->worker;
if (likely(worker)) {
cq->notify = IB_CQ_NONE;
cq->triggered++;
queue_kthread_work(worker, &cq->comptask);
}
}
spin_unlock_irqrestore(&cq->lock, flags);
}
/**
* hfi1_poll_cq - poll for work completion entries
* @ibcq: the completion queue to poll
* @num_entries: the maximum number of entries to return
* @entry: pointer to array where work completions are placed
*
* Returns the number of completion entries polled.
*
* This may be called from interrupt context. Also called by ib_poll_cq()
* in the generic verbs code.
*/
int hfi1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
struct hfi1_cq *cq = to_icq(ibcq);
struct hfi1_cq_wc *wc;
unsigned long flags;
int npolled;
u32 tail;
/* The kernel can only poll a kernel completion queue */
if (cq->ip) {
npolled = -EINVAL;
goto bail;
}
spin_lock_irqsave(&cq->lock, flags);
wc = cq->queue;
tail = wc->tail;
if (tail > (u32) cq->ibcq.cqe)
tail = (u32) cq->ibcq.cqe;
for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
if (tail == wc->head)
break;
/* The kernel doesn't need a RMB since it has the lock. */
*entry = wc->kqueue[tail];
if (tail >= cq->ibcq.cqe)
tail = 0;
else
tail++;
}
wc->tail = tail;
spin_unlock_irqrestore(&cq->lock, flags);
bail:
return npolled;
}
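/*
 * For context: a kernel consumer normally reaches this routine through the
 * generic verbs entry point, along the lines of
 *
 *	struct ib_wc wc[8];
 *	int i, n;
 *
 *	do {
 *		n = ib_poll_cq(cq, ARRAY_SIZE(wc), wc);
 *		for (i = 0; i < n; i++)
 *			handle_completion(&wc[i]);
 *	} while (n > 0);
 *
 * where handle_completion() is a stand-in for whatever the consumer does
 * with each work completion.
 */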
static void send_complete(struct kthread_work *work)
{
struct hfi1_cq *cq = container_of(work, struct hfi1_cq, comptask);
/*
* The completion handler will most likely rearm the notification
* and poll for all pending entries. If a new completion entry
* is added while we are in this routine, queue_work()
* won't call us again until we return so we check triggered to
* see if we need to call the handler again.
*/
for (;;) {
u8 triggered = cq->triggered;
/*
* IPoIB connected mode assumes the callback is from a
* soft IRQ. We simulate this by blocking "bottom halves".
* See the implementation for ipoib_cm_handle_tx_wc(),
* netif_tx_lock_bh() and netif_tx_lock().
*/
local_bh_disable();
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
local_bh_enable();
if (cq->triggered == triggered)
return;
}
}
/**
* hfi1_create_cq - create a completion queue
* @ibdev: the device this completion queue is attached to
* @attr: creation attributes
* @context: unused by the driver
* @udata: user data for libibverbs.so
*
* Returns a pointer to the completion queue or negative errno values
* for failure.
*
* Called by ib_create_cq() in the generic verbs code.
*/
struct ib_cq *hfi1_create_cq(
struct ib_device *ibdev,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
struct ib_udata *udata)
{
struct hfi1_ibdev *dev = to_idev(ibdev);
struct hfi1_cq *cq;
struct hfi1_cq_wc *wc;
struct ib_cq *ret;
u32 sz;
unsigned int entries = attr->cqe;
if (attr->flags)
return ERR_PTR(-EINVAL);
if (entries < 1 || entries > hfi1_max_cqes)
return ERR_PTR(-EINVAL);
/* Allocate the completion queue structure. */
cq = kmalloc(sizeof(*cq), GFP_KERNEL);
if (!cq)
return ERR_PTR(-ENOMEM);
/*
* Allocate the completion queue entries and head/tail pointers.
* This is allocated separately so that it can be resized and
* also mapped into user space.
* We need to use vmalloc() in order to support mmap and large
* numbers of entries.
*/
sz = sizeof(*wc);
if (udata && udata->outlen >= sizeof(__u64))
sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
else
sz += sizeof(struct ib_wc) * (entries + 1);
wc = vmalloc_user(sz);
if (!wc) {
ret = ERR_PTR(-ENOMEM);
goto bail_cq;
}
/*
* Return the address of the WC as the offset to mmap.
* See hfi1_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
int err;
cq->ip = hfi1_create_mmap_info(dev, sz, context, wc);
if (!cq->ip) {
ret = ERR_PTR(-ENOMEM);
goto bail_wc;
}
err = ib_copy_to_udata(udata, &cq->ip->offset,
sizeof(cq->ip->offset));
if (err) {
ret = ERR_PTR(err);
goto bail_ip;
}
} else
cq->ip = NULL;
spin_lock(&dev->n_cqs_lock);
if (dev->n_cqs_allocated == hfi1_max_cqs) {
spin_unlock(&dev->n_cqs_lock);
ret = ERR_PTR(-ENOMEM);
goto bail_ip;
}
dev->n_cqs_allocated++;
spin_unlock(&dev->n_cqs_lock);
if (cq->ip) {
spin_lock_irq(&dev->pending_lock);
list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
spin_unlock_irq(&dev->pending_lock);
}
/*
* ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
* The number of entries should be >= the number requested or return
* an error.
*/
cq->dd = dd_from_dev(dev);
cq->ibcq.cqe = entries;
cq->notify = IB_CQ_NONE;
cq->triggered = 0;
spin_lock_init(&cq->lock);
init_kthread_work(&cq->comptask, send_complete);
wc->head = 0;
wc->tail = 0;
cq->queue = wc;
ret = &cq->ibcq;
goto done;
bail_ip:
kfree(cq->ip);
bail_wc:
vfree(wc);
bail_cq:
kfree(cq);
done:
return ret;
}
/**
* hfi1_destroy_cq - destroy a completion queue
* @ibcq: the completion queue to destroy.
*
* Returns 0 for success.
*
* Called by ib_destroy_cq() in the generic verbs code.
*/
int hfi1_destroy_cq(struct ib_cq *ibcq)
{
struct hfi1_ibdev *dev = to_idev(ibcq->device);
struct hfi1_cq *cq = to_icq(ibcq);
flush_kthread_work(&cq->comptask);
spin_lock(&dev->n_cqs_lock);
dev->n_cqs_allocated--;
spin_unlock(&dev->n_cqs_lock);
if (cq->ip)
kref_put(&cq->ip->ref, hfi1_release_mmap_info);
else
vfree(cq->queue);
kfree(cq);
return 0;
}
/**
* hfi1_req_notify_cq - change the notification type for a completion queue
* @ibcq: the completion queue
* @notify_flags: the type of notification to request
*
* Returns 0 for success.
*
* This may be called from interrupt context. Also called by
* ib_req_notify_cq() in the generic verbs code.
*/
int hfi1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
struct hfi1_cq *cq = to_icq(ibcq);
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&cq->lock, flags);
/*
* Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
* any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
*/
if (cq->notify != IB_CQ_NEXT_COMP)
cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
cq->queue->head != cq->queue->tail)
ret = 1;
spin_unlock_irqrestore(&cq->lock, flags);
return ret;
}
/**
* hfi1_resize_cq - change the size of the CQ
* @ibcq: the completion queue
* @cqe: the new number of entries to support
* @udata: user data for libibverbs.so
*
* Returns 0 for success.
*/
int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
struct hfi1_cq *cq = to_icq(ibcq);
struct hfi1_cq_wc *old_wc;
struct hfi1_cq_wc *wc;
u32 head, tail, n;
int ret;
u32 sz;
if (cqe < 1 || cqe > hfi1_max_cqes) {
ret = -EINVAL;
goto bail;
}
/*
* Need to use vmalloc() if we want to support large #s of entries.
*/
sz = sizeof(*wc);
if (udata && udata->outlen >= sizeof(__u64))
sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
else
sz += sizeof(struct ib_wc) * (cqe + 1);
wc = vmalloc_user(sz);
if (!wc) {
ret = -ENOMEM;
goto bail;
}
/* Check that we can write the offset to mmap. */
if (udata && udata->outlen >= sizeof(__u64)) {
__u64 offset = 0;
ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
if (ret)
goto bail_free;
}
spin_lock_irq(&cq->lock);
/*
* Make sure head and tail are sane since they
* might be user writable.
*/
old_wc = cq->queue;
head = old_wc->head;
if (head > (u32) cq->ibcq.cqe)
head = (u32) cq->ibcq.cqe;
tail = old_wc->tail;
if (tail > (u32) cq->ibcq.cqe)
tail = (u32) cq->ibcq.cqe;
if (head < tail)
n = cq->ibcq.cqe + 1 + head - tail;
else
n = head - tail;
if (unlikely((u32)cqe < n)) {
ret = -EINVAL;
goto bail_unlock;
}
for (n = 0; tail != head; n++) {
if (cq->ip)
wc->uqueue[n] = old_wc->uqueue[tail];
else
wc->kqueue[n] = old_wc->kqueue[tail];
if (tail == (u32) cq->ibcq.cqe)
tail = 0;
else
tail++;
}
cq->ibcq.cqe = cqe;
wc->head = n;
wc->tail = 0;
cq->queue = wc;
spin_unlock_irq(&cq->lock);
vfree(old_wc);
if (cq->ip) {
struct hfi1_ibdev *dev = to_idev(ibcq->device);
struct hfi1_mmap_info *ip = cq->ip;
hfi1_update_mmap_info(dev, ip, sz, wc);
/*
* Return the offset to mmap.
* See hfi1_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
ret = ib_copy_to_udata(udata, &ip->offset,
sizeof(ip->offset));
if (ret)
goto bail;
}
spin_lock_irq(&dev->pending_lock);
if (list_empty(&ip->pending_mmaps))
list_add(&ip->pending_mmaps, &dev->pending_mmaps);
spin_unlock_irq(&dev->pending_lock);
}
ret = 0;
goto bail;
bail_unlock:
spin_unlock_irq(&cq->lock);
bail_free:
vfree(wc);
bail:
return ret;
}
int hfi1_cq_init(struct hfi1_devdata *dd)
{
int ret = 0;
int cpu;
struct task_struct *task;
if (dd->worker)
return 0;
dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
if (!dd->worker)
return -ENOMEM;
init_kthread_worker(dd->worker);
task = kthread_create_on_node(
kthread_worker_fn,
dd->worker,
dd->assigned_node_id,
"hfi1_cq%d", dd->unit);
if (IS_ERR(task))
goto task_fail;
cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
kthread_bind(task, cpu);
wake_up_process(task);
out:
return ret;
task_fail:
ret = PTR_ERR(task);
kfree(dd->worker);
dd->worker = NULL;
goto out;
}
void hfi1_cq_exit(struct hfi1_devdata *dd)
{
struct kthread_worker *worker;
worker = dd->worker;
if (!worker)
return;
/* blocks future queuing from send_complete() */
dd->worker = NULL;
smp_wmb(); /* See hfi1_cq_enter */
flush_kthread_worker(worker);
kthread_stop(worker->task);
kfree(worker);
}

@@ -0,0 +1,899 @@
#ifdef CONFIG_DEBUG_FS
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include "hfi.h"
#include "debugfs.h"
#include "device.h"
#include "qp.h"
#include "sdma.h"
static struct dentry *hfi1_dbg_root;
#define private2dd(file) (file_inode(file)->i_private)
#define private2ppd(file) (file_inode(file)->i_private)
#define DEBUGFS_SEQ_FILE_OPS(name) \
static const struct seq_operations _##name##_seq_ops = { \
.start = _##name##_seq_start, \
.next = _##name##_seq_next, \
.stop = _##name##_seq_stop, \
.show = _##name##_seq_show \
}
#define DEBUGFS_SEQ_FILE_OPEN(name) \
static int _##name##_open(struct inode *inode, struct file *s) \
{ \
struct seq_file *seq; \
int ret; \
ret = seq_open(s, &_##name##_seq_ops); \
if (ret) \
return ret; \
seq = s->private_data; \
seq->private = inode->i_private; \
return 0; \
}
#define DEBUGFS_FILE_OPS(name) \
static const struct file_operations _##name##_file_ops = { \
.owner = THIS_MODULE, \
.open = _##name##_open, \
.read = seq_read, \
.llseek = seq_lseek, \
.release = seq_release \
}
#define DEBUGFS_FILE_CREATE(name, parent, data, ops, mode) \
do { \
struct dentry *ent; \
ent = debugfs_create_file(name, mode, parent, \
data, ops); \
if (!ent) \
pr_warn("create of %s failed\n", name); \
} while (0)
#define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \
DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO)
static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos)
__acquires(RCU)
{
struct hfi1_opcode_stats_perctx *opstats;
rcu_read_lock();
if (*pos >= ARRAY_SIZE(opstats->stats))
return NULL;
return pos;
}
static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
struct hfi1_opcode_stats_perctx *opstats;
++*pos;
if (*pos >= ARRAY_SIZE(opstats->stats))
return NULL;
return pos;
}
static void _opcode_stats_seq_stop(struct seq_file *s, void *v)
__releases(RCU)
{
rcu_read_unlock();
}
static int _opcode_stats_seq_show(struct seq_file *s, void *v)
{
loff_t *spos = v;
loff_t i = *spos, j;
u64 n_packets = 0, n_bytes = 0;
struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
struct hfi1_devdata *dd = dd_from_dev(ibd);
for (j = 0; j < dd->first_user_ctxt; j++) {
if (!dd->rcd[j])
continue;
n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
}
if (!n_packets && !n_bytes)
return SEQ_SKIP;
seq_printf(s, "%02llx %llu/%llu\n", i,
(unsigned long long) n_packets,
(unsigned long long) n_bytes);
return 0;
}
DEBUGFS_SEQ_FILE_OPS(opcode_stats);
DEBUGFS_SEQ_FILE_OPEN(opcode_stats)
DEBUGFS_FILE_OPS(opcode_stats);
static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos)
{
struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
struct hfi1_devdata *dd = dd_from_dev(ibd);
if (!*pos)
return SEQ_START_TOKEN;
if (*pos >= dd->first_user_ctxt)
return NULL;
return pos;
}
static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
struct hfi1_devdata *dd = dd_from_dev(ibd);
if (v == SEQ_START_TOKEN)
return pos;
++*pos;
if (*pos >= dd->first_user_ctxt)
return NULL;
return pos;
}
static void _ctx_stats_seq_stop(struct seq_file *s, void *v)
{
/* nothing allocated */
}
static int _ctx_stats_seq_show(struct seq_file *s, void *v)
{
loff_t *spos;
loff_t i, j;
u64 n_packets = 0;
struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
struct hfi1_devdata *dd = dd_from_dev(ibd);
if (v == SEQ_START_TOKEN) {
seq_puts(s, "Ctx:npkts\n");
return 0;
}
spos = v;
i = *spos;
if (!dd->rcd[i])
return SEQ_SKIP;
for (j = 0; j < ARRAY_SIZE(dd->rcd[i]->opstats->stats); j++)
n_packets += dd->rcd[i]->opstats->stats[j].n_packets;
if (!n_packets)
return SEQ_SKIP;
seq_printf(s, " %llu:%llu\n", i, n_packets);
return 0;
}
DEBUGFS_SEQ_FILE_OPS(ctx_stats);
DEBUGFS_SEQ_FILE_OPEN(ctx_stats)
DEBUGFS_FILE_OPS(ctx_stats);
static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
__acquires(RCU)
{
struct qp_iter *iter;
loff_t n = *pos;
rcu_read_lock();
iter = qp_iter_init(s->private);
if (!iter)
return NULL;
while (n--) {
if (qp_iter_next(iter)) {
kfree(iter);
return NULL;
}
}
return iter;
}
static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
loff_t *pos)
{
struct qp_iter *iter = iter_ptr;
(*pos)++;
if (qp_iter_next(iter)) {
kfree(iter);
return NULL;
}
return iter;
}
static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
__releases(RCU)
{
rcu_read_unlock();
}
static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr)
{
struct qp_iter *iter = iter_ptr;
if (!iter)
return 0;
qp_iter_print(s, iter);
return 0;
}
DEBUGFS_SEQ_FILE_OPS(qp_stats);
DEBUGFS_SEQ_FILE_OPEN(qp_stats)
DEBUGFS_FILE_OPS(qp_stats);
static void *_sdes_seq_start(struct seq_file *s, loff_t *pos)
__acquires(RCU)
{
struct hfi1_ibdev *ibd;
struct hfi1_devdata *dd;
rcu_read_lock();
ibd = (struct hfi1_ibdev *)s->private;
dd = dd_from_dev(ibd);
if (!dd->per_sdma || *pos >= dd->num_sdma)
return NULL;
return pos;
}
static void *_sdes_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
struct hfi1_devdata *dd = dd_from_dev(ibd);
++*pos;
if (!dd->per_sdma || *pos >= dd->num_sdma)
return NULL;
return pos;
}
static void _sdes_seq_stop(struct seq_file *s, void *v)
__releases(RCU)
{
rcu_read_unlock();
}
static int _sdes_seq_show(struct seq_file *s, void *v)
{
struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
struct hfi1_devdata *dd = dd_from_dev(ibd);
loff_t *spos = v;
loff_t i = *spos;
sdma_seqfile_dump_sde(s, &dd->per_sdma[i]);
return 0;
}
DEBUGFS_SEQ_FILE_OPS(sdes);
DEBUGFS_SEQ_FILE_OPEN(sdes)
DEBUGFS_FILE_OPS(sdes);
/* read the per-device counters */
static ssize_t dev_counters_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
u64 *counters;
size_t avail;
struct hfi1_devdata *dd;
ssize_t rval;
rcu_read_lock();
dd = private2dd(file);
avail = hfi1_read_cntrs(dd, *ppos, NULL, &counters);
rval = simple_read_from_buffer(buf, count, ppos, counters, avail);
rcu_read_unlock();
return rval;
}
/* read the per-device counters */
static ssize_t dev_names_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
char *names;
size_t avail;
struct hfi1_devdata *dd;
ssize_t rval;
rcu_read_lock();
dd = private2dd(file);
avail = hfi1_read_cntrs(dd, *ppos, &names, NULL);
rval = simple_read_from_buffer(buf, count, ppos, names, avail);
rcu_read_unlock();
return rval;
}
struct counter_info {
char *name;
const struct file_operations ops;
};
/*
* Could use file_inode(file)->i_ino to figure out which file,
* instead of separate routine for each, but for now, this works...
*/
/* read the per-port names (same for each port) */
static ssize_t portnames_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
char *names;
size_t avail;
struct hfi1_devdata *dd;
ssize_t rval;
rcu_read_lock();
dd = private2dd(file);
/* port number n/a here since names are constant */
avail = hfi1_read_portcntrs(dd, *ppos, 0, &names, NULL);
rval = simple_read_from_buffer(buf, count, ppos, names, avail);
rcu_read_unlock();
return rval;
}
/* read the per-port counters */
static ssize_t portcntrs_debugfs_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
u64 *counters;
size_t avail;
struct hfi1_devdata *dd;
struct hfi1_pportdata *ppd;
ssize_t rval;
rcu_read_lock();
ppd = private2ppd(file);
dd = ppd->dd;
avail = hfi1_read_portcntrs(dd, *ppos, ppd->port - 1, NULL, &counters);
rval = simple_read_from_buffer(buf, count, ppos, counters, avail);
rcu_read_unlock();
return rval;
}
/*
* read the per-port QSFP data for ppd
*/
static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
struct hfi1_pportdata *ppd;
char *tmp;
int ret;
rcu_read_lock();
ppd = private2ppd(file);
tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!tmp) {
rcu_read_unlock();
return -ENOMEM;
}
ret = qsfp_dump(ppd, tmp, PAGE_SIZE);
if (ret > 0)
ret = simple_read_from_buffer(buf, count, ppos, tmp, ret);
rcu_read_unlock();
kfree(tmp);
return ret;
}
/* Do an i2c write operation on the chain for the given HFI. */
static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos, u32 target)
{
struct hfi1_pportdata *ppd;
char *buff;
int ret;
int i2c_addr;
int offset;
int total_written;
rcu_read_lock();
ppd = private2ppd(file);
buff = kmalloc(count, GFP_KERNEL);
if (!buff) {
ret = -ENOMEM;
goto _return;
}
ret = copy_from_user(buff, buf, count);
if (ret > 0) {
ret = -EFAULT;
goto _free;
}
i2c_addr = (*ppos >> 16) & 0xff;
offset = *ppos & 0xffff;
total_written = i2c_write(ppd, target, i2c_addr, offset, buff, count);
if (total_written < 0) {
ret = total_written;
goto _free;
}
*ppos += total_written;
ret = total_written;
_free:
kfree(buff);
_return:
rcu_read_unlock();
return ret;
}
/* Do an i2c write operation on chain for HFI 0. */
static ssize_t i2c1_debugfs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
return __i2c_debugfs_write(file, buf, count, ppos, 0);
}
/* Do an i2c write operation on chain for HFI 1. */
static ssize_t i2c2_debugfs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
return __i2c_debugfs_write(file, buf, count, ppos, 1);
}
/* Do an i2c read operation on the chain for the given HFI. */
static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos, u32 target)
{
struct hfi1_pportdata *ppd;
char *buff;
int ret;
int i2c_addr;
int offset;
int total_read;
rcu_read_lock();
ppd = private2ppd(file);
buff = kmalloc(count, GFP_KERNEL);
if (!buff) {
ret = -ENOMEM;
goto _return;
}
i2c_addr = (*ppos >> 16) & 0xff;
offset = *ppos & 0xffff;
total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count);
if (total_read < 0) {
ret = total_read;
goto _free;
}
*ppos += total_read;
ret = copy_to_user(buf, buff, total_read);
if (ret > 0) {
ret = -EFAULT;
goto _free;
}
ret = total_read;
_free:
kfree(buff);
_return:
rcu_read_unlock();
return ret;
}
/* Do an i2c read operation on chain for HFI 0. */
static ssize_t i2c1_debugfs_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
return __i2c_debugfs_read(file, buf, count, ppos, 0);
}
/* Do an i2c read operation on chain for HFI 1. */
static ssize_t i2c2_debugfs_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
return __i2c_debugfs_read(file, buf, count, ppos, 1);
}
/* Do a QSFP write operation on the i2c chain for the given HFI. */
static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos, u32 target)
{
struct hfi1_pportdata *ppd;
char *buff;
int ret;
int total_written;
rcu_read_lock();
if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */
ret = -EINVAL;
goto _return;
}
ppd = private2ppd(file);
buff = kmalloc(count, GFP_KERNEL);
if (!buff) {
ret = -ENOMEM;
goto _return;
}
ret = copy_from_user(buff, buf, count);
if (ret > 0) {
ret = -EFAULT;
goto _free;
}
total_written = qsfp_write(ppd, target, *ppos, buff, count);
if (total_written < 0) {
ret = total_written;
goto _free;
}
*ppos += total_written;
ret = total_written;
_free:
kfree(buff);
_return:
rcu_read_unlock();
return ret;
}
/* Do a QSFP write operation on i2c chain for HFI 0. */
static ssize_t qsfp1_debugfs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
return __qsfp_debugfs_write(file, buf, count, ppos, 0);
}
/* Do a QSFP write operation on i2c chain for HFI 1. */
static ssize_t qsfp2_debugfs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
return __qsfp_debugfs_write(file, buf, count, ppos, 1);
}
/* Do a QSFP read operation on the i2c chain for the given HFI. */
static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos, u32 target)
{
struct hfi1_pportdata *ppd;
char *buff;
int ret;
int total_read;
rcu_read_lock();
if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */
ret = -EINVAL;
goto _return;
}
ppd = private2ppd(file);
buff = kmalloc(count, GFP_KERNEL);
if (!buff) {
ret = -ENOMEM;
goto _return;
}
total_read = qsfp_read(ppd, target, *ppos, buff, count);
if (total_read < 0) {
ret = total_read;
goto _free;
}
*ppos += total_read;
ret = copy_to_user(buf, buff, total_read);
if (ret > 0) {
ret = -EFAULT;
goto _free;
}
ret = total_read;
_free:
kfree(buff);
_return:
rcu_read_unlock();
return ret;
}
/* Do a QSFP read operation on i2c chain for HFI 0. */
static ssize_t qsfp1_debugfs_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
return __qsfp_debugfs_read(file, buf, count, ppos, 0);
}
/* Do a QSFP read operation on i2c chain for HFI 1. */
static ssize_t qsfp2_debugfs_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
return __qsfp_debugfs_read(file, buf, count, ppos, 1);
}
#define DEBUGFS_OPS(nm, readroutine, writeroutine) \
{ \
.name = nm, \
.ops = { \
.read = readroutine, \
.write = writeroutine, \
.llseek = generic_file_llseek, \
}, \
}
static const struct counter_info cntr_ops[] = {
DEBUGFS_OPS("counter_names", dev_names_read, NULL),
DEBUGFS_OPS("counters", dev_counters_read, NULL),
DEBUGFS_OPS("portcounter_names", portnames_read, NULL),
};
static const struct counter_info port_cntr_ops[] = {
DEBUGFS_OPS("port%dcounters", portcntrs_debugfs_read, NULL),
DEBUGFS_OPS("i2c1", i2c1_debugfs_read, i2c1_debugfs_write),
DEBUGFS_OPS("i2c2", i2c2_debugfs_read, i2c2_debugfs_write),
DEBUGFS_OPS("qsfp_dump%d", qsfp_debugfs_dump, NULL),
DEBUGFS_OPS("qsfp1", qsfp1_debugfs_read, qsfp1_debugfs_write),
DEBUGFS_OPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write),
};
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
char name[sizeof("port0counters") + 1];
char link[10];
struct hfi1_devdata *dd = dd_from_dev(ibd);
struct hfi1_pportdata *ppd;
int unit = dd->unit;
int i, j;
if (!hfi1_dbg_root)
return;
snprintf(name, sizeof(name), "%s_%d", class_name(), unit);
snprintf(link, sizeof(link), "%d", unit);
ibd->hfi1_ibdev_dbg = debugfs_create_dir(name, hfi1_dbg_root);
if (!ibd->hfi1_ibdev_dbg) {
pr_warn("create of %s failed\n", name);
return;
}
ibd->hfi1_ibdev_link =
debugfs_create_symlink(link, hfi1_dbg_root, name);
if (!ibd->hfi1_ibdev_link) {
pr_warn("create of %s symlink failed\n", name);
return;
}
DEBUGFS_SEQ_FILE_CREATE(opcode_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(ctx_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd);
/* dev counter files */
for (i = 0; i < ARRAY_SIZE(cntr_ops); i++)
DEBUGFS_FILE_CREATE(cntr_ops[i].name,
ibd->hfi1_ibdev_dbg,
dd,
&cntr_ops[i].ops, S_IRUGO);
/* per port files */
for (ppd = dd->pport, j = 0; j < dd->num_pports; j++, ppd++)
for (i = 0; i < ARRAY_SIZE(port_cntr_ops); i++) {
snprintf(name,
sizeof(name),
port_cntr_ops[i].name,
j + 1);
DEBUGFS_FILE_CREATE(name,
ibd->hfi1_ibdev_dbg,
ppd,
&port_cntr_ops[i].ops,
port_cntr_ops[i].ops.write == NULL ?
S_IRUGO : S_IRUGO|S_IWUSR);
}
}
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{
if (!hfi1_dbg_root)
goto out;
debugfs_remove(ibd->hfi1_ibdev_link);
debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
out:
ibd->hfi1_ibdev_dbg = NULL;
synchronize_rcu();
}
/*
* driver stats field names, one line per stat, single string. Used by
* programs like hfistats to print the stats in a way which works for
* different versions of drivers, without changing program source.
* if hfi1_ib_stats changes, this needs to change. Names need to be
* 12 chars or less (w/o newline), for proper display by hfistats utility.
*/
static const char * const hfi1_statnames[] = {
/* must be element 0*/
"KernIntr",
"ErrorIntr",
"Tx_Errs",
"Rcv_Errs",
"H/W_Errs",
"NoPIOBufs",
"CtxtsOpen",
"RcvLen_Errs",
"EgrBufFull",
"EgrHdrFull"
};
static void *_driver_stats_names_seq_start(struct seq_file *s, loff_t *pos)
__acquires(RCU)
{
rcu_read_lock();
if (*pos >= ARRAY_SIZE(hfi1_statnames))
return NULL;
return pos;
}
static void *_driver_stats_names_seq_next(
struct seq_file *s,
void *v,
loff_t *pos)
{
++*pos;
if (*pos >= ARRAY_SIZE(hfi1_statnames))
return NULL;
return pos;
}
static void _driver_stats_names_seq_stop(struct seq_file *s, void *v)
__releases(RCU)
{
rcu_read_unlock();
}
static int _driver_stats_names_seq_show(struct seq_file *s, void *v)
{
loff_t *spos = v;
seq_printf(s, "%s\n", hfi1_statnames[*spos]);
return 0;
}
DEBUGFS_SEQ_FILE_OPS(driver_stats_names);
DEBUGFS_SEQ_FILE_OPEN(driver_stats_names)
DEBUGFS_FILE_OPS(driver_stats_names);
static void *_driver_stats_seq_start(struct seq_file *s, loff_t *pos)
__acquires(RCU)
{
rcu_read_lock();
if (*pos >= ARRAY_SIZE(hfi1_statnames))
return NULL;
return pos;
}
static void *_driver_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
++*pos;
if (*pos >= ARRAY_SIZE(hfi1_statnames))
return NULL;
return pos;
}
static void _driver_stats_seq_stop(struct seq_file *s, void *v)
__releases(RCU)
{
rcu_read_unlock();
}
static u64 hfi1_sps_ints(void)
{
unsigned long flags;
struct hfi1_devdata *dd;
u64 sps_ints = 0;
spin_lock_irqsave(&hfi1_devs_lock, flags);
list_for_each_entry(dd, &hfi1_dev_list, list) {
sps_ints += get_all_cpu_total(dd->int_counter);
}
spin_unlock_irqrestore(&hfi1_devs_lock, flags);
return sps_ints;
}
static int _driver_stats_seq_show(struct seq_file *s, void *v)
{
loff_t *spos = v;
char *buffer;
u64 *stats = (u64 *)&hfi1_stats;
size_t sz = seq_get_buf(s, &buffer);
if (sz < sizeof(u64))
return SEQ_SKIP;
/* special case for interrupts */
if (*spos == 0)
*(u64 *)buffer = hfi1_sps_ints();
else
*(u64 *)buffer = stats[*spos];
seq_commit(s, sizeof(u64));
return 0;
}
DEBUGFS_SEQ_FILE_OPS(driver_stats);
DEBUGFS_SEQ_FILE_OPEN(driver_stats)
DEBUGFS_FILE_OPS(driver_stats);
void hfi1_dbg_init(void)
{
hfi1_dbg_root = debugfs_create_dir(DRIVER_NAME, NULL);
if (!hfi1_dbg_root)
pr_warn("init of debugfs failed\n");
DEBUGFS_SEQ_FILE_CREATE(driver_stats_names, hfi1_dbg_root, NULL);
DEBUGFS_SEQ_FILE_CREATE(driver_stats, hfi1_dbg_root, NULL);
}
void hfi1_dbg_exit(void)
{
debugfs_remove_recursive(hfi1_dbg_root);
hfi1_dbg_root = NULL;
}
#endif
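As a rough usage note for the files created above: with debugfs mounted in the usual place, counter_names and counters for the first device can be paired up from userspace. The sketch below assumes the directory is /sys/kernel/debug/hfi1/hfi1_0 (DRIVER_NAME and the unit number are not visible in this file) and that counter_names holds one name per line in the same order as the raw u64 values in counters.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative only: print each hfi1 device counter with its name. */
int main(void)
{
	FILE *names = fopen("/sys/kernel/debug/hfi1/hfi1_0/counter_names", "r");
	FILE *vals = fopen("/sys/kernel/debug/hfi1/hfi1_0/counters", "rb");
	char name[128];
	uint64_t value;

	if (!names || !vals)
		return 1;
	while (fgets(name, sizeof(name), names) &&
	       fread(&value, sizeof(value), 1, vals) == 1) {
		name[strcspn(name, "\n")] = '\0';
		printf("%-24s %llu\n", name, (unsigned long long)value);
	}
	fclose(names);
	fclose(vals);
	return 0;
}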

@@ -0,0 +1,78 @@
#ifndef _HFI1_DEBUGFS_H
#define _HFI1_DEBUGFS_H
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
struct hfi1_ibdev;
#ifdef CONFIG_DEBUG_FS
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd);
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd);
void hfi1_dbg_init(void);
void hfi1_dbg_exit(void);
#else
static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
}
static inline void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{
}
static inline void hfi1_dbg_init(void)
{
}
static inline void hfi1_dbg_exit(void)
{
}
#endif
#endif /* _HFI1_DEBUGFS_H */

@@ -0,0 +1,142 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/cdev.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/fs.h>
#include "hfi.h"
#include "device.h"
static struct class *class;
static dev_t hfi1_dev;
int hfi1_cdev_init(int minor, const char *name,
const struct file_operations *fops,
struct cdev *cdev, struct device **devp)
{
const dev_t dev = MKDEV(MAJOR(hfi1_dev), minor);
struct device *device = NULL;
int ret;
cdev_init(cdev, fops);
cdev->owner = THIS_MODULE;
kobject_set_name(&cdev->kobj, name);
ret = cdev_add(cdev, dev, 1);
if (ret < 0) {
pr_err("Could not add cdev for minor %d, %s (err %d)\n",
minor, name, -ret);
goto done;
}
device = device_create(class, NULL, dev, NULL, "%s", name);
if (!IS_ERR(device))
goto done;
ret = PTR_ERR(device);
device = NULL;
pr_err("Could not create device for minor %d, %s (err %d)\n",
minor, name, -ret);
cdev_del(cdev);
done:
*devp = device;
return ret;
}
void hfi1_cdev_cleanup(struct cdev *cdev, struct device **devp)
{
struct device *device = *devp;
if (device) {
device_unregister(device);
*devp = NULL;
cdev_del(cdev);
}
}
static const char *hfi1_class_name = "hfi1";
const char *class_name(void)
{
return hfi1_class_name;
}
int __init dev_init(void)
{
int ret;
ret = alloc_chrdev_region(&hfi1_dev, 0, HFI1_NMINORS, DRIVER_NAME);
if (ret < 0) {
pr_err("Could not allocate chrdev region (err %d)\n", -ret);
goto done;
}
class = class_create(THIS_MODULE, class_name());
if (IS_ERR(class)) {
ret = PTR_ERR(class);
pr_err("Could not create device class (err %d)\n", -ret);
unregister_chrdev_region(hfi1_dev, HFI1_NMINORS);
}
done:
return ret;
}
void dev_cleanup(void)
{
if (class) {
class_destroy(class);
class = NULL;
}
unregister_chrdev_region(hfi1_dev, HFI1_NMINORS);
}

View File

@@ -0,0 +1,61 @@
#ifndef _HFI1_DEVICE_H
#define _HFI1_DEVICE_H
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
int hfi1_cdev_init(int minor, const char *name,
const struct file_operations *fops,
struct cdev *cdev, struct device **devp);
void hfi1_cdev_cleanup(struct cdev *cdev, struct device **devp);
const char *class_name(void);
int __init dev_init(void);
void dev_cleanup(void);
#endif /* _HFI1_DEVICE_H */

File diff suppressed because it is too large

View File

@@ -0,0 +1,186 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/types.h>
#include <linux/scatterlist.h>
#include "verbs.h"
#define BAD_DMA_ADDRESS ((u64) 0)
/*
* The following functions implement driver specific replacements
* for the ib_dma_*() functions.
*
* These functions return kernel virtual addresses instead of
* device bus addresses since the driver uses the CPU to copy
* data instead of using hardware DMA.
*/
static int hfi1_mapping_error(struct ib_device *dev, u64 dma_addr)
{
return dma_addr == BAD_DMA_ADDRESS;
}
static u64 hfi1_dma_map_single(struct ib_device *dev, void *cpu_addr,
size_t size, enum dma_data_direction direction)
{
if (WARN_ON(!valid_dma_direction(direction)))
return BAD_DMA_ADDRESS;
return (u64) cpu_addr;
}
static void hfi1_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
enum dma_data_direction direction)
{
/* This is a stub, nothing to be done here */
}
static u64 hfi1_dma_map_page(struct ib_device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction direction)
{
u64 addr;
if (WARN_ON(!valid_dma_direction(direction)))
return BAD_DMA_ADDRESS;
if (offset + size > PAGE_SIZE)
return BAD_DMA_ADDRESS;
addr = (u64) page_address(page);
if (addr)
addr += offset;
return addr;
}
static void hfi1_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
enum dma_data_direction direction)
{
/* This is a stub, nothing to be done here */
}
static int hfi1_map_sg(struct ib_device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction direction)
{
struct scatterlist *sg;
u64 addr;
int i;
int ret = nents;
if (WARN_ON(!valid_dma_direction(direction)))
return BAD_DMA_ADDRESS;
for_each_sg(sgl, sg, nents, i) {
addr = (u64) page_address(sg_page(sg));
if (!addr) {
ret = 0;
break;
}
sg->dma_address = addr + sg->offset;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
sg->dma_length = sg->length;
#endif
}
return ret;
}
static void hfi1_unmap_sg(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
/* This is a stub, nothing to be done here */
}
static void hfi1_sync_single_for_cpu(struct ib_device *dev, u64 addr,
size_t size, enum dma_data_direction dir)
{
}
static void hfi1_sync_single_for_device(struct ib_device *dev, u64 addr,
size_t size,
enum dma_data_direction dir)
{
}
static void *hfi1_dma_alloc_coherent(struct ib_device *dev, size_t size,
u64 *dma_handle, gfp_t flag)
{
struct page *p;
void *addr = NULL;
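/*
* No device DMA is performed, so the kernel virtual address of the
* allocation doubles as the returned "dma_handle".
*/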
p = alloc_pages(flag, get_order(size));
if (p)
addr = page_address(p);
if (dma_handle)
*dma_handle = (u64) addr;
return addr;
}
static void hfi1_dma_free_coherent(struct ib_device *dev, size_t size,
void *cpu_addr, u64 dma_handle)
{
free_pages((unsigned long) cpu_addr, get_order(size));
}
struct ib_dma_mapping_ops hfi1_dma_mapping_ops = {
.mapping_error = hfi1_mapping_error,
.map_single = hfi1_dma_map_single,
.unmap_single = hfi1_dma_unmap_single,
.map_page = hfi1_dma_map_page,
.unmap_page = hfi1_dma_unmap_page,
.map_sg = hfi1_map_sg,
.unmap_sg = hfi1_unmap_sg,
.sync_single_for_cpu = hfi1_sync_single_for_cpu,
.sync_single_for_device = hfi1_sync_single_for_device,
.alloc_coherent = hfi1_dma_alloc_coherent,
.free_coherent = hfi1_dma_free_coherent
};

File diff suppressed because it is too large

View File

@@ -0,0 +1,475 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/delay.h>
#include "hfi.h"
#include "common.h"
#include "eprom.h"
/*
* The EPROM is logically divided into two partitions:
* partition 0: the first 128K, visible from PCI ROM BAR
* partition 1: the rest
*/
#define P0_SIZE (128 * 1024)
#define P1_START P0_SIZE
/* largest erase size supported by the controller */
#define SIZE_32KB (32 * 1024)
#define MASK_32KB (SIZE_32KB - 1)
/* controller page size, in bytes */
#define EP_PAGE_SIZE 256
#define EEP_PAGE_MASK (EP_PAGE_SIZE - 1)
/* controller commands */
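/* each command word is: opcode in bits 31:24, 24-bit flash address in bits 23:0 */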
#define CMD_SHIFT 24
#define CMD_NOP (0)
#define CMD_PAGE_PROGRAM(addr) ((0x02 << CMD_SHIFT) | addr)
#define CMD_READ_DATA(addr) ((0x03 << CMD_SHIFT) | addr)
#define CMD_READ_SR1 ((0x05 << CMD_SHIFT))
#define CMD_WRITE_ENABLE ((0x06 << CMD_SHIFT))
#define CMD_SECTOR_ERASE_32KB(addr) ((0x52 << CMD_SHIFT) | addr)
#define CMD_CHIP_ERASE ((0x60 << CMD_SHIFT))
#define CMD_READ_MANUF_DEV_ID ((0x90 << CMD_SHIFT))
#define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT))
/* controller interface speeds */
#define EP_SPEED_FULL 0x2 /* full speed */
/* controller status register 1 bits */
#define SR1_BUSY 0x1ull /* the BUSY bit in SR1 */
/* sleep length while waiting for controller */
#define WAIT_SLEEP_US 100 /* must be larger than 5 (see usage) */
#define COUNT_DELAY_SEC(n) ((n) * (1000000/WAIT_SLEEP_US))
/* GPIO pins */
#define EPROM_WP_N (1ull << 14) /* EPROM write line */
/*
* Use the EP mutex to guard against other callers from within the driver.
* Also covers usage of eprom_available.
*/
static DEFINE_MUTEX(eprom_mutex);
static int eprom_available; /* default: not available */
/*
* Turn on external enable line that allows writing on the flash.
*/
static void write_enable(struct hfi1_devdata *dd)
{
/* raise signal */
write_csr(dd, ASIC_GPIO_OUT,
read_csr(dd, ASIC_GPIO_OUT) | EPROM_WP_N);
/* raise enable */
write_csr(dd, ASIC_GPIO_OE,
read_csr(dd, ASIC_GPIO_OE) | EPROM_WP_N);
}
/*
* Turn off external enable line that allows writing on the flash.
*/
static void write_disable(struct hfi1_devdata *dd)
{
/* lower signal */
write_csr(dd, ASIC_GPIO_OUT,
read_csr(dd, ASIC_GPIO_OUT) & ~EPROM_WP_N);
/* lower enable */
write_csr(dd, ASIC_GPIO_OE,
read_csr(dd, ASIC_GPIO_OE) & ~EPROM_WP_N);
}
/*
* Wait for the device to become not busy. Must be called after all
* write or erase operations.
*/
static int wait_for_not_busy(struct hfi1_devdata *dd)
{
unsigned long count = 0;
u64 reg;
int ret = 0;
/* starts page mode */
write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_SR1);
while (1) {
usleep_range(WAIT_SLEEP_US - 5, WAIT_SLEEP_US + 5);
count++;
reg = read_csr(dd, ASIC_EEP_DATA);
if ((reg & SR1_BUSY) == 0)
break;
/* 200s is the largest time for a 128Mb device */
if (count > COUNT_DELAY_SEC(200)) {
dd_dev_err(dd, "waited too long for SPI FLASH busy to clear - failing\n");
ret = -ETIMEDOUT;
break; /* break, not goto - must stop page mode */
}
}
/* stop page mode with a NOP */
write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP);
return ret;
}
/*
* Read the device ID from the SPI controller.
*/
static u32 read_device_id(struct hfi1_devdata *dd)
{
/* read the Manufacture Device ID */
write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_MANUF_DEV_ID);
return (u32)read_csr(dd, ASIC_EEP_DATA);
}
/*
* Erase the whole flash.
*/
static int erase_chip(struct hfi1_devdata *dd)
{
int ret;
write_enable(dd);
write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE);
write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_CHIP_ERASE);
ret = wait_for_not_busy(dd);
write_disable(dd);
return ret;
}
/*
* Erase a range using the 32KB erase command.
*/
static int erase_32kb_range(struct hfi1_devdata *dd, u32 start, u32 end)
{
int ret = 0;
if (end < start)
return -EINVAL;
if ((start & MASK_32KB) || (end & MASK_32KB)) {
dd_dev_err(dd,
"%s: non-aligned range (0x%x,0x%x) for a 32KB erase\n",
__func__, start, end);
return -EINVAL;
}
write_enable(dd);
for (; start < end; start += SIZE_32KB) {
write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE);
write_csr(dd, ASIC_EEP_ADDR_CMD,
CMD_SECTOR_ERASE_32KB(start));
ret = wait_for_not_busy(dd);
if (ret)
goto done;
}
done:
write_disable(dd);
return ret;
}
/*
* Read a 256 byte (64 dword) EPROM page.
* All callers have verified the offset is at a page boundary.
*/
static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result)
{
int i;
write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset));
for (i = 0; i < EP_PAGE_SIZE/sizeof(u32); i++)
result[i] = (u32)read_csr(dd, ASIC_EEP_DATA);
write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */
}
/*
* Read length bytes starting at offset. Copy to user address addr.
*/
static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
{
u32 offset;
u32 buffer[EP_PAGE_SIZE/sizeof(u32)];
int ret = 0;
/* reject anything not on an EPROM page boundary */
if ((start & EEP_PAGE_MASK) || (len & EEP_PAGE_MASK))
return -EINVAL;
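/* copy out one controller page at a time */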
for (offset = 0; offset < len; offset += EP_PAGE_SIZE) {
read_page(dd, start + offset, buffer);
if (copy_to_user((void __user *)(addr + offset),
buffer, EP_PAGE_SIZE)) {
ret = -EFAULT;
goto done;
}
}
done:
return ret;
}
/*
* Write a 256 byte (64 dword) EPROM page.
* All callers have verified the offset is at a page boundary.
*/
static int write_page(struct hfi1_devdata *dd, u32 offset, u32 *data)
{
int i;
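/* the first data word is staged before the PAGE_PROGRAM command; the rest follow it */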
write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE);
write_csr(dd, ASIC_EEP_DATA, data[0]);
write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_PAGE_PROGRAM(offset));
for (i = 1; i < EP_PAGE_SIZE/sizeof(u32); i++)
write_csr(dd, ASIC_EEP_DATA, data[i]);
/* will close the open page */
return wait_for_not_busy(dd);
}
/*
* Write length bytes starting at offset. Read from user address addr.
*/
static int write_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
{
u32 offset;
u32 buffer[EP_PAGE_SIZE/sizeof(u32)];
int ret = 0;
/* reject anything not on an EPROM page boundary */
if ((start & EEP_PAGE_MASK) || (len & EEP_PAGE_MASK))
return -EINVAL;
write_enable(dd);
for (offset = 0; offset < len; offset += EP_PAGE_SIZE) {
if (copy_from_user(buffer, (void __user *)(addr + offset),
EP_PAGE_SIZE)) {
ret = -EFAULT;
goto done;
}
ret = write_page(dd, start + offset, buffer);
if (ret)
goto done;
}
done:
write_disable(dd);
return ret;
}
/*
* Perform the given operation on the EPROM. Called from user space. The
* user credentials have already been checked.
*
* Return 0 on success, -ERRNO on error
*/
int handle_eprom_command(const struct hfi1_cmd *cmd)
{
struct hfi1_devdata *dd;
u32 dev_id;
int ret = 0;
/*
* The EPROM is per-device, so use unit 0 as that will always
* exist.
*/
dd = hfi1_lookup(0);
if (!dd) {
pr_err("%s: cannot find unit 0!\n", __func__);
return -EINVAL;
}
/* lock against other callers touching the ASIC block */
mutex_lock(&eprom_mutex);
/* some platforms do not have an EPROM */
if (!eprom_available) {
ret = -ENOSYS;
goto done_asic;
}
/* lock against the other HFI on another OS */
ret = acquire_hw_mutex(dd);
if (ret) {
dd_dev_err(dd,
"%s: unable to acquire hw mutex, no EPROM support\n",
__func__);
goto done_asic;
}
dd_dev_info(dd, "%s: cmd: type %d, len 0x%x, addr 0x%016llx\n",
__func__, cmd->type, cmd->len, cmd->addr);
switch (cmd->type) {
case HFI1_CMD_EP_INFO:
if (cmd->len != sizeof(u32)) {
ret = -ERANGE;
break;
}
dev_id = read_device_id(dd);
/* addr points to a u32 user buffer */
if (copy_to_user((void __user *)cmd->addr, &dev_id,
sizeof(u32)))
ret = -EFAULT;
break;
case HFI1_CMD_EP_ERASE_CHIP:
ret = erase_chip(dd);
break;
case HFI1_CMD_EP_ERASE_P0:
if (cmd->len != P0_SIZE) {
ret = -ERANGE;
break;
}
ret = erase_32kb_range(dd, 0, cmd->len);
break;
case HFI1_CMD_EP_ERASE_P1:
/* check for overflow */
if (P1_START + cmd->len > ASIC_EEP_ADDR_CMD_EP_ADDR_MASK) {
ret = -ERANGE;
break;
}
ret = erase_32kb_range(dd, P1_START, P1_START + cmd->len);
break;
case HFI1_CMD_EP_READ_P0:
if (cmd->len != P0_SIZE) {
ret = -ERANGE;
break;
}
ret = read_length(dd, 0, cmd->len, cmd->addr);
break;
case HFI1_CMD_EP_READ_P1:
/* check for overflow */
if (P1_START + cmd->len > ASIC_EEP_ADDR_CMD_EP_ADDR_MASK) {
ret = -ERANGE;
break;
}
ret = read_length(dd, P1_START, cmd->len, cmd->addr);
break;
case HFI1_CMD_EP_WRITE_P0:
if (cmd->len > P0_SIZE) {
ret = -ERANGE;
break;
}
ret = write_length(dd, 0, cmd->len, cmd->addr);
break;
case HFI1_CMD_EP_WRITE_P1:
/* check for overflow */
if (P1_START + cmd->len > ASIC_EEP_ADDR_CMD_EP_ADDR_MASK) {
ret = -ERANGE;
break;
}
ret = write_length(dd, P1_START, cmd->len, cmd->addr);
break;
default:
dd_dev_err(dd, "%s: unexpected command %d\n",
__func__, cmd->type);
ret = -EINVAL;
break;
}
release_hw_mutex(dd);
done_asic:
mutex_unlock(&eprom_mutex);
return ret;
}
/*
* Initialize the EPROM handler.
*/
int eprom_init(struct hfi1_devdata *dd)
{
int ret = 0;
/* only the discrete chip has an EPROM, nothing to do */
if (dd->pcidev->device != PCI_DEVICE_ID_INTEL0)
return 0;
/* lock against other callers */
mutex_lock(&eprom_mutex);
if (eprom_available) /* already initialized */
goto done_asic;
/*
* Lock against the other HFI on another OS - the mutex above
* would have caught anything in this driver. It is OK if
* both OSes reset the EPROM - as long as they don't do it at
* the same time.
*/
ret = acquire_hw_mutex(dd);
if (ret) {
dd_dev_err(dd,
"%s: unable to acquire hw mutex, no EPROM support\n",
__func__);
goto done_asic;
}
/* reset EPROM to be sure it is in a good state */
/* set reset */
write_csr(dd, ASIC_EEP_CTL_STAT,
ASIC_EEP_CTL_STAT_EP_RESET_SMASK);
/* clear reset, set speed */
write_csr(dd, ASIC_EEP_CTL_STAT,
EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT);
/* wake the device with command "release powerdown NoID" */
write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID);
eprom_available = 1;
release_hw_mutex(dd);
done_asic:
mutex_unlock(&eprom_mutex);
return ret;
}

View File

@@ -0,0 +1,55 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
struct hfi1_cmd;
struct hfi1_devdata;
int eprom_init(struct hfi1_devdata *dd);
int handle_eprom_command(const struct hfi1_cmd *cmd);

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,207 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/pci.h>
#include <linux/delay.h>
#include "hfi.h"
#include "common.h"
#include "sdma.h"
/**
* format_hwmsg - format a single hwerror message
* @msg: message buffer
* @msgl: length of message buffer
* @hwmsg: message to add to message buffer
*/
static void format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
{
strlcat(msg, "[", msgl);
strlcat(msg, hwmsg, msgl);
strlcat(msg, "]", msgl);
}
/**
* hfi1_format_hwerrors - format hardware error messages for display
* @hwerrs: hardware errors bit vector
* @hwerrmsgs: hardware error descriptions
* @nhwerrmsgs: number of hwerrmsgs
* @msg: message buffer
* @msgl: message buffer length
*/
void hfi1_format_hwerrors(u64 hwerrs, const struct hfi1_hwerror_msgs *hwerrmsgs,
size_t nhwerrmsgs, char *msg, size_t msgl)
{
int i;
for (i = 0; i < nhwerrmsgs; i++)
if (hwerrs & hwerrmsgs[i].mask)
format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
}
static void signal_ib_event(struct hfi1_pportdata *ppd, enum ib_event_type ev)
{
struct ib_event event;
struct hfi1_devdata *dd = ppd->dd;
/*
* Only call ib_dispatch_event() if the IB device has been
* registered. HFI1_INITTED is set iff the driver has successfully
* registered with the IB core.
*/
if (!(dd->flags & HFI1_INITTED))
return;
event.device = &dd->verbs_dev.ibdev;
event.element.port_num = ppd->port;
event.event = ev;
ib_dispatch_event(&event);
}
/*
* Handle a linkup or link down notification.
* This is called outside an interrupt.
*/
void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
{
struct hfi1_pportdata *ppd = &dd->pport[0];
enum ib_event_type ev;
if (!(ppd->linkup ^ !!linkup))
return; /* no change, nothing to do */
if (linkup) {
/*
* Quick linkup and all link up on the simulator does not
* trigger or implement:
* - VerifyCap interrupt
* - VerifyCap frames
* But rather moves directly to LinkUp.
*
* Do the work of the VerifyCap interrupt handler,
* handle_verify_cap(), but do not try moving the state to
* LinkUp as we are already there.
*
* NOTE: This uses this device's vAU, vCU, and vl15_init for
* the remote values. Both sides must be using the values.
*/
if (quick_linkup
|| dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
set_up_vl15(dd, dd->vau, dd->vl15_init);
assign_remote_cm_au_table(dd, dd->vcu);
ppd->neighbor_guid =
read_csr(dd,
DC_DC8051_STS_REMOTE_GUID);
ppd->neighbor_type =
read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
ppd->neighbor_port_number =
read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
dd_dev_info(dd,
"Neighbor GUID: %llx Neighbor type %d\n",
ppd->neighbor_guid,
ppd->neighbor_type);
}
/* physical link went up */
ppd->linkup = 1;
ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
/* link widths are not available until the link is fully up */
get_linkup_link_widths(ppd);
} else {
/* physical link went down */
ppd->linkup = 0;
/* clear HW details of the previous connection */
reset_link_credits(dd);
/* freeze after a link down to guarantee a clean egress */
start_freeze_handling(ppd, FREEZE_SELF|FREEZE_LINK_DOWN);
ev = IB_EVENT_PORT_ERR;
hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LINKDOWN_BIT);
/* if we are down, the neighbor is down */
ppd->neighbor_normal = 0;
/* notify IB of the link change */
signal_ib_event(ppd, ev);
}
}
/*
* Handle receive or urgent interrupts for user contexts. This means a user
* process was waiting for a packet to arrive, and didn't want to poll.
*/
void handle_user_interrupt(struct hfi1_ctxtdata *rcd)
{
struct hfi1_devdata *dd = rcd->dd;
unsigned long flags;
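/*
* Wake any process sleeping on this context; a receive waiter also has
* further receive-available interrupts disabled for the context.
*/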
spin_lock_irqsave(&dd->uctxt_lock, flags);
if (!rcd->cnt)
goto done;
if (test_and_clear_bit(HFI1_CTXT_WAITING_RCV, &rcd->event_flags)) {
wake_up_interruptible(&rcd->wait);
hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_DIS, rcd->ctxt);
} else if (test_and_clear_bit(HFI1_CTXT_WAITING_URG,
&rcd->event_flags)) {
rcd->urgent++;
wake_up_interruptible(&rcd->wait);
}
done:
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
}

View File

@@ -0,0 +1,186 @@
#ifndef _HFI1_IOWAIT_H
#define _HFI1_IOWAIT_H
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/sched.h>
/*
* typedef (*restart_t)() - restart callback
* @work: pointer to work structure
*/
typedef void (*restart_t)(struct work_struct *work);
struct sdma_txreq;
struct sdma_engine;
/**
* struct iowait - linkage for delayed progress/waiting
* @list: used to add/insert into QP/PQ wait lists
* @tx_head: overflow list of sdma_txreq's
* @sleep: no space callback
* @wakeup: space callback
* @iowork: workqueue overhead
* @wait_dma: wait for sdma_busy == 0
* @sdma_busy: # of packets in flight
* @count: total number of descriptors in tx_head'ed list
* @tx_limit: limit for overflow queuing
* @tx_count: number of tx entry's in tx_head'ed list
*
* This is to be embedded in user's state structure
* (QP or PQ).
*
* The sleep and wakeup members are a
* bit misnamed.  They do not strictly
* speaking sleep or wake up, but they
* are callbacks for the ULP to implement
* whatever queuing/dequeuing of
* the embedded iowait and its containing struct
* is needed when a resource shortage like SDMA ring space is seen.
*
* Both are potentially called with locks held,
* so sleeping is not allowed.
*
* The wait_dma member, along with the sdma_busy count, is used by
* iowait_sdma_drain() to wait for all outstanding SDMA requests to
* complete.
*/
struct iowait {
struct list_head list;
struct list_head tx_head;
int (*sleep)(
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *tx,
unsigned seq);
void (*wakeup)(struct iowait *wait, int reason);
struct work_struct iowork;
wait_queue_head_t wait_dma;
atomic_t sdma_busy;
u32 count;
u32 tx_limit;
u32 tx_count;
};
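/* reason passed to wakeup(): SDMA descriptor space has become available */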
#define SDMA_AVAIL_REASON 0
/**
* iowait_init() - initialize wait structure
* @wait: wait struct to initialize
* @tx_limit: limit for overflow queuing
* @func: restart function for workqueue
* @sleep: sleep function for no space
* @wakeup: wakeup function for no space
*
* This function initializes the iowait
* structure embedded in the QP or PQ.
*
*/
static inline void iowait_init(
struct iowait *wait,
u32 tx_limit,
void (*func)(struct work_struct *work),
int (*sleep)(
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *tx,
unsigned seq),
void (*wakeup)(struct iowait *wait, int reason))
{
wait->count = 0;
INIT_LIST_HEAD(&wait->list);
INIT_LIST_HEAD(&wait->tx_head);
INIT_WORK(&wait->iowork, func);
init_waitqueue_head(&wait->wait_dma);
atomic_set(&wait->sdma_busy, 0);
wait->tx_limit = tx_limit;
wait->sleep = sleep;
wait->wakeup = wakeup;
}
/**
* iowait_schedule() - schedule the iowait work item
* @wait: wait struct to schedule
* @wq: workqueue for schedule
*/
static inline void iowait_schedule(
struct iowait *wait,
struct workqueue_struct *wq)
{
queue_work(wq, &wait->iowork);
}
/**
* iowait_sdma_drain() - wait for DMAs to drain
*
* @wait: iowait structure
*
* This will delay until the iowait sdmas have
* completed.
*/
static inline void iowait_sdma_drain(struct iowait *wait)
{
wait_event(wait->wait_dma, !atomic_read(&wait->sdma_busy));
}
/**
* iowait_drain_wakeup() - trigger iowait_sdma_drain() waiter
*
* @wait: iowait structure
*
* This will trigger any waiters.
*/
static inline void iowait_drain_wakeup(struct iowait *wait)
{
wake_up(&wait->wait_dma);
}
#endif

View File

@@ -0,0 +1,411 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include "hfi.h"
/**
* hfi1_alloc_lkey - allocate an lkey
* @mr: memory region that this lkey protects
* @dma_region: 0->normal key, 1->restricted DMA key
*
* Returns 0 if successful, otherwise returns -errno.
*
* Increments mr reference count as required.
*
* Sets the lkey field of mr for non-dma regions.
*
*/
int hfi1_alloc_lkey(struct hfi1_mregion *mr, int dma_region)
{
unsigned long flags;
u32 r;
u32 n;
int ret = 0;
struct hfi1_ibdev *dev = to_idev(mr->pd->device);
struct hfi1_lkey_table *rkt = &dev->lk_table;
hfi1_get_mr(mr);
spin_lock_irqsave(&rkt->lock, flags);
/* special case for dma_mr lkey == 0 */
if (dma_region) {
struct hfi1_mregion *tmr;
tmr = rcu_access_pointer(dev->dma_mr);
if (!tmr) {
rcu_assign_pointer(dev->dma_mr, mr);
mr->lkey_published = 1;
} else {
hfi1_put_mr(mr);
}
goto success;
}
/* Find the next available LKEY */
r = rkt->next;
n = r;
for (;;) {
if (!rcu_access_pointer(rkt->table[r]))
break;
r = (r + 1) & (rkt->max - 1);
if (r == n)
goto bail;
}
rkt->next = (r + 1) & (rkt->max - 1);
/*
* Make sure lkey is never zero which is reserved to indicate an
* unrestricted LKEY.
*/
rkt->gen++;
/*
* bits are capped in verbs.c to ensure enough bits for
* generation number
*/
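/*
* lkey layout: table index in the top hfi1_lkey_table_size bits,
* generation number starting at bit 8, low eight bits left zero.
*/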
mr->lkey = (r << (32 - hfi1_lkey_table_size)) |
((((1 << (24 - hfi1_lkey_table_size)) - 1) & rkt->gen)
<< 8);
if (mr->lkey == 0) {
mr->lkey |= 1 << 8;
rkt->gen++;
}
rcu_assign_pointer(rkt->table[r], mr);
mr->lkey_published = 1;
success:
spin_unlock_irqrestore(&rkt->lock, flags);
out:
return ret;
bail:
hfi1_put_mr(mr);
spin_unlock_irqrestore(&rkt->lock, flags);
ret = -ENOMEM;
goto out;
}
/**
* hfi1_free_lkey - free an lkey
* @mr: mr to free from tables
*/
void hfi1_free_lkey(struct hfi1_mregion *mr)
{
unsigned long flags;
u32 lkey = mr->lkey;
u32 r;
struct hfi1_ibdev *dev = to_idev(mr->pd->device);
struct hfi1_lkey_table *rkt = &dev->lk_table;
int freed = 0;
spin_lock_irqsave(&rkt->lock, flags);
if (!mr->lkey_published)
goto out;
if (lkey == 0)
RCU_INIT_POINTER(dev->dma_mr, NULL);
else {
r = lkey >> (32 - hfi1_lkey_table_size);
RCU_INIT_POINTER(rkt->table[r], NULL);
}
mr->lkey_published = 0;
freed++;
out:
spin_unlock_irqrestore(&rkt->lock, flags);
if (freed) {
synchronize_rcu();
hfi1_put_mr(mr);
}
}
/**
* hfi1_lkey_ok - check IB SGE for validity and initialize
* @rkt: table containing lkey to check SGE against
* @pd: protection domain
* @isge: outgoing internal SGE
* @sge: SGE to check
* @acc: access flags
*
* Return 1 if valid and successful, otherwise returns 0.
*
* increments the reference count upon success
*
* Check the IB SGE for validity and initialize our internal version
* of it.
*/
int hfi1_lkey_ok(struct hfi1_lkey_table *rkt, struct hfi1_pd *pd,
struct hfi1_sge *isge, struct ib_sge *sge, int acc)
{
struct hfi1_mregion *mr;
unsigned n, m;
size_t off;
/*
* We use LKEY == zero for kernel virtual addresses
* (see hfi1_get_dma_mr and dma.c).
*/
rcu_read_lock();
if (sge->lkey == 0) {
struct hfi1_ibdev *dev = to_idev(pd->ibpd.device);
if (pd->user)
goto bail;
mr = rcu_dereference(dev->dma_mr);
if (!mr)
goto bail;
atomic_inc(&mr->refcount);
rcu_read_unlock();
isge->mr = mr;
isge->vaddr = (void *) sge->addr;
isge->length = sge->length;
isge->sge_length = sge->length;
isge->m = 0;
isge->n = 0;
goto ok;
}
mr = rcu_dereference(
rkt->table[(sge->lkey >> (32 - hfi1_lkey_table_size))]);
if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
goto bail;
off = sge->addr - mr->user_base;
if (unlikely(sge->addr < mr->user_base ||
off + sge->length > mr->length ||
(mr->access_flags & acc) != acc))
goto bail;
atomic_inc(&mr->refcount);
rcu_read_unlock();
off += mr->offset;
if (mr->page_shift) {
/*
* page sizes are uniform power of 2 so no loop is necessary
* entries_spanned_by_off is the number of times the loop below
* would have executed.
*/
size_t entries_spanned_by_off;
entries_spanned_by_off = off >> mr->page_shift;
off -= (entries_spanned_by_off << mr->page_shift);
m = entries_spanned_by_off / HFI1_SEGSZ;
n = entries_spanned_by_off % HFI1_SEGSZ;
} else {
m = 0;
n = 0;
while (off >= mr->map[m]->segs[n].length) {
off -= mr->map[m]->segs[n].length;
n++;
if (n >= HFI1_SEGSZ) {
m++;
n = 0;
}
}
}
isge->mr = mr;
isge->vaddr = mr->map[m]->segs[n].vaddr + off;
isge->length = mr->map[m]->segs[n].length - off;
isge->sge_length = sge->length;
isge->m = m;
isge->n = n;
ok:
return 1;
bail:
rcu_read_unlock();
return 0;
}
/**
* hfi1_rkey_ok - check the IB virtual address, length, and RKEY
* @qp: qp for validation
* @sge: SGE state
* @len: length of data
* @vaddr: virtual address to place data
* @rkey: rkey to check
* @acc: access flags
*
* Return 1 if successful, otherwise 0.
*
* increments the reference count upon success
*/
int hfi1_rkey_ok(struct hfi1_qp *qp, struct hfi1_sge *sge,
u32 len, u64 vaddr, u32 rkey, int acc)
{
struct hfi1_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
struct hfi1_mregion *mr;
unsigned n, m;
size_t off;
/*
* We use RKEY == zero for kernel virtual addresses
* (see hfi1_get_dma_mr and dma.c).
*/
rcu_read_lock();
if (rkey == 0) {
struct hfi1_pd *pd = to_ipd(qp->ibqp.pd);
struct hfi1_ibdev *dev = to_idev(pd->ibpd.device);
if (pd->user)
goto bail;
mr = rcu_dereference(dev->dma_mr);
if (!mr)
goto bail;
atomic_inc(&mr->refcount);
rcu_read_unlock();
sge->mr = mr;
sge->vaddr = (void *) vaddr;
sge->length = len;
sge->sge_length = len;
sge->m = 0;
sge->n = 0;
goto ok;
}
mr = rcu_dereference(
rkt->table[(rkey >> (32 - hfi1_lkey_table_size))]);
if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
goto bail;
off = vaddr - mr->iova;
if (unlikely(vaddr < mr->iova || off + len > mr->length ||
(mr->access_flags & acc) == 0))
goto bail;
atomic_inc(&mr->refcount);
rcu_read_unlock();
off += mr->offset;
if (mr->page_shift) {
/*
* page sizes are uniform power of 2 so no loop is necessary
* entries_spanned_by_off is the number of times the loop below
* would have executed.
*/
size_t entries_spanned_by_off;
entries_spanned_by_off = off >> mr->page_shift;
off -= (entries_spanned_by_off << mr->page_shift);
m = entries_spanned_by_off / HFI1_SEGSZ;
n = entries_spanned_by_off % HFI1_SEGSZ;
} else {
m = 0;
n = 0;
while (off >= mr->map[m]->segs[n].length) {
off -= mr->map[m]->segs[n].length;
n++;
if (n >= HFI1_SEGSZ) {
m++;
n = 0;
}
}
}
sge->mr = mr;
sge->vaddr = mr->map[m]->segs[n].vaddr + off;
sge->length = mr->map[m]->segs[n].length - off;
sge->sge_length = len;
sge->m = m;
sge->n = n;
ok:
return 1;
bail:
rcu_read_unlock();
return 0;
}
/*
* Initialize the memory region specified by the work request.
*/
int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_send_wr *wr)
{
struct hfi1_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
struct hfi1_pd *pd = to_ipd(qp->ibqp.pd);
struct hfi1_mregion *mr;
u32 rkey = wr->wr.fast_reg.rkey;
unsigned i, n, m;
int ret = -EINVAL;
unsigned long flags;
u64 *page_list;
size_t ps;
spin_lock_irqsave(&rkt->lock, flags);
if (pd->user || rkey == 0)
goto bail;
mr = rcu_dereference_protected(
rkt->table[(rkey >> (32 - hfi1_lkey_table_size))],
lockdep_is_held(&rkt->lock));
if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd))
goto bail;
if (wr->wr.fast_reg.page_list_len > mr->max_segs)
goto bail;
ps = 1UL << wr->wr.fast_reg.page_shift;
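/* the region being registered must fit within the supplied page list */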
if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len)
goto bail;
mr->user_base = wr->wr.fast_reg.iova_start;
mr->iova = wr->wr.fast_reg.iova_start;
mr->lkey = rkey;
mr->length = wr->wr.fast_reg.length;
mr->access_flags = wr->wr.fast_reg.access_flags;
page_list = wr->wr.fast_reg.page_list->page_list;
m = 0;
n = 0;
for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
mr->map[m]->segs[n].vaddr = (void *) page_list[i];
mr->map[m]->segs[n].length = ps;
if (++n == HFI1_SEGSZ) {
m++;
n = 0;
}
}
ret = 0;
bail:
spin_unlock_irqrestore(&rkt->lock, flags);
return ret;
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,325 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef _HFI1_MAD_H
#define _HFI1_MAD_H
#include <rdma/ib_pma.h>
#define USE_PI_LED_ENABLE 1 /* use led enabled bit in struct
* opa_port_states, if available */
#include <rdma/opa_smi.h>
#include <rdma/opa_port_info.h>
#ifndef PI_LED_ENABLE_SUP
#define PI_LED_ENABLE_SUP 0
#endif
#include "opa_compat.h"
#define IB_VLARB_LOWPRI_0_31 1
#define IB_VLARB_LOWPRI_32_63 2
#define IB_VLARB_HIGHPRI_0_31 3
#define IB_VLARB_HIGHPRI_32_63 4
#define OPA_MAX_PREEMPT_CAP 32
#define OPA_VLARB_LOW_ELEMENTS 0
#define OPA_VLARB_HIGH_ELEMENTS 1
#define OPA_VLARB_PREEMPT_ELEMENTS 2
#define OPA_VLARB_PREEMPT_MATRIX 3
#define IB_PMA_PORT_COUNTERS_CONG cpu_to_be16(0xFF00)
struct ib_pma_portcounters_cong {
u8 reserved;
u8 reserved1;
__be16 port_check_rate;
__be16 symbol_error_counter;
u8 link_error_recovery_counter;
u8 link_downed_counter;
__be16 port_rcv_errors;
__be16 port_rcv_remphys_errors;
__be16 port_rcv_switch_relay_errors;
__be16 port_xmit_discards;
u8 port_xmit_constraint_errors;
u8 port_rcv_constraint_errors;
u8 reserved2;
u8 link_overrun_errors; /* LocalLink: 7:4, BufferOverrun: 3:0 */
__be16 reserved3;
__be16 vl15_dropped;
__be64 port_xmit_data;
__be64 port_rcv_data;
__be64 port_xmit_packets;
__be64 port_rcv_packets;
__be64 port_xmit_wait;
__be64 port_adr_events;
} __packed;
#define IB_SMP_UNSUP_VERSION cpu_to_be16(0x0004)
#define IB_SMP_UNSUP_METHOD cpu_to_be16(0x0008)
#define IB_SMP_UNSUP_METH_ATTR cpu_to_be16(0x000C)
#define IB_SMP_INVALID_FIELD cpu_to_be16(0x001C)
#define HFI1_XMIT_RATE_UNSUPPORTED 0x0
#define HFI1_XMIT_RATE_PICO 0x7
/* number of 4nsec cycles equaling 2secs */
#define HFI1_CONG_TIMER_PSINTERVAL 0x1DCD64EC
#define IB_CC_SVCTYPE_RC 0x0
#define IB_CC_SVCTYPE_UC 0x1
#define IB_CC_SVCTYPE_RD 0x2
#define IB_CC_SVCTYPE_UD 0x3
/*
* There should be an equivalent IB #define for the following, but
* I cannot find it.
*/
#define OPA_CC_LOG_TYPE_HFI 2
struct opa_hfi1_cong_log_event_internal {
u32 lqpn;
u32 rqpn;
u8 sl;
u8 svc_type;
u32 rlid;
s64 timestamp; /* wider than 32 bits to detect 32 bit rollover */
};
struct opa_hfi1_cong_log_event {
u8 local_qp_cn_entry[3];
u8 remote_qp_number_cn_entry[3];
u8 sl_svc_type_cn_entry; /* 5 bits SL, 3 bits svc type */
u8 reserved;
__be32 remote_lid_cn_entry;
__be32 timestamp_cn_entry;
} __packed;
#define OPA_CONG_LOG_ELEMS 96
struct opa_hfi1_cong_log {
u8 log_type;
u8 congestion_flags;
__be16 threshold_event_counter;
__be32 current_time_stamp;
u8 threshold_cong_event_map[OPA_MAX_SLS/8];
struct opa_hfi1_cong_log_event events[OPA_CONG_LOG_ELEMS];
} __packed;
#define IB_CC_TABLE_CAP_DEFAULT 31
/* Port control flags */
#define IB_CC_CCS_PC_SL_BASED 0x01
struct opa_congestion_setting_entry {
u8 ccti_increase;
u8 reserved;
__be16 ccti_timer;
u8 trigger_threshold;
u8 ccti_min; /* min CCTI for cc table */
} __packed;
struct opa_congestion_setting_entry_shadow {
u8 ccti_increase;
u8 reserved;
u16 ccti_timer;
u8 trigger_threshold;
u8 ccti_min; /* min CCTI for cc table */
} __packed;
struct opa_congestion_setting_attr {
__be32 control_map;
__be16 port_control;
struct opa_congestion_setting_entry entries[OPA_MAX_SLS];
} __packed;
struct opa_congestion_setting_attr_shadow {
u32 control_map;
u16 port_control;
struct opa_congestion_setting_entry_shadow entries[OPA_MAX_SLS];
} __packed;
#define IB_CC_TABLE_ENTRY_INCREASE_DEFAULT 1
#define IB_CC_TABLE_ENTRY_TIMER_DEFAULT 1
/* 64 Congestion Control table entries in a single MAD */
#define IB_CCT_ENTRIES 64
#define IB_CCT_MIN_ENTRIES (IB_CCT_ENTRIES * 2)
struct ib_cc_table_entry {
__be16 entry; /* shift:2, multiplier:14 */
};
struct ib_cc_table_entry_shadow {
u16 entry; /* shift:2, multiplier:14 */
};
struct ib_cc_table_attr {
__be16 ccti_limit; /* max CCTI for cc table */
struct ib_cc_table_entry ccti_entries[IB_CCT_ENTRIES];
} __packed;
struct ib_cc_table_attr_shadow {
u16 ccti_limit; /* max CCTI for cc table */
struct ib_cc_table_entry_shadow ccti_entries[IB_CCT_ENTRIES];
} __packed;
#define CC_TABLE_SHADOW_MAX \
(IB_CC_TABLE_CAP_DEFAULT * IB_CCT_ENTRIES)
struct cc_table_shadow {
u16 ccti_limit; /* max CCTI for cc table */
struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX];
} __packed;
/*
* struct cc_state combines the (active) per-port congestion control
* table, and the (active) per-SL congestion settings. cc_state data
* may need to be read in code paths that we want to be fast, so it
* is an RCU protected structure.
*/
struct cc_state {
struct rcu_head rcu;
struct cc_table_shadow cct;
struct opa_congestion_setting_attr_shadow cong_setting;
};
/*
* OPA BufferControl MAD
*/
/* attribute modifier macros */
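/*
* Each OPA_AM_* group below extracts one field from the 32-bit MAD
* attribute modifier "am": _SHIFT/_MASK give the field position,
* _SMASK is the in-place bit mask, and the function-like macro
* returns the field value.
*/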
#define OPA_AM_NPORT_SHIFT 24
#define OPA_AM_NPORT_MASK 0xff
#define OPA_AM_NPORT_SMASK (OPA_AM_NPORT_MASK << OPA_AM_NPORT_SHIFT)
#define OPA_AM_NPORT(am) (((am) >> OPA_AM_NPORT_SHIFT) & \
OPA_AM_NPORT_MASK)
#define OPA_AM_NBLK_SHIFT 24
#define OPA_AM_NBLK_MASK 0xff
#define OPA_AM_NBLK_SMASK (OPA_AM_NBLK_MASK << OPA_AM_NBLK_SHIFT)
#define OPA_AM_NBLK(am) (((am) >> OPA_AM_NBLK_SHIFT) & \
OPA_AM_NBLK_MASK)
#define OPA_AM_START_BLK_SHIFT 0
#define OPA_AM_START_BLK_MASK 0xff
#define OPA_AM_START_BLK_SMASK (OPA_AM_START_BLK_MASK << \
OPA_AM_START_BLK_SHIFT)
#define OPA_AM_START_BLK(am) (((am) >> OPA_AM_START_BLK_SHIFT) & \
OPA_AM_START_BLK_MASK)
#define OPA_AM_PORTNUM_SHIFT 0
#define OPA_AM_PORTNUM_MASK 0xff
#define OPA_AM_PORTNUM_SMASK (OPA_AM_PORTNUM_MASK << OPA_AM_PORTNUM_SHIFT)
#define OPA_AM_PORTNUM(am) (((am) >> OPA_AM_PORTNUM_SHIFT) & \
OPA_AM_PORTNUM_MASK)
#define OPA_AM_ASYNC_SHIFT 12
#define OPA_AM_ASYNC_MASK 0x1
#define OPA_AM_ASYNC_SMASK (OPA_AM_ASYNC_MASK << OPA_AM_ASYNC_SHIFT)
#define OPA_AM_ASYNC(am) (((am) >> OPA_AM_ASYNC_SHIFT) & \
OPA_AM_ASYNC_MASK)
#define OPA_AM_START_SM_CFG_SHIFT 9
#define OPA_AM_START_SM_CFG_MASK 0x1
#define OPA_AM_START_SM_CFG_SMASK (OPA_AM_START_SM_CFG_MASK << \
OPA_AM_START_SM_CFG_SHIFT)
#define OPA_AM_START_SM_CFG(am) (((am) >> OPA_AM_START_SM_CFG_SHIFT) \
& OPA_AM_START_SM_CFG_MASK)
#define OPA_AM_CI_ADDR_SHIFT 19
#define OPA_AM_CI_ADDR_MASK 0xfff
#define OPA_AM_CI_ADDR_SMASK (OPA_AM_CI_ADDR_MASK << OPA_AM_CI_ADDR_SHIFT)
#define OPA_AM_CI_ADDR(am) (((am) >> OPA_AM_CI_ADDR_SHIFT) & \
OPA_AM_CI_ADDR_MASK)
#define OPA_AM_CI_LEN_SHIFT 13
#define OPA_AM_CI_LEN_MASK 0x3f
#define OPA_AM_CI_LEN_SMASK (OPA_AM_CI_LEN_MASK << OPA_AM_CI_LEN_SHIFT)
#define OPA_AM_CI_LEN(am) (((am) >> OPA_AM_CI_LEN_SHIFT) & \
OPA_AM_CI_LEN_MASK)
/* error info macros */
#define OPA_EI_STATUS_SMASK 0x80
#define OPA_EI_CODE_SMASK 0x0f
struct vl_limit {
__be16 dedicated;
__be16 shared;
};
struct buffer_control {
__be16 reserved;
__be16 overall_shared_limit;
struct vl_limit vl[OPA_MAX_VLS];
};
struct sc2vlnt {
u8 vlnt[32]; /* 5 bit VL, 3 bits reserved */
};
/*
* The PortSamplesControl.CounterMasks field is an array of 3 bit fields
* which specify the N'th counter's capabilities. See ch. 16.1.3.2.
* We support 5 counters which only count the mandatory quantities.
*/
#define COUNTER_MASK(q, n) (q << ((9 - n) * 3))
#define COUNTER_MASK0_9 \
cpu_to_be32(COUNTER_MASK(1, 0) | \
COUNTER_MASK(1, 1) | \
COUNTER_MASK(1, 2) | \
COUNTER_MASK(1, 3) | \
COUNTER_MASK(1, 4))
#endif /* _HFI1_MAD_H */

View File

@@ -0,0 +1,192 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/errno.h>
#include <asm/pgtable.h>
#include "verbs.h"
/**
* hfi1_release_mmap_info - free mmap info structure
* @ref: a pointer to the kref within struct hfi1_mmap_info
*/
void hfi1_release_mmap_info(struct kref *ref)
{
struct hfi1_mmap_info *ip =
container_of(ref, struct hfi1_mmap_info, ref);
struct hfi1_ibdev *dev = to_idev(ip->context->device);
spin_lock_irq(&dev->pending_lock);
list_del(&ip->pending_mmaps);
spin_unlock_irq(&dev->pending_lock);
vfree(ip->obj);
kfree(ip);
}
/*
* open and close keep track of how many times the object is mapped,
* to avoid releasing it prematurely.
*/
static void hfi1_vma_open(struct vm_area_struct *vma)
{
struct hfi1_mmap_info *ip = vma->vm_private_data;
kref_get(&ip->ref);
}
static void hfi1_vma_close(struct vm_area_struct *vma)
{
struct hfi1_mmap_info *ip = vma->vm_private_data;
kref_put(&ip->ref, hfi1_release_mmap_info);
}
static struct vm_operations_struct hfi1_vm_ops = {
.open = hfi1_vma_open,
.close = hfi1_vma_close,
};
/**
* hfi1_mmap - create a new mmap region
* @context: the IB user context of the process making the mmap() call
* @vma: the VMA to be initialized
* Return zero if the mmap is OK. Otherwise, return an errno.
*/
int hfi1_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct hfi1_ibdev *dev = to_idev(context->device);
unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
unsigned long size = vma->vm_end - vma->vm_start;
struct hfi1_mmap_info *ip, *pp;
int ret = -EINVAL;
/*
* Search the device's list of objects waiting for a mmap call.
* Normally, this list is very short since a call to create a
* CQ, QP, or SRQ is soon followed by a call to mmap().
*/
spin_lock_irq(&dev->pending_lock);
list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
pending_mmaps) {
/* Only the creator is allowed to mmap the object */
if (context != ip->context || (__u64) offset != ip->offset)
continue;
/* Don't allow a mmap larger than the object. */
if (size > ip->size)
break;
list_del_init(&ip->pending_mmaps);
spin_unlock_irq(&dev->pending_lock);
ret = remap_vmalloc_range(vma, ip->obj, 0);
if (ret)
goto done;
vma->vm_ops = &hfi1_vm_ops;
vma->vm_private_data = ip;
hfi1_vma_open(vma);
goto done;
}
spin_unlock_irq(&dev->pending_lock);
done:
return ret;
}
/*
* Allocate information for hfi1_mmap
*/
struct hfi1_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev,
u32 size,
struct ib_ucontext *context,
void *obj)
{
struct hfi1_mmap_info *ip;
ip = kmalloc(sizeof(*ip), GFP_KERNEL);
if (!ip)
goto bail;
size = PAGE_ALIGN(size);
spin_lock_irq(&dev->mmap_offset_lock);
if (dev->mmap_offset == 0)
dev->mmap_offset = PAGE_SIZE;
ip->offset = dev->mmap_offset;
dev->mmap_offset += size;
spin_unlock_irq(&dev->mmap_offset_lock);
INIT_LIST_HEAD(&ip->pending_mmaps);
ip->size = size;
ip->context = context;
ip->obj = obj;
kref_init(&ip->ref);
bail:
return ip;
}
void hfi1_update_mmap_info(struct hfi1_ibdev *dev, struct hfi1_mmap_info *ip,
u32 size, void *obj)
{
size = PAGE_ALIGN(size);
spin_lock_irq(&dev->mmap_offset_lock);
if (dev->mmap_offset == 0)
dev->mmap_offset = PAGE_SIZE;
ip->offset = dev->mmap_offset;
dev->mmap_offset += size;
spin_unlock_irq(&dev->mmap_offset_lock);
ip->size = size;
ip->obj = obj;
}
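Together these helpers implement the mmap hand-off used by the verbs object creation paths: the creator vmalloc()s the object, wraps it with hfi1_create_mmap_info(), hands ip->offset back to user space through udata, and queues the entry on dev->pending_mmaps so that a later mmap() call is matched by hfi1_mmap() above. A hedged sketch of such a path; publish_for_mmap() is an illustrative helper name and error unwinding (dropping the kref on failure) is omitted for brevity:

/* Illustrative only: publishing a vmalloc()ed object for a later mmap(). */
static int publish_for_mmap(struct hfi1_ibdev *dev, struct ib_ucontext *context,
			    void *obj, u32 sz, struct ib_udata *udata,
			    struct hfi1_mmap_info **ipp)
{
	struct hfi1_mmap_info *ip;

	ip = hfi1_create_mmap_info(dev, sz, context, obj);
	if (!ip)
		return -ENOMEM;

	/* tell user space which offset to pass to mmap() */
	if (ib_copy_to_udata(udata, &ip->offset, sizeof(ip->offset)))
		return -EFAULT;

	/* make the object findable by hfi1_mmap() */
	spin_lock_irq(&dev->pending_lock);
	list_add(&ip->pending_mmaps, &dev->pending_mmaps);
	spin_unlock_irq(&dev->pending_lock);

	*ipp = ip;
	return 0;
}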

View File

@ -0,0 +1,546 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <rdma/ib_umem.h>
#include <rdma/ib_smi.h>
#include "hfi.h"
/* Fast memory region */
struct hfi1_fmr {
struct ib_fmr ibfmr;
struct hfi1_mregion mr; /* must be last */
};
static inline struct hfi1_fmr *to_ifmr(struct ib_fmr *ibfmr)
{
return container_of(ibfmr, struct hfi1_fmr, ibfmr);
}
static int init_mregion(struct hfi1_mregion *mr, struct ib_pd *pd,
int count)
{
int m, i = 0;
int rval = 0;
m = (count + HFI1_SEGSZ - 1) / HFI1_SEGSZ;
for (; i < m; i++) {
mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL);
if (!mr->map[i])
goto bail;
}
mr->mapsz = m;
init_completion(&mr->comp);
/* count returning the ptr to user */
atomic_set(&mr->refcount, 1);
mr->pd = pd;
mr->max_segs = count;
out:
return rval;
bail:
while (i)
kfree(mr->map[--i]);
rval = -ENOMEM;
goto out;
}
static void deinit_mregion(struct hfi1_mregion *mr)
{
int i = mr->mapsz;
mr->mapsz = 0;
while (i)
kfree(mr->map[--i]);
}
/**
* hfi1_get_dma_mr - get a DMA memory region
* @pd: protection domain for this memory region
* @acc: access flags
*
* Returns the memory region on success, otherwise returns an errno.
* Note that all DMA addresses should be created via the
* struct ib_dma_mapping_ops functions (see dma.c).
*/
struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc)
{
struct hfi1_mr *mr = NULL;
struct ib_mr *ret;
int rval;
if (to_ipd(pd)->user) {
ret = ERR_PTR(-EPERM);
goto bail;
}
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr) {
ret = ERR_PTR(-ENOMEM);
goto bail;
}
rval = init_mregion(&mr->mr, pd, 0);
if (rval) {
ret = ERR_PTR(rval);
goto bail;
}
rval = hfi1_alloc_lkey(&mr->mr, 1);
if (rval) {
ret = ERR_PTR(rval);
goto bail_mregion;
}
mr->mr.access_flags = acc;
ret = &mr->ibmr;
done:
return ret;
bail_mregion:
deinit_mregion(&mr->mr);
bail:
kfree(mr);
goto done;
}
static struct hfi1_mr *alloc_mr(int count, struct ib_pd *pd)
{
struct hfi1_mr *mr;
int rval = -ENOMEM;
int m;
/* Allocate struct plus pointers to first level page tables. */
m = (count + HFI1_SEGSZ - 1) / HFI1_SEGSZ;
mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
if (!mr)
goto bail;
rval = init_mregion(&mr->mr, pd, count);
if (rval)
goto bail;
/*
* ib_reg_phys_mr() will initialize mr->ibmr except for
* lkey and rkey.
*/
rval = hfi1_alloc_lkey(&mr->mr, 0);
if (rval)
goto bail_mregion;
mr->ibmr.lkey = mr->mr.lkey;
mr->ibmr.rkey = mr->mr.lkey;
done:
return mr;
bail_mregion:
deinit_mregion(&mr->mr);
bail:
kfree(mr);
mr = ERR_PTR(rval);
goto done;
}
/**
* hfi1_reg_phys_mr - register a physical memory region
* @pd: protection domain for this memory region
* @buffer_list: pointer to the list of physical buffers to register
* @num_phys_buf: the number of physical buffers to register
* @iova_start: the starting address passed over IB which maps to this MR
*
* Returns the memory region on success, otherwise returns an errno.
*/
struct ib_mr *hfi1_reg_phys_mr(struct ib_pd *pd,
struct ib_phys_buf *buffer_list,
int num_phys_buf, int acc, u64 *iova_start)
{
struct hfi1_mr *mr;
int n, m, i;
struct ib_mr *ret;
mr = alloc_mr(num_phys_buf, pd);
if (IS_ERR(mr)) {
ret = (struct ib_mr *)mr;
goto bail;
}
mr->mr.user_base = *iova_start;
mr->mr.iova = *iova_start;
mr->mr.access_flags = acc;
m = 0;
n = 0;
for (i = 0; i < num_phys_buf; i++) {
mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
mr->mr.map[m]->segs[n].length = buffer_list[i].size;
mr->mr.length += buffer_list[i].size;
n++;
if (n == HFI1_SEGSZ) {
m++;
n = 0;
}
}
ret = &mr->ibmr;
bail:
return ret;
}
/**
* hfi1_reg_user_mr - register a userspace memory region
* @pd: protection domain for this memory region
* @start: starting userspace address
* @length: length of region to register
* @mr_access_flags: access flags for this memory region
* @udata: unused by the driver
*
* Returns the memory region on success, otherwise returns an errno.
*/
struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata)
{
struct hfi1_mr *mr;
struct ib_umem *umem;
struct scatterlist *sg;
int n, m, entry;
struct ib_mr *ret;
if (length == 0) {
ret = ERR_PTR(-EINVAL);
goto bail;
}
umem = ib_umem_get(pd->uobject->context, start, length,
mr_access_flags, 0);
if (IS_ERR(umem))
return (void *) umem;
n = umem->nmap;
mr = alloc_mr(n, pd);
if (IS_ERR(mr)) {
ret = (struct ib_mr *)mr;
ib_umem_release(umem);
goto bail;
}
mr->mr.user_base = start;
mr->mr.iova = virt_addr;
mr->mr.length = length;
mr->mr.offset = ib_umem_offset(umem);
mr->mr.access_flags = mr_access_flags;
mr->umem = umem;
if (is_power_of_2(umem->page_size))
mr->mr.page_shift = ilog2(umem->page_size);
m = 0;
n = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
void *vaddr;
vaddr = page_address(sg_page(sg));
if (!vaddr) {
ret = ERR_PTR(-EINVAL);
goto bail;
}
mr->mr.map[m]->segs[n].vaddr = vaddr;
mr->mr.map[m]->segs[n].length = umem->page_size;
n++;
if (n == HFI1_SEGSZ) {
m++;
n = 0;
}
}
ret = &mr->ibmr;
bail:
return ret;
}
/**
* hfi1_dereg_mr - unregister and free a memory region
* @ibmr: the memory region to free
*
* Returns 0 on success.
*
* Note that this is called to free MRs created by hfi1_get_dma_mr()
* or hfi1_reg_user_mr().
*/
int hfi1_dereg_mr(struct ib_mr *ibmr)
{
struct hfi1_mr *mr = to_imr(ibmr);
int ret = 0;
unsigned long timeout;
hfi1_free_lkey(&mr->mr);
hfi1_put_mr(&mr->mr); /* will set completion if last */
timeout = wait_for_completion_timeout(&mr->mr.comp,
5 * HZ);
if (!timeout) {
dd_dev_err(
dd_from_ibdev(mr->mr.pd->device),
"hfi1_dereg_mr timeout mr %p pd %p refcount %u\n",
mr, mr->mr.pd, atomic_read(&mr->mr.refcount));
hfi1_get_mr(&mr->mr);
ret = -EBUSY;
goto out;
}
deinit_mregion(&mr->mr);
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr);
out:
return ret;
}
/*
* Allocate a memory region usable with the
* IB_WR_FAST_REG_MR send work request.
*
* Return the memory region on success, otherwise return an errno.
*/
struct ib_mr *hfi1_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
{
struct hfi1_mr *mr;
mr = alloc_mr(max_page_list_len, pd);
if (IS_ERR(mr))
return (struct ib_mr *)mr;
return &mr->ibmr;
}
struct ib_fast_reg_page_list *
hfi1_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len)
{
unsigned size = page_list_len * sizeof(u64);
struct ib_fast_reg_page_list *pl;
if (size > PAGE_SIZE)
return ERR_PTR(-EINVAL);
pl = kzalloc(sizeof(*pl), GFP_KERNEL);
if (!pl)
return ERR_PTR(-ENOMEM);
pl->page_list = kzalloc(size, GFP_KERNEL);
if (!pl->page_list)
goto err_free;
return pl;
err_free:
kfree(pl);
return ERR_PTR(-ENOMEM);
}
void hfi1_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl)
{
kfree(pl->page_list);
kfree(pl);
}
/**
* hfi1_alloc_fmr - allocate a fast memory region
* @pd: the protection domain for this memory region
* @mr_access_flags: access flags for this memory region
* @fmr_attr: fast memory region attributes
*
* Returns the memory region on success, otherwise returns an errno.
*/
struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
struct ib_fmr_attr *fmr_attr)
{
struct hfi1_fmr *fmr;
int m;
struct ib_fmr *ret;
int rval = -ENOMEM;
/* Allocate struct plus pointers to first level page tables. */
m = (fmr_attr->max_pages + HFI1_SEGSZ - 1) / HFI1_SEGSZ;
fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
if (!fmr)
goto bail;
rval = init_mregion(&fmr->mr, pd, fmr_attr->max_pages);
if (rval)
goto bail;
/*
* ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
* rkey.
*/
rval = hfi1_alloc_lkey(&fmr->mr, 0);
if (rval)
goto bail_mregion;
fmr->ibfmr.rkey = fmr->mr.lkey;
fmr->ibfmr.lkey = fmr->mr.lkey;
/*
* Resources are allocated but no valid mapping (RKEY can't be
* used).
*/
fmr->mr.access_flags = mr_access_flags;
fmr->mr.max_segs = fmr_attr->max_pages;
fmr->mr.page_shift = fmr_attr->page_shift;
ret = &fmr->ibfmr;
done:
return ret;
bail_mregion:
deinit_mregion(&fmr->mr);
bail:
kfree(fmr);
ret = ERR_PTR(rval);
goto done;
}
/**
* hfi1_map_phys_fmr - set up a fast memory region
* @ibmfr: the fast memory region to set up
* @page_list: the list of pages to associate with the fast memory region
* @list_len: the number of pages to associate with the fast memory region
* @iova: the virtual address of the start of the fast memory region
*
* This may be called from interrupt context.
*/
int hfi1_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
int list_len, u64 iova)
{
struct hfi1_fmr *fmr = to_ifmr(ibfmr);
struct hfi1_lkey_table *rkt;
unsigned long flags;
int m, n, i;
u32 ps;
int ret;
i = atomic_read(&fmr->mr.refcount);
if (i > 2)
return -EBUSY;
if (list_len > fmr->mr.max_segs) {
ret = -EINVAL;
goto bail;
}
rkt = &to_idev(ibfmr->device)->lk_table;
spin_lock_irqsave(&rkt->lock, flags);
fmr->mr.user_base = iova;
fmr->mr.iova = iova;
ps = 1 << fmr->mr.page_shift;
fmr->mr.length = list_len * ps;
m = 0;
n = 0;
for (i = 0; i < list_len; i++) {
fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
fmr->mr.map[m]->segs[n].length = ps;
if (++n == HFI1_SEGSZ) {
m++;
n = 0;
}
}
spin_unlock_irqrestore(&rkt->lock, flags);
ret = 0;
bail:
return ret;
}
/**
* hfi1_unmap_fmr - unmap fast memory regions
* @fmr_list: the list of fast memory regions to unmap
*
* Returns 0 on success.
*/
int hfi1_unmap_fmr(struct list_head *fmr_list)
{
struct hfi1_fmr *fmr;
struct hfi1_lkey_table *rkt;
unsigned long flags;
list_for_each_entry(fmr, fmr_list, ibfmr.list) {
rkt = &to_idev(fmr->ibfmr.device)->lk_table;
spin_lock_irqsave(&rkt->lock, flags);
fmr->mr.user_base = 0;
fmr->mr.iova = 0;
fmr->mr.length = 0;
spin_unlock_irqrestore(&rkt->lock, flags);
}
return 0;
}
/**
* hfi1_dealloc_fmr - deallocate a fast memory region
* @ibfmr: the fast memory region to deallocate
*
* Returns 0 on success.
*/
int hfi1_dealloc_fmr(struct ib_fmr *ibfmr)
{
struct hfi1_fmr *fmr = to_ifmr(ibfmr);
int ret = 0;
unsigned long timeout;
hfi1_free_lkey(&fmr->mr);
hfi1_put_mr(&fmr->mr); /* will set completion if last */
timeout = wait_for_completion_timeout(&fmr->mr.comp,
5 * HZ);
if (!timeout) {
hfi1_get_mr(&fmr->mr);
ret = -EBUSY;
goto out;
}
deinit_mregion(&fmr->mr);
kfree(fmr);
out:
return ret;
}
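All of the registration paths above fill a region in HFI1_SEGSZ-entry chunks, so segment i ends up at map[i / HFI1_SEGSZ]->segs[i % HFI1_SEGSZ]; that is exactly what the (m, n) counters walk. A small user-space sketch of the same indexing, with an assumed HFI1_SEGSZ value purely for illustration:

/* Illustrative only: the two-level segment addressing used by the MR code. */
#include <stdio.h>

#define HFI1_SEGSZ 8	/* assumed value, for illustration only */

int main(void)
{
	int count = 20;	/* e.g. number of umem pages or physical buffers */
	int m = 0, n = 0, i;

	for (i = 0; i < count; i++) {
		printf("segment %2d -> map[%d]->segs[%d]\n", i, m, n);
		if (++n == HFI1_SEGSZ) {	/* advance to the next map chunk */
			m++;
			n = 0;
		}
	}
	return 0;
}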

View File

@ -0,0 +1,129 @@
#ifndef _LINUX_H
#define _LINUX_H
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
/*
* This header file is for OPA-specific definitions which are
* required by the HFI driver, and which aren't yet in the Linux
* IB core. We'll collect these all here, then merge them into
* the kernel when that's convenient.
*/
/* OPA SMA attribute IDs */
#define OPA_ATTRIB_ID_CONGESTION_INFO cpu_to_be16(0x008b)
#define OPA_ATTRIB_ID_HFI_CONGESTION_LOG cpu_to_be16(0x008f)
#define OPA_ATTRIB_ID_HFI_CONGESTION_SETTING cpu_to_be16(0x0090)
#define OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE cpu_to_be16(0x0091)
/* OPA PMA attribute IDs */
#define OPA_PM_ATTRIB_ID_PORT_STATUS cpu_to_be16(0x0040)
#define OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS cpu_to_be16(0x0041)
#define OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS cpu_to_be16(0x0042)
#define OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS cpu_to_be16(0x0043)
#define OPA_PM_ATTRIB_ID_ERROR_INFO cpu_to_be16(0x0044)
/* OPA status codes */
#define OPA_PM_STATUS_REQUEST_TOO_LARGE cpu_to_be16(0x100)
static inline u8 port_states_to_logical_state(struct opa_port_states *ps)
{
return ps->portphysstate_portstate & OPA_PI_MASK_PORT_STATE;
}
static inline u8 port_states_to_phys_state(struct opa_port_states *ps)
{
return ((ps->portphysstate_portstate &
OPA_PI_MASK_PORT_PHYSICAL_STATE) >> 4) & 0xf;
}
/*
* OPA port physical states
* IB Volume 1, Table 146 PortInfo/IB Volume 2 Section 5.4.2(1) PortPhysState
* values.
*
* When writing, only values 0-3 are valid, other values are ignored.
* When reading, 0 is reserved.
*
* Returned by the ibphys_portstate() routine.
*/
enum opa_port_phys_state {
IB_PORTPHYSSTATE_NOP = 0,
/* 1 is reserved */
IB_PORTPHYSSTATE_POLLING = 2,
IB_PORTPHYSSTATE_DISABLED = 3,
IB_PORTPHYSSTATE_TRAINING = 4,
IB_PORTPHYSSTATE_LINKUP = 5,
IB_PORTPHYSSTATE_LINK_ERROR_RECOVERY = 6,
IB_PORTPHYSSTATE_PHY_TEST = 7,
/* 8 is reserved */
OPA_PORTPHYSSTATE_OFFLINE = 9,
OPA_PORTPHYSSTATE_GANGED = 10,
OPA_PORTPHYSSTATE_TEST = 11,
OPA_PORTPHYSSTATE_MAX = 11,
/* values 12-15 are reserved/ignored */
};
/* OPA_PORT_TYPE_* definitions - these belong in opa_port_info.h */
#define OPA_PORT_TYPE_UNKNOWN 0
#define OPA_PORT_TYPE_DISCONNECTED 1
/* port is not currently usable, CableInfo not available */
#define OPA_PORT_TYPE_FIXED 2
/* A fixed backplane port in a director class switch. All OPA ASICS */
#define OPA_PORT_TYPE_VARIABLE 3
/* A backplane port in a blade system, possibly mixed configuration */
#define OPA_PORT_TYPE_STANDARD 4
/* implies a SFF-8636 defined format for CableInfo (QSFP) */
#define OPA_PORT_TYPE_SI_PHOTONICS 5
/* A silicon photonics module implies TBD defined format for CableInfo
* as defined by Intel SFO group */
/* 6 - 15 are reserved */
#endif /* _LINUX_H */
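The two inline helpers above simply split the packed portphysstate_portstate byte: the logical port state is the low nibble and the physical state the high nibble. A user-space sketch of that decode; the 0x0f and 0xf0 masks are what OPA_PI_MASK_PORT_STATE and OPA_PI_MASK_PORT_PHYSICAL_STATE are assumed to expand to:

/* Illustrative only: decoding a packed portphysstate_portstate byte. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint8_t packed = 0x52;			/* example packed value */
	uint8_t logical = packed & 0x0f;	/* assumed OPA_PI_MASK_PORT_STATE */
	uint8_t phys = (packed & 0xf0) >> 4;	/* assumed OPA_PI_MASK_PORT_PHYSICAL_STATE */

	printf("logical state %u, physical state %u\n", logical, phys);
	return 0;
}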

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,224 @@
#ifndef _PIO_H
#define _PIO_H
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
/* send context types */
#define SC_KERNEL 0
#define SC_ACK 1
#define SC_USER 2
#define SC_MAX 3
/* invalid send context index */
#define INVALID_SCI 0xff
/* PIO buffer release callback function */
typedef void (*pio_release_cb)(void *arg, int code);
/* PIO release codes - in bits, as there could more than one that apply */
#define PRC_OK 0 /* no known error */
#define PRC_STATUS_ERR 0x01 /* credit return due to status error */
#define PRC_PBC 0x02 /* credit return due to PBC */
#define PRC_THRESHOLD 0x04 /* credit return due to threshold */
#define PRC_FILL_ERR 0x08 /* credit return due fill error */
#define PRC_FORCE 0x10 /* credit return due credit force */
#define PRC_SC_DISABLE 0x20 /* clean-up after a context disable */
/* byte helper */
union mix {
u64 val64;
u32 val32[2];
u8 val8[8];
};
/* an allocated PIO buffer */
struct pio_buf {
struct send_context *sc;/* back pointer to owning send context */
pio_release_cb cb; /* called when the buffer is released */
void *arg; /* argument for cb */
void __iomem *start; /* buffer start address */
void __iomem *end; /* context end address */
unsigned long size; /* context size, in bytes */
unsigned long sent_at; /* buffer is sent when <= free */
u32 block_count; /* size of buffer, in blocks */
u32 qw_written; /* QW written so far */
u32 carry_bytes; /* number of valid bytes in carry */
union mix carry; /* pending unwritten bytes */
};
/* cache line aligned pio buffer array */
union pio_shadow_ring {
struct pio_buf pbuf;
u64 unused[16]; /* cache line spacer */
} ____cacheline_aligned;
/* per-NUMA send context */
struct send_context {
/* read-only after init */
struct hfi1_devdata *dd; /* device */
void __iomem *base_addr; /* start of PIO memory */
union pio_shadow_ring *sr; /* shadow ring */
volatile __le64 *hw_free; /* HW free counter */
struct work_struct halt_work; /* halted context work queue entry */
unsigned long flags; /* flags */
int node; /* context home node */
int type; /* context type */
u32 sw_index; /* software index number */
u32 hw_context; /* hardware context number */
u32 credits; /* number of blocks in context */
u32 sr_size; /* size of the shadow ring */
u32 group; /* credit return group */
/* allocator fields */
spinlock_t alloc_lock ____cacheline_aligned_in_smp;
unsigned long fill; /* official alloc count */
unsigned long alloc_free; /* copy of free (less cache thrash) */
u32 sr_head; /* shadow ring head */
/* releaser fields */
spinlock_t release_lock ____cacheline_aligned_in_smp;
unsigned long free; /* official free count */
u32 sr_tail; /* shadow ring tail */
/* list for PIO waiters */
struct list_head piowait ____cacheline_aligned_in_smp;
spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp;
u64 credit_ctrl; /* cache for credit control */
u32 credit_intr_count; /* count of credit intr users */
atomic_t buffers_allocated; /* count of buffers allocated */
wait_queue_head_t halt_wait; /* wait until kernel sees interrupt */
};
/* send context flags */
#define SCF_ENABLED 0x01
#define SCF_IN_FREE 0x02
#define SCF_HALTED 0x04
#define SCF_FROZEN 0x08
struct send_context_info {
struct send_context *sc; /* allocated working context */
u16 allocated; /* has this been allocated? */
u16 type; /* context type */
u16 base; /* base in PIO array */
u16 credits; /* size in PIO array */
};
/* DMA credit return, index is always (context & 0x7) */
struct credit_return {
volatile __le64 cr[8];
};
/* NUMA indexed credit return array */
struct credit_return_base {
struct credit_return *va;
dma_addr_t pa;
};
/* send context configuration sizes (one per type) */
struct sc_config_sizes {
short int size;
short int count;
};
/* send context functions */
int init_credit_return(struct hfi1_devdata *dd);
void free_credit_return(struct hfi1_devdata *dd);
int init_sc_pools_and_sizes(struct hfi1_devdata *dd);
int init_send_contexts(struct hfi1_devdata *dd);
int init_pervl_scs(struct hfi1_devdata *dd);
struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
uint hdrqentsize, int numa);
void sc_free(struct send_context *sc);
int sc_enable(struct send_context *sc);
void sc_disable(struct send_context *sc);
int sc_restart(struct send_context *sc);
void sc_return_credits(struct send_context *sc);
void sc_flush(struct send_context *sc);
void sc_drop(struct send_context *sc);
void sc_stop(struct send_context *sc, int bit);
struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
pio_release_cb cb, void *arg);
void sc_release_update(struct send_context *sc);
void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context);
void sc_add_credit_return_intr(struct send_context *sc);
void sc_del_credit_return_intr(struct send_context *sc);
void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold);
u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize);
void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint);
void sc_wait(struct hfi1_devdata *dd);
void set_pio_integrity(struct send_context *sc);
/* support functions */
void pio_reset_all(struct hfi1_devdata *dd);
void pio_freeze(struct hfi1_devdata *dd);
void pio_kernel_unfreeze(struct hfi1_devdata *dd);
/* global PIO send control operations */
#define PSC_GLOBAL_ENABLE 0
#define PSC_GLOBAL_DISABLE 1
#define PSC_GLOBAL_VLARB_ENABLE 2
#define PSC_GLOBAL_VLARB_DISABLE 3
#define PSC_CM_RESET 4
#define PSC_DATA_VL_ENABLE 5
#define PSC_DATA_VL_DISABLE 6
void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl);
void pio_send_control(struct hfi1_devdata *dd, int op);
/* PIO copy routines */
void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
const void *from, size_t count);
void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
const void *from, size_t nbytes);
void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes);
void seg_pio_copy_end(struct pio_buf *pbuf);
#endif /* _PIO_H */
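Per the declarations above, the usual kernel send flow is to allocate a buffer from a send context with sc_buffer_alloc() and then either copy the whole packet with pio_copy() or stream it with seg_pio_copy_start()/_mid()/_end(). A hedged sketch of the single-shot path; send_one_packet() is an illustrative wrapper, the pbc and payload are placeholders, and credit exhaustion is simply reported to the caller:

/* Illustrative only: sending one packet through a kernel send context. */
static int send_one_packet(struct hfi1_devdata *dd, struct send_context *sc,
			   u64 pbc, const void *pkt, u32 dwords)
{
	struct pio_buf *pbuf;

	/* no release callback needed for this example */
	pbuf = sc_buffer_alloc(sc, dwords, NULL, NULL);
	if (!pbuf)
		return -EBUSY;	/* out of PIO credits; caller may retry */

	/* writes the PBC plus "dwords" 32-bit words, then releases the buffer */
	pio_copy(dd, pbuf, pbc, pkt, dwords);
	return 0;
}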

View File

@ -0,0 +1,858 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include "hfi.h"
/* additive distance between non-SOP and SOP space */
#define SOP_DISTANCE (TXE_PIO_SIZE / 2)
#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE-1)
/* number of QUADWORDs in a block */
#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE/sizeof(u64))
/**
* pio_copy - copy data block to MMIO space
* @pbuf: a number of blocks allocated within a PIO send context
* @pbc: PBC to send
* @from: source, must be 8 byte aligned
* @count: number of DWORD (32-bit) quantities to copy from source
*
* Copy data from source to PIO Send Buffer memory, 8 bytes at a time.
* Must always write full BLOCK_SIZE byte blocks. The first block must
* be written to the corresponding SOP=1 address.
*
* Known:
* o pbuf->start always starts on a block boundary
* o pbuf can wrap only at a block boundary
*/
void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
const void *from, size_t count)
{
void __iomem *dest = pbuf->start + SOP_DISTANCE;
void __iomem *send = dest + PIO_BLOCK_SIZE;
void __iomem *dend; /* 8-byte data end */
/* write the PBC */
writeq(pbc, dest);
dest += sizeof(u64);
/* calculate where the QWORD data ends - in SOP=1 space */
dend = dest + ((count>>1) * sizeof(u64));
if (dend < send) {
/* all QWORD data is within the SOP block, does *not*
reach the end of the SOP block */
while (dest < dend) {
writeq(*(u64 *)from, dest);
from += sizeof(u64);
dest += sizeof(u64);
}
/*
* No boundary checks are needed here:
* 0. We're not on the SOP block boundary
* 1. The possible DWORD dangle will still be within
* the SOP block
* 2. We cannot wrap except on a block boundary.
*/
} else {
/* QWORD data extends _to_ or beyond the SOP block */
/* write 8-byte SOP chunk data */
while (dest < send) {
writeq(*(u64 *)from, dest);
from += sizeof(u64);
dest += sizeof(u64);
}
/* drop out of the SOP range */
dest -= SOP_DISTANCE;
dend -= SOP_DISTANCE;
/*
* If the wrap comes before or matches the data end,
* copy until the wrap, then wrap.
*
* If the data ends at the end of the SOP above and
* the buffer wraps, then pbuf->end == dend == dest
* and nothing will get written, but we will wrap in
* case there is a dangling DWORD.
*/
if (pbuf->end <= dend) {
while (dest < pbuf->end) {
writeq(*(u64 *)from, dest);
from += sizeof(u64);
dest += sizeof(u64);
}
dest -= pbuf->size;
dend -= pbuf->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
while (dest < dend) {
writeq(*(u64 *)from, dest);
from += sizeof(u64);
dest += sizeof(u64);
}
}
/* at this point we have wrapped if we are going to wrap */
/* write dangling u32, if any */
if (count & 1) {
union mix val;
val.val64 = 0;
val.val32[0] = *(u32 *)from;
writeq(val.val64, dest);
dest += sizeof(u64);
}
/* fill in rest of block, no need to check pbuf->end
as we only wrap on a block boundary */
while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
writeq(0, dest);
dest += sizeof(u64);
}
/* finished with this buffer */
atomic_dec(&pbuf->sc->buffers_allocated);
}
/* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */
#define USE_SHIFTS 1
#ifdef USE_SHIFTS
/*
* Handle carry bytes using shifts and masks.
*
* NOTE: the value of the unused portion of carry is expected to always be zero.
*/
/*
* "zero" shift - bit shift used to zero out upper bytes. Input is
* the count of LSB bytes to preserve.
*/
#define zshift(x) (8 * (8-(x)))
/*
* "merge" shift - bit shift used to merge with carry bytes. Input is
* the LSB byte count to move beyond.
*/
#define mshift(x) (8 * (x))
/*
* Read nbytes bytes from "from" and return them in the LSB bytes
* of pbuf->carry. Other bytes are zeroed. Any previous value
* in pbuf->carry is lost.
*
* NOTES:
* o do not read from "from" if nbytes is zero
* o from may _not_ be u64 aligned
* o nbytes must not span a QW boundary
*/
static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
unsigned int nbytes)
{
unsigned long off;
if (nbytes == 0) {
pbuf->carry.val64 = 0;
} else {
/* align our pointer */
off = (unsigned long)from & 0x7;
from = (void *)((unsigned long)from & ~0x7l);
pbuf->carry.val64 = ((*(u64 *)from)
<< zshift(nbytes + off))/* zero upper bytes */
>> zshift(nbytes); /* place at bottom */
}
pbuf->carry_bytes = nbytes;
}
/*
* Read nbytes bytes from "from" and put them at the next significant bytes
* of pbuf->carry. Unused bytes are zeroed. It is expected that the extra
* read does not overfill carry.
*
* NOTES:
* o from may _not_ be u64 aligned
* o nbytes may span a QW boundary
*/
static inline void read_extra_bytes(struct pio_buf *pbuf,
const void *from, unsigned int nbytes)
{
unsigned long off = (unsigned long)from & 0x7;
unsigned int room, xbytes;
/* align our pointer */
from = (void *)((unsigned long)from & ~0x7l);
/* check count first - don't read anything if count is zero */
while (nbytes) {
/* find the number of bytes in this u64 */
room = 8 - off; /* this u64 has room for this many bytes */
xbytes = nbytes > room ? room : nbytes;
/*
* shift down to zero lower bytes, shift up to zero upper
* bytes, shift back down to move into place
*/
pbuf->carry.val64 |= (((*(u64 *)from)
>> mshift(off))
<< zshift(xbytes))
>> zshift(xbytes+pbuf->carry_bytes);
off = 0;
pbuf->carry_bytes += xbytes;
nbytes -= xbytes;
from += sizeof(u64);
}
}
/*
* Zero extra bytes from the end of pbuf->carry.
*
* NOTES:
* o zbytes <= old_bytes
*/
static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
{
unsigned int remaining;
if (zbytes == 0) /* nothing to do */
return;
remaining = pbuf->carry_bytes - zbytes; /* remaining bytes */
/* NOTE: zshift only guaranteed to work if remaining != 0 */
if (remaining)
pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining))
>> zshift(remaining);
else
pbuf->carry.val64 = 0;
pbuf->carry_bytes = remaining;
}
/*
* Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
* Put the unused part of the next 8 bytes of src into the LSB bytes of
* pbuf->carry with the upper bytes zeroed.
*
* NOTES:
* o result must keep unused bytes zeroed
* o src must be u64 aligned
*/
static inline void merge_write8(
struct pio_buf *pbuf,
void __iomem *dest,
const void *src)
{
u64 new, temp;
new = *(u64 *)src;
temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
writeq(temp, dest);
pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
}
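/*
 * Worked example (illustrative): on a little-endian host with
 * carry_bytes == 3, carry.val64 == 0x0000000000ccbbaa (pending bytes
 * aa bb cc) and the next aligned quad word *src == 0x1122334455667788,
 * merge_write8() writes
 *
 *	carry | (*src << mshift(3)) == 0x4455667788ccbbaa
 *
 * i.e. the three carry bytes followed by the first five bytes of src,
 * and the new carry becomes *src >> zshift(3) == 0x0000000000112233:
 * the three bytes of src that did not fit, right-justified with the
 * unused upper bytes zeroed.
 */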
/*
* Write a quad word using all bytes of carry.
*/
static inline void carry8_write8(union mix carry, void __iomem *dest)
{
writeq(carry.val64, dest);
}
/*
* Write a quad word using all the valid bytes of carry. If carry
* has zero valid bytes, nothing is written.
* Returns 0 on nothing written, non-zero on quad word written.
*/
static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
{
if (pbuf->carry_bytes) {
/* unused bytes are always kept zeroed, so just write */
writeq(pbuf->carry.val64, dest);
return 1;
}
return 0;
}
#else /* USE_SHIFTS */
/*
* Handle carry bytes using byte copies.
*
* NOTE: the value of the unused portion of carry is left uninitialized.
*/
/*
* Jump copy - no-loop copy for < 8 bytes.
*/
static inline void jcopy(u8 *dest, const u8 *src, u32 n)
{
switch (n) {
case 7:
*dest++ = *src++;
case 6:
*dest++ = *src++;
case 5:
*dest++ = *src++;
case 4:
*dest++ = *src++;
case 3:
*dest++ = *src++;
case 2:
*dest++ = *src++;
case 1:
*dest++ = *src++;
}
}
/*
* Read nbytes from "from" and and place them in the low bytes
* of pbuf->carry. Other bytes are left as-is. Any previous
* value in pbuf->carry is lost.
*
* NOTES:
* o do not read from "from" if nbytes is zero
* o from may _not_ be u64 aligned.
*/
static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
unsigned int nbytes)
{
jcopy(&pbuf->carry.val8[0], from, nbytes);
pbuf->carry_bytes = nbytes;
}
/*
* Read nbytes bytes from "from" and put them at the end of pbuf->carry.
* It is expected that the extra read does not overfill carry.
*
* NOTES:
* o from may _not_ be u64 aligned
* o nbytes may span a QW boundary
*/
static inline void read_extra_bytes(struct pio_buf *pbuf,
const void *from, unsigned int nbytes)
{
jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes);
pbuf->carry_bytes += nbytes;
}
/*
* Zero extra bytes from the end of pbuf->carry.
*
* We do not care about the value of unused bytes in carry, so just
* reduce the byte count.
*
* NOTES:
* o zbytes <= old_bytes
*/
static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
{
pbuf->carry_bytes -= zbytes;
}
/*
* Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
* Put the unused part of the next 8 bytes of src into the low bytes of
* pbuf->carry.
*/
static inline void merge_write8(
struct pio_buf *pbuf,
void *dest,
const void *src)
{
u32 remainder = 8 - pbuf->carry_bytes;
jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder);
writeq(pbuf->carry.val64, dest);
jcopy(&pbuf->carry.val8[0], src+remainder, pbuf->carry_bytes);
}
/*
* Write a quad word using all bytes of carry.
*/
static inline void carry8_write8(union mix carry, void *dest)
{
writeq(carry.val64, dest);
}
/*
* Write a quad word using all the valid bytes of carry. If carry
* has zero valid bytes, nothing is written.
* Returns 0 on nothing written, non-zero on quad word written.
*/
static inline int carry_write8(struct pio_buf *pbuf, void *dest)
{
if (pbuf->carry_bytes) {
u64 zero = 0;
jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero,
8 - pbuf->carry_bytes);
writeq(pbuf->carry.val64, dest);
return 1;
}
return 0;
}
#endif /* USE_SHIFTS */
/*
* Segmented PIO Copy - start
*
* Start a PIO copy.
*
* @pbuf: destination buffer
* @pbc: the PBC for the PIO buffer
* @from: data source, QWORD aligned
* @nbytes: bytes to copy
*/
void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
const void *from, size_t nbytes)
{
void __iomem *dest = pbuf->start + SOP_DISTANCE;
void __iomem *send = dest + PIO_BLOCK_SIZE;
void __iomem *dend; /* 8-byte data end */
writeq(pbc, dest);
dest += sizeof(u64);
/* calculate where the QWORD data ends - in SOP=1 space */
dend = dest + ((nbytes>>3) * sizeof(u64));
if (dend < send) {
/* all QWORD data is within the SOP block, does *not*
reach the end of the SOP block */
while (dest < dend) {
writeq(*(u64 *)from, dest);
from += sizeof(u64);
dest += sizeof(u64);
}
/*
* No boundary checks are needed here:
* 0. We're not on the SOP block boundary
* 1. The possible DWORD dangle will still be within
* the SOP block
* 2. We cannot wrap except on a block boundary.
*/
} else {
/* QWORD data extends _to_ or beyond the SOP block */
/* write 8-byte SOP chunk data */
while (dest < send) {
writeq(*(u64 *)from, dest);
from += sizeof(u64);
dest += sizeof(u64);
}
/* drop out of the SOP range */
dest -= SOP_DISTANCE;
dend -= SOP_DISTANCE;
/*
* If the wrap comes before or matches the data end,
* copy until the wrap, then wrap.
*
* If the data ends at the end of the SOP above and
* the buffer wraps, then pbuf->end == dend == dest
* and nothing will get written, but we will wrap in
* case there is a dangling DWORD.
*/
if (pbuf->end <= dend) {
while (dest < pbuf->end) {
writeq(*(u64 *)from, dest);
from += sizeof(u64);
dest += sizeof(u64);
}
dest -= pbuf->size;
dend -= pbuf->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
while (dest < dend) {
writeq(*(u64 *)from, dest);
from += sizeof(u64);
dest += sizeof(u64);
}
}
/* at this point we have wrapped if we are going to wrap */
/* ...but it doesn't matter as we're done writing */
/* save dangling bytes, if any */
read_low_bytes(pbuf, from, nbytes & 0x7);
pbuf->qw_written = 1 /*PBC*/ + (nbytes >> 3);
}
/*
* Mid copy helper, "mixed case" - source is 64-bit aligned but carry
* bytes are non-zero.
*
* Whole u64s must be written to the chip, so bytes must be manually merged.
*
* @pbuf: destination buffer
* @from: data source, is QWORD aligned.
* @nbytes: bytes to copy
*
* Must handle nbytes < 8.
*/
static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
{
void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
void __iomem *dend; /* 8-byte data end */
unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3;
unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7;
/* calculate 8-byte data end */
dend = dest + (qw_to_write * sizeof(u64));
if (pbuf->qw_written < PIO_BLOCK_QWS) {
/*
* Still within SOP block. We don't need to check for
* wrap because we are still in the first block and
* can only wrap on block boundaries.
*/
void __iomem *send; /* SOP end */
void __iomem *xend;
/* calculate the end of data or end of block, whichever
comes first */
send = pbuf->start + PIO_BLOCK_SIZE;
xend = send < dend ? send : dend;
/* shift up to SOP=1 space */
dest += SOP_DISTANCE;
xend += SOP_DISTANCE;
/* write 8-byte chunk data */
while (dest < xend) {
merge_write8(pbuf, dest, from);
from += sizeof(u64);
dest += sizeof(u64);
}
/* shift down to SOP=0 space */
dest -= SOP_DISTANCE;
}
/*
* At this point dest could be (either, both, or neither):
* - at dend
* - at the wrap
*/
/*
* If the wrap comes before or matches the data end,
* copy until the wrap, then wrap.
*
* If dest is at the wrap, we will fall into the if,
* not do the loop, when wrap.
*
* If the data ends at the end of the SOP above and
* the buffer wraps, then pbuf->end == dend == dest
* and nothing will get written.
*/
if (pbuf->end <= dend) {
while (dest < pbuf->end) {
merge_write8(pbuf, dest, from);
from += sizeof(u64);
dest += sizeof(u64);
}
dest -= pbuf->size;
dend -= pbuf->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
while (dest < dend) {
merge_write8(pbuf, dest, from);
from += sizeof(u64);
dest += sizeof(u64);
}
/* adjust carry */
if (pbuf->carry_bytes < bytes_left) {
/* need to read more */
read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes);
} else {
/* remove invalid bytes */
zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left);
}
pbuf->qw_written += qw_to_write;
}
/*
* Mid copy helper, "straight case" - source pointer is 64-bit aligned
* with no carry bytes.
*
* @pbuf: destination buffer
* @from: data source, is QWORD aligned
* @nbytes: bytes to copy
*
* Must handle nbytes < 8.
*/
static void mid_copy_straight(struct pio_buf *pbuf,
const void *from, size_t nbytes)
{
void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
void __iomem *dend; /* 8-byte data end */
/* calculate 8-byte data end */
dend = dest + ((nbytes>>3) * sizeof(u64));
if (pbuf->qw_written < PIO_BLOCK_QWS) {
/*
* Still within SOP block. We don't need to check for
* wrap because we are still in the first block and
* can only wrap on block boundaries.
*/
void __iomem *send; /* SOP end */
void __iomem *xend;
/* calculate the end of data or end of block, whichever
comes first */
send = pbuf->start + PIO_BLOCK_SIZE;
xend = send < dend ? send : dend;
/* shift up to SOP=1 space */
dest += SOP_DISTANCE;
xend += SOP_DISTANCE;
/* write 8-byte chunk data */
while (dest < xend) {
writeq(*(u64 *)from, dest);
from += sizeof(u64);
dest += sizeof(u64);
}
/* shift down to SOP=0 space */
dest -= SOP_DISTANCE;
}
/*
* At this point dest could be (either, both, or neither):
* - at dend
* - at the wrap
*/
/*
* If the wrap comes before or matches the data end,
* copy until the wrap, then wrap.
*
* If dest is at the wrap, we will fall into the if,
* not do the loop, when wrap.
*
* If the data ends at the end of the SOP above and
* the buffer wraps, then pbuf->end == dend == dest
* and nothing will get written.
*/
if (pbuf->end <= dend) {
while (dest < pbuf->end) {
writeq(*(u64 *)from, dest);
from += sizeof(u64);
dest += sizeof(u64);
}
dest -= pbuf->size;
dend -= pbuf->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
while (dest < dend) {
writeq(*(u64 *)from, dest);
from += sizeof(u64);
dest += sizeof(u64);
}
/* we know carry_bytes was zero on entry to this routine */
read_low_bytes(pbuf, from, nbytes & 0x7);
pbuf->qw_written += nbytes>>3;
}
/*
* Segmented PIO Copy - middle
*
* Must handle any aligned tail and any aligned source with any byte count.
*
* @pbuf: a number of blocks allocated within a PIO send context
* @from: data source
* @nbytes: number of bytes to copy
*/
void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
{
unsigned long from_align = (unsigned long)from & 0x7;
if (pbuf->carry_bytes + nbytes < 8) {
/* not enough bytes to fill a QW */
read_extra_bytes(pbuf, from, nbytes);
return;
}
if (from_align) {
/* misaligned source pointer - align it */
unsigned long to_align;
/* bytes to read to align "from" */
to_align = 8 - from_align;
/*
* In the advance-to-alignment logic below, we do not need
* to check if we are using more than nbytes. This is because
* if we are here, we already know that carry+nbytes will
* fill at least one QW.
*/
if (pbuf->carry_bytes + to_align < 8) {
/* not enough align bytes to fill a QW */
read_extra_bytes(pbuf, from, to_align);
from += to_align;
nbytes -= to_align;
} else {
/* bytes to fill carry */
unsigned long to_fill = 8 - pbuf->carry_bytes;
/* bytes left over to be read */
unsigned long extra = to_align - to_fill;
void __iomem *dest;
/* fill carry... */
read_extra_bytes(pbuf, from, to_fill);
from += to_fill;
nbytes -= to_fill;
/* ...now write carry */
dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
/*
* The two checks immediately below cannot both be
* true, hence the else. If we have wrapped, we
* cannot still be within the first block.
* Conversely, if we are still in the first block, we
* cannot have wrapped. We do the wrap check first
* as that is more likely.
*/
/* adjust if we've wrapped */
if (dest >= pbuf->end)
dest -= pbuf->size;
/* jump to SOP range if within the first block */
else if (pbuf->qw_written < PIO_BLOCK_QWS)
dest += SOP_DISTANCE;
carry8_write8(pbuf->carry, dest);
pbuf->qw_written++;
/* read any extra bytes to do final alignment */
/* this will overwrite anything in pbuf->carry */
read_low_bytes(pbuf, from, extra);
from += extra;
nbytes -= extra;
}
/* at this point, from is QW aligned */
}
if (pbuf->carry_bytes)
mid_copy_mix(pbuf, from, nbytes);
else
mid_copy_straight(pbuf, from, nbytes);
}
/*
* Segmented PIO Copy - end
*
* Write any remainder (in pbuf->carry) and finish writing the whole block.
*
* @pbuf: a number of blocks allocated within a PIO send context
*/
void seg_pio_copy_end(struct pio_buf *pbuf)
{
void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
/*
* The two checks immediately below cannot both be true, hence the
* else. If we have wrapped, we cannot still be within the first
* block. Conversely, if we are still in the first block, we
* cannot have wrapped. We do the wrap check first as that is
* more likely.
*/
/* adjust if we have wrapped */
if (dest >= pbuf->end)
dest -= pbuf->size;
/* jump to the SOP range if within the first block */
else if (pbuf->qw_written < PIO_BLOCK_QWS)
dest += SOP_DISTANCE;
/* write final bytes, if any */
if (carry_write8(pbuf, dest)) {
dest += sizeof(u64);
/*
* NOTE: We do not need to recalculate whether dest needs
* SOP_DISTANCE or not.
*
* If we are in the first block and the dangle write
* keeps us in the same block, dest will need
* to retain SOP_DISTANCE in the loop below.
*
* If we are in the first block and the dangle write pushes
* us to the next block, then loop below will not run
* and dest is not used. Hence we do not need to update
* it.
*
* If we are past the first block, then SOP_DISTANCE
* was never added, so there is nothing to do.
*/
}
/* fill in rest of block */
while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
writeq(0, dest);
dest += sizeof(u64);
}
/* finished with this buffer */
atomic_dec(&pbuf->sc->buffers_allocated);
}
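The three seg_pio_copy_*() routines above are designed to be called as a start/mid/end sequence against one allocated buffer, so a caller can emit a header and a separately located payload without first assembling them in a bounce buffer. A hedged sketch of that sequence; the wrapper name and the header/payload arguments are placeholders:

/* Illustrative only: streaming a header plus payload into one PIO buffer. */
static void send_segmented(struct pio_buf *pbuf, u64 pbc,
			   const void *hdr, size_t hdr_bytes,
			   const void *payload, size_t payload_bytes)
{
	/* writes the PBC and the QW-aligned header */
	seg_pio_copy_start(pbuf, pbc, hdr, hdr_bytes);

	/* payload may be unaligned and of any length; may be called repeatedly */
	seg_pio_copy_mid(pbuf, payload, payload_bytes);

	/* flush any carry bytes and pad out the final block */
	seg_pio_copy_end(pbuf);
}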

View File

@ -0,0 +1,286 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef __PLATFORM_CONFIG_H
#define __PLATFORM_CONFIG_H
#define METADATA_TABLE_FIELD_START_SHIFT 0
#define METADATA_TABLE_FIELD_START_LEN_BITS 15
#define METADATA_TABLE_FIELD_LEN_SHIFT 16
#define METADATA_TABLE_FIELD_LEN_LEN_BITS 16
/* Header structure */
#define PLATFORM_CONFIG_HEADER_RECORD_IDX_SHIFT 0
#define PLATFORM_CONFIG_HEADER_RECORD_IDX_LEN_BITS 6
#define PLATFORM_CONFIG_HEADER_TABLE_LENGTH_SHIFT 16
#define PLATFORM_CONFIG_HEADER_TABLE_LENGTH_LEN_BITS 12
#define PLATFORM_CONFIG_HEADER_TABLE_TYPE_SHIFT 28
#define PLATFORM_CONFIG_HEADER_TABLE_TYPE_LEN_BITS 4
enum platform_config_table_type_encoding {
PLATFORM_CONFIG_TABLE_RESERVED,
PLATFORM_CONFIG_SYSTEM_TABLE,
PLATFORM_CONFIG_PORT_TABLE,
PLATFORM_CONFIG_RX_PRESET_TABLE,
PLATFORM_CONFIG_TX_PRESET_TABLE,
PLATFORM_CONFIG_QSFP_ATTEN_TABLE,
PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE,
PLATFORM_CONFIG_TABLE_MAX
};
enum platform_config_system_table_fields {
SYSTEM_TABLE_RESERVED,
SYSTEM_TABLE_NODE_STRING,
SYSTEM_TABLE_SYSTEM_IMAGE_GUID,
SYSTEM_TABLE_NODE_GUID,
SYSTEM_TABLE_REVISION,
SYSTEM_TABLE_VENDOR_OUI,
SYSTEM_TABLE_META_VERSION,
SYSTEM_TABLE_DEVICE_ID,
SYSTEM_TABLE_PARTITION_ENFORCEMENT_CAP,
SYSTEM_TABLE_QSFP_POWER_CLASS_MAX,
SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_12G,
SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_25G,
SYSTEM_TABLE_VARIABLE_TABLE_ENTRIES_PER_PORT,
SYSTEM_TABLE_MAX
};
enum platform_config_port_table_fields {
PORT_TABLE_RESERVED,
PORT_TABLE_PORT_TYPE,
PORT_TABLE_ATTENUATION_12G,
PORT_TABLE_ATTENUATION_25G,
PORT_TABLE_LINK_SPEED_SUPPORTED,
PORT_TABLE_LINK_WIDTH_SUPPORTED,
PORT_TABLE_VL_CAP,
PORT_TABLE_MTU_CAP,
PORT_TABLE_TX_LANE_ENABLE_MASK,
PORT_TABLE_LOCAL_MAX_TIMEOUT,
PORT_TABLE_AUTO_LANE_SHEDDING_ENABLED,
PORT_TABLE_EXTERNAL_LOOPBACK_ALLOWED,
PORT_TABLE_TX_PRESET_IDX_PASSIVE_CU,
PORT_TABLE_TX_PRESET_IDX_ACTIVE_NO_EQ,
PORT_TABLE_TX_PRESET_IDX_ACTIVE_EQ,
PORT_TABLE_RX_PRESET_IDX,
PORT_TABLE_CABLE_REACH_CLASS,
PORT_TABLE_MAX
};
enum platform_config_rx_preset_table_fields {
RX_PRESET_TABLE_RESERVED,
RX_PRESET_TABLE_QSFP_RX_CDR_APPLY,
RX_PRESET_TABLE_QSFP_RX_EQ_APPLY,
RX_PRESET_TABLE_QSFP_RX_AMP_APPLY,
RX_PRESET_TABLE_QSFP_RX_CDR,
RX_PRESET_TABLE_QSFP_RX_EQ,
RX_PRESET_TABLE_QSFP_RX_AMP,
RX_PRESET_TABLE_MAX
};
enum platform_config_tx_preset_table_fields {
TX_PRESET_TABLE_RESERVED,
TX_PRESET_TABLE_PRECUR,
TX_PRESET_TABLE_ATTN,
TX_PRESET_TABLE_POSTCUR,
TX_PRESET_TABLE_QSFP_TX_CDR_APPLY,
TX_PRESET_TABLE_QSFP_TX_EQ_APPLY,
TX_PRESET_TABLE_QSFP_TX_CDR,
TX_PRESET_TABLE_QSFP_TX_EQ,
TX_PRESET_TABLE_MAX
};
enum platform_config_qsfp_attn_table_fields {
QSFP_ATTEN_TABLE_RESERVED,
QSFP_ATTEN_TABLE_TX_PRESET_IDX,
QSFP_ATTEN_TABLE_RX_PRESET_IDX,
QSFP_ATTEN_TABLE_MAX
};
enum platform_config_variable_settings_table_fields {
VARIABLE_SETTINGS_TABLE_RESERVED,
VARIABLE_SETTINGS_TABLE_TX_PRESET_IDX,
VARIABLE_SETTINGS_TABLE_RX_PRESET_IDX,
VARIABLE_SETTINGS_TABLE_MAX
};
struct platform_config_data {
u32 *table;
u32 *table_metadata;
u32 num_table;
};
/*
* This struct acts as a quick reference into the platform_data binary image
* and is populated by parse_platform_config(...) depending on the specific
* META_VERSION
*/
struct platform_config_cache {
u8 cache_valid;
struct platform_config_data config_tables[PLATFORM_CONFIG_TABLE_MAX];
};
static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = {
0,
SYSTEM_TABLE_MAX,
PORT_TABLE_MAX,
RX_PRESET_TABLE_MAX,
TX_PRESET_TABLE_MAX,
QSFP_ATTEN_TABLE_MAX,
VARIABLE_SETTINGS_TABLE_MAX
};
/* This section defines default values and encodings for the
* fields defined for each table above
*/
/*=====================================================
* System table encodings
*====================================================*/
#define PLATFORM_CONFIG_MAGIC_NUM 0x3d4f5041
#define PLATFORM_CONFIG_MAGIC_NUMBER_LEN 4
/*
* These power classes are the same as defined in SFF 8636 spec rev 2.4
* describing byte 129 in table 6-16, except enumerated in a different order
*/
enum platform_config_qsfp_power_class_encoding {
QSFP_POWER_CLASS_1 = 1,
QSFP_POWER_CLASS_2,
QSFP_POWER_CLASS_3,
QSFP_POWER_CLASS_4,
QSFP_POWER_CLASS_5,
QSFP_POWER_CLASS_6,
QSFP_POWER_CLASS_7
};
/*=====================================================
* Port table encodings
 *====================================================*/
enum platform_config_port_type_encoding {
PORT_TYPE_RESERVED,
PORT_TYPE_DISCONNECTED,
PORT_TYPE_FIXED,
PORT_TYPE_VARIABLE,
PORT_TYPE_QSFP,
PORT_TYPE_MAX
};
enum platform_config_link_speed_supported_encoding {
LINK_SPEED_SUPP_12G = 1,
LINK_SPEED_SUPP_25G,
LINK_SPEED_SUPP_12G_25G,
LINK_SPEED_SUPP_MAX
};
/*
* This is a subset (not strict) of the link downgrades
* supported. The link downgrades supported are expected
* to be supplied to the driver by another entity such as
* the fabric manager
*/
enum platform_config_link_width_supported_encoding {
LINK_WIDTH_SUPP_1X = 1,
LINK_WIDTH_SUPP_2X,
LINK_WIDTH_SUPP_2X_1X,
LINK_WIDTH_SUPP_3X,
LINK_WIDTH_SUPP_3X_1X,
LINK_WIDTH_SUPP_3X_2X,
LINK_WIDTH_SUPP_3X_2X_1X,
LINK_WIDTH_SUPP_4X,
LINK_WIDTH_SUPP_4X_1X,
LINK_WIDTH_SUPP_4X_2X,
LINK_WIDTH_SUPP_4X_2X_1X,
LINK_WIDTH_SUPP_4X_3X,
LINK_WIDTH_SUPP_4X_3X_1X,
LINK_WIDTH_SUPP_4X_3X_2X,
LINK_WIDTH_SUPP_4X_3X_2X_1X,
LINK_WIDTH_SUPP_MAX
};
enum platform_config_virtual_lane_capability_encoding {
VL_CAP_VL0 = 1,
VL_CAP_VL0_1,
VL_CAP_VL0_2,
VL_CAP_VL0_3,
VL_CAP_VL0_4,
VL_CAP_VL0_5,
VL_CAP_VL0_6,
VL_CAP_VL0_7,
VL_CAP_VL0_8,
VL_CAP_VL0_9,
VL_CAP_VL0_10,
VL_CAP_VL0_11,
VL_CAP_VL0_12,
VL_CAP_VL0_13,
VL_CAP_VL0_14,
VL_CAP_MAX
};
/* Max MTU */
enum platform_config_mtu_capability_encoding {
MTU_CAP_256 = 1,
MTU_CAP_512 = 2,
MTU_CAP_1024 = 3,
MTU_CAP_2048 = 4,
MTU_CAP_4096 = 5,
MTU_CAP_8192 = 6,
MTU_CAP_10240 = 7
};
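/*
 * Illustrative sketch (not part of the driver): translating the MTU
 * encoding above into a byte count.  The helper name is hypothetical.
 */
static inline u32 mtu_cap_to_bytes(int mtu_cap)
{
	/* MTU_CAP_256 (1) -> 256, doubling up to MTU_CAP_8192 (6) -> 8192 */
	if (mtu_cap >= MTU_CAP_256 && mtu_cap <= MTU_CAP_8192)
		return 256u << (mtu_cap - 1);
	if (mtu_cap == MTU_CAP_10240)
		return 10240;
	return 0;
}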
enum platform_config_local_max_timeout_encoding {
LOCAL_MAX_TIMEOUT_10_MS = 1,
LOCAL_MAX_TIMEOUT_100_MS,
LOCAL_MAX_TIMEOUT_1_S,
LOCAL_MAX_TIMEOUT_10_S,
LOCAL_MAX_TIMEOUT_100_S,
LOCAL_MAX_TIMEOUT_1000_S
};
#endif /*__PLATFORM_CONFIG_H*/

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,235 @@
#ifndef _QP_H
#define _QP_H
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/hash.h>
#include "verbs.h"
#define QPN_MAX (1 << 24)
#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
/*
* QPN-map pages start out as NULL; they get allocated upon
* first use and are never deallocated. This way,
* large bitmaps are not allocated unless large numbers of QPs are used.
*/
struct qpn_map {
void *page;
};
struct hfi1_qpn_table {
spinlock_t lock; /* protect changes in this struct */
unsigned flags; /* flags for QP0/1 allocated for each port */
u32 last; /* last QP number allocated */
u32 nmaps; /* size of the map table */
u16 limit;
u8 incr;
/* bit map of free QP numbers other than 0/1 */
struct qpn_map map[QPNMAP_ENTRIES];
};
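/*
 * Minimal sketch (not the driver's allocator) of the lazy scheme described
 * above for struct qpn_map: a bitmap page is only allocated the first time
 * a QPN that falls within it is requested.  The helper name is hypothetical
 * and the in-tree allocator may differ in detail.
 */
static inline void qpn_map_alloc_page_sketch(struct hfi1_qpn_table *qpt,
					     struct qpn_map *map)
{
	unsigned long page = get_zeroed_page(GFP_KERNEL);

	/*
	 * qpt->lock serializes updates; if another thread installed a page
	 * while we were allocating, free ours and keep theirs.
	 */
	spin_lock(&qpt->lock);
	if (map->page)
		free_page(page);
	else
		map->page = (void *)page;
	spin_unlock(&qpt->lock);
}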
struct hfi1_qp_ibdev {
u32 qp_table_size;
u32 qp_table_bits;
struct hfi1_qp __rcu **qp_table;
spinlock_t qpt_lock;
struct hfi1_qpn_table qpn_table;
};
static inline u32 qpn_hash(struct hfi1_qp_ibdev *dev, u32 qpn)
{
return hash_32(qpn, dev->qp_table_bits);
}
/**
* hfi1_lookup_qpn - return the QP with the given QPN
* @ibp: the ibport
* @qpn: the QP number to look up
*
* The caller must hold the rcu_read_lock(), and keep the lock until
* the returned qp is no longer in use.
*/
static inline struct hfi1_qp *hfi1_lookup_qpn(struct hfi1_ibport *ibp,
u32 qpn) __must_hold(RCU)
{
struct hfi1_qp *qp = NULL;
if (unlikely(qpn <= 1)) {
qp = rcu_dereference(ibp->qp[qpn]);
} else {
struct hfi1_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
u32 n = qpn_hash(dev->qp_dev, qpn);
for (qp = rcu_dereference(dev->qp_dev->qp_table[n]); qp;
qp = rcu_dereference(qp->next))
if (qp->ibqp.qp_num == qpn)
break;
}
return qp;
}
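/*
 * Illustrative usage (not part of the header) of the RCU contract stated
 * above: hold rcu_read_lock() across the lookup and for as long as the
 * returned QP pointer is dereferenced.
 */
static inline u32 example_lookup_qp_num(struct hfi1_ibport *ibp, u32 qpn)
{
	struct hfi1_qp *qp;
	u32 num = 0;

	rcu_read_lock();
	qp = hfi1_lookup_qpn(ibp, qpn);
	if (qp)
		num = qp->ibqp.qp_num;	/* only touch qp under the lock */
	rcu_read_unlock();
	return num;
}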
/**
* hfi1_error_qp - put a QP into the error state
* @qp: the QP to put into the error state
* @err: the receive completion error to signal if a RWQE is active
*
* Flushes both send and receive work queues.
* Returns true if last WQE event should be generated.
* The QP r_lock and s_lock should be held and interrupts disabled.
* If we are already in error state, just return.
*/
int hfi1_error_qp(struct hfi1_qp *qp, enum ib_wc_status err);
/**
* hfi1_modify_qp - modify the attributes of a queue pair
* @ibqp: the queue pair whose attributes we're modifying
* @attr: the new attributes
* @attr_mask: the mask of attributes to modify
* @udata: user data for libibverbs.so
*
* Returns 0 on success, otherwise returns an errno.
*/
int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_qp_init_attr *init_attr);
/**
* hfi1_compute_aeth - compute the AETH (syndrome + MSN)
* @qp: the queue pair to compute the AETH for
*
* Returns the AETH.
*/
__be32 hfi1_compute_aeth(struct hfi1_qp *qp);
/**
* hfi1_create_qp - create a queue pair for a device
* @ibpd: the protection domain whose device we create the queue pair for
* @init_attr: the attributes of the queue pair
* @udata: user data for libibverbs.so
*
* Returns the queue pair on success, otherwise returns an errno.
*
* Called by the ib_create_qp() core verbs function.
*/
struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
/**
* hfi1_destroy_qp - destroy a queue pair
* @ibqp: the queue pair to destroy
*
* Returns 0 on success.
*
* Note that this can be called while the QP is actively sending or
* receiving!
*/
int hfi1_destroy_qp(struct ib_qp *ibqp);
/**
* hfi1_get_credit - flush the send work queue of a QP
* @qp: the qp whose send work queue to flush
* @aeth: the Acknowledge Extended Transport Header
*
* The QP s_lock should be held.
*/
void hfi1_get_credit(struct hfi1_qp *qp, u32 aeth);
/**
* hfi1_qp_init - allocate QP tables
* @dev: a pointer to the hfi1_ibdev
*/
int hfi1_qp_init(struct hfi1_ibdev *dev);
/**
* hfi1_qp_exit - free the QP related structures
* @dev: a pointer to the hfi1_ibdev
*/
void hfi1_qp_exit(struct hfi1_ibdev *dev);
/**
* hfi1_qp_wakeup - wake up a QP stalled on the indicated event
* @qp: the QP
* @flag: the flag on which the QP is stalled
*/
void hfi1_qp_wakeup(struct hfi1_qp *qp, u32 flag);
struct sdma_engine *qp_to_sdma_engine(struct hfi1_qp *qp, u8 sc5);
struct qp_iter;
/**
* qp_iter_init - allocate an iterator for walking the QP hash list
* @dev: the hfi1_ibdev
*/
struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev);
/**
* qp_iter_next - advance the iterator to the next QP
* @iter: the iterator for the qp hash list
*/
int qp_iter_next(struct qp_iter *iter);
/**
* qp_iter_print - print the QP at the iterator's current position
* @s: the seq_file to emit the qp information on
* @iter: the iterator for the qp hash list
*/
void qp_iter_print(struct seq_file *s, struct qp_iter *iter);
/**
* qp_comm_est - handle trap with QP established
* @qp: the QP
*/
void qp_comm_est(struct hfi1_qp *qp);
#endif /* _QP_H */
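/*
 * Illustrative sketch (not part of the header): walking the QP hash list
 * with the iterator API above, e.g. for a seq_file dump.  This assumes
 * qp_iter_next() returns 0 while another QP remains and that the iterator
 * was kmalloc()-ed by qp_iter_init(); the in-tree debugfs code is the
 * authoritative user.
 */
static inline void example_dump_qps(struct seq_file *s, struct hfi1_ibdev *dev)
{
	struct qp_iter *iter = qp_iter_init(dev);

	if (!iter)
		return;
	while (!qp_iter_next(iter))
		qp_iter_print(s, iter);
	kfree(iter);
}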

View File

@ -0,0 +1,546 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/delay.h>
#include <linux/pci.h>
#include <linux/vmalloc.h>
#include "hfi.h"
#include "twsi.h"
/*
* QSFP support for hfi driver, using "Two Wire Serial Interface" driver
* in twsi.c
*/
#define I2C_MAX_RETRY 4
/*
* Unlocked i2c write. Must hold dd->qsfp_i2c_mutex.
*/
static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
int offset, void *bp, int len)
{
struct hfi1_devdata *dd = ppd->dd;
int ret, cnt;
u8 *buff = bp;
/* Make sure TWSI bus is in sane state. */
ret = hfi1_twsi_reset(dd, target);
if (ret) {
hfi1_dev_porterr(dd, ppd->port,
"I2C interface Reset for write failed\n");
return -EIO;
}
cnt = 0;
while (cnt < len) {
int wlen = len - cnt;
ret = hfi1_twsi_blk_wr(dd, target, i2c_addr, offset,
buff + cnt, wlen);
if (ret) {
/* hfi1_twsi_blk_wr() returns 1 on error, else 0 */
return -EIO;
}
offset += wlen;
cnt += wlen;
}
/* Must wait min 20us between qsfp i2c transactions */
udelay(20);
return cnt;
}
int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
void *bp, int len)
{
struct hfi1_devdata *dd = ppd->dd;
int ret;
ret = mutex_lock_interruptible(&dd->qsfp_i2c_mutex);
if (!ret) {
ret = __i2c_write(ppd, target, i2c_addr, offset, bp, len);
mutex_unlock(&dd->qsfp_i2c_mutex);
}
return ret;
}
/*
* Unlocked i2c read. Must hold dd->qsfp_i2c_mutex.
*/
static int __i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
int offset, void *bp, int len)
{
struct hfi1_devdata *dd = ppd->dd;
int ret, cnt, pass = 0;
int stuck = 0;
u8 *buff = bp;
/* Make sure TWSI bus is in sane state. */
ret = hfi1_twsi_reset(dd, target);
if (ret) {
hfi1_dev_porterr(dd, ppd->port,
"I2C interface Reset for read failed\n");
ret = -EIO;
stuck = 1;
goto exit;
}
cnt = 0;
while (cnt < len) {
int rlen = len - cnt;
ret = hfi1_twsi_blk_rd(dd, target, i2c_addr, offset,
buff + cnt, rlen);
/* Some QSFPs fail the first try; retry before giving up */
if (ret && cnt == 0 && ++pass < I2C_MAX_RETRY)
continue;
if (ret) {
/* hfi1_twsi_blk_rd() returns 1 on error, else 0 */
ret = -EIO;
goto exit;
}
offset += rlen;
cnt += rlen;
}
ret = cnt;
exit:
if (stuck)
dd_dev_err(dd, "I2C interface bus stuck non-idle\n");
if (pass >= I2C_MAX_RETRY && ret)
hfi1_dev_porterr(dd, ppd->port,
"I2C failed even retrying\n");
else if (pass)
hfi1_dev_porterr(dd, ppd->port, "I2C retries: %d\n", pass);
/* Must wait min 20us between qsfp i2c transactions */
udelay(20);
return ret;
}
int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
void *bp, int len)
{
struct hfi1_devdata *dd = ppd->dd;
int ret;
ret = mutex_lock_interruptible(&dd->qsfp_i2c_mutex);
if (!ret) {
ret = __i2c_read(ppd, target, i2c_addr, offset, bp, len);
mutex_unlock(&dd->qsfp_i2c_mutex);
}
return ret;
}
int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len)
{
int count = 0;
int offset;
int nwrite;
int ret;
u8 page;
ret = mutex_lock_interruptible(&ppd->dd->qsfp_i2c_mutex);
if (ret)
return ret;
while (count < len) {
/*
* Set the qsfp page based on a zero-based address
* and a page size of QSFP_PAGESIZE bytes.
*/
page = (u8)(addr / QSFP_PAGESIZE);
ret = __i2c_write(ppd, target, QSFP_DEV,
QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
if (ret != 1) {
hfi1_dev_porterr(
ppd->dd,
ppd->port,
"can't write QSFP_PAGE_SELECT_BYTE: %d\n", ret);
ret = -EIO;
break;
}
/* truncate write to end of page if crossing page boundary */
offset = addr % QSFP_PAGESIZE;
nwrite = len - count;
if ((offset + nwrite) > QSFP_PAGESIZE)
nwrite = QSFP_PAGESIZE - offset;
ret = __i2c_write(ppd, target, QSFP_DEV, offset, bp + count,
nwrite);
if (ret <= 0) /* stop on error or nothing written */
break;
count += ret;
addr += ret;
}
mutex_unlock(&ppd->dd->qsfp_i2c_mutex);
if (ret < 0)
return ret;
return count;
}
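/*
 * Worked example (illustrative only) of the page handling above: a
 * qsfp_write() of 20 bytes starting at flat address 250 takes two passes --
 *   pass 1: page = 250 / 256 = 0, offset = 250, nwrite limited to 6
 *   pass 2: page = 256 / 256 = 1, offset = 0,   nwrite = 14
 * so no single transaction crosses the QSFP_PAGESIZE (256 byte) boundary.
 */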
int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len)
{
int count = 0;
int offset;
int nread;
int ret;
u8 page;
ret = mutex_lock_interruptible(&ppd->dd->qsfp_i2c_mutex);
if (ret)
return ret;
while (count < len) {
/*
* Set the qsfp page based on a zero-based address
* and a page size of QSFP_PAGESIZE bytes.
*/
page = (u8)(addr / QSFP_PAGESIZE);
ret = __i2c_write(ppd, target, QSFP_DEV,
QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
if (ret != 1) {
hfi1_dev_porterr(
ppd->dd,
ppd->port,
"can't write QSFP_PAGE_SELECT_BYTE: %d\n", ret);
ret = -EIO;
break;
}
/* truncate read to end of page if crossing page boundary */
offset = addr % QSFP_PAGESIZE;
nread = len - count;
if ((offset + nread) > QSFP_PAGESIZE)
nread = QSFP_PAGESIZE - offset;
ret = __i2c_read(ppd, target, QSFP_DEV, offset, bp + count,
nread);
if (ret <= 0) /* stop on error or nothing read */
break;
count += ret;
addr += ret;
}
mutex_unlock(&ppd->dd->qsfp_i2c_mutex);
if (ret < 0)
return ret;
return count;
}
/*
* This function caches the QSFP memory range in 128 byte chunks.
* As an example, the next byte after address 255 is byte 128 from
* upper page 01H (if it exists) rather than byte 0 from lower page 00H.
*/
int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
{
u32 target = ppd->dd->hfi1_id;
int ret;
unsigned long flags;
u8 *cache = &cp->cache[0];
/* ensure sane contents on invalid reads, for cable swaps */
memset(cache, 0, (QSFP_MAX_NUM_PAGES*128));
dd_dev_info(ppd->dd, "%s: called\n", __func__);
if (!qsfp_mod_present(ppd)) {
ret = -ENODEV;
goto bail;
}
ret = qsfp_read(ppd, target, 0, cache, 256);
if (ret != 256) {
dd_dev_info(ppd->dd,
"%s: Read of pages 00H failed, expected 256, got %d\n",
__func__, ret);
goto bail;
}
/* bail if the module is not identified as QSFP (0x0C) or QSFP+ (0x0D) */
if (cache[0] != 0x0C && cache[0] != 0x0D)
goto bail;
/* Is paging enabled? */
if (!(cache[2] & 4)) {
/* Paging enabled, page 03 required */
if ((cache[195] & 0xC0) == 0xC0) {
/* all */
ret = qsfp_read(ppd, target, 384, cache + 256, 128);
if (ret <= 0 || ret != 128) {
dd_dev_info(ppd->dd, "%s: failed\n", __func__);
goto bail;
}
ret = qsfp_read(ppd, target, 640, cache + 384, 128);
if (ret <= 0 || ret != 128) {
dd_dev_info(ppd->dd, "%s: failed\n", __func__);
goto bail;
}
ret = qsfp_read(ppd, target, 896, cache + 512, 128);
if (ret <= 0 || ret != 128) {
dd_dev_info(ppd->dd, "%s: failed\n", __func__);
goto bail;
}
} else if ((cache[195] & 0x80) == 0x80) {
/* only page 2 and 3 */
ret = qsfp_read(ppd, target, 640, cache + 384, 128);
if (ret <= 0 || ret != 128) {
dd_dev_info(ppd->dd, "%s: failed\n", __func__);
goto bail;
}
ret = qsfp_read(ppd, target, 896, cache + 512, 128);
if (ret <= 0 || ret != 128) {
dd_dev_info(ppd->dd, "%s: failed\n", __func__);
goto bail;
}
} else if ((cache[195] & 0x40) == 0x40) {
/* only page 1 and 3 */
ret = qsfp_read(ppd, target, 384, cache + 256, 128);
if (ret <= 0 || ret != 128) {
dd_dev_info(ppd->dd, "%s: failed\n", __func__);
goto bail;
}
ret = qsfp_read(ppd, target, 896, cache + 512, 128);
if (ret <= 0 || ret != 128) {
dd_dev_info(ppd->dd, "%s: failed\n", __func__);
goto bail;
}
} else {
/* only page 3 */
ret = qsfp_read(ppd, target, 896, cache + 512, 128);
if (ret <= 0 || ret != 128) {
dd_dev_info(ppd->dd, "%s: failed\n", __func__);
goto bail;
}
}
}
spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
ppd->qsfp_info.cache_valid = 1;
ppd->qsfp_info.cache_refresh_required = 0;
spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
return 0;
bail:
memset(cache, 0, (QSFP_MAX_NUM_PAGES*128));
return ret;
}
const char * const hfi1_qsfp_devtech[16] = {
"850nm VCSEL", "1310nm VCSEL", "1550nm VCSEL", "1310nm FP",
"1310nm DFB", "1550nm DFB", "1310nm EML", "1550nm EML",
"Cu Misc", "1490nm DFB", "Cu NoEq", "Cu Eq",
"Undef", "Cu Active BothEq", "Cu FarEq", "Cu NearEq"
};
#define QSFP_DUMP_CHUNK 16 /* Holds longest string */
#define QSFP_DEFAULT_HDR_CNT 224
static const char *pwr_codes = "1.5W2.0W2.5W3.5W";
int qsfp_mod_present(struct hfi1_pportdata *ppd)
{
if (HFI1_CAP_IS_KSET(QSFP_ENABLED)) {
struct hfi1_devdata *dd = ppd->dd;
u64 reg;
reg = read_csr(dd,
dd->hfi1_id ? ASIC_QSFP2_IN : ASIC_QSFP1_IN);
return !(reg & QSFP_HFI0_MODPRST_N);
}
/* QSFP handling disabled: always report the cable as present */
return 1;
}
/*
* This function maps QSFP memory addresses in 128 byte chunks in the following
* fashion per the CableInfo SMA query definition in the IBA 1.3 spec/OPA Gen 1
* spec
* For addr 000-127, lower page 00h
* For addr 128-255, upper page 00h
* For addr 256-383, upper page 01h
* For addr 384-511, upper page 02h
* For addr 512-639, upper page 03h
*
* For addresses beyond this range, the corresponding portion of the data
* buffer is returned zeroed.
* For optional upper pages that are not present, the corresponding range of
* bytes in the data buffer is likewise returned zeroed.
*/
int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len,
u8 *data)
{
struct hfi1_pportdata *ppd;
u32 excess_len = 0;
int ret = 0;
if (port_num > dd->num_pports || port_num < 1) {
dd_dev_info(dd, "%s: Invalid port number %d\n",
__func__, port_num);
ret = -EINVAL;
goto set_zeroes;
}
ppd = dd->pport + (port_num - 1);
if (!qsfp_mod_present(ppd)) {
ret = -ENODEV;
goto set_zeroes;
}
if (!ppd->qsfp_info.cache_valid) {
ret = -EINVAL;
goto set_zeroes;
}
if (addr >= (QSFP_MAX_NUM_PAGES * 128)) {
ret = -ERANGE;
goto set_zeroes;
}
if ((addr + len) > (QSFP_MAX_NUM_PAGES * 128)) {
excess_len = (addr + len) - (QSFP_MAX_NUM_PAGES * 128);
memcpy(data, &ppd->qsfp_info.cache[addr], (len - excess_len));
data += (len - excess_len);
goto set_zeroes;
}
memcpy(data, &ppd->qsfp_info.cache[addr], len);
return 0;
set_zeroes:
memset(data, 0, excess_len);
return ret;
}
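/*
 * Worked example (illustrative only) of the mapping described above: a
 * CableInfo query with addr = 300, len = 8 falls entirely in upper page 01h
 * (256 <= 300 < 384) and is copied from ppd->qsfp_info.cache[300..307].
 * A query starting at or beyond QSFP_MAX_NUM_PAGES * 128 (640) returns
 * -ERANGE with the caller's buffer zeroed.
 */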
int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len)
{
u8 *cache = &ppd->qsfp_info.cache[0];
u8 bin_buff[QSFP_DUMP_CHUNK];
char lenstr[6];
int sofar, ret;
int bidx = 0;
u8 *atten = &cache[QSFP_ATTEN_OFFS];
u8 *vendor_oui = &cache[QSFP_VOUI_OFFS];
sofar = 0;
lenstr[0] = ' ';
lenstr[1] = '\0';
if (ppd->qsfp_info.cache_valid) {
if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS]))
sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]);
sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n",
pwr_codes +
(QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]) * 4));
sofar += scnprintf(buf + sofar, len - sofar, "TECH:%s%s\n",
lenstr,
hfi1_qsfp_devtech[(cache[QSFP_MOD_TECH_OFFS]) >> 4]);
sofar += scnprintf(buf + sofar, len - sofar, "Vendor:%.*s\n",
QSFP_VEND_LEN, &cache[QSFP_VEND_OFFS]);
sofar += scnprintf(buf + sofar, len - sofar, "OUI:%06X\n",
QSFP_OUI(vendor_oui));
sofar += scnprintf(buf + sofar, len - sofar, "Part#:%.*s\n",
QSFP_PN_LEN, &cache[QSFP_PN_OFFS]);
sofar += scnprintf(buf + sofar, len - sofar, "Rev:%.*s\n",
QSFP_REV_LEN, &cache[QSFP_REV_OFFS]);
if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS]))
sofar += scnprintf(buf + sofar, len - sofar,
"Atten:%d, %d\n",
QSFP_ATTEN_SDR(atten),
QSFP_ATTEN_DDR(atten));
sofar += scnprintf(buf + sofar, len - sofar, "Serial:%.*s\n",
QSFP_SN_LEN, &cache[QSFP_SN_OFFS]);
sofar += scnprintf(buf + sofar, len - sofar, "Date:%.*s\n",
QSFP_DATE_LEN, &cache[QSFP_DATE_OFFS]);
sofar += scnprintf(buf + sofar, len - sofar, "Lot:%.*s\n",
QSFP_LOT_LEN, &cache[QSFP_LOT_OFFS]);
while (bidx < QSFP_DEFAULT_HDR_CNT) {
int iidx;
memcpy(bin_buff, &cache[bidx], QSFP_DUMP_CHUNK);
for (iidx = 0; iidx < QSFP_DUMP_CHUNK; ++iidx) {
sofar += scnprintf(buf + sofar, len-sofar,
" %02X", bin_buff[iidx]);
}
sofar += scnprintf(buf + sofar, len - sofar, "\n");
bidx += QSFP_DUMP_CHUNK;
}
}
ret = sofar;
return ret;
}

View File

@ -0,0 +1,222 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
/* QSFP support common definitions, for hfi driver */
#define QSFP_DEV 0xA0
#define QSFP_PWR_LAG_MSEC 2000
#define QSFP_MODPRS_LAG_MSEC 20
/* 128 byte pages, per SFF 8636 rev 2.4 */
#define QSFP_MAX_NUM_PAGES 5
/*
* Below are masks for QSFP pins. Pins are the same for HFI0 and HFI1.
* _N means asserted low
*/
#define QSFP_HFI0_I2CCLK (1 << 0)
#define QSFP_HFI0_I2CDAT (1 << 1)
#define QSFP_HFI0_RESET_N (1 << 2)
#define QSFP_HFI0_INT_N (1 << 3)
#define QSFP_HFI0_MODPRST_N (1 << 4)
/* QSFP is paged at 256 bytes */
#define QSFP_PAGESIZE 256
/* Defined fields that Intel requires of qualified cables */
/* Byte 0 is Identifier, not checked */
/* Byte 1 is reserved "status MSB" */
/* Byte 2 is "status LSB" We only care that D2 "Flat Mem" is set. */
/*
* Rest of first 128 not used, although 127 is reserved for page select
* if module is not "Flat memory".
*/
#define QSFP_PAGE_SELECT_BYTE_OFFS 127
/* Byte 128 is Identifier: must be 0x0c for QSFP, or 0x0d for QSFP+ */
#define QSFP_MOD_ID_OFFS 128
/*
* Byte 129 is "Extended Identifier". We only care about D7,D6: Power class
* 0:1.5W, 1:2.0W, 2:2.5W, 3:3.5W
*/
#define QSFP_MOD_PWR_OFFS 129
/* Byte 130 is Connector type. Not Intel req'd */
/* Bytes 131..138 are Transceiver types, bit maps for various tech, none IB */
/* Byte 139 is encoding. code 0x01 is 8b10b. Not Intel req'd */
/* byte 140 is nominal bit-rate, in units of 100Mbits/sec Not Intel req'd */
/* Byte 141 is Extended Rate Select. Not Intel req'd */
/* Bytes 142..145 are lengths for various fiber types. Not Intel req'd */
/* Byte 146 is length for Copper. Units of 1 meter */
#define QSFP_MOD_LEN_OFFS 146
/*
* Byte 147 is Device technology. D0..3 not Intel req'd
* D4..7 select from 15 choices, translated by table:
*/
#define QSFP_MOD_TECH_OFFS 147
extern const char *const hfi1_qsfp_devtech[16];
/* Active Equalization includes fiber, copper full EQ, and copper near Eq */
#define QSFP_IS_ACTIVE(tech) ((0xA2FF >> ((tech) >> 4)) & 1)
/* Active Equalization includes fiber, copper full EQ, and copper far Eq */
#define QSFP_IS_ACTIVE_FAR(tech) ((0x32FF >> ((tech) >> 4)) & 1)
/* Attenuation should be valid for copper other than full/near Eq */
#define QSFP_HAS_ATTEN(tech) ((0x4D00 >> ((tech) >> 4)) & 1)
/* Length is only valid if technology is "copper" */
#define QSFP_IS_CU(tech) ((0xED00 >> ((tech) >> 4)) & 1)
#define QSFP_TECH_1490 9
#define QSFP_OUI(oui) (((unsigned)oui[0] << 16) | ((unsigned)oui[1] << 8) | \
oui[2])
#define QSFP_OUI_AMPHENOL 0x415048
#define QSFP_OUI_FINISAR 0x009065
#define QSFP_OUI_GORE 0x002177
/* Bytes 148..163 are Vendor Name, Left-justified Blank-filled */
#define QSFP_VEND_OFFS 148
#define QSFP_VEND_LEN 16
/* Byte 164 is IB Extended transceiver codes Bits D0..3 are SDR,DDR,QDR,EDR */
#define QSFP_IBXCV_OFFS 164
/* Bytes 165..167 are Vendor OUI number */
#define QSFP_VOUI_OFFS 165
#define QSFP_VOUI_LEN 3
/* Bytes 168..183 are Vendor Part Number, string */
#define QSFP_PN_OFFS 168
#define QSFP_PN_LEN 16
/* Bytes 184,185 are Vendor Rev. Left Justified, Blank-filled */
#define QSFP_REV_OFFS 184
#define QSFP_REV_LEN 2
/*
* Bytes 186,187 are Wavelength, if Optical. Not Intel req'd
* If copper, they are attenuation in dB:
* Byte 186 is at 2.5Gb/sec (SDR), Byte 187 at 5.0Gb/sec (DDR)
*/
#define QSFP_ATTEN_OFFS 186
#define QSFP_ATTEN_LEN 2
/* Bytes 188,189 are Wavelength tolerance, not Intel req'd */
/* Byte 190 is Max Case Temp. Not Intel req'd */
/* Byte 191 is LSB of sum of bytes 128..190. Not Intel req'd */
#define QSFP_CC_OFFS 191
/* Bytes 192..195 are Options implemented in qsfp. Not Intel req'd */
/* Bytes 196..211 are Serial Number, String */
#define QSFP_SN_OFFS 196
#define QSFP_SN_LEN 16
/* Bytes 212..219 are date-code YYMMDD (MM==1 for Jan) */
#define QSFP_DATE_OFFS 212
#define QSFP_DATE_LEN 6
/* Bytes 218,219 are optional lot-code, string */
#define QSFP_LOT_OFFS 218
#define QSFP_LOT_LEN 2
/* Bytes 220, 221 indicate monitoring options, Not Intel req'd */
/* Byte 223 is LSB of sum of bytes 192..222 */
#define QSFP_CC_EXT_OFFS 223
/*
* Interrupt flag masks
*/
#define QSFP_DATA_NOT_READY 0x01
#define QSFP_HIGH_TEMP_ALARM 0x80
#define QSFP_LOW_TEMP_ALARM 0x40
#define QSFP_HIGH_TEMP_WARNING 0x20
#define QSFP_LOW_TEMP_WARNING 0x10
#define QSFP_HIGH_VCC_ALARM 0x80
#define QSFP_LOW_VCC_ALARM 0x40
#define QSFP_HIGH_VCC_WARNING 0x20
#define QSFP_LOW_VCC_WARNING 0x10
#define QSFP_HIGH_POWER_ALARM 0x88
#define QSFP_LOW_POWER_ALARM 0x44
#define QSFP_HIGH_POWER_WARNING 0x22
#define QSFP_LOW_POWER_WARNING 0x11
#define QSFP_HIGH_BIAS_ALARM 0x88
#define QSFP_LOW_BIAS_ALARM 0x44
#define QSFP_HIGH_BIAS_WARNING 0x22
#define QSFP_LOW_BIAS_WARNING 0x11
/*
* struct qsfp_data encapsulates state of QSFP device for one port.
* It will be part of port-specific data if a board supports QSFP.
*
* Since multiple board-types use QSFP, and their pport_data structs
* differ (in the chip-specific section), we need a pointer to its head.
*
* Avoiding premature optimization, we will have one work_struct per port,
* and let the qsfp_lock arbitrate access to common resources.
*
*/
#define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3)
#define QSFP_ATTEN_SDR(attenarray) (attenarray[0])
#define QSFP_ATTEN_DDR(attenarray) (attenarray[1])
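/*
 * Illustrative example: QSFP_PWR() extracts bits D7..D6 of byte 129
 * (QSFP_MOD_PWR_OFFS), yielding a power class index 0..3.  qsfp_dump()
 * uses that index, multiplied by 4, into the concatenated string
 * "1.5W2.0W2.5W3.5W"; e.g. QSFP_PWR(0x80) == 2 selects "2.5W".
 */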
struct qsfp_data {
/* Helps to find our way */
struct hfi1_pportdata *ppd;
struct work_struct qsfp_work;
u8 cache[QSFP_MAX_NUM_PAGES*128];
spinlock_t qsfp_lock;
u8 check_interrupt_flags;
u8 qsfp_interrupt_functional;
u8 cache_valid;
u8 cache_refresh_required;
};
int refresh_qsfp_cache(struct hfi1_pportdata *ppd,
struct qsfp_data *cp);
int qsfp_mod_present(struct hfi1_pportdata *ppd);
int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr,
u32 len, u8 *data);
int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
int offset, void *bp, int len);
int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
int offset, void *bp, int len);
int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
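/*
 * Illustrative usage (not part of the header): pulling the vendor serial
 * number out of the cached EEPROM image once refresh_qsfp_cache() has
 * marked it valid.  The helper name is hypothetical and locking of
 * qsfp_lock is elided for brevity.
 */
static inline void example_copy_qsfp_serial(struct qsfp_data *cp,
					    char buf[QSFP_SN_LEN + 1])
{
	if (cp->cache_valid) {
		memcpy(buf, &cp->cache[QSFP_SN_OFFS], QSFP_SN_LEN);
		buf[QSFP_SN_LEN] = '\0';
	} else {
		buf[0] = '\0';
	}
}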

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,948 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/spinlock.h>
#include "hfi.h"
#include "mad.h"
#include "qp.h"
#include "sdma.h"
/*
* Convert the AETH RNR timeout code into the number of microseconds.
*/
const u32 ib_hfi1_rnr_table[32] = {
655360, /* 00: 655.36 */
10, /* 01: .01 */
20, /* 02: .02 */
30, /* 03: .03 */
40, /* 04: .04 */
60, /* 05: .06 */
80, /* 06: .08 */
120, /* 07: .12 */
160, /* 08: .16 */
240, /* 09: .24 */
320, /* 0A: .32 */
480, /* 0B: .48 */
640, /* 0C: .64 */
960, /* 0D: .96 */
1280, /* 0E: 1.28 */
1920, /* 0F: 1.92 */
2560, /* 10: 2.56 */
3840, /* 11: 3.84 */
5120, /* 12: 5.12 */
7680, /* 13: 7.68 */
10240, /* 14: 10.24 */
15360, /* 15: 15.36 */
20480, /* 16: 20.48 */
30720, /* 17: 30.72 */
40960, /* 18: 40.96 */
61440, /* 19: 61.44 */
81920, /* 1A: 81.92 */
122880, /* 1B: 122.88 */
163840, /* 1C: 163.84 */
245760, /* 1D: 245.76 */
327680, /* 1E: 327.68 */
491520 /* 1F: 491.52 */
};
/*
* Validate a RWQE and fill in the SGE state.
* Return 1 if OK.
*/
static int init_sge(struct hfi1_qp *qp, struct hfi1_rwqe *wqe)
{
int i, j, ret;
struct ib_wc wc;
struct hfi1_lkey_table *rkt;
struct hfi1_pd *pd;
struct hfi1_sge_state *ss;
rkt = &to_idev(qp->ibqp.device)->lk_table;
pd = to_ipd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
ss = &qp->r_sge;
ss->sg_list = qp->r_sg_list;
qp->r_len = 0;
for (i = j = 0; i < wqe->num_sge; i++) {
if (wqe->sg_list[i].length == 0)
continue;
/* Check LKEY */
if (!hfi1_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
&wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
goto bad_lkey;
qp->r_len += wqe->sg_list[i].length;
j++;
}
ss->num_sge = j;
ss->total_len = qp->r_len;
ret = 1;
goto bail;
bad_lkey:
while (j) {
struct hfi1_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
hfi1_put_mr(sge->mr);
}
ss->num_sge = 0;
memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr_id;
wc.status = IB_WC_LOC_PROT_ERR;
wc.opcode = IB_WC_RECV;
wc.qp = &qp->ibqp;
/* Signal solicited completion event. */
hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
ret = 0;
bail:
return ret;
}
/**
* hfi1_get_rwqe - copy the next RWQE into the QP's RWQE
* @qp: the QP
* @wr_id_only: update qp->r_wr_id only, not qp->r_sge
*
* Return -1 if there is a local error, 0 if no RWQE is available,
* otherwise return 1.
*
* Can be called from interrupt level.
*/
int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only)
{
unsigned long flags;
struct hfi1_rq *rq;
struct hfi1_rwq *wq;
struct hfi1_srq *srq;
struct hfi1_rwqe *wqe;
void (*handler)(struct ib_event *, void *);
u32 tail;
int ret;
if (qp->ibqp.srq) {
srq = to_isrq(qp->ibqp.srq);
handler = srq->ibsrq.event_handler;
rq = &srq->rq;
} else {
srq = NULL;
handler = NULL;
rq = &qp->r_rq;
}
spin_lock_irqsave(&rq->lock, flags);
if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK)) {
ret = 0;
goto unlock;
}
wq = rq->wq;
tail = wq->tail;
/* Validate tail before using it since it is user writable. */
if (tail >= rq->size)
tail = 0;
if (unlikely(tail == wq->head)) {
ret = 0;
goto unlock;
}
/* Make sure entry is read after head index is read. */
smp_rmb();
wqe = get_rwqe_ptr(rq, tail);
/*
* Even though we update the tail index in memory, the verbs
* consumer is not supposed to post more entries until a
* completion is generated.
*/
if (++tail >= rq->size)
tail = 0;
wq->tail = tail;
if (!wr_id_only && !init_sge(qp, wqe)) {
ret = -1;
goto unlock;
}
qp->r_wr_id = wqe->wr_id;
ret = 1;
set_bit(HFI1_R_WRID_VALID, &qp->r_aflags);
if (handler) {
u32 n;
/*
* Validate head pointer value and compute
* the number of remaining WQEs.
*/
n = wq->head;
if (n >= rq->size)
n = 0;
if (n < tail)
n += rq->size - tail;
else
n -= tail;
if (n < srq->limit) {
struct ib_event ev;
srq->limit = 0;
spin_unlock_irqrestore(&rq->lock, flags);
ev.device = qp->ibqp.device;
ev.element.srq = qp->ibqp.srq;
ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
handler(&ev, srq->ibsrq.srq_context);
goto bail;
}
}
unlock:
spin_unlock_irqrestore(&rq->lock, flags);
bail:
return ret;
}
/*
* Switch to alternate path.
* The QP s_lock should be held and interrupts disabled.
*/
void hfi1_migrate_qp(struct hfi1_qp *qp)
{
struct ib_event ev;
qp->s_mig_state = IB_MIG_MIGRATED;
qp->remote_ah_attr = qp->alt_ah_attr;
qp->port_num = qp->alt_ah_attr.port_num;
qp->s_pkey_index = qp->s_alt_pkey_index;
qp->s_flags |= HFI1_S_AHG_CLEAR;
ev.device = qp->ibqp.device;
ev.element.qp = &qp->ibqp;
ev.event = IB_EVENT_PATH_MIG;
qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
}
static __be64 get_sguid(struct hfi1_ibport *ibp, unsigned index)
{
if (!index) {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
return cpu_to_be64(ppd->guid);
}
return ibp->guids[index - 1];
}
static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
{
return (gid->global.interface_id == id &&
(gid->global.subnet_prefix == gid_prefix ||
gid->global.subnet_prefix == IB_DEFAULT_GID_PREFIX));
}
/*
*
* This should be called with the QP r_lock held.
*
* The s_lock will be acquired around the hfi1_migrate_qp() call.
*/
int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
int has_grh, struct hfi1_qp *qp, u32 bth0)
{
__be64 guid;
unsigned long flags;
u8 sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
if (!has_grh) {
if (qp->alt_ah_attr.ah_flags & IB_AH_GRH)
goto err;
} else {
if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
goto err;
guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
goto err;
if (!gid_ok(&hdr->u.l.grh.sgid,
qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
qp->alt_ah_attr.grh.dgid.global.interface_id))
goto err;
}
if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
sc5, be16_to_cpu(hdr->lrh[3])))) {
hfi1_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY,
(u16)bth0,
(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
0, qp->ibqp.qp_num,
hdr->lrh[3], hdr->lrh[1]);
goto err;
}
/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
goto err;
spin_lock_irqsave(&qp->s_lock, flags);
hfi1_migrate_qp(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
} else {
if (!has_grh) {
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
goto err;
} else {
if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH))
goto err;
guid = get_sguid(ibp,
qp->remote_ah_attr.grh.sgid_index);
if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
goto err;
if (!gid_ok(&hdr->u.l.grh.sgid,
qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
qp->remote_ah_attr.grh.dgid.global.interface_id))
goto err;
}
if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
sc5, be16_to_cpu(hdr->lrh[3])))) {
hfi1_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY,
(u16)bth0,
(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
0, qp->ibqp.qp_num,
hdr->lrh[3], hdr->lrh[1]);
goto err;
}
/* Validate the SLID. See Ch. 9.6.1.5 */
if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid ||
ppd_from_ibp(ibp)->port != qp->port_num)
goto err;
if (qp->s_mig_state == IB_MIG_REARM &&
!(bth0 & IB_BTH_MIG_REQ))
qp->s_mig_state = IB_MIG_ARMED;
}
return 0;
err:
return 1;
}
/**
* ruc_loopback - handle UC and RC loopback requests
* @sqp: the sending QP
*
* This is called from hfi1_do_send() to
* forward a WQE addressed to the same HFI.
* Note that although we are single threaded due to the tasklet, we still
* have to protect against post_send(). We don't have to worry about
* receive interrupts since this is a connected protocol and all packets
* will pass through here.
*/
static void ruc_loopback(struct hfi1_qp *sqp)
{
struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
struct hfi1_qp *qp;
struct hfi1_swqe *wqe;
struct hfi1_sge *sge;
unsigned long flags;
struct ib_wc wc;
u64 sdata;
atomic64_t *maddr;
enum ib_wc_status send_status;
int release;
int ret;
rcu_read_lock();
/*
* Note that we check the responder QP state after
* checking the requester's state.
*/
qp = hfi1_lookup_qpn(ibp, sqp->remote_qpn);
spin_lock_irqsave(&sqp->s_lock, flags);
/* Return if we are already busy processing a work request. */
if ((sqp->s_flags & (HFI1_S_BUSY | HFI1_S_ANY_WAIT)) ||
!(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_OR_FLUSH_SEND))
goto unlock;
sqp->s_flags |= HFI1_S_BUSY;
again:
if (sqp->s_last == sqp->s_head)
goto clr_busy;
wqe = get_swqe_ptr(sqp, sqp->s_last);
/* Return if it is not OK to start a new work request. */
if (!(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_NEXT_SEND_OK)) {
if (!(ib_hfi1_state_ops[sqp->state] & HFI1_FLUSH_SEND))
goto clr_busy;
/* We are in the error state, flush the work request. */
send_status = IB_WC_WR_FLUSH_ERR;
goto flush_send;
}
/*
* We can rely on the entry not changing without the s_lock
* being held until we update s_last.
* We increment s_cur to indicate s_last is in progress.
*/
if (sqp->s_last == sqp->s_cur) {
if (++sqp->s_cur >= sqp->s_size)
sqp->s_cur = 0;
}
spin_unlock_irqrestore(&sqp->s_lock, flags);
if (!qp || !(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) ||
qp->ibqp.qp_type != sqp->ibqp.qp_type) {
ibp->n_pkt_drops++;
/*
* For RC, the requester would timeout and retry so
* shortcut the timeouts and just signal too many retries.
*/
if (sqp->ibqp.qp_type == IB_QPT_RC)
send_status = IB_WC_RETRY_EXC_ERR;
else
send_status = IB_WC_SUCCESS;
goto serr;
}
memset(&wc, 0, sizeof(wc));
send_status = IB_WC_SUCCESS;
release = 1;
sqp->s_sge.sge = wqe->sg_list[0];
sqp->s_sge.sg_list = wqe->sg_list + 1;
sqp->s_sge.num_sge = wqe->wr.num_sge;
sqp->s_len = wqe->length;
switch (wqe->wr.opcode) {
case IB_WR_SEND_WITH_IMM:
wc.wc_flags = IB_WC_WITH_IMM;
wc.ex.imm_data = wqe->wr.ex.imm_data;
/* FALLTHROUGH */
case IB_WR_SEND:
ret = hfi1_get_rwqe(qp, 0);
if (ret < 0)
goto op_err;
if (!ret)
goto rnr_nak;
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
goto inv_err;
wc.wc_flags = IB_WC_WITH_IMM;
wc.ex.imm_data = wqe->wr.ex.imm_data;
ret = hfi1_get_rwqe(qp, 1);
if (ret < 0)
goto op_err;
if (!ret)
goto rnr_nak;
/* FALLTHROUGH */
case IB_WR_RDMA_WRITE:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
goto inv_err;
if (wqe->length == 0)
break;
if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_WRITE)))
goto acc_err;
qp->r_sge.sg_list = NULL;
qp->r_sge.num_sge = 1;
qp->r_sge.total_len = wqe->length;
break;
case IB_WR_RDMA_READ:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
goto inv_err;
if (unlikely(!hfi1_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_READ)))
goto acc_err;
release = 0;
sqp->s_sge.sg_list = NULL;
sqp->s_sge.num_sge = 1;
qp->r_sge.sge = wqe->sg_list[0];
qp->r_sge.sg_list = wqe->sg_list + 1;
qp->r_sge.num_sge = wqe->wr.num_sge;
qp->r_sge.total_len = wqe->length;
break;
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
goto inv_err;
if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
wqe->wr.wr.atomic.remote_addr,
wqe->wr.wr.atomic.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto acc_err;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
sdata = wqe->wr.wr.atomic.compare_add;
*(u64 *) sqp->s_sge.sge.vaddr =
(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
(u64) atomic64_add_return(sdata, maddr) - sdata :
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
sdata, wqe->wr.wr.atomic.swap);
hfi1_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
goto send_comp;
default:
send_status = IB_WC_LOC_QP_OP_ERR;
goto serr;
}
sge = &sqp->s_sge.sge;
while (sqp->s_len) {
u32 len = sqp->s_len;
if (len > sge->length)
len = sge->length;
if (len > sge->sge_length)
len = sge->sge_length;
WARN_ON_ONCE(len == 0);
hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (!release)
hfi1_put_mr(sge->mr);
if (--sqp->s_sge.num_sge)
*sge = *sqp->s_sge.sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
if (++sge->n >= HFI1_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
}
sge->vaddr =
sge->mr->map[sge->m]->segs[sge->n].vaddr;
sge->length =
sge->mr->map[sge->m]->segs[sge->n].length;
}
sqp->s_len -= len;
}
if (release)
hfi1_put_ss(&qp->r_sge);
if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
goto send_comp;
if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
else
wc.opcode = IB_WC_RECV;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
wqe->wr.send_flags & IB_SEND_SOLICITED);
send_comp:
spin_lock_irqsave(&sqp->s_lock, flags);
ibp->n_loop_pkts++;
flush_send:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
hfi1_send_complete(sqp, wqe, send_status);
goto again;
rnr_nak:
/* Handle RNR NAK */
if (qp->ibqp.qp_type == IB_QPT_UC)
goto send_comp;
ibp->n_rnr_naks++;
/*
* Note: we don't need the s_lock held since the BUSY flag
* makes this single threaded.
*/
if (sqp->s_rnr_retry == 0) {
send_status = IB_WC_RNR_RETRY_EXC_ERR;
goto serr;
}
if (sqp->s_rnr_retry_cnt < 7)
sqp->s_rnr_retry--;
spin_lock_irqsave(&sqp->s_lock, flags);
if (!(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_RECV_OK))
goto clr_busy;
sqp->s_flags |= HFI1_S_WAIT_RNR;
sqp->s_timer.function = hfi1_rc_rnr_retry;
sqp->s_timer.expires = jiffies +
usecs_to_jiffies(ib_hfi1_rnr_table[qp->r_min_rnr_timer]);
add_timer(&sqp->s_timer);
goto clr_busy;
op_err:
send_status = IB_WC_REM_OP_ERR;
wc.status = IB_WC_LOC_QP_OP_ERR;
goto err;
inv_err:
send_status = IB_WC_REM_INV_REQ_ERR;
wc.status = IB_WC_LOC_QP_OP_ERR;
goto err;
acc_err:
send_status = IB_WC_REM_ACCESS_ERR;
wc.status = IB_WC_LOC_PROT_ERR;
err:
/* responder goes to error state */
hfi1_rc_error(qp, wc.status);
serr:
spin_lock_irqsave(&sqp->s_lock, flags);
hfi1_send_complete(sqp, wqe, send_status);
if (sqp->ibqp.qp_type == IB_QPT_RC) {
int lastwqe = hfi1_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
sqp->s_flags &= ~HFI1_S_BUSY;
spin_unlock_irqrestore(&sqp->s_lock, flags);
if (lastwqe) {
struct ib_event ev;
ev.device = sqp->ibqp.device;
ev.element.qp = &sqp->ibqp;
ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
}
goto done;
}
clr_busy:
sqp->s_flags &= ~HFI1_S_BUSY;
unlock:
spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
rcu_read_unlock();
}
/**
* hfi1_make_grh - construct a GRH header
* @ibp: a pointer to the IB port
* @hdr: a pointer to the GRH header being constructed
* @grh: the global route address to send to
* @hwords: the number of 32 bit words of header being sent
* @nwords: the number of 32 bit words of data being sent
*
* Return the size of the header in 32 bit words.
*/
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
struct ib_global_route *grh, u32 hwords, u32 nwords)
{
hdr->version_tclass_flow =
cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
(grh->traffic_class << IB_GRH_TCLASS_SHIFT) |
(grh->flow_label << IB_GRH_FLOW_SHIFT));
hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
/* next_hdr is defined by C8-7 in ch. 8.4.1 */
hdr->next_hdr = IB_GRH_NEXT_HDR;
hdr->hop_limit = grh->hop_limit;
/* The SGID is 32-bit aligned. */
hdr->sgid.global.subnet_prefix = ibp->gid_prefix;
hdr->sgid.global.interface_id =
grh->sgid_index && grh->sgid_index < ARRAY_SIZE(ibp->guids) ?
ibp->guids[grh->sgid_index - 1] :
cpu_to_be64(ppd_from_ibp(ibp)->guid);
hdr->dgid = grh->dgid;
/* GRH header size in 32-bit words. */
return sizeof(struct ib_grh) / sizeof(u32);
}
/*
* clear_ahg - clear the AHG state of a QP
*/
void clear_ahg(struct hfi1_qp *qp)
{
qp->s_hdr->ahgcount = 0;
qp->s_flags &= ~(HFI1_S_AHG_VALID | HFI1_S_AHG_CLEAR);
if (qp->s_sde)
sdma_ahg_free(qp->s_sde, qp->s_ahgidx);
qp->s_ahgidx = -1;
qp->s_sde = NULL;
}
#define BTH2_OFFSET (offsetof(struct hfi1_pio_header, hdr.u.oth.bth[2]) / 4)
/**
* build_ahg - create ahg in s_hdr
* @qp: a pointer to QP
* @npsn: the next PSN for the request/response
*
* This routine handles the AHG by allocating an ahg entry and causing the
* header of the first middle packet to be copied.
*
* Subsequent middles use the copied entry, editing the
* PSN with 1 or 2 edits.
*/
static inline void build_ahg(struct hfi1_qp *qp, u32 npsn)
{
if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR))
clear_ahg(qp);
if (!(qp->s_flags & HFI1_S_AHG_VALID)) {
/* first middle that needs copy */
if (qp->s_ahgidx < 0) {
if (!qp->s_sde)
qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc);
qp->s_ahgidx = sdma_ahg_alloc(qp->s_sde);
}
if (qp->s_ahgidx >= 0) {
qp->s_ahgpsn = npsn;
qp->s_hdr->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
/* save to protect a change in another thread */
qp->s_hdr->sde = qp->s_sde;
qp->s_hdr->ahgidx = qp->s_ahgidx;
qp->s_flags |= HFI1_S_AHG_VALID;
}
} else {
/* subsequent middle after valid */
if (qp->s_ahgidx >= 0) {
qp->s_hdr->tx_flags |= SDMA_TXREQ_F_USE_AHG;
qp->s_hdr->ahgidx = qp->s_ahgidx;
qp->s_hdr->ahgcount++;
qp->s_hdr->ahgdesc[0] =
sdma_build_ahg_descriptor(
(__force u16)cpu_to_be16((u16)npsn),
BTH2_OFFSET,
16,
16);
if ((npsn & 0xffff0000) !=
(qp->s_ahgpsn & 0xffff0000)) {
qp->s_hdr->ahgcount++;
qp->s_hdr->ahgdesc[1] =
sdma_build_ahg_descriptor(
(__force u16)cpu_to_be16(
(u16)(npsn >> 16)),
BTH2_OFFSET,
0,
16);
}
}
}
}
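/*
 * Worked example (illustrative only) of the "1 or 2 edits" note above: if
 * the copied header was built for s_ahgpsn = 0x0001fffe and the next middle
 * uses npsn = 0x0001ffff, only the low 16 bits of the PSN differ, so a
 * single descriptor is queued.  For npsn = 0x00020000 the upper 16 bits
 * change as well ((npsn & 0xffff0000) != (s_ahgpsn & 0xffff0000)), so a
 * second descriptor updating the high half of BTH2 is added.
 */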
void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
u32 bth0, u32 bth2, int middle)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
u16 lrh0;
u32 nwords;
u32 extra_bytes;
u8 sc5;
u32 bth1;
/* Construct the header. */
extra_bytes = -qp->s_cur_size & 3;
nwords = (qp->s_cur_size + extra_bytes) >> 2;
lrh0 = HFI1_LRH_BTH;
if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
qp->s_hdrwords += hfi1_make_grh(ibp, &qp->s_hdr->ibh.u.l.grh,
&qp->remote_ah_attr.grh,
qp->s_hdrwords, nwords);
lrh0 = HFI1_LRH_GRH;
middle = 0;
}
sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
lrh0 |= (sc5 & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
qp->s_sc = sc5;
/*
* reset s_hdr/AHG fields
*
* This ensures that the ahgentry/ahgcount
* are at a non-AHG default to protect
* build_verbs_tx_desc() from using
* a stale ahgidx.
*
* build_ahg() will modify as appropriate
* to use the AHG feature.
*/
qp->s_hdr->tx_flags = 0;
qp->s_hdr->ahgcount = 0;
qp->s_hdr->ahgidx = 0;
qp->s_hdr->sde = NULL;
if (qp->s_mig_state == IB_MIG_MIGRATED)
bth0 |= IB_BTH_MIG_REQ;
else
middle = 0;
if (middle)
build_ahg(qp, bth2);
else
qp->s_flags &= ~HFI1_S_AHG_VALID;
qp->s_hdr->ibh.lrh[0] = cpu_to_be16(lrh0);
qp->s_hdr->ibh.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
qp->s_hdr->ibh.lrh[2] =
cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
qp->s_hdr->ibh.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
qp->remote_ah_attr.src_path_bits);
bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
bth0 |= extra_bytes << 20;
ohdr->bth[0] = cpu_to_be32(bth0);
bth1 = qp->remote_qpn;
if (qp->s_flags & HFI1_S_ECN) {
qp->s_flags &= ~HFI1_S_ECN;
/* we recently received a FECN, so return a BECN */
bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT);
}
ohdr->bth[1] = cpu_to_be32(bth1);
ohdr->bth[2] = cpu_to_be32(bth2);
}
/**
* hfi1_do_send - perform a send on a QP
* @work: contains a pointer to the QP
*
* Process entries in the send work queue until credit or queue is
* exhausted. Only allow one CPU to send a packet per QP (tasklet).
* Otherwise, two threads could send packets out of order.
*/
void hfi1_do_send(struct work_struct *work)
{
struct iowait *wait = container_of(work, struct iowait, iowork);
struct hfi1_qp *qp = container_of(wait, struct hfi1_qp, s_iowait);
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
int (*make_req)(struct hfi1_qp *qp);
unsigned long flags;
if ((qp->ibqp.qp_type == IB_QPT_RC ||
qp->ibqp.qp_type == IB_QPT_UC) &&
!loopback &&
(qp->remote_ah_attr.dlid & ~((1 << ppd->lmc) - 1)) == ppd->lid) {
ruc_loopback(qp);
return;
}
if (qp->ibqp.qp_type == IB_QPT_RC)
make_req = hfi1_make_rc_req;
else if (qp->ibqp.qp_type == IB_QPT_UC)
make_req = hfi1_make_uc_req;
else
make_req = hfi1_make_ud_req;
spin_lock_irqsave(&qp->s_lock, flags);
/* Return if we are already busy processing a work request. */
if (!hfi1_send_ok(qp)) {
spin_unlock_irqrestore(&qp->s_lock, flags);
return;
}
qp->s_flags |= HFI1_S_BUSY;
spin_unlock_irqrestore(&qp->s_lock, flags);
do {
/* Check for a constructed packet to be sent. */
if (qp->s_hdrwords != 0) {
/*
* If the packet cannot be sent now, return and
* the send tasklet will be woken up later.
*/
if (hfi1_verbs_send(qp, qp->s_hdr, qp->s_hdrwords,
qp->s_cur_sge, qp->s_cur_size))
break;
/* Record that s_hdr is empty. */
qp->s_hdrwords = 0;
}
} while (make_req(qp));
}
/*
* This should be called with s_lock held.
*/
void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe,
enum ib_wc_status status)
{
u32 old_last, last;
unsigned i;
if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_OR_FLUSH_SEND))
return;
for (i = 0; i < wqe->wr.num_sge; i++) {
struct hfi1_sge *sge = &wqe->sg_list[i];
hfi1_put_mr(sge->mr);
}
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
/* See ch. 11.2.4.1 and 10.7.3.1 */
if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED) ||
status != IB_WC_SUCCESS) {
struct ib_wc wc;
memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr.wr_id;
wc.status = status;
wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
wc.qp = &qp->ibqp;
if (status == IB_WC_SUCCESS)
wc.byte_len = wqe->length;
hfi1_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
status != IB_WC_SUCCESS);
}
last = qp->s_last;
old_last = last;
if (++last >= qp->s_size)
last = 0;
qp->s_last = last;
if (qp->s_acked == old_last)
qp->s_acked = last;
if (qp->s_cur == old_last)
qp->s_cur = last;
if (qp->s_tail == old_last)
qp->s_tail = last;
if (qp->state == IB_QPS_SQD && last == qp->s_cur)
qp->s_draining = 0;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,397 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "verbs.h"
/**
* hfi1_post_srq_receive - post a receive on a shared receive queue
* @ibsrq: the SRQ to post the receive on
* @wr: the list of work requests to post
* @bad_wr: A pointer to the first WR to cause a problem is put here
*
* This may be called from interrupt context.
*/
int hfi1_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr)
{
struct hfi1_srq *srq = to_isrq(ibsrq);
struct hfi1_rwq *wq;
unsigned long flags;
int ret;
for (; wr; wr = wr->next) {
struct hfi1_rwqe *wqe;
u32 next;
int i;
if ((unsigned) wr->num_sge > srq->rq.max_sge) {
*bad_wr = wr;
ret = -EINVAL;
goto bail;
}
spin_lock_irqsave(&srq->rq.lock, flags);
wq = srq->rq.wq;
next = wq->head + 1;
if (next >= srq->rq.size)
next = 0;
if (next == wq->tail) {
spin_unlock_irqrestore(&srq->rq.lock, flags);
*bad_wr = wr;
ret = -ENOMEM;
goto bail;
}
wqe = get_rwqe_ptr(&srq->rq, wq->head);
wqe->wr_id = wr->wr_id;
wqe->num_sge = wr->num_sge;
for (i = 0; i < wr->num_sge; i++)
wqe->sg_list[i] = wr->sg_list[i];
/* Make sure queue entry is written before the head index. */
smp_wmb();
wq->head = next;
spin_unlock_irqrestore(&srq->rq.lock, flags);
}
ret = 0;
bail:
return ret;
}
/**
* hfi1_create_srq - create a shared receive queue
* @ibpd: the protection domain of the SRQ to create
* @srq_init_attr: the attributes of the SRQ
* @udata: data from libibverbs when creating a user SRQ
*/
struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata)
{
struct hfi1_ibdev *dev = to_idev(ibpd->device);
struct hfi1_srq *srq;
u32 sz;
struct ib_srq *ret;
if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
ret = ERR_PTR(-ENOSYS);
goto done;
}
if (srq_init_attr->attr.max_sge == 0 ||
srq_init_attr->attr.max_sge > hfi1_max_srq_sges ||
srq_init_attr->attr.max_wr == 0 ||
srq_init_attr->attr.max_wr > hfi1_max_srq_wrs) {
ret = ERR_PTR(-EINVAL);
goto done;
}
srq = kmalloc(sizeof(*srq), GFP_KERNEL);
if (!srq) {
ret = ERR_PTR(-ENOMEM);
goto done;
}
/*
* Need to use vmalloc() if we want to support large #s of entries.
*/
srq->rq.size = srq_init_attr->attr.max_wr + 1;
srq->rq.max_sge = srq_init_attr->attr.max_sge;
sz = sizeof(struct ib_sge) * srq->rq.max_sge +
sizeof(struct hfi1_rwqe);
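/*
 * Each ring entry is a struct hfi1_rwqe followed by max_sge SGEs,
 * so sz is the per-entry stride used by get_rwqe_ptr().
 */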
srq->rq.wq = vmalloc_user(sizeof(struct hfi1_rwq) + srq->rq.size * sz);
if (!srq->rq.wq) {
ret = ERR_PTR(-ENOMEM);
goto bail_srq;
}
/*
* Return the address of the RWQ as the offset to mmap.
* See hfi1_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
int err;
u32 s = sizeof(struct hfi1_rwq) + srq->rq.size * sz;
srq->ip =
hfi1_create_mmap_info(dev, s, ibpd->uobject->context,
srq->rq.wq);
if (!srq->ip) {
ret = ERR_PTR(-ENOMEM);
goto bail_wq;
}
err = ib_copy_to_udata(udata, &srq->ip->offset,
sizeof(srq->ip->offset));
if (err) {
ret = ERR_PTR(err);
goto bail_ip;
}
} else
srq->ip = NULL;
/*
* ib_create_srq() will initialize srq->ibsrq.
*/
spin_lock_init(&srq->rq.lock);
srq->rq.wq->head = 0;
srq->rq.wq->tail = 0;
srq->limit = srq_init_attr->attr.srq_limit;
spin_lock(&dev->n_srqs_lock);
if (dev->n_srqs_allocated == hfi1_max_srqs) {
spin_unlock(&dev->n_srqs_lock);
ret = ERR_PTR(-ENOMEM);
goto bail_ip;
}
dev->n_srqs_allocated++;
spin_unlock(&dev->n_srqs_lock);
if (srq->ip) {
spin_lock_irq(&dev->pending_lock);
list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
spin_unlock_irq(&dev->pending_lock);
}
ret = &srq->ibsrq;
goto done;
bail_ip:
kfree(srq->ip);
bail_wq:
vfree(srq->rq.wq);
bail_srq:
kfree(srq);
done:
return ret;
}
/**
* hfi1_modify_srq - modify a shared receive queue
* @ibsrq: the SRQ to modify
* @attr: the new attributes of the SRQ
* @attr_mask: indicates which attributes to modify
* @udata: user data for libibverbs.so
*/
int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask,
struct ib_udata *udata)
{
struct hfi1_srq *srq = to_isrq(ibsrq);
struct hfi1_rwq *wq;
int ret = 0;
if (attr_mask & IB_SRQ_MAX_WR) {
struct hfi1_rwq *owq;
struct hfi1_rwqe *p;
u32 sz, size, n, head, tail;
/* Check that the requested sizes are below the limits. */
if ((attr->max_wr > hfi1_max_srq_wrs) ||
((attr_mask & IB_SRQ_LIMIT) ?
attr->srq_limit : srq->limit) > attr->max_wr) {
ret = -EINVAL;
goto bail;
}
sz = sizeof(struct hfi1_rwqe) +
srq->rq.max_sge * sizeof(struct ib_sge);
size = attr->max_wr + 1;
wq = vmalloc_user(sizeof(struct hfi1_rwq) + size * sz);
if (!wq) {
ret = -ENOMEM;
goto bail;
}
/* Check that we can write the offset to mmap. */
if (udata && udata->inlen >= sizeof(__u64)) {
__u64 offset_addr;
__u64 offset = 0;
ret = ib_copy_from_udata(&offset_addr, udata,
sizeof(offset_addr));
if (ret)
goto bail_free;
udata->outbuf =
(void __user *) (unsigned long) offset_addr;
ret = ib_copy_to_udata(udata, &offset,
sizeof(offset));
if (ret)
goto bail_free;
}
spin_lock_irq(&srq->rq.lock);
/*
* validate head and tail pointer values and compute
* the number of remaining WQEs.
*/
owq = srq->rq.wq;
head = owq->head;
tail = owq->tail;
if (head >= srq->rq.size || tail >= srq->rq.size) {
ret = -EINVAL;
goto bail_unlock;
}
n = head;
if (n < tail)
n += srq->rq.size - tail;
else
n -= tail;
if (size <= n) {
ret = -EINVAL;
goto bail_unlock;
}
n = 0;
p = wq->wq;
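/*
 * Copy the outstanding WQEs from the old ring (tail through head)
 * into the new ring, packing them from index 0 and counting them in
 * n so the new head index can be set below.
 */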
while (tail != head) {
struct hfi1_rwqe *wqe;
int i;
wqe = get_rwqe_ptr(&srq->rq, tail);
p->wr_id = wqe->wr_id;
p->num_sge = wqe->num_sge;
for (i = 0; i < wqe->num_sge; i++)
p->sg_list[i] = wqe->sg_list[i];
n++;
p = (struct hfi1_rwqe *)((char *)p + sz);
if (++tail >= srq->rq.size)
tail = 0;
}
srq->rq.wq = wq;
srq->rq.size = size;
wq->head = n;
wq->tail = 0;
if (attr_mask & IB_SRQ_LIMIT)
srq->limit = attr->srq_limit;
spin_unlock_irq(&srq->rq.lock);
vfree(owq);
if (srq->ip) {
struct hfi1_mmap_info *ip = srq->ip;
struct hfi1_ibdev *dev = to_idev(srq->ibsrq.device);
u32 s = sizeof(struct hfi1_rwq) + size * sz;
hfi1_update_mmap_info(dev, ip, s, wq);
/*
* Return the offset to mmap.
* See hfi1_mmap() for details.
*/
if (udata && udata->inlen >= sizeof(__u64)) {
ret = ib_copy_to_udata(udata, &ip->offset,
sizeof(ip->offset));
if (ret)
goto bail;
}
/*
* Put user mapping info onto the pending list
* unless it already is on the list.
*/
spin_lock_irq(&dev->pending_lock);
if (list_empty(&ip->pending_mmaps))
list_add(&ip->pending_mmaps,
&dev->pending_mmaps);
spin_unlock_irq(&dev->pending_lock);
}
} else if (attr_mask & IB_SRQ_LIMIT) {
spin_lock_irq(&srq->rq.lock);
if (attr->srq_limit >= srq->rq.size)
ret = -EINVAL;
else
srq->limit = attr->srq_limit;
spin_unlock_irq(&srq->rq.lock);
}
goto bail;
bail_unlock:
spin_unlock_irq(&srq->rq.lock);
bail_free:
vfree(wq);
bail:
return ret;
}
int hfi1_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
struct hfi1_srq *srq = to_isrq(ibsrq);
attr->max_wr = srq->rq.size - 1;
attr->max_sge = srq->rq.max_sge;
attr->srq_limit = srq->limit;
return 0;
}
/**
* hfi1_destroy_srq - destroy a shared receive queue
* @ibsrq: the SRQ to destroy
*/
int hfi1_destroy_srq(struct ib_srq *ibsrq)
{
struct hfi1_srq *srq = to_isrq(ibsrq);
struct hfi1_ibdev *dev = to_idev(ibsrq->device);
spin_lock(&dev->n_srqs_lock);
dev->n_srqs_allocated--;
spin_unlock(&dev->n_srqs_lock);
if (srq->ip)
kref_put(&srq->ip->ref, hfi1_release_mmap_info);
else
vfree(srq->rq.wq);
kfree(srq);
return 0;
}

View File

@ -0,0 +1,739 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/ctype.h>
#include "hfi.h"
#include "mad.h"
#include "trace.h"
/*
* Start of per-port congestion control structures and support code
*/
/*
* Congestion control table size followed by table entries
*/
static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t pos, size_t count)
{
int ret;
struct hfi1_pportdata *ppd =
container_of(kobj, struct hfi1_pportdata, pport_cc_kobj);
struct cc_state *cc_state;
ret = ppd->total_cct_entry * sizeof(struct ib_cc_table_entry_shadow)
+ sizeof(__be16);
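/*
 * ret is reused here as the total size of the binary blob we expose:
 * a leading __be16 followed by total_cct_entry table entries.
 */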
if (pos > ret)
return -EINVAL;
if (count > ret - pos)
count = ret - pos;
if (!count)
return count;
rcu_read_lock();
cc_state = get_cc_state(ppd);
if (cc_state == NULL) {
rcu_read_unlock();
return -EINVAL;
}
memcpy(buf, &cc_state->cct, count);
rcu_read_unlock();
return count;
}
static void port_release(struct kobject *kobj)
{
/* nothing to do since memory is freed by hfi1_free_devdata() */
}
static struct kobj_type port_cc_ktype = {
.release = port_release,
};
static struct bin_attribute cc_table_bin_attr = {
.attr = {.name = "cc_table_bin", .mode = 0444},
.read = read_cc_table_bin,
.size = PAGE_SIZE,
};
/*
* Congestion settings: port control, control map and an array of 16
* entries for the congestion entries - increase, timer, event log
* trigger threshold and the minimum injection rate delay.
*/
static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t pos, size_t count)
{
int ret;
struct hfi1_pportdata *ppd =
container_of(kobj, struct hfi1_pportdata, pport_cc_kobj);
struct cc_state *cc_state;
ret = sizeof(struct opa_congestion_setting_attr_shadow);
if (pos > ret)
return -EINVAL;
if (count > ret - pos)
count = ret - pos;
if (!count)
return count;
rcu_read_lock();
cc_state = get_cc_state(ppd);
if (cc_state == NULL) {
rcu_read_unlock();
return -EINVAL;
}
memcpy(buf, &cc_state->cong_setting, count);
rcu_read_unlock();
return count;
}
static struct bin_attribute cc_setting_bin_attr = {
.attr = {.name = "cc_settings_bin", .mode = 0444},
.read = read_cc_setting_bin,
.size = PAGE_SIZE,
};
/* Start sc2vl */
#define HFI1_SC2VL_ATTR(N) \
static struct hfi1_sc2vl_attr hfi1_sc2vl_attr_##N = { \
.attr = { .name = __stringify(N), .mode = 0444 }, \
.sc = N \
}
struct hfi1_sc2vl_attr {
struct attribute attr;
int sc;
};
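/*
 * For reference, HFI1_SC2VL_ATTR(5); expands to
 *
 *	static struct hfi1_sc2vl_attr hfi1_sc2vl_attr_5 = {
 *		.attr = { .name = "5", .mode = 0444 },
 *		.sc = 5
 *	};
 *
 * i.e. one read-only sysfs attribute per service class.
 */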
HFI1_SC2VL_ATTR(0);
HFI1_SC2VL_ATTR(1);
HFI1_SC2VL_ATTR(2);
HFI1_SC2VL_ATTR(3);
HFI1_SC2VL_ATTR(4);
HFI1_SC2VL_ATTR(5);
HFI1_SC2VL_ATTR(6);
HFI1_SC2VL_ATTR(7);
HFI1_SC2VL_ATTR(8);
HFI1_SC2VL_ATTR(9);
HFI1_SC2VL_ATTR(10);
HFI1_SC2VL_ATTR(11);
HFI1_SC2VL_ATTR(12);
HFI1_SC2VL_ATTR(13);
HFI1_SC2VL_ATTR(14);
HFI1_SC2VL_ATTR(15);
HFI1_SC2VL_ATTR(16);
HFI1_SC2VL_ATTR(17);
HFI1_SC2VL_ATTR(18);
HFI1_SC2VL_ATTR(19);
HFI1_SC2VL_ATTR(20);
HFI1_SC2VL_ATTR(21);
HFI1_SC2VL_ATTR(22);
HFI1_SC2VL_ATTR(23);
HFI1_SC2VL_ATTR(24);
HFI1_SC2VL_ATTR(25);
HFI1_SC2VL_ATTR(26);
HFI1_SC2VL_ATTR(27);
HFI1_SC2VL_ATTR(28);
HFI1_SC2VL_ATTR(29);
HFI1_SC2VL_ATTR(30);
HFI1_SC2VL_ATTR(31);
static struct attribute *sc2vl_default_attributes[] = {
&hfi1_sc2vl_attr_0.attr,
&hfi1_sc2vl_attr_1.attr,
&hfi1_sc2vl_attr_2.attr,
&hfi1_sc2vl_attr_3.attr,
&hfi1_sc2vl_attr_4.attr,
&hfi1_sc2vl_attr_5.attr,
&hfi1_sc2vl_attr_6.attr,
&hfi1_sc2vl_attr_7.attr,
&hfi1_sc2vl_attr_8.attr,
&hfi1_sc2vl_attr_9.attr,
&hfi1_sc2vl_attr_10.attr,
&hfi1_sc2vl_attr_11.attr,
&hfi1_sc2vl_attr_12.attr,
&hfi1_sc2vl_attr_13.attr,
&hfi1_sc2vl_attr_14.attr,
&hfi1_sc2vl_attr_15.attr,
&hfi1_sc2vl_attr_16.attr,
&hfi1_sc2vl_attr_17.attr,
&hfi1_sc2vl_attr_18.attr,
&hfi1_sc2vl_attr_19.attr,
&hfi1_sc2vl_attr_20.attr,
&hfi1_sc2vl_attr_21.attr,
&hfi1_sc2vl_attr_22.attr,
&hfi1_sc2vl_attr_23.attr,
&hfi1_sc2vl_attr_24.attr,
&hfi1_sc2vl_attr_25.attr,
&hfi1_sc2vl_attr_26.attr,
&hfi1_sc2vl_attr_27.attr,
&hfi1_sc2vl_attr_28.attr,
&hfi1_sc2vl_attr_29.attr,
&hfi1_sc2vl_attr_30.attr,
&hfi1_sc2vl_attr_31.attr,
NULL
};
static ssize_t sc2vl_attr_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
struct hfi1_sc2vl_attr *sattr =
container_of(attr, struct hfi1_sc2vl_attr, attr);
struct hfi1_pportdata *ppd =
container_of(kobj, struct hfi1_pportdata, sc2vl_kobj);
struct hfi1_devdata *dd = ppd->dd;
return sprintf(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc));
}
static const struct sysfs_ops hfi1_sc2vl_ops = {
.show = sc2vl_attr_show,
};
static struct kobj_type hfi1_sc2vl_ktype = {
.release = port_release,
.sysfs_ops = &hfi1_sc2vl_ops,
.default_attrs = sc2vl_default_attributes
};
/* End sc2vl */
/* Start sl2sc */
#define HFI1_SL2SC_ATTR(N) \
static struct hfi1_sl2sc_attr hfi1_sl2sc_attr_##N = { \
.attr = { .name = __stringify(N), .mode = 0444 }, \
.sl = N \
}
struct hfi1_sl2sc_attr {
struct attribute attr;
int sl;
};
HFI1_SL2SC_ATTR(0);
HFI1_SL2SC_ATTR(1);
HFI1_SL2SC_ATTR(2);
HFI1_SL2SC_ATTR(3);
HFI1_SL2SC_ATTR(4);
HFI1_SL2SC_ATTR(5);
HFI1_SL2SC_ATTR(6);
HFI1_SL2SC_ATTR(7);
HFI1_SL2SC_ATTR(8);
HFI1_SL2SC_ATTR(9);
HFI1_SL2SC_ATTR(10);
HFI1_SL2SC_ATTR(11);
HFI1_SL2SC_ATTR(12);
HFI1_SL2SC_ATTR(13);
HFI1_SL2SC_ATTR(14);
HFI1_SL2SC_ATTR(15);
HFI1_SL2SC_ATTR(16);
HFI1_SL2SC_ATTR(17);
HFI1_SL2SC_ATTR(18);
HFI1_SL2SC_ATTR(19);
HFI1_SL2SC_ATTR(20);
HFI1_SL2SC_ATTR(21);
HFI1_SL2SC_ATTR(22);
HFI1_SL2SC_ATTR(23);
HFI1_SL2SC_ATTR(24);
HFI1_SL2SC_ATTR(25);
HFI1_SL2SC_ATTR(26);
HFI1_SL2SC_ATTR(27);
HFI1_SL2SC_ATTR(28);
HFI1_SL2SC_ATTR(29);
HFI1_SL2SC_ATTR(30);
HFI1_SL2SC_ATTR(31);
static struct attribute *sl2sc_default_attributes[] = {
&hfi1_sl2sc_attr_0.attr,
&hfi1_sl2sc_attr_1.attr,
&hfi1_sl2sc_attr_2.attr,
&hfi1_sl2sc_attr_3.attr,
&hfi1_sl2sc_attr_4.attr,
&hfi1_sl2sc_attr_5.attr,
&hfi1_sl2sc_attr_6.attr,
&hfi1_sl2sc_attr_7.attr,
&hfi1_sl2sc_attr_8.attr,
&hfi1_sl2sc_attr_9.attr,
&hfi1_sl2sc_attr_10.attr,
&hfi1_sl2sc_attr_11.attr,
&hfi1_sl2sc_attr_12.attr,
&hfi1_sl2sc_attr_13.attr,
&hfi1_sl2sc_attr_14.attr,
&hfi1_sl2sc_attr_15.attr,
&hfi1_sl2sc_attr_16.attr,
&hfi1_sl2sc_attr_17.attr,
&hfi1_sl2sc_attr_18.attr,
&hfi1_sl2sc_attr_19.attr,
&hfi1_sl2sc_attr_20.attr,
&hfi1_sl2sc_attr_21.attr,
&hfi1_sl2sc_attr_22.attr,
&hfi1_sl2sc_attr_23.attr,
&hfi1_sl2sc_attr_24.attr,
&hfi1_sl2sc_attr_25.attr,
&hfi1_sl2sc_attr_26.attr,
&hfi1_sl2sc_attr_27.attr,
&hfi1_sl2sc_attr_28.attr,
&hfi1_sl2sc_attr_29.attr,
&hfi1_sl2sc_attr_30.attr,
&hfi1_sl2sc_attr_31.attr,
NULL
};
static ssize_t sl2sc_attr_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
struct hfi1_sl2sc_attr *sattr =
container_of(attr, struct hfi1_sl2sc_attr, attr);
struct hfi1_pportdata *ppd =
container_of(kobj, struct hfi1_pportdata, sl2sc_kobj);
struct hfi1_ibport *ibp = &ppd->ibport_data;
return sprintf(buf, "%u\n", ibp->sl_to_sc[sattr->sl]);
}
static const struct sysfs_ops hfi1_sl2sc_ops = {
.show = sl2sc_attr_show,
};
static struct kobj_type hfi1_sl2sc_ktype = {
.release = port_release,
.sysfs_ops = &hfi1_sl2sc_ops,
.default_attrs = sl2sc_default_attributes
};
/* End sl2sc */
/* Start vl2mtu */
#define HFI1_VL2MTU_ATTR(N) \
static struct hfi1_vl2mtu_attr hfi1_vl2mtu_attr_##N = { \
.attr = { .name = __stringify(N), .mode = 0444 }, \
.vl = N \
}
struct hfi1_vl2mtu_attr {
struct attribute attr;
int vl;
};
HFI1_VL2MTU_ATTR(0);
HFI1_VL2MTU_ATTR(1);
HFI1_VL2MTU_ATTR(2);
HFI1_VL2MTU_ATTR(3);
HFI1_VL2MTU_ATTR(4);
HFI1_VL2MTU_ATTR(5);
HFI1_VL2MTU_ATTR(6);
HFI1_VL2MTU_ATTR(7);
HFI1_VL2MTU_ATTR(8);
HFI1_VL2MTU_ATTR(9);
HFI1_VL2MTU_ATTR(10);
HFI1_VL2MTU_ATTR(11);
HFI1_VL2MTU_ATTR(12);
HFI1_VL2MTU_ATTR(13);
HFI1_VL2MTU_ATTR(14);
HFI1_VL2MTU_ATTR(15);
static struct attribute *vl2mtu_default_attributes[] = {
&hfi1_vl2mtu_attr_0.attr,
&hfi1_vl2mtu_attr_1.attr,
&hfi1_vl2mtu_attr_2.attr,
&hfi1_vl2mtu_attr_3.attr,
&hfi1_vl2mtu_attr_4.attr,
&hfi1_vl2mtu_attr_5.attr,
&hfi1_vl2mtu_attr_6.attr,
&hfi1_vl2mtu_attr_7.attr,
&hfi1_vl2mtu_attr_8.attr,
&hfi1_vl2mtu_attr_9.attr,
&hfi1_vl2mtu_attr_10.attr,
&hfi1_vl2mtu_attr_11.attr,
&hfi1_vl2mtu_attr_12.attr,
&hfi1_vl2mtu_attr_13.attr,
&hfi1_vl2mtu_attr_14.attr,
&hfi1_vl2mtu_attr_15.attr,
NULL
};
static ssize_t vl2mtu_attr_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
struct hfi1_vl2mtu_attr *vlattr =
container_of(attr, struct hfi1_vl2mtu_attr, attr);
struct hfi1_pportdata *ppd =
container_of(kobj, struct hfi1_pportdata, vl2mtu_kobj);
struct hfi1_devdata *dd = ppd->dd;
return sprintf(buf, "%u\n", dd->vld[vlattr->vl].mtu);
}
static const struct sysfs_ops hfi1_vl2mtu_ops = {
.show = vl2mtu_attr_show,
};
static struct kobj_type hfi1_vl2mtu_ktype = {
.release = port_release,
.sysfs_ops = &hfi1_vl2mtu_ops,
.default_attrs = vl2mtu_default_attributes
};
/* end of per-port file structures and support code */
/*
* Start of per-unit (or driver, in some cases, but replicated
* per unit) functions (these get a device *)
*/
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, ibdev.dev);
return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
}
static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, ibdev.dev);
struct hfi1_devdata *dd = dd_from_dev(dev);
int ret;
if (!dd->boardname)
ret = -EINVAL;
else
ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
return ret;
}
static ssize_t show_boardversion(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, ibdev.dev);
struct hfi1_devdata *dd = dd_from_dev(dev);
/* The string printed here is already newline-terminated. */
return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
}
static ssize_t show_nctxts(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, ibdev.dev);
struct hfi1_devdata *dd = dd_from_dev(dev);
/*
* Return the smaller of send and receive contexts.
* Normally, user level applications would require both a send
* and a receive context, so returning the smaller of the two counts
* gives a more accurate picture of the total contexts available.
*/
return scnprintf(buf, PAGE_SIZE, "%u\n",
min(dd->num_rcv_contexts - dd->first_user_ctxt,
(u32)dd->sc_sizes[SC_USER].count));
}
static ssize_t show_nfreectxts(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, ibdev.dev);
struct hfi1_devdata *dd = dd_from_dev(dev);
/* Return the number of free user ports (contexts) available. */
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
}
static ssize_t show_serial(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, ibdev.dev);
struct hfi1_devdata *dd = dd_from_dev(dev);
return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
}
static ssize_t store_chip_reset(struct device *device,
struct device_attribute *attr, const char *buf,
size_t count)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, ibdev.dev);
struct hfi1_devdata *dd = dd_from_dev(dev);
int ret;
if (count < 5 || memcmp(buf, "reset", 5) || !dd->diag_client) {
ret = -EINVAL;
goto bail;
}
ret = hfi1_reset_device(dd->unit);
bail:
return ret < 0 ? ret : count;
}
/*
* Convert the reported temperature from an integer (reported in
* units of 0.25C) to a floating point number.
*/
#define temp2str(temp, buf, size, idx) \
scnprintf((buf) + (idx), (size) - (idx), "%u.%02u ", \
((temp) >> 2), ((temp) & 0x3) * 25)
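/*
 * Worked example (illustrative): a raw reading of 0x66 (102 decimal)
 * yields 102 >> 2 = 25 and (102 & 0x3) * 25 = 50, i.e. the string
 * "25.50 " for 25.5 degrees C.
 */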
/*
* Dump tempsense values, in decimal, to ease shell-scripts.
*/
static ssize_t show_tempsense(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, ibdev.dev);
struct hfi1_devdata *dd = dd_from_dev(dev);
struct hfi1_temp temp;
int ret = -ENXIO;
ret = hfi1_tempsense_rd(dd, &temp);
if (!ret) {
int idx = 0;
idx += temp2str(temp.curr, buf, PAGE_SIZE, idx);
idx += temp2str(temp.lo_lim, buf, PAGE_SIZE, idx);
idx += temp2str(temp.hi_lim, buf, PAGE_SIZE, idx);
idx += temp2str(temp.crit_lim, buf, PAGE_SIZE, idx);
idx += scnprintf(buf + idx, PAGE_SIZE - idx,
"%u %u %u\n", temp.triggers & 0x1,
temp.triggers & 0x2, temp.triggers & 0x4);
ret = idx;
}
return ret;
}
/*
* end of per-unit (or driver, in some cases, but replicated
* per unit) functions
*/
/* start of per-unit file structures and support code */
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_hfi, NULL);
static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL);
static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL);
static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
static struct device_attribute *hfi1_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_board_id,
&dev_attr_nctxts,
&dev_attr_nfreectxts,
&dev_attr_serial,
&dev_attr_boardversion,
&dev_attr_tempsense,
&dev_attr_chip_reset,
};
int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
struct kobject *kobj)
{
struct hfi1_pportdata *ppd;
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
int ret;
if (!port_num || port_num > dd->num_pports) {
dd_dev_err(dd,
"Skipping infiniband class with invalid port %u\n",
port_num);
return -ENODEV;
}
ppd = &dd->pport[port_num - 1];
ret = kobject_init_and_add(&ppd->sc2vl_kobj, &hfi1_sc2vl_ktype, kobj,
"sc2vl");
if (ret) {
dd_dev_err(dd,
"Skipping sc2vl sysfs info, (err %d) port %u\n",
ret, port_num);
goto bail;
}
kobject_uevent(&ppd->sc2vl_kobj, KOBJ_ADD);
ret = kobject_init_and_add(&ppd->sl2sc_kobj, &hfi1_sl2sc_ktype, kobj,
"sl2sc");
if (ret) {
dd_dev_err(dd,
"Skipping sl2sc sysfs info, (err %d) port %u\n",
ret, port_num);
goto bail_sc2vl;
}
kobject_uevent(&ppd->sl2sc_kobj, KOBJ_ADD);
ret = kobject_init_and_add(&ppd->vl2mtu_kobj, &hfi1_vl2mtu_ktype, kobj,
"vl2mtu");
if (ret) {
dd_dev_err(dd,
"Skipping vl2mtu sysfs info, (err %d) port %u\n",
ret, port_num);
goto bail_sl2sc;
}
kobject_uevent(&ppd->vl2mtu_kobj, KOBJ_ADD);
ret = kobject_init_and_add(&ppd->pport_cc_kobj, &port_cc_ktype,
kobj, "CCMgtA");
if (ret) {
dd_dev_err(dd,
"Skipping Congestion Control sysfs info, (err %d) port %u\n",
ret, port_num);
goto bail_vl2mtu;
}
kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
&cc_setting_bin_attr);
if (ret) {
dd_dev_err(dd,
"Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
ret, port_num);
goto bail_cc;
}
ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
&cc_table_bin_attr);
if (ret) {
dd_dev_err(dd,
"Skipping Congestion Control table sysfs info, (err %d) port %u\n",
ret, port_num);
goto bail_cc_entry_bin;
}
dd_dev_info(dd,
"IB%u: Congestion Control Agent enabled for port %d\n",
dd->unit, port_num);
return 0;
bail_cc_entry_bin:
sysfs_remove_bin_file(&ppd->pport_cc_kobj,
&cc_setting_bin_attr);
bail_cc:
kobject_put(&ppd->pport_cc_kobj);
bail_vl2mtu:
kobject_put(&ppd->vl2mtu_kobj);
bail_sl2sc:
kobject_put(&ppd->sl2sc_kobj);
bail_sc2vl:
kobject_put(&ppd->sc2vl_kobj);
bail:
return ret;
}
/*
* Register and create our files in /sys/class/infiniband.
*/
int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
{
struct ib_device *dev = &dd->verbs_dev.ibdev;
int i, ret;
for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
ret = device_create_file(&dev->dev, hfi1_attributes[i]);
if (ret)
goto bail;
}
return 0;
bail:
for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i)
device_remove_file(&dev->dev, hfi1_attributes[i]);
return ret;
}
/*
* Unregister and remove our files in /sys/class/infiniband.
*/
void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd)
{
struct hfi1_pportdata *ppd;
int i;
for (i = 0; i < dd->num_pports; i++) {
ppd = &dd->pport[i];
sysfs_remove_bin_file(&ppd->pport_cc_kobj,
&cc_setting_bin_attr);
sysfs_remove_bin_file(&ppd->pport_cc_kobj,
&cc_table_bin_attr);
kobject_put(&ppd->pport_cc_kobj);
kobject_put(&ppd->vl2mtu_kobj);
kobject_put(&ppd->sl2sc_kobj);
kobject_put(&ppd->sc2vl_kobj);
}
}

View File

@ -0,0 +1,221 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#define CREATE_TRACE_POINTS
#include "trace.h"
u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr)
{
struct hfi1_other_headers *ohdr;
u8 opcode;
u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
if (lnh == HFI1_LRH_BTH)
ohdr = &hdr->u.oth;
else
ohdr = &hdr->u.l.oth;
opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
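/*
 * hdr_len_by_opcode[] includes the 8-byte LRH and 12-byte BTH, so
 * subtract them to leave only the extended header length.
 */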
return hdr_len_by_opcode[opcode] == 0 ?
0 : hdr_len_by_opcode[opcode] - (12 + 8);
}
#define IMM_PRN "imm %d"
#define RETH_PRN "reth vaddr 0x%.16llx rkey 0x%.8x dlen 0x%.8x"
#define AETH_PRN "aeth syn 0x%.2x msn 0x%.8x"
#define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x"
#define ATOMICACKETH_PRN "origdata %lld"
#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %lld cdata %lld"
#define OP(transport, op) IB_OPCODE_## transport ## _ ## op
static u64 ib_u64_get(__be32 *p)
{
return ((u64)be32_to_cpu(p[0]) << 32) | be32_to_cpu(p[1]);
}
const char *parse_everbs_hdrs(
struct trace_seq *p,
u8 opcode,
void *ehdrs)
{
union ib_ehdrs *eh = ehdrs;
const char *ret = trace_seq_buffer_ptr(p);
switch (opcode) {
/* imm */
case OP(RC, SEND_LAST_WITH_IMMEDIATE):
case OP(UC, SEND_LAST_WITH_IMMEDIATE):
case OP(RC, SEND_ONLY_WITH_IMMEDIATE):
case OP(UC, SEND_ONLY_WITH_IMMEDIATE):
case OP(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE):
case OP(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE):
trace_seq_printf(p, IMM_PRN,
be32_to_cpu(eh->imm_data));
break;
/* reth + imm */
case OP(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
case OP(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
trace_seq_printf(p, RETH_PRN " " IMM_PRN,
(unsigned long long)ib_u64_get(
(__be32 *)&eh->rc.reth.vaddr),
be32_to_cpu(eh->rc.reth.rkey),
be32_to_cpu(eh->rc.reth.length),
be32_to_cpu(eh->rc.imm_data));
break;
/* reth */
case OP(RC, RDMA_READ_REQUEST):
case OP(RC, RDMA_WRITE_FIRST):
case OP(UC, RDMA_WRITE_FIRST):
case OP(RC, RDMA_WRITE_ONLY):
case OP(UC, RDMA_WRITE_ONLY):
trace_seq_printf(p, RETH_PRN,
(unsigned long long)ib_u64_get(
(__be32 *)&eh->rc.reth.vaddr),
be32_to_cpu(eh->rc.reth.rkey),
be32_to_cpu(eh->rc.reth.length));
break;
case OP(RC, RDMA_READ_RESPONSE_FIRST):
case OP(RC, RDMA_READ_RESPONSE_LAST):
case OP(RC, RDMA_READ_RESPONSE_ONLY):
case OP(RC, ACKNOWLEDGE):
trace_seq_printf(p, AETH_PRN,
be32_to_cpu(eh->aeth) >> 24,
be32_to_cpu(eh->aeth) & HFI1_QPN_MASK);
break;
/* aeth + atomicacketh */
case OP(RC, ATOMIC_ACKNOWLEDGE):
trace_seq_printf(p, AETH_PRN " " ATOMICACKETH_PRN,
(be32_to_cpu(eh->at.aeth) >> 24) & 0xff,
be32_to_cpu(eh->at.aeth) & HFI1_QPN_MASK,
(unsigned long long)ib_u64_get(eh->at.atomic_ack_eth));
break;
/* atomiceth */
case OP(RC, COMPARE_SWAP):
case OP(RC, FETCH_ADD):
trace_seq_printf(p, ATOMICETH_PRN,
(unsigned long long)ib_u64_get(eh->atomic_eth.vaddr),
eh->atomic_eth.rkey,
(unsigned long long)ib_u64_get(
(__be32 *)&eh->atomic_eth.swap_data),
(unsigned long long) ib_u64_get(
(__be32 *)&eh->atomic_eth.compare_data));
break;
/* deth */
case OP(UD, SEND_ONLY):
case OP(UD, SEND_ONLY_WITH_IMMEDIATE):
trace_seq_printf(p, DETH_PRN,
be32_to_cpu(eh->ud.deth[0]),
be32_to_cpu(eh->ud.deth[1]) & HFI1_QPN_MASK);
break;
}
trace_seq_putc(p, 0);
return ret;
}
const char *parse_sdma_flags(
struct trace_seq *p,
u64 desc0, u64 desc1)
{
const char *ret = trace_seq_buffer_ptr(p);
char flags[5] = { 'x', 'x', 'x', 'x', 0 };
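/*
 * flags[]: 'I' = interrupt request, 'H' = head-to-host, 'F' = first
 * descriptor, 'L' = last descriptor; '-' when the corresponding bit
 * is clear (e.g. "I-FL" is a single-descriptor request with an
 * interrupt requested).
 */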
flags[0] = (desc1 & SDMA_DESC1_INT_REQ_FLAG) ? 'I' : '-';
flags[1] = (desc1 & SDMA_DESC1_HEAD_TO_HOST_FLAG) ? 'H' : '-';
flags[2] = (desc0 & SDMA_DESC0_FIRST_DESC_FLAG) ? 'F' : '-';
flags[3] = (desc0 & SDMA_DESC0_LAST_DESC_FLAG) ? 'L' : '-';
trace_seq_printf(p, "%s", flags);
if (desc0 & SDMA_DESC0_FIRST_DESC_FLAG)
trace_seq_printf(p, " amode:%u aidx:%u alen:%u",
(u8)((desc1 >> SDMA_DESC1_HEADER_MODE_SHIFT)
& SDMA_DESC1_HEADER_MODE_MASK),
(u8)((desc1 >> SDMA_DESC1_HEADER_INDEX_SHIFT)
& SDMA_DESC1_HEADER_INDEX_MASK),
(u8)((desc1 >> SDMA_DESC1_HEADER_DWS_SHIFT)
& SDMA_DESC1_HEADER_DWS_MASK));
return ret;
}
const char *print_u32_array(
struct trace_seq *p,
u32 *arr, int len)
{
int i;
const char *ret = trace_seq_buffer_ptr(p);
for (i = 0; i < len ; i++)
trace_seq_printf(p, "%s%#x", i == 0 ? "" : " ", arr[i]);
trace_seq_putc(p, 0);
return ret;
}
const char *print_u64_array(
struct trace_seq *p,
u64 *arr, int len)
{
int i;
const char *ret = trace_seq_buffer_ptr(p);
for (i = 0; i < len; i++)
trace_seq_printf(p, "%s0x%016llx", i == 0 ? "" : " ", arr[i]);
trace_seq_putc(p, 0);
return ret;
}
__hfi1_trace_fn(PKT);
__hfi1_trace_fn(PROC);
__hfi1_trace_fn(SDMA);
__hfi1_trace_fn(LINKVERB);
__hfi1_trace_fn(DEBUG);
__hfi1_trace_fn(SNOOP);
__hfi1_trace_fn(CNTR);
__hfi1_trace_fn(PIO);
__hfi1_trace_fn(DC8051);
__hfi1_trace_fn(FIRMWARE);
__hfi1_trace_fn(RCVCTRL);
__hfi1_trace_fn(TID);

File diff suppressed because it is too large

View File

@ -0,0 +1,518 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/delay.h>
#include <linux/pci.h>
#include <linux/vmalloc.h>
#include "hfi.h"
#include "twsi.h"
/*
* "Two Wire Serial Interface" support.
*
* Originally written for a not-quite-i2c serial eeprom, which is
* still used on some supported boards. Later boards have added a
* variety of other uses, most board-specific, so the bit-banging
* part has been split off to this file, while the other parts
* have been moved to chip-specific files.
*
* We have also dropped all pretense of a fully generic (e.g. pretend
* we don't know whether '1' is the higher voltage) interface, as
* the restrictions of the generic i2c interface (e.g. no access from
* driver itself) make it unsuitable for this use.
*/
#define READ_CMD 1
#define WRITE_CMD 0
/**
* i2c_wait_for_writes - wait for a write
* @dd: the hfi1_ib device
*
* We use this instead of udelay directly, so we can make sure
* that previous register writes have been flushed all the way
* to the chip. Since we are delaying anyway, the cost doesn't
* hurt, and it makes the bit twiddling more regular.
*/
static void i2c_wait_for_writes(struct hfi1_devdata *dd, u32 target)
{
/*
* implicit read of EXTStatus is as good as explicit
* read of scratch, if all we want to do is flush
* writes.
*/
hfi1_gpio_mod(dd, target, 0, 0, 0);
rmb(); /* inlined, so prevent compiler reordering */
}
/*
* QSFP modules are allowed to hold SCL low for 500uSec. Allow twice that
* for "almost compliant" modules
*/
#define SCL_WAIT_USEC 1000
/* BUF_WAIT is time bus must be free between STOP or ACK and to next START.
* Should be 20, but some chips need more.
*/
#define TWSI_BUF_WAIT_USEC 60
static void scl_out(struct hfi1_devdata *dd, u32 target, u8 bit)
{
u32 mask;
udelay(1);
mask = QSFP_HFI0_I2CCLK;
/* SCL is meant to be bare-drain, so never set "OUT", just DIR */
hfi1_gpio_mod(dd, target, 0, bit ? 0 : mask, mask);
/*
* Allow for slow slaves by simple
* delay for falling edge, sampling on rise.
*/
if (!bit)
udelay(2);
else {
int rise_usec;
for (rise_usec = SCL_WAIT_USEC; rise_usec > 0; rise_usec -= 2) {
if (mask & hfi1_gpio_mod(dd, target, 0, 0, 0))
break;
udelay(2);
}
if (rise_usec <= 0)
dd_dev_err(dd, "SCL interface stuck low > %d uSec\n",
SCL_WAIT_USEC);
}
i2c_wait_for_writes(dd, target);
}
static void sda_out(struct hfi1_devdata *dd, u32 target, u8 bit)
{
u32 mask;
mask = QSFP_HFI0_I2CDAT;
/* SDA is meant to be bare-drain, so never set "OUT", just DIR */
hfi1_gpio_mod(dd, target, 0, bit ? 0 : mask, mask);
i2c_wait_for_writes(dd, target);
udelay(2);
}
static u8 sda_in(struct hfi1_devdata *dd, u32 target, int wait)
{
u32 read_val, mask;
mask = QSFP_HFI0_I2CDAT;
/* SDA is meant to be bare-drain, so never set "OUT", just DIR */
hfi1_gpio_mod(dd, target, 0, 0, mask);
read_val = hfi1_gpio_mod(dd, target, 0, 0, 0);
if (wait)
i2c_wait_for_writes(dd, target);
return (read_val & mask) >> GPIO_SDA_NUM;
}
/**
* i2c_ackrcv - see if ack following write is true
* @dd: the hfi1_ib device
*/
static int i2c_ackrcv(struct hfi1_devdata *dd, u32 target)
{
u8 ack_received;
/* AT ENTRY SCL = LOW */
/* change direction, ignore data */
ack_received = sda_in(dd, target, 1);
scl_out(dd, target, 1);
ack_received = sda_in(dd, target, 1) == 0;
scl_out(dd, target, 0);
return ack_received;
}
static void stop_cmd(struct hfi1_devdata *dd, u32 target);
/**
* rd_byte - read a byte, sending STOP on last, else ACK
* @dd: the hfi1_ib device
*
* Returns byte shifted out of device
*/
static int rd_byte(struct hfi1_devdata *dd, u32 target, int last)
{
int bit_cntr, data;
data = 0;
for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) {
data <<= 1;
scl_out(dd, target, 1);
data |= sda_in(dd, target, 0);
scl_out(dd, target, 0);
}
if (last) {
scl_out(dd, target, 1);
stop_cmd(dd, target);
} else {
sda_out(dd, target, 0);
scl_out(dd, target, 1);
scl_out(dd, target, 0);
sda_out(dd, target, 1);
}
return data;
}
/**
* wr_byte - write a byte, one bit at a time
* @dd: the hfi1_ib device
* @data: the byte to write
*
* Returns 0 if we got the following ack, otherwise 1
*/
static int wr_byte(struct hfi1_devdata *dd, u32 target, u8 data)
{
int bit_cntr;
u8 bit;
for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) {
bit = (data >> bit_cntr) & 1;
sda_out(dd, target, bit);
scl_out(dd, target, 1);
scl_out(dd, target, 0);
}
return (!i2c_ackrcv(dd, target)) ? 1 : 0;
}
/*
* issue TWSI start sequence:
* (both clock/data high, clock high, data low while clock is high)
*/
static void start_seq(struct hfi1_devdata *dd, u32 target)
{
sda_out(dd, target, 1);
scl_out(dd, target, 1);
sda_out(dd, target, 0);
udelay(1);
scl_out(dd, target, 0);
}
/**
* stop_seq - transmit the stop sequence
* @dd: the hfi1_ib device
*
* (both clock/data low, clock high, data high while clock is high)
*/
static void stop_seq(struct hfi1_devdata *dd, u32 target)
{
scl_out(dd, target, 0);
sda_out(dd, target, 0);
scl_out(dd, target, 1);
sda_out(dd, target, 1);
}
/**
* stop_cmd - transmit the stop condition
* @dd: the hfi1_ib device
*
* (both clock/data low, clock high, data high while clock is high)
*/
static void stop_cmd(struct hfi1_devdata *dd, u32 target)
{
stop_seq(dd, target);
udelay(TWSI_BUF_WAIT_USEC);
}
/**
* hfi1_twsi_reset - reset I2C communication
* @dd: the hfi1_ib device
*/
int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target)
{
int clock_cycles_left = 9;
int was_high = 0;
u32 pins, mask;
/* Both SCL and SDA should be high. If not, there
* is something wrong.
*/
mask = QSFP_HFI0_I2CCLK | QSFP_HFI0_I2CDAT;
/*
* Force pins to desired innocuous state.
* This is the default power-on state with out=0 and dir=0,
* so it is tri-stated and should be floating high (barring HW problems).
*/
hfi1_gpio_mod(dd, target, 0, 0, mask);
/*
* Clock nine times to get all listeners into a sane state.
* If SDA does not go high at any point, we are wedged.
* One vendor recommends then issuing START followed by STOP.
* We cannot use our "normal" functions to do that, because
* if SCL drops between them, another vendor's part will
* wedge, dropping SDA and keeping it low forever, at the end of
* the next transaction (even if it was not the device addressed).
* So our START and STOP take place with SCL held high.
*/
while (clock_cycles_left--) {
scl_out(dd, target, 0);
scl_out(dd, target, 1);
/* Note if SDA is high, but keep clocking to sync slave */
was_high |= sda_in(dd, target, 0);
}
if (was_high) {
/*
* We saw a high, which we hope means the slave is sync'd.
* Issue START, STOP, pause for T_BUF.
*/
pins = hfi1_gpio_mod(dd, target, 0, 0, 0);
if ((pins & mask) != mask)
dd_dev_err(dd, "GPIO pins not at rest: %d\n",
pins & mask);
/* Drop SDA to issue START */
udelay(1); /* Guarantee .6 uSec setup */
sda_out(dd, target, 0);
udelay(1); /* Guarantee .6 uSec hold */
/* At this point, SCL is high, SDA low. Raise SDA for STOP */
sda_out(dd, target, 1);
udelay(TWSI_BUF_WAIT_USEC);
}
return !was_high;
}
#define HFI1_TWSI_START 0x100
#define HFI1_TWSI_STOP 0x200
/* Write byte to TWSI, optionally prefixed with START or suffixed with
* STOP.
* returns 0 if OK (ACK received), else != 0
*/
static int twsi_wr(struct hfi1_devdata *dd, u32 target, int data, int flags)
{
int ret = 1;
if (flags & HFI1_TWSI_START)
start_seq(dd, target);
/* Leaves SCL low (from i2c_ackrcv()) */
ret = wr_byte(dd, target, data);
if (flags & HFI1_TWSI_STOP)
stop_cmd(dd, target);
return ret;
}
/* Added functionality for IBA7220-based cards */
#define HFI1_TEMP_DEV 0x98
/*
* hfi1_twsi_blk_rd
* General interface for data transfer from twsi devices.
* One vestige of its former role is that it recognizes a device
* HFI1_TWSI_NO_DEV and does the correct operation for the legacy part,
* which responded to all TWSI device codes, interpreting them as an
* address within the device. On all other devices found on boards handled by
* this driver, the device is followed by a one-byte "address" which selects
* the "register" or "offset" within the device from which data should
* be read.
*/
int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr,
void *buffer, int len)
{
int ret;
u8 *bp = buffer;
ret = 1;
if (dev == HFI1_TWSI_NO_DEV) {
/* legacy not-really-I2C */
addr = (addr << 1) | READ_CMD;
ret = twsi_wr(dd, target, addr, HFI1_TWSI_START);
} else {
/* Actual I2C */
ret = twsi_wr(dd, target, dev | WRITE_CMD, HFI1_TWSI_START);
if (ret) {
stop_cmd(dd, target);
ret = 1;
goto bail;
}
/*
* SFF spec claims we do _not_ stop after the addr
* but simply issue a start with the "read" dev-addr.
* Since we are implicitly waiting for ACK here,
* we need t_buf (nominally 20uSec) before that start,
* and cannot rely on the delay built in to the STOP
*/
ret = twsi_wr(dd, target, addr, 0);
udelay(TWSI_BUF_WAIT_USEC);
if (ret) {
dd_dev_err(dd,
"Failed to write interface read addr %02X\n",
addr);
ret = 1;
goto bail;
}
ret = twsi_wr(dd, target, dev | READ_CMD, HFI1_TWSI_START);
}
if (ret) {
stop_cmd(dd, target);
ret = 1;
goto bail;
}
/*
* Block devices keep clocking data out as long as we ack,
* automatically incrementing the address. Some have "pages"
* whose boundaries will not be crossed, but the handling
* of these is left to the caller, who is in a better
* position to know.
*/
while (len-- > 0) {
/*
* Get and store data, sending ACK if length remaining,
* else STOP
*/
*bp++ = rd_byte(dd, target, !len);
}
ret = 0;
bail:
return ret;
}
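/*
 * Illustrative only (not part of the driver): a caller holding
 * qsfp_lock could, for example, pull the two identifier bytes from a
 * module at the (hypothetical) bus address 0xA0, offset 0, with:
 *
 *	u8 id[2];
 *
 *	if (hfi1_twsi_blk_rd(dd, target, 0xA0, 0, id, 2))
 *		dd_dev_err(dd, "identifier read failed\n");
 */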
/*
* hfi1_twsi_blk_wr
* General interface for data transfer to twsi devices.
* One vestige of its former role is that it recognizes a device
* HFI1_TWSI_NO_DEV and does the correct operation for the legacy part,
* which responded to all TWSI device codes, interpreting them as an
* address within the device. On all other devices found on boards handled by
* this driver, the device is followed by a one-byte "address" which selects
* the "register" or "offset" within the device to which data should
* be written.
*/
int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr,
const void *buffer, int len)
{
int sub_len;
const u8 *bp = buffer;
int max_wait_time, i;
int ret = 1;
while (len > 0) {
if (dev == HFI1_TWSI_NO_DEV) {
if (twsi_wr(dd, target, (addr << 1) | WRITE_CMD,
HFI1_TWSI_START)) {
goto failed_write;
}
} else {
/* Real I2C */
if (twsi_wr(dd, target,
dev | WRITE_CMD, HFI1_TWSI_START))
goto failed_write;
ret = twsi_wr(dd, target, addr, 0);
if (ret) {
dd_dev_err(dd,
"Failed to write interface write addr %02X\n",
addr);
goto failed_write;
}
}
sub_len = min(len, 4);
addr += sub_len;
len -= sub_len;
for (i = 0; i < sub_len; i++)
if (twsi_wr(dd, target, *bp++, 0))
goto failed_write;
stop_cmd(dd, target);
/*
* Wait for write complete by waiting for a successful
* read (the chip replies with a zero after the write
* cmd completes, and before it writes to the eeprom).
* The startcmd for the read will fail the ack until
* the writes have completed. We do this inline to avoid
* the debug prints that are in the real read routine
* if the startcmd fails.
* We also use the proper device address, so it doesn't matter
* whether we have real eeprom_dev. Legacy likes any address.
*/
max_wait_time = 100;
while (twsi_wr(dd, target,
dev | READ_CMD, HFI1_TWSI_START)) {
stop_cmd(dd, target);
if (!--max_wait_time)
goto failed_write;
}
/* now read (and ignore) the resulting byte */
rd_byte(dd, target, 1);
}
ret = 0;
goto bail;
failed_write:
stop_cmd(dd, target);
ret = 1;
bail:
return ret;
}

View File

@ -0,0 +1,68 @@
#ifndef _TWSI_H
#define _TWSI_H
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#define HFI1_TWSI_NO_DEV 0xFF
struct hfi1_devdata;
/* Bit position of SDA pin in ASIC_QSFP* registers */
#define GPIO_SDA_NUM 1
/* these functions must be called with qsfp_lock held */
int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target);
int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr,
void *buffer, int len);
int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr,
const void *buffer, int len);
#endif /* _TWSI_H */

View File

@ -0,0 +1,585 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include "hfi.h"
#include "sdma.h"
#include "qp.h"
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_UC_##x
/**
* hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
* @qp: a pointer to the QP
*
* Return 1 if constructed; otherwise, return 0.
*/
int hfi1_make_uc_req(struct hfi1_qp *qp)
{
struct hfi1_other_headers *ohdr;
struct hfi1_swqe *wqe;
unsigned long flags;
u32 hwords = 5;
u32 bth0 = 0;
u32 len;
u32 pmtu = qp->pmtu;
int ret = 0;
int middle = 0;
spin_lock_irqsave(&qp->s_lock, flags);
if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_SEND_OK)) {
if (!(ib_hfi1_state_ops[qp->state] & HFI1_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
if (qp->s_last == qp->s_head)
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&qp->s_iowait.sdma_busy)) {
qp->s_flags |= HFI1_S_WAIT_DMA;
goto bail;
}
clear_ahg(qp);
wqe = get_swqe_ptr(qp, qp->s_last);
hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done;
}
ohdr = &qp->s_hdr->ibh.u.oth;
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
ohdr = &qp->s_hdr->ibh.u.l.oth;
/* Get the next send request. */
wqe = get_swqe_ptr(qp, qp->s_cur);
qp->s_wqe = NULL;
switch (qp->s_state) {
default:
if (!(ib_hfi1_state_ops[qp->state] &
HFI1_PROCESS_NEXT_SEND_OK))
goto bail;
/* Check if send work queue is empty. */
if (qp->s_cur == qp->s_head) {
clear_ahg(qp);
goto bail;
}
/*
* Start a new request.
*/
wqe->psn = qp->s_next_psn;
qp->s_psn = qp->s_next_psn;
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
qp->s_sge.num_sge = wqe->wr.num_sge;
qp->s_sge.total_len = wqe->length;
len = wqe->length;
qp->s_len = len;
switch (wqe->wr.opcode) {
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
if (len > pmtu) {
qp->s_state = OP(SEND_FIRST);
len = pmtu;
break;
}
if (wqe->wr.opcode == IB_WR_SEND)
qp->s_state = OP(SEND_ONLY);
else {
qp->s_state =
OP(SEND_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= IB_BTH_SOLICITED;
qp->s_wqe = wqe;
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
break;
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
ohdr->u.rc.reth.vaddr =
cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->wr.wr.rdma.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / 4;
if (len > pmtu) {
qp->s_state = OP(RDMA_WRITE_FIRST);
len = pmtu;
break;
}
if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
qp->s_state = OP(RDMA_WRITE_ONLY);
else {
qp->s_state =
OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the RETH */
ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= IB_BTH_SOLICITED;
}
qp->s_wqe = wqe;
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
break;
default:
goto bail;
}
break;
case OP(SEND_FIRST):
qp->s_state = OP(SEND_MIDDLE);
/* FALLTHROUGH */
case OP(SEND_MIDDLE):
len = qp->s_len;
if (len > pmtu) {
len = pmtu;
middle = HFI1_CAP_IS_KSET(SDMA_AHG);
break;
}
if (wqe->wr.opcode == IB_WR_SEND)
qp->s_state = OP(SEND_LAST);
else {
qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= IB_BTH_SOLICITED;
qp->s_wqe = wqe;
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
break;
case OP(RDMA_WRITE_FIRST):
qp->s_state = OP(RDMA_WRITE_MIDDLE);
/* FALLTHROUGH */
case OP(RDMA_WRITE_MIDDLE):
len = qp->s_len;
if (len > pmtu) {
len = pmtu;
middle = HFI1_CAP_IS_KSET(SDMA_AHG);
break;
}
if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
qp->s_state = OP(RDMA_WRITE_LAST);
else {
qp->s_state =
OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= IB_BTH_SOLICITED;
}
qp->s_wqe = wqe;
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
break;
}
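/*
 * Common exit for all states: account for the bytes carried by this
 * packet and let hfi1_make_ruc_header() build the LRH/BTH around the
 * opcode and PSN chosen above.
 */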
qp->s_len -= len;
qp->s_hdrwords = hwords;
qp->s_cur_sge = &qp->s_sge;
qp->s_cur_size = len;
hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
mask_psn(qp->s_next_psn++), middle);
done:
ret = 1;
goto unlock;
bail:
qp->s_flags &= ~HFI1_S_BUSY;
unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
/**
* hfi1_uc_rcv - handle an incoming UC packet
* @ibp: the port the packet came in on
* @hdr: the header of the packet
* @rcv_flags: flags relevant to rcv processing
* @data: the packet data
* @tlen: the length of the packet
* @qp: the QP for this packet.
*
* This is called from qp_rcv() to process an incoming UC packet
* for the given QP.
* Called at interrupt level.
*/
void hfi1_uc_rcv(struct hfi1_packet *packet)
{
struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
struct hfi1_ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct hfi1_qp *qp = packet->qp;
struct hfi1_other_headers *ohdr = packet->ohdr;
u32 opcode;
u32 hdrsize = packet->hlen;
u32 psn;
u32 pad;
struct ib_wc wc;
u32 pmtu = qp->pmtu;
struct ib_reth *reth;
int has_grh = rcv_flags & HFI1_HAS_GRH;
int ret;
u32 bth1;
struct ib_grh *grh = NULL;
opcode = be32_to_cpu(ohdr->bth[0]);
if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
return;
bth1 = be32_to_cpu(ohdr->bth[1]);
if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) {
if (bth1 & HFI1_BECN_SMASK) {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 rqpn, lqpn;
u16 rlid = be16_to_cpu(hdr->lrh[3]);
u8 sl, sc5;
lqpn = bth1 & HFI1_QPN_MASK;
rqpn = qp->remote_qpn;
sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
sl = ibp->sc_to_sl[sc5];
process_becn(ppd, sl, rlid, lqpn, rqpn,
IB_CC_SVCTYPE_UC);
}
if (bth1 & HFI1_FECN_SMASK) {
u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
u16 slid = be16_to_cpu(hdr->lrh[3]);
u16 dlid = be16_to_cpu(hdr->lrh[1]);
u32 src_qp = qp->remote_qpn;
u8 sc5;
sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5, grh);
}
}
psn = be32_to_cpu(ohdr->bth[2]);
opcode >>= 24;
/* Compare the PSN versus the expected PSN. */
if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
/*
* Handle a sequence error.
* Silently drop any current message.
*/
qp->r_psn = psn;
inv:
if (qp->r_state == OP(SEND_FIRST) ||
qp->r_state == OP(SEND_MIDDLE)) {
set_bit(HFI1_R_REWIND_SGE, &qp->r_aflags);
qp->r_sge.num_sge = 0;
} else
hfi1_put_ss(&qp->r_sge);
qp->r_state = OP(SEND_LAST);
switch (opcode) {
case OP(SEND_FIRST):
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
goto send_first;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
goto rdma_first;
default:
goto drop;
}
}
/* Check for opcode sequence errors. */
switch (qp->r_state) {
case OP(SEND_FIRST):
case OP(SEND_MIDDLE):
if (opcode == OP(SEND_MIDDLE) ||
opcode == OP(SEND_LAST) ||
opcode == OP(SEND_LAST_WITH_IMMEDIATE))
break;
goto inv;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_MIDDLE):
if (opcode == OP(RDMA_WRITE_MIDDLE) ||
opcode == OP(RDMA_WRITE_LAST) ||
opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
break;
goto inv;
default:
if (opcode == OP(SEND_FIRST) ||
opcode == OP(SEND_ONLY) ||
opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
opcode == OP(RDMA_WRITE_FIRST) ||
opcode == OP(RDMA_WRITE_ONLY) ||
opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
break;
goto inv;
}
if (qp->state == IB_QPS_RTR && !(qp->r_flags & HFI1_R_COMM_EST))
qp_comm_est(qp);
/* OK, process the packet. */
switch (opcode) {
case OP(SEND_FIRST):
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
send_first:
if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags))
qp->r_sge = qp->s_rdma_read_sge;
else {
ret = hfi1_get_rwqe(qp, 0);
if (ret < 0)
goto op_err;
if (!ret)
goto drop;
/*
* qp->s_rdma_read_sge will be the owner
* of the mr references.
*/
qp->s_rdma_read_sge = qp->r_sge;
}
qp->r_rcv_len = 0;
if (opcode == OP(SEND_ONLY))
goto no_immediate_data;
else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
goto send_last_imm;
/* FALLTHROUGH */
case OP(SEND_MIDDLE):
/* Check for invalid length: PMTU or posted rwqe len. */
if (unlikely(tlen != (hdrsize + pmtu + 4)))
goto rewind;
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto rewind;
hfi1_copy_sge(&qp->r_sge, data, pmtu, 0);
break;
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
wc.ex.imm_data = ohdr->u.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
goto send_last;
case OP(SEND_LAST):
no_immediate_data:
wc.ex.imm_data = 0;
wc.wc_flags = 0;
send_last:
/* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
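/* pad is the 2-bit PadCnt field (bits 21:20 of BTH0): 0-3 pad bytes before the ICRC */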
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
goto rewind;
/* Don't count the CRC. */
tlen -= (hdrsize + pad + 4);
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len))
goto rewind;
wc.opcode = IB_WC_RECV;
hfi1_copy_sge(&qp->r_sge, data, tlen, 0);
hfi1_put_ss(&qp->s_rdma_read_sge);
last_imm:
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
wc.slid = qp->remote_ah_attr.dlid;
/*
* It seems that IB mandates the presence of an SL in a
* work completion only for the UD transport (see section
* 11.4.2 of IBTA Vol. 1).
*
* However, the way the SL is chosen below is consistent
 * with the way that IB/qib works and tries to avoid
* introducing incompatibilities.
*
* See also OPA Vol. 1, section 9.7.6, and table 9-17.
*/
wc.sl = qp->remote_ah_attr.sl;
/* zero fields that are N/A */
wc.vendor_err = 0;
wc.pkey_index = 0;
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
cpu_to_be32(IB_BTH_SOLICITED)) != 0);
break;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
rdma_first:
if (unlikely(!(qp->qp_access_flags &
IB_ACCESS_REMOTE_WRITE))) {
goto drop;
}
reth = &ohdr->u.rc.reth;
qp->r_len = be32_to_cpu(reth->length);
qp->r_rcv_len = 0;
qp->r_sge.sg_list = NULL;
if (qp->r_len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
u64 vaddr = be64_to_cpu(reth->vaddr);
int ok;
/* Check rkey */
ok = hfi1_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
if (unlikely(!ok))
goto drop;
qp->r_sge.num_sge = 1;
} else {
qp->r_sge.num_sge = 0;
qp->r_sge.sge.mr = NULL;
qp->r_sge.sge.vaddr = NULL;
qp->r_sge.sge.length = 0;
qp->r_sge.sge.sge_length = 0;
}
if (opcode == OP(RDMA_WRITE_ONLY))
goto rdma_last;
else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) {
wc.ex.imm_data = ohdr->u.rc.imm_data;
goto rdma_last_imm;
}
/* FALLTHROUGH */
case OP(RDMA_WRITE_MIDDLE):
/* Check for invalid length: PMTU or posted rwqe len. */
if (unlikely(tlen != (hdrsize + pmtu + 4)))
goto drop;
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto drop;
hfi1_copy_sge(&qp->r_sge, data, pmtu, 1);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
wc.ex.imm_data = ohdr->u.imm_data;
rdma_last_imm:
wc.wc_flags = IB_WC_WITH_IMM;
/* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
goto drop;
/* Don't count the CRC. */
tlen -= (hdrsize + pad + 4);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags))
hfi1_put_ss(&qp->s_rdma_read_sge);
else {
ret = hfi1_get_rwqe(qp, 1);
if (ret < 0)
goto op_err;
if (!ret)
goto drop;
}
wc.byte_len = qp->r_len;
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
hfi1_copy_sge(&qp->r_sge, data, tlen, 1);
hfi1_put_ss(&qp->r_sge);
goto last_imm;
case OP(RDMA_WRITE_LAST):
rdma_last:
/* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
goto drop;
/* Don't count the CRC. */
tlen -= (hdrsize + pad + 4);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
hfi1_copy_sge(&qp->r_sge, data, tlen, 1);
hfi1_put_ss(&qp->r_sge);
break;
default:
/* Drop packet for unknown opcodes. */
goto drop;
}
qp->r_psn++;
qp->r_state = opcode;
return;
rewind:
set_bit(HFI1_R_REWIND_SGE, &qp->r_aflags);
qp->r_sge.num_sge = 0;
drop:
ibp->n_pkt_drops++;
return;
op_err:
hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
return;
}

View File

@ -0,0 +1,885 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/net.h>
#include <rdma/ib_smi.h>
#include "hfi.h"
#include "mad.h"
#include "qp.h"
/**
* ud_loopback - handle send on loopback QPs
* @sqp: the sending QP
* @swqe: the send work request
*
* This is called from hfi1_make_ud_req() to forward a WQE addressed
* to the same HFI.
* Note that the receive interrupt handler may be calling hfi1_ud_rcv()
* while this is being called.
*/
static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
{
struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
struct hfi1_pportdata *ppd;
struct hfi1_qp *qp;
struct ib_ah_attr *ah_attr;
unsigned long flags;
struct hfi1_sge_state ssge;
struct hfi1_sge *sge;
struct ib_wc wc;
u32 length;
enum ib_qp_type sqptype, dqptype;
rcu_read_lock();
qp = hfi1_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn);
if (!qp) {
ibp->n_pkt_drops++;
rcu_read_unlock();
return;
}
sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ?
IB_QPT_UD : sqp->ibqp.qp_type;
dqptype = qp->ibqp.qp_type == IB_QPT_GSI ?
IB_QPT_UD : qp->ibqp.qp_type;
if (dqptype != sqptype ||
!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK)) {
ibp->n_pkt_drops++;
goto drop;
}
ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
ppd = ppd_from_ibp(ibp);
if (qp->ibqp.qp_num > 1) {
u16 pkey;
u16 slid;
u8 sc5 = ibp->sl_to_sc[ah_attr->sl];
pkey = hfi1_get_pkey(ibp, sqp->s_pkey_index);
slid = ppd->lid | (ah_attr->src_path_bits &
((1 << ppd->lmc) - 1));
if (unlikely(ingress_pkey_check(ppd, pkey, sc5,
qp->s_pkey_index, slid))) {
hfi1_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, pkey,
ah_attr->sl,
sqp->ibqp.qp_num, qp->ibqp.qp_num,
cpu_to_be16(slid),
cpu_to_be16(ah_attr->dlid));
goto drop;
}
}
/*
* Check that the qkey matches (except for QP0, see 9.6.1.4.1).
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
*/
if (qp->ibqp.qp_num) {
u32 qkey;
qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ?
sqp->qkey : swqe->wr.wr.ud.remote_qkey;
if (unlikely(qkey != qp->qkey)) {
u16 lid;
lid = ppd->lid | (ah_attr->src_path_bits &
((1 << ppd->lmc) - 1));
hfi1_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey,
ah_attr->sl,
sqp->ibqp.qp_num, qp->ibqp.qp_num,
cpu_to_be16(lid),
cpu_to_be16(ah_attr->dlid));
goto drop;
}
}
/*
* A GRH is expected to precede the data even if not
* present on the wire.
*/
length = swqe->length;
memset(&wc, 0, sizeof(wc));
wc.byte_len = length + sizeof(struct ib_grh);
if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
wc.wc_flags = IB_WC_WITH_IMM;
wc.ex.imm_data = swqe->wr.ex.imm_data;
}
spin_lock_irqsave(&qp->r_lock, flags);
/*
* Get the next work request entry to find where to put the data.
*/
if (qp->r_flags & HFI1_R_REUSE_SGE)
qp->r_flags &= ~HFI1_R_REUSE_SGE;
else {
int ret;
ret = hfi1_get_rwqe(qp, 0);
if (ret < 0) {
hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
goto bail_unlock;
}
if (!ret) {
if (qp->ibqp.qp_num == 0)
ibp->n_vl15_dropped++;
goto bail_unlock;
}
}
/* Silently drop packets which are too big. */
if (unlikely(wc.byte_len > qp->r_len)) {
qp->r_flags |= HFI1_R_REUSE_SGE;
ibp->n_pkt_drops++;
goto bail_unlock;
}
if (ah_attr->ah_flags & IB_AH_GRH) {
hfi1_copy_sge(&qp->r_sge, &ah_attr->grh,
sizeof(struct ib_grh), 1);
wc.wc_flags |= IB_WC_GRH;
} else
hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
ssge.sg_list = swqe->sg_list + 1;
ssge.sge = *swqe->sg_list;
ssge.num_sge = swqe->wr.num_sge;
sge = &ssge.sge;
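/*
 * Copy the payload from the sender's SGE list into the receiver's
 * rwqe, walking to the next MR segment (or the next SGE) as each
 * one is consumed.
 */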
while (length) {
u32 len = sge->length;
if (len > length)
len = length;
if (len > sge->sge_length)
len = sge->sge_length;
WARN_ON_ONCE(len == 0);
hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (--ssge.num_sge)
*sge = *ssge.sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
if (++sge->n >= HFI1_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
}
sge->vaddr =
sge->mr->map[sge->m]->segs[sge->n].vaddr;
sge->length =
sge->mr->map[sge->m]->segs[sge->n].length;
}
length -= len;
}
hfi1_put_ss(&qp->r_sge);
if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
goto bail_unlock;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
wc.qp = &qp->ibqp;
wc.src_qp = sqp->ibqp.qp_num;
if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) {
if (sqp->ibqp.qp_type == IB_QPT_GSI ||
sqp->ibqp.qp_type == IB_QPT_SMI)
wc.pkey_index = swqe->wr.wr.ud.pkey_index;
else
wc.pkey_index = sqp->s_pkey_index;
} else {
wc.pkey_index = 0;
}
wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
/* Check for loopback when the port lid is not set */
if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI)
wc.slid = HFI1_PERMISSIVE_LID;
wc.sl = ah_attr->sl;
wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
swqe->wr.send_flags & IB_SEND_SOLICITED);
ibp->n_loop_pkts++;
bail_unlock:
spin_unlock_irqrestore(&qp->r_lock, flags);
drop:
rcu_read_unlock();
}
/**
* hfi1_make_ud_req - construct a UD request packet
* @qp: the QP
*
* Return 1 if constructed; otherwise, return 0.
*/
int hfi1_make_ud_req(struct hfi1_qp *qp)
{
struct hfi1_other_headers *ohdr;
struct ib_ah_attr *ah_attr;
struct hfi1_pportdata *ppd;
struct hfi1_ibport *ibp;
struct hfi1_swqe *wqe;
unsigned long flags;
u32 nwords;
u32 extra_bytes;
u32 bth0;
u16 lrh0;
u16 lid;
int ret = 0;
int next_cur;
u8 sc5;
spin_lock_irqsave(&qp->s_lock, flags);
if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_NEXT_SEND_OK)) {
if (!(ib_hfi1_state_ops[qp->state] & HFI1_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
if (qp->s_last == qp->s_head)
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&qp->s_iowait.sdma_busy)) {
qp->s_flags |= HFI1_S_WAIT_DMA;
goto bail;
}
wqe = get_swqe_ptr(qp, qp->s_last);
hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done;
}
if (qp->s_cur == qp->s_head)
goto bail;
wqe = get_swqe_ptr(qp, qp->s_cur);
next_cur = qp->s_cur + 1;
if (next_cur >= qp->s_size)
next_cur = 0;
/* Construct the header. */
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
if (ah_attr->dlid < HFI1_MULTICAST_LID_BASE ||
ah_attr->dlid == HFI1_PERMISSIVE_LID) {
lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
if (unlikely(!loopback && (lid == ppd->lid ||
(lid == HFI1_PERMISSIVE_LID &&
qp->ibqp.qp_type == IB_QPT_GSI)))) {
/*
* If DMAs are in progress, we can't generate
* a completion for the loopback packet since
* it would be out of order.
* Instead of waiting, we could queue a
* zero length descriptor so we get a callback.
*/
if (atomic_read(&qp->s_iowait.sdma_busy)) {
qp->s_flags |= HFI1_S_WAIT_DMA;
goto bail;
}
qp->s_cur = next_cur;
spin_unlock_irqrestore(&qp->s_lock, flags);
ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, flags);
hfi1_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done;
}
}
qp->s_cur = next_cur;
extra_bytes = -wqe->length & 3;
nwords = (wqe->length + extra_bytes) >> 2;
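/* e.g. a 5-byte payload gives extra_bytes = 3 and nwords = 2 */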
/* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
qp->s_hdrwords = 7;
qp->s_cur_size = wqe->length;
qp->s_cur_sge = &qp->s_sge;
qp->s_srate = ah_attr->static_rate;
qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
qp->s_wqe = wqe;
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
qp->s_sge.num_sge = wqe->wr.num_sge;
qp->s_sge.total_len = wqe->length;
if (ah_attr->ah_flags & IB_AH_GRH) {
/* Header size in 32-bit words. */
qp->s_hdrwords += hfi1_make_grh(ibp, &qp->s_hdr->ibh.u.l.grh,
&ah_attr->grh,
qp->s_hdrwords, nwords);
lrh0 = HFI1_LRH_GRH;
ohdr = &qp->s_hdr->ibh.u.l.oth;
/*
* Don't worry about sending to locally attached multicast
 * QPs; what happens in that case is unspecified by the spec.
*/
} else {
/* Header size in 32-bit words. */
lrh0 = HFI1_LRH_BTH;
ohdr = &qp->s_hdr->ibh.u.oth;
}
if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
qp->s_hdrwords++;
ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
} else
bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
sc5 = ibp->sl_to_sc[ah_attr->sl];
lrh0 |= (ah_attr->sl & 0xf) << 4;
if (qp->ibqp.qp_type == IB_QPT_SMI) {
lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
qp->s_sc = 0xf;
} else {
lrh0 |= (sc5 & 0xf) << 12;
qp->s_sc = sc5;
}
qp->s_hdr->ibh.lrh[0] = cpu_to_be16(lrh0);
qp->s_hdr->ibh.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
qp->s_hdr->ibh.lrh[2] =
cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE))
qp->s_hdr->ibh.lrh[3] = IB_LID_PERMISSIVE;
else {
lid = ppd->lid;
if (lid) {
lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
qp->s_hdr->ibh.lrh[3] = cpu_to_be16(lid);
} else
qp->s_hdr->ibh.lrh[3] = IB_LID_PERMISSIVE;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= IB_BTH_SOLICITED;
bth0 |= extra_bytes << 20;
if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI)
bth0 |= hfi1_get_pkey(ibp, wqe->wr.wr.ud.pkey_index);
else
bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
ohdr->bth[2] = cpu_to_be32(mask_psn(qp->s_next_psn++));
/*
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
*/
ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
qp->qkey : wqe->wr.wr.ud.remote_qkey);
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
/* disarm any ahg */
qp->s_hdr->ahgcount = 0;
qp->s_hdr->ahgidx = 0;
qp->s_hdr->tx_flags = 0;
qp->s_hdr->sde = NULL;
done:
ret = 1;
goto unlock;
bail:
qp->s_flags &= ~HFI1_S_BUSY;
unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
/*
* Hardware can't check this so we do it here.
*
* This is a slightly different algorithm than the standard pkey check. It
* special cases the management keys and allows for 0x7fff and 0xffff to be in
* the table at the same time.
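 * For example, a lookup of FULL_MGMT_P_KEY (0xffff) falls back to the
 * index of LIM_MGMT_P_KEY (0x7fff) when only 0x7fff is in the table.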
*
* @returns the index found or -1 if not found
*/
int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey)
{
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
unsigned i;
if (pkey == FULL_MGMT_P_KEY || pkey == LIM_MGMT_P_KEY) {
unsigned lim_idx = -1;
for (i = 0; i < ARRAY_SIZE(ppd->pkeys); ++i) {
/* here we look for an exact match */
if (ppd->pkeys[i] == pkey)
return i;
if (ppd->pkeys[i] == LIM_MGMT_P_KEY)
lim_idx = i;
}
/* 0xffff was not found; return the index of 0x7fff if one was seen */
if (pkey == FULL_MGMT_P_KEY)
return lim_idx;
/* no match... */
return -1;
}
pkey &= 0x7fff; /* remove limited/full membership bit */
for (i = 0; i < ARRAY_SIZE(ppd->pkeys); ++i)
if ((ppd->pkeys[i] & 0x7fff) == pkey)
return i;
/*
* Should not get here, this means hardware failed to validate pkeys.
*/
return -1;
}
void return_cnp(struct hfi1_ibport *ibp, struct hfi1_qp *qp, u32 remote_qpn,
u32 pkey, u32 slid, u32 dlid, u8 sc5,
const struct ib_grh *old_grh)
{
u64 pbc, pbc_flags = 0;
u32 bth0, plen, vl, hwords = 5;
u16 lrh0;
u8 sl = ibp->sc_to_sl[sc5];
struct hfi1_ib_header hdr;
struct hfi1_other_headers *ohdr;
struct pio_buf *pbuf;
struct send_context *ctxt = qp_to_send_context(qp, sc5);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
if (old_grh) {
struct ib_grh *grh = &hdr.u.l.grh;
grh->version_tclass_flow = old_grh->version_tclass_flow;
grh->paylen = cpu_to_be16((hwords - 2 + SIZE_OF_CRC) << 2);
grh->hop_limit = 0xff;
grh->sgid = old_grh->dgid;
grh->dgid = old_grh->sgid;
ohdr = &hdr.u.l.oth;
lrh0 = HFI1_LRH_GRH;
hwords += sizeof(struct ib_grh) / sizeof(u32);
} else {
ohdr = &hdr.u.oth;
lrh0 = HFI1_LRH_BTH;
}
lrh0 |= (sc5 & 0xf) << 12 | sl << 4;
bth0 = pkey | (IB_OPCODE_CNP << 24);
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << HFI1_BECN_SHIFT));
ohdr->bth[2] = 0; /* PSN 0 */
hdr.lrh[0] = cpu_to_be16(lrh0);
hdr.lrh[1] = cpu_to_be16(dlid);
hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
hdr.lrh[3] = cpu_to_be16(slid);
plen = 2 /* PBC */ + hwords;
pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
vl = sc_to_vlt(ppd->dd, sc5);
pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
if (ctxt) {
pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL);
if (pbuf)
ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
&hdr, hwords);
}
}
/*
* opa_smp_check() - Do the regular pkey checking, and the additional
* checks for SMPs specified in OPAv1 rev 0.90, section 9.10.26
* ("SMA Packet Checks").
*
* Note that:
* - Checks are done using the pkey directly from the packet's BTH,
* and specifically _not_ the pkey that we attach to the completion,
* which may be different.
* - These checks are specifically for "non-local" SMPs (i.e., SMPs
* which originated on another node). SMPs which are sent from, and
* destined to this node are checked in opa_local_smp_check().
*
* At the point where opa_smp_check() is called, we know:
* - destination QP is QP0
*
* opa_smp_check() returns 0 if all checks succeed, 1 otherwise.
*/
static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
struct hfi1_qp *qp, u16 slid, struct opa_smp *smp)
{
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
/*
* I don't think it's possible for us to get here with sc != 0xf,
* but check it to be certain.
*/
if (sc5 != 0xf)
return 1;
if (rcv_pkey_check(ppd, pkey, sc5, slid))
return 1;
/*
* At this point we know (and so don't need to check again) that
* the pkey is either LIM_MGMT_P_KEY, or FULL_MGMT_P_KEY
* (see ingress_pkey_check).
*/
if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE &&
smp->mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED) {
ingress_pkey_table_fail(ppd, pkey, slid);
return 1;
}
/*
* SMPs fall into one of four (disjoint) categories:
* SMA request, SMA response, trap, or trap repress.
* Our response depends, in part, on which type of
* SMP we're processing.
*
* If this is not an SMA request, or trap repress:
* - accept MAD if the port is running an SM
* - pkey == FULL_MGMT_P_KEY =>
* reply with unsupported method (i.e., just mark
* the smp's status field here, and let it be
* processed normally)
* - pkey != LIM_MGMT_P_KEY =>
* increment port recv constraint errors, drop MAD
* If this is an SMA request or trap repress:
* - pkey != FULL_MGMT_P_KEY =>
* increment port recv constraint errors, drop MAD
*/
switch (smp->method) {
case IB_MGMT_METHOD_GET:
case IB_MGMT_METHOD_SET:
case IB_MGMT_METHOD_REPORT:
case IB_MGMT_METHOD_TRAP_REPRESS:
if (pkey != FULL_MGMT_P_KEY) {
ingress_pkey_table_fail(ppd, pkey, slid);
return 1;
}
break;
case IB_MGMT_METHOD_SEND:
case IB_MGMT_METHOD_TRAP:
case IB_MGMT_METHOD_GET_RESP:
case IB_MGMT_METHOD_REPORT_RESP:
if (ibp->port_cap_flags & IB_PORT_SM)
return 0;
if (pkey == FULL_MGMT_P_KEY) {
smp->status |= IB_SMP_UNSUP_METHOD;
return 0;
}
if (pkey != LIM_MGMT_P_KEY) {
ingress_pkey_table_fail(ppd, pkey, slid);
return 1;
}
break;
default:
break;
}
return 0;
}
/**
 * hfi1_ud_rcv - receive an incoming UD packet
 * @packet: the packet structure, which carries the receive context,
 *          headers, payload pointer, packet length, and the QP the
 *          packet arrived on
 *
 * This is called from qp_rcv() to process an incoming UD packet
 * for the given QP.
 * Called at interrupt level.
 */
void hfi1_ud_rcv(struct hfi1_packet *packet)
{
struct hfi1_other_headers *ohdr = packet->ohdr;
int opcode;
u32 hdrsize = packet->hlen;
u32 pad;
struct ib_wc wc;
u32 qkey;
u32 src_qp;
u16 dlid, pkey;
int mgmt_pkey_idx = -1;
struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
struct hfi1_ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct hfi1_qp *qp = packet->qp;
bool has_grh = rcv_flags & HFI1_HAS_GRH;
bool sc4_bit = has_sc4_bit(packet);
u8 sc;
u32 bth1;
int is_mcast;
struct ib_grh *grh = NULL;
qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & HFI1_QPN_MASK;
dlid = be16_to_cpu(hdr->lrh[1]);
is_mcast = (dlid > HFI1_MULTICAST_LID_BASE) &&
(dlid != HFI1_PERMISSIVE_LID);
bth1 = be32_to_cpu(ohdr->bth[1]);
if (unlikely(bth1 & HFI1_BECN_SMASK)) {
/*
* In pre-B0 h/w the CNP_OPCODE is handled via an
* error path (errata 291394).
*/
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 lqpn = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
u8 sl, sc5;
sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
sc5 |= sc4_bit;
sl = ibp->sc_to_sl[sc5];
process_becn(ppd, sl, 0, lqpn, 0, IB_CC_SVCTYPE_UD);
}
/*
 * The opcode is in the low byte when it's in network order
* (top byte when in host order).
*/
opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
opcode &= 0xff;
pkey = (u16)be32_to_cpu(ohdr->bth[0]);
if (!is_mcast && (opcode != IB_OPCODE_CNP) && bth1 & HFI1_FECN_SMASK) {
u16 slid = be16_to_cpu(hdr->lrh[3]);
u8 sc5;
sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
sc5 |= sc4_bit;
return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5, grh);
}
/*
* Get the number of bytes the message was padded by
* and drop incomplete packets.
*/
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
if (unlikely(tlen < (hdrsize + pad + 4)))
goto drop;
tlen -= hdrsize + pad + 4;
/*
* Check that the permissive LID is only used on QP0
* and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
*/
if (qp->ibqp.qp_num) {
if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
hdr->lrh[3] == IB_LID_PERMISSIVE))
goto drop;
if (qp->ibqp.qp_num > 1) {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u16 slid;
u8 sc5;
sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
sc5 |= sc4_bit;
slid = be16_to_cpu(hdr->lrh[3]);
if (unlikely(rcv_pkey_check(ppd, pkey, sc5, slid))) {
/*
* Traps will not be sent for packets dropped
 * by the HW. This is fine, as sending a trap
* for invalid pkeys is optional according to
* IB spec (release 1.3, section 10.9.4)
*/
hfi1_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY,
pkey,
(be16_to_cpu(hdr->lrh[0]) >> 4) &
0xF,
src_qp, qp->ibqp.qp_num,
hdr->lrh[3], hdr->lrh[1]);
return;
}
} else {
/* GSI packet */
mgmt_pkey_idx = hfi1_lookup_pkey_idx(ibp, pkey);
if (mgmt_pkey_idx < 0)
goto drop;
}
if (unlikely(qkey != qp->qkey)) {
hfi1_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey,
(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
src_qp, qp->ibqp.qp_num,
hdr->lrh[3], hdr->lrh[1]);
return;
}
/* Drop invalid MAD packets (see 13.5.3.1). */
if (unlikely(qp->ibqp.qp_num == 1 &&
(tlen > 2048 ||
(be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))
goto drop;
} else {
/* Received on QP0, and so by definition, this is an SMP */
struct opa_smp *smp = (struct opa_smp *)data;
u16 slid = be16_to_cpu(hdr->lrh[3]);
u8 sc5;
sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
sc5 |= sc4_bit;
if (opa_smp_check(ibp, pkey, sc5, qp, slid, smp))
goto drop;
if (tlen > 2048)
goto drop;
if ((hdr->lrh[1] == IB_LID_PERMISSIVE ||
hdr->lrh[3] == IB_LID_PERMISSIVE) &&
smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
goto drop;
/* look up SMI pkey */
mgmt_pkey_idx = hfi1_lookup_pkey_idx(ibp, pkey);
if (mgmt_pkey_idx < 0)
goto drop;
}
if (qp->ibqp.qp_num > 1 &&
opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
wc.ex.imm_data = ohdr->u.ud.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
tlen -= sizeof(u32);
} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
wc.ex.imm_data = 0;
wc.wc_flags = 0;
} else
goto drop;
/*
* A GRH is expected to precede the data even if not
* present on the wire.
*/
wc.byte_len = tlen + sizeof(struct ib_grh);
/*
* Get the next work request entry to find where to put the data.
*/
if (qp->r_flags & HFI1_R_REUSE_SGE)
qp->r_flags &= ~HFI1_R_REUSE_SGE;
else {
int ret;
ret = hfi1_get_rwqe(qp, 0);
if (ret < 0) {
hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
return;
}
if (!ret) {
if (qp->ibqp.qp_num == 0)
ibp->n_vl15_dropped++;
return;
}
}
/* Silently drop packets which are too big. */
if (unlikely(wc.byte_len > qp->r_len)) {
qp->r_flags |= HFI1_R_REUSE_SGE;
goto drop;
}
if (has_grh) {
hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh,
sizeof(struct ib_grh), 1);
wc.wc_flags |= IB_WC_GRH;
} else
hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
hfi1_put_ss(&qp->r_sge);
if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
return;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
wc.vendor_err = 0;
wc.qp = &qp->ibqp;
wc.src_qp = src_qp;
if (qp->ibqp.qp_type == IB_QPT_GSI ||
qp->ibqp.qp_type == IB_QPT_SMI) {
if (mgmt_pkey_idx < 0) {
if (net_ratelimit()) {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct hfi1_devdata *dd = ppd->dd;
dd_dev_err(dd, "QP type %d mgmt_pkey_idx < 0 and packet not dropped???\n",
qp->ibqp.qp_type);
mgmt_pkey_idx = 0;
}
}
wc.pkey_index = (unsigned)mgmt_pkey_idx;
} else
wc.pkey_index = 0;
wc.slid = be16_to_cpu(hdr->lrh[3]);
sc = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
sc |= sc4_bit;
wc.sl = ibp->sc_to_sl[sc];
/*
* Save the LMC lower bits if the destination LID is a unicast LID.
*/
wc.dlid_path_bits = dlid >= HFI1_MULTICAST_LID_BASE ? 0 :
dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
cpu_to_be32(IB_BTH_SOLICITED)) != 0);
return;
drop:
ibp->n_pkt_drops++;
}

View File

@ -0,0 +1,156 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/mm.h>
#include <linux/device.h>
#include "hfi.h"
static void __hfi1_release_user_pages(struct page **p, size_t num_pages,
int dirty)
{
size_t i;
for (i = 0; i < num_pages; i++) {
if (dirty)
set_page_dirty_lock(p[i]);
put_page(p[i]);
}
}
/*
* Call with current->mm->mmap_sem held.
*/
static int __hfi1_get_user_pages(unsigned long start_page, size_t num_pages,
struct page **p)
{
unsigned long lock_limit;
size_t got;
int ret;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (num_pages > lock_limit && !capable(CAP_IPC_LOCK)) {
ret = -ENOMEM;
goto bail;
}
for (got = 0; got < num_pages; got += ret) {
ret = get_user_pages(current, current->mm,
start_page + got * PAGE_SIZE,
num_pages - got, 1, 1,
p + got, NULL);
if (ret < 0)
goto bail_release;
}
current->mm->pinned_vm += num_pages;
ret = 0;
goto bail;
bail_release:
__hfi1_release_user_pages(p, got, 0);
bail:
return ret;
}
/**
* hfi1_map_page - a safety wrapper around pci_map_page()
*
*/
dma_addr_t hfi1_map_page(struct pci_dev *hwdev, struct page *page,
unsigned long offset, size_t size, int direction)
{
dma_addr_t phys;
phys = pci_map_page(hwdev, page, offset, size, direction);
return phys;
}
/**
* hfi1_get_user_pages - lock user pages into memory
* @start_page: the start page
* @num_pages: the number of pages
* @p: the output page structures
*
* This function takes a given start page (page aligned user virtual
* address) and pins it and the following specified number of pages. For
* now, num_pages is always 1, but that will probably change at some point
 * (because the caller is doing expected sends on a single virtually contiguous
* buffer, so we can do all pages at once).
*/
int hfi1_get_user_pages(unsigned long start_page, size_t num_pages,
struct page **p)
{
int ret;
down_write(&current->mm->mmap_sem);
ret = __hfi1_get_user_pages(start_page, num_pages, p);
up_write(&current->mm->mmap_sem);
return ret;
}
void hfi1_release_user_pages(struct page **p, size_t num_pages)
{
if (current->mm) /* during close after signal, mm can be NULL */
down_write(&current->mm->mmap_sem);
__hfi1_release_user_pages(p, num_pages, 1);
if (current->mm) {
current->mm->pinned_vm -= num_pages;
up_write(&current->mm->mmap_sem);
}
}
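/*
 * Minimal usage sketch (illustrative only): pin one page-aligned user
 * buffer, use it, then release it.  'vaddr' and 'pages' are assumed to
 * be provided by the caller.
 *
 *	ret = hfi1_get_user_pages(vaddr, 1, pages);
 *	if (!ret) {
 *		... DMA to/from the pinned page ...
 *		hfi1_release_user_pages(pages, 1);
 *	}
 */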

File diff suppressed because it is too large

View File

@ -0,0 +1,89 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/device.h>
#include <linux/wait.h>
#include "common.h"
#include "iowait.h"
#define EXP_TID_TIDLEN_MASK 0x7FFULL
#define EXP_TID_TIDLEN_SHIFT 0
#define EXP_TID_TIDCTRL_MASK 0x3ULL
#define EXP_TID_TIDCTRL_SHIFT 20
#define EXP_TID_TIDIDX_MASK 0x7FFULL
#define EXP_TID_TIDIDX_SHIFT 22
#define EXP_TID_GET(tid, field) \
(((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK)
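/*
 * With the shifts and masks above, EXP_TID_GET(tid, IDX) is
 * (tid >> 22) & 0x7FF, EXP_TID_GET(tid, CTRL) is (tid >> 20) & 0x3,
 * and EXP_TID_GET(tid, LEN) is tid & 0x7FF.
 */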
extern uint extended_psn;
struct hfi1_user_sdma_pkt_q {
struct list_head list;
unsigned ctxt;
unsigned subctxt;
u16 n_max_reqs;
atomic_t n_reqs;
u16 reqidx;
struct hfi1_devdata *dd;
struct kmem_cache *txreq_cache;
struct user_sdma_request *reqs;
struct iowait busy;
unsigned state;
};
struct hfi1_user_sdma_comp_q {
u16 nentries;
struct hfi1_sdma_comp_entry *comps;
};
int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *, struct file *);
int hfi1_user_sdma_free_queues(struct hfi1_filedata *);
int hfi1_user_sdma_process_request(struct file *, struct iovec *, unsigned long,
unsigned long *);

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,385 @@
/*
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/rculist.h>
#include "hfi.h"
/**
* mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
* @qp: the QP to link
*/
static struct hfi1_mcast_qp *mcast_qp_alloc(struct hfi1_qp *qp)
{
struct hfi1_mcast_qp *mqp;
mqp = kmalloc(sizeof(*mqp), GFP_KERNEL);
if (!mqp)
goto bail;
mqp->qp = qp;
atomic_inc(&qp->refcount);
bail:
return mqp;
}
static void mcast_qp_free(struct hfi1_mcast_qp *mqp)
{
struct hfi1_qp *qp = mqp->qp;
/* Notify hfi1_destroy_qp() if it is waiting. */
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
kfree(mqp);
}
/**
* mcast_alloc - allocate the multicast GID structure
* @mgid: the multicast GID
*
* A list of QPs will be attached to this structure.
*/
static struct hfi1_mcast *mcast_alloc(union ib_gid *mgid)
{
struct hfi1_mcast *mcast;
mcast = kmalloc(sizeof(*mcast), GFP_KERNEL);
if (!mcast)
goto bail;
mcast->mgid = *mgid;
INIT_LIST_HEAD(&mcast->qp_list);
init_waitqueue_head(&mcast->wait);
atomic_set(&mcast->refcount, 0);
mcast->n_attached = 0;
bail:
return mcast;
}
static void mcast_free(struct hfi1_mcast *mcast)
{
struct hfi1_mcast_qp *p, *tmp;
list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
mcast_qp_free(p);
kfree(mcast);
}
/**
* hfi1_mcast_find - search the global table for the given multicast GID
* @ibp: the IB port structure
* @mgid: the multicast GID to search for
*
* Returns NULL if not found.
*
* The caller is responsible for decrementing the reference count if found.
*/
struct hfi1_mcast *hfi1_mcast_find(struct hfi1_ibport *ibp, union ib_gid *mgid)
{
struct rb_node *n;
unsigned long flags;
struct hfi1_mcast *mcast;
spin_lock_irqsave(&ibp->lock, flags);
n = ibp->mcast_tree.rb_node;
while (n) {
int ret;
mcast = rb_entry(n, struct hfi1_mcast, rb_node);
ret = memcmp(mgid->raw, mcast->mgid.raw,
sizeof(union ib_gid));
if (ret < 0)
n = n->rb_left;
else if (ret > 0)
n = n->rb_right;
else {
atomic_inc(&mcast->refcount);
spin_unlock_irqrestore(&ibp->lock, flags);
goto bail;
}
}
spin_unlock_irqrestore(&ibp->lock, flags);
mcast = NULL;
bail:
return mcast;
}
/**
* mcast_add - insert mcast GID into table and attach QP struct
* @mcast: the mcast GID table
* @mqp: the QP to attach
*
 * Return zero if both were added.  Return EEXIST if the GID was already in
 * the table but the QP was added.  Return ESRCH if the QP was already
 * attached and neither structure was added.  Return ENOMEM if the per-group
 * QP attach limit or the device multicast group limit was exceeded.
*/
static int mcast_add(struct hfi1_ibdev *dev, struct hfi1_ibport *ibp,
struct hfi1_mcast *mcast, struct hfi1_mcast_qp *mqp)
{
struct rb_node **n = &ibp->mcast_tree.rb_node;
struct rb_node *pn = NULL;
int ret;
spin_lock_irq(&ibp->lock);
while (*n) {
struct hfi1_mcast *tmcast;
struct hfi1_mcast_qp *p;
pn = *n;
tmcast = rb_entry(pn, struct hfi1_mcast, rb_node);
ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
sizeof(union ib_gid));
if (ret < 0) {
n = &pn->rb_left;
continue;
}
if (ret > 0) {
n = &pn->rb_right;
continue;
}
/* Search the QP list to see if this is already there. */
list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
if (p->qp == mqp->qp) {
ret = ESRCH;
goto bail;
}
}
if (tmcast->n_attached == hfi1_max_mcast_qp_attached) {
ret = ENOMEM;
goto bail;
}
tmcast->n_attached++;
list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
ret = EEXIST;
goto bail;
}
spin_lock(&dev->n_mcast_grps_lock);
if (dev->n_mcast_grps_allocated == hfi1_max_mcast_grps) {
spin_unlock(&dev->n_mcast_grps_lock);
ret = ENOMEM;
goto bail;
}
dev->n_mcast_grps_allocated++;
spin_unlock(&dev->n_mcast_grps_lock);
mcast->n_attached++;
list_add_tail_rcu(&mqp->list, &mcast->qp_list);
atomic_inc(&mcast->refcount);
rb_link_node(&mcast->rb_node, pn, n);
rb_insert_color(&mcast->rb_node, &ibp->mcast_tree);
ret = 0;
bail:
spin_unlock_irq(&ibp->lock);
return ret;
}
int hfi1_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct hfi1_qp *qp = to_iqp(ibqp);
struct hfi1_ibdev *dev = to_idev(ibqp->device);
struct hfi1_ibport *ibp;
struct hfi1_mcast *mcast;
struct hfi1_mcast_qp *mqp;
int ret;
if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
ret = -EINVAL;
goto bail;
}
/*
 * Allocate data structures since it's better to do this outside of
* spin locks and it will most likely be needed.
*/
mcast = mcast_alloc(gid);
if (mcast == NULL) {
ret = -ENOMEM;
goto bail;
}
mqp = mcast_qp_alloc(qp);
if (mqp == NULL) {
mcast_free(mcast);
ret = -ENOMEM;
goto bail;
}
ibp = to_iport(ibqp->device, qp->port_num);
switch (mcast_add(dev, ibp, mcast, mqp)) {
case ESRCH:
/* Neither was used: OK to attach the same QP twice. */
mcast_qp_free(mqp);
mcast_free(mcast);
break;
case EEXIST: /* The mcast wasn't used */
mcast_free(mcast);
break;
case ENOMEM:
/* Exceeded the maximum number of mcast groups. */
mcast_qp_free(mqp);
mcast_free(mcast);
ret = -ENOMEM;
goto bail;
default:
break;
}
ret = 0;
bail:
return ret;
}
int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct hfi1_qp *qp = to_iqp(ibqp);
struct hfi1_ibdev *dev = to_idev(ibqp->device);
struct hfi1_ibport *ibp = to_iport(ibqp->device, qp->port_num);
struct hfi1_mcast *mcast = NULL;
struct hfi1_mcast_qp *p, *tmp;
struct rb_node *n;
int last = 0;
int ret;
if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
ret = -EINVAL;
goto bail;
}
spin_lock_irq(&ibp->lock);
/* Find the GID in the mcast table. */
n = ibp->mcast_tree.rb_node;
while (1) {
if (n == NULL) {
spin_unlock_irq(&ibp->lock);
ret = -EINVAL;
goto bail;
}
mcast = rb_entry(n, struct hfi1_mcast, rb_node);
ret = memcmp(gid->raw, mcast->mgid.raw,
sizeof(union ib_gid));
if (ret < 0)
n = n->rb_left;
else if (ret > 0)
n = n->rb_right;
else
break;
}
/* Search the QP list. */
list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
if (p->qp != qp)
continue;
/*
* We found it, so remove it, but don't poison the forward
* link until we are sure there are no list walkers.
*/
list_del_rcu(&p->list);
mcast->n_attached--;
/* If this was the last attached QP, remove the GID too. */
if (list_empty(&mcast->qp_list)) {
rb_erase(&mcast->rb_node, &ibp->mcast_tree);
last = 1;
}
break;
}
spin_unlock_irq(&ibp->lock);
if (p) {
/*
* Wait for any list walkers to finish before freeing the
* list element.
*/
wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
mcast_qp_free(p);
}
if (last) {
atomic_dec(&mcast->refcount);
wait_event(mcast->wait, !atomic_read(&mcast->refcount));
mcast_free(mcast);
spin_lock_irq(&dev->n_mcast_grps_lock);
dev->n_mcast_grps_allocated--;
spin_unlock_irq(&dev->n_mcast_grps_lock);
}
ret = 0;
bail:
return ret;
}
int hfi1_mcast_tree_empty(struct hfi1_ibport *ibp)
{
return ibp->mcast_tree.rb_node == NULL;
}