mtd: nand: Add NAND controller driver for OcteonTX

Adds support for the NAND controllers found on OcteonTX and
OcteonTX2 SoC platforms. Also includes a driver that provides
hardware ECC using the BCH engine found on these platforms.

Signed-off-by: Aaron Williams <awilliams@marvell.com>
Signed-off-by: Suneel Garapati <sgarapati@marvell.com>
Signed-off-by: Stefan Roese <sr@denx.de>
Authored by Suneel Garapati on 2020-08-26 14:37:22 +02:00, committed by Stefan Roese
parent 55fca74a5b
commit 05c7606ac9
6 changed files with 2998 additions and 0 deletions

drivers/mtd/nand/raw/Kconfig

@@ -291,6 +291,22 @@ config NAND_ZYNQ_USE_BOOTLOADER1_TIMINGS
This flag prevents U-Boot from reconfiguring the NAND flash controller
and reuses the NAND timings from the 1st stage bootloader.

config NAND_OCTEONTX
bool "Support for OcteonTX NAND controller"
select SYS_NAND_SELF_INIT
imply CMD_NAND
help
This enables the NAND flash controller hardware found on the OcteonTX
processors.

config NAND_OCTEONTX_HW_ECC
bool "Support Hardware ECC for OcteonTX NAND controller"
depends on NAND_OCTEONTX
default y
help
This enables the hardware BCH engine found on the OcteonTX processors
to provide ECC support for the NAND flash controller.

config NAND_STM32_FMC2
bool "Support for NAND controller on STM32MP SoCs"
depends on ARCH_STM32MP

drivers/mtd/nand/raw/Makefile

@@ -58,6 +58,8 @@ obj-$(CONFIG_NAND_VF610_NFC) += vf610_nfc.o
obj-$(CONFIG_NAND_MXC) += mxc_nand.o
obj-$(CONFIG_NAND_MXS) += mxs_nand.o
obj-$(CONFIG_NAND_MXS_DT) += mxs_nand_dt.o
obj-$(CONFIG_NAND_OCTEONTX) += octeontx_nand.o
obj-$(CONFIG_NAND_OCTEONTX_HW_ECC) += octeontx_bch.o
obj-$(CONFIG_NAND_PXA3XX) += pxa3xx_nand.o
obj-$(CONFIG_NAND_SPEAR) += spr_nand.o
obj-$(CONFIG_TEGRA_NAND) += tegra_nand.o

drivers/mtd/nand/raw/octeontx_bch.c

@@ -0,0 +1,425 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2018 Marvell International Ltd.
*/
#include <dm.h>
#include <dm/of_access.h>
#include <malloc.h>
#include <memalign.h>
#include <nand.h>
#include <pci.h>
#include <pci_ids.h>
#include <time.h>
#include <linux/bitfield.h>
#include <linux/ctype.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/ioport.h>
#include <linux/libfdt.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/nand_bch.h>
#include <linux/mtd/nand_ecc.h>
#include <asm/io.h>
#include <asm/types.h>
#include <asm/dma-mapping.h>
#include <asm/arch/clock.h>
#include "octeontx_bch.h"
#ifdef DEBUG
# undef CONFIG_LOGLEVEL
# define CONFIG_LOGLEVEL 8
#endif
LIST_HEAD(octeontx_bch_devices);
static unsigned int num_vfs = BCH_NR_VF;
static void *bch_pf;
static void *bch_vf;
static void *token;
static bool bch_pf_initialized;
static bool bch_vf_initialized;
static int pci_enable_sriov(struct udevice *dev, int nr_virtfn)
{
int ret;
ret = pci_sriov_init(dev, nr_virtfn);
if (ret)
printf("%s(%s): pci_sriov_init returned %d\n", __func__,
dev->name, ret);
return ret;
}
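/*
 * The PF and VF probe paths publish their handles through the accessors
 * below: octeontx_bch_getv() returns the VF handle only once both the
 * PF and the VF have finished initializing, which is what allows the
 * NAND driver to defer its ECC setup until the BCH engine is usable.
 */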
void *octeontx_bch_getv(void)
{
if (!bch_vf)
return NULL;
if (bch_vf_initialized && bch_pf_initialized)
return bch_vf;
else
return NULL;
}
void octeontx_bch_putv(void *token)
{
bch_vf_initialized = !!token;
bch_vf = token;
}
void *octeontx_bch_getp(void)
{
return token;
}
void octeontx_bch_putp(void *token)
{
bch_pf = token;
bch_pf_initialized = !!token;
}
static int do_bch_init(struct bch_device *bch)
{
return 0;
}
static void bch_reset(struct bch_device *bch)
{
writeq(1, bch->reg_base + BCH_CTL);
mdelay(2);
}
static void bch_disable(struct bch_device *bch)
{
writeq(~0ull, bch->reg_base + BCH_ERR_INT_ENA_W1C);
writeq(~0ull, bch->reg_base + BCH_ERR_INT);
bch_reset(bch);
}
static u32 bch_check_bist_status(struct bch_device *bch)
{
return readq(bch->reg_base + BCH_BIST_RESULT);
}
static int bch_device_init(struct bch_device *bch)
{
u64 bist;
int rc;
debug("%s: Resetting...\n", __func__);
/* Reset the PF when probed first */
bch_reset(bch);
debug("%s: Checking BIST...\n", __func__);
/* Check BIST status */
bist = (u64)bch_check_bist_status(bch);
if (bist) {
dev_err(bch->dev, "BCH BIST failed with code 0x%llx\n", bist);
return -ENODEV;
}
/* Get max VQs/VFs supported by the device */
bch->max_vfs = pci_sriov_get_totalvfs(bch->dev);
debug("%s: %d vfs\n", __func__, bch->max_vfs);
if (num_vfs > bch->max_vfs) {
dev_warn(bch->dev, "Num of VFs to enable %d is greater than max available. Enabling %d VFs.\n",
num_vfs, bch->max_vfs);
num_vfs = bch->max_vfs;
}
bch->vfs_enabled = bch->max_vfs;
/* Get number of VQs/VFs to be enabled */
/* TODO: Get CLK frequency */
/* Reset device parameters */
debug("%s: Doing initialization\n", __func__);
rc = do_bch_init(bch);
return rc;
}
static int bch_sriov_configure(struct udevice *dev, int numvfs)
{
struct bch_device *bch = dev_get_priv(dev);
int ret = -EBUSY;
debug("%s(%s, %d), bch: %p, vfs_in_use: %d, enabled: %d\n", __func__,
dev->name, numvfs, bch, bch->vfs_in_use, bch->vfs_enabled);
if (bch->vfs_in_use)
goto exit;
ret = 0;
if (numvfs > 0) {
debug("%s: Enabling sriov\n", __func__);
ret = pci_enable_sriov(dev, numvfs);
if (ret == 0) {
bch->flags |= BCH_FLAG_SRIOV_ENABLED;
ret = numvfs;
bch->vfs_enabled = numvfs;
}
}
debug("VFs enabled: %d\n", ret);
exit:
debug("%s: Returning %d\n", __func__, ret);
return ret;
}
static int octeontx_pci_bchpf_probe(struct udevice *dev)
{
struct bch_device *bch;
int ret;
debug("%s(%s)\n", __func__, dev->name);
bch = dev_get_priv(dev);
if (!bch)
return -ENOMEM;
bch->reg_base = dm_pci_map_bar(dev, PCI_BASE_ADDRESS_0, PCI_REGION_MEM);
bch->dev = dev;
debug("%s: base address: %p\n", __func__, bch->reg_base);
ret = bch_device_init(bch);
if (ret) {
printf("%s(%s): init returned %d\n", __func__, dev->name, ret);
return ret;
}
INIT_LIST_HEAD(&bch->list);
list_add(&bch->list, &octeontx_bch_devices);
token = (void *)dev;
debug("%s: Configuring SRIOV\n", __func__);
bch_sriov_configure(dev, num_vfs);
debug("%s: Done.\n", __func__);
octeontx_bch_putp(bch);
return 0;
}
static const struct pci_device_id octeontx_bchpf_pci_id_table[] = {
{ PCI_VDEVICE(CAVIUM, PCI_DEVICE_ID_CAVIUM_BCH) },
{},
};
static const struct pci_device_id octeontx_bchvf_pci_id_table[] = {
{ PCI_VDEVICE(CAVIUM, PCI_DEVICE_ID_CAVIUM_BCHVF)},
{},
};
/**
* Given a data block calculate the ecc data and fill in the response
*
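* @param vf        BCH virtual function used to queue the command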
* @param[in] block 8-byte aligned pointer to data block to calculate ECC
* @param block_size Size of block in bytes, must be a multiple of two.
* @param bch_level Number of errors that must be corrected. The number of
* parity bytes is equal to ((15 * bch_level) + 7) / 8.
* Must be 4, 8, 16, 24, 32, 40, 48, 56, 60 or 64.
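* For example, bch_level 8 needs ((15 * 8) + 7) / 8 = 15 parity bytes.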
* @param[out] ecc 8-byte aligned pointer to where ecc data should go
* @param[in] resp pointer to where responses will be written.
*
* @return Zero on success, negative on failure.
*/
int octeontx_bch_encode(struct bch_vf *vf, dma_addr_t block, u16 block_size,
u8 bch_level, dma_addr_t ecc, dma_addr_t resp)
{
union bch_cmd cmd;
int rc;
memset(&cmd, 0, sizeof(cmd));
cmd.s.cword.ecc_gen = eg_gen;
cmd.s.cword.ecc_level = bch_level;
cmd.s.cword.size = block_size;
cmd.s.oword.ptr = ecc;
cmd.s.iword.ptr = block;
cmd.s.rword.ptr = resp;
rc = octeontx_cmd_queue_write(QID_BCH, 1,
sizeof(cmd) / sizeof(uint64_t), cmd.u);
if (rc)
return -1;
octeontx_bch_write_doorbell(1, vf);
return 0;
}
/**
* Given a data block and ecc data correct the data block
*
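* @param vf            BCH virtual function used to queue the command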
* @param[in] block_ecc_in 8-byte aligned pointer to data block with ECC
* data concatenated to the end to correct
* @param block_size Size of block in bytes, must be a multiple of
* two.
* @param bch_level Number of errors that must be corrected. The
* number of parity bytes is equal to
* ((15 * bch_level) + 7) / 8.
* Must be 4, 8, 16, 24, 32, 40, 48, 56, 60 or 64.
* @param[out] block_out 8-byte aligned pointer to corrected data buffer.
* This should not be the same as block_ecc_in.
* @param[in] resp pointer to where responses will be written.
*
* @return Zero on success, negative on failure.
*/
int octeontx_bch_decode(struct bch_vf *vf, dma_addr_t block_ecc_in,
u16 block_size, u8 bch_level,
dma_addr_t block_out, dma_addr_t resp)
{
union bch_cmd cmd;
int rc;
memset(&cmd, 0, sizeof(cmd));
cmd.s.cword.ecc_gen = eg_correct;
cmd.s.cword.ecc_level = bch_level;
cmd.s.cword.size = block_size;
cmd.s.oword.ptr = block_out;
cmd.s.iword.ptr = block_ecc_in;
cmd.s.rword.ptr = resp;
rc = octeontx_cmd_queue_write(QID_BCH, 1,
sizeof(cmd) / sizeof(uint64_t), cmd.u);
if (rc)
return -1;
octeontx_bch_write_doorbell(1, vf);
return 0;
}
EXPORT_SYMBOL(octeontx_bch_decode);
int octeontx_bch_wait(struct bch_vf *vf, union bch_resp *resp,
dma_addr_t handle)
{
ulong start = get_timer(0);
__iormb(); /* HW is updating *resp */
while (!resp->s.done && get_timer(start) < 10)
__iormb(); /* HW is updating *resp */
if (resp->s.done)
return 0;
return -ETIMEDOUT;
}
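For context, here is a hedged sketch of how a caller might drive these primitives end to end: encode one 512-byte block at BCH level 8 (which needs ((15 * 8) + 7) / 8 = 15 parity bytes), then poll for the response. The buffer handling and the dma_map_single() mapping are illustrative assumptions; the real caller lives in the (suppressed) octeontx_nand.c.

#include <malloc.h>
#include <linux/errno.h>
#include <asm/dma-mapping.h>
#include "octeontx_bch.h"

/* Sketch only: encode 512 bytes at level 8 and wait for completion. */
static int example_bch_encode_block(struct bch_vf *vf, void *data)
{
	u8 *ecc = memalign(8, 16);	/* room for the 15 parity bytes */
	union bch_resp *resp = memalign(8, sizeof(*resp));
	dma_addr_t d, e, r;
	int rc = -ENOMEM;

	if (!ecc || !resp)
		goto out;
	resp->u16 = 0;

	d = dma_map_single(data, 512, DMA_TO_DEVICE);
	e = dma_map_single(ecc, 16, DMA_FROM_DEVICE);
	r = dma_map_single(resp, sizeof(*resp), DMA_FROM_DEVICE);

	rc = octeontx_bch_encode(vf, d, 512, 8, e, r);
	if (!rc)
		rc = octeontx_bch_wait(vf, resp, r);	/* ~10 ms timeout */
out:
	/* A real caller would keep the parity bytes; the sketch just frees. */
	free(ecc);
	free(resp);
	return rc;
}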
struct bch_q octeontx_bch_q[QID_MAX];
static int octeontx_cmd_queue_initialize(struct udevice *dev, int queue_id,
int max_depth, int fpa_pool,
int pool_size)
{
/* some params are for later merge with CPT or cn83xx */
struct bch_q *q = &octeontx_bch_q[queue_id];
unsigned long paddr;
u64 *chunk_buffer;
int chunk = max_depth + 1;
int i, size;
if ((unsigned int)queue_id >= QID_MAX)
return -EINVAL;
if (max_depth & chunk) /* must be 2^N - 1 */
return -EINVAL;
size = NQS * chunk * sizeof(u64);
chunk_buffer = dma_alloc_coherent(size, &paddr);
if (!chunk_buffer)
return -ENOMEM;
q->base_paddr = paddr;
q->dev = dev;
q->index = 0;
q->max_depth = max_depth;
q->pool_size_m1 = pool_size;
q->base_vaddr = chunk_buffer;
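/*
 * Link the NQS chunks into a ring: the last u64 slot of chunk i is
 * filled with the physical address of the first slot of chunk
 * (i + 1) % NQS, so the hardware follows it as a next-chunk pointer.
 */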
for (i = 0; i < NQS; i++) {
u64 *ixp;
int inext = (i + 1) * chunk - 1;
int j = (i + 1) % NQS;
int jnext = j * chunk;
dma_addr_t jbase = q->base_paddr + jnext * sizeof(u64);
ixp = &chunk_buffer[inext];
*ixp = jbase;
}
return 0;
}
static int octeontx_pci_bchvf_probe(struct udevice *dev)
{
struct bch_vf *vf;
union bch_vqx_ctl ctl;
union bch_vqx_cmd_buf cbuf;
int err;
debug("%s(%s)\n", __func__, dev->name);
vf = dev_get_priv(dev);
if (!vf)
return -ENOMEM;
vf->dev = dev;
/* Map PF's configuration registers */
vf->reg_base = dm_pci_map_bar(dev, PCI_BASE_ADDRESS_0, PCI_REGION_MEM);
debug("%s: reg base: %p\n", __func__, vf->reg_base);
err = octeontx_cmd_queue_initialize(dev, QID_BCH, QDEPTH - 1, 0,
sizeof(union bch_cmd) * QDEPTH);
if (err) {
dev_err(dev, "octeontx_cmd_queue_initialize() failed\n");
goto release;
}
ctl.u = readq(vf->reg_base + BCH_VQX_CTL(0));
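/*
 * Configure the command buffer: size is the chunk length in 64-bit
 * words, while LDWB and DFB are cache write-back and buffer-free hints
 * (named after the fields defined in octeontx_bch_regs.h).
 */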
cbuf.u = 0;
cbuf.s.ldwb = 1;
cbuf.s.dfb = 1;
cbuf.s.size = QDEPTH;
writeq(cbuf.u, vf->reg_base + BCH_VQX_CMD_BUF(0));
writeq(ctl.u, vf->reg_base + BCH_VQX_CTL(0));
writeq(octeontx_bch_q[QID_BCH].base_paddr,
vf->reg_base + BCH_VQX_CMD_PTR(0));
octeontx_bch_putv(vf);
debug("%s: bch vf initialization complete\n", __func__);
if (octeontx_bch_getv())
return octeontx_pci_nand_deferred_probe();
return -1;
release:
return err;
}
static int octeontx_pci_bchpf_remove(struct udevice *dev)
{
struct bch_device *bch = dev_get_priv(dev);
bch_disable(bch);
return 0;
}
U_BOOT_DRIVER(octeontx_pci_bchpf) = {
.name = BCHPF_DRIVER_NAME,
.id = UCLASS_MISC,
.probe = octeontx_pci_bchpf_probe,
.remove = octeontx_pci_bchpf_remove,
.priv_auto_alloc_size = sizeof(struct bch_device),
.flags = DM_FLAG_OS_PREPARE,
};
U_BOOT_DRIVER(octeontx_pci_bchvf) = {
.name = BCHVF_DRIVER_NAME,
.id = UCLASS_MISC,
.probe = octeontx_pci_bchvf_probe,
.priv_auto_alloc_size = sizeof(struct bch_vf),
};
U_BOOT_PCI_DEVICE(octeontx_pci_bchpf, octeontx_bchpf_pci_id_table);
U_BOOT_PCI_DEVICE(octeontx_pci_bchvf, octeontx_bchvf_pci_id_table);

drivers/mtd/nand/raw/octeontx_bch.h

@@ -0,0 +1,131 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright (C) 2018 Marvell International Ltd.
*/
#ifndef __OCTEONTX_BCH_H__
#define __OCTEONTX_BCH_H__
#include "octeontx_bch_regs.h"
/* flags to indicate the features supported */
#define BCH_FLAG_SRIOV_ENABLED BIT(1)
/*
* BCH Registers map for 81xx
*/
/* PF registers */
#define BCH_CTL 0x0ull
#define BCH_ERR_CFG 0x10ull
#define BCH_BIST_RESULT 0x80ull
#define BCH_ERR_INT 0x88ull
#define BCH_ERR_INT_W1S 0x90ull
#define BCH_ERR_INT_ENA_W1C 0xA0ull
#define BCH_ERR_INT_ENA_W1S 0xA8ull
/* VF registers */
#define BCH_VQX_CTL(z) 0x0ull
#define BCH_VQX_CMD_BUF(z) 0x8ull
#define BCH_VQX_CMD_PTR(z) 0x20ull
#define BCH_VQX_DOORBELL(z) 0x800ull
#define BCHPF_DRIVER_NAME "octeontx-bchpf"
#define BCHVF_DRIVER_NAME "octeontx-bchvf"
struct bch_device {
struct list_head list;
u8 max_vfs;
u8 vfs_enabled;
u8 vfs_in_use;
u32 flags;
void __iomem *reg_base;
struct udevice *dev;
};
struct bch_vf {
u16 flags;
u8 vfid;
u8 node;
u8 priority;
struct udevice *dev;
void __iomem *reg_base;
};
struct buf_ptr {
u8 *vptr;
dma_addr_t dma_addr;
u16 size;
};
void *octeontx_bch_getv(void);
void octeontx_bch_putv(void *token);
void *octeontx_bch_getp(void);
void octeontx_bch_putp(void *token);
int octeontx_bch_wait(struct bch_vf *vf, union bch_resp *resp,
dma_addr_t handle);
/**
* Given a data block calculate the ecc data and fill in the response
*
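* @param vf        BCH virtual function used to queue the command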
* @param[in] block 8-byte aligned pointer to data block to calculate ECC
* @param block_size Size of block in bytes, must be a multiple of two.
* @param bch_level Number of errors that must be corrected. The number of
* parity bytes is equal to ((15 * bch_level) + 7) / 8.
* Must be 4, 8, 16, 24, 32, 40, 48, 56, 60 or 64.
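* For example, bch_level 8 needs ((15 * 8) + 7) / 8 = 15 parity bytes.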
* @param[out] ecc 8-byte aligned pointer to where ecc data should go
* @param[in] resp pointer to where responses will be written.
*
* @return Zero on success, negative on failure.
*/
int octeontx_bch_encode(struct bch_vf *vf, dma_addr_t block, u16 block_size,
u8 bch_level, dma_addr_t ecc, dma_addr_t resp);
/**
* Given a data block and ecc data correct the data block
*
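* @param vf            BCH virtual function used to queue the command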
* @param[in] block_ecc_in 8-byte aligned pointer to data block with ECC
* data concatenated to the end to correct
* @param block_size Size of block in bytes, must be a multiple of
* two.
* @param bch_level Number of errors that must be corrected. The
* number of parity bytes is equal to
* ((15 * bch_level) + 7) / 8.
* Must be 4, 8, 16, 24, 32, 40, 48, 56, 60 or 64.
* @param[out] block_out 8-byte aligned pointer to corrected data buffer.
* This should not be the same as block_ecc_in.
* @param[in] resp pointer to where responses will be written.
*
* @return Zero on success, negative on failure.
*/
int octeontx_bch_decode(struct bch_vf *vf, dma_addr_t block_ecc_in,
u16 block_size, u8 bch_level,
dma_addr_t block_out, dma_addr_t resp);
/**
* Ring the BCH doorbell telling it that new commands are
* available.
*
* @param num_commands Number of new commands
* @param vf virtual function handle
*/
static inline void octeontx_bch_write_doorbell(u64 num_commands,
struct bch_vf *vf)
{
u64 num_words = num_commands * sizeof(union bch_cmd) / sizeof(uint64_t);
writeq(num_words, vf->reg_base + BCH_VQX_DOORBELL(0));
}
/**
* Since it's possible (and even likely) that the NAND device will be probed
* before the BCH device has been probed, we may need to defer the probing.
*
* In this case, the initial probe returns success but the actual probing
* is deferred until the BCH VF has been probed.
*
* @return 0 for success, otherwise error
*/
int octeontx_pci_nand_deferred_probe(void);
#endif /* __OCTEONTX_BCH_H__ */
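
The deferred-probe contract described above might be implemented along these lines in the NAND driver (a hedged sketch: apart from octeontx_bch_getv() and octeontx_pci_nand_deferred_probe(), the names are hypothetical, and the real code is in the suppressed octeontx_nand.c diff):

#include <dm.h>
#include "octeontx_bch.h"

static struct udevice *deferred_nand_dev;	/* hypothetical bookkeeping */

/* Hypothetical stand-in for the real controller initialization. */
static int nand_controller_init(struct udevice *dev)
{
	return 0;
}

static int octeontx_nand_probe_sketch(struct udevice *dev)
{
	if (!octeontx_bch_getv()) {
		/* BCH VF not ready: report success now, finish later */
		deferred_nand_dev = dev;
		return 0;
	}
	return nand_controller_init(dev);
}

int octeontx_pci_nand_deferred_probe(void)
{
	if (!deferred_nand_dev)
		return 0;
	return nand_controller_init(deferred_nand_dev);
}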

drivers/mtd/nand/raw/octeontx_bch_regs.h

@@ -0,0 +1,167 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright (C) 2018 Marvell International Ltd.
*/
#ifndef __OCTEONTX_BCH_REGS_H__
#define __OCTEONTX_BCH_REGS_H__
#define BCH_NR_VF 1
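/*
 * A BCH instruction is four 64-bit words: cword holds the control
 * fields (block size, ECC level, operation), and oword/iword/rword
 * point to the output, input and response buffers respectively.
 */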
union bch_cmd {
u64 u[4];
struct fields {
struct {
u64 size:12;
u64 reserved_12_31:20;
u64 ecc_level:4;
u64 reserved_36_61:26;
u64 ecc_gen:2;
} cword;
struct {
u64 ptr:49;
u64 reserved_49_55:7;
u64 nc:1;
u64 fw:1;
u64 reserved_58_63:6;
} oword;
struct {
u64 ptr:49;
u64 reserved_49_55:7;
u64 nc:1;
u64 reserved_57_63:7;
} iword;
struct {
u64 ptr:49;
u64 reserved_49_63:15;
} rword;
} s;
};
enum ecc_gen {
eg_correct,
eg_copy,
eg_gen,
eg_copy3,
};
/** Response from BCH instruction */
union bch_resp {
u16 u16;
struct {
u16 num_errors:7; /** Number of errors in block */
u16 zero:6; /** Always zero, ignore */
u16 erased:1; /** Block is erased */
u16 uncorrectable:1;/** too many bits flipped */
u16 done:1; /** Block is done */
} s;
};
union bch_vqx_ctl {
u64 u;
struct {
u64 reserved_0:1;
u64 cmd_be:1;
u64 max_read:4;
u64 reserved_6_15:10;
u64 erase_disable:1;
u64 one_cmd:1;
u64 early_term:4;
u64 reserved_22_63:42;
} s;
};
union bch_vqx_cmd_buf {
u64 u;
struct {
u64 reserved_0_32:33;
u64 size:13;
u64 dfb:1;
u64 ldwb:1;
u64 reserved_48_63:16;
} s;
};
/* keep queue state indexed, even though just one supported here,
* for later generalization to similarly-shaped queues on other Cavium devices
*/
enum {
QID_BCH,
QID_MAX
};
struct bch_q {
struct udevice *dev;
int index;
u16 max_depth;
u16 pool_size_m1;
u64 *base_vaddr;
dma_addr_t base_paddr;
};
extern struct bch_q octeontx_bch_q[QID_MAX];
/* with one dma-mapped area, virt<->phys conversions by +/- (vaddr-paddr) */
static inline dma_addr_t qphys(int qid, void *v)
{
struct bch_q *q = &octeontx_bch_q[qid];
int off = (u8 *)v - (u8 *)q->base_vaddr;
return q->base_paddr + off;
}
#define octeontx_ptr_to_phys(v) qphys(QID_BCH, (v))
static inline void *qvirt(int qid, dma_addr_t p)
{
struct bch_q *q = &octeontx_bch_q[qid];
int off = p - q->base_paddr;
return q->base_vaddr + off;
}
#define octeontx_phys_to_ptr(p) qvirt(QID_BCH, (p))
/* plenty for interleaved r/w on two planes with 16k page, ecc_size 1k */
/* QDEPTH >= 16, as successive chunks must align on 128-byte boundaries */
#define QDEPTH 256 /* u64s in a command queue chunk, incl next-pointer */
#define NQS 1 /* linked chunks in the chain */
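/*
 * Example: with QDEPTH = 256 and 4-word commands, a chunk holds roughly
 * (256 - 1) / 4 = 63 complete commands plus the next-chunk pointer.
 */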
/**
* Write an arbitrary number of command words to a command queue.
* This is a generic function; the fixed number of command word
* functions yield higher performance.
*
* Could merge with crypto version for FPA use on cn83xx
*/
static inline int octeontx_cmd_queue_write(int queue_id, bool use_locking,
int cmd_count, const u64 *cmds)
{
int ret = 0;
u64 *cmd_ptr;
struct bch_q *qptr = &octeontx_bch_q[queue_id];
if (unlikely(cmd_count < 1 || cmd_count > 32))
return -EINVAL;
if (unlikely(!cmds))
return -EINVAL;
cmd_ptr = qptr->base_vaddr;
while (cmd_count > 0) {
int slot = qptr->index % (QDEPTH * NQS);
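/* Slot QDEPTH - 1 of each chunk holds the next-chunk pointer; skip it. */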
if (slot % QDEPTH != QDEPTH - 1) {
cmd_ptr[slot] = *cmds++;
cmd_count--;
}
qptr->index++;
}
__iowmb(); /* flush commands before ringing bell */
return ret;
}
#endif /* __OCTEONTX_BCH_REGS_H__ */

drivers/mtd/nand/raw/octeontx_nand.c (diff suppressed because it is too large)