u-boot-brain/drivers/mtd/mtdpart.c
Paul Burton 40462e541d mtd: driver _read() returns max_bitflips; mtd_read() returns -EUCLEAN
Linux modified the MTD driver interface in commit edbc4540 (with the
same name as this commit). The effect is that calls to mtd_read will
not return -EUCLEAN if the number of ECC-corrected bit errors is below
a certain threshold, which defaults to the strength of the ECC. This
allows -EUCLEAN to stop indicating "some bits were corrected" and begin
indicating "a large number of bits were corrected, the data held in
this region of flash may be lost soon". UBI makes use of this and when
-EUCLEAN is returned from mtd_read it will move data to another block
of flash. Without adopting this interface change UBI on U-boot attempts
to move data between blocks every time a single bit is corrected using
the ECC, which is a very common occurance on some devices.

For some devices where bit errors are common enough, UBI can get stuck
constantly moving data around because each block it attempts to use has
a single bit error. This condition is hit when wear_leveling_worker
attempts to move data from one PEB to another in response to an
-EUCLEAN/UBI_IO_BITFLIPS error. When this happens ubi_eba_copy_leb is
called to perform the data copy, and after the data is written it is
read back to check its validity. If that read returns UBI_IO_BITFLIPS
(in response to an MTD -EUCLEAN) then ubi_eba_copy_leb returns 1 to
wear_leveling worker, which then proceeds to schedule the destination
PEB for erasure. This leads to erase_worker running on the PEB, and
following a successful erase wear_leveling_worker is called which
begins this whole cycle all over again. The end result is that (without
UBI debug output enabled) the boot appears to simply hang whilst in
reality U-boot busily works away at destroying a block of the NAND
flash. Debug output from this situation:

  UBI DBG: ensure_wear_leveling: schedule scrubbing
  UBI DBG: wear_leveling_worker: scrub PEB 1027 to PEB 4083
  UBI DBG: ubi_io_read_vid_hdr: read VID header from PEB 1027
  UBI DBG: ubi_io_read: read 4096 bytes from PEB 1027:4096
  UBI DBG: ubi_eba_copy_leb: copy LEB 0:0, PEB 1027 to PEB 4083
  UBI DBG: ubi_eba_copy_leb: read 1040384 bytes of data
  UBI DBG: ubi_io_read: read 1040384 bytes from PEB 1027:8192
  UBI: fixable bit-flip detected at PEB 1027
  UBI DBG: ubi_io_write_vid_hdr: write VID header to PEB 4083
  UBI DBG: ubi_io_write: write 4096 bytes to PEB 4083:4096
  UBI DBG: ubi_io_read_vid_hdr: read VID header from PEB 4083
  UBI DBG: ubi_io_read: read 4096 bytes from PEB 4083:4096
  UBI DBG: ubi_io_write: write 4096 bytes to PEB 4083:8192
  UBI DBG: ubi_io_read: read 4096 bytes from PEB 4083:8192
  UBI: fixable bit-flip detected at PEB 4083
  UBI DBG: schedule_erase: schedule erasure of PEB 4083, EC 55, torture 0
  UBI DBG: erase_worker: erase PEB 4083 EC 55
  UBI DBG: sync_erase: erase PEB 4083, old EC 55
  UBI DBG: do_sync_erase: erase PEB 4083
  UBI DBG: sync_erase: erased PEB 4083, new EC 56
  UBI DBG: ubi_io_write_ec_hdr: write EC header to PEB 4083
  UBI DBG: ubi_io_write: write 4096 bytes to PEB 4083:0
  UBI DBG: ensure_wear_leveling: schedule scrubbing
  UBI DBG: wear_leveling_worker: scrub PEB 1027 to PEB 4083
  ...

This patch adopts the interface change as in Linux commit edbc4540 in
order to avoid such situations. Given that none of the drivers under
drivers/mtd return -EUCLEAN, this should only affect those using
software ECC. I have tested that it works on a board which is
currently out of tree, but which I hope to be able to begin
upstreaming soon.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Acked-by: Stefan Roese <sr@denx.de>
2013-10-09 12:52:04 -05:00

429 lines
12 KiB
C

/*
* Simple MTD partitioning layer
*
* (C) 2000 Nicolas Pitre <nico@cam.org>
*
* This code is GPL
*
* 02-21-2002 Thomas Gleixner <gleixner@autronix.de>
* added support for read_oob, write_oob
*/
#include <common.h>
#include <malloc.h>
#include <asm/errno.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/partitions.h>
#include <linux/compat.h>
/* Our partition linked list */
struct list_head mtd_partitions;
/* Our partition node structure */
struct mtd_part {
struct mtd_info mtd;
struct mtd_info *master;
uint64_t offset;
int index;
struct list_head list;
int registered;
};
/*
* Given a pointer to the MTD object in the mtd_part structure, we can retrieve
* the pointer to that structure with this macro.
*/
#define PART(x) ((struct mtd_part *)(x))
/*
* MTD methods which simply translate the effective address and pass through
* to the _real_ device.
*/
static int part_read(struct mtd_info *mtd, loff_t from, size_t len,
size_t *retlen, u_char *buf)
{
struct mtd_part *part = PART(mtd);
struct mtd_ecc_stats stats;
int res;
stats = part->master->ecc_stats;
res = mtd_read(part->master, from + part->offset, len, retlen, buf);
if (unlikely(mtd_is_eccerr(res)))
mtd->ecc_stats.failed +=
part->master->ecc_stats.failed - stats.failed;
else
mtd->ecc_stats.corrected +=
part->master->ecc_stats.corrected - stats.corrected;
return res;
}
static int part_read_oob(struct mtd_info *mtd, loff_t from,
struct mtd_oob_ops *ops)
{
struct mtd_part *part = PART(mtd);
int res;
if (from >= mtd->size)
return -EINVAL;
if (ops->datbuf && from + ops->len > mtd->size)
return -EINVAL;
res = mtd_read_oob(part->master, from + part->offset, ops);
if (unlikely(res)) {
if (mtd_is_bitflip(res))
mtd->ecc_stats.corrected++;
if (mtd_is_eccerr(res))
mtd->ecc_stats.failed++;
}
return res;
}
static int part_read_user_prot_reg(struct mtd_info *mtd, loff_t from,
size_t len, size_t *retlen, u_char *buf)
{
struct mtd_part *part = PART(mtd);
return mtd_read_user_prot_reg(part->master, from, len, retlen, buf);
}
static int part_get_user_prot_info(struct mtd_info *mtd,
struct otp_info *buf, size_t len)
{
struct mtd_part *part = PART(mtd);
return mtd_get_user_prot_info(part->master, buf, len);
}
static int part_read_fact_prot_reg(struct mtd_info *mtd, loff_t from,
size_t len, size_t *retlen, u_char *buf)
{
struct mtd_part *part = PART(mtd);
return mtd_read_fact_prot_reg(part->master, from, len, retlen, buf);
}
static int part_get_fact_prot_info(struct mtd_info *mtd, struct otp_info *buf,
size_t len)
{
struct mtd_part *part = PART(mtd);
return mtd_get_fact_prot_info(part->master, buf, len);
}
static int part_write(struct mtd_info *mtd, loff_t to, size_t len,
size_t *retlen, const u_char *buf)
{
struct mtd_part *part = PART(mtd);
return mtd_write(part->master, to + part->offset, len, retlen, buf);
}
static int part_write_oob(struct mtd_info *mtd, loff_t to,
struct mtd_oob_ops *ops)
{
struct mtd_part *part = PART(mtd);
if (to >= mtd->size)
return -EINVAL;
if (ops->datbuf && to + ops->len > mtd->size)
return -EINVAL;
return mtd_write_oob(part->master, to + part->offset, ops);
}
static int part_write_user_prot_reg(struct mtd_info *mtd, loff_t from,
size_t len, size_t *retlen, u_char *buf)
{
struct mtd_part *part = PART(mtd);
return mtd_write_user_prot_reg(part->master, from, len, retlen, buf);
}
static int part_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
size_t len)
{
struct mtd_part *part = PART(mtd);
return mtd_lock_user_prot_reg(part->master, from, len);
}
static int part_erase(struct mtd_info *mtd, struct erase_info *instr)
{
struct mtd_part *part = PART(mtd);
int ret;
instr->addr += part->offset;
ret = mtd_erase(part->master, instr);
if (ret) {
if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
instr->fail_addr -= part->offset;
instr->addr -= part->offset;
}
return ret;
}
void mtd_erase_callback(struct erase_info *instr)
{
if (instr->mtd->_erase == part_erase) {
struct mtd_part *part = PART(instr->mtd);
if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
instr->fail_addr -= part->offset;
instr->addr -= part->offset;
}
if (instr->callback)
instr->callback(instr);
}
static int part_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
{
struct mtd_part *part = PART(mtd);
return mtd_lock(part->master, ofs + part->offset, len);
}
static int part_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
{
struct mtd_part *part = PART(mtd);
return mtd_unlock(part->master, ofs + part->offset, len);
}
static void part_sync(struct mtd_info *mtd)
{
struct mtd_part *part = PART(mtd);
mtd_sync(part->master);
}
static int part_block_isbad(struct mtd_info *mtd, loff_t ofs)
{
struct mtd_part *part = PART(mtd);
ofs += part->offset;
return mtd_block_isbad(part->master, ofs);
}
static int part_block_markbad(struct mtd_info *mtd, loff_t ofs)
{
struct mtd_part *part = PART(mtd);
int res;
ofs += part->offset;
res = mtd_block_markbad(part->master, ofs);
if (!res)
mtd->ecc_stats.badblocks++;
return res;
}
/*
* This function unregisters and destroy all slave MTD objects which are
* attached to the given master MTD object.
*/
int del_mtd_partitions(struct mtd_info *master)
{
struct mtd_part *slave, *next;
list_for_each_entry_safe(slave, next, &mtd_partitions, list)
if (slave->master == master) {
list_del(&slave->list);
if (slave->registered)
del_mtd_device(&slave->mtd);
kfree(slave);
}
return 0;
}
static struct mtd_part *add_one_partition(struct mtd_info *master,
const struct mtd_partition *part, int partno,
uint64_t cur_offset)
{
struct mtd_part *slave;
/* allocate the partition structure */
slave = kzalloc(sizeof(*slave), GFP_KERNEL);
if (!slave) {
printk(KERN_ERR"memory allocation error while creating partitions for \"%s\"\n",
master->name);
del_mtd_partitions(master);
return NULL;
}
list_add(&slave->list, &mtd_partitions);
/* set up the MTD object for this partition */
slave->mtd.type = master->type;
slave->mtd.flags = master->flags & ~part->mask_flags;
slave->mtd.size = part->size;
slave->mtd.writesize = master->writesize;
slave->mtd.oobsize = master->oobsize;
slave->mtd.oobavail = master->oobavail;
slave->mtd.subpage_sft = master->subpage_sft;
slave->mtd.name = part->name;
slave->mtd.owner = master->owner;
slave->mtd._read = part_read;
slave->mtd._write = part_write;
if (master->_read_oob)
slave->mtd._read_oob = part_read_oob;
if (master->_write_oob)
slave->mtd._write_oob = part_write_oob;
if (master->_read_user_prot_reg)
slave->mtd._read_user_prot_reg = part_read_user_prot_reg;
if (master->_read_fact_prot_reg)
slave->mtd._read_fact_prot_reg = part_read_fact_prot_reg;
if (master->_write_user_prot_reg)
slave->mtd._write_user_prot_reg = part_write_user_prot_reg;
if (master->_lock_user_prot_reg)
slave->mtd._lock_user_prot_reg = part_lock_user_prot_reg;
if (master->_get_user_prot_info)
slave->mtd._get_user_prot_info = part_get_user_prot_info;
if (master->_get_fact_prot_info)
slave->mtd._get_fact_prot_info = part_get_fact_prot_info;
if (master->_sync)
slave->mtd._sync = part_sync;
if (master->_lock)
slave->mtd._lock = part_lock;
if (master->_unlock)
slave->mtd._unlock = part_unlock;
if (master->_block_isbad)
slave->mtd._block_isbad = part_block_isbad;
if (master->_block_markbad)
slave->mtd._block_markbad = part_block_markbad;
slave->mtd._erase = part_erase;
slave->master = master;
slave->offset = part->offset;
slave->index = partno;
if (slave->offset == MTDPART_OFS_APPEND)
slave->offset = cur_offset;
if (slave->offset == MTDPART_OFS_NXTBLK) {
slave->offset = cur_offset;
if (mtd_mod_by_eb(cur_offset, master) != 0) {
/* Round up to next erasesize */
slave->offset = (mtd_div_by_eb(cur_offset, master) + 1) * master->erasesize;
debug("Moving partition %d: 0x%012llx -> 0x%012llx\n",
partno, (unsigned long long)cur_offset,
(unsigned long long)slave->offset);
}
}
if (slave->mtd.size == MTDPART_SIZ_FULL)
slave->mtd.size = master->size - slave->offset;
debug("0x%012llx-0x%012llx : \"%s\"\n",
(unsigned long long)slave->offset,
(unsigned long long)(slave->offset + slave->mtd.size),
slave->mtd.name);
/* let's do some sanity checks */
if (slave->offset >= master->size) {
/* let's register it anyway to preserve ordering */
slave->offset = 0;
slave->mtd.size = 0;
printk(KERN_ERR"mtd: partition \"%s\" is out of reach -- disabled\n",
part->name);
goto out_register;
}
if (slave->offset + slave->mtd.size > master->size) {
slave->mtd.size = master->size - slave->offset;
printk(KERN_WARNING"mtd: partition \"%s\" extends beyond the end of device \"%s\" -- size truncated to %#llx\n",
part->name, master->name, (unsigned long long)slave->mtd.size);
}
if (master->numeraseregions > 1) {
/* Deal with variable erase size stuff */
int i, max = master->numeraseregions;
u64 end = slave->offset + slave->mtd.size;
struct mtd_erase_region_info *regions = master->eraseregions;
/* Find the first erase regions which is part of this
* partition. */
for (i = 0; i < max && regions[i].offset <= slave->offset; i++)
;
/* The loop searched for the region _behind_ the first one */
i--;
/* Pick biggest erasesize */
for (; i < max && regions[i].offset < end; i++) {
if (slave->mtd.erasesize < regions[i].erasesize) {
slave->mtd.erasesize = regions[i].erasesize;
}
}
BUG_ON(slave->mtd.erasesize == 0);
} else {
/* Single erase size */
slave->mtd.erasesize = master->erasesize;
}
if ((slave->mtd.flags & MTD_WRITEABLE) &&
mtd_mod_by_eb(slave->offset, &slave->mtd)) {
/* Doesn't start on a boundary of major erase size */
/* FIXME: Let it be writable if it is on a boundary of
* _minor_ erase size though */
slave->mtd.flags &= ~MTD_WRITEABLE;
printk(KERN_WARNING"mtd: partition \"%s\" doesn't start on an erase block boundary -- force read-only\n",
part->name);
}
if ((slave->mtd.flags & MTD_WRITEABLE) &&
mtd_mod_by_eb(slave->mtd.size, &slave->mtd)) {
slave->mtd.flags &= ~MTD_WRITEABLE;
printk(KERN_WARNING"mtd: partition \"%s\" doesn't end on an erase block -- force read-only\n",
part->name);
}
slave->mtd.ecclayout = master->ecclayout;
if (master->_block_isbad) {
uint64_t offs = 0;
while (offs < slave->mtd.size) {
if (mtd_block_isbad(master, offs + slave->offset))
slave->mtd.ecc_stats.badblocks++;
offs += slave->mtd.erasesize;
}
}
out_register:
if (part->mtdp) {
/* store the object pointer (caller may or may not register it*/
*part->mtdp = &slave->mtd;
slave->registered = 0;
} else {
/* register our partition */
add_mtd_device(&slave->mtd);
slave->registered = 1;
}
return slave;
}
/*
* This function, given a master MTD object and a partition table, creates
* and registers slave MTD objects which are bound to the master according to
* the partition definitions.
*
* We don't register the master, or expect the caller to have done so,
* for reasons of data integrity.
*/
int add_mtd_partitions(struct mtd_info *master,
const struct mtd_partition *parts,
int nbparts)
{
struct mtd_part *slave;
uint64_t cur_offset = 0;
int i;
/*
* Need to init the list here, since LIST_INIT() does not
* work on platforms where relocation has problems (like MIPS
* & PPC).
*/
if (mtd_partitions.next == NULL)
INIT_LIST_HEAD(&mtd_partitions);
debug("Creating %d MTD partitions on \"%s\":\n", nbparts, master->name);
for (i = 0; i < nbparts; i++) {
slave = add_one_partition(master, parts + i, i, cur_offset);
if (!slave)
return -ENOMEM;
cur_offset = slave->offset + slave->mtd.size;
}
return 0;
}