Commit Graph

49 Commits

Author SHA1 Message Date
Kees Cook 42bc47b353 treewide: Use array_size() in vmalloc()
The vmalloc() function has no 2-factor argument form, so multiplication
factors need to be wrapped in array_size(). This patch replaces cases of:

        vmalloc(a * b)

with:
        vmalloc(array_size(a, b))

as well as handling cases of:

        vmalloc(a * b * c)

with:

        vmalloc(array3_size(a, b, c))

This does, however, attempt to ignore constant size factors like:

        vmalloc(4 * 1024)

though any constants defined via macros get caught up in the conversion.

Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.

The Coccinelle script used for this was:

// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@

(
  vmalloc(
-	(sizeof(TYPE)) * E
+	sizeof(TYPE) * E
  , ...)
|
  vmalloc(
-	(sizeof(THING)) * E
+	sizeof(THING) * E
  , ...)
)

// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@

(
  vmalloc(
-	sizeof(u8) * (COUNT)
+	COUNT
  , ...)
|
  vmalloc(
-	sizeof(__u8) * (COUNT)
+	COUNT
  , ...)
|
  vmalloc(
-	sizeof(char) * (COUNT)
+	COUNT
  , ...)
|
  vmalloc(
-	sizeof(unsigned char) * (COUNT)
+	COUNT
  , ...)
|
  vmalloc(
-	sizeof(u8) * COUNT
+	COUNT
  , ...)
|
  vmalloc(
-	sizeof(__u8) * COUNT
+	COUNT
  , ...)
|
  vmalloc(
-	sizeof(char) * COUNT
+	COUNT
  , ...)
|
  vmalloc(
-	sizeof(unsigned char) * COUNT
+	COUNT
  , ...)
)

// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@

(
  vmalloc(
-	sizeof(TYPE) * (COUNT_ID)
+	array_size(COUNT_ID, sizeof(TYPE))
  , ...)
|
  vmalloc(
-	sizeof(TYPE) * COUNT_ID
+	array_size(COUNT_ID, sizeof(TYPE))
  , ...)
|
  vmalloc(
-	sizeof(TYPE) * (COUNT_CONST)
+	array_size(COUNT_CONST, sizeof(TYPE))
  , ...)
|
  vmalloc(
-	sizeof(TYPE) * COUNT_CONST
+	array_size(COUNT_CONST, sizeof(TYPE))
  , ...)
|
  vmalloc(
-	sizeof(THING) * (COUNT_ID)
+	array_size(COUNT_ID, sizeof(THING))
  , ...)
|
  vmalloc(
-	sizeof(THING) * COUNT_ID
+	array_size(COUNT_ID, sizeof(THING))
  , ...)
|
  vmalloc(
-	sizeof(THING) * (COUNT_CONST)
+	array_size(COUNT_CONST, sizeof(THING))
  , ...)
|
  vmalloc(
-	sizeof(THING) * COUNT_CONST
+	array_size(COUNT_CONST, sizeof(THING))
  , ...)
)

// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@

  vmalloc(
-	SIZE * COUNT
+	array_size(COUNT, SIZE)
  , ...)

// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@

(
  vmalloc(
-	sizeof(TYPE) * (COUNT) * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  vmalloc(
-	sizeof(TYPE) * (COUNT) * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  vmalloc(
-	sizeof(TYPE) * COUNT * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  vmalloc(
-	sizeof(TYPE) * COUNT * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  vmalloc(
-	sizeof(THING) * (COUNT) * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  vmalloc(
-	sizeof(THING) * (COUNT) * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  vmalloc(
-	sizeof(THING) * COUNT * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  vmalloc(
-	sizeof(THING) * COUNT * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
)

// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@

(
  vmalloc(
-	sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
  , ...)
|
  vmalloc(
-	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
  , ...)
|
  vmalloc(
-	sizeof(THING1) * sizeof(THING2) * COUNT
+	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
  , ...)
|
  vmalloc(
-	sizeof(THING1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
  , ...)
|
  vmalloc(
-	sizeof(TYPE1) * sizeof(THING2) * COUNT
+	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
  , ...)
|
  vmalloc(
-	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
  , ...)
)

// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@

(
  vmalloc(
-	(COUNT) * STRIDE * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vmalloc(
-	COUNT * (STRIDE) * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vmalloc(
-	COUNT * STRIDE * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vmalloc(
-	(COUNT) * (STRIDE) * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vmalloc(
-	COUNT * (STRIDE) * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vmalloc(
-	(COUNT) * STRIDE * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vmalloc(
-	(COUNT) * (STRIDE) * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vmalloc(
-	COUNT * STRIDE * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
)

// Any remaining multi-factor products, first at least 3-factor products
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@

(
  vmalloc(C1 * C2 * C3, ...)
|
  vmalloc(
-	E1 * E2 * E3
+	array3_size(E1, E2, E3)
  , ...)
)

// And then all remaining 2 factors products when they're not all constants.
@@
expression E1, E2;
constant C1, C2;
@@

(
  vmalloc(C1 * C2, ...)
|
  vmalloc(
-	E1 * E2
+	array_size(E1, E2)
  , ...)
)

Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 16:19:22 -07:00
Jeff Mahoney 08db141b53 reiserfs: fix race in prealloc discard
The main loop in __discard_prealloc is protected by the reiserfs write lock
which is dropped across schedules like the BKL it replaced.  The problem is
that it checks the value, calls a routine that schedules, and then adjusts
the state.  As a result, two threads that are calling
reiserfs_prealloc_discard at the same time can race when one calls
reiserfs_free_prealloc_block, the lock is dropped, and the other calls
reiserfs_free_prealloc_block with the same block number.  In the right
circumstances, it can cause the prealloc count to go negative.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
2017-06-23 09:40:30 +02:00
Jeff Mahoney 54930dfeb4 reiserfs: don't preallocate blocks for extended attributes
Most extended attributes will fit in a single block.  More importantly,
we drop the reference to the inode while holding the transaction open
so the preallocated blocks aren't released.  As a result, the inode
may be evicted before it's removed from the transaction's prealloc list
which can cause memory corruption.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
2017-06-23 09:40:24 +02:00
Linus Torvalds 2840c566e9 Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull reiserfs and ext3 changes from Jan Kara:
 "Big reiserfs cleanup from Jeff, an ext3 deadlock fix, and some small
  cleanups"

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs: (34 commits)
  reiserfs: Fix compilation breakage with CONFIG_REISERFS_CHECK
  ext3: Fix deadlock in data=journal mode when fs is frozen
  reiserfs: call truncate_setsize under tailpack mutex
  fs/jbd/revoke.c: replace shift loop by ilog2
  reiserfs: remove obsolete __constant_cpu_to_le32
  reiserfs: balance_leaf refactor, split up balance_leaf_when_delete
  reiserfs: balance_leaf refactor, format balance_leaf_finish_node
  reiserfs: balance_leaf refactor, format balance_leaf_new_nodes_paste
  reiserfs: balance_leaf refactor, format balance_leaf_paste_right
  reiserfs: balance_leaf refactor, format balance_leaf_insert_right
  reiserfs: balance_leaf refactor, format balance_leaf_paste_left
  reiserfs: balance_leaf refactor, format balance_leaf_insert_left
  reiserfs: balance_leaf refactor, pull out balance_leaf{left, right, new_nodes, finish_node}
  reiserfs: balance_leaf refactor, pull out balance_leaf_finish_node_paste
  reiserfs: balance_leaf refactor pull out balance_leaf_finish_node_insert
  reiserfs: balance_leaf refactor, pull out balance_leaf_new_nodes_paste
  reiserfs: balance_leaf refactor, pull out balance_leaf_new_nodes_insert
  reiserfs: balance_leaf refactor, pull out balance_leaf_paste_right
  reiserfs: balance_leaf refactor, pull out balance_leaf_insert_right
  reiserfs: balance_leaf refactor, pull out balance_leaf_paste_left
  ...
2014-06-11 10:45:14 -07:00
Fabian Frederick ae0a50aba0 fs/reiserfs/bitmap.c: coding style fixes
-Trivial code clean-up
-Fix endif }; (coccinelle warning)

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Cc: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-06-06 16:08:16 -07:00
Jeff Mahoney a228bf8f0a reiserfs: cleanup, remove unnecessary parens
The reiserfs code is littered with extra parens in places where the authors
may not have been certain about precedence of & vs ->. This patch cleans them
out.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
2014-05-06 23:18:16 +02:00
Jeff Mahoney cf776a7a4d reiserfs: cleanup, remove leading whitespace from labels
This patch moves reiserfs closer to adhering to the style rules by
removing leading whitespace from labels.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
2014-05-06 23:18:09 +02:00
Jeff Mahoney 09f1b80ba8 reiserfs: cleanup, remove sb argument from journal_mark_dirty
journal_mark_dirty doesn't need a separate sb argument; It's provided
by the transaction handle.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
2014-05-06 23:10:37 +02:00
Jeff Mahoney 098297b27d reiserfs: cleanup, reformat comments to normal kernel style
This patch reformats comments in the reiserfs code to fit in 80 columns and
to follow the style rules.

There is no functional change but it helps make my eyes bleed less.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
2014-05-06 22:52:19 +02:00
Jeff Mahoney 4cf5f7addf reiserfs: cleanup, rename key and item accessors to more friendly names
This patch does a quick search and replace:
B_N_PITEM_HEAD() -> item_head()
B_N_PDELIM_KEY() -> internal_key()
B_N_PKEY() -> leaf_key()
B_N_PITEM() -> item_body()

And the item_head version:
B_I_PITEM() -> ih_item_body()
I_ENTRY_COUNT() -> ih_entry_count()

And the treepath variants:
get_ih() -> tp_item_head()
PATH_PITEM_HEAD() -> tp_item_head()
get_item() -> tp_item_body()

... which makes the code much easier on the eyes.

I've also removed a few unused macros.

Checkpatch will complain about the 80 character limit for do_balan.c.
I've addressed that in a later patchset to split up balance_leaf().

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
2014-05-06 22:51:44 +02:00
Jeff Mahoney d2d0395fd1 reiserfs: locking, release lock around quota operations
Previous commits released the write lock across quota operations but
missed several places.  In particular, the free operations can also
call into the file system code and take the write lock, causing
deadlocks.

This patch introduces some more helpers and uses them for quota call
sites.  Without this patch applied, reiserfs + quotas runs into deadlocks
under anything more than trivial load.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
2013-08-08 17:34:47 -04:00
Jeff Mahoney 278f6679f4 reiserfs: locking, handle nested locks properly
The reiserfs write lock replaced the BKL and uses similar semantics.

Frederic's locking code makes a distinction between when the lock is nested
and when it's being acquired/released, but I don't think that's the right
distinction to make.

The right distinction is between the lock being released at end-of-use and
the lock being released for a schedule. The unlock should return the depth
and the lock should restore it, rather than the other way around as it is now.

This patch implements that and adds a number of places where the lock
should be dropped.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
2013-08-08 17:34:46 -04:00
Jeff Mahoney 48d1788493 reiserfs: fix deadlocks with quotas
The BKL push-down for reiserfs made lock recursion a special case that needs
to be handled explicitly. One of the cases that was unhandled is dropping
the quota during inode eviction. Both reiserfs_evict_inode and
reiserfs_write_dquot take the write lock, but when the journal lock is
taken it only drops one the references. The locking rules are that the journal
lock be acquired before the write lock so leaving the reference open leads
to a ABBA deadlock.

This patch pushes the unlock up before clear_inode and avoids the recursive
locking.

Another ABBA situation can occur when the write lock is dropped while reading
the bitmap buffer while in the quota code. When the lock is reacquired, it
will deadlock against dquot->dq_lock and dqopt->dqio_mutex in the dquot_acquire
path. It's safe to retain the lock across the read and should be cached under
write load.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
2012-08-15 00:22:57 +02:00
Al Viro f466c6fdb3 move private bits of reiserfs_fs.h to fs/reiserfs/reiserfs.h
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2012-03-20 21:29:43 -04:00
Al Viro a8a4b79b53 kill pointless includes of reiserfs_fs_{i,sb}.h
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2012-03-20 21:29:43 -04:00
Frederic Weisbecker f32485be83 reiserfs: delay reiserfs lock until journal initialization
In the mount path, transactions that are made before journal
initialization don't involve the filesystem.  We can delay the reiserfs
lock until we play with the journal.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-01-10 16:30:53 -08:00
Jan Kara c3aa077648 reiserfs: Properly display mount options in /proc/mounts
Make reiserfs properly display mount options in /proc/mounts.

CC: reiserfs-devel@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2012-01-06 23:20:13 -05:00
Akinobu Mita 9d6bf5aa17 reiserfs: use hweight_long()
Use hweight_long() to count free bits in the bitmap.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-07-25 20:57:17 -07:00
Akinobu Mita 0c2fd1bfb1 reiserfs: use proper little-endian bitops
Using __test_and_{set,clear}_bit_le() with ignoring its return value can
be replaced with __{set,clear}_bit_le().

This introduces reiserfs_{set,clear}_le_bit for __{set,clear}_bit_le and
does the above change with them.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-07-25 20:57:17 -07:00
Jiri Kosina 318ae2edc3 Merge branch 'for-next' into for-linus
Conflicts:
	Documentation/filesystems/proc.txt
	arch/arm/mach-u300/include/mach/debug-macro.S
	drivers/net/qlge/qlge_ethtool.c
	drivers/net/qlge/qlge_main.c
	drivers/net/typhoon.c
2010-03-08 16:55:37 +01:00
Christoph Hellwig 5dd4056db8 dquot: cleanup space allocation / freeing routines
Get rid of the alloc_space, free_space, reserve_space, claim_space and
release_rsv dquot operations - they are always called from the filesystem
and if a filesystem really needs their own (which none currently does)
it can just call into it's own routine directly.

Move shared logic into the common __dquot_alloc_space,
dquot_claim_space_nodirty and __dquot_free_space low-level methods,
and rationalize the wrappers around it to move as much as possible
code into the common block for CONFIG_QUOTA vs not.  Also rename
all these helpers to be named dquot_* instead of vfs_dq_*.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
2010-03-05 00:20:28 +01:00
Daniel Mack 3ad2f3fbb9 tree-wide: Assorted spelling fixes
In particular, several occurances of funny versions of 'success',
'unknown', 'therefore', 'acknowledge', 'argument', 'achieve', 'address',
'beginning', 'desirable', 'separate' and 'necessary' are fixed.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Joe Perches <joe@perches.com>
Cc: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
2010-02-09 11:13:56 +01:00
Frederic Weisbecker 500f5a0bf5 reiserfs: Fix possible recursive lock
While allocating the bitmap using vmalloc, we hold the reiserfs lock,
which makes lockdep later reporting a possible deadlock as we may
swap out pages to allocate memory and then take the reiserfs lock
recursively:

inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage.
kswapd0/312 [HC0[0]:SC0[0]:HE1:SE1] takes:
 (&REISERFS_SB(s)->lock){+.+.?.}, at: [<c11108a8>] reiserfs_write_lock+0x28/0x40
{RECLAIM_FS-ON-W} state was registered at:
  [<c104e1c2>] mark_held_locks+0x62/0x90
  [<c104e28a>] lockdep_trace_alloc+0x9a/0xc0
  [<c108e396>] kmem_cache_alloc+0x26/0xf0
  [<c10850ec>] __get_vm_area_node+0x6c/0xf0
  [<c10857de>] __vmalloc_node+0x7e/0xa0
  [<c108597b>] vmalloc+0x2b/0x30
  [<c10e00b9>] reiserfs_init_bitmap_cache+0x39/0x70
  [<c10f8178>] reiserfs_fill_super+0x2e8/0xb90
  [<c1094345>] get_sb_bdev+0x145/0x180
  [<c10f5a11>] get_super_block+0x21/0x30
  [<c10931f0>] vfs_kern_mount+0x40/0xd0
  [<c10932d9>] do_kern_mount+0x39/0xd0
  [<c10a9857>] do_mount+0x2c7/0x6b0
  [<c10a9ca6>] sys_mount+0x66/0xa0
  [<c161589b>] mount_block_root+0xc4/0x245
  [<c1615a75>] mount_root+0x59/0x5f
  [<c1615b8c>] prepare_namespace+0x111/0x14b
  [<c1615269>] kernel_init+0xcf/0xdb
  [<c10031fb>] kernel_thread_helper+0x7/0x1c

This is actually fine for two reasons: we call vmalloc at mount time
then it's not in the swapping out path. Also the reiserfs lock can be
acquired recursively, but since its implementation depends on a mutex,
it's hard and not necessary worth it to teach that to lockdep.

The lock is useless at mount time anyway, at least until we replay the
journal. But let's remove it from this path later as this needs
more thinking and is a sensible change.

For now we can just relax the lock around vmalloc,

Reported-by: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
2009-12-14 11:43:09 +01:00
Frederic Weisbecker 4c5eface5d kill-the-BKL/reiserfs: release the write lock inside reiserfs_read_bitmap_block()
reiserfs_read_bitmap_block() uses sb_bread() to read the bitmap block. This
helper might sleep.

Then, when the bkl was used, it was released at this point. We can then
relax the write lock too here.

[ Impact: release the reiserfs write lock when it is not needed ]

Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
2009-09-14 07:18:11 +02:00
Frederic Weisbecker 8ebc423238 reiserfs: kill-the-BKL
This patch is an attempt to remove the Bkl based locking scheme from
reiserfs and is intended.

It is a bit inspired from an old attempt by Peter Zijlstra:

   http://lkml.indiana.edu/hypermail/linux/kernel/0704.2/2174.html

The bkl is heavily used in this filesystem to prevent from
concurrent write accesses on the filesystem.

Reiserfs makes a deep use of the specific properties of the Bkl:

- It can be acqquired recursively by a same task
- It is released on the schedule() calls and reacquired when schedule() returns

The two properties above are a roadmap for the reiserfs write locking so it's
very hard to simply replace it with a common mutex.

- We need a recursive-able locking unless we want to restructure several blocks
  of the code.
- We need to identify the sites where the bkl was implictly relaxed
  (schedule, wait, sync, etc...) so that we can in turn release and
  reacquire our new lock explicitly.
  Such implicit releases of the lock are often required to let other
  resources producer/consumer do their job or we can suffer unexpected
  starvations or deadlocks.

So the new lock that replaces the bkl here is a per superblock mutex with a
specific property: it can be acquired recursively by a same task, like the
bkl.

For such purpose, we integrate a lock owner and a lock depth field on the
superblock information structure.

The first axis on this patch is to turn reiserfs_write_(un)lock() function
into a wrapper to manage this mutex. Also some explicit calls to
lock_kernel() have been converted to reiserfs_write_lock() helpers.

The second axis is to find the important blocking sites (schedule...(),
wait_on_buffer(), sync_dirty_buffer(), etc...) and then apply an explicit
release of the write lock on these locations before blocking. Then we can
safely wait for those who can give us resources or those who need some.
Typically this is a fight between the current writer, the reiserfs workqueue
(aka the async commiter) and the pdflush threads.

The third axis is a consequence of the second. The write lock is usually
on top of a lock dependency chain which can include the journal lock, the
flush lock or the commit lock. So it's dangerous to release and trying to
reacquire the write lock while we still hold other locks.

This is fine with the bkl:

      T1                       T2

lock_kernel()
    mutex_lock(A)
    unlock_kernel()
    // do something
                            lock_kernel()
                                mutex_lock(A) -> already locked by T1
                                schedule() (and then unlock_kernel())
    lock_kernel()
    mutex_unlock(A)
    ....

This is not fine with a mutex:

      T1                       T2

mutex_lock(write)
    mutex_lock(A)
    mutex_unlock(write)
    // do something
                           mutex_lock(write)
                              mutex_lock(A) -> already locked by T1
                              schedule()

    mutex_lock(write) -> already locked by T2
    deadlock

The solution in this patch is to provide a helper which releases the write
lock and sleep a bit if we can't lock a mutex that depend on it. It's another
simulation of the bkl behaviour.

The last axis is to locate the fs callbacks that are called with the bkl held,
according to Documentation/filesystem/Locking.

Those are:

- reiserfs_remount
- reiserfs_fill_super
- reiserfs_put_super

Reiserfs didn't need to explicitly lock because of the context of these callbacks.
But now we must take care of that with the new locking.

After this patch, reiserfs suffers from a slight performance regression (for now).
On UP, a high volume write with dd reports an average of 27 MB/s instead
of 30 MB/s without the patch applied.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Bron Gondwana <brong@fastmail.fm>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
LKML-Reference: <1239070789-13354-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-14 07:17:59 +02:00
Linus Torvalds e1c5024828 Merge branch 'reiserfs-updates' from Jeff Mahoney
* reiserfs-updates: (35 commits)
  reiserfs: rename [cn]_* variables
  reiserfs: rename p_._ variables
  reiserfs: rename p_s_tb to tb
  reiserfs: rename p_s_inode to inode
  reiserfs: rename p_s_bh to bh
  reiserfs: rename p_s_sb to sb
  reiserfs: strip trailing whitespace
  reiserfs: cleanup path functions
  reiserfs: factor out buffer_info initialization
  reiserfs: add atomic addition of selinux attributes during inode creation
  reiserfs: use generic readdir for operations across all xattrs
  reiserfs: journaled xattrs
  reiserfs: use generic xattr handlers
  reiserfs: remove i_has_xattr_dir
  reiserfs: make per-inode xattr locking more fine grained
  reiserfs: eliminate per-super xattr lock
  reiserfs: simplify xattr internal file lookups/opens
  reiserfs: Clean up xattrs when REISERFS_FS_XATTR is unset
  reiserfs: remove IS_PRIVATE helpers
  reiserfs: remove link detection code
  ...

Fixed up conflicts manually due to:
 - quota name cleanups vs variable naming changes:
	fs/reiserfs/inode.c
	fs/reiserfs/namei.c
	fs/reiserfs/stree.c
        fs/reiserfs/xattr.c
 - exported include header cleanups
	include/linux/reiserfs_fs.h
2009-03-30 12:33:01 -07:00
Jeff Mahoney 0030b64570 reiserfs: use reiserfs_error()
This patch makes many paths that are currently using warnings to handle
the error.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-03-30 12:16:37 -07:00
Jeff Mahoney 45b03d5e8e reiserfs: rework reiserfs_warning
ReiserFS warnings can be somewhat inconsistent.
In some cases:
 * a unique identifier may be associated with it
 * the function name may be included
 * the device may be printed separately

This patch aims to make warnings more consistent. reiserfs_warning() prints
the device name, so printing it a second time is not required. The function
name for a warning is always helpful in debugging, so it is now automatically
inserted into the output. Hans has stated that every warning should have
a unique identifier. Some cases lack them, others really shouldn't have them.
reiserfs_warning() now expects an id associated with each message. In the
rare case where one isn't needed, "" will suffice.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-03-30 12:16:36 -07:00
Jeff Mahoney 1d889d9958 reiserfs: make some warnings informational
In several places, reiserfs_warning is used when there is no warning, just
a notice. This patch changes some of them to indicate that the message
is merely informational.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-03-30 12:16:35 -07:00
Jan Kara 77db4f25bc reiserfs: Use lowercase names of quota functions
Use lowercase names of quota functions instead of old uppercase ones.

Signed-off-by: Jan Kara <jack@suse.cz>
CC: reiserfs-devel@vger.kernel.org
2009-03-26 02:18:36 +01:00
Harvey Harrison fbe5498b3d reiserfs: replace remaining __FUNCTION__ occurrences
__FUNCTION__ is gcc-specific, use __func__

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-28 08:58:46 -07:00
Joe Perches c78bad11fb fs/: Spelling fixes
Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
2008-02-03 17:33:42 +02:00
Jeff Mahoney cb680c1be6 reiserfs: ignore on disk s_bmap_nr value
Implement support for file systems larger than 8 TiB.

The reiserfs superblock contains a 16 bit value for counting the number of
bitmap blocks.  The rest of the disk format supports file systems up to 2^32
blocks, but the bitmap block limitation artificially limits this to 8 TiB with
a 4KiB block size.

Rather than trust the superblock's 16-bit bitmap block count, we calculate it
dynamically based on the number of blocks in the file system.  When an
incorrect value is observed in the superblock, it is zeroed out, ensuring that
older kernels will not be able to mount the file system.

Userspace support has already been implemented and shipped in reiserfsprogs
3.6.20.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-19 11:53:35 -07:00
Jeff Mahoney 4d20851d37 reiserfs: remove first_zero_hint
The first_zero_hint metadata caching was never actually used, and it's of
dubious optimization quality.  This patch removes it.

It doesn't actually shrink the size of the reiserfs_bitmap_info struct, since
that doesn't work with block sizes larger than 8K.  There was a big fixme in
there, and with all the work lately in allowing block size > page size, I
might as well kill the fixme as well.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-19 11:53:35 -07:00
Jeff Mahoney 3ee1667042 reiserfs: fix usage of signed ints for block numbers
Do a quick signedness check for block numbers.  There are a number of places
where signed integers are used for block numbers, which limits the usable file
system size to 8 TiB.  The disk format, excepting a problem which will be
fixed in the following patch, supports file systems up to 16 TiB in size.
This patch cleans up those sites so that we can enable the full usable size.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-19 11:53:35 -07:00
Jeff Mahoney d4c3d19d0c reiserfs: use is_reusable to catch corruption
Build in is_reusable() unconditionally and use it to catch corruption before
it reaches the block freeing paths.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-19 11:53:35 -07:00
Adrian Bunk deba0f49b9 fs/reiserfs/: cleanups
- remove the following no longer used functions:
  - bitmap.c: reiserfs_claim_blocks_to_be_allocated()
  - bitmap.c: reiserfs_release_claimed_blocks()
  - bitmap.c: reiserfs_can_fit_pages()

- make the following functions static:
  - inode.c: restart_transaction()
  - journal.c: reiserfs_async_progress_wait()

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Vladimir V. Saveliev <vs@namesys.com>
Cc: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-17 08:42:46 -07:00
Josef "Jeff" Sipek fec6d055da [PATCH] struct path: rename Reiserfs's struct path
Rename Reiserfs's struct path to struct treepath to prevent name collision
between it and struct path from fs/namei.c.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Cc: <reiserfs-dev@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-08 08:28:40 -08:00
Eric Eric Sesterhenn 00079e04fe [PATCH] reiserfs: null pointer dereferencing in reiserfs_read_bitmap_block
null pointer dereferencing in reiserfs_read_bitmap_block.

Signed-off-by: Alexander Zarochentsev <zam@namesys.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-07 10:51:14 -07:00
Jeff Mahoney 9ea0f9499d [PATCH] reiserfs: eliminate minimum window size for bitmap searching
When a file system becomes fragmented (using MythTV, for example), the
bigalloc window searching ends up causing huge performance problems.  In a
file system presented by a user experiencing this bug, the file system was
90% free, but no 32-block free windows existed on the entire file system.
This causes the allocator to scan the entire file system for each 128k
write before backing down to searching for individual blocks.

In the end, finding a contiguous window for all the blocks in a write is an
advantageous special case, but one that can be found naturally when such a
window exists anyway.

This patch removes the bigalloc window searching, and has been proven to
fix the test case described above.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-01 00:39:28 -07:00
Jeff Mahoney 5065227b46 [PATCH] reiserfs: on-demand bitmap loading
This is the patch the three previous ones have been leading up to.

It changes the behavior of ReiserFS from loading and caching all the bitmaps
as special, to treating the bitmaps like any other bit of metadata and just
letting the system-wide caches figure out what to hang on to.

Buffer heads are allocated on the fly, so there is no need to retain pointers
to all of them.  The caching of the metadata occurs when the data is read and
updated, and is considered invalid and uncached until then.

I needed to remove the vs-4040 check for performing a duplicate operation on a
particular bit.  The reason is that while the other sites for working with
bitmaps are allowed to schedule, is_reusable() is called from do_balance(),
which will panic if a schedule occurs in certain places.

The benefit of on-demand bitmaps clearly outweighs a sanity check that depends
on a compile-time option that is discouraged.

[akpm@osdl.org: warning fix]
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Cc: <reiserfs-dev@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-01 00:39:28 -07:00
Jeff Mahoney 6f01046b35 [PATCH] reiserfs: reorganize bitmap loading functions
This patch moves the bitmap loading code from super.c to bitmap.c

The code is also restructured somewhat.  The only difference between new
format bitmaps and old format bitmaps is where they are.  That's a two liner
before loading the block to use the correct one.  There's no need for an
entirely separate code path.

The load path is generally the same, with the pattern being to throw out a
bunch of requests and then wait for them, then cache the metadata from the
contents.

Again, like the previous patches, the purpose is to set up for later ones.

Update: There was a bug in the previously posted version of this that resulted
in corruption.  The problem was that bitmap 0 on new format file systems must
be treated specially, and wasn't.  A stupid bug with an easy fix.

This is hopefully the last fix for the disaster that is the reiserfs bitmap
patch set.

If a bitmap block was full, first_zero_hint would end up at zero since it
would never be changed from it's zeroed out value.  This just sets it
beyond the end of the bitmap block.  If any bits are freed, it will be
reset to a valid bit.  When info->free_count = 0, then we already know it's
full.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Cc: <reiserfs-dev@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-01 00:39:27 -07:00
Jeff Mahoney 0b3dc17bc0 [PATCH] reiserfs: clean up bitmap block buffer head references
Similar to the SB_JOURNAL cleanup that was accepted a while ago, this patch
uses a temporary variable for buffer head references from the bitmap info
array.

This makes the code much more readable in some areas.

It also uses proper reference counting, doing a get_bh() after using the
pointer from the array and brelse()'ing it later.  This may seem silly, but a
later patch will replace the simple temporary variables with an actual read,
so the reference freeing will be used then.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Cc: <reiserfs-dev@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-01 00:39:27 -07:00
Jeff Mahoney e1fabd3ccf [PATCH] reiserfs: fix is_reusable bitmap check to not traverse the bitmap info array
There is a check in is_reusable to determine if a particular block is a bitmap
block.  It verifies this by going through the array of bitmap block buffer
heads and comparing the block number to each one.

Bitmap blocks are at defined locations on the disk in both old and current
formats.  Simply checking against the known good values is enough.

This is a trivial optimization for a non-production codepath, but this is the
first in a series of patches that will ultimately remove the buffer heads from
that array.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Cc: <reiserfs-dev@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-01 00:39:27 -07:00
Jörn Engel 6ab3d5624e Remove obsolete #include <linux/config.h>
Signed-off-by: Jörn Engel <joern@wohnheim.fh-wedel.de>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
2006-06-30 19:25:36 +02:00
Linus Torvalds bd4c625c06 reiserfs: run scripts/Lindent on reiserfs code
This was a pure indentation change, using:

	scripts/Lindent fs/reiserfs/*.c include/linux/reiserfs_*.h

to make reiserfs match the regular Linux indentation style.  As Jeff
Mahoney <jeffm@suse.com> writes:

 The ReiserFS code is a mix of a number of different coding styles, sometimes
 different even from line-to-line. Since the code has been relatively stable
 for quite some time and there are few outstanding patches to be applied, it
 is time to reformat the code to conform to the Linux style standard outlined
 in Documentation/CodingStyle.

 This patch contains the result of running scripts/Lindent against
 fs/reiserfs/*.c and include/linux/reiserfs_*.h. There are places where the
 code can be made to look better, but I'd rather keep those patches separate
 so that there isn't a subtle by-hand hand accident in the middle of a huge
 patch. To be clear: This patch is reformatting *only*.

 A number of patches may follow that continue to make the code more consistent
 with the Linux coding style.

 Hans wasn't particularly enthusiastic about these patches, but said he
 wouldn't really oppose them either.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-07-12 20:21:28 -07:00
Al Viro 3e8962be91 [PATCH] reiserfs endianness: annotate little-endian objects
little-endian objects annotated as such; again, obviously no changes of
resulting code, we only replace __u16 with __le16, etc.  in relevant places.

Signed-off-by: Al Viro <viro@parcelfarce.linux.theplanet.co.uk>
Cc: <reiserfs-dev@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-05-01 08:59:18 -07:00
Al Viro 6a3a16f2ef [PATCH] reiserfs endianness: clone struct reiserfs_key
struct reiserfs_key cloned; (currently) identical struct in_core_key added.
Places that expect host-endian data in reiserfs_key switched to in_core_key.
Basically, we get annotation of reiserfs_key users and keep the resulting tree
obviously equivalent to original.

Signed-off-by: Al Viro <viro@parcelfarce.linux.theplanet.co.uk>
Cc: <reiserfs-dev@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-05-01 08:59:17 -07:00
Linus Torvalds 1da177e4c3 Linux-2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!
2005-04-16 15:20:36 -07:00