linux-brain/fs/pstore/inode.c

454 lines
9.7 KiB
C
Raw Permalink Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* Persistent Storage - ramfs parts.
*
* Copyright (C) 2010 Intel Corporation <tony.luck@intel.com>
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/string.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/ramfs.h>
#include <linux/parser.h>
#include <linux/sched.h>
#include <linux/magic.h>
#include <linux/pstore.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include "internal.h"
#define PSTORE_NAMELEN 64
static DEFINE_SPINLOCK(allpstore_lock);
static LIST_HEAD(allpstore);
struct pstore_private {
struct list_head list;
struct pstore_record *record;
size_t total_size;
};
struct pstore_ftrace_seq_data {
const void *ptr;
size_t off;
size_t size;
};
#define REC_SIZE sizeof(struct pstore_ftrace_record)
static void free_pstore_private(struct pstore_private *private)
{
if (!private)
return;
if (private->record) {
kfree(private->record->buf);
kfree(private->record);
}
kfree(private);
}
static void *pstore_ftrace_seq_start(struct seq_file *s, loff_t *pos)
{
struct pstore_private *ps = s->private;
struct pstore_ftrace_seq_data *data;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return NULL;
data->off = ps->total_size % REC_SIZE;
data->off += *pos * REC_SIZE;
if (data->off + REC_SIZE > ps->total_size) {
kfree(data);
return NULL;
}
return data;
}
static void pstore_ftrace_seq_stop(struct seq_file *s, void *v)
{
kfree(v);
}
static void *pstore_ftrace_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
struct pstore_private *ps = s->private;
struct pstore_ftrace_seq_data *data = v;
pstore: pstore_ftrace_seq_next should increase position index commit 6c871b7314dde9ab64f20de8f5aa3d01be4518e8 upstream. In Aug 2018 NeilBrown noticed commit 1f4aace60b0e ("fs/seq_file.c: simplify seq_file iteration code and interface") "Some ->next functions do not increment *pos when they return NULL... Note that such ->next functions are buggy and should be fixed. A simple demonstration is dd if=/proc/swaps bs=1000 skip=1 Choose any block size larger than the size of /proc/swaps. This will always show the whole last line of /proc/swaps" /proc/swaps output was fixed recently, however there are lot of other affected files, and one of them is related to pstore subsystem. If .next function does not change position index, following .show function will repeat output related to current position index. There are at least 2 related problems: - read after lseek beyond end of file, described above by NeilBrown "dd if=<AFFECTED_FILE> bs=1000 skip=1" will generate whole last list - read after lseek on in middle of last line will output expected rest of last line but then repeat whole last line once again. If .show() function generates multy-line output (like pstore_ftrace_seq_show() does ?) following bash script cycles endlessly $ q=;while read -r r;do echo "$((++q)) $r";done < AFFECTED_FILE Unfortunately I'm not familiar enough to pstore subsystem and was unable to find affected pstore-related file on my test node. If .next function does not change position index, following .show function will repeat output related to current position index. Cc: stable@vger.kernel.org Fixes: 1f4aace60b0e ("fs/seq_file.c: simplify seq_file iteration code ...") Link: https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Link: https://lore.kernel.org/r/4e49830d-4c88-0171-ee24-1ee540028dad@virtuozzo.com [kees: with robustness tweak from Joel Fernandes <joelaf@google.com>] Signed-off-by: Kees Cook <keescook@chromium.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2020-02-25 17:11:20 +09:00
(*pos)++;
data->off += REC_SIZE;
if (data->off + REC_SIZE > ps->total_size)
return NULL;
return data;
}
static int pstore_ftrace_seq_show(struct seq_file *s, void *v)
{
struct pstore_private *ps = s->private;
struct pstore_ftrace_seq_data *data = v;
struct pstore_ftrace_record *rec;
pstore: pstore_ftrace_seq_next should increase position index commit 6c871b7314dde9ab64f20de8f5aa3d01be4518e8 upstream. In Aug 2018 NeilBrown noticed commit 1f4aace60b0e ("fs/seq_file.c: simplify seq_file iteration code and interface") "Some ->next functions do not increment *pos when they return NULL... Note that such ->next functions are buggy and should be fixed. A simple demonstration is dd if=/proc/swaps bs=1000 skip=1 Choose any block size larger than the size of /proc/swaps. This will always show the whole last line of /proc/swaps" /proc/swaps output was fixed recently, however there are lot of other affected files, and one of them is related to pstore subsystem. If .next function does not change position index, following .show function will repeat output related to current position index. There are at least 2 related problems: - read after lseek beyond end of file, described above by NeilBrown "dd if=<AFFECTED_FILE> bs=1000 skip=1" will generate whole last list - read after lseek on in middle of last line will output expected rest of last line but then repeat whole last line once again. If .show() function generates multy-line output (like pstore_ftrace_seq_show() does ?) following bash script cycles endlessly $ q=;while read -r r;do echo "$((++q)) $r";done < AFFECTED_FILE Unfortunately I'm not familiar enough to pstore subsystem and was unable to find affected pstore-related file on my test node. If .next function does not change position index, following .show function will repeat output related to current position index. Cc: stable@vger.kernel.org Fixes: 1f4aace60b0e ("fs/seq_file.c: simplify seq_file iteration code ...") Link: https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Link: https://lore.kernel.org/r/4e49830d-4c88-0171-ee24-1ee540028dad@virtuozzo.com [kees: with robustness tweak from Joel Fernandes <joelaf@google.com>] Signed-off-by: Kees Cook <keescook@chromium.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2020-02-25 17:11:20 +09:00
if (!data)
return 0;
rec = (struct pstore_ftrace_record *)(ps->record->buf + data->off);
2019-03-26 04:32:28 +09:00
seq_printf(s, "CPU:%d ts:%llu %08lx %08lx %ps <- %pS\n",
pstore_ftrace_decode_cpu(rec),
pstore_ftrace_read_timestamp(rec),
rec->ip, rec->parent_ip, (void *)rec->ip,
(void *)rec->parent_ip);
return 0;
}
static const struct seq_operations pstore_ftrace_seq_ops = {
.start = pstore_ftrace_seq_start,
.next = pstore_ftrace_seq_next,
.stop = pstore_ftrace_seq_stop,
.show = pstore_ftrace_seq_show,
};
static ssize_t pstore_file_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
{
struct seq_file *sf = file->private_data;
struct pstore_private *ps = sf->private;
if (ps->record->type == PSTORE_TYPE_FTRACE)
return seq_read(file, userbuf, count, ppos);
return simple_read_from_buffer(userbuf, count, ppos,
ps->record->buf, ps->total_size);
}
static int pstore_file_open(struct inode *inode, struct file *file)
{
struct pstore_private *ps = inode->i_private;
struct seq_file *sf;
int err;
const struct seq_operations *sops = NULL;
if (ps->record->type == PSTORE_TYPE_FTRACE)
sops = &pstore_ftrace_seq_ops;
err = seq_open(file, sops);
if (err < 0)
return err;
sf = file->private_data;
sf->private = ps;
return 0;
}
static loff_t pstore_file_llseek(struct file *file, loff_t off, int whence)
{
struct seq_file *sf = file->private_data;
if (sf->op)
return seq_lseek(file, off, whence);
return default_llseek(file, off, whence);
}
static const struct file_operations pstore_file_operations = {
.open = pstore_file_open,
.read = pstore_file_read,
.llseek = pstore_file_llseek,
.release = seq_release,
};
/*
* When a file is unlinked from our file system we call the
* platform driver to erase the record from persistent store.
*/
static int pstore_unlink(struct inode *dir, struct dentry *dentry)
{
struct pstore_private *p = d_inode(dentry)->i_private;
struct pstore_record *record = p->record;
if (!record->psi->erase)
return -EPERM;
mutex_lock(&record->psi->read_mutex);
record->psi->erase(record);
mutex_unlock(&record->psi->read_mutex);
return simple_unlink(dir, dentry);
}
static void pstore_evict_inode(struct inode *inode)
{
struct pstore_private *p = inode->i_private;
unsigned long flags;
clear_inode(inode);
if (p) {
spin_lock_irqsave(&allpstore_lock, flags);
list_del(&p->list);
spin_unlock_irqrestore(&allpstore_lock, flags);
free_pstore_private(p);
}
}
static const struct inode_operations pstore_dir_inode_operations = {
.lookup = simple_lookup,
.unlink = pstore_unlink,
};
static struct inode *pstore_get_inode(struct super_block *sb)
{
struct inode *inode = new_inode(sb);
if (inode) {
inode->i_ino = get_next_ino();
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
}
return inode;
}
enum {
Opt_kmsg_bytes, Opt_err
};
static const match_table_t tokens = {
{Opt_kmsg_bytes, "kmsg_bytes=%u"},
{Opt_err, NULL}
};
static void parse_options(char *options)
{
char *p;
substring_t args[MAX_OPT_ARGS];
int option;
if (!options)
return;
while ((p = strsep(&options, ",")) != NULL) {
int token;
if (!*p)
continue;
token = match_token(p, tokens, args);
switch (token) {
case Opt_kmsg_bytes:
if (!match_int(&args[0], &option))
pstore_set_kmsg_bytes(option);
break;
}
}
}
/*
* Display the mount options in /proc/mounts.
*/
static int pstore_show_options(struct seq_file *m, struct dentry *root)
{
if (kmsg_bytes != PSTORE_DEFAULT_KMSG_BYTES)
seq_printf(m, ",kmsg_bytes=%lu", kmsg_bytes);
return 0;
}
static int pstore_remount(struct super_block *sb, int *flags, char *data)
{
fs: push sync_filesystem() down to the file system's remount_fs() Previously, the no-op "mount -o mount /dev/xxx" operation when the file system is already mounted read-write causes an implied, unconditional syncfs(). This seems pretty stupid, and it's certainly documented or guaraunteed to do this, nor is it particularly useful, except in the case where the file system was mounted rw and is getting remounted read-only. However, it's possible that there might be some file systems that are actually depending on this behavior. In most file systems, it's probably fine to only call sync_filesystem() when transitioning from read-write to read-only, and there are some file systems where this is not needed at all (for example, for a pseudo-filesystem or something like romfs). Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: linux-fsdevel@vger.kernel.org Cc: Christoph Hellwig <hch@infradead.org> Cc: Artem Bityutskiy <dedekind1@gmail.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Evgeniy Dushistov <dushistov@mail.ru> Cc: Jan Kara <jack@suse.cz> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Anders Larsen <al@alarsen.net> Cc: Phillip Lougher <phillip@squashfs.org.uk> Cc: Kees Cook <keescook@chromium.org> Cc: Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz> Cc: Petr Vandrovec <petr@vandrovec.name> Cc: xfs@oss.sgi.com Cc: linux-btrfs@vger.kernel.org Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Cc: codalist@coda.cs.cmu.edu Cc: linux-ext4@vger.kernel.org Cc: linux-f2fs-devel@lists.sourceforge.net Cc: fuse-devel@lists.sourceforge.net Cc: cluster-devel@redhat.com Cc: linux-mtd@lists.infradead.org Cc: jfs-discussion@lists.sourceforge.net Cc: linux-nfs@vger.kernel.org Cc: linux-nilfs@vger.kernel.org Cc: linux-ntfs-dev@lists.sourceforge.net Cc: ocfs2-devel@oss.oracle.com Cc: reiserfs-devel@vger.kernel.org
2014-03-13 23:14:33 +09:00
sync_filesystem(sb);
parse_options(data);
return 0;
}
static const struct super_operations pstore_ops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
.evict_inode = pstore_evict_inode,
.remount_fs = pstore_remount,
.show_options = pstore_show_options,
};
static struct super_block *pstore_sb;
bool pstore_is_mounted(void)
{
return pstore_sb != NULL;
}
/*
* Make a regular file in the root directory of our file system.
* Load it up with "size" bytes of data from "buf".
* Set the mtime & ctime to the date that this record was originally stored.
*/
int pstore_mkfile(struct dentry *root, struct pstore_record *record)
{
struct dentry *dentry;
struct inode *inode;
int rc = 0;
char name[PSTORE_NAMELEN];
struct pstore_private *private, *pos;
unsigned long flags;
size_t size = record->size + record->ecc_notice_size;
WARN_ON(!inode_is_locked(d_inode(root)));
spin_lock_irqsave(&allpstore_lock, flags);
list_for_each_entry(pos, &allpstore, list) {
if (pos->record->type == record->type &&
pos->record->id == record->id &&
pos->record->psi == record->psi) {
rc = -EEXIST;
break;
}
}
spin_unlock_irqrestore(&allpstore_lock, flags);
if (rc)
return rc;
rc = -ENOMEM;
inode = pstore_get_inode(root->d_sb);
if (!inode)
goto fail;
inode->i_mode = S_IFREG | 0444;
inode->i_fop = &pstore_file_operations;
scnprintf(name, sizeof(name), "%s-%s-%llu%s",
pstore_type_to_name(record->type),
record->psi->name, record->id,
record->compressed ? ".enc.z" : "");
private = kzalloc(sizeof(*private), GFP_KERNEL);
if (!private)
goto fail_inode;
dentry = d_alloc_name(root, name);
if (!dentry)
goto fail_private;
private->record = record;
inode->i_size = private->total_size = size;
inode->i_private = private;
if (record->time.tv_sec)
inode->i_mtime = inode->i_ctime = record->time;
d_add(dentry, inode);
spin_lock_irqsave(&allpstore_lock, flags);
list_add(&private->list, &allpstore);
spin_unlock_irqrestore(&allpstore_lock, flags);
return 0;
fail_private:
free_pstore_private(private);
fail_inode:
iput(inode);
fail:
return rc;
}
/*
* Read all the records from the persistent store. Create
* files in our filesystem. Don't warn about -EEXIST errors
* when we are re-scanning the backing store looking to add new
* error records.
*/
void pstore_get_records(int quiet)
{
struct pstore_info *psi = psinfo;
struct dentry *root;
if (!psi || !pstore_sb)
return;
root = pstore_sb->s_root;
inode_lock(d_inode(root));
pstore_get_backend_records(psi, root, quiet);
inode_unlock(d_inode(root));
}
static int pstore_fill_super(struct super_block *sb, void *data, int silent)
{
struct inode *inode;
pstore_sb = sb;
sb->s_maxbytes = MAX_LFS_FILESIZE;
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time ago with promise that one day it will be possible to implement page cache with bigger chunks than PAGE_SIZE. This promise never materialized. And unlikely will. We have many places where PAGE_CACHE_SIZE assumed to be equal to PAGE_SIZE. And it's constant source of confusion on whether PAGE_CACHE_* or PAGE_* constant should be used in a particular case, especially on the border between fs and mm. Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much breakage to be doable. Let's stop pretending that pages in page cache are special. They are not. The changes are pretty straight-forward: - <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>; - <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>; - PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN}; - page_cache_get() -> get_page(); - page_cache_release() -> put_page(); This patch contains automated changes generated with coccinelle using script below. For some reason, coccinelle doesn't patch header files. I've called spatch for them manually. The only adjustment after coccinelle is revert of changes to PAGE_CAHCE_ALIGN definition: we are going to drop it later. There are few places in the code where coccinelle didn't reach. I'll fix them manually in a separate patch. Comments and documentation also will be addressed with the separate patch. virtual patch @@ expression E; @@ - E << (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ expression E; @@ - E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ @@ - PAGE_CACHE_SHIFT + PAGE_SHIFT @@ @@ - PAGE_CACHE_SIZE + PAGE_SIZE @@ @@ - PAGE_CACHE_MASK + PAGE_MASK @@ expression E; @@ - PAGE_CACHE_ALIGN(E) + PAGE_ALIGN(E) @@ expression E; @@ - page_cache_get(E) + get_page(E) @@ expression E; @@ - page_cache_release(E) + put_page(E) Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Acked-by: Michal Hocko <mhocko@suse.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 21:29:47 +09:00
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = PSTOREFS_MAGIC;
sb->s_op = &pstore_ops;
sb->s_time_gran = 1;
parse_options(data);
inode = pstore_get_inode(sb);
if (inode) {
inode->i_mode = S_IFDIR | 0750;
inode->i_op = &pstore_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
inc_nlink(inode);
}
sb->s_root = d_make_root(inode);
if (!sb->s_root)
return -ENOMEM;
pstore_get_records(0);
return 0;
}
static struct dentry *pstore_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
return mount_single(fs_type, flags, data, pstore_fill_super);
}
static void pstore_kill_sb(struct super_block *sb)
{
kill_litter_super(sb);
pstore_sb = NULL;
}
static struct file_system_type pstore_fs_type = {
.owner = THIS_MODULE,
.name = "pstore",
.mount = pstore_mount,
.kill_sb = pstore_kill_sb,
};
int __init pstore_init_fs(void)
{
int err;
/* Create a convenient mount point for people to access pstore */
err = sysfs_create_mount_point(fs_kobj, "pstore");
if (err)
goto out;
err = register_filesystem(&pstore_fs_type);
if (err < 0)
sysfs_remove_mount_point(fs_kobj, "pstore");
out:
return err;
}
void __exit pstore_exit_fs(void)
{
unregister_filesystem(&pstore_fs_type);
sysfs_remove_mount_point(fs_kobj, "pstore");
}