Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
ERROR: code indent should use tabs where possible
#939: FILE: drivers/atm/atmtcp.c:270:
+ sk_for_each(s, head) {$
WARNING: please, no spaces at the start of a line
#939: FILE: drivers/atm/atmtcp.c:270:
+ sk_for_each(s, head) {$
ERROR: space required before the open brace '{'
#2295: FILE: fs/dlm/lowcomms.c:238:
+ hlist_for_each_entry_safe(con, n, &connection_hash[i], list){
WARNING: line over 80 characters
#3070: FILE: include/linux/hashtable.h:122:
+ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\
WARNING: line over 80 characters
#3084: FILE: include/linux/hashtable.h:132:
+#define hash_for_each_rcu(name, bkt, obj, member) \
WARNING: line over 80 characters
#3085: FILE: include/linux/hashtable.h:133:
+ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\
WARNING: line over 80 characters
#3102: FILE: include/linux/hashtable.h:146:
+ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\
WARNING: line over 80 characters
#3132: FILE: include/linux/hashtable.h:170:
+ hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))], member)
WARNING: line over 80 characters
#3147: FILE: include/linux/hashtable.h:181:
+#define hash_for_each_possible_safe(name, obj, tmp, member, key) \
WARNING: line over 80 characters
#3148: FILE: include/linux/hashtable.h:182:
+ hlist_for_each_entry_safe(obj, tmp, &name[hash_min(key, HASH_BITS(name))], member)
ERROR: Macros with complex values should be enclosed in parenthesis
#3188: FILE: include/linux/list.h:669:
+#define hlist_entry_safe(ptr, type, member) \
+ (ptr) ? hlist_entry(ptr, type, member) : NULL
WARNING: line over 80 characters
#3204: FILE: include/linux/list.h:678:
+#define hlist_for_each_entry(pos, head, member) \
WARNING: line over 80 characters
#3205: FILE: include/linux/list.h:679:
+ for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member); \
WARNING: line over 80 characters
#3206: FILE: include/linux/list.h:680:
+ pos; \
WARNING: line over 80 characters
#3221: FILE: include/linux/list.h:688:
+#define hlist_for_each_entry_continue(pos, member) \
WARNING: line over 80 characters
#3222: FILE: include/linux/list.h:689:
+ for (pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member);\
WARNING: line over 80 characters
#3223: FILE: include/linux/list.h:690:
+ pos; \
WARNING: line over 80 characters
#3255: FILE: include/linux/list.h:709:
+#define hlist_for_each_entry_safe(pos, n, head, member) \
WARNING: please, no space before tabs
#3255: FILE: include/linux/list.h:709:
+#define hlist_for_each_entry_safe(pos, n, head, member) ^I^I^I\$
WARNING: line over 80 characters
#3256: FILE: include/linux/list.h:710:
+ for (pos = hlist_entry_safe((head)->first, typeof(*pos), member); \
WARNING: line over 80 characters
#3257: FILE: include/linux/list.h:711:
+ pos && ({ n = pos->member.next; 1; }); \
WARNING: line over 80 characters
#3297: FILE: include/linux/rculist.h:456:
+#define hlist_for_each_entry_rcu(pos, head, member) \
WARNING: line over 80 characters
#3298: FILE: include/linux/rculist.h:457:
+ for (pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\
WARNING: space prohibited between function name and open parenthesis '('
#3298: FILE: include/linux/rculist.h:457:
+ for (pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\
WARNING: line over 80 characters
#3299: FILE: include/linux/rculist.h:458:
+ typeof(*(pos)), member); \
WARNING: line over 80 characters
#3300: FILE: include/linux/rculist.h:459:
+ pos; \
WARNING: line over 80 characters
#3301: FILE: include/linux/rculist.h:460:
+ pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \
WARNING: line over 80 characters
#3321: FILE: include/linux/rculist.h:473:
+#define hlist_for_each_entry_rcu_bh(pos, head, member) \
WARNING: line over 80 characters
#3322: FILE: include/linux/rculist.h:474:
+ for (pos = hlist_entry_safe(rcu_dereference_bh(hlist_first_rcu(head)), \
WARNING: line over 80 characters
#3323: FILE: include/linux/rculist.h:475:
+ typeof(*(pos)), member); \
WARNING: line over 80 characters
#3324: FILE: include/linux/rculist.h:476:
+ pos; \
WARNING: line over 80 characters
#3325: FILE: include/linux/rculist.h:477:
+ pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu( \
WARNING: line over 80 characters
#3340: FILE: include/linux/rculist.h:485:
+#define hlist_for_each_entry_continue_rcu(pos, member) \
WARNING: line over 80 characters
#3341: FILE: include/linux/rculist.h:486:
+ for (pos = hlist_entry_safe(rcu_dereference((pos)->member.next), \
WARNING: line over 80 characters
#3342: FILE: include/linux/rculist.h:487:
+ typeof(*(pos)), member); \
WARNING: line over 80 characters
#3343: FILE: include/linux/rculist.h:488:
+ pos; \
WARNING: line over 80 characters
#3344: FILE: include/linux/rculist.h:489:
+ pos = hlist_entry_safe(rcu_dereference((pos)->member.next), \
ERROR: code indent should use tabs where possible
#3345: FILE: include/linux/rculist.h:490:
+^I ^I^Itypeof(*(pos)), member))$
WARNING: please, no space before tabs
#3345: FILE: include/linux/rculist.h:490:
+^I ^I^Itypeof(*(pos)), member))$
WARNING: line over 80 characters
#3359: FILE: include/linux/rculist.h:497:
+#define hlist_for_each_entry_continue_rcu_bh(pos, member) \
WARNING: line over 80 characters
#3360: FILE: include/linux/rculist.h:498:
+ for (pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next), \
WARNING: line over 80 characters
#3361: FILE: include/linux/rculist.h:499:
+ typeof(*(pos)), member); \
WARNING: line over 80 characters
#3362: FILE: include/linux/rculist.h:500:
+ pos; \
WARNING: line over 80 characters
#3363: FILE: include/linux/rculist.h:501:
+ pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next), \
ERROR: code indent should use tabs where possible
#3364: FILE: include/linux/rculist.h:502:
+^I ^I^Itypeof(*(pos)), member))$
WARNING: please, no space before tabs
#3364: FILE: include/linux/rculist.h:502:
+^I ^I^Itypeof(*(pos)), member))$
ERROR: do not use C99 // comments
#3817: FILE: kernel/smpboot.c:134:
+ //BUG_ON(td->cpu != smp_processor_id());
ERROR: do not use C99 // comments
#4452: FILE: net/9p/trans_virtio.c:658:
+ //.zc_request = p9_virtio_zc_request,
WARNING: line over 80 characters
#7049: FILE: net/decnet/af_decnet.c:418:
+ sk_for_each(sk, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) {
WARNING: space prohibited between function name and open parenthesis '('
#7696: FILE: net/ipv4/raw.c:123:
+ sk_for_each_from (sk) {
ERROR: spaces required around that '=' (ctx:VxV)
#7697: FILE: net/ipv4/raw.c:124:
+ struct inet_sock *inet=inet_sk(sk);
^
WARNING: line over 80 characters
#7698: FILE: net/ipv4/raw.c:125:
+ if (net_eq(sock_net(sk), net) && inet->inet_num == num && !(inet->inet_daddr && inet->inet_daddr != raddr) && !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
WARNING: space prohibited between function name and open parenthesis '('
#8085: FILE: net/ipv6/raw.c:100:
+ sk_for_each_from (sk)
ERROR: spaces required around that '=' (ctx:VxV)
#8087: FILE: net/ipv6/raw.c:102:
+ struct ipv6_pinfo *np=inet6_sk(sk);
^
WARNING: line over 80 characters
#8090: FILE: net/ipv6/raw.c:105:
+ if (!ipv6_addr_any(&np->daddr) && !ipv6_addr_equal(&np->daddr, rmt_addr))
WARNING: line over 80 characters
#8092: FILE: net/ipv6/raw.c:107:
+ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
WARNING: Too many leading tabs - consider code refactoring
#8092: FILE: net/ipv6/raw.c:107:
+ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
WARNING: line over 80 characters
#8094: FILE: net/ipv6/raw.c:109:
+ if (!ipv6_addr_any(&np->rcv_saddr)) {
WARNING: Too many leading tabs - consider code refactoring
#8094: FILE: net/ipv6/raw.c:109:
+ if (!ipv6_addr_any(&np->rcv_saddr)) {
WARNING: line over 80 characters
#8095: FILE: net/ipv6/raw.c:110:
+ if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
WARNING: Too many leading tabs - consider code refactoring
#8095: FILE: net/ipv6/raw.c:110:
+ if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
WARNING: line over 80 characters
#8096: FILE: net/ipv6/raw.c:111:
+ goto found;
WARNING: line over 80 characters
#8097: FILE: net/ipv6/raw.c:112:
+ if (is_multicast && inet6_mc_check(sk, loc_addr, rmt_addr))
WARNING: Too many leading tabs - consider code refactoring
#8097: FILE: net/ipv6/raw.c:112:
+ if (is_multicast && inet6_mc_check(sk, loc_addr, rmt_addr))
WARNING: line over 80 characters
#8098: FILE: net/ipv6/raw.c:113:
+ goto found;
WARNING: line over 80 characters
#8099: FILE: net/ipv6/raw.c:114:
+ continue;
WARNING: line over 80 characters
#8101: FILE: net/ipv6/raw.c:116:
+ goto found;
WARNING: line over 80 characters
#8413: FILE: net/l2tp/l2tp_core.c:285:
+ hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) {
WARNING: line over 80 characters
#8427: FILE: net/l2tp/l2tp_core.c:310:
+ hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) {
total: 9 errors, 60 warnings, 9471 lines checked
NOTE: whitespace errors detected, you may wish to use scripts/cleanpatch or
scripts/cleanfile
./patches/hlist-drop-the-node-parameter-from-iterators.patch has style problems, please review.
If any of these errors are false positives, please report
them to the maintainer, see CHECKPATCH in MAINTAINERS.
Please run checkpatch prior to sending patches
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Peter Senna Tschudin <peter.senna@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Sasha Levin [Wed, 20 Feb 2013 02:17:03 +0000 (13:17 +1100)]
hlist: drop the node parameter from iterators
I'm not sure why, but the hlist for each entry iterators were conceived
differently from the list ones. The list iterator takes three parameters:
list_for_each_entry(pos, head, member)
The hlist ones were greedy and wanted an extra parameter:
hlist_for_each_entry(tpos, pos, head, member)
Why did they need an extra pos parameter? I'm not quite sure. Not only
do they not really need it, it also prevents the iterator from looking
exactly like the list iterator, which is unfortunate.
Besides the semantic patch, there was some manual work required:
- Fix up the actual hlist iterators in linux/list.h
- Fix up the declaration of other iterators based on the hlist ones.
- A very small number of places were using the 'node' parameter; these
were modified to use 'obj->member' instead.
- Coccinelle didn't handle the hlist_for_each_entry_safe iterator
properly, so those had to be fixed up manually.
The semantic patch which is mostly the work of Peter Senna Tschudin is here:
type T;
expression a,c,d,e;
identifier b;
statement S;
@@
-T b;
<+... when != b
(
hlist_for_each_entry(a,
- b,
c, d) S
|
hlist_for_each_entry_continue(a,
- b,
c) S
|
hlist_for_each_entry_from(a,
- b,
c) S
|
hlist_for_each_entry_rcu(a,
- b,
c, d) S
|
hlist_for_each_entry_rcu_bh(a,
- b,
c, d) S
|
hlist_for_each_entry_continue_rcu_bh(a,
- b,
c) S
|
for_each_busy_worker(a, c,
- b,
d) S
|
ax25_uid_for_each(a,
- b,
c) S
|
ax25_for_each(a,
- b,
c) S
|
inet_bind_bucket_for_each(a,
- b,
c) S
|
sctp_for_each_hentry(a,
- b,
c) S
|
sk_for_each(a,
- b,
c) S
|
sk_for_each_rcu(a,
- b,
c) S
|
sk_for_each_from
-(a, b)
+(a)
S
+ sk_for_each_from(a) S
|
sk_for_each_safe(a,
- b,
c, d) S
|
sk_for_each_bound(a,
- b,
c) S
|
hlist_for_each_entry_safe(a,
- b,
c, d, e) S
|
hlist_for_each_entry_continue_rcu(a,
- b,
c) S
|
nr_neigh_for_each(a,
- b,
c) S
|
nr_neigh_for_each_safe(a,
- b,
c, d) S
|
nr_node_for_each(a,
- b,
c) S
|
nr_node_for_each_safe(a,
- b,
c, d) S
|
- for_each_gfn_sp(a, c, d, b) S
+ for_each_gfn_sp(a, c, d) S
|
- for_each_gfn_indirect_valid_sp(a, c, d, b) S
+ for_each_gfn_indirect_valid_sp(a, c, d) S
|
for_each_host(a,
- b,
c) S
|
for_each_host_safe(a,
- b,
c, d) S
|
for_each_mesh_entry(a,
- b,
c, d) S
)
...+>
Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cyrill Gorcunov [Wed, 20 Feb 2013 02:17:02 +0000 (13:17 +1100)]
kcmp: make it depend on CHECKPOINT_RESTORE
Since the kcmp syscall was implemented (initially on the x86 architecture), a
number of other archs have wired it up as well: xtensa, sparc, sh, s390, mips,
microblaze, m68k (not taking into account those which use
<asm-generic/unistd.h> for syscall number definitions).
But the Makefile, which turns kcmp.o generation on, still depends on the former
config-x86. Thus get rid of this limitation and make kcmp.o depend on
the CHECKPOINT_RESTORE option.
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Cc: Pavel Emelyanov <xemul@parallels.com> Cc: Andrey Vagin <avagin@openvz.org> Cc: "H. Peter Anvin" <hpa@zytor.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Jeremy Kerr [Wed, 20 Feb 2013 02:17:01 +0000 (13:17 +1100)]
selftests/efivarfs: add create-read test
Test that reads from a newly-created efivarfs file (with no data written)
will return EOF.
Signed-off-by: Jeremy Kerr <jk@ozlabs.org> Cc: Matt Fleming <matt.fleming@intel.com> Cc: Lingzhu Xiang <lxiang@redhat.com> Cc: Dave Young <dyoung@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Jeremy Kerr [Wed, 20 Feb 2013 02:17:01 +0000 (13:17 +1100)]
selftests/efivarfs: add empty file creation test
Signed-off-by: Jeremy Kerr <jk@ozlabs.org> Cc: Matt Fleming <matt.fleming@intel.com> Cc: Lingzhu Xiang <lxiang@redhat.com> Cc: Dave Young <dyoung@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Andrew Morton [Wed, 20 Feb 2013 02:17:00 +0000 (13:17 +1100)]
selftests-add-tests-for-efivarfs-fix-fix
Cc: Dave Young <dyoung@redhat.com> Cc: Jeremy Kerr <jk@ozlabs.org> Cc: Lingzhu Xiang <lxiang@redhat.com> Cc: Matt Fleming <matt.fleming@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Andrew Morton [Wed, 20 Feb 2013 02:17:00 +0000 (13:17 +1100)]
selftests-add-tests-for-efivarfs-fix
Cc: Jeremy Kerr <jk@ozlabs.org> Cc: Matt Fleming <matt.fleming@intel.com> Cc: Lingzhu Xiang <lxiang@redhat.com> Cc: Dave Young <dyoung@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Jeremy Kerr [Wed, 20 Feb 2013 02:17:00 +0000 (13:17 +1100)]
selftests: add tests for efivarfs
This change adds a few initial efivarfs tests to the
tools/testing/selftests directory.
The open-unlink test is based on code from Lingzhu Xiang.
Signed-off-by: Jeremy Kerr <jk@ozlabs.org> Cc: Matt Fleming <matt.fleming@intel.com> Cc: Lingzhu Xiang <lxiang@redhat.com> Cc: Dave Young <dyoung@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Stefani Seibold [Wed, 20 Feb 2013 02:16:59 +0000 (13:16 +1100)]
kfifo: fix kfifo_alloc() and kfifo_init()
Fix kfifo_alloc() and kfifo_init() to allocate at least the requested number
of elements. Since the kfifo operates on powers of 2, the requested size will
be rounded up to the next power of two.
Signed-off-by: Stefani Seibold <stefani@seibold.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Benjamin LaHaise [Wed, 20 Feb 2013 02:16:58 +0000 (13:16 +1100)]
aio: v3: fix kioctx not being freed after cancellation at exit time
v3 corrects a typo in v2 of this patch. The wrong version of the patch was
copied over and misspelled ctx in the first hunk. A known good tree with
this patch is at git://git.kvack.org/~bcrl/linux-next-20130213.git-v3 .
Signed-off-by: Benjamin LaHaise <bcrl@kvack.org> Cc: Kent Overstreet <koverstreet@google.com> Cc: Josh Boyer <jwboyer@redhat.com> Cc: Zach Brown <zab@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Benjamin LaHaise [Wed, 20 Feb 2013 02:16:58 +0000 (13:16 +1100)]
aio: fix kioctx not being freed after cancellation at exit time
The recent changes overhauling fs/aio.c introduced a bug that results in the
kioctx not being freed when outstanding kiocbs are cancelled at exit_aio()
time. Specifically, a kiocb that is cancelled has its completion events
discarded by batch_complete_aio(), which then fails to wake up the process
stuck in free_ioctx(). Fix this by removing the event suppression in
batch_complete_aio() and modify the wait_event() condition in free_ioctx()
appropriately.
This patch was tested with the cancel operation in the thread based code
posted yesterday.
Signed-off-by: Benjamin LaHaise <bcrl@kvack.org> Cc: Kent Overstreet <koverstreet@google.com> Cc: Josh Boyer <jwboyer@redhat.com> Cc: Zach Brown <zab@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Benjamin LaHaise [Wed, 20 Feb 2013 02:16:58 +0000 (13:16 +1100)]
aio: correct calculation of available events
When the number of available events in the ring buffer is calculated, the
avail calculation is incorrect when head == tail. This is harmless in
aio_read_events_ring(), but in free_ioctx() leads to the subsequent
WARN_ON(atomic_read(&ctx->reqs_available) > ctx->nr). Correct this.
Signed-off-by: Benjamin LaHaise <bcrl@kvack.org> Reviewed-by: Kent Overstreet <koverstreet@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:57 +0000 (13:16 +1100)]
mtip32xx: convert to batch completion
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:57 +0000 (13:16 +1100)]
virtio-blk: convert to batch completion
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:56 +0000 (13:16 +1100)]
aio: Fix a null pointer deref in batch_complete_aio
The batch completion code was trying to be a bit too clever, and skip
checking ctx where it couldn't be NULL - but that broke if a kiocb had
been cancelled. Move the check to kioctx_ring_unlock().
Signed-off-by: Kent Overstreet <koverstreet@google.com> Reported-by: Valdis Kletnieks <Valdis.Kletnieks@vt.edu> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:54 +0000 (13:16 +1100)]
block, aio: batch completion for bios/kiocbs
When completing a kiocb, there's some fixed overhead from touching the
kioctx's ring buffer the kiocb belongs to. Some newer high end block
devices can complete multiple IOs per interrupt, much like many network
interfaces have been able to for some time.
This plumbs through infrastructure so we can take advantage of multiple
completions at the interrupt level, and complete multiple kiocbs at the
same time.
Drivers have to be converted to take advantage of this, but it's a simple
change and the next patches will convert a few drivers.
To use it, an interrupt handler (or any code that completes bios or
requests) declares and initializes a struct batch_complete:
Then, instead of calling bio_endio(), it calls
bio_endio_batch(bio, err, &batch). This just adds the bio to a list in
the batch_complete.
At the end, it calls
batch_complete(&batch);
This completes all the bios at once, building up a list of kiocbs;
then the list of kiocbs is completed all at once.
Also, in order to batch up the kiocbs we have to add a different bio_endio
function to struct bio, that takes a pointer to the batch_complete - this
patch converts the dio code's bio_endio function. In order to avoid
changing every bio_endio function in the kernel (there are many), we
currently use a union and a flag to indicate what kind of bio endio
function to call. This is admittedly a hack, but should suffice for now.
For batching to work through say md or dm devices, the md/dm bio_endio
functions would have to be converted, much like the dio code. That is
left for future patches.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:54 +0000 (13:16 +1100)]
aio-kill-ki_retry-fix-fix
The "aio: kill ki-retry" patch was assuming that we didn't touch struct
kiocb after passing it off to something that would call aio_complete() -
which was wrong. So, revert the refcounting changes.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:53 +0000 (13:16 +1100)]
aio: kill ki_retry
Thanks to Zach Brown's work to rip out the retry infrastructure, we don't
need this anymore - ki_retry was only called right after the kiocb was
initialized.
This also refactors and trims some duplicated code, as well as cleaning up
the refcounting/error handling a bit.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:53 +0000 (13:16 +1100)]
aio: kill ki_key
ki_key wasn't actually used for anything previously - it was always 0.
Drop it to trim struct kiocb a bit.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Andrew Morton [Wed, 20 Feb 2013 02:16:51 +0000 (13:16 +1100)]
aio-dont-include-aioh-in-schedh-fix
Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Benjamin LaHaise <bcrl@kvack.org> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Joel Becker <jlbec@evilplan.org> Cc: Kent Overstreet <koverstreet@google.com> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Zach Brown <zab@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:51 +0000 (13:16 +1100)]
aio: don't include aio.h in sched.h
Faster kernel compiles by way of fewer unnecessary includes.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:51 +0000 (13:16 +1100)]
aio: use xchg() instead of completion_lock
So, for sticking kiocb completions on the kioctx ringbuffer, we need a
lock - it unfortunately can't be lockless.
When the kioctx is shared between threads on different cpus and the rate
of completions is high, this lock sees quite a bit of contention - in
terms of cacheline contention it's the hottest thing in the aio subsystem.
That means, with a regular spinlock, we're going to take a cache miss to
grab the lock, then another cache miss when we touch the data the lock
protects - if it's on the same cacheline as the lock, other cpus spinning
on the lock are going to be pulling it out from under us as we're using
it.
So, we use an old trick to get rid of this second forced cache miss - make
the data the lock protects be the lock itself, so we grab them both at
once.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:50 +0000 (13:16 +1100)]
aio: percpu ioctx refcount
This just converts the ioctx refcount to the new generic dynamic percpu
refcount code.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:49 +0000 (13:16 +1100)]
percpu-refcount: sparse fixes
Here's some more fixes, the percpu refcount code is now sparse clean for
me. It's kind of ugly, but I'm not sure it's really any uglier than it
was before. Seem reasonable?
Signed-off-by: Kent Overstreet <koverstreet@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Andrew Morton [Wed, 20 Feb 2013 02:16:49 +0000 (13:16 +1100)]
generic-dynamic-per-cpu-refcounting-fix
lib/percpu-refcount.c: In function 'percpu_ref_init':
lib/percpu-refcount.c:22: error: 'jiffies' undeclared (first use in this function)
lib/percpu-refcount.c:22: error: (Each undeclared identifier is reported only once
lib/percpu-refcount.c:22: error: for each function it appears in.)
lib/percpu-refcount.c: In function 'percpu_ref_alloc':
lib/percpu-refcount.c:36: error: 'jiffies' undeclared (first use in this function)
lib/percpu-refcount.c:41: error: 'HZ' undeclared (first use in this function)
Cc: Kent Overstreet <koverstreet@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:48 +0000 (13:16 +1100)]
generic dynamic per cpu refcounting
This implements a refcount with similar semantics to
atomic_get()/atomic_dec_and_test(), that starts out as just an atomic_t
but dynamically switches to per cpu refcounting when the rate of gets/puts
becomes too high.
It also implements two stage shutdown, as we need it to tear down the
percpu counts. Before dropping the initial refcount, you must call
percpu_ref_kill(); this puts the refcount in "shutting down mode" and
switches back to a single atomic refcount with the appropriate barriers
(synchronize_rcu()).
It's also legal to call percpu_ref_kill() multiple times - it only returns
true once, so callers don't have to reimplement shutdown synchronization.
For the sake of simplicity/efficiency, the heuristic is pretty simple - it
just switches to percpu refcounting if there are more than x gets in one
second (completely arbitrarily, 4096).
It'd be more correct to count the number of cache misses or something else
more profile driven, but doing so would require accessing the shared ref
twice per get - by just counting the number of gets(), we can stick that
counter in the high bits of the refcount and increment both with a single
atomic64_add(). But I expect this'll be good enough in practice.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:48 +0000 (13:16 +1100)]
aio: percpu reqs_available
See the previous patch ("aio: reqs_active -> reqs_available") for why we
want to do this - this basically implements a per cpu allocator for
reqs_available that doesn't actually allocate anything.
Note that we need to increase the size of the ringbuffer we allocate,
since a single thread won't necessarily be able to use all the
reqs_available slots - some (up to about half) might be on other per cpu
lists, unavailable for the current thread.
We size the ringbuffer based on the nr_events userspace passed to
io_setup(), so this is a slight behaviour change - but nr_events wasn't
being used as a hard limit before; it was already being rounded up to the
next page, so this doesn't change the actual semantics.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:48 +0000 (13:16 +1100)]
aio: reqs_active -> reqs_available
The number of outstanding kiocbs is one of the few shared things left that
has to be touched for every kiocb - it'd be nice to make it percpu.
We can make it per cpu by treating it like an allocation problem: we have
a maximum number of kiocbs that can be outstanding (i.e. slots) - then we
just allocate and free slots, and we know how to write per cpu allocators.
So as prep work for that, we convert reqs_active to reqs_available.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:47 +0000 (13:16 +1100)]
aio: give shared kioctx fields their own cachelines
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:47 +0000 (13:16 +1100)]
aio: kill struct aio_ring_info
struct aio_ring_info was kind of odd, the only place it's used is where
it's embedded in struct kioctx - there's no real need for it.
The next patch rearranges struct kioctx and puts various things on their
own cachelines - getting rid of struct aio_ring_info now makes that
reordering a bit clearer.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:46 +0000 (13:16 +1100)]
aio: kill batch allocation
Previously, allocating a kiocb required touching quite a few global (well,
per kioctx) cachelines... so batching up allocation to amortize those was
worthwhile. But we've gotten rid of some of those, and in another couple
of patches kiocb allocation won't require writing to any shared
cachelines, so that means we can just rip this code out.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:46 +0000 (13:16 +1100)]
aio: change reqs_active to include unreaped completions
The aio code tries really hard to avoid having to deal with the completion
ringbuffer overflowing. To do that, it has to keep track of the number of
outstanding kiocbs, and the number of completions currently in the
ringbuffer - and it's got to check that every time we allocate a kiocb.
Ouch.
But - we can improve this quite a bit if we just change reqs_active to
mean "number of outstanding requests and unreaped completions" - that
means kiocb allocation doesn't have to look at the ringbuffer, which is a
fairly significant win.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:45 +0000 (13:16 +1100)]
aio-use-cancellation-list-lazily-fix
The cancellation changes were fubar - we can't cancel a kiocb if it
doesn't actually have a cancellation callback.
The use of xchg() in aio_complete() was right - there we're marking the
kiocb as completed - but we need to use cmpxchg() in kiocb_cancel() - a
lock isn't sufficient since we're synchronizing with aio_complete() which
isn't taking any locks.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:45 +0000 (13:16 +1100)]
aio: use cancellation list lazily
Cancelling kiocbs requires adding them to a per kioctx linked list, which
is one of the few things we need to take the kioctx lock for in the fast
path. But most kiocbs can't be cancelled - so if we just do this lazily,
we can avoid quite a bit of locking overhead.
While we're at it, instead of using a flag bit, switch to using ki_cancel
itself to indicate that a kiocb has been cancelled/completed. This lets
us get rid of ki_flags entirely.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:45 +0000 (13:16 +1100)]
aio: use flush_dcache_page()
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:44 +0000 (13:16 +1100)]
aio: make aio_read_evt() more efficient, convert to hrtimers
Previously, aio_read_evt() pulled a single completion off the ringbuffer
at a time, locking and unlocking each time. Change it to pull off as many
events as it can at a time, and copy them directly to userspace.
This also fixes a bug where if copying the event to userspace failed,
we'd lose the event.
Also convert it to wait_event_interruptible_hrtimeout(), which
simplifies it quite a bit.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Andrew Morton [Wed, 20 Feb 2013 02:16:44 +0000 (13:16 +1100)]
wait-add-wait_event_hrtimeout-fix
fix description of `timeout' arg
Cc: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:44 +0000 (13:16 +1100)]
wait: add wait_event_hrtimeout()
Analogous to wait_event_timeout() and friends, this adds
wait_event_hrtimeout() and wait_event_interruptible_hrtimeout().
Note that unlike the versions that use regular timers, these don't return
the amount of time remaining when they return - instead, they return 0 or
-ETIME if they timed out, because I was uncomfortable with the semantics
of doing it the other way (that I could get it right, anyways).
If the timer expires, there's no real guarantee that expire_time -
current_time would be <= 0 - due to timer slack certainly, and I'm not
sure I want to know the implications of the different clock bases in
hrtimers.
If the timer does expire and the code calculates that the time remaining
is nonnegative, that could be even worse if the calling code then reuses
that timeout. Probably safer to just return 0 then, but I could imagine
weird bugs or at least unintended behaviour arising from that too.
I came to the conclusion that if other users end up actually needing the
amount of time remaining, the sanest thing to do would be to create a
version that uses absolute timeouts instead of relative.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:44 +0000 (13:16 +1100)]
aio: refcounting cleanup
The usage of ctx->dead was fubar - it makes no sense to explicitly check
it all over the place, especially when we're already using RCU.
Now, ctx->dead only indicates whether we've dropped the initial
refcount. The new teardown sequence is:
set ctx->dead
hlist_del_rcu();
synchronize_rcu();
Now we know no system calls can take a new ref, and it's safe to drop
the initial ref:
put_ioctx();
We also need to ensure there are no more outstanding kiocbs. This was
done incorrectly - it was being done in kill_ctx(), and before dropping
the initial refcount. At this point, other syscalls may still be
submitting kiocbs!
Now, we cancel and wait for outstanding kiocbs in free_ioctx(), after
kioctx->users has dropped to 0 and we know no more iocbs could be
submitted.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:43 +0000 (13:16 +1100)]
aio: make aio_put_req() lockless
Freeing a kiocb needed to touch the kioctx for three things:
* Pulling it off the reqs_active list
* Decrementing reqs_active
* Issuing a wakeup, if the kioctx was in the process of being freed.
This patch moves these to aio_complete(), for a couple reasons:
* aio_complete() already has to issue the wakeup, so if we drop the
kioctx refcount before aio_complete does its wakeup we don't have to
do it twice.
* aio_complete currently has to take the kioctx lock, so it makes sense
for it to pull the kiocb off the reqs_active list too.
* A later patch is going to change reqs_active to include unreaped
completions - this will mean allocating a kiocb doesn't have to look
at the ringbuffer. So taking the decrement of reqs_active out of
kiocb_free() is useful prep work for that patch.
This doesn't really affect cancellation, since existing (usb) code that
implements a cancel function still calls aio_complete() - we just have
to make sure that aio_complete does the necessary teardown for cancelled
kiocbs.
It does affect code paths where we free kiocbs that were never
submitted; they need to decrement reqs_active and pull the kiocb off the
reqs_active list. This occurs in two places: kiocb_batch_free(), which
is going away in a later patch, and the error path in io_submit_one.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:43 +0000 (13:16 +1100)]
aio: do fget() after aio_get_req()
aio_get_req() will fail if we have the maximum number of requests
outstanding, which depending on the application may not be uncommon. So
avoid doing an unnecessary fget().
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:43 +0000 (13:16 +1100)]
aio: dprintk() -> pr_debug()
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:42 +0000 (13:16 +1100)]
aio: move private stuff out of aio.h
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:42 +0000 (13:16 +1100)]
aio: add kiocb_cancel()
Minor refactoring, to get rid of some duplicated code
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Kent Overstreet [Wed, 20 Feb 2013 02:16:41 +0000 (13:16 +1100)]
aio: kill return value of aio_complete()
Nothing used the return value, and it probably wasn't possible to use it
safely for the locked versions (aio_complete(), aio_put_req()). Just kill
it.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Acked-by: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Zach Brown [Wed, 20 Feb 2013 02:16:41 +0000 (13:16 +1100)]
char: add aio_{read,write} to /dev/{null,zero}
These are handy for measuring the cost of the aio infrastructure with
operations that do very little and complete immediately.
Signed-off-by: Zach Brown <zab@redhat.com> Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Zach Brown [Wed, 20 Feb 2013 02:16:41 +0000 (13:16 +1100)]
aio: remove retry-based AIO
This removes the retry-based AIO infrastructure now that nothing in tree
is using it.
We want to remove retry-based AIO because it is fundamentally unsafe. It
retries IO submission from a kernel thread that has only assumed the mm of
the submitting task. All other task_struct references in the IO
submission path will see the kernel thread, not the submitting task. This
design flaw means that nothing of any meaningful complexity can use
retry-based AIO.
This removes all the code and data associated with the retry machinery.
The most significant benefit of this is the removal of the locking around
the unused run list in the submission path.
This has only been compiled.
Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Zach Brown <zab@redhat.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Zach Brown [Wed, 20 Feb 2013 02:16:40 +0000 (13:16 +1100)]
gadget: remove only user of aio retry
This removes the only in-tree user of aio retry. This will let us remove
the retry code from the aio core.
Removing retry is relatively easy as the USB gadget wasn't using it to
retry IOs at all. It always fully submitted the IO in the context of the
initial io_submit() call. It only used the AIO retry facility to get the
submitter's mm context for copying the result of a read back to user
space. This is easy to implement with use_mm() and a work struct, much
like kvm does with async_pf_execute() for get_user_pages().
Signed-off-by: Zach Brown <zab@redhat.com> Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Zach Brown [Wed, 20 Feb 2013 02:16:40 +0000 (13:16 +1100)]
aio: remove dead code from aio.h
Signed-off-by: Zach Brown <zab@redhat.com> Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Zach Brown [Wed, 20 Feb 2013 02:16:40 +0000 (13:16 +1100)]
mm: remove old aio use_mm() comment
use_mm() is used in more places than just aio. There's no need to mention
callers when describing the function.
Signed-off-by: Zach Brown <zab@redhat.com> Signed-off-by: Kent Overstreet <koverstreet@google.com> Cc: Felipe Balbi <balbi@ti.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jens Axboe <axboe@kernel.dk> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Jan Kara [Wed, 20 Feb 2013 02:16:39 +0000 (13:16 +1100)]
fs/direct-io.c: fix possible use-after-free with AIO
Running AIO is pinning inode in memory using file reference. Once AIO is
completed using aio_complete(), file reference is put and inode can be
freed from memory. So we have to be sure that calling aio_complete() is
the last thing we do with the inode.
Acked-by: Jeff Moyer <jmoyer@redhat.com> Signed-off-by: Jan Kara <jack@suse.cz> Cc: Christoph Hellwig <hch@infradead.org> Cc: Jens Axboe <axboe@kernel.dk> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Jan Kara [Wed, 20 Feb 2013 02:16:39 +0000 (13:16 +1100)]
ocfs2: fix possible use-after-free with AIO
Running AIO is pinning inode in memory using file reference. Once AIO
is completed using aio_complete(), file reference is put and inode can
be freed from memory. So we have to be sure that calling aio_complete()
is the last thing we do with the inode.
Signed-off-by: Jan Kara <jack@suse.cz> Acked-by: Jeff Moyer <jmoyer@redhat.com> Acked-by: Joel Becker <jlbec@evilplan.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Julia Lawall [Wed, 20 Feb 2013 02:16:39 +0000 (13:16 +1100)]
drivers/pps/clients/pps-gpio.c: use devm_kzalloc
devm_kzalloc allocates memory that is released when a driver detaches.
This patch uses devm_kzalloc for data that is allocated in the probe
function of a platform device and is only freed in the remove function.
Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr> Cc: Rodolfo Giometti <giometti@enneenne.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Wouter Verhelst [Wed, 20 Feb 2013 02:16:35 +0000 (13:16 +1100)]
nbd: update documentation and link to mailinglist
Documentation/blockdev/nbd.txt contained some documentation which was
horribly outdated and probably still dates from the original patch that
added NBD support to the kernel.
This patch removes the useless and outdated bits. The tools on nbd.sf.net
are fully documented in manpages, which is where documentation for the
non-kernel bits should live.
Additionally, add a reference to the MAINTAINERS file for the nbd-general
mailinglist that is used for discussion of the userland tools and the
kernel module already.
Signed-off-by: Wouter Verhelst <w@uter.be> Cc: Paul Clements <Paul.Clements@steeleye.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Paolo Bonzini [Wed, 20 Feb 2013 02:16:35 +0000 (13:16 +1100)]
nbd: show read-only state in sysfs
Pass the read-only flag to set_device_ro, so that it will be visible to
the block layer and in sysfs.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Cc: Paul Clements <Paul.Clements@steeleye.com> Cc: Alex Bligh <alex@alex.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Paolo Bonzini [Wed, 20 Feb 2013 02:16:35 +0000 (13:16 +1100)]
nbd: fsync and kill block device on shutdown
There are two problems with shutdown in the NBD driver.
1: Receiving the NBD_DISCONNECT ioctl does not sync the filesystem.
This patch adds the sync operation into __nbd_ioctl()'s
NBD_DISCONNECT handler. This is useful because BLKFLSBUF is restricted
to processes that have CAP_SYS_ADMIN, and the NBD client may not
possess it (fsync of the block device does not sync the filesystem,
either).
2: Once we clear the socket we have no guarantee that later reads will
come from the same backing storage.
The patch adds calls to kill_bdev() in __nbd_ioctl()'s socket
clearing code so the page cache is cleaned, lest reads that hit the
page cache return stale data from the previously-accessible disk.
Example:
# qemu-nbd -r -c/dev/nbd0 /dev/sr0
# file -s /dev/nbd0
/dev/stdin: # UDF filesystem data (version 1.5) etc.
# qemu-nbd -d /dev/nbd0
# qemu-nbd -r -c/dev/nbd0 /dev/sda
# file -s /dev/nbd0
/dev/stdin: # UDF filesystem data (version 1.5) etc.
While /dev/sda has:
# file -s /dev/sda
/dev/sda: x86 boot sector; etc.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Acked-by: Paul Clements <Paul.Clements@steeleye.com> Cc: Alex Bligh <alex@alex.org.uk> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>