git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge branch 'akpm-current/current'
author Stephen Rothwell <sfr@canb.auug.org.au>
Thu, 16 Jan 2014 05:43:09 +0000 (16:43 +1100)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Thu, 16 Jan 2014 05:43:09 +0000 (16:43 +1100)
404 files changed:
Documentation/blockdev/ramdisk.txt
Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/maxim,ds1742.txt [new file with mode: 0644]
Documentation/devicetree/bindings/vendor-prefixes.txt
Documentation/dynamic-debug-howto.txt
Documentation/filesystems/00-INDEX
Documentation/filesystems/nilfs2.txt
Documentation/filesystems/proc.txt
Documentation/filesystems/sysfs.txt
Documentation/filesystems/vfat.txt
Documentation/kernel-parameters.txt
Documentation/leds/leds-class.txt
Documentation/printk-formats.txt
Documentation/sysctl/kernel.txt
Documentation/sysctl/vm.txt
Documentation/trace/postprocess/trace-vmscan-postprocess.pl
Documentation/vm/locking [deleted file]
Documentation/vm/overcommit-accounting
MAINTAINERS
arch/alpha/Kconfig
arch/arc/Kconfig
arch/arm/Kconfig
arch/arm/include/asm/dma.h
arch/arm/include/asm/fixmap.h
arch/arm/kernel/devtree.c
arch/arm/kernel/setup.c
arch/arm/mach-omap2/omap_hwmod.c
arch/arm/mm/init.c
arch/hexagon/include/asm/fixmap.h
arch/ia64/Kconfig
arch/ia64/include/asm/dmi.h
arch/ia64/include/asm/processor.h
arch/ia64/mm/contig.c
arch/ia64/mm/discontig.c
arch/ia64/mm/init.c
arch/m32r/Kconfig
arch/metag/include/asm/fixmap.h
arch/metag/mm/init.c
arch/metag/mm/numa.c
arch/microblaze/Kconfig
arch/microblaze/include/asm/fixmap.h
arch/microblaze/mm/init.c
arch/mips/Kconfig
arch/mips/include/asm/fixmap.h
arch/mn10300/Kconfig
arch/parisc/Kconfig
arch/parisc/mm/init.c
arch/powerpc/Kconfig
arch/powerpc/include/asm/fixmap.h
arch/powerpc/mm/mem.c
arch/powerpc/mm/numa.c
arch/s390/Kconfig
arch/sh/Kconfig
arch/sh/include/asm/fixmap.h
arch/sh/kernel/dwarf.c
arch/sh/kernel/setup.c
arch/sparc/Kconfig
arch/sparc/mm/init_64.c
arch/tile/include/asm/fixmap.h
arch/um/include/asm/fixmap.h
arch/unicore32/mm/init.c
arch/x86/Kconfig
arch/x86/include/asm/dmi.h
arch/x86/include/asm/fixmap.h
arch/x86/include/asm/page_types.h
arch/x86/include/asm/tlbflush.h
arch/x86/kernel/check.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/mtrr/generic.c
arch/x86/kernel/e820.c
arch/x86/kernel/setup.c
arch/x86/mm/gup.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c
arch/x86/mm/memtest.c
arch/x86/mm/numa.c
arch/x86/mm/srat.c
arch/x86/mm/tlb.c
block/cmdline-parser.c
block/genhd.c
drivers/block/Kconfig
drivers/block/cciss.c
drivers/block/loop.c
drivers/block/paride/pg.c
drivers/block/sx8.c
drivers/char/mem.c
drivers/firmware/Kconfig
drivers/firmware/dmi_scan.c
drivers/firmware/memmap.c
drivers/gpu/drm/cirrus/cirrus_mode.c
drivers/gpu/drm/drm_fb_helper.c
drivers/gpu/drm/gma500/backlight.c
drivers/gpu/drm/nouveau/nouveau_acpi.c
drivers/input/Kconfig
drivers/input/Makefile
drivers/input/input.c
drivers/input/leds.c [new file with mode: 0644]
drivers/iommu/intel-iommu.c
drivers/leds/Kconfig
drivers/mailbox/omap-mbox.h
drivers/memstick/host/rtsx_pci_ms.c
drivers/mfd/max8998.c
drivers/mfd/tps65217.c
drivers/misc/ti-st/st_core.c
drivers/net/irda/donauboe.c
drivers/pps/pps.c
drivers/rtc/Kconfig
drivers/rtc/Makefile
drivers/rtc/class.c
drivers/rtc/rtc-as3722.c
drivers/rtc/rtc-cmos.c
drivers/rtc/rtc-ds1305.c
drivers/rtc/rtc-ds1742.c
drivers/rtc/rtc-hym8563.c [new file with mode: 0644]
drivers/rtc/rtc-max8907.c
drivers/rtc/rtc-mxc.c
drivers/rtc/rtc-pcf2127.c
drivers/rtc/rtc-s5m.c
drivers/rtc/rtc-twl.c
drivers/rtc/rtc-vr41xx.c
drivers/scsi/megaraid/megaraid_mm.c
drivers/tty/Kconfig
drivers/tty/vt/keyboard.c
drivers/video/aty/aty128fb.c
drivers/video/backlight/backlight.c
drivers/video/backlight/hp680_bl.c
drivers/video/backlight/jornada720_bl.c
drivers/video/backlight/jornada720_lcd.c
drivers/video/backlight/kb3886_bl.c
drivers/video/backlight/l4f00242t03.c
drivers/video/backlight/lp855x_bl.c
drivers/video/backlight/lp8788_bl.c
drivers/video/backlight/omap1_bl.c
drivers/video/backlight/ot200_bl.c
drivers/video/backlight/tosa_bl.c
drivers/video/backlight/tosa_lcd.c
drivers/vlynq/vlynq.c
drivers/w1/masters/w1-gpio.c
drivers/w1/w1_int.c
fs/afs/proc.c
fs/autofs4/autofs_i.h
fs/autofs4/dev-ioctl.c
fs/autofs4/expire.c
fs/autofs4/inode.c
fs/autofs4/root.c
fs/autofs4/symlink.c
fs/autofs4/waitq.c
fs/binfmt_elf.c
fs/compat_ioctl.c
fs/coredump.c
fs/coredump.h [deleted file]
fs/exec.c
fs/ext3/dir.c
fs/ext4/block_validity.c
fs/ext4/dir.c
fs/fat/cache.c
fs/fat/fat.h
fs/fat/file.c
fs/fat/inode.c
fs/hfsplus/inode.c
fs/jffs2/fs.c
fs/jffs2/nodelist.c
fs/jffs2/readinode.c
fs/logfs/segment.c
fs/nilfs2/ioctl.c
fs/notify/dnotify/dnotify.c
fs/notify/fanotify/fanotify.c
fs/notify/fanotify/fanotify.h [new file with mode: 0644]
fs/notify/fanotify/fanotify_user.c
fs/notify/fsnotify.c
fs/notify/group.c
fs/notify/inotify/inotify.h
fs/notify/inotify/inotify_fsnotify.c
fs/notify/inotify/inotify_user.c
fs/notify/notification.c
fs/ocfs2/Makefile
fs/ocfs2/alloc.c
fs/ocfs2/cluster/Makefile
fs/ocfs2/cluster/nodemanager.c
fs/ocfs2/cluster/tcp.c
fs/ocfs2/cluster/ver.c [deleted file]
fs/ocfs2/cluster/ver.h [deleted file]
fs/ocfs2/dlm/Makefile
fs/ocfs2/dlm/dlmdomain.c
fs/ocfs2/dlm/dlmver.c [deleted file]
fs/ocfs2/dlm/dlmver.h [deleted file]
fs/ocfs2/dlmfs/Makefile
fs/ocfs2/dlmfs/dlmfs.c
fs/ocfs2/dlmfs/dlmfsver.c [deleted file]
fs/ocfs2/dlmfs/dlmfsver.h [deleted file]
fs/ocfs2/dlmglue.c
fs/ocfs2/file.c
fs/ocfs2/ioctl.c
fs/ocfs2/localalloc.c
fs/ocfs2/localalloc.h
fs/ocfs2/move_extents.c
fs/ocfs2/namei.c
fs/ocfs2/ocfs2.h
fs/ocfs2/stack_o2cb.c
fs/ocfs2/stack_user.c
fs/ocfs2/stackglue.c
fs/ocfs2/stackglue.h
fs/ocfs2/suballoc.c
fs/ocfs2/suballoc.h
fs/ocfs2/super.c
fs/ocfs2/ver.c [deleted file]
fs/ocfs2/ver.h [deleted file]
fs/pipe.c
fs/posix_acl.c
fs/proc/array.c
fs/proc/base.c
fs/proc/cmdline.c
fs/proc/consoles.c
fs/proc/cpuinfo.c
fs/proc/devices.c
fs/proc/generic.c
fs/proc/interrupts.c
fs/proc/kcore.c
fs/proc/kmsg.c
fs/proc/loadavg.c
fs/proc/meminfo.c
fs/proc/nommu.c
fs/proc/page.c
fs/proc/proc_devtree.c
fs/proc/softirqs.c
fs/proc/stat.c
fs/proc/uptime.c
fs/proc/version.c
fs/proc/vmcore.c
fs/proc_namespace.c
fs/ramfs/file-mmu.c
fs/ramfs/file-nommu.c
fs/ramfs/inode.c
fs/ramfs/internal.h
fs/read_write.c
fs/reiserfs/reiserfs.h
fs/romfs/super.c
fs/super.c
fs/ubifs/debug.c
fs/ubifs/log.c
fs/ubifs/orphan.c
fs/ubifs/recovery.c
fs/ubifs/super.c
fs/ubifs/tnc.c
fs/ufs/balloc.c
fs/ufs/ialloc.c
fs/ufs/super.c
fs/ufs/ufs.h
include/asm-generic/fixmap.h [new file with mode: 0644]
include/asm-generic/int-l64.h [deleted file]
include/linux/bootmem.h
include/linux/cache.h
include/linux/ceph/decode.h
include/linux/cmdline-parser.h
include/linux/compaction.h
include/linux/crc64_ecma.h [new file with mode: 0644]
include/linux/dma-debug.h
include/linux/fsnotify_backend.h
include/linux/genalloc.h
include/linux/gfp.h
include/linux/huge_mm.h
include/linux/hugetlb.h
include/linux/hugetlb_cgroup.h
include/linux/init_task.h
include/linux/input.h
include/linux/ipc.h
include/linux/ipc_namespace.h
include/linux/kernel.h
include/linux/kexec.h
include/linux/ksm.h
include/linux/kthread.h
include/linux/memblock.h
include/linux/memcontrol.h
include/linux/mempolicy.h
include/linux/migrate.h
include/linux/mm.h
include/linux/mman.h
include/linux/mmdebug.h
include/linux/mmzone.h
include/linux/msg.h
include/linux/of.h
include/linux/page-flags.h
include/linux/pagemap.h
include/linux/parser.h
include/linux/percpu.h
include/linux/posix_acl.h
include/linux/printk.h
include/linux/ramfs.h
include/linux/rmap.h
include/linux/sched.h
include/linux/sched/sysctl.h
include/linux/shm.h
include/linux/slab.h
include/linux/splice.h
include/linux/vm_event_item.h
include/linux/vmstat.h
include/linux/w1-gpio.h
include/trace/events/compaction.h
include/trace/events/migrate.h
include/trace/events/sched.h
include/uapi/asm-generic/types.h
init/initramfs.c
init/main.c
ipc/compat.c
ipc/compat_mq.c
ipc/ipc_sysctl.c
ipc/mqueue.c
ipc/msg.c
ipc/sem.c
ipc/shm.c
ipc/util.c
ipc/util.h
kernel/audit_tree.c
kernel/audit_watch.c
kernel/exit.c
kernel/fork.c
kernel/hung_task.c
kernel/kexec.c
kernel/kmod.c
kernel/ksysfs.c
kernel/kthread.c
kernel/power/snapshot.c
kernel/printk/printk.c
kernel/profile.c
kernel/sched/core.c
kernel/sched/fair.c
kernel/sched/stats.c
kernel/signal.c
kernel/sys.c
kernel/sysctl.c
kernel/time/sched_clock.c
kernel/user.c
kernel/user_namespace.c
kernel/watchdog.c
lib/Kconfig
lib/Kconfig.debug
lib/Makefile
lib/assoc_array.c
lib/cmdline.c
lib/cpumask.c
lib/crc64_ecma.c [new file with mode: 0644]
lib/decompress_unlz4.c
lib/dma-debug.c
lib/dynamic_debug.c
lib/kstrtox.c
lib/parser.c
lib/rbtree_test.c
lib/show_mem.c
lib/swiotlb.c
lib/test_module.c [new file with mode: 0644]
lib/test_user_copy.c [new file with mode: 0644]
lib/vsprintf.c
mm/balloon_compaction.c
mm/cleancache.c
mm/compaction.c
mm/filemap.c
mm/huge_memory.c
mm/hugetlb.c
mm/hugetlb_cgroup.c
mm/hwpoison-inject.c
mm/internal.h
mm/ksm.c
mm/memblock.c
mm/memcontrol.c
mm/memory-failure.c
mm/memory.c
mm/memory_hotplug.c
mm/mempolicy.c
mm/migrate.c
mm/mincore.c
mm/mlock.c
mm/mm_init.c
mm/mmap.c
mm/mmu_notifier.c
mm/mprotect.c
mm/nobootmem.c
mm/nommu.c
mm/oom_kill.c
mm/page_alloc.c
mm/page_cgroup.c
mm/page_io.c
mm/percpu.c
mm/rmap.c
mm/shmem.c
mm/slab.h
mm/slab_common.c
mm/slub.c
mm/sparse-vmemmap.c
mm/sparse.c
mm/swap.c
mm/swap_state.c
mm/swapfile.c
mm/util.c
mm/vmalloc.c
mm/vmscan.c
mm/zswap.c
net/ipv4/tcp_illinois.c
net/netfilter/ipset/ip_set_hash_netiface.c
scripts/checkpatch.pl
scripts/get_maintainer.pl
scripts/sortextable.c
tools/testing/selftests/Makefile
tools/testing/selftests/user/Makefile [new file with mode: 0644]

index fa72e97dd66941c16213db1eb3a6a885e0d0c5c5..fe2ef978d85a467008f3373326b39851efb8fe99 100644 (file)
@@ -36,21 +36,30 @@ allowing one to squeeze more programs onto an average installation or
 rescue floppy disk.
 
 
-2) Kernel Command Line Parameters
+2) Parameters
 ---------------------------------
 
+2a) Kernel Command Line Parameters
+
        ramdisk_size=N
        ==============
 
 This parameter tells the RAM disk driver to set up RAM disks of N k size.  The
-default is 4096 (4 MB) (8192 (8 MB) on S390).
+default is 4096 (4 MB).
+
+2b) Module parameters
 
-       ramdisk_blocksize=N
-       ===================
+       rd_nr
+       =====
+       /dev/ramX devices created.
 
-This parameter tells the RAM disk driver how many bytes to use per block.  The
-default is 1024 (BLOCK_SIZE).
+       max_part
+       ========
+       Maximum partition number.
 
+       rd_size
+       =======
+       See ramdisk_size.
 
 3) Using "rdev -r"
 ------------------
diff --git a/Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt b/Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt
new file mode 100644 (file)
index 0000000..31406fd
--- /dev/null
@@ -0,0 +1,27 @@
+Haoyu Microelectronics HYM8563 Real Time Clock
+
+The HYM8563 provides basic rtc and alarm functionality
+as well as a clock output of up to 32kHz.
+
+Required properties:
+- compatible: should be: "haoyu,hym8563"
+- reg: i2c address
+- interrupts: rtc alarm/event interrupt
+- #clock-cells: the value should be 0
+
+Example:
+
+hym8563: hym8563@51 {
+       compatible = "haoyu,hym8563";
+       reg = <0x51>;
+
+       interrupts = <13 IRQ_TYPE_EDGE_FALLING>;
+
+       #clock-cells = <0>;
+};
+
+device {
+...
+       clocks = <&hym8563>;
+...
+};
diff --git a/Documentation/devicetree/bindings/rtc/maxim,ds1742.txt b/Documentation/devicetree/bindings/rtc/maxim,ds1742.txt
new file mode 100644 (file)
index 0000000..d0f937c
--- /dev/null
@@ -0,0 +1,12 @@
+* Maxim (Dallas) DS1742/DS1743 Real Time Clock
+
+Required properties:
+- compatible: Should contain "maxim,ds1742".
+- reg: Physical base address of the RTC and length of memory
+  mapped region.
+
+Example:
+       rtc: rtc@10000000 {
+               compatible = "maxim,ds1742";
+               reg = <0x10000000 0x800>;
+       };
index 27b1a9eecb4887b5e9bb24adcd579b4c5676f787..b458760691a037942689969d1d41d512d0c671f5 100644 (file)
@@ -37,6 +37,7 @@ fsl   Freescale Semiconductor
 GEFanuc        GE Fanuc Intelligent Platforms Embedded Systems, Inc.
 gef    GE Fanuc Intelligent Platforms Embedded Systems, Inc.
 gmt    Global Mixed-mode Technology, Inc.
+haoyu  Haoyu Microelectronic Co. Ltd.
 hisilicon      Hisilicon Limited.
 hp     Hewlett Packard
 ibm    International Business Machines (IBM)
index 1bbdcfcf1f13d242e53367d402f37099aee9901b..46325eb2ea766c85dd753bd49dafeca97e0851bd 100644 (file)
@@ -108,6 +108,12 @@ If your query set is big, you can batch them too:
 
   ~# cat query-batch-file > <debugfs>/dynamic_debug/control
 
+Another way is to use wildcards. The match rule supports '*' (matches
+zero or more characters) and '?' (matches exactly one character). For
+example, you can match all usb drivers:
+
+  ~# echo "file drivers/usb/* +p" > <debugfs>/dynamic_debug/control
+
 At the syntactical level, a command comprises a sequence of match
 specifications, followed by a flags change specification.
 
@@ -315,6 +321,9 @@ nullarbor:~ # echo -n 'func svc_process -p' >
 nullarbor:~ # echo -n 'format "nfsd: READ" +p' >
                                <debugfs>/dynamic_debug/control
 
+// enable messages in files whose paths include the string "usb"
+nullarbor:~ # echo -n '*usb* +p' > <debugfs>/dynamic_debug/control
+
 // enable all messages
 nullarbor:~ # echo -n '+p' > <debugfs>/dynamic_debug/control
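
(Editor's note: the queries above act on pr_debug()/dev_dbg() call sites in code built with CONFIG_DYNAMIC_DEBUG. A minimal sketch of such a call site, in a hypothetical drivers/usb/foo.c that the "file drivers/usb/* +p" query would switch on; not part of this commit.)

        /* Hypothetical module: pr_debug() stays silent until dynamic debug enables it. */
        #include <linux/module.h>
        #include <linux/printk.h>

        static int __init foo_init(void)
        {
                pr_debug("foo: probing\n");     /* enabled by 'file drivers/usb/* +p' */
                return 0;
        }

        static void __exit foo_exit(void)
        {
        }

        module_init(foo_init);
        module_exit(foo_exit);
        MODULE_LICENSE("GPL");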
 
index 8042050eb265b34cbb367e51df2805258667ae8d..632211cbdd569292b4f8c2e47676a0d0b66049a9 100644 (file)
@@ -10,24 +10,32 @@ afs.txt
        - info and examples for the distributed AFS (Andrew File System) fs.
 affs.txt
        - info and mount options for the Amiga Fast File System.
+autofs4-mount-control.txt
+       - info on device control operations for autofs4 module.
 automount-support.txt
        - information about filesystem automount support.
 befs.txt
        - information about the BeOS filesystem for Linux.
 bfs.txt
        - info for the SCO UnixWare Boot Filesystem (BFS).
+btrfs.txt
+       - info for the BTRFS filesystem.
+caching/
+       - directory containing filesystem cache documentation.
 ceph.txt
-       - info for the Ceph Distributed File System
-cifs.txt
-       - description of the CIFS filesystem.
+       - info for the Ceph Distributed File System.
+cifs/
+       - directory containing CIFS filesystem documentation and example code.
 coda.txt
        - description of the CODA filesystem.
 configfs/
        - directory containing configfs documentation and example code.
 cramfs.txt
        - info on the cram filesystem for small storage (ROMs etc).
-dentry-locking.txt
-       - info on the RCU-based dcache locking model.
+debugfs.txt
+       - info on the debugfs filesystem.
+devpts.txt
+       - info on the devpts filesystem.
 directory-locking
        - info about the locking scheme used for directory operations.
 dlmfs.txt
@@ -35,7 +43,7 @@ dlmfs.txt
 dnotify.txt
        - info about directory notification in Linux.
 dnotify_test.c
-       - example program for dnotify
+       - example program for dnotify.
 ecryptfs.txt
        - docs on eCryptfs: stacked cryptographic filesystem for Linux.
 efivarfs.txt
@@ -48,12 +56,18 @@ ext3.txt
        - info, mount options and specifications for the Ext3 filesystem.
 ext4.txt
        - info, mount options and specifications for the Ext4 filesystem.
-files.txt
-       - info on file management in the Linux kernel.
 f2fs.txt
        - info and mount options for the F2FS filesystem.
+fiemap.txt
+       - info on fiemap ioctl.
+files.txt
+       - info on file management in the Linux kernel.
 fuse.txt
        - info on the Filesystem in User SpacE including mount options.
+gfs2-glocks.txt
+       - info on the Global File System 2 - Glock internal locking rules.
+gfs2-uevents.txt
+       - info on the Global File System 2 - uevents.
 gfs2.txt
        - info on the Global File System 2.
 hfs.txt
@@ -84,40 +98,58 @@ ntfs.txt
        - info and mount options for the NTFS filesystem (Windows NT).
 ocfs2.txt
        - info and mount options for the OCFS2 clustered filesystem.
+omfs.txt
+       - info on the Optimized MPEG FileSystem.
+path-lookup.txt
+       - info on path walking and name lookup locking.
+pohmelfs/
+       - directory containing pohmelfs filesystem documentation.
 porting
        - various information on filesystem porting.
 proc.txt
        - info on Linux's /proc filesystem.
+qnx6.txt
+       - info on the QNX6 filesystem.
+quota.txt
+       - info on Quota subsystem.
 ramfs-rootfs-initramfs.txt
        - info on the 'in memory' filesystems ramfs, rootfs and initramfs.
-reiser4.txt
-       - info on the Reiser4 filesystem based on dancing tree algorithms.
 relay.txt
        - info on relay, for efficient streaming from kernel to user space.
 romfs.txt
        - description of the ROMFS filesystem.
 seq_file.txt
-       - how to use the seq_file API
+       - how to use the seq_file API.
 sharedsubtree.txt
        - a description of shared subtrees for namespaces.
 spufs.txt
        - info and mount options for the SPU filesystem used on Cell.
+squashfs.txt
+       - info on the squashfs filesystem.
 sysfs-pci.txt
        - info on accessing PCI device resources through sysfs.
+sysfs-tagging.txt
+       - info on sysfs tagging to avoid duplicates.
 sysfs.txt
        - info on sysfs, a ram-based filesystem for exporting kernel objects.
 sysv-fs.txt
        - info on the SystemV/V7/Xenix/Coherent filesystem.
 tmpfs.txt
        - info on tmpfs, a filesystem that holds all files in virtual memory.
+ubifs.txt
+       - info on the Unsorted Block Images FileSystem.
 udf.txt
        - info and mount options for the UDF filesystem.
 ufs.txt
        - info on the ufs filesystem.
 vfat.txt
-       - info on using the VFAT filesystem used in Windows NT and Windows 95
+       - info on using the VFAT filesystem used in Windows NT and Windows 95.
 vfs.txt
-       - overview of the Virtual File System
+       - overview of the Virtual File System.
+xfs-delayed-logging-design.txt
+       - info on the XFS Delayed Logging Design.
+xfs-self-describing-metadata.txt
+       - info on XFS Self Describing Metadata.
 xfs.txt
        - info and mount options for the XFS filesystem.
 xip.txt
index 873a2ab2e9f8801aee72a11833ac4bfa24fa3d84..06887d46ccf2795e8a6f46b1528e985a5ac1975b 100644 (file)
@@ -81,6 +81,62 @@ nodiscard(*)         The discard/TRIM commands are sent to the underlying
                        block device when blocks are freed.  This is useful
                        for SSD devices and sparse/thinly-provisioned LUNs.
 
+Ioctls
+======
+
+There is some NILFS2-specific functionality which can be accessed by applications
+through the system call interfaces. The list of all NILFS2-specific ioctls is
+shown in the table below.
+
+Table of NILFS2 specific ioctls
+..............................................................................
+ Ioctl                         Description
+ NILFS_IOCTL_CHANGE_CPMODE      Change mode of given checkpoint between
+                               checkpoint and snapshot state. This ioctl is
+                               used in chcp and mkcp utilities.
+
+ NILFS_IOCTL_DELETE_CHECKPOINT  Remove checkpoint from NILFS2 file system.
+                               This ioctl is used in rmcp utility.
+
+ NILFS_IOCTL_GET_CPINFO         Return info about requested checkpoints. This
+                               ioctl is used in lscp utility and by
+                               nilfs_cleanerd daemon.
+
+ NILFS_IOCTL_GET_CPSTAT         Return checkpoints statistics. This ioctl is
+                               used by lscp, rmcp utilities and by
+                               nilfs_cleanerd daemon.
+
+ NILFS_IOCTL_GET_SUINFO         Return segment usage info about requested
+                               segments. This ioctl is used in lssu,
+                               nilfs_resize utilities and by nilfs_cleanerd
+                               daemon.
+
+ NILFS_IOCTL_GET_SUSTAT         Return segment usage statistics. This ioctl
+                               is used in lssu, nilfs_resize utilities and
+                               by nilfs_cleanerd daemon.
+
+ NILFS_IOCTL_GET_VINFO          Return information on virtual block addresses.
+                               This ioctl is used by nilfs_cleanerd daemon.
+
+ NILFS_IOCTL_GET_BDESCS         Return information about descriptors of disk
+                               block numbers. This ioctl is used by
+                               nilfs_cleanerd daemon.
+
+ NILFS_IOCTL_CLEAN_SEGMENTS     Do garbage collection operation in the
+                               environment of requested parameters from
+                               userspace. This ioctl is used by
+                               nilfs_cleanerd daemon.
+
+ NILFS_IOCTL_SYNC               Make a checkpoint. This ioctl is used in
+                               mkcp utility.
+
+ NILFS_IOCTL_RESIZE             Resize NILFS2 volume. This ioctl is used
+                               by nilfs_resize utility.
+
+ NILFS_IOCTL_SET_ALLOC_RANGE    Define lower limit of segments in bytes and
+                               upper limit of segments in bytes. This ioctl
+                               is used by nilfs_resize utility.
+
 NILFS2 usage
 ============
 
index 19e10ab3d56983882e10b6df0a67a15d9b3707a5..31f76178c987f1b0d65b778f72848cd5f1b705d7 100644 (file)
@@ -767,6 +767,7 @@ The "Locked" indicates whether the mapping is locked in memory or not.
 
 MemTotal:     16344972 kB
 MemFree:      13634064 kB
+MemAvailable: 14836172 kB
 Buffers:          3656 kB
 Cached:        1195708 kB
 SwapCached:          0 kB
@@ -799,6 +800,14 @@ AnonHugePages:   49152 kB
     MemTotal: Total usable ram (i.e. physical ram minus a few reserved
               bits and the kernel binary code)
      MemFree: The sum of LowFree+HighFree
+MemAvailable: An estimate of how much memory is available for starting new
+              applications, without swapping. Calculated from MemFree,
+              SReclaimable, the size of the file LRU lists, and the low
+              watermarks in each zone.
+              The estimate takes into account that the system needs some
+              page cache to function well, and that not all reclaimable
+              slab will be reclaimable, due to items being in use. The
+              impact of those factors will vary from system to system.
      Buffers: Relatively temporary storage for raw disk blocks
               shouldn't get tremendously large (20MB or so)
       Cached: in-memory cache for files read from the disk (the
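
(Editor's note: a minimal user-space sketch of reading the new MemAvailable field described above; it assumes only standard C and the /proc/meminfo format shown, and is not part of this commit.)

        /* Print the kernel's MemAvailable estimate from /proc/meminfo. */
        #include <stdio.h>

        int main(void)
        {
                char line[128];
                unsigned long kb = 0;
                FILE *f = fopen("/proc/meminfo", "r");

                if (!f)
                        return 1;
                while (fgets(line, sizeof(line), f))
                        if (sscanf(line, "MemAvailable: %lu kB", &kb) == 1)
                                break;
                fclose(f);
                printf("MemAvailable: %lu kB\n", kb);
                return 0;
        }
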
index a6619b7064b9e679b887d31ec85301d300c088f5..b35a64b82f9e8371d42fcf893354d02c07c6c229 100644 (file)
@@ -108,12 +108,12 @@ static DEVICE_ATTR(foo, S_IWUSR | S_IRUGO, show_foo, store_foo);
 is equivalent to doing:
 
 static struct device_attribute dev_attr_foo = {
-       .attr   = {
+       .attr = {
                .name = "foo",
                .mode = S_IWUSR | S_IRUGO,
-               .show = show_foo,
-               .store = store_foo,
        },
+       .show = show_foo,
+       .store = store_foo,
 };
 
 
index 4a93e98b290a085793c495c9635b97335c58da56..5cf57b368dc6347aac4a9bbcd116aa2179978490 100644 (file)
@@ -175,6 +175,16 @@ nfs=stale_rw|nostale_ro
 
 <bool>: 0,1,yes,no,true,false
 
+LIMITATION
+---------------------------------------------------------------------
+* The fallocated region of a file is discarded at umount/evict time
+  when using fallocate with FALLOC_FL_KEEP_SIZE.
+  So, the user should assume that the fallocated region can be discarded
+  at the last close if memory pressure results in eviction of the inode
+  from memory. As a result, for any dependency on the fallocated region,
+  the user should recheck fallocate after reopening the file.
+
 TODO
 ----------------------------------------------------------------------
 * Need to get rid of the raw scanning stuff.  Instead, always use
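
(Editor's note: the LIMITATION above concerns fallocate(2) with FALLOC_FL_KEEP_SIZE. A minimal user-space sketch of the call in question, using a hypothetical file on a vfat mount; not part of this commit.)

        /* Preallocate 1 MiB past EOF without changing the visible file size. */
        #define _GNU_SOURCE
        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>

        int main(void)
        {
                int fd = open("/mnt/vfat/data.bin", O_RDWR | O_CREAT, 0644); /* hypothetical path */

                if (fd < 0)
                        return 1;
                if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1024 * 1024) < 0)
                        perror("fallocate"); /* on vfat the region may vanish at last close, see above */
                close(fd);
                return 0;
        }
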
index af0b689a01635c7767aa92fd98a725c8b340acdb..62cc3d333c3c04b71889c0fa965214ddf8abbe19 100644 (file)
@@ -1050,7 +1050,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        debugfs files are removed at module unload time.
 
        gpt             [EFI] Forces disk with valid GPT signature but
-                       invalid Protective MBR to be treated as GPT.
+                       invalid Protective MBR to be treated as GPT. If the
+                       primary GPT is corrupted, it enables the backup/alternate
+                       GPT to be used instead.
 
        grcan.enable0=  [HW] Configuration of physical interface 0. Determines
                        the "Enable 0" bit of the configuration register.
@@ -1452,6 +1454,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        Valid arguments: on, off
                        Default: on
 
+       kmemcheck=      [KNL] Boot-time kmemcheck enable/disable/one-shot mode
+                       Valid arguments: 0, 1, 2
+                       kmemcheck=0 (disabled)
+                       kmemcheck=1 (enabled)
+                       kmemcheck=2 (one-shot mode)
+                       Default: 2 (one-shot mode)
+
        kstack=N        [X86] Print N words from the kernel stack
                        in oops dumps.
 
index 79699c2007664e5572580a0eea812cae7efd65fb..62261c04060a73bef82a3a6784cef794d39e5d50 100644 (file)
@@ -2,9 +2,6 @@
 LED handling under Linux
 ========================
 
-If you're reading this and thinking about keyboard leds, these are
-handled by the input subsystem and the led class is *not* needed.
-
 In its simplest form, the LED class just allows control of LEDs from
 userspace. LEDs appear in /sys/class/leds/. The maximum brightness of the
 LED is defined in max_brightness file. The brightness file will set the brightness
index 445ad743ec814ee4571c175884d3ccce4ffda11a..94459b42e0abee7a9f01cfadfeb178d38f19e396 100644 (file)
@@ -55,14 +55,21 @@ Struct Resources:
        For printing struct resources. The 'R' and 'r' specifiers result in a
        printed resource with ('R') or without ('r') a decoded flags member.
 
-Physical addresses:
+Physical address types phys_addr_t:
 
-       %pa     0x01234567 or 0x0123456789abcdef
+       %pa[p]  0x01234567 or 0x0123456789abcdef
 
        For printing a phys_addr_t type (and its derivatives, such as
        resource_size_t) which can vary based on build options, regardless of
        the width of the CPU data path. Passed by reference.
 
+DMA address types dma_addr_t:
+
+       %pad    0x01234567 or 0x0123456789abcdef
+
+       For printing a dma_addr_t type which can vary based on build options,
+       regardless of the width of the CPU data path. Passed by reference.
+
 Raw buffer as a hex string:
        %*ph    00 01 02  ...  3f
        %*phC   00:01:02: ... :3f
@@ -177,6 +184,12 @@ dentry names:
        equivalent of %s dentry->d_name.name we used to use, %pd<n> prints
        n last components.  %pD does the same thing for struct file.
 
+task_struct comm name:
+
+        %pT
+
+        For printing task_struct->comm.
+
 struct va_format:
 
        %pV
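
(Editor's note: a minimal kernel-style sketch of the %pa and new %pad specifiers described above; both arguments are passed by reference. Hypothetical driver code, not part of this commit.)

        /* Print a phys_addr_t and a dma_addr_t regardless of their build-time width. */
        #include <linux/device.h>
        #include <linux/dma-mapping.h>

        static void foo_report(struct device *dev, phys_addr_t phys, dma_addr_t handle)
        {
                dev_info(dev, "buffer at phys %pa, dma %pad\n", &phys, &handle);
        }
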
index 6d486404200e9bbd71a20e97f1e85e74615014a5..ee9a2f983b9901d74266f9bfa9c1a2da68c49450 100644 (file)
@@ -33,6 +33,7 @@ show up in /proc/sys/kernel:
 - domainname
 - hostname
 - hotplug
+- kexec_load_disabled
 - kptr_restrict
 - kstack_depth_to_print       [ X86 only ]
 - l2cr                        [ PPC only ]
@@ -287,6 +288,18 @@ Default value is "/sbin/hotplug".
 
 ==============================================================
 
+kexec_load_disabled:
+
+A toggle indicating if the kexec_load syscall has been disabled. This
+value defaults to 0 (false: kexec_load enabled), but can be set to 1
+(true: kexec_load disabled). Once true, kexec can no longer be used, and
+the toggle cannot be set back to false. This allows a kexec image to be
+loaded before disabling the syscall, allowing a system to set up (and
+later use) an image without it being altered. Generally used together
+with the "modules_disabled" sysctl.
+
+==============================================================
+
 kptr_restrict:
 
 This toggle indicates whether restrictions are placed on
@@ -331,7 +344,7 @@ A toggle value indicating if modules are allowed to be loaded
 in an otherwise modular kernel.  This toggle defaults to off
 (0), but can be set true (1).  Once true, modules can be
 neither loaded nor unloaded, and the toggle cannot be set back
-to false.
+to false.  Generally used with the "kexec_load_disabled" toggle.
 
 ==============================================================
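
(Editor's note: a minimal user-space sketch of flipping the one-way kexec_load_disabled toggle described above, typically after a kexec image has been staged; not part of this commit.)

        /* Disable further kexec_load() calls; the toggle cannot be cleared again. */
        #include <stdio.h>

        int main(void)
        {
                FILE *f = fopen("/proc/sys/kernel/kexec_load_disabled", "w");

                if (!f)
                        return 1;
                fputs("1\n", f);
                return fclose(f) ? 1 : 0;
        }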
 
index 1fbd4eb7b64aff335bed40a3d9ebbde95a26bcc1..9f5481bdc5a43f942fb833f3131901249d2d5c50 100644 (file)
@@ -47,6 +47,7 @@ Currently, these files are in /proc/sys/vm:
 - numa_zonelist_order
 - oom_dump_tasks
 - oom_kill_allocating_task
+- overcommit_kbytes
 - overcommit_memory
 - overcommit_ratio
 - page-cluster
@@ -574,6 +575,17 @@ The default value is 0.
 
 ==============================================================
 
+overcommit_kbytes:
+
+When overcommit_memory is set to 2, the committed address space is not
+permitted to exceed swap plus this amount of physical RAM. See below.
+
+Note: overcommit_kbytes is the counterpart of overcommit_ratio. Only one
+of them may be specified at a time. Setting one disables the other (which
+then appears as 0 when read).
+
+==============================================================
+
 overcommit_memory:
 
 This value contains a flag that enables memory overcommitment.
index 4a37c4759cd231f72a8b5f253deec9fdec334886..00e425faa2fd7691e327e3dbfcefe13940a7873a 100644 (file)
@@ -123,7 +123,7 @@ my $regex_writepage;
 
 # Static regex used. Specified like this for readability and for use with /o
 #                      (process_pid)     (cpus      )   ( time  )   (tpoint    ) (details)
-my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)';
+my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])(\s*[dX.][Nnp.][Hhs.][0-9a-fA-F.]*|)\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)';
 my $regex_statname = '[-0-9]*\s\((.*)\).*';
 my $regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*';
 
@@ -270,8 +270,8 @@ EVENT_PROCESS:
        while ($traceevent = <STDIN>) {
                if ($traceevent =~ /$regex_traceevent/o) {
                        $process_pid = $1;
-                       $timestamp = $3;
-                       $tracepoint = $4;
+                       $timestamp = $4;
+                       $tracepoint = $5;
 
                        $process_pid =~ /(.*)-([0-9]*)$/;
                        my $process = $1;
@@ -299,7 +299,7 @@ EVENT_PROCESS:
                        $perprocesspid{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN}++;
                        $perprocesspid{$process_pid}->{STATE_DIRECT_BEGIN} = $timestamp;
 
-                       $details = $5;
+                       $details = $6;
                        if ($details !~ /$regex_direct_begin/o) {
                                print "WARNING: Failed to parse mm_vmscan_direct_reclaim_begin as expected\n";
                                print "         $details\n";
@@ -322,7 +322,7 @@ EVENT_PROCESS:
                                $perprocesspid{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$index] = "$order-$latency";
                        }
                } elsif ($tracepoint eq "mm_vmscan_kswapd_wake") {
-                       $details = $5;
+                       $details = $6;
                        if ($details !~ /$regex_kswapd_wake/o) {
                                print "WARNING: Failed to parse mm_vmscan_kswapd_wake as expected\n";
                                print "         $details\n";
@@ -356,7 +356,7 @@ EVENT_PROCESS:
                } elsif ($tracepoint eq "mm_vmscan_wakeup_kswapd") {
                        $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD}++;
 
-                       $details = $5;
+                       $details = $6;
                        if ($details !~ /$regex_wakeup_kswapd/o) {
                                print "WARNING: Failed to parse mm_vmscan_wakeup_kswapd as expected\n";
                                print "         $details\n";
@@ -366,7 +366,7 @@ EVENT_PROCESS:
                        my $order = $3;
                        $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD_PERORDER}[$order]++;
                } elsif ($tracepoint eq "mm_vmscan_lru_isolate") {
-                       $details = $5;
+                       $details = $6;
                        if ($details !~ /$regex_lru_isolate/o) {
                                print "WARNING: Failed to parse mm_vmscan_lru_isolate as expected\n";
                                print "         $details\n";
@@ -387,7 +387,7 @@ EVENT_PROCESS:
                        }
                        $perprocesspid{$process_pid}->{HIGH_NR_CONTIG_DIRTY} += $nr_contig_dirty;
                } elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") {
-                       $details = $5;
+                       $details = $6;
                        if ($details !~ /$regex_lru_shrink_inactive/o) {
                                print "WARNING: Failed to parse mm_vmscan_lru_shrink_inactive as expected\n";
                                print "         $details\n";
@@ -397,7 +397,7 @@ EVENT_PROCESS:
                        my $nr_reclaimed = $4;
                        $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED} += $nr_reclaimed;
                } elsif ($tracepoint eq "mm_vmscan_writepage") {
-                       $details = $5;
+                       $details = $6;
                        if ($details !~ /$regex_writepage/o) {
                                print "WARNING: Failed to parse mm_vmscan_writepage as expected\n";
                                print "         $details\n";
diff --git a/Documentation/vm/locking b/Documentation/vm/locking
deleted file mode 100644 (file)
index f61228b..0000000
+++ /dev/null
@@ -1,130 +0,0 @@
-Started Oct 1999 by Kanoj Sarcar <kanojsarcar@yahoo.com>
-
-The intent of this file is to have an uptodate, running commentary 
-from different people about how locking and synchronization is done 
-in the Linux vm code.
-
-page_table_lock & mmap_sem
---------------------------------------
-
-Page stealers pick processes out of the process pool and scan for 
-the best process to steal pages from. To guarantee the existence 
-of the victim mm, a mm_count inc and a mmdrop are done in swap_out().
-Page stealers hold kernel_lock to protect against a bunch of races.
-The vma list of the victim mm is also scanned by the stealer, 
-and the page_table_lock is used to preserve list sanity against the
-process adding/deleting to the list. This also guarantees existence
-of the vma. Vma existence is not guaranteed once try_to_swap_out() 
-drops the page_table_lock. To guarantee the existence of the underlying 
-file structure, a get_file is done before the swapout() method is 
-invoked. The page passed into swapout() is guaranteed not to be reused
-for a different purpose because the page reference count due to being
-present in the user's pte is not released till after swapout() returns.
-
-Any code that modifies the vmlist, or the vm_start/vm_end/
-vm_flags:VM_LOCKED/vm_next of any vma *in the list* must prevent 
-kswapd from looking at the chain.
-
-The rules are:
-1. To scan the vmlist (look but don't touch) you must hold the
-   mmap_sem with read bias, i.e. down_read(&mm->mmap_sem)
-2. To modify the vmlist you need to hold the mmap_sem with
-   read&write bias, i.e. down_write(&mm->mmap_sem)  *AND*
-   you need to take the page_table_lock.
-3. The swapper takes _just_ the page_table_lock, this is done
-   because the mmap_sem can be an extremely long lived lock
-   and the swapper just cannot sleep on that.
-4. The exception to this rule is expand_stack, which just
-   takes the read lock and the page_table_lock, this is ok
-   because it doesn't really modify fields anybody relies on.
-5. You must be able to guarantee that while holding page_table_lock
-   or page_table_lock of mm A, you will not try to get either lock
-   for mm B.
-
-The caveats are:
-1. find_vma() makes use of, and updates, the mmap_cache pointer hint.
-The update of mmap_cache is racy (page stealer can race with other code
-that invokes find_vma with mmap_sem held), but that is okay, since it 
-is a hint. This can be fixed, if desired, by having find_vma grab the
-page_table_lock.
-
-
-Code that add/delete elements from the vmlist chain are
-1. callers of insert_vm_struct
-2. callers of merge_segments
-3. callers of avl_remove
-
-Code that changes vm_start/vm_end/vm_flags:VM_LOCKED of vma's on
-the list:
-1. expand_stack
-2. mprotect
-3. mlock
-4. mremap
-
-It is advisable that changes to vm_start/vm_end be protected, although 
-in some cases it is not really needed. Eg, vm_start is modified by 
-expand_stack(), it is hard to come up with a destructive scenario without 
-having the vmlist protection in this case.
-
-The page_table_lock nests with the inode i_mmap_mutex and the kmem cache
-c_spinlock spinlocks.  This is okay, since the kmem code asks for pages after
-dropping c_spinlock.  The page_table_lock also nests with pagecache_lock and
-pagemap_lru_lock spinlocks, and no code asks for memory with these locks
-held.
-
-The page_table_lock is grabbed while holding the kernel_lock spinning monitor.
-
-The page_table_lock is a spin lock.
-
-Note: PTL can also be used to guarantee that no new clones using the
-mm start up ... this is a loose form of stability on mm_users. For
-example, it is used in copy_mm to protect against a racing tlb_gather_mmu
-single address space optimization, so that the zap_page_range (from
-truncate) does not lose sending ipi's to cloned threads that might
-be spawned underneath it and go to user mode to drag in pte's into tlbs.
-
-swap_lock
---------------
-The swap devices are chained in priority order from the "swap_list" header. 
-The "swap_list" is used for the round-robin swaphandle allocation strategy.
-The #free swaphandles is maintained in "nr_swap_pages". These two together
-are protected by the swap_lock.
-
-The swap_lock also protects all the device reference counts on the
-corresponding swaphandles, maintained in the "swap_map" array, and the
-"highest_bit" and "lowest_bit" fields.
-
-The swap_lock is a spinlock, and is never acquired from intr level.
-
-To prevent races between swap space deletion or async readahead swapins
-deciding whether a swap handle is being used, ie worthy of being read in
-from disk, and an unmap -> swap_free making the handle unused, the swap
-delete and readahead code grabs a temp reference on the swaphandle to
-prevent warning messages from swap_duplicate <- read_swap_cache_async.
-
-Swap cache locking
-------------------
-Pages are added into the swap cache with kernel_lock held, to make sure
-that multiple pages are not being added (and hence lost) by associating
-all of them with the same swaphandle.
-
-Pages are guaranteed not to be removed from the scache if the page is 
-"shared": ie, other processes hold reference on the page or the associated 
-swap handle. The only code that does not follow this rule is shrink_mmap,
-which deletes pages from the swap cache if no process has a reference on 
-the page (multiple processes might have references on the corresponding
-swap handle though). lookup_swap_cache() races with shrink_mmap, when
-establishing a reference on a scache page, so, it must check whether the
-page it located is still in the swapcache, or shrink_mmap deleted it.
-(This race is due to the fact that shrink_mmap looks at the page ref
-count with pagecache_lock, but then drops pagecache_lock before deleting
-the page from the scache).
-
-do_wp_page and do_swap_page have MP races in them while trying to figure
-out whether a page is "shared", by looking at the page_count + swap_count.
-To preserve the sum of the counts, the page lock _must_ be acquired before
-calling is_page_shared (else processes might switch their swap_count refs
-to the page count refs, after the page count ref has been snapshotted).
-
-Swap device deletion code currently breaks all the scache assumptions,
-since it grabs neither mmap_sem nor page_table_lock.
index 8eaa2fc4b8fae253930a798f38394438198dbf5a..cbfaaa674118daaf46e6467cf865d4cd06aae23e 100644 (file)
@@ -14,8 +14,8 @@ The Linux kernel supports the following overcommit handling modes
 
 2      -       Don't overcommit. The total address space commit
                for the system is not permitted to exceed swap + a
-               configurable percentage (default is 50) of physical RAM.
-               Depending on the percentage you use, in most situations
+               configurable amount (default is 50%) of physical RAM.
+               Depending on the amount you use, in most situations
                this means a process will not be killed while accessing
                pages but will receive errors on memory allocation as
                appropriate.
@@ -26,7 +26,8 @@ The Linux kernel supports the following overcommit handling modes
 
 The overcommit policy is set via the sysctl `vm.overcommit_memory'.
 
-The overcommit percentage is set via `vm.overcommit_ratio'.
+The overcommit amount can be set via `vm.overcommit_ratio' (percentage)
+or `vm.overcommit_kbytes' (absolute value).
 
 The current overcommit limit and amount committed are viewable in
 /proc/meminfo as CommitLimit and Committed_AS respectively.
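
(Editor's note: illustrative arithmetic only, with made-up sizes, for the CommitLimit rules described above; it ignores hugetlb reservations and is not part of this commit.)

        /* CommitLimit under overcommit_memory == 2, via ratio or kbytes. */
        #include <stdio.h>

        int main(void)
        {
                unsigned long swap_kb = 4UL * 1024 * 1024;   /* example: 4 GiB of swap */
                unsigned long ram_kb  = 16UL * 1024 * 1024;  /* example: 16 GiB of RAM */
                unsigned long ratio   = 50;                  /* vm.overcommit_ratio default */
                unsigned long kbytes  = 2UL * 1024 * 1024;   /* example vm.overcommit_kbytes */

                printf("CommitLimit (ratio):  %lu kB\n", swap_kb + ram_kb * ratio / 100);
                printf("CommitLimit (kbytes): %lu kB\n", swap_kb + kbytes);
                return 0;
        }
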
index 48e113cf951c923dcca9e5da6a62f2ce1262e877..7798b077d7f9b689981ae3be71362c345ba2861c 100644 (file)
@@ -93,6 +93,11 @@ Descriptions of section entries:
        N: Files and directories with regex patterns.
           N:   [^a-z]tegra     all files whose path contains the word tegra
           One pattern per line.  Multiple N: lines acceptable.
+          scripts/get_maintainer.pl treats F: pattern matches and N: pattern
+          matches differently.  By default, get_maintainer will not look at
+          git log history when an F: pattern match occurs.  When an N: match
+          occurs, git log history is used to also notify the people that have
+          git commit signatures.
        X: Files and directories that are NOT maintained, same rules as F:
           Files exclusions are tested before file matches.
           Can be useful for excluding a specific subdirectory, for instance:
@@ -3982,6 +3987,12 @@ S:       Orphan
 F:     Documentation/filesystems/hfs.txt
 F:     fs/hfs/
 
+HFSPLUS FILESYSTEM
+L:     linux-fsdevel@vger.kernel.org
+S:     Orphan
+F:     Documentation/filesystems/hfsplus.txt
+F:     fs/hfsplus/
+
 HGA FRAMEBUFFER DRIVER
 M:     Ferenc Bakonyi <fero@drama.obuda.kando.hu>
 L:     linux-nvidia@lists.surfsouth.com
index 7ce5ce586bd9e0fa5e0f0c4131f83d33587bc04e..97a2d9a096b948cd0e325e8188d591a11fedef7a 100644 (file)
@@ -540,13 +540,13 @@ config SMP
        depends on ALPHA_SABLE || ALPHA_LYNX || ALPHA_RAWHIDE || ALPHA_DP264 || ALPHA_WILDFIRE || ALPHA_TITAN || ALPHA_GENERIC || ALPHA_SHARK || ALPHA_MARVEL
        ---help---
          This enables support for systems with more than one CPU. If you have
-         a system with only one CPU, like most personal computers, say N. If
-         you have a system with more than one CPU, say Y.
+         a system with only one CPU, say N. If you have a system with more
+         than one CPU, say Y.
 
-         If you say N here, the kernel will run on single and multiprocessor
+         If you say N here, the kernel will run on uni- and multiprocessor
          machines, but will use only one CPU of a multiprocessor machine. If
          you say Y here, the kernel will run on many, but not all,
-         singleprocessor machines. On a singleprocessor machine, the kernel
+         uniprocessor machines. On a uniprocessor machine, the kernel
          will run faster if you say N here.
 
          See also the SMP-HOWTO available at
index 9063ae6553ccb7a0a220b8db667ac770627addbf..5438cabbc45d725e07dccf7b8e8f7c802d5d67db 100644 (file)
@@ -128,8 +128,8 @@ config SMP
        default n
        help
          This enables support for systems with more than one CPU. If you have
-         a system with only one CPU, like most personal computers, say N. If
-         you have a system with more than one CPU, say Y.
+         a system with only one CPU, say N. If you have a system with more
+         than one CPU, say Y.
 
 if SMP
 
index f9b0fd387c6ff3d9cfc62b79705a37243d5274e5..dc6ef9a2c649df04d0d8de515b5565978164dde5 100644 (file)
@@ -1470,14 +1470,14 @@ config SMP
        depends on MMU || ARM_MPU
        help
          This enables support for systems with more than one CPU. If you have
-         a system with only one CPU, like most personal computers, say N. If
-         you have a system with more than one CPU, say Y.
+         a system with only one CPU, say N. If you have a system with more
+         than one CPU, say Y.
 
-         If you say N here, the kernel will run on single and multiprocessor
+         If you say N here, the kernel will run on uni- and multiprocessor
          machines, but will use only one CPU of a multiprocessor machine. If
-         you say Y here, the kernel will run on many, but not all, single
-         processor machines. On a single processor machine, the kernel will
-         run faster if you say N here.
+         you say Y here, the kernel will run on many, but not all,
+         uniprocessor machines. On a uniprocessor machine, the kernel
+         will run faster if you say N here.
 
          See also <file:Documentation/x86/i386/IO-APIC.txt>,
          <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
index 58b8c6a0ab1fe3985772746932a1713e29bd1ea0..99084431d6ae2616498bef8b3edee9351d75cbe7 100644 (file)
@@ -8,8 +8,8 @@
 #define MAX_DMA_ADDRESS        0xffffffffUL
 #else
 #define MAX_DMA_ADDRESS        ({ \
-       extern unsigned long arm_dma_zone_size; \
-       arm_dma_zone_size ? \
+       extern phys_addr_t arm_dma_zone_size; \
+       arm_dma_zone_size && arm_dma_zone_size < (0x10000000 - PAGE_OFFSET) ? \
                (PAGE_OFFSET + arm_dma_zone_size) : 0xffffffffUL; })
 #endif
 
index bbae919bceb4b5762d94fc4a2f570fbed18b6fbe..68ea615c2a28184e312c4c3bb6a9da5b099b9201 100644 (file)
  */
 
 #define FIXADDR_START          0xfff00000UL
-#define FIXADDR_TOP            0xfffe0000UL
-#define FIXADDR_SIZE           (FIXADDR_TOP - FIXADDR_START)
+#define FIXADDR_END            0xfffe0000UL
+#define FIXADDR_TOP            (FIXADDR_END - PAGE_SIZE)
 
-#define FIX_KMAP_BEGIN         0
-#define FIX_KMAP_END           (FIXADDR_SIZE >> PAGE_SHIFT)
+enum fixed_addresses {
+       FIX_KMAP_BEGIN,
+       FIX_KMAP_END = (FIXADDR_TOP - FIXADDR_START) >> PAGE_SHIFT,
+       __end_of_fixed_addresses
+};
 
-#define __fix_to_virt(x)       (FIXADDR_START + ((x) << PAGE_SHIFT))
-#define __virt_to_fix(x)       (((x) - FIXADDR_START) >> PAGE_SHIFT)
-
-extern void __this_fixmap_does_not_exist(void);
-
-static inline unsigned long fix_to_virt(const unsigned int idx)
-{
-       if (idx >= FIX_KMAP_END)
-               __this_fixmap_does_not_exist();
-       return __fix_to_virt(idx);
-}
-
-static inline unsigned int virt_to_fix(const unsigned long vaddr)
-{
-       BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
-       return __virt_to_fix(vaddr);
-}
+#include <asm-generic/fixmap.h>
 
 #endif
index 34d5fd585bbb1d472015838c2665dea5aad376f7..f751714d52c1f7dd5d4e87542bfadb012fcf1ca0 100644 (file)
@@ -33,7 +33,7 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 
 void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
 {
-       return alloc_bootmem_align(size, align);
+       return memblock_virt_alloc(size, align);
 }
 
 void __init arm_dt_memblock_reserve(void)
index a4729c6be25dc9aff11a519e0f15de0bcce6a913..1e8b030dbefd8b2b19da27d9ca8ecabfaf610bba 100644 (file)
@@ -731,7 +731,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
        kernel_data.end     = virt_to_phys(_end - 1);
 
        for_each_memblock(memory, region) {
-               res = alloc_bootmem_low(sizeof(*res));
+               res = memblock_virt_alloc(sizeof(*res), 0);
                res->name  = "System RAM";
                res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
                res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
index 8a1b5e0bad40df2adbab8b202e0dd87602fe7921..f7a6fd35b1e43f6dc9d36256508928b9ba135ea9 100644 (file)
@@ -2791,9 +2791,7 @@ static int __init _alloc_links(struct omap_hwmod_link **ml,
        sz = sizeof(struct omap_hwmod_link) * LINKS_PER_OCP_IF;
 
        *sl = NULL;
-       *ml = alloc_bootmem(sz);
-
-       memset(*ml, 0, sz);
+       *ml = memblock_virt_alloc(sz, 0);
 
        *sl = (void *)(*ml) + sizeof(struct omap_hwmod_link);
 
@@ -2912,9 +2910,7 @@ static int __init _alloc_linkspace(struct omap_hwmod_ocp_if **ois)
        pr_debug("omap_hwmod: %s: allocating %d byte linkspace (%d links)\n",
                 __func__, sz, max_ls);
 
-       linkspace = alloc_bootmem(sz);
-
-       memset(linkspace, 0, sz);
+       linkspace = memblock_virt_alloc(sz, 0);
 
        return 0;
 }
index 6dd66a999d9f2c69b291b300d8f79e63577a503b..8a271ffce12a97a0086c9ccacb7fd6f0d025ed01 100644 (file)
@@ -92,9 +92,6 @@ void show_mem(unsigned int filter)
        printk("Mem-info:\n");
        show_free_areas(filter);
 
-       if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
-               return;
-
        for_each_bank (i, mi) {
                struct membank *bank = &mi->bank[i];
                unsigned int pfn1, pfn2;
@@ -407,7 +404,7 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn)
         * free the section of the memmap array.
         */
        if (pg < pgend)
-               free_bootmem(pg, pgend - pg);
+               memblock_free_early(pg, pgend - pg);
 }
 
 /*
@@ -578,7 +575,7 @@ void __init mem_init(void)
                        MLK(DTCM_OFFSET, (unsigned long) dtcm_end),
                        MLK(ITCM_OFFSET, (unsigned long) itcm_end),
 #endif
-                       MLK(FIXADDR_START, FIXADDR_TOP),
+                       MLK(FIXADDR_START, FIXADDR_END),
                        MLM(VMALLOC_START, VMALLOC_END),
                        MLM(PAGE_OFFSET, (unsigned long)high_memory),
 #ifdef CONFIG_HIGHMEM
index b75b6bf4269c6a3e2a3152f1402f5ae63cfd5a65..1387f84b42b633127963042d4be4c100bcd8eb8e 100644 (file)
  */
 #include <asm/mem-layout.h>
 
-/*
- * Full fixmap support involves set_fixmap() functions, but
- * these may not be needed if all we're after is an area for
- * highmem kernel mappings.
- */
-#define        __fix_to_virt(x)        (FIXADDR_TOP - ((x) << PAGE_SHIFT))
-#define        __virt_to_fix(x)        ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
-
-extern void __this_fixmap_does_not_exist(void);
-
-/**
- * fix_to_virt -- "index to address" translation.
- *
- * If anyone tries to use the idx directly without translation,
- * we catch the bug with a NULL-deference kernel oops. Illegal
- * ranges of incoming indices are caught too.
- */
-static inline unsigned long fix_to_virt(const unsigned int idx)
-{
-       /*
-        * This branch gets completely eliminated after inlining,
-        * except when someone tries to use fixaddr indices in an
-        * illegal way. (such as mixing up address types or using
-        * out-of-range indices).
-        *
-        * If it doesn't get removed, the linker will complain
-        * loudly with a reasonably clear error message..
-        */
-       if (idx >= __end_of_fixed_addresses)
-               __this_fixmap_does_not_exist();
-
-       return __fix_to_virt(idx);
-}
-
-static inline unsigned long virt_to_fix(const unsigned long vaddr)
-{
-       BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
-       return __virt_to_fix(vaddr);
-}
+#include <asm-generic/fixmap.h>
 
 #define kmap_get_fixmap_pte(vaddr) \
        pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), \
index 7cf2347eb899795ae8c2b321da8c270f30dfb074..0c8e553e0b9f8cafdc40853047d3e9b873885ed3 100644 (file)
@@ -105,6 +105,7 @@ config HAVE_SETUP_PER_CPU_AREA
 config DMI
        bool
        default y
+       select DMI_SCAN_MACHINE_NON_EFI_FALLBACK
 
 config EFI
        bool
index 185d3d18d0ec08d8319a317432074de0a8abca54..f365a61f5c717741967cf556bc41e007f777a41a 100644 (file)
@@ -5,8 +5,10 @@
 #include <asm/io.h>
 
 /* Use normal IO mappings for DMI */
-#define dmi_ioremap ioremap
-#define dmi_iounmap(x,l) iounmap(x)
-#define dmi_alloc(l) kzalloc(l, GFP_ATOMIC)
+#define dmi_early_remap                ioremap
+#define dmi_early_unmap(x, l)  iounmap(x)
+#define dmi_remap              ioremap
+#define dmi_unmap              iounmap
+#define dmi_alloc(l)           kzalloc(l, GFP_ATOMIC)
 
 #endif
index 5a84b3a5074158d8b0fe975d5def50689c8c735d..efd1b927ccb7ebe57934a566f8a204cdbf8733b1 100644 (file)
@@ -71,6 +71,7 @@
 #include <linux/compiler.h>
 #include <linux/threads.h>
 #include <linux/types.h>
+#include <linux/bitops.h>
 
 #include <asm/fpu.h>
 #include <asm/page.h>
index da5237d636d650ec5f58dc9ff5f046473a60b679..52715a71aede013ecc3151338986e8417b134be0 100644 (file)
 static unsigned long max_gap;
 #endif
 
-/**
- * show_mem - give short summary of memory stats
- *
- * Shows a simple page count of reserved and used pages in the system.
- * For discontig machines, it does this on a per-pgdat basis.
- */
-void show_mem(unsigned int filter)
-{
-       int i, total_reserved = 0;
-       int total_shared = 0, total_cached = 0;
-       unsigned long total_present = 0;
-       pg_data_t *pgdat;
-
-       printk(KERN_INFO "Mem-info:\n");
-       show_free_areas(filter);
-       printk(KERN_INFO "Node memory in pages:\n");
-       if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
-               return;
-       for_each_online_pgdat(pgdat) {
-               unsigned long present;
-               unsigned long flags;
-               int shared = 0, cached = 0, reserved = 0;
-               int nid = pgdat->node_id;
-
-               if (skip_free_areas_node(filter, nid))
-                       continue;
-               pgdat_resize_lock(pgdat, &flags);
-               present = pgdat->node_present_pages;
-               for(i = 0; i < pgdat->node_spanned_pages; i++) {
-                       struct page *page;
-                       if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
-                               touch_nmi_watchdog();
-                       if (pfn_valid(pgdat->node_start_pfn + i))
-                               page = pfn_to_page(pgdat->node_start_pfn + i);
-                       else {
-#ifdef CONFIG_VIRTUAL_MEM_MAP
-                               if (max_gap < LARGE_GAP)
-                                       continue;
-#endif
-                               i = vmemmap_find_next_valid_pfn(nid, i) - 1;
-                               continue;
-                       }
-                       if (PageReserved(page))
-                               reserved++;
-                       else if (PageSwapCache(page))
-                               cached++;
-                       else if (page_count(page))
-                               shared += page_count(page)-1;
-               }
-               pgdat_resize_unlock(pgdat, &flags);
-               total_present += present;
-               total_reserved += reserved;
-               total_cached += cached;
-               total_shared += shared;
-               printk(KERN_INFO "Node %4d:  RAM: %11ld, rsvd: %8d, "
-                      "shrd: %10d, swpd: %10d\n", nid,
-                      present, reserved, shared, cached);
-       }
-       printk(KERN_INFO "%ld pages of RAM\n", total_present);
-       printk(KERN_INFO "%d reserved pages\n", total_reserved);
-       printk(KERN_INFO "%d pages shared\n", total_shared);
-       printk(KERN_INFO "%d pages swap cached\n", total_cached);
-       printk(KERN_INFO "Total of %ld pages in page table cache\n",
-              quicklist_total_size());
-       printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages());
-}
-
-
 /* physical address where the bootmem map is located */
 unsigned long bootmap_start;
 
index 2de08f4d99305fa42aafe573da4cbbd5ad3892ca..8786268053693cac4ed3114becc62a488a905999 100644 (file)
@@ -607,69 +607,6 @@ void *per_cpu_init(void)
 }
 #endif /* CONFIG_SMP */
 
-/**
- * show_mem - give short summary of memory stats
- *
- * Shows a simple page count of reserved and used pages in the system.
- * For discontig machines, it does this on a per-pgdat basis.
- */
-void show_mem(unsigned int filter)
-{
-       int i, total_reserved = 0;
-       int total_shared = 0, total_cached = 0;
-       unsigned long total_present = 0;
-       pg_data_t *pgdat;
-
-       printk(KERN_INFO "Mem-info:\n");
-       show_free_areas(filter);
-       if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
-               return;
-       printk(KERN_INFO "Node memory in pages:\n");
-       for_each_online_pgdat(pgdat) {
-               unsigned long present;
-               unsigned long flags;
-               int shared = 0, cached = 0, reserved = 0;
-               int nid = pgdat->node_id;
-
-               if (skip_free_areas_node(filter, nid))
-                       continue;
-               pgdat_resize_lock(pgdat, &flags);
-               present = pgdat->node_present_pages;
-               for(i = 0; i < pgdat->node_spanned_pages; i++) {
-                       struct page *page;
-                       if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
-                               touch_nmi_watchdog();
-                       if (pfn_valid(pgdat->node_start_pfn + i))
-                               page = pfn_to_page(pgdat->node_start_pfn + i);
-                       else {
-                               i = vmemmap_find_next_valid_pfn(nid, i) - 1;
-                               continue;
-                       }
-                       if (PageReserved(page))
-                               reserved++;
-                       else if (PageSwapCache(page))
-                               cached++;
-                       else if (page_count(page))
-                               shared += page_count(page)-1;
-               }
-               pgdat_resize_unlock(pgdat, &flags);
-               total_present += present;
-               total_reserved += reserved;
-               total_cached += cached;
-               total_shared += shared;
-               printk(KERN_INFO "Node %4d:  RAM: %11ld, rsvd: %8d, "
-                      "shrd: %10d, swpd: %10d\n", nid,
-                      present, reserved, shared, cached);
-       }
-       printk(KERN_INFO "%ld pages of RAM\n", total_present);
-       printk(KERN_INFO "%d reserved pages\n", total_reserved);
-       printk(KERN_INFO "%d pages shared\n", total_shared);
-       printk(KERN_INFO "%d pages swap cached\n", total_cached);
-       printk(KERN_INFO "Total of %ld pages in page table cache\n",
-              quicklist_total_size());
-       printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages());
-}
-
 /**
  * call_pernode_memory - use SRAT to call callback functions with node info
  * @start: physical start of range
index 88504abf570429886183d32d9b857a90adb5c148..25c350264a41012bba72d3d992674aea17d32fba 100644 (file)
@@ -684,3 +684,51 @@ per_linux32_init(void)
 }
 
 __initcall(per_linux32_init);
+
+/**
+ * show_mem - give short summary of memory stats
+ *
+ * Shows a simple page count of reserved and used pages in the system.
+ * For discontig machines, it does this on a per-pgdat basis.
+ */
+void show_mem(unsigned int filter)
+{
+       int total_reserved = 0;
+       unsigned long total_present = 0;
+       pg_data_t *pgdat;
+
+       printk(KERN_INFO "Mem-info:\n");
+       show_free_areas(filter);
+       printk(KERN_INFO "Node memory in pages:\n");
+       for_each_online_pgdat(pgdat) {
+               unsigned long present;
+               unsigned long flags;
+               int reserved = 0;
+               int nid = pgdat->node_id;
+               int zoneid;
+
+               if (skip_free_areas_node(filter, nid))
+                       continue;
+               pgdat_resize_lock(pgdat, &flags);
+
+               for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
+                       struct zone *zone = &pgdat->node_zones[zoneid];
+                       if (!populated_zone(zone))
+                               continue;
+
+                       reserved += zone->present_pages - zone->managed_pages;
+               }
+               present = pgdat->node_present_pages;
+
+               pgdat_resize_unlock(pgdat, &flags);
+               total_present += present;
+               total_reserved += reserved;
+               printk(KERN_INFO "Node %4d:  RAM: %11ld, rsvd: %8d, ",
+                      nid, present, reserved);
+       }
+       printk(KERN_INFO "%ld pages of RAM\n", total_present);
+       printk(KERN_INFO "%d reserved pages\n", total_reserved);
+       printk(KERN_INFO "Total of %ld pages in page table cache\n",
+              quicklist_total_size());
+       printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages());
+}
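
The two per-configuration ia64 show_mem() copies removed above (contig and discontig) are folded into this single version, which derives the reserved count from zone accounting instead of walking every page frame. A minimal sketch of the relation it relies on; the helper name is illustrative, the fields come straight from the hunk:

    /* For any populated zone:
     *   present_pages - pages physically present in the zone
     *   managed_pages - pages actually handed to the buddy allocator
     * so the difference approximates pages reserved by firmware, bootmem
     * and the kernel itself. */
    static unsigned long zone_reserved_pages(struct zone *zone)
    {
            return zone->present_pages - zone->managed_pages;
    }
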
index 09ef94a8a7c3a18632f7d7bfb3f6edb291100c34..ca4504424dae7e9f38cf8f59c953a0cda2570c12 100644 (file)
@@ -277,13 +277,13 @@ config SMP
        bool "Symmetric multi-processing support"
        ---help---
          This enables support for systems with more than one CPU. If you have
-         a system with only one CPU, like most personal computers, say N. If
-         you have a system with more than one CPU, say Y.
+         a system with only one CPU, say N. If you have a system with more
+         than one CPU, say Y.
 
-         If you say N here, the kernel will run on single and multiprocessor
+         If you say N here, the kernel will run on uni- and multiprocessor
          machines, but will use only one CPU of a multiprocessor machine. If
          you say Y here, the kernel will run on many, but not all,
-         singleprocessor machines. On a singleprocessor machine, the kernel
+         uniprocessor machines. On a uniprocessor machine, the kernel
          will run faster if you say N here.
 
          People using multiprocessor machines who say Y here should also say
index 33312751c92b06b7c17cf38986a38e906c5ddac7..af621b041739a60795edf8e121ddc20b46308e15 100644 (file)
@@ -51,37 +51,7 @@ enum fixed_addresses {
 #define FIXADDR_SIZE   (__end_of_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_START  ((FIXADDR_TOP - FIXADDR_SIZE) & PMD_MASK)
 
-#define __fix_to_virt(x)       (FIXADDR_TOP - ((x) << PAGE_SHIFT))
-#define __virt_to_fix(x)       ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
-
-extern void __this_fixmap_does_not_exist(void);
-/*
- * 'index to address' translation. If anyone tries to use the idx
- * directly without tranlation, we catch the bug with a NULL-deference
- * kernel oops. Illegal ranges of incoming indices are caught too.
- */
-static inline unsigned long fix_to_virt(const unsigned int idx)
-{
-       /*
-        * this branch gets completely eliminated after inlining,
-        * except when someone tries to use fixaddr indices in an
-        * illegal way. (such as mixing up address types or using
-        * out-of-range indices).
-        *
-        * If it doesn't get removed, the linker will complain
-        * loudly with a reasonably clear error message..
-        */
-       if (idx >= __end_of_fixed_addresses)
-               __this_fixmap_does_not_exist();
-
-       return __fix_to_virt(idx);
-}
-
-static inline unsigned long virt_to_fix(const unsigned long vaddr)
-{
-       BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
-       return __virt_to_fix(vaddr);
-}
+#include <asm-generic/fixmap.h>
 
 #define kmap_get_fixmap_pte(vaddr) \
        pte_offset_kernel( \
index 3cd6288f65c22bd8c0aed5968d886363d0a9f6fb..11fa51c89617deb1a303c6bfdbf82b2a32a1e4db 100644 (file)
@@ -204,7 +204,8 @@ static void __init do_init_bootmem(void)
                start_pfn = memblock_region_memory_base_pfn(reg);
                end_pfn = memblock_region_memory_end_pfn(reg);
                memblock_set_node(PFN_PHYS(start_pfn),
-                                 PFN_PHYS(end_pfn - start_pfn), 0);
+                                 PFN_PHYS(end_pfn - start_pfn),
+                                 &memblock.memory, 0);
        }
 
        /* All of system RAM sits in node 0 for the non-NUMA case */
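
This is the first of many identical conversions in this merge: memblock_set_node() now takes a struct memblock_type pointer naming which region array receives the node id. Non-NUMA callers simply tag all memory as node 0. A sketch of both call shapes as they appear in the hunks below (no new names introduced):

    /* tag free memory regions with a node id */
    memblock_set_node(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn),
                      &memblock.memory, nid);

    /* x86 additionally tags reserved regions, so the nodes holding
     * kernel-reserved memory can be identified later (see the
     * numa_register_memblks() hunk further down) */
    memblock_set_node(mb->start, mb->end - mb->start,
                      &memblock.reserved, mb->nid);
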
index b172aa45fcf8201934880a0ac41cf8461ca4a435..67b46c2950728e9e7691fccea44d9ab9656ed54f 100644 (file)
@@ -42,7 +42,8 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
        memblock_add(start, end - start);
 
        memblock_set_node(PFN_PHYS(start_pfn),
-                         PFN_PHYS(end_pfn - start_pfn), nid);
+                         PFN_PHYS(end_pfn - start_pfn),
+                         &memblock.memory, nid);
 
        /* Node-local pgdat */
        pgdat_paddr = memblock_alloc_base(sizeof(struct pglist_data),
index e23cccde9c2759b9ea798a3a0705f7f242acd474..8d581ab06c5df6bc1e4d61da48324f65212c7992 100644 (file)
@@ -30,6 +30,7 @@ config MICROBLAZE
        select MODULES_USE_ELF_RELA
        select CLONE_BACKWARDS3
        select CLKSRC_OF
+       select BUILDTIME_EXTABLE_SORT
 
 config SWAP
        def_bool n
index f2b312e10b104e36eaeaed5e1d123c128435fd93..06c0e2b1883fe038bbefa903243ed6845a6a054c 100644 (file)
@@ -58,52 +58,12 @@ enum fixed_addresses {
 extern void __set_fixmap(enum fixed_addresses idx,
                                        phys_addr_t phys, pgprot_t flags);
 
-#define set_fixmap(idx, phys) \
-               __set_fixmap(idx, phys, PAGE_KERNEL)
-/*
- * Some hardware wants to get fixmapped without caching.
- */
-#define set_fixmap_nocache(idx, phys) \
-               __set_fixmap(idx, phys, PAGE_KERNEL_CI)
-
-#define clear_fixmap(idx) \
-               __set_fixmap(idx, 0, __pgprot(0))
-
 #define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_START          (FIXADDR_TOP - __FIXADDR_SIZE)
 
-#define __fix_to_virt(x)       (FIXADDR_TOP - ((x) << PAGE_SHIFT))
-#define __virt_to_fix(x)       ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
-
-extern void __this_fixmap_does_not_exist(void);
-
-/*
- * 'index to address' translation. If anyone tries to use the idx
- * directly without tranlation, we catch the bug with a NULL-deference
- * kernel oops. Illegal ranges of incoming indices are caught too.
- */
-static __always_inline unsigned long fix_to_virt(const unsigned int idx)
-{
-       /*
-        * this branch gets completely eliminated after inlining,
-        * except when someone tries to use fixaddr indices in an
-        * illegal way. (such as mixing up address types or using
-        * out-of-range indices).
-        *
-        * If it doesn't get removed, the linker will complain
-        * loudly with a reasonably clear error message..
-        */
-       if (idx >= __end_of_fixed_addresses)
-               __this_fixmap_does_not_exist();
-
-       return __fix_to_virt(idx);
-}
+#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_CI
 
-static inline unsigned long virt_to_fix(const unsigned long vaddr)
-{
-       BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
-       return __virt_to_fix(vaddr);
-}
+#include <asm-generic/fixmap.h>
 
 #endif /* !__ASSEMBLY__ */
 #endif
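
Several architectures in this merge drop their private copies of fix_to_virt()/virt_to_fix() and the set_fixmap() macro family in favour of <asm-generic/fixmap.h>. The per-architecture contract that remains is small; a hedged sketch based only on what the converted headers show:

    /* an architecture keeps FIXADDR_TOP, its enum fixed_addresses and
     * __set_fixmap(); if it needs an uncached variant it defines the
     * pgprot override before pulling in the generic helpers */
    #define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_CI      /* arch-specific pgprot */
    #include <asm-generic/fixmap.h>

    /* the generic header then supplies fix_to_virt(), virt_to_fix(),
     * set_fixmap(), set_fixmap_nocache() and clear_fixmap(), with
     * equivalent out-of-range index checking to the removed code */
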
index 74c7bcc1e82d8da05b6775274f572fe17114d5f0..89077d34671458acf121c47224674289eb84e15d 100644 (file)
@@ -192,7 +192,8 @@ void __init setup_memory(void)
                start_pfn = memblock_region_memory_base_pfn(reg);
                end_pfn = memblock_region_memory_end_pfn(reg);
                memblock_set_node(start_pfn << PAGE_SHIFT,
-                                       (end_pfn - start_pfn) << PAGE_SHIFT, 0);
+                                 (end_pfn - start_pfn) << PAGE_SHIFT,
+                                 &memblock.memory, 0);
        }
 
        /* free bootmem is whole main memory */
index 35aca20721f5f5dff9d37946d2917f596f925ac6..dc37faabb6c43e7b7a1b15e4fbb0dedd9c0de7fc 100644 (file)
@@ -2161,13 +2161,13 @@ config SMP
        depends on SYS_SUPPORTS_SMP
        help
          This enables support for systems with more than one CPU. If you have
-         a system with only one CPU, like most personal computers, say N. If
-         you have a system with more than one CPU, say Y.
+         a system with only one CPU, say N. If you have a system with more
+         than one CPU, say Y.
 
-         If you say N here, the kernel will run on single and multiprocessor
+         If you say N here, the kernel will run on uni- and multiprocessor
          machines, but will use only one CPU of a multiprocessor machine. If
          you say Y here, the kernel will run on many, but not all,
-         singleprocessor machines. On a singleprocessor machine, the kernel
+         uniprocessor machines. On a uniprocessor machine, the kernel
          will run faster if you say N here.
 
          People using multiprocessor machines who say Y here should also say
@@ -2479,7 +2479,7 @@ source "drivers/pcmcia/Kconfig"
 source "drivers/pci/hotplug/Kconfig"
 
 config RAPIDIO
-       bool "RapidIO support"
+       tristate "RapidIO support"
        depends on PCI
        default n
        help
index dfaaf493e9d4a552238372506b1a662a8e6c260e..8c012af2f451dbd4b6e2727f298b8983a7ff8794 100644 (file)
@@ -71,38 +71,7 @@ enum fixed_addresses {
 #define FIXADDR_SIZE   (__end_of_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_START  (FIXADDR_TOP - FIXADDR_SIZE)
 
-#define __fix_to_virt(x)       (FIXADDR_TOP - ((x) << PAGE_SHIFT))
-#define __virt_to_fix(x)       ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
-
-extern void __this_fixmap_does_not_exist(void);
-
-/*
- * 'index to address' translation. If anyone tries to use the idx
- * directly without tranlation, we catch the bug with a NULL-deference
- * kernel oops. Illegal ranges of incoming indices are caught too.
- */
-static inline unsigned long fix_to_virt(const unsigned int idx)
-{
-       /*
-        * this branch gets completely eliminated after inlining,
-        * except when someone tries to use fixaddr indices in an
-        * illegal way. (such as mixing up address types or using
-        * out-of-range indices).
-        *
-        * If it doesn't get removed, the linker will complain
-        * loudly with a reasonably clear error message..
-        */
-       if (idx >= __end_of_fixed_addresses)
-               __this_fixmap_does_not_exist();
-
-       return __fix_to_virt(idx);
-}
-
-static inline unsigned long virt_to_fix(const unsigned long vaddr)
-{
-       BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
-       return __virt_to_fix(vaddr);
-}
+#include <asm-generic/fixmap.h>
 
 #define kmap_get_fixmap_pte(vaddr)                                     \
        pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr))
index 8bde9237d13bf3dca2ab111b35ba921f496baec3..a648de1b1096142d13d56055e10391bafe309ae5 100644 (file)
@@ -184,13 +184,13 @@ config SMP
        depends on MN10300_PROC_MN2WS0038 || MN10300_PROC_MN2WS0050
        ---help---
          This enables support for systems with more than one CPU. If you have
-         a system with only one CPU, like most personal computers, say N. If
-         you have a system with more than one CPU, say Y.
+         a system with only one CPU, say N. If you have a system with more
+         than one CPU, say Y.
 
-         If you say N here, the kernel will run on single and multiprocessor
+         If you say N here, the kernel will run on uni- and multiprocessor
          machines, but will use only one CPU of a multiprocessor machine. If
          you say Y here, the kernel will run on many, but not all,
-         singleprocessor machines. On a singleprocessor machine, the kernel
+         uniprocessor machines. On a uniprocessor machine, the kernel
          will run faster if you say N here.
 
          See also <file:Documentation/x86/i386/IO-APIC.txt>,
index b5f1858baf3399b1dcb10ef3f5d19debf50876d3..bb2a8ec440e76ac80fcb2d0afa69284291054099 100644 (file)
@@ -229,13 +229,13 @@ config SMP
        bool "Symmetric multi-processing support"
        ---help---
          This enables support for systems with more than one CPU. If you have
-         a system with only one CPU, like most personal computers, say N. If
-         you have a system with more than one CPU, say Y.
+         a system with only one CPU, say N. If you have a system with more
+         than one CPU, say Y.
 
-         If you say N here, the kernel will run on single and multiprocessor
+         If you say N here, the kernel will run on uni- and multiprocessor
          machines, but will use only one CPU of a multiprocessor machine. If
          you say Y here, the kernel will run on many, but not all,
-         singleprocessor machines. On a singleprocessor machine, the kernel
+         uniprocessor machines. On a uniprocessor machine, the kernel
          will run faster if you say N here.
 
          See also <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO
index 96f8168cf4ec1d9d50aad70ae17f54110a2c1527..ae085ad0fba03827df21874edfb6a6985c3338f8 100644 (file)
@@ -645,55 +645,30 @@ EXPORT_SYMBOL(empty_zero_page);
 
 void show_mem(unsigned int filter)
 {
-       int i,free = 0,total = 0,reserved = 0;
-       int shared = 0, cached = 0;
+       int total = 0,reserved = 0;
+       pg_data_t *pgdat;
 
        printk(KERN_INFO "Mem-info:\n");
        show_free_areas(filter);
-       if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
-               return;
-#ifndef CONFIG_DISCONTIGMEM
-       i = max_mapnr;
-       while (i-- > 0) {
-               total++;
-               if (PageReserved(mem_map+i))
-                       reserved++;
-               else if (PageSwapCache(mem_map+i))
-                       cached++;
-               else if (!page_count(&mem_map[i]))
-                       free++;
-               else
-                       shared += page_count(&mem_map[i]) - 1;
-       }
-#else
-       for (i = 0; i < npmem_ranges; i++) {
-               int j;
 
-               for (j = node_start_pfn(i); j < node_end_pfn(i); j++) {
-                       struct page *p;
-                       unsigned long flags;
-
-                       pgdat_resize_lock(NODE_DATA(i), &flags);
-                       p = nid_page_nr(i, j) - node_start_pfn(i);
-
-                       total++;
-                       if (PageReserved(p))
-                               reserved++;
-                       else if (PageSwapCache(p))
-                               cached++;
-                       else if (!page_count(p))
-                               free++;
-                       else
-                               shared += page_count(p) - 1;
-                       pgdat_resize_unlock(NODE_DATA(i), &flags);
-               }
+       for_each_online_pgdat(pgdat) {
+               unsigned long flags;
+               int zoneid;
+
+               pgdat_resize_lock(pgdat, &flags);
+               for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
+                       struct zone *zone = &pgdat->node_zones[zoneid];
+                       if (!populated_zone(zone))
+                               continue;
+
+                       total += zone->present_pages;
+                       reserved = zone->present_pages - zone->managed_pages;
+               }
+               pgdat_resize_unlock(pgdat, &flags);
        }
-#endif
+
        printk(KERN_INFO "%d pages of RAM\n", total);
        printk(KERN_INFO "%d reserved pages\n", reserved);
-       printk(KERN_INFO "%d pages shared\n", shared);
-       printk(KERN_INFO "%d pages swap cached\n", cached);
-
 
 #ifdef CONFIG_DISCONTIGMEM
        {
index 7c3d97d8da7002745c07e4fea6ddd6564577c9fc..f58aefe7b6e04561438ef466896a201dbb6c3de5 100644 (file)
@@ -797,7 +797,7 @@ config HAS_RAPIDIO
        default n
 
 config RAPIDIO
-       bool "RapidIO support"
+       tristate "RapidIO support"
        depends on HAS_RAPIDIO || PCI
        help
          If you say Y here, the kernel will include drivers and
@@ -805,7 +805,7 @@ config RAPIDIO
 
 config FSL_RIO
        bool "Freescale Embedded SRIO Controller support"
-       depends on RAPIDIO && HAS_RAPIDIO
+       depends on RAPIDIO = y && HAS_RAPIDIO
        default "n"
        ---help---
          Include support for RapidIO controller on Freescale embedded
index 5c2c0233175e7837b4b2a154003fba67b2166da2..90f604bbcd19b1703bc2b78458447b8bab815e7e 100644 (file)
@@ -58,52 +58,12 @@ enum fixed_addresses {
 extern void __set_fixmap (enum fixed_addresses idx,
                                        phys_addr_t phys, pgprot_t flags);
 
-#define set_fixmap(idx, phys) \
-               __set_fixmap(idx, phys, PAGE_KERNEL)
-/*
- * Some hardware wants to get fixmapped without caching.
- */
-#define set_fixmap_nocache(idx, phys) \
-               __set_fixmap(idx, phys, PAGE_KERNEL_NCG)
-
-#define clear_fixmap(idx) \
-               __set_fixmap(idx, 0, __pgprot(0))
-
 #define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_START          (FIXADDR_TOP - __FIXADDR_SIZE)
 
-#define __fix_to_virt(x)       (FIXADDR_TOP - ((x) << PAGE_SHIFT))
-#define __virt_to_fix(x)       ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
-
-extern void __this_fixmap_does_not_exist(void);
-
-/*
- * 'index to address' translation. If anyone tries to use the idx
- * directly without tranlation, we catch the bug with a NULL-deference
- * kernel oops. Illegal ranges of incoming indices are caught too.
- */
-static __always_inline unsigned long fix_to_virt(const unsigned int idx)
-{
-       /*
-        * this branch gets completely eliminated after inlining,
-        * except when someone tries to use fixaddr indices in an
-        * illegal way. (such as mixing up address types or using
-        * out-of-range indices).
-        *
-        * If it doesn't get removed, the linker will complain
-        * loudly with a reasonably clear error message..
-        */
-       if (idx >= __end_of_fixed_addresses)
-               __this_fixmap_does_not_exist();
-
-        return __fix_to_virt(idx);
-}
+#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NCG
 
-static inline unsigned long virt_to_fix(const unsigned long vaddr)
-{
-       BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
-       return __virt_to_fix(vaddr);
-}
+#include <asm-generic/fixmap.h>
 
 #endif /* !__ASSEMBLY__ */
 #endif
index b1c5734bc2ce5ebe325b47024cd88172872f038b..4b5cd5c2594d9b5ca507bc80751f6d1003fde4d6 100644 (file)
@@ -209,7 +209,7 @@ void __init do_init_bootmem(void)
        /* Place all memblock_regions in the same node and merge contiguous
         * memblock_regions
         */
-       memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
+       memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
 
        /* Add all physical memory to the bootmem map, mark each area
         * present.
index 4f50c6a9e68fe0e313990a6a9987b03e3f385f8d..86a63de072c617c35dc65b138b456d0ba900001a 100644 (file)
@@ -720,7 +720,8 @@ static void __init parse_drconf_memory(struct device_node *memory)
                        node_set_online(nid);
                        sz = numa_enforce_memory_limit(base, size);
                        if (sz)
-                               memblock_set_node(base, sz, nid);
+                               memblock_set_node(base, sz,
+                                                 &memblock.memory, nid);
                } while (--ranges);
        }
 }
@@ -810,7 +811,7 @@ new_range:
                                continue;
                }
 
-               memblock_set_node(start, size, nid);
+               memblock_set_node(start, size, &memblock.memory, nid);
 
                if (--ranges)
                        goto new_range;
@@ -847,7 +848,8 @@ static void __init setup_nonnuma(void)
 
                fake_numa_create_new_node(end_pfn, &nid);
                memblock_set_node(PFN_PHYS(start_pfn),
-                                 PFN_PHYS(end_pfn - start_pfn), nid);
+                                 PFN_PHYS(end_pfn - start_pfn),
+                                 &memblock.memory, nid);
                node_set_online(nid);
        }
 }
index e9f3125325266ff8b93f13c009003d3d4aa335de..4f858f77d870d842fa99fdc26e00b13521ae35e3 100644 (file)
@@ -334,10 +334,10 @@ config SMP
          a system with only one CPU, like most personal computers, say N. If
          you have a system with more than one CPU, say Y.
 
-         If you say N here, the kernel will run on single and multiprocessor
+         If you say N here, the kernel will run on uni- and multiprocessor
          machines, but will use only one CPU of a multiprocessor machine. If
          you say Y here, the kernel will run on many, but not all,
-         singleprocessor machines. On a singleprocessor machine, the kernel
+         uniprocessor machines. On a uniprocessor machine, the kernel
          will run faster if you say N here.
 
          See also the SMP-HOWTO available at
index ce298317a73e5da295b398ff140b14a267a3977e..6357710753d548f3a2ccf8c964d4053636796bc6 100644 (file)
@@ -701,13 +701,13 @@ config SMP
        depends on SYS_SUPPORTS_SMP
        ---help---
          This enables support for systems with more than one CPU. If you have
-         a system with only one CPU, like most personal computers, say N. If
-         you have a system with more than one CPU, say Y.
+         a system with only one CPU, say N. If you have a system with more
+         than one CPU, say Y.
 
-         If you say N here, the kernel will run on single and multiprocessor
+         If you say N here, the kernel will run on uni- and multiprocessor
          machines, but will use only one CPU of a multiprocessor machine. If
          you say Y here, the kernel will run on many, but not all,
-         singleprocessor machines. On a singleprocessor machine, the kernel
+         uniprocessor machines. On a uniprocessor machine, the kernel
          will run faster if you say N here.
 
          People using multiprocessor machines who say Y here should also say
index cbe0186b679433d988263f73945df8f3940e99d8..4daf91c3b725905e3b59f68151e0e3f71ba542ed 100644 (file)
@@ -79,13 +79,6 @@ extern void __set_fixmap(enum fixed_addresses idx,
                         unsigned long phys, pgprot_t flags);
 extern void __clear_fixmap(enum fixed_addresses idx, pgprot_t flags);
 
-#define set_fixmap(idx, phys) \
-               __set_fixmap(idx, phys, PAGE_KERNEL)
-/*
- * Some hardware wants to get fixmapped without caching.
- */
-#define set_fixmap_nocache(idx, phys) \
-               __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
 /*
  * used by vmalloc.c.
  *
@@ -101,36 +94,8 @@ extern void __clear_fixmap(enum fixed_addresses idx, pgprot_t flags);
 #define FIXADDR_SIZE   (__end_of_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_START  (FIXADDR_TOP - FIXADDR_SIZE)
 
-#define __fix_to_virt(x)       (FIXADDR_TOP - ((x) << PAGE_SHIFT))
-#define __virt_to_fix(x)       ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
-
-extern void __this_fixmap_does_not_exist(void);
-
-/*
- * 'index to address' translation. If anyone tries to use the idx
- * directly without tranlation, we catch the bug with a NULL-deference
- * kernel oops. Illegal ranges of incoming indices are caught too.
- */
-static inline unsigned long fix_to_virt(const unsigned int idx)
-{
-       /*
-        * this branch gets completely eliminated after inlining,
-        * except when someone tries to use fixaddr indices in an
-        * illegal way. (such as mixing up address types or using
-        * out-of-range indices).
-        *
-        * If it doesn't get removed, the linker will complain
-        * loudly with a reasonably clear error message..
-        */
-       if (idx >= __end_of_fixed_addresses)
-               __this_fixmap_does_not_exist();
+#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NOCACHE
 
-        return __fix_to_virt(idx);
-}
+#include <asm-generic/fixmap.h>
 
-static inline unsigned long virt_to_fix(const unsigned long vaddr)
-{
-       BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
-       return __virt_to_fix(vaddr);
-}
 #endif
index 49c09c7d5b77ce76b3351bf2174251756c7b9b41..67a049e75ec12f60cf7f1d1934d94122eb2f2617 100644 (file)
@@ -995,29 +995,19 @@ static struct unwinder dwarf_unwinder = {
 
 static void dwarf_unwinder_cleanup(void)
 {
-       struct rb_node **fde_rb_node = &fde_root.rb_node;
-       struct rb_node **cie_rb_node = &cie_root.rb_node;
+       struct dwarf_fde *fde, *next_fde;
+       struct dwarf_cie *cie, *next_cie;
 
        /*
         * Deallocate all the memory allocated for the DWARF unwinder.
         * Traverse all the FDE/CIE lists and remove and free all the
         * memory associated with those data structures.
         */
-       while (*fde_rb_node) {
-               struct dwarf_fde *fde;
-
-               fde = rb_entry(*fde_rb_node, struct dwarf_fde, node);
-               rb_erase(*fde_rb_node, &fde_root);
+       rbtree_postorder_for_each_entry_safe(fde, next_fde, &fde_root, node)
                kfree(fde);
-       }
 
-       while (*cie_rb_node) {
-               struct dwarf_cie *cie;
-
-               cie = rb_entry(*cie_rb_node, struct dwarf_cie, node);
-               rb_erase(*cie_rb_node, &cie_root);
+       rbtree_postorder_for_each_entry_safe(cie, next_cie, &cie_root, node)
                kfree(cie);
-       }
 
        kmem_cache_destroy(dwarf_reg_cachep);
        kmem_cache_destroy(dwarf_frame_cachep);
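
The DWARF unwinder teardown above switches from repeated rb_erase()-and-free loops to rbtree_postorder_for_each_entry_safe(). A short sketch of the pattern; only the final RB_ROOT reset is an addition, and it is an assumption about reuse rather than something this hunk does:

    struct dwarf_fde *fde, *next_fde;

    /* post-order: children are visited before their parent, and the next
     * entry is fetched before the current one is freed, so kfree() inside
     * the loop is safe and no rb_erase() is needed for a full teardown */
    rbtree_postorder_for_each_entry_safe(fde, next_fde, &fde_root, node)
            kfree(fde);

    fde_root = RB_ROOT;     /* assumption: only needed if the tree is reused */
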
index 1cf90e947dbf19962bbf86d685b0d8b142331e99..de19cfa768f208708406e3350b5e9aecc92a6266 100644 (file)
@@ -230,8 +230,8 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
        pmb_bolt_mapping((unsigned long)__va(start), start, end - start,
                         PAGE_KERNEL);
 
-       memblock_set_node(PFN_PHYS(start_pfn),
-                         PFN_PHYS(end_pfn - start_pfn), nid);
+       memblock_set_node(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn),
+                         &memblock.memory, nid);
 }
 
 void __init __weak plat_early_device_setup(void)
index 034a680ffec260c9249800023648f7cbcccdba22..c51efdcd07a2d09e76c06e31671efcd19a6db4ed 100644 (file)
@@ -153,10 +153,10 @@ config SMP
          a system with only one CPU, say N. If you have a system with more
          than one CPU, say Y.
 
-         If you say N here, the kernel will run on single and multiprocessor
+         If you say N here, the kernel will run on uni- and multiprocessor
          machines, but will use only one CPU of a multiprocessor machine. If
          you say Y here, the kernel will run on many, but not all,
-         singleprocessor machines. On a singleprocessor machine, the kernel
+         uniprocessor machines. On a uniprocessor machine, the kernel
          will run faster if you say N here.
 
          People using multiprocessor machines who say Y here should also say
index 5322e530d09cf9cbbb459198a1613997c5bf9964..eafbc65c9c47f63772162d384ef55fd549d1e149 100644 (file)
@@ -1021,7 +1021,8 @@ static void __init add_node_ranges(void)
                                "start[%lx] end[%lx]\n",
                                nid, start, this_end);
 
-                       memblock_set_node(start, this_end - start, nid);
+                       memblock_set_node(start, this_end - start,
+                                         &memblock.memory, nid);
                        start = this_end;
                }
        }
@@ -1325,7 +1326,7 @@ static void __init bootmem_init_nonnuma(void)
               (top_of_ram - total_ram) >> 20);
 
        init_node_masks_nonnuma();
-       memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
+       memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
        allocate_node_data(0);
        node_set_online(0);
 }
index c6b9c1b38fd1f0e195f1bf55340ee45eacd79305..ffe2637aeb31240a3ce81fd510bfeee4c0e71b6d 100644 (file)
@@ -25,9 +25,6 @@
 #include <asm/kmap_types.h>
 #endif
 
-#define __fix_to_virt(x)       (FIXADDR_TOP - ((x) << PAGE_SHIFT))
-#define __virt_to_fix(x)       ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
-
 /*
  * Here we define all the compile-time 'special' virtual
  * addresses. The point is to have a constant address at
@@ -83,35 +80,7 @@ enum fixed_addresses {
 #define FIXADDR_START          (FIXADDR_TOP + PAGE_SIZE - __FIXADDR_SIZE)
 #define FIXADDR_BOOT_START     (FIXADDR_TOP + PAGE_SIZE - __FIXADDR_BOOT_SIZE)
 
-extern void __this_fixmap_does_not_exist(void);
-
-/*
- * 'index to address' translation. If anyone tries to use the idx
- * directly without tranlation, we catch the bug with a NULL-deference
- * kernel oops. Illegal ranges of incoming indices are caught too.
- */
-static __always_inline unsigned long fix_to_virt(const unsigned int idx)
-{
-       /*
-        * this branch gets completely eliminated after inlining,
-        * except when someone tries to use fixaddr indices in an
-        * illegal way. (such as mixing up address types or using
-        * out-of-range indices).
-        *
-        * If it doesn't get removed, the linker will complain
-        * loudly with a reasonably clear error message..
-        */
-       if (idx >= __end_of_fixed_addresses)
-               __this_fixmap_does_not_exist();
-
-       return __fix_to_virt(idx);
-}
-
-static inline unsigned long virt_to_fix(const unsigned long vaddr)
-{
-       BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
-       return __virt_to_fix(vaddr);
-}
+#include <asm-generic/fixmap.h>
 
 #endif /* !__ASSEMBLY__ */
 
index 21a423bae5e853fa02673ab82391d4c7dd88cc19..3094ea3c73b094e970ded6d775efa7602588151f 100644 (file)
@@ -43,13 +43,6 @@ enum fixed_addresses {
 extern void __set_fixmap (enum fixed_addresses idx,
                          unsigned long phys, pgprot_t flags);
 
-#define set_fixmap(idx, phys) \
-               __set_fixmap(idx, phys, PAGE_KERNEL)
-/*
- * Some hardware wants to get fixmapped without caching.
- */
-#define set_fixmap_nocache(idx, phys) \
-               __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
 /*
  * used by vmalloc.c.
  *
@@ -62,37 +55,6 @@ extern void __set_fixmap (enum fixed_addresses idx,
 #define FIXADDR_SIZE   (__end_of_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_START  (FIXADDR_TOP - FIXADDR_SIZE)
 
-#define __fix_to_virt(x)       (FIXADDR_TOP - ((x) << PAGE_SHIFT))
-#define __virt_to_fix(x)      ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
-
-extern void __this_fixmap_does_not_exist(void);
-
-/*
- * 'index to address' translation. If anyone tries to use the idx
- * directly without tranlation, we catch the bug with a NULL-deference
- * kernel oops. Illegal ranges of incoming indices are caught too.
- */
-static inline unsigned long fix_to_virt(const unsigned int idx)
-{
-       /*
-        * this branch gets completely eliminated after inlining,
-        * except when someone tries to use fixaddr indices in an
-        * illegal way. (such as mixing up address types or using
-        * out-of-range indices).
-        *
-        * If it doesn't get removed, the linker will complain
-        * loudly with a reasonably clear error message..
-        */
-       if (idx >= __end_of_fixed_addresses)
-               __this_fixmap_does_not_exist();
-
-        return __fix_to_virt(idx);
-}
-
-static inline unsigned long virt_to_fix(const unsigned long vaddr)
-{
-      BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
-      return __virt_to_fix(vaddr);
-}
+#include <asm-generic/fixmap.h>
 
 #endif
index ae6bc036db92afdc4de9f83de485caf2a86695fe..be2bde9b07cf7d8594a59639e340d9f1020551b6 100644 (file)
@@ -66,9 +66,6 @@ void show_mem(unsigned int filter)
        printk(KERN_DEFAULT "Mem-info:\n");
        show_free_areas(filter);
 
-       if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
-               return;
-
        for_each_bank(i, mi) {
                struct membank *bank = &mi->bank[i];
                unsigned int pfn1, pfn2;
index beff90683a1b6f33d192747f47691cd59f5a162c..9421881d52df09189f071564d9677015362a4ab5 100644 (file)
@@ -280,13 +280,13 @@ config SMP
        bool "Symmetric multi-processing support"
        ---help---
          This enables support for systems with more than one CPU. If you have
-         a system with only one CPU, like most personal computers, say N. If
-         you have a system with more than one CPU, say Y.
+         a system with only one CPU, say N. If you have a system with more
+         than one CPU, say Y.
 
-         If you say N here, the kernel will run on single and multiprocessor
+         If you say N here, the kernel will run on uni- and multiprocessor
          machines, but will use only one CPU of a multiprocessor machine. If
          you say Y here, the kernel will run on many, but not all,
-         singleprocessor machines. On a singleprocessor machine, the kernel
+         uniprocessor machines. On a uniprocessor machine, the kernel
          will run faster if you say N here.
 
          Note that if you say Y here and choose architecture "586" or
@@ -748,6 +748,7 @@ config APB_TIMER
 # The code disables itself when not needed.
 config DMI
        default y
+       select DMI_SCAN_MACHINE_NON_EFI_FALLBACK
        bool "Enable DMI scanning" if EXPERT
        ---help---
          Enabled scanning of DMI to identify machine quirks. Say Y
index fd8f9e2ca35f16ffdac6b9e609be0086ae055803..535192f6bfad8d67c27235377e1e549b8ba89276 100644 (file)
@@ -13,7 +13,9 @@ static __always_inline __init void *dmi_alloc(unsigned len)
 }
 
 /* Use early IO mappings for DMI because it's initialized early */
-#define dmi_ioremap early_ioremap
-#define dmi_iounmap early_iounmap
+#define dmi_early_remap                early_ioremap
+#define dmi_early_unmap                early_iounmap
+#define dmi_remap              ioremap
+#define dmi_unmap              iounmap
 
 #endif /* _ASM_X86_DMI_H */
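
The single dmi_ioremap/dmi_iounmap pair becomes four macros, separating early-boot mappings (early_ioremap) from mappings taken once regular ioremap() is available. A hedged usage sketch; dmi_base and dmi_len are illustrative names and the surrounding logic is not taken from dmi_scan.c:

    void __iomem *buf;

    /* during early boot, before ioremap() is usable */
    buf = dmi_early_remap(dmi_base, dmi_len);       /* early_ioremap() on x86 */
    if (buf) {
            /* ... walk the DMI table ... */
            dmi_early_unmap(buf, dmi_len);
    }

    /* later, from normal context */
    buf = dmi_remap(dmi_base, dmi_len);             /* plain ioremap() */
    if (buf) {
            /* ... */
            dmi_unmap(buf);                         /* iounmap() */
    }
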
index e846225265ed4a6b8dccc6384b06a6e806d9f1c6..7252cd339175eb760e68a233e67dee0e628a8e5f 100644 (file)
@@ -175,64 +175,7 @@ static inline void __set_fixmap(enum fixed_addresses idx,
 }
 #endif
 
-#define set_fixmap(idx, phys)                          \
-       __set_fixmap(idx, phys, PAGE_KERNEL)
-
-/*
- * Some hardware wants to get fixmapped without caching.
- */
-#define set_fixmap_nocache(idx, phys)                  \
-       __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
-
-#define clear_fixmap(idx)                      \
-       __set_fixmap(idx, 0, __pgprot(0))
-
-#define __fix_to_virt(x)       (FIXADDR_TOP - ((x) << PAGE_SHIFT))
-#define __virt_to_fix(x)       ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
-
-extern void __this_fixmap_does_not_exist(void);
-
-/*
- * 'index to address' translation. If anyone tries to use the idx
- * directly without translation, we catch the bug with a NULL-deference
- * kernel oops. Illegal ranges of incoming indices are caught too.
- */
-static __always_inline unsigned long fix_to_virt(const unsigned int idx)
-{
-       /*
-        * this branch gets completely eliminated after inlining,
-        * except when someone tries to use fixaddr indices in an
-        * illegal way. (such as mixing up address types or using
-        * out-of-range indices).
-        *
-        * If it doesn't get removed, the linker will complain
-        * loudly with a reasonably clear error message..
-        */
-       if (idx >= __end_of_fixed_addresses)
-               __this_fixmap_does_not_exist();
-
-       return __fix_to_virt(idx);
-}
-
-static inline unsigned long virt_to_fix(const unsigned long vaddr)
-{
-       BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
-       return __virt_to_fix(vaddr);
-}
-
-/* Return an pointer with offset calculated */
-static __always_inline unsigned long
-__set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
-{
-       __set_fixmap(idx, phys, flags);
-       return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1));
-}
-
-#define set_fixmap_offset(idx, phys)                   \
-       __set_fixmap_offset(idx, phys, PAGE_KERNEL)
-
-#define set_fixmap_offset_nocache(idx, phys)                   \
-       __set_fixmap_offset(idx, phys, PAGE_KERNEL_NOCACHE)
+#include <asm-generic/fixmap.h>
 
 #endif /* !__ASSEMBLY__ */
 #endif /* _ASM_X86_FIXMAP_H */
index f97fbe3abb67f5059d4e6f0a37261d6113df19de..2f59cce3b38aaabb83efe9c528ffbfc7ea9ae8d8 100644 (file)
@@ -51,9 +51,9 @@ extern int devmem_is_allowed(unsigned long pagenr);
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
-static inline phys_addr_t get_max_mapped(void)
+static inline phys_addr_t get_max_low_mapped(void)
 {
-       return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
+       return (phys_addr_t)max_low_pfn_mapped << PAGE_SHIFT;
 }
 
 bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn);
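
get_max_mapped() becomes get_max_low_mapped(), i.e. the value is now derived from max_low_pfn_mapped rather than max_pfn_mapped. The only caller touched in this merge is setup_arch() (see the setup.c hunk further down); presumably the intent is to keep early memblock allocations inside memory that is already direct-mapped:

    /* from the setup.c hunk below; the helper just converts the lowest
     * fully-mapped pfn into a physical address */
    memblock_set_current_limit(get_max_low_mapped());
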
index e6d90babc245c1c2713bea64d436fb2a736b83e6..04905bfc508b9925c697687b7c1d5754827ba417 100644 (file)
@@ -62,7 +62,7 @@ static inline void __flush_tlb_all(void)
 
 static inline void __flush_tlb_one(unsigned long addr)
 {
-       count_vm_event(NR_TLB_LOCAL_FLUSH_ONE);
+       count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
        __flush_tlb_single(addr);
 }
 
@@ -93,13 +93,13 @@ static inline void __flush_tlb_one(unsigned long addr)
  */
 static inline void __flush_tlb_up(void)
 {
-       count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
+       count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
        __flush_tlb();
 }
 
 static inline void flush_tlb_all(void)
 {
-       count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
+       count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
        __flush_tlb_all();
 }
 
index e2dbcb7dabdd6287f9410bae67a3e0adf5a9e1de..83a7995625a6de35c293537c7e78c3a0ba6a1a63 100644 (file)
@@ -91,7 +91,7 @@ void __init setup_bios_corruption_check(void)
 
        corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
 
-       for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
+       for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL) {
                start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE),
                                PAGE_SIZE, corruption_check_size);
                end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE),
index 4a48e8bbd8576dc62df7bd6fe494011e11fe4037..1ac871d53912d731df329848fb0ff33c117e8da3 100644 (file)
@@ -757,10 +757,7 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
 
 static void cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
 {
-       tlb_flushall_shift = 5;
-
-       if (c->x86 <= 0x11)
-               tlb_flushall_shift = 4;
+       tlb_flushall_shift = 6;
 }
 
 static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
index 3db61c644e440e8af7b978320b546a25336055f4..5cd9bfabd6450e6743dc03479dad8cba38f9eec9 100644 (file)
@@ -640,21 +640,17 @@ static void intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
        case 0x61d: /* six-core 45 nm xeon "Dunnington" */
                tlb_flushall_shift = -1;
                break;
+       case 0x63a: /* Ivybridge */
+               tlb_flushall_shift = 2;
+               break;
        case 0x61a: /* 45 nm nehalem, "Bloomfield" */
        case 0x61e: /* 45 nm nehalem, "Lynnfield" */
        case 0x625: /* 32 nm nehalem, "Clarkdale" */
        case 0x62c: /* 32 nm nehalem, "Gulftown" */
        case 0x62e: /* 45 nm nehalem-ex, "Beckton" */
        case 0x62f: /* 32 nm Xeon E7 */
-               tlb_flushall_shift = 6;
-               break;
        case 0x62a: /* SandyBridge */
        case 0x62d: /* SandyBridge, "Romely-EP" */
-               tlb_flushall_shift = 5;
-               break;
-       case 0x63a: /* Ivybridge */
-               tlb_flushall_shift = 1;
-               break;
        default:
                tlb_flushall_shift = 6;
        }
index ce2d0a2c3e4ff56819152574eefded2e8d64ea69..0e25a1bc5ab5cfbbf21484ce268ad17ed48844ec 100644 (file)
@@ -683,7 +683,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
        }
 
        /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
-       count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
+       count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
        __flush_tlb();
 
        /* Save MTRR state */
@@ -697,7 +697,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
 static void post_set(void) __releases(set_atomicity_lock)
 {
        /* Flush TLBs (no need to flush caches - they are disabled) */
-       count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
+       count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
        __flush_tlb();
 
        /* Intel (P6) standard MTRRs */
index 174da5fc5a7b06005733a994424dd8f88b883ecb..988c00a1f60dac037defb41d4419e82abaa7202c 100644 (file)
@@ -1120,7 +1120,7 @@ void __init memblock_find_dma_reserve(void)
                nr_pages += end_pfn - start_pfn;
        }
 
-       for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) {
+       for_each_free_mem_range(u, NUMA_NO_NODE, &start, &end, NULL) {
                start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN);
                end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN);
                if (start_pfn < end_pfn)
index 06853e6703541f8349106e17330f86621fa984db..c9675594d7caee907fd6fd78559fe14bf1aea036 100644 (file)
@@ -1119,7 +1119,7 @@ void __init setup_arch(char **cmdline_p)
 
        setup_real_mode();
 
-       memblock_set_current_limit(get_max_mapped());
+       memblock_set_current_limit(get_max_low_mapped());
        dma_contiguous_reserve(0);
 
        /*
index 0596e8e0cc1992b1fc32a00277a4f879cd07a8f3..207d9aef662def24bfef094ff587f56f33afa595 100644 (file)
@@ -108,8 +108,8 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 
 static inline void get_head_page_multiple(struct page *page, int nr)
 {
-       VM_BUG_ON(page != compound_head(page));
-       VM_BUG_ON(page_count(page) == 0);
+       VM_BUG_ON_PAGE(page != compound_head(page), page);
+       VM_BUG_ON_PAGE(page_count(page) == 0, page);
        atomic_add(nr, &page->_count);
        SetPageReferenced(page);
 }
@@ -135,7 +135,7 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
        head = pte_page(pte);
        page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
        do {
-               VM_BUG_ON(compound_head(page) != head);
+               VM_BUG_ON_PAGE(compound_head(page) != head, page);
                pages[*nr] = page;
                if (PageTail(page))
                        get_huge_page_tail(page);
@@ -212,7 +212,7 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
        head = pte_page(pte);
        page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
        do {
-               VM_BUG_ON(compound_head(page) != head);
+               VM_BUG_ON_PAGE(compound_head(page) != head, page);
                pages[*nr] = page;
                if (PageTail(page))
                        get_huge_page_tail(page);
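
The gup.c assertions switch from VM_BUG_ON() to VM_BUG_ON_PAGE() so the page that triggered the assertion is dumped before the machine BUGs. A sketch of the shape of such a macro; this is an assumption about its definition, since only the call sites appear in this diff:

    /* only the CONFIG_DEBUG_VM flavour is sketched; the fallback is elided */
    #define VM_BUG_ON_PAGE(cond, page)                                      \
            do {                                                            \
                    if (unlikely(cond)) {                                   \
                            dump_page(page);  /* assumed dump helper */     \
                            BUG();                                          \
                    }                                                       \
            } while (0)
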
index 5bdc5430597cfea9056e08edb7902a098f28535c..e39504878aecd22f6688073bd3fca80f39bfd9c1 100644 (file)
@@ -665,7 +665,7 @@ void __init initmem_init(void)
        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
 
-       memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
+       memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
        sparse_memory_present_with_active_regions(0);
 
 #ifdef CONFIG_FLATMEM
index 104d56a9245f1ea071009dc49e3bd25a2c48383b..f35c66c5959ac0042fbf9ff43a76cf55f89d9d0f 100644 (file)
@@ -643,7 +643,7 @@ kernel_physical_mapping_init(unsigned long start,
 #ifndef CONFIG_NUMA
 void __init initmem_init(void)
 {
-       memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
+       memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
 }
 #endif
 
index 8dabbed409ee69d58e9752be88b5ddaa38a02093..1e9da795767ab95c6688b2b47fada69db0767ebd 100644 (file)
@@ -74,7 +74,7 @@ static void __init do_one_pass(u64 pattern, u64 start, u64 end)
        u64 i;
        phys_addr_t this_start, this_end;
 
-       for_each_free_mem_range(i, MAX_NUMNODES, &this_start, &this_end, NULL) {
+       for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) {
                this_start = clamp_t(phys_addr_t, this_start, start, end);
                this_end = clamp_t(phys_addr_t, this_end, start, end);
                if (this_start < this_end) {
index c85da7bb6b603cf2cab624835ea2d6459958ce5b..81b2750f3666f16d7a42e0699d8dc458530fd7b8 100644 (file)
@@ -491,7 +491,16 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 
        for (i = 0; i < mi->nr_blks; i++) {
                struct numa_memblk *mb = &mi->blk[i];
-               memblock_set_node(mb->start, mb->end - mb->start, mb->nid);
+               memblock_set_node(mb->start, mb->end - mb->start,
+                                 &memblock.memory, mb->nid);
+
+               /*
+                * At this time, all memory regions reserved by memblock are
+                * used by the kernel. Set the nid in memblock.reserved will
+                * mark out all the nodes the kernel resides in.
+                */
+               memblock_set_node(mb->start, mb->end - mb->start,
+                                 &memblock.reserved, mb->nid);
        }
 
        /*
@@ -553,6 +562,30 @@ static void __init numa_init_array(void)
        }
 }
 
+static void __init numa_clear_kernel_node_hotplug(void)
+{
+       int i, nid;
+       nodemask_t numa_kernel_nodes;
+       unsigned long start, end;
+       struct memblock_type *type = &memblock.reserved;
+
+       /* Mark all kernel nodes. */
+       for (i = 0; i < type->cnt; i++)
+               node_set(type->regions[i].nid, numa_kernel_nodes);
+
+       /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */
+       for (i = 0; i < numa_meminfo.nr_blks; i++) {
+               nid = numa_meminfo.blk[i].nid;
+               if (!node_isset(nid, numa_kernel_nodes))
+                       continue;
+
+               start = numa_meminfo.blk[i].start;
+               end = numa_meminfo.blk[i].end;
+
+               memblock_clear_hotplug(start, end - start);
+       }
+}
+
 static int __init numa_init(int (*init_func)(void))
 {
        int i;
@@ -565,7 +598,12 @@ static int __init numa_init(int (*init_func)(void))
        nodes_clear(node_possible_map);
        nodes_clear(node_online_map);
        memset(&numa_meminfo, 0, sizeof(numa_meminfo));
-       WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
+       WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.memory,
+                                 MAX_NUMNODES));
+       WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.reserved,
+                                 MAX_NUMNODES));
+       /* In case that parsing SRAT failed. */
+       WARN_ON(memblock_clear_hotplug(0, ULLONG_MAX));
        numa_reset_distance();
 
        ret = init_func();
@@ -601,6 +639,16 @@ static int __init numa_init(int (*init_func)(void))
                        numa_clear_node(i);
        }
        numa_init_array();
+
+       /*
+        * At very early time, the kernel have to use some memory such as
+        * loading the kernel image. We cannot prevent this anyway. So any
+        * node the kernel resides in should be un-hotpluggable.
+        *
+        * And when we come here, numa_init() won't fail.
+        */
+       numa_clear_kernel_node_hotplug();
+
        return 0;
 }
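
numa_clear_kernel_node_hotplug() pairs with the SRAT change further down: SRAT parsing marks hotpluggable ranges in memblock, and this helper then strips the flag from any node that holds kernel-reserved memory, so such nodes are never treated as removable. Restated as a sketch using only calls that appear in this merge (kernel_start/kernel_end are illustrative names):

    /* during SRAT parsing (see the srat.c hunk below) */
    if (hotpluggable && memblock_mark_hotplug(start, ma->length))
            pr_warn("SRAT: failed to mark hotplug range\n");

    /* after the NUMA layout is known: nodes holding kernel-reserved
     * memory lose the flag again */
    memblock_clear_hotplug(kernel_start, kernel_end - kernel_start);
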
 
index 266ca912f62e00e96eab405ce6290f3d7466cec3..2f78a38ac281ea66902dcc7bbe0083fb24014d2d 100644 (file)
@@ -42,15 +42,25 @@ static __init inline int srat_disabled(void)
        return acpi_numa < 0;
 }
 
-/* Callback for SLIT parsing */
+/**
+ * Callback for SLIT parsing.  pxm_to_node() returns NUMA_NO_NODE for
+ * I/O localities since SRAT does not list them.  I/O localities are
+ * not supported at this point.
+ */
 void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
 {
        int i, j;
 
-       for (i = 0; i < slit->locality_count; i++)
-               for (j = 0; j < slit->locality_count; j++)
+       for (i = 0; i < slit->locality_count; i++) {
+               if (pxm_to_node(i) == NUMA_NO_NODE)
+                       continue;
+               for (j = 0; j < slit->locality_count; j++) {
+                       if (pxm_to_node(j) == NUMA_NO_NODE)
+                               continue;
                        numa_set_distance(pxm_to_node(i), pxm_to_node(j),
                                slit->entry[slit->locality_count * i + j]);
+               }
+       }
 }
 
 /* Callback for Proximity Domain -> x2APIC mapping */
@@ -181,6 +191,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
                (unsigned long long) start, (unsigned long long) end - 1,
                hotpluggable ? " hotplug" : "");
 
+       /* Mark hotplug range in memblock. */
+       if (hotpluggable && memblock_mark_hotplug(start, ma->length))
+               pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
+                       (unsigned long long)start, (unsigned long long)end - 1);
+
        return 0;
 out_err_bad_srat:
        bad_srat();
index ae699b3bbac84a920042349c1fc8605a8f93aba0..dd8dda167a242621515c901a3a5d62b4fcadf37b 100644 (file)
@@ -103,7 +103,7 @@ static void flush_tlb_func(void *info)
        if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
                return;
 
-       count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
        if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
                if (f->flush_end == TLB_FLUSH_ALL)
                        local_flush_tlb();
@@ -131,7 +131,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
        info.flush_start = start;
        info.flush_end = end;
 
-       count_vm_event(NR_TLB_REMOTE_FLUSH);
+       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
        if (is_uv_system()) {
                unsigned int cpu;
 
@@ -151,44 +151,19 @@ void flush_tlb_current_task(void)
 
        preempt_disable();
 
-       count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
+       count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
        local_flush_tlb();
        if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
                flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
        preempt_enable();
 }
 
-/*
- * It can find out the THP large page, or
- * HUGETLB page in tlb_flush when THP disabled
- */
-static inline unsigned long has_large_page(struct mm_struct *mm,
-                                unsigned long start, unsigned long end)
-{
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       unsigned long addr = ALIGN(start, HPAGE_SIZE);
-       for (; addr < end; addr += HPAGE_SIZE) {
-               pgd = pgd_offset(mm, addr);
-               if (likely(!pgd_none(*pgd))) {
-                       pud = pud_offset(pgd, addr);
-                       if (likely(!pud_none(*pud))) {
-                               pmd = pmd_offset(pud, addr);
-                               if (likely(!pmd_none(*pmd)))
-                                       if (pmd_large(*pmd))
-                                               return addr;
-                       }
-               }
-       }
-       return 0;
-}
-
 void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                                unsigned long end, unsigned long vmflag)
 {
        unsigned long addr;
        unsigned act_entries, tlb_entries = 0;
+       unsigned long nr_base_pages;
 
        preempt_disable();
        if (current->active_mm != mm)
@@ -210,21 +185,20 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                tlb_entries = tlb_lli_4k[ENTRIES];
        else
                tlb_entries = tlb_lld_4k[ENTRIES];
+
        /* Assume all of TLB entries was occupied by this task */
-       act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm;
+       act_entries = tlb_entries >> tlb_flushall_shift;
+       act_entries = mm->total_vm > act_entries ? act_entries : mm->total_vm;
+       nr_base_pages = (end - start) >> PAGE_SHIFT;
 
        /* tlb_flushall_shift is on balance point, details in commit log */
-       if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) {
-               count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
+       if (nr_base_pages > act_entries) {
+               count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
                local_flush_tlb();
        } else {
-               if (has_large_page(mm, start, end)) {
-                       local_flush_tlb();
-                       goto flush_all;
-               }
                /* flush range by one by one 'invlpg' */
                for (addr = start; addr < end;  addr += PAGE_SIZE) {
-                       count_vm_event(NR_TLB_LOCAL_FLUSH_ONE);
+                       count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
                        __flush_tlb_single(addr);
                }
 
@@ -262,7 +236,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
 
 static void do_flush_tlb_all(void *info)
 {
-       count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
        __flush_tlb_all();
        if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
                leave_mm(smp_processor_id());
@@ -270,7 +244,7 @@ static void do_flush_tlb_all(void *info)
 
 void flush_tlb_all(void)
 {
-       count_vm_event(NR_TLB_REMOTE_FLUSH);
+       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
        on_each_cpu(do_flush_tlb_all, NULL, 1);
 }
 
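The flush_tlb_mm_range() hunk above drops the THP walk and keeps a pure size heuristic: the range is flushed page by page only when it is smaller than the share of TLB entries the task can realistically occupy. A minimal stand-alone sketch of that decision follows; the numbers in the comment (tlb_entries = 64, tlb_flushall_shift = 2, hence act_entries = 16) are assumptions for illustration only.

        /* Sketch of the heuristic used in flush_tlb_mm_range() above. */
        static int use_full_flush(unsigned long nr_base_pages, unsigned long total_vm,
                                  unsigned long tlb_entries, int tlb_flushall_shift)
        {
                unsigned long act_entries = tlb_entries >> tlb_flushall_shift;

                if (total_vm < act_entries)
                        act_entries = total_vm;
                /* e.g. 32 pages > 16 entries: flush everything; 8 pages: per-page invlpg */
                return nr_base_pages > act_entries;
        }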
index cc2637f8674ed61df149fb3bf51da4cc3a04f7cf..9dbc67e42a993193fb56d169cab00ecf4b825ef0 100644 (file)
@@ -4,8 +4,7 @@
  * Written by Cai Zhiyong <caizhiyong@huawei.com>
  *
  */
-#include <linux/buffer_head.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/cmdline-parser.h>
 
 static int parse_subpart(struct cmdline_subpart **subpart, char *partdef)
@@ -159,6 +158,7 @@ void cmdline_parts_free(struct cmdline_parts **parts)
                *parts = next_parts;
        }
 }
+EXPORT_SYMBOL(cmdline_parts_free);
 
 int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline)
 {
@@ -206,6 +206,7 @@ fail:
        cmdline_parts_free(parts);
        goto done;
 }
+EXPORT_SYMBOL(cmdline_parts_parse);
 
 struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
                                         const char *bdev)
@@ -214,17 +215,17 @@ struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
                parts = parts->next_parts;
        return parts;
 }
+EXPORT_SYMBOL(cmdline_parts_find);
 
 /*
  *  add_part()
  *    0 success.
  *    1 can not add so many partitions.
  */
-void cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
-                      int slot,
-                      int (*add_part)(int, struct cmdline_subpart *, void *),
-                      void *param)
-
+int cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
+                     int slot,
+                     int (*add_part)(int, struct cmdline_subpart *, void *),
+                     void *param)
 {
        sector_t from = 0;
        struct cmdline_subpart *subpart;
@@ -247,4 +248,7 @@ void cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
                if (add_part(slot, subpart, param))
                        break;
        }
+
+       return slot;
 }
+EXPORT_SYMBOL(cmdline_parts_set);
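With cmdline_parts_set() now returning the next free slot and the four entry points exported, the parser can be driven from a module. A hypothetical caller sketch: the "mmcblk0:1M(boot),-(data)" string and add_one_part() are assumptions made for illustration, while the four calls use the signatures shown above.

        #include <linux/kernel.h>
        #include <linux/cmdline-parser.h>

        static int add_one_part(int slot, struct cmdline_subpart *subpart, void *param)
        {
                /* create partition 'slot' from 'subpart'; non-zero stops the walk */
                return 0;
        }

        static void sketch_setup_parts(sector_t disk_size)
        {
                struct cmdline_parts *parts, *p;

                if (cmdline_parts_parse(&parts, "mmcblk0:1M(boot),-(data)"))
                        return;

                p = cmdline_parts_find(parts, "mmcblk0");
                if (p) {
                        /* cmdline_parts_set() now returns the next free slot */
                        int next = cmdline_parts_set(p, disk_size, 1, add_one_part, NULL);

                        pr_debug("next free partition slot: %d\n", next);
                }

                cmdline_parts_free(&parts);
        }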
index 791f419431322882a915f88995df7b72552d5507..7bd4372e8b6f4727e9a53c86927dbd1635468c2e 100644 (file)
@@ -849,7 +849,7 @@ static int show_partition(struct seq_file *seqf, void *v)
        char buf[BDEVNAME_SIZE];
 
        /* Don't show non-partitionable removeable devices or empty devices */
-       if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
+       if (!get_capacity(sgp) || (!(disk_max_parts(sgp) > 1) &&
                                   (sgp->flags & GENHD_FL_REMOVABLE)))
                return 0;
        if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
index 86b9f37d102e1110791ce693079561bd0a804619..9ffa90c6201c3860779f4a3e7e162a368b766874 100644 (file)
@@ -368,7 +368,8 @@ config BLK_DEV_RAM
          For details, read <file:Documentation/blockdev/ramdisk.txt>.
 
          To compile this driver as a module, choose M here: the
-         module will be called rd.
+         module will be called brd. An alias "rd" has been defined
+         for historical reasons.
 
          Most normal users won't need the RAM disk functionality, and can
          thus say N here.
index b35fc4f5237c3b44c7c51aaa0517e58a42a2ab81..036e8ab86c718057ae01c9f6bf3ba6c403a6b942 100644 (file)
@@ -5004,7 +5004,7 @@ reinit_after_soft_reset:
 
        i = alloc_cciss_hba(pdev);
        if (i < 0)
-               return -1;
+               return -ENOMEM;
 
        h = hba[i];
        h->pdev = pdev;
@@ -5205,7 +5205,7 @@ clean_no_release_regions:
         */
        pci_set_drvdata(pdev, NULL);
        free_hba(h);
-       return -1;
+       return -ENODEV;
 }
 
 static void cciss_shutdown(struct pci_dev *pdev)
index 33fde3a3975954c793d0d2e9feb846e5e6a2bdc2..66e8c3b94ef35443f46bf67ea3065023da8b808d 100644 (file)
@@ -799,7 +799,7 @@ static void loop_config_discard(struct loop_device *lo)
 
        /*
         * We use punch hole to reclaim the free space used by the
-        * image a.k.a. discard. However we do support discard if
+        * image a.k.a. discard. However we do not support discard if
         * encryption is enabled, because it may give an attacker
         * useful information.
         */
index 4a27b1de5fcb9fb0fef805fb51d0d2f1bc418a29..2ce3dfd7e6b9bafd65c670aa45bf3a8660578360 100644 (file)
@@ -581,7 +581,7 @@ static ssize_t pg_write(struct file *filp, const char __user *buf, size_t count,
 
        if (hdr.magic != PG_MAGIC)
                return -EINVAL;
-       if (hdr.dlen > PG_MAX_DATA)
+       if (hdr.dlen < 0 || hdr.dlen > PG_MAX_DATA)
                return -EINVAL;
        if ((count - hs) > PG_MAX_DATA)
                return -EINVAL;
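The extra check matters because dlen arrives from user space as a signed value: a header with hdr.dlen = -1 is not greater than PG_MAX_DATA, so it slipped past the old test and could then be used as a transfer length further down. Rejecting negative values up front closes that path.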
index 3fb6ab4c8b4e9e96f9ba487cf801fdcd776f0c73..d5e2d12b9d9e329d77fb21560b8f21e5e98a11f4 100644 (file)
@@ -1744,20 +1744,6 @@ static void carm_remove_one (struct pci_dev *pdev)
        kfree(host);
        pci_release_regions(pdev);
        pci_disable_device(pdev);
-       pci_set_drvdata(pdev, NULL);
 }
 
-static int __init carm_init(void)
-{
-       return pci_register_driver(&carm_driver);
-}
-
-static void __exit carm_exit(void)
-{
-       pci_unregister_driver(&carm_driver);
-}
-
-module_init(carm_init);
-module_exit(carm_exit);
-
-
+module_pci_driver(carm_driver);
index f895a8c8a244b54c02eb8b59b0eba8d42734a011..92c5937f80c33acac907dccb2377b6157721b33c 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/device.h>
 #include <linux/highmem.h>
 #include <linux/backing-dev.h>
-#include <linux/bootmem.h>
 #include <linux/splice.h>
 #include <linux/pfn.h>
 #include <linux/export.h>
index a6ef6acaa1c80b0e781b7f8b71310e7093e71648..41983883cef47da740612a9966d39ec97094d593 100644 (file)
@@ -108,6 +108,9 @@ config DMI_SYSFS
          under /sys/firmware/dmi when this option is enabled and
          loaded.
 
+config DMI_SCAN_MACHINE_NON_EFI_FALLBACK
+       bool
+
 config ISCSI_IBFT_FIND
        bool "iSCSI Boot Firmware Table Attributes"
        depends on X86 && ACPI
index c7e81ff8f3ef7079a7ccc3e4c04bc6ec59516015..17afc51f3054402c22a2a8d2fd50c1c809edd160 100644 (file)
@@ -116,7 +116,7 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *,
 {
        u8 *buf;
 
-       buf = dmi_ioremap(dmi_base, dmi_len);
+       buf = dmi_early_remap(dmi_base, dmi_len);
        if (buf == NULL)
                return -1;
 
@@ -124,7 +124,7 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *,
 
        add_device_randomness(buf, dmi_len);
 
-       dmi_iounmap(buf, dmi_len);
+       dmi_early_unmap(buf, dmi_len);
        return 0;
 }
 
@@ -527,18 +527,18 @@ void __init dmi_scan_machine(void)
                 * needed during early boot.  This also means we can
                 * iounmap the space when we're done with it.
                 */
-               p = dmi_ioremap(efi.smbios, 32);
+               p = dmi_early_remap(efi.smbios, 32);
                if (p == NULL)
                        goto error;
                memcpy_fromio(buf, p, 32);
-               dmi_iounmap(p, 32);
+               dmi_early_unmap(p, 32);
 
                if (!dmi_present(buf)) {
                        dmi_available = 1;
                        goto out;
                }
-       } else {
-               p = dmi_ioremap(0xF0000, 0x10000);
+       } else if (IS_ENABLED(CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK)) {
+               p = dmi_early_remap(0xF0000, 0x10000);
                if (p == NULL)
                        goto error;
 
@@ -554,12 +554,12 @@ void __init dmi_scan_machine(void)
                        memcpy_fromio(buf + 16, q, 16);
                        if (!dmi_present(buf)) {
                                dmi_available = 1;
-                               dmi_iounmap(p, 0x10000);
+                               dmi_early_unmap(p, 0x10000);
                                goto out;
                        }
                        memcpy(buf, buf + 16, 16);
                }
-               dmi_iounmap(p, 0x10000);
+               dmi_early_unmap(p, 0x10000);
        }
  error:
        pr_info("DMI not present or invalid.\n");
@@ -831,13 +831,13 @@ int dmi_walk(void (*decode)(const struct dmi_header *, void *),
        if (!dmi_available)
                return -1;
 
-       buf = ioremap(dmi_base, dmi_len);
+       buf = dmi_remap(dmi_base, dmi_len);
        if (buf == NULL)
                return -1;
 
        dmi_table(buf, dmi_len, dmi_num, decode, private_data);
 
-       iounmap(buf);
+       dmi_unmap(buf);
        return 0;
 }
 EXPORT_SYMBOL_GPL(dmi_walk);
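DMI_SCAN_MACHINE_NON_EFI_FALLBACK is a hidden bool with no prompt, so the legacy 0xF0000 BIOS scan in dmi_scan_machine() above is now compiled in only for architectures that opt in by adding "select DMI_SCAN_MACHINE_NON_EFI_FALLBACK" to their arch Kconfig entry; which architectures actually do so is not visible in these hunks.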
index e2e04b007e155954a405a53f7117000d03ccfc1e..17cf96c45f2b07eaeaa31f7e7bdba63b3c98f7b7 100644 (file)
@@ -324,7 +324,7 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type)
 {
        struct firmware_map_entry *entry;
 
-       entry = alloc_bootmem(sizeof(struct firmware_map_entry));
+       entry = memblock_virt_alloc(sizeof(struct firmware_map_entry), 0);
        if (WARN_ON(!entry))
                return -ENOMEM;
 
index 58dd900083b9ec6141c8744040ac98ec084ca8ad..530f78f84deed250a2f96350254657b49a0166c9 100644 (file)
@@ -273,8 +273,8 @@ static int cirrus_crtc_mode_set(struct drm_crtc *crtc,
                sr07 |= 0x11;
                break;
        case 16:
-               sr07 |= 0xc1;
-               hdr = 0xc0;
+               sr07 |= 0x17;
+               hdr = 0xc1;
                break;
        case 24:
                sr07 |= 0x15;
index 98a03639b413d834d08c5c73c0ef08cec3d37c55..a0ba94e87d7c510baa48adf81602fac80d005e4a 100644 (file)
@@ -409,6 +409,14 @@ static void drm_fb_helper_dpms(struct fb_info *info, int dpms_mode)
        struct drm_connector *connector;
        int i, j;
 
+       /*
+        * fbdev->blank can be called from irq context in case of a panic.
+        * Since we already have our own special panic handler which will
+        * restore the fbdev console mode completely, just bail out early.
+        */
+       if (oops_in_progress)
+               return;
+
        /*
         * fbdev->blank can be called from irq context in case of a panic.
         * Since we already have our own special panic handler which will
index 143eba3309c5fc7685a48a9ec56a875580f4f038..ea7dfc59d796605d89d6929de7ecdcffa70711c7 100644 (file)
 #include "intel_bios.h"
 #include "power.h"
 
+#ifdef CONFIG_BACKLIGHT_CLASS_DEVICE
 static void do_gma_backlight_set(struct drm_device *dev)
 {
-#ifdef CONFIG_BACKLIGHT_CLASS_DEVICE
        struct drm_psb_private *dev_priv = dev->dev_private;
        backlight_update_status(dev_priv->backlight_device);
-#endif 
 }
+#endif
 
 void gma_backlight_enable(struct drm_device *dev)
 {
index 3c149617cfcbaabb49a0253fd405fe52930bc0af..4ef83df2b246fb335dc8ce05bafead80ed7ac180 100644 (file)
@@ -61,6 +61,7 @@ bool nouveau_is_v1_dsm(void) {
 #define NOUVEAU_DSM_HAS_MUX 0x1
 #define NOUVEAU_DSM_HAS_OPT 0x2
 
+#ifdef CONFIG_VGA_SWITCHEROO
 static const char nouveau_dsm_muid[] = {
        0xA0, 0xA0, 0x95, 0x9D, 0x60, 0x00, 0x48, 0x4D,
        0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4,
@@ -326,6 +327,11 @@ void nouveau_unregister_dsm_handler(void)
        if (nouveau_dsm_priv.optimus_detected || nouveau_dsm_priv.dsm_detected)
                vga_switcheroo_unregister_handler();
 }
+#else
+void nouveau_register_dsm_handler(void) {}
+void nouveau_unregister_dsm_handler(void) {}
+void nouveau_switcheroo_optimus_dsm(void) {}
+#endif
 
 /* retrieve the ROM in 4k blocks */
 static int nouveau_rom_call(acpi_handle rom_handle, uint8_t *bios,
index a11ff74a5127019cb6f367d07ffae32429ccf37e..9eac8de9e8b71f6d6a2afbc41b87859477fe0698 100644 (file)
@@ -178,6 +178,15 @@ comment "Input Device Drivers"
 
 source "drivers/input/keyboard/Kconfig"
 
+config INPUT_LEDS
+       bool "LED Support"
+       depends on LEDS_CLASS = INPUT || LEDS_CLASS = y
+       select LEDS_TRIGGERS
+       default y
+       help
+         This option enables support for LEDs on keyboards managed
+         by the input layer.
+
 source "drivers/input/mouse/Kconfig"
 
 source "drivers/input/joystick/Kconfig"
index 5ca3f631497f4d8295cf42a1d2bf54ed9c4f9e16..2ab5f3336da5c9e603601c632d5bd6b182405161 100644 (file)
@@ -6,6 +6,9 @@
 
 obj-$(CONFIG_INPUT)            += input-core.o
 input-core-y := input.o input-compat.o input-mt.o ff-core.o
+ifeq ($(CONFIG_INPUT_LEDS),y)
+input-core-y += leds.o
+endif
 
 obj-$(CONFIG_INPUT_FF_MEMLESS) += ff-memless.o
 obj-$(CONFIG_INPUT_POLLDEV)    += input-polldev.o
index 1c4c0db055509cc45df2e1865a5c0b6d86e25fc9..3b9284b18e701a5c26b76e0991f6422ac41ed508 100644 (file)
@@ -708,6 +708,9 @@ static void input_disconnect_device(struct input_dev *dev)
                handle->open = 0;
 
        spin_unlock_irq(&dev->event_lock);
+
+       if (is_event_supported(EV_LED, dev->evbit, EV_MAX))
+               input_led_disconnect(dev);
 }
 
 /**
@@ -2134,6 +2137,9 @@ int input_register_device(struct input_dev *dev)
 
        list_add_tail(&dev->node, &input_dev_list);
 
+       if (is_event_supported(EV_LED, dev->evbit, EV_MAX))
+               input_led_connect(dev);
+
        list_for_each_entry(handler, &input_handler_list, node)
                input_attach_handler(dev, handler);
 
diff --git a/drivers/input/leds.c b/drivers/input/leds.c
new file mode 100644 (file)
index 0000000..1d8a980
--- /dev/null
@@ -0,0 +1,249 @@
+/*
+ * LED support for the input layer
+ *
+ * Copyright 2010-2013 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/leds.h>
+#include <linux/input.h>
+
+/*
+ * Keyboard LEDs are propagated by default like the following example:
+ *
+ * VT keyboard numlock trigger
+ * -> vt::numl VT LED
+ * -> vt-numl VT trigger
+ * -> per-device inputX::numl LED
+ *
+ * Userland can however choose the trigger for the vt::numl LED, or
+ * independently choose the trigger for any inputX::numl LED.
+ *
+ *
+ * VT LED classes and triggers are registered on-demand according to
+ * existing LED devices
+ */
+
+/* Handler for VT LEDs, just triggers the corresponding VT trigger. */
+static void vt_led_set(struct led_classdev *cdev,
+                         enum led_brightness brightness);
+static struct led_classdev vt_leds[LED_CNT] = {
+#define DEFINE_INPUT_LED(vt_led, nam, deftrig) \
+       [vt_led] = { \
+               .name = "vt::"nam, \
+               .max_brightness = 1, \
+               .brightness_set = vt_led_set, \
+               .default_trigger = deftrig, \
+       }
+/* Default triggers for the VT LEDs just correspond to the legacy
+ * usage. */
+       DEFINE_INPUT_LED(LED_NUML, "numl", "kbd-numlock"),
+       DEFINE_INPUT_LED(LED_CAPSL, "capsl", "kbd-capslock"),
+       DEFINE_INPUT_LED(LED_SCROLLL, "scrolll", "kbd-scrollock"),
+       DEFINE_INPUT_LED(LED_COMPOSE, "compose", NULL),
+       DEFINE_INPUT_LED(LED_KANA, "kana", "kbd-kanalock"),
+       DEFINE_INPUT_LED(LED_SLEEP, "sleep", NULL),
+       DEFINE_INPUT_LED(LED_SUSPEND, "suspend", NULL),
+       DEFINE_INPUT_LED(LED_MUTE, "mute", NULL),
+       DEFINE_INPUT_LED(LED_MISC, "misc", NULL),
+       DEFINE_INPUT_LED(LED_MAIL, "mail", NULL),
+       DEFINE_INPUT_LED(LED_CHARGING, "charging", NULL),
+};
+static const char *const vt_led_names[LED_CNT] = {
+       [LED_NUML] = "numl",
+       [LED_CAPSL] = "capsl",
+       [LED_SCROLLL] = "scrolll",
+       [LED_COMPOSE] = "compose",
+       [LED_KANA] = "kana",
+       [LED_SLEEP] = "sleep",
+       [LED_SUSPEND] = "suspend",
+       [LED_MUTE] = "mute",
+       [LED_MISC] = "misc",
+       [LED_MAIL] = "mail",
+       [LED_CHARGING] = "charging",
+};
+/* Handler for hotplug initialization */
+static void vt_led_trigger_activate(struct led_classdev *cdev);
+/* VT triggers */
+static struct led_trigger vt_led_triggers[LED_CNT] = {
+#define DEFINE_INPUT_LED_TRIGGER(vt_led, nam) \
+       [vt_led] = { \
+               .name = "vt-"nam, \
+               .activate = vt_led_trigger_activate, \
+       }
+       DEFINE_INPUT_LED_TRIGGER(LED_NUML, "numl"),
+       DEFINE_INPUT_LED_TRIGGER(LED_CAPSL, "capsl"),
+       DEFINE_INPUT_LED_TRIGGER(LED_SCROLLL, "scrolll"),
+       DEFINE_INPUT_LED_TRIGGER(LED_COMPOSE, "compose"),
+       DEFINE_INPUT_LED_TRIGGER(LED_KANA, "kana"),
+       DEFINE_INPUT_LED_TRIGGER(LED_SLEEP, "sleep"),
+       DEFINE_INPUT_LED_TRIGGER(LED_SUSPEND, "suspend"),
+       DEFINE_INPUT_LED_TRIGGER(LED_MUTE, "mute"),
+       DEFINE_INPUT_LED_TRIGGER(LED_MISC, "misc"),
+       DEFINE_INPUT_LED_TRIGGER(LED_MAIL, "mail"),
+       DEFINE_INPUT_LED_TRIGGER(LED_CHARGING, "charging"),
+};
+
+/* Lock for registration coherency */
+static DEFINE_MUTEX(vt_led_registered_lock);
+
+/* Which VT LED classes and triggers are registered */
+static unsigned long vt_led_registered[BITS_TO_LONGS(LED_CNT)];
+
+/* Number of input devices having each LED */
+static int vt_led_references[LED_CNT];
+
+/* VT LED state change, tell the VT trigger.  */
+static void vt_led_set(struct led_classdev *cdev,
+                         enum led_brightness brightness)
+{
+       int led = cdev - vt_leds;
+
+       led_trigger_event(&vt_led_triggers[led], !!brightness);
+}
+
+/* LED state change for some keyboard, notify that keyboard.  */
+static void perdevice_input_led_set(struct led_classdev *cdev,
+                         enum led_brightness brightness)
+{
+       struct input_dev *dev;
+       struct led_classdev *leds;
+       int led;
+
+       dev = cdev->dev->platform_data;
+       if (!dev)
+               /* Still initializing */
+               return;
+       leds = dev->leds;
+       led = cdev - leds;
+
+       input_event(dev, EV_LED, led, !!brightness);
+       input_event(dev, EV_SYN, SYN_REPORT, 0);
+}
+
+/* Keyboard hotplug, initialize its LED status */
+static void vt_led_trigger_activate(struct led_classdev *cdev)
+{
+       struct led_trigger *trigger = cdev->trigger;
+       int led = trigger - vt_led_triggers;
+
+       if (cdev->brightness_set)
+               cdev->brightness_set(cdev, vt_leds[led].brightness);
+}
+
+/* Free the LED data of an input device, used on connect failure and disconnection.  */
+static void input_led_delete(struct input_dev *dev)
+{
+       if (dev) {
+               struct led_classdev *leds = dev->leds;
+               if (leds) {
+                       int i;
+                       for (i = 0; i < LED_CNT; i++)
+                               kfree(leds[i].name);
+                       kfree(leds);
+                       dev->leds = NULL;
+               }
+       }
+}
+
+/* A new input device with potential LEDs to connect.  */
+int input_led_connect(struct input_dev *dev)
+{
+       int i, error = 0;
+       struct led_classdev *leds;
+
+       dev->leds = leds = kzalloc(sizeof(*leds) * LED_CNT, GFP_KERNEL);
+       if (!dev->leds)
+               return -ENOMEM;
+
+       /* lazily register missing VT LEDs */
+       mutex_lock(&vt_led_registered_lock);
+       for (i = 0; i < LED_CNT; i++)
+               if (vt_leds[i].name && test_bit(i, dev->ledbit)) {
+                       if (!vt_led_references[i]) {
+                               led_trigger_register(&vt_led_triggers[i]);
+                               /* This keyboard is first to have led i,
+                                * try to register it */
+                               if (!led_classdev_register(NULL, &vt_leds[i]))
+                                       vt_led_references[i] = 1;
+                               else
+                                       led_trigger_unregister(&vt_led_triggers[i]);
+                       } else
+                               vt_led_references[i]++;
+               }
+       mutex_unlock(&vt_led_registered_lock);
+
+       /* and register this device's LEDs */
+       for (i = 0; i < LED_CNT; i++)
+               if (vt_leds[i].name && test_bit(i, dev->ledbit)) {
+                       leds[i].name = kasprintf(GFP_KERNEL, "%s::%s",
+                                               dev_name(&dev->dev),
+                                               vt_led_names[i]);
+                       if (!leds[i].name) {
+                               error = -ENOMEM;
+                               goto err;
+                       }
+                       leds[i].max_brightness = 1;
+                       leds[i].brightness_set = perdevice_input_led_set;
+                       leds[i].default_trigger = vt_led_triggers[i].name;
+               }
+
+       /* No issue so far, we can register for real.  */
+       for (i = 0; i < LED_CNT; i++)
+               if (leds[i].name) {
+                       led_classdev_register(&dev->dev, &leds[i]);
+                       leds[i].dev->platform_data = dev;
+                       perdevice_input_led_set(&leds[i],
+                                       vt_leds[i].brightness);
+               }
+
+       return 0;
+
+err:
+       input_led_delete(dev);
+       return error;
+}
+
+/*
+ * Disconnected input device. Clean it, and deregister now-useless VT LEDs and
+ * triggers.
+ */
+void input_led_disconnect(struct input_dev *dev)
+{
+       int i;
+       struct led_classdev *leds = dev->leds;
+
+       for (i = 0; i < LED_CNT; i++)
+               if (leds[i].name)
+                       led_classdev_unregister(&leds[i]);
+
+       input_led_delete(dev);
+
+       mutex_lock(&vt_led_registered_lock);
+       for (i = 0; i < LED_CNT; i++) {
+               if (!vt_leds[i].name || !test_bit(i, dev->ledbit))
+                       continue;
+
+               vt_led_references[i]--;
+               if (vt_led_references[i]) {
+                       /* Still some devices needing it */
+                       continue;
+               }
+
+               led_classdev_unregister(&vt_leds[i]);
+               led_trigger_unregister(&vt_led_triggers[i]);
+               clear_bit(i, vt_led_registered);
+       }
+       mutex_unlock(&vt_led_registered_lock);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("User LED support for input layer");
+MODULE_AUTHOR("Samuel Thibault <samuel.thibault@ens-lyon.org>");
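As the header comment of the new drivers/input/leds.c describes, each keyboard LED now appears as a regular LED class device wired to a vt-* trigger by default, so userland can re-route it through sysfs. A user-space sketch follows; the device name "input3" is an assumption for illustration, while the ::capsl suffix and the trigger/brightness attributes follow the code above and the generic LED class.

        #include <stdio.h>

        int main(void)
        {
                /* detach input3::capsl from its default vt-capsl trigger ... */
                FILE *f = fopen("/sys/class/leds/input3::capsl/trigger", "w");

                if (!f)
                        return 1;
                fputs("none", f);
                fclose(f);

                /* ... and drive the LED directly */
                f = fopen("/sys/class/leds/input3::capsl/brightness", "w");
                if (!f)
                        return 1;
                fputs("1", f);
                fclose(f);
                return 0;
        }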
index 5ac7efc70ca904e6307cd63e2f729fc10afe9974..a22c86c867faee78544b99b8528e1f4b9fc183c8 100644 (file)
@@ -889,7 +889,7 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level,
 
                /* If range covers entire pagetable, free it */
                if (!(start_pfn > level_pfn ||
-                     last_pfn < level_pfn + level_size(level))) {
+                     last_pfn < level_pfn + level_size(level) - 1)) {
                        dma_clear_pte(pte);
                        domain_flush_cache(domain, pte, sizeof(*pte));
                        free_pgtable_page(level_pte);
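Worked example for the off-by-one above: with 512 entries per table, level_pfn = 0 and level_size(level) = 512, a mapping that ends at last_pfn = 511 covers the whole table. The old test demanded last_pfn >= 512 before freeing it; subtracting 1 makes the inclusive end of the range line up with the last pfn the table can describe.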
index 72156c123033342f566497155154d31eeef79baf..6e3ca5940e98e3a2efb787348c8ab875d5bacd3c 100644 (file)
@@ -11,9 +11,6 @@ menuconfig NEW_LEDS
          Say Y to enable Linux LED support.  This allows control of supported
          LEDs from both userspace and optionally, by kernel events (triggers).
 
-         This is not related to standard keyboard LEDs which are controlled
-         via the input system.
-
 if NEW_LEDS
 
 config LEDS_CLASS
index 6cd38fc685994b025c74689e324481bcf7b2b6b1..86d7518cd13b2c07b27b6931042e1d3c7b10529d 100644 (file)
@@ -52,7 +52,7 @@ struct omap_mbox_queue {
 
 struct omap_mbox {
        const char              *name;
-       unsigned int            irq;
+       int                     irq;
        struct omap_mbox_queue  *txq, *rxq;
        struct omap_mbox_ops    *ops;
        struct device           *dev;
index 25f8f93decb6e0c97267ed00609b35f881634519..2a635b6fdaf7274ac64b901376cbb1a496d21a4a 100644 (file)
@@ -145,6 +145,8 @@ static int ms_transfer_data(struct realtek_pci_ms *host, unsigned char data_dir,
        unsigned int length = sg->length;
        u16 sec_cnt = (u16)(length / 512);
        u8 val, trans_mode, dma_dir;
+       struct memstick_dev *card = host->msh->card;
+       bool pro_card = card->id.type == MEMSTICK_TYPE_PRO;
 
        dev_dbg(ms_dev(host), "%s: tpc = 0x%02x, data_dir = %s, length = %d\n",
                        __func__, tpc, (data_dir == READ) ? "READ" : "WRITE",
@@ -152,19 +154,21 @@ static int ms_transfer_data(struct realtek_pci_ms *host, unsigned char data_dir,
 
        if (data_dir == READ) {
                dma_dir = DMA_DIR_FROM_CARD;
-               trans_mode = MS_TM_AUTO_READ;
+               trans_mode = pro_card ? MS_TM_AUTO_READ : MS_TM_NORMAL_READ;
        } else {
                dma_dir = DMA_DIR_TO_CARD;
-               trans_mode = MS_TM_AUTO_WRITE;
+               trans_mode = pro_card ? MS_TM_AUTO_WRITE : MS_TM_NORMAL_WRITE;
        }
 
        rtsx_pci_init_cmd(pcr);
 
        rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, MS_TPC, 0xFF, tpc);
-       rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, MS_SECTOR_CNT_H,
-                       0xFF, (u8)(sec_cnt >> 8));
-       rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, MS_SECTOR_CNT_L,
-                       0xFF, (u8)sec_cnt);
+       if (pro_card) {
+               rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, MS_SECTOR_CNT_H,
+                               0xFF, (u8)(sec_cnt >> 8));
+               rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, MS_SECTOR_CNT_L,
+                               0xFF, (u8)sec_cnt);
+       }
        rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, MS_TRANS_CFG, 0xFF, cfg);
 
        rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, IRQSTAT0,
@@ -192,8 +196,14 @@ static int ms_transfer_data(struct realtek_pci_ms *host, unsigned char data_dir,
        }
 
        rtsx_pci_read_register(pcr, MS_TRANS_CFG, &val);
-       if (val & (MS_INT_CMDNK | MS_INT_ERR | MS_CRC16_ERR | MS_RDY_TIMEOUT))
-               return -EIO;
+       if (pro_card) {
+               if (val & (MS_INT_CMDNK | MS_INT_ERR |
+                               MS_CRC16_ERR | MS_RDY_TIMEOUT))
+                       return -EIO;
+       } else {
+               if (val & (MS_CRC16_ERR | MS_RDY_TIMEOUT))
+                       return -EIO;
+       }
 
        return 0;
 }
@@ -462,8 +472,8 @@ static int rtsx_pci_ms_set_param(struct memstick_host *msh,
                        clock = 19000000;
                        ssc_depth = RTSX_SSC_DEPTH_500K;
 
-                       err = rtsx_pci_write_register(pcr, MS_CFG,
-                                       0x18, MS_BUS_WIDTH_1);
+                       err = rtsx_pci_write_register(pcr, MS_CFG, 0x58,
+                                       MS_BUS_WIDTH_1 | PUSH_TIME_DEFAULT);
                        if (err < 0)
                                return err;
                } else if (value == MEMSTICK_PAR4) {
index f47eaa70eae076f172f4e72977f98dfe26e680da..612ca404e1502d6c736eaf239d9b721e522664f9 100644 (file)
@@ -175,7 +175,7 @@ static inline int max8998_i2c_get_driver_data(struct i2c_client *i2c,
        if (IS_ENABLED(CONFIG_OF) && i2c->dev.of_node) {
                const struct of_device_id *match;
                match = of_match_node(max8998_dt_match, i2c->dev.of_node);
-               return (int)match->data;
+               return (int)(long)match->data;
        }
 
        return (int)id->driver_data;
index 6939ae56c2e168d390a504af22767198b222b9f1..966cf65c5c363f63d907048fe4bb31b3d30bbbac 100644 (file)
@@ -170,7 +170,7 @@ static int tps65217_probe(struct i2c_client *client,
                                "Failed to find matching dt id\n");
                        return -EINVAL;
                }
-               chip_id = (unsigned int)match->data;
+               chip_id = (unsigned int)(unsigned long)match->data;
                status_off = of_property_read_bool(client->dev.of_node,
                                        "ti,pmic-shutdown-controller");
        }
index 3aed525e55b48bee7303715c55dea5f162f23752..78a998bfcd45870a3164fac0e45cf2d5540fb594 100644 (file)
@@ -343,7 +343,7 @@ void st_int_recv(void *disc_data,
                        /* Unknow packet? */
                default:
                        type = *ptr;
-                       if (st_gdata->list[type] == NULL) {
+                       if (type >= ST_MAX_CHANNELS || st_gdata->list[type] == NULL) {
                                pr_err("chip/interface misbehavior dropping"
                                        " frame starting with 0x%02x", type);
                                goto done;
index 768dfe9a93159e4964d4b9c0bc6986da2e6c09b6..6d3e2093bf7f5575ce4360af9e0bdc7763bd11b1 100644 (file)
@@ -1755,17 +1755,4 @@ static struct pci_driver donauboe_pci_driver = {
        .resume         = toshoboe_wakeup 
 };
 
-static int __init
-donauboe_init (void)
-{
-  return pci_register_driver(&donauboe_pci_driver);
-}
-
-static void __exit
-donauboe_cleanup (void)
-{
-  pci_unregister_driver(&donauboe_pci_driver);
-}
-
-module_init(donauboe_init);
-module_exit(donauboe_cleanup);
+module_pci_driver(donauboe_pci_driver);
index 2f07cd615665248b55164525b0c24087ce7411c9..983f50c4b7b462be3a3567a2068b272488179f4a 100644 (file)
@@ -152,35 +152,38 @@ static long pps_cdev_ioctl(struct file *file,
                if (err)
                        return -EFAULT;
 
-               ev = pps->last_ev;
-
-               /* Manage the timeout */
-               if (fdata.timeout.flags & PPS_TIME_INVALID)
-                       err = wait_event_interruptible(pps->queue,
-                                       ev != pps->last_ev);
-               else {
-                       unsigned long ticks;
-
-                       dev_dbg(pps->dev, "timeout %lld.%09d\n",
-                                       (long long) fdata.timeout.sec,
-                                       fdata.timeout.nsec);
-                       ticks = fdata.timeout.sec * HZ;
-                       ticks += fdata.timeout.nsec / (NSEC_PER_SEC / HZ);
-
-                       if (ticks != 0) {
-                               err = wait_event_interruptible_timeout(
-                                               pps->queue,
-                                               ev != pps->last_ev,
-                                               ticks);
-                               if (err == 0)
-                                       return -ETIMEDOUT;
+               if (!(file->f_flags & O_NONBLOCK)) {
+                       ev = pps->last_ev;
+
+                       /* Manage the timeout */
+                       if (fdata.timeout.flags & PPS_TIME_INVALID)
+                               err = wait_event_interruptible(pps->queue,
+                                               ev != pps->last_ev);
+                       else {
+                               unsigned long ticks;
+
+                               dev_dbg(pps->dev, "timeout %lld.%09d\n",
+                                               (long long) fdata.timeout.sec,
+                                               fdata.timeout.nsec);
+                               ticks = fdata.timeout.sec * HZ;
+                               ticks += fdata.timeout.nsec /
+                                       (NSEC_PER_SEC / HZ);
+
+                               if (ticks != 0) {
+                                       err = wait_event_interruptible_timeout(
+                                                       pps->queue,
+                                                       ev != pps->last_ev,
+                                                       ticks);
+                                       if (err == 0)
+                                               return -ETIMEDOUT;
+                               }
                        }
-               }
 
-               /* Check for pending signals */
-               if (err == -ERESTARTSYS) {
-                       dev_dbg(pps->dev, "pending signal caught\n");
-                       return -EINTR;
+                       /* Check for pending signals */
+                       if (err == -ERESTARTSYS) {
+                               dev_dbg(pps->dev, "pending signal caught\n");
+                               return -EINTR;
+                       }
                }
 
                /* Return the fetched timestamp */
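With the O_NONBLOCK path added above, PPS_FETCH returns immediately with the last captured event instead of sleeping on pps->queue. A minimal user-space sketch, assuming a PPS source exposed as /dev/pps0:

        #include <fcntl.h>
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>
        #include <sys/ioctl.h>
        #include <linux/pps.h>

        int main(void)
        {
                struct pps_fdata fdata;
                int fd = open("/dev/pps0", O_RDONLY | O_NONBLOCK);

                if (fd < 0)
                        return 1;

                memset(&fdata, 0, sizeof(fdata));
                fdata.timeout.flags = PPS_TIME_INVALID; /* ignored: we never sleep */

                /* returns whatever assert/clear event was captured last */
                if (ioctl(fd, PPS_FETCH, &fdata) == 0)
                        printf("assert #%u at %lld.%09d\n",
                               fdata.info.assert_sequence,
                               (long long)fdata.info.assert_tv.sec,
                               fdata.info.assert_tv.nsec);

                close(fd);
                return 0;
        }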
index b1328a45b095656078f8aeb642aa3ba0f6a5f697..db933decc39ce39ec26d9df0da4212d61fe3635d 100644 (file)
@@ -212,6 +212,17 @@ config RTC_DRV_DS3232
          This driver can also be built as a module.  If so, the module
          will be called rtc-ds3232.
 
+config RTC_DRV_HYM8563
+       tristate "Haoyu Microelectronics HYM8563"
+       depends on I2C && OF
+       help
+         Say Y to enable support for the HYM8563 I2C RTC chip. Apart
+         from the usual rtc functions it provides a clock output of
+         up to 32kHz.
+
+         This driver can also be built as a module. If so, the module
+         will be called rtc-hym8563.
+
 config RTC_DRV_LP8788
        tristate "TI LP8788 RTC driver"
        depends on MFD_LP8788
@@ -637,7 +648,7 @@ comment "Platform RTC drivers"
 
 config RTC_DRV_CMOS
        tristate "PC-style 'CMOS'"
-       depends on X86 || ARM || M32R || ATARI || PPC || MIPS || SPARC64
+       depends on X86 || ARM || M32R || PPC || MIPS || SPARC64
        default y if X86
        help
          Say "yes" here to get direct support for the real time clock
index c00741a0bf1016f281fc2191e7e3d90604189799..b427bf7dd20d9330b83c14bbc47caa7f4b5e741f 100644 (file)
@@ -55,6 +55,7 @@ obj-$(CONFIG_RTC_DRV_EP93XX)  += rtc-ep93xx.o
 obj-$(CONFIG_RTC_DRV_FM3130)   += rtc-fm3130.o
 obj-$(CONFIG_RTC_DRV_GENERIC)  += rtc-generic.o
 obj-$(CONFIG_RTC_DRV_HID_SENSOR_TIME) += rtc-hid-sensor-time.o
+obj-$(CONFIG_RTC_DRV_HYM8563)  += rtc-hym8563.o
 obj-$(CONFIG_RTC_DRV_IMXDI)    += rtc-imxdi.o
 obj-$(CONFIG_RTC_DRV_ISL1208)  += rtc-isl1208.o
 obj-$(CONFIG_RTC_DRV_ISL12022) += rtc-isl12022.o
index 02426812bebc5c5cc74cba4db4395e2c67d78f25..589351ef75d03cd3c226389912e27316216df677 100644 (file)
@@ -14,6 +14,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/rtc.h>
 #include <linux/kdev_t.h>
 #include <linux/idr.h>
@@ -157,12 +158,27 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
 {
        struct rtc_device *rtc;
        struct rtc_wkalrm alrm;
-       int id, err;
+       int of_id = -1, id = -1, err;
+
+       if (dev->of_node)
+               of_id = of_alias_get_id(dev->of_node, "rtc");
+       else if (dev->parent && dev->parent->of_node)
+               of_id = of_alias_get_id(dev->parent->of_node, "rtc");
+
+       if (of_id >= 0) {
+               id = ida_simple_get(&rtc_ida, of_id, of_id + 1,
+                                   GFP_KERNEL);
+               if (id < 0)
+                       dev_warn(dev, "/aliases ID %d not available\n",
+                                   of_id);
+       }
 
-       id = ida_simple_get(&rtc_ida, 0, 0, GFP_KERNEL);
        if (id < 0) {
-               err = id;
-               goto exit;
+               id = ida_simple_get(&rtc_ida, 0, 0, GFP_KERNEL);
+               if (id < 0) {
+                       err = id;
+                       goto exit;
+               }
        }
 
        rtc = kzalloc(sizeof(struct rtc_device), GFP_KERNEL);
index 9cfa8170a2d6390aea1511170f26a00ad1c9790f..4af016985890e6d70a7f57a9b7c605cde1d9723d 100644 (file)
@@ -198,7 +198,7 @@ static int as3722_rtc_probe(struct platform_device *pdev)
 
        device_init_wakeup(&pdev->dev, 1);
 
-       as3722_rtc->rtc = rtc_device_register("as3722", &pdev->dev,
+       as3722_rtc->rtc = devm_rtc_device_register(&pdev->dev, "as3722-rtc",
                                &as3722_rtc_ops, THIS_MODULE);
        if (IS_ERR(as3722_rtc->rtc)) {
                ret = PTR_ERR(as3722_rtc->rtc);
@@ -209,28 +209,16 @@ static int as3722_rtc_probe(struct platform_device *pdev)
        as3722_rtc->alarm_irq = platform_get_irq(pdev, 0);
        dev_info(&pdev->dev, "RTC interrupt %d\n", as3722_rtc->alarm_irq);
 
-       ret = request_threaded_irq(as3722_rtc->alarm_irq, NULL,
+       ret = devm_request_threaded_irq(&pdev->dev, as3722_rtc->alarm_irq, NULL,
                        as3722_alarm_irq, IRQF_ONESHOT | IRQF_EARLY_RESUME,
                        "rtc-alarm", as3722_rtc);
        if (ret < 0) {
                dev_err(&pdev->dev, "Failed to request alarm IRQ %d: %d\n",
                                as3722_rtc->alarm_irq, ret);
-               goto scrub;
+               return ret;
        }
        disable_irq(as3722_rtc->alarm_irq);
        return 0;
-scrub:
-       rtc_device_unregister(as3722_rtc->rtc);
-       return ret;
-}
-
-static int as3722_rtc_remove(struct platform_device *pdev)
-{
-       struct as3722_rtc *as3722_rtc = platform_get_drvdata(pdev);
-
-       free_irq(as3722_rtc->alarm_irq, as3722_rtc);
-       rtc_device_unregister(as3722_rtc->rtc);
-       return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -260,7 +248,6 @@ static const struct dev_pm_ops as3722_rtc_pm_ops = {
 
 static struct platform_driver as3722_rtc_driver = {
        .probe = as3722_rtc_probe,
-       .remove = as3722_rtc_remove,
        .driver = {
                .name = "as3722-rtc",
                .pm = &as3722_rtc_pm_ops,
index a2325bc5e497e7dbee4c72ac805da1c324ae4d27..cae212f30d6512f481b9dba7070f25aa00ce2c3d 100644 (file)
@@ -756,11 +756,9 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
                irq_handler_t rtc_cmos_int_handler;
 
                if (is_hpet_enabled()) {
-                       int err;
-
                        rtc_cmos_int_handler = hpet_rtc_interrupt;
-                       err = hpet_register_irq_handler(cmos_interrupt);
-                       if (err != 0) {
+                       retval = hpet_register_irq_handler(cmos_interrupt);
+                       if (retval) {
                                dev_warn(dev, "hpet_register_irq_handler "
                                                " failed in rtc_init().");
                                goto cleanup1;
@@ -1175,7 +1173,7 @@ static struct platform_driver cmos_platform_driver = {
        .remove         = __exit_p(cmos_platform_remove),
        .shutdown       = cmos_platform_shutdown,
        .driver = {
-               .name           = (char *) driver_name,
+               .name           = driver_name,
 #ifdef CONFIG_PM
                .pm             = &cmos_pm_ops,
 #endif
index 80f323731ee2cdac987c963692d49be61841bbb1..2dd586a19b594a73e9c6f7485d1e5da9203a4467 100644 (file)
@@ -787,7 +787,6 @@ static int ds1305_remove(struct spi_device *spi)
                cancel_work_sync(&ds1305->work);
        }
 
-       spi_set_drvdata(spi, NULL);
        return 0;
 }
 
index 17b73fdc3b6e60036c8ba83ad35df7fbf716cb40..5a1f3b2a8f1e99591095d6663d285c21dda7df2d 100644 (file)
  */
 
 #include <linux/bcd.h>
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/gfp.h>
 #include <linux/delay.h>
 #include <linux/jiffies.h>
 #include <linux/rtc.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/module.h>
@@ -215,12 +216,19 @@ static int ds1742_rtc_remove(struct platform_device *pdev)
        return 0;
 }
 
+static struct of_device_id __maybe_unused ds1742_rtc_of_match[] = {
+       { .compatible = "maxim,ds1742", },
+       { }
+};
+MODULE_DEVICE_TABLE(of, ds1742_rtc_of_match);
+
 static struct platform_driver ds1742_rtc_driver = {
        .probe          = ds1742_rtc_probe,
        .remove         = ds1742_rtc_remove,
        .driver         = {
                .name   = "rtc-ds1742",
                .owner  = THIS_MODULE,
+               .of_match_table = ds1742_rtc_of_match,
        },
 };
 
diff --git a/drivers/rtc/rtc-hym8563.c b/drivers/rtc/rtc-hym8563.c
new file mode 100644 (file)
index 0000000..bd628a6
--- /dev/null
@@ -0,0 +1,606 @@
+/*
+ * Haoyu HYM8563 RTC driver
+ *
+ * Copyright (C) 2013 MundoReader S.L.
+ * Author: Heiko Stuebner <heiko@sntech.de>
+ *
+ * based on rtc-HYM8563
+ * Copyright (C) 2010 ROCKCHIP, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/clk-provider.h>
+#include <linux/i2c.h>
+#include <linux/bcd.h>
+#include <linux/rtc.h>
+
+#define HYM8563_CTL1           0x00
+#define HYM8563_CTL1_TEST      BIT(7)
+#define HYM8563_CTL1_STOP      BIT(5)
+#define HYM8563_CTL1_TESTC     BIT(3)
+
+#define HYM8563_CTL2           0x01
+#define HYM8563_CTL2_TI_TP     BIT(4)
+#define HYM8563_CTL2_AF                BIT(3)
+#define HYM8563_CTL2_TF                BIT(2)
+#define HYM8563_CTL2_AIE       BIT(1)
+#define HYM8563_CTL2_TIE       BIT(0)
+
+#define HYM8563_SEC            0x02
+#define HYM8563_SEC_VL         BIT(7)
+#define HYM8563_SEC_MASK       0x7f
+
+#define HYM8563_MIN            0x03
+#define HYM8563_MIN_MASK       0x7f
+
+#define HYM8563_HOUR           0x04
+#define HYM8563_HOUR_MASK      0x3f
+
+#define HYM8563_DAY            0x05
+#define HYM8563_DAY_MASK       0x3f
+
+#define HYM8563_WEEKDAY                0x06
+#define HYM8563_WEEKDAY_MASK   0x07
+
+#define HYM8563_MONTH          0x07
+#define HYM8563_MONTH_CENTURY  BIT(7)
+#define HYM8563_MONTH_MASK     0x1f
+
+#define HYM8563_YEAR           0x08
+
+#define HYM8563_ALM_MIN                0x09
+#define HYM8563_ALM_HOUR       0x0a
+#define HYM8563_ALM_DAY                0x0b
+#define HYM8563_ALM_WEEK       0x0c
+
+/* Each alarm check can be disabled by setting this bit in the register */
+#define HYM8563_ALM_BIT_DISABLE        BIT(7)
+
+#define HYM8563_CLKOUT         0x0d
+#define HYM8563_CLKOUT_DISABLE BIT(7)
+#define HYM8563_CLKOUT_32768   0
+#define HYM8563_CLKOUT_1024    1
+#define HYM8563_CLKOUT_32      2
+#define HYM8563_CLKOUT_1       3
+#define HYM8563_CLKOUT_MASK    3
+
+#define HYM8563_TMR_CTL                0x0e
+#define HYM8563_TMR_CTL_ENABLE BIT(7)
+#define HYM8563_TMR_CTL_4096   0
+#define HYM8563_TMR_CTL_64     1
+#define HYM8563_TMR_CTL_1      2
+#define HYM8563_TMR_CTL_1_60   3
+#define HYM8563_TMR_CTL_MASK   3
+
+#define HYM8563_TMR_CNT                0x0f
+
+struct hym8563 {
+       struct i2c_client       *client;
+       struct rtc_device       *rtc;
+       bool                    valid;
+#ifdef CONFIG_COMMON_CLK
+       struct clk_hw           clkout_hw;
+#endif
+};
+
+/*
+ * RTC handling
+ */
+
+static int hym8563_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       struct hym8563 *hym8563 = i2c_get_clientdata(client);
+       u8 buf[7];
+       int ret;
+
+       if (!hym8563->valid) {
+               dev_warn(&client->dev, "no valid clock/calendar values available\n");
+               return -EPERM;
+       }
+
+       ret = i2c_smbus_read_i2c_block_data(client, HYM8563_SEC, 7, buf);
+
+       tm->tm_sec = bcd2bin(buf[0] & HYM8563_SEC_MASK);
+       tm->tm_min = bcd2bin(buf[1] & HYM8563_MIN_MASK);
+       tm->tm_hour = bcd2bin(buf[2] & HYM8563_HOUR_MASK);
+       tm->tm_mday = bcd2bin(buf[3] & HYM8563_DAY_MASK);
+       tm->tm_wday = bcd2bin(buf[4] & HYM8563_WEEKDAY_MASK); /* 0 = Sun */
+       tm->tm_mon = bcd2bin(buf[5] & HYM8563_MONTH_MASK) - 1; /* 0 = Jan */
+       tm->tm_year = bcd2bin(buf[6]) + 100;
+
+       return 0;
+}
+
+static int hym8563_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       struct hym8563 *hym8563 = i2c_get_clientdata(client);
+       u8 buf[7];
+       int ret;
+
+       /* Years >= 2100 are too far in the future, 19XX is too early */
+       if (tm->tm_year < 100 || tm->tm_year >= 200)
+               return -EINVAL;
+
+       buf[0] = bin2bcd(tm->tm_sec);
+       buf[1] = bin2bcd(tm->tm_min);
+       buf[2] = bin2bcd(tm->tm_hour);
+       buf[3] = bin2bcd(tm->tm_mday);
+       buf[4] = bin2bcd(tm->tm_wday);
+       buf[5] = bin2bcd(tm->tm_mon + 1);
+
+       /*
+        * While the HYM8563 has a century flag in the month register,
+        * it does not seem to carry it over a subsequent write/read.
+        * So we'll limit ourselves to 100 years, starting at 2000 for now.
+        */
+       buf[6] = tm->tm_year - 100;
+
+       /*
+        * CTL1 only contains TEST-mode bits apart from stop,
+        * so no need to read the value first
+        */
+       ret = i2c_smbus_write_byte_data(client, HYM8563_CTL1,
+                                               HYM8563_CTL1_STOP);
+       if (ret < 0)
+               return ret;
+
+       ret = i2c_smbus_write_i2c_block_data(client, HYM8563_SEC, 7, buf);
+       if (ret < 0)
+               return ret;
+
+       ret = i2c_smbus_write_byte_data(client, HYM8563_CTL1, 0);
+       if (ret < 0)
+               return ret;
+
+       hym8563->valid = true;
+
+       return 0;
+}
+
+static int hym8563_rtc_alarm_irq_enable(struct device *dev,
+                                       unsigned int enabled)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       int data;
+
+       data = i2c_smbus_read_byte_data(client, HYM8563_CTL2);
+       if (data < 0)
+               return data;
+
+       if (enabled)
+               data |= HYM8563_CTL2_AIE;
+       else
+               data &= ~HYM8563_CTL2_AIE;
+
+       return i2c_smbus_write_byte_data(client, HYM8563_CTL2, data);
+};
+
+static int hym8563_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       struct rtc_time *alm_tm = &alm->time;
+       u8 buf[4];
+       int ret;
+
+       ret = i2c_smbus_read_i2c_block_data(client, HYM8563_ALM_MIN, 4, buf);
+       if (ret < 0)
+               return ret;
+
+       /* The alarm only has a minute accuracy */
+       alm_tm->tm_sec = -1;
+
+       alm_tm->tm_min = (buf[0] & HYM8563_ALM_BIT_DISABLE) ?
+                                       -1 :
+                                       bcd2bin(buf[0] & HYM8563_MIN_MASK);
+       alm_tm->tm_hour = (buf[1] & HYM8563_ALM_BIT_DISABLE) ?
+                                       -1 :
+                                       bcd2bin(buf[1] & HYM8563_HOUR_MASK);
+       alm_tm->tm_mday = (buf[2] & HYM8563_ALM_BIT_DISABLE) ?
+                                       -1 :
+                                       bcd2bin(buf[2] & HYM8563_DAY_MASK);
+       alm_tm->tm_wday = (buf[3] & HYM8563_ALM_BIT_DISABLE) ?
+                                       -1 :
+                                       bcd2bin(buf[3] & HYM8563_WEEKDAY_MASK);
+
+       alm_tm->tm_mon = -1;
+       alm_tm->tm_year = -1;
+
+       ret = i2c_smbus_read_byte_data(client, HYM8563_CTL2);
+       if (ret < 0)
+               return ret;
+
+       if (ret & HYM8563_CTL2_AIE)
+               alm->enabled = 1;
+
+       return 0;
+}
+
+static int hym8563_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       struct rtc_time *alm_tm = &alm->time;
+       u8 buf[4];
+       int ret;
+
+       /*
+        * The alarm has no seconds so deal with it
+        */
+       if (alm_tm->tm_sec) {
+               alm_tm->tm_sec = 0;
+               alm_tm->tm_min++;
+               if (alm_tm->tm_min >= 60) {
+                       alm_tm->tm_min = 0;
+                       alm_tm->tm_hour++;
+                       if (alm_tm->tm_hour >= 24) {
+                               alm_tm->tm_hour = 0;
+                               alm_tm->tm_mday++;
+                               if (alm_tm->tm_mday > 31)
+                                       alm_tm->tm_mday = 0;
+                       }
+               }
+       }
+
+       ret = i2c_smbus_read_byte_data(client, HYM8563_CTL2);
+       if (ret < 0)
+               return ret;
+
+       ret &= ~HYM8563_CTL2_AIE;
+
+       ret = i2c_smbus_write_byte_data(client, HYM8563_CTL2, ret);
+       if (ret < 0)
+               return ret;
+
+       buf[0] = (alm_tm->tm_min < 60 && alm_tm->tm_min >= 0) ?
+                       bin2bcd(alm_tm->tm_min) : HYM8563_ALM_BIT_DISABLE;
+
+       buf[1] = (alm_tm->tm_hour < 24 && alm_tm->tm_hour >= 0) ?
+                       bin2bcd(alm_tm->tm_hour) : HYM8563_ALM_BIT_DISABLE;
+
+       buf[2] = (alm_tm->tm_mday <= 31 && alm_tm->tm_mday >= 1) ?
+                       bin2bcd(alm_tm->tm_mday) : HYM8563_ALM_BIT_DISABLE;
+
+       buf[3] = (alm_tm->tm_wday < 7 && alm_tm->tm_wday >= 0) ?
+                       bin2bcd(alm_tm->tm_wday) : HYM8563_ALM_BIT_DISABLE;
+
+       ret = i2c_smbus_write_i2c_block_data(client, HYM8563_ALM_MIN, 4, buf);
+       if (ret < 0)
+               return ret;
+
+       return hym8563_rtc_alarm_irq_enable(dev, alm->enabled);
+}
+
+static const struct rtc_class_ops hym8563_rtc_ops = {
+       .read_time              = hym8563_rtc_read_time,
+       .set_time               = hym8563_rtc_set_time,
+       .alarm_irq_enable       = hym8563_rtc_alarm_irq_enable,
+       .read_alarm             = hym8563_rtc_read_alarm,
+       .set_alarm              = hym8563_rtc_set_alarm,
+};
+
+/*
+ * Handling of the clkout
+ */
+
+#ifdef CONFIG_COMMON_CLK
+#define clkout_hw_to_hym8563(_hw) container_of(_hw, struct hym8563, clkout_hw)
+
+static int clkout_rates[] = {
+       32768,
+       1024,
+       32,
+       1,
+};
+
+static unsigned long hym8563_clkout_recalc_rate(struct clk_hw *hw,
+                                               unsigned long parent_rate)
+{
+       struct hym8563 *hym8563 = clkout_hw_to_hym8563(hw);
+       struct i2c_client *client = hym8563->client;
+       int ret = i2c_smbus_read_byte_data(client, HYM8563_CLKOUT);
+
+       if (ret < 0 || ret & HYM8563_CLKOUT_DISABLE)
+               return 0;
+
+       ret &= HYM8563_CLKOUT_MASK;
+       return clkout_rates[ret];
+}
+
+static long hym8563_clkout_round_rate(struct clk_hw *hw, unsigned long rate,
+                                     unsigned long *prate)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(clkout_rates); i++)
+               if (clkout_rates[i] <= rate)
+                       return clkout_rates[i];
+
+       return 0;
+}
+
+static int hym8563_clkout_set_rate(struct clk_hw *hw, unsigned long rate,
+                                  unsigned long parent_rate)
+{
+       struct hym8563 *hym8563 = clkout_hw_to_hym8563(hw);
+       struct i2c_client *client = hym8563->client;
+       int ret = i2c_smbus_read_byte_data(client, HYM8563_CLKOUT);
+       int i;
+
+       if (ret < 0)
+               return ret;
+
+       for (i = 0; i < ARRAY_SIZE(clkout_rates); i++)
+               if (clkout_rates[i] == rate) {
+                       ret &= ~HYM8563_CLKOUT_MASK;
+                       ret |= i;
+                       return i2c_smbus_write_byte_data(client,
+                                                        HYM8563_CLKOUT, ret);
+               }
+
+       return -EINVAL;
+}
+
+static int hym8563_clkout_control(struct clk_hw *hw, bool enable)
+{
+       struct hym8563 *hym8563 = clkout_hw_to_hym8563(hw);
+       struct i2c_client *client = hym8563->client;
+       int ret = i2c_smbus_read_byte_data(client, HYM8563_CLKOUT);
+
+       if (ret < 0)
+               return ret;
+
+       if (enable)
+               ret &= ~HYM8563_CLKOUT_DISABLE;
+       else
+               ret |= HYM8563_CLKOUT_DISABLE;
+
+       return i2c_smbus_write_byte_data(client, HYM8563_CLKOUT, ret);
+}
+
+static int hym8563_clkout_prepare(struct clk_hw *hw)
+{
+       return hym8563_clkout_control(hw, 1);
+}
+
+static void hym8563_clkout_unprepare(struct clk_hw *hw)
+{
+       hym8563_clkout_control(hw, 0);
+}
+
+static int hym8563_clkout_is_prepared(struct clk_hw *hw)
+{
+       struct hym8563 *hym8563 = clkout_hw_to_hym8563(hw);
+       struct i2c_client *client = hym8563->client;
+       int ret = i2c_smbus_read_byte_data(client, HYM8563_CLKOUT);
+
+       if (ret < 0)
+               return ret;
+
+       return !(ret & HYM8563_CLKOUT_DISABLE);
+}
+
+static const struct clk_ops hym8563_clkout_ops = {
+       .prepare = hym8563_clkout_prepare,
+       .unprepare = hym8563_clkout_unprepare,
+       .is_prepared = hym8563_clkout_is_prepared,
+       .recalc_rate = hym8563_clkout_recalc_rate,
+       .round_rate = hym8563_clkout_round_rate,
+       .set_rate = hym8563_clkout_set_rate,
+};
+
+static struct clk *hym8563_clkout_register_clk(struct hym8563 *hym8563)
+{
+       struct i2c_client *client = hym8563->client;
+       struct device_node *node = client->dev.of_node;
+       struct clk *clk;
+       struct clk_init_data init;
+       int ret;
+
+       ret = i2c_smbus_write_byte_data(client, HYM8563_CLKOUT,
+                                               HYM8563_CLKOUT_DISABLE);
+       if (ret < 0)
+               return ERR_PTR(ret);
+
+       init.name = "hym8563-clkout";
+       init.ops = &hym8563_clkout_ops;
+       init.flags = CLK_IS_ROOT;
+       init.parent_names = NULL;
+       init.num_parents = 0;
+       hym8563->clkout_hw.init = &init;
+
+       /* register the clock */
+       clk = clk_register(&client->dev, &hym8563->clkout_hw);
+
+       if (!IS_ERR(clk))
+               of_clk_add_provider(node, of_clk_src_simple_get, clk);
+
+       return clk;
+}
+#endif
+
+/*
+ * The alarm interrupt is implemented as a level-low interrupt in the
+ * hym8563, while the timer interrupt uses a falling edge.
+ * We don't use the timer at all, so the interrupt is requested to
+ * use the level-low trigger.
+ */
+static irqreturn_t hym8563_irq(int irq, void *dev_id)
+{
+       struct hym8563 *hym8563 = (struct hym8563 *)dev_id;
+       struct i2c_client *client = hym8563->client;
+       struct mutex *lock = &hym8563->rtc->ops_lock;
+       int data, ret;
+
+       mutex_lock(lock);
+
+       /* Clear the alarm flag */
+
+       data = i2c_smbus_read_byte_data(client, HYM8563_CTL2);
+       if (data < 0) {
+               dev_err(&client->dev, "%s: error reading i2c data %d\n",
+                       __func__, data);
+               goto out;
+       }
+
+       data &= ~HYM8563_CTL2_AF;
+
+       ret = i2c_smbus_write_byte_data(client, HYM8563_CTL2, data);
+       if (ret < 0) {
+               dev_err(&client->dev, "%s: error writing i2c data %d\n",
+                       __func__, ret);
+       }
+
+out:
+       mutex_unlock(lock);
+       return IRQ_HANDLED;
+}
+
+static int hym8563_init_device(struct i2c_client *client)
+{
+       int ret;
+
+       /* Clear stop flag if present */
+       ret = i2c_smbus_write_byte_data(client, HYM8563_CTL1, 0);
+       if (ret < 0)
+               return ret;
+
+       ret = i2c_smbus_read_byte_data(client, HYM8563_CTL2);
+       if (ret < 0)
+               return ret;
+
+       /* Disable alarm and timer interrupts */
+       ret &= ~HYM8563_CTL2_AIE;
+       ret &= ~HYM8563_CTL2_TIE;
+
+       /* Clear any pending alarm and timer flags */
+       if (ret & HYM8563_CTL2_AF)
+               ret &= ~HYM8563_CTL2_AF;
+
+       if (ret & HYM8563_CTL2_TF)
+               ret &= ~HYM8563_CTL2_TF;
+
+       ret &= ~HYM8563_CTL2_TI_TP;
+
+       return i2c_smbus_write_byte_data(client, HYM8563_CTL2, ret);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int hym8563_suspend(struct device *dev)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       int ret;
+
+       if (device_may_wakeup(dev)) {
+               ret = enable_irq_wake(client->irq);
+               if (ret) {
+                       dev_err(dev, "enable_irq_wake failed, %d\n", ret);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+static int hym8563_resume(struct device *dev)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+
+       if (device_may_wakeup(dev))
+               disable_irq_wake(client->irq);
+
+       return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(hym8563_pm_ops, hym8563_suspend, hym8563_resume);
+
+static int hym8563_probe(struct i2c_client *client,
+                        const struct i2c_device_id *id)
+{
+       struct hym8563 *hym8563;
+       int ret;
+
+       hym8563 = devm_kzalloc(&client->dev, sizeof(*hym8563), GFP_KERNEL);
+       if (!hym8563)
+               return -ENOMEM;
+
+       hym8563->client = client;
+       i2c_set_clientdata(client, hym8563);
+
+       device_set_wakeup_capable(&client->dev, true);
+
+       ret = hym8563_init_device(client);
+       if (ret) {
+               dev_err(&client->dev, "could not init device, %d\n", ret);
+               return ret;
+       }
+
+       ret = devm_request_threaded_irq(&client->dev, client->irq,
+                                       NULL, hym8563_irq,
+                                       IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+                                       client->name, hym8563);
+       if (ret < 0) {
+               dev_err(&client->dev, "irq %d request failed, %d\n",
+                       client->irq, ret);
+               return ret;
+       }
+
+       /* check state of calendar information */
+       ret = i2c_smbus_read_byte_data(client, HYM8563_SEC);
+       if (ret < 0)
+               return ret;
+
+       hym8563->valid = !(ret & HYM8563_SEC_VL);
+       dev_dbg(&client->dev, "rtc information is %s\n",
+               hym8563->valid ? "valid" : "invalid");
+
+       hym8563->rtc = devm_rtc_device_register(&client->dev, client->name,
+                                               &hym8563_rtc_ops, THIS_MODULE);
+       if (IS_ERR(hym8563->rtc))
+               return PTR_ERR(hym8563->rtc);
+
+#ifdef CONFIG_COMMON_CLK
+       hym8563_clkout_register_clk(hym8563);
+#endif
+
+       return 0;
+}
+
+static const struct i2c_device_id hym8563_id[] = {
+       { "hym8563", 0 },
+       {},
+};
+MODULE_DEVICE_TABLE(i2c, hym8563_id);
+
+static struct of_device_id hym8563_dt_idtable[] = {
+       { .compatible = "haoyu,hym8563" },
+       {},
+};
+MODULE_DEVICE_TABLE(of, hym8563_dt_idtable);
+
+static struct i2c_driver hym8563_driver = {
+       .driver         = {
+               .name   = "rtc-hym8563",
+               .owner  = THIS_MODULE,
+               .pm     = &hym8563_pm_ops,
+               .of_match_table = hym8563_dt_idtable,
+       },
+       .probe          = hym8563_probe,
+       .id_table       = hym8563_id,
+};
+
+module_i2c_driver(hym8563_driver);
+
+MODULE_AUTHOR("Heiko Stuebner <heiko@sntech.de>");
+MODULE_DESCRIPTION("HYM8563 RTC driver");
+MODULE_LICENSE("GPL");
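
Editor's note: when CONFIG_COMMON_CLK is enabled, the 32 kHz output registered above becomes an ordinary clock provider, so any device-tree consumer with a "clocks" phandle pointing at the hym8563 node can pick it up. A minimal sketch of such a consumer follows; the function name and the use of the first (unnamed) clocks entry are illustrative, not part of this patch.

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>

/* Hypothetical consumer: grab the clock wired up in DT and enable it. */
static int example_enable_rtc_clkout(struct device *dev)
{
	struct clk *clk;
	int ret;

	clk = devm_clk_get(dev, NULL);		/* first "clocks" entry */
	if (IS_ERR(clk))
		return PTR_ERR(clk);

	ret = clk_prepare_enable(clk);
	if (ret)
		return ret;

	dev_info(dev, "clkout running at %lu Hz\n", clk_get_rate(clk));
	return 0;
}
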
index 8e45b3c4aa2fdb10629c90a4b23b9af6eb6bc06f..3032178bd9e675eb2652ba22c973322b2c775707 100644 (file)
@@ -51,7 +51,7 @@ static irqreturn_t max8907_irq_handler(int irq, void *data)
 {
        struct max8907_rtc *rtc = data;
 
-       regmap_update_bits(rtc->regmap, MAX8907_REG_ALARM0_CNTL, 0x7f, 0);
+       regmap_write(rtc->regmap, MAX8907_REG_ALARM0_CNTL, 0);
 
        rtc_update_irq(rtc->rtc_dev, 1, RTC_IRQF | RTC_AF);
 
@@ -64,7 +64,7 @@ static void regs_to_tm(u8 *regs, struct rtc_time *tm)
                bcd2bin(regs[RTC_YEAR1]) - 1900;
        tm->tm_mon = bcd2bin(regs[RTC_MONTH] & 0x1f) - 1;
        tm->tm_mday = bcd2bin(regs[RTC_DATE] & 0x3f);
-       tm->tm_wday = (regs[RTC_WEEKDAY] & 0x07) - 1;
+       tm->tm_wday = (regs[RTC_WEEKDAY] & 0x07);
        if (regs[RTC_HOUR] & HOUR_12) {
                tm->tm_hour = bcd2bin(regs[RTC_HOUR] & 0x01f);
                if (tm->tm_hour == 12)
@@ -88,7 +88,7 @@ static void tm_to_regs(struct rtc_time *tm, u8 *regs)
        regs[RTC_YEAR1] = bin2bcd(low);
        regs[RTC_MONTH] = bin2bcd(tm->tm_mon + 1);
        regs[RTC_DATE] = bin2bcd(tm->tm_mday);
-       regs[RTC_WEEKDAY] = tm->tm_wday + 1;
+       regs[RTC_WEEKDAY] = tm->tm_wday;
        regs[RTC_HOUR] = bin2bcd(tm->tm_hour);
        regs[RTC_MIN] = bin2bcd(tm->tm_min);
        regs[RTC_SEC] = bin2bcd(tm->tm_sec);
@@ -153,7 +153,7 @@ static int max8907_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        tm_to_regs(&alrm->time, regs);
 
        /* Disable alarm while we update the target time */
-       ret = regmap_update_bits(rtc->regmap, MAX8907_REG_ALARM0_CNTL, 0x7f, 0);
+       ret = regmap_write(rtc->regmap, MAX8907_REG_ALARM0_CNTL, 0);
        if (ret < 0)
                return ret;
 
@@ -163,8 +163,7 @@ static int max8907_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
                return ret;
 
        if (alrm->enabled)
-               ret = regmap_update_bits(rtc->regmap, MAX8907_REG_ALARM0_CNTL,
-                                        0x7f, 0x7f);
+               ret = regmap_write(rtc->regmap, MAX8907_REG_ALARM0_CNTL, 0x77);
 
        return ret;
 }
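
Editor's note: the max8907 hunks above replace read-modify-write updates with plain writes of the whole ALARM0_CNTL register. As a rough sketch (not the regmap implementation itself, which also handles locking and caching), regmap_update_bits() behaves like the read-modify-write below, so regmap_write() is the simpler call once every bit of the register is being set explicitly.

#include <linux/regmap.h>

/* Illustrative only: roughly what regmap_update_bits(map, reg, mask, val) does. */
static int example_update_bits(struct regmap *map, unsigned int reg,
			       unsigned int mask, unsigned int val)
{
	unsigned int tmp;
	int ret;

	ret = regmap_read(map, reg, &tmp);
	if (ret)
		return ret;

	tmp = (tmp & ~mask) | (val & mask);

	return regmap_write(map, reg, tmp);
}
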
index 50c572645546bf20d6a4414d454136f95636d9a3..419874fefa4b977ed205fd1aee50c808c1866cf5 100644 (file)
@@ -391,11 +391,13 @@ static int mxc_rtc_probe(struct platform_device *pdev)
        pdata->clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(pdata->clk)) {
                dev_err(&pdev->dev, "unable to get clock!\n");
-               ret = PTR_ERR(pdata->clk);
-               goto exit_free_pdata;
+               return PTR_ERR(pdata->clk);
        }
 
-       clk_prepare_enable(pdata->clk);
+       ret = clk_prepare_enable(pdata->clk);
+       if (ret)
+               return ret;
+
        rate = clk_get_rate(pdata->clk);
 
        if (rate == 32768)
@@ -447,8 +449,6 @@ static int mxc_rtc_probe(struct platform_device *pdev)
 exit_put_clk:
        clk_disable_unprepare(pdata->clk);
 
-exit_free_pdata:
-
        return ret;
 }
 
index 1ee514a3972c7d5c91fc8c8dc88d641a0e8c25ee..9bd842e977492d6afc93a651403b0eb9db809cf9 100644 (file)
@@ -197,10 +197,7 @@ static int pcf2127_probe(struct i2c_client *client,
                                pcf2127_driver.driver.name,
                                &pcf2127_rtc_ops, THIS_MODULE);
 
-       if (IS_ERR(pcf2127->rtc))
-               return PTR_ERR(pcf2127->rtc);
-
-       return 0;
+       return PTR_ERR_OR_ZERO(pcf2127->rtc);
 }
 
 static const struct i2c_device_id pcf2127_id[] = {
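
Editor's note: PTR_ERR_OR_ZERO() collapses the common trailing IS_ERR()/PTR_ERR() check in probe. Roughly, and assuming the usual definition in <linux/err.h>, it is equivalent to the helper below (the name example_ptr_err_or_zero is illustrative).

#include <linux/err.h>

/* Rough equivalent of PTR_ERR_OR_ZERO() for illustration. */
static inline int example_ptr_err_or_zero(const void *ptr)
{
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);
	return 0;
}
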
index ae8119dc2846aac28c8f47019720bf93fd39803f..476af93543f6efa24c9fbb8c6d28fa64de732fde 100644 (file)
@@ -639,6 +639,7 @@ static void s5m_rtc_shutdown(struct platform_device *pdev)
        s5m_rtc_enable_smpl(info, false);
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int s5m_rtc_resume(struct device *dev)
 {
        struct s5m_rtc_info *info = dev_get_drvdata(dev);
@@ -660,6 +661,7 @@ static int s5m_rtc_suspend(struct device *dev)
 
        return ret;
 }
+#endif /* CONFIG_PM_SLEEP */
 
 static SIMPLE_DEV_PM_OPS(s5m_rtc_pm_ops, s5m_rtc_suspend, s5m_rtc_resume);
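
Editor's note: the new #ifdef avoids "defined but not used" warnings. SIMPLE_DEV_PM_OPS() only references the suspend/resume callbacks when CONFIG_PM_SLEEP is set, so without the guard the two functions would be unreferenced in !PM_SLEEP builds. A hedged sketch of the usual pattern, with illustrative names:

#include <linux/pm.h>

#ifdef CONFIG_PM_SLEEP
static int example_suspend(struct device *dev) { return 0; }
static int example_resume(struct device *dev)  { return 0; }
#endif

/* The sleep callbacks are only wired in when CONFIG_PM_SLEEP is enabled. */
static SIMPLE_DEV_PM_OPS(example_pm_ops, example_suspend, example_resume);
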
 
index c2e80d7ca5e26b618a19bfc5aac45e8497651082..1915464e4cd6022189fe88451c14147e6a0e8be1 100644 (file)
@@ -479,7 +479,7 @@ static int twl_rtc_probe(struct platform_device *pdev)
        u8 rd_reg;
 
        if (irq <= 0)
-               goto out1;
+               return ret;
 
        /* Initialize the register map */
        if (twl_class_is_4030())
@@ -489,7 +489,7 @@ static int twl_rtc_probe(struct platform_device *pdev)
 
        ret = twl_rtc_read_u8(&rd_reg, REG_RTC_STATUS_REG);
        if (ret < 0)
-               goto out1;
+               return ret;
 
        if (rd_reg & BIT_RTC_STATUS_REG_POWER_UP_M)
                dev_warn(&pdev->dev, "Power up reset detected.\n");
@@ -500,7 +500,7 @@ static int twl_rtc_probe(struct platform_device *pdev)
        /* Clear RTC Power up reset and pending alarm interrupts */
        ret = twl_rtc_write_u8(rd_reg, REG_RTC_STATUS_REG);
        if (ret < 0)
-               goto out1;
+               return ret;
 
        if (twl_class_is_6030()) {
                twl6030_interrupt_unmask(TWL6030_RTC_INT_MASK,
@@ -512,7 +512,7 @@ static int twl_rtc_probe(struct platform_device *pdev)
        dev_info(&pdev->dev, "Enabling TWL-RTC\n");
        ret = twl_rtc_write_u8(BIT_RTC_CTRL_REG_STOP_RTC_M, REG_RTC_CTRL_REG);
        if (ret < 0)
-               goto out1;
+               return ret;
 
        /* ensure interrupts are disabled, bootloaders can be strange */
        ret = twl_rtc_write_u8(0, REG_RTC_INTERRUPTS_REG);
@@ -522,34 +522,29 @@ static int twl_rtc_probe(struct platform_device *pdev)
        /* init cached IRQ enable bits */
        ret = twl_rtc_read_u8(&rtc_irq_bits, REG_RTC_INTERRUPTS_REG);
        if (ret < 0)
-               goto out1;
+               return ret;
 
        device_init_wakeup(&pdev->dev, 1);
 
-       rtc = rtc_device_register(pdev->name,
-                                 &pdev->dev, &twl_rtc_ops, THIS_MODULE);
+       rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
+                                       &twl_rtc_ops, THIS_MODULE);
        if (IS_ERR(rtc)) {
-               ret = PTR_ERR(rtc);
                dev_err(&pdev->dev, "can't register RTC device, err %ld\n",
                        PTR_ERR(rtc));
-               goto out1;
+               return PTR_ERR(rtc);
        }
 
-       ret = request_threaded_irq(irq, NULL, twl_rtc_interrupt,
-                                  IRQF_TRIGGER_RISING | IRQF_ONESHOT,
-                                  dev_name(&rtc->dev), rtc);
+       ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
+                                       twl_rtc_interrupt,
+                                       IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+                                       dev_name(&rtc->dev), rtc);
        if (ret < 0) {
                dev_err(&pdev->dev, "IRQ is not free.\n");
-               goto out2;
+               return ret;
        }
 
        platform_set_drvdata(pdev, rtc);
        return 0;
-
-out2:
-       rtc_device_unregister(rtc);
-out1:
-       return ret;
 }
 
 /*
@@ -559,9 +554,6 @@ out1:
 static int twl_rtc_remove(struct platform_device *pdev)
 {
        /* leave rtc running, but disable irqs */
-       struct rtc_device *rtc = platform_get_drvdata(pdev);
-       int irq = platform_get_irq(pdev, 0);
-
        mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_ALARM_M);
        mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
        if (twl_class_is_6030()) {
@@ -571,10 +563,6 @@ static int twl_rtc_remove(struct platform_device *pdev)
                        REG_INT_MSK_STS_A);
        }
 
-
-       free_irq(irq, rtc);
-
-       rtc_device_unregister(rtc);
        return 0;
 }
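
Editor's note: the twl conversion above is typical of the devm_* changes in this series. Resources obtained through devm_* helpers are released automatically, in reverse order, when probe fails or the device is unbound, which is why the error-unwinding labels and most of the remove() work disappear. A minimal sketch of the resulting probe shape, with illustrative names that are not taken from the driver:

#include <linux/platform_device.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/err.h>

static irqreturn_t example_isr(int irq, void *data)
{
	return IRQ_HANDLED;
}

static int example_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *base;
	int irq, ret;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(base))
		return PTR_ERR(base);	/* nothing to unwind by hand */

	irq = platform_get_irq(pdev, 0);
	if (irq < 0)
		return irq;		/* devm frees the mapping for us */

	return devm_request_irq(&pdev->dev, irq, example_isr, 0,
				dev_name(&pdev->dev), pdev);
}
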
 
index aabc22c587fb49025b47de4de531279832a05efd..88c9c92e89fdef7a409bb0d2365797aee224def7 100644 (file)
@@ -293,7 +293,7 @@ static int rtc_probe(struct platform_device *pdev)
        if (!res)
                return -EBUSY;
 
-       rtc1_base = ioremap(res->start, resource_size(res));
+       rtc1_base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
        if (!rtc1_base)
                return -EBUSY;
 
@@ -303,13 +303,14 @@ static int rtc_probe(struct platform_device *pdev)
                goto err_rtc1_iounmap;
        }
 
-       rtc2_base = ioremap(res->start, resource_size(res));
+       rtc2_base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
        if (!rtc2_base) {
                retval = -EBUSY;
                goto err_rtc1_iounmap;
        }
 
-       rtc = rtc_device_register(rtc_name, &pdev->dev, &vr41xx_rtc_ops, THIS_MODULE);
+       rtc = devm_rtc_device_register(&pdev->dev, rtc_name, &vr41xx_rtc_ops,
+                                       THIS_MODULE);
        if (IS_ERR(rtc)) {
                retval = PTR_ERR(rtc);
                goto err_iounmap_all;
@@ -330,24 +331,24 @@ static int rtc_probe(struct platform_device *pdev)
        aie_irq = platform_get_irq(pdev, 0);
        if (aie_irq <= 0) {
                retval = -EBUSY;
-               goto err_device_unregister;
+               goto err_iounmap_all;
        }
 
-       retval = request_irq(aie_irq, elapsedtime_interrupt, 0,
-                            "elapsed_time", pdev);
+       retval = devm_request_irq(&pdev->dev, aie_irq, elapsedtime_interrupt, 0,
+                               "elapsed_time", pdev);
        if (retval < 0)
-               goto err_device_unregister;
+               goto err_iounmap_all;
 
        pie_irq = platform_get_irq(pdev, 1);
        if (pie_irq <= 0) {
                retval = -EBUSY;
-               goto err_free_irq;
+               goto err_iounmap_all;
        }
 
-       retval = request_irq(pie_irq, rtclong1_interrupt, 0,
-                            "rtclong1", pdev);
+       retval = devm_request_irq(&pdev->dev, pie_irq, rtclong1_interrupt, 0,
+                               "rtclong1", pdev);
        if (retval < 0)
-               goto err_free_irq;
+               goto err_iounmap_all;
 
        platform_set_drvdata(pdev, rtc);
 
@@ -358,47 +359,20 @@ static int rtc_probe(struct platform_device *pdev)
 
        return 0;
 
-err_free_irq:
-       free_irq(aie_irq, pdev);
-
-err_device_unregister:
-       rtc_device_unregister(rtc);
-
 err_iounmap_all:
-       iounmap(rtc2_base);
        rtc2_base = NULL;
 
 err_rtc1_iounmap:
-       iounmap(rtc1_base);
        rtc1_base = NULL;
 
        return retval;
 }
 
-static int rtc_remove(struct platform_device *pdev)
-{
-       struct rtc_device *rtc;
-
-       rtc = platform_get_drvdata(pdev);
-       if (rtc)
-               rtc_device_unregister(rtc);
-
-       free_irq(aie_irq, pdev);
-       free_irq(pie_irq, pdev);
-       if (rtc1_base)
-               iounmap(rtc1_base);
-       if (rtc2_base)
-               iounmap(rtc2_base);
-
-       return 0;
-}
-
 /* work with hotplug and coldplug */
 MODULE_ALIAS("platform:RTC");
 
 static struct platform_driver rtc_platform_driver = {
        .probe          = rtc_probe,
-       .remove         = rtc_remove,
        .driver         = {
                .name   = rtc_name,
                .owner  = THIS_MODULE,
index dfffd0f37916f18097eaafe21eb2589b033d9193..a70692779a16c770300b410eda56a9f1823e9239 100644 (file)
@@ -486,6 +486,8 @@ mimd_to_kioc(mimd_t __user *umimd, mraid_mmadp_t *adp, uioc_t *kioc)
 
        pthru32->dataxferaddr   = kioc->buf_paddr;
        if (kioc->data_dir & UIOC_WR) {
+               if (pthru32->dataxferlen > kioc->xferlen)
+                       return -EINVAL;
                if (copy_from_user(kioc->buf_vaddr, kioc->user_data,
                                                pthru32->dataxferlen)) {
                        return (-EFAULT);
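
Editor's note: the megaraid change is a hardening fix; the user-controlled dataxferlen is validated against the size of the kernel bounce buffer before copy_from_user() runs. The general pattern, sketched with illustrative names:

#include <linux/uaccess.h>
#include <linux/errno.h>

/* Illustrative: never trust a user-supplied length against a fixed-size buffer. */
static int example_copy_in(void *kbuf, size_t kbuf_len,
			   const void __user *ubuf, size_t user_len)
{
	if (user_len > kbuf_len)
		return -EINVAL;

	if (copy_from_user(kbuf, ubuf, user_len))
		return -EFAULT;

	return 0;
}
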
index b24aa010f68c5fd156c8abba916be24a61ed4c2f..65cd80bf9aece8661b5d14dbe9004bfae5dd928e 100644 (file)
@@ -13,6 +13,10 @@ config VT
        bool "Virtual terminal" if EXPERT
        depends on !S390 && !UML
        select INPUT
+       select NEW_LEDS
+       select LEDS_CLASS
+       select LEDS_TRIGGERS
+       select INPUT_LEDS
        default y
        ---help---
          If you say Y here, you will get support for terminal devices with
index d0e3a44977074526979a92e638c620aabaf661cb..d6ecfc9e734f2f25159f3eaf11f502de81597562 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/leds.h>
 
 #include <linux/kbd_kern.h>
 #include <linux/kbd_diacr.h>
@@ -130,6 +131,7 @@ static char rep;                                    /* flag telling character repeat */
 static int shift_state = 0;
 
 static unsigned char ledstate = 0xff;                  /* undefined */
+static unsigned char lockstate = 0xff;                 /* undefined */
 static unsigned char ledioctl;
 
 /*
@@ -961,6 +963,41 @@ static void k_brl(struct vc_data *vc, unsigned char value, char up_flag)
        }
 }
 
+/* We route VT keyboard "leds" through triggers */
+static void kbd_ledstate_trigger_activate(struct led_classdev *cdev);
+
+static struct led_trigger ledtrig_ledstate[] = {
+#define DEFINE_LEDSTATE_TRIGGER(kbd_led, nam) \
+       [kbd_led] = { \
+               .name = nam, \
+               .activate = kbd_ledstate_trigger_activate, \
+       }
+       DEFINE_LEDSTATE_TRIGGER(VC_SCROLLOCK, "kbd-scrollock"),
+       DEFINE_LEDSTATE_TRIGGER(VC_NUMLOCK,   "kbd-numlock"),
+       DEFINE_LEDSTATE_TRIGGER(VC_CAPSLOCK,  "kbd-capslock"),
+       DEFINE_LEDSTATE_TRIGGER(VC_KANALOCK,  "kbd-kanalock"),
+#undef DEFINE_LEDSTATE_TRIGGER
+};
+
+static void kbd_lockstate_trigger_activate(struct led_classdev *cdev);
+
+static struct led_trigger ledtrig_lockstate[] = {
+#define DEFINE_LOCKSTATE_TRIGGER(kbd_led, nam) \
+       [kbd_led] = { \
+               .name = nam, \
+               .activate = kbd_lockstate_trigger_activate, \
+       }
+       DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTLOCK,  "kbd-shiftlock"),
+       DEFINE_LOCKSTATE_TRIGGER(VC_ALTGRLOCK,  "kbd-altgrlock"),
+       DEFINE_LOCKSTATE_TRIGGER(VC_CTRLLOCK,   "kbd-ctrllock"),
+       DEFINE_LOCKSTATE_TRIGGER(VC_ALTLOCK,    "kbd-altlock"),
+       DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTLLOCK, "kbd-shiftllock"),
+       DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTRLOCK, "kbd-shiftrlock"),
+       DEFINE_LOCKSTATE_TRIGGER(VC_CTRLLLOCK,  "kbd-ctrlllock"),
+       DEFINE_LOCKSTATE_TRIGGER(VC_CTRLRLOCK,  "kbd-ctrlrlock"),
+#undef DEFINE_LOCKSTATE_TRIGGER
+};
+
 /*
  * The leds display either (i) the status of NumLock, CapsLock, ScrollLock,
  * or (ii) whatever pattern of lights people want to show using KDSETLED,
@@ -995,18 +1032,25 @@ static inline unsigned char getleds(void)
        return kbd->ledflagstate;
 }
 
-static int kbd_update_leds_helper(struct input_handle *handle, void *data)
+/* Called on trigger connection, to set initial state */
+static void kbd_ledstate_trigger_activate(struct led_classdev *cdev)
 {
-       unsigned char leds = *(unsigned char *)data;
+       struct led_trigger *trigger = cdev->trigger;
+       int led = trigger - ledtrig_ledstate;
 
-       if (test_bit(EV_LED, handle->dev->evbit)) {
-               input_inject_event(handle, EV_LED, LED_SCROLLL, !!(leds & 0x01));
-               input_inject_event(handle, EV_LED, LED_NUML,    !!(leds & 0x02));
-               input_inject_event(handle, EV_LED, LED_CAPSL,   !!(leds & 0x04));
-               input_inject_event(handle, EV_SYN, SYN_REPORT, 0);
-       }
+       tasklet_disable(&keyboard_tasklet);
+       led_trigger_event(trigger, ledstate & (1 << led) ? LED_FULL : LED_OFF);
+       tasklet_enable(&keyboard_tasklet);
+}
 
-       return 0;
+static void kbd_lockstate_trigger_activate(struct led_classdev *cdev)
+{
+       struct led_trigger *trigger = cdev->trigger;
+       int led = trigger - ledtrig_lockstate;
+
+       tasklet_disable(&keyboard_tasklet);
+       led_trigger_event(trigger, lockstate & (1 << led) ? LED_FULL : LED_OFF);
+       tasklet_enable(&keyboard_tasklet);
 }
 
 /**
@@ -1095,16 +1139,29 @@ static void kbd_bh(unsigned long dummy)
 {
        unsigned char leds;
        unsigned long flags;
-       
+       int i;
+
        spin_lock_irqsave(&led_lock, flags);
        leds = getleds();
        spin_unlock_irqrestore(&led_lock, flags);
 
        if (leds != ledstate) {
-               input_handler_for_each_handle(&kbd_handler, &leds,
-                                             kbd_update_leds_helper);
+               for (i = 0; i < ARRAY_SIZE(ledtrig_ledstate); i++)
+                       if ((leds ^ ledstate) & (1 << i))
+                               led_trigger_event(&ledtrig_ledstate[i],
+                                               leds & (1 << i)
+                                               ? LED_FULL : LED_OFF);
                ledstate = leds;
        }
+
+       if (kbd->lockstate != lockstate) {
+               for (i = 0; i < ARRAY_SIZE(ledtrig_lockstate); i++)
+                       if ((kbd->lockstate ^ lockstate) & (1 << i))
+                               led_trigger_event(&ledtrig_lockstate[i],
+                                               kbd->lockstate & (1 << i)
+                                               ? LED_FULL : LED_OFF);
+               lockstate = kbd->lockstate;
+       }
 }
 
 DECLARE_TASKLET_DISABLED(keyboard_tasklet, kbd_bh, 0);
@@ -1442,20 +1499,6 @@ static void kbd_disconnect(struct input_handle *handle)
        kfree(handle);
 }
 
-/*
- * Start keyboard handler on the new keyboard by refreshing LED state to
- * match the rest of the system.
- */
-static void kbd_start(struct input_handle *handle)
-{
-       tasklet_disable(&keyboard_tasklet);
-
-       if (ledstate != 0xff)
-               kbd_update_leds_helper(handle, &ledstate);
-
-       tasklet_enable(&keyboard_tasklet);
-}
-
 static const struct input_device_id kbd_ids[] = {
        {
                .flags = INPUT_DEVICE_ID_MATCH_EVBIT,
@@ -1477,7 +1520,6 @@ static struct input_handler kbd_handler = {
        .match          = kbd_match,
        .connect        = kbd_connect,
        .disconnect     = kbd_disconnect,
-       .start          = kbd_start,
        .name           = "kbd",
        .id_table       = kbd_ids,
 };
@@ -1501,6 +1543,20 @@ int __init kbd_init(void)
        if (error)
                return error;
 
+       for (i = 0; i < ARRAY_SIZE(ledtrig_ledstate); i++) {
+               error = led_trigger_register(&ledtrig_ledstate[i]);
+               if (error)
+                       pr_err("error %d while registering trigger %s\n",
+                                       error, ledtrig_ledstate[i].name);
+       }
+
+       for (i = 0; i < ARRAY_SIZE(ledtrig_lockstate); i++) {
+               error = led_trigger_register(&ledtrig_lockstate[i]);
+               if (error)
+                       pr_err("error %d while registering trigger %s\n",
+                                       error, ledtrig_lockstate[i].name);
+       }
+
        tasklet_enable(&keyboard_tasklet);
        tasklet_schedule(&keyboard_tasklet);
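
Editor's note: the keyboard rework routes VT LED state through the LED trigger framework instead of injecting EV_LED events into every input handle. A minimal sketch of the trigger API as used above; the trigger name and init function are illustrative only.

#include <linux/leds.h>

static struct led_trigger example_trigger = {
	.name = "example-kbd-led",
	/* .activate runs when an LED attaches, to push the current state */
};

static int __init example_trigger_init(void)
{
	int error = led_trigger_register(&example_trigger);

	if (error)
		return error;

	/* Later, whenever the logical state changes: */
	led_trigger_event(&example_trigger, LED_FULL);	/* or LED_OFF */
	return 0;
}
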
 
index 12ca031877d43865f49e7875c4207684d1bf0afa..52108be69e7748031b4392a5a4145168bd7358d0 100644 (file)
@@ -357,11 +357,13 @@ static int default_lcd_on = 1;
 static bool mtrr = true;
 #endif
 
+#ifdef CONFIG_FB_ATY128_BACKLIGHT
 #ifdef CONFIG_PMAC_BACKLIGHT
 static int backlight = 1;
 #else
 static int backlight = 0;
 #endif
+#endif
 
 /* PLL constants */
 struct aty128_constants {
@@ -1671,7 +1673,9 @@ static int aty128fb_setup(char *options)
                        default_crt_on = simple_strtoul(this_opt+4, NULL, 0);
                        continue;
                } else if (!strncmp(this_opt, "backlight:", 10)) {
+#ifdef CONFIG_FB_ATY128_BACKLIGHT
                        backlight = simple_strtoul(this_opt+10, NULL, 0);
+#endif
                        continue;
                }
 #ifdef CONFIG_MTRR
index 5d05555fe841d2a4eaac7d230e498e0c51f5d841..b83f00eaca9ccb1e59701831aec058badfa81d3d 100644 (file)
@@ -175,8 +175,6 @@ static ssize_t brightness_store(struct device *dev,
        }
        mutex_unlock(&bd->ops_lock);
 
-       backlight_generate_event(bd, BACKLIGHT_UPDATE_SYSFS);
-
        return rc;
 }
 static DEVICE_ATTR_RW(brightness);
index 00076ecfe9b8d52adaa09d15d74a1a4aebf0abf1..8ea42b8d9bc8570f3bf379d44bd5c97885e5df2e 100644 (file)
@@ -110,8 +110,8 @@ static int hp680bl_probe(struct platform_device *pdev)
        memset(&props, 0, sizeof(struct backlight_properties));
        props.type = BACKLIGHT_RAW;
        props.max_brightness = HP680_MAX_INTENSITY;
-       bd = backlight_device_register("hp680-bl", &pdev->dev, NULL,
-                                      &hp680bl_ops, &props);
+       bd = devm_backlight_device_register(&pdev->dev, "hp680-bl", &pdev->dev,
+                                       NULL, &hp680bl_ops, &props);
        if (IS_ERR(bd))
                return PTR_ERR(bd);
 
@@ -131,8 +131,6 @@ static int hp680bl_remove(struct platform_device *pdev)
        bd->props.power = 0;
        hp680bl_send_intensity(bd);
 
-       backlight_device_unregister(bd);
-
        return 0;
 }
 
index 3ccb89340f22644e8cfac83c0a4b9eebe37cc55d..6ce96b4a879696bc04457b329f2f39e16bb5e7ed 100644 (file)
@@ -115,9 +115,10 @@ static int jornada_bl_probe(struct platform_device *pdev)
        memset(&props, 0, sizeof(struct backlight_properties));
        props.type = BACKLIGHT_RAW;
        props.max_brightness = BL_MAX_BRIGHT;
-       bd = backlight_device_register(S1D_DEVICENAME, &pdev->dev, NULL,
-                                      &jornada_bl_ops, &props);
 
+       bd = devm_backlight_device_register(&pdev->dev, S1D_DEVICENAME,
+                                       &pdev->dev, NULL, &jornada_bl_ops,
+                                       &props);
        if (IS_ERR(bd)) {
                ret = PTR_ERR(bd);
                dev_err(&pdev->dev, "failed to register device, err=%x\n", ret);
@@ -139,18 +140,8 @@ static int jornada_bl_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int jornada_bl_remove(struct platform_device *pdev)
-{
-       struct backlight_device *bd = platform_get_drvdata(pdev);
-
-       backlight_device_unregister(bd);
-
-       return 0;
-}
-
 static struct platform_driver jornada_bl_driver = {
        .probe          = jornada_bl_probe,
-       .remove         = jornada_bl_remove,
        .driver = {
                .name   = "jornada_bl",
        },
index b061413f1a65c8968ef47542623c6d7f3a6028af..da3876c9b3ae70465b887838958687c7fd2a9675 100644 (file)
@@ -100,7 +100,8 @@ static int jornada_lcd_probe(struct platform_device *pdev)
        struct lcd_device *lcd_device;
        int ret;
 
-       lcd_device = lcd_device_register(S1D_DEVICENAME, &pdev->dev, NULL, &jornada_lcd_props);
+       lcd_device = devm_lcd_device_register(&pdev->dev, S1D_DEVICENAME,
+                                       &pdev->dev, NULL, &jornada_lcd_props);
 
        if (IS_ERR(lcd_device)) {
                ret = PTR_ERR(lcd_device);
@@ -119,18 +120,8 @@ static int jornada_lcd_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int jornada_lcd_remove(struct platform_device *pdev)
-{
-       struct lcd_device *lcd_device = platform_get_drvdata(pdev);
-
-       lcd_device_unregister(lcd_device);
-
-       return 0;
-}
-
 static struct platform_driver jornada_lcd_driver = {
        .probe  = jornada_lcd_probe,
-       .remove = jornada_lcd_remove,
        .driver = {
                .name   = "jornada_lcd",
        },
index 7592cc25c963e11638175ff474aaa82c2888a46d..84a110a719cbd73256c326b13fe19619b6cfde74 100644 (file)
@@ -78,7 +78,7 @@ static struct kb3886bl_machinfo *bl_machinfo;
 static unsigned long kb3886bl_flags;
 #define KB3886BL_SUSPENDED     0x01
 
-static struct dmi_system_id __initdata kb3886bl_device_table[] = {
+static struct dmi_system_id kb3886bl_device_table[] __initdata = {
        {
                .ident = "Sahara Touch-iT",
                .matches = {
index b5fc13bc24e7d1567c8318f55760cb6d6d456bc9..63e763828e0e1a062bdc4c8577eaf3027e4b5564 100644 (file)
@@ -223,8 +223,8 @@ static int l4f00242t03_probe(struct spi_device *spi)
                return PTR_ERR(priv->core_reg);
        }
 
-       priv->ld = lcd_device_register("l4f00242t03",
-                                       &spi->dev, priv, &l4f_ops);
+       priv->ld = devm_lcd_device_register(&spi->dev, "l4f00242t03", &spi->dev,
+                                       priv, &l4f_ops);
        if (IS_ERR(priv->ld))
                return PTR_ERR(priv->ld);
 
@@ -243,8 +243,6 @@ static int l4f00242t03_remove(struct spi_device *spi)
        struct l4f00242t03_priv *priv = spi_get_drvdata(spi);
 
        l4f00242t03_lcd_power_set(priv->ld, FB_BLANK_POWERDOWN);
-       lcd_device_unregister(priv->ld);
-
        return 0;
 }
 
index cae80d555e841fcd22dc844c9bf0d6fe12b87539..2ca3a040007bb6298c3c571b7d788900d8aa3759 100644 (file)
@@ -125,7 +125,7 @@ static bool lp855x_is_valid_rom_area(struct lp855x *lp, u8 addr)
                return false;
        }
 
-       return (addr >= start && addr <= end);
+       return addr >= start && addr <= end;
 }
 
 static int lp8557_bl_off(struct lp855x *lp)
index e49905d495dcd66102df9113f068128aee4f8a7b..daba34dc46d49556738737fc0f4e21623b9d1f85 100644 (file)
@@ -63,13 +63,13 @@ static struct lp8788_bl_config default_bl_config = {
 
 static inline bool is_brightness_ctrl_by_pwm(enum lp8788_bl_ctrl_mode mode)
 {
-       return (mode == LP8788_BL_COMB_PWM_BASED);
+       return mode == LP8788_BL_COMB_PWM_BASED;
 }
 
 static inline bool is_brightness_ctrl_by_register(enum lp8788_bl_ctrl_mode mode)
 {
-       return (mode == LP8788_BL_REGISTER_ONLY ||
-               mode == LP8788_BL_COMB_REGISTER_BASED);
+       return mode == LP8788_BL_REGISTER_ONLY ||
+               mode == LP8788_BL_COMB_REGISTER_BASED;
 }
 
 static int lp8788_backlight_configure(struct lp8788_bl *bl)
index ac11a4650c19ea92fcfe26d6720fda8d64913fbf..a0dcd88ac74fce38d77531df242a690c4bc1eb12 100644 (file)
@@ -146,8 +146,8 @@ static int omapbl_probe(struct platform_device *pdev)
        memset(&props, 0, sizeof(struct backlight_properties));
        props.type = BACKLIGHT_RAW;
        props.max_brightness = OMAPBL_MAX_INTENSITY;
-       dev = backlight_device_register("omap-bl", &pdev->dev, bl, &omapbl_ops,
-                                       &props);
+       dev = devm_backlight_device_register(&pdev->dev, "omap-bl", &pdev->dev,
+                                       bl, &omapbl_ops, &props);
        if (IS_ERR(dev))
                return PTR_ERR(dev);
 
@@ -170,20 +170,10 @@ static int omapbl_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int omapbl_remove(struct platform_device *pdev)
-{
-       struct backlight_device *dev = platform_get_drvdata(pdev);
-
-       backlight_device_unregister(dev);
-
-       return 0;
-}
-
 static SIMPLE_DEV_PM_OPS(omapbl_pm_ops, omapbl_suspend, omapbl_resume);
 
 static struct platform_driver omapbl_driver = {
        .probe          = omapbl_probe,
-       .remove         = omapbl_remove,
        .driver         = {
                .name   = "omap-bl",
                .pm     = &omapbl_pm_ops,
index fdbb6ee5027ce18f3e21ada191bdb929d193f225..f5a5202dd79d4c1cbba5b11631f323fa5bf59ee3 100644 (file)
@@ -118,8 +118,9 @@ static int ot200_backlight_probe(struct platform_device *pdev)
        props.brightness = 100;
        props.type = BACKLIGHT_RAW;
 
-       bl = backlight_device_register(dev_name(&pdev->dev), &pdev->dev, data,
-                                       &ot200_backlight_ops, &props);
+       bl = devm_backlight_device_register(&pdev->dev, dev_name(&pdev->dev),
+                                       &pdev->dev, data, &ot200_backlight_ops,
+                                       &props);
        if (IS_ERR(bl)) {
                dev_err(&pdev->dev, "failed to register backlight\n");
                retval = PTR_ERR(bl);
@@ -137,10 +138,6 @@ error_devm_kzalloc:
 
 static int ot200_backlight_remove(struct platform_device *pdev)
 {
-       struct backlight_device *bl = platform_get_drvdata(pdev);
-
-       backlight_device_unregister(bl);
-
        /* on module unload set brightness to 100% */
        cs5535_mfgpt_write(pwm_timer, MFGPT_REG_COUNTER, 0);
        cs5535_mfgpt_write(pwm_timer, MFGPT_REG_SETUP, MFGPT_SETUP_CNTEN);
index b8db9338cacddcdadc9cd7d2c08364a9604c4bff..3ad676558c803fdf0b7d896f7aaee086de2804c6 100644 (file)
@@ -105,8 +105,9 @@ static int tosa_bl_probe(struct i2c_client *client,
        memset(&props, 0, sizeof(struct backlight_properties));
        props.type = BACKLIGHT_RAW;
        props.max_brightness = 512 - 1;
-       data->bl = backlight_device_register("tosa-bl", &client->dev, data,
-                                            &bl_ops, &props);
+       data->bl = devm_backlight_device_register(&client->dev, "tosa-bl",
+                                               &client->dev, data, &bl_ops,
+                                               &props);
        if (IS_ERR(data->bl)) {
                ret = PTR_ERR(data->bl);
                goto err_reg;
@@ -128,9 +129,7 @@ static int tosa_bl_remove(struct i2c_client *client)
 {
        struct tosa_bl_data *data = i2c_get_clientdata(client);
 
-       backlight_device_unregister(data->bl);
        data->bl = NULL;
-
        return 0;
 }
 
index be5d636764bf4bc9b8eec7417c0730dea0914e26..f08d641ccd0103d49ad6421f9473368e1053317b 100644 (file)
@@ -206,8 +206,8 @@ static int tosa_lcd_probe(struct spi_device *spi)
 
        tosa_lcd_tg_on(data);
 
-       data->lcd = lcd_device_register("tosa-lcd", &spi->dev, data,
-                       &tosa_lcd_ops);
+       data->lcd = devm_lcd_device_register(&spi->dev, "tosa-lcd", &spi->dev,
+                                       data, &tosa_lcd_ops);
 
        if (IS_ERR(data->lcd)) {
                ret = PTR_ERR(data->lcd);
@@ -226,8 +226,6 @@ static int tosa_lcd_remove(struct spi_device *spi)
 {
        struct tosa_lcd_data *data = spi_get_drvdata(spi);
 
-       lcd_device_unregister(data->lcd);
-
        if (data->i2c)
                i2c_unregister_device(data->i2c);
 
index 7b07135ab26ef722824c7387a9ec9df547d36f49..c0227f9418ebd772f90af33406c10b0f8b3b6b0d 100644 (file)
@@ -762,7 +762,8 @@ static int vlynq_remove(struct platform_device *pdev)
 
        device_unregister(&dev->dev);
        iounmap(dev->local);
-       release_mem_region(dev->regs_start, dev->regs_end - dev->regs_start);
+       release_mem_region(dev->regs_start,
+                          dev->regs_end - dev->regs_start + 1);
 
        kfree(dev);
 
index e36b18b2817b9e655f524d79e8edf753cabece68..9709b8b484bacc8fcd9ddb4c1a88e137e2afa961 100644 (file)
 #include <linux/of_gpio.h>
 #include <linux/err.h>
 #include <linux/of.h>
+#include <linux/delay.h>
 
 #include "../w1.h"
 #include "../w1_int.h"
 
+static u8 w1_gpio_set_pullup(void *data, int delay)
+{
+       struct w1_gpio_platform_data *pdata = data;
+
+       if (delay) {
+               pdata->pullup_duration = delay;
+       } else {
+               if (pdata->pullup_duration) {
+                       gpio_direction_output(pdata->pin, 1);
+
+                       msleep(pdata->pullup_duration);
+
+                       gpio_direction_input(pdata->pin);
+               }
+               pdata->pullup_duration = 0;
+       }
+
+       return 0;
+}
+
 static void w1_gpio_write_bit_dir(void *data, u8 bit)
 {
        struct w1_gpio_platform_data *pdata = data;
@@ -132,6 +153,7 @@ static int w1_gpio_probe(struct platform_device *pdev)
        } else {
                gpio_direction_input(pdata->pin);
                master->write_bit = w1_gpio_write_bit_dir;
+               master->set_pullup = w1_gpio_set_pullup;
        }
 
        err = w1_add_master_device(master);
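
Editor's note: the new set_pullup callback emulates the strong pullup in bit-bang mode; a non-zero delay argument only records the requested duration, and a later call with delay == 0 drives the line high for that long and then releases it. The calling convention below is inferred from the callback itself (the actual call sites live in the w1 core, not in this diff), so treat it as a sketch only.

/* Illustrative caller, not taken from w1_io.c. */
static void example_write_with_pullup(struct w1_gpio_platform_data *pdata,
				      int duration_ms)
{
	w1_gpio_set_pullup(pdata, duration_ms);	/* arm: remember the duration */
	/* ... bit-bang the final write slot here ... */
	w1_gpio_set_pullup(pdata, 0);		/* drive high, sleep, release */
}
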
index 5a98649f6abc27d563bf8ac957ddc0523a21f8e0..590bd8a7cd1bf3f071b145a7a48c47c30cabe510 100644 (file)
@@ -117,18 +117,6 @@ int w1_add_master_device(struct w1_bus_master *master)
                printk(KERN_ERR "w1_add_master_device: invalid function set\n");
                return(-EINVAL);
         }
-       /* While it would be electrically possible to make a device that
-        * generated a strong pullup in bit bang mode, only hardware that
-        * controls 1-wire time frames are even expected to support a strong
-        * pullup.  w1_io.c would need to support calling set_pullup before
-        * the last write_bit operation of a w1_write_8 which it currently
-        * doesn't.
-        */
-       if (!master->write_byte && !master->touch_bit && master->set_pullup) {
-               printk(KERN_ERR "w1_add_master_device: set_pullup requires "
-                       "write_byte or touch_bit, disabling\n");
-               master->set_pullup = NULL;
-       }
 
        /* Lock until the device is added (or not) to w1_masters. */
        mutex_lock(&w1_mlock);
index 526e4bbbde59e4936f91367101adac8c7373053e..276cb6ed0b9343250639674fb67340eb8d516568 100644 (file)
@@ -147,11 +147,11 @@ int afs_proc_init(void)
        if (!proc_afs)
                goto error_dir;
 
-       p = proc_create("cells", 0, proc_afs, &afs_proc_cells_fops);
+       p = proc_create("cells", S_IFREG | S_IRUGO | S_IWUSR, proc_afs, &afs_proc_cells_fops);
        if (!p)
                goto error_cells;
 
-       p = proc_create("rootcell", 0, proc_afs, &afs_proc_rootcell_fops);
+       p = proc_create("rootcell", S_IFREG | S_IRUGO | S_IWUSR, proc_afs, &afs_proc_rootcell_fops);
        if (!p)
                goto error_rootcell;
 
index 4218e26df916245818272785203358b5cd8a130f..acf32054edd87a6f7360cfc17b2a1f5ddd6ee85f 100644 (file)
@@ -104,7 +104,7 @@ struct autofs_sb_info {
        u32 magic;
        int pipefd;
        struct file *pipe;
-       pid_t oz_pgrp;
+       struct pid *oz_pgrp;
        int catatonic;
        int version;
        int sub_version;
@@ -140,7 +140,7 @@ static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry)
    filesystem without "magic".) */
 
 static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
-       return sbi->catatonic || task_pgrp_nr(current) == sbi->oz_pgrp;
+       return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
 }
 
 /* Does a dentry have some pending activity? */
index 1818ce7f5a06049178e968d978733fe29c920e1f..3182c0e68b4204cb2aec089b5d960b7053534757 100644 (file)
@@ -346,6 +346,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
 {
        int pipefd;
        int err = 0;
+       struct pid *new_pid = NULL;
 
        if (param->setpipefd.pipefd == -1)
                return -EINVAL;
@@ -357,7 +358,17 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
                mutex_unlock(&sbi->wq_mutex);
                return -EBUSY;
        } else {
-               struct file *pipe = fget(pipefd);
+               struct file *pipe;
+
+               new_pid = get_task_pid(current, PIDTYPE_PGID);
+
+               if (ns_of_pid(new_pid) != ns_of_pid(sbi->oz_pgrp)) {
+                       AUTOFS_WARN("Not allowed to change PID namespace");
+                       err = -EINVAL;
+                       goto out;
+               }
+
+               pipe = fget(pipefd);
                if (!pipe) {
                        err = -EBADF;
                        goto out;
@@ -367,12 +378,13 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
                        fput(pipe);
                        goto out;
                }
-               sbi->oz_pgrp = task_pgrp_nr(current);
+               swap(sbi->oz_pgrp, new_pid);
                sbi->pipefd = pipefd;
                sbi->pipe = pipe;
                sbi->catatonic = 0;
        }
 out:
+       put_pid(new_pid);
        mutex_unlock(&sbi->wq_mutex);
        return err;
 }
index 3d9d3f5d5dda688bcd9bfc666210553ffa73be0d..394e90b02c5e60783b828a2d39d7698806505930 100644 (file)
@@ -402,6 +402,20 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
                        goto next;
                }
 
+               if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)) {
+                       DPRINTK("checking symlink %p %.*s",
+                               dentry, (int)dentry->d_name.len, dentry->d_name.name);
+                       /*
+                        * A symlink can't be "busy" in the usual sense so
+                        * just check last used for expire timeout.
+                        */
+                       if (autofs4_can_expire(dentry, timeout, do_now)) {
+                               expired = dentry;
+                               goto found;
+                       }
+                       goto next;
+               }
+
                if (simple_empty(dentry))
                        goto next;
 
index 3b9cc9b973c25086992cb7e2273f1286a76f084c..d7bd395ab5865d070b547ac31c03a85b58ae6c46 100644 (file)
@@ -56,8 +56,11 @@ void autofs4_kill_sb(struct super_block *sb)
         * just call kill_anon_super when we are called from
         * deactivate_super.
         */
-       if (sbi) /* Free wait queues, close pipe */
+       if (sbi) {
+               /* Free wait queues, close pipe */
                autofs4_catatonic_mode(sbi);
+               put_pid(sbi->oz_pgrp);
+       }
 
        DPRINTK("shutting down");
        kill_litter_super(sb);
@@ -80,7 +83,7 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root)
        if (!gid_eq(root_inode->i_gid, GLOBAL_ROOT_GID))
                seq_printf(m, ",gid=%u",
                        from_kgid_munged(&init_user_ns, root_inode->i_gid));
-       seq_printf(m, ",pgrp=%d", sbi->oz_pgrp);
+       seq_printf(m, ",pgrp=%d", pid_vnr(sbi->oz_pgrp));
        seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ);
        seq_printf(m, ",minproto=%d", sbi->min_proto);
        seq_printf(m, ",maxproto=%d", sbi->max_proto);
@@ -124,7 +127,8 @@ static const match_table_t tokens = {
 };
 
 static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
-               pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto)
+                        int *pgrp, bool *pgrp_set, unsigned int *type,
+                        int *minproto, int *maxproto)
 {
        char *p;
        substring_t args[MAX_OPT_ARGS];
@@ -132,7 +136,6 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
 
        *uid = current_uid();
        *gid = current_gid();
-       *pgrp = task_pgrp_nr(current);
 
        *minproto = AUTOFS_MIN_PROTO_VERSION;
        *maxproto = AUTOFS_MAX_PROTO_VERSION;
@@ -171,6 +174,7 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
                        if (match_int(args, &option))
                                return 1;
                        *pgrp = option;
+                       *pgrp_set = true;
                        break;
                case Opt_minproto:
                        if (match_int(args, &option))
@@ -206,10 +210,13 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
        int pipefd;
        struct autofs_sb_info *sbi;
        struct autofs_info *ino;
+       int pgrp;
+       bool pgrp_set = false;
+       int ret = -EINVAL;
 
        sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
        if (!sbi)
-               goto fail_unlock;
+               return -ENOMEM;
        DPRINTK("starting up, sbi = %p",sbi);
 
        s->s_fs_info = sbi;
@@ -218,7 +225,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
        sbi->pipe = NULL;
        sbi->catatonic = 1;
        sbi->exp_timeout = 0;
-       sbi->oz_pgrp = task_pgrp_nr(current);
+       sbi->oz_pgrp = NULL;
        sbi->sb = s;
        sbi->version = 0;
        sbi->sub_version = 0;
@@ -243,8 +250,10 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
         * Get the root inode and dentry, but defer checking for errors.
         */
        ino = autofs4_new_ino(sbi);
-       if (!ino)
+       if (!ino) {
+               ret = -ENOMEM;
                goto fail_free;
+       }
        root_inode = autofs4_get_inode(s, S_IFDIR | 0755);
        root = d_make_root(root_inode);
        if (!root)
@@ -255,12 +264,23 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 
        /* Can this call block? */
        if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid,
-                               &sbi->oz_pgrp, &sbi->type, &sbi->min_proto,
-                               &sbi->max_proto)) {
+                         &pgrp, &pgrp_set, &sbi->type, &sbi->min_proto,
+                         &sbi->max_proto)) {
                printk("autofs: called with bogus options\n");
                goto fail_dput;
        }
 
+       if (pgrp_set) {
+               sbi->oz_pgrp = find_get_pid(pgrp);
+               if (!sbi->oz_pgrp) {
+                       pr_warn("autofs: could not find process group %d\n",
+                               pgrp);
+                       goto fail_dput;
+               }
+       } else {
+               sbi->oz_pgrp = get_task_pid(current, PIDTYPE_PGID);
+       }
+
        if (autofs_type_trigger(sbi->type))
                __managed_dentry_set_managed(root);
 
@@ -284,14 +304,15 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
                sbi->version = sbi->max_proto;
        sbi->sub_version = AUTOFS_PROTO_SUBVERSION;
 
-       DPRINTK("pipe fd = %d, pgrp = %u", pipefd, sbi->oz_pgrp);
+       DPRINTK("pipe fd = %d, pgrp = %u", pipefd, pid_nr(sbi->oz_pgrp));
        pipe = fget(pipefd);
-       
+
        if (!pipe) {
                printk("autofs: could not open pipe file descriptor\n");
                goto fail_dput;
        }
-       if (autofs_prepare_pipe(pipe) < 0)
+       ret = autofs_prepare_pipe(pipe);
+       if (ret < 0)
                goto fail_fput;
        sbi->pipe = pipe;
        sbi->pipefd = pipefd;
@@ -316,10 +337,10 @@ fail_dput:
 fail_ino:
        kfree(ino);
 fail_free:
+       put_pid(sbi->oz_pgrp);
        kfree(sbi);
        s->s_fs_info = NULL;
-fail_unlock:
-       return -EINVAL;
+       return ret;
 }
 
 struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode)
index 92ef341ba0cf35c1bd1b5f001b9b39068979f6ff..2caf36ac3e93e41d4f14ac48ef4bcedc4b682548 100644 (file)
@@ -558,7 +558,7 @@ static int autofs4_dir_symlink(struct inode *dir,
        dget(dentry);
        atomic_inc(&ino->count);
        p_ino = autofs4_dentry_ino(dentry->d_parent);
-       if (p_ino && dentry->d_parent != dentry)
+       if (p_ino && !IS_ROOT(dentry))
                atomic_inc(&p_ino->count);
 
        dir->i_mtime = CURRENT_TIME;
@@ -593,7 +593,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
 
        if (atomic_dec_and_test(&ino->count)) {
                p_ino = autofs4_dentry_ino(dentry->d_parent);
-               if (p_ino && dentry->d_parent != dentry)
+               if (p_ino && !IS_ROOT(dentry))
                        atomic_dec(&p_ino->count);
        }
        dput(ino->dentry);
@@ -732,7 +732,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t m
        dget(dentry);
        atomic_inc(&ino->count);
        p_ino = autofs4_dentry_ino(dentry->d_parent);
-       if (p_ino && dentry->d_parent != dentry)
+       if (p_ino && !IS_ROOT(dentry))
                atomic_inc(&p_ino->count);
        inc_nlink(dir);
        dir->i_mtime = CURRENT_TIME;
index f27c094a1919c2af890df6b1f2ac3a9e6a523b4c..1e8ea192be2b13612e9702c4296f57611e50ba09 100644 (file)
 
 static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
+       struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+       struct autofs_info *ino = autofs4_dentry_ino(dentry);
+       if (ino && !autofs4_oz_mode(sbi))
+               ino->last_used = jiffies;
        nd_set_link(nd, dentry->d_inode->i_private);
        return NULL;
 }
index 689e40d983ad64ca3726cd5f8dd64c44d49bb5e6..116fd38ee472c74f243c05428969c2bd1bbe85e2 100644 (file)
@@ -347,11 +347,23 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
        struct qstr qstr;
        char *name;
        int status, ret, type;
+       pid_t pid;
+       pid_t tgid;
 
        /* In catatonic mode, we don't wait for anybody */
        if (sbi->catatonic)
                return -ENOENT;
 
+       /*
+        * Try translating pids to the namespace of the daemon.
+        *
+        * Zero means failure: we are in an unrelated pid namespace.
+        */
+       pid = task_pid_nr_ns(current, ns_of_pid(sbi->oz_pgrp));
+       tgid = task_tgid_nr_ns(current, ns_of_pid(sbi->oz_pgrp));
+       if (pid == 0 || tgid == 0)
+               return -ENOENT;
+
        if (!dentry->d_inode) {
                /*
                 * A wait for a negative dentry is invalid for certain
@@ -417,8 +429,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
                wq->ino = autofs4_get_ino(sbi);
                wq->uid = current_uid();
                wq->gid = current_gid();
-               wq->pid = current->pid;
-               wq->tgid = current->tgid;
+               wq->pid = pid;
+               wq->tgid = tgid;
                wq->status = -EINTR; /* Status return if interrupted */
                wq->wait_ctr = 2;
 
index 571a423269085177a1d15e932f400ce1673619ca..1a965e654f2ed44608b2afd390bce1df68e36936 100644 (file)
@@ -140,6 +140,25 @@ static int padzero(unsigned long elf_bss)
 #define ELF_BASE_PLATFORM NULL
 #endif
 
+/*
+ * Use get_random_int() to implement AT_RANDOM while avoiding depletion
+ * of the entropy pool.
+ */
+static void get_atrandom_bytes(unsigned char *buf, size_t nbytes)
+{
+       unsigned char *p = buf;
+
+       while (nbytes) {
+               unsigned int random_variable;
+               size_t chunk = min(nbytes, sizeof(random_variable));
+
+               random_variable = get_random_int();
+               memcpy(p, &random_variable, chunk);
+               p += chunk;
+               nbytes -= chunk;
+       }
+}
+
 static int
 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
@@ -201,7 +220,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
-       get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
+       get_atrandom_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
@@ -543,9 +562,6 @@ out:
  * libraries.  There is no binary dependent code anywhere else.
  */
 
-#define INTERPRETER_NONE 0
-#define INTERPRETER_ELF 2
-
 #ifndef STACK_RND_MASK
 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))    /* 8MB of VA */
 #endif
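
Editor's note: the 16 bytes generated here are exposed to userspace through the AT_RANDOM auxiliary-vector entry (glibc uses them, for example, to seed its stack-protector canary). A small userspace sketch of reading them back, assuming glibc's getauxval() is available:

/* Userspace view of the same bytes (glibc >= 2.16). */
#include <stdio.h>
#include <sys/auxv.h>

int main(void)
{
	const unsigned char *rnd = (const unsigned char *)getauxval(AT_RANDOM);
	int i;

	if (!rnd)
		return 1;

	for (i = 0; i < 16; i++)
		printf("%02x", rnd[i]);
	printf("\n");
	return 0;
}
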
index dc52e13d58e021ac5cf7951d44c66c8e8d9a4993..3881610b64384cee634367c3c35d237a4fa2f2aa 100644 (file)
@@ -680,7 +680,8 @@ static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd,
        struct i2c_msg                  __user *tmsgs;
        struct i2c_msg32                __user *umsgs;
        compat_caddr_t                  datap;
-       int                             nmsgs, i;
+       u32                             nmsgs;
+       int                             i;
 
        if (get_user(nmsgs, &udata->nmsgs))
                return -EFAULT;
index bc3fbcd32558fd61823b126997ace2785d7bac21..e3ad709a4232f414b91fe1ed18989a07d0f7aaa3 100644 (file)
@@ -40,7 +40,6 @@
 
 #include <trace/events/task.h>
 #include "internal.h"
-#include "coredump.h"
 
 #include <trace/events/sched.h>
 
diff --git a/fs/coredump.h b/fs/coredump.h
deleted file mode 100644 (file)
index e39ff07..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _FS_COREDUMP_H
-#define _FS_COREDUMP_H
-
-extern int __get_dumpable(unsigned long mm_flags);
-
-#endif
index 7ea097f6b341f06982f3ea3b068de5755b1605e0..e1529b4c79b1c29b300ab6519a94d7b748069e33 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -62,7 +62,6 @@
 
 #include <trace/events/task.h>
 #include "internal.h"
-#include "coredump.h"
 
 #include <trace/events/sched.h>
 
@@ -843,7 +842,6 @@ static int exec_mmap(struct mm_struct *mm)
        tsk->active_mm = mm;
        activate_mm(active_mm, mm);
        task_unlock(tsk);
-       arch_pick_mmap_layout(mm);
        if (old_mm) {
                up_read(&old_mm->mmap_sem);
                BUG_ON(active_mm != old_mm);
@@ -1088,8 +1086,8 @@ int flush_old_exec(struct linux_binprm * bprm)
        bprm->mm = NULL;                /* We're using it now */
 
        set_fs(USER_DS);
-       current->flags &=
-               ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | PF_NOFREEZE);
+       current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
+                                       PF_NOFREEZE | PF_NO_SETAFFINITY);
        flush_thread();
        current->personality &= ~bprm->per_clear;
 
@@ -1139,9 +1137,7 @@ void setup_new_exec(struct linux_binprm * bprm)
 
        /* An exec changes our domain. We are no longer part of the thread
           group */
-
        current->self_exec_id++;
-                       
        flush_signal_handlers(current, 0);
        do_close_on_exec(current->files);
 }
@@ -1173,6 +1169,10 @@ void free_bprm(struct linux_binprm *bprm)
                mutex_unlock(&current->signal->cred_guard_mutex);
                abort_creds(bprm->cred);
        }
+       if (bprm->file) {
+               allow_write_access(bprm->file);
+               fput(bprm->file);
+       }
        /* If a binfmt changed the interp, free it. */
        if (bprm->interp != bprm->filename)
                kfree(bprm->interp);
@@ -1224,11 +1224,10 @@ EXPORT_SYMBOL(install_exec_creds);
  * - the caller must hold ->cred_guard_mutex to protect against
  *   PTRACE_ATTACH
  */
-static int check_unsafe_exec(struct linux_binprm *bprm)
+static void check_unsafe_exec(struct linux_binprm *bprm)
 {
        struct task_struct *p = current, *t;
        unsigned n_fs;
-       int res = 0;
 
        if (p->ptrace) {
                if (p->ptrace & PT_PTRACE_CAP)
@@ -1244,31 +1243,25 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
        if (current->no_new_privs)
                bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
 
+       t = p;
        n_fs = 1;
        spin_lock(&p->fs->lock);
        rcu_read_lock();
-       for (t = next_thread(p); t != p; t = next_thread(t)) {
+       while_each_thread(p, t) {
                if (t->fs == p->fs)
                        n_fs++;
        }
        rcu_read_unlock();
 
-       if (p->fs->users > n_fs) {
+       if (p->fs->users > n_fs)
                bprm->unsafe |= LSM_UNSAFE_SHARE;
-       } else {
-               res = -EAGAIN;
-               if (!p->fs->in_exec) {
-                       p->fs->in_exec = 1;
-                       res = 1;
-               }
-       }
+       else
+               p->fs->in_exec = 1;
        spin_unlock(&p->fs->lock);
-
-       return res;
 }
 
-/* 
- * Fill the binprm structure from the inode. 
+/*
+ * Fill the binprm structure from the inode.
  * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
  *
  * This may be called multiple times for binary chains (scripts for example).
@@ -1430,14 +1423,7 @@ static int exec_binprm(struct linux_binprm *bprm)
                audit_bprm(bprm);
                trace_sched_process_exec(current, old_pid, bprm);
                ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
-               current->did_exec = 1;
                proc_exec_connector(current);
-
-               if (bprm->file) {
-                       allow_write_access(bprm->file);
-                       fput(bprm->file);
-                       bprm->file = NULL; /* to catch use-after-free */
-               }
        }
 
        return ret;
@@ -1453,7 +1439,6 @@ static int do_execve_common(const char *filename,
        struct linux_binprm *bprm;
        struct file *file;
        struct files_struct *displaced;
-       bool clear_in_exec;
        int retval;
 
        /*
@@ -1485,10 +1470,7 @@ static int do_execve_common(const char *filename,
        if (retval)
                goto out_free;
 
-       retval = check_unsafe_exec(bprm);
-       if (retval < 0)
-               goto out_free;
-       clear_in_exec = retval;
+       check_unsafe_exec(bprm);
        current->in_execve = 1;
 
        file = open_exec(filename);
@@ -1504,7 +1486,7 @@ static int do_execve_common(const char *filename,
 
        retval = bprm_mm_init(bprm);
        if (retval)
-               goto out_file;
+               goto out_unmark;
 
        bprm->argc = count(argv, MAX_ARG_STRINGS);
        if ((retval = bprm->argc) < 0)
@@ -1551,15 +1533,8 @@ out:
                mmput(bprm->mm);
        }
 
-out_file:
-       if (bprm->file) {
-               allow_write_access(bprm->file);
-               fput(bprm->file);
-       }
-
 out_unmark:
-       if (clear_in_exec)
-               current->fs->in_exec = 0;
+       current->fs->in_exec = 0;
        current->in_execve = 0;
 
 out_free:
@@ -1609,67 +1584,22 @@ void set_binfmt(struct linux_binfmt *new)
        if (new)
                __module_get(new->module);
 }
-
 EXPORT_SYMBOL(set_binfmt);
 
 /*
- * set_dumpable converts traditional three-value dumpable to two flags and
- * stores them into mm->flags.  It modifies lower two bits of mm->flags, but
- * these bits are not changed atomically.  So get_dumpable can observe the
- * intermediate state.  To avoid doing unexpected behavior, get get_dumpable
- * return either old dumpable or new one by paying attention to the order of
- * modifying the bits.
- *
- * dumpable |   mm->flags (binary)
- * old  new | initial interim  final
- * ---------+-----------------------
- *  0    1  |   00      01      01
- *  0    2  |   00      10(*)   11
- *  1    0  |   01      00      00
- *  1    2  |   01      11      11
- *  2    0  |   11      10(*)   00
- *  2    1  |   11      11      01
- *
- * (*) get_dumpable regards interim value of 10 as 11.
+ * set_dumpable stores three-value SUID_DUMP_* into mm->flags.
  */
 void set_dumpable(struct mm_struct *mm, int value)
 {
-       switch (value) {
-       case SUID_DUMP_DISABLE:
-               clear_bit(MMF_DUMPABLE, &mm->flags);
-               smp_wmb();
-               clear_bit(MMF_DUMP_SECURELY, &mm->flags);
-               break;
-       case SUID_DUMP_USER:
-               set_bit(MMF_DUMPABLE, &mm->flags);
-               smp_wmb();
-               clear_bit(MMF_DUMP_SECURELY, &mm->flags);
-               break;
-       case SUID_DUMP_ROOT:
-               set_bit(MMF_DUMP_SECURELY, &mm->flags);
-               smp_wmb();
-               set_bit(MMF_DUMPABLE, &mm->flags);
-               break;
-       }
-}
-
-int __get_dumpable(unsigned long mm_flags)
-{
-       int ret;
+       unsigned long old, new;
 
-       ret = mm_flags & MMF_DUMPABLE_MASK;
-       return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret;
-}
+       if (WARN_ON((unsigned)value > SUID_DUMP_ROOT))
+               return;
 
-/*
- * This returns the actual value of the suid_dumpable flag. For things
- * that are using this for checking for privilege transitions, it must
- * test against SUID_DUMP_USER rather than treating it as a boolean
- * value.
- */
-int get_dumpable(struct mm_struct *mm)
-{
-       return __get_dumpable(mm->flags);
+       do {
+               old = ACCESS_ONCE(mm->flags);
+               new = (old & ~MMF_DUMPABLE_MASK) | value;
+       } while (cmpxchg(&mm->flags, old, new) != old);
 }
 
 SYSCALL_DEFINE3(execve,
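The new set_dumpable() above replaces the two-bit, two-write update (and its smp_wmb() ordering) with a single lock-free read-modify-write loop: read the flags word, compute the new value, and retry until no other writer has raced in between. A minimal user-space sketch of the same idiom, assuming a hypothetical 2-bit field at the bottom of a flags word and using the GCC/Clang __sync builtin in place of the kernel's cmpxchg():

    /* Sketch of the cmpxchg-style update loop used by set_dumpable() above.
     * DUMPABLE_MASK and set_field() are hypothetical names; the builtin
     * stands in for the kernel's cmpxchg(). */
    #define DUMPABLE_MASK 0x3UL

    static void set_field(unsigned long *flags, unsigned long value)
    {
            unsigned long old, new;

            do {
                    old = *(volatile unsigned long *)flags;  /* like ACCESS_ONCE() */
                    new = (old & ~DUMPABLE_MASK) | value;
            } while (__sync_val_compare_and_swap(flags, old, new) != old);
    }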
index bafdd48eefde60a926e8f5478cf97a0ee2e1f598..e66e4808719f1958cec123853cd7224ee103d479 100644 (file)
@@ -309,43 +309,17 @@ struct fname {
  */
 static void free_rb_tree_fname(struct rb_root *root)
 {
-       struct rb_node  *n = root->rb_node;
-       struct rb_node  *parent;
-       struct fname    *fname;
-
-       while (n) {
-               /* Do the node's children first */
-               if (n->rb_left) {
-                       n = n->rb_left;
-                       continue;
-               }
-               if (n->rb_right) {
-                       n = n->rb_right;
-                       continue;
-               }
-               /*
-                * The node has no children; free it, and then zero
-                * out parent's link to it.  Finally go to the
-                * beginning of the loop and try to free the parent
-                * node.
-                */
-               parent = rb_parent(n);
-               fname = rb_entry(n, struct fname, rb_hash);
-               while (fname) {
-                       struct fname * old = fname;
+       struct fname *fname, *next;
+
+       rbtree_postorder_for_each_entry_safe(fname, next, root, rb_hash)
+               do {
+                       struct fname *old = fname;
                        fname = fname->next;
-                       kfree (old);
-               }
-               if (!parent)
-                       *root = RB_ROOT;
-               else if (parent->rb_left == n)
-                       parent->rb_left = NULL;
-               else if (parent->rb_right == n)
-                       parent->rb_right = NULL;
-               n = parent;
-       }
-}
+                       kfree(old);
+               } while (fname);
 
+       *root = RB_ROOT;
+}
 
 static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp,
                                                           loff_t pos)
index 3f11656bd72e3c4e667bc666320a2f9e84d1bd4f..41eb9dcfac7e3ebc7b8e2099a80c892caabbfb19 100644 (file)
@@ -180,37 +180,12 @@ int ext4_setup_system_zone(struct super_block *sb)
 /* Called when the filesystem is unmounted */
 void ext4_release_system_zone(struct super_block *sb)
 {
-       struct rb_node  *n = EXT4_SB(sb)->system_blks.rb_node;
-       struct rb_node  *parent;
-       struct ext4_system_zone *entry;
+       struct ext4_system_zone *entry, *n;
 
-       while (n) {
-               /* Do the node's children first */
-               if (n->rb_left) {
-                       n = n->rb_left;
-                       continue;
-               }
-               if (n->rb_right) {
-                       n = n->rb_right;
-                       continue;
-               }
-               /*
-                * The node has no children; free it, and then zero
-                * out parent's link to it.  Finally go to the
-                * beginning of the loop and try to free the parent
-                * node.
-                */
-               parent = rb_parent(n);
-               entry = rb_entry(n, struct ext4_system_zone, node);
+       rbtree_postorder_for_each_entry_safe(entry, n,
+                       &EXT4_SB(sb)->system_blks, node)
                kmem_cache_free(ext4_system_zone_cachep, entry);
-               if (!parent)
-                       EXT4_SB(sb)->system_blks = RB_ROOT;
-               else if (parent->rb_left == n)
-                       parent->rb_left = NULL;
-               else if (parent->rb_right == n)
-                       parent->rb_right = NULL;
-               n = parent;
-       }
+
        EXT4_SB(sb)->system_blks = RB_ROOT;
 }
 
index 680bb3388919a444b25e7dc0ee660b92ad475ce2..d638c57e996e6f775143cfac459cc7eccb607f3b 100644 (file)
@@ -353,41 +353,16 @@ struct fname {
  */
 static void free_rb_tree_fname(struct rb_root *root)
 {
-       struct rb_node  *n = root->rb_node;
-       struct rb_node  *parent;
-       struct fname    *fname;
-
-       while (n) {
-               /* Do the node's children first */
-               if (n->rb_left) {
-                       n = n->rb_left;
-                       continue;
-               }
-               if (n->rb_right) {
-                       n = n->rb_right;
-                       continue;
-               }
-               /*
-                * The node has no children; free it, and then zero
-                * out parent's link to it.  Finally go to the
-                * beginning of the loop and try to free the parent
-                * node.
-                */
-               parent = rb_parent(n);
-               fname = rb_entry(n, struct fname, rb_hash);
+       struct fname *fname, *next;
+
+       rbtree_postorder_for_each_entry_safe(fname, next, root, rb_hash)
                while (fname) {
                        struct fname *old = fname;
                        fname = fname->next;
                        kfree(old);
                }
-               if (!parent)
-                       *root = RB_ROOT;
-               else if (parent->rb_left == n)
-                       parent->rb_left = NULL;
-               else if (parent->rb_right == n)
-                       parent->rb_right = NULL;
-               n = parent;
-       }
+
+       *root = RB_ROOT;
 }
 
 
index 91ad9e1c94417a813c1661af830bc2ecbc962033..d22c1a209808850211779f1db68525bf0858b2b4 100644 (file)
@@ -325,19 +325,26 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
 
        last_block = (i_size_read(inode) + (blocksize - 1)) >> blocksize_bits;
        if (sector >= last_block) {
-               if (!create)
-                       return 0;
-
                /*
-                * ->mmu_private can access on only allocation path.
-                * (caller must hold ->i_mutex)
+                * Both ->mmu_private and ->i_disksize can be accessed
+                * only on the allocation path. (caller must hold ->i_mutex)
                 */
-               last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1))
+               last_block = (MSDOS_I(inode)->i_disksize + (blocksize - 1))
                        >> blocksize_bits;
+               if (!create) {
+                       /* Map a block in fallocated region */
+                       if (atomic_read(&MSDOS_I(inode)->beyond_isize))
+                               if (sector < last_block)
+                                       goto out_map_cluster;
+
+                       return 0;
+               }
+
                if (sector >= last_block)
                        return 0;
        }
 
+out_map_cluster:
        cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits);
        offset  = sector & (sbi->sec_per_clus - 1);
        cluster = fat_bmap_cluster(inode, cluster);
index 7c31f4bc74a9d3e2e358baa27688e329b77d9fc5..b8842769b0c55542e27e784f2a7b894a1ddb4c09 100644 (file)
@@ -118,7 +118,8 @@ struct msdos_inode_info {
        unsigned int cache_valid_id;
 
        /* NOTE: mmu_private is 64bits, so must hold ->i_mutex to access */
-       loff_t mmu_private;     /* physically allocated size */
+       loff_t mmu_private;     /* physically allocated size (initialized) */
+       loff_t i_disksize;      /* physically allocated size (uninitialized) */
 
        int i_start;            /* first cluster or 0 */
        int i_logstart;         /* logical first cluster */
@@ -128,6 +129,9 @@ struct msdos_inode_info {
        struct hlist_node i_dir_hash;   /* hash by i_logstart */
        struct rw_semaphore truncate_lock; /* protect bmap against truncate */
        struct inode vfs_inode;
+
+       /* for getting block number beyond file size in case of fallocate */
+       atomic_t beyond_isize;
 };
 
 struct fat_slot_info {
index 9b104f543056238016c683ef822046a784169f50..79db8b6ab3478931e29956730e606021fb9e7e45 100644 (file)
 #include <linux/blkdev.h>
 #include <linux/fsnotify.h>
 #include <linux/security.h>
+#include <linux/falloc.h>
 #include "fat.h"
 
+static long fat_fallocate(struct file *file, int mode,
+                         loff_t offset, loff_t len);
+
 static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
 {
        u32 attr;
@@ -182,6 +186,7 @@ const struct file_operations fat_file_operations = {
 #endif
        .fsync          = fat_file_fsync,
        .splice_read    = generic_file_splice_read,
+       .fallocate      = fat_fallocate,
 };
 
 static int fat_cont_expand(struct inode *inode, loff_t size)
@@ -220,6 +225,75 @@ out:
        return err;
 }
 
+/*
+ * Preallocate space for a file. This implements fat's fallocate file
+ * operation, which gets called from the sys_fallocate system call. User
+ * space requests len bytes at offset. If FALLOC_FL_KEEP_SIZE is set
+ * we just allocate clusters without zeroing them out. Otherwise we
+ * allocate and zero out clusters via an expanding truncate.
+ */
+static long fat_fallocate(struct file *file, int mode,
+                         loff_t offset, loff_t len)
+{
+       int cluster;
+       int nr_cluster; /* Number of clusters to be allocated */
+       loff_t mm_bytes; /* Number of bytes to be allocated for file */
+       struct inode *inode = file->f_mapping->host;
+       struct super_block *sb = inode->i_sb;
+       struct msdos_sb_info *sbi = MSDOS_SB(sb);
+       int err = 0;
+
+       /* No support for hole punch or other fallocate flags. */
+       if (mode & ~FALLOC_FL_KEEP_SIZE)
+               return -EOPNOTSUPP;
+
+       /* No support for dir */
+       if (!S_ISREG(inode->i_mode))
+               return -EOPNOTSUPP;
+
+       mutex_lock(&inode->i_mutex);
+       if ((offset + len) <= MSDOS_I(inode)->i_disksize)
+               goto error;
+
+       err = inode_newsize_ok(inode, (len + offset));
+       if (err)
+               goto error;
+
+       if (mode & FALLOC_FL_KEEP_SIZE) {
+               /* First compute the number of clusters to be allocated */
+               mm_bytes = offset + len - round_up(MSDOS_I(inode)->mmu_private,
+                       sbi->cluster_size);
+               nr_cluster = (mm_bytes + (sbi->cluster_size - 1)) >>
+                       sbi->cluster_bits;
+
+               /* Start the allocation. We are not zeroing out the clusters */
+               while (nr_cluster-- > 0) {
+                       err = fat_alloc_clusters(inode, &cluster, 1);
+                       if (err) {
+                               fat_msg(sb, KERN_ERR,
+                                       "fat_fallocate(): fat_alloc_clusters() error");
+                               goto error;
+                       }
+                       err = fat_chain_add(inode, cluster, 1);
+                       if (err) {
+                               fat_free_clusters(inode, cluster);
+                               goto error;
+                       }
+                       MSDOS_I(inode)->i_disksize += sbi->cluster_size;
+               }
+       } else {
+               /* This is just an expanding truncate */
+               err = fat_cont_expand(inode, (offset + len));
+               if (err)
+                       fat_msg(sb, KERN_ERR,
+                               "fat_fallocate(): fat_cont_expand() error");
+       }
+
+error:
+       mutex_unlock(&inode->i_mutex);
+       return err;
+}
+
 /* Free all clusters after the skip'th cluster. */
 static int fat_free(struct inode *inode, int skip)
 {
@@ -300,8 +374,10 @@ void fat_truncate_blocks(struct inode *inode, loff_t offset)
         * This protects against truncating a file bigger than it was then
         * trying to write into the hole.
         */
-       if (MSDOS_I(inode)->mmu_private > offset)
+       if (MSDOS_I(inode)->i_disksize > offset) {
                MSDOS_I(inode)->mmu_private = offset;
+               MSDOS_I(inode)->i_disksize = offset;
+       }
 
        nr_clusters = (offset + (cluster_size - 1)) >> sbi->cluster_bits;
 
index 854b578f6695eae6e0ef25d7af4dcdc8546d6044..ba9831d9f648052be657b3be38c1763d095f40b9 100644 (file)
@@ -54,6 +54,25 @@ static int fat_add_cluster(struct inode *inode)
        return err;
 }
 
+static void check_fallocated_region(struct inode *inode, sector_t iblock,
+               unsigned long *max_blocks, struct buffer_head *bh_result)
+{
+       struct super_block *sb = inode->i_sb;
+       sector_t last_block, disk_block;
+       const unsigned long blocksize = sb->s_blocksize;
+       const unsigned char blocksize_bits = sb->s_blocksize_bits;
+
+       last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1))
+               >> blocksize_bits;
+       disk_block = (MSDOS_I(inode)->i_disksize + (blocksize - 1))
+               >> blocksize_bits;
+       if (iblock >= last_block && iblock <= disk_block) {
+               MSDOS_I(inode)->mmu_private += *max_blocks << blocksize_bits;
+               set_buffer_new(bh_result);
+       }
+
+}
+
 static inline int __fat_get_block(struct inode *inode, sector_t iblock,
                                  unsigned long *max_blocks,
                                  struct buffer_head *bh_result, int create)
@@ -68,8 +87,11 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
        if (err)
                return err;
        if (phys) {
-               map_bh(bh_result, sb, phys);
                *max_blocks = min(mapped_blocks, *max_blocks);
+               if (create)
+                       check_fallocated_region(inode, iblock, max_blocks,
+                               bh_result);
+               map_bh(bh_result, sb, phys);
                return 0;
        }
        if (!create)
@@ -93,6 +115,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
 
        *max_blocks = min(mapped_blocks, *max_blocks);
        MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
+       MSDOS_I(inode)->i_disksize = MSDOS_I(inode)->mmu_private;
 
        err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
        if (err)
@@ -206,6 +229,13 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
                loff_t size = offset + iov_length(iov, nr_segs);
                if (MSDOS_I(inode)->mmu_private < size)
                        return 0;
+
+               /*
+                * In case of writing in fallocated region, return 0 and
+                * fallback to buffered write.
+                */
+               if (MSDOS_I(inode)->i_disksize > MSDOS_I(inode)->mmu_private)
+                       return 0;
        }
 
        /*
@@ -226,7 +256,10 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
 
        /* fat_get_cluster() assumes the requested blocknr isn't truncated. */
        down_read(&MSDOS_I(mapping->host)->truncate_lock);
+       /* To get block number beyond file size in fallocated region */
+       atomic_set(&MSDOS_I(mapping->host)->beyond_isize, 1);
        blocknr = generic_block_bmap(mapping, block, fat_get_block);
+       atomic_set(&MSDOS_I(mapping->host)->beyond_isize, 0);
        up_read(&MSDOS_I(mapping->host)->truncate_lock);
 
        return blocknr;
@@ -408,6 +441,7 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
                if (error < 0)
                        return error;
                MSDOS_I(inode)->mmu_private = inode->i_size;
+               MSDOS_I(inode)->i_disksize = inode->i_size;
 
                set_nlink(inode, fat_subdirs(inode));
        } else { /* not a directory */
@@ -423,6 +457,7 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
                inode->i_fop = &fat_file_operations;
                inode->i_mapping->a_ops = &fat_aops;
                MSDOS_I(inode)->mmu_private = inode->i_size;
+               MSDOS_I(inode)->i_disksize = inode->i_size;
        }
        if (de->attr & ATTR_SYS) {
                if (sbi->options.sys_immutable)
@@ -494,6 +529,25 @@ static void fat_evict_inode(struct inode *inode)
        if (!inode->i_nlink) {
                inode->i_size = 0;
                fat_truncate_blocks(inode, 0);
+       } else {
+               /* Release unwritten fallocated blocks on inode eviction. */
+               if (MSDOS_I(inode)->mmu_private < MSDOS_I(inode)->i_disksize) {
+                       int err;
+                       fat_truncate_blocks(inode, MSDOS_I(inode)->mmu_private);
+                       /* Fallocate results in updating the i_start/i_logstart
+                        * for the zero byte file. So, make it return to the
+                        * original state during evict and commit it
+                        * synchronously to avoid any corruption on the next
+                        * access to the cluster chain for the file.
+                        */
+                       err = fat_sync_inode(inode);
+                       if (err) {
+                               fat_msg(inode->i_sb, KERN_WARNING, "Failed to "
+                               "update on disk inode for unused fallocated "
+                               "blocks, inode could be corrupted. Please run "
+                               "fsck");
+                       }
+               }
        }
        invalidate_inode_buffers(inode);
        clear_inode(inode);
@@ -1223,6 +1277,7 @@ static int fat_read_root(struct inode *inode)
                           & ~((loff_t)sbi->cluster_size - 1)) >> 9;
        MSDOS_I(inode)->i_logstart = 0;
        MSDOS_I(inode)->mmu_private = inode->i_size;
+       MSDOS_I(inode)->i_disksize = inode->i_size;
 
        fat_save_attrs(inode, ATTR_DIR);
        inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
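The new i_disksize field tracks the preallocated (but still uninitialized) size, while mmu_private keeps tracking the initialized size, so a block lies in the fallocated region when it is at or past the last initialized block but not past the last preallocated one; that is the test both check_fallocated_region() and the _fat_bmap() beyond_isize path above rely on. A standalone sketch of the boundary arithmetic with hypothetical values:

    #include <stdbool.h>

    static bool in_fallocated_region(unsigned long long iblock,
                                     unsigned long long mmu_private,  /* initialized bytes */
                                     unsigned long long i_disksize,   /* preallocated bytes */
                                     unsigned blocksize_bits)
    {
            unsigned long long blocksize  = 1ULL << blocksize_bits;
            unsigned long long last_block = (mmu_private + blocksize - 1) >> blocksize_bits;
            unsigned long long disk_block = (i_disksize + blocksize - 1) >> blocksize_bits;

            /* Past the initialized data, but within the preallocated space. */
            return iblock >= last_block && iblock <= disk_block;
    }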
index 37213d075f3c5c9f29029b280b093781ddb526ca..3ebda928229cb375486da2a05f15e860b5380fbc 100644 (file)
@@ -178,64 +178,6 @@ const struct dentry_operations hfsplus_dentry_operations = {
        .d_compare    = hfsplus_compare_dentry,
 };
 
-static struct dentry *hfsplus_file_lookup(struct inode *dir,
-               struct dentry *dentry, unsigned int flags)
-{
-       struct hfs_find_data fd;
-       struct super_block *sb = dir->i_sb;
-       struct inode *inode = NULL;
-       struct hfsplus_inode_info *hip;
-       int err;
-
-       if (HFSPLUS_IS_RSRC(dir) || strcmp(dentry->d_name.name, "rsrc"))
-               goto out;
-
-       inode = HFSPLUS_I(dir)->rsrc_inode;
-       if (inode)
-               goto out;
-
-       inode = new_inode(sb);
-       if (!inode)
-               return ERR_PTR(-ENOMEM);
-
-       hip = HFSPLUS_I(inode);
-       inode->i_ino = dir->i_ino;
-       INIT_LIST_HEAD(&hip->open_dir_list);
-       mutex_init(&hip->extents_lock);
-       hip->extent_state = 0;
-       hip->flags = 0;
-       hip->userflags = 0;
-       set_bit(HFSPLUS_I_RSRC, &hip->flags);
-
-       err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
-       if (!err) {
-               err = hfsplus_find_cat(sb, dir->i_ino, &fd);
-               if (!err)
-                       err = hfsplus_cat_read_inode(inode, &fd);
-               hfs_find_exit(&fd);
-       }
-       if (err) {
-               iput(inode);
-               return ERR_PTR(err);
-       }
-       hip->rsrc_inode = dir;
-       HFSPLUS_I(dir)->rsrc_inode = inode;
-       igrab(dir);
-
-       /*
-        * __mark_inode_dirty expects inodes to be hashed.  Since we don't
-        * want resource fork inodes in the regular inode space, we make them
-        * appear hashed, but do not put on any lists.  hlist_del()
-        * will work fine and require no locking.
-        */
-       hlist_add_fake(&inode->i_hash);
-
-       mark_inode_dirty(inode);
-out:
-       d_add(dentry, inode);
-       return NULL;
-}
-
 static void hfsplus_get_perms(struct inode *inode,
                struct hfsplus_perm *perms, int dir)
 {
@@ -385,7 +327,6 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
 }
 
 static const struct inode_operations hfsplus_file_inode_operations = {
-       .lookup         = hfsplus_file_lookup,
        .setattr        = hfsplus_setattr,
        .setxattr       = generic_setxattr,
        .getxattr       = generic_getxattr,
index 09b3ed45572475feb68b117fcf5b420a1a5e4d8f..2b91675ffcab01cf70deba55906b9dcdad1a443f 100644 (file)
@@ -456,12 +456,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
           The umask is only applied if there's no default ACL */
        ret = jffs2_init_acl_pre(dir_i, inode, &mode);
        if (ret) {
-           make_bad_inode(inode);
-           iput(inode);
-           return ERR_PTR(ret);
+               mutex_unlock(&f->sem);
+               make_bad_inode(inode);
+               iput(inode);
+               return ERR_PTR(ret);
        }
        ret = jffs2_do_new_inode (c, f, mode, ri);
        if (ret) {
+               mutex_unlock(&f->sem);
                make_bad_inode(inode);
                iput(inode);
                return ERR_PTR(ret);
@@ -478,6 +480,7 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
        inode->i_size = 0;
 
        if (insert_inode_locked(inode) < 0) {
+               mutex_unlock(&f->sem);
                make_bad_inode(inode);
                iput(inode);
                return ERR_PTR(-EINVAL);
index 975a1f562c10de31f3859b1dded82d134f4c2d0e..9a5449bc3afb0b3b784bfb04c69b011219f013d2 100644 (file)
@@ -564,25 +564,10 @@ struct jffs2_node_frag *jffs2_lookup_node_frag(struct rb_root *fragtree, uint32_
    they're killed. */
 void jffs2_kill_fragtree(struct rb_root *root, struct jffs2_sb_info *c)
 {
-       struct jffs2_node_frag *frag;
-       struct jffs2_node_frag *parent;
-
-       if (!root->rb_node)
-               return;
+       struct jffs2_node_frag *frag, *next;
 
        dbg_fragtree("killing\n");
-
-       frag = (rb_entry(root->rb_node, struct jffs2_node_frag, rb));
-       while(frag) {
-               if (frag->rb.rb_left) {
-                       frag = frag_left(frag);
-                       continue;
-               }
-               if (frag->rb.rb_right) {
-                       frag = frag_right(frag);
-                       continue;
-               }
-
+       rbtree_postorder_for_each_entry_safe(frag, next, root, rb) {
                if (frag->node && !(--frag->node->frags)) {
                        /* Not a hole, and it's the final remaining frag
                           of this node. Free the node */
@@ -591,17 +576,8 @@ void jffs2_kill_fragtree(struct rb_root *root, struct jffs2_sb_info *c)
 
                        jffs2_free_full_dnode(frag->node);
                }
-               parent = frag_parent(frag);
-               if (parent) {
-                       if (frag_left(parent) == frag)
-                               parent->rb.rb_left = NULL;
-                       else
-                               parent->rb.rb_right = NULL;
-               }
 
                jffs2_free_node_frag(frag);
-               frag = parent;
-
                cond_resched();
        }
 }
index ae81b01e6fd7ebe8291cfcba8b419b245a6a2310..386303dca382e636fa9aab5d547f343e2e5058a1 100644 (file)
@@ -543,33 +543,13 @@ static int jffs2_build_inode_fragtree(struct jffs2_sb_info *c,
 
 static void jffs2_free_tmp_dnode_info_list(struct rb_root *list)
 {
-       struct rb_node *this;
-       struct jffs2_tmp_dnode_info *tn;
-
-       this = list->rb_node;
+       struct jffs2_tmp_dnode_info *tn, *next;
 
-       /* Now at bottom of tree */
-       while (this) {
-               if (this->rb_left)
-                       this = this->rb_left;
-               else if (this->rb_right)
-                       this = this->rb_right;
-               else {
-                       tn = rb_entry(this, struct jffs2_tmp_dnode_info, rb);
+       rbtree_postorder_for_each_entry_safe(tn, next, list, rb) {
                        jffs2_free_full_dnode(tn->fn);
                        jffs2_free_tmp_dnode_info(tn);
-
-                       this = rb_parent(this);
-                       if (!this)
-                               break;
-
-                       if (this->rb_left == &tn->rb)
-                               this->rb_left = NULL;
-                       else if (this->rb_right == &tn->rb)
-                               this->rb_right = NULL;
-                       else BUG();
-               }
        }
+
        *list = RB_ROOT;
 }
 
index d448a777166b71bc21df131c0c32a462d97b5efe..7f9b096d8d572e0753ee84d060c4018fa6978a40 100644 (file)
@@ -62,7 +62,8 @@ static struct page *get_mapping_page(struct super_block *sb, pgoff_t index,
                page = read_cache_page(mapping, index, filler, sb);
        else {
                page = find_or_create_page(mapping, index, GFP_NOFS);
-               unlock_page(page);
+               if (page)
+                       unlock_page(page);
        }
        return page;
 }
index b44bdb291b8435000055b6c7321acc57e3a026f3..2b34021948e4d905f872e6c749614ee9d5500bae 100644 (file)
 #include "sufile.h"
 #include "dat.h"
 
-
+/**
+ * nilfs_ioctl_wrap_copy - wrapping function of get/set metadata info
+ * @nilfs: nilfs object
+ * @argv: vector of arguments from userspace
+ * @dir: set of direction flags
+ * @dofunc: concrete function of get/set metadata info
+ *
+ * Description: nilfs_ioctl_wrap_copy() gets/sets metadata info by means of
+ * calling dofunc() function on the basis of @argv argument.
+ *
+ * Return Value: On success, 0 is returned and requested metadata info
+ * is copied into userspace. On error, one of the following
+ * negative error codes is returned.
+ *
+ * %-EINVAL - Invalid arguments from userspace.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-EFAULT - Failure during execution of requested operation.
+ */
 static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
                                 struct nilfs_argv *argv, int dir,
                                 ssize_t (*dofunc)(struct the_nilfs *,
@@ -57,6 +76,14 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
        if (argv->v_size > PAGE_SIZE)
                return -EINVAL;
 
+       /*
+        * Reject pairs of a start item position (argv->v_index) and a
+        * total count (argv->v_nmembs) which leads position 'pos' to
+        * total count (argv->v_nmembs) which would lead position 'pos' to
+        */
+       if (argv->v_index > ~(__u64)0 - argv->v_nmembs)
+               return -EINVAL;
+
        buf = (void *)__get_free_pages(GFP_NOFS, 0);
        if (unlikely(!buf))
                return -ENOMEM;
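The added v_index/v_nmembs check above rejects the request before the sum can wrap the 64-bit position counter; writing it as index > ~0 - nmembs avoids ever computing the overflowing addition. The same pattern in isolation (standalone C, hypothetical helper name):

    #include <stdbool.h>
    #include <stdint.h>

    /* True if a + b would wrap past UINT64_MAX; never performs the
     * wrapping add itself. */
    static bool addition_would_overflow(uint64_t a, uint64_t b)
    {
            return a > UINT64_MAX - b;
    }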
@@ -99,6 +126,9 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
        return ret;
 }
 
+/**
+ * nilfs_ioctl_getflags - ioctl to support lsattr
+ */
 static int nilfs_ioctl_getflags(struct inode *inode, void __user *argp)
 {
        unsigned int flags = NILFS_I(inode)->i_flags & FS_FL_USER_VISIBLE;
@@ -106,6 +136,9 @@ static int nilfs_ioctl_getflags(struct inode *inode, void __user *argp)
        return put_user(flags, (int __user *)argp);
 }
 
+/**
+ * nilfs_ioctl_setflags - ioctl to support chattr
+ */
 static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
                                void __user *argp)
 {
@@ -158,11 +191,33 @@ out:
        return ret;
 }
 
+/**
+ * nilfs_ioctl_getversion - get info about a file's version (generation number)
+ */
 static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp)
 {
        return put_user(inode->i_generation, (int __user *)argp);
 }
 
+/**
+ * nilfs_ioctl_change_cpmode - change checkpoint mode (checkpoint/snapshot)
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_change_cpmode() function changes mode of
+ * given checkpoint between checkpoint and snapshot state. This ioctl
+ * is used in chcp and mkcp utilities.
+ *
+ * Return Value: On success, 0 is returned and mode of a checkpoint is
+ * changed. On error, one of the following negative error codes
+ * is returned.
+ *
+ * %-EPERM - Operation not permitted.
+ *
+ * %-EFAULT - Failure during checkpoint mode changing.
+ */
 static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
                                     unsigned int cmd, void __user *argp)
 {
@@ -198,6 +253,25 @@ out:
        return ret;
 }
 
+/**
+ * nilfs_ioctl_delete_checkpoint - remove checkpoint
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_delete_checkpoint() function removes
+ * checkpoint from NILFS2 file system. This ioctl is used in rmcp
+ * utility.
+ *
+ * Return Value: On success, 0 is returned and a checkpoint is
+ * removed. On error, one of the following negative error codes
+ * is returned.
+ *
+ * %-EPERM - Operation not permitted.
+ *
+ * %-EFAULT - Failure during checkpoint removing.
+ */
 static int
 nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp,
                              unsigned int cmd, void __user *argp)
@@ -229,6 +303,21 @@ out:
        return ret;
 }
 
+/**
+ * nilfs_ioctl_do_get_cpinfo - callback method getting info about checkpoints
+ * @nilfs: nilfs object
+ * @posp: pointer on array of checkpoint's numbers
+ * @flags: checkpoint mode (checkpoint or snapshot)
+ * @buf: buffer for storing checkpoints' info
+ * @size: size in bytes of one checkpoint info item in array
+ * @nmembs: number of checkpoints in array (numbers and infos)
+ *
+ * Description: nilfs_ioctl_do_get_cpinfo() function returns info about
+ * requested checkpoints. The NILFS_IOCTL_GET_CPINFO ioctl is used in
+ * lscp utility and by nilfs_cleanerd daemon.
+ *
+ * Return value: count of nilfs_cpinfo structures in output buffer.
+ */
 static ssize_t
 nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
                          void *buf, size_t size, size_t nmembs)
@@ -242,6 +331,27 @@ nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
        return ret;
 }
 
+/**
+ * nilfs_ioctl_get_cpstat - get checkpoints statistics
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_get_cpstat() returns information about checkpoints.
+ * The NILFS_IOCTL_GET_CPSTAT ioctl is used by lscp, rmcp utilities
+ * and by nilfs_cleanerd daemon.
+ *
+ * Return Value: On success, 0 is returned, and checkpoints information is
+ * copied into userspace pointer @argp. On error, one of the following
+ * negative error codes is returned.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-EFAULT - Failure during getting checkpoints statistics.
+ */
 static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp,
                                  unsigned int cmd, void __user *argp)
 {
@@ -260,6 +370,21 @@ static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp,
        return ret;
 }
 
+/**
+ * nilfs_ioctl_do_get_suinfo - callback method getting segment usage info
+ * @nilfs: nilfs object
+ * @posp: pointer on array of segment numbers
+ * @flags: *not used*
+ * @buf: buffer for storing suinfo array
+ * @size: size in bytes of one suinfo item in array
+ * @nmembs: count of segment numbers and suinfos in array
+ *
+ * Description: nilfs_ioctl_do_get_suinfo() function returns segment usage
+ * info about requested segments. The NILFS_IOCTL_GET_SUINFO ioctl is used
+ * in lssu, nilfs_resize utilities and by nilfs_cleanerd daemon.
+ *
+ * Return value: count of nilfs_suinfo structures in output buffer.
+ */
 static ssize_t
 nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
                          void *buf, size_t size, size_t nmembs)
@@ -273,6 +398,27 @@ nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
        return ret;
 }
 
+/**
+ * nilfs_ioctl_get_sustat - get segment usage statistics
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_get_sustat() returns segment usage statistics.
+ * The NILFS_IOCTL_GET_SUSTAT ioctl is used in lssu, nilfs_resize utilities
+ * and by nilfs_cleanerd daemon.
+ *
+ * Return Value: On success, 0 is returned, and segment usage information is
+ * copied into userspace pointer @argp. On error, one of the following
+ * negative error codes is returned.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-EFAULT - Failure during getting segment usage statistics.
+ */
 static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp,
                                  unsigned int cmd, void __user *argp)
 {
@@ -291,6 +437,21 @@ static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp,
        return ret;
 }
 
+/**
+ * nilfs_ioctl_do_get_vinfo - callback method getting virtual blocks info
+ * @nilfs: nilfs object
+ * @posp: *not used*
+ * @flags: *not used*
+ * @buf: buffer for storing array of nilfs_vinfo structures
+ * @size: size in bytes of one vinfo item in array
+ * @nmembs: count of vinfos in array
+ *
+ * Description: nilfs_ioctl_do_get_vinfo() function returns information
+ * on virtual block addresses. The NILFS_IOCTL_GET_VINFO ioctl is used
+ * by nilfs_cleanerd daemon.
+ *
+ * Return value: count of nilfs_vinfo structures in output buffer.
+ */
 static ssize_t
 nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
                         void *buf, size_t size, size_t nmembs)
@@ -303,6 +464,21 @@ nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
        return ret;
 }
 
+/**
+ * nilfs_ioctl_do_get_bdescs - callback method getting disk block descriptors
+ * @nilfs: nilfs object
+ * @posp: *not used*
+ * @flags: *not used*
+ * @buf: buffer for storing array of nilfs_bdesc structures
+ * @size: size in bytes of one bdesc item in array
+ * @nmembs: count of bdescs in array
+ *
+ * Description: nilfs_ioctl_do_get_bdescs() function returns information
+ * about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl
+ * is used by nilfs_cleanerd daemon.
+ *
+ * Return value: count of nilfs_bdescs structures in output buffer.
+ */
 static ssize_t
 nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags,
                          void *buf, size_t size, size_t nmembs)
@@ -329,6 +505,29 @@ nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags,
        return nmembs;
 }
 
+/**
+ * nilfs_ioctl_get_bdescs - get disk block descriptors
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_get_bdescs() function returns information
+ * about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl
+ * is used by nilfs_cleanerd daemon.
+ *
+ * Return Value: On success, 0 is returned, and disk block descriptors are
+ * copied into userspace pointer @argp. On error, one of the following
+ * negative error codes is returned.
+ *
+ * %-EINVAL - Invalid arguments from userspace.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-EFAULT - Failure during getting disk block descriptors.
+ */
 static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp,
                                  unsigned int cmd, void __user *argp)
 {
@@ -352,6 +551,26 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp,
        return ret;
 }
 
+/**
+ * nilfs_ioctl_move_inode_block - prepare data/node block for moving by GC
+ * @inode: inode object
+ * @vdesc: descriptor of virtual block number
+ * @buffers: list of moving buffers
+ *
+ * Description: nilfs_ioctl_move_inode_block() function registers data/node
+ * buffer in the GC pagecache and submit read request.
+ *
+ * Return Value: On success, 0 is returned. On error, one of the following
+ * negative error codes is returned.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-ENOENT - Requested block doesn't exist.
+ *
+ * %-EEXIST - Blocks conflict is detected.
+ */
 static int nilfs_ioctl_move_inode_block(struct inode *inode,
                                        struct nilfs_vdesc *vdesc,
                                        struct list_head *buffers)
@@ -397,6 +616,19 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode,
        return 0;
 }
 
+/**
+ * nilfs_ioctl_move_blocks - move valid inode's blocks during garbage collection
+ * @sb: superblock object
+ * @argv: vector of arguments from userspace
+ * @buf: array of nilfs_vdesc structures
+ *
+ * Description: nilfs_ioctl_move_blocks() function reads valid data/node
+ * blocks that garbage collector specified with the array of nilfs_vdesc
+ * structures and stores them into page caches of GC inodes.
+ *
+ * Return Value: Number of processed nilfs_vdesc structures or
+ * error code, otherwise.
+ */
 static int nilfs_ioctl_move_blocks(struct super_block *sb,
                                   struct nilfs_argv *argv, void *buf)
 {
@@ -462,6 +694,25 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
        return ret;
 }
 
+/**
+ * nilfs_ioctl_delete_checkpoints - delete checkpoints
+ * @nilfs: nilfs object
+ * @argv: vector of arguments from userspace
+ * @buf: array of periods of checkpoints numbers
+ *
+ * Description: nilfs_ioctl_delete_checkpoints() function deletes checkpoints
+ * in the period from p_start to p_end, excluding p_end itself. The checkpoints
+ * which have been already deleted are ignored.
+ *
+ * Return Value: Number of processed nilfs_period structures or
+ * error code, otherwise.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-EINVAL - invalid checkpoints.
+ */
 static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs,
                                          struct nilfs_argv *argv, void *buf)
 {
@@ -479,6 +730,24 @@ static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs,
        return nmembs;
 }
 
+/**
+ * nilfs_ioctl_free_vblocknrs - free virtual block numbers
+ * @nilfs: nilfs object
+ * @argv: vector of arguments from userspace
+ * @buf: array of virtual block numbers
+ *
+ * Description: nilfs_ioctl_free_vblocknrs() function frees
+ * the virtual block numbers specified by @buf and @argv->v_nmembs.
+ *
+ * Return Value: Number of processed virtual block numbers or
+ * error code, otherwise.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-ENOENT - The virtual block numbers have not been allocated.
+ */
 static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs,
                                      struct nilfs_argv *argv, void *buf)
 {
@@ -490,6 +759,24 @@ static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs,
        return (ret < 0) ? ret : nmembs;
 }
 
+/**
+ * nilfs_ioctl_mark_blocks_dirty - mark blocks dirty
+ * @nilfs: nilfs object
+ * @argv: vector of arguments from userspace
+ * @buf: array of block descriptors
+ *
+ * Description: nilfs_ioctl_mark_blocks_dirty() function marks
+ * metadata file or data blocks as dirty.
+ *
+ * Return Value: Number of processed block descriptors or
+ * error code, otherwise.
+ *
+ * %-ENOMEM - Insufficient memory available.
+ *
+ * %-EIO - I/O error
+ *
+ * %-ENOENT - the specified block does not exist (hole block)
+ */
 static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs,
                                         struct nilfs_argv *argv, void *buf)
 {
@@ -571,6 +858,20 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
        return ret;
 }
 
+/**
+ * nilfs_ioctl_clean_segments - clean segments
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_clean_segments() function makes garbage
+ * collection operation in the environment of requested parameters
+ * from userspace. The NILFS_IOCTL_CLEAN_SEGMENTS ioctl is used by
+ * nilfs_cleanerd daemon.
+ *
+ * Return Value: On success, 0 is returned or error code, otherwise.
+ */
 static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
                                      unsigned int cmd, void __user *argp)
 {
@@ -682,6 +983,33 @@ out:
        return ret;
 }
 
+/**
+ * nilfs_ioctl_sync - make a checkpoint
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_sync() function constructs a logical segment
+ * for checkpointing.  This function guarantees that all modified data
+ * and metadata are written out to the device when it returns
+ * successfully.
+ *
+ * Return Value: On success, 0 is returned. On error, one of the following
+ * negative error codes is returned.
+ *
+ * %-EROFS - Read only filesystem.
+ *
+ * %-EIO - I/O error
+ *
+ * %-ENOSPC - No space left on device (only in a panic state).
+ *
+ * %-ERESTARTSYS - Interrupted.
+ *
+ * %-ENOMEM - Insufficient memory available.
+ *
+ * %-EFAULT - Failure during execution of requested operation.
+ */
 static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
                            unsigned int cmd, void __user *argp)
 {
@@ -710,6 +1038,14 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
        return 0;
 }
 
+/**
+ * nilfs_ioctl_resize - resize NILFS2 volume
+ * @inode: inode object
+ * @filp: file object
+ * @argp: pointer on argument from userspace
+ *
+ * Return Value: On success, 0 is returned or error code, otherwise.
+ */
 static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
                              void __user *argp)
 {
@@ -735,6 +1071,17 @@ out:
        return ret;
 }
 
+/**
+ * nilfs_ioctl_set_alloc_range - limit range of segments to be allocated
+ * @inode: inode object
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_set_alloc_range() function defines the lower and
+ * upper limits, in bytes, of the range of segments to be allocated.
+ * The NILFS_IOCTL_SET_ALLOC_RANGE is used by nilfs_resize utility.
+ *
+ * Return Value: On success, 0 is returned or error code, otherwise.
+ */
 static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
 {
        struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
@@ -767,6 +1114,28 @@ out:
        return ret;
 }
 
+/**
+ * nilfs_ioctl_get_info - wrapping function of get metadata info
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ * @membsz: size of an item in bytes
+ * @dofunc: concrete function of getting metadata info
+ *
+ * Description: nilfs_ioctl_get_info() gets metadata info by means of
+ * calling dofunc() function.
+ *
+ * Return Value: On success, 0 is returned and requested metadata info
+ * is copied into userspace. On error, one of the following
+ * negative error codes is returned.
+ *
+ * %-EINVAL - Invalid arguments from userspace.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-EFAULT - Failure during execution of requested operation.
+ */
 static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
                                unsigned int cmd, void __user *argp,
                                size_t membsz,
index 1fedd5f7ccc4ed24398d19eda55a26380462095c..0b9ff4395e6ac320f6108f7c3cabb2d57d5a426f 100644 (file)
@@ -82,20 +82,23 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
  * events.
  */
 static int dnotify_handle_event(struct fsnotify_group *group,
+                               struct inode *inode,
                                struct fsnotify_mark *inode_mark,
                                struct fsnotify_mark *vfsmount_mark,
-                               struct fsnotify_event *event)
+                               u32 mask, void *data, int data_type,
+                               const unsigned char *file_name)
 {
        struct dnotify_mark *dn_mark;
-       struct inode *to_tell;
        struct dnotify_struct *dn;
        struct dnotify_struct **prev;
        struct fown_struct *fown;
-       __u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD;
+       __u32 test_mask = mask & ~FS_EVENT_ON_CHILD;
 
-       BUG_ON(vfsmount_mark);
+       /* not a dir, dnotify doesn't care */
+       if (!S_ISDIR(inode->i_mode))
+               return 0;
 
-       to_tell = event->to_tell;
+       BUG_ON(vfsmount_mark);
 
        dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark);
 
@@ -122,23 +125,6 @@ static int dnotify_handle_event(struct fsnotify_group *group,
        return 0;
 }
 
-/*
- * Given an inode and mask determine if dnotify would be interested in sending
- * userspace notification for that pair.
- */
-static bool dnotify_should_send_event(struct fsnotify_group *group,
-                                     struct inode *inode,
-                                     struct fsnotify_mark *inode_mark,
-                                     struct fsnotify_mark *vfsmount_mark,
-                                     __u32 mask, void *data, int data_type)
-{
-       /* not a dir, dnotify doesn't care */
-       if (!S_ISDIR(inode->i_mode))
-               return false;
-
-       return true;
-}
-
 static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
 {
        struct dnotify_mark *dn_mark = container_of(fsn_mark,
@@ -152,10 +138,6 @@ static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
 
 static struct fsnotify_ops dnotify_fsnotify_ops = {
        .handle_event = dnotify_handle_event,
-       .should_send_event = dnotify_should_send_event,
-       .free_group_priv = NULL,
-       .freeing_mark = NULL,
-       .free_event_priv = NULL,
 };
 
 /*
index 0c2f9122b262da54392c8ed90a5a9d94ea22719c..58772623f02a90e7e7c4355921ba0c8457024175 100644 (file)
@@ -9,31 +9,27 @@
 #include <linux/types.h>
 #include <linux/wait.h>
 
-static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
+#include "fanotify.h"
+
+static bool should_merge(struct fsnotify_event *old_fsn,
+                        struct fsnotify_event *new_fsn)
 {
-       pr_debug("%s: old=%p new=%p\n", __func__, old, new);
+       struct fanotify_event_info *old, *new;
 
-       if (old->to_tell == new->to_tell &&
-           old->data_type == new->data_type &&
-           old->tgid == new->tgid) {
-               switch (old->data_type) {
-               case (FSNOTIFY_EVENT_PATH):
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
-                       /* dont merge two permission events */
-                       if ((old->mask & FAN_ALL_PERM_EVENTS) &&
-                           (new->mask & FAN_ALL_PERM_EVENTS))
-                               return false;
+       /* don't merge two permission events */
+       if ((old_fsn->mask & FAN_ALL_PERM_EVENTS) &&
+           (new_fsn->mask & FAN_ALL_PERM_EVENTS))
+               return false;
 #endif
-                       if ((old->path.mnt == new->path.mnt) &&
-                           (old->path.dentry == new->path.dentry))
-                               return true;
-                       break;
-               case (FSNOTIFY_EVENT_NONE):
-                       return true;
-               default:
-                       BUG();
-               };
-       }
+       pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn);
+       old = FANOTIFY_E(old_fsn);
+       new = FANOTIFY_E(new_fsn);
+
+       if (old_fsn->inode == new_fsn->inode && old->tgid == new->tgid &&
+           old->path.mnt == new->path.mnt &&
+           old->path.dentry == new->path.dentry)
+               return true;
        return false;
 }
 
@@ -41,59 +37,28 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
 static struct fsnotify_event *fanotify_merge(struct list_head *list,
                                             struct fsnotify_event *event)
 {
-       struct fsnotify_event_holder *test_holder;
-       struct fsnotify_event *test_event = NULL;
-       struct fsnotify_event *new_event;
+       struct fsnotify_event *test_event;
+       bool do_merge = false;
 
        pr_debug("%s: list=%p event=%p\n", __func__, list, event);
 
-
-       list_for_each_entry_reverse(test_holder, list, event_list) {
-               if (should_merge(test_holder->event, event)) {
-                       test_event = test_holder->event;
+       list_for_each_entry_reverse(test_event, list, list) {
+               if (should_merge(test_event, event)) {
+                       do_merge = true;
                        break;
                }
        }
 
-       if (!test_event)
+       if (!do_merge)
                return NULL;
 
-       fsnotify_get_event(test_event);
-
-       /* if they are exactly the same we are done */
-       if (test_event->mask == event->mask)
-               return test_event;
-
-       /*
-        * if the refcnt == 2 this is the only queue
-        * for this event and so we can update the mask
-        * in place.
-        */
-       if (atomic_read(&test_event->refcnt) == 2) {
-               test_event->mask |= event->mask;
-               return test_event;
-       }
-
-       new_event = fsnotify_clone_event(test_event);
-
-       /* done with test_event */
-       fsnotify_put_event(test_event);
-
-       /* couldn't allocate memory, merge was not possible */
-       if (unlikely(!new_event))
-               return ERR_PTR(-ENOMEM);
-
-       /* build new event and replace it on the list */
-       new_event->mask = (test_event->mask | event->mask);
-       fsnotify_replace_event(test_holder, new_event);
-
-       /* we hold a reference on new_event from clone_event */
-       return new_event;
+       test_event->mask |= event->mask;
+       return test_event;
 }
 
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
 static int fanotify_get_response_from_access(struct fsnotify_group *group,
-                                            struct fsnotify_event *event)
+                                            struct fanotify_event_info *event)
 {
        int ret;
 
@@ -106,7 +71,6 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
                return 0;
 
        /* userspace responded, convert to something usable */
-       spin_lock(&event->lock);
        switch (event->response) {
        case FAN_ALLOW:
                ret = 0;
@@ -116,7 +80,6 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
                ret = -EPERM;
        }
        event->response = 0;
-       spin_unlock(&event->lock);
 
        pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
                 group, event, ret);
@@ -125,58 +88,17 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
 }
 #endif
 
-static int fanotify_handle_event(struct fsnotify_group *group,
-                                struct fsnotify_mark *inode_mark,
-                                struct fsnotify_mark *fanotify_mark,
-                                struct fsnotify_event *event)
-{
-       int ret = 0;
-       struct fsnotify_event *notify_event = NULL;
-
-       BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
-       BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
-       BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
-       BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
-       BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
-       BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
-       BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
-       BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
-       BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
-       BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
-
-       pr_debug("%s: group=%p event=%p\n", __func__, group, event);
-
-       notify_event = fsnotify_add_notify_event(group, event, NULL, fanotify_merge);
-       if (IS_ERR(notify_event))
-               return PTR_ERR(notify_event);
-
-#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
-       if (event->mask & FAN_ALL_PERM_EVENTS) {
-               /* if we merged we need to wait on the new event */
-               if (notify_event)
-                       event = notify_event;
-               ret = fanotify_get_response_from_access(group, event);
-       }
-#endif
-
-       if (notify_event)
-               fsnotify_put_event(notify_event);
-
-       return ret;
-}
-
-static bool fanotify_should_send_event(struct fsnotify_group *group,
-                                      struct inode *to_tell,
-                                      struct fsnotify_mark *inode_mark,
+static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
                                       struct fsnotify_mark *vfsmnt_mark,
-                                      __u32 event_mask, void *data, int data_type)
+                                      u32 event_mask,
+                                      void *data, int data_type)
 {
        __u32 marks_mask, marks_ignored_mask;
        struct path *path = data;
 
-       pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p "
-                "mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
-                inode_mark, vfsmnt_mark, event_mask, data, data_type);
+       pr_debug("%s: inode_mark=%p vfsmnt_mark=%p mask=%x data=%p"
+                " data_type=%d\n", __func__, inode_mark, vfsmnt_mark,
+                event_mask, data, data_type);
 
        /* if we don't have enough info to send an event to userspace say no */
        if (data_type != FSNOTIFY_EVENT_PATH)
@@ -217,6 +139,74 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
        return false;
 }
 
+static int fanotify_handle_event(struct fsnotify_group *group,
+                                struct inode *inode,
+                                struct fsnotify_mark *inode_mark,
+                                struct fsnotify_mark *fanotify_mark,
+                                u32 mask, void *data, int data_type,
+                                const unsigned char *file_name)
+{
+       int ret = 0;
+       struct fanotify_event_info *event;
+       struct fsnotify_event *fsn_event;
+       struct fsnotify_event *notify_fsn_event;
+
+       BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
+       BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
+       BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
+       BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
+       BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
+       BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
+       BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
+       BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
+       BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
+       BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
+
+       if (!fanotify_should_send_event(inode_mark, fanotify_mark, mask, data,
+                                       data_type))
+               return 0;
+
+       pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
+                mask);
+
+       event = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL);
+       if (unlikely(!event))
+               return -ENOMEM;
+
+       fsn_event = &event->fse;
+       fsnotify_init_event(fsn_event, inode, mask);
+       event->tgid = get_pid(task_tgid(current));
+       if (data_type == FSNOTIFY_EVENT_PATH) {
+               struct path *path = data;
+               event->path = *path;
+               path_get(&event->path);
+       } else {
+               event->path.mnt = NULL;
+               event->path.dentry = NULL;
+       }
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+       event->response = 0;
+#endif
+
+       notify_fsn_event = fsnotify_add_notify_event(group, fsn_event,
+                                                    fanotify_merge);
+       if (notify_fsn_event) {
+               /* Our event wasn't used in the end. Free it. */
+               fsnotify_destroy_event(group, fsn_event);
+               if (IS_ERR(notify_fsn_event))
+                       return PTR_ERR(notify_fsn_event);
+               /* We need to ask about a different event after a merge... */
+               event = FANOTIFY_E(notify_fsn_event);
+               fsn_event = notify_fsn_event;
+       }
+
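+       /*
+        * For permission events, wait until userspace has written back a
+        * response before returning to the caller.
+        */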
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+       if (fsn_event->mask & FAN_ALL_PERM_EVENTS)
+               ret = fanotify_get_response_from_access(group, event);
+#endif
+       return ret;
+}
+
 static void fanotify_free_group_priv(struct fsnotify_group *group)
 {
        struct user_struct *user;
@@ -226,10 +216,18 @@ static void fanotify_free_group_priv(struct fsnotify_group *group)
        free_uid(user);
 }
 
+static void fanotify_free_event(struct fsnotify_event *fsn_event)
+{
+       struct fanotify_event_info *event;
+
+       event = FANOTIFY_E(fsn_event);
+       path_put(&event->path);
+       put_pid(event->tgid);
+       kmem_cache_free(fanotify_event_cachep, event);
+}
+
 const struct fsnotify_ops fanotify_fsnotify_ops = {
        .handle_event = fanotify_handle_event,
-       .should_send_event = fanotify_should_send_event,
        .free_group_priv = fanotify_free_group_priv,
-       .free_event_priv = NULL,
-       .freeing_mark = NULL,
+       .free_event = fanotify_free_event,
 };
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
new file mode 100644 (file)
index 0000000..0e90174
--- /dev/null
@@ -0,0 +1,23 @@
+#include <linux/fsnotify_backend.h>
+#include <linux/path.h>
+#include <linux/slab.h>
+
+extern struct kmem_cache *fanotify_event_cachep;
+
+struct fanotify_event_info {
+       struct fsnotify_event fse;
+       /*
+        * We hold a ref to this path so it may be dereferenced at any point
+        * during this object's lifetime.
+        */
+       struct path path;
+       struct pid *tgid;
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+       u32 response;   /* userspace answer to question */
+#endif
+};
+
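+/* Map an embedded fsnotify_event back to the fanotify event containing it. */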
+static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse)
+{
+       return container_of(fse, struct fanotify_event_info, fse);
+}
index e44cb6427df35d52e0b9cd738ec09a4a2571d424..57d7c083cb4b89c9c49858680a05e69c36b7ef09 100644 (file)
@@ -19,6 +19,7 @@
 
 #include "../../mount.h"
 #include "../fdinfo.h"
+#include "fanotify.h"
 
 #define FANOTIFY_DEFAULT_MAX_EVENTS    16384
 #define FANOTIFY_DEFAULT_MAX_MARKS     8192
@@ -28,11 +29,12 @@ extern const struct fsnotify_ops fanotify_fsnotify_ops;
 
 static struct kmem_cache *fanotify_mark_cache __read_mostly;
 static struct kmem_cache *fanotify_response_event_cache __read_mostly;
+struct kmem_cache *fanotify_event_cachep __read_mostly;
 
 struct fanotify_response_event {
        struct list_head list;
        __s32 fd;
-       struct fsnotify_event *event;
+       struct fanotify_event_info *event;
 };
 
 /*
@@ -61,8 +63,8 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 }
 
 static int create_fd(struct fsnotify_group *group,
-                       struct fsnotify_event *event,
-                       struct file **file)
+                    struct fanotify_event_info *event,
+                    struct file **file)
 {
        int client_fd;
        struct file *new_file;
@@ -73,12 +75,6 @@ static int create_fd(struct fsnotify_group *group,
        if (client_fd < 0)
                return client_fd;
 
-       if (event->data_type != FSNOTIFY_EVENT_PATH) {
-               WARN_ON(1);
-               put_unused_fd(client_fd);
-               return -EINVAL;
-       }
-
        /*
         * we need a new file handle for the userspace program so it can read even if it was
         * originally opened O_WRONLY.
@@ -109,23 +105,25 @@ static int create_fd(struct fsnotify_group *group,
 }
 
 static int fill_event_metadata(struct fsnotify_group *group,
-                                  struct fanotify_event_metadata *metadata,
-                                  struct fsnotify_event *event,
-                                  struct file **file)
+                              struct fanotify_event_metadata *metadata,
+                              struct fsnotify_event *fsn_event,
+                              struct file **file)
 {
        int ret = 0;
+       struct fanotify_event_info *event;
 
        pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
-                group, metadata, event);
+                group, metadata, fsn_event);
 
        *file = NULL;
+       event = container_of(fsn_event, struct fanotify_event_info, fse);
        metadata->event_len = FAN_EVENT_METADATA_LEN;
        metadata->metadata_len = FAN_EVENT_METADATA_LEN;
        metadata->vers = FANOTIFY_METADATA_VERSION;
        metadata->reserved = 0;
-       metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS;
+       metadata->mask = fsn_event->mask & FAN_ALL_OUTGOING_EVENTS;
        metadata->pid = pid_vnr(event->tgid);
-       if (unlikely(event->mask & FAN_Q_OVERFLOW))
+       if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW))
                metadata->fd = FAN_NOFD;
        else {
                metadata->fd = create_fd(group, event, file);
@@ -209,7 +207,7 @@ static int prepare_for_access_response(struct fsnotify_group *group,
        if (!re)
                return -ENOMEM;
 
-       re->event = event;
+       re->event = FANOTIFY_E(event);
        re->fd = fd;
 
        mutex_lock(&group->fanotify_data.access_mutex);
@@ -217,7 +215,7 @@ static int prepare_for_access_response(struct fsnotify_group *group,
        if (atomic_read(&group->fanotify_data.bypass_perm)) {
                mutex_unlock(&group->fanotify_data.access_mutex);
                kmem_cache_free(fanotify_response_event_cache, re);
-               event->response = FAN_ALLOW;
+               FANOTIFY_E(event)->response = FAN_ALLOW;
                return 0;
        }
                
@@ -273,7 +271,7 @@ out_close_fd:
 out:
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
        if (event->mask & FAN_ALL_PERM_EVENTS) {
-               event->response = FAN_DENY;
+               FANOTIFY_E(event)->response = FAN_DENY;
                wake_up(&group->fanotify_data.access_waitq);
        }
 #endif
@@ -321,7 +319,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
                        if (IS_ERR(kevent))
                                break;
                        ret = copy_event_to_user(group, kevent, buf);
-                       fsnotify_put_event(kevent);
+                       fsnotify_destroy_event(group, kevent);
                        if (ret < 0)
                                break;
                        buf += ret;
@@ -409,7 +407,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
        struct fsnotify_group *group;
-       struct fsnotify_event_holder *holder;
+       struct fsnotify_event *fsn_event;
        void __user *p;
        int ret = -ENOTTY;
        size_t send_len = 0;
@@ -421,7 +419,7 @@ static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long ar
        switch (cmd) {
        case FIONREAD:
                mutex_lock(&group->notification_mutex);
-               list_for_each_entry(holder, &group->notification_list, event_list)
+               list_for_each_entry(fsn_event, &group->notification_list, list)
                        send_len += FAN_EVENT_METADATA_LEN;
                mutex_unlock(&group->notification_mutex);
                ret = put_user(send_len, (int __user *) p);
@@ -906,6 +904,7 @@ static int __init fanotify_user_setup(void)
        fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
        fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event,
                                                   SLAB_PANIC);
+       fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC);
 
        return 0;
 }
index 4bb21d67d9b1b1acde17f4031ddba4d429090e49..1d4e1ea2f37ca4995db3f16db9bd00bbf862ca93 100644 (file)
@@ -128,8 +128,7 @@ static int send_to_group(struct inode *to_tell,
                         struct fsnotify_mark *vfsmount_mark,
                         __u32 mask, void *data,
                         int data_is, u32 cookie,
-                        const unsigned char *file_name,
-                        struct fsnotify_event **event)
+                        const unsigned char *file_name)
 {
        struct fsnotify_group *group = NULL;
        __u32 inode_test_mask = 0;
@@ -170,27 +169,17 @@ static int send_to_group(struct inode *to_tell,
 
        pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p"
                 " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x"
-                " data=%p data_is=%d cookie=%d event=%p\n",
+                " data=%p data_is=%d cookie=%d\n",
                 __func__, group, to_tell, mask, inode_mark,
                 inode_test_mask, vfsmount_mark, vfsmount_test_mask, data,
-                data_is, cookie, *event);
+                data_is, cookie);
 
        if (!inode_test_mask && !vfsmount_test_mask)
                return 0;
 
-       if (group->ops->should_send_event(group, to_tell, inode_mark,
-                                         vfsmount_mark, mask, data,
-                                         data_is) == false)
-               return 0;
-
-       if (!*event) {
-               *event = fsnotify_create_event(to_tell, mask, data,
-                                               data_is, file_name,
-                                               cookie, GFP_KERNEL);
-               if (!*event)
-                       return -ENOMEM;
-       }
-       return group->ops->handle_event(group, inode_mark, vfsmount_mark, *event);
+       return group->ops->handle_event(group, to_tell, inode_mark,
+                                       vfsmount_mark, mask, data, data_is,
+                                       file_name);
 }
 
 /*
@@ -205,7 +194,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
        struct hlist_node *inode_node = NULL, *vfsmount_node = NULL;
        struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
        struct fsnotify_group *inode_group, *vfsmount_group;
-       struct fsnotify_event *event = NULL;
        struct mount *mnt;
        int idx, ret = 0;
        /* global tests shouldn't care about events on child only the specific event */
@@ -258,18 +246,18 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 
                if (inode_group > vfsmount_group) {
                        /* handle inode */
-                       ret = send_to_group(to_tell, inode_mark, NULL, mask, data,
-                                           data_is, cookie, file_name, &event);
+                       ret = send_to_group(to_tell, inode_mark, NULL, mask,
+                                           data, data_is, cookie, file_name);
                        /* we didn't use the vfsmount_mark */
                        vfsmount_group = NULL;
                } else if (vfsmount_group > inode_group) {
-                       ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, data,
-                                           data_is, cookie, file_name, &event);
+                       ret = send_to_group(to_tell, NULL, vfsmount_mark, mask,
+                                           data, data_is, cookie, file_name);
                        inode_group = NULL;
                } else {
                        ret = send_to_group(to_tell, inode_mark, vfsmount_mark,
-                                           mask, data, data_is, cookie, file_name,
-                                           &event);
+                                           mask, data, data_is, cookie,
+                                           file_name);
                }
 
                if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
@@ -285,12 +273,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
        ret = 0;
 out:
        srcu_read_unlock(&fsnotify_mark_srcu, idx);
-       /*
-        * fsnotify_create_event() took a reference so the event can't be cleaned
-        * up while we are still trying to add it to lists, drop that one.
-        */
-       if (event)
-               fsnotify_put_event(event);
 
        return ret;
 }
index bd2625bd88b47a7b2961ec0d43b77f5675a80ad6..ee674fe2cec7f3f7b99ad09363b6a425ff4d6dbd 100644 (file)
@@ -99,6 +99,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
        INIT_LIST_HEAD(&group->marks_list);
 
        group->ops = ops;
+       fsnotify_init_event(&group->overflow_event, NULL, FS_Q_OVERFLOW);
 
        return group;
 }
index b6642e4de4bf978400d96f74da34c05f7040ab02..485eef3f4407a0371d903c08a75c9f262dfde82e 100644 (file)
@@ -2,11 +2,12 @@
 #include <linux/inotify.h>
 #include <linux/slab.h> /* struct kmem_cache */
 
-extern struct kmem_cache *event_priv_cachep;
-
-struct inotify_event_private_data {
-       struct fsnotify_event_private_data fsnotify_event_priv_data;
+struct inotify_event_info {
+       struct fsnotify_event fse;
        int wd;
+       u32 sync_cookie;
+       int name_len;
+       char name[];
 };
 
 struct inotify_inode_mark {
@@ -14,8 +15,18 @@ struct inotify_inode_mark {
        int wd;
 };
 
+static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse)
+{
+       return container_of(fse, struct inotify_event_info, fse);
+}
+
 extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
                                           struct fsnotify_group *group);
-extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
+extern int inotify_handle_event(struct fsnotify_group *group,
+                               struct inode *inode,
+                               struct fsnotify_mark *inode_mark,
+                               struct fsnotify_mark *vfsmount_mark,
+                               u32 mask, void *data, int data_type,
+                               const unsigned char *file_name);
 
 extern const struct fsnotify_ops inotify_fsnotify_ops;
index 4216308b81b409443d8e38430752e346ff30341a..aad1a35e9af117fdc397cca897ba6f192f2de7a0 100644 (file)
 #include "inotify.h"
 
 /*
- * Check if 2 events contain the same information.  We do not compare private data
- * but at this moment that isn't a problem for any know fsnotify listeners.
+ * Check if 2 events contain the same information.
  */
-static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
+static bool event_compare(struct fsnotify_event *old_fsn,
+                         struct fsnotify_event *new_fsn)
 {
-       if ((old->mask == new->mask) &&
-           (old->to_tell == new->to_tell) &&
-           (old->data_type == new->data_type) &&
-           (old->name_len == new->name_len)) {
-               switch (old->data_type) {
-               case (FSNOTIFY_EVENT_INODE):
-                       /* remember, after old was put on the wait_q we aren't
-                        * allowed to look at the inode any more, only thing
-                        * left to check was if the file_name is the same */
-                       if (!old->name_len ||
-                           !strcmp(old->file_name, new->file_name))
-                               return true;
-                       break;
-               case (FSNOTIFY_EVENT_PATH):
-                       if ((old->path.mnt == new->path.mnt) &&
-                           (old->path.dentry == new->path.dentry))
-                               return true;
-                       break;
-               case (FSNOTIFY_EVENT_NONE):
-                       if (old->mask & FS_Q_OVERFLOW)
-                               return true;
-                       else if (old->mask & FS_IN_IGNORED)
-                               return false;
-                       return true;
-               };
-       }
+       struct inotify_event_info *old, *new;
+
+       if (old_fsn->mask & FS_IN_IGNORED)
+               return false;
+       old = INOTIFY_E(old_fsn);
+       new = INOTIFY_E(new_fsn);
+       if ((old_fsn->mask == new_fsn->mask) &&
+           (old_fsn->inode == new_fsn->inode) &&
+           (old->name_len == new->name_len) &&
+           (!old->name_len || !strcmp(old->name, new->name)))
+               return true;
        return false;
 }
 
 static struct fsnotify_event *inotify_merge(struct list_head *list,
                                            struct fsnotify_event *event)
 {
-       struct fsnotify_event_holder *last_holder;
        struct fsnotify_event *last_event;
 
-       /* and the list better be locked by something too */
-       spin_lock(&event->lock);
-
-       last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
-       last_event = last_holder->event;
-       if (event_compare(last_event, event))
-               fsnotify_get_event(last_event);
-       else
-               last_event = NULL;
-
-       spin_unlock(&event->lock);
-
+       last_event = list_entry(list->prev, struct fsnotify_event, list);
+       if (!event_compare(last_event, event))
+               return NULL;
        return last_event;
 }
 
-static int inotify_handle_event(struct fsnotify_group *group,
-                               struct fsnotify_mark *inode_mark,
-                               struct fsnotify_mark *vfsmount_mark,
-                               struct fsnotify_event *event)
+int inotify_handle_event(struct fsnotify_group *group,
+                        struct inode *inode,
+                        struct fsnotify_mark *inode_mark,
+                        struct fsnotify_mark *vfsmount_mark,
+                        u32 mask, void *data, int data_type,
+                        const unsigned char *file_name)
 {
        struct inotify_inode_mark *i_mark;
-       struct inode *to_tell;
-       struct inotify_event_private_data *event_priv;
-       struct fsnotify_event_private_data *fsn_event_priv;
+       struct inotify_event_info *event;
        struct fsnotify_event *added_event;
-       int wd, ret = 0;
+       struct fsnotify_event *fsn_event;
+       int ret = 0;
+       int len = 0;
+       int alloc_len = sizeof(struct inotify_event_info);
 
        BUG_ON(vfsmount_mark);
 
-       pr_debug("%s: group=%p event=%p to_tell=%p mask=%x\n", __func__, group,
-                event, event->to_tell, event->mask);
+       if ((inode_mark->mask & FS_EXCL_UNLINK) &&
+           (data_type == FSNOTIFY_EVENT_PATH)) {
+               struct path *path = data;
 
-       to_tell = event->to_tell;
+               if (d_unlinked(path->dentry))
+                       return 0;
+       }
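+       /* Reserve room for the file name and its terminating '\0'. */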
+       if (file_name) {
+               len = strlen(file_name);
+               alloc_len += len + 1;
+       }
+
+       pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
+                mask);
 
        i_mark = container_of(inode_mark, struct inotify_inode_mark,
                              fsn_mark);
-       wd = i_mark->wd;
 
-       event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
-       if (unlikely(!event_priv))
+       event = kmalloc(alloc_len, GFP_KERNEL);
+       if (unlikely(!event))
                return -ENOMEM;
 
-       fsn_event_priv = &event_priv->fsnotify_event_priv_data;
-
-       fsnotify_get_group(group);
-       fsn_event_priv->group = group;
-       event_priv->wd = wd;
+       fsn_event = &event->fse;
+       fsnotify_init_event(fsn_event, inode, mask);
+       event->wd = i_mark->wd;
+       event->name_len = len;
+       if (len)
+               strcpy(event->name, file_name);
 
-       added_event = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge);
+       added_event = fsnotify_add_notify_event(group, fsn_event, inotify_merge);
        if (added_event) {
-               inotify_free_event_priv(fsn_event_priv);
-               if (!IS_ERR(added_event))
-                       fsnotify_put_event(added_event);
-               else
+               /* Our event wasn't used in the end. Free it. */
+               fsnotify_destroy_event(group, fsn_event);
+               if (IS_ERR(added_event))
                        ret = PTR_ERR(added_event);
        }
 
@@ -142,22 +129,6 @@ static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify
        inotify_ignored_and_remove_idr(fsn_mark, group);
 }
 
-static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
-                                     struct fsnotify_mark *inode_mark,
-                                     struct fsnotify_mark *vfsmount_mark,
-                                     __u32 mask, void *data, int data_type)
-{
-       if ((inode_mark->mask & FS_EXCL_UNLINK) &&
-           (data_type == FSNOTIFY_EVENT_PATH)) {
-               struct path *path = data;
-
-               if (d_unlinked(path->dentry))
-                       return false;
-       }
-
-       return true;
-}
-
 /*
  * This is NEVER supposed to be called.  Inotify marks should either have been
  * removed from the idr when the watch was removed or in the
@@ -202,22 +173,14 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
        free_uid(group->inotify_data.user);
 }
 
-void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv)
+static void inotify_free_event(struct fsnotify_event *fsn_event)
 {
-       struct inotify_event_private_data *event_priv;
-
-
-       event_priv = container_of(fsn_event_priv, struct inotify_event_private_data,
-                                 fsnotify_event_priv_data);
-
-       fsnotify_put_group(fsn_event_priv->group);
-       kmem_cache_free(event_priv_cachep, event_priv);
+       kfree(INOTIFY_E(fsn_event));
 }
 
 const struct fsnotify_ops inotify_fsnotify_ops = {
        .handle_event = inotify_handle_event,
-       .should_send_event = inotify_should_send_event,
        .free_group_priv = inotify_free_group_priv,
-       .free_event_priv = inotify_free_event_priv,
+       .free_event = inotify_free_event,
        .freeing_mark = inotify_freeing_mark,
 };
index 60f954a891ab3551cb20f54054c37588a5d52e58..497395c8274bc62cd0fab6c2cf89ce4950fafffc 100644 (file)
@@ -50,7 +50,6 @@ static int inotify_max_queued_events __read_mostly;
 static int inotify_max_user_watches __read_mostly;
 
 static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
-struct kmem_cache *event_priv_cachep __read_mostly;
 
 #ifdef CONFIG_SYSCTL
 
@@ -124,6 +123,16 @@ static unsigned int inotify_poll(struct file *file, poll_table *wait)
        return ret;
 }
 
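+/*
+ * Length of the event's name padded up to a multiple of
+ * sizeof(struct inotify_event), including the terminating '\0';
+ * 0 if the event carries no name.
+ */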
+static int round_event_name_len(struct fsnotify_event *fsn_event)
+{
+       struct inotify_event_info *event;
+
+       event = INOTIFY_E(fsn_event);
+       if (!event->name_len)
+               return 0;
+       return roundup(event->name_len + 1, sizeof(struct inotify_event));
+}
+
 /*
  * Get an inotify_kernel_event if one exists and is small
  * enough to fit in "count". Return an error pointer if
@@ -144,9 +153,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 
        pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-       if (event->name_len)
-               event_size += roundup(event->name_len + 1, event_size);
-
+       event_size += round_event_name_len(event);
        if (event_size > count)
                return ERR_PTR(-EINVAL);
 
@@ -164,40 +171,27 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
  * buffer we had in "get_one_event()" above.
  */
 static ssize_t copy_event_to_user(struct fsnotify_group *group,
-                                 struct fsnotify_event *event,
+                                 struct fsnotify_event *fsn_event,
                                  char __user *buf)
 {
        struct inotify_event inotify_event;
-       struct fsnotify_event_private_data *fsn_priv;
-       struct inotify_event_private_data *priv;
+       struct inotify_event_info *event;
        size_t event_size = sizeof(struct inotify_event);
-       size_t name_len = 0;
-
-       pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+       size_t name_len;
+       size_t pad_name_len;
 
-       /* we get the inotify watch descriptor from the event private data */
-       spin_lock(&event->lock);
-       fsn_priv = fsnotify_remove_priv_from_event(group, event);
-       spin_unlock(&event->lock);
-
-       if (!fsn_priv)
-               inotify_event.wd = -1;
-       else {
-               priv = container_of(fsn_priv, struct inotify_event_private_data,
-                                   fsnotify_event_priv_data);
-               inotify_event.wd = priv->wd;
-               inotify_free_event_priv(fsn_priv);
-       }
+       pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event);
 
+       event = INOTIFY_E(fsn_event);
+       name_len = event->name_len;
        /*
-        * round up event->name_len so it is a multiple of event_size
+        * round up name length so it is a multiple of event_size
         * plus an extra byte for the terminating '\0'.
         */
-       if (event->name_len)
-               name_len = roundup(event->name_len + 1, event_size);
-       inotify_event.len = name_len;
-
-       inotify_event.mask = inotify_mask_to_arg(event->mask);
+       pad_name_len = round_event_name_len(fsn_event);
+       inotify_event.len = pad_name_len;
+       inotify_event.mask = inotify_mask_to_arg(fsn_event->mask);
+       inotify_event.wd = event->wd;
        inotify_event.cookie = event->sync_cookie;
 
        /* send the main event */
@@ -209,20 +203,18 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
        /*
         * fsnotify only stores the pathname, so here we have to send the pathname
         * and then pad that pathname out to a multiple of sizeof(inotify_event)
-        * with zeros.  I get my zeros from the nul_inotify_event.
+        * with zeros.
         */
-       if (name_len) {
-               unsigned int len_to_zero = name_len - event->name_len;
+       if (pad_name_len) {
                /* copy the path name */
-               if (copy_to_user(buf, event->file_name, event->name_len))
+               if (copy_to_user(buf, event->name, name_len))
                        return -EFAULT;
-               buf += event->name_len;
+               buf += name_len;
 
                /* fill userspace with 0's */
-               if (clear_user(buf, len_to_zero))
+               if (clear_user(buf, pad_name_len - name_len))
                        return -EFAULT;
-               buf += len_to_zero;
-               event_size += name_len;
+               event_size += pad_name_len;
        }
 
        return event_size;
@@ -254,7 +246,7 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
                        if (IS_ERR(kevent))
                                break;
                        ret = copy_event_to_user(group, kevent, buf);
-                       fsnotify_put_event(kevent);
+                       fsnotify_destroy_event(group, kevent);
                        if (ret < 0)
                                break;
                        buf += ret;
@@ -297,8 +289,7 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
                          unsigned long arg)
 {
        struct fsnotify_group *group;
-       struct fsnotify_event_holder *holder;
-       struct fsnotify_event *event;
+       struct fsnotify_event *fsn_event;
        void __user *p;
        int ret = -ENOTTY;
        size_t send_len = 0;
@@ -311,12 +302,10 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
        switch (cmd) {
        case FIONREAD:
                mutex_lock(&group->notification_mutex);
-               list_for_each_entry(holder, &group->notification_list, event_list) {
-                       event = holder->event;
+               list_for_each_entry(fsn_event, &group->notification_list,
+                                   list) {
                        send_len += sizeof(struct inotify_event);
-                       if (event->name_len)
-                               send_len += roundup(event->name_len + 1,
-                                               sizeof(struct inotify_event));
+                       send_len += round_event_name_len(fsn_event);
                }
                mutex_unlock(&group->notification_mutex);
                ret = put_user(send_len, (int __user *) p);
@@ -503,43 +492,12 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
                                    struct fsnotify_group *group)
 {
        struct inotify_inode_mark *i_mark;
-       struct fsnotify_event *ignored_event, *notify_event;
-       struct inotify_event_private_data *event_priv;
-       struct fsnotify_event_private_data *fsn_event_priv;
-       int ret;
-
-       i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
-
-       ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL,
-                                             FSNOTIFY_EVENT_NONE, NULL, 0,
-                                             GFP_NOFS);
-       if (!ignored_event)
-               goto skip_send_ignore;
-
-       event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS);
-       if (unlikely(!event_priv))
-               goto skip_send_ignore;
-
-       fsn_event_priv = &event_priv->fsnotify_event_priv_data;
-
-       fsnotify_get_group(group);
-       fsn_event_priv->group = group;
-       event_priv->wd = i_mark->wd;
-
-       notify_event = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL);
-       if (notify_event) {
-               if (IS_ERR(notify_event))
-                       ret = PTR_ERR(notify_event);
-               else
-                       fsnotify_put_event(notify_event);
-               inotify_free_event_priv(fsn_event_priv);
-       }
 
-skip_send_ignore:
-       /* matches the reference taken when the event was created */
-       if (ignored_event)
-               fsnotify_put_event(ignored_event);
+       /* Queue ignore event for the watch */
+       inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED,
+                            NULL, FSNOTIFY_EVENT_NONE, NULL);
 
+       i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
        /* remove this mark from the idr */
        inotify_remove_from_idr(group, i_mark);
 
@@ -836,7 +794,6 @@ static int __init inotify_user_setup(void)
        BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
 
        inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
-       event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
 
        inotify_max_queued_events = 16384;
        inotify_max_user_instances = 128;
index 7b51b05f160c36846e47a8f1b7cb417bed9ed914..952237b8e2d27bbea9466bf41db33ba854b937c6 100644 (file)
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
 
-static struct kmem_cache *fsnotify_event_cachep;
-static struct kmem_cache *fsnotify_event_holder_cachep;
-/*
- * This is a magic event we send when the q is too full.  Since it doesn't
- * hold real event information we just keep one system wide and use it any time
- * it is needed.  It's refcnt is set 1 at kernel init time and will never
- * get set to 0 so it will never get 'freed'
- */
-static struct fsnotify_event *q_overflow_event;
 static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0);
 
 /**
@@ -76,60 +67,14 @@ bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
        return list_empty(&group->notification_list) ? true : false;
 }
 
-void fsnotify_get_event(struct fsnotify_event *event)
+void fsnotify_destroy_event(struct fsnotify_group *group,
+                           struct fsnotify_event *event)
 {
-       atomic_inc(&event->refcnt);
-}
-
-void fsnotify_put_event(struct fsnotify_event *event)
-{
-       if (!event)
+       /* Overflow events are per-group and we don't want to free them */
+       if (!event || event->mask == FS_Q_OVERFLOW)
                return;
 
-       if (atomic_dec_and_test(&event->refcnt)) {
-               pr_debug("%s: event=%p\n", __func__, event);
-
-               if (event->data_type == FSNOTIFY_EVENT_PATH)
-                       path_put(&event->path);
-
-               BUG_ON(!list_empty(&event->private_data_list));
-
-               kfree(event->file_name);
-               put_pid(event->tgid);
-               kmem_cache_free(fsnotify_event_cachep, event);
-       }
-}
-
-struct fsnotify_event_holder *fsnotify_alloc_event_holder(void)
-{
-       return kmem_cache_alloc(fsnotify_event_holder_cachep, GFP_KERNEL);
-}
-
-void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder)
-{
-       if (holder)
-               kmem_cache_free(fsnotify_event_holder_cachep, holder);
-}
-
-/*
- * Find the private data that the group previously attached to this event when
- * the group added the event to the notification queue (fsnotify_add_notify_event)
- */
-struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event)
-{
-       struct fsnotify_event_private_data *lpriv;
-       struct fsnotify_event_private_data *priv = NULL;
-
-       assert_spin_locked(&event->lock);
-
-       list_for_each_entry(lpriv, &event->private_data_list, event_list) {
-               if (lpriv->group == group) {
-                       priv = lpriv;
-                       list_del(&priv->event_list);
-                       break;
-               }
-       }
-       return priv;
+       group->ops->free_event(event);
 }
 
 /*
@@ -137,91 +82,35 @@ struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnot
  * event off the queue to deal with.  If the event is successfully added to the
  * group's notification queue, a reference is taken on event.
  */
-struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
-                                                struct fsnotify_event_private_data *priv,
+struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group,
+                                                struct fsnotify_event *event,
                                                 struct fsnotify_event *(*merge)(struct list_head *,
                                                                                 struct fsnotify_event *))
 {
        struct fsnotify_event *return_event = NULL;
-       struct fsnotify_event_holder *holder = NULL;
        struct list_head *list = &group->notification_list;
 
-       pr_debug("%s: group=%p event=%p priv=%p\n", __func__, group, event, priv);
-
-       /*
-        * There is one fsnotify_event_holder embedded inside each fsnotify_event.
-        * Check if we expect to be able to use that holder.  If not alloc a new
-        * holder.
-        * For the overflow event it's possible that something will use the in
-        * event holder before we get the lock so we may need to jump back and
-        * alloc a new holder, this can't happen for most events...
-        */
-       if (!list_empty(&event->holder.event_list)) {
-alloc_holder:
-               holder = fsnotify_alloc_event_holder();
-               if (!holder)
-                       return ERR_PTR(-ENOMEM);
-       }
+       pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
        mutex_lock(&group->notification_mutex);
 
        if (group->q_len >= group->max_events) {
-               event = q_overflow_event;
-
-               /*
-                * we need to return the overflow event
-                * which means we need a ref
-                */
-               fsnotify_get_event(event);
+               /* Queue overflow event only if it isn't already queued */
+               if (list_empty(&group->overflow_event.list))
+                       event = &group->overflow_event;
                return_event = event;
-
-               /* sorry, no private data on the overflow event */
-               priv = NULL;
        }
 
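+       /*
+        * Give the backend a chance to merge the new event with the last
+        * queued one; if it merges, hand the already queued event back to
+        * the caller instead of adding a new entry.
+        */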
        if (!list_empty(list) && merge) {
-               struct fsnotify_event *tmp;
-
-               tmp = merge(list, event);
-               if (tmp) {
-                       mutex_unlock(&group->notification_mutex);
-
-                       if (return_event)
-                               fsnotify_put_event(return_event);
-                       if (holder != &event->holder)
-                               fsnotify_destroy_event_holder(holder);
-                       return tmp;
-               }
-       }
-
-       spin_lock(&event->lock);
-
-       if (list_empty(&event->holder.event_list)) {
-               if (unlikely(holder))
-                       fsnotify_destroy_event_holder(holder);
-               holder = &event->holder;
-       } else if (unlikely(!holder)) {
-               /* between the time we checked above and got the lock the in
-                * event holder was used, go back and get a new one */
-               spin_unlock(&event->lock);
-               mutex_unlock(&group->notification_mutex);
-
+               return_event = merge(list, event);
                if (return_event) {
-                       fsnotify_put_event(return_event);
-                       return_event = NULL;
+                       mutex_unlock(&group->notification_mutex);
+                       return return_event;
                }
-
-               goto alloc_holder;
        }
 
        group->q_len++;
-       holder->event = event;
-
-       fsnotify_get_event(event);
-       list_add_tail(&holder->event_list, list);
-       if (priv)
-               list_add_tail(&priv->event_list, &event->private_data_list);
-       spin_unlock(&event->lock);
+       list_add_tail(&event->list, list);
        mutex_unlock(&group->notification_mutex);
 
        wake_up(&group->notification_waitq);
@@ -230,32 +119,20 @@ alloc_holder:
 }
 
 /*
- * Remove and return the first event from the notification list.  There is a
- * reference held on this event since it was on the list.  It is the responsibility
- * of the caller to drop this reference.
+ * Remove and return the first event from the notification list.  It is the
+ * responsibility of the caller to destroy the obtained event.
  */
 struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group)
 {
        struct fsnotify_event *event;
-       struct fsnotify_event_holder *holder;
 
        BUG_ON(!mutex_is_locked(&group->notification_mutex));
 
        pr_debug("%s: group=%p\n", __func__, group);
 
-       holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
-
-       event = holder->event;
-
-       spin_lock(&event->lock);
-       holder->event = NULL;
-       list_del_init(&holder->event_list);
-       spin_unlock(&event->lock);
-
-       /* event == holder means we are referenced through the in event holder */
-       if (holder != &event->holder)
-               fsnotify_destroy_event_holder(holder);
-
+       event = list_first_entry(&group->notification_list,
+                                struct fsnotify_event, list);
+       list_del(&event->list);
        group->q_len--;
 
        return event;
@@ -266,15 +143,10 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
  */
 struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
 {
-       struct fsnotify_event *event;
-       struct fsnotify_event_holder *holder;
-
        BUG_ON(!mutex_is_locked(&group->notification_mutex));
 
-       holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
-       event = holder->event;
-
-       return event;
+       return list_first_entry(&group->notification_list,
+                               struct fsnotify_event, list);
 }
 
 /*
@@ -284,181 +156,31 @@ struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
 void fsnotify_flush_notify(struct fsnotify_group *group)
 {
        struct fsnotify_event *event;
-       struct fsnotify_event_private_data *priv;
 
        mutex_lock(&group->notification_mutex);
        while (!fsnotify_notify_queue_is_empty(group)) {
                event = fsnotify_remove_notify_event(group);
-               /* if they don't implement free_event_priv they better not have attached any */
-               if (group->ops->free_event_priv) {
-                       spin_lock(&event->lock);
-                       priv = fsnotify_remove_priv_from_event(group, event);
-                       spin_unlock(&event->lock);
-                       if (priv)
-                               group->ops->free_event_priv(priv);
-               }
-               fsnotify_put_event(event); /* matches fsnotify_add_notify_event */
+               fsnotify_destroy_event(group, event);
        }
        mutex_unlock(&group->notification_mutex);
 }
 
-static void initialize_event(struct fsnotify_event *event)
-{
-       INIT_LIST_HEAD(&event->holder.event_list);
-       atomic_set(&event->refcnt, 1);
-
-       spin_lock_init(&event->lock);
-
-       INIT_LIST_HEAD(&event->private_data_list);
-}
-
-/*
- * Caller damn well better be holding whatever mutex is protecting the
- * old_holder->event_list and the new_event must be a clean event which
- * cannot be found anywhere else in the kernel.
- */
-int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
-                          struct fsnotify_event *new_event)
-{
-       struct fsnotify_event *old_event = old_holder->event;
-       struct fsnotify_event_holder *new_holder = &new_event->holder;
-
-       enum event_spinlock_class {
-               SPINLOCK_OLD,
-               SPINLOCK_NEW,
-       };
-
-       pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, new_event);
-
-       /*
-        * if the new_event's embedded holder is in use someone
-        * screwed up and didn't give us a clean new event.
-        */
-       BUG_ON(!list_empty(&new_holder->event_list));
-
-       spin_lock_nested(&old_event->lock, SPINLOCK_OLD);
-       spin_lock_nested(&new_event->lock, SPINLOCK_NEW);
-
-       new_holder->event = new_event;
-       list_replace_init(&old_holder->event_list, &new_holder->event_list);
-
-       spin_unlock(&new_event->lock);
-       spin_unlock(&old_event->lock);
-
-       /* event == holder means we are referenced through the in event holder */
-       if (old_holder != &old_event->holder)
-               fsnotify_destroy_event_holder(old_holder);
-
-       fsnotify_get_event(new_event); /* on the list take reference */
-       fsnotify_put_event(old_event); /* off the list, drop reference */
-
-       return 0;
-}
-
-struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event)
-{
-       struct fsnotify_event *event;
-
-       event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
-       if (!event)
-               return NULL;
-
-       pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, event);
-
-       memcpy(event, old_event, sizeof(*event));
-       initialize_event(event);
-
-       if (event->name_len) {
-               event->file_name = kstrdup(old_event->file_name, GFP_KERNEL);
-               if (!event->file_name) {
-                       kmem_cache_free(fsnotify_event_cachep, event);
-                       return NULL;
-               }
-       }
-       event->tgid = get_pid(old_event->tgid);
-       if (event->data_type == FSNOTIFY_EVENT_PATH)
-               path_get(&event->path);
-
-       return event;
-}
-
 /*
  * fsnotify_create_event - Allocate a new event which will be sent to each
  * group's handle_event function if the group was interested in this
  * particular event.
  *
- * @to_tell the inode which is supposed to receive the event (sometimes a
+ * @inode the inode which is supposed to receive the event (sometimes a
  *     parent of the inode to which the event happened.
  * @mask what actually happened.
  * @data pointer to the object which was actually affected
  * @data_type flag indication if the data is a file, path, inode, nothing...
  * @name the filename, if available
  */
-struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data,
-                                            int data_type, const unsigned char *name,
-                                            u32 cookie, gfp_t gfp)
+void fsnotify_init_event(struct fsnotify_event *event, struct inode *inode,
+                        u32 mask)
 {
-       struct fsnotify_event *event;
-
-       event = kmem_cache_zalloc(fsnotify_event_cachep, gfp);
-       if (!event)
-               return NULL;
-
-       pr_debug("%s: event=%p to_tell=%p mask=%x data=%p data_type=%d\n",
-                __func__, event, to_tell, mask, data, data_type);
-
-       initialize_event(event);
-
-       if (name) {
-               event->file_name = kstrdup(name, gfp);
-               if (!event->file_name) {
-                       kmem_cache_free(fsnotify_event_cachep, event);
-                       return NULL;
-               }
-               event->name_len = strlen(event->file_name);
-       }
-
-       event->tgid = get_pid(task_tgid(current));
-       event->sync_cookie = cookie;
-       event->to_tell = to_tell;
-       event->data_type = data_type;
-
-       switch (data_type) {
-       case FSNOTIFY_EVENT_PATH: {
-               struct path *path = data;
-               event->path.dentry = path->dentry;
-               event->path.mnt = path->mnt;
-               path_get(&event->path);
-               break;
-       }
-       case FSNOTIFY_EVENT_INODE:
-               event->inode = data;
-               break;
-       case FSNOTIFY_EVENT_NONE:
-               event->inode = NULL;
-               event->path.dentry = NULL;
-               event->path.mnt = NULL;
-               break;
-       default:
-               BUG();
-       }
-
+       INIT_LIST_HEAD(&event->list);
+       event->inode = inode;
        event->mask = mask;
-
-       return event;
-}
-
-static __init int fsnotify_notification_init(void)
-{
-       fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
-       fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC);
-
-       q_overflow_event = fsnotify_create_event(NULL, FS_Q_OVERFLOW, NULL,
-                                                FSNOTIFY_EVENT_NONE, NULL, 0,
-                                                GFP_KERNEL);
-       if (!q_overflow_event)
-               panic("unable to allocate fsnotify q_overflow_event\n");
-
-       return 0;
 }
-subsys_initcall(fsnotify_notification_init);
index f17e58b32989a53f4f0413065bbd5e07d7901d5d..ce210d4951a1dda3570b902103ba7a14d0b3069b 100644 (file)
@@ -38,7 +38,6 @@ ocfs2-objs := \
        symlink.o               \
        sysfile.o               \
        uptodate.o              \
-       ver.o                   \
        quota_local.o           \
        quota_global.o          \
        xattr.o                 \
index dc7411fe185d99652cc98f93436e1b46bdfd380d..e2edff38be52b6963c32962604b26adfbe682dd4 100644 (file)
@@ -4742,6 +4742,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
                                enum ocfs2_alloc_restarted *reason_ret)
 {
        int status = 0, err = 0;
+       int need_free = 0;
        int free_extents;
        enum ocfs2_alloc_restarted reason = RESTART_NONE;
        u32 bit_off, num_bits;
@@ -4796,7 +4797,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
                                              OCFS2_JOURNAL_ACCESS_WRITE);
        if (status < 0) {
                mlog_errno(status);
-               goto leave;
+               need_free = 1;
+               goto bail;
        }
 
        block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
@@ -4807,7 +4809,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
                                     num_bits, flags, meta_ac);
        if (status < 0) {
                mlog_errno(status);
-               goto leave;
+               need_free = 1;
+               goto bail;
        }
 
        ocfs2_journal_dirty(handle, et->et_root_bh);
@@ -4821,6 +4824,19 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
                reason = RESTART_TRANS;
        }
 
+bail:
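+       /*
+        * We claimed clusters but failed before they made it into the extent
+        * tree; give them back to the allocator they were taken from.
+        */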
+       if (need_free) {
+               if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
+                       ocfs2_free_local_alloc_bits(osb, handle, data_ac,
+                                       bit_off, num_bits);
+               else
+                       ocfs2_free_clusters(handle,
+                                       data_ac->ac_inode,
+                                       data_ac->ac_bh,
+                                       ocfs2_clusters_to_blocks(osb->sb, bit_off),
+                                       num_bits);
+       }
+
 leave:
        if (reason_ret)
                *reason_ret = reason;
@@ -6805,6 +6821,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
                                         struct buffer_head *di_bh)
 {
        int ret, i, has_data, num_pages = 0;
+       int need_free = 0;
+       u32 bit_off, num;
        handle_t *handle;
        u64 uninitialized_var(block);
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
@@ -6850,7 +6868,6 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
        }
 
        if (has_data) {
-               u32 bit_off, num;
                unsigned int page_end;
                u64 phys;
 
@@ -6886,6 +6903,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
                ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
                if (ret) {
                        mlog_errno(ret);
+                       need_free = 1;
                        goto out_commit;
                }
 
@@ -6896,6 +6914,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
                ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
                if (ret) {
                        mlog_errno(ret);
+                       need_free = 1;
                        goto out_commit;
                }
 
@@ -6927,6 +6946,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
                ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL);
                if (ret) {
                        mlog_errno(ret);
+                       need_free = 1;
                        goto out_commit;
                }
 
@@ -6938,6 +6958,18 @@ out_commit:
                dquot_free_space_nodirty(inode,
                                          ocfs2_clusters_to_bytes(osb->sb, 1));
 
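+       /*
+        * If the conversion failed after clusters were claimed, return them
+        * to the allocator they were taken from.
+        */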
+       if (need_free) {
+               if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
+                       ocfs2_free_local_alloc_bits(osb, handle, data_ac,
+                                       bit_off, num);
+               else
+                       ocfs2_free_clusters(handle,
+                                       data_ac->ac_inode,
+                                       data_ac->ac_bh,
+                                       ocfs2_clusters_to_blocks(osb->sb, bit_off),
+                                       num);
+       }
+
        ocfs2_commit_trans(osb, handle);
 
 out_unlock:
@@ -7126,7 +7158,7 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
        if (end > i_size_read(inode))
                end = i_size_read(inode);
 
-       BUG_ON(start >= end);
+       BUG_ON(start > end);
 
        if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
            !(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) ||
@@ -7260,14 +7292,8 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
        start = range->start >> osb->s_clustersize_bits;
        len = range->len >> osb->s_clustersize_bits;
        minlen = range->minlen >> osb->s_clustersize_bits;
-       trimmed = 0;
 
-       if (!len) {
-               range->len = 0;
-               return 0;
-       }
-
-       if (minlen >= osb->bitmap_cpg)
+       if (minlen >= osb->bitmap_cpg || range->len < sb->s_blocksize)
                return -EINVAL;
 
        main_bm_inode = ocfs2_get_system_file_inode(osb,
@@ -7293,6 +7319,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
                goto out_unlock;
        }
 
+       len = range->len >> osb->s_clustersize_bits;
        if (start + len > le32_to_cpu(main_bm->i_clusters))
                len = le32_to_cpu(main_bm->i_clusters) - start;
 
@@ -7307,6 +7334,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
        last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1);
        last_bit = osb->bitmap_cpg;
 
+       trimmed = 0;
        for (group = first_group; group <= last_group;) {
                if (first_bit + len >= osb->bitmap_cpg)
                        last_bit = osb->bitmap_cpg;
index bc8c5e7d8608bc027085aed634d75350071c3d80..1aefc0350ec32db1a83d7bd2197ac58681e5bfc1 100644 (file)
@@ -1,4 +1,4 @@
 obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o
 
 ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \
-       quorum.o tcp.o netdebug.o ver.o
+       quorum.o tcp.o netdebug.o
index bb240647ca5f5e0f47f52c43860ae7d1c3be435a..441c84e169e6f05e819d7331027f6e519d02b3be 100644 (file)
@@ -29,7 +29,6 @@
 #include "heartbeat.h"
 #include "masklog.h"
 #include "sys.h"
-#include "ver.h"
 
 /* for now we operate under the assertion that there can be only one
  * cluster active at a time.  Changing this will require trickling
@@ -945,8 +944,6 @@ static int __init init_o2nm(void)
 {
        int ret = -1;
 
-       cluster_print_version();
-
        ret = o2hb_init();
        if (ret)
                goto out;
@@ -984,6 +981,7 @@ out:
 
 MODULE_AUTHOR("Oracle");
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OCFS2 cluster management");
 
 module_init(init_o2nm)
 module_exit(exit_o2nm)
index 2cd2406b41408b61dc0797e2e87e6f29a84506e0..1828201bc901d9c8d3de37ee6eb5ba4cb5cf6f71 100644 (file)
@@ -1826,7 +1826,7 @@ int o2net_register_hb_callbacks(void)
 
 /* ------------------------------------------------------------ */
 
-static int o2net_accept_one(struct socket *sock)
+static int o2net_accept_one(struct socket *sock, int *more)
 {
        int ret, slen;
        struct sockaddr_in sin;
@@ -1837,6 +1837,7 @@ static int o2net_accept_one(struct socket *sock)
        struct o2net_node *nn;
 
        BUG_ON(sock == NULL);
+       *more = 0;
        ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type,
                               sock->sk->sk_protocol, &new_sock);
        if (ret)
@@ -1848,6 +1849,7 @@ static int o2net_accept_one(struct socket *sock)
        if (ret < 0)
                goto out;
 
+       *more = 1;
        new_sock->sk->sk_allocation = GFP_ATOMIC;
 
        ret = o2net_set_nodelay(new_sock);
@@ -1949,8 +1951,15 @@ out:
 static void o2net_accept_many(struct work_struct *work)
 {
        struct socket *sock = o2net_listen_sock;
-       while (o2net_accept_one(sock) == 0)
+       int     more;
+       int     err;
+
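+       /*
+        * Keep accepting until o2net_accept_one() reports that no more
+        * connections are pending.
+        */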
+       for (;;) {
+               err = o2net_accept_one(sock, &more);
+               if (!more)
+                       break;
                cond_resched();
+       }
 }
 
 static void o2net_listen_data_ready(struct sock *sk, int bytes)
@@ -1964,18 +1973,30 @@ static void o2net_listen_data_ready(struct sock *sk, int bytes)
                goto out;
        }
 
-       /* ->sk_data_ready is also called for a newly established child socket
-        * before it has been accepted and the acceptor has set up their
-        * data_ready.. we only want to queue listen work for our listening
-        * socket */
+       /* This callback may be called twice when a new connection
+        * is being established: a child socket inherits everything
+        * from its parent LISTEN socket, including the parent's
+        * data_ready callback. This leads to a hazard: in
+        * o2net_accept_one() we are still initializing the child
+        * socket and have not yet replaced the inherited data_ready
+        * callback when data starts arriving.
+        * We avoid this hazard by checking the socket state.
+        * For the listening socket the state will be TCP_LISTEN; for
+        * the new socket it will be TCP_ESTABLISHED. Also, in that
+        * case, sk->sk_user_data is not a valid function pointer.
+        */
+
        if (sk->sk_state == TCP_LISTEN) {
                mlog(ML_TCP, "bytes: %d\n", bytes);
                queue_work(o2net_wq, &o2net_listen_work);
+       } else {
+               ready = NULL;
        }
 
 out:
        read_unlock(&sk->sk_callback_lock);
-       ready(sk, bytes);
+       if (ready != NULL)
+               ready(sk, bytes);
 }
 
 static int o2net_open_listening_sock(__be32 addr, __be16 port)
diff --git a/fs/ocfs2/cluster/ver.c b/fs/ocfs2/cluster/ver.c
deleted file mode 100644 (file)
index a56eee6..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ver.c
- *
- * version string
- *
- * Copyright (C) 2002, 2005 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include "ver.h"
-
-#define CLUSTER_BUILD_VERSION "1.5.0"
-
-#define VERSION_STR "OCFS2 Node Manager " CLUSTER_BUILD_VERSION
-
-void cluster_print_version(void)
-{
-       printk(KERN_INFO "%s\n", VERSION_STR);
-}
-
-MODULE_DESCRIPTION(VERSION_STR);
-
-MODULE_VERSION(CLUSTER_BUILD_VERSION);
diff --git a/fs/ocfs2/cluster/ver.h b/fs/ocfs2/cluster/ver.h
deleted file mode 100644 (file)
index 32554c3..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ver.h
- *
- * Function prototypes
- *
- * Copyright (C) 2005 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef O2CLUSTER_VER_H
-#define O2CLUSTER_VER_H
-
-void cluster_print_version(void);
-
-#endif /* O2CLUSTER_VER_H */
index c8a044efbb150653c32d94460fa929944d25181f..bd1aab1f49a437ccf4e44b77ed7391c3c4d219d8 100644 (file)
@@ -3,5 +3,5 @@ ccflags-y := -Ifs/ocfs2
 obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o
 
 ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \
-       dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o
+       dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o
 
index 8b3382abf840d6939a66fb3b99c389a18354768a..33660a4a52fac7f3a2da8439ffe5b45107e211b2 100644 (file)
@@ -43,8 +43,6 @@
 #include "dlmdomain.h"
 #include "dlmdebug.h"
 
-#include "dlmver.h"
-
 #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN)
 #include "cluster/masklog.h"
 
@@ -2328,8 +2326,6 @@ static int __init dlm_init(void)
 {
        int status;
 
-       dlm_print_version();
-
        status = dlm_init_mle_cache();
        if (status) {
                mlog(ML_ERROR, "Could not create o2dlm_mle slabcache\n");
@@ -2379,6 +2375,7 @@ static void __exit dlm_exit (void)
 
 MODULE_AUTHOR("Oracle");
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OCFS2 Distributed Lock Management");
 
 module_init(dlm_init);
 module_exit(dlm_exit);
diff --git a/fs/ocfs2/dlm/dlmver.c b/fs/ocfs2/dlm/dlmver.c
deleted file mode 100644 (file)
index dfc0da4..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmver.c
- *
- * version string
- *
- * Copyright (C) 2002, 2005 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include "dlmver.h"
-
-#define DLM_BUILD_VERSION "1.5.0"
-
-#define VERSION_STR "OCFS2 DLM " DLM_BUILD_VERSION
-
-void dlm_print_version(void)
-{
-       printk(KERN_INFO "%s\n", VERSION_STR);
-}
-
-MODULE_DESCRIPTION(VERSION_STR);
-
-MODULE_VERSION(DLM_BUILD_VERSION);
diff --git a/fs/ocfs2/dlm/dlmver.h b/fs/ocfs2/dlm/dlmver.h
deleted file mode 100644 (file)
index f674aee..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmfsver.h
- *
- * Function prototypes
- *
- * Copyright (C) 2005 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef DLM_VER_H
-#define DLM_VER_H
-
-void dlm_print_version(void);
-
-#endif /* DLM_VER_H */
index f14be89a67016101afb74705399a7724ec6d55bf..eed3db8c5b49295a1f0fc1904b25a27acdf93274 100644 (file)
@@ -2,4 +2,4 @@ ccflags-y := -Ifs/ocfs2
 
 obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o
 
-ocfs2_dlmfs-objs := userdlm.o dlmfs.o dlmfsver.o
+ocfs2_dlmfs-objs := userdlm.o dlmfs.o
index efa2b3d339e3146e91dafcc6359f1db8f14fd96e..09b7d9dac71d567d49d4f23d01511ba0a2c5adb1 100644 (file)
@@ -49,7 +49,6 @@
 
 #include "stackglue.h"
 #include "userdlm.h"
-#include "dlmfsver.h"
 
 #define MLOG_MASK_PREFIX ML_DLMFS
 #include "cluster/masklog.h"
@@ -644,8 +643,6 @@ static int __init init_dlmfs_fs(void)
        int status;
        int cleanup_inode = 0, cleanup_worker = 0;
 
-       dlmfs_print_version();
-
        status = bdi_init(&dlmfs_backing_dev_info);
        if (status)
                return status;
@@ -701,6 +698,7 @@ static void __exit exit_dlmfs_fs(void)
 
 MODULE_AUTHOR("Oracle");
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OCFS2 DLM-Filesystem");
 
 module_init(init_dlmfs_fs)
 module_exit(exit_dlmfs_fs)
diff --git a/fs/ocfs2/dlmfs/dlmfsver.c b/fs/ocfs2/dlmfs/dlmfsver.c
deleted file mode 100644 (file)
index a733b33..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmfsver.c
- *
- * version string
- *
- * Copyright (C) 2002, 2005 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include "dlmfsver.h"
-
-#define DLM_BUILD_VERSION "1.5.0"
-
-#define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION
-
-void dlmfs_print_version(void)
-{
-       printk(KERN_INFO "%s\n", VERSION_STR);
-}
-
-MODULE_DESCRIPTION(VERSION_STR);
-
-MODULE_VERSION(DLM_BUILD_VERSION);
diff --git a/fs/ocfs2/dlmfs/dlmfsver.h b/fs/ocfs2/dlmfs/dlmfsver.h
deleted file mode 100644 (file)
index f35eadb..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmver.h
- *
- * Function prototypes
- *
- * Copyright (C) 2005 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef DLMFS_VER_H
-#define DLMFS_VER_H
-
-void dlmfs_print_version(void);
-
-#endif /* DLMFS_VER_H */
index 3407b2c62b21bbf51981584f6645520048a2a57a..19986959d14948bd1660e116321533efd027bbc1 100644 (file)
@@ -2996,6 +2996,8 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
 
        /* for now, uuid == domain */
        status = ocfs2_cluster_connect(osb->osb_cluster_stack,
+                                      osb->osb_cluster_name,
+                                      strlen(osb->osb_cluster_name),
                                       osb->uuid_str,
                                       strlen(osb->uuid_str),
                                       &lproto, ocfs2_do_node_down, osb,
@@ -3005,7 +3007,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
                goto bail;
        }
 
-       status = ocfs2_cluster_this_node(&osb->node_num);
+       status = ocfs2_cluster_this_node(conn, &osb->node_num);
        if (status < 0) {
                mlog_errno(status);
                mlog(ML_ERROR,
index 6fff128cad16164e0f10ca614db618e0af60768f..a2d20c58ef07030b932982787ebfb626c09236d7 100644 (file)
@@ -185,6 +185,9 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end,
                              file->f_path.dentry->d_name.name,
                              (unsigned long long)datasync);
 
+       if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
+               return 0;
+
        err = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (err)
                return err;
@@ -474,11 +477,6 @@ static int ocfs2_truncate_file(struct inode *inode,
                goto bail;
        }
 
-       /* lets handle the simple truncate cases before doing any more
-        * cluster locking. */
-       if (new_i_size == le64_to_cpu(fe->i_size))
-               goto bail;
-
        down_write(&OCFS2_I(inode)->ip_alloc_sem);
 
        ocfs2_resv_discard(&osb->osb_la_resmap,
@@ -718,7 +716,8 @@ leave:
  * While a write will already be ordering the data, a truncate will not.
  * Thus, we need to explicitly order the zeroed pages.
  */
-static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
+static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode,
+                                               struct buffer_head *di_bh)
 {
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        handle_t *handle = NULL;
@@ -735,7 +734,14 @@ static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
        }
 
        ret = ocfs2_jbd2_file_inode(handle, inode);
-       if (ret < 0)
+       if (ret < 0) {
+               mlog_errno(ret);
+               goto out;
+       }
+
+       ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+                                     OCFS2_JOURNAL_ACCESS_WRITE);
+       if (ret)
                mlog_errno(ret);
 
 out:
@@ -751,7 +757,7 @@ out:
  * to be too fragile to do exactly what we need without us having to
  * worry about recursive locking in ->write_begin() and ->write_end(). */
 static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
-                                u64 abs_to)
+                                u64 abs_to, struct buffer_head *di_bh)
 {
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
@@ -759,6 +765,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
        handle_t *handle = NULL;
        int ret = 0;
        unsigned zero_from, zero_to, block_start, block_end;
+       struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
 
        BUG_ON(abs_from >= abs_to);
        BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
@@ -801,7 +808,8 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
                }
 
                if (!handle) {
-                       handle = ocfs2_zero_start_ordered_transaction(inode);
+                       handle = ocfs2_zero_start_ordered_transaction(inode,
+                                                                     di_bh);
                        if (IS_ERR(handle)) {
                                ret = PTR_ERR(handle);
                                handle = NULL;
@@ -818,8 +826,22 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
                        ret = 0;
        }
 
-       if (handle)
+       if (handle) {
+               /*
+                * fs-writeback will release dirty pages whose offsets are
+                * beyond the inode size without taking the page lock; the
+                * release happens in block_write_full_page_endio().
+                */
+               i_size_write(inode, abs_to);
+               inode->i_blocks = ocfs2_inode_sector_count(inode);
+               di->i_size = cpu_to_le64((u64)i_size_read(inode));
+               inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+               di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
+               di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+               di->i_mtime_nsec = di->i_ctime_nsec;
+               ocfs2_journal_dirty(handle, di_bh);
                ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+       }
 
 out_unlock:
        unlock_page(page);
@@ -915,7 +937,7 @@ out:
  * has made sure that the entire range needs zeroing.
  */
 static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
-                                  u64 range_end)
+                                  u64 range_end, struct buffer_head *di_bh)
 {
        int rc = 0;
        u64 next_pos;
@@ -931,7 +953,7 @@ static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
                next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
                if (next_pos > range_end)
                        next_pos = range_end;
-               rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
+               rc = ocfs2_write_zero_page(inode, zero_pos, next_pos, di_bh);
                if (rc < 0) {
                        mlog_errno(rc);
                        break;
@@ -977,7 +999,7 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
                        range_end = zero_to_size;
 
                ret = ocfs2_zero_extend_range(inode, range_start,
-                                             range_end);
+                                             range_end, di_bh);
                if (ret) {
                        mlog_errno(ret);
                        break;
@@ -1145,14 +1167,14 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
                goto bail_unlock_rw;
        }
 
-       if (size_change && attr->ia_size != i_size_read(inode)) {
+       if (size_change) {
                status = inode_newsize_ok(inode, attr->ia_size);
                if (status)
                        goto bail_unlock;
 
                inode_dio_wait(inode);
 
-               if (i_size_read(inode) > attr->ia_size) {
+               if (i_size_read(inode) >= attr->ia_size) {
                        if (ocfs2_should_order_data(inode)) {
                                status = ocfs2_begin_ordered_truncate(inode,
                                                                      attr->ia_size);
@@ -1869,7 +1891,8 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
        }
        size = sr->l_start + sr->l_len;
 
-       if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) {
+       if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64 ||
+           cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) {
                if (sr->l_len <= 0) {
                        ret = -EINVAL;
                        goto out_inode_unlock;
@@ -2622,7 +2645,16 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
        case SEEK_SET:
                break;
        case SEEK_END:
-               offset += inode->i_size;
+               /* SEEK_END requires the OCFS2 inode lock for the file
+                * because it references the file's size.
+                */
+               ret = ocfs2_inode_lock(inode, NULL, 0);
+               if (ret < 0) {
+                       mlog_errno(ret);
+                       goto out;
+               }
+               offset += i_size_read(inode);
+               ocfs2_inode_unlock(inode, 0);
                break;
        case SEEK_CUR:
                if (offset == 0) {
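[Note] Several of the hunks above thread the dinode buffer_head through ocfs2_zero_extend_range(), which still walks the range one page-sized chunk at a time and zeroes each chunk via ocfs2_write_zero_page(). A rough userspace sketch of that chunking loop (hypothetical helper, 4 KiB pages assumed, no journaling):

    #include <stdint.h>
    #include <sys/types.h>
    #include <unistd.h>

    #define PAGE_SZ 4096ULL                 /* assumed page size */

    /* Zero the byte range [start, end) of fd one page-aligned chunk at a time. */
    static int zero_range(int fd, uint64_t start, uint64_t end)
    {
            static const char zeros[PAGE_SZ];
            uint64_t pos = start;

            while (pos < end) {
                    /* stop at the next page boundary, or at end if that is sooner */
                    uint64_t next = (pos & ~(PAGE_SZ - 1)) + PAGE_SZ;

                    if (next > end)
                            next = end;
                    if (pwrite(fd, zeros, next - pos, pos) != (ssize_t)(next - pos))
                            return -1;
                    pos = next;
            }
            return 0;
    }
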
index fa32ce9b455df9ad7ff06e812ad79eb8f42a4e4b..8ca3c29accbf08f2d7d40aaf53afcee6563a7322 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <linux/fs.h>
 #include <linux/mount.h>
+#include <linux/blkdev.h>
 #include <linux/compat.h>
 
 #include <cluster/masklog.h>
@@ -966,15 +967,21 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        case FITRIM:
        {
                struct super_block *sb = inode->i_sb;
+               struct request_queue *q = bdev_get_queue(sb->s_bdev);
                struct fstrim_range range;
                int ret = 0;
 
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
 
+               if (!blk_queue_discard(q))
+                       return -EOPNOTSUPP;
+
                if (copy_from_user(&range, argp, sizeof(range)))
                        return -EFAULT;
 
+               range.minlen = max_t(u64, q->limits.discard_granularity,
+                                    range.minlen);
                ret = ocfs2_trim_fs(sb, &range);
                if (ret < 0)
                        return ret;
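[Note] The FITRIM hunk above makes the ioctl fail with -EOPNOTSUPP on devices without discard support and raises range.minlen to the device's discard granularity before trimming. For context, this is roughly how the ioctl is driven from userspace (a minimal sketch; the mount point argument is hypothetical):

    #include <fcntl.h>
    #include <linux/fs.h>           /* struct fstrim_range, FITRIM */
    #include <stdio.h>
    #include <sys/ioctl.h>

    int main(int argc, char **argv)
    {
            struct fstrim_range range = {
                    .start  = 0,
                    .len    = (unsigned long long)-1,       /* whole filesystem */
                    .minlen = 0,                            /* kernel may raise this */
            };
            int fd = open(argc > 1 ? argv[1] : "/mnt", O_RDONLY);

            if (fd < 0 || ioctl(fd, FITRIM, &range) < 0) {
                    perror("FITRIM");
                    return 1;
            }
            printf("trimmed %llu bytes\n", (unsigned long long)range.len);
            return 0;
    }
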
index cd5496b7a0a39d4ab7658b59a8480a1bb5d6d1eb..25ec3b712d5ff6f15b3dca1b75e79b9ddb00e0b3 100644 (file)
@@ -781,6 +781,46 @@ bail:
        return status;
 }
 
+int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb,
+                               handle_t *handle,
+                               struct ocfs2_alloc_context *ac,
+                               u32 bit_off,
+                               u32 num_bits)
+{
+       int status, start;
+       struct inode *local_alloc_inode;
+       void *bitmap;
+       struct ocfs2_dinode *alloc;
+       struct ocfs2_local_alloc *la;
+
+       BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
+
+       local_alloc_inode = ac->ac_inode;
+       alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
+       la = OCFS2_LOCAL_ALLOC(alloc);
+
+       bitmap = la->la_bitmap;
+       start = bit_off - le32_to_cpu(la->la_bm_off);
+
+       status = ocfs2_journal_access_di(handle,
+                       INODE_CACHE(local_alloc_inode),
+                       osb->local_alloc_bh,
+                       OCFS2_JOURNAL_ACCESS_WRITE);
+       if (status < 0) {
+               mlog_errno(status);
+               goto bail;
+       }
+
+       le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits);
+
+       while (num_bits--)
+               ocfs2_clear_bit(start++, bitmap);
+       ocfs2_journal_dirty(handle, osb->local_alloc_bh);
+
+bail:
+       return status;
+}
+
 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
 {
        u32 count;
index 1be9b586446086bb9ea81499a10283cedaa5c6f2..44a7d1fb2decc79c108082668432dfe1f641b3b3 100644 (file)
@@ -55,6 +55,12 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
                                 u32 *bit_off,
                                 u32 *num_bits);
 
+int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb,
+                               handle_t *handle,
+                               struct ocfs2_alloc_context *ac,
+                               u32 bit_off,
+                               u32 num_bits);
+
 void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
                                      unsigned int num_clusters);
 void ocfs2_la_enable_worker(struct work_struct *work);
index 631a98213474406b339908968bf7e83157051d34..64c304d668f0347067e5afd7fb4b486b1983a529 100644 (file)
@@ -561,83 +561,6 @@ static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh,
        mlog(0, "found phys_cpos: %u to fit the wanted moving.\n", *phys_cpos);
 }
 
-static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
-                                      handle_t *handle,
-                                      struct buffer_head *di_bh,
-                                      u32 num_bits,
-                                      u16 chain)
-{
-       int ret;
-       u32 tmp_used;
-       struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
-       struct ocfs2_chain_list *cl =
-                               (struct ocfs2_chain_list *) &di->id2.i_chain;
-
-       ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
-                                     OCFS2_JOURNAL_ACCESS_WRITE);
-       if (ret < 0) {
-               mlog_errno(ret);
-               goto out;
-       }
-
-       tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
-       di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
-       le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
-       ocfs2_journal_dirty(handle, di_bh);
-
-out:
-       return ret;
-}
-
-static inline int ocfs2_block_group_set_bits(handle_t *handle,
-                                            struct inode *alloc_inode,
-                                            struct ocfs2_group_desc *bg,
-                                            struct buffer_head *group_bh,
-                                            unsigned int bit_off,
-                                            unsigned int num_bits)
-{
-       int status;
-       void *bitmap = bg->bg_bitmap;
-       int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
-
-       /* All callers get the descriptor via
-        * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
-       BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
-       BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
-
-       mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
-            num_bits);
-
-       if (ocfs2_is_cluster_bitmap(alloc_inode))
-               journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
-
-       status = ocfs2_journal_access_gd(handle,
-                                        INODE_CACHE(alloc_inode),
-                                        group_bh,
-                                        journal_type);
-       if (status < 0) {
-               mlog_errno(status);
-               goto bail;
-       }
-
-       le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
-       if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
-               ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit"
-                           " count %u but claims %u are freed. num_bits %d",
-                           (unsigned long long)le64_to_cpu(bg->bg_blkno),
-                           le16_to_cpu(bg->bg_bits),
-                           le16_to_cpu(bg->bg_free_bits_count), num_bits);
-               return -EROFS;
-       }
-       while (num_bits--)
-               ocfs2_set_bit(bit_off++, bitmap);
-
-       ocfs2_journal_dirty(handle, group_bh);
-
-bail:
-       return status;
-}
-
 static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
                             u32 cpos, u32 phys_cpos, u32 *new_phys_cpos,
                             u32 len, int ext_flags)
index 4f791f6d27d0463f8bef77dc20a5f5274df8ddea..179661a21b613a12368def9546f60db8d69b3fc7 100644 (file)
@@ -644,6 +644,7 @@ static int ocfs2_link(struct dentry *old_dentry,
        struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
        struct ocfs2_dir_lookup_result lookup = { NULL, };
        sigset_t oldset;
+       u64 old_de_ino;
 
        trace_ocfs2_link((unsigned long long)OCFS2_I(inode)->ip_blkno,
                         old_dentry->d_name.len, old_dentry->d_name.name,
@@ -666,6 +667,18 @@ static int ocfs2_link(struct dentry *old_dentry,
                goto out;
        }
 
+       err = ocfs2_lookup_ino_from_name(dir, old_dentry->d_name.name,
+                       old_dentry->d_name.len, &old_de_ino);
+       if (err) {
+               err = -ENOENT;
+               goto out;
+       }
+
+       if (old_de_ino != OCFS2_I(inode)->ip_blkno) {
+               err = -ENOENT;
+               goto out;
+       }
+
        err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
                                        dentry->d_name.len);
        if (err)
@@ -954,6 +967,65 @@ leave:
        return status;
 }
 
+static int ocfs2_check_if_ancestor(struct ocfs2_super *osb,
+               u64 src_inode_no, u64 dest_inode_no)
+{
+       int ret = 0, i = 0;
+       u64 parent_inode_no = 0;
+       u64 child_inode_no = src_inode_no;
+       struct inode *child_inode;
+
+#define MAX_LOOKUP_TIMES 32
+       while (1) {
+               child_inode = ocfs2_iget(osb, child_inode_no, 0, 0);
+               if (IS_ERR(child_inode)) {
+                       ret = PTR_ERR(child_inode);
+                       break;
+               }
+
+               ret = ocfs2_inode_lock(child_inode, NULL, 0);
+               if (ret < 0) {
+                       iput(child_inode);
+                       if (ret != -ENOENT)
+                               mlog_errno(ret);
+                       break;
+               }
+
+               ret = ocfs2_lookup_ino_from_name(child_inode, "..", 2,
+                               &parent_inode_no);
+               ocfs2_inode_unlock(child_inode, 0);
+               iput(child_inode);
+               if (ret < 0) {
+                       ret = -ENOENT;
+                       break;
+               }
+
+               if (parent_inode_no == dest_inode_no) {
+                       ret = 1;
+                       break;
+               }
+
+               if (parent_inode_no == osb->root_inode->i_ino) {
+                       ret = 0;
+                       break;
+               }
+
+               child_inode_no = parent_inode_no;
+
+               if (++i >= MAX_LOOKUP_TIMES) {
+                       mlog(ML_NOTICE, "max lookup times reached, filesystem "
+                                       "may have nested directories, "
+                                       "src inode: %llu, dest inode: %llu.\n",
+                                       (unsigned long long)src_inode_no,
+                                       (unsigned long long)dest_inode_no);
+                       ret = 0;
+                       break;
+               }
+       }
+
+       return ret;
+}
+
 /*
  * The only place this should be used is rename!
  * if they have the same id, then the 1st one is the only one locked.
@@ -965,6 +1037,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
                             struct inode *inode2)
 {
        int status;
+       int inode1_is_ancestor, inode2_is_ancestor;
        struct ocfs2_inode_info *oi1 = OCFS2_I(inode1);
        struct ocfs2_inode_info *oi2 = OCFS2_I(inode2);
        struct buffer_head **tmpbh;
@@ -978,9 +1051,26 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
        if (*bh2)
                *bh2 = NULL;
 
-       /* we always want to lock the one with the lower lockid first. */
+       /* we always want to lock the one with the lower lockid first;
+        * if they are nested, we lock the ancestor first */
        if (oi1->ip_blkno != oi2->ip_blkno) {
-               if (oi1->ip_blkno < oi2->ip_blkno) {
+               inode1_is_ancestor = ocfs2_check_if_ancestor(osb, oi2->ip_blkno,
+                               oi1->ip_blkno);
+               if (inode1_is_ancestor < 0) {
+                       status = inode1_is_ancestor;
+                       goto bail;
+               }
+
+               inode2_is_ancestor = ocfs2_check_if_ancestor(osb, oi1->ip_blkno,
+                               oi2->ip_blkno);
+               if (inode2_is_ancestor < 0) {
+                       status = inode2_is_ancestor;
+                       goto bail;
+               }
+
+               if ((inode1_is_ancestor == 1) ||
+                               (oi1->ip_blkno < oi2->ip_blkno &&
+                               inode2_is_ancestor == 0)) {
                        /* switch id1 and id2 around */
                        tmpbh = bh2;
                        bh2 = bh1;
@@ -1097,6 +1187,22 @@ static int ocfs2_rename(struct inode *old_dir,
                        goto bail;
                }
                rename_lock = 1;
+
+               /* here we cannot guarantee the inodes haven't just been
+                * changed, so check if they are nested again */
+               status = ocfs2_check_if_ancestor(osb, new_dir->i_ino,
+                               old_inode->i_ino);
+               if (status < 0) {
+                       mlog_errno(status);
+                       goto bail;
+               } else if (status == 1) {
+                       status = -EPERM;
+                       mlog(ML_ERROR, "src inode %llu should not be ancestor "
+                               "of new dir inode %llu\n",
+                               (unsigned long long)old_inode->i_ino,
+                               (unsigned long long)new_dir->i_ino);
+                       goto bail;
+               }
        }
 
        /* if old and new are the same, this'll just do one lock. */
@@ -2101,17 +2207,17 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
                goto leave;
        }
 
-       /* remove it from the orphan directory */
-       status = ocfs2_delete_entry(handle, orphan_dir_inode, &lookup);
+       status = ocfs2_journal_access_di(handle,
+                                        INODE_CACHE(orphan_dir_inode),
+                                        orphan_dir_bh,
+                                        OCFS2_JOURNAL_ACCESS_WRITE);
        if (status < 0) {
                mlog_errno(status);
                goto leave;
        }
 
-       status = ocfs2_journal_access_di(handle,
-                                        INODE_CACHE(orphan_dir_inode),
-                                        orphan_dir_bh,
-                                        OCFS2_JOURNAL_ACCESS_WRITE);
+       /* remove it from the orphan directory */
+       status = ocfs2_delete_entry(handle, orphan_dir_inode, &lookup);
        if (status < 0) {
                mlog_errno(status);
                goto leave;
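[Note] ocfs2_check_if_ancestor() added above walks ".." upward from the source inode, bounded by MAX_LOOKUP_TIMES, to decide whether the destination directory is an ancestor; ocfs2_double_lock() and ocfs2_rename() then use the answer to pick a deadlock-free lock order. A userspace analogue of the same bounded parent walk (a sketch, assuming both arguments are directories on one filesystem):

    #include <limits.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/stat.h>

    #define MAX_LOOKUP_TIMES 32

    /* Return 1 if dest is an ancestor directory of src, 0 if not, -1 on error. */
    static int is_ancestor(const char *src, const char *dest)
    {
            struct stat dst, cur, parent;
            char path[PATH_MAX];
            int i;

            if (stat(dest, &dst) < 0 || stat(src, &cur) < 0)
                    return -1;
            snprintf(path, sizeof(path), "%s", src);

            for (i = 0; i < MAX_LOOKUP_TIMES; i++) {
                    /* step one level up, like looking up ".." in the child */
                    if (strlen(path) + 3 >= sizeof(path))
                            return -1;
                    strcat(path, "/..");
                    if (stat(path, &parent) < 0)
                            return -1;
                    if (parent.st_dev == dst.st_dev && parent.st_ino == dst.st_ino)
                            return 1;               /* found the ancestor */
                    if (parent.st_dev == cur.st_dev && parent.st_ino == cur.st_ino)
                            return 0;               /* reached the root */
                    cur = parent;
            }
            return 0;       /* give up after MAX_LOOKUP_TIMES, like the kernel does */
    }
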
index 3a903470c794ec61dace1c03f9446cfe5f388e9c..553f53cc73ae532f7bad618b17f566ecba6bb690 100644 (file)
@@ -387,6 +387,7 @@ struct ocfs2_super
        u8 osb_stackflags;
 
        char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
+       char osb_cluster_name[OCFS2_CLUSTER_NAME_LEN + 1];
        struct ocfs2_cluster_connection *cconn;
        struct ocfs2_lock_res osb_super_lockres;
        struct ocfs2_lock_res osb_rename_lockres;
index bf1f8930456f145343089206ff3ec2469a532ad7..1724d43d3da1626b517b8deb73cd87c47492fd85 100644 (file)
@@ -398,7 +398,8 @@ static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn)
        return 0;
 }
 
-static int o2cb_cluster_this_node(unsigned int *node)
+static int o2cb_cluster_this_node(struct ocfs2_cluster_connection *conn,
+                                 unsigned int *node)
 {
        int node_num;
 
index 286edf1e231f3598b0e33c77297bc8cf0d91ebe2..13a8537d8e8b0b5732fa730dcff2870255a26e07 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/reboot.h>
+#include <linux/sched.h>
 #include <asm/uaccess.h>
 
 #include "stackglue.h"
 #define OCFS2_TEXT_UUID_LEN                    32
 #define OCFS2_CONTROL_MESSAGE_VERNUM_LEN       2
 #define OCFS2_CONTROL_MESSAGE_NODENUM_LEN      8
+#define VERSION_LOCK                           "version_lock"
+
+enum ocfs2_connection_type {
+       WITH_CONTROLD,
+       NO_CONTROLD
+};
 
 /*
  * ocfs2_live_connection is refcounted because the filesystem and
 struct ocfs2_live_connection {
        struct list_head                oc_list;
        struct ocfs2_cluster_connection *oc_conn;
+       enum ocfs2_connection_type      oc_type;
+       atomic_t                        oc_this_node;
+       int                             oc_our_slot;
+       struct dlm_lksb                 oc_version_lksb;
+       char                            oc_lvb[DLM_LVB_LEN];
+       struct completion               oc_sync_wait;
+       wait_queue_head_t               oc_wait;
 };
 
 struct ocfs2_control_private {
@@ -198,20 +212,15 @@ static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
  * mount path.  Since the VFS prevents multiple calls to
  * fill_super(), we can't get dupes here.
  */
-static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn,
-                                    struct ocfs2_live_connection **c_ret)
+static int ocfs2_live_connection_attach(struct ocfs2_cluster_connection *conn,
+                                    struct ocfs2_live_connection *c)
 {
        int rc = 0;
-       struct ocfs2_live_connection *c;
-
-       c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
-       if (!c)
-               return -ENOMEM;
 
        mutex_lock(&ocfs2_control_lock);
        c->oc_conn = conn;
 
-       if (atomic_read(&ocfs2_control_opened))
+       if ((c->oc_type == NO_CONTROLD) || atomic_read(&ocfs2_control_opened))
                list_add(&c->oc_list, &ocfs2_live_connection_list);
        else {
                printk(KERN_ERR
@@ -220,12 +229,6 @@ static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn,
        }
 
        mutex_unlock(&ocfs2_control_lock);
-
-       if (!rc)
-               *c_ret = c;
-       else
-               kfree(c);
-
        return rc;
 }
 
@@ -799,18 +802,251 @@ static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
        return 0;
 }
 
+static void lvb_to_version(char *lvb, struct ocfs2_protocol_version *ver)
+{
+       struct ocfs2_protocol_version *pv =
+               (struct ocfs2_protocol_version *)lvb;
+       /*
+        * ocfs2_protocol_version has two u8 variables, so we don't
+        * need any endian conversion.
+        */
+       ver->pv_major = pv->pv_major;
+       ver->pv_minor = pv->pv_minor;
+}
+
+static void version_to_lvb(struct ocfs2_protocol_version *ver, char *lvb)
+{
+       struct ocfs2_protocol_version *pv =
+               (struct ocfs2_protocol_version *)lvb;
+       /*
+        * ocfs2_protocol_version has two u8 variables, so we don't
+        * need any endian conversion.
+        */
+       pv->pv_major = ver->pv_major;
+       pv->pv_minor = ver->pv_minor;
+}
+
+static void sync_wait_cb(void *arg)
+{
+       struct ocfs2_cluster_connection *conn = arg;
+       struct ocfs2_live_connection *lc = conn->cc_private;
+       complete(&lc->oc_sync_wait);
+}
+
+static int sync_unlock(struct ocfs2_cluster_connection *conn,
+               struct dlm_lksb *lksb, char *name)
+{
+       int error;
+       struct ocfs2_live_connection *lc = conn->cc_private;
+
+       error = dlm_unlock(conn->cc_lockspace, lksb->sb_lkid, 0, lksb, conn);
+       if (error) {
+               printk(KERN_ERR "%s lkid %x error %d\n",
+                               name, lksb->sb_lkid, error);
+               return error;
+       }
+
+       wait_for_completion(&lc->oc_sync_wait);
+
+       if (lksb->sb_status != -DLM_EUNLOCK) {
+               printk(KERN_ERR "%s lkid %x status %d\n",
+                               name, lksb->sb_lkid, lksb->sb_status);
+               return -1;
+       }
+       return 0;
+}
+
+static int sync_lock(struct ocfs2_cluster_connection *conn,
+               int mode, uint32_t flags,
+               struct dlm_lksb *lksb, char *name)
+{
+       int error, status;
+       struct ocfs2_live_connection *lc = conn->cc_private;
+
+       error = dlm_lock(conn->cc_lockspace, mode, lksb, flags,
+                       name, strlen(name),
+                       0, sync_wait_cb, conn, NULL);
+       if (error) {
+               printk(KERN_ERR "%s lkid %x flags %x mode %d error %d\n",
+                               name, lksb->sb_lkid, flags, mode, error);
+               return error;
+       }
+
+       wait_for_completion(&lc->oc_sync_wait);
+
+       status = lksb->sb_status;
+
+       if (status && status != -EAGAIN) {
+               printk(KERN_ERR "%s lkid %x flags %x mode %d status %d\n",
+                               name, lksb->sb_lkid, flags, mode, status);
+       }
+
+       return status;
+}
+
+
+static int version_lock(struct ocfs2_cluster_connection *conn, int mode,
+               int flags)
+{
+       struct ocfs2_live_connection *lc = conn->cc_private;
+       return sync_lock(conn, mode, flags,
+                       &lc->oc_version_lksb, VERSION_LOCK);
+}
+
+static int version_unlock(struct ocfs2_cluster_connection *conn)
+{
+       struct ocfs2_live_connection *lc = conn->cc_private;
+       return sync_unlock(conn, &lc->oc_version_lksb, VERSION_LOCK);
+}
+
+/* get_protocol_version()
+ *
+ * To exchange ocfs2 versioning, we use the LVB of the version dlm lock.
+ * The algorithm is:
+ * 1. Attempt to take the lock in EX mode (non-blocking).
+ * 2. If successful (which means it is the first mount), write the
+ *    version number and downconvert to PR lock.
+ * 3. If unsuccessful (returns -EAGAIN), read the version from the LVB after
+ *    taking the PR lock.
+ */
+
+static int get_protocol_version(struct ocfs2_cluster_connection *conn)
+{
+       int ret;
+       struct ocfs2_live_connection *lc = conn->cc_private;
+       struct ocfs2_protocol_version pv;
+
+       running_proto.pv_major =
+               ocfs2_user_plugin.sp_max_proto.pv_major;
+       running_proto.pv_minor =
+               ocfs2_user_plugin.sp_max_proto.pv_minor;
+
+       lc->oc_version_lksb.sb_lvbptr = lc->oc_lvb;
+       ret = version_lock(conn, DLM_LOCK_EX,
+                       DLM_LKF_VALBLK|DLM_LKF_NOQUEUE);
+       if (!ret) {
+               conn->cc_version.pv_major = running_proto.pv_major;
+               conn->cc_version.pv_minor = running_proto.pv_minor;
+               version_to_lvb(&running_proto, lc->oc_lvb);
+               version_lock(conn, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
+       } else if (ret == -EAGAIN) {
+               ret = version_lock(conn, DLM_LOCK_PR, DLM_LKF_VALBLK);
+               if (ret)
+                       goto out;
+               lvb_to_version(lc->oc_lvb, &pv);
+
+               if ((pv.pv_major != running_proto.pv_major) ||
+                               (pv.pv_minor > running_proto.pv_minor)) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               conn->cc_version.pv_major = pv.pv_major;
+               conn->cc_version.pv_minor = pv.pv_minor;
+       }
+out:
+       return ret;
+}
+
+static void user_recover_prep(void *arg)
+{
+}
+
+static void user_recover_slot(void *arg, struct dlm_slot *slot)
+{
+       struct ocfs2_cluster_connection *conn = arg;
+       printk(KERN_INFO "ocfs2: Node %d/%d down. Initiating recovery.\n",
+                       slot->nodeid, slot->slot);
+       conn->cc_recovery_handler(slot->nodeid, conn->cc_recovery_data);
+
+}
+
+static void user_recover_done(void *arg, struct dlm_slot *slots,
+               int num_slots, int our_slot,
+               uint32_t generation)
+{
+       struct ocfs2_cluster_connection *conn = arg;
+       struct ocfs2_live_connection *lc = conn->cc_private;
+       int i;
+
+       for (i = 0; i < num_slots; i++)
+               if (slots[i].slot == our_slot) {
+                       atomic_set(&lc->oc_this_node, slots[i].nodeid);
+                       break;
+               }
+
+       lc->oc_our_slot = our_slot;
+       wake_up(&lc->oc_wait);
+}
+
+static const struct dlm_lockspace_ops ocfs2_ls_ops = {
+       .recover_prep = user_recover_prep,
+       .recover_slot = user_recover_slot,
+       .recover_done = user_recover_done,
+};
+
+static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
+{
+       version_unlock(conn);
+       dlm_release_lockspace(conn->cc_lockspace, 2);
+       conn->cc_lockspace = NULL;
+       ocfs2_live_connection_drop(conn->cc_private);
+       conn->cc_private = NULL;
+       return 0;
+}
+
 static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
 {
        dlm_lockspace_t *fsdlm;
-       struct ocfs2_live_connection *uninitialized_var(control);
-       int rc = 0;
+       struct ocfs2_live_connection *lc;
+       int rc, ops_rv;
 
        BUG_ON(conn == NULL);
 
-       rc = ocfs2_live_connection_new(conn, &control);
+       lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
+       if (!lc) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       init_waitqueue_head(&lc->oc_wait);
+       init_completion(&lc->oc_sync_wait);
+       atomic_set(&lc->oc_this_node, 0);
+       conn->cc_private = lc;
+       lc->oc_type = NO_CONTROLD;
+
+       rc = dlm_new_lockspace(conn->cc_name, conn->cc_cluster_name,
+                              DLM_LSFL_FS, DLM_LVB_LEN,
+                              &ocfs2_ls_ops, conn, &ops_rv, &fsdlm);
+       if (rc)
+               goto out;
+
+       if (ops_rv == -EOPNOTSUPP) {
+               lc->oc_type = WITH_CONTROLD;
+               printk(KERN_NOTICE "ocfs2: You seem to be using an older "
+                               "version of dlm_controld and/or ocfs2-tools."
+                               " Please consider upgrading.\n");
+       } else if (ops_rv) {
+               rc = ops_rv;
+               goto out;
+       }
+       conn->cc_lockspace = fsdlm;
+
+       rc = ocfs2_live_connection_attach(conn, lc);
        if (rc)
                goto out;
 
+       if (lc->oc_type == NO_CONTROLD) {
+               rc = get_protocol_version(conn);
+               if (rc) {
+                       printk(KERN_ERR "ocfs2: Could not determine"
+                                       " locking version\n");
+                       user_cluster_disconnect(conn);
+                       goto out;
+               }
+               wait_event(lc->oc_wait, (atomic_read(&lc->oc_this_node) > 0));
+       }
+
        /*
         * running_proto must have been set before we allowed any mounts
         * to proceed.
@@ -818,42 +1054,34 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
        if (fs_protocol_compare(&running_proto, &conn->cc_version)) {
                printk(KERN_ERR
                       "Unable to mount with fs locking protocol version "
-                      "%u.%u because the userspace control daemon has "
-                      "negotiated %u.%u\n",
+                      "%u.%u because negotiated protocol is %u.%u\n",
                       conn->cc_version.pv_major, conn->cc_version.pv_minor,
                       running_proto.pv_major, running_proto.pv_minor);
                rc = -EPROTO;
-               ocfs2_live_connection_drop(control);
-               goto out;
-       }
-
-       rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN,
-                              NULL, NULL, NULL, &fsdlm);
-       if (rc) {
-               ocfs2_live_connection_drop(control);
-               goto out;
+               ocfs2_live_connection_drop(lc);
+               lc = NULL;
        }
 
-       conn->cc_private = control;
-       conn->cc_lockspace = fsdlm;
 out:
+       if (rc && lc)
+               kfree(lc);
        return rc;
 }
 
-static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
-{
-       dlm_release_lockspace(conn->cc_lockspace, 2);
-       conn->cc_lockspace = NULL;
-       ocfs2_live_connection_drop(conn->cc_private);
-       conn->cc_private = NULL;
-       return 0;
-}
 
-static int user_cluster_this_node(unsigned int *this_node)
+static int user_cluster_this_node(struct ocfs2_cluster_connection *conn,
+                                 unsigned int *this_node)
 {
        int rc;
+       struct ocfs2_live_connection *lc = conn->cc_private;
+
+       if (lc->oc_type == WITH_CONTROLD)
+               rc = ocfs2_control_get_this_node();
+       else if (lc->oc_type == NO_CONTROLD)
+               rc = atomic_read(&lc->oc_this_node);
+       else
+               rc = -EINVAL;
 
-       rc = ocfs2_control_get_this_node();
        if (rc < 0)
                return rc;
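[Note] get_protocol_version() added above implements a "first mounter publishes, later mounters read" handshake on the version lock's LVB: try the lock in EX without queueing, write the running version and downconvert to PR on success, otherwise take PR and read what the first mounter wrote. A loose userspace analogue of that handshake using flock() on a scratch file (hedged sketch, not the fs/dlm API):

    #include <fcntl.h>
    #include <sys/file.h>
    #include <unistd.h>

    /* Negotiate a two-byte major/minor version through a shared lock file. */
    static int negotiate_version(const char *path, unsigned char ver[2])
    {
            int fd = open(path, O_RDWR | O_CREAT, 0644);

            if (fd < 0)
                    return -1;

            if (flock(fd, LOCK_EX | LOCK_NB) == 0) {
                    /* first one in: publish our version ("write the LVB") */
                    if (pwrite(fd, ver, 2, 0) != 2)
                            goto fail;
                    flock(fd, LOCK_SH);     /* downconvert exclusive -> shared */
            } else {
                    /* someone negotiated already: wait for the shared lock and read */
                    if (flock(fd, LOCK_SH) < 0 || pread(fd, ver, 2, 0) != 2)
                            goto fail;
            }
            return fd;      /* caller keeps it open (and shared-locked) while mounted */
    fail:
            close(fd);
            return -1;
    }
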
 
index cb7ec0b63ddc503939888d153c0b9617400d52f7..1324e6600e57378b8e4d1624dc1366af57fde7b2 100644 (file)
@@ -309,6 +309,8 @@ int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino,
 EXPORT_SYMBOL_GPL(ocfs2_plock);
 
 int ocfs2_cluster_connect(const char *stack_name,
+                         const char *cluster_name,
+                         int cluster_name_len,
                          const char *group,
                          int grouplen,
                          struct ocfs2_locking_protocol *lproto,
@@ -342,8 +344,10 @@ int ocfs2_cluster_connect(const char *stack_name,
                goto out;
        }
 
-       memcpy(new_conn->cc_name, group, grouplen);
+       strlcpy(new_conn->cc_name, group, GROUP_NAME_MAX + 1);
        new_conn->cc_namelen = grouplen;
+       strlcpy(new_conn->cc_cluster_name, cluster_name, CLUSTER_NAME_MAX + 1);
+       new_conn->cc_cluster_name_len = cluster_name_len;
        new_conn->cc_recovery_handler = recovery_handler;
        new_conn->cc_recovery_data = recovery_data;
 
@@ -386,8 +390,9 @@ int ocfs2_cluster_connect_agnostic(const char *group,
 
        if (cluster_stack_name[0])
                stack_name = cluster_stack_name;
-       return ocfs2_cluster_connect(stack_name, group, grouplen, lproto,
-                                    recovery_handler, recovery_data, conn);
+       return ocfs2_cluster_connect(stack_name, NULL, 0, group, grouplen,
+                                    lproto, recovery_handler, recovery_data,
+                                    conn);
 }
 EXPORT_SYMBOL_GPL(ocfs2_cluster_connect_agnostic);
 
@@ -460,9 +465,10 @@ void ocfs2_cluster_hangup(const char *group, int grouplen)
 }
 EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup);
 
-int ocfs2_cluster_this_node(unsigned int *node)
+int ocfs2_cluster_this_node(struct ocfs2_cluster_connection *conn,
+                           unsigned int *node)
 {
-       return active_stack->sp_ops->this_node(node);
+       return active_stack->sp_ops->this_node(conn, node);
 }
 EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node);
 
index 1ec56fdb8d0d91752682bc532132dc6ae9cc029b..66334a30cea80b2555dc9658a7a4b71d2284d480 100644 (file)
@@ -45,6 +45,9 @@ struct file_lock;
  */
 #define GROUP_NAME_MAX         64
 
+/* This shadows OCFS2_CLUSTER_NAME_LEN */
+#define CLUSTER_NAME_MAX       16
+
 
 /*
  * ocfs2_protocol_version changes when ocfs2 does something different in
@@ -97,8 +100,10 @@ struct ocfs2_locking_protocol {
  * locking compatibility.
  */
 struct ocfs2_cluster_connection {
-       char cc_name[GROUP_NAME_MAX];
+       char cc_name[GROUP_NAME_MAX + 1];
        int cc_namelen;
+       char cc_cluster_name[CLUSTER_NAME_MAX + 1];
+       int cc_cluster_name_len;
        struct ocfs2_protocol_version cc_version;
        struct ocfs2_locking_protocol *cc_proto;
        void (*cc_recovery_handler)(int node_num, void *recovery_data);
@@ -152,7 +157,8 @@ struct ocfs2_stack_operations {
         * ->this_node() returns the cluster's unique identifier for the
         * local node.
         */
-       int (*this_node)(unsigned int *node);
+       int (*this_node)(struct ocfs2_cluster_connection *conn,
+                        unsigned int *node);
 
        /*
         * Call the underlying dlm lock function.  The ->dlm_lock()
@@ -239,6 +245,8 @@ struct ocfs2_stack_plugin {
 
 /* Used by the filesystem */
 int ocfs2_cluster_connect(const char *stack_name,
+                         const char *cluster_name,
+                         int cluster_name_len,
                          const char *group,
                          int grouplen,
                          struct ocfs2_locking_protocol *lproto,
@@ -260,7 +268,8 @@ int ocfs2_cluster_connect_agnostic(const char *group,
 int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn,
                             int hangup_pending);
 void ocfs2_cluster_hangup(const char *group, int grouplen);
-int ocfs2_cluster_this_node(unsigned int *node);
+int ocfs2_cluster_this_node(struct ocfs2_cluster_connection *conn,
+                           unsigned int *node);
 
 struct ocfs2_lock_res;
 int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn,
index 2c91452c4047bc3a9ff324a3ba5e2e0fbfd7d09f..47ae2663a6f51c591e841e36bd043e5c889c0125 100644 (file)
@@ -113,12 +113,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
                                     struct ocfs2_suballoc_result *res);
 static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
                                         int nr);
-static inline int ocfs2_block_group_set_bits(handle_t *handle,
-                                            struct inode *alloc_inode,
-                                            struct ocfs2_group_desc *bg,
-                                            struct buffer_head *group_bh,
-                                            unsigned int bit_off,
-                                            unsigned int num_bits);
 static int ocfs2_relink_block_group(handle_t *handle,
                                    struct inode *alloc_inode,
                                    struct buffer_head *fe_bh,
@@ -1343,7 +1337,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
        return status;
 }
 
-static inline int ocfs2_block_group_set_bits(handle_t *handle,
+int ocfs2_block_group_set_bits(handle_t *handle,
                                             struct inode *alloc_inode,
                                             struct ocfs2_group_desc *bg,
                                             struct buffer_head *group_bh,
@@ -1388,8 +1382,6 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
        ocfs2_journal_dirty(handle, group_bh);
 
 bail:
-       if (status)
-               mlog_errno(status);
        return status;
 }
 
@@ -1588,7 +1580,7 @@ static int ocfs2_block_group_search(struct inode *inode,
        return ret;
 }
 
-static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
+int ocfs2_alloc_dinode_update_counts(struct inode *inode,
                                       handle_t *handle,
                                       struct buffer_head *di_bh,
                                       u32 num_bits,
index a36d0aa50911056231129a31cf3a2c7c6151cd44..218d8036b3e70f988da136475e8b1daaca7f6745 100644 (file)
@@ -86,6 +86,18 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
                           u32 bits_wanted,
                           struct ocfs2_alloc_context **ac);
 
+int ocfs2_alloc_dinode_update_counts(struct inode *inode,
+                        handle_t *handle,
+                        struct buffer_head *di_bh,
+                        u32 num_bits,
+                        u16 chain);
+int ocfs2_block_group_set_bits(handle_t *handle,
+                        struct inode *alloc_inode,
+                        struct ocfs2_group_desc *bg,
+                        struct buffer_head *group_bh,
+                        unsigned int bit_off,
+                        unsigned int num_bits);
+
 int ocfs2_claim_metadata(handle_t *handle,
                         struct ocfs2_alloc_context *ac,
                         u32 bits_wanted,
index c41492957aa5f4ab4f7aa7a056ac2715a352c2b7..49d84f80f36ce96ca61d5c5abd81e915aff40635 100644 (file)
@@ -68,7 +68,6 @@
 #include "super.h"
 #include "sysfile.h"
 #include "uptodate.h"
-#include "ver.h"
 #include "xattr.h"
 #include "quota.h"
 #include "refcounttree.h"
@@ -90,6 +89,7 @@ static struct dentry *ocfs2_debugfs_root = NULL;
 
 MODULE_AUTHOR("Oracle");
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OCFS2 cluster file system");
 
 struct mount_options
 {
@@ -1618,8 +1618,6 @@ static int __init ocfs2_init(void)
 {
        int status, i;
 
-       ocfs2_print_version();
-
        for (i = 0; i < OCFS2_IOEND_WQ_HASH_SZ; i++)
                init_waitqueue_head(&ocfs2__ioend_wq[i]);
 
@@ -1947,11 +1945,15 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 
        ocfs2_shutdown_local_alloc(osb);
 
-       ocfs2_truncate_log_shutdown(osb);
-
        /* This will disable recovery and flush any recovery work. */
        ocfs2_recovery_exit(osb);
 
+       /*
+        * Recovering another node during dismount may call ocfs2_recover_orphans
+        * and queue osb_truncate_log_wq, so shut down the truncate log after recovery.
+        */
+       ocfs2_truncate_log_shutdown(osb);
+
        ocfs2_journal_shutdown(osb);
 
        ocfs2_sync_blockdev(sb);
@@ -2225,10 +2227,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
        if (ocfs2_clusterinfo_valid(osb)) {
                osb->osb_stackflags =
                        OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags;
-               memcpy(osb->osb_cluster_stack,
+               strlcpy(osb->osb_cluster_stack,
                       OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
-                      OCFS2_STACK_LABEL_LEN);
-               osb->osb_cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
+                      OCFS2_STACK_LABEL_LEN + 1);
                if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) {
                        mlog(ML_ERROR,
                             "couldn't mount because of an invalid "
@@ -2237,6 +2238,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
                        status = -EINVAL;
                        goto bail;
                }
+               strlcpy(osb->osb_cluster_name,
+                       OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster,
+                       OCFS2_CLUSTER_NAME_LEN + 1);
        } else {
                /* The empty string is identical with classic tools that
                 * don't know about s_cluster_info. */
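[Note] The ocfs2_initialize_super() hunk above replaces memcpy() plus manual NUL termination with strlcpy(), which copies at most size - 1 bytes and always terminates the destination. Userspace libc does not necessarily provide strlcpy; a small stand-in with the kernel's semantics (returns the length of src, so truncation can be detected):

    #include <stddef.h>
    #include <string.h>

    /* Copy src into dst of the given size; always NUL-terminate; return strlen(src). */
    static size_t strlcpy_compat(char *dst, const char *src, size_t size)
    {
            size_t len = strlen(src);

            if (size) {
                    size_t n = (len >= size) ? size - 1 : len;

                    memcpy(dst, src, n);
                    dst[n] = '\0';
            }
            return len;
    }
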
diff --git a/fs/ocfs2/ver.c b/fs/ocfs2/ver.c
deleted file mode 100644 (file)
index e2488f4..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ver.c
- *
- * version string
- *
- * Copyright (C) 2002, 2005 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-
-#include "ver.h"
-
-#define OCFS2_BUILD_VERSION "1.5.0"
-
-#define VERSION_STR "OCFS2 " OCFS2_BUILD_VERSION
-
-void ocfs2_print_version(void)
-{
-       printk(KERN_INFO "%s\n", VERSION_STR);
-}
-
-MODULE_DESCRIPTION(VERSION_STR);
-
-MODULE_VERSION(OCFS2_BUILD_VERSION);
diff --git a/fs/ocfs2/ver.h b/fs/ocfs2/ver.h
deleted file mode 100644 (file)
index d7395cb..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ver.h
- *
- * Function prototypes
- *
- * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef OCFS2_VER_H
-#define OCFS2_VER_H
-
-void ocfs2_print_version(void);
-
-#endif /* OCFS2_VER_H */
index 0e0752ef27159f6183dabc1749e49fca8494319a..78fd0d0788dbc465c3d7a6ba674bb64a6ebea25d 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -663,10 +663,11 @@ out:
                wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
        }
-       if (ret > 0) {
+       if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
                int err = file_update_time(filp);
                if (err)
                        ret = err;
+               sb_end_write(file_inode(filp)->i_sb);
        }
        return ret;
 }
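
The pipe write path now takes a freeze-protection reference before updating the file timestamp: sb_start_write_trylock() returns false instead of blocking when the filesystem is frozen, so the timestamp update is simply skipped rather than stalling against a frozen superblock. A hedged sketch of the same pattern as a standalone helper:

/* Sketch: update the timestamp only if the filesystem is not frozen.
 * sb_start_write_trylock() returns false rather than blocking while a
 * freeze is in progress. */
static int update_time_if_unfrozen(struct file *filp)
{
	struct super_block *sb = file_inode(filp)->i_sb;
	int err = 0;

	if (sb_start_write_trylock(sb)) {
		err = file_update_time(filp);	/* may dirty the inode */
		sb_end_write(sb);		/* drop the freeze reference */
	}
	return err;
}
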
index 8bd2135b7f82ce181445f8e4804d4b226dca4891..551e61ba15b6047c909594968163eaf26c8cb605 100644 (file)
 
 #include <linux/errno.h>
 
-EXPORT_SYMBOL(posix_acl_init);
-EXPORT_SYMBOL(posix_acl_alloc);
-EXPORT_SYMBOL(posix_acl_valid);
-EXPORT_SYMBOL(posix_acl_equiv_mode);
-EXPORT_SYMBOL(posix_acl_from_mode);
+struct posix_acl **acl_by_type(struct inode *inode, int type)
+{
+       switch (type) {
+       case ACL_TYPE_ACCESS:
+               return &inode->i_acl;
+       case ACL_TYPE_DEFAULT:
+               return &inode->i_default_acl;
+       default:
+               BUG();
+       }
+}
+EXPORT_SYMBOL(acl_by_type);
+
+struct posix_acl *get_cached_acl(struct inode *inode, int type)
+{
+       struct posix_acl **p = acl_by_type(inode, type);
+       struct posix_acl *acl = ACCESS_ONCE(*p);
+       if (acl) {
+               spin_lock(&inode->i_lock);
+               acl = *p;
+               if (acl != ACL_NOT_CACHED)
+                       acl = posix_acl_dup(acl);
+               spin_unlock(&inode->i_lock);
+       }
+       return acl;
+}
+EXPORT_SYMBOL(get_cached_acl);
+
+struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type)
+{
+       return rcu_dereference(*acl_by_type(inode, type));
+}
+EXPORT_SYMBOL(get_cached_acl_rcu);
+
+void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl)
+{
+       struct posix_acl **p = acl_by_type(inode, type);
+       struct posix_acl *old;
+       spin_lock(&inode->i_lock);
+       old = *p;
+       rcu_assign_pointer(*p, posix_acl_dup(acl));
+       spin_unlock(&inode->i_lock);
+       if (old != ACL_NOT_CACHED)
+               posix_acl_release(old);
+}
+EXPORT_SYMBOL(set_cached_acl);
+
+void forget_cached_acl(struct inode *inode, int type)
+{
+       struct posix_acl **p = acl_by_type(inode, type);
+       struct posix_acl *old;
+       spin_lock(&inode->i_lock);
+       old = *p;
+       *p = ACL_NOT_CACHED;
+       spin_unlock(&inode->i_lock);
+       if (old != ACL_NOT_CACHED)
+               posix_acl_release(old);
+}
+EXPORT_SYMBOL(forget_cached_acl);
+
+void forget_all_cached_acls(struct inode *inode)
+{
+       struct posix_acl *old_access, *old_default;
+       spin_lock(&inode->i_lock);
+       old_access = inode->i_acl;
+       old_default = inode->i_default_acl;
+       inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
+       spin_unlock(&inode->i_lock);
+       if (old_access != ACL_NOT_CACHED)
+               posix_acl_release(old_access);
+       if (old_default != ACL_NOT_CACHED)
+               posix_acl_release(old_default);
+}
+EXPORT_SYMBOL(forget_all_cached_acls);
 
 /*
  * Init a fresh posix_acl
@@ -37,6 +106,7 @@ posix_acl_init(struct posix_acl *acl, int count)
        atomic_set(&acl->a_refcount, 1);
        acl->a_count = count;
 }
+EXPORT_SYMBOL(posix_acl_init);
 
 /*
  * Allocate a new ACL with the specified number of entries.
@@ -51,6 +121,7 @@ posix_acl_alloc(int count, gfp_t flags)
                posix_acl_init(acl, count);
        return acl;
 }
+EXPORT_SYMBOL(posix_acl_alloc);
 
 /*
  * Clone an ACL.
@@ -78,8 +149,6 @@ posix_acl_valid(const struct posix_acl *acl)
 {
        const struct posix_acl_entry *pa, *pe;
        int state = ACL_USER_OBJ;
-       kuid_t prev_uid = INVALID_UID;
-       kgid_t prev_gid = INVALID_GID;
        int needs_mask = 0;
 
        FOREACH_ACL_ENTRY(pa, acl, pe) {
@@ -98,10 +167,6 @@ posix_acl_valid(const struct posix_acl *acl)
                                        return -EINVAL;
                                if (!uid_valid(pa->e_uid))
                                        return -EINVAL;
-                               if (uid_valid(prev_uid) &&
-                                   uid_lte(pa->e_uid, prev_uid))
-                                       return -EINVAL;
-                               prev_uid = pa->e_uid;
                                needs_mask = 1;
                                break;
 
@@ -117,10 +182,6 @@ posix_acl_valid(const struct posix_acl *acl)
                                        return -EINVAL;
                                if (!gid_valid(pa->e_gid))
                                        return -EINVAL;
-                               if (gid_valid(prev_gid) &&
-                                   gid_lte(pa->e_gid, prev_gid))
-                                       return -EINVAL;
-                               prev_gid = pa->e_gid;
                                needs_mask = 1;
                                break;
 
@@ -146,6 +207,7 @@ posix_acl_valid(const struct posix_acl *acl)
                return 0;
        return -EINVAL;
 }
+EXPORT_SYMBOL(posix_acl_valid);
 
 /*
  * Returns 0 if the acl can be exactly represented in the traditional
@@ -186,6 +248,7 @@ posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p)
                 *mode_p = (*mode_p & ~S_IRWXUGO) | mode;
         return not_equiv;
 }
+EXPORT_SYMBOL(posix_acl_equiv_mode);
 
 /*
  * Create an ACL representing the file mode permission bits of an inode.
@@ -207,6 +270,7 @@ posix_acl_from_mode(umode_t mode, gfp_t flags)
        acl->a_entries[2].e_perm = (mode & S_IRWXO);
        return acl;
 }
+EXPORT_SYMBOL(posix_acl_from_mode);
 
 /*
  * Return 0 if current is granted want access to the inode
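
The newly exported helpers above centralize the per-inode ACL cache: acl_by_type() maps an ACL type to the matching inode slot, get_cached_acl() returns a referenced copy under i_lock (or ACL_NOT_CACHED), and set_cached_acl()/forget_cached_acl() swap the slot and release the old entry. An illustrative sketch of how a filesystem's ->get_acl() might use them (myfs_read_acl_from_disk() is a made-up helper):

/* Illustrative only: a filesystem's ->get_acl() built on the cache
 * helpers.  myfs_read_acl_from_disk() is a hypothetical function. */
static struct posix_acl *myfs_get_acl(struct inode *inode, int type)
{
	struct posix_acl *acl;

	acl = get_cached_acl(inode, type);
	if (acl != ACL_NOT_CACHED)
		return acl;		/* cache hit; may be NULL (no ACL) */

	acl = myfs_read_acl_from_disk(inode, type);
	if (!IS_ERR(acl))
		set_cached_acl(inode, type, acl);
	return acl;
}
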
index 1bd2077187fd000a621e2f10453ce0661e908c02..656e401794de5f8e2b03e975130cd3546ed5ed27 100644 (file)
@@ -140,24 +140,15 @@ static const char * const task_state_array[] = {
        "t (tracing stop)",     /*   8 */
        "Z (zombie)",           /*  16 */
        "X (dead)",             /*  32 */
-       "x (dead)",             /*  64 */
-       "K (wakekill)",         /* 128 */
-       "W (waking)",           /* 256 */
-       "P (parked)",           /* 512 */
 };
 
 static inline const char *get_task_state(struct task_struct *tsk)
 {
-       unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
-       const char * const *p = &task_state_array[0];
+       unsigned int state = (tsk->state | tsk->exit_state) & TASK_REPORT;
 
-       BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array));
+       BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array)-1);
 
-       while (state) {
-               p++;
-               state >>= 1;
-       }
-       return *p;
+       return task_state_array[fls(state)];
 }
 
 static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
@@ -453,8 +444,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
                                min_flt += t->min_flt;
                                maj_flt += t->maj_flt;
                                gtime += task_gtime(t);
-                               t = next_thread(t);
-                       } while (t != task);
+                       } while_each_thread(task, t);
 
                        min_flt += sig->min_flt;
                        maj_flt += sig->maj_flt;
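
The rewritten get_task_state() masks the combined state down to the reportable bits and indexes the table with fls(): bit value 2^(n-1) maps to entry n, and zero (running) maps to entry 0, which is exactly what the trimmed task_state_array expects. A worked example of the mapping, using the bit values shown in the array comments above:

/* Worked example of the fls() mapping, using the bit values from the
 * array comments above:
 *   state == 0                       -> fls(0)  == 0 -> "R (running)"
 *   state == TASK_INTERRUPTIBLE (1)  -> fls(1)  == 1 -> "S (sleeping)"
 *   state == EXIT_ZOMBIE (16)        -> fls(16) == 5 -> "Z (zombie)"
 *   state == EXIT_DEAD (32)          -> fls(32) == 6 -> "X (dead)"
 */
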
index 03c8d747be48be2a14e93fed94355d42c5d7bd52..51507065263b29e3b915a6aa2238630fe3f7ac67 100644 (file)
@@ -1658,13 +1658,18 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
        return 0;
 }
 
+static inline bool proc_inode_is_dead(struct inode *inode)
+{
+       return !proc_pid(inode)->tasks[PIDTYPE_PID].first;
+}
+
 int pid_delete_dentry(const struct dentry *dentry)
 {
        /* Is the task we represent dead?
         * If so, then don't put the dentry on the lru list,
         * kill it immediately.
         */
-       return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
+       return proc_inode_is_dead(dentry->d_inode);
 }
 
 const struct dentry_operations pid_dentry_operations =
@@ -3092,34 +3097,42 @@ out_no_task:
  * In the case of a seek we start with the leader and walk nr
  * threads past it.
  */
-static struct task_struct *first_tid(struct task_struct *leader,
-               int tid, int nr, struct pid_namespace *ns)
+static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos,
+                                       struct pid_namespace *ns)
 {
-       struct task_struct *pos;
+       struct task_struct *pos, *task;
+       unsigned long nr = f_pos;
+
+       if (nr != f_pos)        /* 32bit overflow? */
+               return NULL;
 
        rcu_read_lock();
-       /* Attempt to start with the pid of a thread */
-       if (tid && (nr > 0)) {
+       task = pid_task(pid, PIDTYPE_PID);
+       if (!task)
+               goto fail;
+
+       /* Attempt to start with the tid of a thread */
+       if (tid && nr) {
                pos = find_task_by_pid_ns(tid, ns);
-               if (pos && (pos->group_leader == leader))
+               if (pos && same_thread_group(pos, task))
                        goto found;
        }
 
        /* If nr exceeds the number of threads there is nothing todo */
-       pos = NULL;
-       if (nr && nr >= get_nr_threads(leader))
-               goto out;
+       if (nr >= get_nr_threads(task))
+               goto fail;
 
        /* If we haven't found our starting place yet start
         * with the leader and walk nr threads forward.
         */
-       for (pos = leader; nr > 0; --nr) {
-               pos = next_thread(pos);
-               if (pos == leader) {
-                       pos = NULL;
-                       goto out;
-               }
-       }
+       pos = task = task->group_leader;
+       do {
+               if (!nr--)
+                       goto found;
+       } while_each_thread(task, pos);
+fail:
+       pos = NULL;
+       goto out;
 found:
        get_task_struct(pos);
 out:
@@ -3152,25 +3165,16 @@ static struct task_struct *next_tid(struct task_struct *start)
 /* for the /proc/TGID/task/ directories */
 static int proc_task_readdir(struct file *file, struct dir_context *ctx)
 {
-       struct task_struct *leader = NULL;
-       struct task_struct *task = get_proc_task(file_inode(file));
+       struct inode *inode = file_inode(file);
+       struct task_struct *task;
        struct pid_namespace *ns;
        int tid;
 
-       if (!task)
-               return -ENOENT;
-       rcu_read_lock();
-       if (pid_alive(task)) {
-               leader = task->group_leader;
-               get_task_struct(leader);
-       }
-       rcu_read_unlock();
-       put_task_struct(task);
-       if (!leader)
+       if (proc_inode_is_dead(inode))
                return -ENOENT;
 
        if (!dir_emit_dots(file, ctx))
-               goto out;
+               return 0;
 
        /* f_version caches the tgid value that the last readdir call couldn't
         * return. lseek aka telldir automagically resets f_version to 0.
@@ -3178,7 +3182,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
        ns = file->f_dentry->d_sb->s_fs_info;
        tid = (int)file->f_version;
        file->f_version = 0;
-       for (task = first_tid(leader, tid, ctx->pos - 2, ns);
+       for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
             task;
             task = next_tid(task), ctx->pos++) {
                char name[PROC_NUMBUF];
@@ -3194,8 +3198,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
                        break;
                }
        }
-out:
-       put_task_struct(leader);
+
        return 0;
 }
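
first_tid() now takes the directory's struct pid rather than a pinned group leader, resolves the task under rcu_read_lock(), and accepts any live thread of the group (same_thread_group()) as the cached seek position, so proc_task_readdir() no longer needs its own get/put_task_struct() bookkeeping. A reduced sketch of the new lookup shape (error handling and the nr-walk trimmed):

/* Reduced sketch of the new lookup (the nr-walk and error paths are
 * trimmed); pid is the /proc entry's struct pid, tid the cached hint. */
static struct task_struct *tid_seek_hint(struct pid *pid, pid_t tid,
					 struct pid_namespace *ns)
{
	struct task_struct *task, *pos = NULL;

	rcu_read_lock();
	task = pid_task(pid, PIDTYPE_PID);	/* NULL once the group is gone */
	if (task && tid) {
		pos = find_task_by_pid_ns(tid, ns);
		if (pos && same_thread_group(pos, task))
			get_task_struct(pos);	/* hint is still a live thread */
		else
			pos = NULL;
	}
	rcu_read_unlock();
	return pos;
}
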
 
index 82676e3fcd1d096c74a9b5bca2f7f29335b336be..cbd82dff7e81aeecadacb219ba96ec01ae4f5202 100644 (file)
@@ -26,4 +26,4 @@ static int __init proc_cmdline_init(void)
        proc_create("cmdline", 0, NULL, &cmdline_proc_fops);
        return 0;
 }
-module_init(proc_cmdline_init);
+fs_initcall(proc_cmdline_init);
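
This hunk, and the many identical ones below, switch always-built-in /proc registrations from module_init() to fs_initcall(), which runs at an earlier initcall level so the entries exist sooner during boot. Roughly, from include/linux/init.h of this era (paraphrased; exact macro bodies and level numbers are from memory):

/* Paraphrased from include/linux/init.h; exact macro bodies differ by
 * kernel version, but the level ordering is what matters here. */
#define fs_initcall(fn)		__define_initcall(fn, 5)
#define device_initcall(fn)	__define_initcall(fn, 6)
#define module_init(x)		__initcall(x)	/* device_initcall() when built in */
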
index 51942d5abcecb43d8a081fefa830a97f52d05b76..290ba85cb9002e7f0e9d6dd21dbbdbc4dd56e006 100644 (file)
@@ -109,4 +109,4 @@ static int __init proc_consoles_init(void)
        proc_create("consoles", 0, NULL, &proc_consoles_operations);
        return 0;
 }
-module_init(proc_consoles_init);
+fs_initcall(proc_consoles_init);
index 5a1e539a234bf3b55eda8a1d183537cc70bf03d8..06f4d31e0396dfe4d2187c1a70f62aa8401e1d0e 100644 (file)
@@ -21,4 +21,4 @@ static int __init proc_cpuinfo_init(void)
        proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations);
        return 0;
 }
-module_init(proc_cpuinfo_init);
+fs_initcall(proc_cpuinfo_init);
index b14347167c35a49ea2e55a219bc4f298ea156458..50493edc30e56e157a6f37407cce84ff60a05de2 100644 (file)
@@ -67,4 +67,4 @@ static int __init proc_devices_init(void)
        proc_create("devices", 0, NULL, &proc_devinfo_operations);
        return 0;
 }
-module_init(proc_devices_init);
+fs_initcall(proc_devices_init);
index cca93b6fb9a9e841cc480308968771125fb7cd87..b7f268eb5f45251ae1977c643540e081c6612546 100644 (file)
@@ -49,8 +49,7 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
        setattr_copy(inode, iattr);
        mark_inode_dirty(inode);
 
-       de->uid = inode->i_uid;
-       de->gid = inode->i_gid;
+       proc_set_user(de, inode->i_uid, inode->i_gid);
        de->mode = inode->i_mode;
        return 0;
 }
index 05029c0e2f2479d072cb3402ea965d361b1dbb35..a352d5703b4196112c9e581905ef467f28d4f133 100644 (file)
@@ -50,4 +50,4 @@ static int __init proc_interrupts_init(void)
        proc_create("interrupts", 0, NULL, &proc_interrupts_operations);
        return 0;
 }
-module_init(proc_interrupts_init);
+fs_initcall(proc_interrupts_init);
index 5ed0e52d6aa02bcd4e4867bd1cd36f8633f9f2f1..39e6ef32f0bd6a3483f8771606a88801eb56ccc6 100644 (file)
@@ -639,4 +639,4 @@ static int __init proc_kcore_init(void)
 
        return 0;
 }
-module_init(proc_kcore_init);
+fs_initcall(proc_kcore_init);
index bdfabdaefdceab967df948cd3509a39990d3fe8b..05f8dcdb086e4a2fd44e7077fbc6f98b8d9608a2 100644 (file)
@@ -61,4 +61,4 @@ static int __init proc_kmsg_init(void)
        proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations);
        return 0;
 }
-module_init(proc_kmsg_init);
+fs_initcall(proc_kmsg_init);
index 1afa4dd4cae24167a0c4e3f47137a13cf8a6d199..aec66e6c2060b8f1a3f24bfb44a90d48f5bb5af2 100644 (file)
@@ -42,4 +42,4 @@ static int __init proc_loadavg_init(void)
        proc_create("loadavg", 0, NULL, &loadavg_proc_fops);
        return 0;
 }
-module_init(proc_loadavg_init);
+fs_initcall(proc_loadavg_init);
index a77d2b2991998187843271a714ab241adacfc0b5..136e548d9567feafaa6a695abf8dd3a9015c5a87 100644 (file)
@@ -26,7 +26,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
        unsigned long committed;
        struct vmalloc_info vmi;
        long cached;
+       long available;
+       unsigned long pagecache;
+       unsigned long wmark_low = 0;
        unsigned long pages[NR_LRU_LISTS];
+       struct zone *zone;
        int lru;
 
 /*
@@ -47,12 +51,44 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
        for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
                pages[lru] = global_page_state(NR_LRU_BASE + lru);
 
+       for_each_zone(zone)
+               wmark_low += zone->watermark[WMARK_LOW];
+
+       /*
+        * Estimate the amount of memory available for userspace allocations,
+        * without causing swapping.
+        *
+        * Free memory cannot be taken below the low watermark, before the
+        * system starts swapping.
+        */
+       available = i.freeram - wmark_low;
+
+       /*
+        * Not all the page cache can be freed, otherwise the system will
+        * start swapping. Assume at least half of the page cache, or the
+        * low watermark worth of cache, needs to stay.
+        */
+       pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
+       pagecache -= min(pagecache / 2, wmark_low);
+       available += pagecache;
+
+       /*
+        * Part of the reclaimable swap consists of items that are in use,
+        * and cannot be freed. Cap this estimate at the low watermark.
+        */
+       available += global_page_state(NR_SLAB_RECLAIMABLE) -
+                    min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);
+
+       if (available < 0)
+               available = 0;
+
        /*
         * Tagged format, for easy grepping and expansion.
         */
        seq_printf(m,
                "MemTotal:       %8lu kB\n"
                "MemFree:        %8lu kB\n"
+               "MemAvailable:   %8lu kB\n"
                "Buffers:        %8lu kB\n"
                "Cached:         %8lu kB\n"
                "SwapCached:     %8lu kB\n"
@@ -105,6 +141,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                ,
                K(i.totalram),
                K(i.freeram),
+               K(available),
                K(i.bufferram),
                K(cached),
                K(total_swapcache_pages()),
@@ -183,4 +220,4 @@ static int __init proc_meminfo_init(void)
        proc_create("meminfo", 0, NULL, &meminfo_proc_fops);
        return 0;
 }
-module_init(proc_meminfo_init);
+fs_initcall(proc_meminfo_init);
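
The new MemAvailable line estimates how much memory userspace could still allocate without pushing the system into swap: free pages above the summed low watermarks, plus the portions of file page cache and reclaimable slab that can be dropped (roughly half of each, never taking more than the low watermark's worth). Condensed, the computation above amounts to:

/* Condensed restatement of the estimate computed above (same variables:
 * pagecache = active + inactive file LRU, wmark_low = sum of zone low
 * watermarks, slab_reclaimable = NR_SLAB_RECLAIMABLE). */
available = i.freeram - wmark_low
	  + pagecache        - min(pagecache / 2,        wmark_low)
	  + slab_reclaimable - min(slab_reclaimable / 2, wmark_low);
if (available < 0)
	available = 0;
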
index 5f9bc8a746c93dbfa99678b9a7d91c2a0999fdf9..d4a35746cab91f8967dc83b7466fd572a77ae8d4 100644 (file)
@@ -131,4 +131,4 @@ static int __init proc_nommu_init(void)
        return 0;
 }
 
-module_init(proc_nommu_init);
+fs_initcall(proc_nommu_init);
index b8730d9ebaee651244d0d7e46e687792b46b0c9f..02174a610315ebb218880a8744e05b2244b6d26f 100644 (file)
@@ -118,10 +118,12 @@ u64 stable_page_flags(struct page *page)
        /*
         * PageTransCompound can be true for non-huge compound pages (slab
         * pages or pages allocated by drivers with __GFP_COMP) because it
-        * just checks PG_head/PG_tail, so we need to check PageLRU to make
-        * sure a given page is a thp, not a non-huge compound page.
+        * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon
+        * to make sure a given page is a thp, not a non-huge compound page.
         */
-       else if (PageTransCompound(page) && PageLRU(compound_trans_head(page)))
+       else if (PageTransCompound(page) &&
+                (PageLRU(compound_trans_head(page)) ||
+                 PageAnon(compound_trans_head(page))))
                u |= 1 << KPF_THP;
 
        /*
@@ -217,4 +219,4 @@ static int __init proc_page_init(void)
        proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations);
        return 0;
 }
-module_init(proc_page_init);
+fs_initcall(proc_page_init);
index 70779b2fc2090b98b741d10f3f37c4a8c1379bf9..c82dd5147845d07053fd1fa37398d8c6da2560f5 100644 (file)
@@ -74,9 +74,9 @@ __proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp,
                return NULL;
 
        if (!strncmp(name, "security-", 9))
-               ent->size = 0; /* don't leak number of password chars */
+               proc_set_size(ent, 0); /* don't leak number of password chars */
        else
-               ent->size = pp->length;
+               proc_set_size(ent, pp->length);
 
        return ent;
 }
@@ -232,6 +232,7 @@ void __init proc_device_tree_init(void)
                return;
        root = of_find_node_by_path("/");
        if (root == NULL) {
+               remove_proc_entry("device-tree", NULL);
                pr_debug("/proc/device-tree: can't find root\n");
                return;
        }
index 62604be9f58d61ef6b9c4cf5b8f787e214d8dc78..ad8a77f94beb0210311b1f8d2af0cbeee6093201 100644 (file)
@@ -41,4 +41,4 @@ static int __init proc_softirqs_init(void)
        proc_create("softirqs", 0, NULL, &proc_softirqs_operations);
        return 0;
 }
-module_init(proc_softirqs_init);
+fs_initcall(proc_softirqs_init);
index 1cf86c0e868930585d0058b7943cc047367802f3..6f599c62f0cc939ce2b68a75dbcd38d366ee7fa9 100644 (file)
@@ -221,4 +221,4 @@ static int __init proc_stat_init(void)
        proc_create("stat", 0, NULL, &proc_stat_operations);
        return 0;
 }
-module_init(proc_stat_init);
+fs_initcall(proc_stat_init);
index 061894625903ded399f2ab9285daa5f69adba854..7141b8d0ca9ed9800afb1ae87def5f8aa06b8f46 100644 (file)
@@ -49,4 +49,4 @@ static int __init proc_uptime_init(void)
        proc_create("uptime", 0, NULL, &uptime_proc_fops);
        return 0;
 }
-module_init(proc_uptime_init);
+fs_initcall(proc_uptime_init);
index 76817a60678c35c599f8416232bf75df52dffd91..d2154eb6d78faa593ed72ee176eadb89d7df3f4c 100644 (file)
@@ -31,4 +31,4 @@ static int __init proc_version_init(void)
        proc_create("version", 0, NULL, &version_proc_fops);
        return 0;
 }
-module_init(proc_version_init);
+fs_initcall(proc_version_init);
index 9100d695988690e0ae7e3263ad2502aa43d94e8c..2ca7ba047f04b658028e1e8c9cd9f5ba746d9946 100644 (file)
@@ -1082,7 +1082,7 @@ static int __init vmcore_init(void)
                proc_vmcore->size = vmcore_size;
        return 0;
 }
-module_init(vmcore_init)
+fs_initcall(vmcore_init);
 
 /* Cleanup function for vmcore module. */
 void vmcore_cleanup(void)
index 439406e081af903a298f6305d9e7c1d744b58813..7be26f03a3f5813ed501bea520e79041af4466f7 100644 (file)
@@ -234,17 +234,12 @@ static int mounts_open_common(struct inode *inode, struct file *file,
 
        rcu_read_lock();
        nsp = task_nsproxy(task);
-       if (!nsp) {
+       if (!nsp || !nsp->mnt_ns) {
                rcu_read_unlock();
                put_task_struct(task);
                goto err;
        }
        ns = nsp->mnt_ns;
-       if (!ns) {
-               rcu_read_unlock();
-               put_task_struct(task);
-               goto err;
-       }
        get_mnt_ns(ns);
        rcu_read_unlock();
        task_lock(task);
index 4884ac5ae9bea224517e384588847a44f4f8e462..1e56a4e8cf7cd47d4886731ea063c95be9f078ee 100644 (file)
 
 #include "internal.h"
 
-const struct address_space_operations ramfs_aops = {
-       .readpage       = simple_readpage,
-       .write_begin    = simple_write_begin,
-       .write_end      = simple_write_end,
-       .set_page_dirty = __set_page_dirty_no_writeback,
-};
-
 const struct file_operations ramfs_file_operations = {
        .read           = do_sync_read,
        .aio_read       = generic_file_aio_read,
index 8d5b438cc18859868fc0cf2206fa51d147ba9eb4..0b3d8e4cb2fa00dd8b906d1ddc811bc17a4c8e07 100644 (file)
 #include "internal.h"
 
 static int ramfs_nommu_setattr(struct dentry *, struct iattr *);
-
-const struct address_space_operations ramfs_aops = {
-       .readpage               = simple_readpage,
-       .write_begin            = simple_write_begin,
-       .write_end              = simple_write_end,
-       .set_page_dirty         = __set_page_dirty_no_writeback,
-};
+static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
+                                                  unsigned long addr,
+                                                  unsigned long len,
+                                                  unsigned long pgoff,
+                                                  unsigned long flags);
+static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma);
 
 const struct file_operations ramfs_file_operations = {
        .mmap                   = ramfs_nommu_mmap,
@@ -197,7 +196,7 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
  *   - the pages to be mapped must exist
  *   - the pages be physically contiguous in sequence
  */
-unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
+static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
                                            unsigned long addr, unsigned long len,
                                            unsigned long pgoff, unsigned long flags)
 {
@@ -256,7 +255,7 @@ out:
 /*
  * set up a mapping for shared memory segments
  */
-int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma)
+static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma)
 {
        if (!(vma->vm_flags & VM_SHARED))
                return -ENOSYS;
index 39d14659a8d3e47b8171f5e1d840d574802edba0..d365b1c4eb3cbe469965ba4a0e93d63a57124cc7 100644 (file)
 static const struct super_operations ramfs_ops;
 static const struct inode_operations ramfs_dir_inode_operations;
 
+static const struct address_space_operations ramfs_aops = {
+       .readpage       = simple_readpage,
+       .write_begin    = simple_write_begin,
+       .write_end      = simple_write_end,
+       .set_page_dirty = __set_page_dirty_no_writeback,
+};
+
 static struct backing_dev_info ramfs_backing_dev_info = {
        .name           = "ramfs",
        .ra_pages       = 0,    /* No readahead */
@@ -275,4 +282,4 @@ int __init init_ramfs_fs(void)
 
        return err;
 }
-module_init(init_ramfs_fs)
+fs_initcall(init_ramfs_fs);
index 6b330639b51dc7b497d6cc9ac8726ce0c004ec37..a9d8ae88fa15428d20c432c121cab5ca868eb42a 100644 (file)
@@ -10,5 +10,4 @@
  */
 
 
-extern const struct address_space_operations ramfs_aops;
 extern const struct inode_operations ramfs_file_inode_operations;
index 58e440df1bc687e3dd3eea50fe38f018a6cc192b..1193ffd0356547b63cdf0a503a2eb68ae84d8ba6 100644 (file)
@@ -901,10 +901,6 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
        io_fn_t fn;
        iov_fn_t fnv;
 
-       ret = -EFAULT;
-       if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
-               goto out;
-
        ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
                                               UIO_FASTIOV, iovstack, &iov);
        if (ret <= 0)
index f8adaee537c2a39283bc8c4c672f0949fd79d0f4..dfb617b2bad2ae1ada39a9ff36684793cfed51f1 100644 (file)
@@ -1958,8 +1958,6 @@ struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,}
 #define MAX_US_INT 0xffff
 
 // reiserfs version 2 has max offset 60 bits. Version 1 - 32 bit offset
-#define U32_MAX (~(__u32)0)
-
 static inline loff_t max_reiserfs_offset(struct inode *inode)
 {
        if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5)
index ff1d3d42e72accd5a0572686d2c5f226ed59c5c3..d8418782862b60fe2b96b4bd131281417c2adc24 100644 (file)
@@ -533,16 +533,14 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
 
        root = romfs_iget(sb, pos);
        if (IS_ERR(root))
-               goto error;
+               return PTR_ERR(root);
 
        sb->s_root = d_make_root(root);
        if (!sb->s_root)
-               goto error;
+               return -ENOMEM;
 
        return 0;
 
-error:
-       return -EINVAL;
 error_rsb_inval:
        ret = -EINVAL;
 error_rsb:
index e5f6c2cfac380a7c4503dcebb0b3e310841893f0..cecd780e0f44ffed45f9b6ff9b540fadc34d5a3a 100644 (file)
@@ -166,6 +166,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
        if (!s)
                return NULL;
 
+       INIT_LIST_HEAD(&s->s_mounts);
+
        if (security_sb_alloc(s))
                goto fail;
 
@@ -188,7 +190,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
        if (list_lru_init(&s->s_inode_lru))
                goto fail;
 
-       INIT_LIST_HEAD(&s->s_mounts);
        init_rwsem(&s->s_umount);
        lockdep_set_class(&s->s_umount, &type->s_umount_key);
        /*
index cc1febd8fadf730ddd2e2dfb78315798508aa408..5157b866a853cf76cfd6bef1c6fd6ab4b49123bf 100644 (file)
@@ -2118,26 +2118,10 @@ out_free:
  */
 static void free_inodes(struct fsck_data *fsckd)
 {
-       struct rb_node *this = fsckd->inodes.rb_node;
-       struct fsck_inode *fscki;
+       struct fsck_inode *fscki, *n;
 
-       while (this) {
-               if (this->rb_left)
-                       this = this->rb_left;
-               else if (this->rb_right)
-                       this = this->rb_right;
-               else {
-                       fscki = rb_entry(this, struct fsck_inode, rb);
-                       this = rb_parent(this);
-                       if (this) {
-                               if (this->rb_left == &fscki->rb)
-                                       this->rb_left = NULL;
-                               else
-                                       this->rb_right = NULL;
-                       }
-                       kfree(fscki);
-               }
-       }
+       rbtree_postorder_for_each_entry_safe(fscki, n, &fsckd->inodes, rb)
+               kfree(fscki);
 }
 
 /**
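
This hunk, and the similar ubifs teardown hunks that follow, replace hand-rolled destructive tree walks with rbtree_postorder_for_each_entry_safe(), which visits each node only after its children, so every entry can be freed during the iteration without patching parent pointers. Roughly how the helper is built (paraphrased from include/linux/rbtree.h; the in-tree macro may differ in detail):

/* Paraphrased; the in-tree macro may differ in detail. */
#define rbtree_postorder_for_each_entry_safe(pos, n, root, field)	      \
	for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \
	     pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field),     \
					 typeof(*pos), field); 1; });	      \
	     pos = n)
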
index 36bd4efd0819e96ee299acd030f24fa7113e4fb2..a902c5919e423ca619fcf0508345909b51ec99f8 100644 (file)
@@ -574,27 +574,10 @@ static int done_already(struct rb_root *done_tree, int lnum)
  */
 static void destroy_done_tree(struct rb_root *done_tree)
 {
-       struct rb_node *this = done_tree->rb_node;
-       struct done_ref *dr;
+       struct done_ref *dr, *n;
 
-       while (this) {
-               if (this->rb_left) {
-                       this = this->rb_left;
-                       continue;
-               } else if (this->rb_right) {
-                       this = this->rb_right;
-                       continue;
-               }
-               dr = rb_entry(this, struct done_ref, rb);
-               this = rb_parent(this);
-               if (this) {
-                       if (this->rb_left == &dr->rb)
-                               this->rb_left = NULL;
-                       else
-                               this->rb_right = NULL;
-               }
+       rbtree_postorder_for_each_entry_safe(dr, n, done_tree, rb)
                kfree(dr);
-       }
 }
 
 /**
index ba32da3fe08afa059f750fd18131f21c523376f0..f1c3e5a1b31554c4fbd9c9710d5925ad90682cb1 100644 (file)
@@ -815,27 +815,10 @@ static int dbg_find_check_orphan(struct rb_root *root, ino_t inum)
 
 static void dbg_free_check_tree(struct rb_root *root)
 {
-       struct rb_node *this = root->rb_node;
-       struct check_orphan *o;
+       struct check_orphan *o, *n;
 
-       while (this) {
-               if (this->rb_left) {
-                       this = this->rb_left;
-                       continue;
-               } else if (this->rb_right) {
-                       this = this->rb_right;
-                       continue;
-               }
-               o = rb_entry(this, struct check_orphan, rb);
-               this = rb_parent(this);
-               if (this) {
-                       if (this->rb_left == &o->rb)
-                               this->rb_left = NULL;
-                       else
-                               this->rb_right = NULL;
-               }
+       rbtree_postorder_for_each_entry_safe(o, n, root, rb)
                kfree(o);
-       }
 }
 
 static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr,
index 065096e36ed9733a14f96090decb3131c0e0d184..c14adb2f420cb09c48fef049bf6d246b8d22cd28 100644 (file)
@@ -1335,29 +1335,14 @@ static void remove_ino(struct ubifs_info *c, ino_t inum)
  */
 void ubifs_destroy_size_tree(struct ubifs_info *c)
 {
-       struct rb_node *this = c->size_tree.rb_node;
-       struct size_entry *e;
+       struct size_entry *e, *n;
 
-       while (this) {
-               if (this->rb_left) {
-                       this = this->rb_left;
-                       continue;
-               } else if (this->rb_right) {
-                       this = this->rb_right;
-                       continue;
-               }
-               e = rb_entry(this, struct size_entry, rb);
+       rbtree_postorder_for_each_entry_safe(e, n, &c->size_tree, rb) {
                if (e->inode)
                        iput(e->inode);
-               this = rb_parent(this);
-               if (this) {
-                       if (this->rb_left == &e->rb)
-                               this->rb_left = NULL;
-                       else
-                               this->rb_right = NULL;
-               }
                kfree(e);
        }
+
        c->size_tree = RB_ROOT;
 }
 
index f69daa514a57a0827a55c47f4680dcb5e69ec746..5ded8490c0c66cca3a6dea286aec8e700bc24e44 100644 (file)
@@ -873,26 +873,10 @@ static void free_orphans(struct ubifs_info *c)
  */
 static void free_buds(struct ubifs_info *c)
 {
-       struct rb_node *this = c->buds.rb_node;
-       struct ubifs_bud *bud;
-
-       while (this) {
-               if (this->rb_left)
-                       this = this->rb_left;
-               else if (this->rb_right)
-                       this = this->rb_right;
-               else {
-                       bud = rb_entry(this, struct ubifs_bud, rb);
-                       this = rb_parent(this);
-                       if (this) {
-                               if (this->rb_left == &bud->rb)
-                                       this->rb_left = NULL;
-                               else
-                                       this->rb_right = NULL;
-                       }
-                       kfree(bud);
-               }
-       }
+       struct ubifs_bud *bud, *n;
+
+       rbtree_postorder_for_each_entry_safe(bud, n, &c->buds, rb)
+               kfree(bud);
 }
 
 /**
index 349f31a30f401cb488a595de91cec30cfbf9d647..9083bc7ed4ae49f5d9550c20c1943400860114ab 100644 (file)
@@ -178,27 +178,11 @@ static int ins_clr_old_idx_znode(struct ubifs_info *c,
  */
 void destroy_old_idx(struct ubifs_info *c)
 {
-       struct rb_node *this = c->old_idx.rb_node;
-       struct ubifs_old_idx *old_idx;
+       struct ubifs_old_idx *old_idx, *n;
 
-       while (this) {
-               if (this->rb_left) {
-                       this = this->rb_left;
-                       continue;
-               } else if (this->rb_right) {
-                       this = this->rb_right;
-                       continue;
-               }
-               old_idx = rb_entry(this, struct ubifs_old_idx, rb);
-               this = rb_parent(this);
-               if (this) {
-                       if (this->rb_left == &old_idx->rb)
-                               this->rb_left = NULL;
-                       else
-                               this->rb_right = NULL;
-               }
+       rbtree_postorder_for_each_entry_safe(old_idx, n, &c->old_idx, rb)
                kfree(old_idx);
-       }
+
        c->old_idx = RB_ROOT;
 }
 
index a7ea492ae66072d34cb4daa4c99c51f3a5ea281c..59aa24dc0cddfe7731e1b44c02ef25caf329533d 100644 (file)
@@ -54,7 +54,7 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
        if (ufs_fragnum(fragment) + count > uspi->s_fpg)
                ufs_error (sb, "ufs_free_fragments", "internal error");
        
-       mutex_lock(&UFS_SB(sb)->s_lock);
+       lock_ufs(sb);
        
        cgno = ufs_dtog(uspi, fragment);
        bit = ufs_dtogd(uspi, fragment);
@@ -118,12 +118,12 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
                ubh_sync_block(UCPI_UBH(ucpi));
        ufs_mark_sb_dirty(sb);
        
-       mutex_unlock(&UFS_SB(sb)->s_lock);
+       unlock_ufs(sb);
        UFSD("EXIT\n");
        return;
 
 failed:
-       mutex_unlock(&UFS_SB(sb)->s_lock);
+       unlock_ufs(sb);
        UFSD("EXIT (FAILED)\n");
        return;
 }
@@ -155,7 +155,7 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count)
                goto failed;
        }
 
-       mutex_lock(&UFS_SB(sb)->s_lock);
+       lock_ufs(sb);
        
 do_more:
        overflow = 0;
@@ -215,12 +215,12 @@ do_more:
        }
 
        ufs_mark_sb_dirty(sb);
-       mutex_unlock(&UFS_SB(sb)->s_lock);
+       unlock_ufs(sb);
        UFSD("EXIT\n");
        return;
 
 failed_unlock:
-       mutex_unlock(&UFS_SB(sb)->s_lock);
+       unlock_ufs(sb);
 failed:
        UFSD("EXIT (FAILED)\n");
        return;
@@ -361,7 +361,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
        usb1 = ubh_get_usb_first(uspi);
        *err = -ENOSPC;
 
-       mutex_lock(&UFS_SB(sb)->s_lock);
+       lock_ufs(sb);
        tmp = ufs_data_ptr_to_cpu(sb, p);
 
        if (count + ufs_fragnum(fragment) > uspi->s_fpb) {
@@ -382,19 +382,19 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
                                  "fragment %llu, tmp %llu\n",
                                  (unsigned long long)fragment,
                                  (unsigned long long)tmp);
-                       mutex_unlock(&UFS_SB(sb)->s_lock);
+                       unlock_ufs(sb);
                        return INVBLOCK;
                }
                if (fragment < UFS_I(inode)->i_lastfrag) {
                        UFSD("EXIT (ALREADY ALLOCATED)\n");
-                       mutex_unlock(&UFS_SB(sb)->s_lock);
+                       unlock_ufs(sb);
                        return 0;
                }
        }
        else {
                if (tmp) {
                        UFSD("EXIT (ALREADY ALLOCATED)\n");
-                       mutex_unlock(&UFS_SB(sb)->s_lock);
+                       unlock_ufs(sb);
                        return 0;
                }
        }
@@ -403,7 +403,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
         * There is not enough space for user on the device
         */
        if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) {
-               mutex_unlock(&UFS_SB(sb)->s_lock);
+               unlock_ufs(sb);
                UFSD("EXIT (FAILED)\n");
                return 0;
        }
@@ -428,7 +428,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
                        ufs_clear_frags(inode, result + oldcount,
                                        newcount - oldcount, locked_page != NULL);
                }
-               mutex_unlock(&UFS_SB(sb)->s_lock);
+               unlock_ufs(sb);
                UFSD("EXIT, result %llu\n", (unsigned long long)result);
                return result;
        }
@@ -443,7 +443,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
                                                fragment + count);
                ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
                                locked_page != NULL);
-               mutex_unlock(&UFS_SB(sb)->s_lock);
+               unlock_ufs(sb);
                UFSD("EXIT, result %llu\n", (unsigned long long)result);
                return result;
        }
@@ -481,7 +481,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
                *err = 0;
                UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
                                                fragment + count);
-               mutex_unlock(&UFS_SB(sb)->s_lock);
+               unlock_ufs(sb);
                if (newcount < request)
                        ufs_free_fragments (inode, result + newcount, request - newcount);
                ufs_free_fragments (inode, tmp, oldcount);
@@ -489,7 +489,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
                return result;
        }
 
-       mutex_unlock(&UFS_SB(sb)->s_lock);
+       unlock_ufs(sb);
        UFSD("EXIT (FAILED)\n");
        return 0;
 }              
index d0426d74817b68402e97a9794b2c54c7ac4690c2..da5e5255185066f4ef1b6c24835b9d9e65be20b0 100644 (file)
@@ -71,11 +71,11 @@ void ufs_free_inode (struct inode * inode)
        
        ino = inode->i_ino;
 
-       mutex_lock(&UFS_SB(sb)->s_lock);
+       lock_ufs(sb);
 
        if (!((ino > 1) && (ino < (uspi->s_ncg * uspi->s_ipg )))) {
                ufs_warning(sb, "ufs_free_inode", "reserved inode or nonexistent inode %u\n", ino);
-               mutex_unlock(&UFS_SB(sb)->s_lock);
+               unlock_ufs(sb);
                return;
        }
        
@@ -83,7 +83,7 @@ void ufs_free_inode (struct inode * inode)
        bit = ufs_inotocgoff (ino);
        ucpi = ufs_load_cylinder (sb, cg);
        if (!ucpi) {
-               mutex_unlock(&UFS_SB(sb)->s_lock);
+               unlock_ufs(sb);
                return;
        }
        ucg = ubh_get_ucg(UCPI_UBH(ucpi));
@@ -117,7 +117,7 @@ void ufs_free_inode (struct inode * inode)
                ubh_sync_block(UCPI_UBH(ucpi));
        
        ufs_mark_sb_dirty(sb);
-       mutex_unlock(&UFS_SB(sb)->s_lock);
+       unlock_ufs(sb);
        UFSD("EXIT\n");
 }
 
@@ -197,7 +197,7 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode)
        uspi = sbi->s_uspi;
        usb1 = ubh_get_usb_first(uspi);
 
-       mutex_lock(&sbi->s_lock);
+       lock_ufs(sb);
 
        /*
         * Try to place the inode in its parent directory
@@ -332,21 +332,20 @@ cg_found:
                        sync_dirty_buffer(bh);
                brelse(bh);
        }
-
-       mutex_unlock(&sbi->s_lock);
+       unlock_ufs(sb);
 
        UFSD("allocating inode %lu\n", inode->i_ino);
        UFSD("EXIT\n");
        return inode;
 
 fail_remove_inode:
-       mutex_unlock(&sbi->s_lock);
+       unlock_ufs(sb);
        clear_nlink(inode);
        iput(inode);
        UFSD("EXIT (FAILED): err %d\n", err);
        return ERR_PTR(err);
 failed:
-       mutex_unlock(&sbi->s_lock);
+       unlock_ufs(sb);
        make_bad_inode(inode);
        iput (inode);
        UFSD("EXIT (FAILED): err %d\n", err);
index 329f2f53b7ed655b2ef525331f7c75a714d58de9..e5a993416feccfb9ebdd71f97f35c2a5127ad50b 100644 (file)
@@ -699,7 +699,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
        unsigned flags;
 
        lock_ufs(sb);
-       mutex_lock(&UFS_SB(sb)->s_lock);
 
        UFSD("ENTER\n");
 
@@ -717,7 +716,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
        ufs_put_cstotal(sb);
 
        UFSD("EXIT\n");
-       mutex_unlock(&UFS_SB(sb)->s_lock);
        unlock_ufs(sb);
 
        return 0;
@@ -762,6 +760,7 @@ static void ufs_put_super(struct super_block *sb)
 
        ubh_brelse_uspi (sbi->s_uspi);
        kfree (sbi->s_uspi);
+       mutex_destroy(&sbi->mutex);
        kfree (sbi);
        sb->s_fs_info = NULL;
        UFSD("EXIT\n");
@@ -805,7 +804,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
        }
 #endif
        mutex_init(&sbi->mutex);
-       mutex_init(&sbi->s_lock);
        spin_lock_init(&sbi->work_lock);
        INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs);
        /*
@@ -1262,6 +1260,7 @@ failed:
        if (ubh)
                ubh_brelse_uspi (uspi);
        kfree (uspi);
+       mutex_destroy(&sbi->mutex);
        kfree(sbi);
        sb->s_fs_info = NULL;
        UFSD("EXIT (FAILED)\n");
@@ -1281,7 +1280,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
        unsigned flags;
 
        lock_ufs(sb);
-       mutex_lock(&UFS_SB(sb)->s_lock);
        uspi = UFS_SB(sb)->s_uspi;
        flags = UFS_SB(sb)->s_flags;
        usb1 = ubh_get_usb_first(uspi);
@@ -1295,7 +1293,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
        new_mount_opt = 0;
        ufs_set_opt (new_mount_opt, ONERROR_LOCK);
        if (!ufs_parse_options (data, &new_mount_opt)) {
-               mutex_unlock(&UFS_SB(sb)->s_lock);
                unlock_ufs(sb);
                return -EINVAL;
        }
@@ -1303,14 +1300,12 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
                new_mount_opt |= ufstype;
        } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
                printk("ufstype can't be changed during remount\n");
-               mutex_unlock(&UFS_SB(sb)->s_lock);
                unlock_ufs(sb);
                return -EINVAL;
        }
 
        if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
                UFS_SB(sb)->s_mount_opt = new_mount_opt;
-               mutex_unlock(&UFS_SB(sb)->s_lock);
                unlock_ufs(sb);
                return 0;
        }
@@ -1335,7 +1330,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 #ifndef CONFIG_UFS_FS_WRITE
                printk("ufs was compiled with read-only support, "
                "can't be mounted as read-write\n");
-               mutex_unlock(&UFS_SB(sb)->s_lock);
                unlock_ufs(sb);
                return -EINVAL;
 #else
@@ -1345,13 +1339,11 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
                    ufstype != UFS_MOUNT_UFSTYPE_SUNx86 &&
                    ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
                        printk("this ufstype is read-only supported\n");
-                       mutex_unlock(&UFS_SB(sb)->s_lock);
                        unlock_ufs(sb);
                        return -EINVAL;
                }
                if (!ufs_read_cylinder_structures(sb)) {
                        printk("failed during remounting\n");
-                       mutex_unlock(&UFS_SB(sb)->s_lock);
                        unlock_ufs(sb);
                        return -EPERM;
                }
@@ -1359,7 +1351,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 #endif
        }
        UFS_SB(sb)->s_mount_opt = new_mount_opt;
-       mutex_unlock(&UFS_SB(sb)->s_lock);
        unlock_ufs(sb);
        return 0;
 }
index ff2c15ab81aaa05c06409966c03f9bb912e87c4a..343e6fc571e5b3976b6132d43dccb03b538d9380 100644 (file)
@@ -24,7 +24,6 @@ struct ufs_sb_info {
        int work_queued; /* non-zero if the delayed work is queued */
        struct delayed_work sync_work; /* FS sync delayed work */
        spinlock_t work_lock; /* protects sync_work and work_queued */
-       struct mutex s_lock;
 };
 
 struct ufs_inode_info {
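
The ufs hunks above drop the recently added per-superblock s_lock mutex and fall back to serializing through lock_ufs()/unlock_ufs(), with mutex_destroy() added on the remaining sbi->mutex in the teardown paths. For orientation, lock_ufs() in this era is approximately a wrapper around that same mutex (reconstructed from memory; the real code also tracks ownership details):

/* Approximate shape of lock_ufs()/unlock_ufs() in fs/ufs/super.c of
 * this era; shown as a sketch, not the exact in-tree code. */
static void lock_ufs(struct super_block *sb)
{
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
	struct ufs_sb_info *sbi = UFS_SB(sb);

	mutex_lock(&sbi->mutex);
	sbi->mutex_owner = current;
#endif
}

static void unlock_ufs(struct super_block *sb)
{
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
	struct ufs_sb_info *sbi = UFS_SB(sb);

	sbi->mutex_owner = NULL;
	mutex_unlock(&sbi->mutex);
#endif
}
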
diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h
new file mode 100644 (file)
index 0000000..5a64ca4
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ * x86_32 and x86_64 integration by Gustavo F. Padovan, February 2009
+ * Break out common bits to asm-generic by Mark Salter, November 2013
+ */
+
+#ifndef __ASM_GENERIC_FIXMAP_H
+#define __ASM_GENERIC_FIXMAP_H
+
+#include <linux/bug.h>
+
+#define __fix_to_virt(x)       (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+#define __virt_to_fix(x)       ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
+
+#ifndef __ASSEMBLY__
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without translation, we catch the bug with a NULL-deference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static __always_inline unsigned long fix_to_virt(const unsigned int idx)
+{
+       BUILD_BUG_ON(idx >= __end_of_fixed_addresses);
+       return __fix_to_virt(idx);
+}
+
+static inline unsigned long virt_to_fix(const unsigned long vaddr)
+{
+       BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
+       return __virt_to_fix(vaddr);
+}
+
+/*
+ * Provide some reasonable defaults for page flags.
+ * Not all architectures use all of these different types and some
+ * architectures use different names.
+ */
+#ifndef FIXMAP_PAGE_NORMAL
+#define FIXMAP_PAGE_NORMAL PAGE_KERNEL
+#endif
+#ifndef FIXMAP_PAGE_NOCACHE
+#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NOCACHE
+#endif
+#ifndef FIXMAP_PAGE_IO
+#define FIXMAP_PAGE_IO PAGE_KERNEL_IO
+#endif
+#ifndef FIXMAP_PAGE_CLEAR
+#define FIXMAP_PAGE_CLEAR __pgprot(0)
+#endif
+
+#ifndef set_fixmap
+#define set_fixmap(idx, phys)                          \
+       __set_fixmap(idx, phys, FIXMAP_PAGE_NORMAL)
+#endif
+
+#ifndef clear_fixmap
+#define clear_fixmap(idx)                      \
+       __set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR)
+#endif
+
+/* Return a pointer with offset calculated */
+#define __set_fixmap_offset(idx, phys, flags)                \
+({                                                           \
+       unsigned long addr;                                   \
+       __set_fixmap(idx, phys, flags);                       \
+       addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1)); \
+       addr;                                                 \
+})
+
+#define set_fixmap_offset(idx, phys) \
+       __set_fixmap_offset(idx, phys, FIXMAP_PAGE_NORMAL)
+
+/*
+ * Some hardware wants to get fixmapped without caching.
+ */
+#define set_fixmap_nocache(idx, phys) \
+       __set_fixmap(idx, phys, FIXMAP_PAGE_NOCACHE)
+
+#define set_fixmap_offset_nocache(idx, phys) \
+       __set_fixmap_offset(idx, phys, FIXMAP_PAGE_NOCACHE)
+
+/*
+ * Some fixmaps are for IO
+ */
+#define set_fixmap_io(idx, phys) \
+       __set_fixmap(idx, phys, FIXMAP_PAGE_IO)
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_GENERIC_FIXMAP_H */
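
The new asm-generic/fixmap.h collects the index-to-address translation and the set_fixmap*() convenience macros that several architectures had been duplicating; an architecture still supplies its own fixmap index enum and __set_fixmap() implementation and then includes this header. A hypothetical usage sketch (FIX_EARLYCON_MEM and the helper are invented for illustration):

/* Hypothetical arch/driver code; FIX_EARLYCON_MEM is an assumed fixmap
 * index and early_map_mmio() is invented for this sketch. */
#include <asm/fixmap.h>

static void __iomem *early_map_mmio(phys_addr_t phys)
{
	set_fixmap_io(FIX_EARLYCON_MEM, phys & PAGE_MASK);
	return (void __iomem *)(fix_to_virt(FIX_EARLYCON_MEM) +
				(phys & (PAGE_SIZE - 1)));
}
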
diff --git a/include/asm-generic/int-l64.h b/include/asm-generic/int-l64.h
deleted file mode 100644 (file)
index 27d4ec0..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * asm-generic/int-l64.h
- *
- * Integer declarations for architectures which use "long"
- * for 64-bit types.
- */
-#ifndef _ASM_GENERIC_INT_L64_H
-#define _ASM_GENERIC_INT_L64_H
-
-#include <uapi/asm-generic/int-l64.h>
-
-
-#ifndef __ASSEMBLY__
-
-typedef signed char s8;
-typedef unsigned char u8;
-
-typedef signed short s16;
-typedef unsigned short u16;
-
-typedef signed int s32;
-typedef unsigned int u32;
-
-typedef signed long s64;
-typedef unsigned long u64;
-
-#define S8_C(x)  x
-#define U8_C(x)  x ## U
-#define S16_C(x) x
-#define U16_C(x) x ## U
-#define S32_C(x) x
-#define U32_C(x) x ## U
-#define S64_C(x) x ## L
-#define U64_C(x) x ## UL
-
-#else /* __ASSEMBLY__ */
-
-#define S8_C(x)  x
-#define U8_C(x)  x
-#define S16_C(x) x
-#define U16_C(x) x
-#define S32_C(x) x
-#define U32_C(x) x
-#define S64_C(x) x
-#define U64_C(x) x
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_GENERIC_INT_L64_H */
index f1f07d31a3af15fbf34934f502feb54d469e3d26..2fae55def608b0cc82fba2ac07f2f3c14941d9f6 100644 (file)
@@ -5,6 +5,7 @@
 #define _LINUX_BOOTMEM_H
 
 #include <linux/mmzone.h>
+#include <linux/mm_types.h>
 #include <asm/dma.h>
 
 /*
@@ -52,7 +53,6 @@ extern void free_bootmem_node(pg_data_t *pgdat,
                              unsigned long size);
 extern void free_bootmem(unsigned long physaddr, unsigned long size);
 extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
-extern void __free_pages_bootmem(struct page *page, unsigned int order);
 
 /*
  * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
@@ -142,6 +142,157 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 #define alloc_bootmem_low_pages_node(pgdat, x) \
        __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
 
+
+#if defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM)
+
+/* FIXME: use MEMBLOCK_ALLOC_* variants here */
+#define BOOTMEM_ALLOC_ACCESSIBLE       0
+#define BOOTMEM_ALLOC_ANYWHERE         (~(phys_addr_t)0)
+
+/* FIXME: Move to memblock.h at a point where we remove nobootmem.c */
+void *memblock_virt_alloc_try_nid_nopanic(phys_addr_t size,
+               phys_addr_t align, phys_addr_t min_addr,
+               phys_addr_t max_addr, int nid);
+void *memblock_virt_alloc_try_nid(phys_addr_t size, phys_addr_t align,
+               phys_addr_t min_addr, phys_addr_t max_addr, int nid);
+void __memblock_free_early(phys_addr_t base, phys_addr_t size);
+void __memblock_free_late(phys_addr_t base, phys_addr_t size);
+
+static inline void * __init memblock_virt_alloc(
+                                       phys_addr_t size,  phys_addr_t align)
+{
+       return memblock_virt_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT,
+                                           BOOTMEM_ALLOC_ACCESSIBLE,
+                                           NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_virt_alloc_nopanic(
+                                       phys_addr_t size, phys_addr_t align)
+{
+       return memblock_virt_alloc_try_nid_nopanic(size, align,
+                                                   BOOTMEM_LOW_LIMIT,
+                                                   BOOTMEM_ALLOC_ACCESSIBLE,
+                                                   NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_virt_alloc_from_nopanic(
+               phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
+{
+       return memblock_virt_alloc_try_nid_nopanic(size, align, min_addr,
+                                                   BOOTMEM_ALLOC_ACCESSIBLE,
+                                                   NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_virt_alloc_node(
+                                               phys_addr_t size, int nid)
+{
+       return memblock_virt_alloc_try_nid(size, 0, BOOTMEM_LOW_LIMIT,
+                                           BOOTMEM_ALLOC_ACCESSIBLE, nid);
+}
+
+static inline void * __init memblock_virt_alloc_node_nopanic(
+                                               phys_addr_t size, int nid)
+{
+       return memblock_virt_alloc_try_nid_nopanic(size, 0, BOOTMEM_LOW_LIMIT,
+                                                   BOOTMEM_ALLOC_ACCESSIBLE,
+                                                   nid);
+}
+
+static inline void __init memblock_free_early(
+                                       phys_addr_t base, phys_addr_t size)
+{
+       __memblock_free_early(base, size);
+}
+
+static inline void __init memblock_free_early_nid(
+                               phys_addr_t base, phys_addr_t size, int nid)
+{
+       __memblock_free_early(base, size);
+}
+
+static inline void __init memblock_free_late(
+                                       phys_addr_t base, phys_addr_t size)
+{
+       __memblock_free_late(base, size);
+}
+
+#else
+
+#define BOOTMEM_ALLOC_ACCESSIBLE       0
+
+
+/* Fall back to all the existing bootmem APIs */
+static inline void * __init memblock_virt_alloc(
+                                       phys_addr_t size,  phys_addr_t align)
+{
+       if (!align)
+               align = SMP_CACHE_BYTES;
+       return __alloc_bootmem(size, align, BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_nopanic(
+                                       phys_addr_t size, phys_addr_t align)
+{
+       if (!align)
+               align = SMP_CACHE_BYTES;
+       return __alloc_bootmem_nopanic(size, align, BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_from_nopanic(
+               phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
+{
+       return __alloc_bootmem_nopanic(size, align, min_addr);
+}
+
+static inline void * __init memblock_virt_alloc_node(
+                                               phys_addr_t size, int nid)
+{
+       return __alloc_bootmem_node(NODE_DATA(nid), size, SMP_CACHE_BYTES,
+                                    BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_node_nopanic(
+                                               phys_addr_t size, int nid)
+{
+       return __alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
+                                            SMP_CACHE_BYTES,
+                                            BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_try_nid(phys_addr_t size,
+       phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid)
+{
+       return __alloc_bootmem_node_high(NODE_DATA(nid), size, align,
+                                         min_addr);
+}
+
+static inline void * __init memblock_virt_alloc_try_nid_nopanic(
+                       phys_addr_t size, phys_addr_t align,
+                       phys_addr_t min_addr, phys_addr_t max_addr, int nid)
+{
+       return ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size, align,
+                               min_addr, max_addr);
+}
+
+static inline void __init memblock_free_early(
+                                       phys_addr_t base, phys_addr_t size)
+{
+       free_bootmem(base, size);
+}
+
+static inline void __init memblock_free_early_nid(
+                               phys_addr_t base, phys_addr_t size, int nid)
+{
+       free_bootmem_node(NODE_DATA(nid), base, size);
+}
+
+static inline void __init memblock_free_late(
+                                       phys_addr_t base, phys_addr_t size)
+{
+       free_bootmem_late(base, size);
+}
+#endif /* defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM) */
+
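A minimal, purely illustrative sketch (not part of this patch) of how an early-boot caller might use the wrappers defined above; the example_* names are invented, and behavior is identical whether CONFIG_NO_BOOTMEM selects the memblock path or the legacy bootmem fallback below.

#include <linux/bootmem.h>

static void * __init example_alloc_node_table(int nid, phys_addr_t size)
{
	/* Node-local allocation that panics on failure, like __alloc_bootmem_node(). */
	return memblock_virt_alloc_node(size, nid);
}

static void * __init example_try_alloc_table(phys_addr_t size)
{
	/* Non-panicking variant: returns NULL when no memory is available. */
	return memblock_virt_alloc_nopanic(size, SMP_CACHE_BYTES);
}
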
 #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
 extern void *alloc_remap(int nid, unsigned long size);
 #else
index 4c570653ab84f9822ba9e1e1f19a41242c16b74f..17e7e82d2aa758f9888419a9c03aa4059e16b247 100644 (file)
@@ -1,11 +1,11 @@
 #ifndef __LINUX_CACHE_H
 #define __LINUX_CACHE_H
 
-#include <linux/kernel.h>
+#include <uapi/linux/kernel.h>
 #include <asm/cache.h>
 
 #ifndef L1_CACHE_ALIGN
-#define L1_CACHE_ALIGN(x) ALIGN(x, L1_CACHE_BYTES)
+#define L1_CACHE_ALIGN(x) __ALIGN_KERNEL(x, L1_CACHE_BYTES)
 #endif
 
 #ifndef SMP_CACHE_BYTES
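Switching to __ALIGN_KERNEL() from <uapi/linux/kernel.h> lets cache.h stop pulling in the full <linux/kernel.h>, presumably to avoid the include cycle created by printk.h now including <linux/cache.h> (see the printk.h hunk later in this diff). The macro still rounds up; a tiny sketch, assuming 64-byte cache lines and an invented struct name:

#include <linux/cache.h>

struct example_entry {
	char payload[100];
};

/* With L1_CACHE_BYTES == 64 this evaluates to 128. */
#define EXAMPLE_STRIDE	L1_CACHE_ALIGN(sizeof(struct example_entry))
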
index 0442c3d800f0f9c8a9268a93715cfe2bc18c639b..a6ef9cc267ec2cfd3940bab1d8e4f7030e31ef7b 100644 (file)
@@ -8,23 +8,6 @@
 
 #include <linux/ceph/types.h>
 
-/* This seemed to be the easiest place to define these */
-
-#define        U8_MAX  ((u8)(~0U))
-#define        U16_MAX ((u16)(~0U))
-#define        U32_MAX ((u32)(~0U))
-#define        U64_MAX ((u64)(~0ULL))
-
-#define        S8_MAX  ((s8)(U8_MAX >> 1))
-#define        S16_MAX ((s16)(U16_MAX >> 1))
-#define        S32_MAX ((s32)(U32_MAX >> 1))
-#define        S64_MAX ((s64)(U64_MAX >> 1LL))
-
-#define        S8_MIN  ((s8)(-S8_MAX - 1))
-#define        S16_MIN ((s16)(-S16_MAX - 1))
-#define        S32_MIN ((s32)(-S32_MAX - 1))
-#define        S64_MIN ((s64)(-S64_MAX - 1LL))
-
 /*
  * in all cases,
  *   void **p     pointer to position pointer
index a0f9280421eca511b00e80f51e8e65436ec47e3a..2e6dce6e5c2acf9bae626033c700ec0da012bbcb 100644 (file)
@@ -37,9 +37,9 @@ int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline);
 struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
                                         const char *bdev);
 
-void cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
-                      int slot,
-                      int (*add_part)(int, struct cmdline_subpart *, void *),
-                      void *param);
+int cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
+                     int slot,
+                     int (*add_part)(int, struct cmdline_subpart *, void *),
+                     void *param);
 
 #endif /* CMDLINEPARSEH */
index 091d72e70d8a708f55d447b8559814caa2952b1a..7e1c76e3cd6890387b983be96377bf7c19cd3c06 100644 (file)
@@ -62,6 +62,22 @@ static inline bool compaction_deferred(struct zone *zone, int order)
        return zone->compact_considered < defer_limit;
 }
 
+/*
+ * Update defer tracking counters after successful compaction of given order,
+ * which means an allocation either succeeded (alloc_success == true) or is
+ * expected to succeed.
+ */
+static inline void compaction_defer_reset(struct zone *zone, int order,
+               bool alloc_success)
+{
+       if (alloc_success) {
+               zone->compact_considered = 0;
+               zone->compact_defer_shift = 0;
+       }
+       if (order >= zone->compact_order_failed)
+               zone->compact_order_failed = order + 1;
+}
+
 /* Returns true if restarting compaction after many failures */
 static inline bool compaction_restarting(struct zone *zone, int order)
 {
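A hypothetical call-site sketch for the new helper (the example_* name is invented): once compaction has actually produced a page of the requested order, deferral state is cleared so future compaction attempts are not skipped.

/* Hypothetical call site, mirroring the inline's documented intent. */
static void example_note_compaction_success(struct zone *zone, int order,
					    struct page *page)
{
	if (page)
		compaction_defer_reset(zone, order, true);
}
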
diff --git a/include/linux/crc64_ecma.h b/include/linux/crc64_ecma.h
new file mode 100644 (file)
index 0000000..bba7a4d
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CRC64_ECMA_H_
+#define __CRC64_ECMA_H_
+
+#include <linux/types.h>
+
+
+#define CRC64_DEFAULT_INITVAL           0xFFFFFFFFFFFFFFFFULL
+
+
+/*
+ * crc64_ecma_seed - Initializes the CRC64 ECMA seed.
+ */
+u64 crc64_ecma_seed(void);
+
+/*
+ * crc64_ecma - Computes the 64 bit ECMA CRC.
+ *
+ * @pdata:     pointer to the data to compute checksum for.
+ * @nbytes:    number of bytes in data buffer.
+ * @seed:      CRC seed.
+ */
+u64 crc64_ecma(u8 const *pdata, u32 nbytes, u64 seed);
+
+#endif /* __CRC64_ECMA_H_ */
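A hypothetical usage sketch for the interface declared above (the example_* name is invented); it only exercises the two exported functions and the default seed.

#include <linux/crc64_ecma.h>

static u64 example_frame_crc(const u8 *frame, u32 len)
{
	u64 seed = crc64_ecma_seed();	/* expected to return CRC64_DEFAULT_INITVAL */

	return crc64_ecma(frame, len, seed);
}
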
index fc0e34ce038f4055eaf7160cb1eb7693b7cc7e5d..fe8cb610deac70f3027edb7923d4bd99f525b7c6 100644 (file)
@@ -85,6 +85,8 @@ extern void debug_dma_sync_sg_for_device(struct device *dev,
 
 extern void debug_dma_dump_mappings(struct device *dev);
 
+extern void debug_dma_assert_idle(struct page *page);
+
 #else /* CONFIG_DMA_API_DEBUG */
 
 static inline void dma_debug_add_bus(struct bus_type *bus)
@@ -183,6 +185,10 @@ static inline void debug_dma_dump_mappings(struct device *dev)
 {
 }
 
+static inline void debug_dma_assert_idle(struct page *page)
+{
+}
+
 #endif /* CONFIG_DMA_API_DEBUG */
 
 #endif /* __DMA_DEBUG_H */
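debug_dma_assert_idle() appears intended to flag CPU writes to a page that is still part of an active DMA mapping; a hedged sketch of a possible caller (all names are invented, and the function compiles to a no-op without CONFIG_DMA_API_DEBUG):

#include <linux/dma-debug.h>
#include <linux/highmem.h>

static void example_cpu_write(struct page *page, unsigned int offset, u8 val)
{
	u8 *addr;

	/* Complain if the page is still mapped for DMA before the CPU touches it. */
	debug_dma_assert_idle(page);

	addr = kmap_atomic(page);
	addr[offset] = val;
	kunmap_atomic(addr);
}
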
index 4b2ee8d12f5e0d8272f15bbbb402f58353dde4cb..7d8d5e608594c911c2eacc78dbb094c5384fd4af 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/path.h> /* struct path */
 #include <linux/spinlock.h>
 #include <linux/types.h>
-
 #include <linux/atomic.h>
 
 /*
@@ -79,6 +78,7 @@ struct fsnotify_group;
 struct fsnotify_event;
 struct fsnotify_mark;
 struct fsnotify_event_private_data;
+struct fsnotify_fname;
 
 /*
  * Each group must define these ops.  The fsnotify infrastructure will call

@@ -94,17 +94,27 @@ struct fsnotify_event_private_data;
  *             userspace messages that marks have been removed.
  */
 struct fsnotify_ops {
-       bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode,
-                                 struct fsnotify_mark *inode_mark,
-                                 struct fsnotify_mark *vfsmount_mark,
-                                 __u32 mask, void *data, int data_type);
        int (*handle_event)(struct fsnotify_group *group,
+                           struct inode *inode,
                            struct fsnotify_mark *inode_mark,
                            struct fsnotify_mark *vfsmount_mark,
-                           struct fsnotify_event *event);
+                           u32 mask, void *data, int data_type,
+                           const unsigned char *file_name);
        void (*free_group_priv)(struct fsnotify_group *group);
        void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group);
-       void (*free_event_priv)(struct fsnotify_event_private_data *priv);
+       void (*free_event)(struct fsnotify_event *event);
+};
+
+/*
+ * all of the information about the original object we want to now send to
+ * a group.  If you want to carry more info from the accessing task to the
+ * listener this structure is where you need to be adding fields.
+ */
+struct fsnotify_event {
+       struct list_head list;
+       /* inode may ONLY be dereferenced during handle_event(). */
+       struct inode *inode;    /* either the inode the event happened to or its parent */
+       u32 mask;               /* the type of access, bitwise OR for FS_* event types */
 };
 
 /*
@@ -148,7 +158,11 @@ struct fsnotify_group {
                                         * a group */
        struct list_head marks_list;    /* all inode marks for this group */
 
-       struct fasync_struct    *fsn_fa;    /* async notification */
+       struct fasync_struct *fsn_fa;    /* async notification */
+
+       struct fsnotify_event overflow_event;   /* Event we queue when the
+                                                * notification list is too
+                                                * full */
 
        /* groups can define private fields here or use the void *private */
        union {
@@ -177,76 +191,10 @@ struct fsnotify_group {
        };
 };
 
-/*
- * A single event can be queued in multiple group->notification_lists.
- *
- * each group->notification_list will point to an event_holder which in turns points
- * to the actual event that needs to be sent to userspace.
- *
- * Seemed cheaper to create a refcnt'd event and a small holder for every group
- * than create a different event for every group
- *
- */
-struct fsnotify_event_holder {
-       struct fsnotify_event *event;
-       struct list_head event_list;
-};
-
-/*
- * Inotify needs to tack data onto an event.  This struct lets us later find the
- * correct private data of the correct group.
- */
-struct fsnotify_event_private_data {
-       struct fsnotify_group *group;
-       struct list_head event_list;
-};
-
-/*
- * all of the information about the original object we want to now send to
- * a group.  If you want to carry more info from the accessing task to the
- * listener this structure is where you need to be adding fields.
- */
-struct fsnotify_event {
-       /*
-        * If we create an event we are also likely going to need a holder
-        * to link to a group.  So embed one holder in the event.  Means only
-        * one allocation for the common case where we only have one group
-        */
-       struct fsnotify_event_holder holder;
-       spinlock_t lock;        /* protection for the associated event_holder and private_list */
-       /* to_tell may ONLY be dereferenced during handle_event(). */
-       struct inode *to_tell;  /* either the inode the event happened to or its parent */
-       /*
-        * depending on the event type we should have either a path or inode
-        * We hold a reference on path, but NOT on inode.  Since we have the ref on
-        * the path, it may be dereferenced at any point during this object's
-        * lifetime.  That reference is dropped when this object's refcnt hits
-        * 0.  If this event contains an inode instead of a path, the inode may
-        * ONLY be used during handle_event().
-        */
-       union {
-               struct path path;
-               struct inode *inode;
-       };
 /* when calling fsnotify tell it if the data is a path or inode */
 #define FSNOTIFY_EVENT_NONE    0
 #define FSNOTIFY_EVENT_PATH    1
 #define FSNOTIFY_EVENT_INODE   2
-       int data_type;          /* which of the above union we have */
-       atomic_t refcnt;        /* how many groups still are using/need to send this event */
-       __u32 mask;             /* the type of access, bitwise OR for FS_* event types */
-
-       u32 sync_cookie;        /* used to corrolate events, namely inotify mv events */
-       const unsigned char *file_name;
-       size_t name_len;
-       struct pid *tgid;
-
-#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
-       __u32 response; /* userspace answer to question */
-#endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
-
-       struct list_head private_data_list;     /* groups can store private data here */
-};
 
 /*
  * Inode specific fields in an fsnotify_mark
@@ -370,17 +318,12 @@ extern void fsnotify_put_group(struct fsnotify_group *group);
 extern void fsnotify_destroy_group(struct fsnotify_group *group);
 /* fasync handler function */
 extern int fsnotify_fasync(int fd, struct file *file, int on);
-/* take a reference to an event */
-extern void fsnotify_get_event(struct fsnotify_event *event);
-extern void fsnotify_put_event(struct fsnotify_event *event);
-/* find private data previously attached to an event and unlink it */
-extern struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group,
-                                                                          struct fsnotify_event *event);
-
+/* Free event from memory */
+extern void fsnotify_destroy_event(struct fsnotify_group *group,
+                                  struct fsnotify_event *event);
 /* attach the event to the group notification queue */
 extern struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group,
                                                        struct fsnotify_event *event,
-                                                       struct fsnotify_event_private_data *priv,
                                                        struct fsnotify_event *(*merge)(struct list_head *,
                                                                                        struct fsnotify_event *));
 /* true if the group notification queue is empty */
@@ -430,15 +373,8 @@ extern void fsnotify_put_mark(struct fsnotify_mark *mark);
 extern void fsnotify_unmount_inodes(struct list_head *list);
 
 /* put here because inotify does some weird stuff when destroying watches */
-extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
-                                                   void *data, int data_is,
-                                                   const unsigned char *name,
-                                                   u32 cookie, gfp_t gfp);
-
-/* fanotify likes to change events after they are on lists... */
-extern struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event);
-extern int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
-                                 struct fsnotify_event *new_event);
+extern void fsnotify_init_event(struct fsnotify_event *event,
+                               struct inode *to_tell, u32 mask);
 
 #else
 
index 1eda33d7cb1089ac91c2b3499d76e47f97051b1a..1c2fdaa2ffc3ef68fed69ffc7be5ef5d357a0dcb 100644 (file)
@@ -30,6 +30,8 @@
 #ifndef __GENALLOC_H__
 #define __GENALLOC_H__
 
+#include <linux/spinlock_types.h>
+
 struct device;
 struct device_node;
 
index 9b4dd491f7e8db512d11d357a83900497750197e..0437439bc047bd4a99d7465132b1d9417667934e 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef __LINUX_GFP_H
 #define __LINUX_GFP_H
 
+#include <linux/mmdebug.h>
 #include <linux/mmzone.h>
 #include <linux/stddef.h>
 #include <linux/linkage.h>
index 91672e2deec36cd4c986116c32b527fe5f934e19..db512014e061b27abae7d689175e82d74348683b 100644 (file)
@@ -157,6 +157,26 @@ static inline int hpage_nr_pages(struct page *page)
                return HPAGE_PMD_NR;
        return 1;
 }
+/*
+ * compound_trans_head() should be used instead of compound_head(),
+ * whenever the "page" passed as parameter could be the tail of a
+ * transparent hugepage that could be undergoing a
+ * __split_huge_page_refcount(). The page structure layout often
+ * changes across releases and it makes extensive use of unions. So if
+ * the page structure layout will change in a way that
+ * page->first_page gets clobbered by __split_huge_page_refcount, the
+ * implementation making use of smp_rmb() will be required.
+ *
+ * Currently we define compound_trans_head as compound_head, because
+ * page->private is in the same union with page->first_page, and
+ * page->private isn't clobbered. However this also means we're
+ * currently leaving dirt into the page->private field of anonymous
+ * pages resulting from a THP split, instead of setting page->private
+ * to zero like for every other page that has PG_private not set. But
+ * anonymous pages don't use page->private so this is not a problem.
+ */
+#if 0
+/* This will be needed if page->private will be clobbered in split_huge_page */
 static inline struct page *compound_trans_head(struct page *page)
 {
        if (PageTail(page)) {
@@ -174,6 +194,9 @@ static inline struct page *compound_trans_head(struct page *page)
        }
        return page;
 }
+#else
+#define compound_trans_head(page) compound_head(page)
+#endif
 
 extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                                unsigned long addr, pmd_t pmd, pmd_t *pmdp);
index bd7e987522221f42b7042cc242abc8ca31a72996..8c43cc469d78259b6028dfb30be8899b9a95e3d0 100644 (file)
@@ -2,6 +2,7 @@
 #define _LINUX_HUGETLB_H
 
 #include <linux/mm_types.h>
+#include <linux/mmdebug.h>
 #include <linux/fs.h>
 #include <linux/hugetlb_inline.h>
 #include <linux/cgroup.h>
@@ -31,7 +32,6 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
 void hugepage_put_subpool(struct hugepage_subpool *spool);
 
 int PageHuge(struct page *page);
-int PageHeadHuge(struct page *page_head);
 
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
 int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
@@ -104,11 +104,6 @@ static inline int PageHuge(struct page *page)
        return 0;
 }
 
-static inline int PageHeadHuge(struct page *page_head)
-{
-       return 0;
-}
-
 static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 {
 }
@@ -360,6 +355,7 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 
 static inline struct hstate *page_hstate(struct page *page)
 {
+       VM_BUG_ON_PAGE(!PageHuge(page), page);
        return size_to_hstate(PAGE_SIZE << compound_order(page));
 }
 
index ce8217f7b5c224176178e95bcf5a76c75dd8dba6..787bba3bf5528e6679cea598c2c212c5fb108ea2 100644 (file)
@@ -15,6 +15,7 @@
 #ifndef _LINUX_HUGETLB_CGROUP_H
 #define _LINUX_HUGETLB_CGROUP_H
 
+#include <linux/mmdebug.h>
 #include <linux/res_counter.h>
 
 struct hugetlb_cgroup;
@@ -28,7 +29,7 @@ struct hugetlb_cgroup;
 
 static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
 {
-       VM_BUG_ON(!PageHuge(page));
+       VM_BUG_ON_PAGE(!PageHuge(page), page);
 
        if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
                return NULL;
@@ -38,7 +39,7 @@ static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
 static inline
 int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg)
 {
-       VM_BUG_ON(!PageHuge(page));
+       VM_BUG_ON_PAGE(!PageHuge(page), page);
 
        if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
                return -1;
index 4dfdb893362ada191359822d465aa32c6a0ce97e..6df7f9fe0d014faab41d122c41765ef69bf7ee59 100644 (file)
@@ -41,6 +41,7 @@ extern struct fs_struct init_fs;
 
 #define INIT_SIGNALS(sig) {                                            \
        .nr_threads     = 1,                                            \
+       .thread_head    = LIST_HEAD_INIT(init_task.thread_node),        \
        .wait_chldexit  = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
        .shared_pending = {                                             \
                .list = LIST_HEAD_INIT(sig.shared_pending.list),        \
@@ -222,6 +223,7 @@ extern struct task_group root_task_group;
                [PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),            \
        },                                                              \
        .thread_group   = LIST_HEAD_INIT(tsk.thread_group),             \
+       .thread_node    = LIST_HEAD_INIT(init_signals.thread_head),     \
        INIT_IDS                                                        \
        INIT_PERF_EVENTS(tsk)                                           \
        INIT_TRACE_IRQFLAGS                                             \
index 82ce323b998692b9c3d0b95b762605e51147cafd..6453b22372ac4cab8396719335ffe7a5a1e0a5e8 100644 (file)
@@ -79,6 +79,7 @@ struct input_value {
  * @led: reflects current state of device's LEDs
  * @snd: reflects current state of sound effects
  * @sw: reflects current state of device's switches
+ * @leds: leds objects for the device's LEDs
  * @open: this method is called when the very first user calls
  *     input_open_device(). The driver must prepare the device
  *     to start generating events (start polling thread,
@@ -164,6 +165,8 @@ struct input_dev {
        unsigned long snd[BITS_TO_LONGS(SND_CNT)];
        unsigned long sw[BITS_TO_LONGS(SW_CNT)];
 
+       struct led_classdev *leds;
+
        int (*open)(struct input_dev *dev);
        void (*close)(struct input_dev *dev);
        int (*flush)(struct input_dev *dev, struct file *file);
@@ -531,4 +534,22 @@ int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file);
 int input_ff_create_memless(struct input_dev *dev, void *data,
                int (*play_effect)(struct input_dev *, void *, struct ff_effect *));
 
+#ifdef CONFIG_INPUT_LEDS
+
+int input_led_connect(struct input_dev *dev);
+void input_led_disconnect(struct input_dev *dev);
+
+#else
+
+static inline int input_led_connect(struct input_dev *dev)
+{
+       return 0;
+}
+
+static inline void input_led_disconnect(struct input_dev *dev)
+{
+}
+
+#endif
+
 #endif
index 8d861b2651f7b0586edfa0be89adac92d9592746..9d84942ae2e577835a338b8a352f9c93eabe148b 100644 (file)
@@ -11,7 +11,7 @@
 struct kern_ipc_perm
 {
        spinlock_t      lock;
-       int             deleted;
+       bool            deleted;
        int             id;
        key_t           key;
        kuid_t          uid;
index f6c82de125413e8b6076ab91d68f64771641bfd1..e7831d20373776f5c3dc80c5a04147e8aa611c85 100644 (file)
@@ -21,7 +21,6 @@ struct user_namespace;
 struct ipc_ids {
        int in_use;
        unsigned short seq;
-       unsigned short seq_max;
        struct rw_semaphore rwsem;
        struct idr ipcs_idr;
        int next_id;
index 2aa3d4b000e6f62c209d594bfdd9c3cde2dd68e7..f74bb581ab649cbbec6f1c2abfcb5b4394f0aa85 100644 (file)
 #define ULLONG_MAX     (~0ULL)
 #define SIZE_MAX       (~(size_t)0)
 
+#define U8_MAX         ((u8)~0U)
+#define S8_MAX         ((s8)(U8_MAX>>1))
+#define S8_MIN         ((s8)(-S8_MAX - 1))
+#define U16_MAX                ((u16)~0U)
+#define S16_MAX                ((s16)(U16_MAX>>1))
+#define S16_MIN                ((s16)(-S16_MAX - 1))
+#define U32_MAX                ((u32)~0U)
+#define S32_MAX                ((s32)(U32_MAX>>1))
+#define S32_MIN                ((s32)(-S32_MAX - 1))
+#define U64_MAX                ((u64)~0ULL)
+#define S64_MAX                ((s64)(U64_MAX>>1))
+#define S64_MIN                ((s64)(-S64_MAX - 1))
+
 #define STACK_MAGIC    0xdeadbeef
 
 #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
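These limits replace the copies previously private to the Ceph headers (removed earlier in this diff). A short illustrative sketch of the kind of narrowing-conversion check they make self-describing (the example_* name is invented):

static inline int example_fit_s32(s64 value, s32 *out)
{
	if (value < S32_MIN || value > S32_MAX)
		return -ERANGE;

	*out = (s32)value;
	return 0;
}
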
index 5fd33dc1fe3ad265d352ed48f3a8cdeec0ca646e..6d4066cdb5b5b8508be5347e50f1c16c1fddf493 100644 (file)
@@ -170,6 +170,7 @@ unsigned long paddr_vmcoreinfo_note(void);
 
 extern struct kimage *kexec_image;
 extern struct kimage *kexec_crash_image;
+extern int kexec_load_disabled;
 
 #ifndef kexec_flush_icache_page
 #define kexec_flush_icache_page(page)
index 45c9b6a17bcb5e7037ce01173f51ec6383d681c7..3be6bb18562dc7b1c8d4719b644f485ce5dbca6c 100644 (file)
@@ -73,11 +73,7 @@ static inline void set_page_stable_node(struct page *page,
 struct page *ksm_might_need_to_copy(struct page *page,
                        struct vm_area_struct *vma, unsigned long address);
 
-int page_referenced_ksm(struct page *page,
-                       struct mem_cgroup *memcg, unsigned long *vm_flags);
-int try_to_unmap_ksm(struct page *page, enum ttu_flags flags);
-int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
-                 struct vm_area_struct *, unsigned long, void *), void *arg);
+int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
 void ksm_migrate_page(struct page *newpage, struct page *oldpage);
 
 #else  /* !CONFIG_KSM */
@@ -115,13 +111,8 @@ static inline int page_referenced_ksm(struct page *page,
        return 0;
 }
 
-static inline int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
-{
-       return 0;
-}
-
-static inline int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page*,
-               struct vm_area_struct *, unsigned long, void *), void *arg)
+static inline int rmap_walk_ksm(struct page *page,
+                       struct rmap_walk_control *rwc)
 {
        return 0;
 }
index 7dcef3317689e8ba6037eb5ecebf842095bdbda2..50050ae17b00e855a94df04d58adaf48f0118148 100644 (file)
@@ -51,6 +51,7 @@ void kthread_parkme(void);
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
 extern int tsk_fork_get_node(struct task_struct *tsk);
+void set_kthreadd_affinity(void);
 
 /*
  * Simple work processor based on kthread.
index 77c60e52939da9f74611e8d3443b04220396ca26..1ef66360f0b092b751f430bf31ca693a8a85ed46 100644 (file)
 
 #define INIT_MEMBLOCK_REGIONS  128
 
+/* Definition of memblock flags. */
+#define MEMBLOCK_HOTPLUG       0x1     /* hotpluggable region */
+
 struct memblock_region {
        phys_addr_t base;
        phys_addr_t size;
+       unsigned long flags;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
        int nid;
 #endif
@@ -43,15 +47,21 @@ struct memblock {
 
 extern struct memblock memblock;
 extern int memblock_debug;
+#ifdef CONFIG_MOVABLE_NODE
+/* If movable_node boot option specified */
+extern bool movable_node_enabled;
+#endif /* CONFIG_MOVABLE_NODE */
 
 #define memblock_dbg(fmt, ...) \
        if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 
-phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end,
-                               phys_addr_t size, phys_addr_t align, int nid);
+phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
+                                           phys_addr_t start, phys_addr_t end,
+                                           int nid);
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
                                   phys_addr_t size, phys_addr_t align);
 phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr);
+phys_addr_t get_allocated_memblock_memory_regions_info(phys_addr_t *addr);
 void memblock_allow_resize(void);
 int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid);
 int memblock_add(phys_addr_t base, phys_addr_t size);
@@ -59,6 +69,28 @@ int memblock_remove(phys_addr_t base, phys_addr_t size);
 int memblock_free(phys_addr_t base, phys_addr_t size);
 int memblock_reserve(phys_addr_t base, phys_addr_t size);
 void memblock_trim_memory(phys_addr_t align);
+int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
+int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
+#ifdef CONFIG_MOVABLE_NODE
+static inline bool memblock_is_hotpluggable(struct memblock_region *m)
+{
+       return m->flags & MEMBLOCK_HOTPLUG;
+}
+
+static inline bool movable_node_is_enabled(void)
+{
+       return movable_node_enabled;
+}
+#else
+static inline bool memblock_is_hotpluggable(struct memblock_region *m)
+{
+       return false;
+}
+static inline bool movable_node_is_enabled(void)
+{
+       return false;
+}
+#endif
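A hypothetical sketch of how the new flag and helpers might fit together; the expected real users are the firmware memory-affinity parser and the memblock allocator, and the example_* names here are invented.

#include <linux/memblock.h>

static int __init example_mark_hotplug_range(phys_addr_t base, phys_addr_t size)
{
	/* Record a firmware-reported hot-pluggable range. */
	return memblock_mark_hotplug(base, size);
}

static bool __init example_region_ok_for_kernel_data(struct memblock_region *r)
{
	/* With movable_node, keep unmovable allocations out of such regions. */
	return !(movable_node_is_enabled() && memblock_is_hotpluggable(r));
}
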
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
@@ -87,7 +119,7 @@ void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start,
 /**
  * for_each_free_mem_range - iterate through free memblock areas
  * @i: u64 used as loop variable
- * @nid: node selector, %MAX_NUMNODES for all nodes
+ * @nid: node selector, %NUMA_NO_NODE for all nodes
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @p_nid: ptr to int for nid of the range, can be %NULL
@@ -107,7 +139,7 @@ void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
 /**
  * for_each_free_mem_range_reverse - rev-iterate through free memblock areas
  * @i: u64 used as loop variable
- * @nid: node selector, %MAX_NUMNODES for all nodes
+ * @nid: node selector, %NUMA_NO_NODE for all nodes
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @p_nid: ptr to int for nid of the range, can be %NULL
@@ -121,8 +153,21 @@ void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
             i != (u64)ULLONG_MAX;                                      \
             __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid))
 
+static inline void memblock_set_region_flags(struct memblock_region *r,
+                                            unsigned long flags)
+{
+       r->flags |= flags;
+}
+
+static inline void memblock_clear_region_flags(struct memblock_region *r,
+                                              unsigned long flags)
+{
+       r->flags &= ~flags;
+}
+
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid);
+int memblock_set_node(phys_addr_t base, phys_addr_t size,
+                     struct memblock_type *type, int nid);
 
 static inline void memblock_set_region_node(struct memblock_region *r, int nid)
 {
index b3e7a667e03c24ca5c3d1c54c0db5c7b0bdde052..abd0113b66203ca2713c848516e5f3a7cef90718 100644 (file)
@@ -497,10 +497,11 @@ void __memcg_kmem_commit_charge(struct page *page,
 void __memcg_kmem_uncharge_pages(struct page *page, int order);
 
 int memcg_cache_id(struct mem_cgroup *memcg);
-int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
-                        struct kmem_cache *root_cache);
-void memcg_release_cache(struct kmem_cache *cachep);
-void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
+int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
+                            struct kmem_cache *root_cache);
+void memcg_free_cache_params(struct kmem_cache *s);
+void memcg_register_cache(struct kmem_cache *s);
+void memcg_unregister_cache(struct kmem_cache *s);
 
 int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
 void memcg_update_array_size(int num_groups);
@@ -640,19 +641,21 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
        return -1;
 }
 
-static inline int
-memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
-                    struct kmem_cache *root_cache)
+static inline int memcg_alloc_cache_params(struct mem_cgroup *memcg,
+               struct kmem_cache *s, struct kmem_cache *root_cache)
 {
        return 0;
 }
 
-static inline void memcg_release_cache(struct kmem_cache *cachep)
+static inline void memcg_free_cache_params(struct kmem_cache *s)
+{
+}
+
+static inline void memcg_register_cache(struct kmem_cache *s)
 {
 }
 
-static inline void memcg_cache_list_add(struct mem_cgroup *memcg,
-                                       struct kmem_cache *s)
+static inline void memcg_unregister_cache(struct kmem_cache *s)
 {
 }
 
index 9fe426b30a418e335485a7aee045883c974f15de..5f1ea756aaceee191eaccb1e5c73ab7455636de8 100644 (file)
@@ -211,20 +211,8 @@ static inline void mpol_get(struct mempolicy *pol)
 {
 }
 
-static inline struct mempolicy *mpol_dup(struct mempolicy *old)
-{
-       return NULL;
-}
-
 struct shared_policy {};
 
-static inline int mpol_set_shared_policy(struct shared_policy *info,
-                                       struct vm_area_struct *vma,
-                                       struct mempolicy *new)
-{
-       return -EINVAL;
-}
-
 static inline void mpol_shared_policy_init(struct shared_policy *sp,
                                                struct mempolicy *mpol)
 {
@@ -234,12 +222,6 @@ static inline void mpol_free_shared_policy(struct shared_policy *p)
 {
 }
 
-static inline struct mempolicy *
-mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
-{
-       return NULL;
-}
-
 #define vma_policy(vma) NULL
 
 static inline int
@@ -266,10 +248,6 @@ static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
 {
 }
 
-static inline void mpol_fix_fork_child_flag(struct task_struct *p)
-{
-}
-
 static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
                                unsigned long addr, gfp_t gfp_flags,
                                struct mempolicy **mpol, nodemask_t **nodemask)
@@ -284,12 +262,6 @@ static inline bool init_nodemask_of_mempolicy(nodemask_t *m)
        return false;
 }
 
-static inline bool mempolicy_nodemask_intersects(struct task_struct *tsk,
-                       const nodemask_t *mask)
-{
-       return false;
-}
-
 static inline int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
                                   const nodemask_t *to, int flags)
 {
@@ -307,10 +279,6 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol)
 }
 #endif
 
-static inline void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
-{
-}
-
 static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
                                 unsigned long address)
 {
index f015c059e159f1f8cad2da7b88dd26cdcbfd637d..84a31ad0b791180215809f06bc70b4396a8ed46d 100644 (file)
@@ -35,16 +35,12 @@ enum migrate_reason {
 
 #ifdef CONFIG_MIGRATION
 
-extern void putback_lru_pages(struct list_head *l);
 extern void putback_movable_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
                        struct page *, struct page *, enum migrate_mode);
 extern int migrate_pages(struct list_head *l, new_page_t x,
                unsigned long private, enum migrate_mode mode, int reason);
 
-extern int fail_migrate_page(struct address_space *,
-                       struct page *, struct page *);
-
 extern int migrate_prep(void);
 extern int migrate_prep_local(void);
 extern int migrate_vmas(struct mm_struct *mm,
@@ -59,7 +55,6 @@ extern int migrate_page_move_mapping(struct address_space *mapping,
                int extra_count);
 #else
 
-static inline void putback_lru_pages(struct list_head *l) {}
 static inline void putback_movable_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
                unsigned long private, enum migrate_mode mode, int reason)
@@ -86,7 +81,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 
 /* Possible settings for the migrate_page() method in address_operations */
 #define migrate_page NULL
-#define fail_migrate_page NULL
 
 #endif /* CONFIG_MIGRATION */
 
index bf362d053ce1c0f3af1a69a151b5de8490693a0d..28b6daade067e688adf5a27543723fea4fbd81f0 100644 (file)
@@ -5,6 +5,7 @@
 
 #ifdef __KERNEL__
 
+#include <linux/mmdebug.h>
 #include <linux/gfp.h>
 #include <linux/bug.h>
 #include <linux/list.h>
@@ -57,6 +58,15 @@ extern int sysctl_legacy_va_layout;
 extern unsigned long sysctl_user_reserve_kbytes;
 extern unsigned long sysctl_admin_reserve_kbytes;
 
+extern int sysctl_overcommit_memory;
+extern int sysctl_overcommit_ratio;
+extern unsigned long sysctl_overcommit_kbytes;
+
+extern int overcommit_ratio_handler(struct ctl_table *, int, void __user *,
+                                   size_t *, loff_t *);
+extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *,
+                                   size_t *, loff_t *);
+
 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
 
 /* to align the pointer to the (next) page boundary */
@@ -294,7 +304,7 @@ static inline int get_freepage_migratetype(struct page *page)
  */
 static inline int put_page_testzero(struct page *page)
 {
-       VM_BUG_ON(atomic_read(&page->_count) == 0);
+       VM_BUG_ON_PAGE(atomic_read(&page->_count) == 0, page);
        return atomic_dec_and_test(&page->_count);
 }
 
@@ -355,7 +365,7 @@ static inline int is_vmalloc_or_module_addr(const void *x)
 static inline void compound_lock(struct page *page)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       VM_BUG_ON(PageSlab(page));
+       VM_BUG_ON_PAGE(PageSlab(page), page);
        bit_spin_lock(PG_compound_lock, &page->flags);
 #endif
 }
@@ -363,7 +373,7 @@ static inline void compound_lock(struct page *page)
 static inline void compound_unlock(struct page *page)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       VM_BUG_ON(PageSlab(page));
+       VM_BUG_ON_PAGE(PageSlab(page), page);
        bit_spin_unlock(PG_compound_lock, &page->flags);
 #endif
 }
@@ -414,15 +424,44 @@ static inline int page_count(struct page *page)
        return atomic_read(&compound_head(page)->_count);
 }
 
+#ifdef CONFIG_HUGETLB_PAGE
+extern int PageHeadHuge(struct page *page_head);
+#else /* CONFIG_HUGETLB_PAGE */
+static inline int PageHeadHuge(struct page *page_head)
+{
+       return 0;
+}
+#endif /* CONFIG_HUGETLB_PAGE */
+
+static inline bool __compound_tail_refcounted(struct page *page)
+{
+       return !PageSlab(page) && !PageHeadHuge(page);
+}
+
+/*
+ * This takes a head page as parameter and tells if the
+ * tail page reference counting can be skipped.
+ *
+ * For this to be safe, PageSlab and PageHeadHuge must remain true on
+ * any given page where they return true here, until all tail pins
+ * have been released.
+ */
+static inline bool compound_tail_refcounted(struct page *page)
+{
+       VM_BUG_ON_PAGE(!PageHead(page), page);
+       return __compound_tail_refcounted(page);
+}
+
 static inline void get_huge_page_tail(struct page *page)
 {
        /*
-        * __split_huge_page_refcount() cannot run
-        * from under us.
+        * __split_huge_page_refcount() cannot run from under us.
         */
-       VM_BUG_ON(page_mapcount(page) < 0);
-       VM_BUG_ON(atomic_read(&page->_count) != 0);
-       atomic_inc(&page->_mapcount);
+       VM_BUG_ON_PAGE(!PageTail(page), page);
+       VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
+       VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page);
+       if (compound_tail_refcounted(page->first_page))
+               atomic_inc(&page->_mapcount);
 }
 
 extern bool __get_page_tail(struct page *page);
@@ -436,7 +475,7 @@ static inline void get_page(struct page *page)
         * Getting a normal page or the head of a compound page
         * requires to already have an elevated page->_count.
         */
-       VM_BUG_ON(atomic_read(&page->_count) <= 0);
+       VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page);
        atomic_inc(&page->_count);
 }
 
@@ -473,13 +512,13 @@ static inline int PageBuddy(struct page *page)
 
 static inline void __SetPageBuddy(struct page *page)
 {
-       VM_BUG_ON(atomic_read(&page->_mapcount) != -1);
+       VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
        atomic_set(&page->_mapcount, PAGE_BUDDY_MAPCOUNT_VALUE);
 }
 
 static inline void __ClearPageBuddy(struct page *page)
 {
-       VM_BUG_ON(!PageBuddy(page));
+       VM_BUG_ON_PAGE(!PageBuddy(page), page);
        atomic_set(&page->_mapcount, -1);
 }
 
@@ -984,7 +1023,6 @@ extern void pagefault_out_of_memory(void);
  * various contexts.
  */
 #define SHOW_MEM_FILTER_NODES          (0x0001u)       /* disallowed nodes */
-#define SHOW_MEM_FILTER_PAGE_COUNT     (0x0002u)       /* page type count */
 
 extern void show_free_areas(unsigned int flags);
 extern bool skip_free_areas_node(unsigned int flags, int nid);
@@ -1318,6 +1356,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 
 #if USE_SPLIT_PTE_PTLOCKS
 #if ALLOC_SPLIT_PTLOCKS
+void __init ptlock_cache_init(void);
 extern bool ptlock_alloc(struct page *page);
 extern void ptlock_free(struct page *page);
 
@@ -1326,6 +1365,10 @@ static inline spinlock_t *ptlock_ptr(struct page *page)
        return page->ptl;
 }
 #else /* ALLOC_SPLIT_PTLOCKS */
+static inline void ptlock_cache_init(void)
+{
+}
+
 static inline bool ptlock_alloc(struct page *page)
 {
        return true;
@@ -1356,7 +1399,7 @@ static inline bool ptlock_init(struct page *page)
         * slab code uses page->slab_cache and page->first_page (for tail
         * pages), which share storage with page->ptl.
         */
-       VM_BUG_ON(*(unsigned long *)&page->ptl);
+       VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page);
        if (!ptlock_alloc(page))
                return false;
        spin_lock_init(ptlock_ptr(page));
@@ -1378,10 +1421,17 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
        return &mm->page_table_lock;
 }
+static inline void ptlock_cache_init(void) {}
 static inline bool ptlock_init(struct page *page) { return true; }
 static inline void pte_lock_deinit(struct page *page) {}
 #endif /* USE_SPLIT_PTE_PTLOCKS */
 
+static inline void pgtable_init(void)
+{
+       ptlock_cache_init();
+       pgtable_cache_init();
+}
+
 static inline bool pgtable_page_ctor(struct page *page)
 {
        inc_zone_page_state(page, NR_PAGETABLE);
@@ -1440,7 +1490,7 @@ static inline bool pgtable_pmd_page_ctor(struct page *page)
 static inline void pgtable_pmd_page_dtor(struct page *page)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       VM_BUG_ON(page->pmd_huge_pte);
+       VM_BUG_ON_PAGE(page->pmd_huge_pte, page);
 #endif
        ptlock_free(page);
 }
@@ -1977,8 +2027,6 @@ extern void shake_page(struct page *p, int access);
 extern atomic_long_t num_poisoned_pages;
 extern int soft_offline_page(struct page *page, int flags);
 
-extern void dump_page(struct page *page);
-
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
 extern void clear_huge_page(struct page *page,
                            unsigned long addr,
index 7f7f8dae4b1deec32eb1079428f4b13b87dd25c7..16373c8f5f5788c12a2b404d3793f0dd67d979c5 100644 (file)
@@ -9,6 +9,7 @@
 
 extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
+extern unsigned long sysctl_overcommit_kbytes;
 extern struct percpu_counter vm_committed_as;
 
 #ifdef CONFIG_SMP
index 580bd587d916cfa28116e814b24639e1719efe5c..5042c036dda9fcddcef627743d588f62b5aa4aaf 100644 (file)
@@ -1,10 +1,19 @@
 #ifndef LINUX_MM_DEBUG_H
 #define LINUX_MM_DEBUG_H 1
 
+struct page;
+
+extern void dump_page(struct page *page, char *reason);
+extern void dump_page_badflags(struct page *page, char *reason,
+                              unsigned long badflags);
+
 #ifdef CONFIG_DEBUG_VM
 #define VM_BUG_ON(cond) BUG_ON(cond)
+#define VM_BUG_ON_PAGE(cond, page) \
+       do { if (unlikely(cond)) { dump_page(page, NULL); BUG(); } } while (0)
 #else
 #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
+#define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
 #endif
 
 #ifdef CONFIG_DEBUG_VIRTUAL
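An illustrative sketch of the two additions above (all names invented): VM_BUG_ON_PAGE() dumps the offending struct page before BUG(), which a plain VM_BUG_ON() would lose, and dump_page() now takes a reason string.

#include <linux/mm.h>
#include <linux/mmdebug.h>

static inline void example_check_refs(struct page *page)
{
	/* With CONFIG_DEBUG_VM this prints the struct page before BUG(). */
	VM_BUG_ON_PAGE(page_count(page) <= 0, page);

	/* Non-fatal diagnostic with an explicit reason. */
	if (PageTail(page))
		dump_page(page, "unexpected tail page");
}
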
index bd791e452ad7a96329d883afba92ffbdf7bbc9bc..5f2052c831547a33b66606d78ac5713086d285e3 100644 (file)
@@ -489,6 +489,12 @@ struct zone {
        unsigned long           present_pages;
        unsigned long           managed_pages;
 
+       /*
+        * Number of MIGRATE_RESERVE page blocks. Maintained only as an
+        * optimization. Protected by zone->lock.
+        */
+       int                     nr_migrate_reserve_block;
+
        /*
         * rarely used fields:
         */
@@ -758,10 +764,7 @@ typedef struct pglist_data {
        int kswapd_max_order;
        enum zone_type classzone_idx;
 #ifdef CONFIG_NUMA_BALANCING
-       /*
-        * Lock serializing the per destination node AutoNUMA memory
-        * migration rate limiting data.
-        */
+       /* Lock serializing the migrate rate limiting window */
        spinlock_t numabalancing_migrate_lock;
 
        /* Rate limiting time interval */
index e21f9d44307f0004baf0146b4d02a20de6a96466..f3f302f9c1975a67ea1c01a828403950fac8a061 100644 (file)
@@ -9,7 +9,7 @@ struct msg_msg {
        struct list_head m_list;
        long m_type;
        size_t m_ts;            /* message text size */
-       struct msg_msgsegnext;
+       struct msg_msgseg *next;
        void *security;
        /* the actual message follows immediately */
 };
index 276c546980d81fec5867a3126cc2bb73575ed769..70c64ba17fa51f7cca0e232b8f95b7669f5a3075 100644 (file)
@@ -377,8 +377,13 @@ static inline bool of_have_populated_dt(void)
        return false;
 }
 
+/* Kill an unused variable warning on a device_node pointer */
+static inline void __of_use_dn(const struct device_node *np)
+{
+}
+
 #define for_each_child_of_node(parent, child) \
-       while (0)
+       while (__of_use_dn(parent), __of_use_dn(child), 0)
 
 #define for_each_available_child_of_node(parent, child) \
        while (0)
index 98ada58f9942855b90583de9c5ed324993d39101..d1fe1a761047683e555a9544dd5c668d5ae6752b 100644 (file)
@@ -228,9 +228,9 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1)
 TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback)
 PAGEFLAG(MappedToDisk, mappedtodisk)
 
-/* PG_readahead is only used for file reads; PG_reclaim is only for writes */
+/* PG_readahead is only used for reads; PG_reclaim is only for writes */
 PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim)
-PAGEFLAG(Readahead, reclaim)           /* Reminder to do async read-ahead */
+PAGEFLAG(Readahead, reclaim) TESTCLEARFLAG(Readahead, reclaim)
 
 #ifdef CONFIG_HIGHMEM
 /*
@@ -412,7 +412,7 @@ static inline void ClearPageCompound(struct page *page)
  */
 static inline int PageTransHuge(struct page *page)
 {
-       VM_BUG_ON(PageTail(page));
+       VM_BUG_ON_PAGE(PageTail(page), page);
        return PageHead(page);
 }
 
@@ -460,25 +460,25 @@ static inline int PageTransTail(struct page *page)
  */
 static inline int PageSlabPfmemalloc(struct page *page)
 {
-       VM_BUG_ON(!PageSlab(page));
+       VM_BUG_ON_PAGE(!PageSlab(page), page);
        return PageActive(page);
 }
 
 static inline void SetPageSlabPfmemalloc(struct page *page)
 {
-       VM_BUG_ON(!PageSlab(page));
+       VM_BUG_ON_PAGE(!PageSlab(page), page);
        SetPageActive(page);
 }
 
 static inline void __ClearPageSlabPfmemalloc(struct page *page)
 {
-       VM_BUG_ON(!PageSlab(page));
+       VM_BUG_ON_PAGE(!PageSlab(page), page);
        __ClearPageActive(page);
 }
 
 static inline void ClearPageSlabPfmemalloc(struct page *page)
 {
-       VM_BUG_ON(!PageSlab(page));
+       VM_BUG_ON_PAGE(!PageSlab(page), page);
        ClearPageActive(page);
 }
 
index e3dea75a078ba67b12408abd743633e3ea0a3995..1710d1b060ba23f47a0269cf3481805ee93480db 100644 (file)
@@ -162,7 +162,7 @@ static inline int page_cache_get_speculative(struct page *page)
         * disabling preempt, and hence no need for the "speculative get" that
         * SMP requires.
         */
-       VM_BUG_ON(page_count(page) == 0);
+       VM_BUG_ON_PAGE(page_count(page) == 0, page);
        atomic_inc(&page->_count);
 
 #else
@@ -175,7 +175,7 @@ static inline int page_cache_get_speculative(struct page *page)
                return 0;
        }
 #endif
-       VM_BUG_ON(PageTail(page));
+       VM_BUG_ON_PAGE(PageTail(page), page);
 
        return 1;
 }
@@ -191,14 +191,14 @@ static inline int page_cache_add_speculative(struct page *page, int count)
 # ifdef CONFIG_PREEMPT_COUNT
        VM_BUG_ON(!in_atomic());
 # endif
-       VM_BUG_ON(page_count(page) == 0);
+       VM_BUG_ON_PAGE(page_count(page) == 0, page);
        atomic_add(count, &page->_count);
 
 #else
        if (unlikely(!atomic_add_unless(&page->_count, count, 0)))
                return 0;
 #endif
-       VM_BUG_ON(PageCompound(page) && page != compound_head(page));
+       VM_BUG_ON_PAGE(PageCompound(page) && page != compound_head(page), page);
 
        return 1;
 }
@@ -210,7 +210,7 @@ static inline int page_freeze_refs(struct page *page, int count)
 
 static inline void page_unfreeze_refs(struct page *page, int count)
 {
-       VM_BUG_ON(page_count(page) != 0);
+       VM_BUG_ON_PAGE(page_count(page) != 0, page);
        VM_BUG_ON(count == 0);
 
        atomic_set(&page->_count, count);
index ea2281e726f65f5f2968eb571adc874843530959..39d5b7955b23f9a02c80904804c996180de060ac 100644 (file)
@@ -29,5 +29,6 @@ int match_token(char *, const match_table_t table, substring_t args[]);
 int match_int(substring_t *, int *result);
 int match_octal(substring_t *, int *result);
 int match_hex(substring_t *, int *result);
+bool match_wildcard(const char *pattern, const char *str);
 size_t match_strlcpy(char *, const substring_t *, size_t);
 char *match_strdup(const substring_t *);
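match_wildcard() is new here, apparently to support wildcard filters for dynamic debug queries. A hedged sketch assuming conventional glob semantics ('*' matches any run of characters, '?' a single character); the example_* name and pattern are invented.

#include <linux/parser.h>

static bool example_file_matches(const char *path)
{
	return match_wildcard("drivers/rtc/rtc-*.c", path);
}
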
index 9e4761caa80c6ec906d2c0edad62cfbd2d4ab3bd..e3817d2441b697c2042a9e4b7624d99333671cb3 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef __LINUX_PERCPU_H
 #define __LINUX_PERCPU_H
 
+#include <linux/mmdebug.h>
 #include <linux/preempt.h>
 #include <linux/smp.h>
 #include <linux/cpumask.h>
index 7931efe7117553d00a920cfd4aef757f72578658..fb616942e4c7f2ebdca8b7997498605a99a7e943 100644 (file)
@@ -94,78 +94,12 @@ extern int posix_acl_chmod(struct posix_acl **, gfp_t, umode_t);
 extern struct posix_acl *get_posix_acl(struct inode *, int);
 extern int set_posix_acl(struct inode *, int, struct posix_acl *);
 
-#ifdef CONFIG_FS_POSIX_ACL
-static inline struct posix_acl **acl_by_type(struct inode *inode, int type)
-{
-       switch (type) {
-       case ACL_TYPE_ACCESS:
-               return &inode->i_acl;
-       case ACL_TYPE_DEFAULT:
-               return &inode->i_default_acl;
-       default:
-               BUG();
-       }
-}
-
-static inline struct posix_acl *get_cached_acl(struct inode *inode, int type)
-{
-       struct posix_acl **p = acl_by_type(inode, type);
-       struct posix_acl *acl = ACCESS_ONCE(*p);
-       if (acl) {
-               spin_lock(&inode->i_lock);
-               acl = *p;
-               if (acl != ACL_NOT_CACHED)
-                       acl = posix_acl_dup(acl);
-               spin_unlock(&inode->i_lock);
-       }
-       return acl;
-}
-
-static inline struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type)
-{
-       return rcu_dereference(*acl_by_type(inode, type));
-}
-
-static inline void set_cached_acl(struct inode *inode,
-                                 int type,
-                                 struct posix_acl *acl)
-{
-       struct posix_acl **p = acl_by_type(inode, type);
-       struct posix_acl *old;
-       spin_lock(&inode->i_lock);
-       old = *p;
-       rcu_assign_pointer(*p, posix_acl_dup(acl));
-       spin_unlock(&inode->i_lock);
-       if (old != ACL_NOT_CACHED)
-               posix_acl_release(old);
-}
-
-static inline void forget_cached_acl(struct inode *inode, int type)
-{
-       struct posix_acl **p = acl_by_type(inode, type);
-       struct posix_acl *old;
-       spin_lock(&inode->i_lock);
-       old = *p;
-       *p = ACL_NOT_CACHED;
-       spin_unlock(&inode->i_lock);
-       if (old != ACL_NOT_CACHED)
-               posix_acl_release(old);
-}
-
-static inline void forget_all_cached_acls(struct inode *inode)
-{
-       struct posix_acl *old_access, *old_default;
-       spin_lock(&inode->i_lock);
-       old_access = inode->i_acl;
-       old_default = inode->i_default_acl;
-       inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
-       spin_unlock(&inode->i_lock);
-       if (old_access != ACL_NOT_CACHED)
-               posix_acl_release(old_access);
-       if (old_default != ACL_NOT_CACHED)
-               posix_acl_release(old_default);
-}
-#endif
+struct posix_acl **acl_by_type(struct inode *inode, int type);
+struct posix_acl *get_cached_acl(struct inode *inode, int type);
+struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type);
+void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl);
+void forget_cached_acl(struct inode *inode, int type);
+void forget_all_cached_acls(struct inode *inode);
 
 static inline void cache_no_acl(struct inode *inode)
 {
index 26fb95ce5080dec51eb9c1f654ac81af87e53a07..fa47e2708c01748a958f40300c900673cfaa3777 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/init.h>
 #include <linux/kern_levels.h>
 #include <linux/linkage.h>
+#include <linux/cache.h>
 
 extern const char linux_banner[];
 extern const char linux_proc_banner[];
@@ -260,17 +261,17 @@ extern asmlinkage void dump_stack(void) __cold;
  */
 
 #ifdef CONFIG_PRINTK
-#define printk_once(fmt, ...)                  \
-({                                             \
-       static bool __print_once;               \
-                                               \
-       if (!__print_once) {                    \
-               __print_once = true;            \
-               printk(fmt, ##__VA_ARGS__);     \
-       }                                       \
+#define printk_once(fmt, ...)                                  \
+({                                                             \
+       static bool __print_once __read_mostly;                 \
+                                                               \
+       if (!__print_once) {                                    \
+               __print_once = true;                            \
+               printk(fmt, ##__VA_ARGS__);                     \
+       }                                                       \
 })
 #else
-#define printk_once(fmt, ...)                  \
+#define printk_once(fmt, ...)                                  \
        no_printk(fmt, ##__VA_ARGS__)
 #endif
 
index 753207c8ce204fe11eab92f43d6c04b25e5ee753..ecc730977a5ae9372b5605e1879ec1b4a146b246 100644 (file)
@@ -14,13 +14,6 @@ ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
 }
 #else
 extern int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize);
-extern unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
-                                                  unsigned long addr,
-                                                  unsigned long len,
-                                                  unsigned long pgoff,
-                                                  unsigned long flags);
-
-extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma);
 #endif
 
 extern const struct file_operations ramfs_file_operations;
index 6dacb93a6d94a3f710706b6ed2e16a3fa5ad5e4e..1da693d51255d974c60ba4b50527fe3e1808c03d 100644 (file)
@@ -184,13 +184,13 @@ static inline void page_dup_rmap(struct page *page)
 int page_referenced(struct page *, int is_locked,
                        struct mem_cgroup *memcg, unsigned long *vm_flags);
 int page_referenced_one(struct page *, struct vm_area_struct *,
-       unsigned long address, unsigned int *mapcount, unsigned long *vm_flags);
+       unsigned long address, void *arg);
 
 #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
 
 int try_to_unmap(struct page *, enum ttu_flags flags);
 int try_to_unmap_one(struct page *, struct vm_area_struct *,
-                       unsigned long address, enum ttu_flags flags);
+                       unsigned long address, void *arg);
 
 /*
  * Called from mm/filemap_xip.c to unmap empty zero page
@@ -236,10 +236,27 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma);
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
 /*
- * Called by migrate.c to remove migration ptes, but might be used more later.
+ * rmap_walk_control: controls the rmap traversal for a specific caller
+ *
+ * arg: passed to rmap_one() and invalid_vma()
+ * rmap_one: executed on each vma where the page is mapped
+ * done: checked after each vma to decide whether to stop the walk
+ * file_nonlinear: handles file nonlinear mappings
+ * anon_lock: takes the anon_vma lock in an optimized way rather than the default
+ * invalid_vma: skips vmas the caller is not interested in
  */
-int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
-               struct vm_area_struct *, unsigned long, void *), void *arg);
+struct rmap_walk_control {
+       void *arg;
+       int (*rmap_one)(struct page *page, struct vm_area_struct *vma,
+                                       unsigned long addr, void *arg);
+       int (*done)(struct page *page);
+       int (*file_nonlinear)(struct page *, struct address_space *,
+                                       struct vm_area_struct *vma);
+       struct anon_vma *(*anon_lock)(struct page *page);
+       bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
+};
+
+int rmap_walk(struct page *page, struct rmap_walk_control *rwc);
 
 #else  /* !CONFIG_MMU */
 
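As an illustration of the design above, where one control structure bundles the callbacks and an opaque arg instead of passing each function pointer separately, here is a hypothetical userspace analogue (invented names, not the kernel API):

/* A walker driven by a control struct: new behaviours are added by filling
 * in more callbacks, without changing the walker's signature. */
#include <stdbool.h>
#include <stdio.h>

struct item { int id; };

struct walk_control {
        void *arg;                                   /* passed to every callback */
        int (*visit)(struct item *it, void *arg);    /* run on each item */
        bool (*skip)(struct item *it, void *arg);    /* filter uninteresting items */
        int (*done)(void *arg);                      /* early-termination check */
};

static int walk(struct item *items, int n, struct walk_control *wc)
{
        for (int i = 0; i < n; i++) {
                if (wc->skip && wc->skip(&items[i], wc->arg))
                        continue;
                int ret = wc->visit(&items[i], wc->arg);
                if (ret)
                        return ret;
                if (wc->done && wc->done(wc->arg))
                        break;
        }
        return 0;
}

static int print_item(struct item *it, void *arg)
{
        int *count = arg;
        (*count)++;
        printf("visited item %d\n", it->id);
        return 0;
}

int main(void)
{
        struct item items[] = { {1}, {2}, {3} };
        int count = 0;
        struct walk_control wc = { .arg = &count, .visit = print_item };

        walk(items, 3, &wc);
        printf("visited %d items\n", count);
        return 0;
}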
index ffccdad050b5b89d086b70a906843b3c23861984..68a0e84463a0eb86b273fe14b49bd9b01feab21f 100644 (file)
@@ -229,7 +229,7 @@ extern char ___assert_task_state[1 - 2*!!(
 /* get_task_state() */
 #define TASK_REPORT            (TASK_RUNNING | TASK_INTERRUPTIBLE | \
                                 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
-                                __TASK_TRACED)
+                                __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
 
 #define task_is_traced(task)   ((task->state & __TASK_TRACED) != 0)
 #define task_is_stopped(task)  ((task->state & __TASK_STOPPED) != 0)
@@ -391,22 +391,33 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
 #endif
 
-
-extern void set_dumpable(struct mm_struct *mm, int value);
-extern int get_dumpable(struct mm_struct *mm);
-
 #define SUID_DUMP_DISABLE      0       /* No setuid dumping */
 #define SUID_DUMP_USER         1       /* Dump as user of process */
 #define SUID_DUMP_ROOT         2       /* Dump as root */
 
 /* mm flags */
-/* dumpable bits */
-#define MMF_DUMPABLE      0  /* core dump is permitted */
-#define MMF_DUMP_SECURELY 1  /* core file is readable only by root */
 
+/* for SUID_DUMP_* above */
 #define MMF_DUMPABLE_BITS 2
 #define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1)
 
+extern void set_dumpable(struct mm_struct *mm, int value);
+/*
+ * This returns the actual value of the suid_dumpable flag. For things
+ * that are using this for checking for privilege transitions, it must
+ * test against SUID_DUMP_USER rather than treating it as a boolean
+ * value.
+ */
+static inline int __get_dumpable(unsigned long mm_flags)
+{
+       return mm_flags & MMF_DUMPABLE_MASK;
+}
+
+static inline int get_dumpable(struct mm_struct *mm)
+{
+       return __get_dumpable(mm->flags);
+}
+
 /* coredump filter bits */
 #define MMF_DUMP_ANON_PRIVATE  2
 #define MMF_DUMP_ANON_SHARED   3
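The comment above warns that get_dumpable() must be compared against SUID_DUMP_USER rather than treated as a boolean; a small standalone sketch of why (constants copied from the hunk, the rest invented):

/* SUID_DUMP_ROOT (2) is non-zero, yet it does NOT mean the process may be
 * dumped as the user, so a plain truth test gives the wrong answer. */
#include <stdio.h>

#define SUID_DUMP_DISABLE 0   /* no setuid dumping */
#define SUID_DUMP_USER    1   /* dump as user of process */
#define SUID_DUMP_ROOT    2   /* dump as root */

#define MMF_DUMPABLE_BITS 2
#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1)

static int get_dumpable_from_flags(unsigned long mm_flags)
{
        return mm_flags & MMF_DUMPABLE_MASK;
}

int main(void)
{
        unsigned long flags = SUID_DUMP_ROOT;   /* stand-in for mm->flags low bits */

        if (get_dumpable_from_flags(flags))     /* wrong: value used as a boolean */
                printf("boolean check: would wrongly treat this as user-dumpable\n");

        if (get_dumpable_from_flags(flags) == SUID_DUMP_USER)  /* right */
                printf("unreachable for SUID_DUMP_ROOT\n");
        else
                printf("correct check: not dumpable as the user\n");
        return 0;
}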
@@ -549,6 +560,7 @@ struct signal_struct {
        atomic_t                sigcnt;
        atomic_t                live;
        int                     nr_threads;
+       struct list_head        thread_head;
 
        wait_queue_head_t       wait_chldexit;  /* for wait4() */
 
@@ -1227,7 +1239,6 @@ struct task_struct {
        /* Used for emulating ABI behavior of previous Linux versions */
        unsigned int personality;
 
-       unsigned did_exec:1;
        unsigned in_execve:1;   /* Tell the LSMs that the process is doing an
                                 * execve */
        unsigned in_iowait:1;
@@ -1271,6 +1282,7 @@ struct task_struct {
        /* PID/PID hash table linkage. */
        struct pid_link pids[PIDTYPE_MAX];
        struct list_head thread_group;
+       struct list_head thread_node;
 
        struct completion *vfork_done;          /* for vfork() */
        int __user *set_child_tid;              /* CLONE_CHILD_SETTID */
@@ -2282,8 +2294,6 @@ extern struct mm_struct *get_task_mm(struct task_struct *task);
 extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
 /* Remove the current tasks stale references to the old mm_struct */
 extern void mm_release(struct task_struct *, struct mm_struct *);
-/* Allocate a new mm structure and copy contents from tsk->mm */
-extern struct mm_struct *dup_mm(struct task_struct *tsk);
 
 extern int copy_thread(unsigned long, unsigned long, unsigned long,
                        struct task_struct *);
@@ -2341,6 +2351,16 @@ extern bool current_is_single_threaded(void);
 #define while_each_thread(g, t) \
        while ((t = next_thread(t)) != g)
 
+#define __for_each_thread(signal, t)   \
+       list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node)
+
+#define for_each_thread(p, t)          \
+       __for_each_thread((p)->signal, t)
+
+/* Careful: this is a double loop, 'break' won't work as expected. */
+#define for_each_process_thread(p, t)  \
+       for_each_process(p) for_each_thread(p, t)
+
 static inline int get_nr_threads(struct task_struct *tsk)
 {
        return tsk->signal->nr_threads;
index 31e0193cb0c5b06c505742c3ec21e41a902ea6ed..b13cf430764f76cc4053f1c935e1e417a32188a1 100644 (file)
@@ -99,4 +99,8 @@ extern int sched_rt_handler(struct ctl_table *table, int write,
                void __user *buffer, size_t *lenp,
                loff_t *ppos);
 
+extern int sysctl_numa_balancing(struct ctl_table *table, int write,
+                                void __user *buffer, size_t *lenp,
+                                loff_t *ppos);
+
 #endif /* _SCHED_SYSCTL_H */
index 429c1995d756634f82906af0ad532d5423ec7622..1e2cd2e6b5407956ab9a70f815a5a521da5ffea2 100644 (file)
@@ -9,7 +9,7 @@
 struct shmid_kernel /* private to the kernel */
 {      
        struct kern_ipc_perm    shm_perm;
-       struct file *           shm_file;
+       struct file             *shm_file;
        unsigned long           shm_nattch;
        unsigned long           shm_segsz;
        time_t                  shm_atim;
index 1e2f4fe12773bdaf9c5ffddd9b5eb77d42b665a0..a060142aa5f53daa9bbfe147b0834a1bcb079c63 100644 (file)
@@ -513,7 +513,9 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
  *
  * Both the root cache and the child caches will have it. For the root cache,
  * this will hold a dynamically allocated array large enough to hold
- * information about the currently limited memcgs in the system.
+ * information about the currently limited memcgs in the system. To allow the
+ * array to be accessed without taking any locks, on relocation we free the old
+ * version only after a grace period.
  *
  * Child caches will hold extra metadata needed for its operation. Fields are:
  *
@@ -528,7 +530,10 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 struct memcg_cache_params {
        bool is_root_cache;
        union {
-               struct kmem_cache *memcg_caches[0];
+               struct {
+                       struct rcu_head rcu_head;
+                       struct kmem_cache *memcg_caches[0];
+               };
                struct {
                        struct mem_cgroup *memcg;
                        struct list_head list;
index 74575cbf2d6f579c317fec5f1f16e9d3793fab03..0e43906d2fda6dc68cffc6343594178465d6e461 100644 (file)
@@ -24,7 +24,8 @@
  * Passed to the actors
  */
 struct splice_desc {
-       unsigned int len, total_len;    /* current and remaining length */
+       size_t total_len;               /* remaining length */
+       unsigned int len;               /* current length */
        unsigned int flags;             /* splice flags */
        /*
         * actor() private data
index c557c6d096def9a93bb5b00af4df87166539e048..070de3de8856ae6ff5cbe0e31030df958307805c 100644 (file)
@@ -71,12 +71,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
                THP_ZERO_PAGE_ALLOC,
                THP_ZERO_PAGE_ALLOC_FAILED,
 #endif
-#ifdef CONFIG_SMP
+#ifdef CONFIG_DEBUG_TLBFLUSH
                NR_TLB_REMOTE_FLUSH,    /* cpu tried to flush others' tlbs */
                NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */
-#endif
                NR_TLB_LOCAL_FLUSH_ALL,
                NR_TLB_LOCAL_FLUSH_ONE,
+#endif
                NR_VM_EVENT_ITEMS
 };
 
index e4b948080d20e7a537c7a83da17b8b5b7fec0008..80ebba9c2e87ee7e8c8056e51db763906bb034fb 100644 (file)
@@ -83,6 +83,14 @@ static inline void vm_events_fold_cpu(int cpu)
 #define count_vm_numa_events(x, y) do { (void)(y); } while (0)
 #endif /* CONFIG_NUMA_BALANCING */
 
+#ifdef CONFIG_DEBUG_TLBFLUSH
+#define count_vm_tlb_event(x)     count_vm_event(x)
+#define count_vm_tlb_events(x, y)  count_vm_events(x, y)
+#else
+#define count_vm_tlb_event(x)     do {} while (0)
+#define count_vm_tlb_events(x, y) do { (void)(y); } while (0)
+#endif
+
 #define __count_zone_vm_events(item, zone, delta) \
                __count_vm_events(item##_NORMAL - ZONE_NORMAL + \
                zone_idx(zone), delta)
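When CONFIG_DEBUG_TLBFLUSH is off, the stubs above compile to nothing, and the two-argument form still casts its count to void so callers build warning-free either way. A minimal sketch of that compile-out pattern, with invented names:

/* Toggle the define to see both builds succeed with identical call sites. */
#include <stdio.h>

/* #define CONFIG_DEBUG_COUNTERS 1 */

#ifdef CONFIG_DEBUG_COUNTERS
static unsigned long counter;
#define count_event(delta)      do { counter += (delta); } while (0)
#else
#define count_event(delta)      do { (void)(delta); } while (0)  /* consume arg, emit nothing */
#endif

int main(void)
{
        int delta = 5;          /* no "unused variable" warning even when disabled */
        count_event(delta);
#ifdef CONFIG_DEBUG_COUNTERS
        printf("counter = %lu\n", counter);
#else
        printf("counting compiled out\n");
#endif
        return 0;
}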
index 065e3ae79ab0e7f09138cceb19d8f35ce6d72d67..d58594a3232492e33f1dd4babd3798b03e0f0203 100644 (file)
@@ -20,6 +20,7 @@ struct w1_gpio_platform_data {
        unsigned int is_open_drain:1;
        void (*enable_external_pullup)(int enable);
        unsigned int ext_pullup_enable_pin;
+       unsigned int pullup_duration;
 };
 
 #endif /* _LINUX_W1_GPIO_H */
index fde1b3e94c7d971c571ca77ef3fdf7cee5f71744..06f544ef2f6fefd9a288a81581bfeb2e35d90ba4 100644 (file)
@@ -67,6 +67,48 @@ TRACE_EVENT(mm_compaction_migratepages,
                __entry->nr_failed)
 );
 
+TRACE_EVENT(mm_compaction_begin,
+       TP_PROTO(unsigned long zone_start, unsigned long migrate_start,
+               unsigned long free_start, unsigned long zone_end),
+
+       TP_ARGS(zone_start, migrate_start, free_start, zone_end),
+
+       TP_STRUCT__entry(
+               __field(unsigned long, zone_start)
+               __field(unsigned long, migrate_start)
+               __field(unsigned long, free_start)
+               __field(unsigned long, zone_end)
+       ),
+
+       TP_fast_assign(
+               __entry->zone_start = zone_start;
+               __entry->migrate_start = migrate_start;
+               __entry->free_start = free_start;
+               __entry->zone_end = zone_end;
+       ),
+
+       TP_printk("zone_start=%lu migrate_start=%lu free_start=%lu zone_end=%lu",
+               __entry->zone_start,
+               __entry->migrate_start,
+               __entry->free_start,
+               __entry->zone_end)
+);
+
+TRACE_EVENT(mm_compaction_end,
+       TP_PROTO(int status),
+
+       TP_ARGS(status),
+
+       TP_STRUCT__entry(
+               __field(int, status)
+       ),
+
+       TP_fast_assign(
+               __entry->status = status;
+       ),
+
+       TP_printk("status=%d", __entry->status)
+);
 
 #endif /* _TRACE_COMPACTION_H */
 
index ec2a6ccfd7e5445b393b2a8c7659c8862c0eb64b..3075ffbb9a830506d79991e9a0123c18058d5bc2 100644 (file)
@@ -45,6 +45,32 @@ TRACE_EVENT(mm_migrate_pages,
                __print_symbolic(__entry->reason, MIGRATE_REASON))
 );
 
+TRACE_EVENT(mm_numa_migrate_ratelimit,
+
+       TP_PROTO(struct task_struct *p, int dst_nid, unsigned long nr_pages),
+
+       TP_ARGS(p, dst_nid, nr_pages),
+
+       TP_STRUCT__entry(
+               __array(        char,           comm,   TASK_COMM_LEN)
+               __field(        pid_t,          pid)
+               __field(        int,            dst_nid)
+               __field(        unsigned long,  nr_pages)
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+               __entry->pid            = p->pid;
+               __entry->dst_nid        = dst_nid;
+               __entry->nr_pages       = nr_pages;
+       ),
+
+       TP_printk("comm=%s pid=%d dst_nid=%d nr_pages=%lu",
+               __entry->comm,
+               __entry->pid,
+               __entry->dst_nid,
+               __entry->nr_pages)
+);
 #endif /* _TRACE_MIGRATE_H */
 
 /* This part must be outside protection */
index 04c308413a5dd3b2295b33c8c78bf56a11fa6b20..67e1bbf836954dbc486d1d7debb3a2bfa3cfd3d1 100644 (file)
@@ -443,6 +443,93 @@ TRACE_EVENT(sched_process_hang,
 );
 #endif /* CONFIG_DETECT_HUNG_TASK */
 
+DECLARE_EVENT_CLASS(sched_move_task_template,
+
+       TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
+
+       TP_ARGS(tsk, src_cpu, dst_cpu),
+
+       TP_STRUCT__entry(
+               __field( pid_t, pid                     )
+               __field( pid_t, tgid                    )
+               __field( pid_t, ngid                    )
+               __field( int,   src_cpu                 )
+               __field( int,   src_nid                 )
+               __field( int,   dst_cpu                 )
+               __field( int,   dst_nid                 )
+       ),
+
+       TP_fast_assign(
+               __entry->pid            = task_pid_nr(tsk);
+               __entry->tgid           = task_tgid_nr(tsk);
+               __entry->ngid           = task_numa_group_id(tsk);
+               __entry->src_cpu        = src_cpu;
+               __entry->src_nid        = cpu_to_node(src_cpu);
+               __entry->dst_cpu        = dst_cpu;
+               __entry->dst_nid        = cpu_to_node(dst_cpu);
+       ),
+
+       TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
+                       __entry->pid, __entry->tgid, __entry->ngid,
+                       __entry->src_cpu, __entry->src_nid,
+                       __entry->dst_cpu, __entry->dst_nid)
+);
+
+/*
+ * Tracks migration of tasks from one runqueue to another. Can be used to
+ * detect if automatic NUMA balancing is bouncing between nodes
+ */
+DEFINE_EVENT(sched_move_task_template, sched_move_numa,
+       TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
+
+       TP_ARGS(tsk, src_cpu, dst_cpu)
+);
+
+DEFINE_EVENT(sched_move_task_template, sched_stick_numa,
+       TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
+
+       TP_ARGS(tsk, src_cpu, dst_cpu)
+);
+
+TRACE_EVENT(sched_swap_numa,
+
+       TP_PROTO(struct task_struct *src_tsk, int src_cpu,
+                struct task_struct *dst_tsk, int dst_cpu),
+
+       TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),
+
+       TP_STRUCT__entry(
+               __field( pid_t, src_pid                 )
+               __field( pid_t, src_tgid                )
+               __field( pid_t, src_ngid                )
+               __field( int,   src_cpu                 )
+               __field( int,   src_nid                 )
+               __field( pid_t, dst_pid                 )
+               __field( pid_t, dst_tgid                )
+               __field( pid_t, dst_ngid                )
+               __field( int,   dst_cpu                 )
+               __field( int,   dst_nid                 )
+       ),
+
+       TP_fast_assign(
+               __entry->src_pid        = task_pid_nr(src_tsk);
+               __entry->src_tgid       = task_tgid_nr(src_tsk);
+               __entry->src_ngid       = task_numa_group_id(src_tsk);
+               __entry->src_cpu        = src_cpu;
+               __entry->src_nid        = cpu_to_node(src_cpu);
+               __entry->dst_pid        = task_pid_nr(dst_tsk);
+               __entry->dst_tgid       = task_tgid_nr(dst_tsk);
+               __entry->dst_ngid       = task_numa_group_id(dst_tsk);
+               __entry->dst_cpu        = dst_cpu;
+               __entry->dst_nid        = cpu_to_node(dst_cpu);
+       ),
+
+       TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
+                       __entry->src_pid, __entry->src_tgid, __entry->src_ngid,
+                       __entry->src_cpu, __entry->src_nid,
+                       __entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
+                       __entry->dst_cpu, __entry->dst_nid)
+);
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
index bd39806013b54e216acc0ef4f3605c674e409303..a3877926b0d45ea26b3026ecdeafbe8979c76961 100644 (file)
@@ -1,8 +1,7 @@
 #ifndef _ASM_GENERIC_TYPES_H
 #define _ASM_GENERIC_TYPES_H
 /*
- * int-ll64 is used practically everywhere now,
- * so use it as a reasonable default.
+ * int-ll64 is used everywhere now.
  */
 #include <asm-generic/int-ll64.h>
 
index a67ef9dbda9dae0ea7a1619a393c55157a6d8592..93b61396756bcdddd9c38f9967cb0494fc580d7e 100644 (file)
@@ -583,7 +583,7 @@ static int __init populate_rootfs(void)
 {
        char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
        if (err)
-               panic(err);     /* Failed to decompress INTERNAL initramfs */
+               panic("%s", err); /* Failed to decompress INTERNAL initramfs */
        if (initrd_start) {
 #ifdef CONFIG_BLK_DEV_RAM
                int fd;
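The fix above passes the error string as data rather than as the format, since a '%' inside it would otherwise be interpreted as a conversion specifier. A hypothetical userspace illustration with printf standing in for panic() (invented data):

#include <stdio.h>

int main(void)
{
        const char *err = "decompress failed at offset 100%";

        /* printf(err);  -- wrong: the '%' in err is parsed as a conversion
         *                  and invokes undefined behaviour; compilers flag
         *                  this with -Wformat-security */
        printf("%s\n", err);    /* right: err is plain data, never a format */
        return 0;
}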
index febc511e078a65d08ac4dd6a3be67b26926195cd..f333385d9a4f3cd0550c48e34e6925ca4a27ac36 100644 (file)
@@ -99,10 +99,6 @@ extern void radix_tree_init(void);
 static inline void mark_rodata_ro(void) { }
 #endif
 
-#ifdef CONFIG_TC
-extern void tc_init(void);
-#endif
-
 /*
  * Debug helper: via this flag we know that we are in 'early bootup code'
  * where only the boot processor is running with IRQ disabled.  This means
@@ -282,7 +278,7 @@ static int __init unknown_bootoption(char *param, char *val, const char *unused)
                unsigned int i;
                for (i = 0; envp_init[i]; i++) {
                        if (i == MAX_INIT_ENVS) {
-                               panic_later = "Too many boot env vars at `%s'";
+                               panic_later = "env";
                                panic_param = param;
                        }
                        if (!strncmp(param, envp_init[i], val - param))
@@ -294,7 +290,7 @@ static int __init unknown_bootoption(char *param, char *val, const char *unused)
                unsigned int i;
                for (i = 0; argv_init[i]; i++) {
                        if (i == MAX_INIT_ARGS) {
-                               panic_later = "Too many boot init vars at `%s'";
+                               panic_later = "init";
                                panic_param = param;
                        }
                }
@@ -355,9 +351,11 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { }
  */
 static void __init setup_command_line(char *command_line)
 {
-       saved_command_line = alloc_bootmem(strlen (boot_command_line)+1);
-       initcall_command_line = alloc_bootmem(strlen (boot_command_line)+1);
-       static_command_line = alloc_bootmem(strlen (command_line)+1);
+       saved_command_line =
+               memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
+       initcall_command_line =
+               memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
+       static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0);
        strcpy (saved_command_line, boot_command_line);
        strcpy (static_command_line, command_line);
 }
@@ -476,7 +474,7 @@ static void __init mm_init(void)
        mem_init();
        kmem_cache_init();
        percpu_init_late();
-       pgtable_cache_init();
+       pgtable_init();
        vmalloc_init();
 }
 
@@ -584,7 +582,8 @@ asmlinkage void __init start_kernel(void)
         */
        console_init();
        if (panic_later)
-               panic(panic_later, panic_param);
+               panic("Too many boot %s vars at `%s'", panic_later,
+                     panic_param);
 
        lockdep_info();
 
index 892f6585dd6014a7b1fb70cabd8f7ce0d2f9a4ad..f71e962756d815230b9c9dea727b1be63f009445 100644 (file)
@@ -197,7 +197,7 @@ static inline int __put_compat_ipc_perm(struct ipc64_perm *p,
 static inline int get_compat_semid64_ds(struct semid64_ds *s64,
                                        struct compat_semid64_ds __user *up64)
 {
-       if (!access_ok (VERIFY_READ, up64, sizeof(*up64)))
+       if (!access_ok(VERIFY_READ, up64, sizeof(*up64)))
                return -EFAULT;
        return __get_compat_ipc64_perm(&s64->sem_perm, &up64->sem_perm);
 }
@@ -205,7 +205,7 @@ static inline int get_compat_semid64_ds(struct semid64_ds *s64,
 static inline int get_compat_semid_ds(struct semid64_ds *s,
                                      struct compat_semid_ds __user *up)
 {
-       if (!access_ok (VERIFY_READ, up, sizeof(*up)))
+       if (!access_ok(VERIFY_READ, up, sizeof(*up)))
                return -EFAULT;
        return __get_compat_ipc_perm(&s->sem_perm, &up->sem_perm);
 }
@@ -215,7 +215,7 @@ static inline int put_compat_semid64_ds(struct semid64_ds *s64,
 {
        int err;
 
-       if (!access_ok (VERIFY_WRITE, up64, sizeof(*up64)))
+       if (!access_ok(VERIFY_WRITE, up64, sizeof(*up64)))
                return -EFAULT;
        err  = __put_compat_ipc64_perm(&s64->sem_perm, &up64->sem_perm);
        err |= __put_user(s64->sem_otime, &up64->sem_otime);
@@ -229,7 +229,7 @@ static inline int put_compat_semid_ds(struct semid64_ds *s,
 {
        int err;
 
-       if (!access_ok (VERIFY_WRITE, up, sizeof(*up)))
+       if (!access_ok(VERIFY_WRITE, up, sizeof(*up)))
                return -EFAULT;
        err  = __put_compat_ipc_perm(&s->sem_perm, &up->sem_perm);
        err |= __put_user(s->sem_otime, &up->sem_otime);
@@ -288,11 +288,11 @@ static long do_compat_semctl(int first, int second, int third, u32 pad)
                break;
 
        case IPC_SET:
-               if (version == IPC_64) {
+               if (version == IPC_64)
                        err = get_compat_semid64_ds(&s64, compat_ptr(pad));
-               } else {
+               else
                        err = get_compat_semid_ds(&s64, compat_ptr(pad));
-               }
+
                up64 = compat_alloc_user_space(sizeof(s64));
                if (copy_to_user(up64, &s64, sizeof(s64)))
                        err = -EFAULT;
@@ -376,7 +376,7 @@ COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second,
                        struct compat_ipc_kludge ipck;
                        if (!uptr)
                                return -EINVAL;
-                       if (copy_from_user (&ipck, uptr, sizeof(ipck)))
+                       if (copy_from_user(&ipck, uptr, sizeof(ipck)))
                                return -EFAULT;
                        uptr = compat_ptr(ipck.msgp);
                        fifth = ipck.msgtyp;
@@ -515,11 +515,11 @@ long compat_sys_msgctl(int first, int second, void __user *uptr)
                break;
 
        case IPC_SET:
-               if (version == IPC_64) {
+               if (version == IPC_64)
                        err = get_compat_msqid64(&m64, uptr);
-               } else {
+               else
                        err = get_compat_msqid(&m64, uptr);
-               }
+
                if (err)
                        break;
                p = compat_alloc_user_space(sizeof(m64));
@@ -702,11 +702,11 @@ long compat_sys_shmctl(int first, int second, void __user *uptr)
 
 
        case IPC_SET:
-               if (version == IPC_64) {
+               if (version == IPC_64)
                        err = get_compat_shmid64_ds(&s64, uptr);
-               } else {
+               else
                        err = get_compat_shmid_ds(&s64, uptr);
-               }
+
                if (err)
                        break;
                p = compat_alloc_user_space(sizeof(s64));
index 380ea4fe08e7151c71c64a39eac8a9e92a2ea7ee..63d7c6de335bd3b4878f8f2cf65d1d9e3f7e9474 100644 (file)
@@ -64,7 +64,7 @@ asmlinkage long compat_sys_mq_open(const char __user *u_name,
        return sys_mq_open(u_name, oflag, mode, p);
 }
 
-static int compat_prepare_timeout(struct timespec __user * *p,
+static int compat_prepare_timeout(struct timespec __user **p,
                                  const struct compat_timespec __user *u)
 {
        struct timespec ts;
index b0e99deb6d05330482c8ec98f1a511f07f9fa5f1..17028648cfeb05fc4b76470ca343cd069a11bb02 100644 (file)
@@ -164,21 +164,21 @@ static struct ctl_table ipc_kern_table[] = {
        {
                .procname       = "shmmax",
                .data           = &init_ipc_ns.shm_ctlmax,
-               .maxlen         = sizeof (init_ipc_ns.shm_ctlmax),
+               .maxlen         = sizeof(init_ipc_ns.shm_ctlmax),
                .mode           = 0644,
                .proc_handler   = proc_ipc_doulongvec_minmax,
        },
        {
                .procname       = "shmall",
                .data           = &init_ipc_ns.shm_ctlall,
-               .maxlen         = sizeof (init_ipc_ns.shm_ctlall),
+               .maxlen         = sizeof(init_ipc_ns.shm_ctlall),
                .mode           = 0644,
                .proc_handler   = proc_ipc_doulongvec_minmax,
        },
        {
                .procname       = "shmmni",
                .data           = &init_ipc_ns.shm_ctlmni,
-               .maxlen         = sizeof (init_ipc_ns.shm_ctlmni),
+               .maxlen         = sizeof(init_ipc_ns.shm_ctlmni),
                .mode           = 0644,
                .proc_handler   = proc_ipc_dointvec,
        },
@@ -194,7 +194,7 @@ static struct ctl_table ipc_kern_table[] = {
        {
                .procname       = "msgmax",
                .data           = &init_ipc_ns.msg_ctlmax,
-               .maxlen         = sizeof (init_ipc_ns.msg_ctlmax),
+               .maxlen         = sizeof(init_ipc_ns.msg_ctlmax),
                .mode           = 0644,
                .proc_handler   = proc_ipc_dointvec_minmax,
                .extra1         = &zero,
@@ -203,7 +203,7 @@ static struct ctl_table ipc_kern_table[] = {
        {
                .procname       = "msgmni",
                .data           = &init_ipc_ns.msg_ctlmni,
-               .maxlen         = sizeof (init_ipc_ns.msg_ctlmni),
+               .maxlen         = sizeof(init_ipc_ns.msg_ctlmni),
                .mode           = 0644,
                .proc_handler   = proc_ipc_callback_dointvec_minmax,
                .extra1         = &zero,
@@ -212,7 +212,7 @@ static struct ctl_table ipc_kern_table[] = {
        {
                .procname       =  "msgmnb",
                .data           = &init_ipc_ns.msg_ctlmnb,
-               .maxlen         = sizeof (init_ipc_ns.msg_ctlmnb),
+               .maxlen         = sizeof(init_ipc_ns.msg_ctlmnb),
                .mode           = 0644,
                .proc_handler   = proc_ipc_dointvec_minmax,
                .extra1         = &zero,
@@ -221,7 +221,7 @@ static struct ctl_table ipc_kern_table[] = {
        {
                .procname       = "sem",
                .data           = &init_ipc_ns.sem_ctls,
-               .maxlen         = 4*sizeof (int),
+               .maxlen         = 4*sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_ipc_dointvec,
        },
index 95827ce2f3c78e76adfdef64278a227402356587..ccf1f9fd263acdfae7dc56a87bceddd3170f69d9 100644 (file)
@@ -6,7 +6,7 @@
  *
  * Spinlocks:               Mohamed Abbas           (abbas.mohamed@intel.com)
  * Lockless receive & send, fd based notify:
- *                         Manfred Spraul          (manfred@colorfullife.com)
+ *                         Manfred Spraul          (manfred@colorfullife.com)
  *
  * Audit:                   George Wilson           (ltcgcw@us.ibm.com)
  *
@@ -73,7 +73,7 @@ struct mqueue_inode_info {
        struct mq_attr attr;
 
        struct sigevent notify;
-       struct pid* notify_owner;
+       struct pid *notify_owner;
        struct user_namespace *notify_user_ns;
        struct user_struct *user;       /* user who created, for accounting */
        struct sock *notify_sock;
@@ -92,7 +92,7 @@ static void remove_notification(struct mqueue_inode_info *info);
 
 static struct kmem_cache *mqueue_inode_cachep;
 
-static struct ctl_table_header * mq_sysctl_table;
+static struct ctl_table_header *mq_sysctl_table;
 
 static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
 {
@@ -466,13 +466,13 @@ out_unlock:
 
 static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = dentry->d_inode;
 
        dir->i_ctime = dir->i_mtime = dir->i_atime = CURRENT_TIME;
        dir->i_size -= DIRENT_SIZE;
-       drop_nlink(inode);
-       dput(dentry);
-       return 0;
+       drop_nlink(inode);
+       dput(dentry);
+       return 0;
 }
 
 /*
@@ -622,7 +622,7 @@ static struct ext_wait_queue *wq_get_first_waiter(
 
 static inline void set_cookie(struct sk_buff *skb, char code)
 {
-       ((char*)skb->data)[NOTIFY_COOKIE_LEN-1] = code;
+       ((char *)skb->data)[NOTIFY_COOKIE_LEN-1] = code;
 }
 
 /*
@@ -1303,11 +1303,11 @@ retry:
 out_fput:
        fdput(f);
 out:
-       if (sock) {
+       if (sock)
                netlink_detachskb(sock, nc);
-       } else if (nc) {
+       else if (nc)
                dev_kfree_skb(nc);
-       }
+
        return ret;
 }
 
index 558aa91186b6ced1a27b1e05b65c5ee129e0a175..245db1140ad66a2be47744f02ef0d80deea5006f 100644 (file)
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -253,8 +253,14 @@ static void expunge_all(struct msg_queue *msq, int res)
        struct msg_receiver *msr, *t;
 
        list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
-               msr->r_msg = NULL;
+               msr->r_msg = NULL; /* initialize expunge ordering */
                wake_up_process(msr->r_tsk);
+               /*
+                * Ensure that the wakeup is visible before setting r_msg as
+                * the receiving end depends on it: either spinning on a nil,
+                * or dealing with -EAGAIN cases. See lockless receive part 1
+                * and 2 in do_msgrcv().
+                */
                smp_mb();
                msr->r_msg = ERR_PTR(res);
        }
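The comment added above describes a publish/spin handshake: r_msg is first set to an intermediate value, the sleeper is woken, and only after a barrier is the final status stored, so the waiter never observes the result before the wakeup's effects. A loose userspace analogue using C11 atomics and pthreads (a simplification with invented names, not the kernel's smp_mb()-based protocol):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define IN_PROGRESS  (-1)

static atomic_int r_msg = IN_PROGRESS;

static void *producer(void *unused)
{
        (void)unused;
        /* ... work that decides the final status happens here ... */
        atomic_store_explicit(&r_msg, 42, memory_order_release);  /* publish last */
        return NULL;
}

int main(void)
{
        pthread_t tid;
        int msg;

        pthread_create(&tid, NULL, producer, NULL);

        while ((msg = atomic_load_explicit(&r_msg, memory_order_acquire)) == IN_PROGRESS)
                ;       /* spin until published, as the lockless receive path does */

        printf("received status %d\n", msg);
        pthread_join(tid, NULL);
        return 0;
}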
@@ -318,7 +324,7 @@ SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)
 static inline unsigned long
 copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
 {
-       switch(version) {
+       switch (version) {
        case IPC_64:
                return copy_to_user(buf, in, sizeof(*in));
        case IPC_OLD:
@@ -363,7 +369,7 @@ copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
 static inline unsigned long
 copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
 {
-       switch(version) {
+       switch (version) {
        case IPC_64:
                if (copy_from_user(out, buf, sizeof(*out)))
                        return -EFAULT;
@@ -375,9 +381,9 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
                if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
                        return -EFAULT;
 
-               out->msg_perm.uid       = tbuf_old.msg_perm.uid;
-               out->msg_perm.gid       = tbuf_old.msg_perm.gid;
-               out->msg_perm.mode      = tbuf_old.msg_perm.mode;
+               out->msg_perm.uid       = tbuf_old.msg_perm.uid;
+               out->msg_perm.gid       = tbuf_old.msg_perm.gid;
+               out->msg_perm.mode      = tbuf_old.msg_perm.mode;
 
                if (tbuf_old.msg_qbytes == 0)
                        out->msg_qbytes = tbuf_old.msg_lqbytes;
@@ -606,13 +612,13 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
 
 static int testmsg(struct msg_msg *msg, long type, int mode)
 {
-       switch(mode)
+       switch (mode)
        {
                case SEARCH_ANY:
                case SEARCH_NUMBER:
                        return 1;
                case SEARCH_LESSEQUAL:
-                       if (msg->m_type <=type)
+                       if (msg->m_type <= type)
                                return 1;
                        break;
                case SEARCH_EQUAL:
@@ -638,15 +644,22 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
 
                        list_del(&msr->r_list);
                        if (msr->r_maxsize < msg->m_ts) {
+                               /* initialize pipelined send ordering */
                                msr->r_msg = NULL;
                                wake_up_process(msr->r_tsk);
-                               smp_mb();
+                               smp_mb(); /* see barrier comment below */
                                msr->r_msg = ERR_PTR(-E2BIG);
                        } else {
                                msr->r_msg = NULL;
                                msq->q_lrpid = task_pid_vnr(msr->r_tsk);
                                msq->q_rtime = get_seconds();
                                wake_up_process(msr->r_tsk);
+                               /*
+                                * Ensure that the wakeup is visible before
+                                * setting r_msg, as the receiving end depends
+                                * on it. See lockless receive part 1 and 2 in
+                                * do_msgrcv().
+                                */
                                smp_mb();
                                msr->r_msg = msg;
 
@@ -654,6 +667,7 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
                        }
                }
        }
+
        return 0;
 }
 
@@ -696,7 +710,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
                        goto out_unlock0;
 
                /* raced with RMID? */
-               if (msq->q_perm.deleted) {
+               if (!ipc_valid_object(&msq->q_perm)) {
                        err = -EIDRM;
                        goto out_unlock0;
                }
@@ -716,6 +730,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
                        goto out_unlock0;
                }
 
+               /* enqueue the sender and prepare to block */
                ss_add(msq, &s);
 
                if (!ipc_rcu_getref(msq)) {
@@ -731,7 +746,8 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
                ipc_lock_object(&msq->q_perm);
 
                ipc_rcu_putref(msq, ipc_rcu_free);
-               if (msq->q_perm.deleted) {
+               /* raced with RMID? */
+               if (!ipc_valid_object(&msq->q_perm)) {
                        err = -EIDRM;
                        goto out_unlock0;
                }
@@ -909,7 +925,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
                ipc_lock_object(&msq->q_perm);
 
                /* raced with RMID? */
-               if (msq->q_perm.deleted) {
+               if (!ipc_valid_object(&msq->q_perm)) {
                        msg = ERR_PTR(-EIDRM);
                        goto out_unlock0;
                }
@@ -983,7 +999,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
                 * wake_up_process(). There is a race with exit(), see
                 * ipc/mqueue.c for the details.
                 */
-               msg = (struct msg_msg*)msr_d.r_msg;
+               msg = (struct msg_msg *)msr_d.r_msg;
                while (msg == NULL) {
                        cpu_relax();
                        msg = (struct msg_msg *)msr_d.r_msg;
@@ -1004,7 +1020,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
                /* Lockless receive, part 4:
                 * Repeat test after acquiring the spinlock.
                 */
-               msg = (struct msg_msg*)msr_d.r_msg;
+               msg = (struct msg_msg *)msr_d.r_msg;
                if (msg != ERR_PTR(-EAGAIN))
                        goto out_unlock0;
 
index db9d241af133d770cb0a95a22cacf257f79b1215..bee5554173120780374e6b42228d13c786ceb315 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -188,7 +188,7 @@ void sem_exit_ns(struct ipc_namespace *ns)
 }
 #endif
 
-void __init sem_init (void)
+void __init sem_init(void)
 {
        sem_init_ns(&init_ipc_ns);
        ipc_init_proc_interface("sysvipc/sem",
@@ -225,7 +225,7 @@ static void unmerge_queues(struct sem_array *sma)
 }
 
 /**
- * merge_queues - Merge single semop queues into global queue
+ * merge_queues - merge single semop queues into global queue
  * @sma: semaphore array
  *
  * This function merges all per-semaphore queues into the global queue.
@@ -394,7 +394,7 @@ static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns,
        /* ipc_rmid() may have already freed the ID while sem_lock
         * was spinning: verify that the structure is still valid
         */
-       if (!ipcp->deleted)
+       if (ipc_valid_object(ipcp))
                return container_of(ipcp, struct sem_array, sem_perm);
 
        sem_unlock(sma, *locknum);
@@ -445,11 +445,11 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
  *     * call wake_up_process
  *     * set queue.status to the final value.
  * - the previously blocked thread checks queue.status:
- *     * if it's IN_WAKEUP, then it must wait until the value changes
- *     * if it's not -EINTR, then the operation was completed by
- *       update_queue. semtimedop can return queue.status without
- *       performing any operation on the sem array.
- *     * otherwise it must acquire the spinlock and check what's up.
+ *     * if it's IN_WAKEUP, then it must wait until the value changes
+ *     * if it's not -EINTR, then the operation was completed by
+ *       update_queue. semtimedop can return queue.status without
+ *       performing any operation on the sem array.
+ *     * otherwise it must acquire the spinlock and check what's up.
  *
  * The two-stage algorithm is necessary to protect against the following
  * races:
@@ -474,7 +474,6 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
  *
  * Called with sem_ids.rwsem held (as a writer)
  */
-
 static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 {
        int id;
@@ -491,12 +490,12 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
        if (ns->used_sems + nsems > ns->sc_semmns)
                return -ENOSPC;
 
-       size = sizeof (*sma) + nsems * sizeof (struct sem);
+       size = sizeof(*sma) + nsems * sizeof(struct sem);
        sma = ipc_rcu_alloc(size);
-       if (!sma) {
+       if (!sma)
                return -ENOMEM;
-       }
-       memset (sma, 0, size);
+
+       memset(sma, 0, size);
 
        sma->sem_perm.mode = (semflg & S_IRWXUGO);
        sma->sem_perm.key = key;
@@ -584,10 +583,11 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
        return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
 }
 
-/** perform_atomic_semop - Perform (if possible) a semaphore operation
+/**
+ * perform_atomic_semop - Perform (if possible) a semaphore operation
  * @sma: semaphore array
  * @sops: array with operations that should be checked
- * @nsems: number of sops
+ * @nsops: number of operations
  * @un: undo array
  * @pid: pid that did the change
  *
@@ -595,19 +595,18 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
  * Returns 1 if the operation is impossible, the caller must sleep.
  * Negative values are error codes.
  */
-
 static int perform_atomic_semop(struct sem_array *sma, struct sembuf *sops,
                             int nsops, struct sem_undo *un, int pid)
 {
        int result, sem_op;
        struct sembuf *sop;
-       struct sem * curr;
+       struct sem *curr;
 
        for (sop = sops; sop < sops + nsops; sop++) {
                curr = sma->sem_base + sop->sem_num;
                sem_op = sop->sem_op;
                result = curr->semval;
-  
+
                if (!sem_op && result)
                        goto would_block;
 
@@ -616,25 +615,24 @@ static int perform_atomic_semop(struct sem_array *sma, struct sembuf *sops,
                        goto would_block;
                if (result > SEMVMX)
                        goto out_of_range;
+
                if (sop->sem_flg & SEM_UNDO) {
                        int undo = un->semadj[sop->sem_num] - sem_op;
-                       /*
-                        *      Exceeding the undo range is an error.
-                        */
+                       /* Exceeding the undo range is an error. */
                        if (undo < (-SEMAEM - 1) || undo > SEMAEM)
                                goto out_of_range;
+                       un->semadj[sop->sem_num] = undo;
                }
+
                curr->semval = result;
        }
 
        sop--;
        while (sop >= sops) {
                sma->sem_base[sop->sem_num].sempid = pid;
-               if (sop->sem_flg & SEM_UNDO)
-                       un->semadj[sop->sem_num] -= sop->sem_op;
                sop--;
        }
-       
+
        return 0;
 
 out_of_range:
@@ -650,7 +648,10 @@ would_block:
 undo:
        sop--;
        while (sop >= sops) {
-               sma->sem_base[sop->sem_num].semval -= sop->sem_op;
+               sem_op = sop->sem_op;
+               sma->sem_base[sop->sem_num].semval -= sem_op;
+               if (sop->sem_flg & SEM_UNDO)
+                       un->semadj[sop->sem_num] += sem_op;
                sop--;
        }
 
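The change above moves the semadj (undo) bookkeeping into the forward pass and reverses it if the operation has to be rolled back. For context, semadj is the per-process accounting behind SEM_UNDO as seen from userspace; a small hypothetical example using the SysV semaphore API:

#include <stdio.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <sys/types.h>

/* The caller must define union semun itself on Linux. */
union semun { int val; struct semid_ds *buf; unsigned short *array; };

int main(void)
{
        int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
        if (id < 0) { perror("semget"); return 1; }

        union semun arg = { .val = 1 };
        if (semctl(id, 0, SETVAL, arg) < 0) { perror("semctl"); return 1; }

        /* Decrement with SEM_UNDO: the kernel records +1 in this process's
         * undo (semadj) entry, so the operation is reverted automatically
         * if the process exits without posting. */
        struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO };
        if (semop(id, &op, 1) < 0) { perror("semop"); return 1; }

        printf("value after P with SEM_UNDO: %d\n", semctl(id, 0, GETVAL));

        semctl(id, 0, IPC_RMID);
        return 0;
}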
@@ -680,7 +681,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt,
 }
 
 /**
- * wake_up_sem_queue_do(pt) - do the actual wake-up
+ * wake_up_sem_queue_do - do the actual wake-up
  * @pt: list of tasks to be woken up
  *
  * Do the actual wake-up.
@@ -746,7 +747,7 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q)
 }
 
 /**
- * wake_const_ops(sma, semnum, pt) - Wake up non-alter tasks
+ * wake_const_ops - wake up non-alter tasks
  * @sma: semaphore array.
  * @semnum: semaphore that was modified.
  * @pt: list head for the tasks that must be woken up.
@@ -796,15 +797,14 @@ static int wake_const_ops(struct sem_array *sma, int semnum,
 }
 
 /**
- * do_smart_wakeup_zero(sma, sops, nsops, pt) - wakeup all wait for zero tasks
+ * do_smart_wakeup_zero - wakeup all wait for zero tasks
  * @sma: semaphore array
  * @sops: operations that were performed
  * @nsops: number of operations
  * @pt: list head of the tasks that must be woken up.
  *
- * do_smart_wakeup_zero() checks all required queue for wait-for-zero
- * operations, based on the actual changes that were performed on the
- * semaphore array.
+ * Checks all required queue for wait-for-zero operations, based
+ * on the actual changes that were performed on the semaphore array.
  * The function returns 1 if at least one operation was completed successfully.
  */
 static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
@@ -848,7 +848,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
 
 
 /**
- * update_queue(sma, semnum): Look for tasks that can be completed.
+ * update_queue - look for tasks that can be completed.
  * @sma: semaphore array.
  * @semnum: semaphore that was modified.
  * @pt: list head for the tasks that must be woken up.
@@ -918,7 +918,7 @@ again:
 }
 
 /**
- * set_semotime(sma, sops) - set sem_otime
+ * set_semotime - set sem_otime
  * @sma: semaphore array
  * @sops: operations that modified the array, may be NULL
  *
@@ -936,7 +936,7 @@ static void set_semotime(struct sem_array *sma, struct sembuf *sops)
 }
 
 /**
- * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
+ * do_smart_update - optimized update_queue
  * @sma: semaphore array
  * @sops: operations that were performed
  * @nsops: number of operations
@@ -998,21 +998,21 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
  * The counts we return here are a rough approximation, but still
  * warrant that semncnt+semzcnt>0 if the task is on the pending queue.
  */
-static int count_semncnt (struct sem_array * sma, ushort semnum)
+static int count_semncnt(struct sem_array *sma, ushort semnum)
 {
        int semncnt;
-       struct sem_queue * q;
+       struct sem_queue *q;
 
        semncnt = 0;
        list_for_each_entry(q, &sma->sem_base[semnum].pending_alter, list) {
-               struct sembuf * sops = q->sops;
+               struct sembuf *sops = q->sops;
                BUG_ON(sops->sem_num != semnum);
                if ((sops->sem_op < 0) && !(sops->sem_flg & IPC_NOWAIT))
                        semncnt++;
        }
 
        list_for_each_entry(q, &sma->pending_alter, list) {
-               struct sembuf * sops = q->sops;
+               struct sembuf *sops = q->sops;
                int nsops = q->nsops;
                int i;
                for (i = 0; i < nsops; i++)
@@ -1024,21 +1024,21 @@ static int count_semncnt (struct sem_array * sma, ushort semnum)
        return semncnt;
 }
 
-static int count_semzcnt (struct sem_array * sma, ushort semnum)
+static int count_semzcnt(struct sem_array *sma, ushort semnum)
 {
        int semzcnt;
-       struct sem_queue * q;
+       struct sem_queue *q;
 
        semzcnt = 0;
        list_for_each_entry(q, &sma->sem_base[semnum].pending_const, list) {
-               struct sembuf * sops = q->sops;
+               struct sembuf *sops = q->sops;
                BUG_ON(sops->sem_num != semnum);
                if ((sops->sem_op == 0) && !(sops->sem_flg & IPC_NOWAIT))
                        semzcnt++;
        }
 
        list_for_each_entry(q, &sma->pending_const, list) {
-               struct sembuf * sops = q->sops;
+               struct sembuf *sops = q->sops;
                int nsops = q->nsops;
                int i;
                for (i = 0; i < nsops; i++)
@@ -1108,7 +1108,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 
 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
 {
-       switch(version) {
+       switch (version) {
        case IPC_64:
                return copy_to_user(buf, in, sizeof(*in));
        case IPC_OLD:
@@ -1151,7 +1151,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
        int err;
        struct sem_array *sma;
 
-       switch(cmd) {
+       switch (cmd) {
        case IPC_INFO:
        case SEM_INFO:
        {
@@ -1162,7 +1162,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
                if (err)
                        return err;
                
-               memset(&seminfo,0,sizeof(seminfo));
+               memset(&seminfo, 0, sizeof(seminfo));
                seminfo.semmni = ns->sc_semmni;
                seminfo.semmns = ns->sc_semmns;
                seminfo.semmsl = ns->sc_semmsl;
@@ -1183,7 +1183,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
                up_read(&sem_ids(ns).rwsem);
                if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) 
                        return -EFAULT;
-               return (max_id < 0) ? 0: max_id;
+               return (max_id < 0) ? 0 : max_id;
        }
        case IPC_STAT:
        case SEM_STAT:
@@ -1239,7 +1239,7 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
 {
        struct sem_undo *un;
        struct sem_array *sma;
-       struct sem* curr;
+       struct sem *curr;
        int err;
        struct list_head tasks;
        int val;
@@ -1282,7 +1282,7 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
 
        sem_lock(sma, NULL, -1);
 
-       if (sma->sem_perm.deleted) {
+       if (!ipc_valid_object(&sma->sem_perm)) {
                sem_unlock(sma, -1);
                rcu_read_unlock();
                return -EIDRM;
@@ -1309,10 +1309,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                int cmd, void __user *p)
 {
        struct sem_array *sma;
-       struct sem* curr;
+       struct sem *curr;
        int err, nsems;
        ushort fast_sem_io[SEMMSL_FAST];
-       ushort* sem_io = fast_sem_io;
+       ushort *sem_io = fast_sem_io;
        struct list_head tasks;
 
        INIT_LIST_HEAD(&tasks);
@@ -1342,11 +1342,11 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                int i;
 
                sem_lock(sma, NULL, -1);
-               if (sma->sem_perm.deleted) {
+               if (!ipc_valid_object(&sma->sem_perm)) {
                        err = -EIDRM;
                        goto out_unlock;
                }
-               if(nsems > SEMMSL_FAST) {
+               if (nsems > SEMMSL_FAST) {
                        if (!ipc_rcu_getref(sma)) {
                                err = -EIDRM;
                                goto out_unlock;
@@ -1354,14 +1354,14 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                        sem_unlock(sma, -1);
                        rcu_read_unlock();
                        sem_io = ipc_alloc(sizeof(ushort)*nsems);
-                       if(sem_io == NULL) {
+                       if (sem_io == NULL) {
                                ipc_rcu_putref(sma, ipc_rcu_free);
                                return -ENOMEM;
                        }
 
                        rcu_read_lock();
                        sem_lock_and_putref(sma);
-                       if (sma->sem_perm.deleted) {
+                       if (!ipc_valid_object(&sma->sem_perm)) {
                                err = -EIDRM;
                                goto out_unlock;
                        }
@@ -1371,7 +1371,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                sem_unlock(sma, -1);
                rcu_read_unlock();
                err = 0;
-               if(copy_to_user(array, sem_io, nsems*sizeof(ushort)))
+               if (copy_to_user(array, sem_io, nsems*sizeof(ushort)))
                        err = -EFAULT;
                goto out_free;
        }
@@ -1386,15 +1386,15 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                }
                rcu_read_unlock();
 
-               if(nsems > SEMMSL_FAST) {
+               if (nsems > SEMMSL_FAST) {
                        sem_io = ipc_alloc(sizeof(ushort)*nsems);
-                       if(sem_io == NULL) {
+                       if (sem_io == NULL) {
                                ipc_rcu_putref(sma, ipc_rcu_free);
                                return -ENOMEM;
                        }
                }
 
-               if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) {
+               if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
                        ipc_rcu_putref(sma, ipc_rcu_free);
                        err = -EFAULT;
                        goto out_free;
@@ -1409,7 +1409,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                }
                rcu_read_lock();
                sem_lock_and_putref(sma);
-               if (sma->sem_perm.deleted) {
+               if (!ipc_valid_object(&sma->sem_perm)) {
                        err = -EIDRM;
                        goto out_unlock;
                }
@@ -1435,7 +1435,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                goto out_rcu_wakeup;
 
        sem_lock(sma, NULL, -1);
-       if (sma->sem_perm.deleted) {
+       if (!ipc_valid_object(&sma->sem_perm)) {
                err = -EIDRM;
                goto out_unlock;
        }
@@ -1449,10 +1449,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                err = curr->sempid;
                goto out_unlock;
        case GETNCNT:
-               err = count_semncnt(sma,semnum);
+               err = count_semncnt(sma, semnum);
                goto out_unlock;
        case GETZCNT:
-               err = count_semzcnt(sma,semnum);
+               err = count_semzcnt(sma, semnum);
                goto out_unlock;
        }
 
@@ -1462,7 +1462,7 @@ out_rcu_wakeup:
        rcu_read_unlock();
        wake_up_sem_queue_do(&tasks);
 out_free:
-       if(sem_io != fast_sem_io)
+       if (sem_io != fast_sem_io)
                ipc_free(sem_io, sizeof(ushort)*nsems);
        return err;
 }
@@ -1470,7 +1470,7 @@ out_free:
 static inline unsigned long
 copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
 {
-       switch(version) {
+       switch (version) {
        case IPC_64:
                if (copy_from_user(out, buf, sizeof(*out)))
                        return -EFAULT;
@@ -1479,7 +1479,7 @@ copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
            {
                struct semid_ds tbuf_old;
 
-               if(copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
+               if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
                        return -EFAULT;
 
                out->sem_perm.uid       = tbuf_old.sem_perm.uid;
@@ -1506,7 +1506,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
        struct semid64_ds semid64;
        struct kern_ipc_perm *ipcp;
 
-       if(cmd == IPC_SET) {
+       if (cmd == IPC_SET) {
                if (copy_semid_from_user(&semid64, p, version))
                        return -EFAULT;
        }
@@ -1566,7 +1566,7 @@ SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
        version = ipc_parse_version(&cmd);
        ns = current->nsproxy->ipc_ns;
 
-       switch(cmd) {
+       switch (cmd) {
        case IPC_INFO:
        case SEM_INFO:
        case IPC_STAT:
@@ -1634,7 +1634,7 @@ static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
 {
        struct sem_undo *un;
 
-       assert_spin_locked(&ulp->lock);
+       assert_spin_locked(&ulp->lock);
 
        un = __lookup_undo(ulp, semid);
        if (un) {
@@ -1645,7 +1645,7 @@ static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
 }
 
 /**
- * find_alloc_undo - Lookup (and if not present create) undo array
+ * find_alloc_undo - lookup (and if not present create) undo array
  * @ns: namespace
  * @semid: semaphore array id
  *
@@ -1670,7 +1670,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
        spin_lock(&ulp->lock);
        un = lookup_undo(ulp, semid);
        spin_unlock(&ulp->lock);
-       if (likely(un!=NULL))
+       if (likely(un != NULL))
                goto out;
 
        /* no undo structure around - allocate one. */
@@ -1699,7 +1699,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
        /* step 3: Acquire the lock on semaphore array */
        rcu_read_lock();
        sem_lock_and_putref(sma);
-       if (sma->sem_perm.deleted) {
+       if (!ipc_valid_object(&sma->sem_perm)) {
                sem_unlock(sma, -1);
                rcu_read_unlock();
                kfree(new);
@@ -1735,7 +1735,7 @@ out:
 
 
 /**
- * get_queue_result - Retrieve the result code from sem_queue
+ * get_queue_result - retrieve the result code from sem_queue
  * @q: Pointer to queue structure
  *
  * Retrieve the return code from the pending queue. If IN_WAKEUP is found in
@@ -1765,7 +1765,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
        int error = -EINVAL;
        struct sem_array *sma;
        struct sembuf fast_sops[SEMOPM_FAST];
-       struct sembuf* sops = fast_sops, *sop;
+       struct sembuf *sops = fast_sops, *sop;
        struct sem_undo *un;
        int undos = 0, alter = 0, max, locknum;
        struct sem_queue queue;
@@ -1779,13 +1779,13 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
                return -EINVAL;
        if (nsops > ns->sc_semopm)
                return -E2BIG;
-       if(nsops > SEMOPM_FAST) {
-               sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
-               if(sops==NULL)
+       if (nsops > SEMOPM_FAST) {
+               sops = kmalloc(sizeof(*sops)*nsops, GFP_KERNEL);
+               if (sops == NULL)
                        return -ENOMEM;
        }
-       if (copy_from_user (sops, tsops, nsops * sizeof(*tsops))) {
-               error=-EFAULT;
+       if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
+               error =  -EFAULT;
                goto out_free;
        }
        if (timeout) {
@@ -1846,7 +1846,15 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 
        error = -EIDRM;
        locknum = sem_lock(sma, sops, nsops);
-       if (sma->sem_perm.deleted)
+       /*
+        * We eventually might perform the following check in a lockless
+        * fashion, considering ipc_valid_object() locking constraints.
+        * If nsops == 1 and there is no contention for sem_perm.lock, then
+        * only a per-semaphore lock is held and it's OK to proceed with the
+        * check below. More details on the fine grained locking scheme
+        * entangled here and why it's RMID race safe on comments at sem_lock()
+        */
+       if (!ipc_valid_object(&sma->sem_perm))
                goto out_unlock_free;
        /*
         * semid identifiers are not unique - find_alloc_undo may have
@@ -1959,10 +1967,8 @@ sleep_again:
         * If queue.status != -EINTR we are woken up by another process.
         * Leave without unlink_queue(), but with sem_unlock().
         */
-
-       if (error != -EINTR) {
+       if (error != -EINTR)
                goto out_unlock_free;
-       }
 
        /*
         * If an interrupt occurred we have to clean up the queue
@@ -1984,7 +1990,7 @@ out_rcu_wakeup:
        rcu_read_unlock();
        wake_up_sem_queue_do(&tasks);
 out_free:
-       if(sops != fast_sops)
+       if (sops != fast_sops)
                kfree(sops);
        return error;
 }
@@ -2068,7 +2074,7 @@ void exit_sem(struct task_struct *tsk)
 
                sem_lock(sma, NULL, -1);
                /* exit_sem raced with IPC_RMID, nothing to do */
-               if (sma->sem_perm.deleted) {
+               if (!ipc_valid_object(&sma->sem_perm)) {
                        sem_unlock(sma, -1);
                        rcu_read_unlock();
                        continue;
@@ -2093,7 +2099,7 @@ void exit_sem(struct task_struct *tsk)
 
                /* perform adjustments registered in un */
                for (i = 0; i < sma->sem_nsems; i++) {
-                       struct sem * semaphore = &sma->sem_base[i];
+                       struct sem *semaphore = &sma->sem_base[i];
                        if (un->semadj[i]) {
                                semaphore->semval += un->semadj[i];
                                /*
@@ -2107,7 +2113,7 @@ void exit_sem(struct task_struct *tsk)
                                 * Linux caps the semaphore value, both at 0
                                 * and at SEMVMX.
                                 *
-                                *      Manfred <manfred@colorfullife.com>
+                                *      Manfred <manfred@colorfullife.com>
                                 */
                                if (semaphore->semval < 0)
                                        semaphore->semval = 0;
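
The semtimedop() cleanup above leaves the existing allocation strategy intact: up to SEMOPM_FAST operations are serviced from an on-stack array and only larger vectors fall back to kmalloc(). A minimal sketch of that pattern follows; the names OPS_FAST and copy_op_vector() are illustrative stand-ins, not part of the patch.

    #include <linux/errno.h>
    #include <linux/sem.h>
    #include <linux/slab.h>
    #include <linux/uaccess.h>

    #define OPS_FAST 8      /* plays the role of SEMOPM_FAST */

    /* Illustrative helper, not taken from the patch. */
    static long copy_op_vector(struct sembuf __user *uops, unsigned int nsops)
    {
            struct sembuf fast[OPS_FAST];
            struct sembuf *ops = fast;
            long err = 0;

            if (nsops > OPS_FAST) {
                    ops = kmalloc_array(nsops, sizeof(*ops), GFP_KERNEL);
                    if (ops == NULL)
                            return -ENOMEM;
            }
            if (copy_from_user(ops, uops, nsops * sizeof(*uops)))
                    err = -EFAULT;

            /* ... operate on ops[0..nsops-1] ... */

            if (ops != fast)
                    kfree(ops);
            return err;
    }
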
index 7a51443a51d6421bd2a02a66ec18db98f6796dd3..76459616a7fafe7581d713a245c63e861a4cf631 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -67,7 +67,7 @@ static const struct vm_operations_struct shm_vm_ops;
 static int newseg(struct ipc_namespace *, struct ipc_params *);
 static void shm_open(struct vm_area_struct *vma);
 static void shm_close(struct vm_area_struct *vma);
-static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
+static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
 #endif
@@ -91,7 +91,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
        struct shmid_kernel *shp;
        shp = container_of(ipcp, struct shmid_kernel, shm_perm);
 
-       if (shp->shm_nattch){
+       if (shp->shm_nattch) {
                shp->shm_perm.mode |= SHM_DEST;
                /* Do not find it any more */
                shp->shm_perm.key = IPC_PRIVATE;
@@ -116,7 +116,7 @@ static int __init ipc_ns_init(void)
 
 pure_initcall(ipc_ns_init);
 
-void __init shm_init (void)
+void __init shm_init(void)
 {
        ipc_init_proc_interface("sysvipc/shm",
 #if BITS_PER_LONG <= 32
@@ -248,7 +248,7 @@ static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
  */
 static void shm_close(struct vm_area_struct *vma)
 {
-       struct file * file = vma->vm_file;
+       struct file *file = vma->vm_file;
        struct shm_file_data *sfd = shm_file_data(file);
        struct shmid_kernel *shp;
        struct ipc_namespace *ns = sfd->ns;
@@ -379,7 +379,7 @@ static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
 }
 #endif
 
-static int shm_mmap(struct file * file, struct vm_area_struct * vma)
+static int shm_mmap(struct file *file, struct vm_area_struct *vma)
 {
        struct shm_file_data *sfd = shm_file_data(file);
        int ret;
@@ -477,7 +477,6 @@ static const struct vm_operations_struct shm_vm_ops = {
  *
  * Called with shm_ids.rwsem held as a writer.
  */
-
 static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 {
        key_t key = params->key;
@@ -486,7 +485,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
        int error;
        struct shmid_kernel *shp;
        size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       struct file * file;
+       struct file *file;
        char name[13];
        int id;
        vm_flags_t acctflag = 0;
@@ -512,7 +511,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
                return error;
        }
 
-       sprintf (name, "SYSV%08x", key);
+       sprintf(name, "SYSV%08x", key);
        if (shmflg & SHM_HUGETLB) {
                struct hstate *hs;
                size_t hugesize;
@@ -533,7 +532,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
        } else {
                /*
                 * Do not allow no accounting for OVERCOMMIT_NEVER, even
-                * if it's asked for.
+                * if it's asked for.
                 */
                if  ((shmflg & SHM_NORESERVE) &&
                                sysctl_overcommit_memory != OVERCOMMIT_NEVER)
@@ -628,7 +627,7 @@ SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
 
 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
 {
-       switch(version) {
+       switch (version) {
        case IPC_64:
                return copy_to_user(buf, in, sizeof(*in));
        case IPC_OLD:
@@ -655,7 +654,7 @@ static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_
 static inline unsigned long
 copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
 {
-       switch(version) {
+       switch (version) {
        case IPC_64:
                if (copy_from_user(out, buf, sizeof(*out)))
                        return -EFAULT;
@@ -680,14 +679,14 @@ copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
 
 static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
 {
-       switch(version) {
+       switch (version) {
        case IPC_64:
                return copy_to_user(buf, in, sizeof(*in));
        case IPC_OLD:
            {
                struct shminfo out;
 
-               if(in->shmmax > INT_MAX)
+               if (in->shmmax > INT_MAX)
                        out.shmmax = INT_MAX;
                else
                        out.shmmax = (int)in->shmmax;
@@ -846,14 +845,14 @@ static int shmctl_nolock(struct ipc_namespace *ns, int shmid,
                shminfo.shmall = ns->shm_ctlall;
 
                shminfo.shmmin = SHMMIN;
-               if(copy_shminfo_to_user (buf, &shminfo, version))
+               if (copy_shminfo_to_user(buf, &shminfo, version))
                        return -EFAULT;
 
                down_read(&shm_ids(ns).rwsem);
                err = ipc_get_maxid(&shm_ids(ns));
                up_read(&shm_ids(ns).rwsem);
 
-               if(err<0)
+               if (err < 0)
                        err = 0;
                goto out;
        }
@@ -864,7 +863,7 @@ static int shmctl_nolock(struct ipc_namespace *ns, int shmid,
                memset(&shm_info, 0, sizeof(shm_info));
                down_read(&shm_ids(ns).rwsem);
                shm_info.used_ids = shm_ids(ns).in_use;
-               shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
+               shm_get_stat(ns, &shm_info.shm_rss, &shm_info.shm_swp);
                shm_info.shm_tot = ns->shm_tot;
                shm_info.swap_attempts = 0;
                shm_info.swap_successes = 0;
@@ -975,6 +974,13 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
                        goto out_unlock1;
 
                ipc_lock_object(&shp->shm_perm);
+
+               /* check if shm_destroy() is tearing down shp */
+               if (!ipc_valid_object(&shp->shm_perm)) {
+                       err = -EIDRM;
+                       goto out_unlock0;
+               }
+
                if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
                        kuid_t euid = current_euid();
                        if (!uid_eq(euid, shp->shm_perm.uid) &&
@@ -989,13 +995,6 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
                }
 
                shm_file = shp->shm_file;
-
-               /* check if shm_destroy() is tearing down shp */
-               if (shm_file == NULL) {
-                       err = -EIDRM;
-                       goto out_unlock0;
-               }
-
                if (is_file_hugepages(shm_file))
                        goto out_unlock0;
 
@@ -1047,7 +1046,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
        struct shmid_kernel *shp;
        unsigned long addr;
        unsigned long size;
-       struct file * file;
+       struct file *file;
        int    err;
        unsigned long flags;
        unsigned long prot;
@@ -1116,7 +1115,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
        ipc_lock_object(&shp->shm_perm);
 
        /* check if shm_destroy() is tearing down shp */
-       if (shp->shm_file == NULL) {
+       if (!ipc_valid_object(&shp->shm_perm)) {
                ipc_unlock_object(&shp->shm_perm);
                err = -EIDRM;
                goto out_unlock;
index 3ae17a4ace5b9ba242fd1aa42543d64145e5cb7f..fc5c655ff50791e045e218589be0382187df314c 100644 (file)
@@ -110,15 +110,15 @@ static struct notifier_block ipc_memory_nb = {
 };
 
 /**
- *     ipc_init        -       initialise IPC subsystem
+ * ipc_init - initialise ipc subsystem
  *
- *     The various system5 IPC resources (semaphores, messages and shared
- *     memory) are initialised
- *     A callback routine is registered into the memory hotplug notifier
- *     chain: since msgmni scales to lowmem this callback routine will be
- *     called upon successful memory add / remove to recompute msmgni.
+ * The various sysv ipc resources (semaphores, messages and shared
+ * memory) are initialised.
+ *
+ * A callback routine is registered into the memory hotplug notifier
+ * chain: since msgmni scales to lowmem this callback routine will be
+ * called upon successful memory add / remove to recompute msmgni.
  */
 static int __init ipc_init(void)
 {
        sem_init();
@@ -131,39 +131,29 @@ static int __init ipc_init(void)
 __initcall(ipc_init);
 
 /**
- *     ipc_init_ids            -       initialise IPC identifiers
- *     @ids: Identifier set
+ * ipc_init_ids        - initialise ipc identifiers
+ * @ids: ipc identifier set
  *
- *     Set up the sequence range to use for the ipc identifier range (limited
- *     below IPCMNI) then initialise the ids idr.
+ * Set up the sequence range to use for the ipc identifier range (limited
+ * below IPCMNI) then initialise the ids idr.
  */
 void ipc_init_ids(struct ipc_ids *ids)
 {
-       init_rwsem(&ids->rwsem);
-
        ids->in_use = 0;
        ids->seq = 0;
        ids->next_id = -1;
-       {
-               int seq_limit = INT_MAX/SEQ_MULTIPLIER;
-               if (seq_limit > USHRT_MAX)
-                       ids->seq_max = USHRT_MAX;
-                else
-                       ids->seq_max = seq_limit;
-       }
-
+       init_rwsem(&ids->rwsem);
        idr_init(&ids->ipcs_idr);
 }
 
 #ifdef CONFIG_PROC_FS
 static const struct file_operations sysvipc_proc_fops;
 /**
- *     ipc_init_proc_interface -  Create a proc interface for sysipc types using a seq_file interface.
- *     @path: Path in procfs
- *     @header: Banner to be printed at the beginning of the file.
- *     @ids: ipc id table to iterate.
- *     @show: show routine.
+ * ipc_init_proc_interface -  create a proc interface for sysipc types using a seq_file interface.
+ * @path: Path in procfs
+ * @header: Banner to be printed at the beginning of the file.
+ * @ids: ipc id table to iterate.
+ * @show: show routine.
  */
 void __init ipc_init_proc_interface(const char *path, const char *header,
                int ids, int (*show)(struct seq_file *, void *))
@@ -184,23 +174,21 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
                               NULL,           /* parent dir */
                               &sysvipc_proc_fops,
                               iface);
-       if (!pde) {
+       if (!pde)
                kfree(iface);
-       }
 }
 #endif
 
 /**
- *     ipc_findkey     -       find a key in an ipc identifier set     
- *     @ids: Identifier set
- *     @key: The key to find
- *     
- *     Requires ipc_ids.rwsem locked.
- *     Returns the LOCKED pointer to the ipc structure if found or NULL
- *     if not.
- *     If key is found ipc points to the owning ipc structure
+ * ipc_findkey - find a key in an ipc identifier set
+ * @ids: ipc identifier set
+ * @key: key to find
+ *
+ * Returns the locked pointer to the ipc structure if found or NULL
+ * otherwise. If key is found ipc points to the owning ipc structure
+ *
+ * Called with ipc_ids.rwsem held.
  */
 static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
 {
        struct kern_ipc_perm *ipc;
@@ -227,12 +215,11 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
 }
 
 /**
- *     ipc_get_maxid   -       get the last assigned id
- *     @ids: IPC identifier set
+ * ipc_get_maxid - get the last assigned id
+ * @ids: ipc identifier set
  *
- *     Called with ipc_ids.rwsem held.
+ * Called with ipc_ids.rwsem held.
  */
-
 int ipc_get_maxid(struct ipc_ids *ids)
 {
        struct kern_ipc_perm *ipc;
@@ -258,19 +245,19 @@ int ipc_get_maxid(struct ipc_ids *ids)
 }
 
 /**
- *     ipc_addid       -       add an IPC identifier
- *     @ids: IPC identifier set
- *     @new: new IPC permission set
- *     @size: limit for the number of used ids
+ * ipc_addid - add an ipc identifier
+ * @ids: ipc identifier set
+ * @new: new ipc permission set
+ * @size: limit for the number of used ids
  *
- *     Add an entry 'new' to the IPC ids idr. The permissions object is
- *     initialised and the first free entry is set up and the id assigned
- *     is returned. The 'new' entry is returned in a locked state on success.
- *     On failure the entry is not locked and a negative err-code is returned.
+ * Add an entry 'new' to the ipc ids idr. The permissions object is
+ * initialised and the first free entry is set up and the id assigned
+ * is returned. The 'new' entry is returned in a locked state on success.
+ * On failure the entry is not locked and a negative err-code is returned.
  *
- *     Called with writer ipc_ids.rwsem held.
+ * Called with writer ipc_ids.rwsem held.
  */
-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
+int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
 {
        kuid_t euid;
        kgid_t egid;
@@ -286,7 +273,7 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
        idr_preload(GFP_KERNEL);
 
        spin_lock_init(&new->lock);
-       new->deleted = 0;
+       new->deleted = false;
        rcu_read_lock();
        spin_lock(&new->lock);
 
@@ -308,7 +295,7 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 
        if (next_id < 0) {
                new->seq = ids->seq++;
-               if (ids->seq > ids->seq_max)
+               if (ids->seq > IPCID_SEQ_MAX)
                        ids->seq = 0;
        } else {
                new->seq = ipcid_to_seqx(next_id);
@@ -320,14 +307,14 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 }
 
 /**
- *     ipcget_new      -       create a new ipc object
- *     @ns: namespace
- *     @ids: IPC identifer set
- *     @ops: the actual creation routine to call
- *     @params: its parameters
- *
- *     This routine is called by sys_msgget, sys_semget() and sys_shmget()
- *     when the key is IPC_PRIVATE.
+ * ipcget_new -        create a new ipc object
+ * @ns: ipc namespace
+ * @ids: ipc identifier set
+ * @ops: the actual creation routine to call
+ * @params: its parameters
+ *
+ * This routine is called by sys_msgget, sys_semget() and sys_shmget()
+ * when the key is IPC_PRIVATE.
  */
 static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
                struct ipc_ops *ops, struct ipc_params *params)
@@ -341,19 +328,19 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
 }
 
 /**
- *     ipc_check_perms -       check security and permissions for an IPC
- *     @ns: IPC namespace
- *     @ipcp: ipc permission set
- *     @ops: the actual security routine to call
- *     @params: its parameters
+ * ipc_check_perms - check security and permissions for an ipc object
+ * @ns: ipc namespace
+ * @ipcp: ipc permission set
+ * @ops: the actual security routine to call
+ * @params: its parameters
  *
- *     This routine is called by sys_msgget(), sys_semget() and sys_shmget()
- *      when the key is not IPC_PRIVATE and that key already exists in the
- *      ids IDR.
+ * This routine is called by sys_msgget(), sys_semget() and sys_shmget()
+ * when the key is not IPC_PRIVATE and that key already exists in the
+ * ids IDR.
  *
- *     On success, the IPC id is returned.
+ * On success, the ipc id is returned.
  *
- *     It is called with ipc_ids.rwsem and ipcp->lock held.
+ * It is called with ipc_ids.rwsem and ipcp->lock held.
  */
 static int ipc_check_perms(struct ipc_namespace *ns,
                           struct kern_ipc_perm *ipcp,
@@ -374,96 +361,96 @@ static int ipc_check_perms(struct ipc_namespace *ns,
 }
 
 /**
- *     ipcget_public   -       get an ipc object or create a new one
- *     @ns: namespace
- *     @ids: IPC identifer set
- *     @ops: the actual creation routine to call
- *     @params: its parameters
- *
- *     This routine is called by sys_msgget, sys_semget() and sys_shmget()
- *     when the key is not IPC_PRIVATE.
- *     It adds a new entry if the key is not found and does some permission
- *      / security checkings if the key is found.
- *
- *     On success, the ipc id is returned.
+ * ipcget_public - get an ipc object or create a new one
+ * @ns: ipc namespace
+ * @ids: ipc identifier set
+ * @ops: the actual creation routine to call
+ * @params: its parameters
+ *
+ * This routine is called by sys_msgget, sys_semget() and sys_shmget()
+ * when the key is not IPC_PRIVATE.
+ * It adds a new entry if the key is not found and does some permission
+ * / security checkings if the key is found.
+ *
+ * On success, the ipc id is returned.
  */
 static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
-               struct ipc_ops *ops, struct ipc_params *params)
+                        struct ipc_ops *ops, struct ipc_params *params)
 {
        struct kern_ipc_perm *ipcp;
        int flg = params->flg;
-       int err;
+       int err = 0;
 
-       /*
-        * Take the lock as a writer since we are potentially going to add
-        * a new entry + read locks are not "upgradable"
-        */
        down_write(&ids->rwsem);
        ipcp = ipc_findkey(ids, params->key);
-       if (ipcp == NULL) {
+
+       if (!ipcp) {
                /* key not used */
                if (!(flg & IPC_CREAT))
                        err = -ENOENT;
-               else
+               else /* create new ipc object */
                        err = ops->getnew(ns, params);
-       } else {
-               /* ipc object has been locked by ipc_findkey() */
-
-               if (flg & IPC_CREAT && flg & IPC_EXCL)
-                       err = -EEXIST;
-               else {
-                       err = 0;
-                       if (ops->more_checks)
-                               err = ops->more_checks(ipcp, params);
-                       if (!err)
-                               /*
-                                * ipc_check_perms returns the IPC id on
-                                * success
-                                */
-                               err = ipc_check_perms(ns, ipcp, ops, params);
-               }
+
+               goto done_write;
+       }
+
+       if ((flg & IPC_CREAT) && (flg & IPC_EXCL)) {
+               /* ipc object was locked by successful ipc_findkey() lookup */
                ipc_unlock(ipcp);
+               err = -EEXIST;
+
+               goto done_write;
        }
-       up_write(&ids->rwsem);
 
+       /*
+        * The key was found, so we will just perform routine checks on
+        * the ipc object. Share the lock among other readers.
+        */
+       downgrade_write(&ids->rwsem);
+
+       if (ops->more_checks)
+               err = ops->more_checks(ipcp, params);
+       if (!err)
+               /* returns the IPC id on success */
+               err = ipc_check_perms(ns, ipcp, ops, params);
+
+       ipc_unlock(ipcp);
+
+       up_read(&ids->rwsem);
+       return err;
+done_write:
+       up_write(&ids->rwsem);
        return err;
 }
 
-
 /**
- *     ipc_rmid        -       remove an IPC identifier
- *     @ids: IPC identifier set
- *     @ipcp: ipc perm structure containing the identifier to remove
+ * ipc_rmid - remove an ipc identifier
+ * @ids: ipc identifier set
+ * @ipcp: ipc perm structure containing the identifier to remove
  *
- *     ipc_ids.rwsem (as a writer) and the spinlock for this ID are held
- *     before this function is called, and remain locked on the exit.
+ * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held
+ * before this function is called, and remain locked on the exit.
  */
 void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
 {
        int lid = ipcid_to_idx(ipcp->id);
 
        idr_remove(&ids->ipcs_idr, lid);
-
        ids->in_use--;
-
-       ipcp->deleted = 1;
-
-       return;
+       ipcp->deleted = true;
 }
 
 /**
- *     ipc_alloc       -       allocate ipc space
- *     @size: size desired
+ * ipc_alloc - allocate ipc space
+ * @size: size desired
  *
- *     Allocate memory from the appropriate pools and return a pointer to it.
- *     NULL is returned if the allocation fails
+ * Allocate memory from the appropriate pools and return a pointer to it.
+ * NULL is returned if the allocation fails
  */
 void *ipc_alloc(int size)
 {
        void *out;
-       if(size > PAGE_SIZE)
+       if (size > PAGE_SIZE)
                out = vmalloc(size);
        else
                out = kmalloc(size, GFP_KERNEL);
@@ -471,28 +458,27 @@ void *ipc_alloc(int size)
 }
 
 /**
- *     ipc_free        -       free ipc space
- *     @ptr: pointer returned by ipc_alloc
- *     @size: size of block
+ * ipc_free - free ipc space
+ * @ptr: pointer returned by ipc_alloc
+ * @size: size of block
  *
- *     Free a block created with ipc_alloc(). The caller must know the size
- *     used in the allocation call.
+ * Free a block created with ipc_alloc(). The caller must know the size
+ * used in the allocation call.
  */
-
-void ipc_free(void* ptr, int size)
+void ipc_free(void *ptr, int size)
 {
-       if(size > PAGE_SIZE)
+       if (size > PAGE_SIZE)
                vfree(ptr);
        else
                kfree(ptr);
 }
 
 /**
- *     ipc_rcu_alloc   -       allocate ipc and rcu space 
- *     @size: size desired
+ * ipc_rcu_alloc - allocate ipc and rcu space
+ * @size: size desired
  *
- *     Allocate memory for the rcu header structure +  the object.
- *     Returns the pointer to the object or NULL upon failure.
+ * Allocate memory for the rcu header structure +  the object.
+ * Returns the pointer to the object or NULL upon failure.
  */
 void *ipc_rcu_alloc(int size)
 {
@@ -534,17 +520,16 @@ void ipc_rcu_free(struct rcu_head *head)
 }
 
 /**
- *     ipcperms        -       check IPC permissions
- *     @ns: IPC namespace
- *     @ipcp: IPC permission set
- *     @flag: desired permission set.
+ * ipcperms - check ipc permissions
+ * @ns: ipc namespace
+ * @ipcp: ipc permission set
+ * @flag: desired permission set
  *
- *     Check user, group, other permissions for access
- *     to ipc resources. return 0 if allowed
+ * Check user, group, other permissions for access
+ * to ipc resources. return 0 if allowed
  *
- *     @flag will most probably be 0 or S_...UGO from <linux/stat.h>
+ * @flag will most probably be 0 or S_...UGO from <linux/stat.h>
  */
 int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flag)
 {
        kuid_t euid = current_euid();
@@ -572,16 +557,14 @@ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flag)
  */
 
 /**
- *     kernel_to_ipc64_perm    -       convert kernel ipc permissions to user
- *     @in: kernel permissions
- *     @out: new style IPC permissions
+ * kernel_to_ipc64_perm        - convert kernel ipc permissions to user
+ * @in: kernel permissions
+ * @out: new style ipc permissions
  *
- *     Turn the kernel object @in into a set of permissions descriptions
- *     for returning to userspace (@out).
+ * Turn the kernel object @in into a set of permissions descriptions
+ * for returning to userspace (@out).
  */
-
-void kernel_to_ipc64_perm (struct kern_ipc_perm *in, struct ipc64_perm *out)
+void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out)
 {
        out->key        = in->key;
        out->uid        = from_kuid_munged(current_user_ns(), in->uid);
@@ -593,15 +576,14 @@ void kernel_to_ipc64_perm (struct kern_ipc_perm *in, struct ipc64_perm *out)
 }
 
 /**
- *     ipc64_perm_to_ipc_perm  -       convert new ipc permissions to old
- *     @in: new style IPC permissions
- *     @out: old style IPC permissions
+ * ipc64_perm_to_ipc_perm - convert new ipc permissions to old
+ * @in: new style ipc permissions
+ * @out: old style ipc permissions
  *
- *     Turn the new style permissions object @in into a compatibility
- *     object and store it into the @out pointer.
+ * Turn the new style permissions object @in into a compatibility
+ * object and store it into the @out pointer.
  */
-void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
+void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out)
 {
        out->key        = in->key;
        SET_UID(out->uid, in->uid);
@@ -635,8 +617,8 @@ struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id)
 }
 
 /**
- * ipc_lock - Lock an ipc structure without rwsem held
- * @ids: IPC identifier set
+ * ipc_lock - lock an ipc structure without rwsem held
+ * @ids: ipc identifier set
  * @id: ipc id to look for
  *
  * Look for an id in the ipc ids idr and lock the associated ipc object.
@@ -657,7 +639,7 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
        /* ipc_rmid() may have already freed the ID while ipc_lock
         * was spinning: here verify that the structure is still valid
         */
-       if (!out->deleted)
+       if (ipc_valid_object(out))
                return out;
 
        spin_unlock(&out->lock);
@@ -693,11 +675,11 @@ out:
 
 /**
  * ipcget - Common sys_*get() code
- * @ns : namsepace
- * @ids : IPC identifier set
- * @ops : operations to be called on ipc object creation, permission checks
- *        and further checks
- * @params : the parameters needed by the previous operations.
+ * @ns: namespace
+ * @ids: ipc identifier set
+ * @ops: operations to be called on ipc object creation, permission checks
+ *       and further checks
+ * @params: the parameters needed by the previous operations.
  *
  * Common routine called by sys_msgget(), sys_semget() and sys_shmget().
  */
@@ -711,7 +693,7 @@ int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
 }
 
 /**
- * ipc_update_perm - update the permissions of an IPC.
+ * ipc_update_perm - update the permissions of an ipc object
  * @in:  the permission given as input.
  * @out: the permission of the ipc to set.
  */
@@ -732,7 +714,7 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out)
 
 /**
  * ipcctl_pre_down_nolock - retrieve an ipc and check permissions for some IPC_XXX cmd
- * @ns:  the ipc namespace
+ * @ns:  ipc namespace
  * @ids:  the table of ids where to look for the ipc
  * @id:   the id of the ipc to retrieve
  * @cmd:  the cmd to check
@@ -779,15 +761,14 @@ err:
 
 
 /**
- *     ipc_parse_version       -       IPC call version
- *     @cmd: pointer to command
+ * ipc_parse_version - ipc call version
+ * @cmd: pointer to command
  *
- *     Return IPC_64 for new style IPC and IPC_OLD for old style IPC. 
- *     The @cmd value is turned from an encoding command and version into
- *     just the command code.
+ * Return IPC_64 for new style IPC and IPC_OLD for old style IPC.
+ * The @cmd value is turned from an encoding command and version into
+ * just the command code.
  */
-int ipc_parse_version (int *cmd)
+int ipc_parse_version(int *cmd)
 {
        if (*cmd & IPC_64) {
                *cmd ^= IPC_64;
@@ -824,7 +805,7 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
        if (total >= ids->in_use)
                return NULL;
 
-       for ( ; pos < IPCMNI; pos++) {
+       for (; pos < IPCMNI; pos++) {
                ipc = idr_find(&ids->ipcs_idr, pos);
                if (ipc != NULL) {
                        *new_pos = pos + 1;
@@ -927,8 +908,10 @@ static int sysvipc_proc_open(struct inode *inode, struct file *file)
                goto out;
 
        ret = seq_open(file, &sysvipc_proc_seqops);
-       if (ret)
-               goto out_kfree;
+       if (ret) {
+               kfree(iter);
+               goto out;
+       }
 
        seq = file->private_data;
        seq->private = iter;
@@ -937,9 +920,6 @@ static int sysvipc_proc_open(struct inode *inode, struct file *file)
        iter->ns    = get_ipc_ns(current->nsproxy->ipc_ns);
 out:
        return ret;
-out_kfree:
-       kfree(iter);
-       goto out;
 }
 
 static int sysvipc_proc_release(struct inode *inode, struct file *file)
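
One behavioural change in ipcget_public() above is the use of downgrade_write(): the rwsem is still taken as a writer because a new object may have to be created, but once the key has been found and no insertion will happen, the lock is demoted so other readers can proceed during the remaining checks. A condensed sketch of that locking shape is shown below; lookup_or_create() and want_create are illustrative names, not from the patch.

    #include <linux/rwsem.h>

    static int lookup_or_create(struct rw_semaphore *rwsem, bool want_create)
    {
            int err = 0;

            down_write(rwsem);              /* may insert, so start exclusive */
            if (want_create) {
                    /* ... add the new entry while holding the write lock ... */
                    up_write(rwsem);
                    return err;
            }

            /* Only lookups and checks remain: share the lock with readers. */
            downgrade_write(rwsem);
            /* ... permission / security checks under the read side ... */
            up_read(rwsem);
            return err;
    }
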
index 59d78aa949874aff138845ce13fb55c764205f9d..9c47d6f6c7b4b6c63b05b8a7c246ea3efcdeb0dc 100644 (file)
@@ -15,9 +15,9 @@
 
 #define SEQ_MULTIPLIER (IPCMNI)
 
-void sem_init (void);
-void msg_init (void);
-void shm_init (void);
+void sem_init(void);
+void msg_init(void);
+void shm_init(void);
 
 struct ipc_namespace;
 
@@ -100,6 +100,7 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
 
 #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)
 #define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER)
+#define IPCID_SEQ_MAX min_t(int, INT_MAX/SEQ_MULTIPLIER, USHRT_MAX)
 
 /* must be called with ids->rwsem acquired for writing */
 int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
@@ -116,8 +117,8 @@ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
 /* for rare, potentially huge allocations.
  * both function can sleep
  */
-void* ipc_alloc(int size);
-void ipc_free(void* ptr, int size);
+void *ipc_alloc(int size);
+void ipc_free(void *ptr, int size);
 
 /*
  * For allocation that need to be freed by RCU.
@@ -125,7 +126,7 @@ void ipc_free(void* ptr, int size);
  * getref increases the refcount, the putref call that reduces the recount
  * to 0 schedules the rcu destruction. Caller must guarantee locking.
  */
-void* ipc_rcu_alloc(int size);
+void *ipc_rcu_alloc(int size);
 int ipc_rcu_getref(void *ptr);
 void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head));
 void ipc_rcu_free(struct rcu_head *head);
@@ -144,7 +145,7 @@ struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
   /* On IA-64, we always use the "64-bit version" of the IPC structures.  */ 
 # define ipc_parse_version(cmd)        IPC_64
 #else
-int ipc_parse_version (int *cmd);
+int ipc_parse_version(int *cmd);
 #endif
 
 extern void free_msg(struct msg_msg *msg);
@@ -185,6 +186,19 @@ static inline void ipc_unlock(struct kern_ipc_perm *perm)
        rcu_read_unlock();
 }
 
+/*
+ * ipc_valid_object() - helper to sort out IPC_RMID races for codepaths
+ * where the respective ipc_ids.rwsem is not being held down.
+ * Checks whether the ipc object is still around or if it's gone already, as
+ * ipc_rmid() may have already freed the ID while the ipc lock was spinning.
+ * Needs to be called with kern_ipc_perm.lock held -- exception made for one
+ * checkpoint case at sys_semtimedop() as noted in code commentary.
+ */
+static inline bool ipc_valid_object(struct kern_ipc_perm *perm)
+{
+       return !perm->deleted;
+}
+
 struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id);
 int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
                        struct ipc_ops *ops, struct ipc_params *params);
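
The new ipc_valid_object() helper centralises the IPC_RMID race check that the sem and shm call sites above are converted to. Based on those call sites, the expected pattern is: look the object up locklessly, take its per-object lock, and only then test whether it is still alive. The fragment below illustrates the shape; ids, id and the error handling are assumed from the surrounding code rather than copied from the patch.

    struct kern_ipc_perm *ipcp;

    rcu_read_lock();
    ipcp = ipc_obtain_object_check(ids, id);        /* lockless lookup */
    if (IS_ERR(ipcp)) {
            rcu_read_unlock();
            return PTR_ERR(ipcp);
    }

    ipc_lock_object(ipcp);
    if (!ipc_valid_object(ipcp)) {
            /* raced with ipc_rmid(): the object is being torn down */
            ipc_unlock_object(ipcp);
            rcu_read_unlock();
            return -EIDRM;
    }
    /* ... operate on the object while it is known to be valid ... */
    ipc_unlock_object(ipcp);
    rcu_read_unlock();
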
index 43c307dc9453d5c9166596d2303deaf099cbf5b0..67ccf0e7cca92412f457175d5ff3e784b254966a 100644 (file)
@@ -912,12 +912,13 @@ static void evict_chunk(struct audit_chunk *chunk)
 }
 
 static int audit_tree_handle_event(struct fsnotify_group *group,
+                                  struct inode *to_tell,
                                   struct fsnotify_mark *inode_mark,
-                                  struct fsnotify_mark *vfsmonut_mark,
-                                  struct fsnotify_event *event)
+                                  struct fsnotify_mark *vfsmount_mark,
+                                  u32 mask, void *data, int data_type,
+                                  const unsigned char *file_name)
 {
-       BUG();
-       return -EOPNOTSUPP;
+       return 0;
 }
 
 static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
@@ -933,19 +934,8 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify
        BUG_ON(atomic_read(&entry->refcnt) < 1);
 }
 
-static bool audit_tree_send_event(struct fsnotify_group *group, struct inode *inode,
-                                 struct fsnotify_mark *inode_mark,
-                                 struct fsnotify_mark *vfsmount_mark,
-                                 __u32 mask, void *data, int data_type)
-{
-       return false;
-}
-
 static const struct fsnotify_ops audit_tree_ops = {
        .handle_event = audit_tree_handle_event,
-       .should_send_event = audit_tree_send_event,
-       .free_group_priv = NULL,
-       .free_event_priv = NULL,
        .freeing_mark = audit_tree_freeing_mark,
 };
 
index 22831c4d369c67d988b1f0702f9db51bac6275bf..2596fac5dcb4552a0d574decada597424df0587b 100644 (file)
@@ -465,35 +465,27 @@ void audit_remove_watch_rule(struct audit_krule *krule)
        }
 }
 
-static bool audit_watch_should_send_event(struct fsnotify_group *group, struct inode *inode,
-                                         struct fsnotify_mark *inode_mark,
-                                         struct fsnotify_mark *vfsmount_mark,
-                                         __u32 mask, void *data, int data_type)
-{
-       return true;
-}
-
 /* Update watch data in audit rules based on fsnotify events. */
 static int audit_watch_handle_event(struct fsnotify_group *group,
+                                   struct inode *to_tell,
                                    struct fsnotify_mark *inode_mark,
                                    struct fsnotify_mark *vfsmount_mark,
-                                   struct fsnotify_event *event)
+                                   u32 mask, void *data, int data_type,
+                                   const unsigned char *dname)
 {
        struct inode *inode;
-       __u32 mask = event->mask;
-       const char *dname = event->file_name;
        struct audit_parent *parent;
 
        parent = container_of(inode_mark, struct audit_parent, mark);
 
        BUG_ON(group != audit_watch_group);
 
-       switch (event->data_type) {
+       switch (data_type) {
        case (FSNOTIFY_EVENT_PATH):
-               inode = event->path.dentry->d_inode;
+               inode = ((struct path *)data)->dentry->d_inode;
                break;
        case (FSNOTIFY_EVENT_INODE):
-               inode = event->inode;
+               inode = (struct inode *)data;
                break;
        default:
                BUG();
@@ -512,11 +504,7 @@ static int audit_watch_handle_event(struct fsnotify_group *group,
 }
 
 static const struct fsnotify_ops audit_watch_fsnotify_ops = {
-       .should_send_event =    audit_watch_should_send_event,
        .handle_event =         audit_watch_handle_event,
-       .free_group_priv =      NULL,
-       .freeing_mark =         NULL,
-       .free_event_priv =      NULL,
 };
 
 static int __init audit_watch_init(void)
index a949819055d51d5d4335544f81016770af6b687e..1e77fc6453174a5945ec786e0b0183b33dfcf507 100644 (file)
@@ -74,6 +74,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
                __this_cpu_dec(process_counts);
        }
        list_del_rcu(&p->thread_group);
+       list_del_rcu(&p->thread_node);
 }
 
 /*
index 294189fc7ac8991f31c97e327fb8b19841c44968..a17621c6cd4272182a78083e736c5972c741ed7a 100644 (file)
@@ -800,14 +800,11 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
  * Allocate a new mm structure and copy contents from the
  * mm structure of the passed in task structure.
  */
-struct mm_struct *dup_mm(struct task_struct *tsk)
+static struct mm_struct *dup_mm(struct task_struct *tsk)
 {
        struct mm_struct *mm, *oldmm = current->mm;
        int err;
 
-       if (!oldmm)
-               return NULL;
-
        mm = allocate_mm();
        if (!mm)
                goto fail_nomem;
@@ -1035,6 +1032,11 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        sig->nr_threads = 1;
        atomic_set(&sig->live, 1);
        atomic_set(&sig->sigcnt, 1);
+
+       /* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */
+       sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node);
+       tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head);
+
        init_waitqueue_head(&sig->wait_chldexit);
        sig->curr_target = tsk;
        init_sigpending(&sig->shared_pending);
@@ -1224,7 +1226,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        if (!try_module_get(task_thread_info(p)->exec_domain->module))
                goto bad_fork_cleanup_count;
 
-       p->did_exec = 0;
        delayacct_tsk_init(p);  /* Must remain after dup_task_struct() */
        copy_flags(clone_flags, p);
        INIT_LIST_HEAD(&p->children);
@@ -1474,6 +1475,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                        atomic_inc(&current->signal->sigcnt);
                        list_add_tail_rcu(&p->thread_group,
                                          &p->group_leader->thread_group);
+                       list_add_tail_rcu(&p->thread_node,
+                                         &p->signal->thread_head);
                }
                attach_pid(p, PIDTYPE_PID);
                nr_threads++;
@@ -1647,7 +1650,7 @@ SYSCALL_DEFINE0(fork)
        return do_fork(SIGCHLD, 0, 0, NULL, NULL);
 #else
        /* can not support in nommu mode */
-       return(-EINVAL);
+       return -EINVAL;
 #endif
 }
 #endif
@@ -1655,7 +1658,7 @@ SYSCALL_DEFINE0(fork)
 #ifdef __ARCH_WANT_SYS_VFORK
 SYSCALL_DEFINE0(vfork)
 {
-       return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, 
+       return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
                        0, NULL, NULL);
 }
 #endif
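
The copy_signal() hunk links the new thread_head/thread_node fields with two compound-literal assignments instead of INIT_LIST_HEAD() plus list_add(). For a two-element circular list the two forms produce identical links, which is what the in-line comment alludes to. A small illustration using plain <linux/list.h> (not taken from the patch):

    struct list_head head, node;

    /* conventional form */
    INIT_LIST_HEAD(&head);
    list_add(&node, &head);

    /* equivalent direct initialisation, as done in copy_signal() */
    head = (struct list_head)LIST_HEAD_INIT(node);
    node = (struct list_head)LIST_HEAD_INIT(head);

    /*
     * Either way the result is:
     *   head.next == head.prev == &node
     *   node.next == node.prev == &head
     */
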
index 9328b80eaf14c347bb188bee856154ea07b86168..7899ee9dd212ecc031196ac9ed71e6b9d9cc08ac 100644 (file)
@@ -244,5 +244,4 @@ static int __init hung_task_init(void)
 
        return 0;
 }
-
-module_init(hung_task_init);
+subsys_initcall(hung_task_init);
index 9c970167e4025f01be6bd696499eec2504c72ef7..00232e288f6138064ab754dae2a1dfef95a2e2f6 100644 (file)
@@ -932,6 +932,7 @@ static int kimage_load_segment(struct kimage *image,
  */
 struct kimage *kexec_image;
 struct kimage *kexec_crash_image;
+int kexec_load_disabled;
 
 static DEFINE_MUTEX(kexec_mutex);
 
@@ -942,7 +943,7 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
        int result;
 
        /* We only trust the superuser with rebooting the system. */
-       if (!capable(CAP_SYS_BOOT))
+       if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
                return -EPERM;
 
        /*
@@ -1234,7 +1235,7 @@ static int __init crash_notes_memory_init(void)
        }
        return 0;
 }
-module_init(crash_notes_memory_init)
+subsys_initcall(crash_notes_memory_init);
 
 
 /*
@@ -1628,7 +1629,7 @@ static int __init crash_save_vmcoreinfo_init(void)
        return 0;
 }
 
-module_init(crash_save_vmcoreinfo_init)
+subsys_initcall(crash_save_vmcoreinfo_init);
 
 /*
  * Move into place and start executing a preloaded standalone
index b086006c59e7c6957a51984a3ec101ea2db8525d..ed8e7bd35074a3a45148a33bc7897a96fef359e6 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/ptrace.h>
 #include <linux/async.h>
 #include <asm/uaccess.h>
+#include <linux/kthread.h>
 
 #include <trace/events/module.h>
 
@@ -209,8 +210,14 @@ static int ____call_usermodehelper(void *data)
        flush_signal_handlers(current, 1);
        spin_unlock_irq(&current->sighand->siglock);
 
-       /* We can run anywhere, unlike our parent keventd(). */
-       set_cpus_allowed_ptr(current, cpu_all_mask);
+       /*
+        * Kthreadd can be restricted to a set of processors if the user wants
+        * to protect other processors from OS latencies. If that has happened
+        * then we do not want to disturb the other processors here either so we
+        * start the usermode helper threads only on the processors allowed for
+        * kthreadd.
+        */
+       set_kthreadd_affinity();
 
        /*
         * Our parent is keventd, which runs with elevated scheduling priority.
index 9659d38e008f7f4e35ed4fcfb9a866f61f00c72f..d945a949760f0ec4a9fd09313e30fb5f651bc39a 100644 (file)
@@ -126,7 +126,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj,
 {
        return sprintf(buf, "%lx %x\n",
                       paddr_vmcoreinfo_note(),
-                      (unsigned int)vmcoreinfo_max_size);
+                      (unsigned int)sizeof(vmcoreinfo_note));
 }
 KERNEL_ATTR_RO(vmcoreinfo);
 
index b5ae3ee860a9a520d1ea78d15ecdb94b75a02cca..232f06c4a5377f19a195aee717617099e4b970ed 100644 (file)
@@ -136,6 +136,15 @@ void *kthread_data(struct task_struct *task)
        return to_kthread(task)->data;
 }
 
+/*
+ * Set the affinity of the calling task to be the same
+ * as the kthreadd affinities.
+ */
+void set_kthreadd_affinity(void)
+{
+       set_cpus_allowed_ptr(current, &kthreadd_task->cpus_allowed);
+}
+
 /**
  * probe_kthread_data - speculative version of kthread_data()
  * @task: possible kthread task in question
index b38109e204aff8e83afb0e15484cdfec8e450575..d9f61a145802df2393f6041e935d00ea3f15ff8f 100644 (file)
@@ -637,7 +637,7 @@ __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
                BUG_ON(!region);
        } else
                /* This allocation cannot fail */
-               region = alloc_bootmem(sizeof(struct nosave_region));
+               region = memblock_virt_alloc(sizeof(struct nosave_region), 0);
        region->start_pfn = start_pfn;
        region->end_pfn = end_pfn;
        list_add_tail(&region->list, &nosave_regions);
index be7c86bae5762e4e5283dcbd305951301310c58d..b1d255f041351779dacb5341dbcb0c0c4a09fb87 100644 (file)
@@ -757,14 +757,10 @@ void __init setup_log_buf(int early)
                return;
 
        if (early) {
-               unsigned long mem;
-
-               mem = memblock_alloc(new_log_buf_len, PAGE_SIZE);
-               if (!mem)
-                       return;
-               new_log_buf = __va(mem);
+               new_log_buf =
+                       memblock_virt_alloc(new_log_buf_len, PAGE_SIZE);
        } else {
-               new_log_buf = alloc_bootmem_nopanic(new_log_buf_len);
+               new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len, 0);
        }
 
        if (unlikely(!new_log_buf)) {
@@ -1599,10 +1595,13 @@ asmlinkage int vprintk_emit(int facility, int level,
                 * either merge it with the current buffer and flush, or if
                 * there was a race with interrupts (prefix == true) then just
                 * flush it out and store this line separately.
+                * If the preceding printk was from a different task and missed
+                * a newline, flush and append the newline.
                 */
-               if (cont.len && cont.owner == current) {
-                       if (!(lflags & LOG_PREFIX))
-                               stored = cont_add(facility, level, text, text_len);
+               if (cont.len) {
+                       if (cont.owner == current && !(lflags & LOG_PREFIX))
+                               stored = cont_add(facility, level, text,
+                                                 text_len);
                        cont_flush(LOG_NEWLINE);
                }
 
index 6631e1ef55ab0970f095b42ec350982abd8d9678..b37576b22acc9c7599fd827c9f96cce83e11f663 100644 (file)
@@ -604,5 +604,5 @@ int __ref create_proc_profile(void) /* false positive from hotcpu_notifier */
        hotcpu_notifier(profile_cpu_callback, 0);
        return 0;
 }
-module_init(create_proc_profile);
+subsys_initcall(create_proc_profile);
 #endif /* CONFIG_PROC_FS */
index de46c2098e363552cac0332963ecf13c3318362c..7f45fd52bc9ad6c9b6733dcfdda9993d5e59b4bf 100644 (file)
@@ -1119,6 +1119,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
        if (!cpumask_test_cpu(arg.src_cpu, tsk_cpus_allowed(arg.dst_task)))
                goto out;
 
+       trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu);
        ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg);
 
 out:
@@ -1780,7 +1781,29 @@ void set_numabalancing_state(bool enabled)
        numabalancing_enabled = enabled;
 }
 #endif /* CONFIG_SCHED_DEBUG */
-#endif /* CONFIG_NUMA_BALANCING */
+
+#ifdef CONFIG_PROC_SYSCTL
+int sysctl_numa_balancing(struct ctl_table *table, int write,
+                        void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       struct ctl_table t;
+       int err;
+       int state = numabalancing_enabled;
+
+       if (write && !capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       t = *table;
+       t.data = &state;
+       err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+       if (err < 0)
+               return err;
+       if (write)
+               set_numabalancing_state(state);
+       return err;
+}
+#endif
+#endif
 
 /*
  * fork()/clone()-time setup:
@@ -4590,6 +4613,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu)
 
        /* TODO: This is not properly updating schedstats */
 
+       trace_sched_move_numa(p, curr_cpu, target_cpu);
        return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg);
 }
 
index b24b6cfde9aaba5ecfe038ff503e3cd5525f610e..867b0a4b08935a4ad2ae3f49f56acee481ce07ba 100644 (file)
@@ -1250,11 +1250,15 @@ static int task_numa_migrate(struct task_struct *p)
        p->numa_scan_period = task_scan_min(p);
 
        if (env.best_task == NULL) {
-               int ret = migrate_task_to(p, env.best_cpu);
+               ret = migrate_task_to(p, env.best_cpu);
+               if (ret != 0)
+                       trace_sched_stick_numa(p, env.src_cpu, env.best_cpu);
                return ret;
        }
 
        ret = migrate_swap(p, env.best_task);
+       if (ret != 0)
+               trace_sched_stick_numa(p, env.src_cpu, task_cpu(env.best_task));
        put_task_struct(env.best_task);
        return ret;
 }
index da98af347e8b6ed212e67086ae8576628c11d0dd..a476bea17fbc663974a07fa4521e6b590071fbef 100644 (file)
@@ -142,4 +142,4 @@ static int __init proc_schedstat_init(void)
        proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
        return 0;
 }
-module_init(proc_schedstat_init);
+subsys_initcall(proc_schedstat_init);
index 940b30ee9a30fb0cfb8eabdc186c2f2a4f27e7fb..52f881db1ca02a4190b46174e5d7ad458c7b5c33 100644 (file)
@@ -2047,8 +2047,8 @@ static bool do_signal_stop(int signr)
                if (task_set_jobctl_pending(current, signr | gstop))
                        sig->group_stop_count++;
 
-               for (t = next_thread(current); t != current;
-                    t = next_thread(t)) {
+               t = current;
+               while_each_thread(current, t) {
                        /*
                         * Setting state to TASK_STOPPED for a group
                         * stop is always done with the siglock held,
@@ -3125,8 +3125,7 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
                        rm_from_queue_full(&mask, &t->signal->shared_pending);
                        do {
                                rm_from_queue_full(&mask, &t->pending);
-                               t = next_thread(t);
-                       } while (t != current);
+                       } while_each_thread(current, t);
                }
        }
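
The do_signal_stop() and do_sigaction() hunks replace open-coded next_thread() loops with while_each_thread(). At this point in the tree the macro is essentially a while-loop wrapper around next_thread() (definition reproduced below from include/linux/sched.h of that era), so the two idioms used above walk the thread group as follows; p and t are struct task_struct pointers taken from the surrounding code.

    #define while_each_thread(g, t) \
            while ((t = next_thread(t)) != g)

    /* every thread in p's group except p itself: */
    t = p;
    while_each_thread(p, t) {
            /* ... */
    }

    /* every thread including p: */
    t = p;
    do {
            /* ... */
    } while_each_thread(p, t);
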
 
index c72311324ea76ef2c9ae9514d2a30e4aa7c4be66..c0a58be780a407a5bf350e852db1c9800c98dd34 100644 (file)
@@ -895,8 +895,7 @@ SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
  * only important on a multi-user system anyway, to make sure one user
  * can't send a signal to a process owned by another.  -TYT, 12/12/91
  *
- * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
- * LBT 04.03.94
+ * !PF_FORKNOEXEC check to conform completely to POSIX.
  */
 SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
 {
@@ -932,7 +931,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
                if (task_session(p) != task_session(group_leader))
                        goto out;
                err = -EACCES;
-               if (p->did_exec)
+               if (!(p->flags & PF_FORKNOEXEC))
                        goto out;
        } else {
                err = -ESRCH;
@@ -1572,8 +1571,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
                        t = p;
                        do {
                                accumulate_thread_rusage(t, r);
-                               t = next_thread(t);
-                       } while (t != p);
+                       } while_each_thread(p, t);
                        break;
 
                default:
index c8da99f905cf522a34dd7ff059bde584e4d8c90a..096db7452cbd29c8250a49741d3c8b210b82cf74 100644 (file)
@@ -62,6 +62,7 @@
 #include <linux/capability.h>
 #include <linux/binfmts.h>
 #include <linux/sched/sysctl.h>
+#include <linux/kexec.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -95,8 +96,6 @@
 #if defined(CONFIG_SYSCTL)
 
 /* External variables not in a header file. */
-extern int sysctl_overcommit_memory;
-extern int sysctl_overcommit_ratio;
 extern int max_threads;
 extern int suid_dumpable;
 #ifdef CONFIG_COREDUMP
@@ -391,6 +390,15 @@ static struct ctl_table kern_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
+       {
+               .procname       = "numa_balancing",
+               .data           = NULL, /* filled in by handler */
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = sysctl_numa_balancing,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
 #endif /* CONFIG_NUMA_BALANCING */
 #endif /* CONFIG_SCHED_DEBUG */
        {
@@ -607,6 +615,18 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = proc_dointvec,
        },
 #endif
+#ifdef CONFIG_KEXEC
+       {
+               .procname       = "kexec_load_disabled",
+               .data           = &kexec_load_disabled,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               /* only handle a transition from default "0" to "1" */
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &one,
+               .extra2         = &one,
+       },
+#endif
 #ifdef CONFIG_MODULES
        {
                .procname       = "modprobe",
@@ -1121,7 +1141,14 @@ static struct ctl_table vm_table[] = {
                .data           = &sysctl_overcommit_ratio,
                .maxlen         = sizeof(sysctl_overcommit_ratio),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = overcommit_ratio_handler,
+       },
+       {
+               .procname       = "overcommit_kbytes",
+               .data           = &sysctl_overcommit_kbytes,
+               .maxlen         = sizeof(sysctl_overcommit_kbytes),
+               .mode           = 0644,
+               .proc_handler   = overcommit_kbytes_handler,
        },
        {
                .procname       = "page-cluster", 
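
The kexec_load_disabled entry above is wired to proc_dointvec_minmax with both extra1 and extra2 pointing at one, so the only value a write can store is 1; together with the default of 0 this forms a one-way latch: once set, kexec_load(2) fails with -EPERM even for CAP_SYS_BOOT, as the kernel/kexec.c hunk earlier shows. A hypothetical userspace snippet that an init script might use to engage the latch (the path simply follows the procname above):

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/sys/kernel/kexec_load_disabled", "w");

            if (!f) {
                    perror("kexec_load_disabled");
                    return 1;
            }
            /* 1 is the only accepted value; writing 0 back is rejected */
            fputs("1\n", f);
            return fclose(f) ? 1 : 0;
    }
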
index 0abb364642818e4ef842ab2f3631d15cb80400f2..e5387a069060673d74c0f463e8d42331b033358b 100644 (file)
@@ -132,6 +132,10 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
        clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 3600);
 
        r = rate;
+       /*
+        * Use 4MHz instead of 1MHz so that things like 1.832Mhz show as
+        * 1832Khz
+        */
        if (r >= 4000000) {
                r /= 1000000;
                r_unit = 'M';
index c006131beb77c5151f92cacb9e11e6a3b1defe1b..294fc6a9416845f7d5aa2b2d2029fe1b95bcbdbd 100644 (file)
@@ -222,5 +222,4 @@ static int __init uid_cache_init(void)
 
        return 0;
 }
-
-module_init(uid_cache_init);
+subsys_initcall(uid_cache_init);
index 240fb62cf3945aa0f7b601b343db65312a42f345..4f211868e6a2d58d4d02d4da6e2b9617b8957f60 100644 (file)
@@ -902,4 +902,4 @@ static __init int user_namespaces_init(void)
        user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
        return 0;
 }
-module_init(user_namespaces_init);
+subsys_initcall(user_namespaces_init);
index 4431610f049ac77888adefe3a335d47d4d232939..9cbaef929c2fdb281279d107d8d0f5fe240e1074 100644 (file)
@@ -158,14 +158,14 @@ void touch_all_softlockup_watchdogs(void)
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 void touch_nmi_watchdog(void)
 {
-       if (watchdog_user_enabled) {
-               unsigned cpu;
-
-               for_each_present_cpu(cpu) {
-                       if (per_cpu(watchdog_nmi_touch, cpu) != true)
-                               per_cpu(watchdog_nmi_touch, cpu) = true;
-               }
-       }
+       /*
+        * Using __raw here because some code paths have
+        * preemption enabled.  If preemption is enabled
+        * then interrupts should be enabled too, in which
+        * case we shouldn't have to worry about the watchdog
+        * going off.
+        */
+       __raw_get_cpu_var(watchdog_nmi_touch) = true;
        touch_softlockup_watchdog();
 }
 EXPORT_SYMBOL(touch_nmi_watchdog);
@@ -239,10 +239,12 @@ static void watchdog_overflow_callback(struct perf_event *event,
                if (__this_cpu_read(hard_watchdog_warn) == true)
                        return;
 
-               if (hardlockup_panic)
+               if (hardlockup_panic) {
+                       trigger_all_cpu_backtrace();
                        panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
-               else
+               } else {
                        WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+               }
 
                __this_cpu_write(hard_watchdog_warn, true);
                return;
@@ -323,8 +325,10 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
                else
                        dump_stack();
 
-               if (softlockup_panic)
+               if (softlockup_panic) {
+                       trigger_all_cpu_backtrace();
                        panic("softlockup: hung tasks");
+               }
                __this_cpu_write(soft_watchdog_warn, true);
        } else
                __this_cpu_write(soft_watchdog_warn, false);
index 991c98bc4a3f51e9e7f377274084bec909483ea9..d2cbd6f749f5fdffd430b7f8ce6506f621afa9f2 100644 (file)
@@ -177,6 +177,13 @@ config CRC8
          when they need to do cyclic redundancy check according CRC8
          algorithm. Module will be called crc8.
 
+config CRC64_ECMA
+       tristate "CRC64 ECMA function"
+       help
+         This option provides CRC64 ECMA function. Drivers may select this
+         when they need to do cyclic redundancy check according to the CRC64
+         ECMA algorithm.
+
 config AUDIT_GENERIC
        bool
        depends on AUDIT && !AUDIT_ARCH
index 501ec4deaddb18fa35cac20c14b93a7519c94d5e..dbf94a7d25a8a1c7be60d62f29680e69e1261f1b 100644 (file)
@@ -1573,8 +1573,43 @@ config DMA_API_DEBUG
          With this option you will be able to detect common bugs in device
          drivers like double-freeing of DMA mappings or freeing mappings that
          were never allocated.
-         This option causes a performance degredation.  Use only if you want
-         to debug device drivers. If unsure, say N.
+
+         This also attempts to catch cases where a page owned by DMA is
+         accessed by the cpu in a way that could cause data corruption.  For
+         example, this enables cow_user_page() to check that the source page is
+         not undergoing DMA.
+
+         This option causes a performance degradation.  Use only if you want to
+         debug device drivers and dma interactions.
+
+         If unsure, say N.
+
+config TEST_MODULE
+       tristate "Test module loading with 'hello world' module"
+       default n
+       depends on m
+       help
+         This builds the "test_module" module that emits "Hello, world"
+         on printk when loaded. It is designed to be used for basic
+         evaluation of the module loading subsystem (for example when
+         validating module verification). It lacks any extra dependencies,
+         and will not normally be loaded by the system unless explicitly
+         requested by name.
+
+         If unsure, say N.
+
+config TEST_USER_COPY
+       tristate "Test user/kernel boundary protections"
+       default n
+       depends on m
+       help
+         This builds the "test_user_copy" module that runs sanity checks
+         on the copy_to/from_user infrastructure, making sure basic
+         user/kernel boundary testing is working. If it fails to load,
+         a regression has been detected in the user/kernel memory boundary
+         protections.
+
+         If unsure, say N.
 
 source "samples/Kconfig"
 
index d0f79c547d97953549f2420d9b9b52edd6a77ed5..7a42a946cab22267fb4537e14dfa18bfec4e4206 100644 (file)
@@ -31,6 +31,8 @@ obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
 obj-y += kstrtox.o
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
+obj-$(CONFIG_TEST_MODULE) += test_module.o
+obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
@@ -66,6 +68,7 @@ obj-$(CONFIG_CRC32)   += crc32.o
 obj-$(CONFIG_CRC7)     += crc7.o
 obj-$(CONFIG_LIBCRC32C)        += libcrc32c.o
 obj-$(CONFIG_CRC8)     += crc8.o
+obj-$(CONFIG_CRC64_ECMA)       += crc64_ecma.o
 obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
 
 obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
index 1b6a44f1ec3e3b10f3b02d19f8ab0ce59453f6d5..c0b1007011e188836616509deef5876c207d32d4 100644 (file)
@@ -157,7 +157,7 @@ enum assoc_array_walk_status {
        assoc_array_walk_tree_empty,
        assoc_array_walk_found_terminal_node,
        assoc_array_walk_found_wrong_shortcut,
-} status;
+};
 
 struct assoc_array_walk_result {
        struct {
index eb6791188cf51a19b22861ea23909f93be08935d..d4932f745e9214aaf62d8ad54b0f1092dd555414 100644 (file)
@@ -49,13 +49,13 @@ static int get_range(char **str, int *pint)
  *     3 - hyphen found to denote a range
  */
 
-int get_option (char **str, int *pint)
+int get_option(char **str, int *pint)
 {
        char *cur = *str;
 
        if (!cur || !(*cur))
                return 0;
-       *pint = simple_strtol (cur, str, 0);
+       *pint = simple_strtol(cur, str, 0);
        if (cur == *str)
                return 0;
        if (**str == ',') {
@@ -67,6 +67,7 @@ int get_option (char **str, int *pint)
 
        return 1;
 }
+EXPORT_SYMBOL(get_option);
 
 /**
  *     get_options - Parse a string into a list of integers
@@ -84,13 +85,13 @@ int get_option (char **str, int *pint)
  *     the parse to end (typically a null terminator, if @str is
  *     completely parseable).
  */
+
 char *get_options(const char *str, int nints, int *ints)
 {
        int res, i = 1;
 
        while (i < nints) {
-               res = get_option ((char **)&str, ints + i);
+               res = get_option((char **)&str, ints + i);
                if (res == 0)
                        break;
                if (res == 3) {
@@ -112,6 +113,7 @@ char *get_options(const char *str, int nints, int *ints)
        ints[0] = i - 1;
        return (char *)str;
 }
+EXPORT_SYMBOL(get_options);
 
 /**
  *     memparse - parse a string with mem suffixes into a number
@@ -152,8 +154,4 @@ unsigned long long memparse(const char *ptr, char **retptr)
 
        return ret;
 }
-
-
 EXPORT_SYMBOL(memparse);
-EXPORT_SYMBOL(get_option);
-EXPORT_SYMBOL(get_options);
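As a rough illustration of the helpers whose EXPORT_SYMBOL()s move next to their definitions above, get_options() parses a comma-separated list with optional hyphen ranges and stores the count in ints[0]; the string and array below are arbitrary examples:

	int ints[5];	/* ints[0] receives the number of values parsed */
	char *rest;

	rest = get_options("1,3-5", ARRAY_SIZE(ints), ints);
	/* expected: ints[0] == 4 and ints[1..4] == 1, 3, 4, 5 */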
index d327b87c99b7ca28a222c20e6d05975bfa912e9e..b810b753c607500a9aab3aedd3060cb6c35189fd 100644 (file)
@@ -140,7 +140,7 @@ EXPORT_SYMBOL(zalloc_cpumask_var);
  */
 void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask)
 {
-       *mask = alloc_bootmem(cpumask_size());
+       *mask = memblock_virt_alloc(cpumask_size(), 0);
 }
 
 /**
@@ -161,6 +161,6 @@ EXPORT_SYMBOL(free_cpumask_var);
  */
 void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 {
-       free_bootmem(__pa(mask), cpumask_size());
+       memblock_free_early(__pa(mask), cpumask_size());
 }
 #endif
diff --git a/lib/crc64_ecma.c b/lib/crc64_ecma.c
new file mode 100644 (file)
index 0000000..41629ea
--- /dev/null
@@ -0,0 +1,341 @@
+/*
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/crc64_ecma.h>
+
+
+#define CRC64_BYTE_MASK                        0xFF
+#define CRC64_TABLE_SIZE               256
+
+
+struct crc64_table {
+       u64 seed;
+       u64 table[CRC64_TABLE_SIZE];
+};
+
+
+static struct crc64_table CRC64_ECMA_182 = {
+       CRC64_DEFAULT_INITVAL,
+       {
+               0x0000000000000000ULL,
+               0xb32e4cbe03a75f6fULL,
+               0xf4843657a840a05bULL,
+               0x47aa7ae9abe7ff34ULL,
+               0x7bd0c384ff8f5e33ULL,
+               0xc8fe8f3afc28015cULL,
+               0x8f54f5d357cffe68ULL,
+               0x3c7ab96d5468a107ULL,
+               0xf7a18709ff1ebc66ULL,
+               0x448fcbb7fcb9e309ULL,
+               0x0325b15e575e1c3dULL,
+               0xb00bfde054f94352ULL,
+               0x8c71448d0091e255ULL,
+               0x3f5f08330336bd3aULL,
+               0x78f572daa8d1420eULL,
+               0xcbdb3e64ab761d61ULL,
+               0x7d9ba13851336649ULL,
+               0xceb5ed8652943926ULL,
+               0x891f976ff973c612ULL,
+               0x3a31dbd1fad4997dULL,
+               0x064b62bcaebc387aULL,
+               0xb5652e02ad1b6715ULL,
+               0xf2cf54eb06fc9821ULL,
+               0x41e11855055bc74eULL,
+               0x8a3a2631ae2dda2fULL,
+               0x39146a8fad8a8540ULL,
+               0x7ebe1066066d7a74ULL,
+               0xcd905cd805ca251bULL,
+               0xf1eae5b551a2841cULL,
+               0x42c4a90b5205db73ULL,
+               0x056ed3e2f9e22447ULL,
+               0xb6409f5cfa457b28ULL,
+               0xfb374270a266cc92ULL,
+               0x48190ecea1c193fdULL,
+               0x0fb374270a266cc9ULL,
+               0xbc9d3899098133a6ULL,
+               0x80e781f45de992a1ULL,
+               0x33c9cd4a5e4ecdceULL,
+               0x7463b7a3f5a932faULL,
+               0xc74dfb1df60e6d95ULL,
+               0x0c96c5795d7870f4ULL,
+               0xbfb889c75edf2f9bULL,
+               0xf812f32ef538d0afULL,
+               0x4b3cbf90f69f8fc0ULL,
+               0x774606fda2f72ec7ULL,
+               0xc4684a43a15071a8ULL,
+               0x83c230aa0ab78e9cULL,
+               0x30ec7c140910d1f3ULL,
+               0x86ace348f355aadbULL,
+               0x3582aff6f0f2f5b4ULL,
+               0x7228d51f5b150a80ULL,
+               0xc10699a158b255efULL,
+               0xfd7c20cc0cdaf4e8ULL,
+               0x4e526c720f7dab87ULL,
+               0x09f8169ba49a54b3ULL,
+               0xbad65a25a73d0bdcULL,
+               0x710d64410c4b16bdULL,
+               0xc22328ff0fec49d2ULL,
+               0x85895216a40bb6e6ULL,
+               0x36a71ea8a7ace989ULL,
+               0x0adda7c5f3c4488eULL,
+               0xb9f3eb7bf06317e1ULL,
+               0xfe5991925b84e8d5ULL,
+               0x4d77dd2c5823b7baULL,
+               0x64b62bcaebc387a1ULL,
+               0xd7986774e864d8ceULL,
+               0x90321d9d438327faULL,
+               0x231c512340247895ULL,
+               0x1f66e84e144cd992ULL,
+               0xac48a4f017eb86fdULL,
+               0xebe2de19bc0c79c9ULL,
+               0x58cc92a7bfab26a6ULL,
+               0x9317acc314dd3bc7ULL,
+               0x2039e07d177a64a8ULL,
+               0x67939a94bc9d9b9cULL,
+               0xd4bdd62abf3ac4f3ULL,
+               0xe8c76f47eb5265f4ULL,
+               0x5be923f9e8f53a9bULL,
+               0x1c4359104312c5afULL,
+               0xaf6d15ae40b59ac0ULL,
+               0x192d8af2baf0e1e8ULL,
+               0xaa03c64cb957be87ULL,
+               0xeda9bca512b041b3ULL,
+               0x5e87f01b11171edcULL,
+               0x62fd4976457fbfdbULL,
+               0xd1d305c846d8e0b4ULL,
+               0x96797f21ed3f1f80ULL,
+               0x2557339fee9840efULL,
+               0xee8c0dfb45ee5d8eULL,
+               0x5da24145464902e1ULL,
+               0x1a083bacedaefdd5ULL,
+               0xa9267712ee09a2baULL,
+               0x955cce7fba6103bdULL,
+               0x267282c1b9c65cd2ULL,
+               0x61d8f8281221a3e6ULL,
+               0xd2f6b4961186fc89ULL,
+               0x9f8169ba49a54b33ULL,
+               0x2caf25044a02145cULL,
+               0x6b055fede1e5eb68ULL,
+               0xd82b1353e242b407ULL,
+               0xe451aa3eb62a1500ULL,
+               0x577fe680b58d4a6fULL,
+               0x10d59c691e6ab55bULL,
+               0xa3fbd0d71dcdea34ULL,
+               0x6820eeb3b6bbf755ULL,
+               0xdb0ea20db51ca83aULL,
+               0x9ca4d8e41efb570eULL,
+               0x2f8a945a1d5c0861ULL,
+               0x13f02d374934a966ULL,
+               0xa0de61894a93f609ULL,
+               0xe7741b60e174093dULL,
+               0x545a57dee2d35652ULL,
+               0xe21ac88218962d7aULL,
+               0x5134843c1b317215ULL,
+               0x169efed5b0d68d21ULL,
+               0xa5b0b26bb371d24eULL,
+               0x99ca0b06e7197349ULL,
+               0x2ae447b8e4be2c26ULL,
+               0x6d4e3d514f59d312ULL,
+               0xde6071ef4cfe8c7dULL,
+               0x15bb4f8be788911cULL,
+               0xa6950335e42fce73ULL,
+               0xe13f79dc4fc83147ULL,
+               0x521135624c6f6e28ULL,
+               0x6e6b8c0f1807cf2fULL,
+               0xdd45c0b11ba09040ULL,
+               0x9aefba58b0476f74ULL,
+               0x29c1f6e6b3e0301bULL,
+               0xc96c5795d7870f42ULL,
+               0x7a421b2bd420502dULL,
+               0x3de861c27fc7af19ULL,
+               0x8ec62d7c7c60f076ULL,
+               0xb2bc941128085171ULL,
+               0x0192d8af2baf0e1eULL,
+               0x4638a2468048f12aULL,
+               0xf516eef883efae45ULL,
+               0x3ecdd09c2899b324ULL,
+               0x8de39c222b3eec4bULL,
+               0xca49e6cb80d9137fULL,
+               0x7967aa75837e4c10ULL,
+               0x451d1318d716ed17ULL,
+               0xf6335fa6d4b1b278ULL,
+               0xb199254f7f564d4cULL,
+               0x02b769f17cf11223ULL,
+               0xb4f7f6ad86b4690bULL,
+               0x07d9ba1385133664ULL,
+               0x4073c0fa2ef4c950ULL,
+               0xf35d8c442d53963fULL,
+               0xcf273529793b3738ULL,
+               0x7c0979977a9c6857ULL,
+               0x3ba3037ed17b9763ULL,
+               0x888d4fc0d2dcc80cULL,
+               0x435671a479aad56dULL,
+               0xf0783d1a7a0d8a02ULL,
+               0xb7d247f3d1ea7536ULL,
+               0x04fc0b4dd24d2a59ULL,
+               0x3886b22086258b5eULL,
+               0x8ba8fe9e8582d431ULL,
+               0xcc0284772e652b05ULL,
+               0x7f2cc8c92dc2746aULL,
+               0x325b15e575e1c3d0ULL,
+               0x8175595b76469cbfULL,
+               0xc6df23b2dda1638bULL,
+               0x75f16f0cde063ce4ULL,
+               0x498bd6618a6e9de3ULL,
+               0xfaa59adf89c9c28cULL,
+               0xbd0fe036222e3db8ULL,
+               0x0e21ac88218962d7ULL,
+               0xc5fa92ec8aff7fb6ULL,
+               0x76d4de52895820d9ULL,
+               0x317ea4bb22bfdfedULL,
+               0x8250e80521188082ULL,
+               0xbe2a516875702185ULL,
+               0x0d041dd676d77eeaULL,
+               0x4aae673fdd3081deULL,
+               0xf9802b81de97deb1ULL,
+               0x4fc0b4dd24d2a599ULL,
+               0xfceef8632775faf6ULL,
+               0xbb44828a8c9205c2ULL,
+               0x086ace348f355aadULL,
+               0x34107759db5dfbaaULL,
+               0x873e3be7d8faa4c5ULL,
+               0xc094410e731d5bf1ULL,
+               0x73ba0db070ba049eULL,
+               0xb86133d4dbcc19ffULL,
+               0x0b4f7f6ad86b4690ULL,
+               0x4ce50583738cb9a4ULL,
+               0xffcb493d702be6cbULL,
+               0xc3b1f050244347ccULL,
+               0x709fbcee27e418a3ULL,
+               0x3735c6078c03e797ULL,
+               0x841b8ab98fa4b8f8ULL,
+               0xadda7c5f3c4488e3ULL,
+               0x1ef430e13fe3d78cULL,
+               0x595e4a08940428b8ULL,
+               0xea7006b697a377d7ULL,
+               0xd60abfdbc3cbd6d0ULL,
+               0x6524f365c06c89bfULL,
+               0x228e898c6b8b768bULL,
+               0x91a0c532682c29e4ULL,
+               0x5a7bfb56c35a3485ULL,
+               0xe955b7e8c0fd6beaULL,
+               0xaeffcd016b1a94deULL,
+               0x1dd181bf68bdcbb1ULL,
+               0x21ab38d23cd56ab6ULL,
+               0x9285746c3f7235d9ULL,
+               0xd52f0e859495caedULL,
+               0x6601423b97329582ULL,
+               0xd041dd676d77eeaaULL,
+               0x636f91d96ed0b1c5ULL,
+               0x24c5eb30c5374ef1ULL,
+               0x97eba78ec690119eULL,
+               0xab911ee392f8b099ULL,
+               0x18bf525d915feff6ULL,
+               0x5f1528b43ab810c2ULL,
+               0xec3b640a391f4fadULL,
+               0x27e05a6e926952ccULL,
+               0x94ce16d091ce0da3ULL,
+               0xd3646c393a29f297ULL,
+               0x604a2087398eadf8ULL,
+               0x5c3099ea6de60cffULL,
+               0xef1ed5546e415390ULL,
+               0xa8b4afbdc5a6aca4ULL,
+               0x1b9ae303c601f3cbULL,
+               0x56ed3e2f9e224471ULL,
+               0xe5c372919d851b1eULL,
+               0xa26908783662e42aULL,
+               0x114744c635c5bb45ULL,
+               0x2d3dfdab61ad1a42ULL,
+               0x9e13b115620a452dULL,
+               0xd9b9cbfcc9edba19ULL,
+               0x6a978742ca4ae576ULL,
+               0xa14cb926613cf817ULL,
+               0x1262f598629ba778ULL,
+               0x55c88f71c97c584cULL,
+               0xe6e6c3cfcadb0723ULL,
+               0xda9c7aa29eb3a624ULL,
+               0x69b2361c9d14f94bULL,
+               0x2e184cf536f3067fULL,
+               0x9d36004b35545910ULL,
+               0x2b769f17cf112238ULL,
+               0x9858d3a9ccb67d57ULL,
+               0xdff2a94067518263ULL,
+               0x6cdce5fe64f6dd0cULL,
+               0x50a65c93309e7c0bULL,
+               0xe388102d33392364ULL,
+               0xa4226ac498dedc50ULL,
+               0x170c267a9b79833fULL,
+               0xdcd7181e300f9e5eULL,
+               0x6ff954a033a8c131ULL,
+               0x28532e49984f3e05ULL,
+               0x9b7d62f79be8616aULL,
+               0xa707db9acf80c06dULL,
+               0x14299724cc279f02ULL,
+               0x5383edcd67c06036ULL,
+               0xe0ada17364673f59ULL
+       }
+};
+
+
+/*
+ * crc64_ecma_seed - Initializes the CRC64 ECMA seed.
+ */
+u64 crc64_ecma_seed(void)
+{
+       return CRC64_ECMA_182.seed;
+}
+EXPORT_SYMBOL(crc64_ecma_seed);
+
+/*
+ * crc64_ecma - Computes the 64 bit ECMA CRC.
+ *
+ * pdata: pointer to the data to compute checksum for.
+ * nbytes: number of bytes in data buffer.
+ * seed: CRC seed.
+ */
+u64 crc64_ecma(u8 const *pdata, u32 nbytes, u64 seed)
+{
+       unsigned int i;
+       u64 crc = seed;
+
+       for (i = 0; i < nbytes; i++)
+               crc = CRC64_ECMA_182.table[(crc ^ pdata[i]) & CRC64_BYTE_MASK] ^
+                       (crc >> 8);
+
+       return crc;
+}
+EXPORT_SYMBOL(crc64_ecma);
+
+MODULE_DESCRIPTION("CRC64 ECMA function");
+MODULE_AUTHOR("Freescale Semiconductor Inc.");
+MODULE_LICENSE("GPL");
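A hedged usage sketch for the new interface; the buffer below is arbitrary, and chaining the result back in as the seed works because the helper applies no final xor:

	u8 buf[64];
	u64 crc;

	memset(buf, 0xab, sizeof(buf));
	crc = crc64_ecma(buf, sizeof(buf), crc64_ecma_seed());
	/* the result may be fed back as the seed to checksum a further chunk */
	crc = crc64_ecma(buf, sizeof(buf), crc);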
index 3e67cfad16ad53614508e126d87f4d0024128577..7d1e83caf8ad8512c3a0d39e4f5e5b5c39de619f 100644 (file)
@@ -141,6 +141,7 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
                        goto exit_2;
                }
 
+               ret = -1;
                if (flush && flush(outp, dest_len) != dest_len)
                        goto exit_2;
                if (output)
index d87a17a819d07a58bc8f1dd40f2d41ae499935c8..c38083871f11dbaf3f7252674b937fe5e161380a 100644 (file)
@@ -53,11 +53,26 @@ enum map_err_types {
 
 #define DMA_DEBUG_STACKTRACE_ENTRIES 5
 
+/**
+ * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping
+ * @list: node on pre-allocated free_entries list
+ * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent
+ * @type: single, page, sg, coherent
+ * @pfn: page frame of the start address
+ * @offset: offset of mapping relative to pfn
+ * @size: length of the mapping
+ * @direction: enum dma_data_direction
+ * @sg_call_ents: 'nents' from dma_map_sg
+ * @sg_mapped_ents: 'mapped_ents' from dma_map_sg
+ * @map_err_type: track whether dma_mapping_error() was checked
+ * @stacktrace: support backtraces when a violation is detected
+ */
 struct dma_debug_entry {
        struct list_head list;
        struct device    *dev;
        int              type;
-       phys_addr_t      paddr;
+       unsigned long    pfn;
+       size_t           offset;
        u64              dev_addr;
        u64              size;
        int              direction;
@@ -372,6 +387,11 @@ static void hash_bucket_del(struct dma_debug_entry *entry)
        list_del(&entry->list);
 }
 
+static unsigned long long phys_addr(struct dma_debug_entry *entry)
+{
+       return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset;
+}
+
 /*
  * Dump mapping entries for debugging purposes
  */
@@ -389,9 +409,9 @@ void debug_dma_dump_mappings(struct device *dev)
                list_for_each_entry(entry, &bucket->list, list) {
                        if (!dev || dev == entry->dev) {
                                dev_info(entry->dev,
-                                        "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n",
+                                        "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n",
                                         type2name[entry->type], idx,
-                                        (unsigned long long)entry->paddr,
+                                        phys_addr(entry), entry->pfn,
                                         entry->dev_addr, entry->size,
                                         dir2name[entry->direction],
                                         maperr2str[entry->map_err_type]);
@@ -403,6 +423,133 @@ void debug_dma_dump_mappings(struct device *dev)
 }
 EXPORT_SYMBOL(debug_dma_dump_mappings);
 
+/*
+ * For each page mapped (initial page in the case of
+ * dma_alloc_coherent/dma_map_{single|page}, or each page in a
+ * scatterlist) insert into this tree using the pfn as the key. At
+ * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry.  If
+ * the pfn already exists at insertion time add a tag as a reference
+ * count for the overlapping mappings.  For now, the overlap tracking
+ * just ensures that 'unmaps' balance 'maps' before marking the pfn
+ * idle, but we should also be flagging overlaps as an API violation.
+ *
+ * Memory usage is mostly constrained by the maximum number of available
+ * dma-debug entries in that we need a free dma_debug_entry before
+ * inserting into the tree.  In the case of dma_map_{single|page} and
+ * dma_alloc_coherent there is only one dma_debug_entry and one pfn to
+ * track per event.  dma_map_sg(), on the other hand,
+ * consumes a single dma_debug_entry, but inserts 'nents' entries into
+ * the tree.
+ *
+ * At any time debug_dma_assert_idle() can be called to trigger a
+ * warning if the given page is in the active set.
+ */
+static RADIX_TREE(dma_active_pfn, GFP_NOWAIT);
+static DEFINE_SPINLOCK(radix_lock);
+#define ACTIVE_PFN_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
+
+static int active_pfn_read_overlap(unsigned long pfn)
+{
+       int overlap = 0, i;
+
+       for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
+               if (radix_tree_tag_get(&dma_active_pfn, pfn, i))
+                       overlap |= 1 << i;
+       return overlap;
+}
+
+static int active_pfn_set_overlap(unsigned long pfn, int overlap)
+{
+       int i;
+
+       if (overlap > ACTIVE_PFN_MAX_OVERLAP || overlap < 0)
+               return 0;
+
+       for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
+               if (overlap & 1 << i)
+                       radix_tree_tag_set(&dma_active_pfn, pfn, i);
+               else
+                       radix_tree_tag_clear(&dma_active_pfn, pfn, i);
+
+       return overlap;
+}
+
+static void active_pfn_inc_overlap(unsigned long pfn)
+{
+       int overlap = active_pfn_read_overlap(pfn);
+
+       overlap = active_pfn_set_overlap(pfn, ++overlap);
+
+       /* If we overflowed the overlap counter then we're potentially
+        * leaking dma-mappings.  Otherwise, if maps and unmaps are
+        * balanced then this overflow may cause false negatives in
+        * debug_dma_assert_idle() as the pfn may be marked idle
+        * prematurely.
+        */
+       WARN_ONCE(overlap == 0,
+                 "DMA-API: exceeded %d overlapping mappings of pfn %lx\n",
+                 ACTIVE_PFN_MAX_OVERLAP, pfn);
+}
+
+static int active_pfn_dec_overlap(unsigned long pfn)
+{
+       int overlap = active_pfn_read_overlap(pfn);
+
+       return active_pfn_set_overlap(pfn, --overlap);
+}
+
+static int active_pfn_insert(struct dma_debug_entry *entry)
+{
+       unsigned long flags;
+       int rc;
+
+       spin_lock_irqsave(&radix_lock, flags);
+       rc = radix_tree_insert(&dma_active_pfn, entry->pfn, entry);
+       if (rc == -EEXIST)
+               active_pfn_inc_overlap(entry->pfn);
+       spin_unlock_irqrestore(&radix_lock, flags);
+
+       return rc;
+}
+
+static void active_pfn_remove(struct dma_debug_entry *entry)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&radix_lock, flags);
+       if (active_pfn_dec_overlap(entry->pfn) == 0)
+               radix_tree_delete(&dma_active_pfn, entry->pfn);
+       spin_unlock_irqrestore(&radix_lock, flags);
+}
+
+/**
+ * debug_dma_assert_idle() - assert that a page is not undergoing dma
+ * @page: page to lookup in the dma_active_pfn tree
+ *
+ * Place a call to this routine in cases where the cpu touching the page
+ * before the dma completes (page is dma_unmapped) will lead to data
+ * corruption.
+ */
+void debug_dma_assert_idle(struct page *page)
+{
+       unsigned long flags;
+       struct dma_debug_entry *entry;
+
+       if (!page)
+               return;
+
+       spin_lock_irqsave(&radix_lock, flags);
+       entry = radix_tree_lookup(&dma_active_pfn, page_to_pfn(page));
+       spin_unlock_irqrestore(&radix_lock, flags);
+
+       if (!entry)
+               return;
+
+       err_printk(entry->dev, entry,
+                  "DMA-API: cpu touching an active dma mapped page "
+                  "[pfn=0x%lx]\n", entry->pfn);
+}
+
 /*
  * Wrapper function for adding an entry to the hash.
  * This function takes care of locking itself.
@@ -411,10 +558,21 @@ static void add_dma_entry(struct dma_debug_entry *entry)
 {
        struct hash_bucket *bucket;
        unsigned long flags;
+       int rc;
 
        bucket = get_hash_bucket(entry, &flags);
        hash_bucket_add(bucket, entry);
        put_hash_bucket(bucket, &flags);
+
+       rc = active_pfn_insert(entry);
+       if (rc == -ENOMEM) {
+               pr_err("DMA-API: pfn tracking ENOMEM, dma-debug disabled\n");
+               global_disable = true;
+       }
+
+       /* TODO: report -EEXIST errors here as overlapping mappings are
+        * not supported by the DMA API
+        */
 }
 
 static struct dma_debug_entry *__dma_entry_alloc(void)
@@ -469,6 +627,8 @@ static void dma_entry_free(struct dma_debug_entry *entry)
 {
        unsigned long flags;
 
+       active_pfn_remove(entry);
+
        /*
         * add to beginning of the list - this way the entries are
         * more likely cache hot when they are reallocated.
@@ -895,15 +1055,15 @@ static void check_unmap(struct dma_debug_entry *ref)
                           ref->dev_addr, ref->size,
                           type2name[entry->type], type2name[ref->type]);
        } else if ((entry->type == dma_debug_coherent) &&
-                  (ref->paddr != entry->paddr)) {
+                  (phys_addr(ref) != phys_addr(entry))) {
                err_printk(ref->dev, entry, "DMA-API: device driver frees "
                           "DMA memory with different CPU address "
                           "[device address=0x%016llx] [size=%llu bytes] "
                           "[cpu alloc address=0x%016llx] "
                           "[cpu free address=0x%016llx]",
                           ref->dev_addr, ref->size,
-                          (unsigned long long)entry->paddr,
-                          (unsigned long long)ref->paddr);
+                          phys_addr(entry),
+                          phys_addr(ref));
        }
 
        if (ref->sg_call_ents && ref->type == dma_debug_sg &&
@@ -1052,7 +1212,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
 
        entry->dev       = dev;
        entry->type      = dma_debug_page;
-       entry->paddr     = page_to_phys(page) + offset;
+       entry->pfn       = page_to_pfn(page);
+       entry->offset    = offset,
        entry->dev_addr  = dma_addr;
        entry->size      = size;
        entry->direction = direction;
@@ -1148,7 +1309,8 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
 
                entry->type           = dma_debug_sg;
                entry->dev            = dev;
-               entry->paddr          = sg_phys(s);
+               entry->pfn            = page_to_pfn(sg_page(s));
+               entry->offset         = s->offset,
                entry->size           = sg_dma_len(s);
                entry->dev_addr       = sg_dma_address(s);
                entry->direction      = direction;
@@ -1198,7 +1360,8 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
                struct dma_debug_entry ref = {
                        .type           = dma_debug_sg,
                        .dev            = dev,
-                       .paddr          = sg_phys(s),
+                       .pfn            = page_to_pfn(sg_page(s)),
+                       .offset         = s->offset,
                        .dev_addr       = sg_dma_address(s),
                        .size           = sg_dma_len(s),
                        .direction      = dir,
@@ -1233,7 +1396,8 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size,
 
        entry->type      = dma_debug_coherent;
        entry->dev       = dev;
-       entry->paddr     = virt_to_phys(virt);
+       entry->pfn       = page_to_pfn(virt_to_page(virt));
+       entry->offset    = (size_t) virt & PAGE_MASK;
        entry->size      = size;
        entry->dev_addr  = dma_addr;
        entry->direction = DMA_BIDIRECTIONAL;
@@ -1248,7 +1412,8 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
        struct dma_debug_entry ref = {
                .type           = dma_debug_coherent,
                .dev            = dev,
-               .paddr          = virt_to_phys(virt),
+               .pfn            = page_to_pfn(virt_to_page(virt)),
+               .offset         = (size_t) virt & PAGE_MASK,
                .dev_addr       = addr,
                .size           = size,
                .direction      = DMA_BIDIRECTIONAL,
@@ -1356,7 +1521,8 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
                struct dma_debug_entry ref = {
                        .type           = dma_debug_sg,
                        .dev            = dev,
-                       .paddr          = sg_phys(s),
+                       .pfn            = page_to_pfn(sg_page(s)),
+                       .offset         = s->offset,
                        .dev_addr       = sg_dma_address(s),
                        .size           = sg_dma_len(s),
                        .direction      = direction,
@@ -1388,7 +1554,8 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
                struct dma_debug_entry ref = {
                        .type           = dma_debug_sg,
                        .dev            = dev,
-                       .paddr          = sg_phys(s),
+                       .pfn            = page_to_pfn(sg_page(s)),
+                       .offset         = s->offset,
                        .dev_addr       = sg_dma_address(s),
                        .size           = sg_dma_len(s),
                        .direction      = direction,
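The new assertion is meant for call sites where the CPU is about to touch a page that may still be DMA-mapped; a minimal sketch, where copy_after_dma_check() is a hypothetical caller rather than anything added by this patch:

	static void copy_after_dma_check(struct page *src_page, void *dst, void *src)
	{
		/* Emits a DMA-API warning if src_page is still in the active-pfn tree. */
		debug_dma_assert_idle(src_page);
		memcpy(dst, src, PAGE_SIZE);
	}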
index c37aeacd7651b0ace2db9ffd85bad2ec45171d76..600ac57e27777f429b77279d93cb9a483f1fbd15 100644 (file)
@@ -8,6 +8,7 @@
  * By Greg Banks <gnb@melbourne.sgi.com>
  * Copyright (c) 2008 Silicon Graphics Inc.  All Rights Reserved.
  * Copyright (C) 2011 Bart Van Assche.  All Rights Reserved.
+ * Copyright (C) 2013 Du, Changbin <changbin.du@gmail.com>
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
@@ -24,6 +25,7 @@
 #include <linux/sysctl.h>
 #include <linux/ctype.h>
 #include <linux/string.h>
+#include <linux/parser.h>
 #include <linux/string_helpers.h>
 #include <linux/uaccess.h>
 #include <linux/dynamic_debug.h>
@@ -147,7 +149,8 @@ static int ddebug_change(const struct ddebug_query *query,
        list_for_each_entry(dt, &ddebug_tables, link) {
 
                /* match against the module name */
-               if (query->module && strcmp(query->module, dt->mod_name))
+               if (query->module &&
+                   !match_wildcard(query->module, dt->mod_name))
                        continue;
 
                for (i = 0; i < dt->num_ddebugs; i++) {
@@ -155,14 +158,16 @@ static int ddebug_change(const struct ddebug_query *query,
 
                        /* match against the source filename */
                        if (query->filename &&
-                           strcmp(query->filename, dp->filename) &&
-                           strcmp(query->filename, kbasename(dp->filename)) &&
-                           strcmp(query->filename, trim_prefix(dp->filename)))
+                           !match_wildcard(query->filename, dp->filename) &&
+                           !match_wildcard(query->filename,
+                                          kbasename(dp->filename)) &&
+                           !match_wildcard(query->filename,
+                                          trim_prefix(dp->filename)))
                                continue;
 
                        /* match against the function */
                        if (query->function &&
-                           strcmp(query->function, dp->function))
+                           !match_wildcard(query->function, dp->function))
                                continue;
 
                        /* match against the format */
index f78ae0c0c4e2257cf96ab3a67b11f6bd6d9d11e0..ec8da78df9be9f4ea245ff398193bd1d90210573 100644 (file)
@@ -92,7 +92,6 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
        rv = _parse_integer(s, base, &_res);
        if (rv & KSTRTOX_OVERFLOW)
                return -ERANGE;
-       rv &= ~KSTRTOX_OVERFLOW;
        if (rv == 0)
                return -EINVAL;
        s += rv;
index 807b2aaa33fa42488c88e1adff522fa17b4368f3..b6d11631231b67e330b03eae74ecdf8373073866 100644 (file)
@@ -113,6 +113,7 @@ int match_token(char *s, const match_table_t table, substring_t args[])
 
        return p->token;
 }
+EXPORT_SYMBOL(match_token);
 
 /**
  * match_number: scan a number in the given base from a substring_t
@@ -163,6 +164,7 @@ int match_int(substring_t *s, int *result)
 {
        return match_number(s, result, 0);
 }
+EXPORT_SYMBOL(match_int);
 
 /**
  * match_octal: - scan an octal representation of an integer from a substring_t
@@ -177,6 +179,7 @@ int match_octal(substring_t *s, int *result)
 {
        return match_number(s, result, 8);
 }
+EXPORT_SYMBOL(match_octal);
 
 /**
  * match_hex: - scan a hex representation of an integer from a substring_t
@@ -191,6 +194,58 @@ int match_hex(substring_t *s, int *result)
 {
        return match_number(s, result, 16);
 }
+EXPORT_SYMBOL(match_hex);
+
+/**
+ * match_wildcard: - parse if a string matches given wildcard pattern
+ * @pattern: wildcard pattern
+ * @str: the string to be parsed
+ *
+ * Description: Parse the string @str to check if it matches the wildcard
+ * pattern @pattern. The pattern may contain two types of wildcards:
+ *   '*' - matches zero or more characters
+ *   '?' - matches one character
+ * If it's matched, return true, else return false.
+ */
+bool match_wildcard(const char *pattern, const char *str)
+{
+       const char *s = str;
+       const char *p = pattern;
+       bool star = false;
+
+       while (*s) {
+               switch (*p) {
+               case '?':
+                       s++;
+                       p++;
+                       break;
+               case '*':
+                       star = true;
+                       str = s;
+                       if (!*++p)
+                               return true;
+                       pattern = p;
+                       break;
+               default:
+                       if (*s == *p) {
+                               s++;
+                               p++;
+                       } else {
+                               if (!star)
+                                       return false;
+                               str++;
+                               s = str;
+                               p = pattern;
+                       }
+                       break;
+               }
+       }
+
+       if (*p == '*')
+               ++p;
+       return !*p;
+}
+EXPORT_SYMBOL(match_wildcard);
 
 /**
  * match_strlcpy: - Copy the characters from a substring_t to a sized buffer
@@ -213,6 +268,7 @@ size_t match_strlcpy(char *dest, const substring_t *src, size_t size)
        }
        return ret;
 }
+EXPORT_SYMBOL(match_strlcpy);
 
 /**
  * match_strdup: - allocate a new string with the contents of a substring_t
@@ -230,10 +286,4 @@ char *match_strdup(const substring_t *s)
                match_strlcpy(p, s, sz);
        return p;
 }
-
-EXPORT_SYMBOL(match_token);
-EXPORT_SYMBOL(match_int);
-EXPORT_SYMBOL(match_octal);
-EXPORT_SYMBOL(match_hex);
-EXPORT_SYMBOL(match_strlcpy);
 EXPORT_SYMBOL(match_strdup);
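A quick sketch of the new matcher; the patterns and strings are arbitrary examples:

	bool ok;

	ok = match_wildcard("usb*", "usbcore");			/* true: '*' matches "core" */
	ok = match_wildcard("drivers/?/a.c", "drivers/x/a.c");	/* true: '?' matches one char */
	ok = match_wildcard("snd_hda", "snd_hda_intel");	/* false: trailing chars unmatched */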
index 31dd4ccd3baae800a4459088cca01ca560c3a536..8b3c9dc882628fd9945b5901eba670f67f885adf 100644 (file)
@@ -8,8 +8,8 @@
 #define CHECK_LOOPS 100
 
 struct test_node {
-       struct rb_node rb;
        u32 key;
+       struct rb_node rb;
 
        /* following fields used for testing augmented rbtree functionality */
        u32 val;
@@ -114,6 +114,16 @@ static int black_path_count(struct rb_node *rb)
        return count;
 }
 
+static void check_postorder_foreach(int nr_nodes)
+{
+       struct test_node *cur, *n;
+       int count = 0;
+       rbtree_postorder_for_each_entry_safe(cur, n, &root, rb)
+               count++;
+
+       WARN_ON_ONCE(count != nr_nodes);
+}
+
 static void check_postorder(int nr_nodes)
 {
        struct rb_node *rb;
@@ -148,6 +158,7 @@ static void check(int nr_nodes)
        WARN_ON_ONCE(count < (1 << black_path_count(rb_last(&root))) - 1);
 
        check_postorder(nr_nodes);
+       check_postorder_foreach(nr_nodes);
 }
 
 static void check_augmented(int nr_nodes)
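For reference, the iterator exercised by the new check is the one typically used for whole-tree teardown; a hedged sketch, assuming kmalloc()ed nodes of the same test_node type:

	struct test_node *cur, *n;

	/* Postorder visit order makes it safe to free the node just visited. */
	rbtree_postorder_for_each_entry_safe(cur, n, &root, rb)
		kfree(cur);
	root = RB_ROOT;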
index 5847a4921b8ee25f5fd88624cd74970da371f2ca..09225796991a83a9281194d855719021b8b18282 100644 (file)
@@ -17,9 +17,6 @@ void show_mem(unsigned int filter)
        printk("Mem-Info:\n");
        show_free_areas(filter);
 
-       if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
-               return;
-
        for_each_online_pgdat(pgdat) {
                unsigned long flags;
                int zoneid;
@@ -46,4 +43,7 @@ void show_mem(unsigned int filter)
        printk("%lu pages in pagetable cache\n",
                quicklist_total_size());
 #endif
+#ifdef CONFIG_MEMORY_FAILURE
+       printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages));
+#endif
 }
index 4634ac9cdb38e8fd31ed06f073078f8e7010c6e4..2e1c102759ce7a18f9a988b54c14c0b4c154d0bb 100644 (file)
@@ -172,8 +172,9 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
        /*
         * Get the overflow emergency buffer
         */
-       v_overflow_buffer = alloc_bootmem_low_pages_nopanic(
-                                               PAGE_ALIGN(io_tlb_overflow));
+       v_overflow_buffer = memblock_virt_alloc_nopanic(
+                                               PAGE_ALIGN(io_tlb_overflow),
+                                               PAGE_SIZE);
        if (!v_overflow_buffer)
                return -ENOMEM;
 
@@ -184,11 +185,15 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
         * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
         * between io_tlb_start and io_tlb_end.
         */
-       io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
+       io_tlb_list = memblock_virt_alloc(
+                               PAGE_ALIGN(io_tlb_nslabs * sizeof(int)),
+                               PAGE_SIZE);
        for (i = 0; i < io_tlb_nslabs; i++)
                io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
        io_tlb_index = 0;
-       io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
+       io_tlb_orig_addr = memblock_virt_alloc(
+                               PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)),
+                               PAGE_SIZE);
 
        if (verbose)
                swiotlb_print_info();
@@ -215,13 +220,13 @@ swiotlb_init(int verbose)
        bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 
        /* Get IO TLB memory from the low pages */
-       vstart = alloc_bootmem_low_pages_nopanic(PAGE_ALIGN(bytes));
+       vstart = memblock_virt_alloc_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
        if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
                return;
 
        if (io_tlb_start)
-               free_bootmem(io_tlb_start,
-                                PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+               memblock_free_early(io_tlb_start,
+                                   PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
        pr_warn("Cannot allocate SWIOTLB buffer");
        no_iotlb_memory = true;
 }
@@ -357,14 +362,14 @@ void __init swiotlb_free(void)
                free_pages((unsigned long)phys_to_virt(io_tlb_start),
                           get_order(io_tlb_nslabs << IO_TLB_SHIFT));
        } else {
-               free_bootmem_late(io_tlb_overflow_buffer,
-                                 PAGE_ALIGN(io_tlb_overflow));
-               free_bootmem_late(__pa(io_tlb_orig_addr),
-                                 PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
-               free_bootmem_late(__pa(io_tlb_list),
-                                 PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
-               free_bootmem_late(io_tlb_start,
-                                 PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+               memblock_free_late(io_tlb_overflow_buffer,
+                                  PAGE_ALIGN(io_tlb_overflow));
+               memblock_free_late(__pa(io_tlb_orig_addr),
+                                  PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
+               memblock_free_late(__pa(io_tlb_list),
+                                  PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
+               memblock_free_late(io_tlb_start,
+                                  PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
        }
        io_tlb_nslabs = 0;
 }
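The conversion pattern above (alloc_bootmem_* to memblock_virt_alloc_*, free_bootmem_* to memblock_free_*) looks roughly like this in other early-boot code; early_buf_alloc() is a hypothetical wrapper, not part of this patch:

	static void * __init early_buf_alloc(size_t size)
	{
		/* Page-aligned; returns NULL on failure instead of panicking. */
		return memblock_virt_alloc_nopanic(PAGE_ALIGN(size), PAGE_SIZE);
	}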
diff --git a/lib/test_module.c b/lib/test_module.c
new file mode 100644 (file)
index 0000000..319b66f
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * This module emits "Hello, world" on printk when loaded.
+ *
+ * It is designed to be used for basic evaluation of the module loading
+ * subsystem (for example when validating module signing/verification). It
+ * lacks any extra dependencies, and will not normally be loaded by the
+ * system unless explicitly requested by name.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+
+static int __init test_module_init(void)
+{
+       pr_warn("Hello, world\n");
+
+       return 0;
+}
+
+module_init(test_module_init);
+
+static void __exit test_module_exit(void)
+{
+       pr_warn("Goodbye\n");
+}
+
+module_exit(test_module_exit);
+
+MODULE_AUTHOR("Kees Cook <keescook@chromium.org>");
+MODULE_LICENSE("GPL");
diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c
new file mode 100644 (file)
index 0000000..0ecef3e
--- /dev/null
@@ -0,0 +1,110 @@
+/*
+ * Kernel module for testing copy_to/from_user infrastructure.
+ *
+ * Copyright 2013 Google Inc. All Rights Reserved
+ *
+ * Authors:
+ *      Kees Cook       <keescook@chromium.org>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/mman.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+#define test(condition, msg)           \
+({                                     \
+       int cond = (condition);         \
+       if (cond)                       \
+               pr_warn("%s\n", msg);   \
+       cond;                           \
+})
+
+static int __init test_user_copy_init(void)
+{
+       int ret = 0;
+       char *kmem;
+       char __user *usermem;
+       char *bad_usermem;
+       unsigned long user_addr;
+       unsigned long value = 0x5A;
+
+       kmem = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
+       if (!kmem)
+               return -ENOMEM;
+
+       user_addr = vm_mmap(NULL, 0, PAGE_SIZE * 2,
+                           PROT_READ | PROT_WRITE | PROT_EXEC,
+                           MAP_ANONYMOUS | MAP_PRIVATE, 0);
+       if (user_addr >= (unsigned long)(TASK_SIZE)) {
+               pr_warn("Failed to allocate user memory\n");
+               kfree(kmem);
+               return -ENOMEM;
+       }
+
+       usermem = (char __user *)user_addr;
+       bad_usermem = (char *)user_addr;
+
+       /* Legitimate usage: none of these should fail. */
+       ret |= test(copy_from_user(kmem, usermem, PAGE_SIZE),
+                   "legitimate copy_from_user failed");
+       ret |= test(copy_to_user(usermem, kmem, PAGE_SIZE),
+                   "legitimate copy_to_user failed");
+       ret |= test(get_user(value, (unsigned long __user *)usermem),
+                   "legitimate get_user failed");
+       ret |= test(put_user(value, (unsigned long __user *)usermem),
+                   "legitimate put_user failed");
+
+       /* Invalid usage: none of these should succeed. */
+       ret |= test(!copy_from_user(kmem, (char __user *)(kmem + PAGE_SIZE),
+                                   PAGE_SIZE),
+                   "illegal all-kernel copy_from_user passed");
+       ret |= test(!copy_from_user(bad_usermem, (char __user *)kmem,
+                                   PAGE_SIZE),
+                   "illegal reversed copy_from_user passed");
+       ret |= test(!copy_to_user((char __user *)kmem, kmem + PAGE_SIZE,
+                                 PAGE_SIZE),
+                   "illegal all-kernel copy_to_user passed");
+       ret |= test(!copy_to_user((char __user *)kmem, bad_usermem,
+                                 PAGE_SIZE),
+                   "illegal reversed copy_to_user passed");
+       ret |= test(!get_user(value, (unsigned long __user *)kmem),
+                   "illegal get_user passed");
+       ret |= test(!put_user(value, (unsigned long __user *)kmem),
+                   "illegal put_user passed");
+
+       vm_munmap(user_addr, PAGE_SIZE * 2);
+       kfree(kmem);
+
+       if (ret == 0) {
+               pr_info("tests passed.\n");
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+module_init(test_user_copy_init);
+
+static void __exit test_user_copy_exit(void)
+{
+       pr_info("unloaded.\n");
+}
+
+module_exit(test_user_copy_exit);
+
+MODULE_AUTHOR("Kees Cook <keescook@chromium.org>");
+MODULE_LICENSE("GPL");
index 10909c571494893ffba0643784a115ab04267ac1..a97f18be9070e5f5401933bf1f1169b9b128d35d 100644 (file)
@@ -1155,6 +1155,45 @@ char *netdev_feature_string(char *buf, char *end, const u8 *addr,
        return number(buf, end, *(const netdev_features_t *)addr, spec);
 }
 
+static noinline_for_stack
+char *address_val(char *buf, char *end, const void *addr,
+                 struct printf_spec spec, const char *fmt)
+{
+       unsigned long long num;
+
+       spec.flags |= SPECIAL | SMALL | ZEROPAD;
+       spec.base = 16;
+
+       switch (fmt[1]) {
+       case 'd':
+               num = *(const dma_addr_t *)addr;
+               spec.field_width = sizeof(dma_addr_t) * 2 + 2;
+               break;
+       case 'p':
+       default:
+               num = *(const phys_addr_t *)addr;
+               spec.field_width = sizeof(phys_addr_t) * 2 + 2;
+               break;
+       }
+
+       return number(buf, end, num, spec);
+}
+
+static noinline_for_stack
+char *comm_name(char *buf, char *end, struct task_struct *tsk,
+               struct printf_spec spec, const char *fmt)
+{
+       char name[TASK_COMM_LEN];
+
+       /* Caller can pass NULL instead of current. */
+       if (!tsk)
+               tsk = current;
+       /* Not using get_task_comm() in case we are in IRQ context. */
+       memcpy(name, tsk->comm, TASK_COMM_LEN);
+       name[sizeof(name) - 1] = '\0';
+       return string(buf, end, name, spec);
+}
+
 int kptr_restrict __read_mostly;
 
 /*
@@ -1218,9 +1257,11 @@ int kptr_restrict __read_mostly;
  *              N no separator
  *            The maximum supported length is 64 bytes of the input. Consider
  *            to use print_hex_dump() for the larger input.
- * - 'a' For a phys_addr_t type and its derivative types (passed by reference)
+ * - 'a[pd]' For address types [p] phys_addr_t, [d] dma_addr_t and derivatives
+ *           (default assumed to be phys_addr_t, passed by reference)
  * - 'd[234]' For a dentry name (optionally 2-4 last components)
  * - 'D[234]' Same as 'd' but for a struct file
+ * - 'T' task_struct->comm
  *
  * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
  * function pointers are really function descriptors, which contain a
@@ -1232,7 +1273,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 {
        int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0);
 
-       if (!ptr && *fmt != 'K') {
+       if (!ptr && *fmt != 'K' && *fmt != 'T') {
                /*
                 * Print (null) with the same width as a pointer so it makes
                 * tabular output look nice.
@@ -1353,17 +1394,15 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
                }
                break;
        case 'a':
-               spec.flags |= SPECIAL | SMALL | ZEROPAD;
-               spec.field_width = sizeof(phys_addr_t) * 2 + 2;
-               spec.base = 16;
-               return number(buf, end,
-                             (unsigned long long) *((phys_addr_t *)ptr), spec);
+               return address_val(buf, end, ptr, spec, fmt);
        case 'd':
                return dentry_name(buf, end, ptr, spec, fmt);
        case 'D':
                return dentry_name(buf, end,
                                   ((const struct file *)ptr)->f_path.dentry,
                                   spec, fmt);
+       case 'T':
+               return comm_name(buf, end, ptr, spec, fmt);
        }
        spec.flags |= SMALL;
        if (spec.field_width == -1) {
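A hedged sketch of the format extensions handled above: %pa keeps its phys_addr_t meaning, %pad is the new dma_addr_t variant (both passed by reference), and %pT prints a task's comm (NULL means current); the local variables are illustrative:

	phys_addr_t phys = 0x1000;
	dma_addr_t dma = 0x2000;

	pr_info("phys=%pa dma=%pad comm=%pT\n", &phys, &dma, current);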
index 07dbc8ec46cfb5bba7eff332a0950b9f1b9593e1..6e45a5074bf023b85896b1feb01198a5f1c652ff 100644 (file)
@@ -267,7 +267,7 @@ void balloon_page_putback(struct page *page)
                put_page(page);
        } else {
                WARN_ON(1);
-               dump_page(page);
+               dump_page(page, "not movable balloon page");
        }
        unlock_page(page);
 }
@@ -287,7 +287,7 @@ int balloon_page_migrate(struct page *newpage,
        BUG_ON(!trylock_page(newpage));
 
        if (WARN_ON(!__is_movable_balloon_page(page))) {
-               dump_page(page);
+               dump_page(page, "not movable balloon page");
                unlock_page(newpage);
                return rc;
        }
index 5875f48ce27954821bec4c57b21bde2b9ea450f0..d0eac43504033d2cf6933fce2e20ca47c564d0be 100644 (file)
@@ -237,7 +237,7 @@ int __cleancache_get_page(struct page *page)
                goto out;
        }
 
-       VM_BUG_ON(!PageLocked(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
        fake_pool_id = page->mapping->host->i_sb->cleancache_poolid;
        if (fake_pool_id < 0)
                goto out;
@@ -279,7 +279,7 @@ void __cleancache_put_page(struct page *page)
                return;
        }
 
-       VM_BUG_ON(!PageLocked(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
        fake_pool_id = page->mapping->host->i_sb->cleancache_poolid;
        if (fake_pool_id < 0)
                return;
@@ -318,7 +318,7 @@ void __cleancache_invalidate_page(struct address_space *mapping,
                if (pool_id < 0)
                        return;
 
-               VM_BUG_ON(!PageLocked(page));
+               VM_BUG_ON_PAGE(!PageLocked(page), page);
                if (cleancache_get_key(mapping->host, &key) >= 0) {
                        cleancache_ops->invalidate_page(pool_id,
                                        key, page->index);
index f58bcd016f432dd094d6f6378aa53f9799d30811..e0ab02d70f13434059d1f05442285e0773189a1c 100644 (file)
@@ -459,6 +459,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
        unsigned long flags;
        bool locked = false;
        struct page *page = NULL, *valid_page = NULL;
+       bool skipped_async_unsuitable = false;
 
        /*
         * Ensure that there are not too many pages isolated from the LRU
@@ -534,6 +535,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
                if (!cc->sync && last_pageblock_nr != pageblock_nr &&
                    !migrate_async_suitable(get_pageblock_migratetype(page))) {
                        cc->finished_update_migrate = true;
+                       skipped_async_unsuitable = true;
                        goto next_pageblock;
                }
 
@@ -599,7 +601,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
                if (__isolate_lru_page(page, mode) != 0)
                        continue;
 
-               VM_BUG_ON(PageTransCompound(page));
+               VM_BUG_ON_PAGE(PageTransCompound(page), page);
 
                /* Successfully isolated */
                cc->finished_update_migrate = true;
@@ -627,8 +629,13 @@ next_pageblock:
        if (locked)
                spin_unlock_irqrestore(&zone->lru_lock, flags);
 
-       /* Update the pageblock-skip if the whole pageblock was scanned */
-       if (low_pfn == end_pfn)
+       /*
+        * Update the pageblock-skip information and cached scanner pfn,
+        * if the whole pageblock was scanned without isolating any page.
+        * This is not done when pageblock was skipped due to being unsuitable
+        * for async compaction, so that eventual sync compaction can try.
+        */
+       if (low_pfn == end_pfn && !skipped_async_unsuitable)
                update_pageblock_skip(cc, valid_page, nr_isolated, true);
 
        trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
@@ -660,7 +667,7 @@ static void isolate_freepages(struct zone *zone,
         * is the end of the pageblock the migration scanner is using.
         */
        pfn = cc->free_pfn;
-       low_pfn = cc->migrate_pfn + pageblock_nr_pages;
+       low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages);
 
        /*
         * Take care that if the migration scanner is at the end of the zone
@@ -676,7 +683,7 @@ static void isolate_freepages(struct zone *zone,
         * pages on cc->migratepages. We stop searching if the migrate
         * and free page scanners meet or enough free pages are isolated.
         */
-       for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
+       for (; pfn >= low_pfn && cc->nr_migratepages > nr_freepages;
                                        pfn -= pageblock_nr_pages) {
                unsigned long isolated;
 
@@ -738,7 +745,14 @@ static void isolate_freepages(struct zone *zone,
        /* split_free_page does not map the pages */
        map_pages(freelist);
 
-       cc->free_pfn = high_pfn;
+       /*
+        * If we crossed the migrate scanner, we want to keep it that way
+        * so that compact_finished() may detect this
+        */
+       if (pfn < low_pfn)
+               cc->free_pfn = max(pfn, zone->zone_start_pfn);
+       else
+               cc->free_pfn = high_pfn;
        cc->nr_freepages = nr_freepages;
 }
 
@@ -837,6 +851,10 @@ static int compact_finished(struct zone *zone,
 
        /* Compaction run completes if the migrate and free scanner meet */
        if (cc->free_pfn <= cc->migrate_pfn) {
+               /* Let the next compaction start anew. */
+               zone->compact_cached_migrate_pfn = zone->zone_start_pfn;
+               zone->compact_cached_free_pfn = zone_end_pfn(zone);
+
                /*
                 * Mark that the PG_migrate_skip information should be cleared
                 * by kswapd when it goes to sleep. kswapd does not set the
@@ -946,6 +964,14 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
                ;
        }
 
+       /*
+        * Clear pageblock skip if there were failures recently and compaction
+        * is about to be retried after being deferred. kswapd does not do
+        * this reset as it'll reset the cached information when going to sleep.
+        */
+       if (compaction_restarting(zone, cc->order) && !current_is_kswapd())
+               __reset_isolation_suitable(zone);
+
        /*
         * Setup to move all movable pages to the end of the zone. Used cached
         * information on where the scanners should start but check that it
@@ -962,13 +988,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
                zone->compact_cached_migrate_pfn = cc->migrate_pfn;
        }
 
-       /*
-        * Clear pageblock skip if there were failures recently and compaction
-        * is about to be retried after being deferred. kswapd does not do
-        * this reset as it'll reset the cached information when going to sleep.
-        */
-       if (compaction_restarting(zone, cc->order) && !current_is_kswapd())
-               __reset_isolation_suitable(zone);
+       trace_mm_compaction_begin(start_pfn, cc->migrate_pfn, cc->free_pfn, end_pfn);
 
        migrate_prep_local();
 
@@ -1003,7 +1023,11 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
                if (err) {
                        putback_movable_pages(&cc->migratepages);
                        cc->nr_migratepages = 0;
-                       if (err == -ENOMEM) {
+                       /*
+                        * migrate_pages() may return -ENOMEM when scanners meet
+                        * and we want compact_finished() to detect it
+                        */
+                       if (err == -ENOMEM && cc->free_pfn > cc->migrate_pfn) {
                                ret = COMPACT_PARTIAL;
                                goto out;
                        }
@@ -1015,6 +1039,8 @@ out:
        cc->nr_freepages -= release_freepages(&cc->freepages);
        VM_BUG_ON(cc->nr_freepages != 0);
 
+       trace_mm_compaction_end(ret);
+
        return ret;
 }
 
@@ -1120,12 +1146,11 @@ static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
                        compact_zone(zone, cc);
 
                if (cc->order > 0) {
-                       int ok = zone_watermark_ok(zone, cc->order,
-                                               low_wmark_pages(zone), 0, 0);
-                       if (ok && cc->order >= zone->compact_order_failed)
-                               zone->compact_order_failed = cc->order + 1;
+                       if (zone_watermark_ok(zone, cc->order,
+                                               low_wmark_pages(zone), 0, 0))
+                               compaction_defer_reset(zone, cc->order, false);
                        /* Currently async compaction is never deferred. */
-                       else if (!ok && cc->sync)
+                       else if (cc->sync)
                                defer_compaction(zone, cc->order);
                }
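
The free scanner changes in the compaction.c hunks above are easiest to see with concrete numbers. The sketch below uses illustrative values (pageblock_nr_pages == 512, i.e. 4K pages and order-9 pageblocks) and a local copy of the kernel's round-up ALIGN(); it is not part of the patch. Rounding migrate_pfn + 1 up to a pageblock boundary, instead of adding pageblock_nr_pages, lets the free scanner reach the first whole pageblock above the migrate scanner; together with the pfn >= low_pfn loop condition and the free_pfn clamp, this is what lets compact_finished() see the scanners meet.

/* Illustrative sketch only; ALIGN() mirrors the kernel's round-up macro. */
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

static unsigned long free_scanner_low_pfn(unsigned long migrate_pfn,
					  unsigned long pageblock_nr_pages)
{
	/*
	 * e.g. migrate_pfn = 1000, pageblock_nr_pages = 512:
	 *   old bound: 1000 + 512           = 1512, so pageblock [1024,1536) is never fully scanned
	 *   new bound: ALIGN(1000 + 1, 512) = 1024, the first whole pageblock above the migrate scanner
	 */
	return ALIGN(migrate_pfn + 1, pageblock_nr_pages);
}
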
 
index b7749a92021c39b93907b94baf352f58f9db0359..7a7f3e0db7384515b6e266029ef930e5c775ec0d 100644 (file)
@@ -409,9 +409,9 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 {
        int error;
 
-       VM_BUG_ON(!PageLocked(old));
-       VM_BUG_ON(!PageLocked(new));
-       VM_BUG_ON(new->mapping);
+       VM_BUG_ON_PAGE(!PageLocked(old), old);
+       VM_BUG_ON_PAGE(!PageLocked(new), new);
+       VM_BUG_ON_PAGE(new->mapping, new);
 
        error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
        if (!error) {
@@ -461,8 +461,8 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 {
        int error;
 
-       VM_BUG_ON(!PageLocked(page));
-       VM_BUG_ON(PageSwapBacked(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
+       VM_BUG_ON_PAGE(PageSwapBacked(page), page);
 
        error = mem_cgroup_cache_charge(page, current->mm,
                                        gfp_mask & GFP_RECLAIM_MASK);
@@ -607,7 +607,7 @@ EXPORT_SYMBOL_GPL(add_page_wait_queue);
  */
 void unlock_page(struct page *page)
 {
-       VM_BUG_ON(!PageLocked(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
        clear_bit_unlock(PG_locked, &page->flags);
        smp_mb__after_clear_bit();
        wake_up_page(page, PG_locked);
@@ -760,7 +760,7 @@ repeat:
                        page_cache_release(page);
                        goto repeat;
                }
-               VM_BUG_ON(page->index != offset);
+               VM_BUG_ON_PAGE(page->index != offset, page);
        }
        return page;
 }
@@ -1656,7 +1656,7 @@ retry_find:
                put_page(page);
                goto retry_find;
        }
-       VM_BUG_ON(page->index != offset);
+       VM_BUG_ON_PAGE(page->index != offset, page);
 
        /*
         * We have a locked page in the page cache, now we need to check
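
The VM_BUG_ON() to VM_BUG_ON_PAGE() conversions throughout this series exist so that the offending page is dumped before the machine BUGs. Roughly, the helper behind them amounts to the following; the exact definition lives in include/linux/mmdebug.h and may differ in detail:

#ifdef CONFIG_DEBUG_VM
#define VM_BUG_ON_PAGE(cond, page)					\
	do {								\
		if (unlikely(cond)) {					\
			dump_page(page);  /* flags, mapping, index, refcounts */ \
			BUG();						\
		}							\
	} while (0)
#else
#define VM_BUG_ON_PAGE(cond, page)	BUILD_BUG_ON_INVALID(cond)  /* still type-checks cond */
#endif
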
index 5d80c53b87cb00c77702d209367220005af3a1b2..82166bf974e14262ecfb064ea7c173d006d3ab98 100644 (file)
@@ -130,8 +130,14 @@ static int set_recommended_min_free_kbytes(void)
                              (unsigned long) nr_free_buffer_pages() / 20);
        recommended_min <<= (PAGE_SHIFT-10);
 
-       if (recommended_min > min_free_kbytes)
+       if (recommended_min > min_free_kbytes) {
+               if (user_min_free_kbytes >= 0)
+                       pr_info("raising min_free_kbytes from %d to %lu "
+                               "to help transparent hugepage allocations\n",
+                               min_free_kbytes, recommended_min);
+
                min_free_kbytes = recommended_min;
+       }
        setup_per_zone_wmarks();
        return 0;
 }
@@ -655,7 +661,7 @@ out:
        hugepage_exit_sysfs(hugepage_kobj);
        return err;
 }
-module_init(hugepage_init)
+subsys_initcall(hugepage_init);
 
 static int __init setup_transparent_hugepage(char *str)
 {
@@ -712,7 +718,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
        pgtable_t pgtable;
        spinlock_t *ptl;
 
-       VM_BUG_ON(!PageCompound(page));
+       VM_BUG_ON_PAGE(!PageCompound(page), page);
        pgtable = pte_alloc_one(mm, haddr);
        if (unlikely(!pgtable))
                return VM_FAULT_OOM;
@@ -893,7 +899,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                goto out;
        }
        src_page = pmd_page(pmd);
-       VM_BUG_ON(!PageHead(src_page));
+       VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
        get_page(src_page);
        page_dup_rmap(src_page);
        add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
@@ -1067,7 +1073,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
        ptl = pmd_lock(mm, pmd);
        if (unlikely(!pmd_same(*pmd, orig_pmd)))
                goto out_free_pages;
-       VM_BUG_ON(!PageHead(page));
+       VM_BUG_ON_PAGE(!PageHead(page), page);
 
        pmdp_clear_flush(vma, haddr, pmd);
        /* leave pmd empty until pte is filled */
@@ -1133,7 +1139,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                goto out_unlock;
 
        page = pmd_page(orig_pmd);
-       VM_BUG_ON(!PageCompound(page) || !PageHead(page));
+       VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
        if (page_mapcount(page) == 1) {
                pmd_t entry;
                entry = pmd_mkyoung(orig_pmd);
@@ -1211,7 +1217,7 @@ alloc:
                        add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
                        put_huge_zero_page();
                } else {
-                       VM_BUG_ON(!PageHead(page));
+                       VM_BUG_ON_PAGE(!PageHead(page), page);
                        page_remove_rmap(page);
                        put_page(page);
                }
@@ -1249,7 +1255,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
                goto out;
 
        page = pmd_page(*pmd);
-       VM_BUG_ON(!PageHead(page));
+       VM_BUG_ON_PAGE(!PageHead(page), page);
        if (flags & FOLL_TOUCH) {
                pmd_t _pmd;
                /*
@@ -1274,7 +1280,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
                }
        }
        page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
-       VM_BUG_ON(!PageCompound(page));
+       VM_BUG_ON_PAGE(!PageCompound(page), page);
        if (flags & FOLL_GET)
                get_page_foll(page);
 
@@ -1432,9 +1438,9 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                } else {
                        page = pmd_page(orig_pmd);
                        page_remove_rmap(page);
-                       VM_BUG_ON(page_mapcount(page) < 0);
+                       VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
                        add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
-                       VM_BUG_ON(!PageHead(page));
+                       VM_BUG_ON_PAGE(!PageHead(page), page);
                        atomic_long_dec(&tlb->mm->nr_ptes);
                        spin_unlock(ptl);
                        tlb_remove_page(tlb, page);
@@ -2172,9 +2178,9 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
                if (unlikely(!page))
                        goto out;
 
-               VM_BUG_ON(PageCompound(page));
-               BUG_ON(!PageAnon(page));
-               VM_BUG_ON(!PageSwapBacked(page));
+               VM_BUG_ON_PAGE(PageCompound(page), page);
+               VM_BUG_ON_PAGE(!PageAnon(page), page);
+               VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
 
                /* cannot use mapcount: can't collapse if there's a gup pin */
                if (page_count(page) != 1)
@@ -2197,8 +2203,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
                }
                /* 0 stands for page_is_file_cache(page) == false */
                inc_zone_page_state(page, NR_ISOLATED_ANON + 0);
-               VM_BUG_ON(!PageLocked(page));
-               VM_BUG_ON(PageLRU(page));
+               VM_BUG_ON_PAGE(!PageLocked(page), page);
+               VM_BUG_ON_PAGE(PageLRU(page), page);
 
                /* If there is no mapped pte young don't collapse the page */
                if (pte_young(pteval) || PageReferenced(page) ||
@@ -2228,7 +2234,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
                } else {
                        src_page = pte_page(pteval);
                        copy_user_highpage(page, src_page, address, vma);
-                       VM_BUG_ON(page_mapcount(src_page) != 1);
+                       VM_BUG_ON_PAGE(page_mapcount(src_page) != 1, src_page);
                        release_pte_page(src_page);
                        /*
                         * ptl mostly unnecessary, but preempt has to
@@ -2307,7 +2313,7 @@ static struct page
                       struct vm_area_struct *vma, unsigned long address,
                       int node)
 {
-       VM_BUG_ON(*hpage);
+       VM_BUG_ON_PAGE(*hpage, *hpage);
        /*
         * Allocate the page while the vma is still valid and under
         * the mmap_sem read mode so there is no memory allocation
@@ -2576,7 +2582,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
                 */
                node = page_to_nid(page);
                khugepaged_node_load[node]++;
-               VM_BUG_ON(PageCompound(page));
+               VM_BUG_ON_PAGE(PageCompound(page), page);
                if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
                        goto out_unmap;
                /* cannot use mapcount: can't collapse if there's a gup pin */
@@ -2872,7 +2878,7 @@ again:
                return;
        }
        page = pmd_page(*pmd);
-       VM_BUG_ON(!page_count(page));
+       VM_BUG_ON_PAGE(!page_count(page), page);
        get_page(page);
        spin_unlock(ptl);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
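
The module_init(hugepage_init) to subsys_initcall(hugepage_init) switch above (ksm_init gets the same treatment further down) moves initialization to an earlier initcall level; for built-in code, module_init() is just device_initcall(). A rough reminder of the ordering, for reference:

/*
 * Built-in initcall levels, in run order (roughly):
 *
 *   core_initcall
 *   postcore_initcall
 *   arch_initcall
 *   subsys_initcall     <- hugepage_init() and ksm_init() now run here
 *   fs_initcall
 *   device_initcall     <- module_init() for built-in code; where they ran before
 *   late_initcall
 */
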
index dee6cf4e6d34135e1880c5c01c7627aa1a33c69a..c01cb9fedb18f8495c815d4caa380580734e1c8e 100644 (file)
@@ -584,7 +584,7 @@ static void update_and_free_page(struct hstate *h, struct page *page)
                                1 << PG_active | 1 << PG_reserved |
                                1 << PG_private | 1 << PG_writeback);
        }
-       VM_BUG_ON(hugetlb_cgroup_from_page(page));
+       VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
        set_compound_page_dtor(page, NULL);
        set_page_refcounted(page);
        arch_release_hugepage(page);
@@ -690,15 +690,11 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
  */
 int PageHuge(struct page *page)
 {
-       compound_page_dtor *dtor;
-
        if (!PageCompound(page))
                return 0;
 
        page = compound_head(page);
-       dtor = get_compound_page_dtor(page);
-
-       return dtor == free_huge_page;
+       return get_compound_page_dtor(page) == free_huge_page;
 }
 EXPORT_SYMBOL_GPL(PageHuge);
 
@@ -708,16 +704,11 @@ EXPORT_SYMBOL_GPL(PageHuge);
  */
 int PageHeadHuge(struct page *page_head)
 {
-       compound_page_dtor *dtor;
-
        if (!PageHead(page_head))
                return 0;
 
-       dtor = get_compound_page_dtor(page_head);
-
-       return dtor == free_huge_page;
+       return get_compound_page_dtor(page_head) == free_huge_page;
 }
-EXPORT_SYMBOL_GPL(PageHeadHuge);
 
 pgoff_t __basepage_index(struct page *page)
 {
@@ -1098,7 +1089,7 @@ retry:
                 * no users -- drop the buddy allocator's reference.
                 */
                put_page_testzero(page);
-               VM_BUG_ON(page_count(page));
+               VM_BUG_ON_PAGE(page_count(page), page);
                enqueue_huge_page(h, page);
        }
 free:
@@ -1280,9 +1271,9 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
        for_each_node_mask_to_alloc(h, nr_nodes, node, &node_states[N_MEMORY]) {
                void *addr;
 
-               addr = __alloc_bootmem_node_nopanic(NODE_DATA(node),
-                               huge_page_size(h), huge_page_size(h), 0);
-
+               addr = memblock_virt_alloc_try_nid_nopanic(
+                               huge_page_size(h), huge_page_size(h),
+                               0, BOOTMEM_ALLOC_ACCESSIBLE, node);
                if (addr) {
                        /*
                         * Use the beginning of the huge page to store the
@@ -1322,8 +1313,8 @@ static void __init gather_bootmem_prealloc(void)
 
 #ifdef CONFIG_HIGHMEM
                page = pfn_to_page(m->phys >> PAGE_SHIFT);
-               free_bootmem_late((unsigned long)m,
-                                 sizeof(struct huge_bootmem_page));
+               memblock_free_late(__pa(m),
+                                  sizeof(struct huge_bootmem_page));
 #else
                page = virt_to_page(m);
 #endif
@@ -2355,17 +2346,27 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
        int cow;
        struct hstate *h = hstate_vma(vma);
        unsigned long sz = huge_page_size(h);
+       unsigned long mmun_start;       /* For mmu_notifiers */
+       unsigned long mmun_end;         /* For mmu_notifiers */
+       int ret = 0;
 
        cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 
+       mmun_start = vma->vm_start;
+       mmun_end = vma->vm_end;
+       if (cow)
+               mmu_notifier_invalidate_range_start(src, mmun_start, mmun_end);
+
        for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
                spinlock_t *src_ptl, *dst_ptl;
                src_pte = huge_pte_offset(src, addr);
                if (!src_pte)
                        continue;
                dst_pte = huge_pte_alloc(dst, addr, sz);
-               if (!dst_pte)
-                       goto nomem;
+               if (!dst_pte) {
+                       ret = -ENOMEM;
+                       break;
+               }
 
                /* If the pagetables are shared don't copy or take references */
                if (dst_pte == src_pte)
@@ -2386,10 +2387,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                spin_unlock(src_ptl);
                spin_unlock(dst_ptl);
        }
-       return 0;
 
-nomem:
-       return -ENOMEM;
+       if (cow)
+               mmu_notifier_invalidate_range_end(src, mmun_start, mmun_end);
+
+       return ret;
 }
 
 static int is_hugetlb_entry_migration(pte_t pte)
@@ -3079,7 +3081,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 same_page:
                if (pages) {
                        pages[i] = mem_map_offset(page, pfn_offset);
-                       get_page(pages[i]);
+                       get_page_foll(pages[i]);
                }
 
                if (vmas)
@@ -3501,7 +3503,7 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage)
 
 bool isolate_huge_page(struct page *page, struct list_head *list)
 {
-       VM_BUG_ON(!PageHead(page));
+       VM_BUG_ON_PAGE(!PageHead(page), page);
        if (!get_page_unless_zero(page))
                return false;
        spin_lock(&hugetlb_lock);
@@ -3512,7 +3514,7 @@ bool isolate_huge_page(struct page *page, struct list_head *list)
 
 void putback_active_hugepage(struct page *page)
 {
-       VM_BUG_ON(!PageHead(page));
+       VM_BUG_ON_PAGE(!PageHead(page), page);
        spin_lock(&hugetlb_lock);
        list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
        spin_unlock(&hugetlb_lock);
@@ -3521,7 +3523,7 @@ void putback_active_hugepage(struct page *page)
 
 bool is_hugepage_active(struct page *page)
 {
-       VM_BUG_ON(!PageHuge(page));
+       VM_BUG_ON_PAGE(!PageHuge(page), page);
        /*
         * This function can be called for a tail page because the caller,
         * scan_movable_pages, scans through a given pfn-range which typically
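
The copy_hugetlb_page_range() changes are split across two hunks above, so the overall shape is easier to see stitched together: when cow is set, the mmu_notifier range calls now bracket the whole copy loop, which is why an allocation failure has to break and fall through rather than return early. A condensed sketch (same names as in the hunks, loop body elided):

	mmun_start = vma->vm_start;
	mmun_end   = vma->vm_end;
	if (cow)
		mmu_notifier_invalidate_range_start(src, mmun_start, mmun_end);

	for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
		/* ... copy one huge pte ... */
		if (!dst_pte) {
			ret = -ENOMEM;
			break;	/* must not return: the _end() below has to run */
		}
		/* ... */
	}

	if (cow)
		mmu_notifier_invalidate_range_end(src, mmun_start, mmun_end);
	return ret;
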
index d747a84e09b088e4e585a56089ce7e6c6302a3a5..cb00829bb4663bf0272d2140f0ebd07afca0751d 100644 (file)
@@ -390,7 +390,7 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
        if (hugetlb_cgroup_disabled())
                return;
 
-       VM_BUG_ON(!PageHuge(oldhpage));
+       VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage);
        spin_lock(&hugetlb_lock);
        h_cg = hugetlb_cgroup_from_page(oldhpage);
        set_hugetlb_cgroup(oldhpage, NULL);
index 4c84678371eb5b5905cc8c4386b512ec57e4f5e3..95487c71cad59737994d77e1d47c1ff07a7d4365 100644 (file)
@@ -55,7 +55,7 @@ static int hwpoison_inject(void *data, u64 val)
                return 0;
 
 inject:
-       printk(KERN_INFO "Injecting memory failure at pfn %lx\n", pfn);
+       pr_info("Injecting memory failure at pfn %#lx\n", pfn);
        return memory_failure(pfn, 18, MF_COUNT_INCREASED);
 }
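
The pfn is now printed with the alternate-form specifier, so the value carries a 0x prefix that the old %lx output lacked; with an illustrative pfn:

	pr_info("Injecting memory failure at pfn %#lx\n", 0x3d090UL);
	/* logged as: "Injecting memory failure at pfn 0x3d090" */
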
 
index 684f7aa9692aecc9e002a3095468a23c5c5c4ed4..7e145e8cd1e6ffadcd86ca0f2c2a30c7ac9ba027 100644 (file)
@@ -27,8 +27,8 @@ static inline void set_page_count(struct page *page, int v)
  */
 static inline void set_page_refcounted(struct page *page)
 {
-       VM_BUG_ON(PageTail(page));
-       VM_BUG_ON(atomic_read(&page->_count));
+       VM_BUG_ON_PAGE(PageTail(page), page);
+       VM_BUG_ON_PAGE(atomic_read(&page->_count), page);
        set_page_count(page, 1);
 }
 
@@ -46,12 +46,10 @@ static inline void __get_page_tail_foll(struct page *page,
         * speculative page access (like in
         * page_cache_get_speculative()) on tail pages.
         */
-       VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
-       VM_BUG_ON(atomic_read(&page->_count) != 0);
-       VM_BUG_ON(page_mapcount(page) < 0);
+       VM_BUG_ON_PAGE(atomic_read(&page->first_page->_count) <= 0, page);
        if (get_page_head)
                atomic_inc(&page->first_page->_count);
-       atomic_inc(&page->_mapcount);
+       get_huge_page_tail(page);
 }
 
 /*
@@ -73,7 +71,7 @@ static inline void get_page_foll(struct page *page)
                 * Getting a normal page or the head of a compound page
                 * requires to already have an elevated page->_count.
                 */
-               VM_BUG_ON(atomic_read(&page->_count) <= 0);
+               VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page);
                atomic_inc(&page->_count);
        }
 }
@@ -101,6 +99,7 @@ extern void prep_compound_page(struct page *page, unsigned long order);
 #ifdef CONFIG_MEMORY_FAILURE
 extern bool is_free_buddy_page(struct page *page);
 #endif
+extern int user_min_free_kbytes;
 
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 
@@ -175,7 +174,7 @@ static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
 static inline int mlocked_vma_newpage(struct vm_area_struct *vma,
                                    struct page *page)
 {
-       VM_BUG_ON(PageLRU(page));
+       VM_BUG_ON_PAGE(PageLRU(page), page);
 
        if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED))
                return 0;
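
__get_page_tail_foll() above drops two VM_BUG_ONs and the open-coded _mapcount increment because get_huge_page_tail() already does the same work. For reference, that helper (from include/linux/mm.h of this era) is roughly:

static inline void get_huge_page_tail(struct page *page)
{
	/* __split_huge_page_refcount() cannot run from under us */
	VM_BUG_ON(page_mapcount(page) < 0);
	VM_BUG_ON(atomic_read(&page->_count) != 0);
	atomic_inc(&page->_mapcount);
}
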
index 175fff79dc95749f6607aaa70976ebff09193397..aa4c7c7250c11a95b9676b70c7cc38f987d88717 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1891,21 +1891,24 @@ struct page *ksm_might_need_to_copy(struct page *page,
        return new_page;
 }
 
-int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
-                       unsigned long *vm_flags)
+int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
 {
        struct stable_node *stable_node;
        struct rmap_item *rmap_item;
-       unsigned int mapcount = page_mapcount(page);
-       int referenced = 0;
+       int ret = SWAP_AGAIN;
        int search_new_forks = 0;
 
-       VM_BUG_ON(!PageKsm(page));
-       VM_BUG_ON(!PageLocked(page));
+       VM_BUG_ON_PAGE(!PageKsm(page), page);
+
+       /*
+        * Rely on the page lock to protect against concurrent modifications
+        * to that page's node of the stable tree.
+        */
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
 
        stable_node = page_stable_node(page);
        if (!stable_node)
-               return 0;
+               return ret;
 again:
        hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
                struct anon_vma *anon_vma = rmap_item->anon_vma;
@@ -1928,113 +1931,16 @@ again:
                        if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
                                continue;
 
-                       if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
-                               continue;
-
-                       referenced += page_referenced_one(page, vma,
-                               rmap_item->address, &mapcount, vm_flags);
-                       if (!search_new_forks || !mapcount)
-                               break;
-               }
-               anon_vma_unlock_read(anon_vma);
-               if (!mapcount)
-                       goto out;
-       }
-       if (!search_new_forks++)
-               goto again;
-out:
-       return referenced;
-}
-
-int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
-{
-       struct stable_node *stable_node;
-       struct rmap_item *rmap_item;
-       int ret = SWAP_AGAIN;
-       int search_new_forks = 0;
-
-       VM_BUG_ON(!PageKsm(page));
-       VM_BUG_ON(!PageLocked(page));
-
-       stable_node = page_stable_node(page);
-       if (!stable_node)
-               return SWAP_FAIL;
-again:
-       hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
-               struct anon_vma *anon_vma = rmap_item->anon_vma;
-               struct anon_vma_chain *vmac;
-               struct vm_area_struct *vma;
-
-               anon_vma_lock_read(anon_vma);
-               anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
-                                              0, ULONG_MAX) {
-                       vma = vmac->vma;
-                       if (rmap_item->address < vma->vm_start ||
-                           rmap_item->address >= vma->vm_end)
-                               continue;
-                       /*
-                        * Initially we examine only the vma which covers this
-                        * rmap_item; but later, if there is still work to do,
-                        * we examine covering vmas in other mms: in case they
-                        * were forked from the original since ksmd passed.
-                        */
-                       if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
+                       if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
                                continue;
 
-                       ret = try_to_unmap_one(page, vma,
-                                       rmap_item->address, flags);
-                       if (ret != SWAP_AGAIN || !page_mapped(page)) {
+                       ret = rwc->rmap_one(page, vma,
+                                       rmap_item->address, rwc->arg);
+                       if (ret != SWAP_AGAIN) {
                                anon_vma_unlock_read(anon_vma);
                                goto out;
                        }
-               }
-               anon_vma_unlock_read(anon_vma);
-       }
-       if (!search_new_forks++)
-               goto again;
-out:
-       return ret;
-}
-
-#ifdef CONFIG_MIGRATION
-int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
-                 struct vm_area_struct *, unsigned long, void *), void *arg)
-{
-       struct stable_node *stable_node;
-       struct rmap_item *rmap_item;
-       int ret = SWAP_AGAIN;
-       int search_new_forks = 0;
-
-       VM_BUG_ON(!PageKsm(page));
-       VM_BUG_ON(!PageLocked(page));
-
-       stable_node = page_stable_node(page);
-       if (!stable_node)
-               return ret;
-again:
-       hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
-               struct anon_vma *anon_vma = rmap_item->anon_vma;
-               struct anon_vma_chain *vmac;
-               struct vm_area_struct *vma;
-
-               anon_vma_lock_read(anon_vma);
-               anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
-                                              0, ULONG_MAX) {
-                       vma = vmac->vma;
-                       if (rmap_item->address < vma->vm_start ||
-                           rmap_item->address >= vma->vm_end)
-                               continue;
-                       /*
-                        * Initially we examine only the vma which covers this
-                        * rmap_item; but later, if there is still work to do,
-                        * we examine covering vmas in other mms: in case they
-                        * were forked from the original since ksmd passed.
-                        */
-                       if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
-                               continue;
-
-                       ret = rmap_one(page, vma, rmap_item->address, arg);
-                       if (ret != SWAP_AGAIN) {
+                       if (rwc->done && rwc->done(page)) {
                                anon_vma_unlock_read(anon_vma);
                                goto out;
                        }
@@ -2047,17 +1953,18 @@ out:
        return ret;
 }
 
+#ifdef CONFIG_MIGRATION
 void ksm_migrate_page(struct page *newpage, struct page *oldpage)
 {
        struct stable_node *stable_node;
 
-       VM_BUG_ON(!PageLocked(oldpage));
-       VM_BUG_ON(!PageLocked(newpage));
-       VM_BUG_ON(newpage->mapping != oldpage->mapping);
+       VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
+       VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
+       VM_BUG_ON_PAGE(newpage->mapping != oldpage->mapping, newpage);
 
        stable_node = page_stable_node(newpage);
        if (stable_node) {
-               VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage));
+               VM_BUG_ON_PAGE(stable_node->kpfn != page_to_pfn(oldpage), oldpage);
                stable_node->kpfn = page_to_pfn(newpage);
                /*
                 * newpage->mapping was set in advance; now we need smp_wmb()
@@ -2438,4 +2345,4 @@ out_free:
 out:
        return err;
 }
-module_init(ksm_init)
+subsys_initcall(ksm_init);
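
With this change the old page_referenced_ksm()/try_to_unmap_ksm() specializations collapse into the generic rmap walk: a caller describes what to do per mapping in a struct rmap_walk_control. A minimal caller sketch, with hypothetical callback names (the field names and call signatures follow the hunk above):

static int my_rmap_one(struct page *page, struct vm_area_struct *vma,
		       unsigned long address, void *arg)
{
	/* inspect or modify one mapping of @page; SWAP_AGAIN keeps walking */
	return SWAP_AGAIN;
}

static int my_done(struct page *page)
{
	/* non-zero stops the walk early, e.g. once the page is unmapped */
	return !page_mapped(page);
}

static int walk_ksm_mappings(struct page *page, void *cookie)
{
	struct rmap_walk_control rwc = {
		.rmap_one	= my_rmap_one,	/* called for each mapping vma */
		.arg		= cookie,	/* passed through to the callbacks */
		.done		= my_done,	/* optional early exit */
		.invalid_vma	= NULL,		/* optional per-vma filter */
	};

	return rmap_walk_ksm(page, &rwc);
}
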
index 53e477bb55587aff585558f91c96a04db9f5c965..9c0aeef194404a93a0e64f94a25c337ff301749e 100644 (file)
@@ -21,6 +21,9 @@
 #include <linux/memblock.h>
 
 #include <asm-generic/sections.h>
+#include <linux/io.h>
+
+#include "internal.h"
 
 static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
 static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
@@ -39,6 +42,9 @@ struct memblock memblock __initdata_memblock = {
 };
 
 int memblock_debug __initdata_memblock;
+#ifdef CONFIG_MOVABLE_NODE
+bool movable_node_enabled __initdata_memblock = false;
+#endif
 static int memblock_can_resize __initdata_memblock;
 static int memblock_memory_in_slab __initdata_memblock = 0;
 static int memblock_reserved_in_slab __initdata_memblock = 0;
@@ -91,7 +97,7 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type,
  * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
  * @size: size of free area to find
  * @align: alignment of free area to find
- * @nid: nid of the free area to find, %MAX_NUMNODES for any node
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
  *
  * Utility called from memblock_find_in_range_node(), find free area bottom-up.
  *
@@ -123,7 +129,7 @@ __memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
  * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
  * @size: size of free area to find
  * @align: alignment of free area to find
- * @nid: nid of the free area to find, %MAX_NUMNODES for any node
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
  *
  * Utility called from memblock_find_in_range_node(), find free area top-down.
  *
@@ -154,11 +160,11 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
 
 /**
  * memblock_find_in_range_node - find free area in given range and node
- * @start: start of candidate range
- * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
  * @size: size of free area to find
  * @align: alignment of free area to find
- * @nid: nid of the free area to find, %MAX_NUMNODES for any node
+ * @start: start of candidate range
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
  *
  * Find @size free area aligned to @align in the specified range and node.
  *
@@ -173,9 +179,9 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
  * RETURNS:
  * Found address on success, 0 on failure.
  */
-phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
-                                       phys_addr_t end, phys_addr_t size,
-                                       phys_addr_t align, int nid)
+phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
+                                       phys_addr_t align, phys_addr_t start,
+                                       phys_addr_t end, int nid)
 {
        int ret;
        phys_addr_t kernel_end;
@@ -238,8 +244,8 @@ phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
                                        phys_addr_t end, phys_addr_t size,
                                        phys_addr_t align)
 {
-       return memblock_find_in_range_node(start, end, size, align,
-                                          MAX_NUMNODES);
+       return memblock_find_in_range_node(size, align, start, end,
+                                           NUMA_NO_NODE);
 }
 
 static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
@@ -255,10 +261,13 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
                type->cnt = 1;
                type->regions[0].base = 0;
                type->regions[0].size = 0;
+               type->regions[0].flags = 0;
                memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
        }
 }
 
+#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
+
 phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info(
                                        phys_addr_t *addr)
 {
@@ -271,6 +280,20 @@ phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info(
                          memblock.reserved.max);
 }
 
+phys_addr_t __init_memblock get_allocated_memblock_memory_regions_info(
+                                       phys_addr_t *addr)
+{
+       if (memblock.memory.regions == memblock_memory_init_regions)
+               return 0;
+
+       *addr = __pa(memblock.memory.regions);
+
+       return PAGE_ALIGN(sizeof(struct memblock_region) *
+                         memblock.memory.max);
+}
+
+#endif
+
 /**
  * memblock_double_array - double the size of the memblock regions array
  * @type: memblock type of the regions array being doubled
@@ -405,7 +428,8 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type)
 
                if (this->base + this->size != next->base ||
                    memblock_get_region_node(this) !=
-                   memblock_get_region_node(next)) {
+                   memblock_get_region_node(next) ||
+                   this->flags != next->flags) {
                        BUG_ON(this->base + this->size > next->base);
                        i++;
                        continue;
@@ -425,13 +449,15 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type)
  * @base:      base address of the new region
  * @size:      size of the new region
  * @nid:       node id of the new region
+ * @flags:     flags of the new region
  *
  * Insert new memblock region [@base,@base+@size) into @type at @idx.
  * @type must already have extra room to accomodate the new region.
  */
 static void __init_memblock memblock_insert_region(struct memblock_type *type,
                                                   int idx, phys_addr_t base,
-                                                  phys_addr_t size, int nid)
+                                                  phys_addr_t size,
+                                                  int nid, unsigned long flags)
 {
        struct memblock_region *rgn = &type->regions[idx];
 
@@ -439,6 +465,7 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type,
        memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn));
        rgn->base = base;
        rgn->size = size;
+       rgn->flags = flags;
        memblock_set_region_node(rgn, nid);
        type->cnt++;
        type->total_size += size;
@@ -450,6 +477,7 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type,
  * @base: base address of the new region
  * @size: size of the new region
  * @nid: nid of the new region
+ * @flags: flags of the new region
  *
  * Add new memblock region [@base,@base+@size) into @type.  The new region
  * is allowed to overlap with existing ones - overlaps don't affect already
@@ -460,7 +488,8 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type,
  * 0 on success, -errno on failure.
  */
 static int __init_memblock memblock_add_region(struct memblock_type *type,
-                               phys_addr_t base, phys_addr_t size, int nid)
+                               phys_addr_t base, phys_addr_t size,
+                               int nid, unsigned long flags)
 {
        bool insert = false;
        phys_addr_t obase = base;
@@ -475,6 +504,7 @@ static int __init_memblock memblock_add_region(struct memblock_type *type,
                WARN_ON(type->cnt != 1 || type->total_size);
                type->regions[0].base = base;
                type->regions[0].size = size;
+               type->regions[0].flags = flags;
                memblock_set_region_node(&type->regions[0], nid);
                type->total_size = size;
                return 0;
@@ -505,7 +535,8 @@ repeat:
                        nr_new++;
                        if (insert)
                                memblock_insert_region(type, i++, base,
-                                                      rbase - base, nid);
+                                                      rbase - base, nid,
+                                                      flags);
                }
                /* area below @rend is dealt with, forget about it */
                base = min(rend, end);
@@ -515,7 +546,8 @@ repeat:
        if (base < end) {
                nr_new++;
                if (insert)
-                       memblock_insert_region(type, i, base, end - base, nid);
+                       memblock_insert_region(type, i, base, end - base,
+                                              nid, flags);
        }
 
        /*
@@ -537,12 +569,13 @@ repeat:
 int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
                                       int nid)
 {
-       return memblock_add_region(&memblock.memory, base, size, nid);
+       return memblock_add_region(&memblock.memory, base, size, nid, 0);
 }
 
 int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
 {
-       return memblock_add_region(&memblock.memory, base, size, MAX_NUMNODES);
+       return memblock_add_region(&memblock.memory, base, size,
+                                  MAX_NUMNODES, 0);
 }
 
 /**
@@ -597,7 +630,8 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
                        rgn->size -= base - rbase;
                        type->total_size -= base - rbase;
                        memblock_insert_region(type, i, rbase, base - rbase,
-                                              memblock_get_region_node(rgn));
+                                              memblock_get_region_node(rgn),
+                                              rgn->flags);
                } else if (rend > end) {
                        /*
                         * @rgn intersects from above.  Split and redo the
@@ -607,7 +641,8 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
                        rgn->size -= end - rbase;
                        type->total_size -= end - rbase;
                        memblock_insert_region(type, i--, rbase, end - rbase,
-                                              memblock_get_region_node(rgn));
+                                              memblock_get_region_node(rgn),
+                                              rgn->flags);
                } else {
                        /* @rgn is fully contained, record it */
                        if (!*end_rgn)
@@ -643,28 +678,89 @@ int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
 {
        memblock_dbg("   memblock_free: [%#016llx-%#016llx] %pF\n",
                     (unsigned long long)base,
-                    (unsigned long long)base + size,
+                    (unsigned long long)base + size - 1,
                     (void *)_RET_IP_);
 
        return __memblock_remove(&memblock.reserved, base, size);
 }
 
-int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
+static int __init_memblock memblock_reserve_region(phys_addr_t base,
+                                                  phys_addr_t size,
+                                                  int nid,
+                                                  unsigned long flags)
 {
        struct memblock_type *_rgn = &memblock.reserved;
 
-       memblock_dbg("memblock_reserve: [%#016llx-%#016llx] %pF\n",
+       memblock_dbg("memblock_reserve: [%#016llx-%#016llx] flags %#02lx %pF\n",
                     (unsigned long long)base,
-                    (unsigned long long)base + size,
-                    (void *)_RET_IP_);
+                    (unsigned long long)base + size - 1,
+                    flags, (void *)_RET_IP_);
+
+       return memblock_add_region(_rgn, base, size, nid, flags);
+}
+
+int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
+{
+       return memblock_reserve_region(base, size, MAX_NUMNODES, 0);
+}
+
+/**
+ * memblock_mark_hotplug - Mark hotpluggable memory with flag MEMBLOCK_HOTPLUG.
+ * @base: the base phys addr of the region
+ * @size: the size of the region
+ *
+ * This function isolates region [@base, @base + @size), and marks it with flag
+ * MEMBLOCK_HOTPLUG.
+ *
+ * Return 0 on success, -errno on failure.
+ */
+int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
+{
+       struct memblock_type *type = &memblock.memory;
+       int i, ret, start_rgn, end_rgn;
+
+       ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
+       if (ret)
+               return ret;
+
+       for (i = start_rgn; i < end_rgn; i++)
+               memblock_set_region_flags(&type->regions[i], MEMBLOCK_HOTPLUG);
+
+       memblock_merge_regions(type);
+       return 0;
+}
+
+/**
+ * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region.
+ * @base: the base phys addr of the region
+ * @size: the size of the region
+ *
+ * This function isolates region [@base, @base + @size), and clears flag
+ * MEMBLOCK_HOTPLUG for the isolated regions.
+ *
+ * Return 0 on success, -errno on failure.
+ */
+int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
+{
+       struct memblock_type *type = &memblock.memory;
+       int i, ret, start_rgn, end_rgn;
+
+       ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
+       if (ret)
+               return ret;
+
+       for (i = start_rgn; i < end_rgn; i++)
+               memblock_clear_region_flags(&type->regions[i],
+                                           MEMBLOCK_HOTPLUG);
 
-       return memblock_add_region(_rgn, base, size, MAX_NUMNODES);
+       memblock_merge_regions(type);
+       return 0;
 }
 
 /**
  * __next_free_mem_range - next function for for_each_free_mem_range()
  * @idx: pointer to u64 loop variable
- * @nid: node selector, %MAX_NUMNODES for all nodes
+ * @nid: node selector, %NUMA_NO_NODE for all nodes
  * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @out_nid: ptr to int for nid of the range, can be %NULL
@@ -693,13 +789,16 @@ void __init_memblock __next_free_mem_range(u64 *idx, int nid,
        int mi = *idx & 0xffffffff;
        int ri = *idx >> 32;
 
+       if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
+               nid = NUMA_NO_NODE;
+
        for ( ; mi < mem->cnt; mi++) {
                struct memblock_region *m = &mem->regions[mi];
                phys_addr_t m_start = m->base;
                phys_addr_t m_end = m->base + m->size;
 
                /* only memory regions are associated with nodes, check it */
-               if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m))
+               if (nid != NUMA_NO_NODE && nid != memblock_get_region_node(m))
                        continue;
 
                /* scan areas before each reservation for intersection */
@@ -740,12 +839,17 @@ void __init_memblock __next_free_mem_range(u64 *idx, int nid,
 /**
  * __next_free_mem_range_rev - next function for for_each_free_mem_range_reverse()
  * @idx: pointer to u64 loop variable
- * @nid: nid: node selector, %MAX_NUMNODES for all nodes
+ * @nid: nid: node selector, %NUMA_NO_NODE for all nodes
  * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @out_nid: ptr to int for nid of the range, can be %NULL
  *
  * Reverse of __next_free_mem_range().
+ *
+ * Linux kernel cannot migrate pages used by itself. Memory hotplug users won't
+ * be able to hot-remove hotpluggable memory used by the kernel. So this
+ * function skips hotpluggable regions if needed when allocating memory for the
+ * kernel.
  */
 void __init_memblock __next_free_mem_range_rev(u64 *idx, int nid,
                                           phys_addr_t *out_start,
@@ -756,6 +860,9 @@ void __init_memblock __next_free_mem_range_rev(u64 *idx, int nid,
        int mi = *idx & 0xffffffff;
        int ri = *idx >> 32;
 
+       if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
+               nid = NUMA_NO_NODE;
+
        if (*idx == (u64)ULLONG_MAX) {
                mi = mem->cnt - 1;
                ri = rsv->cnt;
@@ -767,7 +874,11 @@ void __init_memblock __next_free_mem_range_rev(u64 *idx, int nid,
                phys_addr_t m_end = m->base + m->size;
 
                /* only memory regions are associated with nodes, check it */
-               if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m))
+               if (nid != NUMA_NO_NODE && nid != memblock_get_region_node(m))
+                       continue;
+
+               /* skip hotpluggable memory regions if needed */
+               if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
                        continue;
 
                /* scan areas before each reservation for intersection */
@@ -837,18 +948,18 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
  * memblock_set_node - set node ID on memblock regions
  * @base: base of area to set node ID for
  * @size: size of area to set node ID for
+ * @type: memblock type to set node ID for
  * @nid: node ID to set
  *
- * Set the nid of memblock memory regions in [@base,@base+@size) to @nid.
+ * Set the nid of memblock @type regions in [@base,@base+@size) to @nid.
  * Regions which cross the area boundaries are split as necessary.
  *
  * RETURNS:
  * 0 on success, -errno on failure.
  */
 int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
-                                     int nid)
+                                     struct memblock_type *type, int nid)
 {
-       struct memblock_type *type = &memblock.memory;
        int start_rgn, end_rgn;
        int i, ret;
 
@@ -870,13 +981,13 @@ static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
 {
        phys_addr_t found;
 
-       if (WARN_ON(!align))
-               align = __alignof__(long long);
+       if (!align)
+               align = SMP_CACHE_BYTES;
 
        /* align @size to avoid excessive fragmentation on reserved array */
        size = round_up(size, align);
 
-       found = memblock_find_in_range_node(0, max_addr, size, align, nid);
+       found = memblock_find_in_range_node(size, align, 0, max_addr, nid);
        if (found && !memblock_reserve(found, size))
                return found;
 
@@ -890,7 +1001,7 @@ phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int n
 
 phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
 {
-       return memblock_alloc_base_nid(size, align, max_addr, MAX_NUMNODES);
+       return memblock_alloc_base_nid(size, align, max_addr, NUMA_NO_NODE);
 }
 
 phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
@@ -920,6 +1031,207 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i
        return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
 }
 
+/**
+ * memblock_virt_alloc_internal - allocate boot memory block
+ * @size: size of memory block to be allocated in bytes
+ * @align: alignment of the region and block's size
+ * @min_addr: the lower bound of the memory region to allocate (phys address)
+ * @max_addr: the upper bound of the memory region to allocate (phys address)
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
+ *
+ * The @min_addr limit is dropped if it can not be satisfied and the allocation
+ * will fall back to memory below @min_addr. Also, allocation may fall back
+ * to any node in the system if the specified node can not
+ * hold the requested memory.
+ *
+ * The allocation is performed from memory region limited by
+ * memblock.current_limit if @max_addr == %BOOTMEM_ALLOC_ACCESSIBLE.
+ *
+ * The memory block is aligned on SMP_CACHE_BYTES if @align == 0.
+ *
+ * The phys address of allocated boot memory block is converted to virtual and
+ * allocated memory is reset to 0.
+ *
+ * In addition, the function sets the min_count to 0 using kmemleak_alloc for
+ * allocated boot memory block, so that it is never reported as leaks.
+ *
+ * RETURNS:
+ * Virtual address of allocated memory block on success, NULL on failure.
+ */
+static void * __init memblock_virt_alloc_internal(
+                               phys_addr_t size, phys_addr_t align,
+                               phys_addr_t min_addr, phys_addr_t max_addr,
+                               int nid)
+{
+       phys_addr_t alloc;
+       void *ptr;
+
+       if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
+               nid = NUMA_NO_NODE;
+
+       /*
+        * Detect any accidental use of these APIs after slab is ready, as at
+        * this moment memblock may be deinitialized already and its
+        * internal data may be destroyed (after execution of free_all_bootmem)
+        */
+       if (WARN_ON_ONCE(slab_is_available()))
+               return kzalloc_node(size, GFP_NOWAIT, nid);
+
+       if (!align)
+               align = SMP_CACHE_BYTES;
+
+       /* align @size to avoid excessive fragmentation on reserved array */
+       size = round_up(size, align);
+
+again:
+       alloc = memblock_find_in_range_node(size, align, min_addr, max_addr,
+                                           nid);
+       if (alloc)
+               goto done;
+
+       if (nid != NUMA_NO_NODE) {
+               alloc = memblock_find_in_range_node(size, align, min_addr,
+                                                   max_addr,  NUMA_NO_NODE);
+               if (alloc)
+                       goto done;
+       }
+
+       if (min_addr) {
+               min_addr = 0;
+               goto again;
+       } else {
+               goto error;
+       }
+
+done:
+       memblock_reserve(alloc, size);
+       ptr = phys_to_virt(alloc);
+       memset(ptr, 0, size);
+
+       /*
+        * The min_count is set to 0 so that bootmem allocated blocks
+        * are never reported as leaks. This is because many of these blocks
+        * are only referred via the physical address which is not
+        * looked up by kmemleak.
+        */
+       kmemleak_alloc(ptr, size, 0, 0);
+
+       return ptr;
+
+error:
+       return NULL;
+}
+
+/**
+ * memblock_virt_alloc_try_nid_nopanic - allocate boot memory block
+ * @size: size of memory block to be allocated in bytes
+ * @align: alignment of the region and block's size
+ * @min_addr: the lower bound of the memory region from where the allocation
+ *       is preferred (phys address)
+ * @max_addr: the upper bound of the memory region from where the allocation
+ *           is preferred (phys address), or %BOOTMEM_ALLOC_ACCESSIBLE to
+ *           allocate only from memory limited by memblock.current_limit value
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
+ *
+ * Public version of _memblock_virt_alloc_try_nid_nopanic() which provides
+ * additional debug information (including caller info), if enabled.
+ *
+ * RETURNS:
+ * Virtual address of allocated memory block on success, NULL on failure.
+ */
+void * __init memblock_virt_alloc_try_nid_nopanic(
+                               phys_addr_t size, phys_addr_t align,
+                               phys_addr_t min_addr, phys_addr_t max_addr,
+                               int nid)
+{
+       memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx %pF\n",
+                    __func__, (u64)size, (u64)align, nid, (u64)min_addr,
+                    (u64)max_addr, (void *)_RET_IP_);
+       return memblock_virt_alloc_internal(size, align, min_addr,
+                                            max_addr, nid);
+}
+
+/**
+ * memblock_virt_alloc_try_nid - allocate boot memory block with panicking
+ * @size: size of memory block to be allocated in bytes
+ * @align: alignment of the region and block's size
+ * @min_addr: the lower bound of the memory region from where the allocation
+ *       is preferred (phys address)
+ * @max_addr: the upper bound of the memory region from where the allocation
+ *           is preferred (phys address), or %BOOTMEM_ALLOC_ACCESSIBLE to
+ *           allocate only from memory limited by memblock.current_limit value
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
+ *
+ * Public panicking version of _memblock_virt_alloc_try_nid_nopanic()
+ * which provides debug information (including caller info), if enabled,
+ * and panics if the request can not be satisfied.
+ *
+ * RETURNS:
+ * Virtual address of allocated memory block on success, NULL on failure.
+ */
+void * __init memblock_virt_alloc_try_nid(
+                       phys_addr_t size, phys_addr_t align,
+                       phys_addr_t min_addr, phys_addr_t max_addr,
+                       int nid)
+{
+       void *ptr;
+
+       memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx %pF\n",
+                    __func__, (u64)size, (u64)align, nid, (u64)min_addr,
+                    (u64)max_addr, (void *)_RET_IP_);
+       ptr = memblock_virt_alloc_internal(size, align,
+                                          min_addr, max_addr, nid);
+       if (ptr)
+               return ptr;
+
+       panic("%s: Failed to allocate %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx\n",
+             __func__, (u64)size, (u64)align, nid, (u64)min_addr,
+             (u64)max_addr);
+       return NULL;
+}
+
+/**
+ * __memblock_free_early - free boot memory block
+ * @base: phys starting address of the  boot memory block
+ * @size: size of the boot memory block in bytes
+ *
+ * Free boot memory block previously allocated by memblock_virt_alloc_xx() API.
+ * The freeing memory will not be released to the buddy allocator.
+ */
+void __init __memblock_free_early(phys_addr_t base, phys_addr_t size)
+{
+       memblock_dbg("%s: [%#016llx-%#016llx] %pF\n",
+                    __func__, (u64)base, (u64)base + size - 1,
+                    (void *)_RET_IP_);
+       kmemleak_free_part(__va(base), size);
+       __memblock_remove(&memblock.reserved, base, size);
+}
+
+/*
+ * __memblock_free_late - free bootmem block pages directly to buddy allocator
+ * @addr: phys starting address of the  boot memory block
+ * @size: size of the boot memory block in bytes
+ *
+ * This is only useful when the bootmem allocator has already been torn
+ * down, but we are still initializing the system.  Pages are released directly
+ * to the buddy allocator, no bootmem metadata is updated because it is gone.
+ */
+void __init __memblock_free_late(phys_addr_t base, phys_addr_t size)
+{
+       u64 cursor, end;
+
+       memblock_dbg("%s: [%#016llx-%#016llx] %pF\n",
+                    __func__, (u64)base, (u64)base + size - 1,
+                    (void *)_RET_IP_);
+       kmemleak_free_part(__va(base), size);
+       cursor = PFN_UP(base);
+       end = PFN_DOWN(base + size);
+
+       for (; cursor < end; cursor++) {
+               __free_pages_bootmem(pfn_to_page(cursor), 0);
+               totalram_pages++;
+       }
+}
 
 /*
  * Remaining API functions
@@ -1101,6 +1413,7 @@ void __init_memblock memblock_set_current_limit(phys_addr_t limit)
 static void __init_memblock memblock_dump(struct memblock_type *type, char *name)
 {
        unsigned long long base, size;
+       unsigned long flags;
        int i;
 
        pr_info(" %s.cnt  = 0x%lx\n", name, type->cnt);
@@ -1111,13 +1424,14 @@ static void __init_memblock memblock_dump(struct memblock_type *type, char *name
 
                base = rgn->base;
                size = rgn->size;
+               flags = rgn->flags;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
                if (memblock_get_region_node(rgn) != MAX_NUMNODES)
                        snprintf(nid_buf, sizeof(nid_buf), " on node %d",
                                 memblock_get_region_node(rgn));
 #endif
-               pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes%s\n",
-                       name, i, base, base + size - 1, size, nid_buf);
+               pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes%s flags: %#lx\n",
+                       name, i, base, base + size - 1, size, nid_buf, flags);
        }
 }
 
index 7caff36180cda1c3e012f6ff17765f3e26c5a07f..a815686b7f0acf6a27efd1bc26c8d399adf363f5 100644 (file)
@@ -49,7 +49,6 @@
 #include <linux/sort.h>
 #include <linux/fs.h>
 #include <linux/seq_file.h>
-#include <linux/vmalloc.h>
 #include <linux/vmpressure.h>
 #include <linux/mm_inline.h>
 #include <linux/page_cgroup.h>
@@ -150,7 +149,7 @@ struct mem_cgroup_reclaim_iter {
         * matches memcg->dead_count of the hierarchy root group.
         */
        struct mem_cgroup *last_visited;
-       unsigned long last_dead_count;
+       int last_dead_count;
 
        /* scan generation, increased every round-trip */
        unsigned int generation;
@@ -381,23 +380,12 @@ struct mem_cgroup {
        /* WARNING: nodeinfo must be the last member here */
 };
 
-static size_t memcg_size(void)
-{
-       return sizeof(struct mem_cgroup) +
-               nr_node_ids * sizeof(struct mem_cgroup_per_node *);
-}
-
 /* internal only representation about the status of kmem accounting. */
 enum {
-       KMEM_ACCOUNTED_ACTIVE = 0, /* accounted by this cgroup itself */
-       KMEM_ACCOUNTED_ACTIVATED, /* static key enabled. */
+       KMEM_ACCOUNTED_ACTIVE, /* accounted by this cgroup itself */
        KMEM_ACCOUNTED_DEAD, /* dead memcg with pending kmem charges */
 };
 
-/* We account when limit is on, but only after call sites are patched */
-#define KMEM_ACCOUNTED_MASK \
-               ((1 << KMEM_ACCOUNTED_ACTIVE) | (1 << KMEM_ACCOUNTED_ACTIVATED))
-
 #ifdef CONFIG_MEMCG_KMEM
 static inline void memcg_kmem_set_active(struct mem_cgroup *memcg)
 {
@@ -409,16 +397,6 @@ static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
        return test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
 }
 
-static void memcg_kmem_set_activated(struct mem_cgroup *memcg)
-{
-       set_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags);
-}
-
-static void memcg_kmem_clear_activated(struct mem_cgroup *memcg)
-{
-       clear_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags);
-}
-
 static void memcg_kmem_mark_dead(struct mem_cgroup *memcg)
 {
        /*
@@ -1141,10 +1119,8 @@ skip_node:
         * protected by css_get and the tree walk is rcu safe.
         */
        if (next_css) {
-               struct mem_cgroup *mem = mem_cgroup_from_css(next_css);
-
-               if (css_tryget(&mem->css))
-                       return mem;
+               if ((next_css->flags & CSS_ONLINE) && css_tryget(next_css))
+                       return mem_cgroup_from_css(next_css);
                else {
                        prev_css = next_css;
                        goto skip_node;
@@ -1688,13 +1664,13 @@ static void move_unlock_mem_cgroup(struct mem_cgroup *memcg,
  */
 void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 {
-       struct cgroup *task_cgrp;
-       struct cgroup *mem_cgrp;
        /*
-        * Need a buffer in BSS, can't rely on allocations. The code relies
-        * on the assumption that OOM is serialized for memory controller.
-        * If this assumption is broken, revisit this code.
+        * protects memcg_name and makes sure that parallel ooms do not
+        * interleave
         */
+       static DEFINE_SPINLOCK(oom_info_lock);
+       struct cgroup *task_cgrp;
+       struct cgroup *mem_cgrp;
        static char memcg_name[PATH_MAX];
        int ret;
        struct mem_cgroup *iter;
@@ -1703,6 +1679,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
        if (!p)
                return;
 
+       spin_lock(&oom_info_lock);
        rcu_read_lock();
 
        mem_cgrp = memcg->css.cgroup;
@@ -1771,6 +1748,7 @@ done:
 
                pr_cont("\n");
        }
+       spin_unlock(&oom_info_lock);
 }
 
 /*
@@ -2731,7 +2709,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
         * MEMDIE process.
         */
        if (unlikely(test_thread_flag(TIF_MEMDIE)
-                    || fatal_signal_pending(current)))
+                    || fatal_signal_pending(current))
+                    || current->flags & PF_EXITING)
                goto bypass;
 
        if (unlikely(task_in_memcg_oom(current)))
@@ -2902,7 +2881,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
        unsigned short id;
        swp_entry_t ent;
 
-       VM_BUG_ON(!PageLocked(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
 
        pc = lookup_page_cgroup(page);
        lock_page_cgroup(pc);
@@ -2936,7 +2915,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
        bool anon;
 
        lock_page_cgroup(pc);
-       VM_BUG_ON(PageCgroupUsed(pc));
+       VM_BUG_ON_PAGE(PageCgroupUsed(pc), page);
        /*
         * we don't need page_cgroup_lock about tail pages, because they are not
         * accessed by any other context at this point.
@@ -2971,7 +2950,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
        if (lrucare) {
                if (was_on_lru) {
                        lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
-                       VM_BUG_ON(PageLRU(page));
+                       VM_BUG_ON_PAGE(PageLRU(page), page);
                        SetPageLRU(page);
                        add_page_to_lru_list(page, lruvec, page_lru(page));
                }
@@ -2997,10 +2976,12 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 static DEFINE_MUTEX(set_limit_mutex);
 
 #ifdef CONFIG_MEMCG_KMEM
+static DEFINE_MUTEX(activate_kmem_mutex);
+
 static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
 {
        return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) &&
-               (memcg->kmem_account_flags & KMEM_ACCOUNTED_MASK);
+               memcg_kmem_is_active(memcg);
 }
 
 /*
@@ -3099,16 +3080,6 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
                css_put(&memcg->css);
 }
 
-void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep)
-{
-       if (!memcg)
-               return;
-
-       mutex_lock(&memcg->slab_caches_mutex);
-       list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
-       mutex_unlock(&memcg->slab_caches_mutex);
-}
-
 /*
 * helper for accessing a memcg's index. It will be used as an index in the
  * child cache array in kmem_cache, and also to derive its name. This function
@@ -3119,43 +3090,6 @@ int memcg_cache_id(struct mem_cgroup *memcg)
        return memcg ? memcg->kmemcg_id : -1;
 }
 
-/*
- * This ends up being protected by the set_limit mutex, during normal
- * operation, because that is its main call site.
- *
- * But when we create a new cache, we can call this as well if its parent
- * is kmem-limited. That will have to hold set_limit_mutex as well.
- */
-int memcg_update_cache_sizes(struct mem_cgroup *memcg)
-{
-       int num, ret;
-
-       num = ida_simple_get(&kmem_limited_groups,
-                               0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
-       if (num < 0)
-               return num;
-       /*
-        * After this point, kmem_accounted (that we test atomically in
-        * the beginning of this conditional), is no longer 0. This
-        * guarantees only one process will set the following boolean
-        * to true. We don't need test_and_set because we're protected
-        * by the set_limit_mutex anyway.
-        */
-       memcg_kmem_set_activated(memcg);
-
-       ret = memcg_update_all_caches(num+1);
-       if (ret) {
-               ida_simple_remove(&kmem_limited_groups, num);
-               memcg_kmem_clear_activated(memcg);
-               return ret;
-       }
-
-       memcg->kmemcg_id = num;
-       INIT_LIST_HEAD(&memcg->memcg_slab_caches);
-       mutex_init(&memcg->slab_caches_mutex);
-       return 0;
-}
-
 static size_t memcg_caches_array_size(int num_groups)
 {
        ssize_t size;
@@ -3192,18 +3126,17 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
 
        if (num_groups > memcg_limited_groups_array_size) {
                int i;
+               struct memcg_cache_params *new_params;
                ssize_t size = memcg_caches_array_size(num_groups);
 
                size *= sizeof(void *);
                size += offsetof(struct memcg_cache_params, memcg_caches);
 
-               s->memcg_params = kzalloc(size, GFP_KERNEL);
-               if (!s->memcg_params) {
-                       s->memcg_params = cur_params;
+               new_params = kzalloc(size, GFP_KERNEL);
+               if (!new_params)
                        return -ENOMEM;
-               }
 
-               s->memcg_params->is_root_cache = true;
+               new_params->is_root_cache = true;
 
                /*
                 * There is the chance it will be bigger than
@@ -3217,7 +3150,7 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
                for (i = 0; i < memcg_limited_groups_array_size; i++) {
                        if (!cur_params->memcg_caches[i])
                                continue;
-                       s->memcg_params->memcg_caches[i] =
+                       new_params->memcg_caches[i] =
                                                cur_params->memcg_caches[i];
                }
 
@@ -3230,13 +3163,15 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
                 * bigger than the others. And all updates will reset this
                 * anyway.
                 */
-               kfree(cur_params);
+               rcu_assign_pointer(s->memcg_params, new_params);
+               if (cur_params)
+                       kfree_rcu(cur_params, rcu_head);
        }
        return 0;
 }
 
-int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
-                        struct kmem_cache *root_cache)
+int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
+                            struct kmem_cache *root_cache)
 {
        size_t size;
 
@@ -3264,35 +3199,85 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
        return 0;
 }
 
-void memcg_release_cache(struct kmem_cache *s)
+void memcg_free_cache_params(struct kmem_cache *s)
+{
+       kfree(s->memcg_params);
+}
+
+void memcg_register_cache(struct kmem_cache *s)
 {
        struct kmem_cache *root;
        struct mem_cgroup *memcg;
        int id;
 
-       /*
-        * This happens, for instance, when a root cache goes away before we
-        * add any memcg.
-        */
-       if (!s->memcg_params)
+       if (is_root_cache(s))
                return;
 
-       if (s->memcg_params->is_root_cache)
-               goto out;
+       /*
+        * Holding the slab_mutex assures nobody will touch the memcg_caches
+        * array while we are modifying it.
+        */
+       lockdep_assert_held(&slab_mutex);
 
+       root = s->memcg_params->root_cache;
        memcg = s->memcg_params->memcg;
-       id  = memcg_cache_id(memcg);
+       id = memcg_cache_id(memcg);
+
+       css_get(&memcg->css);
+
+       /*
+        * Since readers won't lock (see cache_from_memcg_idx()), we need a
+        * barrier here to ensure nobody will see the kmem_cache partially
+        * initialized.
+        */
+       smp_wmb();
+
+       /*
+        * Initialize the pointer to this cache in its parent's memcg_params
+        * before adding it to the memcg_slab_caches list, otherwise we can
+        * fail to convert memcg_params_to_cache() while traversing the list.
+        */
+       VM_BUG_ON(root->memcg_params->memcg_caches[id]);
+       root->memcg_params->memcg_caches[id] = s;
+
+       mutex_lock(&memcg->slab_caches_mutex);
+       list_add(&s->memcg_params->list, &memcg->memcg_slab_caches);
+       mutex_unlock(&memcg->slab_caches_mutex);
+}
+
+void memcg_unregister_cache(struct kmem_cache *s)
+{
+       struct kmem_cache *root;
+       struct mem_cgroup *memcg;
+       int id;
+
+       if (is_root_cache(s))
+               return;
+
+       /*
+        * Holding the slab_mutex assures nobody will touch the memcg_caches
+        * array while we are modifying it.
+        */
+       lockdep_assert_held(&slab_mutex);
 
        root = s->memcg_params->root_cache;
-       root->memcg_params->memcg_caches[id] = NULL;
+       memcg = s->memcg_params->memcg;
+       id = memcg_cache_id(memcg);
 
        mutex_lock(&memcg->slab_caches_mutex);
        list_del(&s->memcg_params->list);
        mutex_unlock(&memcg->slab_caches_mutex);
 
+       /*
+        * Clear the pointer to this cache in its parent's memcg_params only
+        * after removing it from the memcg_slab_caches list, otherwise we can
+        * fail to convert memcg_params_to_cache() while traversing the list.
+        */
+       VM_BUG_ON(!root->memcg_params->memcg_caches[id]);
+       root->memcg_params->memcg_caches[id] = NULL;
+
        css_put(&memcg->css);
-out:
-       kfree(s->memcg_params);
 }
 
 /*
@@ -3391,27 +3376,16 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
        schedule_work(&cachep->memcg_params->destroy);
 }
 
-/*
- * This lock protects updaters, not readers. We want readers to be as fast as
- * they can, and they will either see NULL or a valid cache value. Our model
- * allow them to see NULL, in which case the root memcg will be selected.
- *
- * We need this lock because multiple allocations to the same cache from a non
- * will span more than one worker. Only one of them can create the cache.
- */
-static DEFINE_MUTEX(memcg_cache_mutex);
-
-/*
- * Called with memcg_cache_mutex held
- */
-static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
-                                        struct kmem_cache *s)
+static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
+                                                 struct kmem_cache *s)
 {
        struct kmem_cache *new;
        static char *tmp_name = NULL;
+       static DEFINE_MUTEX(mutex);     /* protects tmp_name */
 
-       lockdep_assert_held(&memcg_cache_mutex);
+       BUG_ON(!memcg_can_account_kmem(memcg));
 
+       mutex_lock(&mutex);
        /*
         * kmem_cache_create_memcg duplicates the given name and
         * cgroup_name for this name requires RCU context.
@@ -3434,47 +3408,13 @@ static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
 
        if (new)
                new->allocflags |= __GFP_KMEMCG;
+       else
+               new = s;
 
+       mutex_unlock(&mutex);
        return new;
 }
 
-static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
-                                                 struct kmem_cache *cachep)
-{
-       struct kmem_cache *new_cachep;
-       int idx;
-
-       BUG_ON(!memcg_can_account_kmem(memcg));
-
-       idx = memcg_cache_id(memcg);
-
-       mutex_lock(&memcg_cache_mutex);
-       new_cachep = cache_from_memcg_idx(cachep, idx);
-       if (new_cachep) {
-               css_put(&memcg->css);
-               goto out;
-       }
-
-       new_cachep = kmem_cache_dup(memcg, cachep);
-       if (new_cachep == NULL) {
-               new_cachep = cachep;
-               css_put(&memcg->css);
-               goto out;
-       }
-
-       atomic_set(&new_cachep->memcg_params->nr_pages , 0);
-
-       cachep->memcg_params->memcg_caches[idx] = new_cachep;
-       /*
-        * the readers won't lock, make sure everybody sees the updated value,
-        * so they won't put stuff in the queue again for no reason
-        */
-       wmb();
-out:
-       mutex_unlock(&memcg_cache_mutex);
-       return new_cachep;
-}
-
 void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 {
        struct kmem_cache *c;
@@ -3492,9 +3432,10 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
         *
         * Still, we don't want anyone else freeing memcg_caches under our
         * noses, which can happen if a new memcg comes to life. As usual,
-        * we'll take the set_limit_mutex to protect ourselves against this.
+        * we'll take the activate_kmem_mutex to protect ourselves against
+        * this.
         */
-       mutex_lock(&set_limit_mutex);
+       mutex_lock(&activate_kmem_mutex);
        for_each_memcg_cache_index(i) {
                c = cache_from_memcg_idx(s, i);
                if (!c)
@@ -3517,7 +3458,7 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
                cancel_work_sync(&c->memcg_params->destroy);
                kmem_cache_destroy(c);
        }
-       mutex_unlock(&set_limit_mutex);
+       mutex_unlock(&activate_kmem_mutex);
 }
 
 struct create_work {
@@ -3549,6 +3490,7 @@ static void memcg_create_cache_work_func(struct work_struct *w)
 
        cw = container_of(w, struct create_work, work);
        memcg_create_kmem_cache(cw->memcg, cw->cachep);
+       css_put(&cw->memcg->css);
        kfree(cw);
 }
 
@@ -3608,7 +3550,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
                                          gfp_t gfp)
 {
        struct mem_cgroup *memcg;
-       int idx;
+       struct kmem_cache *memcg_cachep;
 
        VM_BUG_ON(!cachep->memcg_params);
        VM_BUG_ON(!cachep->memcg_params->is_root_cache);
@@ -3622,15 +3564,9 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
        if (!memcg_can_account_kmem(memcg))
                goto out;
 
-       idx = memcg_cache_id(memcg);
-
-       /*
-        * barrier to mare sure we're always seeing the up to date value.  The
-        * code updating memcg_caches will issue a write barrier to match this.
-        */
-       read_barrier_depends();
-       if (likely(cache_from_memcg_idx(cachep, idx))) {
-               cachep = cache_from_memcg_idx(cachep, idx);
+       memcg_cachep = cache_from_memcg_idx(cachep, memcg_cache_id(memcg));
+       if (likely(memcg_cachep)) {
+               cachep = memcg_cachep;
                goto out;
        }
 
@@ -3784,7 +3720,7 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
        if (!memcg)
                return;
 
-       VM_BUG_ON(mem_cgroup_is_root(memcg));
+       VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
        memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
 }
 #else
@@ -3863,7 +3799,7 @@ static int mem_cgroup_move_account(struct page *page,
        bool anon = PageAnon(page);
 
        VM_BUG_ON(from == to);
-       VM_BUG_ON(PageLRU(page));
+       VM_BUG_ON_PAGE(PageLRU(page), page);
        /*
         * The page is isolated from LRU. So, collapse function
         * will not handle this page. But page splitting can happen.
@@ -3956,7 +3892,7 @@ static int mem_cgroup_move_parent(struct page *page,
                parent = root_mem_cgroup;
 
        if (nr_pages > 1) {
-               VM_BUG_ON(!PageTransHuge(page));
+               VM_BUG_ON_PAGE(!PageTransHuge(page), page);
                flags = compound_lock_irqsave(page);
        }
 
@@ -3990,7 +3926,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 
        if (PageTransHuge(page)) {
                nr_pages <<= compound_order(page);
-               VM_BUG_ON(!PageTransHuge(page));
+               VM_BUG_ON_PAGE(!PageTransHuge(page), page);
                /*
                 * Never OOM-kill a process for a huge page.  The
                 * fault handler will fall back to regular pages.
@@ -4010,8 +3946,8 @@ int mem_cgroup_newpage_charge(struct page *page,
 {
        if (mem_cgroup_disabled())
                return 0;
-       VM_BUG_ON(page_mapped(page));
-       VM_BUG_ON(page->mapping && !PageAnon(page));
+       VM_BUG_ON_PAGE(page_mapped(page), page);
+       VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
        VM_BUG_ON(!mm);
        return mem_cgroup_charge_common(page, mm, gfp_mask,
                                        MEM_CGROUP_CHARGE_TYPE_ANON);
@@ -4215,7 +4151,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
 
        if (PageTransHuge(page)) {
                nr_pages <<= compound_order(page);
-               VM_BUG_ON(!PageTransHuge(page));
+               VM_BUG_ON_PAGE(!PageTransHuge(page), page);
        }
        /*
         * Check if our page_cgroup is valid
@@ -4307,7 +4243,7 @@ void mem_cgroup_uncharge_page(struct page *page)
        /* early check. */
        if (page_mapped(page))
                return;
-       VM_BUG_ON(page->mapping && !PageAnon(page));
+       VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
        /*
         * If the page is in swap cache, uncharge should be deferred
         * to the swap path, which also properly accounts swap usage
@@ -4327,8 +4263,8 @@ void mem_cgroup_uncharge_page(struct page *page)
 
 void mem_cgroup_uncharge_cache_page(struct page *page)
 {
-       VM_BUG_ON(page_mapped(page));
-       VM_BUG_ON(page->mapping);
+       VM_BUG_ON_PAGE(page_mapped(page), page);
+       VM_BUG_ON_PAGE(page->mapping, page);
        __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false);
 }
 
@@ -5186,11 +5122,23 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
        return val;
 }
 
-static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val)
-{
-       int ret = -EINVAL;
 #ifdef CONFIG_MEMCG_KMEM
-       struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+/* should be called with activate_kmem_mutex held */
+static int __memcg_activate_kmem(struct mem_cgroup *memcg,
+                                unsigned long long limit)
+{
+       int err = 0;
+       int memcg_id;
+
+       if (memcg_kmem_is_active(memcg))
+               return 0;
+
+       /*
+        * We are going to allocate memory for data shared by all memory
+        * cgroups so let's stop accounting here.
+        */
+       memcg_stop_kmem_account();
+
        /*
         * For simplicity, we won't allow this to be disabled.  It also can't
         * be changed if the cgroup has children already, or if tasks had
@@ -5204,72 +5152,101 @@ static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val)
         * of course permitted.
         */
        mutex_lock(&memcg_create_mutex);
-       mutex_lock(&set_limit_mutex);
-       if (!memcg->kmem_account_flags && val != RES_COUNTER_MAX) {
-               if (cgroup_task_count(css->cgroup) || memcg_has_children(memcg)) {
-                       ret = -EBUSY;
-                       goto out;
-               }
-               ret = res_counter_set_limit(&memcg->kmem, val);
-               VM_BUG_ON(ret);
+       if (cgroup_task_count(memcg->css.cgroup) || memcg_has_children(memcg))
+               err = -EBUSY;
+       mutex_unlock(&memcg_create_mutex);
+       if (err)
+               goto out;
 
-               ret = memcg_update_cache_sizes(memcg);
-               if (ret) {
-                       res_counter_set_limit(&memcg->kmem, RES_COUNTER_MAX);
-                       goto out;
-               }
-               static_key_slow_inc(&memcg_kmem_enabled_key);
-               /*
-                * setting the active bit after the inc will guarantee no one
-                * starts accounting before all call sites are patched
-                */
-               memcg_kmem_set_active(memcg);
-       } else
-               ret = res_counter_set_limit(&memcg->kmem, val);
+       memcg_id = ida_simple_get(&kmem_limited_groups,
+                                 0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
+       if (memcg_id < 0) {
+               err = memcg_id;
+               goto out;
+       }
+
+       /*
+        * Make sure we have enough space for this cgroup in each root cache's
+        * memcg_params.
+        */
+       err = memcg_update_all_caches(memcg_id + 1);
+       if (err)
+               goto out_rmid;
+
+       memcg->kmemcg_id = memcg_id;
+       INIT_LIST_HEAD(&memcg->memcg_slab_caches);
+       mutex_init(&memcg->slab_caches_mutex);
+
+       /*
+        * We couldn't have accounted to this cgroup, because it hasn't got the
+        * active bit set yet, so this should succeed.
+        */
+       err = res_counter_set_limit(&memcg->kmem, limit);
+       VM_BUG_ON(err);
+
+       static_key_slow_inc(&memcg_kmem_enabled_key);
+       /*
+        * Setting the active bit after enabling static branching will
+        * guarantee no one starts accounting before all call sites are
+        * patched.
+        */
+       memcg_kmem_set_active(memcg);
 out:
-       mutex_unlock(&set_limit_mutex);
-       mutex_unlock(&memcg_create_mutex);
-#endif
+       memcg_resume_kmem_account();
+       return err;
+
+out_rmid:
+       ida_simple_remove(&kmem_limited_groups, memcg_id);
+       goto out;
+}
+
+static int memcg_activate_kmem(struct mem_cgroup *memcg,
+                              unsigned long long limit)
+{
+       int ret;
+
+       mutex_lock(&activate_kmem_mutex);
+       ret = __memcg_activate_kmem(memcg, limit);
+       mutex_unlock(&activate_kmem_mutex);
+       return ret;
+}
+
+static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
+                                  unsigned long long val)
+{
+       int ret;
+
+       if (!memcg_kmem_is_active(memcg))
+               ret = memcg_activate_kmem(memcg, val);
+       else
+               ret = res_counter_set_limit(&memcg->kmem, val);
        return ret;
 }
 
-#ifdef CONFIG_MEMCG_KMEM
 static int memcg_propagate_kmem(struct mem_cgroup *memcg)
 {
        int ret = 0;
        struct mem_cgroup *parent = parent_mem_cgroup(memcg);
-       if (!parent)
-               goto out;
 
-       memcg->kmem_account_flags = parent->kmem_account_flags;
-       /*
-        * When that happen, we need to disable the static branch only on those
-        * memcgs that enabled it. To achieve this, we would be forced to
-        * complicate the code by keeping track of which memcgs were the ones
-        * that actually enabled limits, and which ones got it from its
-        * parents.
-        *
-        * It is a lot simpler just to do static_key_slow_inc() on every child
-        * that is accounted.
-        */
-       if (!memcg_kmem_is_active(memcg))
-               goto out;
+       if (!parent)
+               return 0;
 
+       mutex_lock(&activate_kmem_mutex);
        /*
-        * __mem_cgroup_free() will issue static_key_slow_dec() because this
-        * memcg is active already. If the later initialization fails then the
-        * cgroup core triggers the cleanup so we do not have to do it here.
+        * If the parent cgroup is not kmem-active now, it cannot be activated
+        * after this point, because it has at least one child already.
         */
-       static_key_slow_inc(&memcg_kmem_enabled_key);
-
-       mutex_lock(&set_limit_mutex);
-       memcg_stop_kmem_account();
-       ret = memcg_update_cache_sizes(memcg);
-       memcg_resume_kmem_account();
-       mutex_unlock(&set_limit_mutex);
-out:
+       if (memcg_kmem_is_active(parent))
+               ret = __memcg_activate_kmem(memcg, RES_COUNTER_MAX);
+       mutex_unlock(&activate_kmem_mutex);
        return ret;
 }
+#else
+static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
+                                  unsigned long long val)
+{
+       return -EINVAL;
+}
 #endif /* CONFIG_MEMCG_KMEM */
 
 /*
@@ -5303,7 +5280,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
                else if (type == _MEMSWAP)
                        ret = mem_cgroup_resize_memsw_limit(memcg, val);
                else if (type == _KMEM)
-                       ret = memcg_update_kmem_limit(css, val);
+                       ret = memcg_update_kmem_limit(memcg, val);
                else
                        return -EINVAL;
                break;
@@ -6402,14 +6379,12 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 static struct mem_cgroup *mem_cgroup_alloc(void)
 {
        struct mem_cgroup *memcg;
-       size_t size = memcg_size();
+       size_t size;
 
-       /* Can be very big if nr_node_ids is very big */
-       if (size < PAGE_SIZE)
-               memcg = kzalloc(size, GFP_KERNEL);
-       else
-               memcg = vzalloc(size);
+       size = sizeof(struct mem_cgroup);
+       size += nr_node_ids * sizeof(struct mem_cgroup_per_node *);
 
+       memcg = kzalloc(size, GFP_KERNEL);
        if (!memcg)
                return NULL;
 
@@ -6420,10 +6395,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
        return memcg;
 
 out_free:
-       if (size < PAGE_SIZE)
-               kfree(memcg);
-       else
-               vfree(memcg);
+       kfree(memcg);
        return NULL;
 }
 
@@ -6441,7 +6413,6 @@ out_free:
 static void __mem_cgroup_free(struct mem_cgroup *memcg)
 {
        int node;
-       size_t size = memcg_size();
 
        mem_cgroup_remove_from_trees(memcg);
 
@@ -6462,10 +6433,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
         * the cgroup_lock.
         */
        disarm_static_keys(memcg);
-       if (size < PAGE_SIZE)
-               kfree(memcg);
-       else
-               vfree(memcg);
+       kfree(memcg);
 }
 
 /*
@@ -6546,7 +6514,6 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
        struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
-       int error = 0;
 
        if (css->cgroup->id > MEM_CGROUP_ID_MAX)
                return -ENOSPC;
@@ -6581,10 +6548,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
                if (parent != root_mem_cgroup)
                        mem_cgroup_subsys.broken_hierarchy = true;
        }
-
-       error = memcg_init_kmem(memcg, &mem_cgroup_subsys);
        mutex_unlock(&memcg_create_mutex);
-       return error;
+
+       return memcg_init_kmem(memcg, &mem_cgroup_subsys);
 }
 
 /*
@@ -6893,7 +6859,7 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
        enum mc_target_type ret = MC_TARGET_NONE;
 
        page = pmd_page(pmd);
-       VM_BUG_ON(!page || !PageHead(page));
+       VM_BUG_ON_PAGE(!page || !PageHead(page), page);
        if (!move_anon())
                return ret;
        pc = lookup_page_cgroup(page);
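
Many hunks in this file, and in the mm/memory.c, mm/migrate.c and mm/mlock.c diffs below, convert VM_BUG_ON(x) into VM_BUG_ON_PAGE(x, page) so the offending page is dumped before the BUG. The macro itself comes from an mmdebug.h change elsewhere in this series; as a rough approximation (not the exact upstream definition) the call sites rely on something like:

	#ifdef CONFIG_DEBUG_VM
	#define VM_BUG_ON_PAGE(cond, page)					\
		do {								\
			if (unlikely(cond)) {					\
				dump_page(page, "VM_BUG_ON_PAGE(" #cond ")");	\
				BUG();						\
			}							\
		} while (0)
	#else
	#define VM_BUG_ON_PAGE(cond, page)	do { } while (0)
	#endif

The two-argument dump_page(page, reason) form matches the mm/memory.c and mm/memory_hotplug.c hunks below.
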
index fabe55046c1d7b9638172aa8e26ecb4f3d6ed4b9..4f08a2d61487f3c45dc01638c31b6aff689ce6e7 100644 (file)
@@ -611,7 +611,7 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
 }
 
 /*
- * Dirty cache page page
+ * Dirty pagecache page
  * Issues: when the error hit a hole page the error is not properly
  * propagated.
  */
@@ -856,14 +856,14 @@ static int page_action(struct page_state *ps, struct page *p,
  * the pages and send SIGBUS to the processes if the data was dirty.
  */
 static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
-                                 int trapno, int flags)
+                                 int trapno, int flags, struct page **hpagep)
 {
        enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
        struct address_space *mapping;
        LIST_HEAD(tokill);
        int ret;
        int kill = 1, forcekill;
-       struct page *hpage = compound_head(p);
+       struct page *hpage = *hpagep;
        struct page *ppage;
 
        if (PageReserved(p) || PageSlab(p))
@@ -942,11 +942,14 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
                         * We pinned the head page for hwpoison handling,
                         * now we split the thp and we are interested in
                         * the hwpoisoned raw page, so move the refcount
-                        * to it.
+                        * to it. Similarly, page lock is shifted.
                         */
                        if (hpage != p) {
                                put_page(hpage);
                                get_page(p);
+                               lock_page(p);
+                               unlock_page(hpage);
+                               *hpagep = p;
                        }
                        /* THP is split, so ppage should be the real poisoned page. */
                        ppage = p;
@@ -964,17 +967,11 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
        if (kill)
                collect_procs(ppage, &tokill);
 
-       if (hpage != ppage)
-               lock_page(ppage);
-
        ret = try_to_unmap(ppage, ttu);
        if (ret != SWAP_SUCCESS)
                printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n",
                                pfn, page_mapcount(ppage));
 
-       if (hpage != ppage)
-               unlock_page(ppage);
-
        /*
         * Now that the dirty bit has been propagated to the
         * struct page and all unmaps done we can decide if
@@ -1193,8 +1190,12 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
        /*
         * Now take care of user space mappings.
         * Abort on fail: __delete_from_page_cache() assumes unmapped page.
+        *
+        * When the raw error page is a thp tail page, hpage points to the raw
+        * page after the thp split.
         */
-       if (hwpoison_user_mappings(p, pfn, trapno, flags) != SWAP_SUCCESS) {
+       if (hwpoison_user_mappings(p, pfn, trapno, flags, &hpage)
+           != SWAP_SUCCESS) {
                printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn);
                res = -EBUSY;
                goto out;
@@ -1585,7 +1586,13 @@ static int __soft_offline_page(struct page *page, int flags)
                ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
                                        MIGRATE_SYNC, MR_MEMORY_FAILURE);
                if (ret) {
-                       putback_lru_pages(&pagelist);
+                       if (!list_empty(&pagelist)) {
+                               list_del(&page->lru);
+                               dec_zone_page_state(page, NR_ISOLATED_ANON +
+                                               page_is_file_cache(page));
+                               putback_lru_page(page);
+                       }
+
                        pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
                                pfn, ret, page->flags);
                        if (ret > 0)
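
The __soft_offline_page() hunk above, like the migrate_misplaced_page() hunk in mm/migrate.c below, open-codes putting a single isolated page back on the LRU now that putback_lru_pages() is removed. Purely for readability, the repeated fragment is equivalent to a helper along these lines (hypothetical, not part of the patch):

	static void putback_single_isolated_page(struct page *page)
	{
		list_del(&page->lru);
		dec_zone_page_state(page, NR_ISOLATED_ANON +
				    page_is_file_cache(page));
		putback_lru_page(page);
	}
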
index 6768ce9e57d29b6d8076b11c62c2097662f334d5..be6a0c0d4ae081d48edd26f09d37f67cc70b1c52 100644 (file)
@@ -59,6 +59,7 @@
 #include <linux/gfp.h>
 #include <linux/migrate.h>
 #include <linux/string.h>
+#include <linux/dma-debug.h>
 
 #include <asm/io.h>
 #include <asm/pgalloc.h>
@@ -288,7 +289,7 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
                        return 0;
                batch = tlb->active;
        }
-       VM_BUG_ON(batch->nr > batch->max);
+       VM_BUG_ON_PAGE(batch->nr > batch->max, page);
 
        return batch->max - batch->nr;
 }
@@ -670,7 +671,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
                current->comm,
                (long long)pte_val(pte), (long long)pmd_val(*pmd));
        if (page)
-               dump_page(page);
+               dump_page(page, "bad pte");
        printk(KERN_ALERT
                "addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n",
                (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
@@ -2559,6 +2560,8 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
 
 static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
 {
+       debug_dma_assert_idle(src);
+
        /*
         * If the source page was a PFN mapping, we don't have
         * a "struct page" for it. We do a best-effort copy by
@@ -2699,7 +2702,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                                        goto unwritable_page;
                                }
                        } else
-                               VM_BUG_ON(!PageLocked(old_page));
+                               VM_BUG_ON_PAGE(!PageLocked(old_page), old_page);
 
                        /*
                         * Since we dropped the lock we need to revalidate
@@ -3355,7 +3358,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        if (unlikely(!(ret & VM_FAULT_LOCKED)))
                lock_page(vmf.page);
        else
-               VM_BUG_ON(!PageLocked(vmf.page));
+               VM_BUG_ON_PAGE(!PageLocked(vmf.page), vmf.page);
 
        /*
         * Should we do an early C-O-W break?
@@ -3392,7 +3395,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                                                goto unwritable_page;
                                        }
                                } else
-                                       VM_BUG_ON(!PageLocked(page));
+                                       VM_BUG_ON_PAGE(!PageLocked(page), page);
                                page_mkwrite = 1;
                        }
                }
@@ -4272,11 +4275,20 @@ void copy_user_huge_page(struct page *dst, struct page *src,
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
 #if USE_SPLIT_PTE_PTLOCKS && ALLOC_SPLIT_PTLOCKS
+
+static struct kmem_cache *page_ptl_cachep;
+
+void __init ptlock_cache_init(void)
+{
+       page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0,
+                       SLAB_PANIC, NULL);
+}
+
 bool ptlock_alloc(struct page *page)
 {
        spinlock_t *ptl;
 
-       ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
+       ptl = kmem_cache_alloc(page_ptl_cachep, GFP_KERNEL);
        if (!ptl)
                return false;
        page->ptl = ptl;
@@ -4285,6 +4297,6 @@ bool ptlock_alloc(struct page *page)
 
 void ptlock_free(struct page *page)
 {
-       kfree(page->ptl);
+       kmem_cache_free(page_ptl_cachep, page->ptl);
 }
 #endif
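
The last mm/memory.c hunk switches split-PTE-lock allocation from kmalloc() to a dedicated "page->ptl" slab cache created once by ptlock_cache_init(). A sketch of the consumer side under ALLOC_SPLIT_PTLOCKS; the function names here are illustrative, not taken from the patch:

	/* Illustrative only: how a page-table page would pick up its lock. */
	static bool pte_page_lock_init(struct page *pte_page)
	{
		if (!ptlock_alloc(pte_page))	/* kmem_cache_alloc() above */
			return false;
		spin_lock_init(pte_page->ptl);
		return true;
	}

	static void pte_page_lock_fini(struct page *pte_page)
	{
		ptlock_free(pte_page);		/* kmem_cache_free() above */
	}

A slab cache keeps the per-lock allocations cheap, and SLAB_PANIC makes a failure to create the cache fatal at boot rather than a latent NULL dereference later.
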
index 489f235502dbdde48f921d998896d78a56b0c1de..a650db29606fe73171df9d7c5326da56bbec500b 100644 (file)
@@ -9,7 +9,6 @@
 #include <linux/swap.h>
 #include <linux/interrupt.h>
 #include <linux/pagemap.h>
-#include <linux/bootmem.h>
 #include <linux/compiler.h>
 #include <linux/export.h>
 #include <linux/pagevec.h>
@@ -269,7 +268,7 @@ static void fix_zone_id(struct zone *zone, unsigned long start_pfn,
 }
 
 /* Can fail with -ENOMEM from allocating a wait table with vmalloc() or
- * alloc_bootmem_node_nopanic() */
+ * alloc_bootmem_node_nopanic()/memblock_virt_alloc_node_nopanic() */
 static int __ref ensure_zone_is_initialized(struct zone *zone,
                        unsigned long start_pfn, unsigned long num_pages)
 {
@@ -1108,17 +1107,18 @@ int __ref add_memory(int nid, u64 start, u64 size)
        if (ret)
                return ret;
 
-       lock_memory_hotplug();
-
        res = register_memory_resource(start, size);
        ret = -EEXIST;
        if (!res)
-               goto out;
+               return ret;
 
        {       /* Stupid hack to suppress address-never-null warning */
                void *p = NODE_DATA(nid);
                new_pgdat = !p;
        }
+
+       lock_memory_hotplug();
+
        new_node = !node_online(nid);
        if (new_node) {
                pgdat = hotadd_new_pgdat(nid, start);
@@ -1310,7 +1310,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 #ifdef CONFIG_DEBUG_VM
                        printk(KERN_ALERT "removing pfn %lx from LRU failed\n",
                               pfn);
-                       dump_page(page);
+                       dump_page(page, "failed to remove from LRU");
 #endif
                        put_page(page);
                        /* Because we don't have big zone->lock. we should
@@ -1446,6 +1446,7 @@ static int __init cmdline_parse_movable_node(char *p)
         * the kernel away from hotpluggable memory.
         */
        memblock_set_bottom_up(true);
+       movable_node_enabled = true;
 #else
        pr_warn("movable_node option not supported\n");
 #endif
index 431fd768fbf3ebda3c0ba4bd5e87158ee939c804..36cb46cddf61aacc1b47f3d925ad58cc7a9926bb 100644 (file)
@@ -1198,10 +1198,8 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int *
        }
 
        if (PageHuge(page)) {
-               if (vma)
-                       return alloc_huge_page_noerr(vma, address, 1);
-               else
-                       return NULL;
+               BUG_ON(!vma);
+               return alloc_huge_page_noerr(vma, address, 1);
        }
        /*
         * if !vma, alloc_page_vma() will use task or system default policy
@@ -2667,7 +2665,7 @@ static void __init check_numabalancing_enable(void)
 
        if (nr_node_ids > 1 && !numabalancing_override) {
                printk(KERN_INFO "Enabling automatic NUMA balancing. "
-                       "Configure with numa_balancing= or sysctl");
+                       "Configure with numa_balancing= or the kernel.numa_balancing sysctl");
                set_numabalancing_state(numabalancing_default);
        }
 }
index 9194375b230729fead356e8dd3c8f6bb951415ab..734704f6f29b33dc8256463fcb9de1c23621d057 100644 (file)
@@ -71,29 +71,13 @@ int migrate_prep_local(void)
        return 0;
 }
 
-/*
- * Add isolated pages on the list back to the LRU under page lock
- * to avoid leaking evictable pages back onto unevictable list.
- */
-void putback_lru_pages(struct list_head *l)
-{
-       struct page *page;
-       struct page *page2;
-
-       list_for_each_entry_safe(page, page2, l, lru) {
-               list_del(&page->lru);
-               dec_zone_page_state(page, NR_ISOLATED_ANON +
-                               page_is_file_cache(page));
-                       putback_lru_page(page);
-       }
-}
-
 /*
  * Put previously isolated pages back onto the appropriate lists
  * from where they were once taken off for compaction/migration.
  *
- * This function shall be used instead of putback_lru_pages(),
- * whenever the isolated pageset has been built by isolate_migratepages_range()
+ * This function shall be used whenever the isolated pageset has been
+ * built from lru, balloon or hugetlbfs pages. See isolate_migratepages_range()
+ * and isolate_huge_page().
  */
 void putback_movable_pages(struct list_head *l)
 {
@@ -199,7 +183,12 @@ out:
  */
 static void remove_migration_ptes(struct page *old, struct page *new)
 {
-       rmap_walk(new, remove_migration_pte, old);
+       struct rmap_walk_control rwc = {
+               .rmap_one = remove_migration_pte,
+               .arg = old,
+       };
+
+       rmap_walk(new, &rwc);
 }
 
 /*
@@ -510,7 +499,7 @@ void migrate_page_copy(struct page *newpage, struct page *page)
        if (PageUptodate(page))
                SetPageUptodate(newpage);
        if (TestClearPageActive(page)) {
-               VM_BUG_ON(PageUnevictable(page));
+               VM_BUG_ON_PAGE(PageUnevictable(page), page);
                SetPageActive(newpage);
        } else if (TestClearPageUnevictable(page))
                SetPageUnevictable(newpage);
@@ -563,14 +552,6 @@ void migrate_page_copy(struct page *newpage, struct page *page)
  *                    Migration functions
  ***********************************************************/
 
-/* Always fail migration. Used for mappings that are not movable */
-int fail_migrate_page(struct address_space *mapping,
-                       struct page *newpage, struct page *page)
-{
-       return -EIO;
-}
-EXPORT_SYMBOL(fail_migrate_page);
-
 /*
  * Common logic to directly migrate a single page suitable for
  * pages that do not use PagePrivate/PagePrivate2.
@@ -890,7 +871,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
         * free the metadata, so the page can be freed.
         */
        if (!page->mapping) {
-               VM_BUG_ON(PageAnon(page));
+               VM_BUG_ON_PAGE(PageAnon(page), page);
                if (page_has_private(page)) {
                        try_to_free_buffers(page);
                        goto uncharge;
@@ -1008,7 +989,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 {
        int rc = 0;
        int *result = NULL;
-       struct page *new_hpage = get_new_page(hpage, private, &result);
+       struct page *new_hpage;
        struct anon_vma *anon_vma = NULL;
 
        /*
@@ -1018,9 +999,12 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
         * tables or check whether the hugepage is pmd-based or not before
         * kicking migration.
         */
-       if (!hugepage_migration_support(page_hstate(hpage)))
+       if (!hugepage_migration_support(page_hstate(hpage))) {
+               putback_active_hugepage(hpage);
                return -ENOSYS;
+       }
 
+       new_hpage = get_new_page(hpage, private, &result);
        if (!new_hpage)
                return -ENOMEM;
 
@@ -1120,7 +1104,12 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
                                nr_succeeded++;
                                break;
                        default:
-                               /* Permanent failure */
+                               /*
+                                * Permanent failure (-EBUSY, -ENOSYS, etc.):
+                                * unlike the -EAGAIN case, the failed page is
+                                * removed from migration page list and not
+                                * retried in the next outer loop.
+                                */
                                nr_failed++;
                                break;
                        }
@@ -1594,35 +1583,42 @@ bool migrate_ratelimited(int node)
 }
 
 /* Returns true if the node is migrate rate-limited after the update */
-bool numamigrate_update_ratelimit(pg_data_t *pgdat, unsigned long nr_pages)
+static bool numamigrate_update_ratelimit(pg_data_t *pgdat,
+                                       unsigned long nr_pages)
 {
-       bool rate_limited = false;
-
        /*
         * Rate-limit the amount of data that is being migrated to a node.
         * Optimal placement is no good if the memory bus is saturated and
         * all the time is being spent migrating!
         */
-       spin_lock(&pgdat->numabalancing_migrate_lock);
        if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
+               spin_lock(&pgdat->numabalancing_migrate_lock);
                pgdat->numabalancing_migrate_nr_pages = 0;
                pgdat->numabalancing_migrate_next_window = jiffies +
                        msecs_to_jiffies(migrate_interval_millisecs);
+               spin_unlock(&pgdat->numabalancing_migrate_lock);
        }
-       if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages)
-               rate_limited = true;
-       else
-               pgdat->numabalancing_migrate_nr_pages += nr_pages;
-       spin_unlock(&pgdat->numabalancing_migrate_lock);
-       
-       return rate_limited;
+       if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
+               trace_mm_numa_migrate_ratelimit(current, pgdat->node_id,
+                                                               nr_pages);
+               return true;
+       }
+
+       /*
+        * This is an unlocked non-atomic update so errors are possible.
+        * The consequences are failing to migrate when we potentially should
+        * have, which is not severe enough to warrant locking. If it is ever
+        * a problem, it can be converted to a per-cpu counter.
+        */
+       pgdat->numabalancing_migrate_nr_pages += nr_pages;
+       return false;
 }
 
-int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
+static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 {
        int page_lru;
 
-       VM_BUG_ON(compound_order(page) && !PageTransHuge(page));
+       VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
 
        /* Avoid migrating to a node that is nearly full */
        if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
@@ -1705,7 +1701,12 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
        nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
                                     node, MIGRATE_ASYNC, MR_NUMA_MISPLACED);
        if (nr_remaining) {
-               putback_lru_pages(&migratepages);
+               if (!list_empty(&migratepages)) {
+                       list_del(&page->lru);
+                       dec_zone_page_state(page, NR_ISOLATED_ANON +
+                                       page_is_file_cache(page));
+                       putback_lru_page(page);
+               }
                isolated = 0;
        } else
                count_vm_numa_event(NUMA_PAGE_MIGRATE);
@@ -1752,8 +1753,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
        if (!new_page)
                goto out_fail;
 
-       page_cpupid_xchg_last(new_page, page_cpupid_last(page));
-
        isolated = numamigrate_isolate_page(pgdat, page);
        if (!isolated) {
                put_page(new_page);
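
remove_migration_ptes() above now drives rmap_walk() through a struct rmap_walk_control instead of passing the callback and its cookie as separate arguments. The same pattern, sketched for a made-up walker; only the .rmap_one and .arg members used above are assumed here, the struct itself is defined elsewhere in this series:

	/* Hypothetical walker counting the mappings of a page. */
	static int count_one(struct page *page, struct vm_area_struct *vma,
			     unsigned long addr, void *arg)
	{
		(*(int *)arg)++;
		return SWAP_AGAIN;	/* keep walking */
	}

	static int count_mappings(struct page *page)
	{
		int nr = 0;
		struct rmap_walk_control rwc = {
			.rmap_one = count_one,
			.arg = &nr,
		};

		rmap_walk(page, &rwc);
		return nr;
	}

The callback signature matches remove_migration_pte(), which is what the hunk above plugs into .rmap_one.
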
index da2be56a7b8fdd3c8391fd79e758f394dd63157a..101623378fbf636cda807db6deb9bc0ebbf8d6d3 100644 (file)
@@ -225,13 +225,6 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v
 
        end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
 
-       if (is_vm_hugetlb_page(vma)) {
-               mincore_hugetlb_page_range(vma, addr, end, vec);
-               return (end - addr) >> PAGE_SHIFT;
-       }
-
-       end = pmd_addr_end(addr, end);
-
        if (is_vm_hugetlb_page(vma))
                mincore_hugetlb_page_range(vma, addr, end, vec);
        else
index 192e6eebe4f240e4a8ece7cc0e10e902ff29f308..ffadd08b83ddbeb6fe4cccf4f5eaca75298d1693 100644 (file)
@@ -79,8 +79,6 @@ void clear_page_mlock(struct page *page)
  */
 void mlock_vma_page(struct page *page)
 {
-       BUG_ON(!PageLocked(page));
-
        if (!TestSetPageMlocked(page)) {
                mod_zone_page_state(page_zone(page), NR_MLOCK,
                                    hpage_nr_pages(page));
@@ -90,6 +88,26 @@ void mlock_vma_page(struct page *page)
        }
 }
 
+/*
+ * Isolate a page from LRU with optional get_page() pin.
+ * Assumes lru_lock already held and page already pinned.
+ */
+static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
+{
+       if (PageLRU(page)) {
+               struct lruvec *lruvec;
+
+               lruvec = mem_cgroup_page_lruvec(page, page_zone(page));
+               if (getpage)
+                       get_page(page);
+               ClearPageLRU(page);
+               del_page_from_lru_list(page, lruvec, page_lru(page));
+               return true;
+       }
+
+       return false;
+}
+
 /*
  * Finish munlock after successful page isolation
  *
@@ -126,9 +144,9 @@ static void __munlock_isolated_page(struct page *page)
 static void __munlock_isolation_failed(struct page *page)
 {
        if (PageUnevictable(page))
-               count_vm_event(UNEVICTABLE_PGSTRANDED);
+               __count_vm_event(UNEVICTABLE_PGSTRANDED);
        else
-               count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+               __count_vm_event(UNEVICTABLE_PGMUNLOCKED);
 }
 
 /**
@@ -152,28 +170,34 @@ static void __munlock_isolation_failed(struct page *page)
 unsigned int munlock_vma_page(struct page *page)
 {
        unsigned int nr_pages;
+       struct zone *zone = page_zone(page);
 
        BUG_ON(!PageLocked(page));
 
-       if (TestClearPageMlocked(page)) {
-               nr_pages = hpage_nr_pages(page);
-               mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
-               if (!isolate_lru_page(page))
-                       __munlock_isolated_page(page);
-               else
-                       __munlock_isolation_failed(page);
-       } else {
-               nr_pages = hpage_nr_pages(page);
-       }
-
        /*
-        * Regardless of the original PageMlocked flag, we determine nr_pages
-        * after touching the flag. This leaves a possible race with a THP page
-        * split, such that a whole THP page was munlocked, but nr_pages == 1.
-        * Returning a smaller mask due to that is OK, the worst that can
-        * happen is subsequent useless scanning of the former tail pages.
-        * The NR_MLOCK accounting can however become broken.
+        * Serialize with any parallel __split_huge_page_refcount() which
+        * might otherwise copy PageMlocked to part of the tail pages before
+        * we clear it in the head page. It also stabilizes hpage_nr_pages().
         */
+       spin_lock_irq(&zone->lru_lock);
+
+       nr_pages = hpage_nr_pages(page);
+       if (!TestClearPageMlocked(page))
+               goto unlock_out;
+
+       __mod_zone_page_state(zone, NR_MLOCK, -nr_pages);
+
+       if (__munlock_isolate_lru_page(page, true)) {
+               spin_unlock_irq(&zone->lru_lock);
+               __munlock_isolated_page(page);
+               goto out;
+       }
+       __munlock_isolation_failed(page);
+
+unlock_out:
+       spin_unlock_irq(&zone->lru_lock);
+
+out:
        return nr_pages - 1;
 }
 
@@ -253,8 +277,8 @@ static int __mlock_posix_error_return(long retval)
 static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec,
                int *pgrescued)
 {
-       VM_BUG_ON(PageLRU(page));
-       VM_BUG_ON(!PageLocked(page));
+       VM_BUG_ON_PAGE(PageLRU(page), page);
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
 
        if (page_mapcount(page) <= 1 && page_evictable(page)) {
                pagevec_add(pvec, page);
@@ -310,34 +334,24 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
                struct page *page = pvec->pages[i];
 
                if (TestClearPageMlocked(page)) {
-                       struct lruvec *lruvec;
-                       int lru;
-
-                       if (PageLRU(page)) {
-                               lruvec = mem_cgroup_page_lruvec(page, zone);
-                               lru = page_lru(page);
-                               /*
-                                * We already have pin from follow_page_mask()
-                                * so we can spare the get_page() here.
-                                */
-                               ClearPageLRU(page);
-                               del_page_from_lru_list(page, lruvec, lru);
-                       } else {
-                               __munlock_isolation_failed(page);
-                               goto skip_munlock;
-                       }
-
-               } else {
-skip_munlock:
                        /*
-                        * We won't be munlocking this page in the next phase
-                        * but we still need to release the follow_page_mask()
-                        * pin. We cannot do it under lru_lock however. If it's
-                        * the last pin, __page_cache_release would deadlock.
+                        * We already have pin from follow_page_mask()
+                        * so we can spare the get_page() here.
                         */
-                       pagevec_add(&pvec_putback, pvec->pages[i]);
-                       pvec->pages[i] = NULL;
+                       if (__munlock_isolate_lru_page(page, false))
+                               continue;
+                       else
+                               __munlock_isolation_failed(page);
                }
+
+               /*
+                * We won't be munlocking this page in the next phase
+                * but we still need to release the follow_page_mask()
+                * pin. We cannot do it under lru_lock however. If it's
+                * the last pin, __page_cache_release() would deadlock.
+                */
+               pagevec_add(&pvec_putback, pvec->pages[i]);
+               pvec->pages[i] = NULL;
        }
        delta_munlocked = -nr + pagevec_count(&pvec_putback);
        __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
@@ -709,19 +723,21 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
 
        lru_add_drain_all();    /* flush pagevec */
 
-       down_write(&current->mm->mmap_sem);
        len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
        start &= PAGE_MASK;
 
-       locked = len >> PAGE_SHIFT;
-       locked += current->mm->locked_vm;
-
        lock_limit = rlimit(RLIMIT_MEMLOCK);
        lock_limit >>= PAGE_SHIFT;
+       locked = len >> PAGE_SHIFT;
+
+       down_write(&current->mm->mmap_sem);
+
+       locked += current->mm->locked_vm;
 
        /* check against resource limits */
        if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
                error = do_mlock(start, len, 1);
+
        up_write(&current->mm->mmap_sem);
        if (!error)
                error = __mm_populate(start, len, 0);
@@ -732,11 +748,13 @@ SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
 {
        int ret;
 
-       down_write(&current->mm->mmap_sem);
        len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
        start &= PAGE_MASK;
+
+       down_write(&current->mm->mmap_sem);
        ret = do_mlock(start, len, 0);
        up_write(&current->mm->mmap_sem);
+
        return ret;
 }
 
@@ -781,12 +799,12 @@ SYSCALL_DEFINE1(mlockall, int, flags)
        if (flags & MCL_CURRENT)
                lru_add_drain_all();    /* flush pagevec */
 
-       down_write(&current->mm->mmap_sem);
-
        lock_limit = rlimit(RLIMIT_MEMLOCK);
        lock_limit >>= PAGE_SHIFT;
 
        ret = -ENOMEM;
+       down_write(&current->mm->mmap_sem);
+
        if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
            capable(CAP_IPC_LOCK))
                ret = do_mlockall(flags);
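Aside from the patch itself: a minimal userspace sketch of the RLIMIT_MEMLOCK check that the reordered mlock()/mlockall() paths above keep enforcing before do_mlock()/do_mlockall(). The 64 KiB limit and the exact errno are illustrative assumptions; an unprivileged caller asking for more than the limit is expected to be refused.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>

int main(void)
{
        struct rlimit rl = { .rlim_cur = 64 * 1024, .rlim_max = 64 * 1024 };
        size_t len = 1024 * 1024;       /* more than the 64 KiB limit above */
        void *buf;

        if (setrlimit(RLIMIT_MEMLOCK, &rl))
                perror("setrlimit");

        buf = malloc(len);
        if (!buf)
                return 1;

        /* Over RLIMIT_MEMLOCK and without CAP_IPC_LOCK: expected to fail. */
        if (mlock(buf, len))
                printf("mlock: %s (expected when over RLIMIT_MEMLOCK)\n",
                       strerror(errno));
        else
                puts("mlock succeeded (CAP_IPC_LOCK or a generous limit?)");

        free(buf);
        return 0;
}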
index 68562e92d50cba17ba52440f2ae809824a88e783..857a6434e3a58a85467df765ebeab08b8be6f88a 100644 (file)
@@ -202,5 +202,4 @@ static int __init mm_sysfs_init(void)
 
        return 0;
 }
-
-__initcall(mm_sysfs_init);
+pure_initcall(mm_sysfs_init);
index 834b2d785f1e2f2fdce59a608f28a94b02b5d82d..126d8b976bfd6a186c80b5cd3444765eda428e0e 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -86,6 +86,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
 
 int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;  /* heuristic overcommit */
 int sysctl_overcommit_ratio __read_mostly = 50;        /* default is 50% */
+unsigned long sysctl_overcommit_kbytes __read_mostly;
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
 unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
 unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */
@@ -1190,6 +1191,24 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
        return hint;
 }
 
+static inline int mlock_future_check(struct mm_struct *mm,
+                                    unsigned long flags,
+                                    unsigned long len)
+{
+       unsigned long locked, lock_limit;
+
+       /*  mlock MCL_FUTURE? */
+       if (flags & VM_LOCKED) {
+               locked = len >> PAGE_SHIFT;
+               locked += mm->locked_vm;
+               lock_limit = rlimit(RLIMIT_MEMLOCK);
+               lock_limit >>= PAGE_SHIFT;
+               if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+                       return -EAGAIN;
+       }
+       return 0;
+}
+
 /*
  * The caller must hold down_write(&current->mm->mmap_sem).
  */
@@ -1251,16 +1270,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
                if (!can_do_mlock())
                        return -EPERM;
 
-       /* mlock MCL_FUTURE? */
-       if (vm_flags & VM_LOCKED) {
-               unsigned long locked, lock_limit;
-               locked = len >> PAGE_SHIFT;
-               locked += mm->locked_vm;
-               lock_limit = rlimit(RLIMIT_MEMLOCK);
-               lock_limit >>= PAGE_SHIFT;
-               if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-                       return -EAGAIN;
-       }
+       if (mlock_future_check(mm, vm_flags, len))
+               return -EAGAIN;
 
        if (file) {
                struct inode *inode = file_inode(file);
@@ -2591,18 +2602,9 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
        if (error & ~PAGE_MASK)
                return error;
 
-       /*
-        * mlock MCL_FUTURE?
-        */
-       if (mm->def_flags & VM_LOCKED) {
-               unsigned long locked, lock_limit;
-               locked = len >> PAGE_SHIFT;
-               locked += mm->locked_vm;
-               lock_limit = rlimit(RLIMIT_MEMLOCK);
-               lock_limit >>= PAGE_SHIFT;
-               if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-                       return -EAGAIN;
-       }
+       error = mlock_future_check(mm, mm->def_flags, len);
+       if (error)
+               return error;
 
        /*
         * mm->mmap_sem is required to protect against another thread
@@ -3140,7 +3142,7 @@ static int init_user_reserve(void)
        sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
        return 0;
 }
-module_init(init_user_reserve)
+subsys_initcall(init_user_reserve);
 
 /*
  * Initialise sysctl_admin_reserve_kbytes.
@@ -3161,7 +3163,7 @@ static int init_admin_reserve(void)
        sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
        return 0;
 }
-module_init(init_admin_reserve)
+subsys_initcall(init_admin_reserve);
 
 /*
 * Reinitialise user and admin reserves if memory is added or removed.
@@ -3231,4 +3233,4 @@ static int __meminit init_reserve_notifier(void)
 
        return 0;
 }
-module_init(init_reserve_notifier)
+subsys_initcall(init_reserve_notifier);
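The new sysctl_overcommit_kbytes above sits next to sysctl_overcommit_ratio; under strict overcommit the commit limit comes from one of the two plus swap. A rough userspace sketch of that accounting follows; it is not the kernel's own limit computation, and treating a nonzero kbytes value as overriding the ratio is an assumption about the knob's intent.

#include <stdio.h>

static unsigned long long commit_limit_kb(unsigned long long ram_kb,
                                          unsigned long long swap_kb,
                                          unsigned long ratio,
                                          unsigned long overcommit_kbytes)
{
        unsigned long long limit;

        /* Assumption: a nonzero kbytes setting overrides the percentage. */
        if (overcommit_kbytes)
                limit = overcommit_kbytes;
        else
                limit = ram_kb * ratio / 100;

        return limit + swap_kb;
}

int main(void)
{
        /* 8 GiB RAM, 2 GiB swap, default ratio 50, no kbytes override */
        printf("CommitLimit ~ %llu kB\n",
               commit_limit_kb(8ULL << 20, 2ULL << 20, 50, 0));
        /* same machine, but overcommit_kbytes = 4194304 (4 GiB) */
        printf("CommitLimit ~ %llu kB\n",
               commit_limit_kb(8ULL << 20, 2ULL << 20, 50, 4194304));
        return 0;
}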
index 93e6089cb456a0f036dd38fc2d5ccbd66143f5c4..41cefdf0aaddc46144187cba72ddf3e2629a9f02 100644 (file)
@@ -329,5 +329,4 @@ static int __init mmu_notifier_init(void)
 {
        return init_srcu_struct(&srcu);
 }
-
-module_init(mmu_notifier_init);
+subsys_initcall(mmu_notifier_init);
index bb53a6591aea1373d6bc74d5bb3e847650af8d24..7332c1785744fa0517a213b489e9c5b6bf350937 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/migrate.h>
 #include <linux/perf_event.h>
+#include <linux/ksm.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
@@ -63,7 +64,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
                                ptent = *pte;
                                page = vm_normal_page(vma, addr, oldpte);
-                               if (page) {
+                               if (page && !PageKsm(page)) {
                                        if (!pte_numa(oldpte)) {
                                                ptent = pte_mknuma(ptent);
                                                set_pte_at(mm, addr, pte, ptent);
index 2c254d37465549b0230b543cd9ac7c4d60cdf7c1..e2906a5428fd00b1d016ffa5b6bde16c61687746 100644 (file)
@@ -41,11 +41,13 @@ static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
        if (limit > memblock.current_limit)
                limit = memblock.current_limit;
 
-       addr = memblock_find_in_range_node(goal, limit, size, align, nid);
+       addr = memblock_find_in_range_node(size, align, goal, limit, nid);
        if (!addr)
                return NULL;
 
-       memblock_reserve(addr, size);
+       if (memblock_reserve(addr, size))
+               return NULL;
+
        ptr = phys_to_virt(addr);
        memset(ptr, 0, size);
        /*
@@ -117,14 +119,22 @@ static unsigned long __init free_low_memory_core_early(void)
        phys_addr_t start, end, size;
        u64 i;
 
-       for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL)
+       for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL)
                count += __free_memory_core(start, end);
 
-       /* free range that is used for reserved array if we allocate it */
+#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
+
+       /* Free memblock.reserved array if it was allocated */
        size = get_allocated_memblock_reserved_regions_info(&start);
        if (size)
                count += __free_memory_core(start, start + size);
 
+       /* Free memblock.memory array if it was allocated */
+       size = get_allocated_memblock_memory_regions_info(&start);
+       if (size)
+               count += __free_memory_core(start, start + size);
+#endif
+
        return count;
 }
 
@@ -161,7 +171,7 @@ unsigned long __init free_all_bootmem(void)
        reset_all_zones_managed_pages();
 
        /*
-        * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
+        * We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id
         *  because in some cases, e.g. when Node0 has no RAM installed,
         *  low RAM will be on Node1
         */
@@ -215,7 +225,7 @@ static void * __init ___alloc_bootmem_nopanic(unsigned long size,
 
 restart:
 
-       ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit);
+       ptr = __alloc_memory_core_early(NUMA_NO_NODE, size, align, goal, limit);
 
        if (ptr)
                return ptr;
@@ -299,7 +309,7 @@ again:
        if (ptr)
                return ptr;
 
-       ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
+       ptr = __alloc_memory_core_early(NUMA_NO_NODE, size, align,
                                        goal, limit);
        if (ptr)
                return ptr;
index fec093adad9c1d65799206805f3d95fe19a1da88..8740213b1647019c9fdbaaf355da3f2e5e632d99 100644 (file)
@@ -60,6 +60,7 @@ unsigned long highest_memmap_pfn;
 struct percpu_counter vm_committed_as;
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50; /* default is 50% */
+unsigned long sysctl_overcommit_kbytes __read_mostly;
 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
 int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
 unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
index 1e4a600a6163645897a42defaf21f437fdf431a6..054ff47c4478ddb6d6ef77cb66f6fd8330bdbea4 100644 (file)
@@ -47,19 +47,21 @@ static DEFINE_SPINLOCK(zone_scan_lock);
 #ifdef CONFIG_NUMA
 /**
 * has_intersects_mems_allowed() - check task eligibility for kill
- * @tsk: task struct of which task to consider
+ * @start: task struct of which task to consider
  * @mask: nodemask passed to page allocator for mempolicy ooms
  *
  * Task eligibility is determined by whether or not a candidate task, @tsk,
  * shares the same mempolicy nodes as current if it is bound by such a policy
  * and whether or not it has the same set of allowed cpuset nodes.
  */
-static bool has_intersects_mems_allowed(struct task_struct *tsk,
+static bool has_intersects_mems_allowed(struct task_struct *start,
                                        const nodemask_t *mask)
 {
-       struct task_struct *start = tsk;
+       struct task_struct *tsk;
+       bool ret = false;
 
-       do {
+       rcu_read_lock();
+       for_each_thread(start, tsk) {
                if (mask) {
                        /*
                         * If this is a mempolicy constrained oom, tsk's
@@ -67,19 +69,20 @@ static bool has_intersects_mems_allowed(struct task_struct *tsk,
                         * mempolicy intersects current, otherwise it may be
                         * needlessly killed.
                         */
-                       if (mempolicy_nodemask_intersects(tsk, mask))
-                               return true;
+                       ret = mempolicy_nodemask_intersects(tsk, mask);
                } else {
                        /*
                         * This is not a mempolicy constrained oom, so only
                         * check the mems of tsk's cpuset.
                         */
-                       if (cpuset_mems_allowed_intersects(current, tsk))
-                               return true;
+                       ret = cpuset_mems_allowed_intersects(current, tsk);
                }
-       } while_each_thread(start, tsk);
+               if (ret)
+                       break;
+       }
+       rcu_read_unlock();
 
-       return false;
+       return ret;
 }
 #else
 static bool has_intersects_mems_allowed(struct task_struct *tsk,
@@ -97,16 +100,21 @@ static bool has_intersects_mems_allowed(struct task_struct *tsk,
  */
 struct task_struct *find_lock_task_mm(struct task_struct *p)
 {
-       struct task_struct *t = p;
+       struct task_struct *t;
 
-       do {
+       rcu_read_lock();
+
+       for_each_thread(p, t) {
                task_lock(t);
                if (likely(t->mm))
-                       return t;
+                       goto found;
                task_unlock(t);
-       } while_each_thread(p, t);
+       }
+       t = NULL;
+found:
+       rcu_read_unlock();
 
-       return NULL;
+       return t;
 }
 
 /* return true if the task is not adequate as candidate victim task. */
@@ -301,7 +309,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
        unsigned long chosen_points = 0;
 
        rcu_read_lock();
-       do_each_thread(g, p) {
+       for_each_process_thread(g, p) {
                unsigned int points;
 
                switch (oom_scan_process_thread(p, totalpages, nodemask,
@@ -323,7 +331,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
                        chosen = p;
                        chosen_points = points;
                }
-       } while_each_thread(g, p);
+       }
        if (chosen)
                get_task_struct(chosen);
        rcu_read_unlock();
@@ -406,7 +414,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 {
        struct task_struct *victim = p;
        struct task_struct *child;
-       struct task_struct *t = p;
+       struct task_struct *t;
        struct mm_struct *mm;
        unsigned int victim_points = 0;
        static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
@@ -437,7 +445,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
         * still freeing memory.
         */
        read_lock(&tasklist_lock);
-       do {
+       for_each_thread(p, t) {
                list_for_each_entry(child, &t->children, sibling) {
                        unsigned int child_points;
 
@@ -455,13 +463,11 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
                                get_task_struct(victim);
                        }
                }
-       } while_each_thread(p, t);
+       }
        read_unlock(&tasklist_lock);
 
-       rcu_read_lock();
        p = find_lock_task_mm(victim);
        if (!p) {
-               rcu_read_unlock();
                put_task_struct(victim);
                return;
        } else if (victim != p) {
@@ -487,6 +493,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
         * That thread will now get access to memory reserves since it has a
         * pending fatal signal.
         */
+       rcu_read_lock();
        for_each_process(p)
                if (p->mm == mm && !same_thread_group(p, victim) &&
                    !(p->flags & PF_KTHREAD)) {
index 5248fe070aa4e9f94b4be087aa8957e16cf16c1f..60cbf9b0860ad554e3abc662162f228cfdc41dd2 100644 (file)
@@ -205,7 +205,7 @@ static char * const zone_names[MAX_NR_ZONES] = {
 };
 
 int min_free_kbytes = 1024;
-int user_min_free_kbytes;
+int user_min_free_kbytes = -1;
 
 static unsigned long __meminitdata nr_kernel_pages;
 static unsigned long __meminitdata nr_all_pages;
@@ -295,7 +295,7 @@ static inline int bad_range(struct zone *zone, struct page *page)
 }
 #endif
 
-static void bad_page(struct page *page)
+static void bad_page(struct page *page, char *reason, unsigned long bad_flags)
 {
        static unsigned long resume;
        static unsigned long nr_shown;
@@ -329,7 +329,7 @@ static void bad_page(struct page *page)
 
        printk(KERN_ALERT "BUG: Bad page state in process %s  pfn:%05lx\n",
                current->comm, page_to_pfn(page));
-       dump_page(page);
+       dump_page_badflags(page, reason, bad_flags);
 
        print_modules();
        dump_stack();
@@ -383,7 +383,7 @@ static int destroy_compound_page(struct page *page, unsigned long order)
        int bad = 0;
 
        if (unlikely(compound_order(page) != order)) {
-               bad_page(page);
+               bad_page(page, "wrong compound order", 0);
                bad++;
        }
 
@@ -392,8 +392,11 @@ static int destroy_compound_page(struct page *page, unsigned long order)
        for (i = 1; i < nr_pages; i++) {
                struct page *p = page + i;
 
-               if (unlikely(!PageTail(p) || (p->first_page != page))) {
-                       bad_page(page);
+               if (unlikely(!PageTail(p))) {
+                       bad_page(page, "PageTail not set", 0);
+                       bad++;
+               } else if (unlikely(p->first_page != page)) {
+                       bad_page(page, "first_page not consistent", 0);
                        bad++;
                }
                __ClearPageTail(p);
@@ -506,12 +509,12 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
                return 0;
 
        if (page_is_guard(buddy) && page_order(buddy) == order) {
-               VM_BUG_ON(page_count(buddy) != 0);
+               VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
                return 1;
        }
 
        if (PageBuddy(buddy) && page_order(buddy) == order) {
-               VM_BUG_ON(page_count(buddy) != 0);
+               VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
                return 1;
        }
        return 0;
@@ -561,8 +564,8 @@ static inline void __free_one_page(struct page *page,
 
        page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
 
-       VM_BUG_ON(page_idx & ((1 << order) - 1));
-       VM_BUG_ON(bad_range(zone, page));
+       VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
+       VM_BUG_ON_PAGE(bad_range(zone, page), page);
 
        while (order < MAX_ORDER-1) {
                buddy_idx = __find_buddy_index(page_idx, order);
@@ -618,12 +621,23 @@ out:
 
 static inline int free_pages_check(struct page *page)
 {
-       if (unlikely(page_mapcount(page) |
-               (page->mapping != NULL)  |
-               (atomic_read(&page->_count) != 0) |
-               (page->flags & PAGE_FLAGS_CHECK_AT_FREE) |
-               (mem_cgroup_bad_page_check(page)))) {
-               bad_page(page);
+       char *bad_reason = NULL;
+       unsigned long bad_flags = 0;
+
+       if (unlikely(page_mapcount(page)))
+               bad_reason = "nonzero mapcount";
+       if (unlikely(page->mapping != NULL))
+               bad_reason = "non-NULL mapping";
+       if (unlikely(atomic_read(&page->_count) != 0))
+               bad_reason = "nonzero _count";
+       if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_FREE)) {
+               bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
+               bad_flags = PAGE_FLAGS_CHECK_AT_FREE;
+       }
+       if (unlikely(mem_cgroup_bad_page_check(page)))
+               bad_reason = "cgroup check failed";
+       if (unlikely(bad_reason)) {
+               bad_page(page, bad_reason, bad_flags);
                return 1;
        }
        page_cpupid_reset_last(page);
@@ -813,7 +827,7 @@ static inline void expand(struct zone *zone, struct page *page,
                area--;
                high--;
                size >>= 1;
-               VM_BUG_ON(bad_range(zone, &page[size]));
+               VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
                if (high < debug_guardpage_minorder()) {
@@ -843,12 +857,23 @@ static inline void expand(struct zone *zone, struct page *page,
  */
 static inline int check_new_page(struct page *page)
 {
-       if (unlikely(page_mapcount(page) |
-               (page->mapping != NULL)  |
-               (atomic_read(&page->_count) != 0)  |
-               (page->flags & PAGE_FLAGS_CHECK_AT_PREP) |
-               (mem_cgroup_bad_page_check(page)))) {
-               bad_page(page);
+       char *bad_reason = NULL;
+       unsigned long bad_flags = 0;
+
+       if (unlikely(page_mapcount(page)))
+               bad_reason = "nonzero mapcount";
+       if (unlikely(page->mapping != NULL))
+               bad_reason = "non-NULL mapping";
+       if (unlikely(atomic_read(&page->_count) != 0))
+               bad_reason = "nonzero _count";
+       if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) {
+               bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set";
+               bad_flags = PAGE_FLAGS_CHECK_AT_PREP;
+       }
+       if (unlikely(mem_cgroup_bad_page_check(page)))
+               bad_reason = "cgroup check failed";
+       if (unlikely(bad_reason)) {
+               bad_page(page, bad_reason, bad_flags);
                return 1;
        }
        return 0;
@@ -865,8 +890,6 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
        }
 
        set_page_private(page, 0);
-       set_page_refcounted(page);
-
        arch_alloc_page(page, order);
        kernel_map_pages(page, 1 << order, 1);
 
@@ -876,6 +899,16 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
        if (order && (gfp_flags & __GFP_COMP))
                prep_compound_page(page, order);
 
+       /*
+        * Make sure the caller of get_page_unless_zero() will see the
+        * fully initialized page. Say, to ensure that compound_lock()
+        * can't race with the non-atomic __SetPage*() above.
+        */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       smp_wmb();
+#endif
+       set_page_refcounted(page);
+
        return 0;
 }
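The smp_wmb() added above encodes a publish-after-initialize rule: every plain store that sets the page up must be visible before the refcount store that makes it reachable via get_page_unless_zero(). Below is a userspace analogue of the same pattern using C11 release/acquire atomics instead of the kernel's raw barrier, purely to illustrate the ordering requirement.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct obj {
        int flags;              /* plain field, initialized before publish */
        atomic_int refcount;    /* 0 means "not yet published" */
};

static struct obj global_obj;

static void *producer(void *arg)
{
        (void)arg;
        global_obj.flags = 0xabc;                       /* initialize first ... */
        atomic_store_explicit(&global_obj.refcount, 1,
                              memory_order_release);    /* ... then publish */
        return NULL;
}

static void *consumer(void *arg)
{
        (void)arg;
        /* Like get_page_unless_zero(): only use the object once the
         * refcount is seen nonzero; acquire pairs with the release above. */
        while (atomic_load_explicit(&global_obj.refcount,
                                    memory_order_acquire) == 0)
                ;
        printf("flags = %#x\n", global_obj.flags);      /* always 0xabc */
        return NULL;
}

int main(void)
{
        pthread_t p, c;

        pthread_create(&c, NULL, consumer, NULL);
        pthread_create(&p, NULL, producer, NULL);
        pthread_join(p, NULL);
        pthread_join(c, NULL);
        return 0;
}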
 
@@ -955,7 +988,7 @@ int move_freepages(struct zone *zone,
 
        for (page = start_page; page <= end_page;) {
                /* Make sure we are not inadvertently changing nodes */
-               VM_BUG_ON(page_to_nid(page) != zone_to_nid(zone));
+               VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
 
                if (!pfn_valid_within(page_to_pfn(page))) {
                        page++;
@@ -1404,8 +1437,8 @@ void split_page(struct page *page, unsigned int order)
 {
        int i;
 
-       VM_BUG_ON(PageCompound(page));
-       VM_BUG_ON(!page_count(page));
+       VM_BUG_ON_PAGE(PageCompound(page), page);
+       VM_BUG_ON_PAGE(!page_count(page), page);
 
 #ifdef CONFIG_KMEMCHECK
        /*
@@ -1552,7 +1585,7 @@ again:
        zone_statistics(preferred_zone, zone, gfp_flags);
        local_irq_restore(flags);
 
-       VM_BUG_ON(bad_range(zone, page));
+       VM_BUG_ON_PAGE(bad_range(zone, page), page);
        if (prep_new_page(page, order, gfp_flags))
                goto again;
        return page;
@@ -2071,13 +2104,6 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
            debug_guardpage_minorder() > 0)
                return;
 
-       /*
-        * Walking all memory to count page types is very expensive and should
-        * be inhibited in non-blockable contexts.
-        */
-       if (!(gfp_mask & __GFP_WAIT))
-               filter |= SHOW_MEM_FILTER_PAGE_COUNT;
-
        /*
         * This documents exceptions given to allocations in certain
         * contexts that are allowed to allocate outside current's set
@@ -2242,10 +2268,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
                                preferred_zone, migratetype);
                if (page) {
                        preferred_zone->compact_blockskip_flush = false;
-                       preferred_zone->compact_considered = 0;
-                       preferred_zone->compact_defer_shift = 0;
-                       if (order >= preferred_zone->compact_order_failed)
-                               preferred_zone->compact_order_failed = order + 1;
+                       compaction_defer_reset(preferred_zone, order, true);
                        count_vm_event(COMPACTSUCCESS);
                        return page;
                }
@@ -2535,8 +2558,15 @@ rebalance:
        }
 
        /* Atomic allocations - we can't balance anything */
-       if (!wait)
+       if (!wait) {
+               /*
+                * All existing users of the deprecated __GFP_NOFAIL are
+                * blockable, so warn of any new users that actually allow this
+                * type of allocation to fail.
+                */
+               WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL);
                goto nopage;
+       }
 
        /* Avoid recursion of direct reclaim */
        if (current->flags & PF_MEMALLOC)
@@ -2628,6 +2658,11 @@ rebalance:
                                                pages_reclaimed)) {
                /* Wait for some write requests to complete then retry */
                wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
+
+               /* Allocations that cannot fail must allocate from somewhere */
+               if (gfp_mask & __GFP_NOFAIL)
+                       alloc_flags |= ALLOC_HARDER;
+
                goto rebalance;
        } else {
                /*
@@ -3901,6 +3936,7 @@ static void setup_zone_migrate_reserve(struct zone *zone)
        struct page *page;
        unsigned long block_migratetype;
        int reserve;
+       int old_reserve;
 
        /*
         * Get the start pfn, end pfn and the number of blocks to reserve
@@ -3922,6 +3958,12 @@ static void setup_zone_migrate_reserve(struct zone *zone)
         * future allocation of hugepages at runtime.
         */
        reserve = min(2, reserve);
+       old_reserve = zone->nr_migrate_reserve_block;
+
+       /* On memory hot-add, we almost always need to do nothing */
+       if (reserve == old_reserve)
+               return;
+       zone->nr_migrate_reserve_block = reserve;
 
        for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
                if (!pfn_valid(pfn))
@@ -3959,6 +4001,12 @@ static void setup_zone_migrate_reserve(struct zone *zone)
                                reserve--;
                                continue;
                        }
+               } else if (!old_reserve) {
+                       /*
+                        * At boot time we don't need to scan the whole zone
+                        * for turning off MIGRATE_RESERVE.
+                        */
+                       break;
                }
 
                /*
@@ -4209,7 +4257,6 @@ static noinline __init_refok
 int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 {
        int i;
-       struct pglist_data *pgdat = zone->zone_pgdat;
        size_t alloc_size;
 
        /*
@@ -4225,7 +4272,8 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 
        if (!slab_is_available()) {
                zone->wait_table = (wait_queue_head_t *)
-                       alloc_bootmem_node_nopanic(pgdat, alloc_size);
+                       memblock_virt_alloc_node_nopanic(
+                               alloc_size, zone->zone_pgdat->node_id);
        } else {
                /*
                 * This case means that a zone whose size was 0 gets new memory
@@ -4345,13 +4393,14 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
 #endif
 
 /**
- * free_bootmem_with_active_regions - Call free_bootmem_node for each active range
+ * free_bootmem_with_active_regions - Call memblock_free_early_nid for each active range
  * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
- * @max_low_pfn: The highest PFN that will be passed to free_bootmem_node
+ * @max_low_pfn: The highest PFN that will be passed to memblock_free_early_nid
  *
  * If an architecture guarantees that all ranges registered with
 * add_active_ranges() contain no holes and may be freed,
- * this function may be used instead of calling free_bootmem() manually.
+ * this function may be used instead of calling memblock_free_early_nid()
+ * manually.
  */
 void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
 {
@@ -4363,9 +4412,9 @@ void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
                end_pfn = min(end_pfn, max_low_pfn);
 
                if (start_pfn < end_pfn)
-                       free_bootmem_node(NODE_DATA(this_nid),
-                                         PFN_PHYS(start_pfn),
-                                         (end_pfn - start_pfn) << PAGE_SHIFT);
+                       memblock_free_early_nid(PFN_PHYS(start_pfn),
+                                       (end_pfn - start_pfn) << PAGE_SHIFT,
+                                       this_nid);
        }
 }
 
@@ -4636,8 +4685,9 @@ static void __init setup_usemap(struct pglist_data *pgdat,
        unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize);
        zone->pageblock_flags = NULL;
        if (usemapsize)
-               zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat,
-                                                                  usemapsize);
+               zone->pageblock_flags =
+                       memblock_virt_alloc_node_nopanic(usemapsize,
+                                                        pgdat->node_id);
 }
 #else
 static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
@@ -4831,7 +4881,8 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
                size =  (end - start) * sizeof(struct page);
                map = alloc_remap(pgdat->node_id, size);
                if (!map)
-                       map = alloc_bootmem_node_nopanic(pgdat, size);
+                       map = memblock_virt_alloc_node_nopanic(size,
+                                                              pgdat->node_id);
                pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
        }
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -5012,9 +5063,33 @@ static void __init find_zone_movable_pfns_for_nodes(void)
        nodemask_t saved_node_state = node_states[N_MEMORY];
        unsigned long totalpages = early_calculate_totalpages();
        int usable_nodes = nodes_weight(node_states[N_MEMORY]);
+       struct memblock_type *type = &memblock.memory;
+
+       /* Need to find movable_zone earlier when movable_node is specified. */
+       find_usable_zone_for_movable();
 
        /*
-        * If movablecore was specified, calculate what size of
+        * If movable_node is specified, ignore kernelcore and movablecore
+        * options.
+        */
+       if (movable_node_is_enabled()) {
+               for (i = 0; i < type->cnt; i++) {
+                       if (!memblock_is_hotpluggable(&type->regions[i]))
+                               continue;
+
+                       nid = type->regions[i].nid;
+
+                       usable_startpfn = PFN_DOWN(type->regions[i].base);
+                       zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
+                               min(usable_startpfn, zone_movable_pfn[nid]) :
+                               usable_startpfn;
+               }
+
+               goto out2;
+       }
+
+       /*
+        * If movablecore=nn[KMG] was specified, calculate what size of
         * kernelcore that corresponds so that memory usable for
         * any allocation type is evenly spread. If both kernelcore
         * and movablecore are specified, then the value of kernelcore
@@ -5040,7 +5115,6 @@ static void __init find_zone_movable_pfns_for_nodes(void)
                goto out;
 
        /* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
-       find_usable_zone_for_movable();
        usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone];
 
 restart:
@@ -5131,6 +5205,7 @@ restart:
        if (usable_nodes && required_kernelcore > usable_nodes)
                goto restart;
 
+out2:
        /* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */
        for (nid = 0; nid < MAX_NUMNODES; nid++)
                zone_movable_pfn[nid] =
@@ -5692,7 +5767,12 @@ module_init(init_per_zone_wmark_min)
 int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
        void __user *buffer, size_t *length, loff_t *ppos)
 {
-       proc_dointvec(table, write, buffer, length, ppos);
+       int rc;
+
+       rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
+       if (rc)
+               return rc;
+
        if (write) {
                user_min_free_kbytes = min_free_kbytes;
                setup_per_zone_wmarks();
@@ -5857,7 +5937,7 @@ void *__init alloc_large_system_hash(const char *tablename,
        do {
                size = bucketsize << log2qty;
                if (flags & HASH_EARLY)
-                       table = alloc_bootmem_nopanic(size);
+                       table = memblock_virt_alloc_nopanic(size, 0);
                else if (hashdist)
                        table = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL);
                else {
@@ -5959,7 +6039,7 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
        pfn = page_to_pfn(page);
        bitmap = get_pageblock_bitmap(zone, pfn);
        bitidx = pfn_to_bitidx(zone, pfn);
-       VM_BUG_ON(!zone_spans_pfn(zone, pfn));
+       VM_BUG_ON_PAGE(!zone_spans_pfn(zone, pfn), page);
 
        for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
                if (flags & value)
@@ -6457,12 +6537,24 @@ static void dump_page_flags(unsigned long flags)
        printk(")\n");
 }
 
-void dump_page(struct page *page)
+void dump_page_badflags(struct page *page, char *reason, unsigned long badflags)
 {
        printk(KERN_ALERT
               "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
                page, atomic_read(&page->_count), page_mapcount(page),
                page->mapping, page->index);
        dump_page_flags(page->flags);
+       if (reason)
+               pr_alert("page dumped because: %s\n", reason);
+       if (page->flags & badflags) {
+               pr_alert("bad because of flags:\n");
+               dump_page_flags(page->flags & badflags);
+       }
        mem_cgroup_print_bad_page(page);
 }
+
+void dump_page(struct page *page, char *reason)
+{
+       dump_page_badflags(page, reason, 0);
+}
+EXPORT_SYMBOL_GPL(dump_page);
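The bad_page()/dump_page_badflags() rework above attaches a reason string and prints only the offending bits (page->flags & badflags). A toy userspace analogue of that "mask, then name only the bad bits" diagnostic; the flag names here are invented for the example.

#include <stdio.h>

#define F_LOCKED        (1UL << 0)
#define F_DIRTY         (1UL << 1)
#define F_RESERVED      (1UL << 2)

static void dump_badflags(unsigned long flags, const char *reason,
                          unsigned long badflags)
{
        static const struct { unsigned long bit; const char *name; } names[] = {
                { F_LOCKED, "locked" },
                { F_DIRTY, "dirty" },
                { F_RESERVED, "reserved" },
        };
        unsigned long bad = flags & badflags;

        if (reason)
                printf("dumped because: %s\n", reason);
        if (!bad)
                return;
        printf("bad because of flags:");
        for (size_t i = 0; i < sizeof(names) / sizeof(names[0]); i++)
                if (bad & names[i].bit)
                        printf(" %s", names[i].name);
        printf("\n");
}

int main(void)
{
        /* a "page" that is dirty and reserved when neither should be set */
        dump_badflags(F_DIRTY | F_RESERVED, "flags set at free",
                      F_DIRTY | F_RESERVED);
        return 0;
}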
index 3bd0b8e6ab12f43fc3f99efb44c8998a510fd2cb..cfd162882c00a157ce30ada845361593982c1b27 100644 (file)
@@ -54,8 +54,9 @@ static int __init alloc_node_page_cgroup(int nid)
 
        table_size = sizeof(struct page_cgroup) * nr_pages;
 
-       base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
-                       table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+       base = memblock_virt_alloc_try_nid_nopanic(
+                       table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
+                       BOOTMEM_ALLOC_ACCESSIBLE, nid);
        if (!base)
                return -ENOMEM;
        NODE_DATA(nid)->node_page_cgroup = base;
index f14eded987fac8276e3e3da5ff11d260a8ba44cf..7c59ef681381bb7afeef2cf5207d269e9a95c1f8 100644 (file)
@@ -320,8 +320,8 @@ int swap_readpage(struct page *page)
        int ret = 0;
        struct swap_info_struct *sis = page_swap_info(page);
 
-       VM_BUG_ON(!PageLocked(page));
-       VM_BUG_ON(PageUptodate(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
+       VM_BUG_ON_PAGE(PageUptodate(page), page);
        if (frontswap_load(page) == 0) {
                SetPageUptodate(page);
                unlock_page(page);
index afbf352ae58041e2defbacdf433d26c2ff98975e..036cfe07050f65eee962f51e6223321f9e01e132 100644 (file)
@@ -1063,7 +1063,7 @@ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
                          __alignof__(ai->groups[0].cpu_map[0]));
        ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]);
 
-       ptr = alloc_bootmem_nopanic(PFN_ALIGN(ai_size));
+       ptr = memblock_virt_alloc_nopanic(PFN_ALIGN(ai_size), 0);
        if (!ptr)
                return NULL;
        ai = ptr;
@@ -1088,7 +1088,7 @@ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
  */
 void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
 {
-       free_bootmem(__pa(ai), ai->__ai_size);
+       memblock_free_early(__pa(ai), ai->__ai_size);
 }
 
 /**
@@ -1246,10 +1246,12 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
        PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);
 
        /* process group information and build config tables accordingly */
-       group_offsets = alloc_bootmem(ai->nr_groups * sizeof(group_offsets[0]));
-       group_sizes = alloc_bootmem(ai->nr_groups * sizeof(group_sizes[0]));
-       unit_map = alloc_bootmem(nr_cpu_ids * sizeof(unit_map[0]));
-       unit_off = alloc_bootmem(nr_cpu_ids * sizeof(unit_off[0]));
+       group_offsets = memblock_virt_alloc(ai->nr_groups *
+                                            sizeof(group_offsets[0]), 0);
+       group_sizes = memblock_virt_alloc(ai->nr_groups *
+                                          sizeof(group_sizes[0]), 0);
+       unit_map = memblock_virt_alloc(nr_cpu_ids * sizeof(unit_map[0]), 0);
+       unit_off = memblock_virt_alloc(nr_cpu_ids * sizeof(unit_off[0]), 0);
 
        for (cpu = 0; cpu < nr_cpu_ids; cpu++)
                unit_map[cpu] = UINT_MAX;
@@ -1311,7 +1313,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
         * empty chunks.
         */
        pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2;
-       pcpu_slot = alloc_bootmem(pcpu_nr_slots * sizeof(pcpu_slot[0]));
+       pcpu_slot = memblock_virt_alloc(
+                       pcpu_nr_slots * sizeof(pcpu_slot[0]), 0);
        for (i = 0; i < pcpu_nr_slots; i++)
                INIT_LIST_HEAD(&pcpu_slot[i]);
 
@@ -1322,7 +1325,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
         * covers static area + reserved area (mostly used for module
         * static percpu allocation).
         */
-       schunk = alloc_bootmem(pcpu_chunk_struct_size);
+       schunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
        INIT_LIST_HEAD(&schunk->list);
        schunk->base_addr = base_addr;
        schunk->map = smap;
@@ -1346,7 +1349,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 
        /* init dynamic chunk if necessary */
        if (dyn_size) {
-               dchunk = alloc_bootmem(pcpu_chunk_struct_size);
+               dchunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
                INIT_LIST_HEAD(&dchunk->list);
                dchunk->base_addr = base_addr;
                dchunk->map = dmap;
@@ -1626,7 +1629,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
        size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
        areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *));
 
-       areas = alloc_bootmem_nopanic(areas_size);
+       areas = memblock_virt_alloc_nopanic(areas_size, 0);
        if (!areas) {
                rc = -ENOMEM;
                goto out_free;
@@ -1712,7 +1715,7 @@ out_free_areas:
 out_free:
        pcpu_free_alloc_info(ai);
        if (areas)
-               free_bootmem(__pa(areas), areas_size);
+               memblock_free_early(__pa(areas), areas_size);
        return rc;
 }
 #endif /* BUILD_EMBED_FIRST_CHUNK */
@@ -1760,7 +1763,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
        /* unaligned allocations can't be freed, round up to page size */
        pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() *
                               sizeof(pages[0]));
-       pages = alloc_bootmem(pages_size);
+       pages = memblock_virt_alloc(pages_size, 0);
 
        /* allocate pages */
        j = 0;
@@ -1823,7 +1826,7 @@ enomem:
                free_fn(page_address(pages[j]), PAGE_SIZE);
        rc = -ENOMEM;
 out_free_ar:
-       free_bootmem(__pa(pages), pages_size);
+       memblock_free_early(__pa(pages), pages_size);
        pcpu_free_alloc_info(ai);
        return rc;
 }
@@ -1848,12 +1851,13 @@ EXPORT_SYMBOL(__per_cpu_offset);
 static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
                                       size_t align)
 {
-       return __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS));
+       return  memblock_virt_alloc_from_nopanic(
+                       size, align, __pa(MAX_DMA_ADDRESS));
 }
 
 static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
 {
-       free_bootmem(__pa(ptr), size);
+       memblock_free_early(__pa(ptr), size);
 }
 
 void __init setup_per_cpu_areas(void)
@@ -1896,7 +1900,9 @@ void __init setup_per_cpu_areas(void)
        void *fc;
 
        ai = pcpu_alloc_alloc_info(1, 1);
-       fc = __alloc_bootmem(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+       fc = memblock_virt_alloc_from_nopanic(unit_size,
+                                             PAGE_SIZE,
+                                             __pa(MAX_DMA_ADDRESS));
        if (!ai || !fc)
                panic("Failed to allocate memory for percpu areas.");
        /* kmemleak tracks the percpu allocations separately */
index 068522d8502a58e9465a963e68c37ce4ccf635d7..2dcd3353c3f679d0da34c2fefe750c41c257b12f 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -660,17 +660,22 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
        return 1;
 }
 
+struct page_referenced_arg {
+       int mapcount;
+       int referenced;
+       unsigned long vm_flags;
+       struct mem_cgroup *memcg;
+};
 /*
- * Subfunctions of page_referenced: page_referenced_one called
- * repeatedly from either page_referenced_anon or page_referenced_file.
+ * arg: page_referenced_arg will be passed
  */
 int page_referenced_one(struct page *page, struct vm_area_struct *vma,
-                       unsigned long address, unsigned int *mapcount,
-                       unsigned long *vm_flags)
+                       unsigned long address, void *arg)
 {
        struct mm_struct *mm = vma->vm_mm;
        spinlock_t *ptl;
        int referenced = 0;
+       struct page_referenced_arg *pra = arg;
 
        if (unlikely(PageTransHuge(page))) {
                pmd_t *pmd;
@@ -682,13 +687,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                pmd = page_check_address_pmd(page, mm, address,
                                             PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
                if (!pmd)
-                       goto out;
+                       return SWAP_AGAIN;
 
                if (vma->vm_flags & VM_LOCKED) {
                        spin_unlock(ptl);
-                       *mapcount = 0;  /* break early from loop */
-                       *vm_flags |= VM_LOCKED;
-                       goto out;
+                       pra->vm_flags |= VM_LOCKED;
+                       return SWAP_FAIL; /* To break the loop */
                }
 
                /* go ahead even if the pmd is pmd_trans_splitting() */
@@ -704,13 +708,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                 */
                pte = page_check_address(page, mm, address, &ptl, 0);
                if (!pte)
-                       goto out;
+                       return SWAP_AGAIN;
 
                if (vma->vm_flags & VM_LOCKED) {
                        pte_unmap_unlock(pte, ptl);
-                       *mapcount = 0;  /* break early from loop */
-                       *vm_flags |= VM_LOCKED;
-                       goto out;
+                       pra->vm_flags |= VM_LOCKED;
+                       return SWAP_FAIL; /* To break the loop */
                }
 
                if (ptep_clear_flush_young_notify(vma, address, pte)) {
@@ -727,113 +730,27 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                pte_unmap_unlock(pte, ptl);
        }
 
-       (*mapcount)--;
-
-       if (referenced)
-               *vm_flags |= vma->vm_flags;
-out:
-       return referenced;
-}
-
-static int page_referenced_anon(struct page *page,
-                               struct mem_cgroup *memcg,
-                               unsigned long *vm_flags)
-{
-       unsigned int mapcount;
-       struct anon_vma *anon_vma;
-       pgoff_t pgoff;
-       struct anon_vma_chain *avc;
-       int referenced = 0;
-
-       anon_vma = page_lock_anon_vma_read(page);
-       if (!anon_vma)
-               return referenced;
-
-       mapcount = page_mapcount(page);
-       pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-       anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
-               struct vm_area_struct *vma = avc->vma;
-               unsigned long address = vma_address(page, vma);
-               /*
-                * If we are reclaiming on behalf of a cgroup, skip
-                * counting on behalf of references from different
-                * cgroups
-                */
-               if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
-                       continue;
-               referenced += page_referenced_one(page, vma, address,
-                                                 &mapcount, vm_flags);
-               if (!mapcount)
-                       break;
+       if (referenced) {
+               pra->referenced++;
+               pra->vm_flags |= vma->vm_flags;
        }
 
-       page_unlock_anon_vma_read(anon_vma);
-       return referenced;
+       pra->mapcount--;
+       if (!pra->mapcount)
+               return SWAP_SUCCESS; /* To break the loop */
+
+       return SWAP_AGAIN;
 }
 
-/**
- * page_referenced_file - referenced check for object-based rmap
- * @page: the page we're checking references on.
- * @memcg: target memory control group
- * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
- *
- * For an object-based mapped page, find all the places it is mapped and
- * check/clear the referenced flag.  This is done by following the page->mapping
- * pointer, then walking the chain of vmas it holds.  It returns the number
- * of references it found.
- *
- * This function is only called from page_referenced for object-based pages.
- */
-static int page_referenced_file(struct page *page,
-                               struct mem_cgroup *memcg,
-                               unsigned long *vm_flags)
+static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
 {
-       unsigned int mapcount;
-       struct address_space *mapping = page->mapping;
-       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-       struct vm_area_struct *vma;
-       int referenced = 0;
+       struct page_referenced_arg *pra = arg;
+       struct mem_cgroup *memcg = pra->memcg;
 
-       /*
-        * The caller's checks on page->mapping and !PageAnon have made
-        * sure that this is a file page: the check for page->mapping
-        * excludes the case just before it gets set on an anon page.
-        */
-       BUG_ON(PageAnon(page));
-
-       /*
-        * The page lock not only makes sure that page->mapping cannot
-        * suddenly be NULLified by truncation, it makes sure that the
-        * structure at mapping cannot be freed and reused yet,
-        * so we can safely take mapping->i_mmap_mutex.
-        */
-       BUG_ON(!PageLocked(page));
-
-       mutex_lock(&mapping->i_mmap_mutex);
-
-       /*
-        * i_mmap_mutex does not stabilize mapcount at all, but mapcount
-        * is more likely to be accurate if we note it after spinning.
-        */
-       mapcount = page_mapcount(page);
-
-       vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
-               unsigned long address = vma_address(page, vma);
-               /*
-                * If we are reclaiming on behalf of a cgroup, skip
-                * counting on behalf of references from different
-                * cgroups
-                */
-               if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
-                       continue;
-               referenced += page_referenced_one(page, vma, address,
-                                                 &mapcount, vm_flags);
-               if (!mapcount)
-                       break;
-       }
+       if (!mm_match_cgroup(vma->vm_mm, memcg))
+               return true;
 
-       mutex_unlock(&mapping->i_mmap_mutex);
-       return referenced;
+       return false;
 }
 
 /**
@@ -851,41 +768,57 @@ int page_referenced(struct page *page,
                    struct mem_cgroup *memcg,
                    unsigned long *vm_flags)
 {
-       int referenced = 0;
+       int ret;
        int we_locked = 0;
+       struct page_referenced_arg pra = {
+               .mapcount = page_mapcount(page),
+               .memcg = memcg,
+       };
+       struct rmap_walk_control rwc = {
+               .rmap_one = page_referenced_one,
+               .arg = (void *)&pra,
+               .anon_lock = page_lock_anon_vma_read,
+       };
 
        *vm_flags = 0;
-       if (page_mapped(page) && page_rmapping(page)) {
-               if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
-                       we_locked = trylock_page(page);
-                       if (!we_locked) {
-                               referenced++;
-                               goto out;
-                       }
-               }
-               if (unlikely(PageKsm(page)))
-                       referenced += page_referenced_ksm(page, memcg,
-                                                               vm_flags);
-               else if (PageAnon(page))
-                       referenced += page_referenced_anon(page, memcg,
-                                                               vm_flags);
-               else if (page->mapping)
-                       referenced += page_referenced_file(page, memcg,
-                                                               vm_flags);
-               if (we_locked)
-                       unlock_page(page);
+       if (!page_mapped(page))
+               return 0;
+
+       if (!page_rmapping(page))
+               return 0;
+
+       if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
+               we_locked = trylock_page(page);
+               if (!we_locked)
+                       return 1;
        }
-out:
-       return referenced;
+
+       /*
+        * If we are reclaiming on behalf of a cgroup, skip
+        * counting on behalf of references from different
+        * cgroups
+        */
+       if (memcg) {
+               rwc.invalid_vma = invalid_page_referenced_vma;
+       }
+
+       ret = rmap_walk(page, &rwc);
+       *vm_flags = pra.vm_flags;
+
+       if (we_locked)
+               unlock_page(page);
+
+       return pra.referenced;
 }
 
 static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
-                           unsigned long address)
+                           unsigned long address, void *arg)
 {
        struct mm_struct *mm = vma->vm_mm;
        pte_t *pte;
        spinlock_t *ptl;
        int ret = 0;
+       int *cleaned = arg;
 
        pte = page_check_address(page, mm, address, &ptl, 1);
        if (!pte)
@@ -904,44 +837,44 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 
        pte_unmap_unlock(pte, ptl);
 
-       if (ret)
+       if (ret) {
                mmu_notifier_invalidate_page(mm, address);
+               (*cleaned)++;
+       }
 out:
-       return ret;
+       return SWAP_AGAIN;
 }
 
-static int page_mkclean_file(struct address_space *mapping, struct page *page)
+static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
 {
-       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-       struct vm_area_struct *vma;
-       int ret = 0;
-
-       BUG_ON(PageAnon(page));
+       if (vma->vm_flags & VM_SHARED)
+               return 0;
 
-       mutex_lock(&mapping->i_mmap_mutex);
-       vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
-               if (vma->vm_flags & VM_SHARED) {
-                       unsigned long address = vma_address(page, vma);
-                       ret += page_mkclean_one(page, vma, address);
-               }
-       }
-       mutex_unlock(&mapping->i_mmap_mutex);
-       return ret;
+       return 1;
 }
 
 int page_mkclean(struct page *page)
 {
-       int ret = 0;
+       int cleaned = 0;
+       struct address_space *mapping;
+       struct rmap_walk_control rwc = {
+               .arg = (void *)&cleaned,
+               .rmap_one = page_mkclean_one,
+               .invalid_vma = invalid_mkclean_vma,
+       };
 
        BUG_ON(!PageLocked(page));
 
-       if (page_mapped(page)) {
-               struct address_space *mapping = page_mapping(page);
-               if (mapping)
-                       ret = page_mkclean_file(mapping, page);
-       }
+       if (!page_mapped(page))
+               return 0;
 
-       return ret;
+       mapping = page_mapping(page);
+       if (!mapping)
+               return 0;
+
+       rmap_walk(page, &rwc);
+
+       return cleaned;
 }
 EXPORT_SYMBOL_GPL(page_mkclean);
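page_referenced() and page_mkclean() above now pass a struct rmap_walk_control (callback, per-walk cookie, optional VMA filter) to one shared walker instead of each carrying its own anon/file loops. A generic, runnable sketch of that control-structure pattern; walk_control, visit and skip are illustrative names, not the kernel API.

#include <stdbool.h>
#include <stdio.h>

struct item {
        int id;
        bool shared;
};

struct walk_control {
        int (*visit)(struct item *it, void *arg);       /* nonzero stops the walk */
        bool (*skip)(struct item *it, void *arg);       /* cf. ->invalid_vma */
        void *arg;                                      /* per-walk cookie */
};

static int walk(struct item *items, int n, struct walk_control *wc)
{
        for (int i = 0; i < n; i++) {
                if (wc->skip && wc->skip(&items[i], wc->arg))
                        continue;
                int ret = wc->visit(&items[i], wc->arg);
                if (ret)
                        return ret;
        }
        return 0;
}

/* one concrete user, in the spirit of page_mkclean(): count shared items */
static int count_shared(struct item *it, void *arg)
{
        if (it->shared)
                ++*(int *)arg;
        return 0;                       /* keep walking, like SWAP_AGAIN */
}

static bool skip_private(struct item *it, void *arg)
{
        (void)arg;
        return !it->shared;
}

int main(void)
{
        struct item items[] = { { 1, true }, { 2, false }, { 3, true } };
        int cleaned = 0;
        struct walk_control wc = {
                .visit = count_shared,
                .skip = skip_private,
                .arg = &cleaned,
        };

        walk(items, 3, &wc);
        printf("shared items: %d\n", cleaned);
        return 0;
}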
 
@@ -961,9 +894,9 @@ void page_move_anon_rmap(struct page *page,
 {
        struct anon_vma *anon_vma = vma->anon_vma;
 
-       VM_BUG_ON(!PageLocked(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON(!anon_vma);
-       VM_BUG_ON(page->index != linear_page_index(vma, address));
+       VM_BUG_ON_PAGE(page->index != linear_page_index(vma, address), page);
 
        anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
        page->mapping = (struct address_space *) anon_vma;
@@ -1062,7 +995,7 @@ void do_page_add_anon_rmap(struct page *page,
        if (unlikely(PageKsm(page)))
                return;
 
-       VM_BUG_ON(!PageLocked(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
        /* address might be in next vma when migration races vma_adjust */
        if (first)
                __page_set_anon_rmap(page, vma, address, exclusive);
@@ -1177,17 +1110,17 @@ out:
 }
 
 /*
- * Subfunctions of try_to_unmap: try_to_unmap_one called
- * repeatedly from try_to_unmap_ksm, try_to_unmap_anon or try_to_unmap_file.
+ * @arg: enum ttu_flags will be passed to this argument
  */
 int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
-                    unsigned long address, enum ttu_flags flags)
+                    unsigned long address, void *arg)
 {
        struct mm_struct *mm = vma->vm_mm;
        pte_t *pte;
        pte_t pteval;
        spinlock_t *ptl;
        int ret = SWAP_AGAIN;
+       enum ttu_flags flags = (enum ttu_flags)arg;
 
        pte = page_check_address(page, mm, address, &ptl, 0);
        if (!pte)
@@ -1426,124 +1359,18 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
        return ret;
 }
 
-bool is_vma_temporary_stack(struct vm_area_struct *vma)
-{
-       int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
-
-       if (!maybe_stack)
-               return false;
-
-       if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
-                                               VM_STACK_INCOMPLETE_SETUP)
-               return true;
-
-       return false;
-}
-
-/**
- * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
- * rmap method
- * @page: the page to unmap/unlock
- * @flags: action and flags
- *
- * Find all the mappings of a page using the mapping pointer and the vma chains
- * contained in the anon_vma struct it points to.
- *
- * This function is only called from try_to_unmap/try_to_munlock for
- * anonymous pages.
- * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
- * where the page was found will be held for write.  So, we won't recheck
- * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
- * 'LOCKED.
- */
-static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
-{
-       struct anon_vma *anon_vma;
-       pgoff_t pgoff;
-       struct anon_vma_chain *avc;
-       int ret = SWAP_AGAIN;
-
-       anon_vma = page_lock_anon_vma_read(page);
-       if (!anon_vma)
-               return ret;
-
-       pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-       anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
-               struct vm_area_struct *vma = avc->vma;
-               unsigned long address;
-
-               /*
-                * During exec, a temporary VMA is setup and later moved.
-                * The VMA is moved under the anon_vma lock but not the
-                * page tables leading to a race where migration cannot
-                * find the migration ptes. Rather than increasing the
-                * locking requirements of exec(), migration skips
-                * temporary VMAs until after exec() completes.
-                */
-               if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
-                               is_vma_temporary_stack(vma))
-                       continue;
-
-               address = vma_address(page, vma);
-               ret = try_to_unmap_one(page, vma, address, flags);
-               if (ret != SWAP_AGAIN || !page_mapped(page))
-                       break;
-       }
-
-       page_unlock_anon_vma_read(anon_vma);
-       return ret;
-}
-
-/**
- * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
- * @page: the page to unmap/unlock
- * @flags: action and flags
- *
- * Find all the mappings of a page using the mapping pointer and the vma chains
- * contained in the address_space struct it points to.
- *
- * This function is only called from try_to_unmap/try_to_munlock for
- * object-based pages.
- * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
- * where the page was found will be held for write.  So, we won't recheck
- * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
- * 'LOCKED.
- */
-static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
+static int try_to_unmap_nonlinear(struct page *page,
+               struct address_space *mapping, struct vm_area_struct *vma)
 {
-       struct address_space *mapping = page->mapping;
-       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-       struct vm_area_struct *vma;
        int ret = SWAP_AGAIN;
        unsigned long cursor;
        unsigned long max_nl_cursor = 0;
        unsigned long max_nl_size = 0;
        unsigned int mapcount;
 
-       if (PageHuge(page))
-               pgoff = page->index << compound_order(page);
-
-       mutex_lock(&mapping->i_mmap_mutex);
-       vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
-               unsigned long address = vma_address(page, vma);
-               ret = try_to_unmap_one(page, vma, address, flags);
-               if (ret != SWAP_AGAIN || !page_mapped(page))
-                       goto out;
-       }
-
-       if (list_empty(&mapping->i_mmap_nonlinear))
-               goto out;
-
-       /*
-        * We don't bother to try to find the munlocked page in nonlinears.
-        * It's costly. Instead, later, page reclaim logic may call
-        * try_to_unmap(TTU_MUNLOCK) and recover PG_mlocked lazily.
-        */
-       if (TTU_ACTION(flags) == TTU_MUNLOCK)
-               goto out;
+       list_for_each_entry(vma,
+               &mapping->i_mmap_nonlinear, shared.nonlinear) {
 
-       list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
-                                                       shared.nonlinear) {
                cursor = (unsigned long) vma->vm_private_data;
                if (cursor > max_nl_cursor)
                        max_nl_cursor = cursor;
@@ -1553,8 +1380,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
        }
 
        if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */
-               ret = SWAP_FAIL;
-               goto out;
+               return SWAP_FAIL;
        }
 
        /*
@@ -1566,7 +1392,8 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
         */
        mapcount = page_mapcount(page);
        if (!mapcount)
-               goto out;
+               return ret;
+
        cond_resched();
 
        max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
@@ -1574,10 +1401,11 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
                max_nl_cursor = CLUSTER_SIZE;
 
        do {
-               list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
-                                                       shared.nonlinear) {
+               list_for_each_entry(vma,
+                       &mapping->i_mmap_nonlinear, shared.nonlinear) {
+
                        cursor = (unsigned long) vma->vm_private_data;
-                       while ( cursor < max_nl_cursor &&
+                       while (cursor < max_nl_cursor &&
                                cursor < vma->vm_end - vma->vm_start) {
                                if (try_to_unmap_cluster(cursor, &mapcount,
                                                vma, page) == SWAP_MLOCK)
@@ -1585,7 +1413,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
                                cursor += CLUSTER_SIZE;
                                vma->vm_private_data = (void *) cursor;
                                if ((int)mapcount <= 0)
-                                       goto out;
+                                       return ret;
                        }
                        vma->vm_private_data = (void *) max_nl_cursor;
                }
@@ -1600,11 +1428,34 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
         */
        list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
                vma->vm_private_data = NULL;
-out:
-       mutex_unlock(&mapping->i_mmap_mutex);
+
        return ret;
 }
 
+bool is_vma_temporary_stack(struct vm_area_struct *vma)
+{
+       int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
+
+       if (!maybe_stack)
+               return false;
+
+       if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
+                                               VM_STACK_INCOMPLETE_SETUP)
+               return true;
+
+       return false;
+}
+
+static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
+{
+       return is_vma_temporary_stack(vma);
+}
+
+static int page_not_mapped(struct page *page)
+{
+       return !page_mapped(page);
+};
+
 /**
  * try_to_unmap - try to remove all page table mappings to a page
  * @page: the page to get unmapped
@@ -1622,16 +1473,29 @@ out:
 int try_to_unmap(struct page *page, enum ttu_flags flags)
 {
        int ret;
+       struct rmap_walk_control rwc = {
+               .rmap_one = try_to_unmap_one,
+               .arg = (void *)flags,
+               .done = page_not_mapped,
+               .file_nonlinear = try_to_unmap_nonlinear,
+               .anon_lock = page_lock_anon_vma_read,
+       };
 
-       BUG_ON(!PageLocked(page));
-       VM_BUG_ON(!PageHuge(page) && PageTransHuge(page));
+       VM_BUG_ON_PAGE(!PageHuge(page) && PageTransHuge(page), page);
+
+       /*
+        * During exec, a temporary VMA is setup and later moved.
+        * The VMA is moved under the anon_vma lock but not the
+        * page tables leading to a race where migration cannot
+        * find the migration ptes. Rather than increasing the
+        * locking requirements of exec(), migration skips
+        * temporary VMAs until after exec() completes.
+        */
+       if (flags & TTU_MIGRATION && !PageKsm(page) && PageAnon(page))
+               rwc.invalid_vma = invalid_migration_vma;
+
+       ret = rmap_walk(page, &rwc);
 
-       if (unlikely(PageKsm(page)))
-               ret = try_to_unmap_ksm(page, flags);
-       else if (PageAnon(page))
-               ret = try_to_unmap_anon(page, flags);
-       else
-               ret = try_to_unmap_file(page, flags);
        if (ret != SWAP_MLOCK && !page_mapped(page))
                ret = SWAP_SUCCESS;
        return ret;
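
The initializer above targets the new rmap_walk_control structure introduced by this series. Reconstructed purely from the call sites in this file, it looks roughly like the sketch below; the authoritative field order and prototypes live in include/linux/rmap.h and may differ slightly.

struct rmap_walk_control {
	void *arg;
	/* invoked for each VMA that maps the page; returns a SWAP_* code */
	int (*rmap_one)(struct page *page, struct vm_area_struct *vma,
			unsigned long addr, void *arg);
	/* returns non-zero when the walk may stop early */
	int (*done)(struct page *page);
	/* handles VMAs on mapping->i_mmap_nonlinear, if any */
	int (*file_nonlinear)(struct page *page, struct address_space *mapping,
			      struct vm_area_struct *vma);
	/* optional override for taking the anon_vma lock */
	struct anon_vma *(*anon_lock)(struct page *page);
	/* returns true for VMAs the walk should skip */
	bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
};
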
@@ -1654,14 +1518,25 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
  */
 int try_to_munlock(struct page *page)
 {
-       VM_BUG_ON(!PageLocked(page) || PageLRU(page));
+       int ret;
+       struct rmap_walk_control rwc = {
+               .rmap_one = try_to_unmap_one,
+               .arg = (void *)TTU_MUNLOCK,
+               .done = page_not_mapped,
+               /*
+                * We don't bother to try to find the munlocked page in
+                * nonlinears. It's costly. Instead, later, page reclaim logic
+                * may call try_to_unmap() and recover PG_mlocked lazily.
+                */
+               .file_nonlinear = NULL,
+               .anon_lock = page_lock_anon_vma_read,
 
-       if (unlikely(PageKsm(page)))
-               return try_to_unmap_ksm(page, TTU_MUNLOCK);
-       else if (PageAnon(page))
-               return try_to_unmap_anon(page, TTU_MUNLOCK);
-       else
-               return try_to_unmap_file(page, TTU_MUNLOCK);
+       };
+
+       VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);
+
+       ret = rmap_walk(page, &rwc);
+       return ret;
 }
 
 void __put_anon_vma(struct anon_vma *anon_vma)
@@ -1674,18 +1549,13 @@ void __put_anon_vma(struct anon_vma *anon_vma)
        anon_vma_free(anon_vma);
 }
 
-#ifdef CONFIG_MIGRATION
-/*
- * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
- * Called by migrate.c to remove migration ptes, but might be used more later.
- */
-static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
-               struct vm_area_struct *, unsigned long, void *), void *arg)
+static struct anon_vma *rmap_walk_anon_lock(struct page *page,
+                                       struct rmap_walk_control *rwc)
 {
        struct anon_vma *anon_vma;
-       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-       struct anon_vma_chain *avc;
-       int ret = SWAP_AGAIN;
+
+       if (rwc->anon_lock)
+               return rwc->anon_lock(page);
 
        /*
         * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
@@ -1695,58 +1565,120 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
         */
        anon_vma = page_anon_vma(page);
        if (!anon_vma)
-               return ret;
+               return NULL;
+
        anon_vma_lock_read(anon_vma);
+       return anon_vma;
+}
+
+/*
+ * rmap_walk_anon - do something to anonymous page using the object-based
+ * rmap method
+ * @page: the page to be handled
+ * @rwc: control variable according to each walk type
+ *
+ * Find all the mappings of a page using the mapping pointer and the vma chains
+ * contained in the anon_vma struct it points to.
+ *
+ * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * where the page was found will be held for write.  So, we won't recheck
+ * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
+ * LOCKED.
+ */
+static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
+{
+       struct anon_vma *anon_vma;
+       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+       struct anon_vma_chain *avc;
+       int ret = SWAP_AGAIN;
+
+       anon_vma = rmap_walk_anon_lock(page, rwc);
+       if (!anon_vma)
+               return ret;
+
        anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
                struct vm_area_struct *vma = avc->vma;
                unsigned long address = vma_address(page, vma);
-               ret = rmap_one(page, vma, address, arg);
+
+               if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
+                       continue;
+
+               ret = rwc->rmap_one(page, vma, address, rwc->arg);
                if (ret != SWAP_AGAIN)
                        break;
+               if (rwc->done && rwc->done(page))
+                       break;
        }
        anon_vma_unlock_read(anon_vma);
        return ret;
 }
 
-static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
-               struct vm_area_struct *, unsigned long, void *), void *arg)
+/*
+ * rmap_walk_file - do something to file page using the object-based rmap method
+ * @page: the page to be handled
+ * @rwc: control variable according to each walk type
+ *
+ * Find all the mappings of a page using the mapping pointer and the vma chains
+ * contained in the address_space struct it points to.
+ *
+ * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * where the page was found will be held for write.  So, we won't recheck
+ * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
+ * LOCKED.
+ */
+static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
 {
        struct address_space *mapping = page->mapping;
-       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+       pgoff_t pgoff = page->index << compound_order(page);
        struct vm_area_struct *vma;
        int ret = SWAP_AGAIN;
 
+       /*
+        * The page lock not only makes sure that page->mapping cannot
+        * suddenly be NULLified by truncation, it makes sure that the
+        * structure at mapping cannot be freed and reused yet,
+        * so we can safely take mapping->i_mmap_mutex.
+        */
+       VM_BUG_ON(!PageLocked(page));
+
        if (!mapping)
                return ret;
        mutex_lock(&mapping->i_mmap_mutex);
        vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
                unsigned long address = vma_address(page, vma);
-               ret = rmap_one(page, vma, address, arg);
+
+               if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
+                       continue;
+
+               ret = rwc->rmap_one(page, vma, address, rwc->arg);
                if (ret != SWAP_AGAIN)
-                       break;
+                       goto done;
+               if (rwc->done && rwc->done(page))
+                       goto done;
        }
-       /*
-        * No nonlinear handling: being always shared, nonlinear vmas
-        * never contain migration ptes.  Decide what to do about this
-        * limitation to linear when we need rmap_walk() on nonlinear.
-        */
+
+       if (!rwc->file_nonlinear)
+               goto done;
+
+       if (list_empty(&mapping->i_mmap_nonlinear))
+               goto done;
+
+       ret = rwc->file_nonlinear(page, mapping, vma);
+
+done:
        mutex_unlock(&mapping->i_mmap_mutex);
        return ret;
 }
 
-int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
-               struct vm_area_struct *, unsigned long, void *), void *arg)
+int rmap_walk(struct page *page, struct rmap_walk_control *rwc)
 {
-       VM_BUG_ON(!PageLocked(page));
-
        if (unlikely(PageKsm(page)))
-               return rmap_walk_ksm(page, rmap_one, arg);
+               return rmap_walk_ksm(page, rwc);
        else if (PageAnon(page))
-               return rmap_walk_anon(page, rmap_one, arg);
+               return rmap_walk_anon(page, rwc);
        else
-               return rmap_walk_file(page, rmap_one, arg);
+               return rmap_walk_file(page, rwc);
 }
-#endif /* CONFIG_MIGRATION */
 
 #ifdef CONFIG_HUGETLB_PAGE
 /*
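
With the CONFIG_MIGRATION guard removed, rmap_walk() becomes a general-purpose reverse-map iterator. A purely hypothetical caller (not part of this patch) could use it like this, with the page held locked:

/* Hypothetical example: count the VMAs still mapping a locked page. */
static int count_one_mapping(struct page *page, struct vm_area_struct *vma,
			     unsigned long address, void *arg)
{
	int *nr = arg;

	(*nr)++;
	return SWAP_AGAIN;		/* keep walking */
}

static int count_mappings(struct page *page)
{
	int nr = 0;
	struct rmap_walk_control rwc = {
		.rmap_one = count_one_mapping,
		.arg = &nr,
	};

	rmap_walk(page, &rwc);
	return nr;
}
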
index 902a14842b74a6efe5a0de23f965e8a2a9dad820..8156f95ec0cfc638d93e8f6175e107384c83d7dd 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -285,8 +285,8 @@ static int shmem_add_to_page_cache(struct page *page,
 {
        int error;
 
-       VM_BUG_ON(!PageLocked(page));
-       VM_BUG_ON(!PageSwapBacked(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
+       VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
 
        page_cache_get(page);
        page->mapping = mapping;
@@ -491,7 +491,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                                continue;
                        if (!unfalloc || !PageUptodate(page)) {
                                if (page->mapping == mapping) {
-                                       VM_BUG_ON(PageWriteback(page));
+                                       VM_BUG_ON_PAGE(PageWriteback(page), page);
                                        truncate_inode_page(mapping, page);
                                }
                        }
@@ -568,7 +568,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                        lock_page(page);
                        if (!unfalloc || !PageUptodate(page)) {
                                if (page->mapping == mapping) {
-                                       VM_BUG_ON(PageWriteback(page));
+                                       VM_BUG_ON_PAGE(PageWriteback(page), page);
                                        truncate_inode_page(mapping, page);
                                }
                        }
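
These shmem.c hunks, like many conversions below, switch VM_BUG_ON(cond) to VM_BUG_ON_PAGE(cond, page) so the offending page is dumped before the kernel BUGs. A rough sketch of the macro added for this, presumably alongside VM_BUG_ON in include/linux/mmdebug.h (the !CONFIG_DEBUG_VM fallback in particular may differ):

#ifdef CONFIG_DEBUG_VM
#define VM_BUG_ON_PAGE(cond, page)					\
	do {								\
		if (unlikely(cond)) {					\
			dump_page(page);				\
			BUG();						\
		}							\
	} while (0)
#else
#define VM_BUG_ON_PAGE(cond, page)	BUILD_BUG_ON_INVALID(cond)
#endif
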
index 0859c4241ba10b2ee602eca48af4e34dd82d6ae3..8184a7cde272b8aa9d6e08e6407482ca5d7bd7df 100644 (file)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -160,12 +160,36 @@ static inline const char *cache_name(struct kmem_cache *s)
        return s->name;
 }
 
+/*
+ * Note, we protect with RCU only the memcg_caches array, not per-memcg caches.
+ * That said the caller must assure the memcg's cache won't go away. Since once
+ * created a memcg's cache is destroyed only along with the root cache, it is
+ * true if we are going to allocate from the cache or hold a reference to the
+ * root cache by other means. Otherwise, we should hold either the slab_mutex
+ * or the memcg's slab_caches_mutex while calling this function and accessing
+ * the returned value.
+ */
 static inline struct kmem_cache *
 cache_from_memcg_idx(struct kmem_cache *s, int idx)
 {
+       struct kmem_cache *cachep;
+       struct memcg_cache_params *params;
+
        if (!s->memcg_params)
                return NULL;
-       return s->memcg_params->memcg_caches[idx];
+
+       rcu_read_lock();
+       params = rcu_dereference(s->memcg_params);
+       cachep = params->memcg_caches[idx];
+       rcu_read_unlock();
+
+       /*
+        * Make sure we will access the up-to-date value. The code updating
+        * memcg_caches issues a write barrier to match this (see
+        * memcg_register_cache()).
+        */
+       smp_read_barrier_depends();
+       return cachep;
 }
 
 static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
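
As the comment above spells out, a caller that neither allocates from the returned cache nor holds a reference to the root cache must pin the result with a lock. An illustrative, hypothetical caller under slab_mutex (matching how kmem_cache_create_memcg uses it further down):

/* Hypothetical helper: report whether a memcg already has a child cache. */
static bool memcg_cache_exists(struct kmem_cache *root, int idx)
{
	bool exists;

	mutex_lock(&slab_mutex);
	exists = cache_from_memcg_idx(root, idx) != NULL;
	mutex_unlock(&slab_mutex);
	return exists;
}
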
index 0b7bb399b0e46af8ac8b96fbedb67aef413e5cb7..8e40321da091b66f24f983b266acb344ce41d56c 100644 (file)
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -171,13 +171,26 @@ kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
                        struct kmem_cache *parent_cache)
 {
        struct kmem_cache *s = NULL;
-       int err = 0;
+       int err;
 
        get_online_cpus();
        mutex_lock(&slab_mutex);
 
-       if (!kmem_cache_sanity_check(memcg, name, size) == 0)
-               goto out_locked;
+       err = kmem_cache_sanity_check(memcg, name, size);
+       if (err)
+               goto out_unlock;
+
+       if (memcg) {
+               /*
+                * Since per-memcg caches are created asynchronously on first
+                * allocation (see memcg_kmem_get_cache()), several threads can
+                * try to create the same cache, but only one of them may
+                * succeed. Therefore if we get here and see the cache has
+                * already been created, we silently return NULL.
+                */
+               if (cache_from_memcg_idx(parent_cache, memcg_cache_id(memcg)))
+                       goto out_unlock;
+       }
 
        /*
         * Some allocators will constraint the set of valid flags to a subset
@@ -189,45 +202,45 @@ kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
 
        s = __kmem_cache_alias(memcg, name, size, align, flags, ctor);
        if (s)
-               goto out_locked;
+               goto out_unlock;
 
+       err = -ENOMEM;
        s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
-       if (s) {
-               s->object_size = s->size = size;
-               s->align = calculate_alignment(flags, align, size);
-               s->ctor = ctor;
+       if (!s)
+               goto out_unlock;
 
-               if (memcg_register_cache(memcg, s, parent_cache)) {
-                       kmem_cache_free(kmem_cache, s);
-                       err = -ENOMEM;
-                       goto out_locked;
-               }
+       s->object_size = s->size = size;
+       s->align = calculate_alignment(flags, align, size);
+       s->ctor = ctor;
 
-               s->name = kstrdup(name, GFP_KERNEL);
-               if (!s->name) {
-                       kmem_cache_free(kmem_cache, s);
-                       err = -ENOMEM;
-                       goto out_locked;
-               }
+       s->name = kstrdup(name, GFP_KERNEL);
+       if (!s->name)
+               goto out_free_cache;
 
-               err = __kmem_cache_create(s, flags);
-               if (!err) {
-                       s->refcount = 1;
-                       list_add(&s->list, &slab_caches);
-                       memcg_cache_list_add(memcg, s);
-               } else {
-                       kfree(s->name);
-                       kmem_cache_free(kmem_cache, s);
-               }
-       } else
-               err = -ENOMEM;
+       err = memcg_alloc_cache_params(memcg, s, parent_cache);
+       if (err)
+               goto out_free_cache;
+
+       err = __kmem_cache_create(s, flags);
+       if (err)
+               goto out_free_cache;
 
-out_locked:
+       s->refcount = 1;
+       list_add(&s->list, &slab_caches);
+       memcg_register_cache(s);
+
+out_unlock:
        mutex_unlock(&slab_mutex);
        put_online_cpus();
 
-       if (err) {
-
+       /*
+        * There is no point in flooding logs with warnings or especially
+        * crashing the system if we fail to create a cache for a memcg. In
+        * this case we will be accounting the memcg allocation to the root
+        * cgroup until we succeed to create its own cache, but it isn't that
+        * critical.
+        */
+       if (err && !memcg) {
                if (flags & SLAB_PANIC)
                        panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
                                name, err);
@@ -236,11 +249,15 @@ out_locked:
                                name, err);
                        dump_stack();
                }
-
                return NULL;
        }
-
        return s;
+
+out_free_cache:
+       memcg_free_cache_params(s);
+       kfree(s->name);
+       kmem_cache_free(kmem_cache, s);
+       goto out_unlock;
 }
 
 struct kmem_cache *
@@ -263,11 +280,12 @@ void kmem_cache_destroy(struct kmem_cache *s)
                list_del(&s->list);
 
                if (!__kmem_cache_shutdown(s)) {
+                       memcg_unregister_cache(s);
                        mutex_unlock(&slab_mutex);
                        if (s->flags & SLAB_DESTROY_BY_RCU)
                                rcu_barrier();
 
-                       memcg_release_cache(s);
+                       memcg_free_cache_params(s);
                        kfree(s->name);
                        kmem_cache_free(kmem_cache, s);
                } else {
index a99e9e67c60e9b5f02e3308510f5b3f6f6fc3dc5..d2388c850b6534a9467434f1ab1e0876ba5cb7d4 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1558,7 +1558,7 @@ static inline void *acquire_slab(struct kmem_cache *s,
                new.freelist = freelist;
        }
 
-       VM_BUG_ON(new.frozen);
+       VM_BUG_ON_PAGE(new.frozen, &new);
        new.frozen = 1;
 
        if (!__cmpxchg_double_slab(s, page,
@@ -1811,7 +1811,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
                        set_freepointer(s, freelist, prior);
                        new.counters = counters;
                        new.inuse--;
-                       VM_BUG_ON(!new.frozen);
+                       VM_BUG_ON_PAGE(!new.frozen, &new);
 
                } while (!__cmpxchg_double_slab(s, page,
                        prior, counters,
@@ -1839,7 +1839,7 @@ redo:
 
        old.freelist = page->freelist;
        old.counters = page->counters;
-       VM_BUG_ON(!old.frozen);
+       VM_BUG_ON_PAGE(!old.frozen, &old);
 
        /* Determine target state of the slab */
        new.counters = old.counters;
@@ -1951,7 +1951,7 @@ static void unfreeze_partials(struct kmem_cache *s,
 
                        old.freelist = page->freelist;
                        old.counters = page->counters;
-                       VM_BUG_ON(!old.frozen);
+                       VM_BUG_ON_PAGE(!old.frozen, &old);
 
                        new.counters = old.counters;
                        new.freelist = old.freelist;
@@ -2224,7 +2224,7 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
                counters = page->counters;
 
                new.counters = counters;
-               VM_BUG_ON(!new.frozen);
+               VM_BUG_ON_PAGE(!new.frozen, &new);
 
                new.inuse = page->objects;
                new.frozen = freelist != NULL;
@@ -2318,7 +2318,7 @@ load_freelist:
         * page is pointing to the page from which the objects are obtained.
         * That page must be frozen for per cpu allocations to work.
         */
-       VM_BUG_ON(!c->page->frozen);
+       VM_BUG_ON_PAGE(!c->page->frozen, c->page);
        c->freelist = get_freepointer(s, freelist);
        c->tid = next_tid(c->tid);
        local_irq_restore(flags);
index 27eeab3be757e8c9bf04a18dba9dfd7f7aaa5ab1..4cba9c2783a147077150505dcf6114cf4592843a 100644 (file)
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -40,7 +40,8 @@ static void * __init_refok __earlyonly_bootmem_alloc(int node,
                                unsigned long align,
                                unsigned long goal)
 {
-       return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal);
+       return memblock_virt_alloc_try_nid(size, align, goal,
+                                           BOOTMEM_ALLOC_ACCESSIBLE, node);
 }
 
 static void *vmemmap_buf;
@@ -226,7 +227,8 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
 
        if (vmemmap_buf_start) {
                /* need to free left buf */
-               free_bootmem(__pa(vmemmap_buf), vmemmap_buf_end - vmemmap_buf);
+               memblock_free_early(__pa(vmemmap_buf),
+                                   vmemmap_buf_end - vmemmap_buf);
                vmemmap_buf = NULL;
                vmemmap_buf_end = NULL;
        }
index 8cc7be0e95906c18068e926490a8509651e3904c..63c3ea5c119c41f62d28b05e99ba4d7882241766 100644 (file)
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -69,7 +69,7 @@ static struct mem_section noinline __init_refok *sparse_index_alloc(int nid)
                else
                        section = kzalloc(array_size, GFP_KERNEL);
        } else {
-               section = alloc_bootmem_node(NODE_DATA(nid), array_size);
+               section = memblock_virt_alloc_node(array_size, nid);
        }
 
        return section;
@@ -279,8 +279,9 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
        limit = goal + (1UL << PA_SECTION_SHIFT);
        nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
 again:
-       p = ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
-                                         SMP_CACHE_BYTES, goal, limit);
+       p = memblock_virt_alloc_try_nid_nopanic(size,
+                                               SMP_CACHE_BYTES, goal, limit,
+                                               nid);
        if (!p && limit) {
                limit = 0;
                goto again;
@@ -331,7 +332,7 @@ static unsigned long * __init
 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
                                         unsigned long size)
 {
-       return alloc_bootmem_node_nopanic(pgdat, size);
+       return memblock_virt_alloc_node_nopanic(size, pgdat->node_id);
 }
 
 static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
@@ -376,8 +377,9 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
                return map;
 
        size = PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
-       map = __alloc_bootmem_node_high(NODE_DATA(nid), size,
-                                        PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+       map = memblock_virt_alloc_try_nid(size,
+                                         PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
+                                         BOOTMEM_ALLOC_ACCESSIBLE, nid);
        return map;
 }
 void __init sparse_mem_maps_populate_node(struct page **map_map,
@@ -401,8 +403,9 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
        }
 
        size = PAGE_ALIGN(size);
-       map = __alloc_bootmem_node_high(NODE_DATA(nodeid), size * map_count,
-                                        PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+       map = memblock_virt_alloc_try_nid(size * map_count,
+                                         PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
+                                         BOOTMEM_ALLOC_ACCESSIBLE, nodeid);
        if (map) {
                for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
                        if (!present_section_nr(pnum))
@@ -545,7 +548,7 @@ void __init sparse_init(void)
         * sparse_early_mem_map_alloc, so allocate usemap_map at first.
         */
        size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
-       usemap_map = alloc_bootmem(size);
+       usemap_map = memblock_virt_alloc(size, 0);
        if (!usemap_map)
                panic("can not allocate usemap_map\n");
        alloc_usemap_and_memmap(sparse_early_usemaps_alloc_node,
@@ -553,7 +556,7 @@ void __init sparse_init(void)
 
 #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
        size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
-       map_map = alloc_bootmem(size2);
+       map_map = memblock_virt_alloc(size2, 0);
        if (!map_map)
                panic("can not allocate map_map\n");
        alloc_usemap_and_memmap(sparse_early_mem_maps_alloc_node,
@@ -583,9 +586,9 @@ void __init sparse_init(void)
        vmemmap_populate_print_last();
 
 #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-       free_bootmem(__pa(map_map), size2);
+       memblock_free_early(__pa(map_map), size2);
 #endif
-       free_bootmem(__pa(usemap_map), size);
+       memblock_free_early(__pa(usemap_map), size);
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
index 84b26aaabd03b6d236ba82b84c713844916fd521..b31ba67d440ac997a05de372e831046bf98d9070 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -31,7 +31,6 @@
 #include <linux/memcontrol.h>
 #include <linux/gfp.h>
 #include <linux/uio.h>
-#include <linux/hugetlb.h>
 
 #include "internal.h"
 
@@ -58,7 +57,7 @@ static void __page_cache_release(struct page *page)
 
                spin_lock_irqsave(&zone->lru_lock, flags);
                lruvec = mem_cgroup_page_lruvec(page, zone);
-               VM_BUG_ON(!PageLRU(page));
+               VM_BUG_ON_PAGE(!PageLRU(page), page);
                __ClearPageLRU(page);
                del_page_from_lru_list(page, lruvec, page_off_lru(page));
                spin_unlock_irqrestore(&zone->lru_lock, flags);
@@ -82,118 +81,150 @@ static void __put_compound_page(struct page *page)
 
 static void put_compound_page(struct page *page)
 {
-       if (unlikely(PageTail(page))) {
-               /* __split_huge_page_refcount can run under us */
-               struct page *page_head = compound_trans_head(page);
-
-               if (likely(page != page_head &&
-                          get_page_unless_zero(page_head))) {
-                       unsigned long flags;
+       struct page *page_head;
 
+       if (likely(!PageTail(page))) {
+               if (put_page_testzero(page)) {
                        /*
-                        * THP can not break up slab pages so avoid taking
-                        * compound_lock().  Slab performs non-atomic bit ops
-                        * on page->flags for better performance.  In particular
-                        * slab_unlock() in slub used to be a hot path.  It is
-                        * still hot on arches that do not support
-                        * this_cpu_cmpxchg_double().
+                        * By the time all refcounts have been released
+                        * split_huge_page cannot run anymore from under us.
                         */
-                       if (PageSlab(page_head) || PageHeadHuge(page_head)) {
-                               if (likely(PageTail(page))) {
-                                       /*
-                                        * __split_huge_page_refcount
-                                        * cannot race here.
-                                        */
-                                       VM_BUG_ON(!PageHead(page_head));
-                                       atomic_dec(&page->_mapcount);
-                                       if (put_page_testzero(page_head))
-                                               VM_BUG_ON(1);
-                                       if (put_page_testzero(page_head))
-                                               __put_compound_page(page_head);
-                                       return;
-                               } else
-                                       /*
-                                        * __split_huge_page_refcount
-                                        * run before us, "page" was a
-                                        * THP tail. The split
-                                        * page_head has been freed
-                                        * and reallocated as slab or
-                                        * hugetlbfs page of smaller
-                                        * order (only possible if
-                                        * reallocated as slab on
-                                        * x86).
-                                        */
-                                       goto skip_lock;
-                       }
+                       if (PageHead(page))
+                               __put_compound_page(page);
+                       else
+                               __put_single_page(page);
+               }
+               return;
+       }
+
+       /* __split_huge_page_refcount can run under us */
+       page_head = compound_trans_head(page);
+
+       /*
+        * THP can not break up slab pages so avoid taking
+        * compound_lock() and skip the tail page refcounting (in
+        * _mapcount) too. Slab performs non-atomic bit ops on
+        * page->flags for better performance. In particular
+        * slab_unlock() in slub used to be a hot path. It is still
+        * hot on arches that do not support
+        * this_cpu_cmpxchg_double().
+        *
+        * If "page" is part of a slab or hugetlbfs page it cannot be
+        * splitted and the head page cannot change from under us. And
+        * if "page" is part of a THP page under splitting, if the
+        * head page pointed by the THP tail isn't a THP head anymore,
+        * we'll find PageTail clear after smp_rmb() and we'll treat
+        * it as a single page.
+        */
+       if (!__compound_tail_refcounted(page_head)) {
+               /*
+                * If "page" is a THP tail, we must read the tail page
+                * flags after the head page flags. The
+                * split_huge_page side enforces write memory barriers
+                * between clearing PageTail and before the head page
+                * can be freed and reallocated.
+                */
+               smp_rmb();
+               if (likely(PageTail(page))) {
                        /*
-                        * page_head wasn't a dangling pointer but it
-                        * may not be a head page anymore by the time
-                        * we obtain the lock. That is ok as long as it
-                        * can't be freed from under us.
+                        * __split_huge_page_refcount cannot race
+                        * here.
                         */
-                       flags = compound_lock_irqsave(page_head);
-                       if (unlikely(!PageTail(page))) {
-                               /* __split_huge_page_refcount run before us */
-                               compound_unlock_irqrestore(page_head, flags);
-skip_lock:
-                               if (put_page_testzero(page_head)) {
-                                       /*
-                                        * The head page may have been
-                                        * freed and reallocated as a
-                                        * compound page of smaller
-                                        * order and then freed again.
-                                        * All we know is that it
-                                        * cannot have become: a THP
-                                        * page, a compound page of
-                                        * higher order, a tail page.
-                                        * That is because we still
-                                        * hold the refcount of the
-                                        * split THP tail and
-                                        * page_head was the THP head
-                                        * before the split.
-                                        */
-                                       if (PageHead(page_head))
-                                               __put_compound_page(page_head);
-                                       else
-                                               __put_single_page(page_head);
-                               }
-out_put_single:
-                               if (put_page_testzero(page))
-                                       __put_single_page(page);
-                               return;
+                       VM_BUG_ON_PAGE(!PageHead(page_head), page_head);
+                       VM_BUG_ON_PAGE(page_mapcount(page) != 0, page);
+                       if (put_page_testzero(page_head)) {
+                               /*
+                                * If this is the tail of a slab
+                                * compound page, the tail pin must
+                                * not be the last reference held on
+                                * the page, because the PG_slab
+                                * cannot be cleared before all tail
+                                * pins (which skips the _mapcount
+                                * tail refcounting) have been
+                                * released. For hugetlbfs the tail
+                                * pin may be the last reference on
+                                * the page instead, because
+                                * PageHeadHuge will not go away until
+                                * the compound page enters the buddy
+                                * allocator.
+                                */
+                               VM_BUG_ON_PAGE(PageSlab(page_head), page_head);
+                               __put_compound_page(page_head);
                        }
-                       VM_BUG_ON(page_head != page->first_page);
+                       return;
+               } else
                        /*
-                        * We can release the refcount taken by
-                        * get_page_unless_zero() now that
-                        * __split_huge_page_refcount() is blocked on
-                        * the compound_lock.
+                        * __split_huge_page_refcount run before us,
+                        * "page" was a THP tail. The split page_head
+                        * has been freed and reallocated as slab or
+                        * hugetlbfs page of smaller order (only
+                        * possible if reallocated as slab on x86).
                         */
-                       if (put_page_testzero(page_head))
-                               VM_BUG_ON(1);
-                       /* __split_huge_page_refcount will wait now */
-                       VM_BUG_ON(page_mapcount(page) <= 0);
-                       atomic_dec(&page->_mapcount);
-                       VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
-                       VM_BUG_ON(atomic_read(&page->_count) != 0);
-                       compound_unlock_irqrestore(page_head, flags);
+                       goto out_put_single;
+       }
 
+       if (likely(page != page_head && get_page_unless_zero(page_head))) {
+               unsigned long flags;
+
+               /*
+                * page_head wasn't a dangling pointer but it may not
+                * be a head page anymore by the time we obtain the
+                * lock. That is ok as long as it can't be freed from
+                * under us.
+                */
+               flags = compound_lock_irqsave(page_head);
+               if (unlikely(!PageTail(page))) {
+                       /* __split_huge_page_refcount run before us */
+                       compound_unlock_irqrestore(page_head, flags);
                        if (put_page_testzero(page_head)) {
+                               /*
+                                * The head page may have been freed
+                                * and reallocated as a compound page
+                                * of smaller order and then freed
+                                * again.  All we know is that it
+                                * cannot have become: a THP page, a
+                                * compound page of higher order, a
+                                * tail page.  That is because we
+                                * still hold the refcount of the
+                                * split THP tail and page_head was
+                                * the THP head before the split.
+                                */
                                if (PageHead(page_head))
                                        __put_compound_page(page_head);
                                else
                                        __put_single_page(page_head);
                        }
-               } else {
-                       /* page_head is a dangling pointer */
-                       VM_BUG_ON(PageTail(page));
-                       goto out_put_single;
+out_put_single:
+                       if (put_page_testzero(page))
+                               __put_single_page(page);
+                       return;
                }
-       } else if (put_page_testzero(page)) {
-               if (PageHead(page))
-                       __put_compound_page(page);
-               else
-                       __put_single_page(page);
+               VM_BUG_ON_PAGE(page_head != page->first_page, page);
+               /*
+                * We can release the refcount taken by
+                * get_page_unless_zero() now that
+                * __split_huge_page_refcount() is blocked on the
+                * compound_lock.
+                */
+               if (put_page_testzero(page_head))
+                       VM_BUG_ON_PAGE(1, page_head);
+               /* __split_huge_page_refcount will wait now */
+               VM_BUG_ON_PAGE(page_mapcount(page) <= 0, page);
+               atomic_dec(&page->_mapcount);
+               VM_BUG_ON_PAGE(atomic_read(&page_head->_count) <= 0, page_head);
+               VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page);
+               compound_unlock_irqrestore(page_head, flags);
+
+               if (put_page_testzero(page_head)) {
+                       if (PageHead(page_head))
+                               __put_compound_page(page_head);
+                       else
+                               __put_single_page(page_head);
+               }
+       } else {
+               /* page_head is a dangling pointer */
+               VM_BUG_ON_PAGE(PageTail(page), page);
+               goto out_put_single;
        }
 }
 
@@ -221,36 +252,37 @@ bool __get_page_tail(struct page *page)
         * split_huge_page().
         */
        unsigned long flags;
-       bool got = false;
+       bool got;
        struct page *page_head = compound_trans_head(page);
 
-       if (likely(page != page_head && get_page_unless_zero(page_head))) {
-               /* Ref to put_compound_page() comment. */
-               if (PageSlab(page_head) || PageHeadHuge(page_head)) {
-                       if (likely(PageTail(page))) {
-                               /*
-                                * This is a hugetlbfs page or a slab
-                                * page. __split_huge_page_refcount
-                                * cannot race here.
-                                */
-                               VM_BUG_ON(!PageHead(page_head));
-                               __get_page_tail_foll(page, false);
-                               return true;
-                       } else {
-                               /*
-                                * __split_huge_page_refcount run
-                                * before us, "page" was a THP
-                                * tail. The split page_head has been
-                                * freed and reallocated as slab or
-                                * hugetlbfs page of smaller order
-                                * (only possible if reallocated as
-                                * slab on x86).
-                                */
-                               put_page(page_head);
-                               return false;
-                       }
+       /* Ref to put_compound_page() comment. */
+       if (!__compound_tail_refcounted(page_head)) {
+               smp_rmb();
+               if (likely(PageTail(page))) {
+                       /*
+                        * This is a hugetlbfs page or a slab
+                        * page. __split_huge_page_refcount
+                        * cannot race here.
+                        */
+                       VM_BUG_ON_PAGE(!PageHead(page_head), page_head);
+                       __get_page_tail_foll(page, true);
+                       return true;
+               } else {
+                       /*
+                        * __split_huge_page_refcount run
+                        * before us, "page" was a THP
+                        * tail. The split page_head has been
+                        * freed and reallocated as slab or
+                        * hugetlbfs page of smaller order
+                        * (only possible if reallocated as
+                        * slab on x86).
+                        */
+                       return false;
                }
+       }
 
+       got = false;
+       if (likely(page != page_head && get_page_unless_zero(page_head))) {
                /*
                 * page_head wasn't a dangling pointer but it
                 * may not be a head page anymore by the time
@@ -572,8 +604,8 @@ EXPORT_SYMBOL(__lru_cache_add);
  */
 void lru_cache_add(struct page *page)
 {
-       VM_BUG_ON(PageActive(page) && PageUnevictable(page));
-       VM_BUG_ON(PageLRU(page));
+       VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
+       VM_BUG_ON_PAGE(PageLRU(page), page);
        __lru_cache_add(page);
 }
 
@@ -814,7 +846,7 @@ void release_pages(struct page **pages, int nr, int cold)
                        }
 
                        lruvec = mem_cgroup_page_lruvec(page, zone);
-                       VM_BUG_ON(!PageLRU(page));
+                       VM_BUG_ON_PAGE(!PageLRU(page), page);
                        __ClearPageLRU(page);
                        del_page_from_lru_list(page, lruvec, page_off_lru(page));
                }
@@ -856,9 +888,9 @@ void lru_add_page_tail(struct page *page, struct page *page_tail,
 {
        const int file = 0;
 
-       VM_BUG_ON(!PageHead(page));
-       VM_BUG_ON(PageCompound(page_tail));
-       VM_BUG_ON(PageLRU(page_tail));
+       VM_BUG_ON_PAGE(!PageHead(page), page);
+       VM_BUG_ON_PAGE(PageCompound(page_tail), page);
+       VM_BUG_ON_PAGE(PageLRU(page_tail), page);
        VM_BUG_ON(NR_CPUS != 1 &&
                  !spin_is_locked(&lruvec_zone(lruvec)->lru_lock));
 
@@ -897,7 +929,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
        int active = PageActive(page);
        enum lru_list lru = page_lru(page);
 
-       VM_BUG_ON(PageLRU(page));
+       VM_BUG_ON_PAGE(PageLRU(page), page);
 
        SetPageLRU(page);
        add_page_to_lru_list(page, lruvec, lru);
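
Both put_compound_page() and __get_page_tail() above now key off __compound_tail_refcounted(), which encodes the rule described in the comments: slab and hugetlbfs compound pages skip the _mapcount tail refcounting. Its definition is not shown in this diff, but is presumably close to this sketch:

static inline bool __compound_tail_refcounted(struct page *page)
{
	/* slab and hugetlbfs head pages never use tail pin refcounting */
	return !PageSlab(page) && !PageHeadHuge(page);
}
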
index e6f15f8ca2af339ce9e90159bae0e6a800f06819..e76ace30d4364e99e2311adc73e8fff87dfe6f5a 100644 (file)
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -63,6 +63,8 @@ unsigned long total_swapcache_pages(void)
        return ret;
 }
 
+static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);
+
 void show_swap_cache_info(void)
 {
        printk("%lu pages in swap cache\n", total_swapcache_pages());
@@ -83,9 +85,9 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry)
        int error;
        struct address_space *address_space;
 
-       VM_BUG_ON(!PageLocked(page));
-       VM_BUG_ON(PageSwapCache(page));
-       VM_BUG_ON(!PageSwapBacked(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
+       VM_BUG_ON_PAGE(PageSwapCache(page), page);
+       VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
 
        page_cache_get(page);
        SetPageSwapCache(page);
@@ -139,9 +141,9 @@ void __delete_from_swap_cache(struct page *page)
        swp_entry_t entry;
        struct address_space *address_space;
 
-       VM_BUG_ON(!PageLocked(page));
-       VM_BUG_ON(!PageSwapCache(page));
-       VM_BUG_ON(PageWriteback(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
+       VM_BUG_ON_PAGE(!PageSwapCache(page), page);
+       VM_BUG_ON_PAGE(PageWriteback(page), page);
 
        entry.val = page_private(page);
        address_space = swap_address_space(entry);
@@ -165,8 +167,8 @@ int add_to_swap(struct page *page, struct list_head *list)
        swp_entry_t entry;
        int err;
 
-       VM_BUG_ON(!PageLocked(page));
-       VM_BUG_ON(!PageUptodate(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
+       VM_BUG_ON_PAGE(!PageUptodate(page), page);
 
        entry = get_swap_page();
        if (!entry.val)
@@ -286,8 +288,11 @@ struct page * lookup_swap_cache(swp_entry_t entry)
 
        page = find_get_page(swap_address_space(entry), entry.val);
 
-       if (page)
+       if (page) {
                INC_CACHE_INFO(find_success);
+               if (TestClearPageReadahead(page))
+                       atomic_inc(&swapin_readahead_hits);
+       }
 
        INC_CACHE_INFO(find_total);
        return page;
@@ -389,6 +394,50 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
        return found_page;
 }
 
+static unsigned long swapin_nr_pages(unsigned long offset)
+{
+       static unsigned long prev_offset;
+       unsigned int pages, max_pages, last_ra;
+       static atomic_t last_readahead_pages;
+
+       max_pages = 1 << ACCESS_ONCE(page_cluster);
+       if (max_pages <= 1)
+               return 1;
+
+       /*
+        * This heuristic has been found to work well on both sequential and
+        * random loads, swapping to hard disk or to SSD: please don't ask
+        * what the "+ 2" means, it just happens to work well, that's all.
+        */
+       pages = atomic_xchg(&swapin_readahead_hits, 0) + 2;
+       if (pages == 2) {
+               /*
+                * We can have no readahead hits to judge by: but must not get
+                * stuck here forever, so check for an adjacent offset instead
+                * (and don't even bother to check whether swap type is same).
+                */
+               if (offset != prev_offset + 1 && offset != prev_offset - 1)
+                       pages = 1;
+               prev_offset = offset;
+       } else {
+               unsigned int roundup = 4;
+               while (roundup < pages)
+                       roundup <<= 1;
+               pages = roundup;
+       }
+
+       if (pages > max_pages)
+               pages = max_pages;
+
+       /* Don't shrink readahead too fast */
+       last_ra = atomic_read(&last_readahead_pages) / 2;
+       if (pages < last_ra)
+               pages = last_ra;
+       atomic_set(&last_readahead_pages, pages);
+
+       return pages;
+}
+
 /**
  * swapin_readahead - swap in pages in hope we need them soon
  * @entry: swap entry of this memory
@@ -412,11 +461,16 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
                        struct vm_area_struct *vma, unsigned long addr)
 {
        struct page *page;
-       unsigned long offset = swp_offset(entry);
+       unsigned long entry_offset = swp_offset(entry);
+       unsigned long offset = entry_offset;
        unsigned long start_offset, end_offset;
-       unsigned long mask = (1UL << page_cluster) - 1;
+       unsigned long mask;
        struct blk_plug plug;
 
+       mask = swapin_nr_pages(offset) - 1;
+       if (!mask)
+               goto skip;
+
        /* Read a page_cluster sized and aligned cluster around offset. */
        start_offset = offset & ~mask;
        end_offset = offset | mask;
@@ -430,10 +484,13 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
                                                gfp_mask, vma, addr);
                if (!page)
                        continue;
+               if (offset != entry_offset)
+                       SetPageReadahead(page);
                page_cache_release(page);
        }
        blk_finish_plug(&plug);
 
        lru_add_drain();        /* Push any new pages onto the LRU now */
+skip:
        return read_swap_cache_async(entry, gfp_mask, vma, addr);
 }
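
To make the heuristic in swapin_nr_pages() concrete with assumed numbers: with vm.page_cluster = 3 the window is capped at 8 pages. Five readahead hits since the previous fault give pages = 5 + 2 = 7, rounded up to the next power of two (8) and clamped to the cap, so a full 8-page cluster is read. With no hits and a fault that is not adjacent to the previous offset, the raw estimate falls back to a single page, but the last_readahead_pages floor (half of the previous window) stops the window collapsing in one step: it decays 8, 4, 2, 1 across successive misses, and only at 1 does swapin_readahead() skip clustering and read just the faulting page.
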
index 612a7c9795f6eca1f43f7e86af43d2e74e2f56fa..b5719e1d06cd46b18cb8763abe777de949ca2a54 100644 (file)
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -906,7 +906,7 @@ int reuse_swap_page(struct page *page)
 {
        int count;
 
-       VM_BUG_ON(!PageLocked(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
        if (unlikely(PageKsm(page)))
                return 0;
        count = page_mapcount(page);
@@ -926,7 +926,7 @@ int reuse_swap_page(struct page *page)
  */
 int try_to_free_swap(struct page *page)
 {
-       VM_BUG_ON(!PageLocked(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
 
        if (!PageSwapCache(page))
                return 0;
@@ -1922,7 +1922,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        p->swap_map = NULL;
        cluster_info = p->cluster_info;
        p->cluster_info = NULL;
-       p->flags = 0;
        frontswap_map = frontswap_map_get(p);
        spin_unlock(&p->lock);
        spin_unlock(&swap_lock);
@@ -1948,6 +1947,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
                mutex_unlock(&inode->i_mutex);
        }
        filp_close(swap_file, NULL);
+
+       /*
+        * Clear the SWP_USED flag after all resources are freed so that swapon
+        * can reuse this swap_info in alloc_swap_info() safely.  It is ok to
+        * not hold p->lock after we cleared its SWP_WRITEOK.
+        */
+       spin_lock(&swap_lock);
+       p->flags = 0;
+       spin_unlock(&swap_lock);
+
        err = 0;
        atomic_inc(&proc_poll_event);
        wake_up_interruptible(&proc_poll_wait);
@@ -2714,7 +2723,7 @@ struct swap_info_struct *page_swap_info(struct page *page)
  */
 struct address_space *__page_file_mapping(struct page *page)
 {
-       VM_BUG_ON(!PageSwapCache(page));
+       VM_BUG_ON_PAGE(!PageSwapCache(page), page);
        return page_swap_info(page)->swap_file->f_mapping;
 }
 EXPORT_SYMBOL_GPL(__page_file_mapping);
@@ -2722,7 +2731,7 @@ EXPORT_SYMBOL_GPL(__page_file_mapping);
 pgoff_t __page_file_index(struct page *page)
 {
        swp_entry_t swap = { .val = page_private(page) };
-       VM_BUG_ON(!PageSwapCache(page));
+       VM_BUG_ON_PAGE(!PageSwapCache(page), page);
        return swp_offset(swap);
 }
 EXPORT_SYMBOL_GPL(__page_file_index);
index 808f375648e77b6c1057aeee2c6d19ea26af981e..a24aa22f2473690c1e2fa95514f778c6c0c616a7 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -404,13 +404,45 @@ struct address_space *page_mapping(struct page *page)
        return mapping;
 }
 
+int overcommit_ratio_handler(struct ctl_table *table, int write,
+                            void __user *buffer, size_t *lenp,
+                            loff_t *ppos)
+{
+       int ret;
+
+       ret = proc_dointvec(table, write, buffer, lenp, ppos);
+       if (ret == 0 && write)
+               sysctl_overcommit_kbytes = 0;
+       return ret;
+}
+
+int overcommit_kbytes_handler(struct ctl_table *table, int write,
+                            void __user *buffer, size_t *lenp,
+                            loff_t *ppos)
+{
+       int ret;
+
+       ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
+       if (ret == 0 && write)
+               sysctl_overcommit_ratio = 0;
+       return ret;
+}
+
 /*
  * Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
  */
 unsigned long vm_commit_limit(void)
 {
-       return ((totalram_pages - hugetlb_total_pages())
-               * sysctl_overcommit_ratio / 100) + total_swap_pages;
+       unsigned long allowed;
+
+       if (sysctl_overcommit_kbytes)
+               allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10);
+       else
+               allowed = ((totalram_pages - hugetlb_total_pages())
+                          * sysctl_overcommit_ratio / 100);
+       allowed += total_swap_pages;
+
+       return allowed;
 }
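
With assumed numbers: under vm.overcommit_memory = 2 on a machine with 8 GiB of RAM (no hugetlb pages) and 2 GiB of swap, vm.overcommit_ratio = 50 keeps the commit limit at 8 GiB * 50% + 2 GiB = 6 GiB; writing vm.overcommit_kbytes = 3145728 instead fixes the RAM contribution at 3 GiB, giving 3 GiB + 2 GiB = 5 GiB. The two handlers above zero whichever knob was not written, so only one of the two policies is ever in effect.
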
 
 
index 0fdf96803c5b59623792a24e57015fb0e25098bb..e4f0db2a3eae5ea01f1a80e0d3b680a9c2fa7c08 100644 (file)
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -220,12 +220,12 @@ int is_vmalloc_or_module_addr(const void *x)
 }
 
 /*
- * Walk a vmap address to the struct page it maps.
+ * Walk a vmap address to the physical pfn it maps to.
  */
-struct page *vmalloc_to_page(const void *vmalloc_addr)
+unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
 {
        unsigned long addr = (unsigned long) vmalloc_addr;
-       struct page *page = NULL;
+       unsigned long pfn = 0;
        pgd_t *pgd = pgd_offset_k(addr);
 
        /*
@@ -244,23 +244,23 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
                                ptep = pte_offset_map(pmd, addr);
                                pte = *ptep;
                                if (pte_present(pte))
-                                       page = pte_page(pte);
+                                       pfn = pte_pfn(pte);
                                pte_unmap(ptep);
                        }
                }
        }
-       return page;
+       return pfn;
 }
-EXPORT_SYMBOL(vmalloc_to_page);
+EXPORT_SYMBOL(vmalloc_to_pfn);
 
 /*
- * Map a vmalloc()-space virtual address to the physical page frame number.
+ * Map a vmalloc()-space virtual address to the struct page.
  */
-unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
+struct page *vmalloc_to_page(const void *vmalloc_addr)
 {
-       return page_to_pfn(vmalloc_to_page(vmalloc_addr));
+       return pfn_to_page(vmalloc_to_pfn(vmalloc_addr));
 }
-EXPORT_SYMBOL(vmalloc_to_pfn);
+EXPORT_SYMBOL(vmalloc_to_page);
 
 
 /*** Global kva allocator ***/
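
This hunk inverts which helper does the real page-table walk: vmalloc_to_pfn() now walks the tables and returns the frame number, while vmalloc_to_page() becomes a thin pfn_to_page() wrapper. A toy user-space model of the pfn/struct page round trip the wrapper relies on; the flat mem_map array and the struct page stand-in are purely illustrative:

#include <assert.h>
#include <stdio.h>

struct page { int dummy; };             /* stand-in for the kernel's type */

static struct page mem_map[16];         /* toy flat memory map */

static unsigned long page_to_pfn(struct page *p) { return p - mem_map; }
static struct page *pfn_to_page(unsigned long pfn) { return &mem_map[pfn]; }

int main(void)
{
        unsigned long pfn = 5;

        /* old shape: struct page first, pfn derived from it */
        assert(page_to_pfn(pfn_to_page(pfn)) == pfn);
        /* new shape: pfn first, struct page derived from it */
        assert(pfn_to_page(page_to_pfn(&mem_map[7])) == &mem_map[7]);
        printf("round trips ok\n");
        return 0;
}
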
index eea668d9cff6c578ada0cf6c02eca5e22de5598d..89dd0f742507244f5fcd28adb3d65b58b49660c4 100644 (file)
@@ -281,17 +281,34 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
                                nr_pages_scanned, lru_pages,
                                max_pass, delta, total_scan);
 
-       while (total_scan >= batch_size) {
+       /*
+        * Normally, we should not scan less than batch_size objects in one
+        * pass to avoid too frequent shrinker calls, but if the slab has less
+        * than batch_size objects in total and we are really tight on memory,
+        * we will try to reclaim all available objects, otherwise we can end
+        * up failing allocations although there are plenty of reclaimable
+        * objects spread over several slabs with usage less than the
+        * batch_size.
+        *
+        * We detect the "tight on memory" situations by looking at the total
+        * number of objects we want to scan (total_scan). If it is greater
+        * than the total number of objects on slab (max_pass), we must be
+        * scanning at high prio and therefore should try to reclaim as much as
+        * possible.
+        */
+       while (total_scan >= batch_size ||
+              total_scan >= max_pass) {
                unsigned long ret;
+               unsigned long nr_to_scan = min(batch_size, total_scan);
 
-               shrinkctl->nr_to_scan = batch_size;
+               shrinkctl->nr_to_scan = nr_to_scan;
                ret = shrinker->scan_objects(shrinker, shrinkctl);
                if (ret == SHRINK_STOP)
                        break;
                freed += ret;
 
-               count_vm_events(SLABS_SCANNED, batch_size);
-               total_scan -= batch_size;
+               count_vm_events(SLABS_SCANNED, nr_to_scan);
+               total_scan -= nr_to_scan;
 
                cond_resched();
        }
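
The comment above describes the point of the new loop condition: a cache with fewer than batch_size objects is no longer skipped when the requested scan count (total_scan) already covers the whole cache. A user-space simulation of just that loop arithmetic, with every number invented and the scan callback reduced to "pretend everything asked for was freed":

#include <stdio.h>

static unsigned long scan(unsigned long total_scan, unsigned long max_pass,
                          unsigned long batch_size)
{
        unsigned long freed = 0;

        /* same condition as the new shrink_slab_node() loop */
        while (total_scan >= batch_size || total_scan >= max_pass) {
                unsigned long nr_to_scan =
                        total_scan < batch_size ? total_scan : batch_size;

                /* the real shrinker->scan_objects() would cap this at what
                 * the cache actually holds */
                freed += nr_to_scan;
                total_scan -= nr_to_scan;
        }
        return freed;
}

int main(void)
{
        /* big cache: plain batching, the leftover carries to the next call */
        printf("large cache: freed %lu of 1500 requested\n",
               scan(1500, 5000, 1024));
        /* small cache under pressure: total_scan >= max_pass lets it run */
        printf("small cache: freed %lu of 300 requested\n",
               scan(300, 200, 1024));
        return 0;
}

With the old condition, while (total_scan >= batch_size), the second call would never enter the loop at all.
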
@@ -352,16 +369,17 @@ unsigned long shrink_slab(struct shrink_control *shrinkctl,
        }
 
        list_for_each_entry(shrinker, &shrinker_list, list) {
-               for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) {
-                       if (!node_online(shrinkctl->nid))
-                               continue;
-
-                       if (!(shrinker->flags & SHRINKER_NUMA_AWARE) &&
-                           (shrinkctl->nid != 0))
-                               break;
-
+               if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) {
+                       shrinkctl->nid = 0;
                        freed += shrink_slab_node(shrinkctl, shrinker,
-                                nr_pages_scanned, lru_pages);
+                                       nr_pages_scanned, lru_pages);
+                       continue;
+               }
+
+               for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) {
+                       if (node_online(shrinkctl->nid))
+                               freed += shrink_slab_node(shrinkctl, shrinker,
+                                               nr_pages_scanned, lru_pages);
 
                }
        }
@@ -603,7 +621,7 @@ void putback_lru_page(struct page *page)
        bool is_unevictable;
        int was_unevictable = PageUnevictable(page);
 
-       VM_BUG_ON(PageLRU(page));
+       VM_BUG_ON_PAGE(PageLRU(page), page);
 
 redo:
        ClearPageUnevictable(page);
@@ -794,8 +812,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                if (!trylock_page(page))
                        goto keep;
 
-               VM_BUG_ON(PageActive(page));
-               VM_BUG_ON(page_zone(page) != zone);
+               VM_BUG_ON_PAGE(PageActive(page), page);
+               VM_BUG_ON_PAGE(page_zone(page) != zone, page);
 
                sc->nr_scanned++;
 
@@ -1079,14 +1097,14 @@ activate_locked:
                /* Not a candidate for swapping, so reclaim swap space. */
                if (PageSwapCache(page) && vm_swap_full())
                        try_to_free_swap(page);
-               VM_BUG_ON(PageActive(page));
+               VM_BUG_ON_PAGE(PageActive(page), page);
                SetPageActive(page);
                pgactivate++;
 keep_locked:
                unlock_page(page);
 keep:
                list_add(&page->lru, &ret_pages);
-               VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
+               VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
        }
 
        free_hot_cold_page_list(&free_pages, 1);
@@ -1240,7 +1258,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                page = lru_to_page(src);
                prefetchw_prev_lru_page(page, src, flags);
 
-               VM_BUG_ON(!PageLRU(page));
+               VM_BUG_ON_PAGE(!PageLRU(page), page);
 
                switch (__isolate_lru_page(page, mode)) {
                case 0:
@@ -1295,7 +1313,7 @@ int isolate_lru_page(struct page *page)
 {
        int ret = -EBUSY;
 
-       VM_BUG_ON(!page_count(page));
+       VM_BUG_ON_PAGE(!page_count(page), page);
 
        if (PageLRU(page)) {
                struct zone *zone = page_zone(page);
@@ -1366,7 +1384,7 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
                struct page *page = lru_to_page(page_list);
                int lru;
 
-               VM_BUG_ON(PageLRU(page));
+               VM_BUG_ON_PAGE(PageLRU(page), page);
                list_del(&page->lru);
                if (unlikely(!page_evictable(page))) {
                        spin_unlock_irq(&zone->lru_lock);
@@ -1586,7 +1604,7 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
                page = lru_to_page(list);
                lruvec = mem_cgroup_page_lruvec(page, zone);
 
-               VM_BUG_ON(PageLRU(page));
+               VM_BUG_ON_PAGE(PageLRU(page), page);
                SetPageLRU(page);
 
                nr_pages = hpage_nr_pages(page);
@@ -2279,7 +2297,12 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
        struct zone *zone;
        unsigned long nr_soft_reclaimed;
        unsigned long nr_soft_scanned;
+       unsigned long lru_pages = 0;
        bool aborted_reclaim = false;
+       struct reclaim_state *reclaim_state = current->reclaim_state;
+       struct shrink_control shrink = {
+               .gfp_mask = sc->gfp_mask,
+       };
 
        /*
         * If the number of buffer_heads in the machine exceeds the maximum
@@ -2289,6 +2312,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
        if (buffer_heads_over_limit)
                sc->gfp_mask |= __GFP_HIGHMEM;
 
+       nodes_clear(shrink.nodes_to_scan);
+
        for_each_zone_zonelist_nodemask(zone, z, zonelist,
                                        gfp_zone(sc->gfp_mask), sc->nodemask) {
                if (!populated_zone(zone))
@@ -2300,6 +2325,10 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                if (global_reclaim(sc)) {
                        if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
                                continue;
+
+                       lru_pages += zone_reclaimable_pages(zone);
+                       node_set(zone_to_nid(zone), shrink.nodes_to_scan);
+
                        if (sc->priority != DEF_PRIORITY &&
                            !zone_reclaimable(zone))
                                continue;       /* Let kswapd poll it */
@@ -2336,6 +2365,20 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                shrink_zone(zone, sc);
        }
 
+       /*
+        * Don't shrink slabs when reclaiming memory from over limit cgroups
+        * but do shrink slab at least once when aborting reclaim for
+        * compaction to avoid unevenly scanning file/anon LRU pages over slab
+        * pages.
+        */
+       if (global_reclaim(sc)) {
+               shrink_slab(&shrink, sc->nr_scanned, lru_pages);
+               if (reclaim_state) {
+                       sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+                       reclaim_state->reclaimed_slab = 0;
+               }
+       }
+
        return aborted_reclaim;
 }
 
@@ -2376,13 +2419,9 @@ static bool all_unreclaimable(struct zonelist *zonelist,
  *             else, the number of pages reclaimed
  */
 static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
-                                       struct scan_control *sc,
-                                       struct shrink_control *shrink)
+                                         struct scan_control *sc)
 {
        unsigned long total_scanned = 0;
-       struct reclaim_state *reclaim_state = current->reclaim_state;
-       struct zoneref *z;
-       struct zone *zone;
        unsigned long writeback_threshold;
        bool aborted_reclaim;
 
@@ -2397,32 +2436,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                sc->nr_scanned = 0;
                aborted_reclaim = shrink_zones(zonelist, sc);
 
-               /*
-                * Don't shrink slabs when reclaiming memory from over limit
-                * cgroups but do shrink slab at least once when aborting
-                * reclaim for compaction to avoid unevenly scanning file/anon
-                * LRU pages over slab pages.
-                */
-               if (global_reclaim(sc)) {
-                       unsigned long lru_pages = 0;
-
-                       nodes_clear(shrink->nodes_to_scan);
-                       for_each_zone_zonelist(zone, z, zonelist,
-                                       gfp_zone(sc->gfp_mask)) {
-                               if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
-                                       continue;
-
-                               lru_pages += zone_reclaimable_pages(zone);
-                               node_set(zone_to_nid(zone),
-                                        shrink->nodes_to_scan);
-                       }
-
-                       shrink_slab(shrink, sc->nr_scanned, lru_pages);
-                       if (reclaim_state) {
-                               sc->nr_reclaimed += reclaim_state->reclaimed_slab;
-                               reclaim_state->reclaimed_slab = 0;
-                       }
-               }
                total_scanned += sc->nr_scanned;
                if (sc->nr_reclaimed >= sc->nr_to_reclaim)
                        goto out;
@@ -2584,9 +2597,6 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                .target_mem_cgroup = NULL,
                .nodemask = nodemask,
        };
-       struct shrink_control shrink = {
-               .gfp_mask = sc.gfp_mask,
-       };
 
        /*
         * Do not enter reclaim if fatal signal was delivered while throttled.
@@ -2600,7 +2610,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                                sc.may_writepage,
                                gfp_mask);
 
-       nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
+       nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
        trace_mm_vmscan_direct_reclaim_end(nr_reclaimed);
 
@@ -2667,9 +2677,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                                (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
        };
-       struct shrink_control shrink = {
-               .gfp_mask = sc.gfp_mask,
-       };
 
        /*
         * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't
@@ -2684,7 +2691,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                                            sc.may_writepage,
                                            sc.gfp_mask);
 
-       nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
+       nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
        trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
 
@@ -3340,9 +3347,6 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
                .order = 0,
                .priority = DEF_PRIORITY,
        };
-       struct shrink_control shrink = {
-               .gfp_mask = sc.gfp_mask,
-       };
        struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
        struct task_struct *p = current;
        unsigned long nr_reclaimed;
@@ -3352,7 +3356,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
        reclaim_state.reclaimed_slab = 0;
        p->reclaim_state = &reclaim_state;
 
-       nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
+       nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
        p->reclaim_state = NULL;
        lockdep_clear_current_reclaim_state();
@@ -3701,7 +3705,7 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
                if (page_evictable(page)) {
                        enum lru_list lru = page_lru_base_type(page);
 
-                       VM_BUG_ON(PageActive(page));
+                       VM_BUG_ON_PAGE(PageActive(page), page);
                        ClearPageUnevictable(page);
                        del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE);
                        add_page_to_lru_list(page, lruvec, lru);
index 5a63f78a5601aa63112f30b3338be6c413f02f4c..e55bab9dc41f81ab1b6384710e918f3839e0c936 100644 (file)
@@ -77,12 +77,12 @@ static u64 zswap_duplicate_entry;
 **********************************/
 /* Enable/disable zswap (disabled by default, fixed at boot for now) */
 static bool zswap_enabled __read_mostly;
-module_param_named(enabled, zswap_enabled, bool, 0);
+module_param_named(enabled, zswap_enabled, bool, 0444);
 
 /* Compressor to be used by zswap (fixed at boot for now) */
 #define ZSWAP_COMPRESSOR_DEFAULT "lzo"
 static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
-module_param_named(compressor, zswap_compressor, charp, 0);
+module_param_named(compressor, zswap_compressor, charp, 0444);
 
 /* The maximum percentage of memory that the compressed pool can occupy */
 static unsigned int zswap_max_pool_percent = 20;
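
Raising the permission bits from 0 to 0444 makes the zswap parameters visible, read-only, under /sys/module/zswap/parameters/ instead of hidden. A minimal, hypothetical module showing the same pattern; the module and parameter names here are invented for the example:

#include <linux/module.h>
#include <linux/moduleparam.h>

/* Value can be set on the modprobe/insmod command line, but only read back
 * through sysfs afterwards because no write bits are granted. */
static bool demo_enabled;
module_param_named(enabled, demo_enabled, bool, 0444);
MODULE_PARM_DESC(enabled, "demo feature switch (read-only via sysfs)");

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("read-only module parameter sketch");

After loading, cat /sys/module/<modulename>/parameters/enabled returns the value; writing to it fails because the file carries no write permission.
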
index 8a520996f3d268b6a3a1407fce986c3cf781a871..e498a62b8f972d29126f24569c2dbc85037b80a7 100644 (file)
@@ -23,7 +23,6 @@
 #define ALPHA_MIN      ((3*ALPHA_SCALE)/10)    /* ~0.3 */
 #define ALPHA_MAX      (10*ALPHA_SCALE)        /* 10.0 */
 #define ALPHA_BASE     ALPHA_SCALE             /* 1.0 */
-#define U32_MAX                ((u32)~0U)
 #define RTT_MAX                (U32_MAX / ALPHA_MAX)   /* 3.3 secs */
 
 #define BETA_SHIFT     6
index 3f64a66bf5d9b78bfa551124cbc7bd3d7d8a8857..b827a0f1f3510bc0fa4e6b0a498738806ac62e81 100644 (file)
@@ -46,31 +46,12 @@ struct iface_node {
 static void
 rbtree_destroy(struct rb_root *root)
 {
-       struct rb_node *p, *n = root->rb_node;
-       struct iface_node *node;
-
-       /* Non-recursive destroy, like in ext3 */
-       while (n) {
-               if (n->rb_left) {
-                       n = n->rb_left;
-                       continue;
-               }
-               if (n->rb_right) {
-                       n = n->rb_right;
-                       continue;
-               }
-               p = rb_parent(n);
-               node = rb_entry(n, struct iface_node, node);
-               if (!p)
-                       *root = RB_ROOT;
-               else if (p->rb_left == n)
-                       p->rb_left = NULL;
-               else if (p->rb_right == n)
-                       p->rb_right = NULL;
+       struct iface_node *node, *next;
 
+       rbtree_postorder_for_each_entry_safe(node, next, root, node)
                kfree(node);
-               n = p;
-       }
+
+       *root = RB_ROOT;
 }
 
 static int
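
rbtree_postorder_for_each_entry_safe() visits every node after its children, so each entry can be freed as it is reached with no manual unlinking, which is what lets the hand-rolled destroy loop above be dropped. A hedged kernel-context sketch of the same pattern with an invented node type:

#include <linux/rbtree.h>
#include <linux/slab.h>

/* Illustrative only: a tree of these nodes, keyed however the caller likes. */
struct demo_node {
        struct rb_node rb;
        int key;
};

/* Free every node; post-order guarantees children go before their parent,
 * so freed memory is never reached through rb pointers. */
static void demo_tree_destroy(struct rb_root *root)
{
        struct demo_node *pos, *next;

        rbtree_postorder_for_each_entry_safe(pos, next, root, rb)
                kfree(pos);

        *root = RB_ROOT;        /* leave an empty, reusable root behind */
}
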
index 9fb30b15c9dc72b948266a5085b51bd8c3e7bb82..05c99c0b7e6c8e272f374c05f3fa86804efaa5ed 100755 (executable)
@@ -29,6 +29,7 @@ my $mailback = 0;
 my $summary_file = 0;
 my $show_types = 0;
 my $fix = 0;
+my $fix_inplace = 0;
 my $root;
 my %debug;
 my %camelcase = ();
@@ -76,6 +77,9 @@ Options:
                              "<inputfile>.EXPERIMENTAL-checkpatch-fixes"
                              with potential errors corrected to the preferred
                              checkpatch style
+  --fix-inplace              EXPERIMENTAL - may create horrible results
+                             Is the same as --fix, but overwrites the input
+                             file.  It's your fault if there's no backup or git
   --ignore-perl-version      override checking of perl version.  expect
                              runtime errors.
   -h, --help, --version      display this help and exit
@@ -131,6 +135,7 @@ GetOptions(
        'mailback!'     => \$mailback,
        'summary-file!' => \$summary_file,
        'fix!'          => \$fix,
+       'fix-inplace!'  => \$fix_inplace,
        'ignore-perl-version!' => \$ignore_perl_version,
        'debug=s'       => \%debug,
        'test-only=s'   => \$tst_only,
@@ -140,6 +145,8 @@ GetOptions(
 
 help(0) if ($help);
 
+$fix = 1 if ($fix_inplace);
+
 my $exit = 0;
 
 if ($^V && $^V lt $minimum_perl_version) {
@@ -1963,15 +1970,14 @@ sub process {
                }
 
 # Check for FSF mailing addresses.
-               if ($rawline =~ /You should have received a copy/ ||
-                   $rawline =~ /write to the Free Software/ ||
-                   $rawline =~ /59 Temple Place/ ||
-                   $rawline =~ /51 Franklin Street/) {
+               if ($rawline =~ /\bwrite to the Free/i ||
+                   $rawline =~ /\b59\s+Temple\s+Pl/i ||
+                   $rawline =~ /\b51\s+Franklin\s+St/i) {
                        my $herevet = "$here\n" . cat_vet($rawline) . "\n";
                        my $msg_type = \&ERROR;
                        $msg_type = \&CHK if ($file);
                        &{$msg_type}("FSF_MAILING_ADDRESS",
-                               "Do not include the paragraph about writing to the Free Software Foundation's mailing address from the sample GPL notice. The FSF has changed addresses in the past, and may do so again. Linux already includes a copy of the GPL.\n" . $herevet)
+                                    "Do not include the paragraph about writing to the Free Software Foundation's mailing address from the sample GPL notice. The FSF has changed addresses in the past, and may do so again. Linux already includes a copy of the GPL.\n" . $herevet)
                }
 
 # check for Kconfig help text having a real description
@@ -2034,6 +2040,33 @@ sub process {
                             "Use of $flag is deprecated, please use \`$replacement->{$flag} instead.\n" . $herecurr) if ($replacement->{$flag});
                }
 
+# check for DT compatible documentation
+               if (defined $root && $realfile =~ /\.dts/ &&
+                   $rawline =~ /^\+\s*compatible\s*=/) {
+                       my @compats = $rawline =~ /\"([a-zA-Z0-9\-\,\.\+_]+)\"/g;
+
+                       foreach my $compat (@compats) {
+                               my $compat2 = $compat;
+                               my $dt_path =  $root . "/Documentation/devicetree/bindings/";
+                               $compat2 =~ s/\,[a-z]*\-/\,<\.\*>\-/;
+                               `grep -Erq "$compat|$compat2" $dt_path`;
+                               if ( $? >> 8 ) {
+                                       WARN("UNDOCUMENTED_DT_STRING",
+                                            "DT compatible string \"$compat\" appears un-documented -- check $dt_path\n" . $herecurr);
+                               }
+
+                               my $vendor = $compat;
+                               my $vendor_path = $dt_path . "vendor-prefixes.txt";
+                               next if (! -f $vendor_path);
+                               $vendor =~ s/^([a-zA-Z0-9]+)\,.*/$1/;
+                               `grep -Eq "$vendor" $vendor_path`;
+                               if ( $? >> 8 ) {
+                                       WARN("UNDOCUMENTED_DT_STRING",
+                                            "DT compatible string vendor \"$vendor\" appears un-documented -- check $vendor_path\n" . $herecurr);
+                               }
+                       }
+               }
+
 # check we are in a valid source file if not then ignore this hunk
                next if ($realfile !~ /\.(h|c|s|S|pl|sh)$/);
 
@@ -2049,16 +2082,12 @@ sub process {
                }
 
 # Check for user-visible strings broken across lines, which breaks the ability
-# to grep for the string.  Limited to strings used as parameters (those
-# following an open parenthesis), which almost completely eliminates false
-# positives, as well as warning only once per parameter rather than once per
-# line of the string.  Make an exception when the previous string ends in a
-# newline (multiple lines in one string constant) or \n\t (common in inline
-# assembly to indent the instruction on the following line).
+# to grep for the string.  Make exceptions when the previous string ends in a
+# newline (multiple lines in one string constant) or '\t', '\r', ';', or '{'
+# (common in inline assembly) or is an octal \123 or hexadecimal \xaf value
                if ($line =~ /^\+\s*"/ &&
                    $prevline =~ /"\s*$/ &&
-                   $prevline =~ /\(/ &&
-                   $prevrawline !~ /\\n(?:\\t)*"\s*$/) {
+                   $prevrawline !~ /(?:\\(?:[ntr]|[0-7]{1,3}|x[0-9a-fA-F]{1,2})|;\s*|\{\s*)"\s*$/) {
                        WARN("SPLIT_STRING",
                             "quoted string split across lines\n" . $hereprev);
                }
@@ -2115,8 +2144,10 @@ sub process {
                        if (WARN("SPACE_BEFORE_TAB",
                                "please, no space before tabs\n" . $herevet) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
-                                   s/(^\+.*) +\t/$1\t/;
+                               while ($fixed[$linenr - 1] =~
+                                          s/(^\+.*) {8,8}+\t/$1\t\t/) {}
+                               while ($fixed[$linenr - 1] =~
+                                          s/(^\+.*) +\t/$1\t/) {}
                        }
                }
 
@@ -2805,6 +2836,65 @@ sub process {
                        }
                }
 
+# Function pointer declarations
+# check spacing between type, funcptr, and args
+# canonical declaration is "type (*funcptr)(args...)"
+#
+# the $Declare variable will capture all spaces after the type
+# so check it for trailing missing spaces or multiple spaces
+               if ($line =~ /^.\s*($Declare)\((\s*)\*(\s*)$Ident(\s*)\)(\s*)\(/) {
+                       my $declare = $1;
+                       my $pre_pointer_space = $2;
+                       my $post_pointer_space = $3;
+                       my $funcname = $4;
+                       my $post_funcname_space = $5;
+                       my $pre_args_space = $6;
+
+                       if ($declare !~ /\s$/) {
+                               WARN("SPACING",
+                                    "missing space after return type\n" . $herecurr);
+                       }
+
+# unnecessary space "type  (*funcptr)(args...)"
+                       elsif ($declare =~ /\s{2,}$/) {
+                               WARN("SPACING",
+                                    "Multiple spaces after return type\n" . $herecurr);
+                       }
+
+# unnecessary space "type ( *funcptr)(args...)"
+                       if (defined $pre_pointer_space &&
+                           $pre_pointer_space =~ /^\s/) {
+                               WARN("SPACING",
+                                    "Unnecessary space after function pointer open parenthesis\n" . $herecurr);
+                       }
+
+# unnecessary space "type (* funcptr)(args...)"
+                       if (defined $post_pointer_space &&
+                           $post_pointer_space =~ /^\s/) {
+                               WARN("SPACING",
+                                    "Unnecessary space before function pointer name\n" . $herecurr);
+                       }
+
+# unnecessary space "type (*funcptr )(args...)"
+                       if (defined $post_funcname_space &&
+                           $post_funcname_space =~ /^\s/) {
+                               WARN("SPACING",
+                                    "Unnecessary space after function pointer name\n" . $herecurr);
+                       }
+
+# unnecessary space "type (*funcptr) (args...)"
+                       if (defined $pre_args_space &&
+                           $pre_args_space =~ /^\s/) {
+                               WARN("SPACING",
+                                    "Unnecessary space before function pointer arguments\n" . $herecurr);
+                       }
+
+                       if (show_type("SPACING") && $fix) {
+                               $fixed[$linenr - 1] =~
+                                   s/^(.\s*$Declare)\(\s*\*\s*($Ident)\s*\)\s*\(/rtrim($1) . " " . "\(\*$2\)\("/ex;
+                       }
+               }
+
 # check for spacing round square brackets; allowed:
 #  1. with a type on the left -- int [] a;
 #  2. at the beginning of a line for slice initialisers -- [0...10] = 5,
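
The new SPACING tests accept exactly one space between the return type and the opening parenthesis and no stray spaces inside the declarator. A small C illustration; the canonical form compiles, and the variants the checks would flag are kept as comments so the snippet stays buildable:

#include <stdio.h>

/* canonical: "type (*funcptr)(args...)" */
static int (*handler)(int value);

/* forms the new checks would warn about:
 *   static int(*handler)(int value);     missing space after return type
 *   static int ( *handler)(int value);   space after the open parenthesis
 *   static int (* handler)(int value);   space before the pointer name
 *   static int (*handler )(int value);   space after the pointer name
 *   static int (*handler) (int value);   space before the argument list
 */

static int double_it(int value)
{
        return value * 2;
}

int main(void)
{
        handler = double_it;
        printf("%d\n", handler(21));    /* prints 42 */
        return 0;
}
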
@@ -3125,7 +3215,7 @@ sub process {
                }
 
 # check for whitespace before a non-naked semicolon
-               if ($line =~ /^\+.*\S\s+;/) {
+               if ($line =~ /^\+.*\S\s+;\s*$/) {
                        if (WARN("SPACING",
                                 "space prohibited before semicolon\n" . $herecurr) &&
                            $fix) {
@@ -3249,6 +3339,20 @@ sub process {
                        }
                }
 
+# if statements using unnecessary parentheses - ie: if ((foo == bar))
+               if ($^V && $^V ge 5.10.0 &&
+                   $line =~ /\bif\s*((?:\(\s*){2,})/) {
+                       my $openparens = $1;
+                       my $count = $openparens =~ tr@\(@\(@;
+                       my $msg = "";
+                       if ($line =~ /\bif\s*(?:\(\s*){$count,$count}$LvalOrFunc\s*($Compare)\s*$LvalOrFunc(?:\s*\)){$count,$count}/) {
+                               my $comp = $4;  #Not $1 because of $LvalOrFunc
+                               $msg = " - maybe == should be = ?" if ($comp eq "==");
+                               WARN("UNNECESSARY_PARENTHESES",
+                                    "Unnecessary parentheses$msg\n" . $herecurr);
+                       }
+               }
+
 # Return of what appears to be an errno should normally be -'ve
                if ($line =~ /^.\s*return\s*(E[A-Z]*)\s*;/) {
                        my $name = $1;
@@ -4117,6 +4221,12 @@ sub process {
                             "$1 uses number as first arg, sizeof is generally wrong\n" . $herecurr);
                }
 
+# check for __GFP_NOFAIL use
+               if ($line =~ /\b__GFP_NOFAIL\b/) {
+                       WARN("__GFP_NOFAIL",
+                            "Use of __GFP_NOFAIL is deprecated, no new users should be added\n" . $herecurr);
+               }
+
 # check for multiple semicolons
                if ($line =~ /;\s*;\s*$/) {
                        if (WARN("ONE_SEMICOLON",
@@ -4126,6 +4236,31 @@ sub process {
                        }
                }
 
+# check for case / default statements not preceded by break/fallthrough/switch
+               if ($line =~ /^.\s*(?:case\s+(?:$Ident|$Constant)\s*|default):/) {
+                       my $has_break = 0;
+                       my $has_statement = 0;
+                       my $count = 0;
+                       my $prevline = $linenr;
+                       while ($prevline > 1 && $count < 3 && !$has_break) {
+                               $prevline--;
+                               my $rline = $rawlines[$prevline - 1];
+                               my $fline = $lines[$prevline - 1];
+                               last if ($fline =~ /^\@\@/);
+                               next if ($fline =~ /^\-/);
+                               next if ($fline =~ /^.(?:\s*(?:case\s+(?:$Ident|$Constant)[\s$;]*|default):[\s$;]*)*$/);
+                               $has_break = 1 if ($rline =~ /fall[\s_-]*(through|thru)/i);
+                               next if ($fline =~ /^.[\s$;]*$/);
+                               $has_statement = 1;
+                               $count++;
+                               $has_break = 1 if ($fline =~ /\bswitch\b|\b(?:break\s*;[\s$;]*$|return\b|goto\b|continue\b)/);
+                       }
+                       if (!$has_break && $has_statement) {
+                               WARN("MISSING_BREAK",
+                                    "Possible switch case/default not preceded by break or fallthrough comment\n" . $herecurr);
+                       }
+               }
+
 # check for switch/default statements without a break;
                if ($^V && $^V ge 5.10.0 &&
                    defined $stat &&
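
The new MISSING_BREAK test scans a few statements back from each case/default label and warns when it finds neither flow control (break, return, goto, continue, or an enclosing switch) nor a fall-through comment. A short C example with made-up codes showing the accepted annotation:

#include <stdio.h>

static const char *classify(int code)
{
        switch (code) {
        case 0:
                return "ok";            /* return ends the case: no warning */
        case 1:
                printf("soft error\n");
                /* fall through */      /* the comment tells checkpatch (and
                                         * readers) the fall-through is
                                         * intentional */
        case 2:
                return "retry";
        default:
                return "unknown";
        }
}

int main(void)
{
        printf("%s\n", classify(1));    /* prints "soft error" then "retry" */
        return 0;
}

Without the fall-through comment (and with no break or return) before "case 2:", the check above would emit the MISSING_BREAK warning for that label.
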
@@ -4361,7 +4496,8 @@ sub process {
        hash_show_words(\%ignore_type, "Ignored");
 
        if ($clean == 0 && $fix && "@rawlines" ne "@fixed") {
-               my $newfile = $filename . ".EXPERIMENTAL-checkpatch-fixes";
+               my $newfile = $filename;
+               $newfile .= ".EXPERIMENTAL-checkpatch-fixes" if (!$fix_inplace);
                my $linecount = 0;
                my $f;
 
index 5e4fb144a04f657ebd43fb0c64145147d522d7d1..9c3986f4140c47c089f96bead3331a5b5d9d6cef 100755 (executable)
@@ -98,6 +98,7 @@ my %VCS_cmds_git = (
     "available" => '(which("git") ne "") && (-d ".git")',
     "find_signers_cmd" =>
        "git log --no-color --follow --since=\$email_git_since " .
+           '--numstat --no-merges ' .
            '--format="GitCommit: %H%n' .
                      'GitAuthor: %an <%ae>%n' .
                      'GitDate: %aD%n' .
@@ -106,6 +107,7 @@ my %VCS_cmds_git = (
            " -- \$file",
     "find_commit_signers_cmd" =>
        "git log --no-color " .
+           '--numstat ' .
            '--format="GitCommit: %H%n' .
                      'GitAuthor: %an <%ae>%n' .
                      'GitDate: %aD%n' .
@@ -114,6 +116,7 @@ my %VCS_cmds_git = (
            " -1 \$commit",
     "find_commit_author_cmd" =>
        "git log --no-color " .
+           '--numstat ' .
            '--format="GitCommit: %H%n' .
                      'GitAuthor: %an <%ae>%n' .
                      'GitDate: %aD%n' .
@@ -125,6 +128,7 @@ my %VCS_cmds_git = (
     "blame_commit_pattern" => "^([0-9a-f]+) ",
     "author_pattern" => "^GitAuthor: (.*)",
     "subject_pattern" => "^GitSubject: (.*)",
+    "stat_pattern" => "^(\\d+)\\t(\\d+)\\t\$file\$",
 );
 
 my %VCS_cmds_hg = (
@@ -152,6 +156,7 @@ my %VCS_cmds_hg = (
     "blame_commit_pattern" => "^([ 0-9a-f]+):",
     "author_pattern" => "^HgAuthor: (.*)",
     "subject_pattern" => "^HgSubject: (.*)",
+    "stat_pattern" => "^(\\d+)\t(\\d+)\t\$file\$",
 );
 
 my $conf = which_conf(".get_maintainer.conf");
@@ -1269,20 +1274,30 @@ sub extract_formatted_signatures {
 }
 
 sub vcs_find_signers {
-    my ($cmd) = @_;
+    my ($cmd, $file) = @_;
     my $commits;
     my @lines = ();
     my @signatures = ();
+    my @authors = ();
+    my @stats = ();
 
     @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
 
     my $pattern = $VCS_cmds{"commit_pattern"};
+    my $author_pattern = $VCS_cmds{"author_pattern"};
+    my $stat_pattern = $VCS_cmds{"stat_pattern"};
+
+    $stat_pattern =~ s/(\$\w+)/$1/eeg;         #interpolate $stat_pattern
 
     $commits = grep(/$pattern/, @lines);       # of commits
 
+    @authors = grep(/$author_pattern/, @lines);
     @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines);
+    @stats = grep(/$stat_pattern/, @lines);
 
-    return (0, @signatures) if !@signatures;
+#    print("stats: <@stats>\n");
+
+    return (0, \@signatures, \@authors, \@stats) if !@signatures;
 
     save_commits_by_author(@lines) if ($interactive);
     save_commits_by_signer(@lines) if ($interactive);
@@ -1291,9 +1306,10 @@ sub vcs_find_signers {
        @signatures = grep(!/${penguin_chiefs}/i, @signatures);
     }
 
+    my ($author_ref, $authors_ref) = extract_formatted_signatures(@authors);
     my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
 
-    return ($commits, @$signers_ref);
+    return ($commits, $signers_ref, $authors_ref, \@stats);
 }
 
 sub vcs_find_author {
@@ -1849,7 +1865,12 @@ sub vcs_assign {
 sub vcs_file_signoffs {
     my ($file) = @_;
 
+    my $authors_ref;
+    my $signers_ref;
+    my $stats_ref;
+    my @authors = ();
     my @signers = ();
+    my @stats = ();
     my $commits;
 
     $vcs_used = vcs_exists();
@@ -1858,13 +1879,59 @@ sub vcs_file_signoffs {
     my $cmd = $VCS_cmds{"find_signers_cmd"};
     $cmd =~ s/(\$\w+)/$1/eeg;          # interpolate $cmd
 
-    ($commits, @signers) = vcs_find_signers($cmd);
+    ($commits, $signers_ref, $authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
+
+    @signers = @{$signers_ref} if defined $signers_ref;
+    @authors = @{$authors_ref} if defined $authors_ref;
+    @stats = @{$stats_ref} if defined $stats_ref;
+
+#    print("commits: <$commits>\nsigners:<@signers>\nauthors: <@authors>\nstats: <@stats>\n");
 
     foreach my $signer (@signers) {
        $signer = deduplicate_email($signer);
     }
 
     vcs_assign("commit_signer", $commits, @signers);
+    vcs_assign("authored", $commits, @authors);
+    if ($#authors == $#stats) {
+       my $stat_pattern = $VCS_cmds{"stat_pattern"};
+       $stat_pattern =~ s/(\$\w+)/$1/eeg;      #interpolate $stat_pattern
+
+       my $added = 0;
+       my $deleted = 0;
+       for (my $i = 0; $i <= $#stats; $i++) {
+           if ($stats[$i] =~ /$stat_pattern/) {
+               $added += $1;
+               $deleted += $2;
+           }
+       }
+       my @tmp_authors = uniq(@authors);
+       foreach my $author (@tmp_authors) {
+           $author = deduplicate_email($author);
+       }
+       @tmp_authors = uniq(@tmp_authors);
+       my @list_added = ();
+       my @list_deleted = ();
+       foreach my $author (@tmp_authors) {
+           my $auth_added = 0;
+           my $auth_deleted = 0;
+           for (my $i = 0; $i <= $#stats; $i++) {
+               if ($author eq deduplicate_email($authors[$i]) &&
+                   $stats[$i] =~ /$stat_pattern/) {
+                   $auth_added += $1;
+                   $auth_deleted += $2;
+               }
+           }
+           for (my $i = 0; $i < $auth_added; $i++) {
+               push(@list_added, $author);
+           }
+           for (my $i = 0; $i < $auth_deleted; $i++) {
+               push(@list_deleted, $author);
+           }
+       }
+       vcs_assign("added_lines", $added, @list_added);
+       vcs_assign("removed_lines", $deleted, @list_deleted);
+    }
 }
 
 sub vcs_file_blame {
@@ -1887,6 +1954,10 @@ sub vcs_file_blame {
     if ($email_git_blame_signatures) {
        if (vcs_is_hg()) {
            my $commit_count;
+           my $commit_authors_ref;
+           my $commit_signers_ref;
+           my $stats_ref;
+           my @commit_authors = ();
            my @commit_signers = ();
            my $commit = join(" -r ", @commits);
            my $cmd;
@@ -1894,19 +1965,27 @@ sub vcs_file_blame {
            $cmd = $VCS_cmds{"find_commit_signers_cmd"};
            $cmd =~ s/(\$\w+)/$1/eeg;   #substitute variables in $cmd
 
-           ($commit_count, @commit_signers) = vcs_find_signers($cmd);
+           ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
+           @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
+           @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
 
            push(@signers, @commit_signers);
        } else {
            foreach my $commit (@commits) {
                my $commit_count;
+               my $commit_authors_ref;
+               my $commit_signers_ref;
+               my $stats_ref;
+               my @commit_authors = ();
                my @commit_signers = ();
                my $cmd;
 
                $cmd = $VCS_cmds{"find_commit_signers_cmd"};
                $cmd =~ s/(\$\w+)/$1/eeg;       #substitute variables in $cmd
 
-               ($commit_count, @commit_signers) = vcs_find_signers($cmd);
+               ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
+               @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
+               @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
 
                push(@signers, @commit_signers);
            }
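
The --numstat option added to the git log invocations above emits one "added<TAB>deleted<TAB>path" line per commit, which the script then totals per author to weight the new added_lines/removed_lines roles. A rough C sketch of parsing that line format; the sample input is invented:

#include <stdio.h>

/* Total the added/deleted counts from git-log --numstat style lines,
 * roughly what the new stat_pattern handling does for one file. */
int main(void)
{
        const char *lines[] = {
                "12\t3\tMAINTAINERS",
                "4\t0\tMAINTAINERS",
                "0\t7\tMAINTAINERS",
        };
        unsigned long added = 0, deleted = 0;

        for (unsigned int i = 0; i < sizeof(lines) / sizeof(lines[0]); i++) {
                unsigned long a, d;

                if (sscanf(lines[i], "%lu\t%lu\t", &a, &d) == 2) {
                        added += a;
                        deleted += d;
                }
        }
        printf("added %lu, deleted %lu\n", added, deleted);  /* 16 and 10 */
        return 0;
}
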
index 7941fbdfb050e573120f36b770ae8321130d8d01..cc49062acdeecf85259f646df09abe22f1019a5d 100644 (file)
 #define EM_AARCH64     183
 #endif
 
+#ifndef EM_MICROBLAZE
+#define EM_MICROBLAZE  189
+#endif
+
 static int fd_map;     /* File descriptor for file being modified. */
 static int mmap_failed; /* Boolean flag. */
 static void *ehdr_curr; /* current ElfXX_Ehdr *  for resource cleanup */
@@ -275,6 +279,7 @@ do_file(char const *const fname)
        case EM_ARCOMPACT:
        case EM_ARM:
        case EM_AARCH64:
+       case EM_MICROBLAZE:
        case EM_MIPS:
                break;
        }  /* end switch */
index 9f3eae2909009517cb96de2f882121883f43444e..32487ed183544afa39033f524cfaaff528e00a83 100644 (file)
@@ -9,6 +9,7 @@ TARGETS += ptrace
 TARGETS += timers
 TARGETS += vm
 TARGETS += powerpc
+TARGETS += user
 
 all:
        for TARGET in $(TARGETS); do \
diff --git a/tools/testing/selftests/user/Makefile b/tools/testing/selftests/user/Makefile
new file mode 100644 (file)
index 0000000..396255b
--- /dev/null
@@ -0,0 +1,13 @@
+# Makefile for user memory selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+run_tests: all
+       @if /sbin/modprobe test_user_copy ; then \
+               rmmod test_user_copy; \
+               echo "user_copy: ok"; \
+       else \
+               echo "user_copy: [FAIL]"; \
+               exit 1; \
+       fi