]> git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge branch 'fix/hda-samsung-p50' into topic/hda
authorTakashi Iwai <tiwai@suse.de>
Mon, 29 Jun 2009 06:51:09 +0000 (08:51 +0200)
committerTakashi Iwai <tiwai@suse.de>
Mon, 29 Jun 2009 06:51:09 +0000 (08:51 +0200)
300 files changed:
Documentation/device-mapper/dm-log.txt [new file with mode: 0644]
Documentation/device-mapper/dm-queue-length.txt [new file with mode: 0644]
Documentation/device-mapper/dm-service-time.txt [new file with mode: 0644]
Documentation/filesystems/Locking
Documentation/kernel-parameters.txt
Documentation/laptops/thinkpad-acpi.txt
MAINTAINERS
Makefile
arch/ia64/kernel/acpi-processor.c
arch/mips/Kconfig
arch/mips/cavium-octeon/octeon-irq.c
arch/mips/cavium-octeon/octeon_boot.h [new file with mode: 0644]
arch/mips/cavium-octeon/setup.c
arch/mips/cavium-octeon/smp.c
arch/mips/include/asm/bug.h
arch/mips/include/asm/bugs.h
arch/mips/include/asm/irq.h
arch/mips/include/asm/mmu_context.h
arch/mips/include/asm/smp-ops.h
arch/mips/include/asm/smp.h
arch/mips/include/asm/sn/addrs.h
arch/mips/jazz/irq.c
arch/mips/kernel/cevt-bcm1480.c
arch/mips/kernel/cevt-r4k.c
arch/mips/kernel/cevt-sb1250.c
arch/mips/kernel/cevt-smtc.c
arch/mips/kernel/cpu-probe.c
arch/mips/kernel/i8253.c
arch/mips/kernel/irq-gic.c
arch/mips/kernel/kgdb.c
arch/mips/kernel/process.c
arch/mips/kernel/smp-cmp.c
arch/mips/kernel/smp-up.c
arch/mips/kernel/smp.c
arch/mips/kernel/smtc.c
arch/mips/kernel/topology.c
arch/mips/mipssim/sim_time.c
arch/mips/mm/c-octeon.c
arch/mips/mm/c-r3k.c
arch/mips/mm/c-r4k.c
arch/mips/mm/c-tx39.c
arch/mips/mm/highmem.c
arch/mips/mm/init.c
arch/mips/mm/page.c
arch/mips/mm/tlb-r3k.c
arch/mips/mm/tlb-r4k.c
arch/mips/mm/tlb-r8k.c
arch/mips/mm/tlbex.c
arch/mips/mti-malta/malta-int.c
arch/mips/pci/pci-ip27.c
arch/mips/pmc-sierra/yosemite/smp.c
arch/mips/power/hibernate.S
arch/mips/sgi-ip27/ip27-init.c
arch/mips/sgi-ip27/ip27-irq.c
arch/mips/sgi-ip27/ip27-timer.c
arch/mips/sgi-ip27/ip27-xtalk.c
arch/mips/sibyte/bcm1480/irq.c
arch/mips/sibyte/common/cfe_console.c
arch/mips/sni/time.c
arch/x86/include/asm/acpi.h
arch/x86/include/asm/pci_x86.h
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/acpi/cstate.c
arch/x86/kernel/acpi/processor.c
arch/x86/kernel/apic/io_apic.c
arch/x86/pci/acpi.c
arch/x86/pci/amd_bus.c
arch/x86/pci/common.c
arch/x86/pci/mmconfig-shared.c
drivers/acpi/ac.c
drivers/acpi/battery.c
drivers/acpi/blacklist.c
drivers/acpi/bus.c
drivers/acpi/glue.c
drivers/acpi/osl.c
drivers/acpi/pci_bind.c
drivers/acpi/pci_irq.c
drivers/acpi/pci_root.c
drivers/acpi/power.c
drivers/acpi/processor_core.c
drivers/acpi/processor_idle.c
drivers/acpi/scan.c
drivers/acpi/video.c
drivers/acpi/video_detect.c
drivers/char/mxser.c
drivers/char/nozomi.c
drivers/char/synclink_gt.c
drivers/char/tty_port.c
drivers/dma/txx9dmac.c
drivers/gpu/drm/i915/i915_opregion.c
drivers/i2c/busses/Kconfig
drivers/i2c/busses/Makefile
drivers/i2c/busses/i2c-designware.c [new file with mode: 0644]
drivers/md/Kconfig
drivers/md/Makefile
drivers/md/dm-crypt.c
drivers/md/dm-delay.c
drivers/md/dm-exception-store.c
drivers/md/dm-exception-store.h
drivers/md/dm-io.c
drivers/md/dm-ioctl.c
drivers/md/dm-linear.c
drivers/md/dm-log-userspace-base.c [new file with mode: 0644]
drivers/md/dm-log-userspace-transfer.c [new file with mode: 0644]
drivers/md/dm-log-userspace-transfer.h [new file with mode: 0644]
drivers/md/dm-log.c
drivers/md/dm-mpath.c
drivers/md/dm-path-selector.h
drivers/md/dm-queue-length.c [new file with mode: 0644]
drivers/md/dm-raid1.c
drivers/md/dm-region-hash.c
drivers/md/dm-round-robin.c
drivers/md/dm-service-time.c [new file with mode: 0644]
drivers/md/dm-snap-persistent.c
drivers/md/dm-snap.c
drivers/md/dm-stripe.c
drivers/md/dm-sysfs.c
drivers/md/dm-table.c
drivers/md/dm.c
drivers/md/dm.h
drivers/net/Kconfig
drivers/net/bnx2.c
drivers/net/can/Kconfig
drivers/net/netxen/netxen_nic_init.c
drivers/net/netxen/netxen_nic_main.c
drivers/net/qla3xxx.c
drivers/pci/hotplug/acpi_pcihp.c
drivers/pci/hotplug/acpiphp_glue.c
drivers/pci/intel-iommu.c
drivers/pci/intr_remapping.c
drivers/pci/intr_remapping.h
drivers/platform/x86/Kconfig
drivers/platform/x86/Makefile
drivers/platform/x86/acerhdf.c [new file with mode: 0644]
drivers/platform/x86/asus-laptop.c
drivers/platform/x86/asus_acpi.c
drivers/platform/x86/dell-wmi.c
drivers/platform/x86/eeepc-laptop.c
drivers/platform/x86/hp-wmi.c
drivers/platform/x86/thinkpad_acpi.c
drivers/pnp/pnpacpi/rsparser.c
drivers/power/Kconfig
drivers/power/Makefile
drivers/power/da9030_battery.c
drivers/power/ds2760_battery.c
drivers/power/max17040_battery.c [new file with mode: 0644]
drivers/serial/8250_pci.c
drivers/serial/icom.c
drivers/serial/jsm/jsm_tty.c
drivers/serial/serial_txx9.c
drivers/staging/octeon/Makefile
drivers/staging/octeon/ethernet-common.c [deleted file]
drivers/staging/octeon/ethernet-common.h [deleted file]
drivers/staging/octeon/ethernet-defines.h
drivers/staging/octeon/ethernet-rgmii.c
drivers/staging/octeon/ethernet-sgmii.c
drivers/staging/octeon/ethernet-spi.c
drivers/staging/octeon/ethernet-tx.c
drivers/staging/octeon/ethernet-tx.h
drivers/staging/octeon/ethernet-xaui.c
drivers/staging/octeon/ethernet.c
drivers/staging/octeon/octeon-ethernet.h
drivers/uwb/hwa-rc.c
drivers/uwb/wlp/txrx.c
drivers/w1/slaves/w1_ds2760.c
drivers/w1/slaves/w1_ds2760.h
fs/btrfs/acl.c
fs/btrfs/btrfs_inode.h
fs/btrfs/ctree.h
fs/btrfs/inode.c
fs/compat_ioctl.c
fs/devpts/inode.c
fs/ext2/acl.c
fs/ext2/acl.h
fs/ext2/ext2.h
fs/ext2/inode.c
fs/ext2/super.c
fs/ext3/acl.c
fs/ext3/acl.h
fs/ext3/inode.c
fs/ext3/super.c
fs/ext4/acl.c
fs/ext4/acl.h
fs/ext4/ext4.h
fs/ext4/inode.c
fs/ext4/super.c
fs/fs-writeback.c
fs/inode.c
fs/ioctl.c
fs/jffs2/acl.c
fs/jffs2/acl.h
fs/jffs2/jffs2_fs_i.h
fs/jffs2/os-linux.h
fs/jffs2/readinode.c
fs/jfs/acl.c
fs/jfs/jfs_incore.h
fs/jfs/super.c
fs/jfs/xattr.c
fs/namei.c
fs/namespace.c
fs/nilfs2/inode.c
fs/nilfs2/nilfs.h
fs/nilfs2/super.c
fs/ocfs2/dlmglue.c
fs/ocfs2/dlmglue.h
fs/ocfs2/file.c
fs/ocfs2/inode.c
fs/ocfs2/journal.c
fs/ocfs2/journal.h
fs/ocfs2/namei.c
fs/ocfs2/ocfs2.h
fs/ocfs2/stack_o2cb.c
fs/ocfs2/stack_user.c
fs/ocfs2/stackglue.c
fs/ocfs2/stackglue.h
fs/ocfs2/suballoc.c
fs/ocfs2/super.c
fs/ocfs2/sysfile.c
fs/open.c
fs/reiserfs/inode.c
fs/reiserfs/resize.c
fs/reiserfs/super.c
fs/reiserfs/xattr_acl.c
fs/super.c
fs/ubifs/xattr.c
fs/udf/balloc.c
fs/udf/lowlevel.c
fs/xfs/linux-2.6/xfs_acl.c
fs/xfs/xfs_acl.h
fs/xfs/xfs_iget.c
fs/xfs/xfs_inode.h
include/acpi/acpi_bus.h
include/acpi/acpi_drivers.h
include/acpi/processor.h
include/acpi/video.h
include/linux/Kbuild
include/linux/acpi.h
include/linux/audit.h
include/linux/connector.h
include/linux/device-mapper.h
include/linux/dm-ioctl.h
include/linux/dm-log-userspace.h [new file with mode: 0644]
include/linux/dmar.h
include/linux/ext3_fs_i.h
include/linux/falloc.h
include/linux/fs.h
include/linux/icmpv6.h
include/linux/lockdep.h
include/linux/max17040_battery.h [new file with mode: 0644]
include/linux/pci_hotplug.h
include/linux/posix_acl.h
include/linux/reiserfs_acl.h
include/linux/reiserfs_fs_i.h
include/linux/shmem_fs.h
include/net/protocol.h
include/net/rawv6.h
include/net/sctp/sctp.h
include/net/sock.h
include/net/xfrm.h
init/main.c
kernel/Makefile
kernel/audit.c
kernel/audit.h
kernel/audit_tree.c
kernel/audit_watch.c [new file with mode: 0644]
kernel/auditfilter.c
kernel/auditsc.c
kernel/futex.c
mm/page_alloc.c
mm/shmem.c
mm/shmem_acl.c
mm/slub.c
net/ax25/ax25_in.c
net/core/dev.c
net/dccp/ipv6.c
net/ipv4/route.c
net/ipv6/ah6.c
net/ipv6/esp6.c
net/ipv6/icmp.c
net/ipv6/ip6_tunnel.c
net/ipv6/ipcomp6.c
net/ipv6/mip6.c
net/ipv6/raw.c
net/ipv6/route.c
net/ipv6/tcp_ipv6.c
net/ipv6/tunnel6.c
net/ipv6/udp.c
net/ipv6/udp_impl.h
net/ipv6/udplite.c
net/ipv6/xfrm6_tunnel.c
net/irda/af_irda.c
net/irda/ircomm/ircomm_lmp.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_log.c
net/netfilter/xt_NFQUEUE.c
net/netfilter/xt_cluster.c
net/netfilter/xt_quota.c
net/netfilter/xt_rateest.c
net/sctp/ipv6.c
sound/pci/hda/hda_beep.c

diff --git a/Documentation/device-mapper/dm-log.txt b/Documentation/device-mapper/dm-log.txt
new file mode 100644 (file)
index 0000000..994dd75
--- /dev/null
@@ -0,0 +1,54 @@
+Device-Mapper Logging
+=====================
+The device-mapper logging code is used by some of the device-mapper
+RAID targets to track regions of the disk that are not consistent.
+A region (or portion of the address space) of the disk may be
+inconsistent because a RAID stripe is currently being operated on or
+a machine died while the region was being altered.  In the case of
+mirrors, a region would be considered dirty/inconsistent while you
+are writing to it because the writes need to be replicated for all
+the legs of the mirror and may not reach the legs at the same time.
+Once all writes are complete, the region is considered clean again.
+
+There is a generic logging interface that the device-mapper RAID
+implementations use to perform logging operations (see
+dm_dirty_log_type in include/linux/dm-dirty-log.h).  Various different
+logging implementations are available and provide different
+capabilities.  The list includes:
+
+Type           Files
+====           =====
+disk           drivers/md/dm-log.c
+core           drivers/md/dm-log.c
+userspace      drivers/md/dm-log-userspace* include/linux/dm-log-userspace.h
+
+The "disk" log type
+-------------------
+This log implementation commits the log state to disk.  This way, the
+logging state survives reboots/crashes.
+
+The "core" log type
+-------------------
+This log implementation keeps the log state in memory.  The log state
+will not survive a reboot or crash, but there may be a small boost in
+performance.  This method can also be used if no storage device is
+available for storing log state.
+
+The "userspace" log type
+------------------------
+This log type simply provides a way to export the log API to userspace,
+so log implementations can be done there.  This is done by forwarding most
+logging requests to userspace, where a daemon receives and processes the
+request.
+
+The structures used for communication between kernel and userspace are
+located in include/linux/dm-log-userspace.h.  Due to the frequency,
+diversity, and 2-way communication nature of the exchanges between
+kernel and userspace, 'connector' is used as the interface for
+communication.
+
+There are currently two userspace log implementations that leverage this
+framework - "clustered_disk" and "clustered_core".  These implementations
+provide a cluster-coherent log for shared-storage.  Device-mapper mirroring
+can be used in a shared-storage environment when the cluster log implementations
+are employed.
diff --git a/Documentation/device-mapper/dm-queue-length.txt b/Documentation/device-mapper/dm-queue-length.txt
new file mode 100644 (file)
index 0000000..f4db256
--- /dev/null
@@ -0,0 +1,39 @@
+dm-queue-length
+===============
+
+dm-queue-length is a path selector module for device-mapper targets,
+which selects a path with the least number of in-flight I/Os.
+The path selector name is 'queue-length'.
+
+Table parameters for each path: [<repeat_count>]
+       <repeat_count>: The number of I/Os to dispatch using the selected
+                       path before switching to the next path.
+                       If not given, internal default is used. To check
+                       the default value, see the activated table.
+
+Status for each path: <status> <fail-count> <in-flight>
+       <status>: 'A' if the path is active, 'F' if the path is failed.
+       <fail-count>: The number of path failures.
+       <in-flight>: The number of in-flight I/Os on the path.
+
+
+Algorithm
+=========
+
+dm-queue-length increments/decrements 'in-flight' when an I/O is
+dispatched/completed respectively.
+dm-queue-length selects a path with the minimum 'in-flight'.
+
+
+Examples
+========
+In the case where 2 paths (sda and sdb) are used with repeat_count == 128:
+
+# echo "0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128" \
+  | dmsetup create test
+#
+# dmsetup table
+test: 0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128
+#
+# dmsetup status
+test: 0 10 multipath 2 0 0 0 1 1 E 0 2 1 8:0 A 0 0 8:16 A 0 0
diff --git a/Documentation/device-mapper/dm-service-time.txt b/Documentation/device-mapper/dm-service-time.txt
new file mode 100644 (file)
index 0000000..7d00668
--- /dev/null
@@ -0,0 +1,91 @@
+dm-service-time
+===============
+
+dm-service-time is a path selector module for device-mapper targets,
+which selects a path with the shortest estimated service time for
+the incoming I/O.
+
+The service time for each path is estimated by dividing the total size
+of in-flight I/Os on a path with the performance value of the path.
+The performance value is a relative throughput value among all paths
+in a path-group, and it can be specified as a table argument.
+
+The path selector name is 'service-time'.
+
+Table parameters for each path: [<repeat_count> [<relative_throughput>]]
+       <repeat_count>: The number of I/Os to dispatch using the selected
+                       path before switching to the next path.
+                       If not given, internal default is used.  To check
+                       the default value, see the activated table.
+       <relative_throughput>: The relative throughput value of the path
+                       among all paths in the path-group.
+                       The valid range is 0-100.
+                       If not given, minimum value '1' is used.
+                       If '0' is given, the path isn't selected while
+                       other paths having a positive value are available.
+
+Status for each path: <status> <fail-count> <in-flight-size> \
+                     <relative_throughput>
+       <status>: 'A' if the path is active, 'F' if the path is failed.
+       <fail-count>: The number of path failures.
+       <in-flight-size>: The size of in-flight I/Os on the path.
+       <relative_throughput>: The relative throughput value of the path
+                       among all paths in the path-group.
+
+
+Algorithm
+=========
+
+dm-service-time adds the I/O size to 'in-flight-size' when the I/O is
+dispatched and subtracts it when completed.
+Basically, dm-service-time selects a path having minimum service time
+which is calculated by:
+
+       ('in-flight-size' + 'size-of-incoming-io') / 'relative_throughput'
+
+However, some optimizations below are used to reduce the calculation
+as much as possible.
+
+       1. If the paths have the same 'relative_throughput', skip
+          the division and just compare the 'in-flight-size'.
+
+       2. If the paths have the same 'in-flight-size', skip the division
+          and just compare the 'relative_throughput'.
+
+       3. If some paths have non-zero 'relative_throughput' and others
+          have zero 'relative_throughput', ignore those paths with zero
+          'relative_throughput'.
+
+If such optimizations can't be applied, calculate service time, and
+compare service time.
+If calculated service time is equal, the path having maximum
+'relative_throughput' may be better.  So compare 'relative_throughput'
+then.
+
+
+Examples
+========
+In case that 2 paths (sda and sdb) are used with repeat_count == 128
+and sda has an average throughput 1GB/s and sdb has 4GB/s,
+'relative_throughput' value may be '1' for sda and '4' for sdb.
+
+# echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4" \
+  | dmsetup create test
+#
+# dmsetup table
+test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4
+#
+# dmsetup status
+test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 1 8:16 A 0 0 4
+
+
+Or '2' for sda and '8' for sdb would also be valid.
+
+# echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8" \
+  | dmsetup create test
+#
+# dmsetup table
+test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8
+#
+# dmsetup status
+test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 2 8:16 A 0 0 8
index 229d7b7c50a350053170e5fba11ac9a0168c0e70..18b9d0ca0630e281bb20dc5c5991e4b6f06b4643 100644 (file)
@@ -109,27 +109,28 @@ prototypes:
 
 locking rules:
        All may block.
-                       BKL     s_lock  s_umount
-alloc_inode:           no      no      no
-destroy_inode:         no
-dirty_inode:           no                              (must not sleep)
-write_inode:           no
-drop_inode:            no                              !!!inode_lock!!!
-delete_inode:          no
-put_super:             yes     yes     no
-write_super:           no      yes     read
-sync_fs:               no      no      read
-freeze_fs:             ?
-unfreeze_fs:           ?
-statfs:                        no      no      no
-remount_fs:            yes     yes     maybe           (see below)
-clear_inode:           no
-umount_begin:          yes     no      no
-show_options:          no                              (vfsmount->sem)
-quota_read:            no      no      no              (see below)
-quota_write:           no      no      no              (see below)
-
-->remount_fs() will have the s_umount lock if it's already mounted.
+       None have BKL
+                       s_umount
+alloc_inode:
+destroy_inode:
+dirty_inode:                           (must not sleep)
+write_inode:
+drop_inode:                            !!!inode_lock!!!
+delete_inode:
+put_super:             write
+write_super:           read
+sync_fs:               read
+freeze_fs:             read
+unfreeze_fs:           read
+statfs:                        no
+remount_fs:            maybe           (see below)
+clear_inode:
+umount_begin:          no
+show_options:          no              (namespace_sem)
+quota_read:            no              (see below)
+quota_write:           no              (see below)
+
+->remount_fs() will have the s_umount exclusive lock if it's already mounted.
 When called from get_sb_single, it does NOT have the s_umount lock.
 ->quota_read() and ->quota_write() functions are both guaranteed to
 be the only ones operating on the quota file by the quota code (via
index 92e1ab8178a8693856c599dcc80c0e4f77a78390..d08759aa0903c8499c5b422a8161f5cdb900bdcc 100644 (file)
@@ -229,14 +229,6 @@ and is between 256 and 4096 characters. It is defined in the file
                        to assume that this machine's pmtimer latches its value
                        and always returns good values.
 
-       acpi.power_nocheck=     [HW,ACPI]
-                       Format: 1/0 enable/disable the check of power state.
-                       On some bogus BIOS the _PSC object/_STA object of
-                       power resource can't return the correct device power
-                       state. In such case it is unneccessary to check its
-                       power state again in power transition.
-                       1 : disable the power state check
-
        acpi_sci=       [HW,ACPI] ACPI System Control Interrupt trigger mode
                        Format: { level | edge | high | low }
 
@@ -1863,7 +1855,7 @@ and is between 256 and 4096 characters. It is defined in the file
                                IRQ routing is enabled.
                noacpi          [X86] Do not use ACPI for IRQ routing
                                or for PCI scanning.
-               nocrs           [X86] Don't use _CRS for PCI resource
+               use_crs         [X86] Use _CRS for PCI resource
                                allocation.
                routeirq        Do IRQ routing for all PCI devices.
                                This is normally done in pci_enable_device(),
index 78e354b42f6785cd07a0e12b56b482fde4b8d10b..f2296ecedb89d41e6afec1d3ea239256cb4fe768 100644 (file)
@@ -920,7 +920,7 @@ The available commands are:
        echo '<LED number> off' >/proc/acpi/ibm/led
        echo '<LED number> blink' >/proc/acpi/ibm/led
 
-The <LED number> range is 0 to 7. The set of LEDs that can be
+The <LED number> range is 0 to 15. The set of LEDs that can be
 controlled varies from model to model. Here is the common ThinkPad
 mapping:
 
@@ -932,6 +932,11 @@ mapping:
        5 - UltraBase battery slot
        6 - (unknown)
        7 - standby
+       8 - dock status 1
+       9 - dock status 2
+       10, 11 - (unknown)
+       12 - thinkvantage
+       13, 14, 15 - (unknown)
 
 All of the above can be turned on and off and can be made to blink.
 
@@ -940,10 +945,12 @@ sysfs notes:
 The ThinkPad LED sysfs interface is described in detail by the LED class
 documentation, in Documentation/leds-class.txt.
 
-The leds are named (in LED ID order, from 0 to 7):
+The LEDs are named (in LED ID order, from 0 to 12):
 "tpacpi::power", "tpacpi:orange:batt", "tpacpi:green:batt",
 "tpacpi::dock_active", "tpacpi::bay_active", "tpacpi::dock_batt",
-"tpacpi::unknown_led", "tpacpi::standby".
+"tpacpi::unknown_led", "tpacpi::standby", "tpacpi::dock_status1",
+"tpacpi::dock_status2", "tpacpi::unknown_led2", "tpacpi::unknown_led3",
+"tpacpi::thinkvantage".
 
 Due to limitations in the sysfs LED class, if the status of the LED
 indicators cannot be read due to an error, thinkpad-acpi will report it as
@@ -958,6 +965,12 @@ ThinkPad indicator LED should blink in hardware accelerated mode, use the
 "timer" trigger, and leave the delay_on and delay_off parameters set to
 zero (to request hardware acceleration autodetection).
 
+LEDs that are known not to exist in a given ThinkPad model are not
+made available through the sysfs interface.  If you have a dock and you
+notice there are LEDs listed for your ThinkPad that do not exist (and
+are not in the dock), or if you notice that there are missing LEDs,
+a report to ibm-acpi-devel@lists.sourceforge.net is appreciated.
+
 
 ACPI sounds -- /proc/acpi/ibm/beep
 ----------------------------------
@@ -1156,17 +1169,19 @@ may not be distinct.  Later Lenovo models that implement the ACPI
 display backlight brightness control methods have 16 levels, ranging
 from 0 to 15.
 
-There are two interfaces to the firmware for direct brightness control,
-EC and UCMS (or CMOS).  To select which one should be used, use the
-brightness_mode module parameter: brightness_mode=1 selects EC mode,
-brightness_mode=2 selects UCMS mode, brightness_mode=3 selects EC
-mode with NVRAM backing (so that brightness changes are remembered
-across shutdown/reboot).
+For IBM ThinkPads, there are two interfaces to the firmware for direct
+brightness control, EC and UCMS (or CMOS).  To select which one should be
+used, use the brightness_mode module parameter: brightness_mode=1 selects
+EC mode, brightness_mode=2 selects UCMS mode, brightness_mode=3 selects EC
+mode with NVRAM backing (so that brightness changes are remembered across
+shutdown/reboot).
 
 The driver tries to select which interface to use from a table of
 defaults for each ThinkPad model.  If it makes a wrong choice, please
 report this as a bug, so that we can fix it.
 
+Lenovo ThinkPads only support brightness_mode=2 (UCMS).
+
 When display backlight brightness controls are available through the
 standard ACPI interface, it is best to use it instead of this direct
 ThinkPad-specific interface.  The driver will disable its native
@@ -1254,7 +1269,7 @@ Fan control and monitoring: fan speed, fan enable/disable
 
 procfs: /proc/acpi/ibm/fan
 sysfs device attributes: (hwmon "thinkpad") fan1_input, pwm1,
-                         pwm1_enable
+                         pwm1_enable, fan2_input
 sysfs hwmon driver attributes: fan_watchdog
 
 NOTE NOTE NOTE: fan control operations are disabled by default for
@@ -1267,6 +1282,9 @@ from the hardware registers of the embedded controller.  This is known
 to work on later R, T, X and Z series ThinkPads but may show a bogus
 value on other models.
 
+Some Lenovo ThinkPads support a secondary fan.  This fan cannot be
+controlled separately; it shares the main fan control.
+
 Fan levels:
 
 Most ThinkPad fans work in "levels" at the firmware interface.  Level 0
@@ -1397,6 +1415,11 @@ hwmon device attribute fan1_input:
        which can take up to two minutes.  May return rubbish on older
        ThinkPads.
 
+hwmon device attribute fan2_input:
+       Fan tachometer reading, in RPM, for the secondary fan.
+       Available only on some ThinkPads.  If the secondary fan is
+       not installed, will always read 0.
+
 hwmon driver attribute fan_watchdog:
        Fan safety watchdog timer interval, in seconds.  Minimum is
        1 second, maximum is 120 seconds.  0 disables the watchdog.
@@ -1555,3 +1578,7 @@ Sysfs interface changelog:
 0x020300:      hotkey enable/disable support removed, attributes
                hotkey_bios_enabled and hotkey_enable deprecated and
                marked for removal.
+
+0x020400:      Marker for 16 LEDs support.  Also, LEDs that are known
+               to not exist in a given model are not registered with
+               the LED sysfs class anymore.
index 303129ab4b7509bff0ed0ce3960d5c6b7bc53ff9..fa2a16def17afb765ad447f10b5dde634f83ded9 100644 (file)
@@ -230,6 +230,13 @@ L: linux-acenic@sunsite.dk
 S:     Maintained
 F:     drivers/net/acenic*
 
+ACER ASPIRE ONE TEMPERATURE AND FAN DRIVER
+P: Peter Feuerer
+M: peter@piie.net
+W: http://piie.net/?section=acerhdf
+S: Maintained
+F: drivers/platform/x86/acerhdf.c
+
 ACER WMI LAPTOP EXTRAS
 P:     Carlos Corbacho
 M:     carlos@strangeworlds.co.uk
@@ -913,8 +920,7 @@ M:  corentincj@iksaif.net
 P:     Karol Kozimor
 M:     sziwan@users.sourceforge.net
 L:     acpi4asus-user@lists.sourceforge.net
-W:     http://sourceforge.net/projects/acpi4asus
-W:     http://xf.iksaif.net/acpi4asus
+W:     http://acpi4asus.sf.net
 S:     Maintained
 F:     arch/x86/kernel/acpi/boot.c
 F:     drivers/platform/x86/asus_acpi.c
@@ -930,8 +936,7 @@ ASUS LAPTOP EXTRAS DRIVER
 P:     Corentin Chary
 M:     corentincj@iksaif.net
 L:     acpi4asus-user@lists.sourceforge.net
-W:     http://sourceforge.net/projects/acpi4asus
-W:     http://xf.iksaif.net/acpi4asus
+W:     http://acpi4asus.sf.net
 S:     Maintained
 F:     drivers/platform/x86/asus-laptop.c
 
@@ -1636,7 +1641,7 @@ P:        Mikael Starvik
 M:     starvik@axis.com
 P:     Jesper Nilsson
 M:     jesper.nilsson@axis.com
-L:     dev-etrax@axis.com
+L:     linux-cris-kernel@axis.com
 W:     http://developer.axis.com
 S:     Maintained
 F:     arch/cris/
@@ -2110,7 +2115,7 @@ EEEPC LAPTOP EXTRAS DRIVER
 P:     Corentin Chary
 M:     corentincj@iksaif.net
 L:     acpi4asus-user@lists.sourceforge.net
-W:     http://sourceforge.net/projects/acpi4asus
+W:     http://acpi4asus.sf.net
 S:     Maintained
 F:     drivers/platform/x86/eeepc-laptop.c
 
index 46e1c9d03d515c4a32945b189584a1f652e87c1c..d1216fea0c922bf70de0203d3a51124a825c98c4 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
-SUBLEVEL = 30
-EXTRAVERSION =
+SUBLEVEL = 31
+EXTRAVERSION = -rc1
 NAME = Man-Eating Seals of Antiquity
 
 # *DOCUMENTATION*
index cbe6cee5a550327500d5497d6c445eb471714a12..dbda7bde61129d3403b189f5145618eae9f655e0 100644 (file)
@@ -71,3 +71,15 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
 }
 
 EXPORT_SYMBOL(arch_acpi_processor_init_pdc);
+
+void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr)
+{
+       if (pr->pdc) {
+               kfree(pr->pdc->pointer->buffer.pointer);
+               kfree(pr->pdc->pointer);
+               kfree(pr->pdc);
+               pr->pdc = NULL;
+       }
+}
+
+EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc);
index b29f0280d71282ad2f2cf142d96e1dc84e0a591a..8c4be1f301cf2f9b1310eae61502ebd33b5391f1 100644 (file)
@@ -601,6 +601,7 @@ config CAVIUM_OCTEON_SIMULATOR
        select SYS_SUPPORTS_64BIT_KERNEL
        select SYS_SUPPORTS_BIG_ENDIAN
        select SYS_SUPPORTS_HIGHMEM
+       select SYS_SUPPORTS_HOTPLUG_CPU
        select SYS_HAS_CPU_CAVIUM_OCTEON
        help
          The Octeon simulator is software performance model of the Cavium
@@ -615,6 +616,7 @@ config CAVIUM_OCTEON_REFERENCE_BOARD
        select SYS_SUPPORTS_64BIT_KERNEL
        select SYS_SUPPORTS_BIG_ENDIAN
        select SYS_SUPPORTS_HIGHMEM
+       select SYS_SUPPORTS_HOTPLUG_CPU
        select SYS_HAS_EARLY_PRINTK
        select SYS_HAS_CPU_CAVIUM_OCTEON
        select SWAP_IO_SPACE
@@ -784,8 +786,17 @@ config SYS_HAS_EARLY_PRINTK
        bool
 
 config HOTPLUG_CPU
+       bool "Support for hot-pluggable CPUs"
+       depends on SMP && HOTPLUG && SYS_SUPPORTS_HOTPLUG_CPU
+       help
+         Say Y here to allow turning CPUs off and on. CPUs can be
+         controlled through /sys/devices/system/cpu.
+         (Note: power management support will enable this option
+           automatically on SMP systems. )
+         Say N if you want to disable CPU hotplug.
+
+config SYS_SUPPORTS_HOTPLUG_CPU
        bool
-       default n
 
 config I8259
        bool
@@ -2136,11 +2147,11 @@ menu "Power management options"
 
 config ARCH_HIBERNATION_POSSIBLE
        def_bool y
-       depends on !SMP
+       depends on SYS_SUPPORTS_HOTPLUG_CPU
 
 config ARCH_SUSPEND_POSSIBLE
        def_bool y
-       depends on !SMP
+       depends on SYS_SUPPORTS_HOTPLUG_CPU
 
 source "kernel/power/Kconfig"
 
index 8dfa009e00706a6a48e3b0d646c16e62021ac36d..384f1842bfb1a4c0743979f55ec40b07b4172abb 100644 (file)
@@ -7,7 +7,7 @@
  */
 #include <linux/irq.h>
 #include <linux/interrupt.h>
-#include <linux/hardirq.h>
+#include <linux/smp.h>
 
 #include <asm/octeon/octeon.h>
 #include <asm/octeon/cvmx-pexp-defs.h>
@@ -501,3 +501,62 @@ asmlinkage void plat_irq_dispatch(void)
                }
        }
 }
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int is_irq_enabled_on_cpu(unsigned int irq, unsigned int cpu)
+{
+       unsigned int isset;
+#ifdef CONFIG_SMP
+       int coreid = cpu_logical_map(cpu);
+#else
+       int coreid = cvmx_get_core_num();
+#endif
+       int bit = (irq < OCTEON_IRQ_WDOG0) ?
+               irq - OCTEON_IRQ_WORKQ0 : irq - OCTEON_IRQ_WDOG0;
+       if (irq < 64) {
+               isset = (cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2)) &
+                       (1ull << bit)) >> bit;
+       } else {
+              isset = (cvmx_read_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1)) &
+                       (1ull << bit)) >> bit;
+       }
+       return isset;
+}
+
+void fixup_irqs(void)
+{
+       int irq;
+
+       for (irq = OCTEON_IRQ_SW0; irq <= OCTEON_IRQ_TIMER; irq++)
+               octeon_irq_core_disable_local(irq);
+
+       for (irq = OCTEON_IRQ_WORKQ0; irq <= OCTEON_IRQ_GPIO15; irq++) {
+               if (is_irq_enabled_on_cpu(irq, smp_processor_id())) {
+                       /* ciu irq migrates to next cpu */
+                       octeon_irq_chip_ciu0.disable(irq);
+                       octeon_irq_ciu0_set_affinity(irq, &cpu_online_map);
+               }
+       }
+
+#if 0
+       for (irq = OCTEON_IRQ_MBOX0; irq <= OCTEON_IRQ_MBOX1; irq++)
+               octeon_irq_mailbox_mask(irq);
+#endif
+       for (irq = OCTEON_IRQ_UART0; irq <= OCTEON_IRQ_BOOTDMA; irq++) {
+               if (is_irq_enabled_on_cpu(irq, smp_processor_id())) {
+                       /* ciu irq migrates to next cpu */
+                       octeon_irq_chip_ciu0.disable(irq);
+                       octeon_irq_ciu0_set_affinity(irq, &cpu_online_map);
+               }
+       }
+
+       for (irq = OCTEON_IRQ_UART2; irq <= OCTEON_IRQ_RESERVED135; irq++) {
+               if (is_irq_enabled_on_cpu(irq, smp_processor_id())) {
+                       /* ciu irq migrates to next cpu */
+                       octeon_irq_chip_ciu1.disable(irq);
+                       octeon_irq_ciu1_set_affinity(irq, &cpu_online_map);
+               }
+       }
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/mips/cavium-octeon/octeon_boot.h b/arch/mips/cavium-octeon/octeon_boot.h
new file mode 100644 (file)
index 0000000..0f7f84a
--- /dev/null
@@ -0,0 +1,70 @@
+/*
+ * (C) Copyright 2004, 2005 Cavium Networks
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#ifndef __OCTEON_BOOT_H__
+#define __OCTEON_BOOT_H__
+
+#include <linux/types.h>
+
+struct boot_init_vector {      /* array at BOOTLOADER_BOOT_VECTOR, indexed by core id */
+       uint32_t stack_addr;
+       uint32_t code_addr;     /* hotplug code stores InitTLBStart_addr here */
+       uint32_t app_start_func_addr;   /* hotplug code stores its restart entry point here */
+       uint32_t k0_val;
+       uint32_t flags;
+       uint32_t boot_info_addr;
+       uint32_t pad;
+       uint32_t pad2;
+};
+
+/*
+ * Similar to the bootloader's linux_app_boot_info, but without global data.
+ * The kernel reads single members of this layout at physical address
+ * LABI_ADDR_IN_BOOTLOADER, locating them with offsetof().
+ */
+struct linux_app_boot_info {
+       uint32_t labi_signature;        /* compared against LABI_SIGNATURE */
+       uint32_t start_core0_addr;
+       uint32_t avail_coremask;        /* one bit per core id */
+       uint32_t pci_console_active;
+       uint32_t icache_prefetch_disable;
+       uint32_t InitTLBStart_addr;     /* copied into boot_init_vector.code_addr */
+       uint32_t start_app_addr;
+       uint32_t cur_exception_base;    /* used to program c0 ebase on secondary cores */
+       uint32_t no_mark_private_data;
+       uint32_t compact_flash_common_base_addr;
+       uint32_t compact_flash_attribute_base_addr;
+       uint32_t led_display_base_addr;
+};
+
+/* To avoid copying many of the bootloader's structures,
+   only the offset of the required member is defined here */
+#define AVAIL_COREMASK_OFFSET_IN_LINUX_APP_BOOT_BLOCK    0x765c
+
+/* hardcoded in bootloader */
+#define  LABI_ADDR_IN_BOOTLOADER                         0x700
+
+#define LINUX_APP_BOOT_BLOCK_NAME "linux-app-boot"
+
+#define LABI_SIGNATURE 0xAABBCCDD
+
+/*  from uboot-headers/octeon_mem_map.h */
+#define EXCEPTION_BASE_INCR     (4 * 1024)
+                              /* Increment size for exception base addresses (4k minimum) */
+#define EXCEPTION_BASE_BASE     0
+#define BOOTLOADER_PRIV_DATA_BASE       (EXCEPTION_BASE_BASE + 0x800)
+#define BOOTLOADER_BOOT_VECTOR          (BOOTLOADER_PRIV_DATA_BASE)
+
+#endif /* __OCTEON_BOOT_H__ */
index 5f4e49ba4713fcc1fdd3040074e04acd24cf255e..da559249cc2fae44161c5322ce7cfd61b3cac328 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/serial.h>
+#include <linux/smp.h>
 #include <linux/types.h>
 #include <linux/string.h>      /* for memset */
 #include <linux/tty.h>
index 24e0ad63980afe3940e2618778fd7d136c25e34c..0b891a9c6253125ddd54e863ff1d23b8299d4e25 100644 (file)
@@ -5,6 +5,7 @@
  *
  * Copyright (C) 2004-2008 Cavium Networks
  */
+#include <linux/cpu.h>
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/smp.h>
 
 #include <asm/octeon/octeon.h>
 
+#include "octeon_boot.h"
+
 volatile unsigned long octeon_processor_boot = 0xff;
 volatile unsigned long octeon_processor_sp;
 volatile unsigned long octeon_processor_gp;
 
+#ifdef CONFIG_HOTPLUG_CPU
+static unsigned int InitTLBStart_addr;
+#endif
+
 static irqreturn_t mailbox_interrupt(int irq, void *dev_id)
 {
        const int coreid = cvmx_get_core_num();
@@ -67,8 +74,28 @@ static inline void octeon_send_ipi_mask(cpumask_t mask, unsigned int action)
 }
 
 /**
- * Detect available CPUs, populate phys_cpu_present_map
+ * Detect available CPUs, populate cpu_possible_map
  */
+static void octeon_smp_hotplug_setup(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+       uint32_t labi_signature;
+
+       labi_signature =
+               cvmx_read64_uint32(CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+                                       LABI_ADDR_IN_BOOTLOADER +
+                                       offsetof(struct linux_app_boot_info,
+                                                   labi_signature)));
+       if (labi_signature != LABI_SIGNATURE)
+               pr_err("The bootloader version on this board is incorrect\n");
+       InitTLBStart_addr =
+               cvmx_read64_uint32(CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+                                  LABI_ADDR_IN_BOOTLOADER +
+                                          offsetof(struct linux_app_boot_info,
+                                                   InitTLBStart_addr)));
+#endif
+}
+
 static void octeon_smp_setup(void)
 {
        const int coreid = cvmx_get_core_num();
@@ -91,6 +118,9 @@ static void octeon_smp_setup(void)
                        cpus++;
                }
        }
+       cpu_present_map = cpu_possible_map;
+
+       octeon_smp_hotplug_setup();
 }
 
 /**
@@ -128,6 +158,17 @@ static void octeon_init_secondary(void)
        const int coreid = cvmx_get_core_num();
        union cvmx_ciu_intx_sum0 interrupt_enable;
 
+#ifdef CONFIG_HOTPLUG_CPU
+       unsigned int cur_exception_base;
+
+       cur_exception_base = cvmx_read64_uint32(
+               CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+                            LABI_ADDR_IN_BOOTLOADER +
+                            offsetof(struct linux_app_boot_info,
+                                     cur_exception_base)));
+       /* cur_exception_base is incremented in bootloader after setting */
+       write_c0_ebase((unsigned int)(cur_exception_base - EXCEPTION_BASE_INCR));
+#endif
        octeon_check_cpu_bist();
        octeon_init_cvmcount();
        /*
@@ -199,6 +240,193 @@ static void octeon_cpus_done(void)
 #endif
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* State of each CPU. */
+DEFINE_PER_CPU(int, cpu_state);
+
+extern void fixup_irqs(void);
+
+static DEFINE_SPINLOCK(smp_reserve_lock);
+
+/*
+ * cpu_disable hook of octeon_smp_ops: take the calling CPU out of service.
+ * Returns -EBUSY when called on CPU 0, otherwise 0.
+ */
+static int octeon_cpu_disable(void)
+{
+       const unsigned int this_cpu = smp_processor_id();
+
+       /* CPU 0 is never taken offline */
+       if (!this_cpu)
+               return -EBUSY;
+
+       spin_lock(&smp_reserve_lock);
+
+       /* Drop this CPU from the online and call-in masks */
+       cpu_clear(this_cpu, cpu_online_map);
+       cpu_clear(this_cpu, cpu_callin_map);
+
+       /* Re-route this CPU's interrupts while local interrupts are off */
+       local_irq_disable();
+       fixup_irqs();
+       local_irq_enable();
+
+       flush_cache_all();
+       local_flush_tlb_all();
+
+       spin_unlock(&smp_reserve_lock);
+
+       return 0;
+}
+
+/*
+ * cpu_die hook of octeon_smp_ops, run on a surviving CPU.
+ *
+ * Waits until @cpu has reported CPU_DEAD from play_dead(), marks its core
+ * as available again in the bootloader's coremask and then resets the core.
+ */
+static void octeon_cpu_die(unsigned int cpu)
+{
+       int coreid = cpu_logical_map(cpu);
+       uint32_t avail_coremask;
+       uint64_t coremask_addr;
+       struct cvmx_bootmem_named_block_desc *block_desc;
+
+#ifdef CONFIG_CAVIUM_OCTEON_WATCHDOG
+       /*
+        * Disable the watchdog.  The CIU_WDOGX bank is addressed by core,
+        * so use the core id rather than the Linux CPU number.
+        */
+       cvmx_ciu_wdogx_t ciu_wdog;
+       ciu_wdog.u64 = cvmx_read_csr(CVMX_CIU_WDOGX(coreid));
+       ciu_wdog.s.mode = 0;
+       cvmx_write_csr(CVMX_CIU_WDOGX(coreid), ciu_wdog.u64);
+#endif
+
+       while (per_cpu(cpu_state, cpu) != CPU_DEAD)
+               cpu_relax();
+
+       /*
+        * Locating the available-cores mask follows the bootloader's own
+        * scheme: if the named block LINUX_APP_BOOT_BLOCK (initialized by
+        * the bootoct binary) exists, the mask lives inside it; otherwise
+        * it is the avail_coremask member at LABI_ADDR_IN_BOOTLOADER.
+        * The address is computed once so that the read and the write-back
+        * below always target the same location.
+        */
+       block_desc = cvmx_bootmem_find_named_block(LINUX_APP_BOOT_BLOCK_NAME);
+
+       if (!block_desc)
+               coremask_addr =
+                       CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+                                    LABI_ADDR_IN_BOOTLOADER +
+                                    offsetof(struct linux_app_boot_info,
+                                             avail_coremask));
+       else                           /* alternative, already initialized */
+               coremask_addr =
+                       CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+                                    block_desc->base_addr +
+                                    AVAIL_COREMASK_OFFSET_IN_LINUX_APP_BOOT_BLOCK);
+
+       /* Setting avail_coremask for bootoct binary */
+       avail_coremask = cvmx_read64_uint32(coremask_addr);
+       avail_coremask |= 1 << coreid;
+       cvmx_write64_uint32(coremask_addr, avail_coremask);
+
+       pr_info("Reset core %d. Available Coremask = %x \n", coreid,
+               avail_coremask);
+       /* Pulse the core's bit in CIU_PP_RST */
+       cvmx_write_csr(CVMX_CIU_PP_RST, 1 << coreid);
+       cvmx_write_csr(CVMX_CIU_PP_RST, 0);
+}
+
+/*
+ * Last code run by a CPU that has gone offline; called from cpu_idle().
+ * Reports CPU_DEAD and then spins until octeon_cpu_die() resets the core.
+ */
+void play_dead(void)
+{
+       /*
+        * cpu_state must be indexed by the Linux CPU number, because
+        * octeon_cpu_die() polls per_cpu(cpu_state, cpu).  Indexing by the
+        * hardware core number leaves that loop spinning forever whenever
+        * the two numberings differ.
+        */
+       unsigned int cpu = smp_processor_id();
+
+       idle_task_exit();
+       octeon_processor_boot = 0xff;
+       per_cpu(cpu_state, cpu) = CPU_DEAD;
+
+       while (1)       /* core will be reset here */
+               ;
+}
+
+extern void kernel_entry(unsigned long arg1, ...);
+
+static void start_after_reset(void)
+{      /* installed as app_start_func_addr by octeon_update_boot_vector() */
+       kernel_entry(0, 0, 0);  /* set a2 = 0 for secondary core */
+}
+
+/*
+ * Point the boot vector entry of @cpu's core at the kernel's restart path
+ * and raise an NMI on that core if it is marked available.
+ * Always returns 0.
+ */
+int octeon_update_boot_vector(unsigned int cpu)
+{
+       const int coreid = cpu_logical_map(cpu);
+       const int core_bit = 1 << coreid;
+       unsigned int avail_coremask;
+       struct cvmx_bootmem_named_block_desc *block_desc;
+       struct boot_init_vector *boot_vect =
+               (struct boot_init_vector *) cvmx_phys_to_ptr(0x0 +
+                                                 BOOTLOADER_BOOT_VECTOR);
+
+       block_desc = cvmx_bootmem_find_named_block(LINUX_APP_BOOT_BLOCK_NAME);
+
+       if (block_desc)                /* alternative, already initialized */
+               avail_coremask =
+                       cvmx_read64_uint32(CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+                                          block_desc->base_addr +
+                                          AVAIL_COREMASK_OFFSET_IN_LINUX_APP_BOOT_BLOCK));
+       else
+               avail_coremask =
+                       cvmx_read64_uint32(CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+                                          LABI_ADDR_IN_BOOTLOADER +
+                                          offsetof(struct linux_app_boot_info,
+                                                   avail_coremask)));
+
+       if (!(avail_coremask & core_bit)) {
+               /*
+                * Core is not marked available; assume it was caught by
+                * the simple-executive and pulse its reset bit.
+                */
+               cvmx_write_csr(CVMX_CIU_PP_RST, core_bit);
+               cvmx_write_csr(CVMX_CIU_PP_RST, 0);
+       }
+
+       /* Fill in this core's entry before the NMI is raised */
+       boot_vect[coreid].app_start_func_addr =
+               (uint32_t) (unsigned long) start_after_reset;
+       boot_vect[coreid].code_addr = InitTLBStart_addr;
+
+       CVMX_SYNC;
+
+       /* The NMI bit is set only when the core is in avail_coremask */
+       cvmx_write_csr(CVMX_CIU_NMI, core_bit & avail_coremask);
+
+       return 0;
+}
+
+/*
+ * CPU hotplug notifier: prepares the boot vector before a CPU is brought
+ * up and logs once it is online.  Always returns NOTIFY_OK.
+ */
+static int __cpuinit octeon_cpu_callback(struct notifier_block *nfb,
+       unsigned long action, void *hcpu)
+{
+       const unsigned int cpu = (unsigned long)hcpu;
+
+       if (action == CPU_UP_PREPARE)
+               octeon_update_boot_vector(cpu);
+       else if (action == CPU_ONLINE)
+               pr_info("Cpu %d online\n", cpu);
+       /* CPU_DEAD and all remaining actions need no work here */
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata octeon_cpu_notifier = {
+       .notifier_call = octeon_cpu_callback,   /* handles CPU_UP_PREPARE and CPU_ONLINE */
+};
+
+static int __cpuinit register_cavium_notifier(void)
+{
+       register_hotcpu_notifier(&octeon_cpu_notifier); /* result is not checked */
+
+       return 0;       /* always reports success to the initcall machinery */
+}
+
+late_initcall(register_cavium_notifier);       /* installs the hotplug notifier above */
+
+#endif  /* CONFIG_HOTPLUG_CPU */
+
 struct plat_smp_ops octeon_smp_ops = {
        .send_ipi_single        = octeon_send_ipi_single,
        .send_ipi_mask          = octeon_send_ipi_mask,
@@ -208,4 +436,8 @@ struct plat_smp_ops octeon_smp_ops = {
        .boot_secondary         = octeon_boot_secondary,
        .smp_setup              = octeon_smp_setup,
        .prepare_cpus           = octeon_prepare_cpus,
+#ifdef CONFIG_HOTPLUG_CPU
+       .cpu_disable            = octeon_cpu_disable,
+       .cpu_die                = octeon_cpu_die,
+#endif
 };
index 08ea46863fe5ca8ce6683fe71f527a6c4346b690..6cf29c26e873788a78221ab517dc97cc6ae5386c 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef __ASM_BUG_H
 #define __ASM_BUG_H
 
+#include <linux/compiler.h>
 #include <asm/sgidefs.h>
 
 #ifdef CONFIG_BUG
index 9dc10df32078e10475dd49c60248c06d31461139..b160a706795dcf17d3b0ea4c8d0278748cd1a458 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/bug.h>
 #include <linux/delay.h>
+#include <linux/smp.h>
 
 #include <asm/cpu.h>
 #include <asm/cpu-info.h>
index 4f1eed107b08217f8f2991ed17f92c424b6c437f..09b08d05ff721707ea5c968fe1e369b19141014c 100644 (file)
@@ -10,6 +10,7 @@
 #define _ASM_IRQ_H
 
 #include <linux/linkage.h>
+#include <linux/smp.h>
 
 #include <asm/mipsmtregs.h>
 
index d7f3eb03ad12b3a87702115f0247562293e9ef80..d3bea88d8744ace24ad245062830c6cc64ea7886 100644 (file)
@@ -13,6 +13,7 @@
 
 #include <linux/errno.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/slab.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
index 64ffc0290b84dcf98fb003ba0ba7e77f6faed454..fd545547b8aa961a544de03b8843d852269be7ca 100644 (file)
@@ -26,6 +26,10 @@ struct plat_smp_ops {
        void (*boot_secondary)(int cpu, struct task_struct *idle);
        void (*smp_setup)(void);
        void (*prepare_cpus)(unsigned int max_cpus);
+#ifdef CONFIG_HOTPLUG_CPU
+       int (*cpu_disable)(void);
+       void (*cpu_die)(unsigned int cpu);
+#endif
 };
 
 extern void register_smp_ops(struct plat_smp_ops *ops);
index 40e5ef1d4d26b195b205433c5d14a5c2ba382e35..aaa2d4ab26dc5589c034a9a86db28fb8b33cbdd8 100644 (file)
@@ -13,6 +13,7 @@
 
 #include <linux/bitops.h>
 #include <linux/linkage.h>
+#include <linux/smp.h>
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 
@@ -40,6 +41,7 @@ extern int __cpu_logical_map[NR_CPUS];
 /* Octeon - Tell another core to flush its icache */
 #define SMP_ICACHE_FLUSH       0x4
 
+extern volatile cpumask_t cpu_callin_map;
 
 extern void asmlinkage smp_bootstrap(void);
 
@@ -55,6 +57,24 @@ static inline void smp_send_reschedule(int cpu)
        mp_ops->send_ipi_single(cpu, SMP_RESCHEDULE_YOURSELF);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+static inline int __cpu_disable(void)
+{
+       extern struct plat_smp_ops *mp_ops;     /* private */
+
+       return mp_ops->cpu_disable();   /* platform hook's result is passed through */
+}
+
+static inline void __cpu_die(unsigned int cpu)
+{
+       extern struct plat_smp_ops *mp_ops;     /* private */
+
+       mp_ops->cpu_die(cpu);   /* forwarded unchanged to the platform hook */
+}
+
+extern void play_dead(void);
+#endif
+
 extern asmlinkage void smp_call_function_interrupt(void);
 
 extern void arch_send_call_function_single_ipi(int cpu);
index 3a56d90abfa670ebb805b6295e5c0528b02bd9ca..2367b56dcdef8badffd247e973524007cd859719 100644 (file)
@@ -11,6 +11,7 @@
 
 
 #ifndef __ASSEMBLY__
+#include <linux/smp.h>
 #include <linux/types.h>
 #endif /* !__ASSEMBLY__ */
 
index d9b6a5b5399d25b3959921288e84b08074571965..7fd170d007e7587d4839abcfd1f48370285c5c34 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
+#include <linux/smp.h>
 #include <linux/spinlock.h>
 
 #include <asm/irq_cpu.h>
index a5182a20769619dda6bc81a216f89e2059769052..e02f79b1eb516f3213914fe2bc5fb3dec909b5c9 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
+#include <linux/smp.h>
 
 #include <asm/addrspace.h>
 #include <asm/io.h>
index 0015e442572b5189b8f9b1dba5a25f96041d2493..2652362ce0477d9449dfbd6fa4a0f7e48a2f0ef0 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
+#include <linux/smp.h>
 
 #include <asm/smtc_ipi.h>
 #include <asm/time.h>
index 340f53e5c6b19cd686f2a45bd69dc3f139d581d1..ac5903d1b20e3490bd7e3f0cc02336184c5f7777 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
+#include <linux/smp.h>
 
 #include <asm/addrspace.h>
 #include <asm/io.h>
index df6f5bc60572b4e2425f76166ee81d60b3d0c720..98bd7de7577811e13c4a04379025f8c6e026a24d 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
+#include <linux/smp.h>
 
 #include <asm/smtc_ipi.h>
 #include <asm/time.h>
index b13b8eb3059631c732479aa4da8ed35c4cb48c5c..1abe9905c9c1895ef220bc51f7f4ddf366b5cfad 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/ptrace.h>
+#include <linux/smp.h>
 #include <linux/stddef.h>
 
 #include <asm/bugs.h>
index ed20e7fe65e3270dfe2fe9c41cc3eee071ddcd4b..f7d8d5d0ddbf0337978cacacf5232b38bf4a87a4 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/interrupt.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
+#include <linux/smp.h>
 #include <linux/spinlock.h>
 
 #include <asm/delay.h>
index 3f43c2e3aa5a59ede8eab7ecdaee7762ea6afb68..39000f103f2caf250cdd26297e0c70cd33e3b201 100644 (file)
@@ -2,6 +2,7 @@
 
 #include <linux/bitmap.h>
 #include <linux/init.h>
+#include <linux/smp.h>
 
 #include <asm/io.h>
 #include <asm/gic.h>
index 6e152c80cd4a5bb3d0a73816311a850181371b81..50c9bb880667560bbb6f869e8b6af60a60d2a66e 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/kgdb.h>
 #include <linux/kdebug.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <asm/inst.h>
 #include <asm/fpu.h>
 #include <asm/cacheflush.h>
index 1eaaa450e20c3a83be8daf6cafe0e2935e45e5a1..c09d681b7181dbc15851cc9f425ae98ba6e3c561 100644 (file)
  */
 void __noreturn cpu_idle(void)
 {
+       int cpu;
+
+       /* CPU is going idle. */
+       cpu = smp_processor_id();
+
        /* endless idle loop with no priority at all */
        while (1) {
                tick_nohz_stop_sched_tick(1);
-               while (!need_resched()) {
+               while (!need_resched() && cpu_online(cpu)) {
 #ifdef CONFIG_MIPS_MT_SMTC
                        extern void smtc_idle_loop_hook(void);
 
@@ -62,6 +67,12 @@ void __noreturn cpu_idle(void)
                        if (cpu_wait)
                                (*cpu_wait)();
                }
+#ifdef CONFIG_HOTPLUG_CPU
+               if (!cpu_online(cpu) && !cpu_isset(cpu, cpu_callin_map) &&
+                   (system_state == SYSTEM_RUNNING ||
+                    system_state == SYSTEM_BOOTING))
+                       play_dead();
+#endif
                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
                schedule();
index f27beca4b26d22c67366b1be5d1f263637ad21c3..653be061b9ec6745c17cac05f6f5827ed2013b17 100644 (file)
@@ -20,6 +20,7 @@
 
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/cpumask.h>
 #include <linux/interrupt.h>
 #include <linux/compiler.h>
index 878e3733bbb2c6577074899e34cb034accee3d60..2508d55d68fd4b7292013bd6fd8964f865d23250 100644 (file)
@@ -55,6 +55,18 @@ static void __init up_prepare_cpus(unsigned int max_cpus)
 {
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+static int up_cpu_disable(void)
+{
+       return -ENOSYS; /* the UP ops never allow a CPU to be disabled */
+}
+
+static void up_cpu_die(unsigned int cpu)
+{
+       BUG();  /* treated as a bug if reached: up_cpu_disable() always fails */
+}
+#endif
+
 struct plat_smp_ops up_smp_ops = {
        .send_ipi_single        = up_send_ipi_single,
        .send_ipi_mask          = up_send_ipi_mask,
@@ -64,4 +76,8 @@ struct plat_smp_ops up_smp_ops = {
        .boot_secondary         = up_boot_secondary,
        .smp_setup              = up_smp_setup,
        .prepare_cpus           = up_prepare_cpus,
+#ifdef CONFIG_HOTPLUG_CPU
+       .cpu_disable            = up_cpu_disable,
+       .cpu_die                = up_cpu_die,
+#endif
 };
index c937506a03aac9f1eedcea13f05e30c0ed0c9fcc..bc7d9b05e2f4e4d218f0b87c81ee98e9f708b34a 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/smp.h>
 #include <linux/spinlock.h>
 #include <linux/threads.h>
 #include <linux/module.h>
@@ -44,7 +45,7 @@
 #include <asm/mipsmtregs.h>
 #endif /* CONFIG_MIPS_MT_SMTC */
 
-static volatile cpumask_t cpu_callin_map;      /* Bitmask of started secondaries */
+volatile cpumask_t cpu_callin_map;     /* Bitmask of started secondaries */
 int __cpu_number_map[NR_CPUS];         /* Map physical to logical */
 int __cpu_logical_map[NR_CPUS];                /* Map logical to physical */
 
@@ -200,6 +201,8 @@ void __devinit smp_prepare_boot_cpu(void)
  * and keep control until "cpu_online(cpu)" is set.  Note: cpu is
  * physical, not logical.
  */
+static struct task_struct *cpu_idle_thread[NR_CPUS];
+
 int __cpuinit __cpu_up(unsigned int cpu)
 {
        struct task_struct *idle;
@@ -209,9 +212,16 @@ int __cpuinit __cpu_up(unsigned int cpu)
         * The following code is purely to make sure
         * Linux can schedule processes on this slave.
         */
-       idle = fork_idle(cpu);
-       if (IS_ERR(idle))
-               panic(KERN_ERR "Fork failed for CPU %d", cpu);
+       if (!cpu_idle_thread[cpu]) {
+               idle = fork_idle(cpu);
+               cpu_idle_thread[cpu] = idle;
+
+               if (IS_ERR(idle))
+                       panic(KERN_ERR "Fork failed for CPU %d", cpu);
+       } else {
+               idle = cpu_idle_thread[cpu];
+               init_idle(idle, cpu);
+       }
 
        mp_ops->boot_secondary(cpu, idle);
 
index 37d51cd124e9d6e7ec11230f0d38d475b3276267..8a0626cbb108ff0a1231dadb74b54b4a2d3576a9 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/clockchips.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/cpumask.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
index 660e44ed44d72dc794225d4338e696bf2384ea60..cf3eb61fad121340707b8ccfe1baf6fcb4993a42 100644 (file)
@@ -17,7 +17,10 @@ static int __init topology_init(void)
 #endif /* CONFIG_NUMA */
 
        for_each_present_cpu(i) {
-               ret = register_cpu(&per_cpu(cpu_devices, i), i);
+               struct cpu *c = &per_cpu(cpu_devices, i);
+
+               c->hotpluggable = 1;
+               ret = register_cpu(c, i);
                if (ret)
                        printk(KERN_WARNING "topology_init: register_cpu %d "
                               "failed (%d)\n", i, ret);
index 881ecbc1fa238d62790bcf9dcd946137cad7639f..0cea932f12411f069b0bde34d6a76535178650c0 100644 (file)
@@ -91,6 +91,7 @@ unsigned __cpuinit get_c0_compare_int(void)
                mips_cpu_timer_irq = MSC01E_INT_BASE + MSC01E_INT_CPUCTR;
        } else {
 #endif
+              {
                if (cpu_has_vint)
                        set_vi_handler(cp0_compare_irq, mips_timer_dispatch);
                mips_cpu_timer_irq = MIPS_CPU_IRQ_BASE + cp0_compare_irq;
index 44d01a0a849070d7943c209d16d8aaa55035cfdb..b165cdcb281815e4d95452283ac87ba8150200ed 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/mm.h>
 #include <linux/bitops.h>
 #include <linux/cpu.h>
index 5500c20c79aeafc491c3b9b43cbe8f5e816e084c..54e5f7b9f440a973f555a5f76b841093ee38e710 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/mm.h>
 
 #include <asm/page.h>
index 71fe4cb778cd9b92d70122ce17e72984c3fc9093..6721ee2b1e8ba8308730f262898dc3c792730f49 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/linkage.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/bitops.h>
index f7c8f9ce39c1b1e983d54b6414c3a4bd1a2b8010..6515b4418714d454d55ac311d11c21663f355835 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/mm.h>
 
 #include <asm/cacheops.h>
index 2b1309b2580a79e09a4be60d315317842f1a9295..e274fda329f4e5bc9ef0a90484972b650390e2b0 100644 (file)
@@ -1,5 +1,6 @@
 #include <linux/module.h>
 #include <linux/highmem.h>
+#include <linux/smp.h>
 #include <asm/fixmap.h>
 #include <asm/tlbflush.h>
 
index c5511294a9eef5106e1783759d74fd02b17d2b4f..0e820508ff23fd8bbfe8a4642c5f9acf1c3a9e73 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/string.h>
index 48060c635acdc91458fe2528c599c395ae410b6b..f5c73754d664ec9ef4d84a1c12245cecb8dfbc67 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/proc_fs.h>
index 1c0048a6f5cf082531cf8e189e1d7712725d2614..0f5ab236ab69e30bba6076060d6b8a18b277a02c 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/mm.h>
 
 #include <asm/page.h>
index f60fe513eb60ca91e91fe3956d0920eb79b75a57..cee502caf398403bef0365c7969b1ecfad6c8626 100644 (file)
@@ -10,6 +10,7 @@
  */
 #include <linux/init.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 
index 4ec95cc2df2f06b9811434cb14791a401a5968dd..2b82f23df1a12dfab17d83c228c3128d208f1685 100644 (file)
@@ -10,6 +10,7 @@
  */
 #include <linux/init.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/mm.h>
 
 #include <asm/cpu.h>
index 8f606ead826e29502a571dfc654b7450b69ce6ac..9a17bf8395df1c92be94a48a6820891623154fe6 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/bug.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
+#include <linux/smp.h>
 #include <linux/string.h>
 #include <linux/init.h>
 
index ea176113fea9b46b7b2aafeadc57ce2757f13d34..b4eaf137e4a7143bf046c04303166e4d7446f7fc 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
index dda6f2058665b665c3a857c8a2e420f97f74f194..a0e726eb039aeb0cd8c842d72873d4fcfd8ed3a7 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/pci.h>
+#include <linux/smp.h>
 #include <asm/sn/arch.h>
 #include <asm/pci/bridge.h>
 #include <asm/paccess.h>
index f78c29b68d77015e114195154c76671e2ce7d1a6..8ace27716232ec496f0a2ba5045e653dc8320765 100644 (file)
@@ -1,5 +1,6 @@
 #include <linux/linkage.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 
 #include <asm/pmon.h>
 #include <asm/titan_dep.h>
index 486bd3fd01a14669e8ce9140d8729db10c95dcf0..4b8174b382d742f17e3dc846a67f1e707f26c432 100644 (file)
@@ -43,15 +43,6 @@ LEAF(swsusp_arch_resume)
        bne t1, t3, 1b
        PTR_L t0, PBE_NEXT(t0)
        bnez t0, 0b
-       /* flush caches to make sure context is in memory */
-       PTR_L t0, __flush_cache_all
-       jalr t0
-       /* flush tlb entries */
-#ifdef CONFIG_SMP
-       jal     flush_tlb_all
-#else
-       jal     local_flush_tlb_all
-#endif
        PTR_LA t0, saved_regs
        PTR_L ra, PT_R31(t0)
        PTR_L sp, PT_R29(t0)
index 4a500e8cd3cc2e86f0e4503b871f918095a1757a..51d3a4f2d7e1bed2df4a85071b4b435399ff9f9d 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/cpumask.h>
index 1bb692a3b31948d2a9abb4a13e7f5049ae38886a..c1c8e40d65d6a5f5353768d0729f067097d0dad3 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/ioport.h>
 #include <linux/timex.h>
 #include <linux/slab.h>
+#include <linux/smp.h>
 #include <linux/random.h>
 #include <linux/kernel.h>
 #include <linux/kernel_stat.h>
index f10a7cd64f7eb787f05039f22f382f7783478af4..6d0e59ffba2ed17826a13d475961c7c10fe82c4a 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
 #include <linux/param.h>
+#include <linux/smp.h>
 #include <linux/time.h>
 #include <linux/timex.h>
 #include <linux/mm.h>
index 6ae64e8dfc408473ddd9f34ee569886f8c0abac3..5e871e75a8d9c3cabd975b4495199d4dbbba079d 100644 (file)
@@ -9,6 +9,7 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/smp.h>
 #include <asm/sn/types.h>
 #include <asm/sn/klconfig.h>
 #include <asm/sn/hub.h>
index 690de06bde902f38a49b8d1e1765ae3fd2251b48..ba59839a021ee688b37761e54b8cd966abc537e5 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/init.h>
 #include <linux/linkage.h>
 #include <linux/interrupt.h>
+#include <linux/smp.h>
 #include <linux/spinlock.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
index 81e3d54376e920d3e6b73aa23c71a7bb614dd7d7..1ad2da103fe9016f95207500d4c0b7def5c1fe61 100644 (file)
@@ -51,12 +51,13 @@ static int cfe_console_setup(struct console *cons, char *str)
                        setleds("u0cn");
                } else if (!strcmp(consdev, "uart1")) {
                        setleds("u1cn");
+               } else
 #endif
 #ifdef CONFIG_VGA_CONSOLE
-               } else if (!strcmp(consdev, "pcconsole0")) {
-                       setleds("pccn");
-#endif
+                      if (!strcmp(consdev, "pcconsole0")) {
+                               setleds("pccn");
                } else
+#endif
                        return -ENODEV;
        }
        return 0;
index 69f5f88711cca8721cbc97ad4fda96e6682ecffd..0d9ec1a5c24aa679df38fa0faabdf5c1046d3b0d 100644 (file)
@@ -1,5 +1,6 @@
 #include <linux/types.h>
 #include <linux/interrupt.h>
+#include <linux/smp.h>
 #include <linux/time.h>
 #include <linux/clockchips.h>
 
index 4518dc50090380b6676074b3f2466709ddea5a4b..20d1465a2ab046d8956eec43d5f5d4d36a3bd6bb 100644 (file)
@@ -144,6 +144,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
 
 #else /* !CONFIG_ACPI */
 
+#define acpi_disabled 1
 #define acpi_lapic 0
 #define acpi_ioapic 0
 static inline void acpi_noirq_set(void) { }
index cb739cc0a08063729fb239d4d6a254f6aee08625..b399988eee3a3105e60be5fe760089d6cf57d3f0 100644 (file)
@@ -25,7 +25,7 @@
 #define PCI_BIOS_IRQ_SCAN      0x2000
 #define PCI_ASSIGN_ALL_BUSSES  0x4000
 #define PCI_CAN_SKIP_ISA_ALIGN 0x8000
-#define PCI_NO_ROOT_CRS                0x10000
+#define PCI_USE__CRS           0x10000
 #define PCI_CHECK_ENABLE_AMD_MMCONF    0x20000
 #define PCI_HAS_IO_ECS         0x40000
 #define PCI_NOASSIGN_ROMS      0x80000
@@ -121,6 +121,9 @@ extern int __init pcibios_init(void);
 extern int __init pci_mmcfg_arch_init(void);
 extern void __init pci_mmcfg_arch_free(void);
 
+extern struct acpi_mcfg_allocation *pci_mmcfg_config;
+extern int pci_mmcfg_config_num;
+
 /*
  * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
  * on their northbrige except through the * %eax register. As such, you MUST
index 631086159c53b0be5f28df92d3e2ed976cffa7a8..6b8ca3a0285d77512219724750bca449fa3a8a67 100644 (file)
 
 static int __initdata acpi_force = 0;
 u32 acpi_rsdt_forced;
-#ifdef CONFIG_ACPI
-int acpi_disabled = 0;
-#else
-int acpi_disabled = 1;
-#endif
+int acpi_disabled;
 EXPORT_SYMBOL(acpi_disabled);
 
 #ifdef CONFIG_X86_64
@@ -122,72 +118,6 @@ void __init __acpi_unmap_table(char *map, unsigned long size)
        early_iounmap(map, size);
 }
 
-#ifdef CONFIG_PCI_MMCONFIG
-
-static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
-
-/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
-struct acpi_mcfg_allocation *pci_mmcfg_config;
-int pci_mmcfg_config_num;
-
-static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
-{
-       if (!strcmp(mcfg->header.oem_id, "SGI"))
-               acpi_mcfg_64bit_base_addr = TRUE;
-
-       return 0;
-}
-
-int __init acpi_parse_mcfg(struct acpi_table_header *header)
-{
-       struct acpi_table_mcfg *mcfg;
-       unsigned long i;
-       int config_size;
-
-       if (!header)
-               return -EINVAL;
-
-       mcfg = (struct acpi_table_mcfg *)header;
-
-       /* how many config structures do we have */
-       pci_mmcfg_config_num = 0;
-       i = header->length - sizeof(struct acpi_table_mcfg);
-       while (i >= sizeof(struct acpi_mcfg_allocation)) {
-               ++pci_mmcfg_config_num;
-               i -= sizeof(struct acpi_mcfg_allocation);
-       };
-       if (pci_mmcfg_config_num == 0) {
-               printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
-               return -ENODEV;
-       }
-
-       config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
-       pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
-       if (!pci_mmcfg_config) {
-               printk(KERN_WARNING PREFIX
-                      "No memory for MCFG config tables\n");
-               return -ENOMEM;
-       }
-
-       memcpy(pci_mmcfg_config, &mcfg[1], config_size);
-
-       acpi_mcfg_oem_check(mcfg);
-
-       for (i = 0; i < pci_mmcfg_config_num; ++i) {
-               if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) &&
-                   !acpi_mcfg_64bit_base_addr) {
-                       printk(KERN_ERR PREFIX
-                              "MMCONFIG not in low 4GB of memory\n");
-                       kfree(pci_mmcfg_config);
-                       pci_mmcfg_config_num = 0;
-                       return -ENODEV;
-               }
-       }
-
-       return 0;
-}
-#endif                         /* CONFIG_PCI_MMCONFIG */
-
 #ifdef CONFIG_X86_LOCAL_APIC
 static int __init acpi_parse_madt(struct acpi_table_header *table)
 {
@@ -1517,14 +1447,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
                     DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
                     },
         },
-       {
-        .callback = force_acpi_ht,
-        .ident = "ASUS P4B266",
-        .matches = {
-                    DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
-                    DMI_MATCH(DMI_BOARD_NAME, "P4B266"),
-                    },
-        },
        {
         .callback = force_acpi_ht,
         .ident = "ASUS P2B-DS",
index bbbe4bbb6f34b7ed8bf60cf849faa32963e93d90..8c44c232efcb9f2b1150b4f58a0942bf9dd3ac0a 100644 (file)
@@ -34,12 +34,22 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
                flags->bm_check = 1;
        else if (c->x86_vendor == X86_VENDOR_INTEL) {
                /*
-                * Today all CPUs that support C3 share cache.
-                * TBD: This needs to look at cache shared map, once
-                * multi-core detection patch makes to the base.
+                * Today all MP CPUs that support C3 share cache.
+                * And caches should not be flushed by software while
+                * entering C3 type state.
                 */
                flags->bm_check = 1;
        }
+
+       /*
+        * On all recent Intel platforms, ARB_DISABLE is a nop.
+        * So, set bm_control to zero to indicate that ARB_DISABLE
+        * is not required while entering C3 type state on
+        * P4, Core and beyond CPUs
+        */
+       if (c->x86_vendor == X86_VENDOR_INTEL &&
+           (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 14)))
+                       flags->bm_control = 0;
 }
 EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
 
index 7c074eec39fb56ebff16c26f3351b6eae79cca50..d296f4a195c916d8d454c144396ecc1f71af91d2 100644 (file)
@@ -72,6 +72,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
        return;
 }
 
+
 /* Initialize _PDC data based on the CPU vendor */
 void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
 {
@@ -85,3 +86,15 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
 }
 
 EXPORT_SYMBOL(arch_acpi_processor_init_pdc);
+
+/*
+ * Free everything reachable from pr->pdc -- the buffer of its first
+ * object, that object, and pr->pdc itself -- then reset pr->pdc to NULL.
+ * Nothing happens when pr->pdc is already NULL.
+ */
+void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr)
+{
+       if (!pr->pdc)
+               return;
+
+       /* Release from the innermost allocation outwards. */
+       kfree(pr->pdc->pointer->buffer.pointer);
+       kfree(pr->pdc->pointer);
+       kfree(pr->pdc);
+       pr->pdc = NULL;
+}
+
+EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc);
index b7a79207295ee05e0f4ecb34e3863d96e330cd1d..4d0216fcb36c4356d4f135fee9c585fac7f42c3a 100644 (file)
@@ -1414,6 +1414,9 @@ int setup_ioapic_entry(int apic_id, int irq,
                irte.vector = vector;
                irte.dest_id = IRTE_DEST(destination);
 
+               /* Set source-id of interrupt request */
+               set_ioapic_sid(&irte, apic_id);
+
                modify_irte(irq, &irte);
 
                ir_entry->index2 = (index >> 15) & 0x1;
@@ -3290,6 +3293,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
                irte.vector = cfg->vector;
                irte.dest_id = IRTE_DEST(dest);
 
+               /* Set source-id of interrupt request */
+               set_msi_sid(&irte, pdev);
+
                modify_irte(irq, &irte);
 
                msg->address_hi = MSI_ADDR_BASE_HI;
index 16c3fda85bbad502317bafedc37159bf9847b278..b26626dc517c0a4f706c0b07ac3e491e6327501e 100644 (file)
@@ -238,7 +238,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
 #endif
        }
 
-       if (bus && !(pci_probe & PCI_NO_ROOT_CRS))
+       if (bus && (pci_probe & PCI_USE__CRS))
                get_current_resources(device, busnum, domain, bus);
        return bus;
 }
index 2255f880678bb198af08f8fd43452c2e3370167a..f893d6a6e803e266f3d36030f8744fecda130bf9 100644 (file)
@@ -101,7 +101,7 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b)
        struct pci_root_info *info;
 
        /* don't go for it if _CRS is used */
-       if (!(pci_probe & PCI_NO_ROOT_CRS))
+       if (pci_probe & PCI_USE__CRS)
                return;
 
        /* if only one root bus, don't need to anything */
index 4740119e4bb73389e66ff08ed47e30c7d12c56c6..2202b6257b82b84bb9ef17f33630ca7dac929610 100644 (file)
@@ -515,8 +515,8 @@ char * __devinit  pcibios_setup(char *str)
        } else if (!strcmp(str, "assign-busses")) {
                pci_probe |= PCI_ASSIGN_ALL_BUSSES;
                return NULL;
-       } else if (!strcmp(str, "nocrs")) {
-               pci_probe |= PCI_NO_ROOT_CRS;
+       } else if (!strcmp(str, "use_crs")) {
+               pci_probe |= PCI_USE__CRS;
                return NULL;
        } else if (!strcmp(str, "earlydump")) {
                pci_early_dump_regs = 1;
index 8766b0e216c5f1a139a7094630933670dd3f1cc4..712443ec6d43b46aaf05d75e04f2c249f44d20da 100644 (file)
@@ -523,6 +523,69 @@ reject:
 
 static int __initdata known_bridge;
 
+static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
+
+/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
+struct acpi_mcfg_allocation *pci_mmcfg_config;
+int pci_mmcfg_config_num;
+
+/*
+ * Set acpi_mcfg_64bit_base_addr when the MCFG table's OEM id is "SGI".
+ * Always returns 0.
+ */
+static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
+{
+       const int oem_is_sgi = (strcmp(mcfg->header.oem_id, "SGI") == 0);
+
+       if (oem_is_sgi)
+               acpi_mcfg_64bit_base_addr = TRUE;
+
+       return 0;
+}
+
+/*
+ * pci_parse_mcfg - acpi_table_parse() handler for the ACPI MCFG table.
+ * @header: the MCFG table to parse
+ *
+ * Copies the allocation entries that follow the fixed part of the table
+ * into a freshly allocated pci_mmcfg_config array and stores their count
+ * in pci_mmcfg_config_num.
+ *
+ * Returns 0 on success, -EINVAL if @header is NULL, -ENODEV if the table
+ * carries no entries or an entry sits above 4GB while
+ * acpi_mcfg_64bit_base_addr is not set, and -ENOMEM if the array cannot
+ * be allocated.
+ */
+static int __init pci_parse_mcfg(struct acpi_table_header *header)
+{
+       struct acpi_table_mcfg *mcfg;
+       unsigned long i;
+       int config_size;
+
+       if (!header)
+               return -EINVAL;
+
+       mcfg = (struct acpi_table_mcfg *)header;
+
+       /*
+        * How many config structures do we have?  Only subtract when the
+        * table is longer than its fixed part; otherwise the unsigned
+        * difference would wrap around and yield a bogus, huge count.
+        */
+       pci_mmcfg_config_num = 0;
+       if (header->length > sizeof(struct acpi_table_mcfg))
+               pci_mmcfg_config_num =
+                       (header->length - sizeof(struct acpi_table_mcfg)) /
+                       sizeof(struct acpi_mcfg_allocation);
+       if (pci_mmcfg_config_num == 0) {
+               printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
+               return -ENODEV;
+       }
+
+       config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
+       pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
+       if (!pci_mmcfg_config) {
+               printk(KERN_WARNING PREFIX
+                      "No memory for MCFG config tables\n");
+               return -ENOMEM;
+       }
+
+       /* The entries start right after the fixed part of the table. */
+       memcpy(pci_mmcfg_config, &mcfg[1], config_size);
+
+       acpi_mcfg_oem_check(mcfg);
+
+       for (i = 0; i < pci_mmcfg_config_num; ++i) {
+               if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) &&
+                   !acpi_mcfg_64bit_base_addr) {
+                       printk(KERN_ERR PREFIX
+                              "MMCONFIG not in low 4GB of memory\n");
+                       kfree(pci_mmcfg_config);
+                       /* Do not leave a stale pointer to freed memory. */
+                       pci_mmcfg_config = NULL;
+                       pci_mmcfg_config_num = 0;
+                       return -ENODEV;
+               }
+       }
+
+       return 0;
+}
+
 static void __init __pci_mmcfg_init(int early)
 {
        /* MMCONFIG disabled */
@@ -543,7 +606,7 @@ static void __init __pci_mmcfg_init(int early)
        }
 
        if (!known_bridge)
-               acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
+               acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
 
        pci_mmcfg_reject_broken(early);
 
index 88e42abf5d881b8bf443ecc4f58e3b4e76b3ecd3..0df8fcb687d6f00ce400c1b4677019a34af236a1 100644 (file)
@@ -61,6 +61,7 @@ static int acpi_ac_open_fs(struct inode *inode, struct file *file);
 static int acpi_ac_add(struct acpi_device *device);
 static int acpi_ac_remove(struct acpi_device *device, int type);
 static int acpi_ac_resume(struct acpi_device *device);
+static void acpi_ac_notify(struct acpi_device *device, u32 event);
 
 static const struct acpi_device_id ac_device_ids[] = {
        {"ACPI0003", 0},
@@ -72,10 +73,12 @@ static struct acpi_driver acpi_ac_driver = {
        .name = "ac",
        .class = ACPI_AC_CLASS,
        .ids = ac_device_ids,
+       .flags = ACPI_DRIVER_ALL_NOTIFY_EVENTS,
        .ops = {
                .add = acpi_ac_add,
                .remove = acpi_ac_remove,
                .resume = acpi_ac_resume,
+               .notify = acpi_ac_notify,
                },
 };
 
@@ -220,16 +223,14 @@ static int acpi_ac_remove_fs(struct acpi_device *device)
                                    Driver Model
    -------------------------------------------------------------------------- */
 
-static void acpi_ac_notify(acpi_handle handle, u32 event, void *data)
+static void acpi_ac_notify(struct acpi_device *device, u32 event)
 {
-       struct acpi_ac *ac = data;
-       struct acpi_device *device = NULL;
+       struct acpi_ac *ac = acpi_driver_data(device);
 
 
        if (!ac)
                return;
 
-       device = ac->device;
        switch (event) {
        default:
                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
@@ -253,7 +254,6 @@ static void acpi_ac_notify(acpi_handle handle, u32 event, void *data)
 static int acpi_ac_add(struct acpi_device *device)
 {
        int result = 0;
-       acpi_status status = AE_OK;
        struct acpi_ac *ac = NULL;
 
 
@@ -286,13 +286,6 @@ static int acpi_ac_add(struct acpi_device *device)
        ac->charger.get_property = get_ac_property;
        power_supply_register(&ac->device->dev, &ac->charger);
 #endif
-       status = acpi_install_notify_handler(device->handle,
-                                            ACPI_ALL_NOTIFY, acpi_ac_notify,
-                                            ac);
-       if (ACPI_FAILURE(status)) {
-               result = -ENODEV;
-               goto end;
-       }
 
        printk(KERN_INFO PREFIX "%s [%s] (%s)\n",
               acpi_device_name(device), acpi_device_bid(device),
@@ -328,7 +321,6 @@ static int acpi_ac_resume(struct acpi_device *device)
 
 static int acpi_ac_remove(struct acpi_device *device, int type)
 {
-       acpi_status status = AE_OK;
        struct acpi_ac *ac = NULL;
 
 
@@ -337,8 +329,6 @@ static int acpi_ac_remove(struct acpi_device *device, int type)
 
        ac = acpi_driver_data(device);
 
-       status = acpi_remove_notify_handler(device->handle,
-                                           ACPI_ALL_NOTIFY, acpi_ac_notify);
 #ifdef CONFIG_ACPI_SYSFS_POWER
        if (ac->charger.dev)
                power_supply_unregister(&ac->charger);
index b0de6312919a82a455f20f8d06dddd8f1b672d80..58b4517ce71277b2d7c533d0cc43b0052a58f975 100644 (file)
@@ -796,13 +796,12 @@ static void acpi_battery_remove_fs(struct acpi_device *device)
                                  Driver Interface
    -------------------------------------------------------------------------- */
 
-static void acpi_battery_notify(acpi_handle handle, u32 event, void *data)
+static void acpi_battery_notify(struct acpi_device *device, u32 event)
 {
-       struct acpi_battery *battery = data;
-       struct acpi_device *device;
+       struct acpi_battery *battery = acpi_driver_data(device);
+
        if (!battery)
                return;
-       device = battery->device;
        acpi_battery_update(battery);
        acpi_bus_generate_proc_event(device, event,
                                     acpi_battery_present(battery));
@@ -819,7 +818,6 @@ static void acpi_battery_notify(acpi_handle handle, u32 event, void *data)
 static int acpi_battery_add(struct acpi_device *device)
 {
        int result = 0;
-       acpi_status status = 0;
        struct acpi_battery *battery = NULL;
        if (!device)
                return -EINVAL;
@@ -834,22 +832,12 @@ static int acpi_battery_add(struct acpi_device *device)
        acpi_battery_update(battery);
 #ifdef CONFIG_ACPI_PROCFS_POWER
        result = acpi_battery_add_fs(device);
-       if (result)
-               goto end;
 #endif
-       status = acpi_install_notify_handler(device->handle,
-                                            ACPI_ALL_NOTIFY,
-                                            acpi_battery_notify, battery);
-       if (ACPI_FAILURE(status)) {
-               ACPI_EXCEPTION((AE_INFO, status, "Installing notify handler"));
-               result = -ENODEV;
-               goto end;
-       }
-       printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n",
-              ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device),
-              device->status.battery_present ? "present" : "absent");
-      end:
-       if (result) {
+       if (!result) {
+               printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n",
+                       ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device),
+                       device->status.battery_present ? "present" : "absent");
+       } else {
 #ifdef CONFIG_ACPI_PROCFS_POWER
                acpi_battery_remove_fs(device);
 #endif
@@ -860,15 +848,11 @@ static int acpi_battery_add(struct acpi_device *device)
 
 static int acpi_battery_remove(struct acpi_device *device, int type)
 {
-       acpi_status status = 0;
        struct acpi_battery *battery = NULL;
 
        if (!device || !acpi_driver_data(device))
                return -EINVAL;
        battery = acpi_driver_data(device);
-       status = acpi_remove_notify_handler(device->handle,
-                                           ACPI_ALL_NOTIFY,
-                                           acpi_battery_notify);
 #ifdef CONFIG_ACPI_PROCFS_POWER
        acpi_battery_remove_fs(device);
 #endif
@@ -896,10 +880,12 @@ static struct acpi_driver acpi_battery_driver = {
        .name = "battery",
        .class = ACPI_BATTERY_CLASS,
        .ids = battery_device_ids,
+       .flags = ACPI_DRIVER_ALL_NOTIFY_EVENTS,
        .ops = {
                .add = acpi_battery_add,
                .resume = acpi_battery_resume,
                .remove = acpi_battery_remove,
+               .notify = acpi_battery_notify,
                },
 };
 
index 09c69806c1fc7693f91268ce86d0a8030dc2df59..f6baa77deefbd6d0743e92fc4a593b66a93c64c5 100644 (file)
@@ -192,6 +192,22 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = {
                     DMI_MATCH(DMI_PRODUCT_NAME, "ESPRIMO Mobile V5505"),
                },
        },
+       {
+       .callback = dmi_disable_osi_vista,
+       .ident = "Sony VGN-NS10J_S",
+       .matches = {
+                    DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+                    DMI_MATCH(DMI_PRODUCT_NAME, "VGN-NS10J_S"),
+               },
+       },
+       {
+       .callback = dmi_disable_osi_vista,
+       .ident = "Sony VGN-SR290J",
+       .matches = {
+                    DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+                    DMI_MATCH(DMI_PRODUCT_NAME, "Sony VGN-SR290J"),
+               },
+       },
 
        /*
         * BIOS invocation of _OSI(Linux) is almost always a BIOS bug.
index ae862f1798dc9d59d1b1e7479a6252dec22a185c..2876fc70c3a9b71915ef016f1ade1fa235c8db3f 100644 (file)
@@ -450,18 +450,16 @@ int acpi_bus_receive_event(struct acpi_bus_event *event)
                              Notification Handling
    -------------------------------------------------------------------------- */
 
-static int
-acpi_bus_check_device(struct acpi_device *device, int *status_changed)
+static void acpi_bus_check_device(acpi_handle handle)
 {
-       acpi_status status = 0;
+       struct acpi_device *device;
+       acpi_status status;
        struct acpi_device_status old_status;
 
-
+       if (acpi_bus_get_device(handle, &device))
+               return;
        if (!device)
-               return -EINVAL;
-
-       if (status_changed)
-               *status_changed = 0;
+               return;
 
        old_status = device->status;
 
@@ -471,22 +469,15 @@ acpi_bus_check_device(struct acpi_device *device, int *status_changed)
         */
        if (device->parent && !device->parent->status.present) {
                device->status = device->parent->status;
-               if (STRUCT_TO_INT(old_status) != STRUCT_TO_INT(device->status)) {
-                       if (status_changed)
-                               *status_changed = 1;
-               }
-               return 0;
+               return;
        }
 
        status = acpi_bus_get_status(device);
        if (ACPI_FAILURE(status))
-               return -ENODEV;
+               return;
 
        if (STRUCT_TO_INT(old_status) == STRUCT_TO_INT(device->status))
-               return 0;
-
-       if (status_changed)
-               *status_changed = 1;
+               return;
 
        /*
         * Device Insertion/Removal
@@ -498,33 +489,17 @@ acpi_bus_check_device(struct acpi_device *device, int *status_changed)
                ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device removal detected\n"));
                /* TBD: Handle device removal */
        }
-
-       return 0;
 }
 
-static int acpi_bus_check_scope(struct acpi_device *device)
+static void acpi_bus_check_scope(acpi_handle handle)
 {
-       int result = 0;
-       int status_changed = 0;
-
-
-       if (!device)
-               return -EINVAL;
-
        /* Status Change? */
-       result = acpi_bus_check_device(device, &status_changed);
-       if (result)
-               return result;
-
-       if (!status_changed)
-               return 0;
+       acpi_bus_check_device(handle);
 
        /*
         * TBD: Enumerate child devices within this device's scope and
         *       run acpi_bus_check_device()'s on them.
         */
-
-       return 0;
 }
 
 static BLOCKING_NOTIFIER_HEAD(acpi_bus_notify_list);
@@ -547,22 +522,19 @@ EXPORT_SYMBOL_GPL(unregister_acpi_bus_notifier);
  */
 static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
 {
-       int result = 0;
        struct acpi_device *device = NULL;
+       struct acpi_driver *driver;
+
+       ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Notification %#02x to handle %p\n",
+                         type, handle));
 
        blocking_notifier_call_chain(&acpi_bus_notify_list,
                type, (void *)handle);
 
-       if (acpi_bus_get_device(handle, &device))
-               return;
-
        switch (type) {
 
        case ACPI_NOTIFY_BUS_CHECK:
-               ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-                                 "Received BUS CHECK notification for device [%s]\n",
-                                 device->pnp.bus_id));
-               result = acpi_bus_check_scope(device);
+               acpi_bus_check_scope(handle);
                /*
                 * TBD: We'll need to outsource certain events to non-ACPI
                 *      drivers via the device manager (device.c).
@@ -570,10 +542,7 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
                break;
 
        case ACPI_NOTIFY_DEVICE_CHECK:
-               ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-                                 "Received DEVICE CHECK notification for device [%s]\n",
-                                 device->pnp.bus_id));
-               result = acpi_bus_check_device(device, NULL);
+               acpi_bus_check_device(handle);
                /*
                 * TBD: We'll need to outsource certain events to non-ACPI
                 *      drivers via the device manager (device.c).
@@ -581,44 +550,26 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
                break;
 
        case ACPI_NOTIFY_DEVICE_WAKE:
-               ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-                                 "Received DEVICE WAKE notification for device [%s]\n",
-                                 device->pnp.bus_id));
                /* TBD */
                break;
 
        case ACPI_NOTIFY_EJECT_REQUEST:
-               ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-                                 "Received EJECT REQUEST notification for device [%s]\n",
-                                 device->pnp.bus_id));
                /* TBD */
                break;
 
        case ACPI_NOTIFY_DEVICE_CHECK_LIGHT:
-               ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-                                 "Received DEVICE CHECK LIGHT notification for device [%s]\n",
-                                 device->pnp.bus_id));
                /* TBD: Exactly what does 'light' mean? */
                break;
 
        case ACPI_NOTIFY_FREQUENCY_MISMATCH:
-               ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-                                 "Received FREQUENCY MISMATCH notification for device [%s]\n",
-                                 device->pnp.bus_id));
                /* TBD */
                break;
 
        case ACPI_NOTIFY_BUS_MODE_MISMATCH:
-               ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-                                 "Received BUS MODE MISMATCH notification for device [%s]\n",
-                                 device->pnp.bus_id));
                /* TBD */
                break;
 
        case ACPI_NOTIFY_POWER_FAULT:
-               ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-                                 "Received POWER FAULT notification for device [%s]\n",
-                                 device->pnp.bus_id));
                /* TBD */
                break;
 
@@ -629,7 +580,13 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
                break;
        }
 
-       return;
+       acpi_bus_get_device(handle, &device);
+       if (device) {
+               driver = device->driver;
+               if (driver && driver->ops.notify &&
+                   (driver->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS))
+                       driver->ops.notify(device, type);
+       }
 }
 
 /* --------------------------------------------------------------------------
index 8bd2c2a6884d5ce550ff336775e87844331e0e69..a8a5c29958c81faaea6b007626e40ee225ec0275 100644 (file)
@@ -140,46 +140,6 @@ struct device *acpi_get_physical_device(acpi_handle handle)
 
 EXPORT_SYMBOL(acpi_get_physical_device);
 
-/* ToDo: When a PCI bridge is found, return the PCI device behind the bridge
- *       This should work in general, but did not on a Lenovo T61 for the
- *      graphics card. But this must be fixed when the PCI device is
- *       bound and the kernel device struct is attached to the acpi device
- * Note: A success call will increase reference count by one
- *       Do call put_device(dev) on the returned device then
- */
-struct device *acpi_get_physical_pci_device(acpi_handle handle)
-{
-       struct device *dev;
-       long long device_id;
-       acpi_status status;
-
-       status =
-               acpi_evaluate_integer(handle, "_ADR", NULL, &device_id);
-
-       if (ACPI_FAILURE(status))
-               return NULL;
-
-       /* We need to attempt to determine whether the _ADR refers to a
-          PCI device or not. There's no terribly good way to do this,
-          so the best we can hope for is to assume that there'll never
-          be a device in the host bridge */
-       if (device_id >= 0x10000) {
-               /* It looks like a PCI device. Does it exist? */
-               dev = acpi_get_physical_device(handle);
-       } else {
-               /* It doesn't look like a PCI device. Does its parent
-                  exist? */
-               acpi_handle phandle;
-               if (acpi_get_parent(handle, &phandle))
-                       return NULL;
-               dev = acpi_get_physical_device(phandle);
-       }
-       if (!dev)
-               return NULL;
-       return dev;
-}
-EXPORT_SYMBOL(acpi_get_physical_pci_device);
-
 static int acpi_bind_one(struct device *dev, acpi_handle handle)
 {
        struct acpi_device *acpi_dev;
index d916bea729f1dfac4e81f020d581eee296ab42e0..71670719d61a6fec442792189fa0beac85e0324d 100644 (file)
@@ -79,6 +79,7 @@ static acpi_osd_handler acpi_irq_handler;
 static void *acpi_irq_context;
 static struct workqueue_struct *kacpid_wq;
 static struct workqueue_struct *kacpi_notify_wq;
+static struct workqueue_struct *kacpi_hotplug_wq;
 
 struct acpi_res_list {
        resource_size_t start;
@@ -192,8 +193,10 @@ acpi_status acpi_os_initialize1(void)
 {
        kacpid_wq = create_singlethread_workqueue("kacpid");
        kacpi_notify_wq = create_singlethread_workqueue("kacpi_notify");
+       kacpi_hotplug_wq = create_singlethread_workqueue("kacpi_hotplug");
        BUG_ON(!kacpid_wq);
        BUG_ON(!kacpi_notify_wq);
+       BUG_ON(!kacpi_hotplug_wq);
        return AE_OK;
 }
 
@@ -206,6 +209,7 @@ acpi_status acpi_os_terminate(void)
 
        destroy_workqueue(kacpid_wq);
        destroy_workqueue(kacpi_notify_wq);
+       destroy_workqueue(kacpi_hotplug_wq);
 
        return AE_OK;
 }
@@ -716,6 +720,7 @@ static acpi_status __acpi_os_execute(acpi_execute_type type,
        acpi_status status = AE_OK;
        struct acpi_os_dpc *dpc;
        struct workqueue_struct *queue;
+       work_func_t func;
        int ret;
        ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
                          "Scheduling function [%p(%p)] for deferred execution.\n",
@@ -740,15 +745,17 @@ static acpi_status __acpi_os_execute(acpi_execute_type type,
        dpc->function = function;
        dpc->context = context;
 
-       if (!hp) {
-               INIT_WORK(&dpc->work, acpi_os_execute_deferred);
-               queue = (type == OSL_NOTIFY_HANDLER) ?
-                       kacpi_notify_wq : kacpid_wq;
-               ret = queue_work(queue, &dpc->work);
-       } else {
-               INIT_WORK(&dpc->work, acpi_os_execute_hp_deferred);
-               ret = schedule_work(&dpc->work);
-       }
+       /*
+        * We can't run hotplug code in keventd_wq/kacpid_wq/kacpi_notify_wq
+        * because the hotplug code may call driver .remove() functions,
+        * which invoke flush_scheduled_work/acpi_os_wait_events_complete
+        * to flush these workqueues.
+        */
+       queue = hp ? kacpi_hotplug_wq :
+               (type == OSL_NOTIFY_HANDLER ? kacpi_notify_wq : kacpid_wq);
+       func = hp ? acpi_os_execute_hp_deferred : acpi_os_execute_deferred;
+       INIT_WORK(&dpc->work, func);
+       ret = queue_work(queue, &dpc->work);
 
        if (!ret) {
                printk(KERN_ERR PREFIX
index bc46de3d967f62904962e06bf9beef26095a9a78..a5a77b78a7237cbbb6e77a707f73a0861426718d 100644 (file)
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
 #include <linux/types.h>
-#include <linux/proc_fs.h>
-#include <linux/spinlock.h>
-#include <linux/pm.h>
 #include <linux/pci.h>
 #include <linux/acpi.h>
 #include <acpi/acpi_bus.h>
 #define _COMPONENT             ACPI_PCI_COMPONENT
 ACPI_MODULE_NAME("pci_bind");
 
-struct acpi_pci_data {
-       struct acpi_pci_id id;
-       struct pci_bus *bus;
-       struct pci_dev *dev;
-};
-
-static int acpi_pci_unbind(struct acpi_device *device);
-
-static void acpi_pci_data_handler(acpi_handle handle, u32 function,
-                                 void *context)
-{
-
-       /* TBD: Anything we need to do here? */
-
-       return;
-}
-
-/**
- * acpi_get_pci_id
- * ------------------
- * This function is used by the ACPI Interpreter (a.k.a. Core Subsystem)
- * to resolve PCI information for ACPI-PCI devices defined in the namespace.
- * This typically occurs when resolving PCI operation region information.
- */
-acpi_status acpi_get_pci_id(acpi_handle handle, struct acpi_pci_id *id)
+static int acpi_pci_unbind(struct acpi_device *device)
 {
-       int result = 0;
-       acpi_status status = AE_OK;
-       struct acpi_device *device = NULL;
-       struct acpi_pci_data *data = NULL;
-
-
-       if (!id)
-               return AE_BAD_PARAMETER;
-
-       result = acpi_bus_get_device(handle, &device);
-       if (result) {
-               printk(KERN_ERR PREFIX
-                           "Invalid ACPI Bus context for device %s\n",
-                           acpi_device_bid(device));
-               return AE_NOT_EXIST;
-       }
-
-       status = acpi_get_data(handle, acpi_pci_data_handler, (void **)&data);
-       if (ACPI_FAILURE(status) || !data) {
-               ACPI_EXCEPTION((AE_INFO, status,
-                               "Invalid ACPI-PCI context for device %s",
-                               acpi_device_bid(device)));
-               return status;
-       }
+       struct pci_dev *dev;
 
-       *id = data->id;
+       dev = acpi_get_pci_dev(device->handle);
+       if (!dev || !dev->subordinate)
+               goto out;
 
-       /*
-          id->segment = data->id.segment;
-          id->bus = data->id.bus;
-          id->device = data->id.device;
-          id->function = data->id.function;
-        */
+       acpi_pci_irq_del_prt(dev->subordinate);
 
-       ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-                         "Device %s has PCI address %04x:%02x:%02x.%d\n",
-                         acpi_device_bid(device), id->segment, id->bus,
-                         id->device, id->function));
+       device->ops.bind = NULL;
+       device->ops.unbind = NULL;
 
-       return AE_OK;
+out:
+       pci_dev_put(dev);
+       return 0;
 }
 
-EXPORT_SYMBOL(acpi_get_pci_id);
-
-int acpi_pci_bind(struct acpi_device *device)
+static int acpi_pci_bind(struct acpi_device *device)
 {
-       int result = 0;
        acpi_status status;
-       struct acpi_pci_data *data;
-       struct acpi_pci_data *pdata;
-       struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
        acpi_handle handle;
+       struct pci_bus *bus;
+       struct pci_dev *dev;
 
-       if (!device || !device->parent)
-               return -EINVAL;
-
-       data = kzalloc(sizeof(struct acpi_pci_data), GFP_KERNEL);
-       if (!data)
-               return -ENOMEM;
-
-       status = acpi_get_name(device->handle, ACPI_FULL_PATHNAME, &buffer);
-       if (ACPI_FAILURE(status)) {
-               kfree(data);
-               return -ENODEV;
-       }
-
-       ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Binding PCI device [%s]...\n",
-                         (char *)buffer.pointer));
-
-       /* 
-        * Segment & Bus
-        * -------------
-        * These are obtained via the parent device's ACPI-PCI context.
-        */
-       status = acpi_get_data(device->parent->handle, acpi_pci_data_handler,
-                              (void **)&pdata);
-       if (ACPI_FAILURE(status) || !pdata || !pdata->bus) {
-               ACPI_EXCEPTION((AE_INFO, status,
-                               "Invalid ACPI-PCI context for parent device %s",
-                               acpi_device_bid(device->parent)));
-               result = -ENODEV;
-               goto end;
-       }
-       data->id.segment = pdata->id.segment;
-       data->id.bus = pdata->bus->number;
-
-       /*
-        * Device & Function
-        * -----------------
-        * These are simply obtained from the device's _ADR method.  Note
-        * that a value of zero is valid.
-        */
-       data->id.device = device->pnp.bus_address >> 16;
-       data->id.function = device->pnp.bus_address & 0xFFFF;
-
-       ACPI_DEBUG_PRINT((ACPI_DB_INFO, "...to %04x:%02x:%02x.%d\n",
-                         data->id.segment, data->id.bus, data->id.device,
-                         data->id.function));
-
-       /*
-        * TBD: Support slot devices (e.g. function=0xFFFF).
-        */
-
-       /* 
-        * Locate PCI Device
-        * -----------------
-        * Locate matching device in PCI namespace.  If it doesn't exist
-        * this typically means that the device isn't currently inserted
-        * (e.g. docking station, port replicator, etc.).
-        */
-       data->dev = pci_get_slot(pdata->bus,
-                               PCI_DEVFN(data->id.device, data->id.function));
-       if (!data->dev) {
-               ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-                                 "Device %04x:%02x:%02x.%d not present in PCI namespace\n",
-                                 data->id.segment, data->id.bus,
-                                 data->id.device, data->id.function));
-               result = -ENODEV;
-               goto end;
-       }
-       if (!data->dev->bus) {
-               printk(KERN_ERR PREFIX
-                           "Device %04x:%02x:%02x.%d has invalid 'bus' field\n",
-                           data->id.segment, data->id.bus,
-                           data->id.device, data->id.function);
-               result = -ENODEV;
-               goto end;
-       }
+       dev = acpi_get_pci_dev(device->handle);
+       if (!dev)
+               return 0;
 
        /*
-        * PCI Bridge?
-        * -----------
-        * If so, set the 'bus' field and install the 'bind' function to 
-        * facilitate callbacks for all of its children.
+        * Install the 'bind' function to facilitate callbacks for
+        * children of the P2P bridge.
         */
-       if (data->dev->subordinate) {
+       if (dev->subordinate) {
                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                                  "Device %04x:%02x:%02x.%d is a PCI bridge\n",
-                                 data->id.segment, data->id.bus,
-                                 data->id.device, data->id.function));
-               data->bus = data->dev->subordinate;
+                                 pci_domain_nr(dev->bus), dev->bus->number,
+                                 PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)));
                device->ops.bind = acpi_pci_bind;
                device->ops.unbind = acpi_pci_unbind;
        }
 
        /*
-        * Attach ACPI-PCI Context
-        * -----------------------
-        * Thus binding the ACPI and PCI devices.
-        */
-       status = acpi_attach_data(device->handle, acpi_pci_data_handler, data);
-       if (ACPI_FAILURE(status)) {
-               ACPI_EXCEPTION((AE_INFO, status,
-                               "Unable to attach ACPI-PCI context to device %s",
-                               acpi_device_bid(device)));
-               result = -ENODEV;
-               goto end;
-       }
-
-       /*
-        * PCI Routing Table
-        * -----------------
-        * Evaluate and parse _PRT, if exists.  This code is independent of 
-        * PCI bridges (above) to allow parsing of _PRT objects within the
-        * scope of non-bridge devices.  Note that _PRTs within the scope of
-        * a PCI bridge assume the bridge's subordinate bus number.
+        * Evaluate and parse _PRT, if it exists.  This code allows parsing of
+        * _PRT objects within the scope of non-bridge devices.  Note that
+        * _PRTs within the scope of a PCI bridge assume the bridge's
+        * subordinate bus number.
         *
         * TBD: Can _PRTs exist within the scope of non-bridge PCI devices?
         */
        status = acpi_get_handle(device->handle, METHOD_NAME__PRT, &handle);
-       if (ACPI_SUCCESS(status)) {
-               if (data->bus)  /* PCI-PCI bridge */
-                       acpi_pci_irq_add_prt(device->handle, data->id.segment,
-                                            data->bus->number);
-               else            /* non-bridge PCI device */
-                       acpi_pci_irq_add_prt(device->handle, data->id.segment,
-                                            data->id.bus);
-       }
-
-      end:
-       kfree(buffer.pointer);
-       if (result) {
-               pci_dev_put(data->dev);
-               kfree(data);
-       }
-       return result;
-}
-
-static int acpi_pci_unbind(struct acpi_device *device)
-{
-       int result = 0;
-       acpi_status status;
-       struct acpi_pci_data *data;
-       struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-
-
-       if (!device || !device->parent)
-               return -EINVAL;
-
-       status = acpi_get_name(device->handle, ACPI_FULL_PATHNAME, &buffer);
        if (ACPI_FAILURE(status))
-               return -ENODEV;
+               goto out;
 
-       ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Unbinding PCI device [%s]...\n",
-                         (char *) buffer.pointer));
-       kfree(buffer.pointer);
+       if (dev->subordinate)
+               bus = dev->subordinate;
+       else
+               bus = dev->bus;
 
-       status =
-           acpi_get_data(device->handle, acpi_pci_data_handler,
-                         (void **)&data);
-       if (ACPI_FAILURE(status)) {
-               result = -ENODEV;
-               goto end;
-       }
+       acpi_pci_irq_add_prt(device->handle, bus);
 
-       status = acpi_detach_data(device->handle, acpi_pci_data_handler);
-       if (ACPI_FAILURE(status)) {
-               ACPI_EXCEPTION((AE_INFO, status,
-                               "Unable to detach data from device %s",
-                               acpi_device_bid(device)));
-               result = -ENODEV;
-               goto end;
-       }
-       if (data->dev->subordinate) {
-               acpi_pci_irq_del_prt(data->id.segment, data->bus->number);
-       }
-       pci_dev_put(data->dev);
-       kfree(data);
-
-      end:
-       return result;
+out:
+       pci_dev_put(dev);
+       return 0;
 }
 
-int
-acpi_pci_bind_root(struct acpi_device *device,
-                  struct acpi_pci_id *id, struct pci_bus *bus)
+int acpi_pci_bind_root(struct acpi_device *device)
 {
-       int result = 0;
-       acpi_status status;
-       struct acpi_pci_data *data = NULL;
-       struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-
-       if (!device || !id || !bus) {
-               return -EINVAL;
-       }
-
-       data = kzalloc(sizeof(struct acpi_pci_data), GFP_KERNEL);
-       if (!data)
-               return -ENOMEM;
-
-       data->id = *id;
-       data->bus = bus;
        device->ops.bind = acpi_pci_bind;
        device->ops.unbind = acpi_pci_unbind;
 
-       status = acpi_get_name(device->handle, ACPI_FULL_PATHNAME, &buffer);
-       if (ACPI_FAILURE(status)) {
-               kfree (data);
-               return -ENODEV;
-       }
-
-       ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Binding PCI root bridge [%s] to "
-                       "%04x:%02x\n", (char *)buffer.pointer,
-                       id->segment, id->bus));
-
-       status = acpi_attach_data(device->handle, acpi_pci_data_handler, data);
-       if (ACPI_FAILURE(status)) {
-               ACPI_EXCEPTION((AE_INFO, status,
-                               "Unable to attach ACPI-PCI context to device %s",
-                               (char *)buffer.pointer));
-               result = -ENODEV;
-               goto end;
-       }
-
-      end:
-       kfree(buffer.pointer);
-       if (result != 0)
-               kfree(data);
-
-       return result;
+       return 0;
 }
index 2faa9e2ac89331b9c46c1de0de560a0225ce88e2..b794eb88ab9030506873ab6879ab05860cc083b8 100644 (file)
@@ -182,7 +182,7 @@ static void do_prt_fixups(struct acpi_prt_entry *entry,
        }
 }
 
-static int acpi_pci_irq_add_entry(acpi_handle handle, int segment, int bus,
+static int acpi_pci_irq_add_entry(acpi_handle handle, struct pci_bus *bus,
                                  struct acpi_pci_routing_table *prt)
 {
        struct acpi_prt_entry *entry;
@@ -196,8 +196,8 @@ static int acpi_pci_irq_add_entry(acpi_handle handle, int segment, int bus,
         * 1=INTA, 2=INTB.  We use the PCI encoding throughout, so convert
         * it here.
         */
-       entry->id.segment = segment;
-       entry->id.bus = bus;
+       entry->id.segment = pci_domain_nr(bus);
+       entry->id.bus = bus->number;
        entry->id.device = (prt->address >> 16) & 0xFFFF;
        entry->pin = prt->pin + 1;
 
@@ -242,7 +242,7 @@ static int acpi_pci_irq_add_entry(acpi_handle handle, int segment, int bus,
        return 0;
 }
 
-int acpi_pci_irq_add_prt(acpi_handle handle, int segment, int bus)
+int acpi_pci_irq_add_prt(acpi_handle handle, struct pci_bus *bus)
 {
        acpi_status status;
        struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
@@ -271,7 +271,7 @@ int acpi_pci_irq_add_prt(acpi_handle handle, int segment, int bus)
 
        entry = buffer.pointer;
        while (entry && (entry->length > 0)) {
-               acpi_pci_irq_add_entry(handle, segment, bus, entry);
+               acpi_pci_irq_add_entry(handle, bus, entry);
                entry = (struct acpi_pci_routing_table *)
                    ((unsigned long)entry + entry->length);
        }
@@ -280,16 +280,17 @@ int acpi_pci_irq_add_prt(acpi_handle handle, int segment, int bus)
        return 0;
 }
 
-void acpi_pci_irq_del_prt(int segment, int bus)
+void acpi_pci_irq_del_prt(struct pci_bus *bus)
 {
        struct acpi_prt_entry *entry, *tmp;
 
        printk(KERN_DEBUG
               "ACPI: Delete PCI Interrupt Routing Table for %04x:%02x\n",
-              segment, bus);
+              pci_domain_nr(bus), bus->number);
        spin_lock(&acpi_prt_lock);
        list_for_each_entry_safe(entry, tmp, &acpi_prt_list, list) {
-               if (segment == entry->id.segment && bus == entry->id.bus) {
+               if (pci_domain_nr(bus) == entry->id.segment
+                       && bus->number == entry->id.bus) {
                        list_del(&entry->list);
                        kfree(entry);
                }
index 196f97d00956aa379b95b8fbe8e5a6cb6ddd10eb..8a5bf3b356faa2d60c40f0c1f6aef6279d9d57c8 100644 (file)
@@ -63,9 +63,10 @@ static struct acpi_driver acpi_pci_root_driver = {
 
 struct acpi_pci_root {
        struct list_head node;
-       struct acpi_device * device;
-       struct acpi_pci_id id;
+       struct acpi_device *device;
        struct pci_bus *bus;
+       u16 segment;
+       u8 bus_nr;
 
        u32 osc_support_set;    /* _OSC state of support bits */
        u32 osc_control_set;    /* _OSC state of control bits */
@@ -82,7 +83,7 @@ static DEFINE_MUTEX(osc_lock);
 int acpi_pci_register_driver(struct acpi_pci_driver *driver)
 {
        int n = 0;
-       struct list_head *entry;
+       struct acpi_pci_root *root;
 
        struct acpi_pci_driver **pptr = &sub_driver;
        while (*pptr)
@@ -92,9 +93,7 @@ int acpi_pci_register_driver(struct acpi_pci_driver *driver)
        if (!driver->add)
                return 0;
 
-       list_for_each(entry, &acpi_pci_roots) {
-               struct acpi_pci_root *root;
-               root = list_entry(entry, struct acpi_pci_root, node);
+       list_for_each_entry(root, &acpi_pci_roots, node) {
                driver->add(root->device->handle);
                n++;
        }
@@ -106,7 +105,7 @@ EXPORT_SYMBOL(acpi_pci_register_driver);
 
 void acpi_pci_unregister_driver(struct acpi_pci_driver *driver)
 {
-       struct list_head *entry;
+       struct acpi_pci_root *root;
 
        struct acpi_pci_driver **pptr = &sub_driver;
        while (*pptr) {
@@ -120,28 +119,48 @@ void acpi_pci_unregister_driver(struct acpi_pci_driver *driver)
        if (!driver->remove)
                return;
 
-       list_for_each(entry, &acpi_pci_roots) {
-               struct acpi_pci_root *root;
-               root = list_entry(entry, struct acpi_pci_root, node);
+       list_for_each_entry(root, &acpi_pci_roots, node)
                driver->remove(root->device->handle);
-       }
 }
 
 EXPORT_SYMBOL(acpi_pci_unregister_driver);
 
 acpi_handle acpi_get_pci_rootbridge_handle(unsigned int seg, unsigned int bus)
 {
-       struct acpi_pci_root *tmp;
+       struct acpi_pci_root *root;
        
-       list_for_each_entry(tmp, &acpi_pci_roots, node) {
-               if ((tmp->id.segment == (u16) seg) && (tmp->id.bus == (u16) bus))
-                       return tmp->device->handle;
-       }
+       list_for_each_entry(root, &acpi_pci_roots, node)
+               if ((root->segment == (u16) seg) && (root->bus_nr == (u16) bus))
+                       return root->device->handle;
        return NULL;            
 }
 
 EXPORT_SYMBOL_GPL(acpi_get_pci_rootbridge_handle);
 
+/**
+ * acpi_is_root_bridge - determine whether an ACPI CA node is a PCI root bridge
+ * @handle: the ACPI CA node in question.
+ *
+ * Note: we could make this API take a struct acpi_device * instead, but
+ * for now, it's more convenient to operate on an acpi_handle.
+ */
+int acpi_is_root_bridge(acpi_handle handle)
+{
+       int ret;
+       struct acpi_device *device;
+
+       ret = acpi_bus_get_device(handle, &device);
+       if (ret)
+               return 0;
+
+       ret = acpi_match_device_ids(device, root_device_ids);
+       if (ret)
+               return 0;
+       else
+               return 1;
+}
+EXPORT_SYMBOL_GPL(acpi_is_root_bridge);
+
 static acpi_status
 get_root_bridge_busnr_callback(struct acpi_resource *resource, void *data)
 {
@@ -161,19 +180,22 @@ get_root_bridge_busnr_callback(struct acpi_resource *resource, void *data)
        return AE_OK;
 }
 
-static acpi_status try_get_root_bridge_busnr(acpi_handle handle, int *busnum)
+static acpi_status try_get_root_bridge_busnr(acpi_handle handle,
+                                            unsigned long long *bus)
 {
        acpi_status status;
+       int busnum;
 
-       *busnum = -1;
+       busnum = -1;
        status =
            acpi_walk_resources(handle, METHOD_NAME__CRS,
-                               get_root_bridge_busnr_callback, busnum);
+                               get_root_bridge_busnr_callback, &busnum);
        if (ACPI_FAILURE(status))
                return status;
        /* Check if we really get a bus number from _CRS */
-       if (*busnum == -1)
+       if (busnum == -1)
                return AE_ERROR;
+       *bus = busnum;
        return AE_OK;
 }
 
@@ -298,6 +320,7 @@ static acpi_status acpi_pci_osc_support(struct acpi_pci_root *root, u32 flags)
 static struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle)
 {
        struct acpi_pci_root *root;
+
        list_for_each_entry(root, &acpi_pci_roots, node) {
                if (root->device->handle == handle)
                        return root;
@@ -305,6 +328,87 @@ static struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle)
        return NULL;
 }
 
+struct acpi_handle_node {
+       struct list_head node;
+       acpi_handle handle;
+};
+
+/**
+ * acpi_get_pci_dev - convert ACPI CA handle to struct pci_dev
+ * @handle: the handle in question
+ *
+ * Given an ACPI CA handle, the desired PCI device is located in the
+ * list of PCI devices.
+ *
+ * If the device is found, its reference count is increased and this
+ * function returns a pointer to its data structure.  The caller must
+ * decrement the reference count by calling pci_dev_put().
+ * If no device is found, %NULL is returned.
+ */
+struct pci_dev *acpi_get_pci_dev(acpi_handle handle)
+{
+       int dev, fn;
+       unsigned long long adr;
+       acpi_status status;
+       acpi_handle phandle;
+       struct pci_bus *pbus;
+       struct pci_dev *pdev = NULL;
+       struct acpi_handle_node *node, *tmp;
+       struct acpi_pci_root *root;
+       LIST_HEAD(device_list);
+
+       /*
+        * Walk up the ACPI CA namespace until we reach a PCI root bridge.
+        */
+       phandle = handle;
+       while (!acpi_is_root_bridge(phandle)) {
+               node = kzalloc(sizeof(struct acpi_handle_node), GFP_KERNEL);
+               if (!node)
+                       goto out;
+
+               INIT_LIST_HEAD(&node->node);
+               node->handle = phandle;
+               list_add(&node->node, &device_list);
+
+               status = acpi_get_parent(phandle, &phandle);
+               if (ACPI_FAILURE(status))
+                       goto out;
+       }
+
+       root = acpi_pci_find_root(phandle);
+       if (!root)
+               goto out;
+
+       pbus = root->bus;
+
+       /*
+        * Now, walk back down the PCI device tree until we return to our
+        * original handle. Assumes that everything between the PCI root
+        * bridge and the device we're looking for must be a P2P bridge.
+        */
+       list_for_each_entry(node, &device_list, node) {
+               acpi_handle hnd = node->handle;
+               status = acpi_evaluate_integer(hnd, "_ADR", NULL, &adr);
+               if (ACPI_FAILURE(status))
+                       goto out;
+               dev = (adr >> 16) & 0xffff;
+               fn  = adr & 0xffff;
+
+               pdev = pci_get_slot(pbus, PCI_DEVFN(dev, fn));
+               if (hnd == handle)
+                       break;
+
+               pbus = pdev->subordinate;
+               pci_dev_put(pdev);
+       }
+out:
+       list_for_each_entry_safe(node, tmp, &device_list, node)
+               kfree(node);
+
+       return pdev;
+}
+EXPORT_SYMBOL_GPL(acpi_get_pci_dev);
+
 /**
  * acpi_pci_osc_control_set - commit requested control to Firmware
  * @handle: acpi_handle for the target ACPI object
@@ -363,31 +467,46 @@ EXPORT_SYMBOL(acpi_pci_osc_control_set);
 
 static int __devinit acpi_pci_root_add(struct acpi_device *device)
 {
-       int result = 0;
-       struct acpi_pci_root *root = NULL;
-       struct acpi_pci_root *tmp;
-       acpi_status status = AE_OK;
-       unsigned long long value = 0;
-       acpi_handle handle = NULL;
+       unsigned long long segment, bus;
+       acpi_status status;
+       int result;
+       struct acpi_pci_root *root;
+       acpi_handle handle;
        struct acpi_device *child;
        u32 flags, base_flags;
 
+       segment = 0;
+       status = acpi_evaluate_integer(device->handle, METHOD_NAME__SEG, NULL,
+                                      &segment);
+       if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
+               printk(KERN_ERR PREFIX "can't evaluate _SEG\n");
+               return -ENODEV;
+       }
 
-       if (!device)
-               return -EINVAL;
+       /* Check _CRS first, then _BBN.  If no _BBN, default to zero. */
+       bus = 0;
+       status = try_get_root_bridge_busnr(device->handle, &bus);
+       if (ACPI_FAILURE(status)) {
+               status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN,                                               NULL, &bus);
+               if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
+                       printk(KERN_ERR PREFIX
+                            "no bus number in _CRS and can't evaluate _BBN\n");
+                       return -ENODEV;
+               }
+       }
 
        root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL);
        if (!root)
                return -ENOMEM;
-       INIT_LIST_HEAD(&root->node);
 
+       INIT_LIST_HEAD(&root->node);
        root->device = device;
+       root->segment = segment & 0xFFFF;
+       root->bus_nr = bus & 0xFF;
        strcpy(acpi_device_name(device), ACPI_PCI_ROOT_DEVICE_NAME);
        strcpy(acpi_device_class(device), ACPI_PCI_ROOT_CLASS);
        device->driver_data = root;
 
-       device->ops.bind = acpi_pci_bind;
-
        /*
         * All supported architectures that use ACPI have support for
         * PCI domains, so we indicate this in _OSC support capabilities.
@@ -395,79 +514,6 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
        flags = base_flags = OSC_PCI_SEGMENT_GROUPS_SUPPORT;
        acpi_pci_osc_support(root, flags);
 
-       /* 
-        * Segment
-        * -------
-        * Obtained via _SEG, if exists, otherwise assumed to be zero (0).
-        */
-       status = acpi_evaluate_integer(device->handle, METHOD_NAME__SEG, NULL,
-                                      &value);
-       switch (status) {
-       case AE_OK:
-               root->id.segment = (u16) value;
-               break;
-       case AE_NOT_FOUND:
-               ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-                                 "Assuming segment 0 (no _SEG)\n"));
-               root->id.segment = 0;
-               break;
-       default:
-               ACPI_EXCEPTION((AE_INFO, status, "Evaluating _SEG"));
-               result = -ENODEV;
-               goto end;
-       }
-
-       /* 
-        * Bus
-        * ---
-        * Obtained via _BBN, if exists, otherwise assumed to be zero (0).
-        */
-       status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN, NULL,
-                                      &value);
-       switch (status) {
-       case AE_OK:
-               root->id.bus = (u16) value;
-               break;
-       case AE_NOT_FOUND:
-               ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Assuming bus 0 (no _BBN)\n"));
-               root->id.bus = 0;
-               break;
-       default:
-               ACPI_EXCEPTION((AE_INFO, status, "Evaluating _BBN"));
-               result = -ENODEV;
-               goto end;
-       }
-
-       /* Some systems have wrong _BBN */
-       list_for_each_entry(tmp, &acpi_pci_roots, node) {
-               if ((tmp->id.segment == root->id.segment)
-                   && (tmp->id.bus == root->id.bus)) {
-                       int bus = 0;
-                       acpi_status status;
-
-                       printk(KERN_ERR PREFIX
-                                   "Wrong _BBN value, reboot"
-                                   " and use option 'pci=noacpi'\n");
-
-                       status = try_get_root_bridge_busnr(device->handle, &bus);
-                       if (ACPI_FAILURE(status))
-                               break;
-                       if (bus != root->id.bus) {
-                               printk(KERN_INFO PREFIX
-                                      "PCI _CRS %d overrides _BBN 0\n", bus);
-                               root->id.bus = bus;
-                       }
-                       break;
-               }
-       }
-       /*
-        * Device & Function
-        * -----------------
-        * Obtained from _ADR (which has already been evaluated for us).
-        */
-       root->id.device = device->pnp.bus_address >> 16;
-       root->id.function = device->pnp.bus_address & 0xFFFF;
-
        /*
         * TBD: Need PCI interface for enumeration/configuration of roots.
         */
@@ -477,7 +523,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 
        printk(KERN_INFO PREFIX "%s [%s] (%04x:%02x)\n",
               acpi_device_name(device), acpi_device_bid(device),
-              root->id.segment, root->id.bus);
+              root->segment, root->bus_nr);
 
        /*
         * Scan the Root Bridge
@@ -486,11 +532,11 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
         * PCI namespace does not get created until this call is made (and 
         * thus the root bridge's pci_dev does not exist).
         */
-       root->bus = pci_acpi_scan_root(device, root->id.segment, root->id.bus);
+       root->bus = pci_acpi_scan_root(device, segment, bus);
        if (!root->bus) {
                printk(KERN_ERR PREFIX
                            "Bus %04x:%02x not present in PCI namespace\n",
-                           root->id.segment, root->id.bus);
+                           root->segment, root->bus_nr);
                result = -ENODEV;
                goto end;
        }
@@ -500,7 +546,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
         * -----------------------
         * Thus binding the ACPI and PCI devices.
         */
-       result = acpi_pci_bind_root(device, &root->id, root->bus);
+       result = acpi_pci_bind_root(device);
        if (result)
                goto end;
 
@@ -511,8 +557,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
         */
        status = acpi_get_handle(device->handle, METHOD_NAME__PRT, &handle);
        if (ACPI_SUCCESS(status))
-               result = acpi_pci_irq_add_prt(device->handle, root->id.segment,
-                                             root->id.bus);
+               result = acpi_pci_irq_add_prt(device->handle, root->bus);
 
        /*
         * Scan and bind all _ADR-Based Devices
@@ -531,42 +576,28 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
        if (flags != base_flags)
                acpi_pci_osc_support(root, flags);
 
-      end:
-       if (result) {
-               if (!list_empty(&root->node))
-                       list_del(&root->node);
-               kfree(root);
-       }
+       return 0;
 
+end:
+       if (!list_empty(&root->node))
+               list_del(&root->node);
+       kfree(root);
        return result;
 }
 
 static int acpi_pci_root_start(struct acpi_device *device)
 {
-       struct acpi_pci_root *root;
+       struct acpi_pci_root *root = acpi_driver_data(device);
 
-
-       list_for_each_entry(root, &acpi_pci_roots, node) {
-               if (root->device == device) {
-                       pci_bus_add_devices(root->bus);
-                       return 0;
-               }
-       }
-       return -ENODEV;
+       pci_bus_add_devices(root->bus);
+       return 0;
 }
 
 static int acpi_pci_root_remove(struct acpi_device *device, int type)
 {
-       struct acpi_pci_root *root = NULL;
-
-
-       if (!device || !acpi_driver_data(device))
-               return -EINVAL;
-
-       root = acpi_driver_data(device);
+       struct acpi_pci_root *root = acpi_driver_data(device);
 
        kfree(root);
-
        return 0;
 }
 
index 56665a63bf19b29f86c4088f02cd0ff8c7ad9faa..d74365d4a6e7d03f781954663301c50d30ac77da 100644 (file)
@@ -194,7 +194,7 @@ static int acpi_power_get_list_state(struct acpi_handle_list *list, int *state)
 
 static int acpi_power_on(acpi_handle handle, struct acpi_device *dev)
 {
-       int result = 0, state;
+       int result = 0;
        int found = 0;
        acpi_status status = AE_OK;
        struct acpi_power_resource *resource = NULL;
@@ -236,18 +236,6 @@ static int acpi_power_on(acpi_handle handle, struct acpi_device *dev)
        if (ACPI_FAILURE(status))
                return -ENODEV;
 
-       if (!acpi_power_nocheck) {
-               /*
-                * If acpi_power_nocheck is set, it is unnecessary to check
-                * the power state after power transition.
-                */
-               result = acpi_power_get_state(resource->device->handle,
-                               &state);
-               if (result)
-                       return result;
-               if (state != ACPI_POWER_RESOURCE_STATE_ON)
-                       return -ENOEXEC;
-       }
        /* Update the power resource's _device_ power state */
        resource->device->power.state = ACPI_STATE_D0;
 
@@ -258,7 +246,7 @@ static int acpi_power_on(acpi_handle handle, struct acpi_device *dev)
 
 static int acpi_power_off_device(acpi_handle handle, struct acpi_device *dev)
 {
-       int result = 0, state;
+       int result = 0;
        acpi_status status = AE_OK;
        struct acpi_power_resource *resource = NULL;
        struct list_head *node, *next;
@@ -293,18 +281,6 @@ static int acpi_power_off_device(acpi_handle handle, struct acpi_device *dev)
        if (ACPI_FAILURE(status))
                return -ENODEV;
 
-       if (!acpi_power_nocheck) {
-               /*
-                * If acpi_power_nocheck is set, it is unnecessary to check
-                * the power state after power transition.
-                */
-               result = acpi_power_get_state(handle, &state);
-               if (result)
-                       return result;
-               if (state != ACPI_POWER_RESOURCE_STATE_OFF)
-                       return -ENOEXEC;
-       }
-
        /* Update the power resource's _device_ power state */
        resource->device->power.state = ACPI_STATE_D3;
 
index 23f0fb84f1c1104538efe624c5824937e069c191..84e0f3c0744270b4c8474f92239b64255a27985b 100644 (file)
@@ -89,7 +89,7 @@ static int acpi_processor_handle_eject(struct acpi_processor *pr);
 
 static const struct acpi_device_id processor_device_ids[] = {
        {ACPI_PROCESSOR_OBJECT_HID, 0},
-       {ACPI_PROCESSOR_HID, 0},
+       {"ACPI0007", 0},
        {"", 0},
 };
 MODULE_DEVICE_TABLE(acpi, processor_device_ids);
@@ -596,7 +596,21 @@ static int acpi_processor_get_info(struct acpi_device *device)
                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                                  "No bus mastering arbitration control\n"));
 
-       if (!strcmp(acpi_device_hid(device), ACPI_PROCESSOR_HID)) {
+       if (!strcmp(acpi_device_hid(device), ACPI_PROCESSOR_OBJECT_HID)) {
+               /* Declared with "Processor" statement; match ProcessorID */
+               status = acpi_evaluate_object(pr->handle, NULL, NULL, &buffer);
+               if (ACPI_FAILURE(status)) {
+                       printk(KERN_ERR PREFIX "Evaluating processor object\n");
+                       return -ENODEV;
+               }
+
+               /*
+                * TBD: Synch processor ID (via LAPIC/LSAPIC structures) on SMP.
+                *      >>> 'acpi_get_processor_id(acpi_id, &id)' in
+                *      arch/xxx/acpi.c
+                */
+               pr->acpi_id = object.processor.proc_id;
+       } else {
                /*
                 * Declared with "Device" statement; match _UID.
                 * Note that we don't handle string _UIDs yet.
@@ -611,20 +625,6 @@ static int acpi_processor_get_info(struct acpi_device *device)
                }
                device_declaration = 1;
                pr->acpi_id = value;
-       } else {
-               /* Declared with "Processor" statement; match ProcessorID */
-               status = acpi_evaluate_object(pr->handle, NULL, NULL, &buffer);
-               if (ACPI_FAILURE(status)) {
-                       printk(KERN_ERR PREFIX "Evaluating processor object\n");
-                       return -ENODEV;
-               }
-
-               /*
-                * TBD: Synch processor ID (via LAPIC/LSAPIC structures) on SMP.
-                *      >>> 'acpi_get_processor_id(acpi_id, &id)' in
-                *      arch/xxx/acpi.c
-                */
-               pr->acpi_id = object.processor.proc_id;
        }
        cpu_index = get_cpu_id(pr->handle, device_declaration, pr->acpi_id);
 
@@ -649,7 +649,16 @@ static int acpi_processor_get_info(struct acpi_device *device)
                        return -ENODEV;
                }
        }
-
+       /*
+        * On some boxes several processors use the same processor bus id
+        * even though they are located in different scopes. For example:
+        * \_SB.SCK0.CPU0
+        * \_SB.SCK1.CPU0
+        * Rename the processor device bus id. The new bus id is generated
+        * in the following format:
+        * "CPU" followed by the CPU id in hexadecimal.
+        */
+       sprintf(acpi_device_bid(device), "CPU%X", pr->id);
        ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Processor [%d:%d]\n", pr->id,
                          pr->acpi_id));
 
@@ -731,6 +740,8 @@ static int __cpuinit acpi_processor_start(struct acpi_device *device)
        /* _PDC call should be done before doing anything else (if reqd.). */
        arch_acpi_processor_init_pdc(pr);
        acpi_processor_set_pdc(pr);
+       arch_acpi_processor_cleanup_pdc(pr);
+
 #ifdef CONFIG_CPU_FREQ
        acpi_processor_ppc_has_changed(pr);
 #endif
index 10a2d913635a99d76502309efb55895d7a92c5e4..0efa59e7e3afd16095fa538f333f11f37ca7a9d4 100644 (file)
@@ -139,7 +139,7 @@ static void acpi_safe_halt(void)
  * are affected too. We pick the most conservative approach: we assume
  * that the local APIC stops in both C2 and C3.
  */
-static void acpi_timer_check_state(int state, struct acpi_processor *pr,
+static void lapic_timer_check_state(int state, struct acpi_processor *pr,
                                   struct acpi_processor_cx *cx)
 {
        struct acpi_processor_power *pwr = &pr->power;
@@ -162,7 +162,7 @@ static void acpi_timer_check_state(int state, struct acpi_processor *pr,
                pr->power.timer_broadcast_on_state = state;
 }
 
-static void acpi_propagate_timer_broadcast(struct acpi_processor *pr)
+static void lapic_timer_propagate_broadcast(struct acpi_processor *pr)
 {
        unsigned long reason;
 
@@ -173,7 +173,7 @@ static void acpi_propagate_timer_broadcast(struct acpi_processor *pr)
 }
 
 /* Power(C) State timer broadcast control */
-static void acpi_state_timer_broadcast(struct acpi_processor *pr,
+static void lapic_timer_state_broadcast(struct acpi_processor *pr,
                                       struct acpi_processor_cx *cx,
                                       int broadcast)
 {
@@ -190,10 +190,10 @@ static void acpi_state_timer_broadcast(struct acpi_processor *pr,
 
 #else
 
-static void acpi_timer_check_state(int state, struct acpi_processor *pr,
+static void lapic_timer_check_state(int state, struct acpi_processor *pr,
                                   struct acpi_processor_cx *cstate) { }
-static void acpi_propagate_timer_broadcast(struct acpi_processor *pr) { }
-static void acpi_state_timer_broadcast(struct acpi_processor *pr,
+static void lapic_timer_propagate_broadcast(struct acpi_processor *pr) { }
+static void lapic_timer_state_broadcast(struct acpi_processor *pr,
                                       struct acpi_processor_cx *cx,
                                       int broadcast)
 {
@@ -515,7 +515,8 @@ static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx)
 static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
                                           struct acpi_processor_cx *cx)
 {
-       static int bm_check_flag;
+       static int bm_check_flag = -1;
+       static int bm_control_flag = -1;
 
 
        if (!cx->address)
@@ -545,12 +546,14 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
        }
 
        /* All the logic here assumes flags.bm_check is same across all CPUs */
-       if (!bm_check_flag) {
+       if (bm_check_flag == -1) {
                /* Determine whether bm_check is needed based on CPU  */
                acpi_processor_power_init_bm_check(&(pr->flags), pr->id);
                bm_check_flag = pr->flags.bm_check;
+               bm_control_flag = pr->flags.bm_control;
        } else {
                pr->flags.bm_check = bm_check_flag;
+               pr->flags.bm_control = bm_control_flag;
        }
 
        if (pr->flags.bm_check) {
@@ -614,29 +617,25 @@ static int acpi_processor_power_verify(struct acpi_processor *pr)
                switch (cx->type) {
                case ACPI_STATE_C1:
                        cx->valid = 1;
-                       acpi_timer_check_state(i, pr, cx);
                        break;
 
                case ACPI_STATE_C2:
                        acpi_processor_power_verify_c2(cx);
-                       if (cx->valid)
-                               acpi_timer_check_state(i, pr, cx);
                        break;
 
                case ACPI_STATE_C3:
                        acpi_processor_power_verify_c3(pr, cx);
-                       if (cx->valid)
-                               acpi_timer_check_state(i, pr, cx);
                        break;
                }
-               if (cx->valid)
-                       tsc_check_state(cx->type);
+               if (!cx->valid)
+                       continue;
 
-               if (cx->valid)
-                       working++;
+               lapic_timer_check_state(i, pr, cx);
+               tsc_check_state(cx->type);
+               working++;
        }
 
-       acpi_propagate_timer_broadcast(pr);
+       lapic_timer_propagate_broadcast(pr);
 
        return (working);
 }
@@ -839,7 +838,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev,
                return 0;
        }
 
-       acpi_state_timer_broadcast(pr, cx, 1);
+       lapic_timer_state_broadcast(pr, cx, 1);
        kt1 = ktime_get_real();
        acpi_idle_do_entry(cx);
        kt2 = ktime_get_real();
@@ -847,7 +846,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev,
 
        local_irq_enable();
        cx->usage++;
-       acpi_state_timer_broadcast(pr, cx, 0);
+       lapic_timer_state_broadcast(pr, cx, 0);
 
        return idle_time;
 }
@@ -892,7 +891,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
         * Must be done before busmaster disable as we might need to
         * access HPET !
         */
-       acpi_state_timer_broadcast(pr, cx, 1);
+       lapic_timer_state_broadcast(pr, cx, 1);
 
        if (cx->type == ACPI_STATE_C3)
                ACPI_FLUSH_CPU_CACHE();
@@ -914,7 +913,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
 
        cx->usage++;
 
-       acpi_state_timer_broadcast(pr, cx, 0);
+       lapic_timer_state_broadcast(pr, cx, 0);
        cx->time += sleep_ticks;
        return idle_time;
 }
@@ -981,7 +980,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
         * Must be done before busmaster disable as we might need to
         * access HPET !
         */
-       acpi_state_timer_broadcast(pr, cx, 1);
+       lapic_timer_state_broadcast(pr, cx, 1);
 
        kt1 = ktime_get_real();
        /*
@@ -1026,7 +1025,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
 
        cx->usage++;
 
-       acpi_state_timer_broadcast(pr, cx, 0);
+       lapic_timer_state_broadcast(pr, cx, 0);
        cx->time += sleep_ticks;
        return idle_time;
 }
index 8ff510b91d88f4f38e473afab76425a9cf392477..781435d7e3692026d81cb6f112e7e422aa00a7dc 100644 (file)
@@ -95,7 +95,7 @@ acpi_device_modalias_show(struct device *dev, struct device_attribute *attr, cha
 }
 static DEVICE_ATTR(modalias, 0444, acpi_device_modalias_show, NULL);
 
-static int acpi_bus_hot_remove_device(void *context)
+static void acpi_bus_hot_remove_device(void *context)
 {
        struct acpi_device *device;
        acpi_handle handle = context;
@@ -104,10 +104,10 @@ static int acpi_bus_hot_remove_device(void *context)
        acpi_status status = AE_OK;
 
        if (acpi_bus_get_device(handle, &device))
-               return 0;
+               return;
 
        if (!device)
-               return 0;
+               return;
 
        ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                "Hot-removing device %s...\n", dev_name(&device->dev)));
@@ -115,7 +115,7 @@ static int acpi_bus_hot_remove_device(void *context)
        if (acpi_bus_trim(device, 1)) {
                printk(KERN_ERR PREFIX
                                "Removing device failed\n");
-               return -1;
+               return;
        }
 
        /* power off device */
@@ -142,9 +142,10 @@ static int acpi_bus_hot_remove_device(void *context)
         */
        status = acpi_evaluate_object(handle, "_EJ0", &arg_list, NULL);
        if (ACPI_FAILURE(status))
-               return -ENODEV;
+               printk(KERN_WARNING PREFIX
+                               "Eject device failed\n");
 
-       return 0;
+       return;
 }
 
 static ssize_t
@@ -155,7 +156,6 @@ acpi_eject_store(struct device *d, struct device_attribute *attr,
        acpi_status status;
        acpi_object_type type = 0;
        struct acpi_device *acpi_device = to_acpi_device(d);
-       struct task_struct *task;
 
        if ((!count) || (buf[0] != '1')) {
                return -EINVAL;
@@ -172,11 +172,7 @@ acpi_eject_store(struct device *d, struct device_attribute *attr,
                goto err;
        }
 
-       /* remove the device in another thread to fix the deadlock issue */
-       task = kthread_run(acpi_bus_hot_remove_device,
-                               acpi_device->handle, "acpi_hot_remove_device");
-       if (IS_ERR(task))
-               ret = PTR_ERR(task);
+       acpi_os_hotplug_execute(acpi_bus_hot_remove_device, acpi_device->handle);
 err:
        return ret;
 }
@@ -198,12 +194,12 @@ acpi_device_path_show(struct device *dev, struct device_attribute *attr, char *b
        int result;
 
        result = acpi_get_name(acpi_dev->handle, ACPI_FULL_PATHNAME, &path);
-       if(result)
+       if (result)
                goto end;
 
        result = sprintf(buf, "%s\n", (char*)path.pointer);
        kfree(path.pointer);
-  end:
+end:
        return result;
 }
 static DEVICE_ATTR(path, 0444, acpi_device_path_show, NULL);
@@ -217,21 +213,21 @@ static int acpi_device_setup_files(struct acpi_device *dev)
        /*
         * Devices gotten from FADT don't have a "path" attribute
         */
-       if(dev->handle) {
+       if (dev->handle) {
                result = device_create_file(&dev->dev, &dev_attr_path);
-               if(result)
+               if (result)
                        goto end;
        }
 
-       if(dev->flags.hardware_id) {
+       if (dev->flags.hardware_id) {
                result = device_create_file(&dev->dev, &dev_attr_hid);
-               if(result)
+               if (result)
                        goto end;
        }
 
-       if (dev->flags.hardware_id || dev->flags.compatible_ids){
+       if (dev->flags.hardware_id || dev->flags.compatible_ids) {
                result = device_create_file(&dev->dev, &dev_attr_modalias);
-               if(result)
+               if (result)
                        goto end;
        }
 
@@ -242,7 +238,7 @@ static int acpi_device_setup_files(struct acpi_device *dev)
        status = acpi_get_handle(dev->handle, "_EJ0", &temp);
        if (ACPI_SUCCESS(status))
                result = device_create_file(&dev->dev, &dev_attr_eject);
-  end:
+end:
        return result;
 }
 
@@ -262,9 +258,9 @@ static void acpi_device_remove_files(struct acpi_device *dev)
        if (dev->flags.hardware_id || dev->flags.compatible_ids)
                device_remove_file(&dev->dev, &dev_attr_modalias);
 
-       if(dev->flags.hardware_id)
+       if (dev->flags.hardware_id)
                device_remove_file(&dev->dev, &dev_attr_hid);
-       if(dev->handle)
+       if (dev->handle)
                device_remove_file(&dev->dev, &dev_attr_path);
 }
 /* --------------------------------------------------------------------------
@@ -512,7 +508,7 @@ static int acpi_device_register(struct acpi_device *device,
                        break;
                }
        }
-       if(!found) {
+       if (!found) {
                acpi_device_bus_id = new_bus_id;
                strcpy(acpi_device_bus_id->bus_id, device->flags.hardware_id ? device->pnp.hardware_id : "device");
                acpi_device_bus_id->instance_no = 0;
@@ -530,22 +526,21 @@ static int acpi_device_register(struct acpi_device *device,
        if (device->parent)
                device->dev.parent = &parent->dev;
        device->dev.bus = &acpi_bus_type;
-       device_initialize(&device->dev);
        device->dev.release = &acpi_device_release;
-       result = device_add(&device->dev);
-       if(result) {
-               dev_err(&device->dev, "Error adding device\n");
+       result = device_register(&device->dev);
+       if (result) {
+               dev_err(&device->dev, "Error registering device\n");
                goto end;
        }
 
        result = acpi_device_setup_files(device);
-       if(result)
+       if (result)
                printk(KERN_ERR PREFIX "Error creating sysfs interface for device %s\n",
                       dev_name(&device->dev));
 
        device->removal_type = ACPI_BUS_REMOVAL_NORMAL;
        return 0;
-  end:
+end:
        mutex_lock(&acpi_device_lock);
        if (device->parent)
                list_del(&device->node);
@@ -577,7 +572,7 @@ static void acpi_device_unregister(struct acpi_device *device, int type)
  * @device: the device to add and initialize
  * @driver: driver for the device
  *
- * Used to initialize a device via its device driver.  Called whenever a 
+ * Used to initialize a device via its device driver.  Called whenever a
  * driver is bound to a device.  Invokes the driver's add() ops.
  */
 static int
@@ -585,7 +580,6 @@ acpi_bus_driver_init(struct acpi_device *device, struct acpi_driver *driver)
 {
        int result = 0;
 
-
        if (!device || !driver)
                return -EINVAL;
 
@@ -802,7 +796,7 @@ static int acpi_bus_get_wakeup_device_flags(struct acpi_device *device)
        if (!acpi_match_device_ids(device, button_device_ids))
                device->wakeup.flags.run_wake = 1;
 
-      end:
+end:
        if (ACPI_FAILURE(status))
                device->flags.wake_capable = 0;
        return 0;
@@ -1070,7 +1064,7 @@ static void acpi_device_set_id(struct acpi_device *device,
                break;
        }
 
-       /* 
+       /*
         * \_SB
         * ----
         * Fix for the system root bus device -- the only root-level device.
@@ -1320,7 +1314,7 @@ acpi_add_single_object(struct acpi_device **child,
                        device->parent->ops.bind(device);
        }
 
-      end:
+end:
        if (!result)
                *child = device;
        else {
@@ -1464,7 +1458,6 @@ acpi_bus_add(struct acpi_device **child,
 
        return result;
 }
-
 EXPORT_SYMBOL(acpi_bus_add);
 
 int acpi_bus_start(struct acpi_device *device)
@@ -1484,7 +1477,6 @@ int acpi_bus_start(struct acpi_device *device)
        }
        return result;
 }
-
 EXPORT_SYMBOL(acpi_bus_start);
 
 int acpi_bus_trim(struct acpi_device *start, int rmdevice)
@@ -1542,7 +1534,6 @@ int acpi_bus_trim(struct acpi_device *start, int rmdevice)
 }
 EXPORT_SYMBOL_GPL(acpi_bus_trim);
 
-
 static int acpi_bus_scan_fixed(struct acpi_device *root)
 {
        int result = 0;
@@ -1610,6 +1601,6 @@ int __init acpi_scan_init(void)
        if (result)
                acpi_device_unregister(acpi_root, ACPI_BUS_REMOVAL_NORMAL);
 
-      Done:
+Done:
        return result;
 }
index 1bdfb37377e32d58bcd1f3e35e858c1cc681aa5c..8851315ce858a2e2eb1e4ffd2d2b6b695fa8bfb8 100644 (file)
@@ -76,6 +76,7 @@ MODULE_LICENSE("GPL");
 static int brightness_switch_enabled = 1;
 module_param(brightness_switch_enabled, bool, 0644);
 
+static int register_count = 0;
 static int acpi_video_bus_add(struct acpi_device *device);
 static int acpi_video_bus_remove(struct acpi_device *device, int type);
 static int acpi_video_resume(struct acpi_device *device);
@@ -586,6 +587,14 @@ static struct dmi_system_id video_dmi_table[] __initdata = {
                DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5315"),
                },
        },
+       {
+        .callback = video_set_bqc_offset,
+        .ident = "Acer Aspire 7720",
+        .matches = {
+               DMI_MATCH(DMI_BOARD_VENDOR, "Acer"),
+               DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 7720"),
+               },
+       },
        {}
 };
 
@@ -976,6 +985,11 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
                device->backlight->props.max_brightness = device->brightness->count-3;
                kfree(name);
 
+               result = sysfs_create_link(&device->backlight->dev.kobj,
+                                          &device->dev->dev.kobj, "device");
+               if (result)
+                       printk(KERN_ERR PREFIX "Create sysfs link\n");
+
                device->cdev = thermal_cooling_device_register("LCD",
                                        device->dev, &video_cooling_ops);
                if (IS_ERR(device->cdev))
@@ -1054,15 +1068,15 @@ static void acpi_video_bus_find_cap(struct acpi_video_bus *video)
 static int acpi_video_bus_check(struct acpi_video_bus *video)
 {
        acpi_status status = -ENOENT;
-       struct device *dev;
+       struct pci_dev *dev;
 
        if (!video)
                return -EINVAL;
 
-       dev = acpi_get_physical_pci_device(video->device->handle);
+       dev = acpi_get_pci_dev(video->device->handle);
        if (!dev)
                return -ENODEV;
-       put_device(dev);
+       pci_dev_put(dev);
 
        /* Since there is no HID, CID and so on for VGA driver, we have
         * to check well known required nodes.
@@ -1990,6 +2004,7 @@ static int acpi_video_bus_put_one_device(struct acpi_video_device *device)
        status = acpi_remove_notify_handler(device->dev->handle,
                                            ACPI_DEVICE_NOTIFY,
                                            acpi_video_device_notify);
+       sysfs_remove_link(&device->backlight->dev.kobj, "device");
        backlight_device_unregister(device->backlight);
        if (device->cdev) {
                sysfs_remove_link(&device->dev->dev.kobj,
@@ -2318,6 +2333,13 @@ static int __init intel_opregion_present(void)
 int acpi_video_register(void)
 {
        int result = 0;
+       if (register_count) {
+               /*
+                * if the function of acpi_video_register is already called,
+                * don't register the acpi_video_bus again and return no error.
+                */
+               return 0;
+       }
 
        acpi_video_dir = proc_mkdir(ACPI_VIDEO_CLASS, acpi_root_dir);
        if (!acpi_video_dir)
@@ -2329,10 +2351,35 @@ int acpi_video_register(void)
                return -ENODEV;
        }
 
+       /*
+        * When the acpi_video_bus is loaded successfully, increase
+        * the counter reference.
+        */
+       register_count = 1;
+
        return 0;
 }
 EXPORT_SYMBOL(acpi_video_register);
 
+void acpi_video_unregister(void)
+{
+       if (!register_count) {
+               /*
+                * If the acpi video bus is already unloaded, don't
+                * unload it again and return directly.
+                */
+               return;
+       }
+       acpi_bus_unregister_driver(&acpi_video_bus);
+
+       remove_proc_entry(ACPI_VIDEO_CLASS, acpi_root_dir);
+
+       register_count = 0;
+
+       return;
+}
+EXPORT_SYMBOL(acpi_video_unregister);
+
 /*
  * This is kind of nasty. Hardware using Intel chipsets may require
  * the video opregion code to be run first in order to initialise
@@ -2350,16 +2397,12 @@ static int __init acpi_video_init(void)
        return acpi_video_register();
 }
 
-void acpi_video_exit(void)
+static void __exit acpi_video_exit(void)
 {
-
-       acpi_bus_unregister_driver(&acpi_video_bus);
-
-       remove_proc_entry(ACPI_VIDEO_CLASS, acpi_root_dir);
+       acpi_video_unregister();
 
        return;
 }
-EXPORT_SYMBOL(acpi_video_exit);
 
 module_init(acpi_video_init);
 module_exit(acpi_video_exit);
index 09737275e25fd94990d7e250f7acfcc09d2e10af..7cd2b63435ea1bf1464d2a8dbac17650cbd12dc4 100644 (file)
@@ -10,7 +10,7 @@
  * assinged
  *
  * After PCI devices are glued with ACPI devices
- * acpi_get_physical_pci_device() can be called to identify ACPI graphics
+ * acpi_get_pci_dev() can be called to identify ACPI graphics
  * devices for which a real graphics card is plugged in
  *
  * Now acpi_video_get_capabilities() can be called to check which
@@ -36,6 +36,7 @@
 
 #include <linux/acpi.h>
 #include <linux/dmi.h>
+#include <linux/pci.h>
 
 ACPI_MODULE_NAME("video");
 #define _COMPONENT             ACPI_VIDEO_COMPONENT
@@ -109,7 +110,7 @@ static acpi_status
 find_video(acpi_handle handle, u32 lvl, void *context, void **rv)
 {
        long *cap = context;
-       struct device *dev;
+       struct pci_dev *dev;
        struct acpi_device *acpi_dev;
 
        const struct acpi_device_id video_ids[] = {
@@ -120,10 +121,10 @@ find_video(acpi_handle handle, u32 lvl, void *context, void **rv)
                return AE_OK;
 
        if (!acpi_match_device_ids(acpi_dev, video_ids)) {
-               dev = acpi_get_physical_pci_device(handle);
+               dev = acpi_get_pci_dev(handle);
                if (!dev)
                        return AE_OK;
-               put_device(dev);
+               pci_dev_put(dev);
                *cap |= acpi_is_video_device(acpi_dev);
        }
        return AE_OK;
index 9533f43a30bb0e7bad9873acb85d25cf39e44625..52d953eb30c3d6872eb6e4d3b3710ba7578e272c 100644 (file)
@@ -1048,8 +1048,6 @@ static int mxser_open(struct tty_struct *tty, struct file *filp)
        if (retval)
                return retval;
 
-       /* unmark here for very high baud rate (ex. 921600 bps) used */
-       tty->low_latency = 1;
        return 0;
 }
 
index d6102b644b550d0f9a306b015dd985f642d7e974..574f1c79b6e637ededd5a4708651bc7902aea960 100644 (file)
@@ -1591,8 +1591,6 @@ static int ntty_open(struct tty_struct *tty, struct file *file)
 
        /* Enable interrupt downlink for channel */
        if (port->port.count == 1) {
-               /* FIXME: is this needed now ? */
-               tty->low_latency = 1;
                tty->driver_data = port;
                tty_port_tty_set(&port->port, tty);
                DBG1("open: %d", port->token_dl);
index 1386625fc4caae4bdf6a39e6835d0026b0e27ca7..a2e67e6df3a1406697e3482c93689e314bfe0f12 100644 (file)
@@ -467,7 +467,6 @@ static unsigned int free_tbuf_count(struct slgt_info *info);
 static unsigned int tbuf_bytes(struct slgt_info *info);
 static void reset_tbufs(struct slgt_info *info);
 static void tdma_reset(struct slgt_info *info);
-static void tdma_start(struct slgt_info *info);
 static void tx_load(struct slgt_info *info, const char *buf, unsigned int count);
 
 static void get_signals(struct slgt_info *info);
@@ -795,6 +794,18 @@ static void set_termios(struct tty_struct *tty, struct ktermios *old_termios)
        }
 }
 
+static void update_tx_timer(struct slgt_info *info)
+{
+       /*
+        * use worst case speed of 1200bps to calculate transmit timeout
+        * based on data in buffers (tbuf_bytes) and FIFO (128 bytes)
+        */
+       if (info->params.mode == MGSL_MODE_HDLC) {
+               int timeout  = (tbuf_bytes(info) * 7) + 1000;
+               mod_timer(&info->tx_timer, jiffies + msecs_to_jiffies(timeout));
+       }
+}
+
 static int write(struct tty_struct *tty,
                 const unsigned char *buf, int count)
 {
@@ -838,8 +849,18 @@ start:
                spin_lock_irqsave(&info->lock,flags);
                if (!info->tx_active)
                        tx_start(info);
-               else
-                       tdma_start(info);
+               else if (!(rd_reg32(info, TDCSR) & BIT0)) {
+                       /* transmit still active but transmit DMA stopped */
+                       unsigned int i = info->tbuf_current;
+                       if (!i)
+                               i = info->tbuf_count;
+                       i--;
+                       /* if DMA buf unsent must try later after tx idle */
+                       if (desc_count(info->tbufs[i]))
+                               ret = 0;
+               }
+               if (ret > 0)
+                       update_tx_timer(info);
                spin_unlock_irqrestore(&info->lock,flags);
        }
 
@@ -1502,10 +1523,9 @@ static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev)
        /* save start time for transmit timeout detection */
        dev->trans_start = jiffies;
 
-       /* start hardware transmitter if necessary */
        spin_lock_irqsave(&info->lock,flags);
-       if (!info->tx_active)
-               tx_start(info);
+       tx_start(info);
+       update_tx_timer(info);
        spin_unlock_irqrestore(&info->lock,flags);
 
        return 0;
@@ -3946,50 +3966,19 @@ static void tx_start(struct slgt_info *info)
                        slgt_irq_on(info, IRQ_TXUNDER + IRQ_TXIDLE);
                        /* clear tx idle and underrun status bits */
                        wr_reg16(info, SSR, (unsigned short)(IRQ_TXIDLE + IRQ_TXUNDER));
-                       if (info->params.mode == MGSL_MODE_HDLC)
-                               mod_timer(&info->tx_timer, jiffies +
-                                               msecs_to_jiffies(5000));
                } else {
                        slgt_irq_off(info, IRQ_TXDATA);
                        slgt_irq_on(info, IRQ_TXIDLE);
                        /* clear tx idle status bit */
                        wr_reg16(info, SSR, IRQ_TXIDLE);
                }
-               tdma_start(info);
+               /* set 1st descriptor address and start DMA */
+               wr_reg32(info, TDDAR, info->tbufs[info->tbuf_start].pdesc);
+               wr_reg32(info, TDCSR, BIT2 + BIT0);
                info->tx_active = true;
        }
 }
 
-/*
- * start transmit DMA if inactive and there are unsent buffers
- */
-static void tdma_start(struct slgt_info *info)
-{
-       unsigned int i;
-
-       if (rd_reg32(info, TDCSR) & BIT0)
-               return;
-
-       /* transmit DMA inactive, check for unsent buffers */
-       i = info->tbuf_start;
-       while (!desc_count(info->tbufs[i])) {
-               if (++i == info->tbuf_count)
-                       i = 0;
-               if (i == info->tbuf_current)
-                       return;
-       }
-       info->tbuf_start = i;
-
-       /* there are unsent buffers, start transmit DMA */
-
-       /* reset needed if previous error condition */
-       tdma_reset(info);
-
-       /* set 1st descriptor address */
-       wr_reg32(info, TDDAR, info->tbufs[info->tbuf_start].pdesc);
-       wr_reg32(info, TDCSR, BIT2 + BIT0); /* IRQ + DMA enable */
-}
-
 static void tx_stop(struct slgt_info *info)
 {
        unsigned short val;
@@ -5004,8 +4993,7 @@ static void tx_timeout(unsigned long context)
                info->icount.txtimeout++;
        }
        spin_lock_irqsave(&info->lock,flags);
-       info->tx_active = false;
-       info->tx_count = 0;
+       tx_stop(info);
        spin_unlock_irqrestore(&info->lock,flags);
 
 #if SYNCLINK_GENERIC_HDLC
index 62dadfc95e341078ef42a284366cef37dbac6432..4e862a75f7ff213bd62022e852a9d485b65adbbf 100644 (file)
@@ -193,7 +193,7 @@ int tty_port_block_til_ready(struct tty_port *port,
 {
        int do_clocal = 0, retval;
        unsigned long flags;
-       DECLARE_WAITQUEUE(wait, current);
+       DEFINE_WAIT(wait);
        int cd;
 
        /* block if port is in the process of being closed */
index 9aa9ea9822c85c0c714d21dfbeaefbc5a7b5428d..88dab52926f4ae73217d8f2c8ec55d37d74a0597 100644 (file)
@@ -432,23 +432,27 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
        list_splice_init(&txd->tx_list, &dc->free_list);
        list_move(&desc->desc_node, &dc->free_list);
 
-       /*
-        * We use dma_unmap_page() regardless of how the buffers were
-        * mapped before they were submitted...
-        */
        if (!ds) {
                dma_addr_t dmaaddr;
                if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
                        dmaaddr = is_dmac64(dc) ?
                                desc->hwdesc.DAR : desc->hwdesc32.DAR;
-                       dma_unmap_page(chan2parent(&dc->chan), dmaaddr,
-                                      desc->len, DMA_FROM_DEVICE);
+                       if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+                               dma_unmap_single(chan2parent(&dc->chan),
+                                       dmaaddr, desc->len, DMA_FROM_DEVICE);
+                       else
+                               dma_unmap_page(chan2parent(&dc->chan),
+                                       dmaaddr, desc->len, DMA_FROM_DEVICE);
                }
                if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
                        dmaaddr = is_dmac64(dc) ?
                                desc->hwdesc.SAR : desc->hwdesc32.SAR;
-                       dma_unmap_page(chan2parent(&dc->chan), dmaaddr,
-                                      desc->len, DMA_TO_DEVICE);
+                       if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+                               dma_unmap_single(chan2parent(&dc->chan),
+                                       dmaaddr, desc->len, DMA_TO_DEVICE);
+                       else
+                               dma_unmap_page(chan2parent(&dc->chan),
+                                       dmaaddr, desc->len, DMA_TO_DEVICE);
                }
        }
 
index dc425e74a268e0c59d9f008cb3d791e1b83c8751..e4b4e8898e39a20a451cd97a5b57b13d9e63561b 100644 (file)
@@ -419,7 +419,7 @@ void intel_opregion_free(struct drm_device *dev, int suspend)
                return;
 
        if (!suspend)
-               acpi_video_exit();
+               acpi_video_unregister();
 
        opregion->acpi->drdy = 0;
 
index 3c259ee7ddda01d61b9462dae9d9d9bfc7625178..aa87b6a3bbef0b1f4b7e086ebc55b3fd638ef885 100644 (file)
@@ -326,6 +326,15 @@ config I2C_DAVINCI
          devices such as DaVinci NIC.
          For details please see http://www.ti.com/davinci
 
+config I2C_DESIGNWARE
+       tristate "Synopsys DesignWare"
+       help
+         If you say yes to this option, support will be included for the
+         Synopsys DesignWare I2C adapter. Only master mode is supported.
+
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-designware.
+
 config I2C_GPIO
        tristate "GPIO-based bitbanging I2C"
        depends on GENERIC_GPIO
index edeabf00310600830caa27dd96e88e038762f078..e654263bfc01df6442f52b5c39df95589203b698 100644 (file)
@@ -30,6 +30,7 @@ obj-$(CONFIG_I2C_AU1550)      += i2c-au1550.o
 obj-$(CONFIG_I2C_BLACKFIN_TWI) += i2c-bfin-twi.o
 obj-$(CONFIG_I2C_CPM)          += i2c-cpm.o
 obj-$(CONFIG_I2C_DAVINCI)      += i2c-davinci.o
+obj-$(CONFIG_I2C_DESIGNWARE)   += i2c-designware.o
 obj-$(CONFIG_I2C_GPIO)         += i2c-gpio.o
 obj-$(CONFIG_I2C_HIGHLANDER)   += i2c-highlander.o
 obj-$(CONFIG_I2C_IBM_IIC)      += i2c-ibm_iic.o
diff --git a/drivers/i2c/busses/i2c-designware.c b/drivers/i2c/busses/i2c-designware.c
new file mode 100644 (file)
index 0000000..b444762
--- /dev/null
@@ -0,0 +1,624 @@
+/*
+ * Synopsys DesignWare I2C adapter driver (master only).
+ *
+ * Based on the TI DAVINCI I2C adapter driver.
+ *
+ * Copyright (C) 2006 Texas Instruments.
+ * Copyright (C) 2007 MontaVista Software Inc.
+ * Copyright (C) 2009 Provigent Ltd.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * ----------------------------------------------------------------------------
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/clk.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+
+/*
+ * Register offsets
+ */
+#define DW_IC_CON              0x0
+#define DW_IC_TAR              0x4
+#define DW_IC_DATA_CMD         0x10
+#define DW_IC_SS_SCL_HCNT      0x14
+#define DW_IC_SS_SCL_LCNT      0x18
+#define DW_IC_FS_SCL_HCNT      0x1c
+#define DW_IC_FS_SCL_LCNT      0x20
+#define DW_IC_INTR_STAT                0x2c
+#define DW_IC_INTR_MASK                0x30
+#define DW_IC_CLR_INTR         0x40
+#define DW_IC_ENABLE           0x6c
+#define DW_IC_STATUS           0x70
+#define DW_IC_TXFLR            0x74
+#define DW_IC_RXFLR            0x78
+#define DW_IC_COMP_PARAM_1     0xf4
+#define DW_IC_TX_ABRT_SOURCE   0x80
+
+#define DW_IC_CON_MASTER               0x1
+#define DW_IC_CON_SPEED_STD            0x2
+#define DW_IC_CON_SPEED_FAST           0x4
+#define DW_IC_CON_10BITADDR_MASTER     0x10
+#define DW_IC_CON_RESTART_EN           0x20
+#define DW_IC_CON_SLAVE_DISABLE                0x40
+
+#define DW_IC_INTR_TX_EMPTY    0x10
+#define DW_IC_INTR_TX_ABRT     0x40
+#define DW_IC_INTR_STOP_DET    0x200
+
+#define DW_IC_STATUS_ACTIVITY  0x1
+
+#define DW_IC_ERR_TX_ABRT      0x1
+
+/*
+ * status codes
+ */
+#define STATUS_IDLE                    0x0
+#define STATUS_WRITE_IN_PROGRESS       0x1
+#define STATUS_READ_IN_PROGRESS                0x2
+
+#define TIMEOUT                        20 /* ms */
+
+/*
+ * hardware abort codes from the DW_IC_TX_ABRT_SOURCE register
+ *
+ * only expected abort codes are listed here
+ * refer to the datasheet for the full list
+ */
+#define ABRT_7B_ADDR_NOACK     0
+#define ABRT_10ADDR1_NOACK     1
+#define ABRT_10ADDR2_NOACK     2
+#define ABRT_TXDATA_NOACK      3
+#define ABRT_GCALL_NOACK       4
+#define ABRT_GCALL_READ                5
+#define ABRT_SBYTE_ACKDET      7
+#define ABRT_SBYTE_NORSTRT     9
+#define ABRT_10B_RD_NORSTRT    10
+#define ARB_MASTER_DIS         11
+#define ARB_LOST               12
+
+static char *abort_sources[] = {
+       [ABRT_7B_ADDR_NOACK]    =
+               "slave address not acknowledged (7bit mode)",
+       [ABRT_10ADDR1_NOACK]    =
+               "first address byte not acknowledged (10bit mode)",
+       [ABRT_10ADDR2_NOACK]    =
+               "second address byte not acknowledged (10bit mode)",
+       [ABRT_TXDATA_NOACK]             =
+               "data not acknowledged",
+       [ABRT_GCALL_NOACK]              =
+               "no acknowledgement for a general call",
+       [ABRT_GCALL_READ]               =
+               "read after general call",
+       [ABRT_SBYTE_ACKDET]             =
+               "start byte acknowledged",
+       [ABRT_SBYTE_NORSTRT]    =
+               "trying to send start byte when restart is disabled",
+       [ABRT_10B_RD_NORSTRT]   =
+               "trying to read when restart is disabled (10bit mode)",
+       [ARB_MASTER_DIS]                =
+               "trying to use disabled adapter",
+       [ARB_LOST]                      =
+               "lost arbitration",
+};
+
+/**
+ * struct dw_i2c_dev - private i2c-designware data
+ * @dev: driver model device node
+ * @base: IO registers pointer
+ * @cmd_complete: tx completion indicator
+ * @pump_msg: continue in progress transfers
+ * @lock: protect this struct and IO registers
+ * @clk: input reference clock
+ * @cmd_err: run time hardware error code
+ * @msgs: points to an array of messages currently being transferred
+ * @msgs_num: the number of elements in msgs
+ * @msg_write_idx: the element index of the current tx message in the msgs
+ *     array
+ * @tx_buf_len: the length of the current tx buffer
+ * @tx_buf: the current tx buffer
+ * @msg_read_idx: the element index of the current rx message in the msgs
+ *     array
+ * @rx_buf_len: the length of the current rx buffer
+ * @rx_buf: the current rx buffer
+ * @msg_err: error status of the current transfer
+ * @status: i2c master status, one of STATUS_*
+ * @abort_source: copy of the TX_ABRT_SOURCE register
+ * @irq: interrupt number for the i2c master
+ * @adapter: i2c subsystem adapter node
+ * @tx_fifo_depth: depth of the hardware tx fifo
+ * @rx_fifo_depth: depth of the hardware rx fifo
+ */
+struct dw_i2c_dev {
+       struct device           *dev;
+       void __iomem            *base;
+       struct completion       cmd_complete;
+       struct tasklet_struct   pump_msg;
+       struct mutex            lock;
+       struct clk              *clk;
+       int                     cmd_err;
+       struct i2c_msg          *msgs;
+       int                     msgs_num;
+       int                     msg_write_idx;
+       u16                     tx_buf_len;
+       u8                      *tx_buf;
+       int                     msg_read_idx;
+       u16                     rx_buf_len;
+       u8                      *rx_buf;
+       int                     msg_err;
+       unsigned int            status;
+       u16                     abort_source;
+       int                     irq;
+       struct i2c_adapter      adapter;
+       unsigned int            tx_fifo_depth;
+       unsigned int            rx_fifo_depth;
+};
+
+/**
+ * i2c_dw_init() - initialize the designware i2c master hardware
+ * @dev: device private data
+ *
+ * This function disables the adapter, programs the SCL high/low counts and
+ * configures it as a fast speed master. It is called from the probe routine
+ * and again at run time when a transfer times out.
+ */
+static void i2c_dw_init(struct dw_i2c_dev *dev)
+{
+       u32 input_clock_khz = clk_get_rate(dev->clk) / 1000;
+       u16 ic_con;
+
+       /* Disable the adapter */
+       writeb(0, dev->base + DW_IC_ENABLE);
+
+       /* set standard and fast speed dividers for high/low periods */
+       writew((input_clock_khz * 40 / 10000)+1, /* std speed high, 4us */
+                       dev->base + DW_IC_SS_SCL_HCNT);
+       writew((input_clock_khz * 47 / 10000)+1, /* std speed low, 4.7us */
+                       dev->base + DW_IC_SS_SCL_LCNT);
+       writew((input_clock_khz *  6 / 10000)+1, /* fast speed high, 0.6us */
+                       dev->base + DW_IC_FS_SCL_HCNT);
+       writew((input_clock_khz * 13 / 10000)+1, /* fast speed low, 1.3us */
+                       dev->base + DW_IC_FS_SCL_LCNT);
+
+       /* configure the i2c master */
+       ic_con = DW_IC_CON_MASTER | DW_IC_CON_SLAVE_DISABLE |
+               DW_IC_CON_RESTART_EN | DW_IC_CON_SPEED_FAST;
+       writew(ic_con, dev->base + DW_IC_CON);
+}
+
+/*
+ * Waiting for bus not busy
+ */
+static int i2c_dw_wait_bus_not_busy(struct dw_i2c_dev *dev)
+{
+       int timeout = TIMEOUT;
+
+       while (readb(dev->base + DW_IC_STATUS) & DW_IC_STATUS_ACTIVITY) {
+               if (timeout <= 0) {
+                       dev_warn(dev->dev, "timeout waiting for bus ready\n");
+                       return -ETIMEDOUT;
+               }
+               timeout--;
+               mdelay(1);
+       }
+
+       return 0;
+}
+
+/*
+ * Initiate low level master read/write transaction.
+ * This function is called from i2c_dw_xfer when starting a transfer.
+ * This function is also called from dw_i2c_pump_msg to continue a transfer
+ * that is longer than the size of the TX FIFO.
+ */
+static void
+i2c_dw_xfer_msg(struct i2c_adapter *adap)
+{
+       struct dw_i2c_dev *dev = i2c_get_adapdata(adap);
+       struct i2c_msg *msgs = dev->msgs;
+       int num = dev->msgs_num;
+       u16 ic_con, intr_mask;
+       int tx_limit = dev->tx_fifo_depth - readb(dev->base + DW_IC_TXFLR);
+       int rx_limit = dev->rx_fifo_depth - readb(dev->base + DW_IC_RXFLR);
+       u16 addr = msgs[dev->msg_write_idx].addr;
+       u16 buf_len = dev->tx_buf_len;
+
+       if (!(dev->status & STATUS_WRITE_IN_PROGRESS)) {
+               /* Disable the adapter */
+               writeb(0, dev->base + DW_IC_ENABLE);
+
+               /* set the slave (target) address */
+               writew(msgs[dev->msg_write_idx].addr, dev->base + DW_IC_TAR);
+
+               /* if the slave address is ten bit address, enable 10BITADDR */
+               ic_con = readw(dev->base + DW_IC_CON);
+               if (msgs[dev->msg_write_idx].flags & I2C_M_TEN)
+                       ic_con |= DW_IC_CON_10BITADDR_MASTER;
+               else
+                       ic_con &= ~DW_IC_CON_10BITADDR_MASTER;
+               writew(ic_con, dev->base + DW_IC_CON);
+
+               /* Enable the adapter */
+               writeb(1, dev->base + DW_IC_ENABLE);
+       }
+
+       for (; dev->msg_write_idx < num; dev->msg_write_idx++) {
+               /* if target address has changed, we need to
+                * reprogram the target address in the i2c
+                * adapter when we are done with this transfer
+                */
+               if (msgs[dev->msg_write_idx].addr != addr)
+                       return;
+
+               if (msgs[dev->msg_write_idx].len == 0) {
+                       dev_err(dev->dev,
+                               "%s: invalid message length\n", __func__);
+                       dev->msg_err = -EINVAL;
+                       return;
+               }
+
+               if (!(dev->status & STATUS_WRITE_IN_PROGRESS)) {
+                       /* new i2c_msg */
+                       dev->tx_buf = msgs[dev->msg_write_idx].buf;
+                       buf_len = msgs[dev->msg_write_idx].len;
+               }
+
+               while (buf_len > 0 && tx_limit > 0 && rx_limit > 0) {
+                       if (msgs[dev->msg_write_idx].flags & I2C_M_RD) {
+                               writew(0x100, dev->base + DW_IC_DATA_CMD);
+                               rx_limit--;
+                       } else
+                               writew(*(dev->tx_buf++),
+                                               dev->base + DW_IC_DATA_CMD);
+                       tx_limit--; buf_len--;
+               }
+       }
+
+       intr_mask = DW_IC_INTR_STOP_DET | DW_IC_INTR_TX_ABRT;
+       if (buf_len > 0) { /* more bytes to be written */
+               intr_mask |= DW_IC_INTR_TX_EMPTY;
+               dev->status |= STATUS_WRITE_IN_PROGRESS;
+       } else
+               dev->status &= ~STATUS_WRITE_IN_PROGRESS;
+       writew(intr_mask, dev->base + DW_IC_INTR_MASK);
+
+       dev->tx_buf_len = buf_len;
+}
+
+static void
+i2c_dw_read(struct i2c_adapter *adap)
+{
+       struct dw_i2c_dev *dev = i2c_get_adapdata(adap);
+       struct i2c_msg *msgs = dev->msgs;
+       int num = dev->msgs_num;
+       u16 addr = msgs[dev->msg_read_idx].addr;
+       int rx_valid = readw(dev->base + DW_IC_RXFLR);
+
+       for (; dev->msg_read_idx < num; dev->msg_read_idx++) {
+               u16 len;
+               u8 *buf;
+
+               if (!(msgs[dev->msg_read_idx].flags & I2C_M_RD))
+                       continue;
+
+               /* different i2c client, reprogram the i2c adapter */
+               if (msgs[dev->msg_read_idx].addr != addr)
+                       return;
+
+               if (!(dev->status & STATUS_READ_IN_PROGRESS)) {
+                       len = msgs[dev->msg_read_idx].len;
+                       buf = msgs[dev->msg_read_idx].buf;
+               } else {
+                       len = dev->rx_buf_len;
+                       buf = dev->rx_buf;
+               }
+
+               for (; len > 0 && rx_valid > 0; len--, rx_valid--)
+                       *buf++ = readb(dev->base + DW_IC_DATA_CMD);
+
+               if (len > 0) {
+                       dev->status |= STATUS_READ_IN_PROGRESS;
+                       dev->rx_buf_len = len;
+                       dev->rx_buf = buf;
+                       return;
+               } else
+                       dev->status &= ~STATUS_READ_IN_PROGRESS;
+       }
+}
+
+/*
+ * Prepare controller for a transaction and call i2c_dw_xfer_msg
+ */
+static int
+i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
+{
+       struct dw_i2c_dev *dev = i2c_get_adapdata(adap);
+       int ret;
+
+       dev_dbg(dev->dev, "%s: msgs: %d\n", __func__, num);
+
+       mutex_lock(&dev->lock);
+
+       INIT_COMPLETION(dev->cmd_complete);
+       dev->msgs = msgs;
+       dev->msgs_num = num;
+       dev->cmd_err = 0;
+       dev->msg_write_idx = 0;
+       dev->msg_read_idx = 0;
+       dev->msg_err = 0;
+       dev->status = STATUS_IDLE;
+
+       ret = i2c_dw_wait_bus_not_busy(dev);
+       if (ret < 0)
+               goto done;
+
+       /* start the transfers */
+       i2c_dw_xfer_msg(adap);
+
+       /* wait for tx to complete */
+       ret = wait_for_completion_interruptible_timeout(&dev->cmd_complete, HZ);
+       if (ret == 0) {
+               dev_err(dev->dev, "controller timed out\n");
+               i2c_dw_init(dev);
+               ret = -ETIMEDOUT;
+               goto done;
+       } else if (ret < 0)
+               goto done;
+
+       if (dev->msg_err) {
+               ret = dev->msg_err;
+               goto done;
+       }
+
+       /* no error */
+       if (likely(!dev->cmd_err)) {
+               /* read rx fifo, and disable the adapter */
+               do {
+                       i2c_dw_read(adap);
+               } while (dev->status & STATUS_READ_IN_PROGRESS);
+               writeb(0, dev->base + DW_IC_ENABLE);
+               ret = num;
+               goto done;
+       }
+
+       /* We have an error */
+       if (dev->cmd_err == DW_IC_ERR_TX_ABRT) {
+               unsigned long abort_source = dev->abort_source;
+               int i;
+
+               for_each_bit(i, &abort_source, ARRAY_SIZE(abort_sources)) {
+                   dev_err(dev->dev, "%s: %s\n", __func__, abort_sources[i]);
+               }
+       }
+       ret = -EIO;
+
+done:
+       mutex_unlock(&dev->lock);
+
+       return ret;
+}
+
+static u32 i2c_dw_func(struct i2c_adapter *adap)
+{
+       return I2C_FUNC_I2C | I2C_FUNC_10BIT_ADDR;
+}
+
+static void dw_i2c_pump_msg(unsigned long data)
+{
+       struct dw_i2c_dev *dev = (struct dw_i2c_dev *) data;
+       u16 intr_mask;
+
+       i2c_dw_read(&dev->adapter);
+       i2c_dw_xfer_msg(&dev->adapter);
+
+       intr_mask = DW_IC_INTR_STOP_DET | DW_IC_INTR_TX_ABRT;
+       if (dev->status & STATUS_WRITE_IN_PROGRESS)
+               intr_mask |= DW_IC_INTR_TX_EMPTY;
+       writew(intr_mask, dev->base + DW_IC_INTR_MASK);
+}
+
+/*
+ * Interrupt service routine. This gets called whenever an I2C interrupt
+ * occurs.
+ */
+static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id)
+{
+       struct dw_i2c_dev *dev = dev_id;
+       u16 stat;
+
+       stat = readw(dev->base + DW_IC_INTR_STAT);
+       dev_dbg(dev->dev, "%s: stat=0x%x\n", __func__, stat);
+       if (stat & DW_IC_INTR_TX_ABRT) {
+               dev->abort_source = readw(dev->base + DW_IC_TX_ABRT_SOURCE);
+               dev->cmd_err |= DW_IC_ERR_TX_ABRT;
+               dev->status = STATUS_IDLE;
+       } else if (stat & DW_IC_INTR_TX_EMPTY)
+               tasklet_schedule(&dev->pump_msg);
+
+       readb(dev->base + DW_IC_CLR_INTR);      /* clear interrupts */
+       writew(0, dev->base + DW_IC_INTR_MASK); /* disable interrupts */
+       if (stat & (DW_IC_INTR_TX_ABRT | DW_IC_INTR_STOP_DET))
+               complete(&dev->cmd_complete);
+
+       return IRQ_HANDLED;
+}
+
+static struct i2c_algorithm i2c_dw_algo = {
+       .master_xfer    = i2c_dw_xfer,
+       .functionality  = i2c_dw_func,
+};
+
+static int __devinit dw_i2c_probe(struct platform_device *pdev)
+{
+       struct dw_i2c_dev *dev;
+       struct i2c_adapter *adap;
+       struct resource *mem, *irq, *ioarea;
+       int r;
+
+       /* NOTE: driver uses the static register mapping */
+       mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!mem) {
+               dev_err(&pdev->dev, "no mem resource?\n");
+               return -EINVAL;
+       }
+
+       irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+       if (!irq) {
+               dev_err(&pdev->dev, "no irq resource?\n");
+               return -EINVAL;
+       }
+
+       ioarea = request_mem_region(mem->start, resource_size(mem),
+                       pdev->name);
+       if (!ioarea) {
+               dev_err(&pdev->dev, "I2C region already claimed\n");
+               return -EBUSY;
+       }
+
+       dev = kzalloc(sizeof(struct dw_i2c_dev), GFP_KERNEL);
+       if (!dev) {
+               r = -ENOMEM;
+               goto err_release_region;
+       }
+
+       init_completion(&dev->cmd_complete);
+       tasklet_init(&dev->pump_msg, dw_i2c_pump_msg, (unsigned long) dev);
+       mutex_init(&dev->lock);
+       dev->dev = get_device(&pdev->dev);
+       dev->irq = irq->start;
+       platform_set_drvdata(pdev, dev);
+
+       dev->clk = clk_get(&pdev->dev, NULL);
+       if (IS_ERR(dev->clk)) {
+               r = -ENODEV;
+               goto err_free_mem;
+       }
+       clk_enable(dev->clk);
+
+       dev->base = ioremap(mem->start, resource_size(mem));
+       if (dev->base == NULL) {
+               dev_err(&pdev->dev, "failure mapping io resources\n");
+               r = -EBUSY;
+               goto err_unuse_clocks;
+       }
+       {
+               u32 param1 = readl(dev->base + DW_IC_COMP_PARAM_1);
+
+               dev->tx_fifo_depth = ((param1 >> 16) & 0xff) + 1;
+               dev->rx_fifo_depth = ((param1 >> 8)  & 0xff) + 1;
+       }
+       i2c_dw_init(dev);
+
+       writew(0, dev->base + DW_IC_INTR_MASK); /* disable IRQ */
+       r = request_irq(dev->irq, i2c_dw_isr, 0, pdev->name, dev);
+       if (r) {
+               dev_err(&pdev->dev, "failure requesting irq %i\n", dev->irq);
+               goto err_iounmap;
+       }
+
+       adap = &dev->adapter;
+       i2c_set_adapdata(adap, dev);
+       adap->owner = THIS_MODULE;
+       adap->class = I2C_CLASS_HWMON;
+       strlcpy(adap->name, "Synopsys DesignWare I2C adapter",
+                       sizeof(adap->name));
+       adap->algo = &i2c_dw_algo;
+       adap->dev.parent = &pdev->dev;
+
+       adap->nr = pdev->id;
+       r = i2c_add_numbered_adapter(adap);
+       if (r) {
+               dev_err(&pdev->dev, "failure adding adapter\n");
+               goto err_free_irq;
+       }
+
+       return 0;
+
+err_free_irq:
+       free_irq(dev->irq, dev);
+err_iounmap:
+       iounmap(dev->base);
+err_unuse_clocks:
+       clk_disable(dev->clk);
+       clk_put(dev->clk);
+       dev->clk = NULL;
+err_free_mem:
+       platform_set_drvdata(pdev, NULL);
+       put_device(&pdev->dev);
+       kfree(dev);
+err_release_region:
+       release_mem_region(mem->start, resource_size(mem));
+
+       return r;
+}
+
+static int __devexit dw_i2c_remove(struct platform_device *pdev)
+{
+       struct dw_i2c_dev *dev = platform_get_drvdata(pdev);
+       struct resource *mem;
+
+       platform_set_drvdata(pdev, NULL);
+       i2c_del_adapter(&dev->adapter);
+       put_device(&pdev->dev);
+
+       clk_disable(dev->clk);
+       clk_put(dev->clk);
+       dev->clk = NULL;
+
+       writeb(0, dev->base + DW_IC_ENABLE);
+       free_irq(dev->irq, dev);
+       kfree(dev);
+
+       mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       release_mem_region(mem->start, resource_size(mem));
+       return 0;
+}
+
+/* work with hotplug and coldplug */
+MODULE_ALIAS("platform:i2c_designware");
+
+static struct platform_driver dw_i2c_driver = {
+       .remove         = __devexit_p(dw_i2c_remove),
+       .driver         = {
+               .name   = "i2c_designware",
+               .owner  = THIS_MODULE,
+       },
+};
+
+static int __init dw_i2c_init_driver(void)
+{
+       return platform_driver_probe(&dw_i2c_driver, dw_i2c_probe);
+}
+module_init(dw_i2c_init_driver);
+
+static void __exit dw_i2c_exit_driver(void)
+{
+       platform_driver_unregister(&dw_i2c_driver);
+}
+module_exit(dw_i2c_exit_driver);
+
+MODULE_AUTHOR("Baruch Siach <baruch@tkos.co.il>");
+MODULE_DESCRIPTION("Synopsys DesignWare I2C bus adapter");
+MODULE_LICENSE("GPL");
index 36e0675be9f72fe5793687c85c3ed0d7db67efae..020f9573fd82011babb4ad666a2966a44d088aba 100644 (file)
@@ -231,6 +231,17 @@ config DM_MIRROR
          Allow volume managers to mirror logical volumes, also
          needed for live data migration tools such as 'pvmove'.
 
+config DM_LOG_USERSPACE
+       tristate "Mirror userspace logging (EXPERIMENTAL)"
+       depends on DM_MIRROR && EXPERIMENTAL && NET
+       select CONNECTOR
+       ---help---
+         The userspace logging module provides a mechanism for
+         relaying the dm-dirty-log API to userspace.  Log designs
+         which are more suited to userspace implementation (e.g.
+         shared storage logs) or experimental logs can be implemented
+         by leveraging this framework.
+
 config DM_ZERO
        tristate "Zero target"
        depends on BLK_DEV_DM
@@ -249,6 +260,25 @@ config DM_MULTIPATH
        ---help---
          Allow volume managers to support multipath hardware.
 
+config DM_MULTIPATH_QL
+       tristate "I/O Path Selector based on the number of in-flight I/Os"
+       depends on DM_MULTIPATH
+       ---help---
+         This path selector is a dynamic load balancer which selects
+         the path with the least number of in-flight I/Os.
+
+         If unsure, say N.
+
+config DM_MULTIPATH_ST
+       tristate "I/O Path Selector based on the service time"
+       depends on DM_MULTIPATH
+       ---help---
+         This path selector is a dynamic load balancer which selects
+         the path expected to complete the incoming I/O in the shortest
+         time.
+
+         If unsure, say N.
+
 config DM_DELAY
        tristate "I/O delaying target (EXPERIMENTAL)"
        depends on BLK_DEV_DM && EXPERIMENTAL
index 45cc5951d9287030df73363a20659482aa55414d..1dc4185bd781f3137b86e3f29e53c2dd3de5e745 100644 (file)
@@ -8,6 +8,8 @@ dm-multipath-y  += dm-path-selector.o dm-mpath.o
 dm-snapshot-y  += dm-snap.o dm-exception-store.o dm-snap-transient.o \
                    dm-snap-persistent.o
 dm-mirror-y    += dm-raid1.o
+dm-log-userspace-y \
+               += dm-log-userspace-base.o dm-log-userspace-transfer.o
 md-mod-y       += md.o bitmap.o
 raid456-y      += raid5.o
 raid6_pq-y     += raid6algos.o raid6recov.o raid6tables.o \
@@ -36,8 +38,11 @@ obj-$(CONFIG_BLK_DEV_DM)     += dm-mod.o
 obj-$(CONFIG_DM_CRYPT)         += dm-crypt.o
 obj-$(CONFIG_DM_DELAY)         += dm-delay.o
 obj-$(CONFIG_DM_MULTIPATH)     += dm-multipath.o dm-round-robin.o
+obj-$(CONFIG_DM_MULTIPATH_QL)  += dm-queue-length.o
+obj-$(CONFIG_DM_MULTIPATH_ST)  += dm-service-time.o
 obj-$(CONFIG_DM_SNAPSHOT)      += dm-snapshot.o
 obj-$(CONFIG_DM_MIRROR)                += dm-mirror.o dm-log.o dm-region-hash.o
+obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o
 obj-$(CONFIG_DM_ZERO)          += dm-zero.o
 
 quiet_cmd_unroll = UNROLL  $@
index 53394e863c749db9444ab4ae4780b06a61afb8f1..9933eb861c7191b34fe43b9ac01deb5e44bd614f 100644 (file)
@@ -1132,6 +1132,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                goto bad_crypt_queue;
        }
 
+       ti->num_flush_requests = 1;
        ti->private = cc;
        return 0;
 
@@ -1189,6 +1190,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
                     union map_info *map_context)
 {
        struct dm_crypt_io *io;
+       struct crypt_config *cc;
+
+       if (unlikely(bio_empty_barrier(bio))) {
+               cc = ti->private;
+               bio->bi_bdev = cc->dev->bdev;
+               return DM_MAPIO_REMAPPED;
+       }
 
        io = crypt_io_alloc(ti, bio, bio->bi_sector - ti->begin);
 
@@ -1305,9 +1313,17 @@ static int crypt_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
        return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
 }
 
+static int crypt_iterate_devices(struct dm_target *ti,
+                                iterate_devices_callout_fn fn, void *data)
+{
+       struct crypt_config *cc = ti->private;
+
+       return fn(ti, cc->dev, cc->start, data);
+}
+
 static struct target_type crypt_target = {
        .name   = "crypt",
-       .version= {1, 6, 0},
+       .version = {1, 7, 0},
        .module = THIS_MODULE,
        .ctr    = crypt_ctr,
        .dtr    = crypt_dtr,
@@ -1318,6 +1334,7 @@ static struct target_type crypt_target = {
        .resume = crypt_resume,
        .message = crypt_message,
        .merge  = crypt_merge,
+       .iterate_devices = crypt_iterate_devices,
 };
 
 static int __init dm_crypt_init(void)
index 559dbb52bc851039702692e201a00a700e402187..4e5b843cd4d77fb388920991eefd5540e2399aef 100644 (file)
@@ -197,6 +197,7 @@ out:
        mutex_init(&dc->timer_lock);
        atomic_set(&dc->may_delay, 1);
 
+       ti->num_flush_requests = 1;
        ti->private = dc;
        return 0;
 
@@ -278,8 +279,9 @@ static int delay_map(struct dm_target *ti, struct bio *bio,
 
        if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) {
                bio->bi_bdev = dc->dev_write->bdev;
-               bio->bi_sector = dc->start_write +
-                                (bio->bi_sector - ti->begin);
+               if (bio_sectors(bio))
+                       bio->bi_sector = dc->start_write +
+                                        (bio->bi_sector - ti->begin);
 
                return delay_bio(dc, dc->write_delay, bio);
        }
@@ -316,9 +318,26 @@ static int delay_status(struct dm_target *ti, status_type_t type,
        return 0;
 }
 
+static int delay_iterate_devices(struct dm_target *ti,
+                                iterate_devices_callout_fn fn, void *data)
+{
+       struct delay_c *dc = ti->private;
+       int ret = 0;
+
+       ret = fn(ti, dc->dev_read, dc->start_read, data);
+       if (ret)
+               goto out;
+
+       if (dc->dev_write)
+               ret = fn(ti, dc->dev_write, dc->start_write, data);
+
+out:
+       return ret;
+}
+
 static struct target_type delay_target = {
        .name        = "delay",
-       .version     = {1, 0, 2},
+       .version     = {1, 1, 0},
        .module      = THIS_MODULE,
        .ctr         = delay_ctr,
        .dtr         = delay_dtr,
@@ -326,6 +345,7 @@ static struct target_type delay_target = {
        .presuspend  = delay_presuspend,
        .resume      = delay_resume,
        .status      = delay_status,
+       .iterate_devices = delay_iterate_devices,
 };
 
 static int __init dm_delay_init(void)
index 75d8081a904175bcff7d3199c2deba4140fc6086..c3ae51584b12f52fc1de3434965329c43a930363 100644 (file)
@@ -216,7 +216,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
                return -EINVAL;
        }
 
-       type = get_type(argv[1]);
+       type = get_type(&persistent);
        if (!type) {
                ti->error = "Exception store type not recognised";
                r = -EINVAL;
index c92701dc50016ff2f1eb87f40a76b8a8dce1b208..2442c8c0789808607089722a2157ca94f4176c92 100644 (file)
@@ -156,7 +156,7 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
  */
 static inline sector_t get_dev_size(struct block_device *bdev)
 {
-       return bdev->bd_inode->i_size >> SECTOR_SHIFT;
+       return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
 }
 
 static inline chunk_t sector_to_chunk(struct dm_exception_store *store,
index e73aabd61cd78abdbc63996704c177e6aea399d1..3a2e6a2f8bdd336f863807108c9ca3ebbfbba917 100644 (file)
@@ -22,6 +22,7 @@ struct dm_io_client {
 /* FIXME: can we shrink this ? */
 struct io {
        unsigned long error_bits;
+       unsigned long eopnotsupp_bits;
        atomic_t count;
        struct task_struct *sleeper;
        struct dm_io_client *client;
@@ -107,8 +108,11 @@ static inline unsigned bio_get_region(struct bio *bio)
  *---------------------------------------------------------------*/
 static void dec_count(struct io *io, unsigned int region, int error)
 {
-       if (error)
+       if (error) {
                set_bit(region, &io->error_bits);
+               if (error == -EOPNOTSUPP)
+                       set_bit(region, &io->eopnotsupp_bits);
+       }
 
        if (atomic_dec_and_test(&io->count)) {
                if (io->sleeper)
@@ -360,7 +364,9 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
                return -EIO;
        }
 
+retry:
        io.error_bits = 0;
+       io.eopnotsupp_bits = 0;
        atomic_set(&io.count, 1); /* see dispatch_io() */
        io.sleeper = current;
        io.client = client;
@@ -377,6 +383,11 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
        }
        set_current_state(TASK_RUNNING);
 
+       if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
+               rw &= ~(1 << BIO_RW_BARRIER);
+               goto retry;
+       }
+
        if (error_bits)
                *error_bits = io.error_bits;
 
@@ -397,6 +408,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
 
        io = mempool_alloc(client->pool, GFP_NOIO);
        io->error_bits = 0;
+       io->eopnotsupp_bits = 0;
        atomic_set(&io->count, 1); /* see dispatch_io() */
        io->sleeper = NULL;
        io->client = client;
index 1128d3fba797f38a89f39849eea012bc9daa22d1..7f77f18fcafa662e07139600716865780c1afcdc 100644 (file)
@@ -276,7 +276,7 @@ retry:
        up_write(&_hash_lock);
 }
 
-static int dm_hash_rename(const char *old, const char *new)
+static int dm_hash_rename(uint32_t cookie, const char *old, const char *new)
 {
        char *new_name, *old_name;
        struct hash_cell *hc;
@@ -333,7 +333,7 @@ static int dm_hash_rename(const char *old, const char *new)
                dm_table_put(table);
        }
 
-       dm_kobject_uevent(hc->md);
+       dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie);
 
        dm_put(hc->md);
        up_write(&_hash_lock);
@@ -680,6 +680,9 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
 
        __hash_remove(hc);
        up_write(&_hash_lock);
+
+       dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr);
+
        dm_put(md);
        param->data_size = 0;
        return 0;
@@ -715,7 +718,7 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
                return r;
 
        param->data_size = 0;
-       return dm_hash_rename(param->name, new_name);
+       return dm_hash_rename(param->event_nr, param->name, new_name);
 }
 
 static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
@@ -842,8 +845,11 @@ static int do_resume(struct dm_ioctl *param)
        if (dm_suspended(md))
                r = dm_resume(md);
 
-       if (!r)
+
+       if (!r) {
+               dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr);
                r = __dev_status(md, param);
+       }
 
        dm_put(md);
        return r;
@@ -1044,6 +1050,12 @@ static int populate_table(struct dm_table *table,
                next = spec->next;
        }
 
+       r = dm_table_set_type(table);
+       if (r) {
+               DMWARN("unable to set table type");
+               return r;
+       }
+
        return dm_table_complete(table);
 }
 
@@ -1089,6 +1101,13 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
                goto out;
        }
 
+       r = dm_table_alloc_md_mempools(t);
+       if (r) {
+               DMWARN("unable to allocate mempools for this table");
+               dm_table_destroy(t);
+               goto out;
+       }
+
        down_write(&_hash_lock);
        hc = dm_get_mdptr(md);
        if (!hc || hc->md != md) {
index 79fb53e51c709e2875382c517bc61413e50fd978..9184b6deb8685dc3660e21dd438f6aa8fcd200b3 100644 (file)
@@ -53,6 +53,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                goto bad;
        }
 
+       ti->num_flush_requests = 1;
        ti->private = lc;
        return 0;
 
@@ -81,7 +82,8 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio)
        struct linear_c *lc = ti->private;
 
        bio->bi_bdev = lc->dev->bdev;
-       bio->bi_sector = linear_map_sector(ti, bio->bi_sector);
+       if (bio_sectors(bio))
+               bio->bi_sector = linear_map_sector(ti, bio->bi_sector);
 }
 
 static int linear_map(struct dm_target *ti, struct bio *bio,
@@ -132,9 +134,17 @@ static int linear_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
        return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
 }
 
+/*
+ * Call fn once, for the device and start value held in the target's
+ * private data, and hand fn's result back to the caller.
+ */
+static int linear_iterate_devices(struct dm_target *ti,
+                                 iterate_devices_callout_fn fn, void *data)
+{
+       struct linear_c *ctx = ti->private;
+       int rc = fn(ti, ctx->dev, ctx->start, data);
+
+       return rc;
+}
+
 static struct target_type linear_target = {
        .name   = "linear",
-       .version= {1, 0, 3},
+       .version = {1, 1, 0},
        .module = THIS_MODULE,
        .ctr    = linear_ctr,
        .dtr    = linear_dtr,
@@ -142,6 +152,7 @@ static struct target_type linear_target = {
        .status = linear_status,
        .ioctl  = linear_ioctl,
        .merge  = linear_merge,
+       .iterate_devices = linear_iterate_devices,
 };
 
 int __init dm_linear_init(void)
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
new file mode 100644 (file)
index 0000000..e69b965
--- /dev/null
@@ -0,0 +1,696 @@
+/*
+ * Copyright (C) 2006-2009 Red Hat, Inc.
+ *
+ * This file is released under the LGPL.
+ */
+
+#include <linux/bio.h>
+#include <linux/jiffies.h>
+#include <linux/dm-dirty-log.h>
+#include <linux/device-mapper.h>
+#include <linux/dm-log-userspace.h>
+
+#include "dm-log-userspace-transfer.h"
+
+/*
+ * One queued mark/clear request.  Entries are put on log_c.flush_list by
+ * userspace_mark_region() and userspace_clear_region(); userspace_flush()
+ * sends them to the server and returns them to flush_entry_pool.
+ */
+struct flush_entry {
+       int type;               /* DM_ULOG_MARK_REGION or DM_ULOG_CLEAR_REGION */
+       region_t region;        /* region the request applies to */
+       struct list_head list;  /* link in log_c.flush_list */
+};
+
+/*
+ * Per-log context, allocated in userspace_ctr() and stored in
+ * dm_dirty_log.context.
+ */
+struct log_c {
+       struct dm_target *ti;
+       uint32_t region_size;    /* obtained via DM_ULOG_GET_REGION_SIZE */
+       region_t region_count;   /* ti->len divided by region_size, rounded up */
+       char uuid[DM_UUID_LEN];  /* copied from argv[0] of the constructor */
+
+       char *usr_argv_str;      /* constructor string sent with DM_ULOG_CTR */
+       uint32_t usr_argc;
+
+       /*
+        * in_sync_hint gets set when doing is_remote_recovering.  It
+        * represents the first region that needs recovery.  IOW, the
+        * first zero bit of sync_bits.  This can be useful to limit
+        * traffic for calls like is_remote_recovering and get_resync_work,
+        * but take care when using it for anything else.
+        */
+       uint64_t in_sync_hint;
+
+       spinlock_t flush_lock;        /* taken around flush_list updates */
+       struct list_head flush_list;  /* only for clear and mark requests */
+};
+
+static mempool_t *flush_entry_pool;
+
+/* mempool allocation callback: one flush_entry from kmalloc; pool_data unused */
+static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data)
+{
+       struct flush_entry *entry = kmalloc(sizeof(struct flush_entry),
+                                           gfp_mask);
+
+       return entry;
+}
+
+/* mempool free callback: releases an element with kfree; pool_data unused */
+static void flush_entry_free(void *element, void *pool_data)
+{
+       void *entry = element;
+
+       kfree(entry);
+}
+
+/*
+ * Send one request to the userspace log server.
+ *
+ * If the server isn't there, -ESRCH is returned by dm_consult_userspace()
+ * and we must keep trying until the server is restored: every two seconds
+ * the constructor string is re-sent (DM_ULOG_CTR) until that succeeds,
+ * then the log is resumed (DM_ULOG_RESUME) and the original request is
+ * issued again.
+ *
+ * Returns: the result of the request, or -ESRCH if the resume after a
+ * reconnect fails.
+ */
+static int userspace_do_request(struct log_c *lc, const char *uuid,
+                               int request_type, char *data, size_t data_size,
+                               char *rdata, size_t *rdata_size)
+{
+       int r;
+
+       for (;;) {
+               r = dm_consult_userspace(uuid, request_type, data,
+                                        data_size, rdata, rdata_size);
+               if (r != -ESRCH)
+                       return r;
+
+               DMERR(" Userspace log server not found.");
+
+               /* Sleep, then try to re-create the log; repeat until it works */
+               do {
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       schedule_timeout(2*HZ);
+                       DMWARN("Attempting to contact userspace log server...");
+                       r = dm_consult_userspace(uuid, DM_ULOG_CTR,
+                                                lc->usr_argv_str,
+                                                strlen(lc->usr_argv_str) + 1,
+                                                NULL, NULL);
+               } while (r);
+
+               DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
+
+               r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL,
+                                        0, NULL, NULL);
+               if (r) {
+                       DMERR("Error trying to resume userspace log: %d", r);
+                       return -ESRCH;
+               }
+               /* Resumed: go round again and re-issue the original request */
+       }
+}
+
+/*
+ * Build the string that is sent to the server with DM_ULOG_CTR: every
+ * argument followed by a space, then ti->len printed as a decimal number.
+ *
+ * On success *ctr_str points to a kzalloc'ed string that the caller must
+ * kfree, and the string length (excluding the terminating NUL) is
+ * returned.  On allocation failure *ctr_str is NULL and -ENOMEM is
+ * returned.
+ */
+static int build_constructor_string(struct dm_target *ti,
+                                   unsigned argc, char **argv,
+                                   char **ctr_str)
+{
+       int i, str_size;
+       char *str = NULL;
+
+       *ctr_str = NULL;
+
+       for (i = 0, str_size = 0; i < argc; i++)
+               str_size += strlen(argv[i]) + 1; /* +1 for space between args */
+
+       str_size += 20; /* Max number of chars in a printed u64 number */
+       str_size += 1;  /* Terminating NUL written by the final sprintf */
+
+       str = kzalloc(str_size, GFP_KERNEL);
+       if (!str) {
+               DMWARN("Unable to allocate memory for constructor string");
+               return -ENOMEM;
+       }
+
+       /* str_size is reused below as the running write offset */
+       for (i = 0, str_size = 0; i < argc; i++)
+               str_size += sprintf(str + str_size, "%s ", argv[i]);
+       str_size += sprintf(str + str_size, "%llu",
+                           (unsigned long long)ti->len);
+
+       *ctr_str = str;
+       return str_size;
+}
+
+/*
+ * userspace_ctr
+ *
+ * argv contains:
+ *     <UUID> <other args>
+ * Where 'other args' is the userspace implementation specific log
+ * arguments.  An example might be:
+ *     <UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync]
+ *
+ * So, this module will strip off the <UUID> for identification purposes
+ * when communicating with userspace about a log; but will pass on everything
+ * else.
+ *
+ * On success log->context points to the new log context.
+ *
+ * Returns: 0 on success, non-zero (normally -EXXX) on failure
+ */
+static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
+                        unsigned argc, char **argv)
+{
+       int r = 0;
+       int str_size;
+       char *ctr_str = NULL;
+       struct log_c *lc = NULL;
+       uint64_t rdata;
+       size_t rdata_size;
+
+       if (argc < 3) {
+               DMWARN("Too few arguments to userspace dirty log");
+               return -EINVAL;
+       }
+
+       /* Checked before allocating so there is nothing to undo on failure */
+       if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
+               DMWARN("UUID argument too long.");
+               return -EINVAL;
+       }
+
+       /*
+        * Zeroed so that fields which are only assigned later (such as
+        * in_sync_hint, set in resume) never hold stale heap contents.
+        */
+       lc = kzalloc(sizeof(*lc), GFP_KERNEL);
+       if (!lc) {
+               DMWARN("Unable to allocate userspace log context.");
+               return -ENOMEM;
+       }
+
+       lc->ti = ti;
+
+       /* Length verified above, so the copy is always NUL terminated */
+       strncpy(lc->uuid, argv[0], DM_UUID_LEN);
+       spin_lock_init(&lc->flush_lock);
+       INIT_LIST_HEAD(&lc->flush_list);
+
+       str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str);
+       if (str_size < 0) {
+               kfree(lc);
+               return str_size;
+       }
+
+       /* Send table string */
+       r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR,
+                                ctr_str, str_size, NULL, NULL);
+
+       /*
+        * Any failure here means the server has no log for us; do not go
+        * on to ask for the region size, which would overwrite the error.
+        */
+       if (r) {
+               if (r == -ESRCH)
+                       DMERR("Userspace log server not found");
+               else
+                       DMERR("Userspace log server failed to create log");
+               goto out;
+       }
+
+       /* Since the region size does not change, get it now */
+       rdata_size = sizeof(rdata);
+       r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE,
+                                NULL, 0, (char *)&rdata, &rdata_size);
+
+       if (r) {
+               DMERR("Failed to get region size of dirty log");
+               goto out;
+       }
+
+       lc->region_size = (uint32_t)rdata;
+       lc->region_count = dm_sector_div_up(ti->len, lc->region_size);
+
+out:
+       if (r) {
+               kfree(lc);
+               kfree(ctr_str);
+       } else {
+               lc->usr_argv_str = ctr_str;
+               lc->usr_argc = argc;
+               log->context = lc;
+       }
+
+       return r;
+}
+
+/*
+ * userspace_dtr
+ *
+ * Send DM_ULOG_DTR to the server, then free the constructor string and
+ * the log context.  The result of the request is not examined.
+ */
+static void userspace_dtr(struct dm_dirty_log *log)
+{
+       struct log_c *lc = log->context;
+
+       dm_consult_userspace(lc->uuid, DM_ULOG_DTR, NULL, 0, NULL, NULL);
+
+       kfree(lc->usr_argv_str);
+       kfree(lc);
+}
+
+/* Forward DM_ULOG_PRESUSPEND to the server and return its result. */
+static int userspace_presuspend(struct dm_dirty_log *log)
+{
+       struct log_c *lc = log->context;
+
+       return dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND,
+                                   NULL, 0, NULL, NULL);
+}
+
+/* Forward DM_ULOG_POSTSUSPEND to the server and return its result. */
+static int userspace_postsuspend(struct dm_dirty_log *log)
+{
+       struct log_c *lc = log->context;
+
+       return dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND,
+                                   NULL, 0, NULL, NULL);
+}
+
+/*
+ * Reset in_sync_hint, then forward DM_ULOG_RESUME to the server and
+ * return its result.
+ */
+static int userspace_resume(struct dm_dirty_log *log)
+{
+       struct log_c *lc = log->context;
+
+       /* The hint must be cleared on every resume */
+       lc->in_sync_hint = 0;
+
+       return dm_consult_userspace(lc->uuid, DM_ULOG_RESUME,
+                                   NULL, 0, NULL, NULL);
+}
+
+/* Return the region size that was fetched from the server in the ctr. */
+static uint32_t userspace_get_region_size(struct dm_dirty_log *log)
+{
+       const struct log_c *ctx = log->context;
+       uint32_t size = ctx->region_size;
+
+       return size;
+}
+
+/*
+ * userspace_is_clean
+ *
+ * Ask the server whether a region is clean.  Any failure while
+ * consulting the server is reported as "not clean".
+ *
+ * Returns: 1 if clean, 0 otherwise
+ */
+static int userspace_is_clean(struct dm_dirty_log *log, region_t region)
+{
+       struct log_c *lc = log->context;
+       uint64_t region64 = (uint64_t)region;
+       int64_t is_clean;
+       size_t rdata_size = sizeof(is_clean);
+
+       if (userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN,
+                                (char *)&region64, sizeof(region64),
+                                (char *)&is_clean, &rdata_size))
+               return 0;
+
+       return (int)is_clean;
+}
+
+/*
+ * userspace_in_sync
+ *
+ * Ask the server whether the region is in-sync.  Any failure while
+ * consulting the server is reported as "not in sync".
+ *
+ * If 'can_block' is not set, return -EWOULDBLOCK immediately without
+ * contacting the server.
+ *
+ * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK
+ */
+static int userspace_in_sync(struct dm_dirty_log *log, region_t region,
+                            int can_block)
+{
+       struct log_c *lc = log->context;
+       uint64_t region64 = region;
+       int64_t in_sync;
+       size_t rdata_size = sizeof(in_sync);
+
+       /*
+        * We can never respond directly - even if in_sync_hint is
+        * set.  This is because another machine could see a device
+        * failure and mark the region out-of-sync.  If we don't go
+        * to userspace to ask, we might think the region is in-sync
+        * and allow a read to pick up data that is stale.  (This is
+        * very unlikely if a device actually fails; but it is very
+        * likely if a connection to one device from one machine fails.)
+        *
+        * There still might be a problem if the mirror caches the region
+        * state as in-sync... but then this call would not be made.  So,
+        * that is a mirror problem.
+        */
+       if (!can_block)
+               return -EWOULDBLOCK;
+
+       if (userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC,
+                                (char *)&region64, sizeof(region64),
+                                (char *)&in_sync, &rdata_size))
+               return 0;
+
+       return (int)in_sync;
+}
+
+/*
+ * userspace_flush
+ *
+ * This function is ok to block.
+ * The flush happens in two stages.  First, every queued clear/mark
+ * request is sent to the server.  Then the server is told to commit
+ * them with DM_ULOG_FLUSH.  This gives the server a chance to optimise
+ * the commit, instead of doing it for every request.
+ *
+ * Additionally, we could implement another thread that
+ * sends the requests up to the server - reducing the
+ * load on flush.  Then the flush would have less in
+ * the list and be responsible for the finishing commit.
+ *
+ * Returns: 0 on success (or when nothing was queued), non-zero on failure
+ */
+static int userspace_flush(struct dm_dirty_log *log)
+{
+       struct log_c *lc = log->context;
+       struct flush_entry *entry, *next;
+       unsigned long irq_flags;
+       LIST_HEAD(pending);
+       int r = 0;
+
+       /* Take the whole queue in one go so the lock is held only briefly */
+       spin_lock_irqsave(&lc->flush_lock, irq_flags);
+       list_splice_init(&lc->flush_list, &pending);
+       spin_unlock_irqrestore(&lc->flush_lock, irq_flags);
+
+       if (list_empty(&pending))
+               return 0;
+
+       /*
+        * FIXME: Count up requests, group request types,
+        * allocate memory to stick all requests in and
+        * send to server in one go.  Failing the allocation,
+        * do it one by one.
+        */
+       list_for_each_entry(entry, &pending, list) {
+               r = userspace_do_request(lc, lc->uuid, entry->type,
+                                        (char *)&entry->region,
+                                        sizeof(entry->region),
+                                        NULL, NULL);
+               if (r)
+                       break;
+       }
+
+       /* Commit only if every queued request was delivered */
+       if (!r)
+               r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
+                                        NULL, 0, NULL, NULL);
+
+       /*
+        * We can safely remove these entries, even if failure.
+        * Calling code will receive an error and will know that
+        * the log facility has failed.
+        */
+       list_for_each_entry_safe(entry, next, &pending, list) {
+               list_del(&entry->list);
+               mempool_free(entry, flush_entry_pool);
+       }
+
+       if (r)
+               dm_table_event(lc->ti->table);
+
+       return r;
+}
+
+/*
+ * userspace_mark_region
+ *
+ * Queue a DM_ULOG_MARK_REGION request for the next flush.
+ *
+ * This function should avoid blocking unless absolutely required.
+ * (Memory allocation is valid for blocking.)
+ */
+static void userspace_mark_region(struct dm_dirty_log *log, region_t region)
+{
+       struct log_c *lc = log->context;
+       struct flush_entry *entry;
+       unsigned long irq_flags;
+
+       /* Wait for an allocation, but _never_ fail */
+       entry = mempool_alloc(flush_entry_pool, GFP_NOIO);
+       BUG_ON(!entry);
+
+       /* The entry is private until it is linked in, so fill it unlocked */
+       entry->type = DM_ULOG_MARK_REGION;
+       entry->region = region;
+
+       spin_lock_irqsave(&lc->flush_lock, irq_flags);
+       list_add(&entry->list, &lc->flush_list);
+       spin_unlock_irqrestore(&lc->flush_lock, irq_flags);
+}
+
+/*
+ * userspace_clear_region
+ *
+ * Queue a DM_ULOG_CLEAR_REGION request for the next flush.
+ *
+ * This function must not block.
+ * So, the alloc can't block.  In the worst case, it is ok to
+ * fail.  It would simply mean we can't clear the region.
+ * Does nothing to current sync context, but does mean
+ * the region will be re-sync'ed on a reload of the mirror
+ * even though it is in-sync.
+ */
+static void userspace_clear_region(struct dm_dirty_log *log, region_t region)
+{
+       struct log_c *lc = log->context;
+       struct flush_entry *entry;
+       unsigned long irq_flags;
+
+       /*
+        * If we fail to allocate, we skip the clearing of
+        * the region.  This doesn't hurt us in any way, except
+        * to cause the region to be resync'ed when the
+        * device is activated next time.
+        */
+       entry = mempool_alloc(flush_entry_pool, GFP_ATOMIC);
+       if (!entry) {
+               DMERR("Failed to allocate memory to clear region.");
+               return;
+       }
+
+       /* The entry is private until it is linked in, so fill it unlocked */
+       entry->type = DM_ULOG_CLEAR_REGION;
+       entry->region = region;
+
+       spin_lock_irqsave(&lc->flush_lock, irq_flags);
+       list_add(&entry->list, &lc->flush_list);
+       spin_unlock_irqrestore(&lc->flush_lock, irq_flags);
+}
+
+/*
+ * userspace_get_resync_work
+ *
+ * Get a region that needs recovery.  It is valid to return
+ * an error for this function.
+ *
+ * The server is not consulted when in_sync_hint already covers every
+ * region.  *region is written only when the request succeeded; it is
+ * left untouched on error and on the in_sync_hint short cut.
+ *
+ * Returns: 1 if region filled, 0 if no work, <0 on error
+ */
+static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)
+{
+       int r;
+       size_t rdata_size;
+       struct log_c *lc = log->context;
+       struct {
+               int64_t i; /* 64-bit for mix arch compatibility */
+               region_t r;
+       } pkg;
+
+       if (lc->in_sync_hint >= lc->region_count)
+               return 0;
+
+       rdata_size = sizeof(pkg);
+       r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK,
+                                NULL, 0,
+                                (char *)&pkg, &rdata_size);
+
+       /* pkg was never filled in on failure; do not copy stack garbage out */
+       if (r)
+               return r;
+
+       *region = pkg.r;
+       return (int)pkg.i;
+}
+
+/*
+ * userspace_set_region_sync
+ *
+ * Tell the server the sync status of a given region.  This function
+ * must not fail, so the result of the request is not examined.
+ */
+static void userspace_set_region_sync(struct dm_dirty_log *log,
+                                     region_t region, int in_sync)
+{
+       struct log_c *lc = log->context;
+       struct {
+               region_t r;
+               int64_t i;
+       } pkg;
+
+       pkg.i = (int64_t)in_sync;
+       pkg.r = region;
+
+       /*
+        * It would be nice to be able to report failures.
+        * However, it is easy enough to detect and resolve.
+        */
+       userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
+                            (char *)&pkg, sizeof(pkg),
+                            NULL, NULL);
+}
+
+/*
+ * userspace_get_sync_count
+ *
+ * Ask the server how many regions are in sync.  If there is any sort
+ * of failure when consulting the server, the count is taken to be zero.
+ * When the reported count covers every region, in_sync_hint is moved
+ * to region_count.
+ *
+ * Returns: sync count on success, 0 on failure
+ */
+static region_t userspace_get_sync_count(struct dm_dirty_log *log)
+{
+       struct log_c *lc = log->context;
+       uint64_t sync_count;
+       size_t rdata_size = sizeof(sync_count);
+
+       if (userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT,
+                                NULL, 0,
+                                (char *)&sync_count, &rdata_size))
+               return 0;
+
+       if (!(sync_count < lc->region_count))
+               lc->in_sync_hint = lc->region_count;
+
+       return (region_t)sync_count;
+}
+
+/*
+ * userspace_status
+ *
+ * STATUSTYPE_INFO: the status text is requested from the server, which
+ * writes it straight into 'result'.  STATUSTYPE_TABLE: the table line is
+ * rebuilt locally from the log type name, argument count, uuid and the
+ * saved constructor string.
+ *
+ * NOTE(review): DMEMIT is defined outside this file and appears to rely
+ * on the locals 'sz', 'result' and 'maxlen' - confirm before renaming.
+ * NOTE(review): when the INFO request fails, "COM_FAILURE" is emitted
+ * into 'result' but 0 is still returned - confirm that is intended.
+ *
+ * Returns: amount of space consumed, 0 if the server request failed
+ */
+static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
+                           char *result, unsigned maxlen)
+{
+       int r = 0;
+       size_t sz = (size_t)maxlen;
+       struct log_c *lc = log->context;
+
+       switch (status_type) {
+       case STATUSTYPE_INFO:
+               /* sz goes in as the buffer capacity and comes back as bytes used */
+               r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO,
+                                        NULL, 0,
+                                        result, &sz);
+
+               if (r) {
+                       sz = 0;
+                       DMEMIT("%s 1 COM_FAILURE", log->type->name);
+               }
+               break;
+       case STATUSTYPE_TABLE:
+               sz = 0;
+               DMEMIT("%s %u %s %s", log->type->name, lc->usr_argc + 1,
+                      lc->uuid, lc->usr_argv_str);
+               break;
+       }
+       return (r) ? 0 : (int)sz;
+}
+
+/*
+ * userspace_is_remote_recovering
+ *
+ * Ask the server whether another machine is recovering the region.
+ * Regions below in_sync_hint are answered locally with 0.  Requests to
+ * the server are rate limited to one every HZ / 4 jiffies; while the
+ * limit is in force, and whenever the server cannot be consulted, the
+ * region is reported as recovering.  'limit' is function-static, so the
+ * rate limit is shared by every log that uses this function.
+ *
+ * Returns: 1 if region recovering, 0 otherwise
+ */
+static int userspace_is_remote_recovering(struct dm_dirty_log *log,
+                                         region_t region)
+{
+       int r;
+       uint64_t region64 = region;
+       struct log_c *lc = log->context;
+       /* Same type as jiffies so the wrap-safe comparison below is valid */
+       static unsigned long limit;
+       struct {
+               int64_t is_recovering;
+               uint64_t in_sync_hint;
+       } pkg;
+       size_t rdata_size = sizeof(pkg);
+
+       /*
+        * Once the mirror has been reported to be in-sync,
+        * it will never again ask for recovery work.  So,
+        * we can safely say there is not a remote machine
+        * recovering if the device is in-sync.  (in_sync_hint
+        * must be reset at resume time.)
+        */
+       if (region < lc->in_sync_hint)
+               return 0;
+
+       /*
+        * A plain '<' breaks when jiffies wraps: a limit taken just before
+        * the wrap would keep us answering "recovering" until jiffies
+        * caught up with it again.  limit == 0 means "never set", so the
+        * first call always goes to the server.
+        */
+       if (limit && time_before(jiffies, limit))
+               return 1;
+
+       limit = jiffies + (HZ / 4);
+       r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING,
+                                (char *)&region64, sizeof(region64),
+                                (char *)&pkg, &rdata_size);
+       if (r)
+               return 1;
+
+       lc->in_sync_hint = pkg.in_sync_hint;
+
+       return (int)pkg.is_recovering;
+}
+
+/*
+ * Dirty log type "userspace": every operation is implemented by one of
+ * the userspace_* functions in this file.  Registered in
+ * userspace_dirty_log_init() and unregistered in
+ * userspace_dirty_log_exit().
+ */
+static struct dm_dirty_log_type _userspace_type = {
+       .name = "userspace",
+       .module = THIS_MODULE,
+       .ctr = userspace_ctr,
+       .dtr = userspace_dtr,
+       .presuspend = userspace_presuspend,
+       .postsuspend = userspace_postsuspend,
+       .resume = userspace_resume,
+       .get_region_size = userspace_get_region_size,
+       .is_clean = userspace_is_clean,
+       .in_sync = userspace_in_sync,
+       .flush = userspace_flush,
+       .mark_region = userspace_mark_region,
+       .clear_region = userspace_clear_region,
+       .get_resync_work = userspace_get_resync_work,
+       .set_region_sync = userspace_set_region_sync,
+       .get_sync_count = userspace_get_sync_count,
+       .status = userspace_status,
+       .is_remote_recovering = userspace_is_remote_recovering,
+};
+
+/*
+ * Module init: create the flush entry pool, bring up the transfer layer
+ * and register the "userspace" dirty log type.  Whatever was set up is
+ * torn down again, in reverse order, if a later step fails.
+ *
+ * Returns: 0 on success, -ENOMEM or the failing step's error otherwise
+ */
+static int __init userspace_dirty_log_init(void)
+{
+       int r;
+
+       flush_entry_pool = mempool_create(100, flush_entry_alloc,
+                                         flush_entry_free, NULL);
+       if (!flush_entry_pool) {
+               DMWARN("Unable to create flush_entry_pool:  No memory.");
+               return -ENOMEM;
+       }
+
+       r = dm_ulog_tfr_init();
+       if (r) {
+               DMWARN("Unable to initialize userspace log communications");
+               goto bad_tfr;
+       }
+
+       r = dm_dirty_log_type_register(&_userspace_type);
+       if (r) {
+               DMWARN("Couldn't register userspace dirty log type");
+               goto bad_register;
+       }
+
+       DMINFO("version 1.0.0 loaded");
+       return 0;
+
+bad_register:
+       dm_ulog_tfr_exit();
+bad_tfr:
+       mempool_destroy(flush_entry_pool);
+       return r;
+}
+
+/*
+ * Module exit: undo userspace_dirty_log_init() in reverse order -
+ * unregister the log type, shut down the transfer layer, destroy the pool.
+ */
+static void __exit userspace_dirty_log_exit(void)
+{
+       dm_dirty_log_type_unregister(&_userspace_type);
+       dm_ulog_tfr_exit();
+       mempool_destroy(flush_entry_pool);
+
+       DMINFO("version 1.0.0 unloaded");
+}
+
+module_init(userspace_dirty_log_init);
+module_exit(userspace_dirty_log_exit);
+
+MODULE_DESCRIPTION(DM_NAME " userspace dirty log link");
+MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
new file mode 100644 (file)
index 0000000..0ca1ee7
--- /dev/null
@@ -0,0 +1,276 @@
+/*
+ * Copyright (C) 2006-2009 Red Hat, Inc.
+ *
+ * This file is released under the LGPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <net/sock.h>
+#include <linux/workqueue.h>
+#include <linux/connector.h>
+#include <linux/device-mapper.h>
+#include <linux/dm-log-userspace.h>
+
+#include "dm-log-userspace-transfer.h"
+
+static uint32_t dm_ulog_seq;
+
+/*
+ * Netlink/Connector is an unreliable protocol.  How long should
+ * we wait for a response before assuming it was lost and retrying?
+ * (If we do receive a response after this time, it will be discarded
+ * and the response to the resent request will be waited for.)
+ */
+#define DM_ULOG_RETRY_TIMEOUT (15 * HZ)
+
+/*
+ * Pre-allocated space for speed
+ */
+#define DM_ULOG_PREALLOCED_SIZE 512
+static struct cn_msg *prealloced_cn_msg;
+static struct dm_ulog_request *prealloced_ulog_tfr;
+
+static struct cb_id ulog_cn_id = {
+       .idx = CN_IDX_DM,
+       .val = CN_VAL_DM_USERSPACE_LOG
+};
+
+static DEFINE_MUTEX(dm_ulog_lock);
+
+/*
+ * One outstanding request waiting for its reply.  fill_pkg() looks an
+ * entry up on receiving_list by sequence number, fills in the result
+ * fields and signals 'complete'.
+ */
+struct receiving_pkg {
+       struct list_head list;        /* link in receiving_list */
+       struct completion complete;   /* completed by fill_pkg() */
+
+       uint32_t seq;                 /* compared with the reply's seq */
+
+       int error;                    /* result code set by fill_pkg() */
+       size_t *data_size;            /* in: capacity of data, out: bytes stored */
+       char *data;                   /* buffer the reply payload is copied to */
+};
+
+static DEFINE_SPINLOCK(receiving_list_lock);
+static struct list_head receiving_list;
+
+/*
+ * Fill in the preallocated connector header for 'tfr' and send it.
+ * Only the header is initialised here; the payload is not copied.
+ * NOTE(review): presumably 'tfr' already sits directly behind
+ * prealloced_cn_msg - confirm where the two buffers are allocated.
+ *
+ * Returns: the result of cn_netlink_send()
+ */
+static int dm_ulog_sendto_server(struct dm_ulog_request *tfr)
+{
+       struct cn_msg *hdr = prealloced_cn_msg;
+
+       /* Clear the header only; sizeof(struct cn_msg) excludes the payload */
+       memset(hdr, 0, sizeof(struct cn_msg));
+
+       hdr->id.idx = ulog_cn_id.idx;
+       hdr->id.val = ulog_cn_id.val;
+       hdr->seq = tfr->seq;
+       hdr->ack = 0;
+       hdr->len = sizeof(struct dm_ulog_request) + tfr->data_size;
+
+       return cn_netlink_send(hdr, 0, gfp_any());
+}
+
+/*
+ * Parameters for this function can be either msg or tfr, but not
+ * both.  This function fills in the reply for a waiting request.
+ * If just msg is given, then the reply is simply an ACK from userspace
+ * that the request was received.
+ *
+ * The waiting request is found by sequence number; its error code and
+ * data are filled in and its completion is signalled.  The only caller
+ * in view, cn_ulog_callback(), holds receiving_list_lock around the call.
+ *
+ * Returns: 0 on success, -ENOENT if no request with that sequence
+ * number is waiting
+ */
+static int fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr)
+{
+       uint32_t rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0;
+       struct receiving_pkg *pkg;
+
+       /*
+        * The 'receiving_pkg' entries in this list are statically
+        * allocated on the stack in 'dm_consult_userspace'.
+        * Each process that is waiting for a reply from the user
+        * space server will have an entry in this list.
+        *
+        * We are safe to do it this way because the stack space
+        * is unique to each process, but still addressable by
+        * other processes.
+        */
+       list_for_each_entry(pkg, &receiving_list, list) {
+               if (rtn_seq != pkg->seq)
+                       continue;
+
+               if (msg) {
+                       pkg->error = -msg->ack;
+                       /*
+                        * If we are trying again, we will need to know our
+                        * storage capacity.  Otherwise, along with the
+                        * error code, we make explicit that we have no data.
+                        */
+                       if (pkg->error != -EAGAIN)
+                               *(pkg->data_size) = 0;
+               } else if (tfr->data_size > *(pkg->data_size)) {
+                       /* %zu: *(pkg->data_size) is a size_t, not a long */
+                       DMERR("Insufficient space to receive package [%u] "
+                             "(%u vs %zu)", tfr->request_type,
+                             tfr->data_size, *(pkg->data_size));
+
+                       *(pkg->data_size) = 0;
+                       pkg->error = -ENOSPC;
+               } else {
+                       pkg->error = tfr->error;
+                       memcpy(pkg->data, tfr->data, tfr->data_size);
+                       *(pkg->data_size) = tfr->data_size;
+               }
+               complete(&pkg->complete);
+               return 0;
+       }
+
+       return -ENOENT;
+}
+
+/*
+ * This is the connector callback that delivers data
+ * that was sent from userspace.
+ *
+ * A message with no payload is handled as a bare ACK, a payload of at
+ * least sizeof(struct dm_ulog_request) as a full reply, and anything
+ * in between is logged and dropped.
+ */
+static void cn_ulog_callback(void *data)
+{
+       struct cn_msg *msg = data;
+       /* The request structure follows the connector header directly */
+       struct dm_ulog_request *tfr = (struct dm_ulog_request *)(msg + 1);
+
+       spin_lock(&receiving_list_lock);
+       if (!msg->len) {
+               fill_pkg(msg, NULL);
+       } else if (msg->len >= sizeof(*tfr)) {
+               fill_pkg(NULL, tfr);
+       } else {
+               DMERR("Incomplete message received (expected %u, got %u): [%u]",
+                     (unsigned)sizeof(*tfr), msg->len, msg->seq);
+       }
+       spin_unlock(&receiving_list_lock);
+}
+
+/**
+ * dm_consult_userspace
+ * @uuid: log's uuid (must be DM_UUID_LEN in size)
+ * @request_type:  found in include/linux/dm-log-userspace.h
+ * @data: data to tx to the server
+ * @data_size: size of data in bytes
+ * @rdata: place to put return data from server
+ * @rdata_size: value-result (amount of space given/amount of space used);
+ *              may be NULL when no return data is expected
+ *
+ * rdata_size is undefined on failure.
+ *
+ * Memory used to communicate with userspace is zero'ed
+ * before populating to ensure that no unwanted bits leak
+ * from kernel space to user-space.  All userspace log communications
+ * between kernel and user space go through this function.
+ *
+ * The request is resent whenever waiting for the reply times out or
+ * the reply carries -EAGAIN.
+ *
+ * Returns: 0 on success, -EXXX on failure
+ **/
+int dm_consult_userspace(const char *uuid, int request_type,
+                        char *data, size_t data_size,
+                        char *rdata, size_t *rdata_size)
+{
+       int r = 0;
+       size_t dummy = 0;
+       /*
+        * Bytes of the preallocated buffer taken up by the two headers.
+        * This must be the size of the request structure itself, not of
+        * a pointer to it, or the payload bound below is too generous.
+        */
+       int overhead_size =
+               sizeof(struct dm_ulog_request) + sizeof(struct cn_msg);
+       struct dm_ulog_request *tfr = prealloced_ulog_tfr;
+       struct receiving_pkg pkg;
+
+       /*
+        * Given the space needed to hold the 'struct cn_msg' and
+        * 'struct dm_ulog_request', is there enough room left for
+        * the payload?
+        */
+       if (data_size > (DM_ULOG_PREALLOCED_SIZE - overhead_size)) {
+               DMINFO("Size of tfr exceeds preallocated size");
+               return -EINVAL;
+       }
+
+       /* Callers that expect no return data may pass NULL. */
+       if (!rdata_size)
+               rdata_size = &dummy;
+resend:
+       /*
+        * We serialize the sending of requests so we can
+        * use the preallocated space.
+        */
+       mutex_lock(&dm_ulog_lock);
+
+       /*
+        * tfr starts right after the cn_msg header, so everything from
+        * tfr to the end of the preallocated buffer is cleared.
+        */
+       memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - sizeof(struct cn_msg));
+       memcpy(tfr->uuid, uuid, DM_UUID_LEN);
+       tfr->seq = dm_ulog_seq++;
+
+       /*
+        * Must be valid request type (all other bits set to
+        * zero).  This reserves other bits for possible future
+        * use.
+        */
+       tfr->request_type = request_type & DM_ULOG_REQUEST_MASK;
+
+       tfr->data_size = data_size;
+       if (data && data_size)
+               memcpy(tfr->data, data, data_size);
+
+       /*
+        * Publish the on-stack package before sending so that the
+        * reply can be matched to it by sequence number.
+        */
+       memset(&pkg, 0, sizeof(pkg));
+       init_completion(&pkg.complete);
+       pkg.seq = tfr->seq;
+       pkg.data_size = rdata_size;
+       pkg.data = rdata;
+       spin_lock(&receiving_list_lock);
+       list_add(&(pkg.list), &receiving_list);
+       spin_unlock(&receiving_list_lock);
+
+       r = dm_ulog_sendto_server(tfr);
+
+       mutex_unlock(&dm_ulog_lock);
+
+       if (r) {
+               DMERR("Unable to send log request [%u] to userspace: %d",
+                     request_type, r);
+               spin_lock(&receiving_list_lock);
+               list_del_init(&(pkg.list));
+               spin_unlock(&receiving_list_lock);
+
+               goto out;
+       }
+
+       r = wait_for_completion_timeout(&(pkg.complete), DM_ULOG_RETRY_TIMEOUT);
+       /* pkg lives on this stack frame; unlink it before going any further. */
+       spin_lock(&receiving_list_lock);
+       list_del_init(&(pkg.list));
+       spin_unlock(&receiving_list_lock);
+       if (!r) {
+               /* Only the last 8 characters of the uuid are logged. */
+               DMWARN("[%s] Request timed out: [%u/%u] - retrying",
+                      (strlen(uuid) > 8) ?
+                      (uuid + (strlen(uuid) - 8)) : (uuid),
+                      request_type, pkg.seq);
+               goto resend;
+       }
+
+       r = pkg.error;
+       if (r == -EAGAIN)
+               goto resend;
+
+out:
+       return r;
+}
+
+/*
+ * Set up the transfer layer: initialise the list of pending replies,
+ * allocate the single buffer shared by the cn_msg header and the
+ * request that follows it, and register the connector callback.
+ *
+ * Returns: 0 on success, -ENOMEM if the buffer cannot be allocated,
+ * or the error returned by cn_add_callback().
+ */
+int dm_ulog_tfr_init(void)
+{
+       int r;
+       void *prealloced;
+
+       INIT_LIST_HEAD(&receiving_list);
+
+       prealloced = kmalloc(DM_ULOG_PREALLOCED_SIZE, GFP_KERNEL);
+       if (!prealloced)
+               return -ENOMEM;
+
+       /* One allocation: cn_msg header first, request right behind it. */
+       prealloced_cn_msg = prealloced;
+       prealloced_ulog_tfr = prealloced + sizeof(struct cn_msg);
+
+       r = cn_add_callback(&ulog_cn_id, "dmlogusr", cn_ulog_callback);
+       if (r) {
+               /*
+                * Registration failed, so there is no callback to
+                * remove; release the buffer instead of leaking it.
+                */
+               kfree(prealloced);
+               prealloced_cn_msg = NULL;
+               prealloced_ulog_tfr = NULL;
+               return r;
+       }
+
+       return 0;
+}
+
+/*
+ * Tear down the transfer layer: unregister the connector callback,
+ * then free the preallocated buffer.  prealloced_cn_msg is the start
+ * of the allocation made in dm_ulog_tfr_init().
+ */
+void dm_ulog_tfr_exit(void)
+{
+       cn_del_callback(&ulog_cn_id);
+       kfree(prealloced_cn_msg);
+}
diff --git a/drivers/md/dm-log-userspace-transfer.h b/drivers/md/dm-log-userspace-transfer.h
new file mode 100644 (file)
index 0000000..c26d8e4
--- /dev/null
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2006-2009 Red Hat, Inc.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef __DM_LOG_USERSPACE_TRANSFER_H__
+#define __DM_LOG_USERSPACE_TRANSFER_H__
+
+#define DM_MSG_PREFIX "dm-log-userspace"
+
+/*
+ * Allocate the preallocated message buffer and register the connector
+ * callback.  Returns 0 on success or a negative errno.
+ */
+int dm_ulog_tfr_init(void);
+
+/* Unregister the connector callback and free the message buffer. */
+void dm_ulog_tfr_exit(void);
+
+/*
+ * Send one request to the userspace log server and wait for its reply.
+ * See the kernel-doc comment on the definition for the parameters.
+ */
+int dm_consult_userspace(const char *uuid, int request_type,
+                        char *data, size_t data_size,
+                        char *rdata, size_t *rdata_size);
+
+#endif /* __DM_LOG_USERSPACE_TRANSFER_H__ */
index 6fa8ccf91c70e70174d578e10b8793b884a86406..9443896ede070152020046d15979bb9f1dc55b29 100644 (file)
@@ -412,11 +412,12 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
                /*
                 * Buffer holds both header and bitset.
                 */
-               buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) +
-                                      bitset_size,
-                                      ti->limits.logical_block_size);
+               buf_size =
+                   dm_round_up((LOG_OFFSET << SECTOR_SHIFT) + bitset_size,
+                               bdev_logical_block_size(lc->header_location.
+                                                           bdev));
 
-               if (buf_size > dev->bdev->bd_inode->i_size) {
+               if (buf_size > i_size_read(dev->bdev->bd_inode)) {
                        DMWARN("log device %s too small: need %llu bytes",
                                dev->name, (unsigned long long)buf_size);
                        kfree(lc);
index 6a386ab4f7ebb898bdfd529682f240b2beed20c3..c70604a208979b4b1f87e4a9e98f10b5ce00036d 100644 (file)
@@ -8,7 +8,6 @@
 #include <linux/device-mapper.h>
 
 #include "dm-path-selector.h"
-#include "dm-bio-record.h"
 #include "dm-uevent.h"
 
 #include <linux/ctype.h>
@@ -35,6 +34,7 @@ struct pgpath {
 
        struct dm_path path;
        struct work_struct deactivate_path;
+       struct work_struct activate_path;
 };
 
 #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
@@ -64,8 +64,6 @@ struct multipath {
        spinlock_t lock;
 
        const char *hw_handler_name;
-       struct work_struct activate_path;
-       struct pgpath *pgpath_to_activate;
        unsigned nr_priority_groups;
        struct list_head priority_groups;
        unsigned pg_init_required;      /* pg_init needs calling? */
@@ -84,7 +82,7 @@ struct multipath {
        unsigned pg_init_count;         /* Number of times pg_init called */
 
        struct work_struct process_queued_ios;
-       struct bio_list queued_ios;
+       struct list_head queued_ios;
        unsigned queue_size;
 
        struct work_struct trigger_event;
@@ -101,7 +99,7 @@ struct multipath {
  */
 struct dm_mpath_io {
        struct pgpath *pgpath;
-       struct dm_bio_details details;
+       size_t nr_bytes;
 };
 
 typedef int (*action_fn) (struct pgpath *pgpath);
@@ -128,6 +126,7 @@ static struct pgpath *alloc_pgpath(void)
        if (pgpath) {
                pgpath->is_active = 1;
                INIT_WORK(&pgpath->deactivate_path, deactivate_path);
+               INIT_WORK(&pgpath->activate_path, activate_path);
        }
 
        return pgpath;
@@ -160,7 +159,6 @@ static struct priority_group *alloc_priority_group(void)
 
 static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
 {
-       unsigned long flags;
        struct pgpath *pgpath, *tmp;
        struct multipath *m = ti->private;
 
@@ -169,10 +167,6 @@ static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
                if (m->hw_handler_name)
                        scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
                dm_put_device(ti, pgpath->path.dev);
-               spin_lock_irqsave(&m->lock, flags);
-               if (m->pgpath_to_activate == pgpath)
-                       m->pgpath_to_activate = NULL;
-               spin_unlock_irqrestore(&m->lock, flags);
                free_pgpath(pgpath);
        }
 }
@@ -198,11 +192,11 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
        m = kzalloc(sizeof(*m), GFP_KERNEL);
        if (m) {
                INIT_LIST_HEAD(&m->priority_groups);
+               INIT_LIST_HEAD(&m->queued_ios);
                spin_lock_init(&m->lock);
                m->queue_io = 1;
                INIT_WORK(&m->process_queued_ios, process_queued_ios);
                INIT_WORK(&m->trigger_event, trigger_event);
-               INIT_WORK(&m->activate_path, activate_path);
                m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
                if (!m->mpio_pool) {
                        kfree(m);
@@ -250,11 +244,12 @@ static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
        m->pg_init_count = 0;
 }
 
-static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg)
+static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
+                              size_t nr_bytes)
 {
        struct dm_path *path;
 
-       path = pg->ps.type->select_path(&pg->ps, &m->repeat_count);
+       path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes);
        if (!path)
                return -ENXIO;
 
@@ -266,7 +261,7 @@ static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg)
        return 0;
 }
 
-static void __choose_pgpath(struct multipath *m)
+static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
 {
        struct priority_group *pg;
        unsigned bypassed = 1;
@@ -278,12 +273,12 @@ static void __choose_pgpath(struct multipath *m)
        if (m->next_pg) {
                pg = m->next_pg;
                m->next_pg = NULL;
-               if (!__choose_path_in_pg(m, pg))
+               if (!__choose_path_in_pg(m, pg, nr_bytes))
                        return;
        }
 
        /* Don't change PG until it has no remaining paths */
-       if (m->current_pg && !__choose_path_in_pg(m, m->current_pg))
+       if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
                return;
 
        /*
@@ -295,7 +290,7 @@ static void __choose_pgpath(struct multipath *m)
                list_for_each_entry(pg, &m->priority_groups, list) {
                        if (pg->bypassed == bypassed)
                                continue;
-                       if (!__choose_path_in_pg(m, pg))
+                       if (!__choose_path_in_pg(m, pg, nr_bytes))
                                return;
                }
        } while (bypassed--);
@@ -322,19 +317,21 @@ static int __must_push_back(struct multipath *m)
                dm_noflush_suspending(m->ti));
 }
 
-static int map_io(struct multipath *m, struct bio *bio,
+static int map_io(struct multipath *m, struct request *clone,
                  struct dm_mpath_io *mpio, unsigned was_queued)
 {
        int r = DM_MAPIO_REMAPPED;
+       size_t nr_bytes = blk_rq_bytes(clone);
        unsigned long flags;
        struct pgpath *pgpath;
+       struct block_device *bdev;
 
        spin_lock_irqsave(&m->lock, flags);
 
        /* Do we need to select a new pgpath? */
        if (!m->current_pgpath ||
            (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
-               __choose_pgpath(m);
+               __choose_pgpath(m, nr_bytes);
 
        pgpath = m->current_pgpath;
 
@@ -344,21 +341,28 @@ static int map_io(struct multipath *m, struct bio *bio,
        if ((pgpath && m->queue_io) ||
            (!pgpath && m->queue_if_no_path)) {
                /* Queue for the daemon to resubmit */
-               bio_list_add(&m->queued_ios, bio);
+               list_add_tail(&clone->queuelist, &m->queued_ios);
                m->queue_size++;
                if ((m->pg_init_required && !m->pg_init_in_progress) ||
                    !m->queue_io)
                        queue_work(kmultipathd, &m->process_queued_ios);
                pgpath = NULL;
                r = DM_MAPIO_SUBMITTED;
-       } else if (pgpath)
-               bio->bi_bdev = pgpath->path.dev->bdev;
-       else if (__must_push_back(m))
+       } else if (pgpath) {
+               bdev = pgpath->path.dev->bdev;
+               clone->q = bdev_get_queue(bdev);
+               clone->rq_disk = bdev->bd_disk;
+       } else if (__must_push_back(m))
                r = DM_MAPIO_REQUEUE;
        else
                r = -EIO;       /* Failed */
 
        mpio->pgpath = pgpath;
+       mpio->nr_bytes = nr_bytes;
+
+       if (r == DM_MAPIO_REMAPPED && pgpath->pg->ps.type->start_io)
+               pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path,
+                                             nr_bytes);
 
        spin_unlock_irqrestore(&m->lock, flags);
 
@@ -396,30 +400,31 @@ static void dispatch_queued_ios(struct multipath *m)
 {
        int r;
        unsigned long flags;
-       struct bio *bio = NULL, *next;
        struct dm_mpath_io *mpio;
        union map_info *info;
+       struct request *clone, *n;
+       LIST_HEAD(cl);
 
        spin_lock_irqsave(&m->lock, flags);
-       bio = bio_list_get(&m->queued_ios);
+       list_splice_init(&m->queued_ios, &cl);
        spin_unlock_irqrestore(&m->lock, flags);
 
-       while (bio) {
-               next = bio->bi_next;
-               bio->bi_next = NULL;
+       list_for_each_entry_safe(clone, n, &cl, queuelist) {
+               list_del_init(&clone->queuelist);
 
-               info = dm_get_mapinfo(bio);
+               info = dm_get_rq_mapinfo(clone);
                mpio = info->ptr;
 
-               r = map_io(m, bio, mpio, 1);
-               if (r < 0)
-                       bio_endio(bio, r);
-               else if (r == DM_MAPIO_REMAPPED)
-                       generic_make_request(bio);
-               else if (r == DM_MAPIO_REQUEUE)
-                       bio_endio(bio, -EIO);
-
-               bio = next;
+               r = map_io(m, clone, mpio, 1);
+               if (r < 0) {
+                       mempool_free(mpio, m->mpio_pool);
+                       dm_kill_unmapped_request(clone, r);
+               } else if (r == DM_MAPIO_REMAPPED)
+                       dm_dispatch_request(clone);
+               else if (r == DM_MAPIO_REQUEUE) {
+                       mempool_free(mpio, m->mpio_pool);
+                       dm_requeue_unmapped_request(clone);
+               }
        }
 }
 
@@ -427,8 +432,8 @@ static void process_queued_ios(struct work_struct *work)
 {
        struct multipath *m =
                container_of(work, struct multipath, process_queued_ios);
-       struct pgpath *pgpath = NULL;
-       unsigned init_required = 0, must_queue = 1;
+       struct pgpath *pgpath = NULL, *tmp;
+       unsigned must_queue = 1;
        unsigned long flags;
 
        spin_lock_irqsave(&m->lock, flags);
@@ -437,7 +442,7 @@ static void process_queued_ios(struct work_struct *work)
                goto out;
 
        if (!m->current_pgpath)
-               __choose_pgpath(m);
+               __choose_pgpath(m, 0);
 
        pgpath = m->current_pgpath;
 
@@ -446,19 +451,15 @@ static void process_queued_ios(struct work_struct *work)
                must_queue = 0;
 
        if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
-               m->pgpath_to_activate = pgpath;
                m->pg_init_count++;
                m->pg_init_required = 0;
-               m->pg_init_in_progress = 1;
-               init_required = 1;
+               list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
+                       if (queue_work(kmpath_handlerd, &tmp->activate_path))
+                               m->pg_init_in_progress++;
+               }
        }
-
 out:
        spin_unlock_irqrestore(&m->lock, flags);
-
-       if (init_required)
-               queue_work(kmpath_handlerd, &m->activate_path);
-
        if (!must_queue)
                dispatch_queued_ios(m);
 }
@@ -553,6 +554,12 @@ static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
                return -EINVAL;
        }
 
+       if (ps_argc > as->argc) {
+               dm_put_path_selector(pst);
+               ti->error = "not enough arguments for path selector";
+               return -EINVAL;
+       }
+
        r = pst->create(&pg->ps, ps_argc, as->argv);
        if (r) {
                dm_put_path_selector(pst);
@@ -591,9 +598,20 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
        }
 
        if (m->hw_handler_name) {
-               r = scsi_dh_attach(bdev_get_queue(p->path.dev->bdev),
-                                  m->hw_handler_name);
+               struct request_queue *q = bdev_get_queue(p->path.dev->bdev);
+
+               r = scsi_dh_attach(q, m->hw_handler_name);
+               if (r == -EBUSY) {
+                       /*
+                        * Already attached to different hw_handler,
+                        * try to reattach with correct one.
+                        */
+                       scsi_dh_detach(q);
+                       r = scsi_dh_attach(q, m->hw_handler_name);
+               }
+
                if (r < 0) {
+                       ti->error = "error attaching hardware handler";
                        dm_put_device(ti, p->path.dev);
                        goto bad;
                }
@@ -699,6 +717,11 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m)
        if (!hw_argc)
                return 0;
 
+       if (hw_argc > as->argc) {
+               ti->error = "not enough arguments for hardware handler";
+               return -EINVAL;
+       }
+
        m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
        request_module("scsi_dh_%s", m->hw_handler_name);
        if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
@@ -823,6 +846,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
                goto bad;
        }
 
+       ti->num_flush_requests = 1;
+
        return 0;
 
  bad:
@@ -836,25 +861,29 @@ static void multipath_dtr(struct dm_target *ti)
 
        flush_workqueue(kmpath_handlerd);
        flush_workqueue(kmultipathd);
+       flush_scheduled_work();
        free_multipath(m);
 }
 
 /*
- * Map bios, recording original fields for later in case we have to resubmit
+ * Map cloned requests
  */
-static int multipath_map(struct dm_target *ti, struct bio *bio,
+static int multipath_map(struct dm_target *ti, struct request *clone,
                         union map_info *map_context)
 {
        int r;
        struct dm_mpath_io *mpio;
        struct multipath *m = (struct multipath *) ti->private;
 
-       mpio = mempool_alloc(m->mpio_pool, GFP_NOIO);
-       dm_bio_record(&mpio->details, bio);
+       mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
+       if (!mpio)
+               /* ENOMEM, requeue */
+               return DM_MAPIO_REQUEUE;
+       memset(mpio, 0, sizeof(*mpio));
 
        map_context->ptr = mpio;
-       bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT);
-       r = map_io(m, bio, mpio, 0);
+       clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
+       r = map_io(m, clone, mpio, 0);
        if (r < 0 || r == DM_MAPIO_REQUEUE)
                mempool_free(mpio, m->mpio_pool);
 
@@ -924,9 +953,13 @@ static int reinstate_path(struct pgpath *pgpath)
 
        pgpath->is_active = 1;
 
-       m->current_pgpath = NULL;
-       if (!m->nr_valid_paths++ && m->queue_size)
+       if (!m->nr_valid_paths++ && m->queue_size) {
+               m->current_pgpath = NULL;
                queue_work(kmultipathd, &m->process_queued_ios);
+       } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
+               if (queue_work(kmpath_handlerd, &pgpath->activate_path))
+                       m->pg_init_in_progress++;
+       }
 
        dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
                      pgpath->path.dev->name, m->nr_valid_paths);
@@ -1102,87 +1135,70 @@ static void pg_init_done(struct dm_path *path, int errors)
 
        spin_lock_irqsave(&m->lock, flags);
        if (errors) {
-               DMERR("Could not failover device. Error %d.", errors);
-               m->current_pgpath = NULL;
-               m->current_pg = NULL;
+               if (pgpath == m->current_pgpath) {
+                       DMERR("Could not failover device. Error %d.", errors);
+                       m->current_pgpath = NULL;
+                       m->current_pg = NULL;
+               }
        } else if (!m->pg_init_required) {
                m->queue_io = 0;
                pg->bypassed = 0;
        }
 
-       m->pg_init_in_progress = 0;
-       queue_work(kmultipathd, &m->process_queued_ios);
+       m->pg_init_in_progress--;
+       if (!m->pg_init_in_progress)
+               queue_work(kmultipathd, &m->process_queued_ios);
        spin_unlock_irqrestore(&m->lock, flags);
 }
 
 static void activate_path(struct work_struct *work)
 {
        int ret;
-       struct multipath *m =
-               container_of(work, struct multipath, activate_path);
-       struct dm_path *path;
-       unsigned long flags;
+       struct pgpath *pgpath =
+               container_of(work, struct pgpath, activate_path);
 
-       spin_lock_irqsave(&m->lock, flags);
-       path = &m->pgpath_to_activate->path;
-       m->pgpath_to_activate = NULL;
-       spin_unlock_irqrestore(&m->lock, flags);
-       if (!path)
-               return;
-       ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
-       pg_init_done(path, ret);
+       ret = scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev));
+       pg_init_done(&pgpath->path, ret);
 }
 
 /*
  * end_io handling
  */
-static int do_end_io(struct multipath *m, struct bio *bio,
+static int do_end_io(struct multipath *m, struct request *clone,
                     int error, struct dm_mpath_io *mpio)
 {
+       /*
+        * We don't queue any clone request inside the multipath target
+        * during end I/O handling, since those clone requests don't have
+        * bio clones.  If we queue them inside the multipath target,
+        * we need to make bio clones, that requires memory allocation.
+        * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
+        *  don't have bio clones.)
+        * Instead of queueing the clone request here, we queue the original
+        * request into dm core, which will remake a clone request and
+        * clone bios for it and resubmit it later.
+        */
+       int r = DM_ENDIO_REQUEUE;
        unsigned long flags;
 
-       if (!error)
+       if (!error && !clone->errors)
                return 0;       /* I/O complete */
 
-       if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
-               return error;
-
        if (error == -EOPNOTSUPP)
                return error;
 
-       spin_lock_irqsave(&m->lock, flags);
-       if (!m->nr_valid_paths) {
-               if (__must_push_back(m)) {
-                       spin_unlock_irqrestore(&m->lock, flags);
-                       return DM_ENDIO_REQUEUE;
-               } else if (!m->queue_if_no_path) {
-                       spin_unlock_irqrestore(&m->lock, flags);
-                       return -EIO;
-               } else {
-                       spin_unlock_irqrestore(&m->lock, flags);
-                       goto requeue;
-               }
-       }
-       spin_unlock_irqrestore(&m->lock, flags);
-
        if (mpio->pgpath)
                fail_path(mpio->pgpath);
 
-      requeue:
-       dm_bio_restore(&mpio->details, bio);
-
-       /* queue for the daemon to resubmit or fail */
        spin_lock_irqsave(&m->lock, flags);
-       bio_list_add(&m->queued_ios, bio);
-       m->queue_size++;
-       if (!m->queue_io)
-               queue_work(kmultipathd, &m->process_queued_ios);
+       if (!m->nr_valid_paths && !m->queue_if_no_path && !__must_push_back(m))
+               r = -EIO;
        spin_unlock_irqrestore(&m->lock, flags);
 
-       return DM_ENDIO_INCOMPLETE;     /* io not complete */
+       return r;
 }
 
-static int multipath_end_io(struct dm_target *ti, struct bio *bio,
+static int multipath_end_io(struct dm_target *ti, struct request *clone,
                            int error, union map_info *map_context)
 {
        struct multipath *m = ti->private;
@@ -1191,14 +1207,13 @@ static int multipath_end_io(struct dm_target *ti, struct bio *bio,
        struct path_selector *ps;
        int r;
 
-       r  = do_end_io(m, bio, error, mpio);
+       r  = do_end_io(m, clone, error, mpio);
        if (pgpath) {
                ps = &pgpath->pg->ps;
                if (ps->type->end_io)
-                       ps->type->end_io(ps, &pgpath->path);
+                       ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
        }
-       if (r != DM_ENDIO_INCOMPLETE)
-               mempool_free(mpio, m->mpio_pool);
+       mempool_free(mpio, m->mpio_pool);
 
        return r;
 }
@@ -1411,7 +1426,7 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
        spin_lock_irqsave(&m->lock, flags);
 
        if (!m->current_pgpath)
-               __choose_pgpath(m);
+               __choose_pgpath(m, 0);
 
        if (m->current_pgpath) {
                bdev = m->current_pgpath->path.dev->bdev;
@@ -1428,22 +1443,113 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
        return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
 }
 
+/*
+ * Invoke @fn once for every path device of every priority group,
+ * passing ti->begin as the start offset.  Iteration stops at the
+ * first non-zero result, which is returned; 0 is returned when every
+ * call returned 0 (or there are no paths).
+ */
+static int multipath_iterate_devices(struct dm_target *ti,
+                                    iterate_devices_callout_fn fn, void *data)
+{
+       struct multipath *m = ti->private;
+       struct priority_group *pg;
+       struct pgpath *p;
+
+       list_for_each_entry(pg, &m->priority_groups, list) {
+               list_for_each_entry(p, &pg->pgpaths, list) {
+                       int ret = fn(ti, p->path.dev, ti->begin, data);
+
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       return 0;
+}
+
+/* Ask dm core whether the request queue behind this path is busy. */
+static int __pgpath_busy(struct pgpath *pgpath)
+{
+       return dm_underlying_device_busy(
+                       bdev_get_queue(pgpath->path.dev->bdev));
+}
+
+/*
+ * Tell dm core whether it should hold back I/O for this target.
+ *
+ * "Busy" is reported only when I/O could be mapped but the underlying
+ * devices are busy, i.e. mapping now would merely leave the I/O
+ * waiting on the underlying queue.  If the I/O would instead be killed
+ * or queued inside the target because no mapping is available, "busy"
+ * is not reported: otherwise dm core would not hand us the I/O and we
+ * could not act on it.
+ */
+static int multipath_busy(struct dm_target *ti)
+{
+       struct multipath *m = ti->private;
+       struct priority_group *pg = NULL;
+       struct pgpath *pgpath;
+       int active_seen = 0, idle_seen = 0;
+       unsigned long flags;
+
+       spin_lock_irqsave(&m->lock, flags);
+
+       /* Guess which priority_group will be used at next mapping time */
+       if (unlikely(!m->current_pgpath && m->next_pg))
+               pg = m->next_pg;
+       else if (likely(m->current_pg))
+               pg = m->current_pg;
+
+       /*
+        * When pg is still NULL we cannot tell which pg the next mapping
+        * will use.  __choose_pgpath() is deliberately not called here so
+        * that a mere busy check never triggers pg_init; the target is
+        * reported as not busy and the mapping is simply attempted.
+        */
+       if (pg) {
+               /*
+                * A single active, non-busy path is enough: the path
+                * selector will be able to pick it, so the pg is not busy.
+                */
+               list_for_each_entry(pgpath, &pg->pgpaths, list) {
+                       if (!pgpath->is_active)
+                               continue;
+
+                       active_seen = 1;
+                       if (!__pgpath_busy(pgpath)) {
+                               idle_seen = 1;
+                               break;
+                       }
+               }
+       }
+
+       spin_unlock_irqrestore(&m->lock, flags);
+
+       /*
+        * Without any active path this pg will not be used and current_pg
+        * changes at the next mapping, so that case is "not busy" as well.
+        */
+       return active_seen && !idle_seen;
+}
+
 /*-----------------------------------------------------------------
  * Module setup
  *---------------------------------------------------------------*/
 static struct target_type multipath_target = {
        .name = "multipath",
-       .version = {1, 0, 5},
+       .version = {1, 1, 0},
        .module = THIS_MODULE,
        .ctr = multipath_ctr,
        .dtr = multipath_dtr,
-       .map = multipath_map,
-       .end_io = multipath_end_io,
+       .map_rq = multipath_map,
+       .rq_end_io = multipath_end_io,
        .presuspend = multipath_presuspend,
        .resume = multipath_resume,
        .status = multipath_status,
        .message = multipath_message,
        .ioctl  = multipath_ioctl,
+       .iterate_devices = multipath_iterate_devices,
+       .busy = multipath_busy,
 };
 
 static int __init dm_multipath_init(void)
index 27357b85d73d7495cbcb2d61e8522ac942317901..e7d1fa8b0459f3deaadc16a34a09231764073fbc 100644 (file)
@@ -56,7 +56,8 @@ struct path_selector_type {
         * the path fails.
         */
        struct dm_path *(*select_path) (struct path_selector *ps,
-                                    unsigned *repeat_count);
+                                       unsigned *repeat_count,
+                                       size_t nr_bytes);
 
        /*
         * Notify the selector that a path has failed.
@@ -75,7 +76,10 @@ struct path_selector_type {
        int (*status) (struct path_selector *ps, struct dm_path *path,
                       status_type_t type, char *result, unsigned int maxlen);
 
-       int (*end_io) (struct path_selector *ps, struct dm_path *path);
+       int (*start_io) (struct path_selector *ps, struct dm_path *path,
+                        size_t nr_bytes);
+       int (*end_io) (struct path_selector *ps, struct dm_path *path,
+                      size_t nr_bytes);
 };
 
 /* Register a path selector */
diff --git a/drivers/md/dm-queue-length.c b/drivers/md/dm-queue-length.c
new file mode 100644 (file)
index 0000000..f92b6ce
--- /dev/null
@@ -0,0 +1,263 @@
+/*
+ * Copyright (C) 2004-2005 IBM Corp.  All Rights Reserved.
+ * Copyright (C) 2006-2009 NEC Corporation.
+ *
+ * dm-queue-length.c
+ *
+ * Module Author: Stefan Bader, IBM
+ * Modified by: Kiyoshi Ueda, NEC
+ *
+ * This file is released under the GPL.
+ *
+ * queue-length path selector - choose a path with the least number of
+ * in-flight I/Os.
+ */
+
+#include "dm.h"
+#include "dm-path-selector.h"
+
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <asm/atomic.h>
+
+#define DM_MSG_PREFIX  "multipath queue-length"
+#define QL_MIN_IO      128
+#define QL_VERSION     "0.1.0"
+
+/*
+ * Per-selector state.  Paths are added to valid_paths by ql_add_path(),
+ * moved to failed_paths by ql_fail_path() and moved back by
+ * ql_reinstate_path().
+ */
+struct selector {
+       struct list_head        valid_paths;
+       struct list_head        failed_paths;
+};
+
+/* Per-path selector state, stored in dm_path->pscontext by ql_add_path(). */
+struct path_info {
+       struct list_head        list;   /* link on valid_paths or failed_paths */
+       struct dm_path          *path;  /* the path this entry describes */
+       /* handed back through *repeat_count by ql_select_path() */
+       unsigned                repeat_count;
+       atomic_t                qlen;   /* the number of in-flight I/Os */
+};
+
+/* Allocate a selector with both path lists initialised; NULL on failure. */
+static struct selector *alloc_selector(void)
+{
+       struct selector *sel;
+
+       sel = kmalloc(sizeof(*sel), GFP_KERNEL);
+       if (!sel)
+               return NULL;
+
+       INIT_LIST_HEAD(&sel->valid_paths);
+       INIT_LIST_HEAD(&sel->failed_paths);
+
+       return sel;
+}
+
+/*
+ * Attach a freshly allocated selector to ps->context.
+ * argc and argv are not used.  Returns 0, or -ENOMEM when the
+ * allocation fails (ps->context is left untouched in that case).
+ */
+static int ql_create(struct path_selector *ps, unsigned argc, char **argv)
+{
+       struct selector *sel = alloc_selector();
+
+       if (sel) {
+               ps->context = sel;
+               return 0;
+       }
+
+       return -ENOMEM;
+}
+
+/* Unlink and free every path_info on the given list, leaving it empty. */
+static void ql_free_paths(struct list_head *paths)
+{
+       while (!list_empty(paths)) {
+               struct path_info *info =
+                       list_entry(paths->next, struct path_info, list);
+
+               list_del(&info->list);
+               kfree(info);
+       }
+}
+
+/*
+ * Release everything owned by the selector: the path_info entries on
+ * both lists and the selector itself.  ps->context is cleared last.
+ */
+static void ql_destroy(struct path_selector *ps)
+{
+       struct selector *sel = ps->context;
+
+       ql_free_paths(&sel->valid_paths);
+       ql_free_paths(&sel->failed_paths);
+
+       kfree(sel);
+       ps->context = NULL;
+}
+
+/*
+ * Emit status text into result (at most maxlen bytes, via DMEMIT).
+ *   path == NULL     : selector-level output, always "0 "
+ *   STATUSTYPE_INFO  : the path's current in-flight I/O count
+ *   STATUSTYPE_TABLE : the path's repeat_count
+ * Returns the value of sz accumulated by DMEMIT.
+ */
+static int ql_status(struct path_selector *ps, struct dm_path *path,
+                    status_type_t type, char *result, unsigned maxlen)
+{
+       unsigned sz = 0;
+       struct path_info *info;
+
+       if (!path) {
+               DMEMIT("0 ");
+               return sz;
+       }
+
+       info = path->pscontext;
+
+       if (type == STATUSTYPE_INFO) {
+               DMEMIT("%d ", atomic_read(&info->qlen));
+       } else if (type == STATUSTYPE_TABLE) {
+               DMEMIT("%u ", info->repeat_count);
+       }
+
+       return sz;
+}
+
+/*
+ * Register a path with the selector.
+ *
+ * Arguments: [<repeat_count>]
+ *      <repeat_count>: The number of I/Os before switching path.
+ *                      If not given, default (QL_MIN_IO) is used.
+ *
+ * On success the new path_info is stored in path->pscontext and appended
+ * to the valid list; returns 0.  On failure *error is set to a message
+ * and -EINVAL (bad arguments) or -ENOMEM (allocation) is returned.
+ */
+static int ql_add_path(struct path_selector *ps, struct dm_path *path,
+                      int argc, char **argv, char **error)
+{
+       struct selector *sel = ps->context;
+       struct path_info *info;
+       unsigned count = QL_MIN_IO;
+
+       if (argc > 1) {
+               *error = "queue-length ps: incorrect number of arguments";
+               return -EINVAL;
+       }
+
+       if (argc == 1) {
+               if (sscanf(argv[0], "%u", &count) != 1) {
+                       *error = "queue-length ps: invalid repeat count";
+                       return -EINVAL;
+               }
+       }
+
+       info = kmalloc(sizeof(*info), GFP_KERNEL);
+       if (!info) {
+               *error = "queue-length ps: Error allocating path information";
+               return -ENOMEM;
+       }
+
+       /* A new path starts with nothing in flight. */
+       atomic_set(&info->qlen, 0);
+       info->repeat_count = count;
+       info->path = path;
+
+       path->pscontext = info;
+       list_add_tail(&info->list, &sel->valid_paths);
+
+       return 0;
+}
+
+/* Take a path out of selection by moving it onto the failed list. */
+static void ql_fail_path(struct path_selector *ps, struct dm_path *path)
+{
+       struct path_info *info = path->pscontext;
+       struct selector *sel = ps->context;
+
+       list_move(&info->list, &sel->failed_paths);
+}
+
+/*
+ * Make a path selectable again by moving it to the tail of the valid
+ * list.  Always returns 0.
+ */
+static int ql_reinstate_path(struct path_selector *ps, struct dm_path *path)
+{
+       struct path_info *info = path->pscontext;
+       struct selector *sel = ps->context;
+
+       list_move_tail(&info->list, &sel->valid_paths);
+       return 0;
+}
+
+/*
+ * Choose the valid path with the fewest in-flight I/Os.
+ *
+ * Returns NULL when no valid path exists; otherwise returns the chosen
+ * path and stores its repeat_count in *repeat_count.  nr_bytes is not
+ * used by this selector.
+ */
+static struct dm_path *ql_select_path(struct path_selector *ps,
+                                     unsigned *repeat_count, size_t nr_bytes)
+{
+       struct selector *sel = ps->context;
+       struct path_info *cur = NULL, *least = NULL;
+
+       if (list_empty(&sel->valid_paths))
+               return NULL;
+
+       /*
+        * Rotate the list head to the tail so that ties are not always
+        * resolved in favour of the same path.
+        */
+       list_move_tail(sel->valid_paths.next, &sel->valid_paths);
+
+       list_for_each_entry(cur, &sel->valid_paths, list) {
+               if (least == NULL ||
+                   (atomic_read(&cur->qlen) < atomic_read(&least->qlen)))
+                       least = cur;
+
+               /* An idle path cannot be beaten; stop scanning. */
+               if (atomic_read(&least->qlen) == 0)
+                       break;
+       }
+
+       if (least == NULL)
+               return NULL;
+
+       *repeat_count = least->repeat_count;
+       return least->path;
+}
+
+/* Account one more in-flight I/O on the path.  Always returns 0. */
+static int ql_start_io(struct path_selector *ps, struct dm_path *path,
+                      size_t nr_bytes)
+{
+       struct path_info *info = path->pscontext;
+
+       atomic_inc(&info->qlen);
+       return 0;
+}
+
+/* Account one fewer in-flight I/O on the path.  Always returns 0. */
+static int ql_end_io(struct path_selector *ps, struct dm_path *path,
+                    size_t nr_bytes)
+{
+       struct path_info *info = path->pscontext;
+
+       atomic_dec(&info->qlen);
+       return 0;
+}
+
+/*
+ * Operations table for the "queue-length" selector; registered by
+ * dm_ql_init() and unregistered by dm_ql_exit().
+ */
+static struct path_selector_type ql_ps = {
+       .name           = "queue-length",
+       .module         = THIS_MODULE,
+       .table_args     = 1,
+       .info_args      = 1,
+       .create         = ql_create,
+       .destroy        = ql_destroy,
+       .status         = ql_status,
+       .add_path       = ql_add_path,
+       .fail_path      = ql_fail_path,
+       .reinstate_path = ql_reinstate_path,
+       .select_path    = ql_select_path,
+       .start_io       = ql_start_io,
+       .end_io         = ql_end_io,
+};
+
+/*
+ * Module init: register the queue-length path selector.
+ *
+ * Returns the result of dm_register_path_selector().  The "loaded"
+ * banner is printed only when registration did not fail, so the log
+ * never claims success right after reporting an error.
+ */
+static int __init dm_ql_init(void)
+{
+       int r = dm_register_path_selector(&ql_ps);
+
+       if (r < 0) {
+               DMERR("register failed %d", r);
+               return r;
+       }
+
+       DMINFO("version " QL_VERSION " loaded");
+
+       return r;
+}
+
+/* Module exit: unregister the selector and log an error if that fails. */
+static void __exit dm_ql_exit(void)
+{
+       int r;
+
+       r = dm_unregister_path_selector(&ql_ps);
+       if (r >= 0)
+               return;
+
+       DMERR("unregister failed %d", r);
+}
+
+module_init(dm_ql_init);
+module_exit(dm_ql_exit);
+
+MODULE_AUTHOR("Stefan Bader <Stefan.Bader at de.ibm.com>");
+MODULE_DESCRIPTION(
+       "(C) Copyright IBM Corp. 2004,2005   All Rights Reserved.\n"
+       DM_NAME " path selector to balance the number of in-flight I/Os"
+);
+MODULE_LICENSE("GPL");
index 076fbb4e967a4a651116c029983237d942b3f1ac..ce8868c768cce1c411d3ba76d4eae5b9011bee64 100644 (file)
@@ -1283,9 +1283,23 @@ static int mirror_status(struct dm_target *ti, status_type_t type,
        return 0;
 }
 
+/*
+ * Call fn for each mirror leg with its device and offset.  Iteration
+ * stops at the first non-zero return from fn, which is passed back to
+ * the caller; 0 is returned when every call returned 0.
+ */
+static int mirror_iterate_devices(struct dm_target *ti,
+                                 iterate_devices_callout_fn fn, void *data)
+{
+       struct mirror_set *ms = ti->private;
+       unsigned m;
+       int r;
+
+       for (m = 0; m < ms->nr_mirrors; m++) {
+               r = fn(ti, ms->mirror[m].dev, ms->mirror[m].offset, data);
+               if (r)
+                       return r;
+       }
+
+       return 0;
+}
+
 static struct target_type mirror_target = {
        .name    = "mirror",
-       .version = {1, 0, 20},
+       .version = {1, 12, 0},
        .module  = THIS_MODULE,
        .ctr     = mirror_ctr,
        .dtr     = mirror_dtr,
@@ -1295,6 +1309,7 @@ static struct target_type mirror_target = {
        .postsuspend = mirror_postsuspend,
        .resume  = mirror_resume,
        .status  = mirror_status,
+       .iterate_devices = mirror_iterate_devices,
 };
 
 static int __init dm_mirror_init(void)
index 7b899be0b0873d66499a4f3120ba3245cc828908..36dbe29f2fd60e775b05a0c0562e4741a42a8d08 100644 (file)
@@ -283,7 +283,7 @@ static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
 
        nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
        if (unlikely(!nreg))
-               nreg = kmalloc(sizeof(*nreg), GFP_NOIO);
+               nreg = kmalloc(sizeof(*nreg), GFP_NOIO | __GFP_NOFAIL);
 
        nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
                      DM_RH_CLEAN : DM_RH_NOSYNC;
index cdfbf65b28cb0fb5d4af95d9e6718d80d69be760..24752f449bef8834b8ebb0329293491bdf4f07ed 100644 (file)
@@ -161,7 +161,7 @@ static int rr_reinstate_path(struct path_selector *ps, struct dm_path *p)
 }
 
 static struct dm_path *rr_select_path(struct path_selector *ps,
-                                  unsigned *repeat_count)
+                                     unsigned *repeat_count, size_t nr_bytes)
 {
        struct selector *s = (struct selector *) ps->context;
        struct path_info *pi = NULL;
diff --git a/drivers/md/dm-service-time.c b/drivers/md/dm-service-time.c
new file mode 100644 (file)
index 0000000..cfa668f
--- /dev/null
@@ -0,0 +1,339 @@
+/*
+ * Copyright (C) 2007-2009 NEC Corporation.  All Rights Reserved.
+ *
+ * Module Author: Kiyoshi Ueda
+ *
+ * This file is released under the GPL.
+ *
+ * Throughput oriented path selector.
+ */
+
+#include "dm.h"
+#include "dm-path-selector.h"
+
+#define DM_MSG_PREFIX  "multipath service-time"
+#define ST_MIN_IO      1
+#define ST_MAX_RELATIVE_THROUGHPUT     100
+#define ST_MAX_RELATIVE_THROUGHPUT_SHIFT       7
+#define ST_MAX_INFLIGHT_SIZE   ((size_t)-1 >> ST_MAX_RELATIVE_THROUGHPUT_SHIFT)
+#define ST_VERSION     "0.2.0"
+
+/*
+ * Per-selector state.  Paths are added to valid_paths by st_add_path(),
+ * moved to failed_paths by st_fail_path() and moved back by
+ * st_reinstate_path().
+ */
+struct selector {
+       struct list_head valid_paths;
+       struct list_head failed_paths;
+};
+
+/* Per-path selector state, stored in dm_path->pscontext by st_add_path(). */
+struct path_info {
+       struct list_head list;          /* link on valid_paths or failed_paths */
+       struct dm_path *path;           /* the path this entry describes */
+       unsigned repeat_count;          /* handed back by st_select_path() */
+       unsigned relative_throughput;   /* weight used by st_compare_load() */
+       atomic_t in_flight_size;        /* Total size of in-flight I/Os */
+};
+
+/* Allocate a selector and initialise its two (empty) path lists. */
+static struct selector *alloc_selector(void)
+{
+       struct selector *sel = kmalloc(sizeof(*sel), GFP_KERNEL);
+
+       if (sel == NULL)
+               return sel;
+
+       INIT_LIST_HEAD(&sel->valid_paths);
+       INIT_LIST_HEAD(&sel->failed_paths);
+       return sel;
+}
+
+/*
+ * Create the selector context and store it in ps->context.
+ * argc and argv are not used.  Returns 0 or -ENOMEM.
+ */
+static int st_create(struct path_selector *ps, unsigned argc, char **argv)
+{
+       struct selector *sel;
+
+       sel = alloc_selector();
+       if (sel == NULL)
+               return -ENOMEM;
+
+       ps->context = sel;
+
+       return 0;
+}
+
+/* Unlink and free every path_info found on the given list. */
+static void free_paths(struct list_head *paths)
+{
+       struct list_head *pos, *tmp;
+
+       list_for_each_safe(pos, tmp, paths) {
+               struct path_info *info =
+                       list_entry(pos, struct path_info, list);
+
+               list_del(&info->list);
+               kfree(info);
+       }
+}
+
+/*
+ * Free all path_info entries on both lists, then the selector itself,
+ * and finally clear ps->context.
+ */
+static void st_destroy(struct path_selector *ps)
+{
+       struct selector *sel = ps->context;
+
+       free_paths(&sel->valid_paths);
+       free_paths(&sel->failed_paths);
+
+       kfree(sel);
+       ps->context = NULL;
+}
+
+/*
+ * Emit status text into result (at most maxlen bytes, via DMEMIT).
+ *   path == NULL     : selector-level output, always "0 "
+ *   STATUSTYPE_INFO  : in-flight size and relative_throughput
+ *   STATUSTYPE_TABLE : repeat_count and relative_throughput
+ * Returns the value of sz accumulated by DMEMIT.
+ */
+static int st_status(struct path_selector *ps, struct dm_path *path,
+                    status_type_t type, char *result, unsigned maxlen)
+{
+       unsigned sz = 0;
+       struct path_info *info;
+
+       if (!path) {
+               DMEMIT("0 ");
+               return sz;
+       }
+
+       info = path->pscontext;
+
+       switch (type) {
+       case STATUSTYPE_INFO:
+               DMEMIT("%d %u ", atomic_read(&info->in_flight_size),
+                      info->relative_throughput);
+               break;
+       case STATUSTYPE_TABLE:
+               DMEMIT("%u %u ", info->repeat_count,
+                      info->relative_throughput);
+               break;
+       }
+
+       return sz;
+}
+
+/*
+ * Register a path with the selector.
+ *
+ * Arguments: [<repeat_count> [<relative_throughput>]]
+ *      <repeat_count>: The number of I/Os before switching path.
+ *                      If not given, default (ST_MIN_IO) is used.
+ *      <relative_throughput>: The relative throughput value of
+ *                      the path among all paths in the path-group.
+ *                      The valid range: 0-<ST_MAX_RELATIVE_THROUGHPUT>
+ *                      If not given, minimum value '1' is used.
+ *                      If '0' is given, the path isn't selected while
+ *                      other paths having a positive value are
+ *                      available.
+ *
+ * On success the new path_info is stored in path->pscontext and appended
+ * to the valid list; returns 0.  On failure *error is set to a message
+ * and -EINVAL (bad arguments) or -ENOMEM (allocation) is returned.
+ */
+static int st_add_path(struct path_selector *ps, struct dm_path *path,
+                      int argc, char **argv, char **error)
+{
+       struct selector *sel = ps->context;
+       struct path_info *info;
+       unsigned count = ST_MIN_IO;
+       unsigned throughput = 1;
+
+       if (argc > 2) {
+               *error = "service-time ps: incorrect number of arguments";
+               return -EINVAL;
+       }
+
+       if (argc) {
+               if (sscanf(argv[0], "%u", &count) != 1) {
+                       *error = "service-time ps: invalid repeat count";
+                       return -EINVAL;
+               }
+       }
+
+       if (argc == 2) {
+               if (sscanf(argv[1], "%u", &throughput) != 1 ||
+                   throughput > ST_MAX_RELATIVE_THROUGHPUT) {
+                       *error = "service-time ps: invalid relative_throughput value";
+                       return -EINVAL;
+               }
+       }
+
+       info = kmalloc(sizeof(*info), GFP_KERNEL);
+       if (!info) {
+               *error = "service-time ps: Error allocating path context";
+               return -ENOMEM;
+       }
+
+       /* A new path starts with nothing in flight. */
+       atomic_set(&info->in_flight_size, 0);
+       info->relative_throughput = throughput;
+       info->repeat_count = count;
+       info->path = path;
+
+       path->pscontext = info;
+       list_add_tail(&info->list, &sel->valid_paths);
+
+       return 0;
+}
+
+/* Take a path out of selection by moving it onto the failed list. */
+static void st_fail_path(struct path_selector *ps, struct dm_path *path)
+{
+       struct path_info *info = path->pscontext;
+       struct selector *sel = ps->context;
+
+       list_move(&info->list, &sel->failed_paths);
+}
+
+/*
+ * Make a path selectable again by moving it to the tail of the valid
+ * list.  Always returns 0.
+ */
+static int st_reinstate_path(struct path_selector *ps, struct dm_path *path)
+{
+       struct path_info *info = path->pscontext;
+       struct selector *sel = ps->context;
+
+       list_move_tail(&info->list, &sel->valid_paths);
+       return 0;
+}
+
+/*
+ * Compare the estimated service time of 2 paths, pi1 and pi2,
+ * for the incoming I/O.
+ *
+ * Returns:
+ * < 0 : pi1 is better
+ * 0   : no difference between pi1 and pi2
+ * > 0 : pi2 is better
+ *
+ * Only the sign of the result is meaningful.
+ *
+ * Description:
+ * Basically, the service time is estimated by:
+ *     ('pi->in-flight-size' + 'incoming') / 'pi->relative_throughput'
+ * To reduce the calculation, some optimizations are made.
+ * (See comments inline)
+ */
+static int st_compare_load(struct path_info *pi1, struct path_info *pi2,
+                          size_t incoming)
+{
+       size_t sz1, sz2, st1, st2;
+
+       sz1 = atomic_read(&pi1->in_flight_size);
+       sz2 = atomic_read(&pi2->in_flight_size);
+
+       /*
+        * Case 1: Both have same throughput value. Choose less loaded path.
+        */
+       if (pi1->relative_throughput == pi2->relative_throughput)
+               return sz1 - sz2;
+
+       /*
+        * Case 2a: Both have same load. Choose higher throughput path.
+        * Case 2b: One path has no throughput value. Choose the other one.
+        */
+       if (sz1 == sz2 ||
+           !pi1->relative_throughput || !pi2->relative_throughput)
+               return pi2->relative_throughput - pi1->relative_throughput;
+
+       /*
+        * Case 3: Calculate service time. Choose faster path.
+        *         Service time using pi1:
+        *             st1 = (sz1 + incoming) / pi1->relative_throughput
+        *         Service time using pi2:
+        *             st2 = (sz2 + incoming) / pi2->relative_throughput
+        *
+        *         To avoid the division, transform the expression to use
+        *         multiplication.
+        *         Because ->relative_throughput > 0 here, if st1 < st2,
+        *         the expressions below are the same meaning:
+        *             (sz1 + incoming) / pi1->relative_throughput <
+        *                 (sz2 + incoming) / pi2->relative_throughput
+        *             (sz1 + incoming) * pi2->relative_throughput <
+        *                 (sz2 + incoming) * pi1->relative_throughput
+        *         So use the latter one.
+        */
+       sz1 += incoming;
+       sz2 += incoming;
+       if (unlikely(sz1 >= ST_MAX_INFLIGHT_SIZE ||
+                    sz2 >= ST_MAX_INFLIGHT_SIZE)) {
+               /*
+                * Size may be too big for multiplying pi->relative_throughput
+                * and overflow.
+                * To avoid the overflow and mis-selection, shift down both.
+                */
+               sz1 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT;
+               sz2 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT;
+       }
+       st1 = sz1 * pi2->relative_throughput;
+       st2 = sz2 * pi1->relative_throughput;
+
+       /*
+        * Compare explicitly instead of returning st1 - st2: the products
+        * are size_t and their difference can exceed the range of int, in
+        * which case the narrowed value may have the wrong sign or be 0.
+        */
+       if (st1 != st2)
+               return (st1 < st2) ? -1 : 1;
+
+       /*
+        * Case 4: Service time is equal. Choose higher throughput path.
+        */
+       return pi2->relative_throughput - pi1->relative_throughput;
+}
+
+/*
+ * Choose the valid path that st_compare_load() rates best for an
+ * incoming I/O of nr_bytes.
+ *
+ * Returns NULL when no valid path exists; otherwise returns the chosen
+ * path and stores its repeat_count in *repeat_count.
+ */
+static struct dm_path *st_select_path(struct path_selector *ps,
+                                     unsigned *repeat_count, size_t nr_bytes)
+{
+       struct selector *sel = ps->context;
+       struct path_info *cand = NULL, *winner = NULL;
+
+       if (list_empty(&sel->valid_paths))
+               return NULL;
+
+       /*
+        * Rotate the list head to the tail so that ties are not always
+        * resolved in favour of the same path.
+        */
+       list_move_tail(sel->valid_paths.next, &sel->valid_paths);
+
+       list_for_each_entry(cand, &sel->valid_paths, list) {
+               /* Keep the current winner unless cand is strictly better. */
+               if (winner && st_compare_load(cand, winner, nr_bytes) >= 0)
+                       continue;
+
+               winner = cand;
+       }
+
+       if (winner == NULL)
+               return NULL;
+
+       *repeat_count = winner->repeat_count;
+       return winner->path;
+}
+
+/* Add nr_bytes to the path's in-flight size.  Always returns 0. */
+static int st_start_io(struct path_selector *ps, struct dm_path *path,
+                      size_t nr_bytes)
+{
+       struct path_info *info = path->pscontext;
+
+       atomic_add(nr_bytes, &info->in_flight_size);
+       return 0;
+}
+
+/* Subtract nr_bytes from the path's in-flight size.  Always returns 0. */
+static int st_end_io(struct path_selector *ps, struct dm_path *path,
+                    size_t nr_bytes)
+{
+       struct path_info *info = path->pscontext;
+
+       atomic_sub(nr_bytes, &info->in_flight_size);
+       return 0;
+}
+
+/*
+ * Operations table for the "service-time" selector; registered by
+ * dm_st_init() and unregistered by dm_st_exit().
+ */
+static struct path_selector_type st_ps = {
+       .name           = "service-time",
+       .module         = THIS_MODULE,
+       .table_args     = 2,
+       .info_args      = 2,
+       .create         = st_create,
+       .destroy        = st_destroy,
+       .status         = st_status,
+       .add_path       = st_add_path,
+       .fail_path      = st_fail_path,
+       .reinstate_path = st_reinstate_path,
+       .select_path    = st_select_path,
+       .start_io       = st_start_io,
+       .end_io         = st_end_io,
+};
+
+/*
+ * Module init: register the service-time path selector.
+ *
+ * Returns the result of dm_register_path_selector().  The "loaded"
+ * banner is printed only when registration did not fail, so the log
+ * never claims success right after reporting an error.
+ */
+static int __init dm_st_init(void)
+{
+       int r = dm_register_path_selector(&st_ps);
+
+       if (r < 0) {
+               DMERR("register failed %d", r);
+               return r;
+       }
+
+       DMINFO("version " ST_VERSION " loaded");
+
+       return r;
+}
+
+/* Module exit: unregister the selector and log an error if that fails. */
+static void __exit dm_st_exit(void)
+{
+       int r;
+
+       r = dm_unregister_path_selector(&st_ps);
+       if (r >= 0)
+               return;
+
+       DMERR("unregister failed %d", r);
+}
+
+module_init(dm_st_init);
+module_exit(dm_st_exit);
+
+MODULE_DESCRIPTION(DM_NAME " throughput oriented path selector");
+MODULE_AUTHOR("Kiyoshi Ueda <k-ueda@ct.jp.nec.com>");
+MODULE_LICENSE("GPL");
index 2662a41337e7813b370df2d61dbfc70e964e066a..6e3fe4f14934811ddd0f9bba845ebdd4bb504036 100644 (file)
@@ -636,7 +636,7 @@ static void persistent_commit_exception(struct dm_exception_store *store,
        /*
         * Commit exceptions to disk.
         */
-       if (ps->valid && area_io(ps, WRITE))
+       if (ps->valid && area_io(ps, WRITE_BARRIER))
                ps->valid = 0;
 
        /*
index d73f17fc777823968e4ab93d5a65dc30255233fc..d573165cd2b788968f7d274e03966b96c93da889 100644 (file)
@@ -678,6 +678,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
        ti->private = s;
        ti->split_io = s->store->chunk_size;
+       ti->num_flush_requests = 1;
 
        return 0;
 
@@ -1030,6 +1031,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
        chunk_t chunk;
        struct dm_snap_pending_exception *pe = NULL;
 
+       if (unlikely(bio_empty_barrier(bio))) {
+               bio->bi_bdev = s->store->cow->bdev;
+               return DM_MAPIO_REMAPPED;
+       }
+
        chunk = sector_to_chunk(s->store, bio->bi_sector);
 
        /* Full snapshots are not usable */
@@ -1338,6 +1344,8 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        }
 
        ti->private = dev;
+       ti->num_flush_requests = 1;
+
        return 0;
 }
 
@@ -1353,6 +1361,9 @@ static int origin_map(struct dm_target *ti, struct bio *bio,
        struct dm_dev *dev = ti->private;
        bio->bi_bdev = dev->bdev;
 
+       if (unlikely(bio_empty_barrier(bio)))
+               return DM_MAPIO_REMAPPED;
+
        /* Only tell snapshots if this is a write */
        return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED;
 }
index 41569bc60abc9dfe2d8986beba23099ffd068d31..b240e85ae39aa4b444135b0a71823e35b03aac49 100644 (file)
@@ -167,6 +167,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        sc->stripes = stripes;
        sc->stripe_width = width;
        ti->split_io = chunk_size;
+       ti->num_flush_requests = stripes;
 
        sc->chunk_mask = ((sector_t) chunk_size) - 1;
        for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++)
@@ -211,10 +212,18 @@ static int stripe_map(struct dm_target *ti, struct bio *bio,
                      union map_info *map_context)
 {
        struct stripe_c *sc = (struct stripe_c *) ti->private;
+       sector_t offset, chunk;
+       uint32_t stripe;
 
-       sector_t offset = bio->bi_sector - ti->begin;
-       sector_t chunk = offset >> sc->chunk_shift;
-       uint32_t stripe = sector_div(chunk, sc->stripes);
+       if (unlikely(bio_empty_barrier(bio))) {
+               BUG_ON(map_context->flush_request >= sc->stripes);
+               bio->bi_bdev = sc->stripe[map_context->flush_request].dev->bdev;
+               return DM_MAPIO_REMAPPED;
+       }
+
+       offset = bio->bi_sector - ti->begin;
+       chunk = offset >> sc->chunk_shift;
+       stripe = sector_div(chunk, sc->stripes);
 
        bio->bi_bdev = sc->stripe[stripe].dev->bdev;
        bio->bi_sector = sc->stripe[stripe].physical_start +
@@ -304,15 +313,31 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio,
        return error;
 }
 
+/*
+ * Call fn for each stripe with its device and physical start.  fn is
+ * always called for stripe 0; iteration stops at the first non-zero
+ * return or after the last stripe.  Returns the last value fn returned.
+ */
+static int stripe_iterate_devices(struct dm_target *ti,
+                                 iterate_devices_callout_fn fn, void *data)
+{
+       struct stripe_c *sc = ti->private;
+       unsigned idx = 0;
+       int r;
+
+       for (;;) {
+               r = fn(ti, sc->stripe[idx].dev,
+                      sc->stripe[idx].physical_start, data);
+               if (r || ++idx >= sc->stripes)
+                       break;
+       }
+
+       return r;
+}
+
 static struct target_type stripe_target = {
        .name   = "striped",
-       .version = {1, 1, 0},
+       .version = {1, 2, 0},
        .module = THIS_MODULE,
        .ctr    = stripe_ctr,
        .dtr    = stripe_dtr,
        .map    = stripe_map,
        .end_io = stripe_end_io,
        .status = stripe_status,
+       .iterate_devices = stripe_iterate_devices,
 };
 
 int __init dm_stripe_init(void)
index a2a45e6c7c8bb540fc0b3d28ab8c2795cca00c8c..4b045903a4e21e6ce6e0c06593c7ed7e867a13b1 100644 (file)
@@ -57,12 +57,21 @@ static ssize_t dm_attr_uuid_show(struct mapped_device *md, char *buf)
        return strlen(buf);
 }
 
+/*
+ * sysfs show handler: write dm_suspended(md) as a decimal number plus
+ * newline into buf and return the number of characters written.
+ */
+static ssize_t dm_attr_suspended_show(struct mapped_device *md, char *buf)
+{
+       return sprintf(buf, "%d\n", dm_suspended(md));
+}
+
 static DM_ATTR_RO(name);
 static DM_ATTR_RO(uuid);
+static DM_ATTR_RO(suspended);
 
 static struct attribute *dm_attrs[] = {
        &dm_attr_name.attr,
        &dm_attr_uuid.attr,
+       &dm_attr_suspended.attr,
        NULL,
 };
 
index e9a73bb242b0935eefae2ce0744ed97965a2143d..4899ebe767c86d342853397e23530cb3b0ea9eea 100644 (file)
@@ -41,6 +41,7 @@
 struct dm_table {
        struct mapped_device *md;
        atomic_t holders;
+       unsigned type;
 
        /* btree table */
        unsigned int depth;
@@ -62,15 +63,11 @@ struct dm_table {
        /* a list of devices used by this table */
        struct list_head devices;
 
-       /*
-        * These are optimistic limits taken from all the
-        * targets, some targets will need smaller limits.
-        */
-       struct io_restrictions limits;
-
        /* events get handed up using this callback */
        void (*event_fn)(void *);
        void *event_context;
+
+       struct dm_md_mempools *mempools;
 };
 
 /*
@@ -88,43 +85,6 @@ static unsigned int int_log(unsigned int n, unsigned int base)
        return result;
 }
 
-/*
- * Returns the minimum that is _not_ zero, unless both are zero.
- */
-#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
-
-/*
- * Combine two io_restrictions, always taking the lower value.
- */
-static void combine_restrictions_low(struct io_restrictions *lhs,
-                                    struct io_restrictions *rhs)
-{
-       lhs->max_sectors =
-               min_not_zero(lhs->max_sectors, rhs->max_sectors);
-
-       lhs->max_phys_segments =
-               min_not_zero(lhs->max_phys_segments, rhs->max_phys_segments);
-
-       lhs->max_hw_segments =
-               min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments);
-
-       lhs->logical_block_size = max(lhs->logical_block_size,
-                                     rhs->logical_block_size);
-
-       lhs->max_segment_size =
-               min_not_zero(lhs->max_segment_size, rhs->max_segment_size);
-
-       lhs->max_hw_sectors =
-               min_not_zero(lhs->max_hw_sectors, rhs->max_hw_sectors);
-
-       lhs->seg_boundary_mask =
-               min_not_zero(lhs->seg_boundary_mask, rhs->seg_boundary_mask);
-
-       lhs->bounce_pfn = min_not_zero(lhs->bounce_pfn, rhs->bounce_pfn);
-
-       lhs->no_cluster |= rhs->no_cluster;
-}
-
 /*
  * Calculate the index of the child node of the n'th node k'th key.
  */
@@ -267,6 +227,8 @@ static void free_devices(struct list_head *devices)
        list_for_each_safe(tmp, next, devices) {
                struct dm_dev_internal *dd =
                    list_entry(tmp, struct dm_dev_internal, list);
+               DMWARN("dm_table_destroy: dm_put_device call missing for %s",
+                      dd->dm_dev.name);
                kfree(dd);
        }
 }
@@ -296,12 +258,10 @@ void dm_table_destroy(struct dm_table *t)
        vfree(t->highs);
 
        /* free the device list */
-       if (t->devices.next != &t->devices) {
-               DMWARN("devices still present during destroy: "
-                      "dm_table_remove_device calls missing");
-
+       if (t->devices.next != &t->devices)
                free_devices(&t->devices);
-       }
+
+       dm_free_md_mempools(t->mempools);
 
        kfree(t);
 }
@@ -385,15 +345,48 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
 /*
  * If possible, this checks an area of a destination device is valid.
  */
-static int check_device_area(struct dm_dev_internal *dd, sector_t start,
-                            sector_t len)
+static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
+                               sector_t start, void *data)
 {
-       sector_t dev_size = dd->dm_dev.bdev->bd_inode->i_size >> SECTOR_SHIFT;
+       /*
+        * data points at the queue_limits whose logical_block_size is used
+        * for the alignment checks below.  Returns 1 when the area
+        * [start, start + ti->len) is acceptable, 0 (after a warning)
+        * when it is not.
+        */
+       struct queue_limits *limits = data;
+       struct block_device *bdev = dev->bdev;
+       sector_t dev_size =
+               i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
+       unsigned short logical_block_size_sectors =
+               limits->logical_block_size >> SECTOR_SHIFT;
+       char b[BDEVNAME_SIZE];
 
        if (!dev_size)
                return 1;
 
-       return ((start < dev_size) && (len <= (dev_size - start)));
+       /*
+        * Compare the length against the space left after start instead of
+        * computing start + ti->len, which could wrap around sector_t and
+        * let an oversized target pass the check.
+        */
+       if ((start >= dev_size) || (ti->len > dev_size - start)) {
+               DMWARN("%s: %s too small for target",
+                      dm_device_name(ti->table->md), bdevname(bdev, b));
+               return 0;
+       }
+
+       /* Blocks of one sector or less cannot be misaligned. */
+       if (logical_block_size_sectors <= 1)
+               return 1;
+
+       if (start & (logical_block_size_sectors - 1)) {
+               DMWARN("%s: start=%llu not aligned to h/w "
+                      "logical block size %hu of %s",
+                      dm_device_name(ti->table->md),
+                      (unsigned long long)start,
+                      limits->logical_block_size, bdevname(bdev, b));
+               return 0;
+       }
+
+       if (ti->len & (logical_block_size_sectors - 1)) {
+               DMWARN("%s: len=%llu not aligned to h/w "
+                      "logical block size %hu of %s",
+                      dm_device_name(ti->table->md),
+                      (unsigned long long)ti->len,
+                      limits->logical_block_size, bdevname(bdev, b));
+               return 0;
+       }
+
+       return 1;
 }
 
 /*
@@ -479,38 +472,32 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
        }
        atomic_inc(&dd->count);
 
-       if (!check_device_area(dd, start, len)) {
-               DMWARN("device %s too small for target", path);
-               dm_put_device(ti, &dd->dm_dev);
-               return -EINVAL;
-       }
-
        *result = &dd->dm_dev;
-
        return 0;
 }
 
-void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
+/*
+ * Returns the minimum that is _not_ zero, unless both are zero.
+ */
+#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
+
+int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
+                        sector_t start, void *data)
 {
+       struct queue_limits *limits = data;
+       struct block_device *bdev = dev->bdev;
        struct request_queue *q = bdev_get_queue(bdev);
-       struct io_restrictions *rs = &ti->limits;
        char b[BDEVNAME_SIZE];
 
        if (unlikely(!q)) {
                DMWARN("%s: Cannot set limits for nonexistent device %s",
                       dm_device_name(ti->table->md), bdevname(bdev, b));
-               return;
+               return 0;
        }
 
-       /*
-        * Combine the device limits low.
-        *
-        * FIXME: if we move an io_restriction struct
-        *        into q this would just be a call to
-        *        combine_restrictions_low()
-        */
-       rs->max_sectors =
-               min_not_zero(rs->max_sectors, queue_max_sectors(q));
+       if (blk_stack_limits(limits, &q->limits, start) < 0)
+               DMWARN("%s: target device %s is misaligned",
+                      dm_device_name(ti->table->md), bdevname(bdev, b));
 
        /*
         * Check if merge fn is supported.
@@ -519,48 +506,21 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
         */
 
        if (q->merge_bvec_fn && !ti->type->merge)
-               rs->max_sectors =
-                       min_not_zero(rs->max_sectors,
+               limits->max_sectors =
+                       min_not_zero(limits->max_sectors,
                                     (unsigned int) (PAGE_SIZE >> 9));
-
-       rs->max_phys_segments =
-               min_not_zero(rs->max_phys_segments,
-                            queue_max_phys_segments(q));
-
-       rs->max_hw_segments =
-               min_not_zero(rs->max_hw_segments, queue_max_hw_segments(q));
-
-       rs->logical_block_size = max(rs->logical_block_size,
-                                    queue_logical_block_size(q));
-
-       rs->max_segment_size =
-               min_not_zero(rs->max_segment_size, queue_max_segment_size(q));
-
-       rs->max_hw_sectors =
-               min_not_zero(rs->max_hw_sectors, queue_max_hw_sectors(q));
-
-       rs->seg_boundary_mask =
-               min_not_zero(rs->seg_boundary_mask,
-                            queue_segment_boundary(q));
-
-       rs->bounce_pfn = min_not_zero(rs->bounce_pfn, queue_bounce_pfn(q));
-
-       rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+       return 0;
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
 
 int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
                  sector_t len, fmode_t mode, struct dm_dev **result)
 {
-       int r = __table_get_device(ti->table, ti, path,
-                                  start, len, mode, result);
-
-       if (!r)
-               dm_set_device_limits(ti, (*result)->bdev);
-
-       return r;
+       return __table_get_device(ti->table, ti, path,
+                                 start, len, mode, result);
 }
 
+
 /*
  * Decrement a devices use count and remove it if necessary.
  */
@@ -675,24 +635,78 @@ int dm_split_args(int *argc, char ***argvp, char *input)
        return 0;
 }
 
-static void check_for_valid_limits(struct io_restrictions *rs)
+/*
+ * Impose necessary and sufficient conditions on a device's table such
+ * that any incoming bio which respects its logical_block_size can be
+ * processed successfully.  If it falls across the boundary between
+ * two or more targets, the size of each piece it gets split into must
+ * be compatible with the logical_block_size of the target processing it.
+ */
+static int validate_hardware_logical_block_alignment(struct dm_table *table,
+                                                struct queue_limits *limits)
 {
-       if (!rs->max_sectors)
-               rs->max_sectors = SAFE_MAX_SECTORS;
-       if (!rs->max_hw_sectors)
-               rs->max_hw_sectors = SAFE_MAX_SECTORS;
-       if (!rs->max_phys_segments)
-               rs->max_phys_segments = MAX_PHYS_SEGMENTS;
-       if (!rs->max_hw_segments)
-               rs->max_hw_segments = MAX_HW_SEGMENTS;
-       if (!rs->logical_block_size)
-               rs->logical_block_size = 1 << SECTOR_SHIFT;
-       if (!rs->max_segment_size)
-               rs->max_segment_size = MAX_SEGMENT_SIZE;
-       if (!rs->seg_boundary_mask)
-               rs->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
-       if (!rs->bounce_pfn)
-               rs->bounce_pfn = -1;
+       /*
+        * This function uses arithmetic modulo the logical_block_size
+        * (in units of 512-byte sectors).
+        */
+       unsigned short device_logical_block_size_sects =
+               limits->logical_block_size >> SECTOR_SHIFT;
+
+       /*
+        * Offset of the start of the next table entry, mod logical_block_size.
+        */
+       unsigned short next_target_start = 0;
+
+       /*
+        * Given an aligned bio that extends beyond the end of a
+        * target, how many sectors must the next target handle?
+        */
+       unsigned short remaining = 0;
+
+       struct dm_target *uninitialized_var(ti);
+       struct queue_limits ti_limits;
+       unsigned i = 0;
+
+       /*
+        * Check each entry in the table in turn.
+        */
+       while (i < dm_table_get_num_targets(table)) {
+               ti = dm_table_get_target(table, i++);
+
+               blk_set_default_limits(&ti_limits);
+
+               /* combine all target devices' limits */
+               if (ti->type->iterate_devices)
+                       ti->type->iterate_devices(ti, dm_set_device_limits,
+                                                 &ti_limits);
+
+               /*
+                * If the remaining sectors fall entirely within this
+                * table entry, are they compatible with its logical_block_size?
+                */
+               if (remaining < ti->len &&
+                   remaining & ((ti_limits.logical_block_size >>
+                                 SECTOR_SHIFT) - 1))
+                       break;  /* Error */
+
+               next_target_start =
+                   (unsigned short) ((next_target_start + ti->len) &
+                                     (device_logical_block_size_sects - 1));
+               remaining = next_target_start ?
+                   device_logical_block_size_sects - next_target_start : 0;
+       }
+
+       if (remaining) {
+               DMWARN("%s: table line %u (start sect %llu len %llu) "
+                      "not aligned to h/w logical block size %hu",
+                      dm_device_name(table->md), i,
+                      (unsigned long long) ti->begin,
+                      (unsigned long long) ti->len,
+                      limits->logical_block_size);
+               return -EINVAL;
+       }
+
+       return 0;
 }
 
 int dm_table_add_target(struct dm_table *t, const char *type,
@@ -747,9 +761,6 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 
        t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
 
-       /* FIXME: the plan is to combine high here and then have
-        * the merge fn apply the target level restrictions. */
-       combine_restrictions_low(&t->limits, &tgt->limits);
        return 0;
 
  bad:
@@ -758,6 +769,104 @@ int dm_table_add_target(struct dm_table *t, const char *type,
        return r;
 }
 
+int dm_table_set_type(struct dm_table *t)
+{
+       unsigned i;
+       unsigned bio_based = 0, request_based = 0;
+       struct dm_target *tgt;
+       struct dm_dev_internal *dd;
+       struct list_head *devices;
+
+       for (i = 0; i < t->num_targets; i++) {
+               tgt = t->targets + i;
+               if (dm_target_request_based(tgt))
+                       request_based = 1;
+               else
+                       bio_based = 1;
+
+               if (bio_based && request_based) {
+                       DMWARN("Inconsistent table: different target types"
+                              " can't be mixed up");
+                       return -EINVAL;
+               }
+       }
+
+       if (bio_based) {
+               /* We must use this table as bio-based */
+               t->type = DM_TYPE_BIO_BASED;
+               return 0;
+       }
+
+       BUG_ON(!request_based); /* No targets in this table */
+
+       /* Non-request-stackable devices can't be used for request-based dm */
+       devices = dm_table_get_devices(t);
+       list_for_each_entry(dd, devices, list) {
+               if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev.bdev))) {
+                       DMWARN("table load rejected: including"
+                              " non-request-stackable devices");
+                       return -EINVAL;
+               }
+       }
+
+       /*
+        * Request-based dm supports only tables that have a single target now.
+        * To support multiple targets, request splitting support is needed,
+        * and that needs lots of changes in the block-layer.
+        * (e.g. request completion process for partial completion.)
+        */
+       if (t->num_targets > 1) {
+               DMWARN("Request-based dm doesn't support multiple targets yet");
+               return -EINVAL;
+       }
+
+       t->type = DM_TYPE_REQUEST_BASED;
+
+       return 0;
+}
+
+unsigned dm_table_get_type(struct dm_table *t)
+{
+       return t->type;
+}
+
+bool dm_table_bio_based(struct dm_table *t)
+{
+       return dm_table_get_type(t) == DM_TYPE_BIO_BASED;
+}
+
+bool dm_table_request_based(struct dm_table *t)
+{
+       return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED;
+}
+
+int dm_table_alloc_md_mempools(struct dm_table *t)
+{
+       unsigned type = dm_table_get_type(t);
+
+       if (unlikely(type == DM_TYPE_NONE)) {
+               DMWARN("no table type is set, can't allocate mempools");
+               return -EINVAL;
+       }
+
+       t->mempools = dm_alloc_md_mempools(type);
+       if (!t->mempools)
+               return -ENOMEM;
+
+       return 0;
+}
+
+void dm_table_free_md_mempools(struct dm_table *t)
+{
+       dm_free_md_mempools(t->mempools);
+       t->mempools = NULL;
+}
+
+struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t)
+{
+       return t->mempools;
+}
+
 static int setup_indexes(struct dm_table *t)
 {
        int i;
@@ -792,8 +901,6 @@ int dm_table_complete(struct dm_table *t)
        int r = 0;
        unsigned int leaf_nodes;
 
-       check_for_valid_limits(&t->limits);
-
        /* how many indexes will the btree have ? */
        leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
        t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
@@ -868,6 +975,57 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
        return &t->targets[(KEYS_PER_NODE * n) + k];
 }
 
+/*
+ * Establish the new table's queue_limits and validate them.
+ */
+int dm_calculate_queue_limits(struct dm_table *table,
+                             struct queue_limits *limits)
+{
+       struct dm_target *uninitialized_var(ti);
+       struct queue_limits ti_limits;
+       unsigned i = 0;
+
+       blk_set_default_limits(limits);
+
+       while (i < dm_table_get_num_targets(table)) {
+               blk_set_default_limits(&ti_limits);
+
+               ti = dm_table_get_target(table, i++);
+
+               if (!ti->type->iterate_devices)
+                       goto combine_limits;
+
+               /*
+                * Combine queue limits of all the devices this target uses.
+                */
+               ti->type->iterate_devices(ti, dm_set_device_limits,
+                                         &ti_limits);
+
+               /*
+                * Check each device area is consistent with the target's
+                * overall queue limits.
+                */
+               if (!ti->type->iterate_devices(ti, device_area_is_valid,
+                                              &ti_limits))
+                       return -EINVAL;
+
+combine_limits:
+               /*
+                * Merge this target's queue limits into the overall limits
+                * for the table.
+                */
+               if (blk_stack_limits(limits, &ti_limits, 0) < 0)
+                       DMWARN("%s: target device "
+                              "(start sect %llu len %llu) "
+                              "is misaligned",
+                              dm_device_name(table->md),
+                              (unsigned long long) ti->begin,
+                              (unsigned long long) ti->len);
+       }
+
+       return validate_hardware_logical_block_alignment(table, limits);
+}
+
 /*
  * Set the integrity profile for this device if all devices used have
  * matching profiles.
@@ -907,27 +1065,42 @@ no_integrity:
        return;
 }
 
-void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
+void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
+                              struct queue_limits *limits)
 {
        /*
-        * Make sure we obey the optimistic sub devices
-        * restrictions.
+        * Each target device in the table has a data area that should normally
+        * be aligned such that the DM device's alignment_offset is 0.
+        * FIXME: Propagate alignment_offsets up the stack and warn of
+        *        sub-optimal or inconsistent settings.
+        */
+       limits->alignment_offset = 0;
+       limits->misaligned = 0;
+
+       /*
+        * Copy table's limits to the DM device's request_queue
         */
-       blk_queue_max_sectors(q, t->limits.max_sectors);
-       blk_queue_max_phys_segments(q, t->limits.max_phys_segments);
-       blk_queue_max_hw_segments(q, t->limits.max_hw_segments);
-       blk_queue_logical_block_size(q, t->limits.logical_block_size);
-       blk_queue_max_segment_size(q, t->limits.max_segment_size);
-       blk_queue_max_hw_sectors(q, t->limits.max_hw_sectors);
-       blk_queue_segment_boundary(q, t->limits.seg_boundary_mask);
-       blk_queue_bounce_limit(q, t->limits.bounce_pfn);
-
-       if (t->limits.no_cluster)
+       q->limits = *limits;
+
+       if (limits->no_cluster)
                queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
        else
                queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
 
        dm_table_set_integrity(t);
+
+       /*
+        * QUEUE_FLAG_STACKABLE must be set after all queue settings are
+        * visible to other CPUs because, once the flag is set, incoming bios
+        * are processed by request-based dm, which refers to the queue
+        * settings.
+        * Until the flag is set, bios are passed to bio-based dm and queued to
+        * md->deferred where queue settings are not needed yet.
+        * Those bios are passed to request-based dm at the resume time.
+        */
+       smp_mb();
+       if (dm_table_request_based(t))
+               queue_flag_set_unlocked(QUEUE_FLAG_STACKABLE, q);
 }
 
 unsigned int dm_table_get_num_targets(struct dm_table *t)
@@ -1023,6 +1196,20 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
        return r;
 }
 
+int dm_table_any_busy_target(struct dm_table *t)
+{
+       unsigned i;
+       struct dm_target *ti;
+
+       for (i = 0; i < t->num_targets; i++) {
+               ti = t->targets + i;
+               if (ti->type->busy && ti->type->busy(ti))
+                       return 1;
+       }
+
+       return 0;
+}
+
 void dm_table_unplug_all(struct dm_table *t)
 {
        struct dm_dev_internal *dd;
index 48db308fae67103b221eeb1c10e4411560b88135..3c6d4ee8921d23ae0e63850ec47ca7185ef93953 100644 (file)
 
 #define DM_MSG_PREFIX "core"
 
+/*
+ * Cookies are numeric values sent with CHANGE and REMOVE
+ * uevents while resuming, removing or renaming the device.
+ */
+#define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
+#define DM_COOKIE_LENGTH 24
+
 static const char *_name = DM_NAME;
 
 static unsigned int major = 0;
@@ -71,7 +78,7 @@ struct dm_rq_target_io {
  */
 struct dm_rq_clone_bio_info {
        struct bio *orig;
-       struct request *rq;
+       struct dm_rq_target_io *tio;
 };
 
 union map_info *dm_get_mapinfo(struct bio *bio)
@@ -81,6 +88,14 @@ union map_info *dm_get_mapinfo(struct bio *bio)
        return NULL;
 }
 
+union map_info *dm_get_rq_mapinfo(struct request *rq)
+{
+       if (rq && rq->end_io_data)
+               return &((struct dm_rq_target_io *)rq->end_io_data)->info;
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
+
 #define MINOR_ALLOCED ((void *)-1)
 
 /*
@@ -157,13 +172,31 @@ struct mapped_device {
         * freeze/thaw support require holding onto a super block
         */
        struct super_block *frozen_sb;
-       struct block_device *suspended_bdev;
+       struct block_device *bdev;
 
        /* forced geometry settings */
        struct hd_geometry geometry;
 
+       /* marker of flush suspend for request-based dm */
+       struct request suspend_rq;
+
+       /* For saving the address of __make_request for request based dm */
+       make_request_fn *saved_make_request_fn;
+
        /* sysfs handle */
        struct kobject kobj;
+
+       /* zero-length barrier that will be cloned and submitted to targets */
+       struct bio barrier_bio;
+};
+
+/*
+ * For mempools pre-allocation at the table loading time.
+ */
+struct dm_md_mempools {
+       mempool_t *io_pool;
+       mempool_t *tio_pool;
+       struct bio_set *bs;
 };
 
 #define MIN_IOS 256
@@ -391,14 +424,29 @@ static void free_io(struct mapped_device *md, struct dm_io *io)
        mempool_free(io, md->io_pool);
 }
 
-static struct dm_target_io *alloc_tio(struct mapped_device *md)
+static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
 {
-       return mempool_alloc(md->tio_pool, GFP_NOIO);
+       mempool_free(tio, md->tio_pool);
 }
 
-static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
+static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md)
 {
-       mempool_free(tio, md->tio_pool);
+       return mempool_alloc(md->tio_pool, GFP_ATOMIC);
+}
+
+static void free_rq_tio(struct dm_rq_target_io *tio)
+{
+       mempool_free(tio, tio->md->tio_pool);
+}
+
+static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md)
+{
+       return mempool_alloc(md->io_pool, GFP_ATOMIC);
+}
+
+static void free_bio_info(struct dm_rq_clone_bio_info *info)
+{
+       mempool_free(info, info->tio->md->io_pool);
 }
 
 static void start_io_acct(struct dm_io *io)
@@ -464,12 +512,13 @@ static void queue_io(struct mapped_device *md, struct bio *bio)
 struct dm_table *dm_get_table(struct mapped_device *md)
 {
        struct dm_table *t;
+       unsigned long flags;
 
-       read_lock(&md->map_lock);
+       read_lock_irqsave(&md->map_lock, flags);
        t = md->map;
        if (t)
                dm_table_get(t);
-       read_unlock(&md->map_lock);
+       read_unlock_irqrestore(&md->map_lock, flags);
 
        return t;
 }
@@ -536,9 +585,11 @@ static void dec_pending(struct dm_io *io, int error)
                         * Target requested pushing back the I/O.
                         */
                        spin_lock_irqsave(&md->deferred_lock, flags);
-                       if (__noflush_suspending(md))
-                               bio_list_add_head(&md->deferred, io->bio);
-                       else
+                       if (__noflush_suspending(md)) {
+                               if (!bio_barrier(io->bio))
+                                       bio_list_add_head(&md->deferred,
+                                                         io->bio);
+                       } else
                                /* noflush suspend was interrupted. */
                                io->error = -EIO;
                        spin_unlock_irqrestore(&md->deferred_lock, flags);
@@ -553,7 +604,8 @@ static void dec_pending(struct dm_io *io, int error)
                         * a per-device variable for error reporting.
                         * Note that you can't touch the bio after end_io_acct
                         */
-                       md->barrier_error = io_error;
+                       if (!md->barrier_error && io_error != -EOPNOTSUPP)
+                               md->barrier_error = io_error;
                        end_io_acct(io);
                } else {
                        end_io_acct(io);
@@ -607,6 +659,262 @@ static void clone_endio(struct bio *bio, int error)
        dec_pending(io, error);
 }
 
+/*
+ * Partial completion handling for request-based dm
+ */
+static void end_clone_bio(struct bio *clone, int error)
+{
+       struct dm_rq_clone_bio_info *info = clone->bi_private;
+       struct dm_rq_target_io *tio = info->tio;
+       struct bio *bio = info->orig;
+       unsigned int nr_bytes = info->orig->bi_size;
+
+       bio_put(clone);
+
+       if (tio->error)
+               /*
+                * An error has already been detected on the request.
+                * Once an error has occurred, just let clone->end_io() handle
+                * the remainder.
+                */
+               return;
+       else if (error) {
+               /*
+                * Don't notify the upper layer of the error yet.
+                * The error handling decision is made by the target driver,
+                * when the request is completed.
+                */
+               tio->error = error;
+               return;
+       }
+
+       /*
+        * I/O for the bio successfully completed.
+        * Notify the upper layer of the data completion.
+        */
+
+       /*
+        * bios are processed from the head of the list.
+        * So the completing bio should always be rq->bio.
+        * If it's not, something wrong is happening.
+        */
+       if (tio->orig->bio != bio)
+               DMERR("bio completion is going in the middle of the request");
+
+       /*
+        * Update the original request.
+        * Do not use blk_end_request() here, because it may complete
+        * the original request before the clone, and break the ordering.
+        */
+       blk_update_request(tio->orig, 0, nr_bytes);
+}
+
+/*
+ * Don't touch any member of the md after calling this function because
+ * the md may be freed in dm_put() at the end of this function.
+ * Or do dm_get() before calling this function and dm_put() later.
+ */
+static void rq_completed(struct mapped_device *md, int run_queue)
+{
+       int wakeup_waiters = 0;
+       struct request_queue *q = md->queue;
+       unsigned long flags;
+
+       spin_lock_irqsave(q->queue_lock, flags);
+       if (!queue_in_flight(q))
+               wakeup_waiters = 1;
+       spin_unlock_irqrestore(q->queue_lock, flags);
+
+       /* nudge anyone waiting on suspend queue */
+       if (wakeup_waiters)
+               wake_up(&md->wait);
+
+       if (run_queue)
+               blk_run_queue(q);
+
+       /*
+        * dm_put() must be at the end of this function. See the comment above
+        */
+       dm_put(md);
+}
+
+static void dm_unprep_request(struct request *rq)
+{
+       struct request *clone = rq->special;
+       struct dm_rq_target_io *tio = clone->end_io_data;
+
+       rq->special = NULL;
+       rq->cmd_flags &= ~REQ_DONTPREP;
+
+       blk_rq_unprep_clone(clone);
+       free_rq_tio(tio);
+}
+
+/*
+ * Requeue the original request of a clone.
+ */
+void dm_requeue_unmapped_request(struct request *clone)
+{
+       struct dm_rq_target_io *tio = clone->end_io_data;
+       struct mapped_device *md = tio->md;
+       struct request *rq = tio->orig;
+       struct request_queue *q = rq->q;
+       unsigned long flags;
+
+       dm_unprep_request(rq);
+
+       spin_lock_irqsave(q->queue_lock, flags);
+       if (elv_queue_empty(q))
+               blk_plug_device(q);
+       blk_requeue_request(q, rq);
+       spin_unlock_irqrestore(q->queue_lock, flags);
+
+       rq_completed(md, 0);
+}
+EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request);
+
+static void __stop_queue(struct request_queue *q)
+{
+       blk_stop_queue(q);
+}
+
+static void stop_queue(struct request_queue *q)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(q->queue_lock, flags);
+       __stop_queue(q);
+       spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static void __start_queue(struct request_queue *q)
+{
+       if (blk_queue_stopped(q))
+               blk_start_queue(q);
+}
+
+static void start_queue(struct request_queue *q)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(q->queue_lock, flags);
+       __start_queue(q);
+       spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+/*
+ * Complete the clone and the original request.
+ * Must be called without queue lock.
+ */
+static void dm_end_request(struct request *clone, int error)
+{
+       struct dm_rq_target_io *tio = clone->end_io_data;
+       struct mapped_device *md = tio->md;
+       struct request *rq = tio->orig;
+
+       if (blk_pc_request(rq)) {
+               rq->errors = clone->errors;
+               rq->resid_len = clone->resid_len;
+
+               if (rq->sense)
+                       /*
+                        * We are using the sense buffer of the original
+                        * request.
+                        * So setting the length of the sense data is enough.
+                        */
+                       rq->sense_len = clone->sense_len;
+       }
+
+       BUG_ON(clone->bio);
+       free_rq_tio(tio);
+
+       blk_end_request_all(rq, error);
+
+       rq_completed(md, 1);
+}
+
+/*
+ * Request completion handler for request-based dm
+ */
+static void dm_softirq_done(struct request *rq)
+{
+       struct request *clone = rq->completion_data;
+       struct dm_rq_target_io *tio = clone->end_io_data;
+       dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;
+       int error = tio->error;
+
+       if (!(rq->cmd_flags & REQ_FAILED) && rq_end_io)
+               error = rq_end_io(tio->ti, clone, error, &tio->info);
+
+       if (error <= 0)
+               /* The target wants to complete the I/O */
+               dm_end_request(clone, error);
+       else if (error == DM_ENDIO_INCOMPLETE)
+               /* The target will handle the I/O */
+               return;
+       else if (error == DM_ENDIO_REQUEUE)
+               /* The target wants to requeue the I/O */
+               dm_requeue_unmapped_request(clone);
+       else {
+               DMWARN("unimplemented target endio return value: %d", error);
+               BUG();
+       }
+}
+
+/*
+ * Complete the clone and the original request with the error status
+ * through softirq context.
+ */
+static void dm_complete_request(struct request *clone, int error)
+{
+       struct dm_rq_target_io *tio = clone->end_io_data;
+       struct request *rq = tio->orig;
+
+       tio->error = error;
+       rq->completion_data = clone;
+       blk_complete_request(rq);
+}
+
+/*
+ * Complete the not-mapped clone and the original request with the error status
+ * through softirq context.
+ * Target's rq_end_io() function isn't called.
+ * This may be used when the target's map_rq() function fails.
+ */
+void dm_kill_unmapped_request(struct request *clone, int error)
+{
+       struct dm_rq_target_io *tio = clone->end_io_data;
+       struct request *rq = tio->orig;
+
+       rq->cmd_flags |= REQ_FAILED;
+       dm_complete_request(clone, error);
+}
+EXPORT_SYMBOL_GPL(dm_kill_unmapped_request);
+
+/*
+ * Called with the queue lock held
+ */
+static void end_clone_request(struct request *clone, int error)
+{
+       /*
+        * For just cleaning up the information of the queue in which
+        * the clone was dispatched.
+        * The clone is *NOT* actually freed here because it is allocated from
+        * dm's own mempool and REQ_ALLOCED isn't set in clone->cmd_flags.
+        */
+       __blk_put_request(clone->q, clone);
+
+       /*
+        * Actual request completion is done in a softirq context which doesn't
+        * hold the queue lock.  Otherwise, deadlock could occur because:
+        *     - another request may be submitted by the upper level driver
+        *       of the stacking during the completion
+        *     - the submission which requires queue lock may be done
+        *       against this queue
+        */
+       dm_complete_request(clone, error);
+}
+
 static sector_t max_io_len(struct mapped_device *md,
                           sector_t sector, struct dm_target *ti)
 {
@@ -634,11 +942,6 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
        sector_t sector;
        struct mapped_device *md;
 
-       /*
-        * Sanity checks.
-        */
-       BUG_ON(!clone->bi_size);
-
        clone->bi_end_io = clone_endio;
        clone->bi_private = tio;
 
@@ -752,6 +1055,48 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
        return clone;
 }
 
+static struct dm_target_io *alloc_tio(struct clone_info *ci,
+                                     struct dm_target *ti)
+{
+       struct dm_target_io *tio = mempool_alloc(ci->md->tio_pool, GFP_NOIO);
+
+       tio->io = ci->io;
+       tio->ti = ti;
+       memset(&tio->info, 0, sizeof(tio->info));
+
+       return tio;
+}
+
+static void __flush_target(struct clone_info *ci, struct dm_target *ti,
+                         unsigned flush_nr)
+{
+       struct dm_target_io *tio = alloc_tio(ci, ti);
+       struct bio *clone;
+
+       tio->info.flush_request = flush_nr;
+
+       clone = bio_alloc_bioset(GFP_NOIO, 0, ci->md->bs);
+       __bio_clone(clone, ci->bio);
+       clone->bi_destructor = dm_bio_destructor;
+
+       __map_bio(ti, clone, tio);
+}
+
+static int __clone_and_map_empty_barrier(struct clone_info *ci)
+{
+       unsigned target_nr = 0, flush_nr;
+       struct dm_target *ti;
+
+       while ((ti = dm_table_get_target(ci->map, target_nr++)))
+               for (flush_nr = 0; flush_nr < ti->num_flush_requests;
+                    flush_nr++)
+                       __flush_target(ci, ti, flush_nr);
+
+       ci->sector_count = 0;
+
+       return 0;
+}
+
 static int __clone_and_map(struct clone_info *ci)
 {
        struct bio *clone, *bio = ci->bio;
@@ -759,6 +1104,9 @@ static int __clone_and_map(struct clone_info *ci)
        sector_t len = 0, max;
        struct dm_target_io *tio;
 
+       if (unlikely(bio_empty_barrier(bio)))
+               return __clone_and_map_empty_barrier(ci);
+
        ti = dm_table_find_target(ci->map, ci->sector);
        if (!dm_target_is_valid(ti))
                return -EIO;
@@ -768,10 +1116,7 @@ static int __clone_and_map(struct clone_info *ci)
        /*
         * Allocate a target io object.
         */
-       tio = alloc_tio(ci->md);
-       tio->io = ci->io;
-       tio->ti = ti;
-       memset(&tio->info, 0, sizeof(tio->info));
+       tio = alloc_tio(ci, ti);
 
        if (ci->sector_count <= max) {
                /*
@@ -827,10 +1172,7 @@ static int __clone_and_map(struct clone_info *ci)
 
                                max = max_io_len(ci->md, ci->sector, ti);
 
-                               tio = alloc_tio(ci->md);
-                               tio->io = ci->io;
-                               tio->ti = ti;
-                               memset(&tio->info, 0, sizeof(tio->info));
+                               tio = alloc_tio(ci, ti);
                        }
 
                        len = min(remaining, max);
@@ -865,7 +1207,8 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
                if (!bio_barrier(bio))
                        bio_io_error(bio);
                else
-                       md->barrier_error = -EIO;
+                       if (!md->barrier_error)
+                               md->barrier_error = -EIO;
                return;
        }
 
@@ -878,6 +1221,8 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
        ci.io->md = md;
        ci.sector = bio->bi_sector;
        ci.sector_count = bio_sectors(bio);
+       if (unlikely(bio_empty_barrier(bio)))
+               ci.sector_count = 1;
        ci.idx = bio->bi_idx;
 
        start_io_acct(ci.io);
@@ -925,6 +1270,16 @@ static int dm_merge_bvec(struct request_queue *q,
         */
        if (max_size && ti->type->merge)
                max_size = ti->type->merge(ti, bvm, biovec, max_size);
+       /*
+        * If the target doesn't support merge method and some of the devices
+        * provided their merge_bvec method (we know this by looking at
+        * queue_max_hw_sectors), then we can't allow bios with multiple vector
+        * entries.  So always set max_size to 0, and the code below allows
+        * just one page.
+        */
+       else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
+
+               max_size = 0;
 
 out_table:
        dm_table_put(map);
@@ -943,7 +1298,7 @@ out:
  * The request function that just remaps the bio built up by
  * dm_merge_bvec.
  */
-static int dm_request(struct request_queue *q, struct bio *bio)
+static int _dm_request(struct request_queue *q, struct bio *bio)
 {
        int rw = bio_data_dir(bio);
        struct mapped_device *md = q->queuedata;
@@ -980,12 +1335,274 @@ static int dm_request(struct request_queue *q, struct bio *bio)
        return 0;
 }
 
+static int dm_make_request(struct request_queue *q, struct bio *bio)
+{
+       struct mapped_device *md = q->queuedata;
+
+       if (unlikely(bio_barrier(bio))) {
+               bio_endio(bio, -EOPNOTSUPP);
+               return 0;
+       }
+
+       return md->saved_make_request_fn(q, bio); /* call __make_request() */
+}
+
+static int dm_request_based(struct mapped_device *md)
+{
+       return blk_queue_stackable(md->queue);
+}
+
+static int dm_request(struct request_queue *q, struct bio *bio)
+{
+       struct mapped_device *md = q->queuedata;
+
+       if (dm_request_based(md))
+               return dm_make_request(q, bio);
+
+       return _dm_request(q, bio);
+}
+
+void dm_dispatch_request(struct request *rq)
+{
+       int r;
+
+       if (blk_queue_io_stat(rq->q))
+               rq->cmd_flags |= REQ_IO_STAT;
+
+       rq->start_time = jiffies;
+       r = blk_insert_cloned_request(rq->q, rq);
+       if (r)
+               dm_complete_request(rq, r);
+}
+EXPORT_SYMBOL_GPL(dm_dispatch_request);
+
+static void dm_rq_bio_destructor(struct bio *bio)
+{
+       struct dm_rq_clone_bio_info *info = bio->bi_private;
+       struct mapped_device *md = info->tio->md;
+
+       free_bio_info(info);
+       bio_free(bio, md->bs);
+}
+
+static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
+                                void *data)
+{
+       struct dm_rq_target_io *tio = data;
+       struct mapped_device *md = tio->md;
+       struct dm_rq_clone_bio_info *info = alloc_bio_info(md);
+
+       if (!info)
+               return -ENOMEM;
+
+       info->orig = bio_orig;
+       info->tio = tio;
+       bio->bi_end_io = end_clone_bio;
+       bio->bi_private = info;
+       bio->bi_destructor = dm_rq_bio_destructor;
+
+       return 0;
+}
+
+static int setup_clone(struct request *clone, struct request *rq,
+                      struct dm_rq_target_io *tio)
+{
+       int r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
+                                 dm_rq_bio_constructor, tio);
+
+       if (r)
+               return r;
+
+       clone->cmd = rq->cmd;
+       clone->cmd_len = rq->cmd_len;
+       clone->sense = rq->sense;
+       clone->buffer = rq->buffer;
+       clone->end_io = end_clone_request;
+       clone->end_io_data = tio;
+
+       return 0;
+}
+
+static int dm_rq_flush_suspending(struct mapped_device *md)
+{
+       return !md->suspend_rq.special;
+}
+
+/*
+ * Called with the queue lock held.
+ */
+static int dm_prep_fn(struct request_queue *q, struct request *rq)
+{
+       struct mapped_device *md = q->queuedata;
+       struct dm_rq_target_io *tio;
+       struct request *clone;
+
+       if (unlikely(rq == &md->suspend_rq)) {
+               if (dm_rq_flush_suspending(md))
+                       return BLKPREP_OK;
+               else
+                       /* The flush suspend was interrupted */
+                       return BLKPREP_KILL;
+       }
+
+       if (unlikely(rq->special)) {
+               DMWARN("Already has something in rq->special.");
+               return BLKPREP_KILL;
+       }
+
+       tio = alloc_rq_tio(md); /* Only one for each original request */
+       if (!tio)
+               /* -ENOMEM */
+               return BLKPREP_DEFER;
+
+       tio->md = md;
+       tio->ti = NULL;
+       tio->orig = rq;
+       tio->error = 0;
+       memset(&tio->info, 0, sizeof(tio->info));
+
+       clone = &tio->clone;
+       if (setup_clone(clone, rq, tio)) {
+               /* -ENOMEM */
+               free_rq_tio(tio);
+               return BLKPREP_DEFER;
+       }
+
+       rq->special = clone;
+       rq->cmd_flags |= REQ_DONTPREP;
+
+       return BLKPREP_OK;
+}
+
+static void map_request(struct dm_target *ti, struct request *rq,
+                       struct mapped_device *md)
+{
+       int r;
+       struct request *clone = rq->special;
+       struct dm_rq_target_io *tio = clone->end_io_data;
+
+       /*
+        * Hold the md reference here for the in-flight I/O.
+        * We can't rely on the reference count by device opener,
+        * because the device may be closed during the request completion
+        * when all bios are completed.
+        * See the comment in rq_completed() too.
+        */
+       dm_get(md);
+
+       tio->ti = ti;
+       r = ti->type->map_rq(ti, clone, &tio->info);
+       switch (r) {
+       case DM_MAPIO_SUBMITTED:
+               /* The target has taken the I/O to submit by itself later */
+               break;
+       case DM_MAPIO_REMAPPED:
+               /* The target has remapped the I/O so dispatch it */
+               dm_dispatch_request(clone);
+               break;
+       case DM_MAPIO_REQUEUE:
+               /* The target wants to requeue the I/O */
+               dm_requeue_unmapped_request(clone);
+               break;
+       default:
+               if (r > 0) {
+                       DMWARN("unimplemented target map return value: %d", r);
+                       BUG();
+               }
+
+               /* The target wants to complete the I/O */
+               dm_kill_unmapped_request(clone, r);
+               break;
+       }
+}
+
+/*
+ * q->request_fn for request-based dm.
+ * Called with the queue lock held.
+ */
+static void dm_request_fn(struct request_queue *q)
+{
+       struct mapped_device *md = q->queuedata;
+       struct dm_table *map = dm_get_table(md);
+       struct dm_target *ti;
+       struct request *rq;
+
+       /*
+        * For noflush suspend, check blk_queue_stopped() to immediately
+        * quit I/O dispatching.
+        */
+       while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) {
+               rq = blk_peek_request(q);
+               if (!rq)
+                       goto plug_and_out;
+
+               if (unlikely(rq == &md->suspend_rq)) { /* Flush suspend marker */
+                       if (queue_in_flight(q))
+                               /* Not quiet yet.  Wait more */
+                               goto plug_and_out;
+
+                       /* This device should be quiet now */
+                       __stop_queue(q);
+                       blk_start_request(rq);
+                       __blk_end_request_all(rq, 0);
+                       wake_up(&md->wait);
+                       goto out;
+               }
+
+               ti = dm_table_find_target(map, blk_rq_pos(rq));
+               if (ti->type->busy && ti->type->busy(ti))
+                       goto plug_and_out;
+
+               blk_start_request(rq);
+               spin_unlock(q->queue_lock);
+               map_request(ti, rq, md);
+               spin_lock_irq(q->queue_lock);
+       }
+
+       goto out;
+
+plug_and_out:
+       if (!elv_queue_empty(q))
+               /* Some requests still remain, retry later */
+               blk_plug_device(q);
+
+out:
+       dm_table_put(map);
+
+       return;
+}
+
+int dm_underlying_device_busy(struct request_queue *q)
+{
+       return blk_lld_busy(q);
+}
+EXPORT_SYMBOL_GPL(dm_underlying_device_busy);
+
+static int dm_lld_busy(struct request_queue *q)
+{
+       int r;
+       struct mapped_device *md = q->queuedata;
+       struct dm_table *map = dm_get_table(md);
+
+       if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))
+               r = 1;
+       else
+               r = dm_table_any_busy_target(map);
+
+       dm_table_put(map);
+
+       return r;
+}
+
 static void dm_unplug_all(struct request_queue *q)
 {
        struct mapped_device *md = q->queuedata;
        struct dm_table *map = dm_get_table(md);
 
        if (map) {
+               if (dm_request_based(md))
+                       generic_unplug_device(q);
+
                dm_table_unplug_all(map);
                dm_table_put(map);
        }
@@ -1000,7 +1617,16 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
        if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
                map = dm_get_table(md);
                if (map) {
-                       r = dm_table_any_congested(map, bdi_bits);
+                       /*
+                        * Request-based dm cares only about its own queue when
+                        * queried about the congestion status of request_queue
+                        */
+                       if (dm_request_based(md))
+                               r = md->queue->backing_dev_info.state &
+                                   bdi_bits;
+                       else
+                               r = dm_table_any_congested(map, bdi_bits);
+
                        dm_table_put(map);
                }
        }
@@ -1123,30 +1749,32 @@ static struct mapped_device *alloc_dev(int minor)
        INIT_LIST_HEAD(&md->uevent_list);
        spin_lock_init(&md->uevent_lock);
 
-       md->queue = blk_alloc_queue(GFP_KERNEL);
+       md->queue = blk_init_queue(dm_request_fn, NULL);
        if (!md->queue)
                goto bad_queue;
 
+       /*
+        * Request-based dm devices cannot be stacked on top of bio-based dm
+        * devices.  The type of this dm device has not been decided yet,
+        * although we initialized the queue using blk_init_queue().
+        * The type is decided at the first table loading time.
+        * To prevent problematic device stacking, clear the queue flag
+        * for request stacking support until then.
+        *
+        * This queue is new, so no concurrency on the queue_flags.
+        */
+       queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
+       md->saved_make_request_fn = md->queue->make_request_fn;
        md->queue->queuedata = md;
        md->queue->backing_dev_info.congested_fn = dm_any_congested;
        md->queue->backing_dev_info.congested_data = md;
        blk_queue_make_request(md->queue, dm_request);
-       blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
        blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
        md->queue->unplug_fn = dm_unplug_all;
        blk_queue_merge_bvec(md->queue, dm_merge_bvec);
-
-       md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache);
-       if (!md->io_pool)
-               goto bad_io_pool;
-
-       md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache);
-       if (!md->tio_pool)
-               goto bad_tio_pool;
-
-       md->bs = bioset_create(16, 0);
-       if (!md->bs)
-               goto bad_no_bioset;
+       blk_queue_softirq_done(md->queue, dm_softirq_done);
+       blk_queue_prep_rq(md->queue, dm_prep_fn);
+       blk_queue_lld_busy(md->queue, dm_lld_busy);
 
        md->disk = alloc_disk(1);
        if (!md->disk)
@@ -1170,6 +1798,10 @@ static struct mapped_device *alloc_dev(int minor)
        if (!md->wq)
                goto bad_thread;
 
+       md->bdev = bdget_disk(md->disk, 0);
+       if (!md->bdev)
+               goto bad_bdev;
+
        /* Populate the mapping, nobody knows we exist yet */
        spin_lock(&_minor_lock);
        old_md = idr_replace(&_minor_idr, md, minor);
@@ -1179,15 +1811,11 @@ static struct mapped_device *alloc_dev(int minor)
 
        return md;
 
+bad_bdev:
+       destroy_workqueue(md->wq);
 bad_thread:
        put_disk(md->disk);
 bad_disk:
-       bioset_free(md->bs);
-bad_no_bioset:
-       mempool_destroy(md->tio_pool);
-bad_tio_pool:
-       mempool_destroy(md->io_pool);
-bad_io_pool:
        blk_cleanup_queue(md->queue);
 bad_queue:
        free_minor(minor);
@@ -1204,14 +1832,15 @@ static void free_dev(struct mapped_device *md)
 {
        int minor = MINOR(disk_devt(md->disk));
 
-       if (md->suspended_bdev) {
-               unlock_fs(md);
-               bdput(md->suspended_bdev);
-       }
+       unlock_fs(md);
+       bdput(md->bdev);
        destroy_workqueue(md->wq);
-       mempool_destroy(md->tio_pool);
-       mempool_destroy(md->io_pool);
-       bioset_free(md->bs);
+       if (md->tio_pool)
+               mempool_destroy(md->tio_pool);
+       if (md->io_pool)
+               mempool_destroy(md->io_pool);
+       if (md->bs)
+               bioset_free(md->bs);
        blk_integrity_unregister(md->disk);
        del_gendisk(md->disk);
        free_minor(minor);
@@ -1226,6 +1855,29 @@ static void free_dev(struct mapped_device *md)
        kfree(md);
 }
 
+static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
+{
+       struct dm_md_mempools *p;
+
+       if (md->io_pool && md->tio_pool && md->bs)
+               /* the md already has necessary mempools */
+               goto out;
+
+       p = dm_table_get_md_mempools(t);
+       BUG_ON(!p || md->io_pool || md->tio_pool || md->bs);
+
+       md->io_pool = p->io_pool;
+       p->io_pool = NULL;
+       md->tio_pool = p->tio_pool;
+       p->tio_pool = NULL;
+       md->bs = p->bs;
+       p->bs = NULL;
+
+out:
+       /* mempool bind completed, the table no longer needs any mempools */
+       dm_table_free_md_mempools(t);
+}
+
 /*
  * Bind a table to the device.
  */
@@ -1249,15 +1901,17 @@ static void __set_size(struct mapped_device *md, sector_t size)
 {
        set_capacity(md->disk, size);
 
-       mutex_lock(&md->suspended_bdev->bd_inode->i_mutex);
-       i_size_write(md->suspended_bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
-       mutex_unlock(&md->suspended_bdev->bd_inode->i_mutex);
+       mutex_lock(&md->bdev->bd_inode->i_mutex);
+       i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
+       mutex_unlock(&md->bdev->bd_inode->i_mutex);
 }
 
-static int __bind(struct mapped_device *md, struct dm_table *t)
+static int __bind(struct mapped_device *md, struct dm_table *t,
+                 struct queue_limits *limits)
 {
        struct request_queue *q = md->queue;
        sector_t size;
+       unsigned long flags;
 
        size = dm_table_get_size(t);
 
@@ -1267,8 +1921,7 @@ static int __bind(struct mapped_device *md, struct dm_table *t)
        if (size != get_capacity(md->disk))
                memset(&md->geometry, 0, sizeof(md->geometry));
 
-       if (md->suspended_bdev)
-               __set_size(md, size);
+       __set_size(md, size);
 
        if (!size) {
                dm_table_destroy(t);
@@ -1277,10 +1930,22 @@ static int __bind(struct mapped_device *md, struct dm_table *t)
 
        dm_table_event_callback(t, event_callback, md);
 
-       write_lock(&md->map_lock);
+       /*
+        * The queue hasn't been stopped yet if the old table type wasn't
+        * request-based during suspension.  So stop it to prevent
+        * I/O mapping before resume.
+        * This must be done before setting the queue restrictions,
+        * because request-based dm may be run just after the setting.
+        */
+       if (dm_table_request_based(t) && !blk_queue_stopped(q))
+               stop_queue(q);
+
+       __bind_mempools(md, t);
+
+       write_lock_irqsave(&md->map_lock, flags);
        md->map = t;
-       dm_table_set_restrictions(t, q);
-       write_unlock(&md->map_lock);
+       dm_table_set_restrictions(t, q, limits);
+       write_unlock_irqrestore(&md->map_lock, flags);
 
        return 0;
 }
@@ -1288,14 +1953,15 @@ static int __bind(struct mapped_device *md, struct dm_table *t)
 static void __unbind(struct mapped_device *md)
 {
        struct dm_table *map = md->map;
+       unsigned long flags;
 
        if (!map)
                return;
 
        dm_table_event_callback(map, NULL, NULL);
-       write_lock(&md->map_lock);
+       write_lock_irqsave(&md->map_lock, flags);
        md->map = NULL;
-       write_unlock(&md->map_lock);
+       write_unlock_irqrestore(&md->map_lock, flags);
        dm_table_destroy(map);
 }
 
@@ -1399,6 +2065,8 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 {
        int r = 0;
        DECLARE_WAITQUEUE(wait, current);
+       struct request_queue *q = md->queue;
+       unsigned long flags;
 
        dm_unplug_all(md->queue);
 
@@ -1408,7 +2076,14 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
                set_current_state(interruptible);
 
                smp_mb();
-               if (!atomic_read(&md->pending))
+               if (dm_request_based(md)) {
+                       spin_lock_irqsave(q->queue_lock, flags);
+                       if (!queue_in_flight(q) && blk_queue_stopped(q)) {
+                               spin_unlock_irqrestore(q->queue_lock, flags);
+                               break;
+                       }
+                       spin_unlock_irqrestore(q->queue_lock, flags);
+               } else if (!atomic_read(&md->pending))
                        break;
 
                if (interruptible == TASK_INTERRUPTIBLE &&
@@ -1426,34 +2101,36 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
        return r;
 }
 
-static int dm_flush(struct mapped_device *md)
+static void dm_flush(struct mapped_device *md)
 {
        dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
-       return 0;
+
+       bio_init(&md->barrier_bio);
+       md->barrier_bio.bi_bdev = md->bdev;
+       md->barrier_bio.bi_rw = WRITE_BARRIER;
+       __split_and_process_bio(md, &md->barrier_bio);
+
+       dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
 }
 
 static void process_barrier(struct mapped_device *md, struct bio *bio)
 {
-       int error = dm_flush(md);
-
-       if (unlikely(error)) {
-               bio_endio(bio, error);
-               return;
-       }
-       if (bio_empty_barrier(bio)) {
-               bio_endio(bio, 0);
-               return;
-       }
-
-       __split_and_process_bio(md, bio);
+       md->barrier_error = 0;
 
-       error = dm_flush(md);
+       dm_flush(md);
 
-       if (!error && md->barrier_error)
-               error = md->barrier_error;
+       if (!bio_empty_barrier(bio)) {
+               __split_and_process_bio(md, bio);
+               dm_flush(md);
+       }
 
        if (md->barrier_error != DM_ENDIO_REQUEUE)
-               bio_endio(bio, error);
+               bio_endio(bio, md->barrier_error);
+       else {
+               spin_lock_irq(&md->deferred_lock);
+               bio_list_add_head(&md->deferred, bio);
+               spin_unlock_irq(&md->deferred_lock);
+       }
 }
 
 /*
@@ -1479,10 +2156,14 @@ static void dm_wq_work(struct work_struct *work)
 
                up_write(&md->io_lock);
 
-               if (bio_barrier(c))
-                       process_barrier(md, c);
-               else
-                       __split_and_process_bio(md, c);
+               if (dm_request_based(md))
+                       generic_make_request(c);
+               else {
+                       if (bio_barrier(c))
+                               process_barrier(md, c);
+                       else
+                               __split_and_process_bio(md, c);
+               }
 
                down_write(&md->io_lock);
        }
@@ -1502,6 +2183,7 @@ static void dm_queue_flush(struct mapped_device *md)
  */
 int dm_swap_table(struct mapped_device *md, struct dm_table *table)
 {
+       struct queue_limits limits;
        int r = -EINVAL;
 
        mutex_lock(&md->suspend_lock);
@@ -1510,19 +2192,96 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
        if (!dm_suspended(md))
                goto out;
 
-       /* without bdev, the device size cannot be changed */
-       if (!md->suspended_bdev)
-               if (get_capacity(md->disk) != dm_table_get_size(table))
-                       goto out;
+       r = dm_calculate_queue_limits(table, &limits);
+       if (r)
+               goto out;
+
+       /* cannot change the device type, once a table is bound */
+       if (md->map &&
+           (dm_table_get_type(md->map) != dm_table_get_type(table))) {
+               DMWARN("can't change the device type after a table is bound");
+               goto out;
+       }
+
+       /*
+        * It is enough that blk_queue_ordered() is called only once when
+        * the first bio-based table is bound.
+        *
+        * This setting should be moved to alloc_dev() when request-based dm
+        * supports barrier.
+        */
+       if (!md->map && dm_table_bio_based(table))
+               blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
 
        __unbind(md);
-       r = __bind(md, table);
+       r = __bind(md, table, &limits);
 
 out:
        mutex_unlock(&md->suspend_lock);
        return r;
 }
 
+static void dm_rq_invalidate_suspend_marker(struct mapped_device *md)
+{
+       md->suspend_rq.special = (void *)0x1;
+}
+
+static void dm_rq_abort_suspend(struct mapped_device *md, int noflush)
+{
+       struct request_queue *q = md->queue;
+       unsigned long flags;
+
+       spin_lock_irqsave(q->queue_lock, flags);
+       if (!noflush)
+               dm_rq_invalidate_suspend_marker(md);
+       __start_queue(q);
+       spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static void dm_rq_start_suspend(struct mapped_device *md, int noflush)
+{
+       struct request *rq = &md->suspend_rq;
+       struct request_queue *q = md->queue;
+
+       if (noflush)
+               stop_queue(q);
+       else {
+               blk_rq_init(q, rq);
+               blk_insert_request(q, rq, 0, NULL);
+       }
+}
+
+static int dm_rq_suspend_available(struct mapped_device *md, int noflush)
+{
+       int r = 1;
+       struct request *rq = &md->suspend_rq;
+       struct request_queue *q = md->queue;
+       unsigned long flags;
+
+       if (noflush)
+               return r;
+
+       /* The marker must be protected by queue lock if it is in use */
+       spin_lock_irqsave(q->queue_lock, flags);
+       if (unlikely(rq->ref_count)) {
+               /*
+                * This can happen, when the previous flush suspend was
+                * interrupted, the marker is still in the queue and
+                * this flush suspend has been invoked, because we don't
+                * remove the marker at the time of suspend interruption.
+                * We have only one marker per mapped_device, so we can't
+                * start another flush suspend while it is in use.
+                */
+               BUG_ON(!rq->special); /* The marker should be invalidated */
+               DMWARN("Invalidating the previous flush suspend is still in"
+                      " progress.  Please retry later.");
+               r = 0;
+       }
+       spin_unlock_irqrestore(q->queue_lock, flags);
+
+       return r;
+}
+
 /*
  * Functions to lock and unlock any filesystem running on the
  * device.
@@ -1533,7 +2292,7 @@ static int lock_fs(struct mapped_device *md)
 
        WARN_ON(md->frozen_sb);
 
-       md->frozen_sb = freeze_bdev(md->suspended_bdev);
+       md->frozen_sb = freeze_bdev(md->bdev);
        if (IS_ERR(md->frozen_sb)) {
                r = PTR_ERR(md->frozen_sb);
                md->frozen_sb = NULL;
@@ -1542,9 +2301,6 @@ static int lock_fs(struct mapped_device *md)
 
        set_bit(DMF_FROZEN, &md->flags);
 
-       /* don't bdput right now, we don't want the bdev
-        * to go away while it is locked.
-        */
        return 0;
 }
 
@@ -1553,7 +2309,7 @@ static void unlock_fs(struct mapped_device *md)
        if (!test_bit(DMF_FROZEN, &md->flags))
                return;
 
-       thaw_bdev(md->suspended_bdev, md->frozen_sb);
+       thaw_bdev(md->bdev, md->frozen_sb);
        md->frozen_sb = NULL;
        clear_bit(DMF_FROZEN, &md->flags);
 }
@@ -1565,6 +2321,53 @@ static void unlock_fs(struct mapped_device *md)
  * dm_bind_table, dm_suspend must be called to flush any in
  * flight bios and ensure that any further io gets deferred.
  */
+/*
+ * Suspend mechanism in request-based dm.
+ *
+ * After the suspend starts, further incoming requests are kept in
+ * the request_queue and deferred.
+ * Remaining requests in the request_queue at the start of suspend are flushed
+ * if it is flush suspend.
+ * The suspend completes when the following conditions have been satisfied,
+ * so wait for it:
+ *    1. q->in_flight is 0 (which means no in_flight request)
+ *    2. queue has been stopped (which means no request dispatching)
+ *
+ *
+ * Noflush suspend
+ * ---------------
+ * Noflush suspend doesn't need to dispatch remaining requests.
+ * So stop the queue immediately.  Then, wait for all in_flight requests
+ * to be completed or requeued.
+ *
+ * To abort noflush suspend, start the queue.
+ *
+ *
+ * Flush suspend
+ * -------------
+ * Flush suspend needs to dispatch remaining requests.  So stop the queue
+ * after the remaining requests are completed. (Requeued requests must also be
+ * re-dispatched and completed.  Until then, we can't stop the queue.)
+ *
+ * During flushing the remaining requests, further incoming requests are also
+ * inserted to the same queue.  To distinguish which requests are to be
+ * flushed, we insert a marker request to the queue at the time of starting
+ * flush suspend, like a barrier.
+ * The dispatching is blocked when the marker is found on the top of the queue.
+ * And the queue is stopped when all in_flight requests are completed, since
+ * that means the remaining requests are completely flushed.
+ * Then, the marker is removed from the queue.
+ *
+ * To abort flush suspend, we also need to take care of the marker, not only
+ * starting the queue.
+ * We don't forcibly remove the marker from the queue since that is against
+ * block-layer convention.  Instead, we put an invalidated mark on the marker.
+ * When the invalidated marker is found on the top of the queue, it is
+ * immediately removed from the queue, so it doesn't block dispatching.
+ * Because we have only one marker per mapped_device, we can't start another
+ * flush suspend until the invalidated marker is removed from the queue.
+ * So fail and return with -EBUSY in such a case.
+ */
 int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 {
        struct dm_table *map = NULL;
@@ -1579,6 +2382,11 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
                goto out_unlock;
        }
 
+       if (dm_request_based(md) && !dm_rq_suspend_available(md, noflush)) {
+               r = -EBUSY;
+               goto out_unlock;
+       }
+
        map = dm_get_table(md);
 
        /*
@@ -1591,24 +2399,14 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
        /* This does not get reverted if there's an error later. */
        dm_table_presuspend_targets(map);
 
-       /* bdget() can stall if the pending I/Os are not flushed */
-       if (!noflush) {
-               md->suspended_bdev = bdget_disk(md->disk, 0);
-               if (!md->suspended_bdev) {
-                       DMWARN("bdget failed in dm_suspend");
-                       r = -ENOMEM;
+       /*
+        * Flush I/O to the device. noflush supersedes do_lockfs,
+        * because lock_fs() needs to flush I/Os.
+        */
+       if (!noflush && do_lockfs) {
+               r = lock_fs(md);
+               if (r)
                        goto out;
-               }
-
-               /*
-                * Flush I/O to the device. noflush supersedes do_lockfs,
-                * because lock_fs() needs to flush I/Os.
-                */
-               if (do_lockfs) {
-                       r = lock_fs(md);
-                       if (r)
-                               goto out;
-               }
        }
 
        /*
@@ -1634,6 +2432,9 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 
        flush_workqueue(md->wq);
 
+       if (dm_request_based(md))
+               dm_rq_start_suspend(md, noflush);
+
        /*
         * At this point no more requests are entering target request routines.
         * We call dm_wait_for_completion to wait for all existing requests
@@ -1650,6 +2451,9 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
        if (r < 0) {
                dm_queue_flush(md);
 
+               if (dm_request_based(md))
+                       dm_rq_abort_suspend(md, noflush);
+
                unlock_fs(md);
                goto out; /* pushback list is already flushed, so skip flush */
        }
@@ -1665,11 +2469,6 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
        set_bit(DMF_SUSPENDED, &md->flags);
 
 out:
-       if (r && md->suspended_bdev) {
-               bdput(md->suspended_bdev);
-               md->suspended_bdev = NULL;
-       }
-
        dm_table_put(map);
 
 out_unlock:
@@ -1696,21 +2495,20 @@ int dm_resume(struct mapped_device *md)
 
        dm_queue_flush(md);
 
-       unlock_fs(md);
+       /*
+        * Flushing deferred I/Os must be done after targets are resumed
+        * so that mapping of targets can work correctly.
+        * Request-based dm is queueing the deferred I/Os in its request_queue.
+        */
+       if (dm_request_based(md))
+               start_queue(md->queue);
 
-       if (md->suspended_bdev) {
-               bdput(md->suspended_bdev);
-               md->suspended_bdev = NULL;
-       }
+       unlock_fs(md);
 
        clear_bit(DMF_SUSPENDED, &md->flags);
 
        dm_table_unplug_all(map);
-
-       dm_kobject_uevent(md);
-
        r = 0;
-
 out:
        dm_table_put(map);
        mutex_unlock(&md->suspend_lock);
@@ -1721,9 +2519,19 @@ out:
 /*-----------------------------------------------------------------
  * Event notification.
  *---------------------------------------------------------------*/
-void dm_kobject_uevent(struct mapped_device *md)
-{
-       kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE);
+void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
+                      unsigned cookie)
+{
+       char udev_cookie[DM_COOKIE_LENGTH];
+       char *envp[] = { udev_cookie, NULL };
+
+       if (!cookie)
+               kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
+       else {
+               snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
+                        DM_COOKIE_ENV_VAR_NAME, cookie);
+               kobject_uevent_env(&disk_to_dev(md->disk)->kobj, action, envp);
+       }
 }
 
 uint32_t dm_next_uevent_seq(struct mapped_device *md)
@@ -1777,6 +2585,10 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
        if (&md->kobj != kobj)
                return NULL;
 
+       if (test_bit(DMF_FREEING, &md->flags) ||
+           test_bit(DMF_DELETING, &md->flags))
+               return NULL;
+
        dm_get(md);
        return md;
 }
@@ -1797,6 +2609,61 @@ int dm_noflush_suspending(struct dm_target *ti)
 }
 EXPORT_SYMBOL_GPL(dm_noflush_suspending);
 
+struct dm_md_mempools *dm_alloc_md_mempools(unsigned type)
+{
+       struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL);
+
+       if (!pools)
+               return NULL;
+
+       pools->io_pool = (type == DM_TYPE_BIO_BASED) ?
+                        mempool_create_slab_pool(MIN_IOS, _io_cache) :
+                        mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache);
+       if (!pools->io_pool)
+               goto free_pools_and_out;
+
+       pools->tio_pool = (type == DM_TYPE_BIO_BASED) ?
+                         mempool_create_slab_pool(MIN_IOS, _tio_cache) :
+                         mempool_create_slab_pool(MIN_IOS, _rq_tio_cache);
+       if (!pools->tio_pool)
+               goto free_io_pool_and_out;
+
+       pools->bs = (type == DM_TYPE_BIO_BASED) ?
+                   bioset_create(16, 0) : bioset_create(MIN_IOS, 0);
+       if (!pools->bs)
+               goto free_tio_pool_and_out;
+
+       return pools;
+
+free_tio_pool_and_out:
+       mempool_destroy(pools->tio_pool);
+
+free_io_pool_and_out:
+       mempool_destroy(pools->io_pool);
+
+free_pools_and_out:
+       kfree(pools);
+
+       return NULL;
+}
+
+void dm_free_md_mempools(struct dm_md_mempools *pools)
+{
+       if (!pools)
+               return;
+
+       if (pools->io_pool)
+               mempool_destroy(pools->io_pool);
+
+       if (pools->tio_pool)
+               mempool_destroy(pools->tio_pool);
+
+       if (pools->bs)
+               bioset_free(pools->bs);
+
+       kfree(pools);
+}
+
 static struct block_device_operations dm_blk_dops = {
        .open = dm_blk_open,
        .release = dm_blk_close,
index a31506d93e9164115a7d1584d33f30714913f521..23278ae80f08dfc90c8458a3b6a700e4a2a87f8b 100644 (file)
 #define DM_SUSPEND_LOCKFS_FLAG         (1 << 0)
 #define DM_SUSPEND_NOFLUSH_FLAG                (1 << 1)
 
+/*
+ * Type of table and mapped_device's mempool
+ */
+#define DM_TYPE_NONE           0
+#define DM_TYPE_BIO_BASED      1
+#define DM_TYPE_REQUEST_BASED  2
+
 /*
  * List of devices that a metadevice uses and should open/close.
  */
@@ -32,6 +39,7 @@ struct dm_dev_internal {
 };
 
 struct dm_table;
+struct dm_md_mempools;
 
 /*-----------------------------------------------------------------
  * Internal table functions.
@@ -41,18 +49,34 @@ void dm_table_event_callback(struct dm_table *t,
                             void (*fn)(void *), void *context);
 struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
 struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
-void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q);
+int dm_calculate_queue_limits(struct dm_table *table,
+                             struct queue_limits *limits);
+void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
+                              struct queue_limits *limits);
 struct list_head *dm_table_get_devices(struct dm_table *t);
 void dm_table_presuspend_targets(struct dm_table *t);
 void dm_table_postsuspend_targets(struct dm_table *t);
 int dm_table_resume_targets(struct dm_table *t);
 int dm_table_any_congested(struct dm_table *t, int bdi_bits);
+int dm_table_any_busy_target(struct dm_table *t);
+int dm_table_set_type(struct dm_table *t);
+unsigned dm_table_get_type(struct dm_table *t);
+bool dm_table_bio_based(struct dm_table *t);
+bool dm_table_request_based(struct dm_table *t);
+int dm_table_alloc_md_mempools(struct dm_table *t);
+void dm_table_free_md_mempools(struct dm_table *t);
+struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
 
 /*
  * To check the return value from dm_table_find_target().
  */
 #define dm_target_is_valid(t) ((t)->table)
 
+/*
+ * To check whether the target type is request-based or not (bio-based).
+ */
+#define dm_target_request_based(t) ((t)->type->map_rq != NULL)
+
 /*-----------------------------------------------------------------
  * A registry of target types.
  *---------------------------------------------------------------*/
@@ -92,9 +116,16 @@ void dm_stripe_exit(void);
 int dm_open_count(struct mapped_device *md);
 int dm_lock_for_deletion(struct mapped_device *md);
 
-void dm_kobject_uevent(struct mapped_device *md);
+void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
+                      unsigned cookie);
 
 int dm_kcopyd_init(void);
 void dm_kcopyd_exit(void);
 
+/*
+ * Mempool operations
+ */
+struct dm_md_mempools *dm_alloc_md_mempools(unsigned type);
+void dm_free_md_mempools(struct dm_md_mempools *pools);
+
 #endif
index 1dc721517e4cdf30db36ed726230d74b83e55501..c155bd3ec9f1a47640d4706626f544afe2e335bb 100644 (file)
@@ -1725,6 +1725,7 @@ config TLAN
 
 config KS8842
        tristate "Micrel KSZ8842"
+       depends on HAS_IOMEM
        help
          This platform driver is for Micrel KSZ8842 chip.
 
index 38f1c3375d7f64eb7a0e50e59feef331b72cf16c..b70cc99962fcfcf6ccb655213ccf0f1f0225fa89 100644 (file)
@@ -6825,6 +6825,14 @@ bnx2_nway_reset(struct net_device *dev)
        return 0;
 }
 
+static u32
+bnx2_get_link(struct net_device *dev)
+{      /* ethtool .get_link hook: returns the link_up field of the bnx2 private data. */
+       struct bnx2 *bp = netdev_priv(dev);
+
+       return bp->link_up;
+}
+
 static int
 bnx2_get_eeprom_len(struct net_device *dev)
 {
@@ -7392,7 +7400,7 @@ static const struct ethtool_ops bnx2_ethtool_ops = {
        .get_wol                = bnx2_get_wol,
        .set_wol                = bnx2_set_wol,
        .nway_reset             = bnx2_nway_reset,
-       .get_link               = ethtool_op_get_link,
+       .get_link               = bnx2_get_link,
        .get_eeprom_len         = bnx2_get_eeprom_len,
        .get_eeprom             = bnx2_get_eeprom,
        .set_eeprom             = bnx2_set_eeprom,
index d5e18812bf497f615465a6f1a5c9f7708976fefe..33821a81cbf85113336ab1b2ec37015291efcc4b 100644 (file)
@@ -36,7 +36,7 @@ config CAN_CALC_BITTIMING
          If unsure, say Y.
 
 config CAN_SJA1000
-       depends on CAN_DEV
+       depends on CAN_DEV && HAS_IOMEM
        tristate "Philips SJA1000"
        ---help---
          Driver for the SJA1000 CAN controllers from Philips or NXP
index bdb143d2b5c7a4f2032d19d746875dc3dfef01db..055bb61d6e7773b5177e342ebfcc952f4f0a008b 100644 (file)
@@ -944,28 +944,31 @@ int netxen_phantom_init(struct netxen_adapter *adapter, int pegtune_val)
        u32 val = 0;
        int retries = 60;
 
-       if (!pegtune_val) {
-               do {
-                       val = NXRD32(adapter, CRB_CMDPEG_STATE);
+       if (pegtune_val)
+               return 0;
 
-                       if (val == PHAN_INITIALIZE_COMPLETE ||
-                               val == PHAN_INITIALIZE_ACK)
-                               return 0;
+       do {
+               val = NXRD32(adapter, CRB_CMDPEG_STATE);
 
-                       msleep(500);
+               switch (val) {
+               case PHAN_INITIALIZE_COMPLETE:
+               case PHAN_INITIALIZE_ACK:
+                       return 0;
+               case PHAN_INITIALIZE_FAILED:
+                       goto out_err;
+               default:
+                       break;
+               }
 
-               } while (--retries);
+               msleep(500);
 
-               if (!retries) {
-                       pegtune_val = NXRD32(adapter,
-                                       NETXEN_ROMUSB_GLB_PEGTUNE_DONE);
-                       printk(KERN_WARNING "netxen_phantom_init: init failed, "
-                                       "pegtune_val=%x\n", pegtune_val);
-                       return -1;
-               }
-       }
+       } while (--retries);
 
-       return 0;
+       NXWR32(adapter, CRB_CMDPEG_STATE, PHAN_INITIALIZE_FAILED);
+
+out_err:
+       dev_warn(&adapter->pdev->dev, "firmware init failed\n");
+       return -EIO;
 }
 
 static int
index 71daa3d5f114174f7ec13015b49fa8d45cb9d2a6..2919a2d12bf40c5327f4979059eed3ea1de78878 100644 (file)
@@ -705,7 +705,7 @@ netxen_start_firmware(struct netxen_adapter *adapter, int request_fw)
                first_driver = (adapter->ahw.pci_func == 0);
 
        if (!first_driver)
-               return 0;
+               goto wait_init;
 
        first_boot = NXRD32(adapter, NETXEN_CAM_RAM(0x1fc));
 
@@ -752,6 +752,7 @@ netxen_start_firmware(struct netxen_adapter *adapter, int request_fw)
                | (_NETXEN_NIC_LINUX_SUBVERSION);
        NXWR32(adapter, CRB_DRIVER_VERSION, val);
 
+wait_init:
        /* Handshake with the card before we register the devices. */
        err = netxen_phantom_init(adapter, NETXEN_NIC_PEG_TUNE);
        if (err) {
@@ -1178,6 +1179,7 @@ static void __devexit netxen_nic_remove(struct pci_dev *pdev)
        free_netdev(netdev);
 }
 
+#ifdef CONFIG_PM
 static int
 netxen_nic_suspend(struct pci_dev *pdev, pm_message_t state)
 {
@@ -1242,6 +1244,7 @@ netxen_nic_resume(struct pci_dev *pdev)
 
        return 0;
 }
+#endif
 
 static int netxen_nic_open(struct net_device *netdev)
 {
@@ -1771,8 +1774,10 @@ static struct pci_driver netxen_driver = {
        .id_table = netxen_pci_tbl,
        .probe = netxen_nic_probe,
        .remove = __devexit_p(netxen_nic_remove),
+#ifdef CONFIG_PM
        .suspend = netxen_nic_suspend,
        .resume = netxen_nic_resume
+#endif
 };
 
 /* Driver Registration on NetXen card    */
index bbc6d4d3cc945b1cdf9f42f1872d7cef231012a6..3e4b67aaa6ea5bddeee06f372d5925bf394466ce 100644 (file)
@@ -3142,6 +3142,7 @@ static int ql_adapter_initialize(struct ql3_adapter *qdev)
                                                (void __iomem *)port_regs;
        u32 delay = 10;
        int status = 0;
+       unsigned long hw_flags = 0;
 
        if(ql_mii_setup(qdev))
                return -1;
@@ -3150,7 +3151,8 @@ static int ql_adapter_initialize(struct ql3_adapter *qdev)
        ql_write_common_reg(qdev, &port_regs->CommonRegs.serialPortInterfaceReg,
                            (ISP_SERIAL_PORT_IF_WE |
                             (ISP_SERIAL_PORT_IF_WE << 16)));
-
+       /* Give the PHY time to come out of reset. */
+       mdelay(100);
        qdev->port_link_state = LS_DOWN;
        netif_carrier_off(qdev->ndev);
 
@@ -3350,7 +3352,9 @@ static int ql_adapter_initialize(struct ql3_adapter *qdev)
                value = ql_read_page0_reg(qdev, &port_regs->portStatus);
                if (value & PORT_STATUS_IC)
                        break;
+               spin_unlock_irqrestore(&qdev->hw_lock, hw_flags);
                msleep(500);
+               spin_lock_irqsave(&qdev->hw_lock, hw_flags);
        } while (--delay);
 
        if (delay == 0) {
index fbc63d5e459fd4182f35f5beffc9bee1a875c8d8..eb159587d0bfa5701c2d9476c8b606625d761af9 100644 (file)
@@ -354,7 +354,7 @@ acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus,
                status = acpi_run_hpp(handle, hpp);
                if (ACPI_SUCCESS(status))
                        break;
-               if (acpi_root_bridge(handle))
+               if (acpi_is_root_bridge(handle))
                        break;
                status = acpi_get_parent(handle, &phandle);
                if (ACPI_FAILURE(status))
@@ -428,7 +428,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags)
                status = acpi_run_oshp(handle);
                if (ACPI_SUCCESS(status))
                        goto got_one;
-               if (acpi_root_bridge(handle))
+               if (acpi_is_root_bridge(handle))
                        break;
                chandle = handle;
                status = acpi_get_parent(chandle, &handle);
@@ -449,42 +449,6 @@ got_one:
 }
 EXPORT_SYMBOL(acpi_get_hp_hw_control_from_firmware);
 
-/* acpi_root_bridge - check to see if this acpi object is a root bridge
- *
- * @handle - the acpi object in question.
- */
-int acpi_root_bridge(acpi_handle handle)
-{
-       acpi_status status;
-       struct acpi_device_info *info;
-       struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
-       int i;
-
-       status = acpi_get_object_info(handle, &buffer);
-       if (ACPI_SUCCESS(status)) {
-               info = buffer.pointer;
-               if ((info->valid & ACPI_VALID_HID) &&
-                       !strcmp(PCI_ROOT_HID_STRING,
-                                       info->hardware_id.value)) {
-                       kfree(buffer.pointer);
-                       return 1;
-               }
-               if (info->valid & ACPI_VALID_CID) {
-                       for (i=0; i < info->compatibility_id.count; i++) {
-                               if (!strcmp(PCI_ROOT_HID_STRING,
-                                       info->compatibility_id.id[i].value)) {
-                                       kfree(buffer.pointer);
-                                       return 1;
-                               }
-                       }
-               }
-               kfree(buffer.pointer);
-       }
-       return 0;
-}
-EXPORT_SYMBOL_GPL(acpi_root_bridge);
-
-
 static int is_ejectable(acpi_handle handle)
 {
        acpi_status status;
index 3a6064bce5614a84ba16e1411a50bcfc7afddce3..0cb0f830a99307539d780e25e54ead35ed43cf02 100644 (file)
@@ -678,18 +678,9 @@ static void remove_bridge(acpi_handle handle)
 
 static struct pci_dev * get_apic_pci_info(acpi_handle handle)
 {
-       struct acpi_pci_id id;
-       struct pci_bus *bus;
        struct pci_dev *dev;
 
-       if (ACPI_FAILURE(acpi_get_pci_id(handle, &id)))
-               return NULL;
-
-       bus = pci_find_bus(id.segment, id.bus);
-       if (!bus)
-               return NULL;
-
-       dev = pci_get_slot(bus, PCI_DEVFN(id.device, id.function));
+       dev = acpi_get_pci_dev(handle);
        if (!dev)
                return NULL;
 
@@ -1396,19 +1387,16 @@ static void acpiphp_sanitize_bus(struct pci_bus *bus)
 /* Program resources in newly inserted bridge */
 static int acpiphp_configure_bridge (acpi_handle handle)
 {
-       struct acpi_pci_id pci_id;
+       struct pci_dev *dev;
        struct pci_bus *bus;
 
-       if (ACPI_FAILURE(acpi_get_pci_id(handle, &pci_id))) {
+       dev = acpi_get_pci_dev(handle);
+       if (!dev) {
                err("cannot get PCI domain and bus number for bridge\n");
                return -EINVAL;
        }
-       bus = pci_find_bus(pci_id.segment, pci_id.bus);
-       if (!bus) {
-               err("cannot find bus %d:%d\n",
-                               pci_id.segment, pci_id.bus);
-               return -EINVAL;
-       }
+
+       bus = dev->bus;
 
        pci_bus_size_bridges(bus);
        pci_bus_assign_resources(bus);
@@ -1416,6 +1404,7 @@ static int acpiphp_configure_bridge (acpi_handle handle)
        acpiphp_set_hpp_values(handle, bus);
        pci_enable_bridges(bus);
        acpiphp_configure_ioapics(handle);
+       pci_dev_put(dev);
        return 0;
 }
 
@@ -1631,7 +1620,7 @@ find_root_bridges(acpi_handle handle, u32 lvl, void *context, void **rv)
 {
        int *count = (int *)context;
 
-       if (acpi_root_bridge(handle)) {
+       if (acpi_is_root_bridge(handle)) {
                acpi_install_notify_handler(handle, ACPI_SYSTEM_NOTIFY,
                                handle_hotplug_event_bridge, NULL);
                        (*count)++;
index 178853a074405d746d6c0932030aa22daf2b17ca..e53eacd75c8daf5d6096947ef646a75162fb6bf1 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/sysdev.h>
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
+#include <asm/e820.h>
 #include "pci.h"
 
 #define ROOT_SIZE              VTD_PAGE_SIZE
@@ -217,6 +218,14 @@ static inline bool dma_pte_present(struct dma_pte *pte)
        return (pte->val & 3) != 0;
 }
 
+/*
+ * This domain is a static identity mapping domain.
+ *     1. This domain creates a static 1:1 mapping to all usable memory.
+ *     2. It maps to each iommu if successful.
+ *     3. Each iommu maps to this domain if successful.
+ */
+struct dmar_domain *si_domain;
+
 /* devices under the same p2p bridge are owned in one domain */
 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
 
@@ -225,6 +234,9 @@ static inline bool dma_pte_present(struct dma_pte *pte)
  */
 #define DOMAIN_FLAG_VIRTUAL_MACHINE    (1 << 1)
 
+/* si_domain contains multiple devices */
+#define DOMAIN_FLAG_STATIC_IDENTITY    (1 << 2)
+
 struct dmar_domain {
        int     id;                     /* domain id */
        unsigned long iommu_bmp;        /* bitmap of iommus this domain uses*/
@@ -435,12 +447,14 @@ int iommu_calculate_agaw(struct intel_iommu *iommu)
        return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
 }
 
-/* in native case, each domain is related to only one iommu */
+/* This function only returns a single iommu in a domain */
 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 {
        int iommu_id;
 
+       /* si_domain and vm domain should not get here. */
        BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
+       BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
 
        iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
        if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
@@ -1189,48 +1203,71 @@ void free_dmar_iommu(struct intel_iommu *iommu)
        free_context_table(iommu);
 }
 
-static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
+static struct dmar_domain *alloc_domain(void)
 {
-       unsigned long num;
-       unsigned long ndomains;
        struct dmar_domain *domain;
-       unsigned long flags;
 
        domain = alloc_domain_mem();
        if (!domain)
                return NULL;
 
+       memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
+       domain->flags = 0;
+
+       return domain;
+}
+
+static int iommu_attach_domain(struct dmar_domain *domain,
+                              struct intel_iommu *iommu)
+{
+       int num;
+       unsigned long ndomains;
+       unsigned long flags;
+
        ndomains = cap_ndoms(iommu->cap);
 
        spin_lock_irqsave(&iommu->lock, flags);
+
        num = find_first_zero_bit(iommu->domain_ids, ndomains);
        if (num >= ndomains) {
                spin_unlock_irqrestore(&iommu->lock, flags);
-               free_domain_mem(domain);
                printk(KERN_ERR "IOMMU: no free domain ids\n");
-               return NULL;
+               return -ENOMEM;
        }
 
-       set_bit(num, iommu->domain_ids);
        domain->id = num;
-       memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
+       set_bit(num, iommu->domain_ids);
        set_bit(iommu->seq_id, &domain->iommu_bmp);
-       domain->flags = 0;
        iommu->domains[num] = domain;
        spin_unlock_irqrestore(&iommu->lock, flags);
 
-       return domain;
+       return 0;
 }
 
-static void iommu_free_domain(struct dmar_domain *domain)
+static void iommu_detach_domain(struct dmar_domain *domain,
+                               struct intel_iommu *iommu)
 {
        unsigned long flags;
-       struct intel_iommu *iommu;
-
-       iommu = domain_get_iommu(domain);
+       int num, ndomains;
+       int found = 0;
 
        spin_lock_irqsave(&iommu->lock, flags);
-       clear_bit(domain->id, iommu->domain_ids);
+       ndomains = cap_ndoms(iommu->cap);
+       num = find_first_bit(iommu->domain_ids, ndomains);
+       for (; num < ndomains; ) {
+               if (iommu->domains[num] == domain) {
+                       found = 1;
+                       break;
+               }
+               num = find_next_bit(iommu->domain_ids,
+                                   cap_ndoms(iommu->cap), num+1);
+       }
+
+       if (found) {
+               clear_bit(num, iommu->domain_ids);
+               clear_bit(iommu->seq_id, &domain->iommu_bmp);
+               iommu->domains[num] = NULL;
+       }
        spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
@@ -1350,6 +1387,8 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
 
 static void domain_exit(struct dmar_domain *domain)
 {
+       struct dmar_drhd_unit *drhd;
+       struct intel_iommu *iommu;
        u64 end;
 
        /* Domain 0 is reserved, so dont process it */
@@ -1368,7 +1407,10 @@ static void domain_exit(struct dmar_domain *domain)
        /* free page tables */
        dma_pte_free_pagetable(domain, 0, end);
 
-       iommu_free_domain(domain);
+       for_each_active_iommu(iommu, drhd)
+               if (test_bit(iommu->seq_id, &domain->iommu_bmp))
+                       iommu_detach_domain(domain, iommu);
+
        free_domain_mem(domain);
 }
 
@@ -1408,7 +1450,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
        id = domain->id;
        pgd = domain->pgd;
 
-       if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+       if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
+           domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
                int found = 0;
 
                /* find an available domain id for this device in iommu */
@@ -1433,6 +1476,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
                        }
 
                        set_bit(num, iommu->domain_ids);
+                       set_bit(iommu->seq_id, &domain->iommu_bmp);
                        iommu->domains[num] = domain;
                        id = num;
                }
@@ -1675,6 +1719,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
        unsigned long flags;
        int bus = 0, devfn = 0;
        int segment;
+       int ret;
 
        domain = find_domain(pdev);
        if (domain)
@@ -1707,6 +1752,10 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
                }
        }
 
+       domain = alloc_domain();
+       if (!domain)
+               goto error;
+
        /* Allocate new domain for the device */
        drhd = dmar_find_matched_drhd_unit(pdev);
        if (!drhd) {
@@ -1716,9 +1765,11 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
        }
        iommu = drhd->iommu;
 
-       domain = iommu_alloc_domain(iommu);
-       if (!domain)
+       ret = iommu_attach_domain(domain, iommu);
+       if (ret) {
+               domain_exit(domain);
                goto error;
+       }
 
        if (domain_init(domain, gaw)) {
                domain_exit(domain);
@@ -1792,6 +1843,8 @@ error:
        return find_domain(pdev);
 }
 
+static int iommu_identity_mapping;
+
 static int iommu_prepare_identity_map(struct pci_dev *pdev,
                                      unsigned long long start,
                                      unsigned long long end)
@@ -1804,8 +1857,11 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,
        printk(KERN_INFO
                "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
                pci_name(pdev), start, end);
-       /* page table init */
-       domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+       if (iommu_identity_mapping)
+               domain = si_domain;
+       else
+               /* page table init */
+               domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
        if (!domain)
                return -ENOMEM;
 
@@ -1952,7 +2008,110 @@ static int __init init_context_pass_through(void)
        return 0;
 }
 
-static int __init init_dmars(void)
+static int md_domain_init(struct dmar_domain *domain, int guest_width);
+static int si_domain_init(void)
+{      /* Allocate si_domain, attach it to each active iommu, then initialise it. */
+       struct dmar_drhd_unit *drhd;
+       struct intel_iommu *iommu;
+       int ret = 0;
+
+       si_domain = alloc_domain();
+       if (!si_domain)
+               return -EFAULT;
+
+       /* Every failure from here on tears the domain down and returns -EFAULT. */
+       for_each_active_iommu(iommu, drhd) {
+               ret = iommu_attach_domain(si_domain, iommu);
+               if (ret) {
+                       domain_exit(si_domain); /* NOTE(review): si_domain is not reset afterwards - confirm no later user dereferences it */
+                       return -EFAULT;
+               }
+       }
+
+       if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+               domain_exit(si_domain);
+               return -EFAULT;
+       }
+
+       si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; /* flag is set only once init succeeded */
+
+       return 0;
+}
+
+static void domain_remove_one_dev_info(struct dmar_domain *domain,
+                                         struct pci_dev *pdev);
+static int identity_mapping(struct pci_dev *pdev)
+{      /* Return 1 if @pdev is on si_domain's device list, 0 otherwise. */
+       struct device_domain_info *info;
+
+       if (likely(!iommu_identity_mapping))    /* identity mapping disabled: never a member */
+               return 0;
+
+       /* NOTE(review): list is walked without device_domain_lock - confirm this is safe. */
+       list_for_each_entry(info, &si_domain->devices, link)
+               if (info->dev == pdev)
+                       return 1;
+       return 0;
+}
+
+static int domain_add_dev_info(struct dmar_domain *domain,
+                                 struct pci_dev *pdev)
+{      /* Record @pdev as a member of @domain; returns 0, or -ENOMEM if no info could be allocated. */
+       struct device_domain_info *info;
+       unsigned long flags;
+
+       info = alloc_devinfo_mem();
+       if (!info)
+               return -ENOMEM;
+
+       info->segment = pci_domain_nr(pdev->bus);       /* device is identified by segment/bus/devfn */
+       info->bus = pdev->bus->number;
+       info->devfn = pdev->devfn;
+       info->dev = pdev;
+       info->domain = domain;
+
+       spin_lock_irqsave(&device_domain_lock, flags);  /* both lists and archdata.iommu change under this lock */
+       list_add(&info->link, &domain->devices);
+       list_add(&info->global, &device_domain_list);
+       pdev->dev.archdata.iommu = info;
+       spin_unlock_irqrestore(&device_domain_lock, flags);
+
+       return 0;
+}
+
+static int iommu_prepare_static_identity_mapping(void)
+{      /* Build si_domain, then for every PCI device 1:1 map all E820_RAM ranges and add it to si_domain. */
+       int i;
+       struct pci_dev *pdev = NULL;
+       int ret;
+
+       ret = si_domain_init();
+       if (ret)
+               return -EFAULT;
+
+       printk(KERN_INFO "IOMMU: Setting identity map:\n");
+       for_each_pci_dev(pdev) {
+               for (i = 0; i < e820.nr_map; i++) {
+                       struct e820entry *ei = &e820.map[i];
+
+                       if (ei->type == E820_RAM) {     /* only RAM ranges are identity mapped */
+                               ret = iommu_prepare_identity_map(pdev,
+                                       ei->addr, ei->addr + ei->size);
+                               if (ret)  {
+                                       printk(KERN_INFO "1:1 mapping to one domain failed.\n");
+                                       return -EFAULT; /* NOTE(review): leaving for_each_pci_dev() early presumably keeps a reference on pdev - confirm whether pci_dev_put() is needed */
+                               }
+                       }
+               }
+               ret = domain_add_dev_info(si_domain, pdev);
+               if (ret)
+                       return ret;     /* NOTE(review): same early-exit reference question as above */
+       }
+
+       return 0;
+}
+
+int __init init_dmars(void)
 {
        struct dmar_drhd_unit *drhd;
        struct dmar_rmrr_unit *rmrr;
@@ -1961,6 +2120,13 @@ static int __init init_dmars(void)
        int i, ret;
        int pass_through = 1;
 
+       /*
+        * In case pass through can not be enabled, iommu tries to use identity
+        * mapping.
+        */
+       if (iommu_pass_through)
+               iommu_identity_mapping = 1;
+
        /*
         * for each drhd
         *    allocate root
@@ -2090,9 +2256,12 @@ static int __init init_dmars(void)
 
        /*
         * If pass through is not set or not enabled, setup context entries for
-        * identity mappings for rmrr, gfx, and isa.
+        * identity mappings for rmrr, gfx, and isa and may fall back to static
+        * identity mapping if iommu_identity_mapping is set.
         */
        if (!iommu_pass_through) {
+               if (iommu_identity_mapping)
+                       iommu_prepare_static_identity_mapping();
                /*
                 * For each rmrr
                 *   for each dev attached to rmrr
@@ -2107,6 +2276,7 @@ static int __init init_dmars(void)
                 *    endfor
                 * endfor
                 */
+               printk(KERN_INFO "IOMMU: Setting RMRR:\n");
                for_each_rmrr_units(rmrr) {
                        for (i = 0; i < rmrr->devices_cnt; i++) {
                                pdev = rmrr->devices[i];
@@ -2248,6 +2418,52 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
        return domain;
 }
 
+static int iommu_dummy(struct pci_dev *pdev)
+{      /* Non-zero when @pdev's archdata.iommu is the DUMMY_DEVICE_DOMAIN_INFO marker. */
+       return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
+}
+
+/* Return non-zero if @pdev skips the non-identity map/unmap path; may move @pdev into or out of si_domain. */
+static int iommu_no_mapping(struct pci_dev *pdev)
+{
+       int found;
+
+       if (!iommu_identity_mapping)    /* identity mapping disabled: only the dummy marker decides */
+               return iommu_dummy(pdev);
+
+       found = identity_mapping(pdev);
+       if (found) {
+               if (pdev->dma_mask > DMA_BIT_MASK(32))
+                       return 1;
+               else {
+                       /*
+                        * A device whose DMA mask fits in 32 bits is removed from
+                        * si_domain and falls back to non-identity mapping.
+                        */
+                       domain_remove_one_dev_info(si_domain, pdev);
+                       printk(KERN_INFO "32bit %s uses non-identity mapping\n",
+                              pci_name(pdev));
+                       return 0;
+               }
+       } else {
+               /*
+                * A device with a >32-bit DMA mask that is not in si_domain (e.g.
+                * one detached from a vm) is put into si_domain for identity mapping.
+                */
+               if (pdev->dma_mask > DMA_BIT_MASK(32)) {
+                       int ret;
+                       ret = domain_add_dev_info(si_domain, pdev);
+                       if (!ret) {
+                               printk(KERN_INFO "64bit %s uses identity mapping\n",
+                                      pci_name(pdev));
+                               return 1;
+                       }
+               }
+       }
+
+       return iommu_dummy(pdev);       /* not identity mapped: fall back to the dummy-marker check */
+}
+
 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
                                     size_t size, int dir, u64 dma_mask)
 {
@@ -2260,7 +2476,8 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
        struct intel_iommu *iommu;
 
        BUG_ON(dir == DMA_NONE);
-       if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
+
+       if (iommu_no_mapping(pdev))
                return paddr;
 
        domain = get_valid_domain_for_dev(pdev);
@@ -2401,8 +2618,9 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
        struct iova *iova;
        struct intel_iommu *iommu;
 
-       if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
+       if (iommu_no_mapping(pdev))
                return;
+
        domain = find_domain(pdev);
        BUG_ON(!domain);
 
@@ -2492,7 +2710,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
        struct scatterlist *sg;
        struct intel_iommu *iommu;
 
-       if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
+       if (iommu_no_mapping(pdev))
                return;
 
        domain = find_domain(pdev);
@@ -2553,7 +2771,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
        struct intel_iommu *iommu;
 
        BUG_ON(dir == DMA_NONE);
-       if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
+       if (iommu_no_mapping(pdev))
                return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
 
        domain = get_valid_domain_for_dev(pdev);
@@ -2951,31 +3169,6 @@ int __init intel_iommu_init(void)
        return 0;
 }
 
-static int vm_domain_add_dev_info(struct dmar_domain *domain,
-                                 struct pci_dev *pdev)
-{
-       struct device_domain_info *info;
-       unsigned long flags;
-
-       info = alloc_devinfo_mem();
-       if (!info)
-               return -ENOMEM;
-
-       info->segment = pci_domain_nr(pdev->bus);
-       info->bus = pdev->bus->number;
-       info->devfn = pdev->devfn;
-       info->dev = pdev;
-       info->domain = domain;
-
-       spin_lock_irqsave(&device_domain_lock, flags);
-       list_add(&info->link, &domain->devices);
-       list_add(&info->global, &device_domain_list);
-       pdev->dev.archdata.iommu = info;
-       spin_unlock_irqrestore(&device_domain_lock, flags);
-
-       return 0;
-}
-
 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
                                           struct pci_dev *pdev)
 {
@@ -3003,7 +3196,7 @@ static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
        }
 }
 
-static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
+static void domain_remove_one_dev_info(struct dmar_domain *domain,
                                          struct pci_dev *pdev)
 {
        struct device_domain_info *info;
@@ -3136,7 +3329,7 @@ static struct dmar_domain *iommu_alloc_vm_domain(void)
        return domain;
 }
 
-static int vm_domain_init(struct dmar_domain *domain, int guest_width)
+static int md_domain_init(struct dmar_domain *domain, int guest_width)
 {
        int adjust_width;
 
@@ -3227,7 +3420,7 @@ static int intel_iommu_domain_init(struct iommu_domain *domain)
                        "intel_iommu_domain_init: dmar_domain == NULL\n");
                return -ENOMEM;
        }
-       if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+       if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
                printk(KERN_ERR
                        "intel_iommu_domain_init() failed\n");
                vm_domain_exit(dmar_domain);
@@ -3262,8 +3455,9 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 
                old_domain = find_domain(pdev);
                if (old_domain) {
-                       if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
-                               vm_domain_remove_one_dev_info(old_domain, pdev);
+                       if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
+                           dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
+                               domain_remove_one_dev_info(old_domain, pdev);
                        else
                                domain_remove_dev_info(old_domain);
                }
@@ -3285,7 +3479,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
                return -EFAULT;
        }
 
-       ret = vm_domain_add_dev_info(dmar_domain, pdev);
+       ret = domain_add_dev_info(dmar_domain, pdev);
        if (ret)
                return ret;
 
@@ -3299,7 +3493,7 @@ static void intel_iommu_detach_device(struct iommu_domain *domain,
        struct dmar_domain *dmar_domain = domain->priv;
        struct pci_dev *pdev = to_pci_dev(dev);
 
-       vm_domain_remove_one_dev_info(dmar_domain, pdev);
+       domain_remove_one_dev_info(dmar_domain, pdev);
 }
 
 static int intel_iommu_map_range(struct iommu_domain *domain,
index 1e83c8c5f9859d29229bec9225205eb5e39e5253..4f5b8712931f0d34c1702440a80ff0bb71f46964 100644 (file)
@@ -10,6 +10,8 @@
 #include <linux/intel-iommu.h>
 #include "intr_remapping.h"
 #include <acpi/acpi.h>
+#include <asm/pci-direct.h>
+#include "pci.h"
 
 static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
 static int ir_ioapic_num;
@@ -314,7 +316,8 @@ int modify_irte(int irq, struct irte *irte_modified)
        index = irq_iommu->irte_index + irq_iommu->sub_handle;
        irte = &iommu->ir_table->base[index];
 
-       set_64bit((unsigned long *)irte, irte_modified->low);
+       set_64bit((unsigned long *)&irte->low, irte_modified->low);
+       set_64bit((unsigned long *)&irte->high, irte_modified->high);
        __iommu_flush_cache(iommu, irte, sizeof(*irte));
 
        rc = qi_flush_iec(iommu, index, 0);
@@ -369,12 +372,32 @@ struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
        return drhd->iommu;
 }
 
+/*
+ * Zero the interrupt remapping table entries described by @irq_iommu
+ * and flush the interrupt entry cache for them.
+ *
+ * Nothing is touched (and 0 is returned) when @irq_iommu has a non-zero
+ * sub_handle.  Otherwise 1 << irte_mask entries starting at irte_index
+ * are cleared and the result of qi_flush_iec() is returned.
+ */
+static int clear_entries(struct irq_2_iommu *irq_iommu)
+{
+       struct intel_iommu *iommu;
+       struct irte *block;
+       int first, nr, i;
+
+       if (irq_iommu->sub_handle)
+               return 0;
+
+       iommu = irq_iommu->iommu;
+       first = irq_iommu->irte_index + irq_iommu->sub_handle;
+       nr = 1 << irq_iommu->irte_mask;
+       block = &iommu->ir_table->base[first];
+
+       /* both 64-bit halves of every entry are written separately */
+       for (i = 0; i < nr; i++) {
+               set_64bit((unsigned long *)&block[i].low, 0);
+               set_64bit((unsigned long *)&block[i].high, 0);
+       }
+
+       return qi_flush_iec(iommu, first, irq_iommu->irte_mask);
+}
+
 int free_irte(int irq)
 {
        int rc = 0;
-       int index, i;
-       struct irte *irte;
-       struct intel_iommu *iommu;
        struct irq_2_iommu *irq_iommu;
        unsigned long flags;
 
@@ -385,16 +408,7 @@ int free_irte(int irq)
                return -1;
        }
 
-       iommu = irq_iommu->iommu;
-
-       index = irq_iommu->irte_index + irq_iommu->sub_handle;
-       irte = &iommu->ir_table->base[index];
-
-       if (!irq_iommu->sub_handle) {
-               for (i = 0; i < (1 << irq_iommu->irte_mask); i++)
-                       set_64bit((unsigned long *)(irte + i), 0);
-               rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask);
-       }
+       rc = clear_entries(irq_iommu);
 
        irq_iommu->iommu = NULL;
        irq_iommu->irte_index = 0;
@@ -406,6 +420,91 @@ int free_irte(int irq)
        return rc;
 }
 
+/*
+ * source validation type
+ */
+#define SVT_NO_VERIFY          0x0  /* no verification is required */
+#define SVT_VERIFY_SID_SQ      0x1  /* verify using SID and SQ fields */
+#define SVT_VERIFY_BUS         0x2  /* verify bus of request-id */
+
+/*
+ * source-id qualifier
+ */
+#define SQ_ALL_16      0x0  /* verify all 16 bits of request-id */
+#define SQ_13_IGNORE_1 0x1  /* verify most significant 13 bits, ignore
+                             * the third least significant bit
+                             */
+#define SQ_13_IGNORE_2 0x2  /* verify most significant 13 bits, ignore
+                             * the second and third least significant bits
+                             */
+#define SQ_13_IGNORE_3 0x3  /* verify most significant 13 bits, ignore
+                             * the least three significant bits
+                             */
+
+/*
+ * set SVT, SQ and SID fields of irte to verify
+ * source ids of interrupt requests
+ *
+ * @irte: entry to modify
+ * @svt:  source validation type, stored in irte->svt
+ * @sq:   source-id qualifier, stored in irte->sq
+ * @sid:  source-id, stored in irte->sid
+ */
+static void set_irte_sid(struct irte *irte, unsigned int svt,
+                        unsigned int sq, unsigned int sid)
+{
+       irte->svt = svt;
+       irte->sq = sq;
+       irte->sid = sid;
+}
+
+/*
+ * Fill in the source-id verification fields of @irte for the IOAPIC
+ * with id @apic.  The source-id is built from the bus/devfn recorded
+ * for that IOAPIC in ir_ioapic[] and is verified in all 16 bits.
+ *
+ * Returns 0 on success, -1 if @irte is NULL or no (non-zero) source-id
+ * is recorded for @apic.
+ */
+int set_ioapic_sid(struct irte *irte, int apic)
+{
+       int i;
+       u16 sid = 0;
+
+       if (!irte)
+               return -1;
+
+       for (i = 0; i < MAX_IO_APICS; i++) {
+               if (ir_ioapic[i].id == apic) {
+                       sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn;
+                       break;
+               }
+       }
+
+       /* a source-id of 0 doubles as the "not found" marker */
+       if (sid == 0) {
+               pr_warning("Failed to set source-id of IOAPIC (%d)\n", apic);
+               return -1;
+       }
+
+       /* named constants instead of bare 1, 0: same values, same style as set_msi_sid() */
+       set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, sid);
+
+       return 0;
+}
+
+/*
+ * Fill in the source-id verification fields of @irte for MSI requests
+ * issued on behalf of @dev.
+ *
+ *  - PCIe device, or device on a root bus: verify the device's own
+ *    bus/devfn.
+ *  - device behind a PCIe-to-PCI/PCI-X bridge: verify by bus, using
+ *    the bridge's bus number and the device's bus number.
+ *  - device behind a legacy PCI bridge: verify the bridge's bus/devfn.
+ *
+ * If no upstream bridge is found the entry is left unchanged.
+ * Returns -1 when @irte or @dev is NULL, 0 otherwise.
+ */
+int set_msi_sid(struct irte *irte, struct pci_dev *dev)
+{
+       struct pci_dev *bridge;
+       unsigned int svt, sid;
+
+       if (!irte || !dev)
+               return -1;
+
+       if (dev->is_pcie || !dev->bus->parent) {
+               /* PCIe device or Root Complex integrated PCI device */
+               svt = SVT_VERIFY_SID_SQ;
+               sid = (dev->bus->number << 8) | dev->devfn;
+       } else {
+               bridge = pci_find_upstream_pcie_bridge(dev);
+               if (!bridge)
+                       return 0;
+
+               if (bridge->is_pcie) {
+                       /* this is a PCIE-to-PCI/PCIX bridge */
+                       svt = SVT_VERIFY_BUS;
+                       sid = (bridge->bus->number << 8) | dev->bus->number;
+               } else {
+                       /* this is a legacy PCI bridge */
+                       svt = SVT_VERIFY_SID_SQ;
+                       sid = (bridge->bus->number << 8) | bridge->devfn;
+               }
+       }
+
+       set_irte_sid(irte, svt, SQ_ALL_16, sid);
+
+       return 0;
+}
+
 static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
 {
        u64 addr;
@@ -612,6 +711,35 @@ error:
        return -1;
 }
 
+/*
+ * Record one IOAPIC device scope in the next free ir_ioapic[] slot.
+ *
+ * The PCI path entries that follow @scope are walked from the scope's
+ * start bus: for every entry but the last, the secondary bus number of
+ * that bridge is read to find the next bus.  The last entry supplies
+ * the devfn of the IOAPIC itself.
+ */
+static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope,
+                                     struct intel_iommu *iommu)
+{
+       struct ioapic_scope *slot = &ir_ioapic[ir_ioapic_num];
+       struct acpi_dmar_pci_path *path;
+       u8 bus = scope->bus;
+       int count, hop;
+
+       path = (struct acpi_dmar_pci_path *)(scope + 1);
+       count = (scope->length - sizeof(struct acpi_dmar_device_scope))
+               / sizeof(struct acpi_dmar_pci_path);
+
+       /*
+        * PCI config space is accessed directly because the PCI
+        * subsystem is not initialized yet.
+        */
+       for (hop = 1; hop < count; hop++, path++)
+               bus = read_pci_config_byte(bus, path->dev, path->fn,
+                                          PCI_SECONDARY_BUS);
+
+       slot->bus   = bus;
+       slot->devfn = PCI_DEVFN(path->dev, path->fn);
+       slot->iommu = iommu;
+       slot->id    = scope->enumeration_id;
+       ir_ioapic_num++;
+}
+
 static int ir_parse_ioapic_scope(struct acpi_dmar_header *header,
                                 struct intel_iommu *iommu)
 {
@@ -636,9 +764,7 @@ static int ir_parse_ioapic_scope(struct acpi_dmar_header *header,
                               " 0x%Lx\n", scope->enumeration_id,
                               drhd->address);
 
-                       ir_ioapic[ir_ioapic_num].iommu = iommu;
-                       ir_ioapic[ir_ioapic_num].id = scope->enumeration_id;
-                       ir_ioapic_num++;
+                       ir_parse_one_ioapic_scope(scope, iommu);
                }
                start += scope->length;
        }
index ca48f0df8ac989bb41889e1efd87db7d5ee5fe10..63a263c18415f8d2f61ce13c6b1918d95c65d19a 100644 (file)
@@ -3,6 +3,8 @@
 struct ioapic_scope {
        struct intel_iommu *iommu;
        unsigned int id;
+       unsigned int bus;       /* PCI bus number */
+       unsigned int devfn;     /* PCI devfn number */
 };
 
 #define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
index c682ac53641554a82c3b47ef91a71204530d393e..7232fe7104aa140c20b2b97b9a6bdc5717044747 100644 (file)
@@ -34,10 +34,27 @@ config ACER_WMI
          If you have an ACPI-WMI compatible Acer/ Wistron laptop, say Y or M
          here.
 
+config ACERHDF
+       tristate "Acer Aspire One temperature and fan driver"
+       depends on THERMAL && THERMAL_HWMON && ACPI
+       ---help---
+         This is a driver for Acer Aspire One netbooks. It provides access
+         to the temperature sensor and control of the fan.
+
+         After loading this driver the BIOS is still in control of the fan.
+         To let the kernel handle the fan, do:
+         echo -n enabled > /sys/class/thermal/thermal_zone0/mode
+
+         For more information about this driver see
+         <http://piie.net/files/acerhdf_README.txt>
+
+         If you have an Acer Aspire One netbook, say Y or M
+         here.
+
 config ASUS_LAPTOP
-       tristate "Asus Laptop Extras (EXPERIMENTAL)"
+       tristate "Asus Laptop Extras"
        depends on ACPI
-       depends on EXPERIMENTAL && !ACPI_ASUS
+       depends on !ACPI_ASUS
        select LEDS_CLASS
        select NEW_LEDS
        select BACKLIGHT_CLASS_DEVICE
@@ -45,12 +62,12 @@ config ASUS_LAPTOP
        ---help---
          This is the new Linux driver for Asus laptops. It may also support some
          MEDION, JVC or VICTOR laptops. It makes all the extra buttons generate
-         standard ACPI events that go through /proc/acpi/events. It also adds
+         standard ACPI events and input events. It also adds
          support for video output switching, LCD backlight control, Bluetooth and
          Wlan control, and most importantly, allows you to blink those fancy LEDs.
 
          For more information and a userspace daemon for handling the extra
-         buttons see <http://acpi4asus.sf.net/>.
+         buttons see <http://acpi4asus.sf.net>.
 
          If you have an ACPI-compatible ASUS laptop, say Y or M here.
 
@@ -342,7 +359,10 @@ config EEEPC_LAPTOP
        select HWMON
        ---help---
          This driver supports the Fn-Fx keys on Eee PC laptops.
-         It also adds the ability to switch camera/wlan on/off.
+
+         It also gives access to some extra laptop functionality such as
+         Bluetooth and the backlight, and allows powering some other
+         devices on and off.
 
          If you have an Eee PC laptop, say Y or M here.
 
@@ -369,7 +389,7 @@ config ACPI_WMI
          any ACPI-WMI devices.
 
 config ACPI_ASUS
-       tristate "ASUS/Medion Laptop Extras"
+       tristate "ASUS/Medion Laptop Extras (DEPRECATED)"
        depends on ACPI
        select BACKLIGHT_CLASS_DEVICE
        ---help---
@@ -390,7 +410,7 @@ config ACPI_ASUS
          parameters.
 
          More information and a userspace daemon for handling the extra buttons
-         at <http://sourceforge.net/projects/acpi4asus/>.
+         at <http://acpi4asus.sf.net>.
 
          If you have an ACPI-compatible ASUS laptop, say Y or M here. This
          driver is still under development, so if your laptop is unsupported or
index e40c7bd1b87e55358a4bb02235e5e6ca7720bfdf..641b8bfa5538a7ca3ed0ebfc62410552bbd8dfb1 100644 (file)
@@ -9,6 +9,7 @@ obj-$(CONFIG_COMPAL_LAPTOP)     += compal-laptop.o
 obj-$(CONFIG_DELL_LAPTOP)      += dell-laptop.o
 obj-$(CONFIG_DELL_WMI)         += dell-wmi.o
 obj-$(CONFIG_ACER_WMI)         += acer-wmi.o
+obj-$(CONFIG_ACERHDF)          += acerhdf.o
 obj-$(CONFIG_HP_WMI)           += hp-wmi.o
 obj-$(CONFIG_TC1100_WMI)       += tc1100-wmi.o
 obj-$(CONFIG_SONY_LAPTOP)      += sony-laptop.o
diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
new file mode 100644 (file)
index 0000000..bdfee17
--- /dev/null
@@ -0,0 +1,602 @@
+/*
+ * acerhdf - A driver which monitors the temperature
+ *           of the aspire one netbook, turns on/off the fan
+ *           as soon as the upper/lower threshold is reached.
+ *
+ * (C) 2009 - Peter Feuerer     peter (a) piie.net
+ *                              http://piie.net
+ *     2009 Borislav Petkov <petkovbb@gmail.com>
+ *
+ * Inspired by and many thanks to:
+ *  o acerfand   - Rachel Greenham
+ *  o acer_ec.pl - Michael Kurz     michi.kurz (at) googlemail.com
+ *               - Petr Tomasek     tomasek (#) etf,cuni,cz
+ *               - Carlos Corbacho  cathectic (at) gmail.com
+ *  o lkml       - Matthew Garrett
+ *               - Borislav Petkov
+ *               - Andreas Mohr
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#define pr_fmt(fmt) "acerhdf: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/dmi.h>
+#include <acpi/acpi_drivers.h>
+#include <linux/sched.h>
+#include <linux/thermal.h>
+#include <linux/platform_device.h>
+
+/*
+ * The driver is started with "kernel mode off" by default. That means, the BIOS
+ * is still in control of the fan. In this mode the driver allows to read the
+ * temperature of the cpu and a userspace tool may take over control of the fan.
+ * If the driver is switched to "kernel mode" (e.g. via module parameter) the
+ * driver is in full control of the fan. If you want the module to be started in
+ * kernel mode by default, define the following:
+ */
+#undef START_IN_KERNEL_MODE
+
+#define DRV_VER "0.5.13"
+
+/*
+ * According to the Atom N270 datasheet,
+ * (http://download.intel.com/design/processor/datashts/320032.pdf) the
+ * CPU's optimal operating limits denoted in junction temperature as
+ * measured by the on-die thermal monitor are within 0 <= Tj <= 90. So,
+ * assume 89°C is critical temperature.
+ */
+#define ACERHDF_TEMP_CRIT 89
+#define ACERHDF_FAN_OFF 0
+#define ACERHDF_FAN_AUTO 1
+
+/*
+ * No matter what value the user puts into the fanon variable, turn on the fan
+ * at 80 degree Celsius to prevent hardware damage
+ */
+#define ACERHDF_MAX_FANON 80
+
+/*
+ * Maximum interval between two temperature checks is 15 seconds, as the die
+ * can get hot really fast under heavy load (plus we shouldn't forget about
+ * possible impact of _external_ aggressive sources such as heaters, sun etc.)
+ */
+#define ACERHDF_MAX_INTERVAL 15
+
+#ifdef START_IN_KERNEL_MODE
+static int kernelmode = 1;
+#else
+static int kernelmode;
+#endif
+
+/*
+ * Tunables and driver state.
+ *
+ * interval is the polling interval in seconds; fanon and fanoff are the
+ * temperatures at which the fan is switched on and off.  fanstate
+ * caches the last state written by acerhdf_change_fanstate() and
+ * prev_interval the interval last applied to the thermal zone.
+ */
+static unsigned int interval = 10;
+static unsigned int fanon = 63;
+static unsigned int fanoff = 58;
+static unsigned int verbose;
+static unsigned int fanstate = ACERHDF_FAN_AUTO;
+static char force_bios[16];
+static unsigned int prev_interval;
+
+/* device handles are only used inside this file, so keep them static */
+static struct thermal_zone_device *thz_dev;
+static struct thermal_cooling_device *cl_dev;
+static struct platform_device *acerhdf_dev;
+
+/* kernelmode is declared as int, so register it with the matching type */
+module_param(kernelmode, int, 0);
+MODULE_PARM_DESC(kernelmode, "Kernel mode fan control on / off");
+module_param(interval, uint, 0600);
+MODULE_PARM_DESC(interval, "Polling interval of temperature check");
+module_param(fanon, uint, 0600);
+MODULE_PARM_DESC(fanon, "Turn the fan on above this temperature");
+module_param(fanoff, uint, 0600);
+MODULE_PARM_DESC(fanoff, "Turn the fan off below this temperature");
+module_param(verbose, uint, 0600);
+MODULE_PARM_DESC(verbose, "Enable verbose dmesg output");
+module_param_string(force_bios, force_bios, sizeof(force_bios), 0);
+MODULE_PARM_DESC(force_bios, "Force BIOS version and omit BIOS check");
+
+/*
+ * BIOS settings: EC register layout and fan commands for one
+ * vendor / BIOS version combination.
+ */
+struct bios_settings_t {
+       const char *vendor;      /* compared with the DMI system vendor */
+       const char *version;     /* compared with the DMI BIOS version */
+       unsigned char fanreg;    /* EC register used to read/write the fan command */
+       unsigned char tempreg;   /* EC register the temperature is read from */
+       unsigned char fancmd[2]; /* fan off and auto commands */
+};
+
+/*
+ * Register addresses and values for different BIOS versions.
+ *
+ * Columns: vendor, version, fanreg, tempreg, {fan off cmd, fan auto cmd}.
+ * The entry with an empty version string terminates the table.
+ */
+static const struct bios_settings_t bios_tbl[] = {
+       {"Acer", "v0.3109", 0x55, 0x58, {0x1f, 0x00} },
+       {"Acer", "v0.3114", 0x55, 0x58, {0x1f, 0x00} },
+       {"Acer", "v0.3301", 0x55, 0x58, {0xaf, 0x00} },
+       {"Acer", "v0.3304", 0x55, 0x58, {0xaf, 0x00} },
+       {"Acer", "v0.3305", 0x55, 0x58, {0xaf, 0x00} },
+       {"Acer", "v0.3308", 0x55, 0x58, {0x21, 0x00} },
+       {"Acer", "v0.3309", 0x55, 0x58, {0x21, 0x00} },
+       {"Acer", "v0.3310", 0x55, 0x58, {0x21, 0x00} },
+       {"Gateway", "v0.3103", 0x55, 0x58, {0x21, 0x00} },
+       {"Packard Bell", "v0.3105", 0x55, 0x58, {0x21, 0x00} },
+       {"", "", 0, 0, {0, 0} }
+};
+
+static const struct bios_settings_t *bios_cfg __read_mostly;
+
+
+/*
+ * Read the temperature register of the embedded controller.
+ *
+ * On success the raw register value is stored in *temp and 0 is
+ * returned; a failing EC read yields -EINVAL and leaves *temp alone.
+ */
+static int acerhdf_get_temp(int *temp)
+{
+       u8 raw;
+       int failed;
+
+       failed = ec_read(bios_cfg->tempreg, &raw);
+       if (failed)
+               return -EINVAL;
+
+       *temp = raw;
+       return 0;
+}
+
+/*
+ * Read the fan register of the embedded controller and translate it
+ * into a fan state.
+ *
+ * *state becomes ACERHDF_FAN_OFF when the register holds the "off"
+ * command of the detected BIOS and ACERHDF_FAN_AUTO for any other
+ * value.  Returns 0 on success, -EINVAL if the EC read fails.
+ */
+static int acerhdf_get_fanstate(int *state)
+{
+       u8 reg;
+
+       if (ec_read(bios_cfg->fanreg, &reg))
+               return -EINVAL;
+
+       if (reg == bios_cfg->fancmd[ACERHDF_FAN_OFF])
+               *state = ACERHDF_FAN_OFF;
+       else
+               *state = ACERHDF_FAN_AUTO;
+
+       return 0;
+}
+
+/*
+ * Write the fan command for @state to the embedded controller and
+ * remember the state in fanstate.
+ *
+ * Any value other than ACERHDF_FAN_OFF / ACERHDF_FAN_AUTO is reported
+ * and replaced by ACERHDF_FAN_AUTO.  The verbose message is printed
+ * before that check, based on the value originally passed in.
+ */
+static void acerhdf_change_fanstate(int state)
+{
+       const char *label = (state == ACERHDF_FAN_OFF) ? "OFF" : "ON";
+
+       if (verbose)
+               pr_notice("fan %s\n", label);
+
+       switch (state) {
+       case ACERHDF_FAN_OFF:
+       case ACERHDF_FAN_AUTO:
+               break;
+       default:
+               pr_err("invalid fan state %d requested, setting to auto!\n",
+                       state);
+               state = ACERHDF_FAN_AUTO;
+               break;
+       }
+
+       fanstate = state;
+       ec_write(bios_cfg->fanreg, bios_cfg->fancmd[state]);
+}
+
+/*
+ * Sanitize the writable module parameters.
+ *
+ * fanon is capped at ACERHDF_MAX_FANON.  In kernel mode, a changed
+ * interval is capped at ACERHDF_MAX_INTERVAL and then applied to the
+ * polling delay of @thermal (in milliseconds).
+ */
+static void acerhdf_check_param(struct thermal_zone_device *thermal)
+{
+       if (fanon > ACERHDF_MAX_FANON) {
+               pr_err("fanon temperature too high, set to %d\n",
+                               ACERHDF_MAX_FANON);
+               fanon = ACERHDF_MAX_FANON;
+       }
+
+       /* the interval only matters while the kernel drives the fan */
+       if (!kernelmode || prev_interval == interval)
+               return;
+
+       if (interval > ACERHDF_MAX_INTERVAL) {
+               pr_err("interval too high, set to %d\n",
+                       ACERHDF_MAX_INTERVAL);
+               interval = ACERHDF_MAX_INTERVAL;
+       }
+
+       if (verbose)
+               pr_notice("interval changed to: %d\n",
+                               interval);
+
+       thermal->polling_delay = interval*1000;
+       prev_interval = interval;
+}
+
+/*
+ * Thermal zone get_temp callback, invoked by the delayed polling of
+ * the thermal zone.
+ *
+ * The module parameters cannot be validated in their own accessors, so
+ * settings written through sysfs are checked here via
+ * acerhdf_check_param(), i.e. at most one polling interval late.
+ *
+ * Stores the EC temperature in *t and returns 0, or returns the error
+ * from acerhdf_get_temp().
+ */
+static int acerhdf_get_ec_temp(struct thermal_zone_device *thermal,
+                              unsigned long *t)
+{
+       int temp;
+       int err;
+
+       acerhdf_check_param(thermal);
+
+       err = acerhdf_get_temp(&temp);
+       if (!err) {
+               if (verbose)
+                       pr_notice("temp %d\n", temp);
+               *t = temp;
+       }
+
+       return err;
+}
+
+/*
+ * Thermal zone bind callback: attach the acerhdf cooling device to
+ * trip point 0 of @thermal.  Other cooling devices are ignored.
+ *
+ * Returns 0 on success or for a foreign device, -EINVAL if the
+ * binding fails.
+ */
+static int acerhdf_bind(struct thermal_zone_device *thermal,
+                       struct thermal_cooling_device *cdev)
+{
+       if (cdev == cl_dev &&
+           thermal_zone_bind_cooling_device(thermal, 0, cdev)) {
+               pr_err("error binding cooling dev\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Thermal zone unbind callback: detach the acerhdf cooling device
+ * from trip point 0 of @thermal.  Other cooling devices are ignored.
+ *
+ * Returns 0 on success or for a foreign device, -EINVAL if the
+ * unbinding fails.
+ */
+static int acerhdf_unbind(struct thermal_zone_device *thermal,
+                         struct thermal_cooling_device *cdev)
+{
+       if (cdev == cl_dev &&
+           thermal_zone_unbind_cooling_device(thermal, 0, cdev)) {
+               pr_err("error unbinding cooling dev\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Leave kernel mode: write the "auto" fan command, clear kernelmode
+ * and, if the thermal zone exists, set its polling delay to 0.
+ */
+static inline void acerhdf_revert_to_bios_mode(void)
+{
+       acerhdf_change_fanstate(ACERHDF_FAN_AUTO);
+       kernelmode = 0;
+       if (thz_dev)
+               thz_dev->polling_delay = 0;
+       pr_notice("kernel mode fan control OFF\n");
+}
+/*
+ * Enter kernel mode: set kernelmode, program the polling delay of the
+ * thermal zone from interval (seconds -> milliseconds) and trigger an
+ * immediate zone update.  thz_dev is dereferenced without a NULL check.
+ */
+static inline void acerhdf_enable_kernelmode(void)
+{
+       kernelmode = 1;
+
+       thz_dev->polling_delay = interval*1000;
+       thermal_zone_device_update(thz_dev);
+       pr_notice("kernel mode fan control ON\n");
+}
+
+/*
+ * Thermal zone get_mode callback: report THERMAL_DEVICE_ENABLED while
+ * the driver is in kernel mode and THERMAL_DEVICE_DISABLED otherwise.
+ * Always returns 0.
+ */
+static int acerhdf_get_mode(struct thermal_zone_device *thermal,
+                           enum thermal_device_mode *mode)
+{
+       if (verbose)
+               pr_notice("kernel mode fan control %d\n", kernelmode);
+
+       if (kernelmode)
+               *mode = THERMAL_DEVICE_ENABLED;
+       else
+               *mode = THERMAL_DEVICE_DISABLED;
+
+       return 0;
+}
+
+/*
+ * Thermal zone set_mode callback.
+ *
+ * enabled:  the thermal layer of the kernel takes care of the
+ *           temperature and the fan.
+ * disabled: the BIOS takes control of the fan.
+ *
+ * A request for the mode that is already active is a no-op.
+ * Always returns 0.
+ */
+static int acerhdf_set_mode(struct thermal_zone_device *thermal,
+                           enum thermal_device_mode mode)
+{
+       if (mode == THERMAL_DEVICE_DISABLED) {
+               if (kernelmode)
+                       acerhdf_revert_to_bios_mode();
+       } else if (mode == THERMAL_DEVICE_ENABLED) {
+               if (!kernelmode)
+                       acerhdf_enable_kernelmode();
+       }
+
+       return 0;
+}
+
+/*
+ * Thermal zone get_trip_type callback.
+ *
+ * The zone has a single trip point (index 0) of type
+ * THERMAL_TRIP_ACTIVE.  Any other index is rejected with -EINVAL so a
+ * caller never sees success with *type left unset.
+ */
+static int acerhdf_get_trip_type(struct thermal_zone_device *thermal, int trip,
+                                enum thermal_trip_type *type)
+{
+       if (trip != 0)
+               return -EINVAL;
+
+       *type = THERMAL_TRIP_ACTIVE;
+
+       return 0;
+}
+
+/*
+ * Thermal zone get_trip_temp callback.
+ *
+ * Trip point 0 is located at the fanon temperature.  Any other index
+ * is rejected with -EINVAL so a caller never sees success with *temp
+ * left unset.
+ */
+static int acerhdf_get_trip_temp(struct thermal_zone_device *thermal, int trip,
+                                unsigned long *temp)
+{
+       if (trip != 0)
+               return -EINVAL;
+
+       *temp = fanon;
+
+       return 0;
+}
+
+/* thermal zone get_crit_temp callback: always reports ACERHDF_TEMP_CRIT */
+static int acerhdf_get_crit_temp(struct thermal_zone_device *thermal,
+                                unsigned long *temperature)
+{
+       *temperature = ACERHDF_TEMP_CRIT;
+       return 0;
+}
+
+/*
+ * bind callback functions to thermalzone
+ *
+ * Only referenced from this file, hence static.
+ */
+static struct thermal_zone_device_ops acerhdf_dev_ops = {
+       .bind = acerhdf_bind,
+       .unbind = acerhdf_unbind,
+       .get_temp = acerhdf_get_ec_temp,
+       .get_mode = acerhdf_get_mode,
+       .set_mode = acerhdf_set_mode,
+       .get_trip_type = acerhdf_get_trip_type,
+       .get_trip_temp = acerhdf_get_trip_temp,
+       .get_crit_temp = acerhdf_get_crit_temp,
+};
+
+
+/*
+ * cooling device callback functions
+ *
+ * get maximal fan cooling state: the fan only knows state 0 and
+ * state 1, so the maximum is always 1.
+ */
+static int acerhdf_get_max_state(struct thermal_cooling_device *cdev,
+                                unsigned long *state)
+{
+       *state = 1;
+
+       return 0;
+}
+
+/*
+ * Cooling device get_cur_state callback.
+ *
+ * Reports 1 while the fan is in ACERHDF_FAN_AUTO and 0 otherwise.
+ * Returns 0 on success or the error from acerhdf_get_fanstate().
+ */
+static int acerhdf_get_cur_state(struct thermal_cooling_device *cdev,
+                                unsigned long *state)
+{
+       int fan;
+       int err = acerhdf_get_fanstate(&fan);
+
+       if (err)
+               return err;
+
+       if (fan == ACERHDF_FAN_AUTO)
+               *state = 1;
+       else
+               *state = 0;
+
+       return 0;
+}
+
+/*
+ * Cooling device set_cur_state callback: change the current fan state.
+ *
+ * Requests are ignored (0 is returned) unless the driver is in kernel
+ * mode.  State 0 switches the fan off, but only if it is currently in
+ * auto mode and the temperature is below fanoff; any other state
+ * switches a stopped fan back to auto.
+ *
+ * If the temperature or the fan state cannot be read, control is
+ * handed back to the BIOS and -EINVAL is returned.
+ */
+static int acerhdf_set_cur_state(struct thermal_cooling_device *cdev,
+                                unsigned long state)
+{
+       int cur_temp, cur_state;
+
+       if (!kernelmode)
+               return 0;
+
+       if (acerhdf_get_temp(&cur_temp)) {
+               pr_err("error reading temperature, hand off control to BIOS\n");
+               goto hand_off;
+       }
+
+       if (acerhdf_get_fanstate(&cur_state)) {
+               pr_err("error reading fan state, hand off control to BIOS\n");
+               goto hand_off;
+       }
+
+       if (state != 0) {
+               if (cur_state == ACERHDF_FAN_OFF)
+                       acerhdf_change_fanstate(ACERHDF_FAN_AUTO);
+               return 0;
+       }
+
+       /* turn fan off only if below fanoff temperature */
+       if ((cur_state == ACERHDF_FAN_AUTO) && (cur_temp < fanoff))
+               acerhdf_change_fanstate(ACERHDF_FAN_OFF);
+
+       return 0;
+
+hand_off:
+       acerhdf_revert_to_bios_mode();
+       return -EINVAL;
+}
+
+/*
+ * bind fan callbacks to fan device
+ *
+ * Only referenced from this file, hence static.
+ */
+static struct thermal_cooling_device_ops acerhdf_cooling_ops = {
+       .get_max_state = acerhdf_get_max_state,
+       .get_cur_state = acerhdf_get_cur_state,
+       .set_cur_state = acerhdf_set_cur_state,
+};
+
+/*
+ * suspend / resume functionality
+ *
+ * On suspend, a driver running in kernel mode writes the "auto" fan
+ * command.  kernelmode itself is left untouched.  Always returns 0.
+ */
+static int acerhdf_suspend(struct platform_device *dev, pm_message_t state)
+{
+       if (kernelmode)
+               acerhdf_change_fanstate(ACERHDF_FAN_AUTO);
+
+       if (verbose)
+               pr_notice("going suspend\n");
+
+       return 0;
+}
+
+/* resume callback: only logs (when verbose is set); always returns 0 */
+static int acerhdf_resume(struct platform_device *device)
+{
+       if (verbose)
+               pr_notice("resuming\n");
+
+       return 0;
+}
+
+/* platform driver probe callback: nothing to set up, always succeeds */
+static int __devinit acerhdf_probe(struct platform_device *device)
+{
+       return 0;
+}
+
+/* platform driver remove callback: nothing to tear down, always succeeds */
+static int acerhdf_remove(struct platform_device *device)
+{
+       return 0;
+}
+
+/*
+ * Platform driver providing the suspend/resume hooks.
+ * Only referenced from this file, hence static.
+ */
+static struct platform_driver acerhdf_drv = {
+       .driver = {
+               .name = "acerhdf",
+               .owner = THIS_MODULE,
+       },
+       .probe = acerhdf_probe,
+       .remove = acerhdf_remove,
+       .suspend = acerhdf_suspend,
+       .resume = acerhdf_resume,
+};
+
+
+/*
+ * check hardware
+ *
+ * Identify the machine via DMI and select the matching bios_tbl[]
+ * entry.  Unless a BIOS version is forced through the force_bios
+ * parameter, the product name must start with "AO".  Forcing a version
+ * replaces the DMI BIOS version for the table lookup and switches
+ * kernel mode off.
+ *
+ * On success bios_cfg points to the matching entry and 0 is returned.
+ * -EINVAL is returned if the DMI strings are unavailable, the machine
+ * is not an Aspire One, or the vendor/version pair is not in the table.
+ */
+static int acerhdf_check_hardware(void)
+{
+       char const *vendor, *version, *product;
+       int i;
+
+       /* get BIOS data */
+       vendor  = dmi_get_system_info(DMI_SYS_VENDOR);
+       version = dmi_get_system_info(DMI_BIOS_VERSION);
+       product = dmi_get_system_info(DMI_PRODUCT_NAME);
+
+       pr_info("Acer Aspire One Fan driver, v.%s\n", DRV_VER);
+
+       /*
+        * dmi_get_system_info() returns NULL for fields the firmware
+        * does not provide; the string comparisons below must never
+        * see a NULL pointer.
+        */
+       if (!vendor || !version || !product) {
+               pr_err("error getting hardware information\n");
+               return -EINVAL;
+       }
+
+       if (!force_bios[0]) {
+               if (strncmp(product, "AO", 2)) {
+                       pr_err("no Aspire One hardware found\n");
+                       return -EINVAL;
+               }
+       } else {
+               /* switch to the forced version first so the log shows it */
+               version = force_bios;
+               pr_info("forcing BIOS version: %s\n", version);
+               kernelmode = 0;
+       }
+
+       if (verbose)
+               pr_info("BIOS info: %s %s, product: %s\n",
+                       vendor, version, product);
+
+       /* search BIOS version and vendor in BIOS settings table */
+       for (i = 0; bios_tbl[i].version[0]; i++) {
+               if (!strcmp(bios_tbl[i].vendor, vendor) &&
+                   !strcmp(bios_tbl[i].version, version)) {
+                       bios_cfg = &bios_tbl[i];
+                       break;
+               }
+       }
+
+       if (!bios_cfg) {
+               pr_err("unknown (unsupported) BIOS version %s/%s, "
+                       "please report, aborting!\n", vendor, version);
+               return -EINVAL;
+       }
+
+       /*
+        * if started with kernel mode off, prevent the kernel from switching
+        * off the fan
+        */
+       if (!kernelmode) {
+               pr_notice("Fan control off, to enable do:\n");
+               pr_notice("echo -n \"enabled\" > "
+                       "/sys/class/thermal/thermal_zone0/mode\n");
+       }
+
+       return 0;
+}
+
+/*
+ * Register the platform driver and create the "acerhdf" platform
+ * device.
+ *
+ * Returns 0 on success.  On failure everything registered so far is
+ * undone, acerhdf_dev is left NULL and a negative errno is returned.
+ */
+static int acerhdf_register_platform(void)
+{
+       int err;
+
+       err = platform_driver_register(&acerhdf_drv);
+       if (err)
+               return err;
+
+       acerhdf_dev = platform_device_alloc("acerhdf", -1);
+       if (!acerhdf_dev) {
+               err = -ENOMEM;
+               goto err_device_alloc;
+       }
+
+       err = platform_device_add(acerhdf_dev);
+       if (err)
+               goto err_device_add;
+
+       return 0;
+
+err_device_add:
+       /* a device that was never added is released with a plain put */
+       platform_device_put(acerhdf_dev);
+       acerhdf_dev = NULL;
+err_device_alloc:
+       platform_driver_unregister(&acerhdf_drv);
+       return err;
+}
+
+/*
+ * Remove the platform device and unregister the platform driver.
+ * Does nothing if no platform device exists.
+ */
+static void acerhdf_unregister_platform(void)
+{
+       if (!acerhdf_dev)
+               return;
+
+       /*
+        * platform_device_unregister() is del + put; a bare
+        * platform_device_del() would leak the device structure.
+        */
+       platform_device_unregister(acerhdf_dev);
+       acerhdf_dev = NULL;
+       platform_driver_unregister(&acerhdf_drv);
+}
+
+/*
+ * Register the fan as a cooling device and create the thermal zone
+ * with a single trip point.  The zone is polled every interval seconds
+ * in kernel mode and not polled at all otherwise.
+ *
+ * Returns 0 on success, -EINVAL if either registration fails.  A
+ * failed registration leaves the corresponding pointer NULL so that
+ * the teardown path never sees an ERR_PTR value.
+ */
+static int acerhdf_register_thermal(void)
+{
+       cl_dev = thermal_cooling_device_register("acerhdf-fan", NULL,
+                                                &acerhdf_cooling_ops);
+
+       if (IS_ERR(cl_dev)) {
+               cl_dev = NULL;
+               return -EINVAL;
+       }
+
+       thz_dev = thermal_zone_device_register("acerhdf", 1, NULL,
+                                             &acerhdf_dev_ops, 0, 0, 0,
+                                             (kernelmode) ? interval*1000 : 0);
+       if (IS_ERR(thz_dev)) {
+               thz_dev = NULL;
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Unregister the cooling device and the thermal zone, if registered.
+ *
+ * A pointer may hold an ERR_PTR value after a failed registration;
+ * such a value is only reset, never passed to the unregister calls.
+ */
+static void acerhdf_unregister_thermal(void)
+{
+       if (cl_dev && !IS_ERR(cl_dev))
+               thermal_cooling_device_unregister(cl_dev);
+       cl_dev = NULL;
+
+       if (thz_dev && !IS_ERR(thz_dev))
+               thermal_zone_device_unregister(thz_dev);
+       thz_dev = NULL;
+}
+
+/*
+ * Module entry point: check the hardware, then register the platform
+ * and thermal devices.
+ *
+ * Unsupported hardware is reported as -ENODEV.  Registration failures
+ * are returned as they are, instead of being folded into -ENODEV, and
+ * only what may actually have been registered is torn down.
+ */
+static int __init acerhdf_init(void)
+{
+       int err;
+
+       if (acerhdf_check_hardware())
+               return -ENODEV;
+
+       /* nothing to undo here when the platform registration fails */
+       err = acerhdf_register_platform();
+       if (err)
+               return err;
+
+       err = acerhdf_register_thermal();
+       if (err)
+               goto err_unreg;
+
+       return 0;
+
+err_unreg:
+       acerhdf_unregister_thermal();
+       acerhdf_unregister_platform();
+       return err;
+}
+
+/*
+ * Module exit: write the "auto" fan command first, then unregister
+ * the thermal and platform devices.
+ */
+static void __exit acerhdf_exit(void)
+{
+       acerhdf_change_fanstate(ACERHDF_FAN_AUTO);
+       acerhdf_unregister_thermal();
+       acerhdf_unregister_platform();
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Peter Feuerer");
+MODULE_DESCRIPTION("Aspire One temperature and fan driver");
+MODULE_ALIAS("dmi:*:*Acer*:*:");
+MODULE_ALIAS("dmi:*:*Gateway*:*:");
+MODULE_ALIAS("dmi:*:*Packard Bell*:*:");
+
+module_init(acerhdf_init);
+module_exit(acerhdf_exit);
index bfc1a8892a32f5eb03c8c2752ffb2daff977488c..db657bbeec908e456facb8271fa8624fe0f0a515 100644 (file)
@@ -33,6 +33,8 @@
  *  Sam Lin        - GPS support
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #define ASUS_HOTK_NAME          "Asus Laptop Support"
 #define ASUS_HOTK_CLASS         "hotkey"
 #define ASUS_HOTK_DEVICE_NAME   "Hotkey"
-#define ASUS_HOTK_FILE          "asus-laptop"
+#define ASUS_HOTK_FILE          KBUILD_MODNAME
 #define ASUS_HOTK_PREFIX        "\\_SB.ATKD."
 
+
 /*
  * Some events we use, same for all Asus
  */
@@ -207,13 +210,17 @@ MODULE_DEVICE_TABLE(acpi, asus_device_ids);
 
 static int asus_hotk_add(struct acpi_device *device);
 static int asus_hotk_remove(struct acpi_device *device, int type);
+static void asus_hotk_notify(struct acpi_device *device, u32 event);
+
 static struct acpi_driver asus_hotk_driver = {
        .name = ASUS_HOTK_NAME,
        .class = ASUS_HOTK_CLASS,
        .ids = asus_device_ids,
+       .flags = ACPI_DRIVER_ALL_NOTIFY_EVENTS,
        .ops = {
                .add = asus_hotk_add,
                .remove = asus_hotk_remove,
+               .notify = asus_hotk_notify,
                },
 };
 
@@ -323,7 +330,7 @@ static int read_wireless_status(int mask)
 
        rv = acpi_evaluate_integer(wireless_status_handle, NULL, NULL, &status);
        if (ACPI_FAILURE(rv))
-               printk(ASUS_WARNING "Error reading Wireless status\n");
+               pr_warning("Error reading Wireless status\n");
        else
                return (status & mask) ? 1 : 0;
 
@@ -337,7 +344,7 @@ static int read_gps_status(void)
 
        rv = acpi_evaluate_integer(gps_status_handle, NULL, NULL, &status);
        if (ACPI_FAILURE(rv))
-               printk(ASUS_WARNING "Error reading GPS status\n");
+               pr_warning("Error reading GPS status\n");
        else
                return status ? 1 : 0;
 
@@ -377,7 +384,7 @@ static void write_status(acpi_handle handle, int out, int mask)
        }
 
        if (write_acpi_int(handle, NULL, out, NULL))
-               printk(ASUS_WARNING " write failed %x\n", mask);
+               pr_warning(" write failed %x\n", mask);
 }
 
 /* /sys/class/led handlers */
@@ -420,7 +427,7 @@ static int set_lcd_state(int value)
                                              NULL, NULL, NULL);
 
                if (ACPI_FAILURE(status))
-                       printk(ASUS_WARNING "Error switching LCD\n");
+                       pr_warning("Error switching LCD\n");
        }
 
        write_status(NULL, lcd, LCD_ON);
@@ -444,7 +451,7 @@ static int read_brightness(struct backlight_device *bd)
 
        rv = acpi_evaluate_integer(brightness_get_handle, NULL, NULL, &value);
        if (ACPI_FAILURE(rv))
-               printk(ASUS_WARNING "Error reading brightness\n");
+               pr_warning("Error reading brightness\n");
 
        return value;
 }
@@ -457,7 +464,7 @@ static int set_brightness(struct backlight_device *bd, int value)
        /* 0 <= value <= 15 */
 
        if (write_acpi_int(brightness_set_handle, NULL, value, NULL)) {
-               printk(ASUS_WARNING "Error changing brightness\n");
+               pr_warning("Error changing brightness\n");
                ret = -EIO;
        }
 
@@ -587,7 +594,7 @@ static ssize_t store_ledd(struct device *dev, struct device_attribute *attr,
        rv = parse_arg(buf, count, &value);
        if (rv > 0) {
                if (write_acpi_int(ledd_set_handle, NULL, value, NULL))
-                       printk(ASUS_WARNING "LED display write failed\n");
+                       pr_warning("LED display write failed\n");
                else
                        hotk->ledd_status = (u32) value;
        }
@@ -632,7 +639,7 @@ static void set_display(int value)
 {
        /* no sanity check needed for now */
        if (write_acpi_int(display_set_handle, NULL, value, NULL))
-               printk(ASUS_WARNING "Error setting display\n");
+               pr_warning("Error setting display\n");
        return;
 }
 
@@ -647,7 +654,7 @@ static int read_display(void)
                rv = acpi_evaluate_integer(display_get_handle, NULL,
                                           NULL, &value);
                if (ACPI_FAILURE(rv))
-                       printk(ASUS_WARNING "Error reading display status\n");
+                       pr_warning("Error reading display status\n");
        }
 
        value &= 0x0F;          /* needed for some models, shouldn't hurt others */
@@ -689,7 +696,7 @@ static ssize_t store_disp(struct device *dev, struct device_attribute *attr,
 static void set_light_sens_switch(int value)
 {
        if (write_acpi_int(ls_switch_handle, NULL, value, NULL))
-               printk(ASUS_WARNING "Error setting light sensor switch\n");
+               pr_warning("Error setting light sensor switch\n");
        hotk->light_switch = value;
 }
 
@@ -714,7 +721,7 @@ static ssize_t store_lssw(struct device *dev, struct device_attribute *attr,
 static void set_light_sens_level(int value)
 {
        if (write_acpi_int(ls_level_handle, NULL, value, NULL))
-               printk(ASUS_WARNING "Error setting light sensor level\n");
+               pr_warning("Error setting light sensor level\n");
        hotk->light_level = value;
 }
 
@@ -812,7 +819,7 @@ static int asus_setkeycode(struct input_dev *dev, int scancode, int keycode)
        return -EINVAL;
 }
 
-static void asus_hotk_notify(acpi_handle handle, u32 event, void *data)
+static void asus_hotk_notify(struct acpi_device *device, u32 event)
 {
        static struct key_entry *key;
        u16 count;
@@ -975,11 +982,11 @@ static int asus_hotk_get_info(void)
         */
        status = acpi_get_table(ACPI_SIG_DSDT, 1, &asus_info);
        if (ACPI_FAILURE(status))
-               printk(ASUS_WARNING "Couldn't get the DSDT table header\n");
+               pr_warning("Couldn't get the DSDT table header\n");
 
        /* We have to write 0 on init this far for all ASUS models */
        if (write_acpi_int(hotk->handle, "INIT", 0, &buffer)) {
-               printk(ASUS_ERR "Hotkey initialization failed\n");
+               pr_err("Hotkey initialization failed\n");
                return -ENODEV;
        }
 
@@ -987,9 +994,9 @@ static int asus_hotk_get_info(void)
        status =
            acpi_evaluate_integer(hotk->handle, "BSTS", NULL, &bsts_result);
        if (ACPI_FAILURE(status))
-               printk(ASUS_WARNING "Error calling BSTS\n");
+               pr_warning("Error calling BSTS\n");
        else if (bsts_result)
-               printk(ASUS_NOTICE "BSTS called, 0x%02x returned\n",
+               pr_notice("BSTS called, 0x%02x returned\n",
                       (uint) bsts_result);
 
        /* This too ... */
@@ -1020,7 +1027,7 @@ static int asus_hotk_get_info(void)
                return -ENOMEM;
 
        if (*string)
-               printk(ASUS_NOTICE "  %s model detected\n", string);
+               pr_notice("  %s model detected\n", string);
 
        ASUS_HANDLE_INIT(mled_set);
        ASUS_HANDLE_INIT(tled_set);
@@ -1077,7 +1084,7 @@ static int asus_input_init(void)
 
        hotk->inputdev = input_allocate_device();
        if (!hotk->inputdev) {
-               printk(ASUS_INFO "Unable to allocate input device\n");
+               pr_info("Unable to allocate input device\n");
                return 0;
        }
        hotk->inputdev->name = "Asus Laptop extra buttons";
@@ -1096,7 +1103,7 @@ static int asus_input_init(void)
        }
        result = input_register_device(hotk->inputdev);
        if (result) {
-               printk(ASUS_INFO "Unable to register input device\n");
+               pr_info("Unable to register input device\n");
                input_free_device(hotk->inputdev);
        }
        return result;
@@ -1113,7 +1120,7 @@ static int asus_hotk_check(void)
        if (hotk->device->status.present) {
                result = asus_hotk_get_info();
        } else {
-               printk(ASUS_ERR "Hotkey device not present, aborting\n");
+               pr_err("Hotkey device not present, aborting\n");
                return -EINVAL;
        }
 
@@ -1124,13 +1131,12 @@ static int asus_hotk_found;
 
 static int asus_hotk_add(struct acpi_device *device)
 {
-       acpi_status status = AE_OK;
        int result;
 
        if (!device)
                return -EINVAL;
 
-       printk(ASUS_NOTICE "Asus Laptop Support version %s\n",
+       pr_notice("Asus Laptop Support version %s\n",
               ASUS_LAPTOP_VERSION);
 
        hotk = kzalloc(sizeof(struct asus_hotk), GFP_KERNEL);
@@ -1149,15 +1155,6 @@ static int asus_hotk_add(struct acpi_device *device)
 
        asus_hotk_add_fs();
 
-       /*
-        * We install the handler, it will receive the hotk in parameter, so, we
-        * could add other data to the hotk struct
-        */
-       status = acpi_install_notify_handler(hotk->handle, ACPI_ALL_NOTIFY,
-                                            asus_hotk_notify, hotk);
-       if (ACPI_FAILURE(status))
-               printk(ASUS_ERR "Error installing notify handler\n");
-
        asus_hotk_found = 1;
 
        /* WLED and BLED are on by default */
@@ -1198,16 +1195,9 @@ end:
 
 static int asus_hotk_remove(struct acpi_device *device, int type)
 {
-       acpi_status status = 0;
-
        if (!device || !acpi_driver_data(device))
                return -EINVAL;
 
-       status = acpi_remove_notify_handler(hotk->handle, ACPI_ALL_NOTIFY,
-                                           asus_hotk_notify);
-       if (ACPI_FAILURE(status))
-               printk(ASUS_ERR "Error removing notify handler\n");
-
        kfree(hotk->name);
        kfree(hotk);
 
@@ -1260,8 +1250,7 @@ static int asus_backlight_init(struct device *dev)
                bd = backlight_device_register(ASUS_HOTK_FILE, dev,
                                               NULL, &asusbl_ops);
                if (IS_ERR(bd)) {
-                       printk(ASUS_ERR
-                              "Could not register asus backlight device\n");
+                       pr_err("Could not register asus backlight device\n");
                        asus_backlight_device = NULL;
                        return PTR_ERR(bd);
                }
@@ -1334,7 +1323,6 @@ out:
 
 static int __init asus_laptop_init(void)
 {
-       struct device *dev;
        int result;
 
        if (acpi_disabled)
@@ -1356,24 +1344,10 @@ static int __init asus_laptop_init(void)
                return -ENODEV;
        }
 
-       dev = acpi_get_physical_device(hotk->device->handle);
-
-       if (!acpi_video_backlight_support()) {
-               result = asus_backlight_init(dev);
-               if (result)
-                       goto fail_backlight;
-       } else
-               printk(ASUS_INFO "Brightness ignored, must be controlled by "
-                      "ACPI video driver\n");
-
        result = asus_input_init();
        if (result)
                goto fail_input;
 
-       result = asus_led_init(dev);
-       if (result)
-               goto fail_led;
-
        /* Register platform stuff */
        result = platform_driver_register(&asuspf_driver);
        if (result)
@@ -1394,8 +1368,27 @@ static int __init asus_laptop_init(void)
        if (result)
                goto fail_sysfs;
 
+       result = asus_led_init(&asuspf_device->dev);
+       if (result)
+               goto fail_led;
+
+       if (!acpi_video_backlight_support()) {
+               result = asus_backlight_init(&asuspf_device->dev);
+               if (result)
+                       goto fail_backlight;
+       } else
+               pr_info("Brightness ignored, must be controlled by "
+                      "ACPI video driver\n");
+
        return 0;
 
+fail_backlight:
+       asus_led_exit();
+
+fail_led:
+       sysfs_remove_group(&asuspf_device->dev.kobj,
+                         &asuspf_attribute_group);
+
 fail_sysfs:
        platform_device_del(asuspf_device);
 
@@ -1406,15 +1399,9 @@ fail_platform_device1:
        platform_driver_unregister(&asuspf_driver);
 
 fail_platform_driver:
-       asus_led_exit();
-
-fail_led:
        asus_input_exit();
 
 fail_input:
-       asus_backlight_exit();
-
-fail_backlight:
 
        return result;
 }
index ba1f7497e4b9a99f5be8d9d9d406ff1f6b170dbd..ddf5240ade8cc0e10358553382ffa68716933ce0 100644 (file)
@@ -455,6 +455,8 @@ static struct asus_hotk *hotk;
  */
 static int asus_hotk_add(struct acpi_device *device);
 static int asus_hotk_remove(struct acpi_device *device, int type);
+static void asus_hotk_notify(struct acpi_device *device, u32 event);
+
 static const struct acpi_device_id asus_device_ids[] = {
        {"ATK0100", 0},
        {"", 0},
@@ -465,9 +467,11 @@ static struct acpi_driver asus_hotk_driver = {
        .name = "asus_acpi",
        .class = ACPI_HOTK_CLASS,
        .ids = asus_device_ids,
+       .flags = ACPI_DRIVER_ALL_NOTIFY_EVENTS,
        .ops = {
                .add = asus_hotk_add,
                .remove = asus_hotk_remove,
+               .notify = asus_hotk_notify,
                },
 };
 
@@ -1101,12 +1105,20 @@ static int asus_hotk_remove_fs(struct acpi_device *device)
        return 0;
 }
 
-static void asus_hotk_notify(acpi_handle handle, u32 event, void *data)
+static void asus_hotk_notify(struct acpi_device *device, u32 event)
 {
        /* TODO Find a better way to handle events count. */
        if (!hotk)
                return;
 
+       /*
+        * The BIOS *should* be sending us device events, but apparently
+        * Asus uses system events instead, so just ignore any device
+        * events we get.
+        */
+       if (event > ACPI_MAX_SYS_NOTIFY)
+               return;
+
        if ((event & ~((u32) BR_UP)) < 16)
                hotk->brightness = (event & ~((u32) BR_UP));
        else if ((event & ~((u32) BR_DOWN)) < 16)
@@ -1346,15 +1358,6 @@ static int asus_hotk_add(struct acpi_device *device)
        if (result)
                goto end;
 
-       /*
-        * We install the handler, it will receive the hotk in parameter, so, we
-        * could add other data to the hotk struct
-        */
-       status = acpi_install_notify_handler(hotk->handle, ACPI_SYSTEM_NOTIFY,
-                                            asus_hotk_notify, hotk);
-       if (ACPI_FAILURE(status))
-               printk(KERN_ERR "  Error installing notify handler\n");
-
        /* For laptops without GPLV: init the hotk->brightness value */
        if ((!hotk->methods->brightness_get)
            && (!hotk->methods->brightness_status)
@@ -1389,16 +1392,9 @@ end:
 
 static int asus_hotk_remove(struct acpi_device *device, int type)
 {
-       acpi_status status = 0;
-
        if (!device || !acpi_driver_data(device))
                return -EINVAL;
 
-       status = acpi_remove_notify_handler(hotk->handle, ACPI_SYSTEM_NOTIFY,
-                                           asus_hotk_notify);
-       if (ACPI_FAILURE(status))
-               printk(KERN_ERR "Asus ACPI: Error removing notify handler\n");
-
        asus_hotk_remove_fs(device);
 
        kfree(hotk);
index 2fab94162147e2b2bc0206bcad3e6b4b76764587..0f900cc9fa7a5131283c14b2aae8e11d7667b949 100644 (file)
@@ -46,10 +46,53 @@ struct key_entry {
        u16 keycode;
 };
 
-enum { KE_KEY, KE_SW, KE_END };
+enum { KE_KEY, KE_SW, KE_IGNORE, KE_END };
+
+/*
+ * Certain keys are flagged as KE_IGNORE. All of these are either
+ * notifications (rather than requests for change) or are also sent
+ * via the keyboard controller so should not be sent again.
+ */
 
 static struct key_entry dell_wmi_keymap[] = {
        {KE_KEY, 0xe045, KEY_PROG1},
+       {KE_KEY, 0xe009, KEY_EJECTCD},
+
+       /* These also contain the brightness level at offset 6 */
+       {KE_KEY, 0xe006, KEY_BRIGHTNESSUP},
+       {KE_KEY, 0xe005, KEY_BRIGHTNESSDOWN},
+
+       /* Battery health status button */
+       {KE_KEY, 0xe007, KEY_BATTERY},
+
+       /* This is actually for all radios. Although physically a
+        * switch, the notification does not provide an indication of
+        * state and so it should be reported as a key */
+       {KE_KEY, 0xe008, KEY_WLAN},
+
+       /* The next device is at offset 6, the active devices are at
+          offset 8 and the attached devices at offset 10 */
+       {KE_KEY, 0xe00b, KEY_DISPLAYTOGGLE},
+
+       {KE_IGNORE, 0xe00c, KEY_KBDILLUMTOGGLE},
+
+       /* BIOS error detected */
+       {KE_IGNORE, 0xe00d, KEY_RESERVED},
+
+       /* Wifi Catcher */
+       {KE_KEY, 0xe011, KEY_PROG2},
+
+       /* Ambient light sensor toggle */
+       {KE_IGNORE, 0xe013, KEY_RESERVED},
+
+       {KE_IGNORE, 0xe020, KEY_MUTE},
+       {KE_IGNORE, 0xe02e, KEY_VOLUMEDOWN},
+       {KE_IGNORE, 0xe030, KEY_VOLUMEUP},
+       {KE_IGNORE, 0xe033, KEY_KBDILLUMUP},
+       {KE_IGNORE, 0xe034, KEY_KBDILLUMDOWN},
+       {KE_IGNORE, 0xe03a, KEY_CAPSLOCK},
+       {KE_IGNORE, 0xe045, KEY_NUMLOCK},
+       {KE_IGNORE, 0xe046, KEY_SCROLLLOCK},
        {KE_END, 0}
 };
 
@@ -122,15 +165,20 @@ static void dell_wmi_notify(u32 value, void *context)
 
        if (obj && obj->type == ACPI_TYPE_BUFFER) {
                int *buffer = (int *)obj->buffer.pointer;
-               key = dell_wmi_get_entry_by_scancode(buffer[1]);
+               /*
+                *  The upper bytes of the event may contain
+                *  additional information, so mask them off for the
+                *  scancode lookup
+                */
+               key = dell_wmi_get_entry_by_scancode(buffer[1] & 0xFFFF);
                if (key) {
                        input_report_key(dell_wmi_input_dev, key->keycode, 1);
                        input_sync(dell_wmi_input_dev);
                        input_report_key(dell_wmi_input_dev, key->keycode, 0);
                        input_sync(dell_wmi_input_dev);
-               } else
+               } else if (buffer[1] & 0xFFFF)
                        printk(KERN_INFO "dell-wmi: Unknown key %x pressed\n",
-                              buffer[1]);
+                              buffer[1] & 0xFFFF);
        }
 }
 
index 8153b3e5918967bb1662c44523d1ab7eab233d69..4207b26ff990634d3a75d35394f20b4c80afba21 100644 (file)
@@ -62,7 +62,10 @@ enum {
        DISABLE_ASL_GPS = 0x0020,
        DISABLE_ASL_DISPLAYSWITCH = 0x0040,
        DISABLE_ASL_MODEM = 0x0080,
-       DISABLE_ASL_CARDREADER = 0x0100
+       DISABLE_ASL_CARDREADER = 0x0100,
+       DISABLE_ASL_3G = 0x0200,
+       DISABLE_ASL_WIMAX = 0x0400,
+       DISABLE_ASL_HWCF = 0x0800
 };
 
 enum {
@@ -87,7 +90,13 @@ enum {
        CM_ASL_USBPORT3,
        CM_ASL_MODEM,
        CM_ASL_CARDREADER,
-       CM_ASL_LID
+       CM_ASL_3G,
+       CM_ASL_WIMAX,
+       CM_ASL_HWCF,
+       CM_ASL_LID,
+       CM_ASL_TYPE,
+       CM_ASL_PANELPOWER,      /*P901*/
+       CM_ASL_TPD
 };
 
 static const char *cm_getv[] = {
@@ -96,7 +105,8 @@ static const char *cm_getv[] = {
        NULL, "PBLG", NULL, NULL,
        "CFVG", NULL, NULL, NULL,
        "USBG", NULL, NULL, "MODG",
-       "CRDG", "LIDG"
+       "CRDG", "M3GG", "WIMG", "HWCF",
+       "LIDG", "TYPE", "PBPG", "TPDG"
 };
 
 static const char *cm_setv[] = {
@@ -105,7 +115,8 @@ static const char *cm_setv[] = {
        "SDSP", "PBLS", "HDPS", NULL,
        "CFVS", NULL, NULL, NULL,
        "USBG", NULL, NULL, "MODS",
-       "CRDS", NULL
+       "CRDS", "M3GS", "WIMS", NULL,
+       NULL, NULL, "PBPS", "TPDS"
 };
 
 #define EEEPC_EC       "\\_SB.PCI0.SBRG.EC0."
@@ -181,6 +192,7 @@ static struct key_entry eeepc_keymap[] = {
 static int eeepc_hotk_add(struct acpi_device *device);
 static int eeepc_hotk_remove(struct acpi_device *device, int type);
 static int eeepc_hotk_resume(struct acpi_device *device);
+static void eeepc_hotk_notify(struct acpi_device *device, u32 event);
 
 static const struct acpi_device_id eeepc_device_ids[] = {
        {EEEPC_HOTK_HID, 0},
@@ -192,10 +204,12 @@ static struct acpi_driver eeepc_hotk_driver = {
        .name = EEEPC_HOTK_NAME,
        .class = EEEPC_HOTK_CLASS,
        .ids = eeepc_device_ids,
+       .flags = ACPI_DRIVER_ALL_NOTIFY_EVENTS,
        .ops = {
                .add = eeepc_hotk_add,
                .remove = eeepc_hotk_remove,
                .resume = eeepc_hotk_resume,
+               .notify = eeepc_hotk_notify,
        },
 };
 
@@ -318,6 +332,15 @@ static const struct rfkill_ops eeepc_rfkill_ops = {
        .set_block = eeepc_rfkill_set,
 };
 
+static void __init eeepc_enable_camera(void)
+{
+       /*
+        * If the following call to set_acpi() fails, it's because there's no
+        * camera so we can ignore the error.
+        */
+       set_acpi(CM_ASL_CAMERA, 1);
+}
+
 /*
  * Sys helpers
  */
@@ -369,13 +392,88 @@ static ssize_t show_sys_acpi(int cm, char *buf)
 EEEPC_CREATE_DEVICE_ATTR(camera, CM_ASL_CAMERA);
 EEEPC_CREATE_DEVICE_ATTR(cardr, CM_ASL_CARDREADER);
 EEEPC_CREATE_DEVICE_ATTR(disp, CM_ASL_DISPLAYSWITCH);
-EEEPC_CREATE_DEVICE_ATTR(cpufv, CM_ASL_CPUFV);
+
+struct eeepc_cpufv {
+       int num;
+       int cur;
+};
+
+static int get_cpufv(struct eeepc_cpufv *c)
+{
+       c->cur = get_acpi(CM_ASL_CPUFV);
+       c->num = (c->cur >> 8) & 0xff;
+       c->cur &= 0xff;
+       if (c->cur < 0 || c->num <= 0 || c->num > 12)
+               return -ENODEV;
+       return 0;
+}
+
+static ssize_t show_available_cpufv(struct device *dev,
+                                   struct device_attribute *attr,
+                                   char *buf)
+{
+       struct eeepc_cpufv c;
+       int i;
+       ssize_t len = 0;
+
+       if (get_cpufv(&c))
+               return -ENODEV;
+       for (i = 0; i < c.num; i++)
+               len += sprintf(buf + len, "%d ", i);
+       len += sprintf(buf + len, "\n");
+       return len;
+}
+
+static ssize_t show_cpufv(struct device *dev,
+                         struct device_attribute *attr,
+                         char *buf)
+{
+       struct eeepc_cpufv c;
+
+       if (get_cpufv(&c))
+               return -ENODEV;
+       return sprintf(buf, "%#x\n", (c.num << 8) | c.cur);
+}
+
+static ssize_t store_cpufv(struct device *dev,
+                          struct device_attribute *attr,
+                          const char *buf, size_t count)
+{
+       struct eeepc_cpufv c;
+       int rv, value;
+
+       if (get_cpufv(&c))
+               return -ENODEV;
+       rv = parse_arg(buf, count, &value);
+       if (rv < 0)
+               return rv;
+       if (!rv || value < 0 || value >= c.num)
+               return -EINVAL;
+       set_acpi(CM_ASL_CPUFV, value);
+       return rv;
+}
+
+static struct device_attribute dev_attr_cpufv = {
+       .attr = {
+               .name = "cpufv",
+               .mode = 0644 },
+       .show   = show_cpufv,
+       .store  = store_cpufv
+};
+
+static struct device_attribute dev_attr_available_cpufv = {
+       .attr = {
+               .name = "available_cpufv",
+               .mode = 0444 },
+       .show   = show_available_cpufv
+};
 
 static struct attribute *platform_attributes[] = {
        &dev_attr_camera.attr,
        &dev_attr_cardr.attr,
        &dev_attr_disp.attr,
        &dev_attr_cpufv.attr,
+       &dev_attr_available_cpufv.attr,
        NULL
 };
 
@@ -558,7 +656,7 @@ static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data)
        eeepc_rfkill_hotplug();
 }
 
-static void eeepc_hotk_notify(acpi_handle handle, u32 event, void *data)
+static void eeepc_hotk_notify(struct acpi_device *device, u32 event)
 {
        static struct key_entry *key;
        u16 count;
@@ -566,6 +664,8 @@ static void eeepc_hotk_notify(acpi_handle handle, u32 event, void *data)
 
        if (!ehotk)
                return;
+       if (event > ACPI_MAX_SYS_NOTIFY)
+               return;
        if (event >= NOTIFY_BRN_MIN && event <= NOTIFY_BRN_MAX)
                brn = notify_brn();
        count = ehotk->event_count[event % 128]++;
@@ -646,7 +746,6 @@ static void eeepc_unregister_rfkill_notifier(char *node)
 
 static int eeepc_hotk_add(struct acpi_device *device)
 {
-       acpi_status status = AE_OK;
        int result;
 
        if (!device)
@@ -664,10 +763,6 @@ static int eeepc_hotk_add(struct acpi_device *device)
        result = eeepc_hotk_check();
        if (result)
                goto ehotk_fail;
-       status = acpi_install_notify_handler(ehotk->handle, ACPI_SYSTEM_NOTIFY,
-                                            eeepc_hotk_notify, ehotk);
-       if (ACPI_FAILURE(status))
-               printk(EEEPC_ERR "Error installing notify handler\n");
 
        eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P6");
        eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P7");
@@ -725,14 +820,8 @@ static int eeepc_hotk_add(struct acpi_device *device)
 
 static int eeepc_hotk_remove(struct acpi_device *device, int type)
 {
-       acpi_status status = 0;
-
        if (!device || !acpi_driver_data(device))
                 return -EINVAL;
-       status = acpi_remove_notify_handler(ehotk->handle, ACPI_SYSTEM_NOTIFY,
-                                           eeepc_hotk_notify);
-       if (ACPI_FAILURE(status))
-               printk(EEEPC_ERR "Error removing notify handler\n");
 
        eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P6");
        eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P7");
@@ -989,6 +1078,9 @@ static int __init eeepc_laptop_init(void)
        result = eeepc_hwmon_init(dev);
        if (result)
                goto fail_hwmon;
+
+       eeepc_enable_camera();
+
        /* Register platform stuff */
        result = platform_driver_register(&platform_driver);
        if (result)
index 16fffe44e3335ee34b4053e7b309b6e4dd3a631b..4ac2311c00afe9fae55fcf1334522a564fd943e5 100644 (file)
@@ -47,7 +47,7 @@ MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4");
 #define HPWMI_DISPLAY_QUERY 0x1
 #define HPWMI_HDDTEMP_QUERY 0x2
 #define HPWMI_ALS_QUERY 0x3
-#define HPWMI_DOCK_QUERY 0x4
+#define HPWMI_HARDWARE_QUERY 0x4
 #define HPWMI_WIRELESS_QUERY 0x5
 #define HPWMI_HOTKEY_QUERY 0xc
 
@@ -75,10 +75,9 @@ struct key_entry {
        u16 keycode;
 };
 
-enum { KE_KEY, KE_SW, KE_END };
+enum { KE_KEY, KE_END };
 
 static struct key_entry hp_wmi_keymap[] = {
-       {KE_SW, 0x01, SW_DOCK},
        {KE_KEY, 0x02, KEY_BRIGHTNESSUP},
        {KE_KEY, 0x03, KEY_BRIGHTNESSDOWN},
        {KE_KEY, 0x20e6, KEY_PROG1},
@@ -151,7 +150,22 @@ static int hp_wmi_als_state(void)
 
 static int hp_wmi_dock_state(void)
 {
-       return hp_wmi_perform_query(HPWMI_DOCK_QUERY, 0, 0);
+       int ret = hp_wmi_perform_query(HPWMI_HARDWARE_QUERY, 0, 0);
+
+       if (ret < 0)
+               return ret;
+
+       return ret & 0x1;
+}
+
+static int hp_wmi_tablet_state(void)
+{
+       int ret = hp_wmi_perform_query(HPWMI_HARDWARE_QUERY, 0, 0);
+
+       if (ret < 0)
+               return ret;
+
+       return (ret & 0x4) ? 1 : 0;
 }
 
 static int hp_wmi_set_block(void *data, bool blocked)
@@ -232,6 +246,15 @@ static ssize_t show_dock(struct device *dev, struct device_attribute *attr,
        return sprintf(buf, "%d\n", value);
 }
 
+static ssize_t show_tablet(struct device *dev, struct device_attribute *attr,
+                        char *buf)
+{
+       int value = hp_wmi_tablet_state();
+       if (value < 0)
+               return -EINVAL;
+       return sprintf(buf, "%d\n", value);
+}
+
 static ssize_t set_als(struct device *dev, struct device_attribute *attr,
                       const char *buf, size_t count)
 {
@@ -244,6 +267,7 @@ static DEVICE_ATTR(display, S_IRUGO, show_display, NULL);
 static DEVICE_ATTR(hddtemp, S_IRUGO, show_hddtemp, NULL);
 static DEVICE_ATTR(als, S_IRUGO | S_IWUSR, show_als, set_als);
 static DEVICE_ATTR(dock, S_IRUGO, show_dock, NULL);
+static DEVICE_ATTR(tablet, S_IRUGO, show_tablet, NULL);
 
 static struct key_entry *hp_wmi_get_entry_by_scancode(int code)
 {
@@ -326,13 +350,13 @@ static void hp_wmi_notify(u32 value, void *context)
                                                 key->keycode, 0);
                                input_sync(hp_wmi_input_dev);
                                break;
-                       case KE_SW:
-                               input_report_switch(hp_wmi_input_dev,
-                                                   key->keycode,
-                                                   hp_wmi_dock_state());
-                               input_sync(hp_wmi_input_dev);
-                               break;
                        }
+               } else if (eventcode == 0x1) {
+                       input_report_switch(hp_wmi_input_dev, SW_DOCK,
+                                           hp_wmi_dock_state());
+                       input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE,
+                                           hp_wmi_tablet_state());
+                       input_sync(hp_wmi_input_dev);
                } else if (eventcode == 0x5) {
                        if (wifi_rfkill)
                                rfkill_set_sw_state(wifi_rfkill,
@@ -369,18 +393,19 @@ static int __init hp_wmi_input_setup(void)
                        set_bit(EV_KEY, hp_wmi_input_dev->evbit);
                        set_bit(key->keycode, hp_wmi_input_dev->keybit);
                        break;
-               case KE_SW:
-                       set_bit(EV_SW, hp_wmi_input_dev->evbit);
-                       set_bit(key->keycode, hp_wmi_input_dev->swbit);
-
-                       /* Set initial dock state */
-                       input_report_switch(hp_wmi_input_dev, key->keycode,
-                                           hp_wmi_dock_state());
-                       input_sync(hp_wmi_input_dev);
-                       break;
                }
        }
 
+       set_bit(EV_SW, hp_wmi_input_dev->evbit);
+       set_bit(SW_DOCK, hp_wmi_input_dev->swbit);
+       set_bit(SW_TABLET_MODE, hp_wmi_input_dev->swbit);
+
+       /* Set initial hardware state */
+       input_report_switch(hp_wmi_input_dev, SW_DOCK, hp_wmi_dock_state());
+       input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE,
+                           hp_wmi_tablet_state());
+       input_sync(hp_wmi_input_dev);
+
        err = input_register_device(hp_wmi_input_dev);
 
        if (err) {
@@ -397,6 +422,7 @@ static void cleanup_sysfs(struct platform_device *device)
        device_remove_file(&device->dev, &dev_attr_hddtemp);
        device_remove_file(&device->dev, &dev_attr_als);
        device_remove_file(&device->dev, &dev_attr_dock);
+       device_remove_file(&device->dev, &dev_attr_tablet);
 }
 
 static int __init hp_wmi_bios_setup(struct platform_device *device)
@@ -414,6 +440,9 @@ static int __init hp_wmi_bios_setup(struct platform_device *device)
        if (err)
                goto add_sysfs_error;
        err = device_create_file(&device->dev, &dev_attr_dock);
+       if (err)
+               goto add_sysfs_error;
+       err = device_create_file(&device->dev, &dev_attr_tablet);
        if (err)
                goto add_sysfs_error;
 
@@ -485,23 +514,17 @@ static int __exit hp_wmi_bios_remove(struct platform_device *device)
 
 static int hp_wmi_resume_handler(struct platform_device *device)
 {
-       struct key_entry *key;
-
        /*
-        * Docking state may have changed while suspended, so trigger
-        * an input event for the current state. As this is a switch,
+        * Hardware state may have changed while suspended, so trigger
+        * input events for the current state. As this is a switch,
         * the input layer will only actually pass it on if the state
         * changed.
         */
-       for (key = hp_wmi_keymap; key->type != KE_END; key++) {
-               switch (key->type) {
-               case KE_SW:
-                       input_report_switch(hp_wmi_input_dev, key->keycode,
-                                           hp_wmi_dock_state());
-                       input_sync(hp_wmi_input_dev);
-                       break;
-               }
-       }
+
+       input_report_switch(hp_wmi_input_dev, SW_DOCK, hp_wmi_dock_state());
+       input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE,
+                           hp_wmi_tablet_state());
+       input_sync(hp_wmi_input_dev);
 
        return 0;
 }
index 40d64c03278ca4bd18db3cf8214e11a9412cc0e7..a463fd72c49560a7c5d7fe2f8744a614519f7f34 100644 (file)
@@ -22,7 +22,7 @@
  */
 
 #define TPACPI_VERSION "0.23"
-#define TPACPI_SYSFS_VERSION 0x020300
+#define TPACPI_SYSFS_VERSION 0x020400
 
 /*
  *  Changelog:
@@ -257,6 +257,8 @@ static struct {
        u32 wan:1;
        u32 uwb:1;
        u32 fan_ctrl_status_undef:1;
+       u32 second_fan:1;
+       u32 beep_needs_two_args:1;
        u32 input_device_registered:1;
        u32 platform_drv_registered:1;
        u32 platform_drv_attrs_registered:1;
@@ -277,8 +279,10 @@ struct thinkpad_id_data {
        char *bios_version_str; /* Something like 1ZET51WW (1.03z) */
        char *ec_version_str;   /* Something like 1ZHT51WW-1.04a */
 
-       u16 bios_model;         /* Big Endian, TP-1Y = 0x5931, 0 = unknown */
+       u16 bios_model;         /* 1Y = 0x5931, 0 = unknown */
        u16 ec_model;
+       u16 bios_release;       /* 1ZETK1WW = 0x314b, 0 = unknown */
+       u16 ec_release;
 
        char *model_str;        /* ThinkPad T43 */
        char *nummodel_str;     /* 9384A9C for a 9384-A9C model */
@@ -355,6 +359,73 @@ static void tpacpi_log_usertask(const char * const what)
                } \
        } while (0)
 
+/*
+ * Quirk handling helpers
+ *
+ * ThinkPad IDs and versions seen in the field so far
+ * are two characters from the set [0-9A-Z], i.e. base 36.
+ *
+ * We use values well outside that range as specials.
+ */
+
+#define TPACPI_MATCH_ANY               0xffffU
+#define TPACPI_MATCH_UNKNOWN           0U
+
+/* TPID('1', 'Y') == 0x5931 */
+#define TPID(__c1, __c2) (((__c2) << 8) | (__c1))
+
+#define TPACPI_Q_IBM(__id1, __id2, __quirk)    \
+       { .vendor = PCI_VENDOR_ID_IBM,          \
+         .bios = TPID(__id1, __id2),           \
+         .ec = TPACPI_MATCH_ANY,               \
+         .quirks = (__quirk) }
+
+#define TPACPI_Q_LNV(__id1, __id2, __quirk)    \
+       { .vendor = PCI_VENDOR_ID_LENOVO,       \
+         .bios = TPID(__id1, __id2),           \
+         .ec = TPACPI_MATCH_ANY,               \
+         .quirks = (__quirk) }
+
+struct tpacpi_quirk {
+       unsigned int vendor;
+       u16 bios;
+       u16 ec;
+       unsigned long quirks;
+};
+
+/**
+ * tpacpi_check_quirks() - search BIOS/EC version on a list
+ * @qlist:             array of &struct tpacpi_quirk
+ * @qlist_size:                number of elements in @qlist
+ *
+ * Iterates over a quirks list until one is found that matches the
+ * ThinkPad's vendor, BIOS and EC model.
+ *
+ * Returns 0 if nothing matches, otherwise returns the quirks field of
+ * the matching &struct tpacpi_quirk entry.
+ *
+ * The match criteria are: vendor, ec and bios must match.
+ */
+static unsigned long __init tpacpi_check_quirks(
+                       const struct tpacpi_quirk *qlist,
+                       unsigned int qlist_size)
+{
+       while (qlist_size) {
+               if ((qlist->vendor == thinkpad_id.vendor ||
+                               qlist->vendor == TPACPI_MATCH_ANY) &&
+                   (qlist->bios == thinkpad_id.bios_model ||
+                               qlist->bios == TPACPI_MATCH_ANY) &&
+                   (qlist->ec == thinkpad_id.ec_model ||
+                               qlist->ec == TPACPI_MATCH_ANY))
+                       return qlist->quirks;
+
+               qlist_size--;
+               qlist++;
+       }
+       return 0;
+}
+
+
 /****************************************************************************
  ****************************************************************************
  *
@@ -2880,7 +2951,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
                /* update bright_acpimode... */
                tpacpi_check_std_acpi_brightness_support();
 
-       if (tp_features.bright_acpimode) {
+       if (tp_features.bright_acpimode && acpi_video_backlight_support()) {
                printk(TPACPI_INFO
                       "This ThinkPad has standard ACPI backlight "
                       "brightness control, supported by the ACPI "
@@ -4773,7 +4844,7 @@ TPACPI_HANDLE(led, ec, "SLED",    /* 570 */
           "LED",               /* all others */
           );                   /* R30, R31 */
 
-#define TPACPI_LED_NUMLEDS 8
+#define TPACPI_LED_NUMLEDS 16
 static struct tpacpi_led_classdev *tpacpi_leds;
 static enum led_status_t tpacpi_led_state_cache[TPACPI_LED_NUMLEDS];
 static const char * const tpacpi_led_names[TPACPI_LED_NUMLEDS] = {
@@ -4786,15 +4857,20 @@ static const char * const tpacpi_led_names[TPACPI_LED_NUMLEDS] = {
        "tpacpi::dock_batt",
        "tpacpi::unknown_led",
        "tpacpi::standby",
+       "tpacpi::dock_status1",
+       "tpacpi::dock_status2",
+       "tpacpi::unknown_led2",
+       "tpacpi::unknown_led3",
+       "tpacpi::thinkvantage",
 };
-#define TPACPI_SAFE_LEDS       0x0081U
+#define TPACPI_SAFE_LEDS       0x1081U
 
 static inline bool tpacpi_is_led_restricted(const unsigned int led)
 {
 #ifdef CONFIG_THINKPAD_ACPI_UNSAFE_LEDS
        return false;
 #else
-       return (TPACPI_SAFE_LEDS & (1 << led)) == 0;
+       return (1U & (TPACPI_SAFE_LEDS >> led)) == 0;
 #endif
 }
 
@@ -4956,6 +5032,10 @@ static int __init tpacpi_init_led(unsigned int led)
 
        tpacpi_leds[led].led = led;
 
+       /* LEDs with no name don't get registered */
+       if (!tpacpi_led_names[led])
+               return 0;
+
        tpacpi_leds[led].led_classdev.brightness_set = &led_sysfs_set;
        tpacpi_leds[led].led_classdev.blink_set = &led_sysfs_blink_set;
        if (led_supported == TPACPI_LED_570)
@@ -4974,10 +5054,59 @@ static int __init tpacpi_init_led(unsigned int led)
        return rc;
 }
 
+static const struct tpacpi_quirk led_useful_qtable[] __initconst = {
+       TPACPI_Q_IBM('1', 'E', 0x009f), /* A30 */
+       TPACPI_Q_IBM('1', 'N', 0x009f), /* A31 */
+       TPACPI_Q_IBM('1', 'G', 0x009f), /* A31 */
+
+       TPACPI_Q_IBM('1', 'I', 0x0097), /* T30 */
+       TPACPI_Q_IBM('1', 'R', 0x0097), /* T40, T41, T42, R50, R51 */
+       TPACPI_Q_IBM('7', '0', 0x0097), /* T43, R52 */
+       TPACPI_Q_IBM('1', 'Y', 0x0097), /* T43 */
+       TPACPI_Q_IBM('1', 'W', 0x0097), /* R50e */
+       TPACPI_Q_IBM('1', 'V', 0x0097), /* R51 */
+       TPACPI_Q_IBM('7', '8', 0x0097), /* R51e */
+       TPACPI_Q_IBM('7', '6', 0x0097), /* R52 */
+
+       TPACPI_Q_IBM('1', 'K', 0x00bf), /* X30 */
+       TPACPI_Q_IBM('1', 'Q', 0x00bf), /* X31, X32 */
+       TPACPI_Q_IBM('1', 'U', 0x00bf), /* X40 */
+       TPACPI_Q_IBM('7', '4', 0x00bf), /* X41 */
+       TPACPI_Q_IBM('7', '5', 0x00bf), /* X41t */
+
+       TPACPI_Q_IBM('7', '9', 0x1f97), /* T60 (1) */
+       TPACPI_Q_IBM('7', '7', 0x1f97), /* Z60* (1) */
+       TPACPI_Q_IBM('7', 'F', 0x1f97), /* Z61* (1) */
+       TPACPI_Q_IBM('7', 'B', 0x1fb7), /* X60 (1) */
+
+       /* (1) - may have excess leds enabled on MSB */
+
+       /* Defaults (order matters, keep last, don't reorder!) */
+       { /* Lenovo */
+         .vendor = PCI_VENDOR_ID_LENOVO,
+         .bios = TPACPI_MATCH_ANY, .ec = TPACPI_MATCH_ANY,
+         .quirks = 0x1fffU,
+       },
+       { /* IBM ThinkPads with no EC version string */
+         .vendor = PCI_VENDOR_ID_IBM,
+         .bios = TPACPI_MATCH_ANY, .ec = TPACPI_MATCH_UNKNOWN,
+         .quirks = 0x00ffU,
+       },
+       { /* IBM ThinkPads with EC version string */
+         .vendor = PCI_VENDOR_ID_IBM,
+         .bios = TPACPI_MATCH_ANY, .ec = TPACPI_MATCH_ANY,
+         .quirks = 0x00bfU,
+       },
+};
+
+#undef TPACPI_LEDQ_IBM
+#undef TPACPI_LEDQ_LNV
+
 static int __init led_init(struct ibm_init_struct *iibm)
 {
        unsigned int i;
        int rc;
+       unsigned long useful_leds;
 
        vdbg_printk(TPACPI_DBG_INIT, "initializing LED subdriver\n");
 
@@ -4999,6 +5128,9 @@ static int __init led_init(struct ibm_init_struct *iibm)
        vdbg_printk(TPACPI_DBG_INIT, "LED commands are %s, mode %d\n",
                str_supported(led_supported), led_supported);
 
+       if (led_supported == TPACPI_LED_NONE)
+               return 1;
+
        tpacpi_leds = kzalloc(sizeof(*tpacpi_leds) * TPACPI_LED_NUMLEDS,
                              GFP_KERNEL);
        if (!tpacpi_leds) {
@@ -5006,8 +5138,12 @@ static int __init led_init(struct ibm_init_struct *iibm)
                return -ENOMEM;
        }
 
+       useful_leds = tpacpi_check_quirks(led_useful_qtable,
+                                         ARRAY_SIZE(led_useful_qtable));
+
        for (i = 0; i < TPACPI_LED_NUMLEDS; i++) {
-               if (!tpacpi_is_led_restricted(i)) {
+               if (!tpacpi_is_led_restricted(i) &&
+                   test_bit(i, &useful_leds)) {
                        rc = tpacpi_init_led(i);
                        if (rc < 0) {
                                led_exit();
@@ -5017,12 +5153,11 @@ static int __init led_init(struct ibm_init_struct *iibm)
        }
 
 #ifdef CONFIG_THINKPAD_ACPI_UNSAFE_LEDS
-       if (led_supported != TPACPI_LED_NONE)
-               printk(TPACPI_NOTICE
-                       "warning: userspace override of important "
-                       "firmware LEDs is enabled\n");
+       printk(TPACPI_NOTICE
+               "warning: userspace override of important "
+               "firmware LEDs is enabled\n");
 #endif
-       return (led_supported != TPACPI_LED_NONE)? 0 : 1;
+       return 0;
 }
 
 #define str_led_status(s) \
@@ -5052,7 +5187,7 @@ static int led_read(char *p)
        }
 
        len += sprintf(p + len, "commands:\t"
-                      "<led> on, <led> off, <led> blink (<led> is 0-7)\n");
+                      "<led> on, <led> off, <led> blink (<led> is 0-15)\n");
 
        return len;
 }
@@ -5067,7 +5202,7 @@ static int led_write(char *buf)
                return -ENODEV;
 
        while ((cmd = next_cmd(&buf))) {
-               if (sscanf(cmd, "%d", &led) != 1 || led < 0 || led > 7)
+               if (sscanf(cmd, "%d", &led) != 1 || led < 0 || led > 15)
                        return -EINVAL;
 
                if (strstr(cmd, "off")) {
@@ -5101,8 +5236,17 @@ static struct ibm_struct led_driver_data = {
 
 TPACPI_HANDLE(beep, ec, "BEEP");       /* all except R30, R31 */
 
+#define TPACPI_BEEP_Q1 0x0001
+
+static const struct tpacpi_quirk beep_quirk_table[] __initconst = {
+       TPACPI_Q_IBM('I', 'M', TPACPI_BEEP_Q1), /* 570 */
+       TPACPI_Q_IBM('I', 'U', TPACPI_BEEP_Q1), /* 570E - unverified */
+};
+
 static int __init beep_init(struct ibm_init_struct *iibm)
 {
+       unsigned long quirks;
+
        vdbg_printk(TPACPI_DBG_INIT, "initializing beep subdriver\n");
 
        TPACPI_ACPIHANDLE_INIT(beep);
@@ -5110,6 +5254,11 @@ static int __init beep_init(struct ibm_init_struct *iibm)
        vdbg_printk(TPACPI_DBG_INIT, "beep is %s\n",
                str_supported(beep_handle != NULL));
 
+       quirks = tpacpi_check_quirks(beep_quirk_table,
+                                    ARRAY_SIZE(beep_quirk_table));
+
+       tp_features.beep_needs_two_args = !!(quirks & TPACPI_BEEP_Q1);
+
        return (beep_handle)? 0 : 1;
 }
 
@@ -5141,8 +5290,15 @@ static int beep_write(char *buf)
                        /* beep_cmd set */
                } else
                        return -EINVAL;
-               if (!acpi_evalf(beep_handle, NULL, NULL, "vdd", beep_cmd, 0))
-                       return -EIO;
+               if (tp_features.beep_needs_two_args) {
+                       if (!acpi_evalf(beep_handle, NULL, NULL, "vdd",
+                                       beep_cmd, 0))
+                               return -EIO;
+               } else {
+                       if (!acpi_evalf(beep_handle, NULL, NULL, "vd",
+                                       beep_cmd))
+                               return -EIO;
+               }
        }
 
        return 0;
@@ -5569,6 +5725,10 @@ static struct ibm_struct ecdump_driver_data = {
  *   Bit 3-0: backlight brightness level
  *
  * brightness_get_raw returns status data in the HBRV layout
+ *
+ * WARNING: The X61 has been verified to use HBRV for something else, so
+ * this should be used _only_ on IBM ThinkPads, and maybe with some careful
+ * testing on the very early *60 Lenovo models...
  */
 
 enum {
@@ -5869,6 +6029,12 @@ static int __init brightness_init(struct ibm_init_struct *iibm)
                           brightness_mode);
        }
 
+       /* Safety */
+       if (thinkpad_id.vendor != PCI_VENDOR_ID_IBM &&
+           (brightness_mode == TPACPI_BRGHT_MODE_ECNVRAM ||
+            brightness_mode == TPACPI_BRGHT_MODE_EC))
+               return -EINVAL;
+
        if (tpacpi_brightness_get_raw(&b) < 0)
                return 1;
 
@@ -6161,6 +6327,21 @@ static struct ibm_struct volume_driver_data = {
  *     For firmware bugs, refer to:
  *     http://thinkwiki.org/wiki/Embedded_Controller_Firmware#Firmware_Issues
  *
+ *     ----
+ *
+ *     ThinkPad EC register 0x31 bit 0 (only on select models)
+ *
+ *     When bit 0 of EC register 0x31 is zero, the tachometer registers
+ *     show the speed of the main fan.  When bit 0 of EC register 0x31
+ *     is one, the tachometer registers show the speed of the auxiliary
+ *     fan.
+ *
+ *     Fan control seems to affect both fans, regardless of the state
+ *     of this bit.
+ *
+ *     So far, only the firmware for the X60/X61 non-tablet versions
+ *     seems to support this (firmware TP-7M).
+ *
  * TPACPI_FAN_WR_ACPI_FANS:
  *     ThinkPad X31, X40, X41.  Not available in the X60.
  *
@@ -6187,6 +6368,8 @@ enum {                                    /* Fan control constants */
        fan_status_offset = 0x2f,       /* EC register 0x2f */
        fan_rpm_offset = 0x84,          /* EC register 0x84: LSB, 0x85 MSB (RPM)
                                         * 0x84 must be read before 0x85 */
+       fan_select_offset = 0x31,       /* EC register 0x31 (Firmware 7M)
+                                          bit 0 selects which fan is active */
 
        TP_EC_FAN_FULLSPEED = 0x40,     /* EC fan mode: full speed */
        TP_EC_FAN_AUTO      = 0x80,     /* EC fan mode: auto fan control */
@@ -6249,30 +6432,18 @@ TPACPI_HANDLE(sfan, ec, "SFAN", /* 570 */
  * We assume 0x07 really means auto mode while this quirk is active,
  * as this is far more likely than the ThinkPad being in level 7,
  * which is only used by the firmware during thermal emergencies.
+ *
+ * Enable for TP-1Y (T43), TP-78 (R51e), TP-76 (R52),
+ * TP-70 (T43, R52), which are known to be buggy.
  */
 
-static void fan_quirk1_detect(void)
+static void fan_quirk1_setup(void)
 {
-       /* In some ThinkPads, neither the EC nor the ACPI
-        * DSDT initialize the HFSP register, and it ends up
-        * being initially set to 0x07 when it *could* be
-        * either 0x07 or 0x80.
-        *
-        * Enable for TP-1Y (T43), TP-78 (R51e),
-        * TP-76 (R52), TP-70 (T43, R52), which are known
-        * to be buggy. */
        if (fan_control_initial_status == 0x07) {
-               switch (thinkpad_id.ec_model) {
-               case 0x5931: /* TP-1Y */
-               case 0x3837: /* TP-78 */
-               case 0x3637: /* TP-76 */
-               case 0x3037: /* TP-70 */
-                       printk(TPACPI_NOTICE
-                              "fan_init: initial fan status is unknown, "
-                              "assuming it is in auto mode\n");
-                       tp_features.fan_ctrl_status_undef = 1;
-                       ;;
-               }
+               printk(TPACPI_NOTICE
+                      "fan_init: initial fan status is unknown, "
+                      "assuming it is in auto mode\n");
+               tp_features.fan_ctrl_status_undef = 1;
        }
 }
 
@@ -6292,6 +6463,38 @@ static void fan_quirk1_handle(u8 *fan_status)
        }
 }
 
+/* Select main fan on X60/X61, NOOP on others */
+static bool fan_select_fan1(void)
+{
+       if (tp_features.second_fan) {
+               u8 val;
+
+               if (ec_read(fan_select_offset, &val) < 0)
+                       return false;
+               val &= 0xFEU;
+               if (ec_write(fan_select_offset, val) < 0)
+                       return false;
+       }
+       return true;
+}
+
+/* Select secondary fan on X60/X61 */
+static bool fan_select_fan2(void)
+{
+       u8 val;
+
+       if (!tp_features.second_fan)
+               return false;
+
+       if (ec_read(fan_select_offset, &val) < 0)
+               return false;
+       val |= 0x01U;
+       if (ec_write(fan_select_offset, val) < 0)
+               return false;
+
+       return true;
+}
+
 /*
  * Call with fan_mutex held
  */
@@ -6369,6 +6572,8 @@ static int fan_get_speed(unsigned int *speed)
        switch (fan_status_access_mode) {
        case TPACPI_FAN_RD_TPEC:
                /* all except 570, 600e/x, 770e, 770x */
+               if (unlikely(!fan_select_fan1()))
+                       return -EIO;
                if (unlikely(!acpi_ec_read(fan_rpm_offset, &lo) ||
                             !acpi_ec_read(fan_rpm_offset + 1, &hi)))
                        return -EIO;
@@ -6385,6 +6590,34 @@ static int fan_get_speed(unsigned int *speed)
        return 0;
 }
 
+static int fan2_get_speed(unsigned int *speed)
+{
+       u8 hi, lo;
+       bool rc;
+
+       switch (fan_status_access_mode) {
+       case TPACPI_FAN_RD_TPEC:
+               /* all except 570, 600e/x, 770e, 770x */
+               if (unlikely(!fan_select_fan2()))
+                       return -EIO;
+               rc = !acpi_ec_read(fan_rpm_offset, &lo) ||
+                            !acpi_ec_read(fan_rpm_offset + 1, &hi);
+               fan_select_fan1(); /* play it safe */
+               if (rc)
+                       return -EIO;
+
+               if (likely(speed))
+                       *speed = (hi << 8) | lo;
+
+               break;
+
+       default:
+               return -ENXIO;
+       }
+
+       return 0;
+}
+
 static int fan_set_level(int level)
 {
        if (!fan_control_allowed)
@@ -6790,6 +7023,25 @@ static struct device_attribute dev_attr_fan_fan1_input =
        __ATTR(fan1_input, S_IRUGO,
                fan_fan1_input_show, NULL);
 
+/* sysfs fan fan2_input ------------------------------------------------ */
+static ssize_t fan_fan2_input_show(struct device *dev,
+                          struct device_attribute *attr,
+                          char *buf)
+{
+       int res;
+       unsigned int speed;
+
+       res = fan2_get_speed(&speed);
+       if (res < 0)
+               return res;
+
+       return snprintf(buf, PAGE_SIZE, "%u\n", speed);
+}
+
+static struct device_attribute dev_attr_fan_fan2_input =
+       __ATTR(fan2_input, S_IRUGO,
+               fan_fan2_input_show, NULL);
+
 /* sysfs fan fan_watchdog (hwmon driver) ------------------------------- */
 static ssize_t fan_fan_watchdog_show(struct device_driver *drv,
                                     char *buf)
@@ -6823,6 +7075,7 @@ static DRIVER_ATTR(fan_watchdog, S_IWUSR | S_IRUGO,
 static struct attribute *fan_attributes[] = {
        &dev_attr_fan_pwm1_enable.attr, &dev_attr_fan_pwm1.attr,
        &dev_attr_fan_fan1_input.attr,
+       NULL, /* for fan2_input */
        NULL
 };
 
@@ -6830,9 +7083,36 @@ static const struct attribute_group fan_attr_group = {
        .attrs = fan_attributes,
 };
 
+#define        TPACPI_FAN_Q1   0x0001          /* Uninitialized HFSP */
+#define TPACPI_FAN_2FAN        0x0002          /* EC 0x31 bit 0 selects fan2 */
+
+#define TPACPI_FAN_QI(__id1, __id2, __quirks)  \
+       { .vendor = PCI_VENDOR_ID_IBM,          \
+         .bios = TPACPI_MATCH_ANY,             \
+         .ec = TPID(__id1, __id2),             \
+         .quirks = __quirks }
+
+#define TPACPI_FAN_QL(__id1, __id2, __quirks)  \
+       { .vendor = PCI_VENDOR_ID_LENOVO,       \
+         .bios = TPACPI_MATCH_ANY,             \
+         .ec = TPID(__id1, __id2),             \
+         .quirks = __quirks }
+
+static const struct tpacpi_quirk fan_quirk_table[] __initconst = {
+       TPACPI_FAN_QI('1', 'Y', TPACPI_FAN_Q1),
+       TPACPI_FAN_QI('7', '8', TPACPI_FAN_Q1),
+       TPACPI_FAN_QI('7', '6', TPACPI_FAN_Q1),
+       TPACPI_FAN_QI('7', '0', TPACPI_FAN_Q1),
+       TPACPI_FAN_QL('7', 'M', TPACPI_FAN_2FAN),
+};
+
+#undef TPACPI_FAN_QL
+#undef TPACPI_FAN_QI
+
 static int __init fan_init(struct ibm_init_struct *iibm)
 {
        int rc;
+       unsigned long quirks;
 
        vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_FAN,
                        "initializing fan subdriver\n");
@@ -6843,12 +7123,16 @@ static int __init fan_init(struct ibm_init_struct *iibm)
        fan_control_commands = 0;
        fan_watchdog_maxinterval = 0;
        tp_features.fan_ctrl_status_undef = 0;
+       tp_features.second_fan = 0;
        fan_control_desired_level = 7;
 
        TPACPI_ACPIHANDLE_INIT(fans);
        TPACPI_ACPIHANDLE_INIT(gfan);
        TPACPI_ACPIHANDLE_INIT(sfan);
 
+       quirks = tpacpi_check_quirks(fan_quirk_table,
+                                    ARRAY_SIZE(fan_quirk_table));
+
        if (gfan_handle) {
                /* 570, 600e/x, 770e, 770x */
                fan_status_access_mode = TPACPI_FAN_RD_ACPI_GFAN;
@@ -6858,7 +7142,13 @@ static int __init fan_init(struct ibm_init_struct *iibm)
                if (likely(acpi_ec_read(fan_status_offset,
                                        &fan_control_initial_status))) {
                        fan_status_access_mode = TPACPI_FAN_RD_TPEC;
-                       fan_quirk1_detect();
+                       if (quirks & TPACPI_FAN_Q1)
+                               fan_quirk1_setup();
+                       if (quirks & TPACPI_FAN_2FAN) {
+                               tp_features.second_fan = 1;
+                               dbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_FAN,
+                                       "secondary fan support enabled\n");
+                       }
                } else {
                        printk(TPACPI_ERR
                               "ThinkPad ACPI EC access misbehaving, "
@@ -6914,6 +7204,11 @@ static int __init fan_init(struct ibm_init_struct *iibm)
 
        if (fan_status_access_mode != TPACPI_FAN_NONE ||
            fan_control_access_mode != TPACPI_FAN_WR_NONE) {
+               if (tp_features.second_fan) {
+                       /* attach second fan tachometer */
+                       fan_attributes[ARRAY_SIZE(fan_attributes)-2] =
+                                       &dev_attr_fan_fan2_input.attr;
+               }
                rc = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj,
                                         &fan_attr_group);
                if (rc < 0)
@@ -7385,6 +7680,24 @@ err_out:
 
 /* Probing */
 
+static bool __pure __init tpacpi_is_fw_digit(const char c)
+{
+       return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z');
+}
+
+/* Most models: xxyTkkWW (#.##c); Ancient 570/600 and -SL lack (#.##c) */
+static bool __pure __init tpacpi_is_valid_fw_id(const char* const s,
+                                               const char t)
+{
+       return s && strlen(s) >= 8 &&
+               tpacpi_is_fw_digit(s[0]) &&
+               tpacpi_is_fw_digit(s[1]) &&
+               s[2] == t && s[3] == 'T' &&
+               tpacpi_is_fw_digit(s[4]) &&
+               tpacpi_is_fw_digit(s[5]) &&
+               s[6] == 'W' && s[7] == 'W';
+}
+
 /* returns 0 - probe ok, or < 0 - probe error.
  * Probe ok doesn't mean thinkpad found.
  * On error, kfree() cleanup on tp->* is not performed, caller must do it */
@@ -7411,10 +7724,15 @@ static int __must_check __init get_thinkpad_model_data(
        tp->bios_version_str = kstrdup(s, GFP_KERNEL);
        if (s && !tp->bios_version_str)
                return -ENOMEM;
-       if (!tp->bios_version_str)
+
+       /* Really ancient ThinkPad 240X will fail this, which is fine */
+       if (!tpacpi_is_valid_fw_id(tp->bios_version_str, 'E'))
                return 0;
+
        tp->bios_model = tp->bios_version_str[0]
                         | (tp->bios_version_str[1] << 8);
+       tp->bios_release = (tp->bios_version_str[4] << 8)
+                        | tp->bios_version_str[5];
 
        /*
         * ThinkPad T23 or newer, A31 or newer, R50e or newer,
@@ -7433,8 +7751,21 @@ static int __must_check __init get_thinkpad_model_data(
                        tp->ec_version_str = kstrdup(ec_fw_string, GFP_KERNEL);
                        if (!tp->ec_version_str)
                                return -ENOMEM;
-                       tp->ec_model = ec_fw_string[0]
-                                       | (ec_fw_string[1] << 8);
+
+                       if (tpacpi_is_valid_fw_id(ec_fw_string, 'H')) {
+                               tp->ec_model = ec_fw_string[0]
+                                               | (ec_fw_string[1] << 8);
+                               tp->ec_release = (ec_fw_string[4] << 8)
+                                               | ec_fw_string[5];
+                       } else {
+                               printk(TPACPI_NOTICE
+                                       "ThinkPad firmware release %s "
+                                       "doesn't match the known patterns\n",
+                                       ec_fw_string);
+                               printk(TPACPI_NOTICE
+                                       "please report this to %s\n",
+                                       TPACPI_MAIL);
+                       }
                        break;
                }
        }
index 7f207f335beca2c53d8120c3a4681ff139cb7282..ef3a2cd3a7a0ac80a188cfb5cacbda03373cf3e3 100644 (file)
@@ -287,6 +287,25 @@ static void pnpacpi_parse_allocated_address_space(struct pnp_dev *dev,
                                ACPI_DECODE_16);
 }
 
+static void pnpacpi_parse_allocated_ext_address_space(struct pnp_dev *dev,
+                                                     struct acpi_resource *res)
+{
+       struct acpi_resource_extended_address64 *p = &res->data.ext_address64;
+
+       if (p->producer_consumer == ACPI_PRODUCER)
+               return;
+
+       if (p->resource_type == ACPI_MEMORY_RANGE)
+               pnpacpi_parse_allocated_memresource(dev,
+                       p->minimum, p->address_length,
+                       p->info.mem.write_protect);
+       else if (p->resource_type == ACPI_IO_RANGE)
+               pnpacpi_parse_allocated_ioresource(dev,
+                       p->minimum, p->address_length,
+                       p->granularity == 0xfff ? ACPI_DECODE_10 :
+                               ACPI_DECODE_16);
+}
+
 static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
                                              void *data)
 {
@@ -400,8 +419,7 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
                break;
 
        case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64:
-               if (res->data.ext_address64.producer_consumer == ACPI_PRODUCER)
-                       return AE_OK;
+               pnpacpi_parse_allocated_ext_address_space(dev, res);
                break;
 
        case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
@@ -630,6 +648,28 @@ static __init void pnpacpi_parse_address_option(struct pnp_dev *dev,
                                           IORESOURCE_IO_FIXED);
 }
 
+static __init void pnpacpi_parse_ext_address_option(struct pnp_dev *dev,
+                                                   unsigned int option_flags,
+                                                   struct acpi_resource *r)
+{
+       struct acpi_resource_extended_address64 *p = &r->data.ext_address64;
+       unsigned char flags = 0;
+
+       if (p->address_length == 0)
+               return;
+
+       if (p->resource_type == ACPI_MEMORY_RANGE) {
+               if (p->info.mem.write_protect == ACPI_READ_WRITE_MEMORY)
+                       flags = IORESOURCE_MEM_WRITEABLE;
+               pnp_register_mem_resource(dev, option_flags, p->minimum,
+                                         p->minimum, 0, p->address_length,
+                                         flags);
+       } else if (p->resource_type == ACPI_IO_RANGE)
+               pnp_register_port_resource(dev, option_flags, p->minimum,
+                                          p->minimum, 0, p->address_length,
+                                          IORESOURCE_IO_FIXED);
+}
+
 struct acpipnp_parse_option_s {
        struct pnp_dev *dev;
        unsigned int option_flags;
@@ -711,6 +751,7 @@ static __init acpi_status pnpacpi_option_resource(struct acpi_resource *res,
                break;
 
        case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64:
+               pnpacpi_parse_ext_address_option(dev, option_flags, res);
                break;
 
        case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
@@ -765,6 +806,7 @@ static int pnpacpi_supported_resource(struct acpi_resource *res)
        case ACPI_RESOURCE_TYPE_ADDRESS16:
        case ACPI_RESOURCE_TYPE_ADDRESS32:
        case ACPI_RESOURCE_TYPE_ADDRESS64:
+       case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64:
        case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
                return 1;
        }
index 33da1127992aca4c36cfa04b1dfbb7a354edf38d..7eda34838bfe979e22bff2939237c8c9847671cc 100644 (file)
@@ -82,6 +82,14 @@ config BATTERY_DA9030
          Say Y here to enable support for batteries charger integrated into
          DA9030 PMIC.
 
+config BATTERY_MAX17040
+       tristate "Maxim MAX17040 Fuel Gauge"
+       depends on I2C
+       help
+         MAX17040 is a fuel-gauge system for lithium-ion (Li+) batteries
+         in handheld and portable equipment. The MAX17040 is configured
+         to operate with a single lithium cell.
+
 config CHARGER_PCF50633
        tristate "NXP PCF50633 MBC"
        depends on MFD_PCF50633
index 2fcf41d13e5cae530c79478578485fe59493cb8b..daf3179689aaecb61611604f526d2f6e090c6b00 100644 (file)
@@ -25,4 +25,5 @@ obj-$(CONFIG_BATTERY_TOSA)    += tosa_battery.o
 obj-$(CONFIG_BATTERY_WM97XX)   += wm97xx_battery.o
 obj-$(CONFIG_BATTERY_BQ27x00)  += bq27x00_battery.o
 obj-$(CONFIG_BATTERY_DA9030)   += da9030_battery.o
-obj-$(CONFIG_CHARGER_PCF50633) += pcf50633-charger.o
\ No newline at end of file
+obj-$(CONFIG_BATTERY_MAX17040) += max17040_battery.o
+obj-$(CONFIG_CHARGER_PCF50633) += pcf50633-charger.o
index 1662bb0f23a5bc1a8f84a077099b84d292a2a8d9..3364198134a1d55a96789d1a7333a36432e14059 100644 (file)
@@ -22,8 +22,6 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
-#define DA9030_STATUS_CHDET    (1 << 3)
-
 #define DA9030_FAULT_LOG               0x0a
 #define DA9030_FAULT_LOG_OVER_TEMP     (1 << 7)
 #define DA9030_FAULT_LOG_VBAT_OVER     (1 << 4)
@@ -244,6 +242,8 @@ static void da9030_set_charge(struct da9030_charger *charger, int on)
        }
 
        da903x_write(charger->master, DA9030_CHARGE_CONTROL, val);
+
+       power_supply_changed(&charger->psy);
 }
 
 static void da9030_charger_check_state(struct da9030_charger *charger)
@@ -258,6 +258,12 @@ static void da9030_charger_check_state(struct da9030_charger *charger)
                        da9030_set_charge(charger, 1);
                }
        } else {
+               /* Charger has been pulled out */
+               if (!charger->chdet) {
+                       da9030_set_charge(charger, 0);
+                       return;
+               }
+
                if (charger->adc.vbat_res >=
                    charger->thresholds.vbat_charge_stop) {
                        da9030_set_charge(charger, 0);
@@ -395,13 +401,11 @@ static int da9030_battery_event(struct notifier_block *nb, unsigned long event,
 {
        struct da9030_charger *charger =
                container_of(nb, struct da9030_charger, nb);
-       int status;
 
        switch (event) {
        case DA9030_EVENT_CHDET:
-               status = da903x_query_status(charger->master,
-                                            DA9030_STATUS_CHDET);
-               da9030_set_charge(charger, status);
+               cancel_delayed_work_sync(&charger->work);
+               schedule_work(&charger->work.work);
                break;
        case DA9030_EVENT_VBATMON:
                da9030_battery_vbat_event(charger);
@@ -565,7 +569,8 @@ static int da9030_battery_remove(struct platform_device *dev)
        da903x_unregister_notifier(charger->master, &charger->nb,
                                   DA9030_EVENT_CHDET | DA9030_EVENT_VBATMON |
                                   DA9030_EVENT_CHIOVER | DA9030_EVENT_TBAT);
-       cancel_delayed_work(&charger->work);
+       cancel_delayed_work_sync(&charger->work);
+       da9030_set_charge(charger, 0);
        power_supply_unregister(&charger->psy);
 
        kfree(charger);
index a52d4a11652d57aa7b045fa225777fdaecb252f2..520b5c49ff3049275dbeada16c80b93135cf47ea 100644 (file)
@@ -62,6 +62,10 @@ static unsigned int cache_time = 1000;
 module_param(cache_time, uint, 0644);
 MODULE_PARM_DESC(cache_time, "cache time in milliseconds");
 
+static unsigned int pmod_enabled;
+module_param(pmod_enabled, bool, 0644);
+MODULE_PARM_DESC(pmod_enabled, "PMOD enable bit");
+
 /* Some batteries have their rated capacity stored a N * 10 mAh, while
  * others use an index into this table. */
 static int rated_capacities[] = {
@@ -259,6 +263,17 @@ static void ds2760_battery_update_status(struct ds2760_device_info *di)
                power_supply_changed(&di->bat);
 }
 
+/*
+ * Write a new status byte to the DS2760 unless it already equals the
+ * cached copy in di->raw[DS2760_STATUS_REG].  After the register write
+ * EEPROM block 1 is stored and then recalled through the w1 helpers.
+ */
+static void ds2760_battery_write_status(struct ds2760_device_info *di,
+                                       char status)
+{
+       struct device *w1 = di->w1_dev;
+
+       if (di->raw[DS2760_STATUS_REG] != status) {
+               w1_ds2760_write(w1, &status, DS2760_STATUS_WRITE_REG, 1);
+               w1_ds2760_store_eeprom(w1, DS2760_EEPROM_BLOCK1);
+               w1_ds2760_recall_eeprom(w1, DS2760_EEPROM_BLOCK1);
+       }
+}
+
 static void ds2760_battery_work(struct work_struct *work)
 {
        struct ds2760_device_info *di = container_of(work,
@@ -342,9 +357,9 @@ static enum power_supply_property ds2760_battery_props[] = {
 
 static int ds2760_battery_probe(struct platform_device *pdev)
 {
+       char status;
        int retval = 0;
        struct ds2760_device_info *di;
-       struct ds2760_platform_data *pdata;
 
        di = kzalloc(sizeof(*di), GFP_KERNEL);
        if (!di) {
@@ -354,14 +369,13 @@ static int ds2760_battery_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, di);
 
-       pdata = pdev->dev.platform_data;
-       di->dev         = &pdev->dev;
-       di->w1_dev           = pdev->dev.parent;
-       di->bat.name       = dev_name(&pdev->dev);
-       di->bat.type       = POWER_SUPPLY_TYPE_BATTERY;
-       di->bat.properties     = ds2760_battery_props;
-       di->bat.num_properties = ARRAY_SIZE(ds2760_battery_props);
-       di->bat.get_property   = ds2760_battery_get_property;
+       di->dev                 = &pdev->dev;
+       di->w1_dev              = pdev->dev.parent;
+       di->bat.name            = dev_name(&pdev->dev);
+       di->bat.type            = POWER_SUPPLY_TYPE_BATTERY;
+       di->bat.properties      = ds2760_battery_props;
+       di->bat.num_properties  = ARRAY_SIZE(ds2760_battery_props);
+       di->bat.get_property    = ds2760_battery_get_property;
        di->bat.external_power_changed =
                                  ds2760_battery_external_power_changed;
 
@@ -373,6 +387,16 @@ static int ds2760_battery_probe(struct platform_device *pdev)
                goto batt_failed;
        }
 
+       /* enable sleep mode feature */
+       ds2760_battery_read_status(di);
+       status = di->raw[DS2760_STATUS_REG];
+       if (pmod_enabled)
+               status |= DS2760_STATUS_PMOD;
+       else
+               status &= ~DS2760_STATUS_PMOD;
+
+       ds2760_battery_write_status(di, status);
+
        INIT_DELAYED_WORK(&di->monitor_work, ds2760_battery_work);
        di->monitor_wqueue = create_singlethread_workqueue(dev_name(&pdev->dev));
        if (!di->monitor_wqueue) {
diff --git a/drivers/power/max17040_battery.c b/drivers/power/max17040_battery.c
new file mode 100644 (file)
index 0000000..87b98bf
--- /dev/null
@@ -0,0 +1,309 @@
+/*
+ *  max17040_battery.c
+ *  fuel-gauge systems for lithium-ion (Li+) batteries
+ *
+ *  Copyright (C) 2009 Samsung Electronics
+ *  Minkyu Kang <mk7.kang@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/delay.h>
+#include <linux/power_supply.h>
+#include <linux/max17040_battery.h>
+
+#define MAX17040_VCELL_MSB     0x02
+#define MAX17040_VCELL_LSB     0x03
+#define MAX17040_SOC_MSB       0x04
+#define MAX17040_SOC_LSB       0x05
+#define MAX17040_MODE_MSB      0x06
+#define MAX17040_MODE_LSB      0x07
+#define MAX17040_VER_MSB       0x08
+#define MAX17040_VER_LSB       0x09
+#define MAX17040_RCOMP_MSB     0x0C
+#define MAX17040_RCOMP_LSB     0x0D
+#define MAX17040_CMD_MSB       0xFE
+#define MAX17040_CMD_LSB       0xFF
+
+#define MAX17040_DELAY         1000
+#define MAX17040_BATTERY_FULL  95
+
+/*
+ * Per-device driver state.  The cached readings below are refreshed by
+ * max17040_work() and handed out by max17040_get_property().
+ */
+struct max17040_chip {
+       struct i2c_client               *client;
+       struct delayed_work             work;
+       struct power_supply             battery;
+       struct max17040_platform_data   *pdata;
+
+       /* battery presence: result of pdata->battery_online(), or 1 without a hook */
+       int online;
+       /* battery voltage, as computed by max17040_get_vcell() */
+       int vcell;
+       /* battery capacity, taken from the SOC MSB register */
+       int soc;
+       /* charging state: one of the POWER_SUPPLY_STATUS_* values */
+       int status;
+};
+
+/*
+ * power_supply get_property callback.
+ *
+ * Hands out the value most recently cached in struct max17040_chip; no
+ * bus access is performed here.  Returns 0 on success or -EINVAL for a
+ * property this driver does not provide (val is left untouched then).
+ */
+static int max17040_get_property(struct power_supply *psy,
+                           enum power_supply_property psp,
+                           union power_supply_propval *val)
+{
+       const struct max17040_chip *chip =
+               container_of(psy, struct max17040_chip, battery);
+       int cached;
+
+       switch (psp) {
+       case POWER_SUPPLY_PROP_STATUS:
+               cached = chip->status;
+               break;
+       case POWER_SUPPLY_PROP_ONLINE:
+               cached = chip->online;
+               break;
+       case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+               cached = chip->vcell;
+               break;
+       case POWER_SUPPLY_PROP_CAPACITY:
+               cached = chip->soc;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       val->intval = cached;
+       return 0;
+}
+
+/*
+ * Write one byte to register @reg with an SMBus byte-data transfer.
+ * Returns the i2c_smbus_write_byte_data() result; a negative result is
+ * also logged against the client device.
+ */
+static int max17040_write_reg(struct i2c_client *client, int reg, u8 value)
+{
+       int err = i2c_smbus_write_byte_data(client, reg, value);
+
+       if (err >= 0)
+               return err;
+
+       dev_err(&client->dev, "%s: err %d\n", __func__, err);
+       return err;
+}
+
+/*
+ * Read one byte from register @reg with an SMBus byte-data transfer.
+ * Returns the i2c_smbus_read_byte_data() result: the register value, or
+ * a negative errno, which is also logged against the client device.
+ */
+static int max17040_read_reg(struct i2c_client *client, int reg)
+{
+       int val = i2c_smbus_read_byte_data(client, reg);
+
+       if (val >= 0)
+               return val;
+
+       dev_err(&client->dev, "%s: err %d\n", __func__, val);
+       return val;
+}
+
+/*
+ * Send the reset command: 0x54 to the command MSB register followed by
+ * 0x00 to the command LSB register.  Write failures are logged by
+ * max17040_write_reg() and otherwise ignored here.
+ */
+static void max17040_reset(struct i2c_client *client)
+{
+       max17040_write_reg(client, MAX17040_CMD_MSB, 0x54);
+       max17040_write_reg(client, MAX17040_CMD_LSB, 0x00);
+}
+
+/* One VCELL count is 1.25 mV on the MAX17040, i.e. 1250 uV. */
+#define MAX17040_VCELL_UV_PER_LSB      1250
+
+/*
+ * Read the 12-bit cell voltage and cache it in chip->vcell.
+ *
+ * The reading is the 8 MSB bits plus the upper nibble of the LSB
+ * register.  It is converted to microvolts because the value is
+ * reported as POWER_SUPPLY_PROP_VOLTAGE_NOW, which the power_supply
+ * class defines in uV.
+ *
+ * max17040_read_reg() returns a negative errno on failure; the results
+ * are kept as int so that an error is not truncated into a bogus byte.
+ * On any read failure the previously cached voltage is left in place.
+ */
+static void max17040_get_vcell(struct i2c_client *client)
+{
+       struct max17040_chip *chip = i2c_get_clientdata(client);
+       int msb;
+       int lsb;
+
+       msb = max17040_read_reg(client, MAX17040_VCELL_MSB);
+       if (msb < 0)
+               return;
+
+       lsb = max17040_read_reg(client, MAX17040_VCELL_LSB);
+       if (lsb < 0)
+               return;
+
+       chip->vcell = ((msb << 4) + (lsb >> 4)) * MAX17040_VCELL_UV_PER_LSB;
+}
+
+/*
+ * Read the state of charge and cache it in chip->soc.
+ *
+ * Only the SOC MSB register is used for the cached value, so the LSB
+ * register is no longer read.  max17040_read_reg() returns a negative
+ * errno on failure; in that case the previously cached capacity is kept
+ * instead of storing a truncated error code.
+ */
+static void max17040_get_soc(struct i2c_client *client)
+{
+       struct max17040_chip *chip = i2c_get_clientdata(client);
+       int msb;
+
+       msb = max17040_read_reg(client, MAX17040_SOC_MSB);
+       if (msb < 0)
+               return;
+
+       chip->soc = msb;
+}
+
+/*
+ * Read the two version registers and log the result.
+ *
+ * Nothing is logged if either read fails (max17040_read_reg() has
+ * already reported the error).  The two bytes are printed as a single
+ * 16-bit hex value: the former "%d%d" format ran the two decimal numbers
+ * together, so e.g. 1/12 and 11/2 were indistinguishable.
+ */
+static void max17040_get_version(struct i2c_client *client)
+{
+       int msb;
+       int lsb;
+
+       msb = max17040_read_reg(client, MAX17040_VER_MSB);
+       if (msb < 0)
+               return;
+
+       lsb = max17040_read_reg(client, MAX17040_VER_LSB);
+       if (lsb < 0)
+               return;
+
+       dev_info(&client->dev, "MAX17040 Fuel-Gauge Ver 0x%04x\n",
+                (msb << 8) | lsb);
+}
+
+/*
+ * Update chip->online from the board's battery_online() hook.
+ *
+ * chip->pdata comes straight from client->dev.platform_data and may be
+ * NULL when the board supplies no platform data; in that case, or when
+ * the hook itself is not set, the battery is assumed to be present.
+ */
+static void max17040_get_online(struct i2c_client *client)
+{
+       struct max17040_chip *chip = i2c_get_clientdata(client);
+
+       if (chip->pdata && chip->pdata->battery_online)
+               chip->online = chip->pdata->battery_online();
+       else
+               chip->online = 1;
+}
+
+/*
+ * Derive chip->status from the board's charger hooks and the cached SOC.
+ *
+ * Without platform data, or without both charger_online() and
+ * charger_enable(), the status is POWER_SUPPLY_STATUS_UNKNOWN.
+ * chip->pdata may be NULL (it is copied from client->dev.platform_data),
+ * so it is checked before the hooks are looked at.
+ *
+ * Otherwise: charger present and enabled -> CHARGING, present but not
+ * enabled -> NOT_CHARGING, absent -> DISCHARGING.  A cached SOC above
+ * MAX17040_BATTERY_FULL then overrides the result with FULL, so
+ * max17040_get_soc() must have run first.
+ */
+static void max17040_get_status(struct i2c_client *client)
+{
+       struct max17040_chip *chip = i2c_get_clientdata(client);
+       struct max17040_platform_data *pdata = chip->pdata;
+
+       if (!pdata || !pdata->charger_online || !pdata->charger_enable) {
+               chip->status = POWER_SUPPLY_STATUS_UNKNOWN;
+               return;
+       }
+
+       if (pdata->charger_online()) {
+               if (pdata->charger_enable())
+                       chip->status = POWER_SUPPLY_STATUS_CHARGING;
+               else
+                       chip->status = POWER_SUPPLY_STATUS_NOT_CHARGING;
+       } else {
+               chip->status = POWER_SUPPLY_STATUS_DISCHARGING;
+       }
+
+       if (chip->soc > MAX17040_BATTERY_FULL)
+               chip->status = POWER_SUPPLY_STATUS_FULL;
+}
+
+/*
+ * Periodic poll: refresh every cached reading, then queue the next run
+ * MAX17040_DELAY ticks from now.  The status is evaluated last because
+ * max17040_get_status() looks at the SOC cached just before it.
+ */
+static void max17040_work(struct work_struct *work)
+{
+       struct max17040_chip *chip =
+               container_of(work, struct max17040_chip, work.work);
+       struct i2c_client *client = chip->client;
+
+       max17040_get_vcell(client);
+       max17040_get_soc(client);
+       max17040_get_online(client);
+       max17040_get_status(client);
+
+       schedule_delayed_work(&chip->work, MAX17040_DELAY);
+}
+
+/*
+ * Properties advertised to the power_supply core; each one has a
+ * matching case in max17040_get_property().
+ */
+static enum power_supply_property max17040_battery_props[] = {
+       POWER_SUPPLY_PROP_STATUS,
+       POWER_SUPPLY_PROP_ONLINE,
+       POWER_SUPPLY_PROP_VOLTAGE_NOW,
+       POWER_SUPPLY_PROP_CAPACITY,
+};
+
+/*
+ * Bind to a MAX17040: allocate the per-device state, register the
+ * "battery" power supply, reset the gauge, log its version and start the
+ * periodic polling work.
+ *
+ * Returns 0 on success, -EIO if the adapter cannot do the SMBus
+ * byte-data transfers this driver relies on, -ENOMEM on allocation
+ * failure, or the power_supply_register() error.
+ */
+static int __devinit max17040_probe(struct i2c_client *client,
+                       const struct i2c_device_id *id)
+{
+       struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+       struct max17040_chip *chip;
+       int ret;
+
+       /*
+        * All register access goes through i2c_smbus_{read,write}_byte_data(),
+        * so BYTE_DATA (not plain BYTE) is the capability that is needed.
+        */
+       if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+               return -EIO;
+
+       chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+       if (!chip)
+               return -ENOMEM;
+
+       chip->client = client;
+       chip->pdata = client->dev.platform_data;
+
+       /* The register helpers look the chip up through the client data. */
+       i2c_set_clientdata(client, chip);
+
+       chip->battery.name              = "battery";
+       chip->battery.type              = POWER_SUPPLY_TYPE_BATTERY;
+       chip->battery.get_property      = max17040_get_property;
+       chip->battery.properties        = max17040_battery_props;
+       chip->battery.num_properties    = ARRAY_SIZE(max17040_battery_props);
+
+       ret = power_supply_register(&client->dev, &chip->battery);
+       if (ret) {
+               dev_err(&client->dev, "failed: power supply register\n");
+               i2c_set_clientdata(client, NULL);
+               kfree(chip);
+               return ret;
+       }
+
+       max17040_reset(client);
+       max17040_get_version(client);
+
+       /* First poll runs after MAX17040_DELAY; the work re-arms itself. */
+       INIT_DELAYED_WORK_DEFERRABLE(&chip->work, max17040_work);
+       schedule_delayed_work(&chip->work, MAX17040_DELAY);
+
+       return 0;
+}
+
+/*
+ * Unbind: stop the polling work, unregister the power supply and free
+ * the per-device state.
+ *
+ * max17040_work() re-queues itself, so the _sync variant is required:
+ * it waits for a running instance to finish and prevents the re-arm,
+ * which guarantees the work can no longer touch chip after kfree().
+ */
+static int __devexit max17040_remove(struct i2c_client *client)
+{
+       struct max17040_chip *chip = i2c_get_clientdata(client);
+
+       cancel_delayed_work_sync(&chip->work);
+       power_supply_unregister(&chip->battery);
+       i2c_set_clientdata(client, NULL);
+       kfree(chip);
+       return 0;
+}
+
+#ifdef CONFIG_PM
+
+/*
+ * Stop polling across suspend.  The work re-queues itself, so the _sync
+ * variant is used: a plain cancel could miss an instance that is already
+ * running, which would then re-arm the timer after this returns.
+ */
+static int max17040_suspend(struct i2c_client *client,
+               pm_message_t state)
+{
+       struct max17040_chip *chip = i2c_get_clientdata(client);
+
+       cancel_delayed_work_sync(&chip->work);
+       return 0;
+}
+
+/* Restart the periodic poll; the first run is MAX17040_DELAY ticks away. */
+static int max17040_resume(struct i2c_client *client)
+{
+       struct max17040_chip *chip = i2c_get_clientdata(client);
+
+       schedule_delayed_work(&chip->work, MAX17040_DELAY);
+       return 0;
+}
+
+#else
+
+/* No power management: leave both driver hooks unset. */
+#define max17040_suspend NULL
+#define max17040_resume NULL
+
+#endif /* CONFIG_PM */
+
+/* I2C device names this driver binds to; the empty entry ends the table. */
+static const struct i2c_device_id max17040_id[] = {
+       { "max17040", 0 },
+       { }
+};
+MODULE_DEVICE_TABLE(i2c, max17040_id);
+
+/*
+ * I2C driver glue.  .suspend and .resume expand to NULL when CONFIG_PM
+ * is not set.
+ */
+static struct i2c_driver max17040_i2c_driver = {
+       .driver = {
+               .name   = "max17040",
+       },
+       .probe          = max17040_probe,
+       .remove         = __devexit_p(max17040_remove),
+       .suspend        = max17040_suspend,
+       .resume         = max17040_resume,
+       .id_table       = max17040_id,
+};
+
+/* Module load: register the I2C driver and pass back the result. */
+static int __init max17040_init(void)
+{
+       return i2c_add_driver(&max17040_i2c_driver);
+}
+module_init(max17040_init);
+
+/* Module unload: remove the I2C driver registered by max17040_init(). */
+static void __exit max17040_exit(void)
+{
+       i2c_del_driver(&max17040_i2c_driver);
+}
+module_exit(max17040_exit);
+
+MODULE_AUTHOR("Minkyu Kang <mk7.kang@samsung.com>");
+MODULE_DESCRIPTION("MAX17040 Fuel Gauge");
+MODULE_LICENSE("GPL");
index e371a9c15341e2598825affdbd2ef1d727dfbde7..a07015d646ddad5eb6743c5fa1e1bb281694d051 100644 (file)
@@ -398,8 +398,7 @@ static int sbs_init(struct pci_dev *dev)
 {
        u8 __iomem *p;
 
-       p = ioremap_nocache(pci_resource_start(dev, 0),
-                                               pci_resource_len(dev, 0));
+       p = pci_ioremap_bar(dev, 0);
 
        if (p == NULL)
                return -ENOMEM;
@@ -423,8 +422,7 @@ static void __devexit sbs_exit(struct pci_dev *dev)
 {
        u8 __iomem *p;
 
-       p = ioremap_nocache(pci_resource_start(dev, 0),
-                                       pci_resource_len(dev, 0));
+       p = pci_ioremap_bar(dev, 0);
        /* FIXME: What if resource_len < OCT_REG_CR_OFF */
        if (p != NULL)
                writeb(0, p + OCT_REG_CR_OFF);
index 9f2891c2c4a21f93e4cc6289e4b95c286a655455..cd1b6a45bb8241b3cbdadc0f03021dc634adace4 100644 (file)
@@ -1548,8 +1548,7 @@ static int __devinit icom_probe(struct pci_dev *dev,
                goto probe_exit1;
        }
 
-        icom_adapter->base_addr = ioremap(icom_adapter->base_addr_pci,
-                                               pci_resource_len(dev, 0));
+        icom_adapter->base_addr = pci_ioremap_bar(dev, 0);
 
        if (!icom_adapter->base_addr)
                goto probe_exit1;
index 107ce2e187b8fc1c1773530a74b0779fdce283de..00f4577d2f7fc514a523f2ec534d09b73bac7d72 100644 (file)
@@ -467,7 +467,7 @@ int __devinit jsm_uart_port_init(struct jsm_board *brd)
                        printk(KERN_INFO "jsm: linemap is full, added device failed\n");
                        continue;
                } else
-                       set_bit((int)line, linemap);
+                       set_bit(line, linemap);
                brd->channels[i]->uart_port.line = line;
                if (uart_add_one_port (&jsm_uart_driver, &brd->channels[i]->uart_port))
                        printk(KERN_INFO "jsm: add device failed\n");
@@ -503,7 +503,7 @@ int jsm_remove_uart_port(struct jsm_board *brd)
 
                ch = brd->channels[i];
 
-               clear_bit((int)(ch->uart_port.line), linemap);
+               clear_bit(ch->uart_port.line, linemap);
                uart_remove_one_port(&jsm_uart_driver, &brd->channels[i]->uart_port);
        }
 
index 7313c2edcb831b873d89156bb6200179dd642fb8..54dd16d66a4bc7e2a1152c87828c7eab0b14ba3b 100644 (file)
@@ -461,6 +461,94 @@ static void serial_txx9_break_ctl(struct uart_port *port, int break_state)
        spin_unlock_irqrestore(&up->port.lock, flags);
 }
 
+#if defined(CONFIG_SERIAL_TXX9_CONSOLE) || defined(CONFIG_CONSOLE_POLL)
+/*
+ *     Wait for transmitter & holding register to empty
+ *
+ *     Built when either the TXx9 console or console polling is enabled;
+ *     serial_txx9_put_poll_char() calls it around every character.
+ *     Both waits are bounded, so the function always returns even if
+ *     the hardware never reports the expected state.
+ */
+static void wait_for_xmitr(struct uart_txx9_port *up)
+{
+       unsigned int tmout = 10000;
+
+       /* Wait up to 10ms for the character(s) to be sent. */
+       while (--tmout &&
+              !(sio_in(up, TXX9_SICISR) & TXX9_SICISR_TXALS))
+               udelay(1);
+
+       /* Wait up to 1s for flow control if necessary */
+       if (up->port.flags & UPF_CONS_FLOW) {
+               tmout = 1000000;
+               while (--tmout &&
+                      (sio_in(up, TXX9_SICISR) & TXX9_SICISR_CTSS))
+                       udelay(1);
+       }
+}
+#endif
+
+#ifdef CONFIG_CONSOLE_POLL
+/*
+ * Console polling routines for writing and reading from the uart while
+ * in an interrupt or debug context.
+ */
+
+/*
+ * Polled receive of one character.
+ *
+ * The SIDICR contents are saved and the register is zeroed for the
+ * duration of the call, then restored.  The function busy-waits, with no
+ * timeout, for as long as TXX9_SIDISR_UVALID is set in SIDISR before it
+ * reads the receive FIFO.
+ */
+static int serial_txx9_get_poll_char(struct uart_port *port)
+{
+       struct uart_txx9_port *up = (struct uart_txx9_port *)port;
+       unsigned int saved_dicr;
+       unsigned char ch;
+
+       /* Remember the interrupt control register, then clear it. */
+       saved_dicr = sio_in(up, TXX9_SIDICR);
+       sio_out(up, TXX9_SIDICR, 0);
+
+       /* Spin until the UVALID status bit drops. */
+       while (sio_in(up, TXX9_SIDISR) & TXX9_SIDISR_UVALID)
+               ;
+
+       ch = sio_in(up, TXX9_SIRFIFO);
+
+       /* Clear the RX interrupt status, then put the saved value back. */
+       sio_mask(up, TXX9_SIDISR, TXX9_SIDISR_RDIS);
+       sio_out(up, TXX9_SIDICR, saved_dicr);
+
+       return ch;
+}
+
+
+/*
+ * Polled transmit of one character.
+ *
+ * The SIDICR contents are saved and the register is zeroed for the
+ * duration of the call, then restored.  A line feed is followed by a
+ * carriage return, and the function waits for the transmitter again
+ * before it returns.
+ */
+static void serial_txx9_put_poll_char(struct uart_port *port, unsigned char c)
+{
+       struct uart_txx9_port *up = (struct uart_txx9_port *)port;
+       unsigned int saved_dicr;
+
+       /* Remember the interrupt control register, then clear it. */
+       saved_dicr = sio_in(up, TXX9_SIDICR);
+       sio_out(up, TXX9_SIDICR, 0);
+
+       /* Queue the character once the transmitter is ready for it. */
+       wait_for_xmitr(up);
+       sio_out(up, TXX9_SITFIFO, c);
+
+       /* LF is sent first, then the matching CR. */
+       if (c == '\n') {
+               wait_for_xmitr(up);
+               sio_out(up, TXX9_SITFIFO, '\r');
+       }
+
+       /* Let the transmitter drain, then put the saved value back. */
+       wait_for_xmitr(up);
+       sio_out(up, TXX9_SIDICR, saved_dicr);
+}
+
+#endif /* CONFIG_CONSOLE_POLL */
+
 static int serial_txx9_startup(struct uart_port *port)
 {
        struct uart_txx9_port *up = (struct uart_txx9_port *)port;
@@ -781,6 +869,10 @@ static struct uart_ops serial_txx9_pops = {
        .release_port   = serial_txx9_release_port,
        .request_port   = serial_txx9_request_port,
        .config_port    = serial_txx9_config_port,
+#ifdef CONFIG_CONSOLE_POLL
+       .poll_get_char  = serial_txx9_get_poll_char,
+       .poll_put_char  = serial_txx9_put_poll_char,
+#endif
 };
 
 static struct uart_txx9_port serial_txx9_ports[UART_NR];
@@ -803,27 +895,6 @@ static void __init serial_txx9_register_ports(struct uart_driver *drv,
 
 #ifdef CONFIG_SERIAL_TXX9_CONSOLE
 
-/*
- *     Wait for transmitter & holding register to empty
- */
-static inline void wait_for_xmitr(struct uart_txx9_port *up)
-{
-       unsigned int tmout = 10000;
-
-       /* Wait up to 10ms for the character(s) to be sent. */
-       while (--tmout &&
-              !(sio_in(up, TXX9_SICISR) & TXX9_SICISR_TXALS))
-               udelay(1);
-
-       /* Wait up to 1s for flow control if necessary */
-       if (up->port.flags & UPF_CONS_FLOW) {
-               tmout = 1000000;
-               while (--tmout &&
-                      (sio_in(up, TXX9_SICISR) & TXX9_SICISR_CTSS))
-                       udelay(1);
-       }
-}
-
 static void serial_txx9_console_putchar(struct uart_port *port, int ch)
 {
        struct uart_txx9_port *up = (struct uart_txx9_port *)port;
index 3c839e37d37f5ac2bb71b57b97455e1ee17dcda4..c0a583cc22274e6b29e82d344c3c630c12e5801f 100644 (file)
@@ -12,7 +12,6 @@
 obj-${CONFIG_OCTEON_ETHERNET} :=  octeon-ethernet.o
 
 octeon-ethernet-objs := ethernet.o
-octeon-ethernet-objs += ethernet-common.o
 octeon-ethernet-objs += ethernet-mdio.o
 octeon-ethernet-objs += ethernet-mem.o
 octeon-ethernet-objs += ethernet-proc.o
diff --git a/drivers/staging/octeon/ethernet-common.c b/drivers/staging/octeon/ethernet-common.c
deleted file mode 100644 (file)
index 3e6f5b8..0000000
+++ /dev/null
@@ -1,328 +0,0 @@
-/**********************************************************************
- * Author: Cavium Networks
- *
- * Contact: support@caviumnetworks.com
- * This file is part of the OCTEON SDK
- *
- * Copyright (c) 2003-2007 Cavium Networks
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, Version 2, as
- * published by the Free Software Foundation.
- *
- * This file is distributed in the hope that it will be useful, but
- * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
- * NONINFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this file; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- * or visit http://www.gnu.org/licenses/.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium Networks for more information
-**********************************************************************/
-#include <linux/kernel.h>
-#include <linux/mii.h>
-#include <net/dst.h>
-
-#include <asm/atomic.h>
-#include <asm/octeon/octeon.h>
-
-#include "ethernet-defines.h"
-#include "ethernet-tx.h"
-#include "ethernet-mdio.h"
-#include "ethernet-util.h"
-#include "octeon-ethernet.h"
-#include "ethernet-common.h"
-
-#include "cvmx-pip.h"
-#include "cvmx-pko.h"
-#include "cvmx-fau.h"
-#include "cvmx-helper.h"
-
-#include "cvmx-gmxx-defs.h"
-
-/**
- * Get the low level ethernet statistics
- *
- * @dev:    Device to get the statistics from
- * Returns Pointer to the statistics
- */
-static struct net_device_stats *cvm_oct_common_get_stats(struct net_device *dev)
-{
-       cvmx_pip_port_status_t rx_status;
-       cvmx_pko_port_status_t tx_status;
-       struct octeon_ethernet *priv = netdev_priv(dev);
-
-       if (priv->port < CVMX_PIP_NUM_INPUT_PORTS) {
-               if (octeon_is_simulation()) {
-                       /* The simulator doesn't support statistics */
-                       memset(&rx_status, 0, sizeof(rx_status));
-                       memset(&tx_status, 0, sizeof(tx_status));
-               } else {
-                       cvmx_pip_get_port_status(priv->port, 1, &rx_status);
-                       cvmx_pko_get_port_status(priv->port, 1, &tx_status);
-               }
-
-               priv->stats.rx_packets += rx_status.inb_packets;
-               priv->stats.tx_packets += tx_status.packets;
-               priv->stats.rx_bytes += rx_status.inb_octets;
-               priv->stats.tx_bytes += tx_status.octets;
-               priv->stats.multicast += rx_status.multicast_packets;
-               priv->stats.rx_crc_errors += rx_status.inb_errors;
-               priv->stats.rx_frame_errors += rx_status.fcs_align_err_packets;
-
-               /*
-                * The drop counter must be incremented atomically
-                * since the RX tasklet also increments it.
-                */
-#ifdef CONFIG_64BIT
-               atomic64_add(rx_status.dropped_packets,
-                            (atomic64_t *)&priv->stats.rx_dropped);
-#else
-               atomic_add(rx_status.dropped_packets,
-                            (atomic_t *)&priv->stats.rx_dropped);
-#endif
-       }
-
-       return &priv->stats;
-}
-
-/**
- * Set the multicast list. Currently unimplemented.
- *
- * @dev:    Device to work on
- */
-static void cvm_oct_common_set_multicast_list(struct net_device *dev)
-{
-       union cvmx_gmxx_prtx_cfg gmx_cfg;
-       struct octeon_ethernet *priv = netdev_priv(dev);
-       int interface = INTERFACE(priv->port);
-       int index = INDEX(priv->port);
-
-       if ((interface < 2)
-           && (cvmx_helper_interface_get_mode(interface) !=
-               CVMX_HELPER_INTERFACE_MODE_SPI)) {
-               union cvmx_gmxx_rxx_adr_ctl control;
-               control.u64 = 0;
-               control.s.bcst = 1;     /* Allow broadcast MAC addresses */
-
-               if (dev->mc_list || (dev->flags & IFF_ALLMULTI) ||
-                   (dev->flags & IFF_PROMISC))
-                       /* Force accept multicast packets */
-                       control.s.mcst = 2;
-               else
-                       /* Force reject multicat packets */
-                       control.s.mcst = 1;
-
-               if (dev->flags & IFF_PROMISC)
-                       /*
-                        * Reject matches if promisc. Since CAM is
-                        * shut off, should accept everything.
-                        */
-                       control.s.cam_mode = 0;
-               else
-                       /* Filter packets based on the CAM */
-                       control.s.cam_mode = 1;
-
-               gmx_cfg.u64 =
-                   cvmx_read_csr(CVMX_GMXX_PRTX_CFG(index, interface));
-               cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface),
-                              gmx_cfg.u64 & ~1ull);
-
-               cvmx_write_csr(CVMX_GMXX_RXX_ADR_CTL(index, interface),
-                              control.u64);
-               if (dev->flags & IFF_PROMISC)
-                       cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM_EN
-                                      (index, interface), 0);
-               else
-                       cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM_EN
-                                      (index, interface), 1);
-
-               cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface),
-                              gmx_cfg.u64);
-       }
-}
-
-/**
- * Set the hardware MAC address for a device
- *
- * @dev:    Device to change the MAC address for
- * @addr:   Address structure to change it too. MAC address is addr + 2.
- * Returns Zero on success
- */
-static int cvm_oct_common_set_mac_address(struct net_device *dev, void *addr)
-{
-       struct octeon_ethernet *priv = netdev_priv(dev);
-       union cvmx_gmxx_prtx_cfg gmx_cfg;
-       int interface = INTERFACE(priv->port);
-       int index = INDEX(priv->port);
-
-       memcpy(dev->dev_addr, addr + 2, 6);
-
-       if ((interface < 2)
-           && (cvmx_helper_interface_get_mode(interface) !=
-               CVMX_HELPER_INTERFACE_MODE_SPI)) {
-               int i;
-               uint8_t *ptr = addr;
-               uint64_t mac = 0;
-               for (i = 0; i < 6; i++)
-                       mac = (mac << 8) | (uint64_t) (ptr[i + 2]);
-
-               gmx_cfg.u64 =
-                   cvmx_read_csr(CVMX_GMXX_PRTX_CFG(index, interface));
-               cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface),
-                              gmx_cfg.u64 & ~1ull);
-
-               cvmx_write_csr(CVMX_GMXX_SMACX(index, interface), mac);
-               cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM0(index, interface),
-                              ptr[2]);
-               cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM1(index, interface),
-                              ptr[3]);
-               cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM2(index, interface),
-                              ptr[4]);
-               cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM3(index, interface),
-                              ptr[5]);
-               cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM4(index, interface),
-                              ptr[6]);
-               cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM5(index, interface),
-                              ptr[7]);
-               cvm_oct_common_set_multicast_list(dev);
-               cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface),
-                              gmx_cfg.u64);
-       }
-       return 0;
-}
-
-/**
- * Change the link MTU. Unimplemented
- *
- * @dev:     Device to change
- * @new_mtu: The new MTU
- *
- * Returns Zero on success
- */
-static int cvm_oct_common_change_mtu(struct net_device *dev, int new_mtu)
-{
-       struct octeon_ethernet *priv = netdev_priv(dev);
-       int interface = INTERFACE(priv->port);
-       int index = INDEX(priv->port);
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
-       int vlan_bytes = 4;
-#else
-       int vlan_bytes = 0;
-#endif
-
-       /*
-        * Limit the MTU to make sure the ethernet packets are between
-        * 64 bytes and 65535 bytes.
-        */
-       if ((new_mtu + 14 + 4 + vlan_bytes < 64)
-           || (new_mtu + 14 + 4 + vlan_bytes > 65392)) {
-               pr_err("MTU must be between %d and %d.\n",
-                      64 - 14 - 4 - vlan_bytes, 65392 - 14 - 4 - vlan_bytes);
-               return -EINVAL;
-       }
-       dev->mtu = new_mtu;
-
-       if ((interface < 2)
-           && (cvmx_helper_interface_get_mode(interface) !=
-               CVMX_HELPER_INTERFACE_MODE_SPI)) {
-               /* Add ethernet header and FCS, and VLAN if configured. */
-               int max_packet = new_mtu + 14 + 4 + vlan_bytes;
-
-               if (OCTEON_IS_MODEL(OCTEON_CN3XXX)
-                   || OCTEON_IS_MODEL(OCTEON_CN58XX)) {
-                       /* Signal errors on packets larger than the MTU */
-                       cvmx_write_csr(CVMX_GMXX_RXX_FRM_MAX(index, interface),
-                                      max_packet);
-               } else {
-                       /*
-                        * Set the hardware to truncate packets larger
-                        * than the MTU and smaller the 64 bytes.
-                        */
-                       union cvmx_pip_frm_len_chkx frm_len_chk;
-                       frm_len_chk.u64 = 0;
-                       frm_len_chk.s.minlen = 64;
-                       frm_len_chk.s.maxlen = max_packet;
-                       cvmx_write_csr(CVMX_PIP_FRM_LEN_CHKX(interface),
-                                      frm_len_chk.u64);
-               }
-               /*
-                * Set the hardware to truncate packets larger than
-                * the MTU. The jabber register must be set to a
-                * multiple of 8 bytes, so round up.
-                */
-               cvmx_write_csr(CVMX_GMXX_RXX_JABBER(index, interface),
-                              (max_packet + 7) & ~7u);
-       }
-       return 0;
-}
-
-/**
- * Per network device initialization
- *
- * @dev:    Device to initialize
- * Returns Zero on success
- */
-int cvm_oct_common_init(struct net_device *dev)
-{
-       static int count;
-       char mac[8] = { 0x00, 0x00,
-               octeon_bootinfo->mac_addr_base[0],
-               octeon_bootinfo->mac_addr_base[1],
-               octeon_bootinfo->mac_addr_base[2],
-               octeon_bootinfo->mac_addr_base[3],
-               octeon_bootinfo->mac_addr_base[4],
-               octeon_bootinfo->mac_addr_base[5] + count
-       };
-       struct octeon_ethernet *priv = netdev_priv(dev);
-
-       /*
-        * Force the interface to use the POW send if always_use_pow
-        * was specified or it is in the pow send list.
-        */
-       if ((pow_send_group != -1)
-           && (always_use_pow || strstr(pow_send_list, dev->name)))
-               priv->queue = -1;
-
-       if (priv->queue != -1) {
-               dev->hard_start_xmit = cvm_oct_xmit;
-               if (USE_HW_TCPUDP_CHECKSUM)
-                       dev->features |= NETIF_F_IP_CSUM;
-       } else
-               dev->hard_start_xmit = cvm_oct_xmit_pow;
-       count++;
-
-       dev->get_stats = cvm_oct_common_get_stats;
-       dev->set_mac_address = cvm_oct_common_set_mac_address;
-       dev->set_multicast_list = cvm_oct_common_set_multicast_list;
-       dev->change_mtu = cvm_oct_common_change_mtu;
-       dev->do_ioctl = cvm_oct_ioctl;
-       /* We do our own locking, Linux doesn't need to */
-       dev->features |= NETIF_F_LLTX;
-       SET_ETHTOOL_OPS(dev, &cvm_oct_ethtool_ops);
-#ifdef CONFIG_NET_POLL_CONTROLLER
-       dev->poll_controller = cvm_oct_poll_controller;
-#endif
-
-       cvm_oct_mdio_setup_device(dev);
-       dev->set_mac_address(dev, mac);
-       dev->change_mtu(dev, dev->mtu);
-
-       /*
-        * Zero out stats for port so we won't mistakenly show
-        * counters from the bootloader.
-        */
-       memset(dev->get_stats(dev), 0, sizeof(struct net_device_stats));
-
-       return 0;
-}
-
-void cvm_oct_common_uninit(struct net_device *dev)
-{
-       /* Currently nothing to do */
-}
diff --git a/drivers/staging/octeon/ethernet-common.h b/drivers/staging/octeon/ethernet-common.h
deleted file mode 100644 (file)
index 2bd9cd7..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-/*********************************************************************
- * Author: Cavium Networks
- *
- * Contact: support@caviumnetworks.com
- * This file is part of the OCTEON SDK
- *
- * Copyright (c) 2003-2007 Cavium Networks
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, Version 2, as
- * published by the Free Software Foundation.
- *
- * This file is distributed in the hope that it will be useful, but
- * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
- * NONINFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this file; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- * or visit http://www.gnu.org/licenses/.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium Networks for more information
-*********************************************************************/
-
-int cvm_oct_common_init(struct net_device *dev);
-void cvm_oct_common_uninit(struct net_device *dev);
index 8f7374e7664c4e04d7f6c63c4c2c543f159149ab..f13131b03c333912e8bbbd0fe44d3ace7df52ec2 100644 (file)
 
 /* Maximum number of packets to process per interrupt. */
 #define MAX_RX_PACKETS 120
+/* Maximum number of SKBs to try to free per xmit packet. */
+#define MAX_SKB_TO_FREE 10
 #define MAX_OUT_QUEUE_DEPTH 1000
 
 #ifndef CONFIG_SMP
index 8579f1670d1ecffcc0a1cab86150de047bfce121..8704133fe1272c03964dc110fa48c39b51d4b394 100644 (file)
@@ -33,7 +33,6 @@
 
 #include "ethernet-defines.h"
 #include "octeon-ethernet.h"
-#include "ethernet-common.h"
 #include "ethernet-util.h"
 
 #include "cvmx-helper.h"
@@ -265,7 +264,7 @@ static irqreturn_t cvm_oct_rgmii_rml_interrupt(int cpl, void *dev_id)
        return return_status;
 }
 
-static int cvm_oct_rgmii_open(struct net_device *dev)
+int cvm_oct_rgmii_open(struct net_device *dev)
 {
        union cvmx_gmxx_prtx_cfg gmx_cfg;
        struct octeon_ethernet *priv = netdev_priv(dev);
@@ -286,7 +285,7 @@ static int cvm_oct_rgmii_open(struct net_device *dev)
        return 0;
 }
 
-static int cvm_oct_rgmii_stop(struct net_device *dev)
+int cvm_oct_rgmii_stop(struct net_device *dev)
 {
        union cvmx_gmxx_prtx_cfg gmx_cfg;
        struct octeon_ethernet *priv = netdev_priv(dev);
@@ -305,9 +304,7 @@ int cvm_oct_rgmii_init(struct net_device *dev)
        int r;
 
        cvm_oct_common_init(dev);
-       dev->open = cvm_oct_rgmii_open;
-       dev->stop = cvm_oct_rgmii_stop;
-       dev->stop(dev);
+       dev->netdev_ops->ndo_stop(dev);
 
        /*
         * Due to GMX errata in CN3XXX series chips, it is necessary
index 58fa39c1d6757727285e20547269124cb2b1e4db..2b54996bd85dbe676a9bdc1daca400cae156f45c 100644 (file)
 #include "ethernet-defines.h"
 #include "octeon-ethernet.h"
 #include "ethernet-util.h"
-#include "ethernet-common.h"
 
 #include "cvmx-helper.h"
 
 #include "cvmx-gmxx-defs.h"
 
-static int cvm_oct_sgmii_open(struct net_device *dev)
+int cvm_oct_sgmii_open(struct net_device *dev)
 {
        union cvmx_gmxx_prtx_cfg gmx_cfg;
        struct octeon_ethernet *priv = netdev_priv(dev);
@@ -61,7 +60,7 @@ static int cvm_oct_sgmii_open(struct net_device *dev)
        return 0;
 }
 
-static int cvm_oct_sgmii_stop(struct net_device *dev)
+int cvm_oct_sgmii_stop(struct net_device *dev)
 {
        union cvmx_gmxx_prtx_cfg gmx_cfg;
        struct octeon_ethernet *priv = netdev_priv(dev);
@@ -113,9 +112,7 @@ int cvm_oct_sgmii_init(struct net_device *dev)
 {
        struct octeon_ethernet *priv = netdev_priv(dev);
        cvm_oct_common_init(dev);
-       dev->open = cvm_oct_sgmii_open;
-       dev->stop = cvm_oct_sgmii_stop;
-       dev->stop(dev);
+       dev->netdev_ops->ndo_stop(dev);
        if (!octeon_is_simulation())
                priv->poll = cvm_oct_sgmii_poll;
 
index e0971bbe4ddc252f130fe1fc474cd636cda152dc..66190b0cb68f355c6f482bba828cdcc6c4228017 100644 (file)
@@ -33,7 +33,6 @@
 
 #include "ethernet-defines.h"
 #include "octeon-ethernet.h"
-#include "ethernet-common.h"
 #include "ethernet-util.h"
 
 #include "cvmx-spi.h"
index 77b7122c8fdb66d8bad1b4a4f39524bec0423617..81a851390f1bfc47d762a912114fbce62d14210f 100644 (file)
@@ -47,6 +47,7 @@
 
 #include "ethernet-defines.h"
 #include "octeon-ethernet.h"
+#include "ethernet-tx.h"
 #include "ethernet-util.h"
 
 #include "cvmx-wqe.h"
@@ -82,8 +83,10 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
        uint64_t old_scratch2;
        int dropped;
        int qos;
+       int queue_it_up;
        struct octeon_ethernet *priv = netdev_priv(dev);
-       int32_t in_use;
+       int32_t skb_to_free;
+       int32_t undo;
        int32_t buffers_to_free;
 #if REUSE_SKBUFFS_WITHOUT_FREE
        unsigned char *fpa_head;
@@ -120,15 +123,15 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
                old_scratch2 = cvmx_scratch_read64(CVMX_SCR_SCRATCH + 8);
 
                /*
-                * Assume we're going to be able t osend this
-                * packet. Fetch and increment the number of pending
-                * packets for output.
+                * Fetch and increment the number of packets to be
+                * freed.
                 */
                cvmx_fau_async_fetch_and_add32(CVMX_SCR_SCRATCH + 8,
                                               FAU_NUM_PACKET_BUFFERS_TO_FREE,
                                               0);
                cvmx_fau_async_fetch_and_add32(CVMX_SCR_SCRATCH,
-                                              priv->fau + qos * 4, 1);
+                                              priv->fau + qos * 4,
+                                              MAX_SKB_TO_FREE);
        }
 
        /*
@@ -253,10 +256,10 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
 
        /*
         * The skbuff will be reused without ever being freed. We must
-        * cleanup a bunch of Linux stuff.
+        * cleanup a bunch of core things.
         */
-       dst_release(skb->dst);
-       skb->dst = NULL;
+       dst_release(skb_dst(skb));
+       skb_dst_set(skb, NULL);
 #ifdef CONFIG_XFRM
        secpath_put(skb->sp);
        skb->sp = NULL;
@@ -286,15 +289,29 @@ dont_put_skbuff_in_hw:
        if (USE_ASYNC_IOBDMA) {
                /* Get the number of skbuffs in use by the hardware */
                CVMX_SYNCIOBDMA;
-               in_use = cvmx_scratch_read64(CVMX_SCR_SCRATCH);
+               skb_to_free = cvmx_scratch_read64(CVMX_SCR_SCRATCH);
                buffers_to_free = cvmx_scratch_read64(CVMX_SCR_SCRATCH + 8);
        } else {
                /* Get the number of skbuffs in use by the hardware */
-               in_use = cvmx_fau_fetch_and_add32(priv->fau + qos * 4, 1);
+               skb_to_free = cvmx_fau_fetch_and_add32(priv->fau + qos * 4,
+                                                      MAX_SKB_TO_FREE);
                buffers_to_free =
                    cvmx_fau_fetch_and_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE, 0);
        }
 
+       /*
+        * We try to claim MAX_SKB_TO_FREE buffers.  If there were not
+        * that many available, we have to un-claim (undo) any that
+        * were in excess.  If skb_to_free is positive we will free
+        * that many buffers.
+        */
+       undo = skb_to_free > 0 ?
+               MAX_SKB_TO_FREE : skb_to_free + MAX_SKB_TO_FREE;
+       if (undo > 0)
+               cvmx_fau_atomic_add32(priv->fau+qos*4, -undo);
+       skb_to_free = -skb_to_free > MAX_SKB_TO_FREE ?
+               MAX_SKB_TO_FREE : -skb_to_free;
+
        /*
         * If we're sending faster than the receive can free them then
         * don't do the HW free.
@@ -330,38 +347,31 @@ dont_put_skbuff_in_hw:
                cvmx_scratch_write64(CVMX_SCR_SCRATCH + 8, old_scratch2);
        }
 
+       queue_it_up = 0;
        if (unlikely(dropped)) {
                dev_kfree_skb_any(skb);
-               cvmx_fau_atomic_add32(priv->fau + qos * 4, -1);
                priv->stats.tx_dropped++;
        } else {
                if (USE_SKBUFFS_IN_HW) {
                        /* Put this packet on the queue to be freed later */
                        if (pko_command.s.dontfree)
-                               skb_queue_tail(&priv->tx_free_list[qos], skb);
-                       else {
+                               queue_it_up = 1;
+                       else
                                cvmx_fau_atomic_add32
                                    (FAU_NUM_PACKET_BUFFERS_TO_FREE, -1);
-                               cvmx_fau_atomic_add32(priv->fau + qos * 4, -1);
-                       }
                } else {
                        /* Put this packet on the queue to be freed later */
-                       skb_queue_tail(&priv->tx_free_list[qos], skb);
+                       queue_it_up = 1;
                }
        }
 
-       /* Free skbuffs not in use by the hardware, possibly two at a time */
-       if (skb_queue_len(&priv->tx_free_list[qos]) > in_use) {
+       if (queue_it_up) {
                spin_lock(&priv->tx_free_list[qos].lock);
-               /*
-                * Check again now that we have the lock. It might
-                * have changed.
-                */
-               if (skb_queue_len(&priv->tx_free_list[qos]) > in_use)
-                       dev_kfree_skb(__skb_dequeue(&priv->tx_free_list[qos]));
-               if (skb_queue_len(&priv->tx_free_list[qos]) > in_use)
-                       dev_kfree_skb(__skb_dequeue(&priv->tx_free_list[qos]));
+               __skb_queue_tail(&priv->tx_free_list[qos], skb);
+               cvm_oct_free_tx_skbs(priv, skb_to_free, qos, 0);
                spin_unlock(&priv->tx_free_list[qos].lock);
+       } else {
+               cvm_oct_free_tx_skbs(priv, skb_to_free, qos, 1);
        }
 
        return 0;
index 5106236fe9819fc44c0c190b08367712aad78b1c..c0bebf750bc059acc8d4a8a52b4ab3451e81250d 100644 (file)
@@ -30,3 +30,28 @@ int cvm_oct_xmit_pow(struct sk_buff *skb, struct net_device *dev);
 int cvm_oct_transmit_qos(struct net_device *dev, void *work_queue_entry,
                         int do_free, int qos);
 void cvm_oct_tx_shutdown(struct net_device *dev);
+
+/**
+ * Free dead transmit skbs.
+ *
+ * @priv:              The driver data
+ * @skb_to_free:       The number of SKBs to free (free none if negative).
+ * @qos:               The queue to free from.
+ * @take_lock:         If true, acquire the skb list lock.
+ */
+static inline void cvm_oct_free_tx_skbs(struct octeon_ethernet *priv,
+                                       int skb_to_free,
+                                       int qos, int take_lock)
+{
+       /* Free skbuffs not in use by the hardware.  */
+       if (skb_to_free > 0) {
+               if (take_lock)
+                       spin_lock(&priv->tx_free_list[qos].lock);
+               while (skb_to_free > 0) {
+                       dev_kfree_skb(__skb_dequeue(&priv->tx_free_list[qos]));
+                       skb_to_free--;
+               }
+               if (take_lock)
+                       spin_unlock(&priv->tx_free_list[qos].lock);
+       }
+}
index f08eb32e04fcd9e987e8fc9cb82c8c4f03944cc5..0c2e7cc40f35338dc2e7ed8f179d2e5bf21c43be 100644 (file)
 
 #include "ethernet-defines.h"
 #include "octeon-ethernet.h"
-#include "ethernet-common.h"
 #include "ethernet-util.h"
 
 #include "cvmx-helper.h"
 
 #include "cvmx-gmxx-defs.h"
 
-static int cvm_oct_xaui_open(struct net_device *dev)
+int cvm_oct_xaui_open(struct net_device *dev)
 {
        union cvmx_gmxx_prtx_cfg gmx_cfg;
        struct octeon_ethernet *priv = netdev_priv(dev);
@@ -60,7 +59,7 @@ static int cvm_oct_xaui_open(struct net_device *dev)
        return 0;
 }
 
-static int cvm_oct_xaui_stop(struct net_device *dev)
+int cvm_oct_xaui_stop(struct net_device *dev)
 {
        union cvmx_gmxx_prtx_cfg gmx_cfg;
        struct octeon_ethernet *priv = netdev_priv(dev);
@@ -112,9 +111,7 @@ int cvm_oct_xaui_init(struct net_device *dev)
 {
        struct octeon_ethernet *priv = netdev_priv(dev);
        cvm_oct_common_init(dev);
-       dev->open = cvm_oct_xaui_open;
-       dev->stop = cvm_oct_xaui_stop;
-       dev->stop(dev);
+       dev->netdev_ops->ndo_stop(dev);
        if (!octeon_is_simulation())
                priv->poll = cvm_oct_xaui_poll;
 
index e8ef9e0b791fd48fd1d63874a27c09907573a2e4..b8479517dce28b9fa76af52c532c8af556cb54e5 100644 (file)
 #include <asm/octeon/octeon.h>
 
 #include "ethernet-defines.h"
+#include "octeon-ethernet.h"
 #include "ethernet-mem.h"
 #include "ethernet-rx.h"
 #include "ethernet-tx.h"
+#include "ethernet-mdio.h"
 #include "ethernet-util.h"
 #include "ethernet-proc.h"
-#include "ethernet-common.h"
-#include "octeon-ethernet.h"
+
 
 #include "cvmx-pip.h"
 #include "cvmx-pko.h"
@@ -51,6 +52,7 @@
 #include "cvmx-ipd.h"
 #include "cvmx-helper.h"
 
+#include "cvmx-gmxx-defs.h"
 #include "cvmx-smix-defs.h"
 
 #if defined(CONFIG_CAVIUM_OCTEON_NUM_PACKET_BUFFERS) \
@@ -129,53 +131,55 @@ extern struct semaphore mdio_sem;
  */
 static void cvm_do_timer(unsigned long arg)
 {
+       int32_t skb_to_free, undo;
+       int queues_per_port;
+       int qos;
+       struct octeon_ethernet *priv;
        static int port;
-       if (port < CVMX_PIP_NUM_INPUT_PORTS) {
-               if (cvm_oct_device[port]) {
-                       int queues_per_port;
-                       int qos;
-                       struct octeon_ethernet *priv =
-                               netdev_priv(cvm_oct_device[port]);
-                       if (priv->poll) {
-                               /* skip polling if we don't get the lock */
-                               if (!down_trylock(&mdio_sem)) {
-                                       priv->poll(cvm_oct_device[port]);
-                                       up(&mdio_sem);
-                               }
-                       }
 
-                       queues_per_port = cvmx_pko_get_num_queues(port);
-                       /* Drain any pending packets in the free list */
-                       for (qos = 0; qos < queues_per_port; qos++) {
-                               if (skb_queue_len(&priv->tx_free_list[qos])) {
-                                       spin_lock(&priv->tx_free_list[qos].
-                                                 lock);
-                                       while (skb_queue_len
-                                              (&priv->tx_free_list[qos]) >
-                                              cvmx_fau_fetch_and_add32(priv->
-                                                                       fau +
-                                                                       qos * 4,
-                                                                       0))
-                                               dev_kfree_skb(__skb_dequeue
-                                                             (&priv->
-                                                              tx_free_list
-                                                              [qos]));
-                                       spin_unlock(&priv->tx_free_list[qos].
-                                                   lock);
-                               }
-                       }
-                       cvm_oct_device[port]->get_stats(cvm_oct_device[port]);
-               }
-               port++;
-               /* Poll the next port in a 50th of a second.
-                  This spreads the polling of ports out a little bit */
-               mod_timer(&cvm_oct_poll_timer, jiffies + HZ / 50);
-       } else {
+       if (port >= CVMX_PIP_NUM_INPUT_PORTS) {
+               /*
+                * All ports have been polled. Start the next
+                * iteration through the ports in one second.
+                */
                port = 0;
-               /* All ports have been polled. Start the next iteration through
-                  the ports in one second */
                mod_timer(&cvm_oct_poll_timer, jiffies + HZ);
+               return;
+       }
+       if (!cvm_oct_device[port])
+               goto out;
+
+       priv = netdev_priv(cvm_oct_device[port]);
+       if (priv->poll) {
+               /* skip polling if we don't get the lock */
+               if (!down_trylock(&mdio_sem)) {
+                       priv->poll(cvm_oct_device[port]);
+                       up(&mdio_sem);
+               }
        }
+
+       queues_per_port = cvmx_pko_get_num_queues(port);
+       /* Drain any pending packets in the free list */
+       for (qos = 0; qos < queues_per_port; qos++) {
+               if (skb_queue_len(&priv->tx_free_list[qos]) == 0)
+                       continue;
+               skb_to_free = cvmx_fau_fetch_and_add32(priv->fau + qos * 4,
+                                                      MAX_SKB_TO_FREE);
+               undo = skb_to_free > 0 ?
+                       MAX_SKB_TO_FREE : skb_to_free + MAX_SKB_TO_FREE;
+               if (undo > 0)
+                       cvmx_fau_atomic_add32(priv->fau+qos*4, -undo);
+               skb_to_free = -skb_to_free > MAX_SKB_TO_FREE ?
+                       MAX_SKB_TO_FREE : -skb_to_free;
+               cvm_oct_free_tx_skbs(priv, skb_to_free, qos, 1);
+       }
+       cvm_oct_device[port]->netdev_ops->ndo_get_stats(cvm_oct_device[port]);
+
+out:
+       port++;
+       /* Poll the next port in a 50th of a second.
+          This spreads the polling of ports out a little bit */
+       mod_timer(&cvm_oct_poll_timer, jiffies + HZ / 50);
 }
 
 /**
@@ -245,6 +249,362 @@ int cvm_oct_free_work(void *work_queue_entry)
 }
 EXPORT_SYMBOL(cvm_oct_free_work);
 
+/**
+ * Get the low level ethernet statistics
+ *
+ * @dev:    Device to get the statistics from
+ * Returns Pointer to the statistics
+ */
+static struct net_device_stats *cvm_oct_common_get_stats(struct net_device *dev)
+{
+       cvmx_pip_port_status_t rx_status;
+       cvmx_pko_port_status_t tx_status;
+       struct octeon_ethernet *priv = netdev_priv(dev);
+
+       if (priv->port < CVMX_PIP_NUM_INPUT_PORTS) {
+               if (octeon_is_simulation()) {
+                       /* The simulator doesn't support statistics */
+                       memset(&rx_status, 0, sizeof(rx_status));
+                       memset(&tx_status, 0, sizeof(tx_status));
+               } else {
+                       cvmx_pip_get_port_status(priv->port, 1, &rx_status);
+                       cvmx_pko_get_port_status(priv->port, 1, &tx_status);
+               }
+
+               priv->stats.rx_packets += rx_status.inb_packets;
+               priv->stats.tx_packets += tx_status.packets;
+               priv->stats.rx_bytes += rx_status.inb_octets;
+               priv->stats.tx_bytes += tx_status.octets;
+               priv->stats.multicast += rx_status.multicast_packets;
+               priv->stats.rx_crc_errors += rx_status.inb_errors;
+               priv->stats.rx_frame_errors += rx_status.fcs_align_err_packets;
+
+               /*
+                * The drop counter must be incremented atomically
+                * since the RX tasklet also increments it.
+                */
+#ifdef CONFIG_64BIT
+               atomic64_add(rx_status.dropped_packets,
+                            (atomic64_t *)&priv->stats.rx_dropped);
+#else
+               atomic_add(rx_status.dropped_packets,
+                            (atomic_t *)&priv->stats.rx_dropped);
+#endif
+       }
+
+       return &priv->stats;
+}
+
+/**
+ * Change the link MTU.
+ *
+ * @dev:     Device to change
+ * @new_mtu: The new MTU
+ *
+ * Returns Zero on success
+ */
+static int cvm_oct_common_change_mtu(struct net_device *dev, int new_mtu)
+{
+       struct octeon_ethernet *priv = netdev_priv(dev);
+       int interface = INTERFACE(priv->port);
+       int index = INDEX(priv->port);
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+       int vlan_bytes = 4;
+#else
+       int vlan_bytes = 0;
+#endif
+
+       /*
+        * Limit the MTU to make sure the ethernet packets are between
+        * 64 bytes and 65392 bytes.
+        */
+       if ((new_mtu + 14 + 4 + vlan_bytes < 64)
+           || (new_mtu + 14 + 4 + vlan_bytes > 65392)) {
+               pr_err("MTU must be between %d and %d.\n",
+                      64 - 14 - 4 - vlan_bytes, 65392 - 14 - 4 - vlan_bytes);
+               return -EINVAL;
+       }
+       dev->mtu = new_mtu;
+
+       if ((interface < 2)
+           && (cvmx_helper_interface_get_mode(interface) !=
+               CVMX_HELPER_INTERFACE_MODE_SPI)) {
+               /* Add ethernet header and FCS, and VLAN if configured. */
+               int max_packet = new_mtu + 14 + 4 + vlan_bytes;
+
+               if (OCTEON_IS_MODEL(OCTEON_CN3XXX)
+                   || OCTEON_IS_MODEL(OCTEON_CN58XX)) {
+                       /* Signal errors on packets larger than the MTU */
+                       cvmx_write_csr(CVMX_GMXX_RXX_FRM_MAX(index, interface),
+                                      max_packet);
+               } else {
+                       /*
+                        * Set the hardware to truncate packets larger
+                        * than the MTU and smaller than 64 bytes.
+                        */
+                       union cvmx_pip_frm_len_chkx frm_len_chk;
+                       frm_len_chk.u64 = 0;
+                       frm_len_chk.s.minlen = 64;
+                       frm_len_chk.s.maxlen = max_packet;
+                       cvmx_write_csr(CVMX_PIP_FRM_LEN_CHKX(interface),
+                                      frm_len_chk.u64);
+               }
+               /*
+                * Set the hardware to truncate packets larger than
+                * the MTU. The jabber register must be set to a
+                * multiple of 8 bytes, so round up.
+                */
+               cvmx_write_csr(CVMX_GMXX_RXX_JABBER(index, interface),
+                              (max_packet + 7) & ~7u);
+       }
+       return 0;
+}
+
+/**
+ * Set the multicast list. Only accept-all or reject-all is programmed.
+ *
+ * @dev:    Device to work on
+ */
+static void cvm_oct_common_set_multicast_list(struct net_device *dev)
+{
+       union cvmx_gmxx_prtx_cfg gmx_cfg;
+       struct octeon_ethernet *priv = netdev_priv(dev);
+       int interface = INTERFACE(priv->port);
+       int index = INDEX(priv->port);
+
+       if ((interface < 2)
+           && (cvmx_helper_interface_get_mode(interface) !=
+               CVMX_HELPER_INTERFACE_MODE_SPI)) {
+               union cvmx_gmxx_rxx_adr_ctl control;
+               control.u64 = 0;
+               control.s.bcst = 1;     /* Allow broadcast MAC addresses */
+
+               if (dev->mc_list || (dev->flags & IFF_ALLMULTI) ||
+                   (dev->flags & IFF_PROMISC))
+                       /* Force accept multicast packets */
+                       control.s.mcst = 2;
+               else
+                       /* Force reject multicast packets */
+                       control.s.mcst = 1;
+
+               if (dev->flags & IFF_PROMISC)
+                       /*
+                        * Reject matches if promisc. Since CAM is
+                        * shut off, should accept everything.
+                        */
+                       control.s.cam_mode = 0;
+               else
+                       /* Filter packets based on the CAM */
+                       control.s.cam_mode = 1;
+
+               gmx_cfg.u64 =
+                   cvmx_read_csr(CVMX_GMXX_PRTX_CFG(index, interface));
+               cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface),
+                              gmx_cfg.u64 & ~1ull);
+
+               cvmx_write_csr(CVMX_GMXX_RXX_ADR_CTL(index, interface),
+                              control.u64);
+               if (dev->flags & IFF_PROMISC)
+                       cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM_EN
+                                      (index, interface), 0);
+               else
+                       cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM_EN
+                                      (index, interface), 1);
+
+               cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface),
+                              gmx_cfg.u64);
+       }
+}
+
+/**
+ * Set the hardware MAC address for a device
+ *
+ * @dev:    Device to change the MAC address for
+ * @addr:   Address structure to change it to. MAC address is addr + 2.
+ * Returns Zero on success
+ */
+static int cvm_oct_common_set_mac_address(struct net_device *dev, void *addr)
+{
+       struct octeon_ethernet *priv = netdev_priv(dev);
+       union cvmx_gmxx_prtx_cfg gmx_cfg;
+       int interface = INTERFACE(priv->port);
+       int index = INDEX(priv->port);
+
+       memcpy(dev->dev_addr, addr + 2, 6);
+
+       if ((interface < 2)
+           && (cvmx_helper_interface_get_mode(interface) !=
+               CVMX_HELPER_INTERFACE_MODE_SPI)) {
+               int i;
+               uint8_t *ptr = addr;
+               uint64_t mac = 0;
+               for (i = 0; i < 6; i++)
+                       mac = (mac << 8) | (uint64_t) (ptr[i + 2]);
+
+               gmx_cfg.u64 =
+                   cvmx_read_csr(CVMX_GMXX_PRTX_CFG(index, interface));
+               cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface),
+                              gmx_cfg.u64 & ~1ull);
+
+               cvmx_write_csr(CVMX_GMXX_SMACX(index, interface), mac);
+               cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM0(index, interface),
+                              ptr[2]);
+               cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM1(index, interface),
+                              ptr[3]);
+               cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM2(index, interface),
+                              ptr[4]);
+               cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM3(index, interface),
+                              ptr[5]);
+               cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM4(index, interface),
+                              ptr[6]);
+               cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM5(index, interface),
+                              ptr[7]);
+               cvm_oct_common_set_multicast_list(dev);
+               cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface),
+                              gmx_cfg.u64);
+       }
+       return 0;
+}
+
+/**
+ * Per network device initialization
+ *
+ * @dev:    Device to initialize
+ * Returns Zero on success
+ */
+int cvm_oct_common_init(struct net_device *dev)
+{
+       static int count;
+       char mac[8] = { 0x00, 0x00,
+               octeon_bootinfo->mac_addr_base[0],
+               octeon_bootinfo->mac_addr_base[1],
+               octeon_bootinfo->mac_addr_base[2],
+               octeon_bootinfo->mac_addr_base[3],
+               octeon_bootinfo->mac_addr_base[4],
+               octeon_bootinfo->mac_addr_base[5] + count
+       };
+       struct octeon_ethernet *priv = netdev_priv(dev);
+
+       /*
+        * Force the interface to use the POW send if always_use_pow
+        * was specified or it is in the pow send list.
+        */
+       if ((pow_send_group != -1)
+           && (always_use_pow || strstr(pow_send_list, dev->name)))
+               priv->queue = -1;
+
+       if (priv->queue != -1 && USE_HW_TCPUDP_CHECKSUM)
+               dev->features |= NETIF_F_IP_CSUM;
+
+       count++;
+
+       /* We do our own locking, Linux doesn't need to */
+       dev->features |= NETIF_F_LLTX;
+       SET_ETHTOOL_OPS(dev, &cvm_oct_ethtool_ops);
+
+       cvm_oct_mdio_setup_device(dev);
+       dev->netdev_ops->ndo_set_mac_address(dev, mac);
+       dev->netdev_ops->ndo_change_mtu(dev, dev->mtu);
+
+       /*
+        * Zero out stats for port so we won't mistakenly show
+        * counters from the bootloader.
+        */
+       memset(dev->netdev_ops->ndo_get_stats(dev), 0,
+              sizeof(struct net_device_stats));
+
+       return 0;
+}
+
+void cvm_oct_common_uninit(struct net_device *dev)
+{
+       /* Currently nothing to do */
+}
+
+static const struct net_device_ops cvm_oct_npi_netdev_ops = {
+       .ndo_init               = cvm_oct_common_init,
+       .ndo_uninit             = cvm_oct_common_uninit,
+       .ndo_start_xmit         = cvm_oct_xmit,
+       .ndo_set_multicast_list = cvm_oct_common_set_multicast_list,
+       .ndo_set_mac_address    = cvm_oct_common_set_mac_address,
+       .ndo_do_ioctl           = cvm_oct_ioctl,
+       .ndo_change_mtu         = cvm_oct_common_change_mtu,
+       .ndo_get_stats          = cvm_oct_common_get_stats,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller    = cvm_oct_poll_controller,
+#endif
+};
+static const struct net_device_ops cvm_oct_xaui_netdev_ops = {
+       .ndo_init               = cvm_oct_xaui_init,
+       .ndo_uninit             = cvm_oct_xaui_uninit,
+       .ndo_open               = cvm_oct_xaui_open,
+       .ndo_stop               = cvm_oct_xaui_stop,
+       .ndo_start_xmit         = cvm_oct_xmit,
+       .ndo_set_multicast_list = cvm_oct_common_set_multicast_list,
+       .ndo_set_mac_address    = cvm_oct_common_set_mac_address,
+       .ndo_do_ioctl           = cvm_oct_ioctl,
+       .ndo_change_mtu         = cvm_oct_common_change_mtu,
+       .ndo_get_stats          = cvm_oct_common_get_stats,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller    = cvm_oct_poll_controller,
+#endif
+};
+static const struct net_device_ops cvm_oct_sgmii_netdev_ops = {
+       .ndo_init               = cvm_oct_sgmii_init,
+       .ndo_uninit             = cvm_oct_sgmii_uninit,
+       .ndo_open               = cvm_oct_sgmii_open,
+       .ndo_stop               = cvm_oct_sgmii_stop,
+       .ndo_start_xmit         = cvm_oct_xmit,
+       .ndo_set_multicast_list = cvm_oct_common_set_multicast_list,
+       .ndo_set_mac_address    = cvm_oct_common_set_mac_address,
+       .ndo_do_ioctl           = cvm_oct_ioctl,
+       .ndo_change_mtu         = cvm_oct_common_change_mtu,
+       .ndo_get_stats          = cvm_oct_common_get_stats,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller    = cvm_oct_poll_controller,
+#endif
+};
+static const struct net_device_ops cvm_oct_spi_netdev_ops = {
+       .ndo_init               = cvm_oct_spi_init,
+       .ndo_uninit             = cvm_oct_spi_uninit,
+       .ndo_start_xmit         = cvm_oct_xmit,
+       .ndo_set_multicast_list = cvm_oct_common_set_multicast_list,
+       .ndo_set_mac_address    = cvm_oct_common_set_mac_address,
+       .ndo_do_ioctl           = cvm_oct_ioctl,
+       .ndo_change_mtu         = cvm_oct_common_change_mtu,
+       .ndo_get_stats          = cvm_oct_common_get_stats,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller    = cvm_oct_poll_controller,
+#endif
+};
+static const struct net_device_ops cvm_oct_rgmii_netdev_ops = {
+       .ndo_init               = cvm_oct_rgmii_init,
+       .ndo_uninit             = cvm_oct_rgmii_uninit,
+       .ndo_open               = cvm_oct_rgmii_open,
+       .ndo_stop               = cvm_oct_rgmii_stop,
+       .ndo_start_xmit         = cvm_oct_xmit,
+       .ndo_set_multicast_list = cvm_oct_common_set_multicast_list,
+       .ndo_set_mac_address    = cvm_oct_common_set_mac_address,
+       .ndo_do_ioctl           = cvm_oct_ioctl,
+       .ndo_change_mtu         = cvm_oct_common_change_mtu,
+       .ndo_get_stats          = cvm_oct_common_get_stats,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller    = cvm_oct_poll_controller,
+#endif
+};
+static const struct net_device_ops cvm_oct_pow_netdev_ops = {
+       .ndo_init               = cvm_oct_common_init,
+       .ndo_start_xmit         = cvm_oct_xmit_pow,
+       .ndo_set_multicast_list = cvm_oct_common_set_multicast_list,
+       .ndo_set_mac_address    = cvm_oct_common_set_mac_address,
+       .ndo_do_ioctl           = cvm_oct_ioctl,
+       .ndo_change_mtu         = cvm_oct_common_change_mtu,
+       .ndo_get_stats          = cvm_oct_common_get_stats,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller    = cvm_oct_poll_controller,
+#endif
+};
+
 /**
  * Module/ driver initialization. Creates the linux network
  * devices.
@@ -303,7 +663,7 @@ static int __init cvm_oct_init_module(void)
                        struct octeon_ethernet *priv = netdev_priv(dev);
                        memset(priv, 0, sizeof(struct octeon_ethernet));
 
-                       dev->init = cvm_oct_common_init;
+                       dev->netdev_ops = &cvm_oct_pow_netdev_ops;
                        priv->imode = CVMX_HELPER_INTERFACE_MODE_DISABLED;
                        priv->port = CVMX_PIP_NUM_INPUT_PORTS;
                        priv->queue = -1;
@@ -372,44 +732,38 @@ static int __init cvm_oct_init_module(void)
                                break;
 
                        case CVMX_HELPER_INTERFACE_MODE_NPI:
-                               dev->init = cvm_oct_common_init;
-                               dev->uninit = cvm_oct_common_uninit;
+                               dev->netdev_ops = &cvm_oct_npi_netdev_ops;
                                strcpy(dev->name, "npi%d");
                                break;
 
                        case CVMX_HELPER_INTERFACE_MODE_XAUI:
-                               dev->init = cvm_oct_xaui_init;
-                               dev->uninit = cvm_oct_xaui_uninit;
+                               dev->netdev_ops = &cvm_oct_xaui_netdev_ops;
                                strcpy(dev->name, "xaui%d");
                                break;
 
                        case CVMX_HELPER_INTERFACE_MODE_LOOP:
-                               dev->init = cvm_oct_common_init;
-                               dev->uninit = cvm_oct_common_uninit;
+                               dev->netdev_ops = &cvm_oct_npi_netdev_ops;
                                strcpy(dev->name, "loop%d");
                                break;
 
                        case CVMX_HELPER_INTERFACE_MODE_SGMII:
-                               dev->init = cvm_oct_sgmii_init;
-                               dev->uninit = cvm_oct_sgmii_uninit;
+                               dev->netdev_ops = &cvm_oct_sgmii_netdev_ops;
                                strcpy(dev->name, "eth%d");
                                break;
 
                        case CVMX_HELPER_INTERFACE_MODE_SPI:
-                               dev->init = cvm_oct_spi_init;
-                               dev->uninit = cvm_oct_spi_uninit;
+                               dev->netdev_ops = &cvm_oct_spi_netdev_ops;
                                strcpy(dev->name, "spi%d");
                                break;
 
                        case CVMX_HELPER_INTERFACE_MODE_RGMII:
                        case CVMX_HELPER_INTERFACE_MODE_GMII:
-                               dev->init = cvm_oct_rgmii_init;
-                               dev->uninit = cvm_oct_rgmii_uninit;
+                               dev->netdev_ops = &cvm_oct_rgmii_netdev_ops;
                                strcpy(dev->name, "eth%d");
                                break;
                        }
 
-                       if (!dev->init) {
+                       if (!dev->netdev_ops) {
                                kfree(dev);
                        } else if (register_netdev(dev) < 0) {
                                pr_err("Failed to register ethernet device "
index b3199076ef5e4af6a36cdd24552f6052d77fb8c0..3aef9878fc0ad0cb178f4f9537761bb8ecb0303d 100644 (file)
@@ -111,12 +111,23 @@ static inline int cvm_oct_transmit(struct net_device *dev,
 
 extern int cvm_oct_rgmii_init(struct net_device *dev);
 extern void cvm_oct_rgmii_uninit(struct net_device *dev);
+extern int cvm_oct_rgmii_open(struct net_device *dev);
+extern int cvm_oct_rgmii_stop(struct net_device *dev);
+
 extern int cvm_oct_sgmii_init(struct net_device *dev);
 extern void cvm_oct_sgmii_uninit(struct net_device *dev);
+extern int cvm_oct_sgmii_open(struct net_device *dev);
+extern int cvm_oct_sgmii_stop(struct net_device *dev);
+
 extern int cvm_oct_spi_init(struct net_device *dev);
 extern void cvm_oct_spi_uninit(struct net_device *dev);
 extern int cvm_oct_xaui_init(struct net_device *dev);
 extern void cvm_oct_xaui_uninit(struct net_device *dev);
+extern int cvm_oct_xaui_open(struct net_device *dev);
+extern int cvm_oct_xaui_stop(struct net_device *dev);
+
+extern int cvm_oct_common_init(struct net_device *dev);
+extern void cvm_oct_common_uninit(struct net_device *dev);
 
 extern int always_use_pow;
 extern int pow_send_group;
index 559f8784acf36e52a47a3ee277f0d24eb47c7c06..9052bcb4f528d206437f9a3e3c4212b211938d12 100644 (file)
@@ -501,7 +501,7 @@ int hwarc_filter_event_WUSB_0100(struct uwb_rc *rc, struct uwb_rceb **header,
        int result = -ENOANO;
        struct uwb_rceb *rceb = *header;
        int event = le16_to_cpu(rceb->wEvent);
-       size_t event_size;
+       ssize_t event_size;
        size_t core_size, offset;
 
        if (rceb->bEventType != UWB_RC_CET_GENERAL)
index cd2035768b47824d3c5b3e80c5b292cd2d7b4d60..86a853b84119d54febd7c895683f42a78235b991 100644 (file)
@@ -326,7 +326,7 @@ int wlp_prepare_tx_frame(struct device *dev, struct wlp *wlp,
        int result = -EINVAL;
        struct ethhdr *eth_hdr = (void *) skb->data;
 
-       if (is_broadcast_ether_addr(eth_hdr->h_dest)) {
+       if (is_multicast_ether_addr(eth_hdr->h_dest)) {
                result = wlp_eda_for_each(&wlp->eda, wlp_wss_send_copy, skb);
                if (result < 0) {
                        if (printk_ratelimit())
index 1f09d4e4144c23598f3cf7560a28630fa9f6a6d3..59f708efe25f061b0573358d028d317622fe41a7 100644 (file)
@@ -68,6 +68,34 @@ int w1_ds2760_write(struct device *dev, char *buf, int addr, size_t count)
        return w1_ds2760_io(dev, buf, addr, count, 1);
 }
 
+static int w1_ds2760_eeprom_cmd(struct device *dev, int addr, int cmd)
+{
+       struct w1_slave *sl = container_of(dev, struct w1_slave, dev);
+
+       if (!dev)
+               return -EINVAL;
+
+       mutex_lock(&sl->master->mutex);
+
+       if (w1_reset_select_slave(sl) == 0) {
+               w1_write_8(sl->master, cmd);
+               w1_write_8(sl->master, addr);
+       }
+
+       mutex_unlock(&sl->master->mutex);
+       return 0;
+}
+
+int w1_ds2760_store_eeprom(struct device *dev, int addr)
+{
+       return w1_ds2760_eeprom_cmd(dev, addr, W1_DS2760_COPY_DATA);
+}
+
+int w1_ds2760_recall_eeprom(struct device *dev, int addr)
+{
+       return w1_ds2760_eeprom_cmd(dev, addr, W1_DS2760_RECALL_DATA);
+}
+
 static ssize_t w1_ds2760_read_bin(struct kobject *kobj,
                                  struct bin_attribute *bin_attr,
                                  char *buf, loff_t off, size_t count)
@@ -200,6 +228,8 @@ static void __exit w1_ds2760_exit(void)
 
 EXPORT_SYMBOL(w1_ds2760_read);
 EXPORT_SYMBOL(w1_ds2760_write);
+EXPORT_SYMBOL(w1_ds2760_store_eeprom);
+EXPORT_SYMBOL(w1_ds2760_recall_eeprom);
 
 module_init(w1_ds2760_init);
 module_exit(w1_ds2760_exit);
index f1302429cb0273fa4f7e1871e2d5767a2f97d7e1..58e774141568eb11846fab47ae6e9c822993d673 100644 (file)
 
 #define DS2760_PROTECTION_REG          0x00
 #define DS2760_STATUS_REG              0x01
+       #define DS2760_STATUS_IE        (1 << 2)
+       #define DS2760_STATUS_SWEN      (1 << 3)
+       #define DS2760_STATUS_RNAOP     (1 << 4)
+       #define DS2760_STATUS_PMOD      (1 << 5)
 #define DS2760_EEPROM_REG              0x07
 #define DS2760_SPECIAL_FEATURE_REG     0x08
 #define DS2760_VOLTAGE_MSB             0x0c
@@ -38,6 +42,7 @@
 #define DS2760_EEPROM_BLOCK0           0x20
 #define DS2760_ACTIVE_FULL             0x20
 #define DS2760_EEPROM_BLOCK1           0x30
+#define DS2760_STATUS_WRITE_REG                0x31
 #define DS2760_RATED_CAPACITY          0x32
 #define DS2760_CURRENT_OFFSET_BIAS     0x33
 #define DS2760_ACTIVE_EMPTY            0x3b
@@ -46,5 +51,7 @@ extern int w1_ds2760_read(struct device *dev, char *buf, int addr,
                          size_t count);
 extern int w1_ds2760_write(struct device *dev, char *buf, int addr,
                           size_t count);
+extern int w1_ds2760_store_eeprom(struct device *dev, int addr);
+extern int w1_ds2760_recall_eeprom(struct device *dev, int addr);
 
 #endif /* !__w1_ds2760_h__ */
index 603972576f0f3b8e704dc8cf8061bc3bfccae51a..f128427b995b108382fde3b3fea6d0582f841666 100644 (file)
 
 #ifdef CONFIG_FS_POSIX_ACL
 
-static void btrfs_update_cached_acl(struct inode *inode,
-                                   struct posix_acl **p_acl,
-                                   struct posix_acl *acl)
-{
-       spin_lock(&inode->i_lock);
-       if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED)
-               posix_acl_release(*p_acl);
-       *p_acl = posix_acl_dup(acl);
-       spin_unlock(&inode->i_lock);
-}
-
 static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 {
        int size;
        const char *name;
        char *value = NULL;
-       struct posix_acl *acl = NULL, **p_acl;
+       struct posix_acl *acl;
+
+       acl = get_cached_acl(inode, type);
+       if (acl != ACL_NOT_CACHED)
+               return acl;
 
        switch (type) {
        case ACL_TYPE_ACCESS:
                name = POSIX_ACL_XATTR_ACCESS;
-               p_acl = &BTRFS_I(inode)->i_acl;
                break;
        case ACL_TYPE_DEFAULT:
                name = POSIX_ACL_XATTR_DEFAULT;
-               p_acl = &BTRFS_I(inode)->i_default_acl;
                break;
        default:
-               return ERR_PTR(-EINVAL);
+               BUG();
        }
 
-       /* Handle the cached NULL acl case without locking */
-       acl = ACCESS_ONCE(*p_acl);
-       if (!acl)
-               return acl;
-
-       spin_lock(&inode->i_lock);
-       acl = *p_acl;
-       if (acl != BTRFS_ACL_NOT_CACHED)
-               acl = posix_acl_dup(acl);
-       spin_unlock(&inode->i_lock);
-
-       if (acl != BTRFS_ACL_NOT_CACHED)
-               return acl;
-
        size = __btrfs_getxattr(inode, name, "", 0);
        if (size > 0) {
                value = kzalloc(size, GFP_NOFS);
@@ -82,13 +59,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
                size = __btrfs_getxattr(inode, name, value, size);
                if (size > 0) {
                        acl = posix_acl_from_xattr(value, size);
-                       btrfs_update_cached_acl(inode, p_acl, acl);
+                       set_cached_acl(inode, type, acl);
                }
                kfree(value);
        } else if (size == -ENOENT || size == -ENODATA || size == 0) {
                /* FIXME, who returns -ENOENT?  I think nobody */
                acl = NULL;
-               btrfs_update_cached_acl(inode, p_acl, acl);
+               set_cached_acl(inode, type, acl);
        } else {
                acl = ERR_PTR(-EIO);
        }
@@ -121,7 +98,6 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 {
        int ret, size = 0;
        const char *name;
-       struct posix_acl **p_acl;
        char *value = NULL;
        mode_t mode;
 
@@ -141,13 +117,11 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
                ret = 0;
                inode->i_mode = mode;
                name = POSIX_ACL_XATTR_ACCESS;
-               p_acl = &BTRFS_I(inode)->i_acl;
                break;
        case ACL_TYPE_DEFAULT:
                if (!S_ISDIR(inode->i_mode))
                        return acl ? -EINVAL : 0;
                name = POSIX_ACL_XATTR_DEFAULT;
-               p_acl = &BTRFS_I(inode)->i_default_acl;
                break;
        default:
                return -EINVAL;
@@ -172,7 +146,7 @@ out:
        kfree(value);
 
        if (!ret)
-               btrfs_update_cached_acl(inode, p_acl, acl);
+               set_cached_acl(inode, type, acl);
 
        return ret;
 }
index acb4f351758256752066f565c78ebca3d583df5c..ea1ea0af8c0e6cf635c3060a08e1dd648a6fb48f 100644 (file)
@@ -53,10 +53,6 @@ struct btrfs_inode {
        /* used to order data wrt metadata */
        struct btrfs_ordered_inode_tree ordered_tree;
 
-       /* standard acl pointers */
-       struct posix_acl *i_acl;
-       struct posix_acl *i_default_acl;
-
        /* for keeping track of orphaned inodes */
        struct list_head i_orphan;
 
index 03441a99ea38ad95aa5b814d99f9a42a4a74ad0d..2779c2f5360ac6ea4678db5e14aa135c93507b4c 100644 (file)
@@ -41,8 +41,6 @@ struct btrfs_ordered_sum;
 
 #define BTRFS_MAGIC "_BHRfS_M"
 
-#define BTRFS_ACL_NOT_CACHED    ((void *)-1)
-
 #define BTRFS_MAX_LEVEL 8
 
 #define BTRFS_COMPAT_EXTENT_TREE_V0
index 8612b3a098111a818f54b3637d1af7487a6c2707..dbe1aabf96cd918d43e31160e5953621f8835983 100644 (file)
@@ -2122,10 +2122,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
         * any xattrs or acls
         */
        maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino);
-       if (!maybe_acls) {
-               BTRFS_I(inode)->i_acl = NULL;
-               BTRFS_I(inode)->i_default_acl = NULL;
-       }
+       if (!maybe_acls)
+               cache_no_acl(inode);
 
        BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
                                                alloc_group_block, 0);
@@ -3141,9 +3139,6 @@ static noinline void init_btrfs_i(struct inode *inode)
 {
        struct btrfs_inode *bi = BTRFS_I(inode);
 
-       bi->i_acl = BTRFS_ACL_NOT_CACHED;
-       bi->i_default_acl = BTRFS_ACL_NOT_CACHED;
-
        bi->generation = 0;
        bi->sequence = 0;
        bi->last_trans = 0;
@@ -4640,8 +4635,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        ei->last_trans = 0;
        ei->logged_trans = 0;
        btrfs_ordered_inode_tree_init(&ei->ordered_tree);
-       ei->i_acl = BTRFS_ACL_NOT_CACHED;
-       ei->i_default_acl = BTRFS_ACL_NOT_CACHED;
        INIT_LIST_HEAD(&ei->i_orphan);
        INIT_LIST_HEAD(&ei->ordered_operations);
        return &ei->vfs_inode;
@@ -4655,13 +4648,6 @@ void btrfs_destroy_inode(struct inode *inode)
        WARN_ON(!list_empty(&inode->i_dentry));
        WARN_ON(inode->i_data.nrpages);
 
-       if (BTRFS_I(inode)->i_acl &&
-           BTRFS_I(inode)->i_acl != BTRFS_ACL_NOT_CACHED)
-               posix_acl_release(BTRFS_I(inode)->i_acl);
-       if (BTRFS_I(inode)->i_default_acl &&
-           BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
-               posix_acl_release(BTRFS_I(inode)->i_default_acl);
-
        /*
         * Make sure we're properly removed from the ordered operation
         * lists.
index c135202c38b3667b30698114d13d8f49cc076ab2..626c7483b4de4e4f076bd4ab07af690007f9a0ae 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/vt.h>
+#include <linux/falloc.h>
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/ppp_defs.h>
@@ -1779,6 +1780,41 @@ lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
        return sys_ioctl(fd, cmd, (unsigned long)tn);
 }
 
+/* on ia32 l_start is on a 32-bit boundary */
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
+struct space_resv_32 {
+       __s16           l_type;
+       __s16           l_whence;
+       __s64           l_start __attribute__((packed));
+                       /* len == 0 means until end of file */
+       __s64           l_len __attribute__((packed));
+       __s32           l_sysid;
+       __u32           l_pid;
+       __s32           l_pad[4];       /* reserve area */
+};
+
+#define FS_IOC_RESVSP_32               _IOW ('X', 40, struct space_resv_32)
+#define FS_IOC_RESVSP64_32     _IOW ('X', 42, struct space_resv_32)
+
+/* just account for different alignment */
+static int compat_ioctl_preallocate(struct file *file, unsigned long arg)
+{
+       struct space_resv_32    __user *p32 = (void __user *)arg;
+       struct space_resv       __user *p = compat_alloc_user_space(sizeof(*p));
+
+       if (copy_in_user(&p->l_type,    &p32->l_type,   sizeof(s16)) ||
+           copy_in_user(&p->l_whence,  &p32->l_whence, sizeof(s16)) ||
+           copy_in_user(&p->l_start,   &p32->l_start,  sizeof(s64)) ||
+           copy_in_user(&p->l_len,     &p32->l_len,    sizeof(s64)) ||
+           copy_in_user(&p->l_sysid,   &p32->l_sysid,  sizeof(s32)) ||
+           copy_in_user(&p->l_pid,     &p32->l_pid,    sizeof(u32)) ||
+           copy_in_user(&p->l_pad,     &p32->l_pad,    4*sizeof(u32)))
+               return -EFAULT;
+
+       return ioctl_preallocate(file, p);
+}
+#endif
+
 
 typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int,
                                        unsigned long, struct file *);
@@ -2756,6 +2792,18 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
        case FIOQSIZE:
                break;
 
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
+       case FS_IOC_RESVSP_32:
+       case FS_IOC_RESVSP64_32:
+               error = compat_ioctl_preallocate(filp, arg);
+               goto out_fput;
+#else
+       case FS_IOC_RESVSP:
+       case FS_IOC_RESVSP64:
+               error = ioctl_preallocate(filp, (void __user *)arg);
+               goto out_fput;
+#endif
+
        case FIBMAP:
        case FIGETBSZ:
        case FIONREAD:
index 9b1d285f9fe6eb069c9fc62d02beea8df018cffc..75efb028974b991ebe49886f6b3c1b5f78e604ff 100644 (file)
@@ -423,7 +423,6 @@ static void devpts_kill_sb(struct super_block *sb)
 }
 
 static struct file_system_type devpts_fs_type = {
-       .owner          = THIS_MODULE,
        .name           = "devpts",
        .get_sb         = devpts_get_sb,
        .kill_sb        = devpts_kill_sb,
@@ -564,13 +563,4 @@ static int __init init_devpts_fs(void)
        }
        return err;
 }
-
-static void __exit exit_devpts_fs(void)
-{
-       unregister_filesystem(&devpts_fs_type);
-       mntput(devpts_mnt);
-}
-
 module_init(init_devpts_fs)
-module_exit(exit_devpts_fs)
-MODULE_LICENSE("GPL");
index d46e38cb85c557e273e8206c32b38c9426799ae3..d636e1297cad71e13c6cc6b24e516369bde9bd3c 100644 (file)
@@ -125,37 +125,12 @@ fail:
        return ERR_PTR(-EINVAL);
 }
 
-static inline struct posix_acl *
-ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
-{
-       struct posix_acl *acl = EXT2_ACL_NOT_CACHED;
-
-       spin_lock(&inode->i_lock);
-       if (*i_acl != EXT2_ACL_NOT_CACHED)
-               acl = posix_acl_dup(*i_acl);
-       spin_unlock(&inode->i_lock);
-
-       return acl;
-}
-
-static inline void
-ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl,
-                  struct posix_acl *acl)
-{
-       spin_lock(&inode->i_lock);
-       if (*i_acl != EXT2_ACL_NOT_CACHED)
-               posix_acl_release(*i_acl);
-       *i_acl = posix_acl_dup(acl);
-       spin_unlock(&inode->i_lock);
-}
-
 /*
  * inode->i_mutex: don't care
  */
 static struct posix_acl *
 ext2_get_acl(struct inode *inode, int type)
 {
-       struct ext2_inode_info *ei = EXT2_I(inode);
        int name_index;
        char *value = NULL;
        struct posix_acl *acl;
@@ -164,23 +139,19 @@ ext2_get_acl(struct inode *inode, int type)
        if (!test_opt(inode->i_sb, POSIX_ACL))
                return NULL;
 
-       switch(type) {
-               case ACL_TYPE_ACCESS:
-                       acl = ext2_iget_acl(inode, &ei->i_acl);
-                       if (acl != EXT2_ACL_NOT_CACHED)
-                               return acl;
-                       name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
-                       break;
-
-               case ACL_TYPE_DEFAULT:
-                       acl = ext2_iget_acl(inode, &ei->i_default_acl);
-                       if (acl != EXT2_ACL_NOT_CACHED)
-                               return acl;
-                       name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT;
-                       break;
-
-               default:
-                       return ERR_PTR(-EINVAL);
+       acl = get_cached_acl(inode, type);
+       if (acl != ACL_NOT_CACHED)
+               return acl;
+
+       switch (type) {
+       case ACL_TYPE_ACCESS:
+               name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
+               break;
+       case ACL_TYPE_DEFAULT:
+               name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT;
+               break;
+       default:
+               BUG();
        }
        retval = ext2_xattr_get(inode, name_index, "", NULL, 0);
        if (retval > 0) {
@@ -197,17 +168,9 @@ ext2_get_acl(struct inode *inode, int type)
                acl = ERR_PTR(retval);
        kfree(value);
 
-       if (!IS_ERR(acl)) {
-               switch(type) {
-                       case ACL_TYPE_ACCESS:
-                               ext2_iset_acl(inode, &ei->i_acl, acl);
-                               break;
+       if (!IS_ERR(acl))
+               set_cached_acl(inode, type, acl);
 
-                       case ACL_TYPE_DEFAULT:
-                               ext2_iset_acl(inode, &ei->i_default_acl, acl);
-                               break;
-               }
-       }
        return acl;
 }
 
@@ -217,7 +180,6 @@ ext2_get_acl(struct inode *inode, int type)
 static int
 ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 {
-       struct ext2_inode_info *ei = EXT2_I(inode);
        int name_index;
        void *value = NULL;
        size_t size = 0;
@@ -263,17 +225,8 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
        error = ext2_xattr_set(inode, name_index, "", value, size, 0);
 
        kfree(value);
-       if (!error) {
-               switch(type) {
-                       case ACL_TYPE_ACCESS:
-                               ext2_iset_acl(inode, &ei->i_acl, acl);
-                               break;
-
-                       case ACL_TYPE_DEFAULT:
-                               ext2_iset_acl(inode, &ei->i_default_acl, acl);
-                               break;
-               }
-       }
+       if (!error)
+               set_cached_acl(inode, type, acl);
        return error;
 }
 
index b42cf578554b5fd10ec8c290ea3fca3af05f9020..ecefe478898f9c67df951ce6c9283a15d1346be5 100644 (file)
@@ -53,10 +53,6 @@ static inline int ext2_acl_count(size_t size)
 
 #ifdef CONFIG_EXT2_FS_POSIX_ACL
 
-/* Value for inode->u.ext2_i.i_acl and inode->u.ext2_i.i_default_acl
-   if the ACL has not been cached */
-#define EXT2_ACL_NOT_CACHED ((void *)-1)
-
 /* acl.c */
 extern int ext2_permission (struct inode *, int);
 extern int ext2_acl_chmod (struct inode *);
index d988a718aedbad0cbabd35eb73accab9213f1adc..9a8a8e27a063963210056c42feab2b335c89b566 100644 (file)
@@ -46,10 +46,6 @@ struct ext2_inode_info {
         * EAs.
         */
        struct rw_semaphore xattr_sem;
-#endif
-#ifdef CONFIG_EXT2_FS_POSIX_ACL
-       struct posix_acl        *i_acl;
-       struct posix_acl        *i_default_acl;
 #endif
        rwlock_t i_meta_lock;
 
index 29ed682061f6453341c48a0c0f5aab34ac42cd89..e27130341d4f9cba4cb476186dc6c6e83b31b4fe 100644 (file)
@@ -1224,10 +1224,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
                return inode;
 
        ei = EXT2_I(inode);
-#ifdef CONFIG_EXT2_FS_POSIX_ACL
-       ei->i_acl = EXT2_ACL_NOT_CACHED;
-       ei->i_default_acl = EXT2_ACL_NOT_CACHED;
-#endif
        ei->i_block_alloc_info = NULL;
 
        raw_inode = ext2_get_inode(inode->i_sb, ino, &bh);
index 458999638c3dee387cd4a5b091a73010ff1d8150..1a9ffee47d56c545dcc9420680178fb32ea6c374 100644 (file)
@@ -152,10 +152,6 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
        ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL);
        if (!ei)
                return NULL;
-#ifdef CONFIG_EXT2_FS_POSIX_ACL
-       ei->i_acl = EXT2_ACL_NOT_CACHED;
-       ei->i_default_acl = EXT2_ACL_NOT_CACHED;
-#endif
        ei->i_block_alloc_info = NULL;
        ei->vfs_inode.i_version = 1;
        return &ei->vfs_inode;
@@ -198,18 +194,6 @@ static void destroy_inodecache(void)
 static void ext2_clear_inode(struct inode *inode)
 {
        struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info;
-#ifdef CONFIG_EXT2_FS_POSIX_ACL
-       struct ext2_inode_info *ei = EXT2_I(inode);
-
-       if (ei->i_acl && ei->i_acl != EXT2_ACL_NOT_CACHED) {
-               posix_acl_release(ei->i_acl);
-               ei->i_acl = EXT2_ACL_NOT_CACHED;
-       }
-       if (ei->i_default_acl && ei->i_default_acl != EXT2_ACL_NOT_CACHED) {
-               posix_acl_release(ei->i_default_acl);
-               ei->i_default_acl = EXT2_ACL_NOT_CACHED;
-       }
-#endif
        ext2_discard_reservation(inode);
        EXT2_I(inode)->i_block_alloc_info = NULL;
        if (unlikely(rsv))
index e0c7454517158dde2ee69433526c78eeb5398cd2..e167bae37ef02eb572b3c660a3a412c2bace58e2 100644 (file)
@@ -126,33 +126,6 @@ fail:
        return ERR_PTR(-EINVAL);
 }
 
-static inline struct posix_acl *
-ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl)
-{
-       struct posix_acl *acl = ACCESS_ONCE(*i_acl);
-
-       if (acl) {
-               spin_lock(&inode->i_lock);
-               acl = *i_acl;
-               if (acl != EXT3_ACL_NOT_CACHED)
-                       acl = posix_acl_dup(acl);
-               spin_unlock(&inode->i_lock);
-       }
-
-       return acl;
-}
-
-static inline void
-ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
-                  struct posix_acl *acl)
-{
-       spin_lock(&inode->i_lock);
-       if (*i_acl != EXT3_ACL_NOT_CACHED)
-               posix_acl_release(*i_acl);
-       *i_acl = posix_acl_dup(acl);
-       spin_unlock(&inode->i_lock);
-}
-
 /*
  * Inode operation get_posix_acl().
  *
@@ -161,7 +134,6 @@ ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
 static struct posix_acl *
 ext3_get_acl(struct inode *inode, int type)
 {
-       struct ext3_inode_info *ei = EXT3_I(inode);
        int name_index;
        char *value = NULL;
        struct posix_acl *acl;
@@ -170,24 +142,21 @@ ext3_get_acl(struct inode *inode, int type)
        if (!test_opt(inode->i_sb, POSIX_ACL))
                return NULL;
 
-       switch(type) {
-               case ACL_TYPE_ACCESS:
-                       acl = ext3_iget_acl(inode, &ei->i_acl);
-                       if (acl != EXT3_ACL_NOT_CACHED)
-                               return acl;
-                       name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
-                       break;
-
-               case ACL_TYPE_DEFAULT:
-                       acl = ext3_iget_acl(inode, &ei->i_default_acl);
-                       if (acl != EXT3_ACL_NOT_CACHED)
-                               return acl;
-                       name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
-                       break;
-
-               default:
-                       return ERR_PTR(-EINVAL);
+       acl = get_cached_acl(inode, type);
+       if (acl != ACL_NOT_CACHED)
+               return acl;
+
+       switch (type) {
+       case ACL_TYPE_ACCESS:
+               name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
+               break;
+       case ACL_TYPE_DEFAULT:
+               name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
+               break;
+       default:
+               BUG();
        }
+
        retval = ext3_xattr_get(inode, name_index, "", NULL, 0);
        if (retval > 0) {
                value = kmalloc(retval, GFP_NOFS);
@@ -203,17 +172,9 @@ ext3_get_acl(struct inode *inode, int type)
                acl = ERR_PTR(retval);
        kfree(value);
 
-       if (!IS_ERR(acl)) {
-               switch(type) {
-                       case ACL_TYPE_ACCESS:
-                               ext3_iset_acl(inode, &ei->i_acl, acl);
-                               break;
+       if (!IS_ERR(acl))
+               set_cached_acl(inode, type, acl);
 
-                       case ACL_TYPE_DEFAULT:
-                               ext3_iset_acl(inode, &ei->i_default_acl, acl);
-                               break;
-               }
-       }
        return acl;
 }
 
@@ -226,7 +187,6 @@ static int
 ext3_set_acl(handle_t *handle, struct inode *inode, int type,
             struct posix_acl *acl)
 {
-       struct ext3_inode_info *ei = EXT3_I(inode);
        int name_index;
        void *value = NULL;
        size_t size = 0;
@@ -271,17 +231,10 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
                                      value, size, 0);
 
        kfree(value);
-       if (!error) {
-               switch(type) {
-                       case ACL_TYPE_ACCESS:
-                               ext3_iset_acl(inode, &ei->i_acl, acl);
-                               break;
 
-                       case ACL_TYPE_DEFAULT:
-                               ext3_iset_acl(inode, &ei->i_default_acl, acl);
-                               break;
-               }
-       }
+       if (!error)
+               set_cached_acl(inode, type, acl);
+
        return error;
 }
 
index 42da16b8cac0a82d1bdd06a18c178308068e8d08..07d15a3a59696caceccd7dd89f8eab7ac6a78719 100644 (file)
@@ -53,10 +53,6 @@ static inline int ext3_acl_count(size_t size)
 
 #ifdef CONFIG_EXT3_FS_POSIX_ACL
 
-/* Value for inode->u.ext3_i.i_acl and inode->u.ext3_i.i_default_acl
-   if the ACL has not been cached */
-#define EXT3_ACL_NOT_CACHED ((void *)-1)
-
 /* acl.c */
 extern int ext3_permission (struct inode *, int);
 extern int ext3_acl_chmod (struct inode *);
index 05dea8132fc0f04fa588fe163a5c6d53e0c07769..5f51fed5c750870b8c575fdf7dd8e877fe986bfb 100644 (file)
@@ -2752,10 +2752,6 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
                return inode;
 
        ei = EXT3_I(inode);
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-       ei->i_acl = EXT3_ACL_NOT_CACHED;
-       ei->i_default_acl = EXT3_ACL_NOT_CACHED;
-#endif
        ei->i_block_alloc_info = NULL;
 
        ret = __ext3_get_inode_loc(inode, &iloc, 0);
index 601e881e6105b886de6251fe71acaf51ec66ef8b..524b349c6299ca248b366cf299071939e5843aba 100644 (file)
@@ -464,10 +464,6 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
        ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS);
        if (!ei)
                return NULL;
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-       ei->i_acl = EXT3_ACL_NOT_CACHED;
-       ei->i_default_acl = EXT3_ACL_NOT_CACHED;
-#endif
        ei->i_block_alloc_info = NULL;
        ei->vfs_inode.i_version = 1;
        return &ei->vfs_inode;
@@ -518,18 +514,6 @@ static void destroy_inodecache(void)
 static void ext3_clear_inode(struct inode *inode)
 {
        struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-       if (EXT3_I(inode)->i_acl &&
-                       EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) {
-               posix_acl_release(EXT3_I(inode)->i_acl);
-               EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED;
-       }
-       if (EXT3_I(inode)->i_default_acl &&
-                       EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) {
-               posix_acl_release(EXT3_I(inode)->i_default_acl);
-               EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED;
-       }
-#endif
        ext3_discard_reservation(inode);
        EXT3_I(inode)->i_block_alloc_info = NULL;
        if (unlikely(rsv))
index 605aeed96d68116589818b5c443b32d7f3ff01b0..f6d8967149ca116602ce77fb35c6762dbb179456 100644 (file)
@@ -126,33 +126,6 @@ fail:
        return ERR_PTR(-EINVAL);
 }
 
-static inline struct posix_acl *
-ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl)
-{
-       struct posix_acl *acl = ACCESS_ONCE(*i_acl);
-
-       if (acl) {
-               spin_lock(&inode->i_lock);
-               acl = *i_acl;
-               if (acl != EXT4_ACL_NOT_CACHED)
-                       acl = posix_acl_dup(acl);
-               spin_unlock(&inode->i_lock);
-       }
-
-       return acl;
-}
-
-static inline void
-ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
-               struct posix_acl *acl)
-{
-       spin_lock(&inode->i_lock);
-       if (*i_acl != EXT4_ACL_NOT_CACHED)
-               posix_acl_release(*i_acl);
-       *i_acl = posix_acl_dup(acl);
-       spin_unlock(&inode->i_lock);
-}
-
 /*
  * Inode operation get_posix_acl().
  *
@@ -161,7 +134,6 @@ ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
 static struct posix_acl *
 ext4_get_acl(struct inode *inode, int type)
 {
-       struct ext4_inode_info *ei = EXT4_I(inode);
        int name_index;
        char *value = NULL;
        struct posix_acl *acl;
@@ -170,23 +142,19 @@ ext4_get_acl(struct inode *inode, int type)
        if (!test_opt(inode->i_sb, POSIX_ACL))
                return NULL;
 
+       acl = get_cached_acl(inode, type);
+       if (acl != ACL_NOT_CACHED)
+               return acl;
+
        switch (type) {
        case ACL_TYPE_ACCESS:
-               acl = ext4_iget_acl(inode, &ei->i_acl);
-               if (acl != EXT4_ACL_NOT_CACHED)
-                       return acl;
                name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
                break;
-
        case ACL_TYPE_DEFAULT:
-               acl = ext4_iget_acl(inode, &ei->i_default_acl);
-               if (acl != EXT4_ACL_NOT_CACHED)
-                       return acl;
                name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
                break;
-
        default:
-               return ERR_PTR(-EINVAL);
+               BUG();
        }
        retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
        if (retval > 0) {
@@ -203,17 +171,9 @@ ext4_get_acl(struct inode *inode, int type)
                acl = ERR_PTR(retval);
        kfree(value);
 
-       if (!IS_ERR(acl)) {
-               switch (type) {
-               case ACL_TYPE_ACCESS:
-                       ext4_iset_acl(inode, &ei->i_acl, acl);
-                       break;
+       if (!IS_ERR(acl))
+               set_cached_acl(inode, type, acl);
 
-               case ACL_TYPE_DEFAULT:
-                       ext4_iset_acl(inode, &ei->i_default_acl, acl);
-                       break;
-               }
-       }
        return acl;
 }
 
@@ -226,7 +186,6 @@ static int
 ext4_set_acl(handle_t *handle, struct inode *inode, int type,
             struct posix_acl *acl)
 {
-       struct ext4_inode_info *ei = EXT4_I(inode);
        int name_index;
        void *value = NULL;
        size_t size = 0;
@@ -271,17 +230,9 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
                                      value, size, 0);
 
        kfree(value);
-       if (!error) {
-               switch (type) {
-               case ACL_TYPE_ACCESS:
-                       ext4_iset_acl(inode, &ei->i_acl, acl);
-                       break;
+       if (!error)
+               set_cached_acl(inode, type, acl);
 
-               case ACL_TYPE_DEFAULT:
-                       ext4_iset_acl(inode, &ei->i_default_acl, acl);
-                       break;
-               }
-       }
        return error;
 }
 
index cb45257a246e888467f8db4bfb6822b00e7fddd7..949789d2bba66b47805cc9adc008afdd7f150ca5 100644 (file)
@@ -53,10 +53,6 @@ static inline int ext4_acl_count(size_t size)
 
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 
-/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl
-   if the ACL has not been cached */
-#define EXT4_ACL_NOT_CACHED ((void *)-1)
-
 /* acl.c */
 extern int ext4_permission(struct inode *, int);
 extern int ext4_acl_chmod(struct inode *);
index 17b9998680e3b30b479f0417bea5a9401334b606..0ddf7e55abe181e3ac1bd31fea3357bf4c1bf8a6 100644 (file)
@@ -595,10 +595,6 @@ struct ext4_inode_info {
         */
        struct rw_semaphore xattr_sem;
 #endif
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-       struct posix_acl        *i_acl;
-       struct posix_acl        *i_default_acl;
-#endif
 
        struct list_head i_orphan;      /* unlinked but open inodes */
 
index 7c17ae275af4c0a0910bf5e7af7bc243199889de..60a26f3a6f8bd3e2ac5db8cfc220538630b7de6a 100644 (file)
@@ -4453,10 +4453,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                return inode;
 
        ei = EXT4_I(inode);
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-       ei->i_acl = EXT4_ACL_NOT_CACHED;
-       ei->i_default_acl = EXT4_ACL_NOT_CACHED;
-#endif
 
        ret = __ext4_get_inode_loc(inode, &iloc, 0);
        if (ret < 0)
index 8bb9e2d3e4b8dc69cdeb84086dea2f4bcfc4a519..8f4f079e6b9a2a850c459b8677f7911fd326f4ca 100644 (file)
@@ -666,10 +666,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
        if (!ei)
                return NULL;
 
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-       ei->i_acl = EXT4_ACL_NOT_CACHED;
-       ei->i_default_acl = EXT4_ACL_NOT_CACHED;
-#endif
        ei->vfs_inode.i_version = 1;
        ei->vfs_inode.i_data.writeback_index = 0;
        memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
@@ -735,18 +731,6 @@ static void destroy_inodecache(void)
 
 static void ext4_clear_inode(struct inode *inode)
 {
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-       if (EXT4_I(inode)->i_acl &&
-                       EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
-               posix_acl_release(EXT4_I(inode)->i_acl);
-               EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED;
-       }
-       if (EXT4_I(inode)->i_default_acl &&
-                       EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) {
-               posix_acl_release(EXT4_I(inode)->i_default_acl);
-               EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
-       }
-#endif
        ext4_discard_preallocations(inode);
        if (EXT4_JOURNAL(inode))
                jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
index caf049146ca27a537a7dddc4a38ba02f085a6b35..c54226be52948337fb11a78056e3ee3e90688545 100644 (file)
@@ -278,7 +278,26 @@ int sb_has_dirty_inodes(struct super_block *sb)
 EXPORT_SYMBOL(sb_has_dirty_inodes);
 
 /*
- * Write a single inode's dirty pages and inode data out to disk.
+ * Wait for writeback on an inode to complete.
+ */
+static void inode_wait_for_writeback(struct inode *inode)
+{
+       DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
+       wait_queue_head_t *wqh;
+
+       wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
+       do {
+               spin_unlock(&inode_lock);
+               __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
+               spin_lock(&inode_lock);
+       } while (inode->i_state & I_SYNC);
+}
+
+/*
+ * Write out an inode's dirty pages.  Called under inode_lock.  Either the
+ * caller has ref on the inode (either via __iget or via syscall against an fd)
+ * or the inode has I_WILL_FREE set (via generic_forget_inode)
+ *
  * If `wait' is set, wait on the writeout.
  *
  * The whole writeout design is quite complex and fragile.  We want to avoid
@@ -288,13 +307,38 @@ EXPORT_SYMBOL(sb_has_dirty_inodes);
  * Called under inode_lock.
  */
 static int
-__sync_single_inode(struct inode *inode, struct writeback_control *wbc)
+writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 {
-       unsigned dirty;
        struct address_space *mapping = inode->i_mapping;
        int wait = wbc->sync_mode == WB_SYNC_ALL;
+       unsigned dirty;
        int ret;
 
+       if (!atomic_read(&inode->i_count))
+               WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
+       else
+               WARN_ON(inode->i_state & I_WILL_FREE);
+
+       if (inode->i_state & I_SYNC) {
+               /*
+                * If this inode is locked for writeback and we are not doing
+                * writeback-for-data-integrity, move it to s_more_io so that
+                * writeback can proceed with the other inodes on s_io.
+                *
+                * We'll have another go at writing back this inode when we
+                * completed a full scan of s_io.
+                */
+               if (!wait) {
+                       requeue_io(inode);
+                       return 0;
+               }
+
+               /*
+                * It's a data-integrity sync.  We must wait.
+                */
+               inode_wait_for_writeback(inode);
+       }
+
        BUG_ON(inode->i_state & I_SYNC);
 
        /* Set I_SYNC, reset I_DIRTY */
@@ -389,50 +433,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
        return ret;
 }
 
-/*
- * Write out an inode's dirty pages.  Called under inode_lock.  Either the
- * caller has ref on the inode (either via __iget or via syscall against an fd)
- * or the inode has I_WILL_FREE set (via generic_forget_inode)
- */
-static int
-__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
-{
-       wait_queue_head_t *wqh;
-
-       if (!atomic_read(&inode->i_count))
-               WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
-       else
-               WARN_ON(inode->i_state & I_WILL_FREE);
-
-       if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) {
-               /*
-                * We're skipping this inode because it's locked, and we're not
-                * doing writeback-for-data-integrity.  Move it to s_more_io so
-                * that writeback can proceed with the other inodes on s_io.
-                * We'll have another go at writing back this inode when we
-                * completed a full scan of s_io.
-                */
-               requeue_io(inode);
-               return 0;
-       }
-
-       /*
-        * It's a data-integrity sync.  We must wait.
-        */
-       if (inode->i_state & I_SYNC) {
-               DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
-
-               wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
-               do {
-                       spin_unlock(&inode_lock);
-                       __wait_on_bit(wqh, &wq, inode_wait,
-                                                       TASK_UNINTERRUPTIBLE);
-                       spin_lock(&inode_lock);
-               } while (inode->i_state & I_SYNC);
-       }
-       return __sync_single_inode(inode, wbc);
-}
-
 /*
  * Write out a superblock's list of dirty inodes.  A wait will be performed
  * upon no inodes, all inodes or the final one, depending upon sync_mode.
@@ -526,7 +526,7 @@ void generic_sync_sb_inodes(struct super_block *sb,
                BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
                __iget(inode);
                pages_skipped = wbc->pages_skipped;
-               __writeback_single_inode(inode, wbc);
+               writeback_single_inode(inode, wbc);
                if (current_is_pdflush())
                        writeback_release(bdi);
                if (wbc->pages_skipped != pages_skipped) {
@@ -708,7 +708,7 @@ int write_inode_now(struct inode *inode, int sync)
 
        might_sleep();
        spin_lock(&inode_lock);
-       ret = __writeback_single_inode(inode, &wbc);
+       ret = writeback_single_inode(inode, &wbc);
        spin_unlock(&inode_lock);
        if (sync)
                inode_sync_wait(inode);
@@ -732,7 +732,7 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
        int ret;
 
        spin_lock(&inode_lock);
-       ret = __writeback_single_inode(inode, wbc);
+       ret = writeback_single_inode(inode, wbc);
        spin_unlock(&inode_lock);
        return ret;
 }
index f643be565df8e403a0fc807c4d1af554004c0d62..901bad1e5f1210cce8f2bc2a474345f5b1a4072a 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/fsnotify.h>
 #include <linux/mount.h>
 #include <linux/async.h>
+#include <linux/posix_acl.h>
 
 /*
  * This is needed for the following functions:
@@ -189,6 +190,9 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
        }
        inode->i_private = NULL;
        inode->i_mapping = mapping;
+#ifdef CONFIG_FS_POSIX_ACL
+       inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
+#endif
 
 #ifdef CONFIG_FSNOTIFY
        inode->i_fsnotify_mask = 0;
@@ -227,6 +231,12 @@ void destroy_inode(struct inode *inode)
        ima_inode_free(inode);
        security_inode_free(inode);
        fsnotify_inode_delete(inode);
+#ifdef CONFIG_FS_POSIX_ACL
+       if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED)
+               posix_acl_release(inode->i_acl);
+       if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
+               posix_acl_release(inode->i_default_acl);
+#endif
        if (inode->i_sb->s_op->destroy_inode)
                inode->i_sb->s_op->destroy_inode(inode);
        else
@@ -665,12 +675,17 @@ void unlock_new_inode(struct inode *inode)
        if (inode->i_mode & S_IFDIR) {
                struct file_system_type *type = inode->i_sb->s_type;
 
-               /*
-                * ensure nobody is actually holding i_mutex
-                */
-               mutex_destroy(&inode->i_mutex);
-               mutex_init(&inode->i_mutex);
-               lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key);
+               /* Set new key only if filesystem hasn't already changed it */
+               if (!lockdep_match_class(&inode->i_mutex,
+                   &type->i_mutex_key)) {
+                       /*
+                        * ensure nobody is actually holding i_mutex
+                        */
+                       mutex_destroy(&inode->i_mutex);
+                       mutex_init(&inode->i_mutex);
+                       lockdep_set_class(&inode->i_mutex,
+                                         &type->i_mutex_dir_key);
+               }
        }
 #endif
        /*
index 001f8d3118f27329d89131feef43c7ba0c0b1a85..5612880fcbe7d7436f3579c7c1add7360170e407 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/uaccess.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h>
+#include <linux/falloc.h>
 
 #include <asm/ioctls.h>
 
@@ -403,6 +404,37 @@ EXPORT_SYMBOL(generic_block_fiemap);
 
 #endif  /*  CONFIG_BLOCK  */
 
+/*
+ * This provides compatibility with legacy XFS pre-allocation ioctls
+ * which predate the fallocate syscall.
+ *
+ * Only the l_start, l_len and l_whence fields of the 'struct space_resv'
+ * are used here, rest are ignored.
+ */
+int ioctl_preallocate(struct file *filp, void __user *argp)
+{
+       struct inode *inode = filp->f_path.dentry->d_inode;
+       struct space_resv sr;
+
+       if (copy_from_user(&sr, argp, sizeof(sr)))
+               return -EFAULT;
+
+       switch (sr.l_whence) {
+       case SEEK_SET:
+               break;
+       case SEEK_CUR:
+               sr.l_start += filp->f_pos;
+               break;
+       case SEEK_END:
+               sr.l_start += i_size_read(inode);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return do_fallocate(filp, FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len);
+}
+
 static int file_ioctl(struct file *filp, unsigned int cmd,
                unsigned long arg)
 {
@@ -414,6 +446,9 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
                return ioctl_fibmap(filp, p);
        case FIONREAD:
                return put_user(i_size_read(inode) - filp->f_pos, p);
+       case FS_IOC_RESVSP:
+       case FS_IOC_RESVSP64:
+               return ioctl_preallocate(filp, p);
        }
 
        return vfs_ioctl(filp, cmd, arg);
index 043740dde20c75fb317bfdedac040abedf9004aa..8fcb6239218e53b7b9ced59a0ee9a854f4c77175 100644 (file)
@@ -156,48 +156,25 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size)
        return ERR_PTR(-EINVAL);
 }
 
-static struct posix_acl *jffs2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
-{
-       struct posix_acl *acl = JFFS2_ACL_NOT_CACHED;
-
-       spin_lock(&inode->i_lock);
-       if (*i_acl != JFFS2_ACL_NOT_CACHED)
-               acl = posix_acl_dup(*i_acl);
-       spin_unlock(&inode->i_lock);
-       return acl;
-}
-
-static void jffs2_iset_acl(struct inode *inode, struct posix_acl **i_acl, struct posix_acl *acl)
-{
-       spin_lock(&inode->i_lock);
-       if (*i_acl != JFFS2_ACL_NOT_CACHED)
-               posix_acl_release(*i_acl);
-       *i_acl = posix_acl_dup(acl);
-       spin_unlock(&inode->i_lock);
-}
-
 static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
 {
-       struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
        struct posix_acl *acl;
        char *value = NULL;
        int rc, xprefix;
 
+       acl = get_cached_acl(inode, type);
+       if (acl != ACL_NOT_CACHED)
+               return acl;
+
        switch (type) {
        case ACL_TYPE_ACCESS:
-               acl = jffs2_iget_acl(inode, &f->i_acl_access);
-               if (acl != JFFS2_ACL_NOT_CACHED)
-                       return acl;
                xprefix = JFFS2_XPREFIX_ACL_ACCESS;
                break;
        case ACL_TYPE_DEFAULT:
-               acl = jffs2_iget_acl(inode, &f->i_acl_default);
-               if (acl != JFFS2_ACL_NOT_CACHED)
-                       return acl;
                xprefix = JFFS2_XPREFIX_ACL_DEFAULT;
                break;
        default:
-               return ERR_PTR(-EINVAL);
+               BUG();
        }
        rc = do_jffs2_getxattr(inode, xprefix, "", NULL, 0);
        if (rc > 0) {
@@ -215,16 +192,8 @@ static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
        }
        if (value)
                kfree(value);
-       if (!IS_ERR(acl)) {
-               switch (type) {
-               case ACL_TYPE_ACCESS:
-                       jffs2_iset_acl(inode, &f->i_acl_access, acl);
-                       break;
-               case ACL_TYPE_DEFAULT:
-                       jffs2_iset_acl(inode, &f->i_acl_default, acl);
-                       break;
-               }
-       }
+       if (!IS_ERR(acl))
+               set_cached_acl(inode, type, acl);
        return acl;
 }
 
@@ -249,7 +218,6 @@ static int __jffs2_set_acl(struct inode *inode, int xprefix, struct posix_acl *a
 
 static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 {
-       struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
        int rc, xprefix;
 
        if (S_ISLNK(inode->i_mode))
@@ -285,16 +253,8 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
                return -EINVAL;
        }
        rc = __jffs2_set_acl(inode, xprefix, acl);
-       if (!rc) {
-               switch(type) {
-               case ACL_TYPE_ACCESS:
-                       jffs2_iset_acl(inode, &f->i_acl_access, acl);
-                       break;
-               case ACL_TYPE_DEFAULT:
-                       jffs2_iset_acl(inode, &f->i_acl_default, acl);
-                       break;
-               }
-       }
+       if (!rc)
+               set_cached_acl(inode, type, acl);
        return rc;
 }
 
@@ -321,12 +281,10 @@ int jffs2_permission(struct inode *inode, int mask)
 
 int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
 {
-       struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
        struct posix_acl *acl, *clone;
        int rc;
 
-       f->i_acl_default = NULL;
-       f->i_acl_access = NULL;
+       cache_no_acl(inode);
 
        if (S_ISLNK(*i_mode))
                return 0;       /* Symlink always has no-ACL */
@@ -339,7 +297,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
                *i_mode &= ~current_umask();
        } else {
                if (S_ISDIR(*i_mode))
-                       jffs2_iset_acl(inode, &f->i_acl_default, acl);
+                       set_cached_acl(inode, ACL_TYPE_DEFAULT, acl);
 
                clone = posix_acl_clone(acl, GFP_KERNEL);
                if (!clone)
@@ -350,7 +308,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
                        return rc;
                }
                if (rc > 0)
-                       jffs2_iset_acl(inode, &f->i_acl_access, clone);
+                       set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
 
                posix_acl_release(clone);
        }
@@ -359,17 +317,16 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
 
 int jffs2_init_acl_post(struct inode *inode)
 {
-       struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
        int rc;
 
-       if (f->i_acl_default) {
-               rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_DEFAULT, f->i_acl_default);
+       if (inode->i_default_acl) {
+               rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_DEFAULT, inode->i_default_acl);
                if (rc)
                        return rc;
        }
 
-       if (f->i_acl_access) {
-               rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_ACCESS, f->i_acl_access);
+       if (inode->i_acl) {
+               rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_ACCESS, inode->i_acl);
                if (rc)
                        return rc;
        }
@@ -377,18 +334,6 @@ int jffs2_init_acl_post(struct inode *inode)
        return 0;
 }
 
-void jffs2_clear_acl(struct jffs2_inode_info *f)
-{
-       if (f->i_acl_access && f->i_acl_access != JFFS2_ACL_NOT_CACHED) {
-               posix_acl_release(f->i_acl_access);
-               f->i_acl_access = JFFS2_ACL_NOT_CACHED;
-       }
-       if (f->i_acl_default && f->i_acl_default != JFFS2_ACL_NOT_CACHED) {
-               posix_acl_release(f->i_acl_default);
-               f->i_acl_default = JFFS2_ACL_NOT_CACHED;
-       }
-}
-
 int jffs2_acl_chmod(struct inode *inode)
 {
        struct posix_acl *acl, *clone;
index 8ca058aed3840fa5f8c13c5b733127c473899739..fc929f2a14f6bfe16d1554767f67abe52986d137 100644 (file)
@@ -26,13 +26,10 @@ struct jffs2_acl_header {
 
 #ifdef CONFIG_JFFS2_FS_POSIX_ACL
 
-#define JFFS2_ACL_NOT_CACHED ((void *)-1)
-
 extern int jffs2_permission(struct inode *, int);
 extern int jffs2_acl_chmod(struct inode *);
 extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
 extern int jffs2_init_acl_post(struct inode *);
-extern void jffs2_clear_acl(struct jffs2_inode_info *);
 
 extern struct xattr_handler jffs2_acl_access_xattr_handler;
 extern struct xattr_handler jffs2_acl_default_xattr_handler;
@@ -43,6 +40,5 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler;
 #define jffs2_acl_chmod(inode)                 (0)
 #define jffs2_init_acl_pre(dir_i,inode,mode)   (0)
 #define jffs2_init_acl_post(inode)             (0)
-#define jffs2_clear_acl(f)
 
 #endif /* CONFIG_JFFS2_FS_POSIX_ACL */
index 4c41db91eaa45bffb0566d401d4a915abea4e32b..c6923da98263331c39dd3eea3d5c3c5f6232f39a 100644 (file)
@@ -50,10 +50,6 @@ struct jffs2_inode_info {
        uint16_t flags;
        uint8_t usercompr;
        struct inode vfs_inode;
-#ifdef CONFIG_JFFS2_FS_POSIX_ACL
-       struct posix_acl *i_acl_access;
-       struct posix_acl *i_acl_default;
-#endif
 };
 
 #endif /* _JFFS2_FS_I */
index 2228380c47b9e15435e05dd2e6768af0b1b80f6e..a7f03b7ebcb3c981777de0f64572957ed04ff03a 100644 (file)
@@ -56,10 +56,6 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f)
        f->target = NULL;
        f->flags = 0;
        f->usercompr = 0;
-#ifdef CONFIG_JFFS2_FS_POSIX_ACL
-       f->i_acl_access = JFFS2_ACL_NOT_CACHED;
-       f->i_acl_default = JFFS2_ACL_NOT_CACHED;
-#endif
 }
 
 
index 1fc1e92356eeb8f2a5c115082edf0109bb37a6db..1a80301004b8d82806c6dcdbf90a9bb4cbd71d5a 100644 (file)
@@ -1424,7 +1424,6 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
        struct jffs2_full_dirent *fd, *fds;
        int deleted;
 
-       jffs2_clear_acl(f);
        jffs2_xattr_delete_inode(c, f->inocache);
        mutex_lock(&f->sem);
        deleted = f->inocache && !f->inocache->pino_nlink;
index 06ca1b8d205459e2a2bbbfb35282d0f576ee788c..91fa3ad6e8c2dc01144bcf1ab56c250a9c9e54bb 100644 (file)
@@ -31,27 +31,24 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
 {
        struct posix_acl *acl;
        char *ea_name;
-       struct jfs_inode_info *ji = JFS_IP(inode);
-       struct posix_acl **p_acl;
        int size;
        char *value = NULL;
 
+       acl = get_cached_acl(inode, type);
+       if (acl != ACL_NOT_CACHED)
+               return acl;
+
        switch(type) {
                case ACL_TYPE_ACCESS:
                        ea_name = POSIX_ACL_XATTR_ACCESS;
-                       p_acl = &ji->i_acl;
                        break;
                case ACL_TYPE_DEFAULT:
                        ea_name = POSIX_ACL_XATTR_DEFAULT;
-                       p_acl = &ji->i_default_acl;
                        break;
                default:
                        return ERR_PTR(-EINVAL);
        }
 
-       if (*p_acl != JFS_ACL_NOT_CACHED)
-               return posix_acl_dup(*p_acl);
-
        size = __jfs_getxattr(inode, ea_name, NULL, 0);
 
        if (size > 0) {
@@ -62,17 +59,18 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
        }
 
        if (size < 0) {
-               if (size == -ENODATA) {
-                       *p_acl = NULL;
+               if (size == -ENODATA)
                        acl = NULL;
-               else
+               else
                        acl = ERR_PTR(size);
        } else {
                acl = posix_acl_from_xattr(value, size);
-               if (!IS_ERR(acl))
-                       *p_acl = posix_acl_dup(acl);
        }
        kfree(value);
+       if (!IS_ERR(acl)) {
+               set_cached_acl(inode, type, acl);
+               posix_acl_release(acl);
+       }
        return acl;
 }
 
@@ -80,8 +78,6 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
                       struct posix_acl *acl)
 {
        char *ea_name;
-       struct jfs_inode_info *ji = JFS_IP(inode);
-       struct posix_acl **p_acl;
        int rc;
        int size = 0;
        char *value = NULL;
@@ -92,11 +88,9 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
        switch(type) {
                case ACL_TYPE_ACCESS:
                        ea_name = POSIX_ACL_XATTR_ACCESS;
-                       p_acl = &ji->i_acl;
                        break;
                case ACL_TYPE_DEFAULT:
                        ea_name = POSIX_ACL_XATTR_DEFAULT;
-                       p_acl = &ji->i_default_acl;
                        if (!S_ISDIR(inode->i_mode))
                                return acl ? -EACCES : 0;
                        break;
@@ -116,27 +110,24 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
 out:
        kfree(value);
 
-       if (!rc) {
-               if (*p_acl && (*p_acl != JFS_ACL_NOT_CACHED))
-                       posix_acl_release(*p_acl);
-               *p_acl = posix_acl_dup(acl);
-       }
+       if (!rc)
+               set_cached_acl(inode, type, acl);
+
        return rc;
 }
 
 static int jfs_check_acl(struct inode *inode, int mask)
 {
-       struct jfs_inode_info *ji = JFS_IP(inode);
+       struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
 
-       if (ji->i_acl == JFS_ACL_NOT_CACHED) {
-               struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
-               if (IS_ERR(acl))
-                       return PTR_ERR(acl);
+       if (IS_ERR(acl))
+               return PTR_ERR(acl);
+       if (acl) {
+               int error = posix_acl_permission(inode, acl, mask);
                posix_acl_release(acl);
+               return error;
        }
 
-       if (ji->i_acl)
-               return posix_acl_permission(inode, ji->i_acl, mask);
        return -EAGAIN;
 }
 
index 439901d205feaf1b1a0243ea6141490267f1be30..1439f119ec830876ded18b7c5bf66b2ed1fdc80d 100644 (file)
@@ -74,10 +74,6 @@ struct jfs_inode_info {
        /* xattr_sem allows us to access the xattrs without taking i_mutex */
        struct rw_semaphore xattr_sem;
        lid_t   xtlid;          /* lid of xtree lock on directory */
-#ifdef CONFIG_JFS_POSIX_ACL
-       struct posix_acl *i_acl;
-       struct posix_acl *i_default_acl;
-#endif
        union {
                struct {
                        xtpage_t _xtroot;       /* 288: xtree root */
@@ -107,8 +103,6 @@ struct jfs_inode_info {
 #define i_inline u.link._inline
 #define i_inline_ea u.link._inline_ea
 
-#define JFS_ACL_NOT_CACHED ((void *)-1)
-
 #define IREAD_LOCK(ip, subclass) \
        down_read_nested(&JFS_IP(ip)->rdwrlock, subclass)
 #define IREAD_UNLOCK(ip)       up_read(&JFS_IP(ip)->rdwrlock)
index 09b1b6ee21861507e834d7e32322db15d5b957bb..37e6dcda8fc84f587508f8db58abdecee69524e1 100644 (file)
@@ -128,18 +128,6 @@ static void jfs_destroy_inode(struct inode *inode)
                ji->active_ag = -1;
        }
        spin_unlock_irq(&ji->ag_lock);
-
-#ifdef CONFIG_JFS_POSIX_ACL
-       if (ji->i_acl != JFS_ACL_NOT_CACHED) {
-               posix_acl_release(ji->i_acl);
-               ji->i_acl = JFS_ACL_NOT_CACHED;
-       }
-       if (ji->i_default_acl != JFS_ACL_NOT_CACHED) {
-               posix_acl_release(ji->i_default_acl);
-               ji->i_default_acl = JFS_ACL_NOT_CACHED;
-       }
-#endif
-
        kmem_cache_free(jfs_inode_cachep, ji);
 }
 
@@ -798,10 +786,6 @@ static void init_once(void *foo)
        init_rwsem(&jfs_ip->xattr_sem);
        spin_lock_init(&jfs_ip->ag_lock);
        jfs_ip->active_ag = -1;
-#ifdef CONFIG_JFS_POSIX_ACL
-       jfs_ip->i_acl = JFS_ACL_NOT_CACHED;
-       jfs_ip->i_default_acl = JFS_ACL_NOT_CACHED;
-#endif
        inode_init_once(&jfs_ip->vfs_inode);
 }
 
index 61dfa8173ebccdba37a099d1b2299716b6e89218..fad364548bc9e3716b08dc0f6ca66a58d1f9b56c 100644 (file)
@@ -727,10 +727,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
                /*
                 * We're changing the ACL.  Get rid of the cached one
                 */
-               acl =JFS_IP(inode)->i_acl;
-               if (acl != JFS_ACL_NOT_CACHED)
-                       posix_acl_release(acl);
-               JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED;
+               forget_cached_acl(inode, ACL_TYPE_ACCESS);
 
                return 0;
        } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
@@ -746,10 +743,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
                /*
                 * We're changing the default ACL.  Get rid of the cached one
                 */
-               acl =JFS_IP(inode)->i_default_acl;
-               if (acl && (acl != JFS_ACL_NOT_CACHED))
-                       posix_acl_release(acl);
-               JFS_IP(inode)->i_default_acl = JFS_ACL_NOT_CACHED;
+               forget_cached_acl(inode, ACL_TYPE_DEFAULT);
 
                return 0;
        }
index 527119afb6a5cca3bea37efde7e6206e04052795..5b961eb71cbf9781988b50559ff10842f49e483a 100644 (file)
@@ -1698,8 +1698,11 @@ struct file *do_filp_open(int dfd, const char *pathname,
        if (error)
                return ERR_PTR(error);
        error = path_walk(pathname, &nd);
-       if (error)
+       if (error) {
+               if (nd.root.mnt)
+                       path_put(&nd.root);
                return ERR_PTR(error);
+       }
        if (unlikely(!audit_dummy_context()))
                audit_inode(pathname, nd.path.dentry);
 
@@ -1759,6 +1762,8 @@ do_last:
                }
                filp = nameidata_to_filp(&nd, open_flag);
                mnt_drop_write(nd.path.mnt);
+               if (nd.root.mnt)
+                       path_put(&nd.root);
                return filp;
        }
 
@@ -1819,6 +1824,8 @@ ok:
         */
        if (will_write)
                mnt_drop_write(nd.path.mnt);
+       if (nd.root.mnt)
+               path_put(&nd.root);
        return filp;
 
 exit_mutex_unlock:
@@ -1859,6 +1866,8 @@ do_link:
                 * with "intent.open".
                 */
                release_open_intent(&nd);
+               if (nd.root.mnt)
+                       path_put(&nd.root);
                return ERR_PTR(error);
        }
        nd.flags &= ~LOOKUP_PARENT;
index a7bea8c8bd469e7760aa1dc098478eef5b1be518..3dc283fd4716beacfe037839d2fc5fd5974c925d 100644 (file)
@@ -42,6 +42,8 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
 static int event;
 static DEFINE_IDA(mnt_id_ida);
 static DEFINE_IDA(mnt_group_ida);
+static int mnt_id_start = 0;
+static int mnt_group_start = 1;
 
 static struct list_head *mount_hashtable __read_mostly;
 static struct kmem_cache *mnt_cache __read_mostly;
@@ -69,7 +71,9 @@ static int mnt_alloc_id(struct vfsmount *mnt)
 retry:
        ida_pre_get(&mnt_id_ida, GFP_KERNEL);
        spin_lock(&vfsmount_lock);
-       res = ida_get_new(&mnt_id_ida, &mnt->mnt_id);
+       res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
+       if (!res)
+               mnt_id_start = mnt->mnt_id + 1;
        spin_unlock(&vfsmount_lock);
        if (res == -EAGAIN)
                goto retry;
@@ -79,8 +83,11 @@ retry:
 
 static void mnt_free_id(struct vfsmount *mnt)
 {
+       int id = mnt->mnt_id;
        spin_lock(&vfsmount_lock);
-       ida_remove(&mnt_id_ida, mnt->mnt_id);
+       ida_remove(&mnt_id_ida, id);
+       if (mnt_id_start > id)
+               mnt_id_start = id;
        spin_unlock(&vfsmount_lock);
 }
 
@@ -91,10 +98,18 @@ static void mnt_free_id(struct vfsmount *mnt)
  */
 static int mnt_alloc_group_id(struct vfsmount *mnt)
 {
+       int res;
+
        if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
                return -ENOMEM;
 
-       return ida_get_new_above(&mnt_group_ida, 1, &mnt->mnt_group_id);
+       res = ida_get_new_above(&mnt_group_ida,
+                               mnt_group_start,
+                               &mnt->mnt_group_id);
+       if (!res)
+               mnt_group_start = mnt->mnt_group_id + 1;
+
+       return res;
 }
 
 /*
@@ -102,7 +117,10 @@ static int mnt_alloc_group_id(struct vfsmount *mnt)
  */
 void mnt_release_group_id(struct vfsmount *mnt)
 {
-       ida_remove(&mnt_group_ida, mnt->mnt_group_id);
+       int id = mnt->mnt_group_id;
+       ida_remove(&mnt_group_ida, id);
+       if (mnt_group_start > id)
+               mnt_group_start = id;
        mnt->mnt_group_id = 0;
 }
 
@@ -2222,16 +2240,9 @@ static void __init init_mount_tree(void)
        mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
        if (IS_ERR(mnt))
                panic("Can't create rootfs");
-       ns = kmalloc(sizeof(*ns), GFP_KERNEL);
-       if (!ns)
+       ns = create_mnt_ns(mnt);
+       if (IS_ERR(ns))
                panic("Can't allocate initial namespace");
-       atomic_set(&ns->count, 1);
-       INIT_LIST_HEAD(&ns->list);
-       init_waitqueue_head(&ns->poll);
-       ns->event = 0;
-       list_add(&mnt->mnt_list, &ns->list);
-       ns->root = mnt;
-       mnt->mnt_ns = ns;
 
        init_task.nsproxy->mnt_ns = ns;
        get_mnt_ns(ns);
index 2696d6b513b7177d2efb9c2805ca878ebf0277a3..fe9d8f2a13f8785bf366f0a5884261af8ccfd782 100644 (file)
@@ -309,10 +309,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode)
        /* ii->i_file_acl = 0; */
        /* ii->i_dir_acl = 0; */
        ii->i_dir_start_lookup = 0;
-#ifdef CONFIG_NILFS_FS_POSIX_ACL
-       ii->i_acl = NULL;
-       ii->i_default_acl = NULL;
-#endif
        ii->i_cno = 0;
        nilfs_set_inode_flags(inode);
        spin_lock(&sbi->s_next_gen_lock);
@@ -434,10 +430,6 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino,
 
        raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh);
 
-#ifdef CONFIG_NILFS_FS_POSIX_ACL
-       ii->i_acl = NILFS_ACL_NOT_CACHED;
-       ii->i_default_acl = NILFS_ACL_NOT_CACHED;
-#endif
        if (nilfs_read_inode_common(inode, raw_inode))
                goto failed_unmap;
 
index edf6a59d9f2a1710e404e45250308dfa06ba28f9..724c63766e827c9232bd197a99b93af8064aea20 100644 (file)
@@ -57,10 +57,6 @@ struct nilfs_inode_info {
         * EAs.
         */
        struct rw_semaphore xattr_sem;
-#endif
-#ifdef CONFIG_NILFS_POSIX_ACL
-       struct posix_acl *i_acl;
-       struct posix_acl *i_default_acl;
 #endif
        struct buffer_head *i_bh;       /* i_bh contains a new or dirty
                                           disk inode */
index ab785f85aa501abb40691a61b24395e0419c62f8..8e2ec43b18f4f458b6f683fc94a9f656a36a9c3a 100644 (file)
@@ -189,16 +189,6 @@ static void nilfs_clear_inode(struct inode *inode)
 {
        struct nilfs_inode_info *ii = NILFS_I(inode);
 
-#ifdef CONFIG_NILFS_POSIX_ACL
-       if (ii->i_acl && ii->i_acl != NILFS_ACL_NOT_CACHED) {
-               posix_acl_release(ii->i_acl);
-               ii->i_acl = NILFS_ACL_NOT_CACHED;
-       }
-       if (ii->i_default_acl && ii->i_default_acl != NILFS_ACL_NOT_CACHED) {
-               posix_acl_release(ii->i_default_acl);
-               ii->i_default_acl = NILFS_ACL_NOT_CACHED;
-       }
-#endif
        /*
         * Free resources allocated in nilfs_read_inode(), here.
         */
index 6cdeaa76f27fec651b0c14f40a48e31f3aa8f71e..110bb57c46abe76bb83ae1b689e185684384c944 100644 (file)
@@ -92,6 +92,9 @@ struct ocfs2_unblock_ctl {
        enum ocfs2_unblock_action unblock_action;
 };
 
+/* Lockdep class keys */
+struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
+
 static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
                                        int new_level);
 static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
@@ -317,9 +320,16 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
                             u32 dlm_flags);
 static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
                                                     int wanted);
-static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
-                                struct ocfs2_lock_res *lockres,
-                                int level);
+static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
+                                  struct ocfs2_lock_res *lockres,
+                                  int level, unsigned long caller_ip);
+static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
+                                       struct ocfs2_lock_res *lockres,
+                                       int level)
+{
+       __ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
+}
+
 static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
 static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
 static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
@@ -489,6 +499,13 @@ static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
        ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
 
        ocfs2_init_lock_stats(res);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       if (type != OCFS2_LOCK_TYPE_OPEN)
+               lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type],
+                                &lockdep_keys[type], 0);
+       else
+               res->l_lockdep_map.key = NULL;
+#endif
 }
 
 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
@@ -644,14 +661,10 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
 static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
                                            struct ocfs2_super *osb)
 {
-       struct ocfs2_orphan_scan_lvb *lvb;
-
        ocfs2_lock_res_init_once(res);
        ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
        ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
                                   &ocfs2_orphan_scan_lops, osb);
-       lvb = ocfs2_dlm_lvb(&res->l_lksb);
-       lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
 }
 
 void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
@@ -1256,11 +1269,13 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
        return ret;
 }
 
-static int ocfs2_cluster_lock(struct ocfs2_super *osb,
-                             struct ocfs2_lock_res *lockres,
-                             int level,
-                             u32 lkm_flags,
-                             int arg_flags)
+static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
+                               struct ocfs2_lock_res *lockres,
+                               int level,
+                               u32 lkm_flags,
+                               int arg_flags,
+                               int l_subclass,
+                               unsigned long caller_ip)
 {
        struct ocfs2_mask_waiter mw;
        int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
@@ -1403,13 +1418,37 @@ out:
        }
        ocfs2_update_lock_stats(lockres, level, &mw, ret);
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       if (!ret && lockres->l_lockdep_map.key != NULL) {
+               if (level == DLM_LOCK_PR)
+                       rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
+                               !!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
+                               caller_ip);
+               else
+                       rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
+                               !!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
+                               caller_ip);
+       }
+#endif
        mlog_exit(ret);
        return ret;
 }
 
-static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
-                                struct ocfs2_lock_res *lockres,
-                                int level)
+static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
+                                    struct ocfs2_lock_res *lockres,
+                                    int level,
+                                    u32 lkm_flags,
+                                    int arg_flags)
+{
+       return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
+                                   0, _RET_IP_);
+}
+
+
+static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
+                                  struct ocfs2_lock_res *lockres,
+                                  int level,
+                                  unsigned long caller_ip)
 {
        unsigned long flags;
 
@@ -1418,6 +1457,10 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
        ocfs2_dec_holders(lockres, level);
        ocfs2_downconvert_on_unlock(osb, lockres);
        spin_unlock_irqrestore(&lockres->l_lock, flags);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       if (lockres->l_lockdep_map.key != NULL)
+               rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
+#endif
        mlog_exit_void();
 }
 
@@ -1989,7 +2032,8 @@ static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
 {
        struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
 
-       if (lvb->lvb_version == OCFS2_LVB_VERSION
+       if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
+           && lvb->lvb_version == OCFS2_LVB_VERSION
            && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
                return 1;
        return 0;
@@ -2162,10 +2206,11 @@ static int ocfs2_assign_bh(struct inode *inode,
  * returns < 0 error if the callback will never be called, otherwise
  * the result of the lock will be communicated via the callback.
  */
-int ocfs2_inode_lock_full(struct inode *inode,
-                        struct buffer_head **ret_bh,
-                        int ex,
-                        int arg_flags)
+int ocfs2_inode_lock_full_nested(struct inode *inode,
+                                struct buffer_head **ret_bh,
+                                int ex,
+                                int arg_flags,
+                                int subclass)
 {
        int status, level, acquired;
        u32 dlm_flags;
@@ -2203,7 +2248,8 @@ int ocfs2_inode_lock_full(struct inode *inode,
        if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
                dlm_flags |= DLM_LKF_NOQUEUE;
 
-       status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags);
+       status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
+                                     arg_flags, subclass, _RET_IP_);
        if (status < 0) {
                if (status != -EAGAIN && status != -EIOCBRETRY)
                        mlog_errno(status);
@@ -2369,35 +2415,45 @@ void ocfs2_inode_unlock(struct inode *inode,
        mlog_exit_void();
 }
 
-int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex)
+int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
 {
        struct ocfs2_lock_res *lockres;
        struct ocfs2_orphan_scan_lvb *lvb;
-       int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
        int status = 0;
 
+       if (ocfs2_is_hard_readonly(osb))
+               return -EROFS;
+
+       if (ocfs2_mount_local(osb))
+               return 0;
+
        lockres = &osb->osb_orphan_scan.os_lockres;
-       status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
+       status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
        if (status < 0)
                return status;
 
        lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
-       if (lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
+       if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
+           lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
                *seqno = be32_to_cpu(lvb->lvb_os_seqno);
+       else
+               *seqno = osb->osb_orphan_scan.os_seqno + 1;
+
        return status;
 }
 
-void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex)
+void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno)
 {
        struct ocfs2_lock_res *lockres;
        struct ocfs2_orphan_scan_lvb *lvb;
-       int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
 
-       lockres = &osb->osb_orphan_scan.os_lockres;
-       lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
-       lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
-       lvb->lvb_os_seqno = cpu_to_be32(seqno);
-       ocfs2_cluster_unlock(osb, lockres, level);
+       if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) {
+               lockres = &osb->osb_orphan_scan.os_lockres;
+               lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
+               lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
+               lvb->lvb_os_seqno = cpu_to_be32(seqno);
+               ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
+       }
 }
 
 int ocfs2_super_lock(struct ocfs2_super *osb,
@@ -3627,7 +3683,8 @@ static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
        struct ocfs2_global_disk_dqinfo *gdinfo;
        int status = 0;
 
-       if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
+       if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
+           lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
                info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
                info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
                oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
index 31b90d7b8f51f0dc449da138e8e7b14f118aabb4..7553836931de8f7e5c5b72a9a55fdc3c428d94a9 100644 (file)
@@ -78,6 +78,14 @@ struct ocfs2_orphan_scan_lvb {
 /* don't block waiting for the downconvert thread, instead return -EAGAIN */
 #define OCFS2_LOCK_NONBLOCK            (0x04)
 
+/* Locking subclasses of inode cluster lock */
+enum {
+       OI_LS_NORMAL = 0,
+       OI_LS_PARENT,
+       OI_LS_RENAME1,
+       OI_LS_RENAME2,
+};
+
 int ocfs2_dlm_init(struct ocfs2_super *osb);
 void ocfs2_dlm_shutdown(struct ocfs2_super *osb, int hangup_pending);
 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
@@ -104,25 +112,31 @@ void ocfs2_open_unlock(struct inode *inode);
 int ocfs2_inode_lock_atime(struct inode *inode,
                          struct vfsmount *vfsmnt,
                          int *level);
-int ocfs2_inode_lock_full(struct inode *inode,
+int ocfs2_inode_lock_full_nested(struct inode *inode,
                         struct buffer_head **ret_bh,
                         int ex,
-                        int arg_flags);
+                        int arg_flags,
+                        int subclass);
 int ocfs2_inode_lock_with_page(struct inode *inode,
                              struct buffer_head **ret_bh,
                              int ex,
                              struct page *page);
+/* Variants without special locking class or flags */
+#define ocfs2_inode_lock_full(i, r, e, f)\
+               ocfs2_inode_lock_full_nested(i, r, e, f, OI_LS_NORMAL)
+#define ocfs2_inode_lock_nested(i, b, e, s)\
+               ocfs2_inode_lock_full_nested(i, b, e, 0, s)
 /* 99% of the time we don't want to supply any additional flags --
  * those are for very specific cases only. */
-#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full(i, b, e, 0)
+#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full_nested(i, b, e, 0, OI_LS_NORMAL)
 void ocfs2_inode_unlock(struct inode *inode,
                       int ex);
 int ocfs2_super_lock(struct ocfs2_super *osb,
                     int ex);
 void ocfs2_super_unlock(struct ocfs2_super *osb,
                        int ex);
-int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex);
-void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex);
+int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno);
+void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno);
 
 int ocfs2_rename_lock(struct ocfs2_super *osb);
 void ocfs2_rename_unlock(struct ocfs2_super *osb);
index 07267e0da909410294a3f330eb08fea5d1feefb8..62442e413a001cfbe57607c43e06d408418cf13e 100644 (file)
@@ -2026,7 +2026,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
                                      size_t len,
                                      unsigned int flags)
 {
-       int ret = 0;
+       int ret = 0, lock_level = 0;
        struct inode *inode = in->f_path.dentry->d_inode;
 
        mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
@@ -2037,12 +2037,12 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
        /*
         * See the comment in ocfs2_file_aio_read()
         */
-       ret = ocfs2_inode_lock(inode, NULL, 0);
+       ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level);
        if (ret < 0) {
                mlog_errno(ret);
                goto bail;
        }
-       ocfs2_inode_unlock(inode, 0);
+       ocfs2_inode_unlock(inode, lock_level);
 
        ret = generic_file_splice_read(in, ppos, pipe, len, flags);
 
index 10e1fa87396aedf9ea0a1cfbd501c25361a872e7..4dc8890ba31615a3e8ce450d8b33fd38fcc202e8 100644 (file)
@@ -215,6 +215,8 @@ bail:
 static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
 {
        struct ocfs2_find_inode_args *args = opaque;
+       static struct lock_class_key ocfs2_quota_ip_alloc_sem_key,
+                                    ocfs2_file_ip_alloc_sem_key;
 
        mlog_entry("inode = %p, opaque = %p\n", inode, opaque);
 
@@ -223,6 +225,15 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
        if (args->fi_sysfile_type != 0)
                lockdep_set_class(&inode->i_mutex,
                        &ocfs2_sysfile_lock_key[args->fi_sysfile_type]);
+       if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE ||
+           args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE ||
+           args->fi_sysfile_type == LOCAL_USER_QUOTA_SYSTEM_INODE ||
+           args->fi_sysfile_type == LOCAL_GROUP_QUOTA_SYSTEM_INODE)
+               lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
+                                 &ocfs2_quota_ip_alloc_sem_key);
+       else
+               lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
+                                 &ocfs2_file_ip_alloc_sem_key);
 
        mlog_exit(0);
        return 0;
index 4a3b9e6b31adcc6247dc28fea3887a426f9a09d7..f033760ecbeaae0dde0944d190828aedb7b9c5d0 100644 (file)
@@ -1880,13 +1880,20 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
 
        os = &osb->osb_orphan_scan;
 
-       status = ocfs2_orphan_scan_lock(osb, &seqno, DLM_LOCK_EX);
+       if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
+               goto out;
+
+       status = ocfs2_orphan_scan_lock(osb, &seqno);
        if (status < 0) {
                if (status != -EAGAIN)
                        mlog_errno(status);
                goto out;
        }
 
+       /* Do not queue the tasks if the volume is being umounted */
+       if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
+               goto unlock;
+
        if (os->os_seqno != seqno) {
                os->os_seqno = seqno;
                goto unlock;
@@ -1903,7 +1910,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
        os->os_count++;
        os->os_scantime = CURRENT_TIME;
 unlock:
-       ocfs2_orphan_scan_unlock(osb, seqno, DLM_LOCK_EX);
+       ocfs2_orphan_scan_unlock(osb, seqno);
 out:
        return;
 }
@@ -1920,8 +1927,9 @@ void ocfs2_orphan_scan_work(struct work_struct *work)
 
        mutex_lock(&os->os_lock);
        ocfs2_queue_orphan_scan(osb);
-       schedule_delayed_work(&os->os_orphan_scan_work,
-                             ocfs2_orphan_scan_timeout());
+       if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
+               schedule_delayed_work(&os->os_orphan_scan_work,
+                                     ocfs2_orphan_scan_timeout());
        mutex_unlock(&os->os_lock);
 }
 
@@ -1930,26 +1938,33 @@ void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
        struct ocfs2_orphan_scan *os;
 
        os = &osb->osb_orphan_scan;
-       mutex_lock(&os->os_lock);
-       cancel_delayed_work(&os->os_orphan_scan_work);
-       mutex_unlock(&os->os_lock);
+       if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) {
+               atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
+               mutex_lock(&os->os_lock);
+               cancel_delayed_work(&os->os_orphan_scan_work);
+               mutex_unlock(&os->os_lock);
+       }
 }
 
-int ocfs2_orphan_scan_init(struct ocfs2_super *osb)
+void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
 {
        struct ocfs2_orphan_scan *os;
 
        os = &osb->osb_orphan_scan;
        os->os_osb = osb;
        os->os_count = 0;
+       os->os_seqno = 0;
        os->os_scantime = CURRENT_TIME;
        mutex_init(&os->os_lock);
-
-       INIT_DELAYED_WORK(&os->os_orphan_scan_work,
-                         ocfs2_orphan_scan_work);
-       schedule_delayed_work(&os->os_orphan_scan_work,
-                             ocfs2_orphan_scan_timeout());
-       return 0;
+       INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
+
+       if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
+               atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
+       else {
+               atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
+               schedule_delayed_work(&os->os_orphan_scan_work,
+                                     ocfs2_orphan_scan_timeout());
+       }
 }
 
 struct ocfs2_orphan_filldir_priv {
index 61045eeb3f6ea0c83f45e7e012b2aab1da34626a..5432c7f79cc6a959d3c39cd2cbcb41663ef599b5 100644 (file)
@@ -144,7 +144,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
 }
 
 /* Exported only for the journal struct init code in super.c. Do not call. */
-int ocfs2_orphan_scan_init(struct ocfs2_super *osb);
+void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
 void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
 void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
 
index 33464c6b60a2ca91252af9d4f588398379089c04..8601f934010b80e37c528b326e9121c72213ea7b 100644 (file)
@@ -118,7 +118,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
        mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
             dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
 
-       status = ocfs2_inode_lock(dir, NULL, 0);
+       status = ocfs2_inode_lock_nested(dir, NULL, 0, OI_LS_PARENT);
        if (status < 0) {
                if (status != -ENOENT)
                        mlog_errno(status);
@@ -636,7 +636,7 @@ static int ocfs2_link(struct dentry *old_dentry,
        if (S_ISDIR(inode->i_mode))
                return -EPERM;
 
-       err = ocfs2_inode_lock(dir, &parent_fe_bh, 1);
+       err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT);
        if (err < 0) {
                if (err != -ENOENT)
                        mlog_errno(err);
@@ -800,7 +800,8 @@ static int ocfs2_unlink(struct inode *dir,
                return -EPERM;
        }
 
-       status = ocfs2_inode_lock(dir, &parent_node_bh, 1);
+       status = ocfs2_inode_lock_nested(dir, &parent_node_bh, 1,
+                                        OI_LS_PARENT);
        if (status < 0) {
                if (status != -ENOENT)
                        mlog_errno(status);
@@ -978,7 +979,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
                        inode1 = tmpinode;
                }
                /* lock id2 */
-               status = ocfs2_inode_lock(inode2, bh2, 1);
+               status = ocfs2_inode_lock_nested(inode2, bh2, 1,
+                                                OI_LS_RENAME1);
                if (status < 0) {
                        if (status != -ENOENT)
                                mlog_errno(status);
@@ -987,7 +989,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
        }
 
        /* lock id1 */
-       status = ocfs2_inode_lock(inode1, bh1, 1);
+       status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_RENAME2);
        if (status < 0) {
                /*
                 * An error return must mean that no cluster locks
@@ -1103,7 +1105,8 @@ static int ocfs2_rename(struct inode *old_dir,
         * won't have to concurrently downconvert the inode and the
         * dentry locks.
         */
-       status = ocfs2_inode_lock(old_inode, &old_inode_bh, 1);
+       status = ocfs2_inode_lock_nested(old_inode, &old_inode_bh, 1,
+                                        OI_LS_PARENT);
        if (status < 0) {
                if (status != -ENOENT)
                        mlog_errno(status);
index 18c1d9ec1c93cdc5421150e7ec39e45d4162d737..c9345ebb849343873adf6b2b67ae226f8181f65e 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/workqueue.h>
 #include <linux/kref.h>
 #include <linux/mutex.h>
+#include <linux/lockdep.h>
 #ifndef CONFIG_OCFS2_COMPAT_JBD
 # include <linux/jbd2.h>
 #else
@@ -152,6 +153,14 @@ struct ocfs2_lock_res {
        unsigned int             l_lock_max_exmode;        /* Max wait for EX */
        unsigned int             l_lock_refresh;           /* Disk refreshes */
 #endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       struct lockdep_map       l_lockdep_map;
+#endif
+};
+
+enum ocfs2_orphan_scan_state {
+       ORPHAN_SCAN_ACTIVE,
+       ORPHAN_SCAN_INACTIVE
 };
 
 struct ocfs2_orphan_scan {
@@ -162,6 +171,7 @@ struct ocfs2_orphan_scan {
        struct timespec         os_scantime;  /* time this node ran the scan */
        u32                     os_count;      /* tracks node specific scans */
        u32                     os_seqno;       /* tracks cluster wide scans */
+       atomic_t                os_state;              /* ACTIVE or INACTIVE */
 };
 
 struct ocfs2_dlm_debug {
index fcd120f1493afb0ba8838bf543495eadb2b639b8..3f661376a2ded76e08769f1766de3c1523d448d9 100644 (file)
@@ -236,6 +236,16 @@ static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
        return dlm_status_to_errno(lksb->lksb_o2dlm.status);
 }
 
+/*
+ * o2dlm always has a "valid" LVB. If the dlm loses track of the LVB
+ * contents, it will zero out the LVB.  Thus the caller can always trust
+ * the contents.
+ */
+static int o2cb_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
+{
+       return 1;
+}
+
 static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb)
 {
        return (void *)(lksb->lksb_o2dlm.lvb);
@@ -354,6 +364,7 @@ static struct ocfs2_stack_operations o2cb_stack_ops = {
        .dlm_lock       = o2cb_dlm_lock,
        .dlm_unlock     = o2cb_dlm_unlock,
        .lock_status    = o2cb_dlm_lock_status,
+       .lvb_valid      = o2cb_dlm_lvb_valid,
        .lock_lvb       = o2cb_dlm_lvb,
        .dump_lksb      = o2cb_dump_lksb,
 };
index 9b76d41a8ac6ed2898021bcc255ffa8e159b61cb..ff4c798a5635f23d64739356b2fc8277c370e1b9 100644 (file)
@@ -738,6 +738,13 @@ static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
        return lksb->lksb_fsdlm.sb_status;
 }
 
+static int user_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
+{
+       int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID;
+
+       return !invalid;
+}
+
 static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb)
 {
        if (!lksb->lksb_fsdlm.sb_lvbptr)
@@ -873,6 +880,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
        .dlm_lock       = user_dlm_lock,
        .dlm_unlock     = user_dlm_unlock,
        .lock_status    = user_dlm_lock_status,
+       .lvb_valid      = user_dlm_lvb_valid,
        .lock_lvb       = user_dlm_lvb,
        .plock          = user_plock,
        .dump_lksb      = user_dlm_dump_lksb,
index 68b668b0e60a0a231436fcde4e41bd23083a032d..3f2f1c45b7b6bf4abf6ac9bbaddd3e24f456171a 100644 (file)
@@ -6,7 +6,7 @@
  * Code which implements an OCFS2 specific interface to underlying
  * cluster stacks.
  *
- * Copyright (C) 2007 Oracle.  All rights reserved.
+ * Copyright (C) 2007, 2009 Oracle.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -271,11 +271,12 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
 }
 EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status);
 
-/*
- * Why don't we cast to ocfs2_meta_lvb?  The "clean" answer is that we
- * don't cast at the glue level.  The real answer is that the header
- * ordering is nigh impossible.
- */
+int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
+{
+       return active_stack->sp_ops->lvb_valid(lksb);
+}
+EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb_valid);
+
 void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb)
 {
        return active_stack->sp_ops->lock_lvb(lksb);
index c571af375ef87b98e6e24aad5d44c6fd7e114163..03a44d60eac91bc83699371f66f2f6cc333962fa 100644 (file)
@@ -185,6 +185,11 @@ struct ocfs2_stack_operations {
         */
        int (*lock_status)(union ocfs2_dlm_lksb *lksb);
 
+       /*
+        * Return non-zero if the LVB is valid.
+        */
+       int (*lvb_valid)(union ocfs2_dlm_lksb *lksb);
+
        /*
         * Pull the lvb pointer off of the stack-specific lksb.
         */
@@ -252,6 +257,7 @@ int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn,
                     struct ocfs2_lock_res *astarg);
 
 int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb);
+int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb);
 void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb);
 void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb);
 
index 8439f6b324b91ef1fc7bf2f459f569a49365ee77..73a16d4666dc463472e9254449ac98c9a6363188 100644 (file)
@@ -923,14 +923,23 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
                                         int nr)
 {
        struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
+       int ret;
 
        if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
                return 0;
-       if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data)
+
+       if (!buffer_jbd(bg_bh))
                return 1;
 
+       jbd_lock_bh_state(bg_bh);
        bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
-       return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
+       if (bg)
+               ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
+       else
+               ret = 1;
+       jbd_unlock_bh_state(bg_bh);
+
+       return ret;
 }
 
 static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
@@ -1885,6 +1894,7 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
        unsigned int tmp;
        int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
        struct ocfs2_group_desc *undo_bg = NULL;
+       int cluster_bitmap = 0;
 
        mlog_entry_void();
 
@@ -1905,18 +1915,28 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
        }
 
        if (ocfs2_is_cluster_bitmap(alloc_inode))
-               undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data;
+               cluster_bitmap = 1;
+
+       if (cluster_bitmap) {
+               jbd_lock_bh_state(group_bh);
+               undo_bg = (struct ocfs2_group_desc *)
+                                       bh2jh(group_bh)->b_committed_data;
+               BUG_ON(!undo_bg);
+       }
 
        tmp = num_bits;
        while(tmp--) {
                ocfs2_clear_bit((bit_off + tmp),
                                (unsigned long *) bg->bg_bitmap);
-               if (ocfs2_is_cluster_bitmap(alloc_inode))
+               if (cluster_bitmap)
                        ocfs2_set_bit(bit_off + tmp,
                                      (unsigned long *) undo_bg->bg_bitmap);
        }
        le16_add_cpu(&bg->bg_free_bits_count, num_bits);
 
+       if (cluster_bitmap)
+               jbd_unlock_bh_state(group_bh);
+
        status = ocfs2_journal_dirty(handle, group_bh);
        if (status < 0)
                mlog_errno(status);
index 0d3ed7407a043effbab862610ca5847589eae396..7efb349fb9bdafe0678bd63786db244bba410b4a 100644 (file)
@@ -205,11 +205,10 @@ static const match_table_t tokens = {
 #ifdef CONFIG_DEBUG_FS
 static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
 {
-       int out = 0;
-       int i;
        struct ocfs2_cluster_connection *cconn = osb->cconn;
        struct ocfs2_recovery_map *rm = osb->recovery_map;
-       struct ocfs2_orphan_scan *os;
+       struct ocfs2_orphan_scan *os = &osb->osb_orphan_scan;
+       int i, out = 0;
 
        out += snprintf(buf + out, len - out,
                        "%10s => Id: %-s  Uuid: %-s  Gen: 0x%X  Label: %-s\n",
@@ -234,20 +233,24 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
                        "%10s => Opts: 0x%lX  AtimeQuanta: %u\n", "Mount",
                        osb->s_mount_opt, osb->s_atime_quantum);
 
-       out += snprintf(buf + out, len - out,
-                       "%10s => Stack: %s  Name: %*s  Version: %d.%d\n",
-                       "Cluster",
-                       (*osb->osb_cluster_stack == '\0' ?
-                        "o2cb" : osb->osb_cluster_stack),
-                       cconn->cc_namelen, cconn->cc_name,
-                       cconn->cc_version.pv_major, cconn->cc_version.pv_minor);
+       if (cconn) {
+               out += snprintf(buf + out, len - out,
+                               "%10s => Stack: %s  Name: %*s  "
+                               "Version: %d.%d\n", "Cluster",
+                               (*osb->osb_cluster_stack == '\0' ?
+                                "o2cb" : osb->osb_cluster_stack),
+                               cconn->cc_namelen, cconn->cc_name,
+                               cconn->cc_version.pv_major,
+                               cconn->cc_version.pv_minor);
+       }
 
        spin_lock(&osb->dc_task_lock);
        out += snprintf(buf + out, len - out,
                        "%10s => Pid: %d  Count: %lu  WakeSeq: %lu  "
                        "WorkSeq: %lu\n", "DownCnvt",
-                       task_pid_nr(osb->dc_task), osb->blocked_lock_count,
-                       osb->dc_wake_sequence, osb->dc_work_sequence);
+                       (osb->dc_task ?  task_pid_nr(osb->dc_task) : -1),
+                       osb->blocked_lock_count, osb->dc_wake_sequence,
+                       osb->dc_work_sequence);
        spin_unlock(&osb->dc_task_lock);
 
        spin_lock(&osb->osb_lock);
@@ -267,14 +270,15 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
 
        out += snprintf(buf + out, len - out,
                        "%10s => Pid: %d  Interval: %lu  Needs: %d\n", "Commit",
-                       task_pid_nr(osb->commit_task), osb->osb_commit_interval,
+                       (osb->commit_task ? task_pid_nr(osb->commit_task) : -1),
+                       osb->osb_commit_interval,
                        atomic_read(&osb->needs_checkpoint));
 
        out += snprintf(buf + out, len - out,
-                       "%10s => State: %d  NumTxns: %d  TxnId: %lu\n",
+                       "%10s => State: %d  TxnId: %lu  NumTxns: %d\n",
                        "Journal", osb->journal->j_state,
-                       atomic_read(&osb->journal->j_num_trans),
-                       osb->journal->j_trans_id);
+                       osb->journal->j_trans_id,
+                       atomic_read(&osb->journal->j_num_trans));
 
        out += snprintf(buf + out, len - out,
                        "%10s => GlobalAllocs: %d  LocalAllocs: %d  "
@@ -300,9 +304,18 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
                        atomic_read(&osb->s_num_inodes_stolen));
        spin_unlock(&osb->osb_lock);
 
+       out += snprintf(buf + out, len - out, "OrphanScan => ");
+       out += snprintf(buf + out, len - out, "Local: %u  Global: %u ",
+                       os->os_count, os->os_seqno);
+       out += snprintf(buf + out, len - out, " Last Scan: ");
+       if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
+               out += snprintf(buf + out, len - out, "Disabled\n");
+       else
+               out += snprintf(buf + out, len - out, "%lu seconds ago\n",
+                               (get_seconds() - os->os_scantime.tv_sec));
+
        out += snprintf(buf + out, len - out, "%10s => %3s  %10s\n",
                        "Slots", "Num", "RecoGen");
-
        for (i = 0; i < osb->max_slots; ++i) {
                out += snprintf(buf + out, len - out,
                                "%10s  %c %3d  %10d\n",
@@ -311,13 +324,6 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
                                i, osb->slot_recovery_generations[i]);
        }
 
-       os = &osb->osb_orphan_scan;
-       out += snprintf(buf + out, len - out, "Orphan Scan=> ");
-       out += snprintf(buf + out, len - out, "Local: %u  Global: %u ",
-                       os->os_count, os->os_seqno);
-       out += snprintf(buf + out, len - out, " Last Scan: %lu seconds ago\n",
-                       (get_seconds() - os->os_scantime.tv_sec));
-
        return out;
 }
 
@@ -1175,6 +1181,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
        atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS);
        wake_up(&osb->osb_mount_event);
 
+       /* Start this when the mount is almost sure of being successful */
+       ocfs2_orphan_scan_init(osb);
+
        mlog_exit(status);
        return status;
 
@@ -1810,14 +1819,15 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 
        debugfs_remove(osb->osb_ctxt);
 
+       /* Orphan scan should be stopped as early as possible */
+       ocfs2_orphan_scan_stop(osb);
+
        ocfs2_disable_quotas(osb);
 
        ocfs2_shutdown_local_alloc(osb);
 
        ocfs2_truncate_log_shutdown(osb);
 
-       ocfs2_orphan_scan_stop(osb);
-
        /* This will disable recovery and flush any recovery work. */
        ocfs2_recovery_exit(osb);
 
@@ -1978,13 +1988,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
                goto bail;
        }
 
-       status = ocfs2_orphan_scan_init(osb);
-       if (status) {
-               mlog(ML_ERROR, "Unable to initialize delayed orphan scan\n");
-               mlog_errno(status);
-               goto bail;
-       }
-
        init_waitqueue_head(&osb->checkpoint_event);
        atomic_set(&osb->needs_checkpoint, 0);
 
index ab713ebdd5468222899654f706c4b80955a9bfe8..40e53702948cdbc2ed92ce1e22f0827f76017417 100644 (file)
@@ -50,6 +50,10 @@ static inline int is_in_system_inode_array(struct ocfs2_super *osb,
                                           int type,
                                           u32 slot);
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES];
+#endif
+
 static inline int is_global_system_inode(int type)
 {
        return type >= OCFS2_FIRST_ONLINE_SYSTEM_INODE &&
@@ -118,6 +122,21 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
                inode = NULL;
                goto bail;
        }
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       if (type == LOCAL_USER_QUOTA_SYSTEM_INODE ||
+           type == LOCAL_GROUP_QUOTA_SYSTEM_INODE ||
+           type == JOURNAL_SYSTEM_INODE) {
+               /* Ignore inode lock on these inodes as the lock does not
+                * really belong to any process and lockdep cannot handle
+                * that */
+               OCFS2_I(inode)->ip_inode_lockres.l_lockdep_map.key = NULL;
+       } else {
+               lockdep_init_map(&OCFS2_I(inode)->ip_inode_lockres.
+                                                               l_lockdep_map,
+                                ocfs2_system_inodes[type].si_name,
+                                &ocfs2_sysfile_cluster_lock_key[type], 0);
+       }
+#endif
 bail:
 
        return inode;
index 7200e23d9258ce09a76caacee7f1a71e7b25ce7f..dd98e8076024d638cb0987b57b24269d20588587 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -378,63 +378,63 @@ SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64);
 #endif
 #endif /* BITS_PER_LONG == 32 */
 
-SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
+
+int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
-       struct file *file;
-       struct inode *inode;
-       long ret = -EINVAL;
+       struct inode *inode = file->f_path.dentry->d_inode;
+       long ret;
 
        if (offset < 0 || len <= 0)
-               goto out;
+               return -EINVAL;
 
        /* Return error if mode is not supported */
-       ret = -EOPNOTSUPP;
        if (mode && !(mode & FALLOC_FL_KEEP_SIZE))
-               goto out;
+               return -EOPNOTSUPP;
 
-       ret = -EBADF;
-       file = fget(fd);
-       if (!file)
-               goto out;
        if (!(file->f_mode & FMODE_WRITE))
-               goto out_fput;
+               return -EBADF;
        /*
         * Revalidate the write permissions, in case security policy has
         * changed since the files were opened.
         */
        ret = security_file_permission(file, MAY_WRITE);
        if (ret)
-               goto out_fput;
+               return ret;
 
-       inode = file->f_path.dentry->d_inode;
-
-       ret = -ESPIPE;
        if (S_ISFIFO(inode->i_mode))
-               goto out_fput;
+               return -ESPIPE;
 
-       ret = -ENODEV;
        /*
         * Let individual file system decide if it supports preallocation
         * for directories or not.
         */
        if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
-               goto out_fput;
+               return -ENODEV;
 
-       ret = -EFBIG;
        /* Check for wrap through zero too */
        if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
-               goto out_fput;
+               return -EFBIG;
 
-       if (inode->i_op->fallocate)
-               ret = inode->i_op->fallocate(inode, mode, offset, len);
-       else
-               ret = -EOPNOTSUPP;
+       if (!inode->i_op->fallocate)
+               return -EOPNOTSUPP;
 
-out_fput:
-       fput(file);
-out:
-       return ret;
+       return inode->i_op->fallocate(inode, mode, offset, len);
 }
+
+SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
+{
+       struct file *file;
+       int error = -EBADF;
+
+       file = fget(fd);
+       if (file) {
+               error = do_fallocate(file, mode, offset, len);
+               fput(file);
+       }
+
+       return error;
+}
+
 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
 asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len)
 {
index 6fd0f47e45db72b252808386450e468ad425af61..a14d6cd9eeda2670251c0ec3377518912cb465a2 100644 (file)
@@ -1131,8 +1131,6 @@ static void init_inode(struct inode *inode, struct treepath *path)
        REISERFS_I(inode)->i_trans_id = 0;
        REISERFS_I(inode)->i_jl = NULL;
        mutex_init(&(REISERFS_I(inode)->i_mmap));
-       reiserfs_init_acl_access(inode);
-       reiserfs_init_acl_default(inode);
        reiserfs_init_xattr_rwsem(inode);
 
        if (stat_data_v1(ih)) {
@@ -1834,8 +1832,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
            REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
        sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
        mutex_init(&(REISERFS_I(inode)->i_mmap));
-       reiserfs_init_acl_access(inode);
-       reiserfs_init_acl_default(inode);
        reiserfs_init_xattr_rwsem(inode);
 
        /* key to search for correct place for new stat data */
index 238e9d9b31e04bf877f1644630990672d1c5daba..18b315d3d104ea0cebfbb5a1991356038dc8c4fd 100644 (file)
@@ -82,7 +82,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
                if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) {
                        printk
                            ("reiserfs_resize: unable to allocate memory for journal bitmaps\n");
-                       unlock_super(s);
                        return -ENOMEM;
                }
                /* the new journal bitmaps are zero filled, now we copy in the bitmap
index 2969773cfc22abb000e2eceb90f0011ef0fd2f34..d3aeb061612bf3818ec60972f6e78d569239d065 100644 (file)
@@ -529,10 +529,6 @@ static void init_once(void *foo)
 
        INIT_LIST_HEAD(&ei->i_prealloc_list);
        inode_init_once(&ei->vfs_inode);
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
-       ei->i_acl_access = NULL;
-       ei->i_acl_default = NULL;
-#endif
 }
 
 static int init_inodecache(void)
@@ -580,25 +576,6 @@ static void reiserfs_dirty_inode(struct inode *inode)
        reiserfs_write_unlock(inode->i_sb);
 }
 
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
-static void reiserfs_clear_inode(struct inode *inode)
-{
-       struct posix_acl *acl;
-
-       acl = REISERFS_I(inode)->i_acl_access;
-       if (acl && !IS_ERR(acl))
-               posix_acl_release(acl);
-       REISERFS_I(inode)->i_acl_access = NULL;
-
-       acl = REISERFS_I(inode)->i_acl_default;
-       if (acl && !IS_ERR(acl))
-               posix_acl_release(acl);
-       REISERFS_I(inode)->i_acl_default = NULL;
-}
-#else
-#define reiserfs_clear_inode NULL
-#endif
-
 #ifdef CONFIG_QUOTA
 static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
                                    size_t, loff_t);
@@ -612,7 +589,6 @@ static const struct super_operations reiserfs_sops = {
        .write_inode = reiserfs_write_inode,
        .dirty_inode = reiserfs_dirty_inode,
        .delete_inode = reiserfs_delete_inode,
-       .clear_inode = reiserfs_clear_inode,
        .put_super = reiserfs_put_super,
        .write_super = reiserfs_write_super,
        .sync_fs = reiserfs_sync_fs,
index c303c426fe2ba6ffec8d708323026f36fbd40f90..35d6e672a2796f7aaed49b31f96621c25593c51b 100644 (file)
@@ -188,29 +188,6 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size)
        return ERR_PTR(-EINVAL);
 }
 
-static inline void iset_acl(struct inode *inode, struct posix_acl **i_acl,
-                           struct posix_acl *acl)
-{
-       spin_lock(&inode->i_lock);
-       if (*i_acl != ERR_PTR(-ENODATA))
-               posix_acl_release(*i_acl);
-       *i_acl = posix_acl_dup(acl);
-       spin_unlock(&inode->i_lock);
-}
-
-static inline struct posix_acl *iget_acl(struct inode *inode,
-                                        struct posix_acl **i_acl)
-{
-       struct posix_acl *acl = ERR_PTR(-ENODATA);
-
-       spin_lock(&inode->i_lock);
-       if (*i_acl != ERR_PTR(-ENODATA))
-               acl = posix_acl_dup(*i_acl);
-       spin_unlock(&inode->i_lock);
-
-       return acl;
-}
-
 /*
  * Inode operation get_posix_acl().
  *
@@ -220,34 +197,29 @@ static inline struct posix_acl *iget_acl(struct inode *inode,
 struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 {
        char *name, *value;
-       struct posix_acl *acl, **p_acl;
+       struct posix_acl *acl;
        int size;
        int retval;
-       struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
+
+       acl = get_cached_acl(inode, type);
+       if (acl != ACL_NOT_CACHED)
+               return acl;
 
        switch (type) {
        case ACL_TYPE_ACCESS:
                name = POSIX_ACL_XATTR_ACCESS;
-               p_acl = &reiserfs_i->i_acl_access;
                break;
        case ACL_TYPE_DEFAULT:
                name = POSIX_ACL_XATTR_DEFAULT;
-               p_acl = &reiserfs_i->i_acl_default;
                break;
        default:
-               return ERR_PTR(-EINVAL);
+               BUG();
        }
 
-       acl = iget_acl(inode, p_acl);
-       if (acl && !IS_ERR(acl))
-               return acl;
-       else if (PTR_ERR(acl) == -ENODATA)
-               return NULL;
-
        size = reiserfs_xattr_get(inode, name, NULL, 0);
        if (size < 0) {
                if (size == -ENODATA || size == -ENOSYS) {
-                       *p_acl = ERR_PTR(-ENODATA);
+                       set_cached_acl(inode, type, NULL);
                        return NULL;
                }
                return ERR_PTR(size);
@@ -262,14 +234,13 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
                /* This shouldn't actually happen as it should have
                   been caught above.. but just in case */
                acl = NULL;
-               *p_acl = ERR_PTR(-ENODATA);
        } else if (retval < 0) {
                acl = ERR_PTR(retval);
        } else {
                acl = posix_acl_from_disk(value, retval);
-               if (!IS_ERR(acl))
-                       iset_acl(inode, p_acl, acl);
        }
+       if (!IS_ERR(acl))
+               set_cached_acl(inode, type, acl);
 
        kfree(value);
        return acl;
@@ -287,10 +258,8 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 {
        char *name;
        void *value = NULL;
-       struct posix_acl **p_acl;
        size_t size = 0;
        int error;
-       struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
 
        if (S_ISLNK(inode->i_mode))
                return -EOPNOTSUPP;
@@ -298,7 +267,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
        switch (type) {
        case ACL_TYPE_ACCESS:
                name = POSIX_ACL_XATTR_ACCESS;
-               p_acl = &reiserfs_i->i_acl_access;
                if (acl) {
                        mode_t mode = inode->i_mode;
                        error = posix_acl_equiv_mode(acl, &mode);
@@ -313,7 +281,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
                break;
        case ACL_TYPE_DEFAULT:
                name = POSIX_ACL_XATTR_DEFAULT;
-               p_acl = &reiserfs_i->i_acl_default;
                if (!S_ISDIR(inode->i_mode))
                        return acl ? -EACCES : 0;
                break;
@@ -346,7 +313,7 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
        kfree(value);
 
        if (!error)
-               iset_acl(inode, p_acl, acl);
+               set_cached_acl(inode, type, acl);
 
        return error;
 }
@@ -379,11 +346,8 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
        }
 
        acl = reiserfs_get_acl(dir, ACL_TYPE_DEFAULT);
-       if (IS_ERR(acl)) {
-               if (PTR_ERR(acl) == -ENODATA)
-                       goto apply_umask;
+       if (IS_ERR(acl))
                return PTR_ERR(acl);
-       }
 
        if (acl) {
                struct posix_acl *acl_copy;
index d40d53a22fb595d3489e1273ac6efb26481b39e1..2761d3e22ed92321f204a34f8de08491db6f3083 100644 (file)
@@ -608,6 +608,7 @@ void emergency_remount(void)
 
 static DEFINE_IDA(unnamed_dev_ida);
 static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
+static int unnamed_dev_start = 0; /* don't bother trying below it */
 
 int set_anon_super(struct super_block *s, void *data)
 {
@@ -618,7 +619,9 @@ int set_anon_super(struct super_block *s, void *data)
        if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0)
                return -ENOMEM;
        spin_lock(&unnamed_dev_lock);
-       error = ida_get_new(&unnamed_dev_ida, &dev);
+       error = ida_get_new_above(&unnamed_dev_ida, unnamed_dev_start, &dev);
+       if (!error)
+               unnamed_dev_start = dev + 1;
        spin_unlock(&unnamed_dev_lock);
        if (error == -EAGAIN)
                /* We raced and lost with another CPU. */
@@ -629,6 +632,8 @@ int set_anon_super(struct super_block *s, void *data)
        if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
                spin_lock(&unnamed_dev_lock);
                ida_remove(&unnamed_dev_ida, dev);
+               if (unnamed_dev_start > dev)
+                       unnamed_dev_start = dev;
                spin_unlock(&unnamed_dev_lock);
                return -EMFILE;
        }
@@ -645,6 +650,8 @@ void kill_anon_super(struct super_block *sb)
        generic_shutdown_super(sb);
        spin_lock(&unnamed_dev_lock);
        ida_remove(&unnamed_dev_ida, slot);
+       if (slot < unnamed_dev_start)
+               unnamed_dev_start = slot;
        spin_unlock(&unnamed_dev_lock);
 }
 
index cfd31e229c89d48e3315edd8f96613a262c16979..adafcf556531c0d4c81a7843a36f573289781875 100644 (file)
@@ -55,9 +55,9 @@
  * ACL support is not implemented.
  */
 
+#include "ubifs.h"
 #include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
-#include "ubifs.h"
 
 /*
  * Limit the number of extended attributes per inode so that the total size
index e48e9a3af76312d683723658ed42c51db7d8a740..1e068535b58bfc78b0970dc3ccb34e03cc7c1ecb 100644 (file)
@@ -238,7 +238,7 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb,
 
        mutex_lock(&sbi->s_alloc_mutex);
        part_len = sbi->s_partmaps[partition].s_partition_len;
-       if (first_block < 0 || first_block >= part_len)
+       if (first_block >= part_len)
                goto out;
 
        if (first_block + block_count > part_len)
@@ -297,7 +297,7 @@ static int udf_bitmap_new_block(struct super_block *sb,
        mutex_lock(&sbi->s_alloc_mutex);
 
 repeat:
-       if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len)
+       if (goal >= sbi->s_partmaps[partition].s_partition_len)
                goal = 0;
 
        nr_groups = bitmap->s_nr_groups;
@@ -666,8 +666,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
        int8_t etype = -1;
        struct udf_inode_info *iinfo;
 
-       if (first_block < 0 ||
-               first_block >= sbi->s_partmaps[partition].s_partition_len)
+       if (first_block >= sbi->s_partmaps[partition].s_partition_len)
                return 0;
 
        iinfo = UDF_I(table);
@@ -743,7 +742,7 @@ static int udf_table_new_block(struct super_block *sb,
                return newblock;
 
        mutex_lock(&sbi->s_alloc_mutex);
-       if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len)
+       if (goal >= sbi->s_partmaps[partition].s_partition_len)
                goal = 0;
 
        /* We search for the closest matching block to goal. If we find
index 703843f30ffd383f556639af7146c0d99a212c25..1b88fd5df05d6d7f6694a3d9284162d4ed386a0b 100644 (file)
@@ -56,7 +56,12 @@ unsigned long udf_get_last_block(struct super_block *sb)
        struct block_device *bdev = sb->s_bdev;
        unsigned long lblock = 0;
 
-       if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock))
+       /*
+        * ioctl failed or returned obviously bogus value?
+        * Try using the device size...
+        */
+       if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock) ||
+           lblock == 0)
                lblock = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
 
        if (lblock)
index 1e9d1246eebc8e946e508e1c24f070f5aa28c394..b23a5450644608fddbc7d5146e6c20f638209450 100644 (file)
 #include <linux/posix_acl_xattr.h>
 
 
-#define XFS_ACL_NOT_CACHED     ((void *)-1)
-
 /*
  * Locking scheme:
  *  - all ACL updates are protected by inode->i_mutex, which is taken before
  *    calling into this file.
- *  - access and updates to the ip->i_acl and ip->i_default_acl pointers are
- *    protected by inode->i_lock.
  */
 
 STATIC struct posix_acl *
@@ -102,59 +98,35 @@ xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
        }
 }
 
-/*
- * Update the cached ACL pointer in the inode.
- *
- * Because we don't hold any locks while reading/writing the attribute
- * from/to disk another thread could have raced and updated the cached
- * ACL value before us. In that case we release the previous cached value
- * and update it with our new value.
- */
-STATIC void
-xfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl,
-               struct posix_acl *acl)
-{
-       spin_lock(&inode->i_lock);
-       if (*p_acl && *p_acl != XFS_ACL_NOT_CACHED)
-               posix_acl_release(*p_acl);
-       *p_acl = posix_acl_dup(acl);
-       spin_unlock(&inode->i_lock);
-}
-
 struct posix_acl *
 xfs_get_acl(struct inode *inode, int type)
 {
        struct xfs_inode *ip = XFS_I(inode);
-       struct posix_acl *acl = NULL, **p_acl;
+       struct posix_acl *acl;
        struct xfs_acl *xfs_acl;
        int len = sizeof(struct xfs_acl);
        char *ea_name;
        int error;
 
+       acl = get_cached_acl(inode, type);
+       if (acl != ACL_NOT_CACHED)
+               return acl;
+
        switch (type) {
        case ACL_TYPE_ACCESS:
                ea_name = SGI_ACL_FILE;
-               p_acl = &ip->i_acl;
                break;
        case ACL_TYPE_DEFAULT:
                ea_name = SGI_ACL_DEFAULT;
-               p_acl = &ip->i_default_acl;
                break;
        default:
-               return ERR_PTR(-EINVAL);
+               BUG();
        }
 
-       spin_lock(&inode->i_lock);
-       if (*p_acl != XFS_ACL_NOT_CACHED)
-               acl = posix_acl_dup(*p_acl);
-       spin_unlock(&inode->i_lock);
-
        /*
         * If we have a cached ACLs value just return it, not need to
         * go out to the disk.
         */
-       if (acl)
-               return acl;
 
        xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
        if (!xfs_acl)
@@ -165,7 +137,7 @@ xfs_get_acl(struct inode *inode, int type)
                /*
                 * If the attribute doesn't exist make sure we have a negative
                 * cache entry, for any other error assume it is transient and
-                * leave the cache entry as XFS_ACL_NOT_CACHED.
+                * leave the cache entry as ACL_NOT_CACHED.
                 */
                if (error == -ENOATTR) {
                        acl = NULL;
@@ -179,7 +151,7 @@ xfs_get_acl(struct inode *inode, int type)
                goto out;
 
  out_update_cache:
-       xfs_update_cached_acl(inode, p_acl, acl);
+       set_cached_acl(inode, type, acl);
  out:
        kfree(xfs_acl);
        return acl;
@@ -189,7 +161,6 @@ STATIC int
 xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 {
        struct xfs_inode *ip = XFS_I(inode);
-       struct posix_acl **p_acl;
        char *ea_name;
        int error;
 
@@ -199,13 +170,11 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
        switch (type) {
        case ACL_TYPE_ACCESS:
                ea_name = SGI_ACL_FILE;
-               p_acl = &ip->i_acl;
                break;
        case ACL_TYPE_DEFAULT:
                if (!S_ISDIR(inode->i_mode))
                        return acl ? -EACCES : 0;
                ea_name = SGI_ACL_DEFAULT;
-               p_acl = &ip->i_default_acl;
                break;
        default:
                return -EINVAL;
@@ -242,7 +211,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
        }
 
        if (!error)
-               xfs_update_cached_acl(inode, p_acl, acl);
+               set_cached_acl(inode, type, acl);
        return error;
 }
 
@@ -384,30 +353,6 @@ xfs_acl_chmod(struct inode *inode)
        return error;
 }
 
-void
-xfs_inode_init_acls(struct xfs_inode *ip)
-{
-       /*
-        * No need for locking, inode is not live yet.
-        */
-       ip->i_acl = XFS_ACL_NOT_CACHED;
-       ip->i_default_acl = XFS_ACL_NOT_CACHED;
-}
-
-void
-xfs_inode_clear_acls(struct xfs_inode *ip)
-{
-       /*
-        * No need for locking here, the inode is not live anymore
-        * and just about to be freed.
-        */
-       if (ip->i_acl != XFS_ACL_NOT_CACHED)
-               posix_acl_release(ip->i_acl);
-       if (ip->i_default_acl != XFS_ACL_NOT_CACHED)
-               posix_acl_release(ip->i_default_acl);
-}
-
-
 /*
  * System xattr handlers.
  *
index 63dc1f2efad5bbf1c291b79b312d326b3aebc82d..947b150df8ede4576550f6f5d8f10a618348ca7e 100644 (file)
@@ -46,8 +46,6 @@ extern int xfs_check_acl(struct inode *inode, int mask);
 extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
 extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
 extern int xfs_acl_chmod(struct inode *inode);
-extern void xfs_inode_init_acls(struct xfs_inode *ip);
-extern void xfs_inode_clear_acls(struct xfs_inode *ip);
 extern int posix_acl_access_exists(struct inode *inode);
 extern int posix_acl_default_exists(struct inode *inode);
 
@@ -57,8 +55,6 @@ extern struct xattr_handler xfs_xattr_system_handler;
 # define xfs_get_acl(inode, type)                      NULL
 # define xfs_inherit_acl(inode, default_acl)           0
 # define xfs_acl_chmod(inode)                          0
-# define xfs_inode_init_acls(ip)
-# define xfs_inode_clear_acls(ip)
 # define posix_acl_access_exists(inode)                        0
 # define posix_acl_default_exists(inode)               0
 #endif /* CONFIG_XFS_POSIX_ACL */
index 76c540f719e49bb73ef6fd7c99b8d1e0c6d80a54..5fcec6f020a7ddff25df7e54e58750ca651733b1 100644 (file)
@@ -83,7 +83,6 @@ xfs_inode_alloc(
        memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
        ip->i_size = 0;
        ip->i_new_size = 0;
-       xfs_inode_init_acls(ip);
 
        /*
         * Initialize inode's trace buffers.
@@ -560,7 +559,6 @@ xfs_ireclaim(
        ASSERT(atomic_read(&ip->i_pincount) == 0);
        ASSERT(!spin_is_locked(&ip->i_flags_lock));
        ASSERT(completion_done(&ip->i_flush));
-       xfs_inode_clear_acls(ip);
        kmem_zone_free(xfs_inode_zone, ip);
 }
 
index 77016702938b23cca6c9b4e6a4b48e164ae176ac..1804f866a71d285b760db6bd5df1574676ecb898 100644 (file)
@@ -273,11 +273,6 @@ typedef struct xfs_inode {
        /* VFS inode */
        struct inode            i_vnode;        /* embedded VFS inode */
 
-#ifdef CONFIG_XFS_POSIX_ACL
-       struct posix_acl        *i_acl;
-       struct posix_acl        *i_default_acl;
-#endif
-
        /* Trace buffers per inode. */
 #ifdef XFS_INODE_TRACE
        struct ktrace           *i_trace;       /* general inode trace */
index c34b11022908a01c521367ddf85249fcacb98ffe..c65e4ce6c3afa38e41ada87bfac5fd5d29c27f36 100644 (file)
@@ -114,10 +114,13 @@ struct acpi_device_ops {
        acpi_op_notify notify;
 };
 
+#define ACPI_DRIVER_ALL_NOTIFY_EVENTS  0x1     /* system AND device events */
+
 struct acpi_driver {
        char name[80];
        char class[80];
        const struct acpi_device_id *ids; /* Supported Hardware IDs */
+       unsigned int flags;
        struct acpi_device_ops ops;
        struct device_driver drv;
        struct module *owner;
@@ -168,7 +171,7 @@ struct acpi_device_dir {
 
 /* Plug and Play */
 
-typedef char acpi_bus_id[5];
+typedef char acpi_bus_id[8];
 typedef unsigned long acpi_bus_address;
 typedef char acpi_hardware_id[15];
 typedef char acpi_unique_id[9];
@@ -365,10 +368,10 @@ struct acpi_bus_type {
 int register_acpi_bus_type(struct acpi_bus_type *);
 int unregister_acpi_bus_type(struct acpi_bus_type *);
 struct device *acpi_get_physical_device(acpi_handle);
-struct device *acpi_get_physical_pci_device(acpi_handle);
 
 /* helper */
 acpi_handle acpi_get_child(acpi_handle, acpi_integer);
+int acpi_is_root_bridge(acpi_handle);
 acpi_handle acpi_get_pci_rootbridge_handle(unsigned int, unsigned int);
 #define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)((dev)->archdata.acpi_handle))
 
index 0352c8f0b05b5f89182a8f3cce886a0b33d94752..f4906f6568d4142d1eb77c12f5997ba5da8ce5f7 100644 (file)
@@ -57,8 +57,7 @@
  */
 
 #define ACPI_POWER_HID                 "LNXPOWER"
-#define ACPI_PROCESSOR_OBJECT_HID      "ACPI_CPU"
-#define ACPI_PROCESSOR_HID             "ACPI0007"
+#define ACPI_PROCESSOR_OBJECT_HID      "LNXCPU"
 #define ACPI_SYSTEM_HID                        "LNXSYSTM"
 #define ACPI_THERMAL_HID               "LNXTHERM"
 #define ACPI_BUTTON_HID_POWERF         "LNXPWRBN"
@@ -91,17 +90,15 @@ int acpi_pci_link_free_irq(acpi_handle handle);
 
 /* ACPI PCI Interrupt Routing (pci_irq.c) */
 
-int acpi_pci_irq_add_prt(acpi_handle handle, int segment, int bus);
-void acpi_pci_irq_del_prt(int segment, int bus);
+int acpi_pci_irq_add_prt(acpi_handle handle, struct pci_bus *bus);
+void acpi_pci_irq_del_prt(struct pci_bus *bus);
 
 /* ACPI PCI Device Binding (pci_bind.c) */
 
 struct pci_bus;
 
-acpi_status acpi_get_pci_id(acpi_handle handle, struct acpi_pci_id *id);
-int acpi_pci_bind(struct acpi_device *device);
-int acpi_pci_bind_root(struct acpi_device *device, struct acpi_pci_id *id,
-                      struct pci_bus *bus);
+struct pci_dev *acpi_get_pci_dev(acpi_handle);
+int acpi_pci_bind_root(struct acpi_device *device);
 
 /* Arch-defined function to add a bus to the system */
 
index 4927c063347c024b4528af2ecd430eeea391b0eb..baf1e0a9a7ee9f4ccc309cb72e1554e7bdf46ff8 100644 (file)
@@ -258,6 +258,7 @@ DECLARE_PER_CPU(struct acpi_processor *, processors);
 extern struct acpi_processor_errata errata;
 
 void arch_acpi_processor_init_pdc(struct acpi_processor *pr);
+void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr);
 
 #ifdef ARCH_HAS_POWER_INIT
 void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
index af6fe95fd3d004e88e0d2cafa63c682a611e687e..cf7be3dd157bec5572a2b9f3220e4e1274d5f27b 100644 (file)
@@ -3,10 +3,10 @@
 
 #if (defined CONFIG_ACPI_VIDEO || defined CONFIG_ACPI_VIDEO_MODULE)
 extern int acpi_video_register(void);
-extern int acpi_video_exit(void);
+extern void acpi_video_unregister(void);
 #else
 static inline int acpi_video_register(void) { return 0; }
-static inline void acpi_video_exit(void) { return; }
+static inline void acpi_video_unregister(void) { return; }
 #endif
 
 #endif
index 03f22076381ff4df51d2b3fe3db2bbc8fd08e4ad..334a3593cdfd4076e7522a2a40439ba5625559a8 100644 (file)
@@ -57,6 +57,7 @@ header-y += dlmconstants.h
 header-y += dlm_device.h
 header-y += dlm_netlink.h
 header-y += dm-ioctl.h
+header-y += dm-log-userspace.h
 header-y += dn.h
 header-y += dqblk_xfs.h
 header-y += efs_fs_sb.h
index 51b4b0a5ce8cf00b00fd849ee26e0e7bc504a795..34321cfffeab02010ac142f353b06c4cb7e501ad 100644 (file)
@@ -113,9 +113,6 @@ void acpi_irq_stats_init(void);
 extern u32 acpi_irq_handled;
 extern u32 acpi_irq_not_handled;
 
-extern struct acpi_mcfg_allocation *pci_mmcfg_config;
-extern int pci_mmcfg_config_num;
-
 extern int sbf_port;
 extern unsigned long acpi_realmode_flags;
 
@@ -293,7 +290,10 @@ void __init acpi_s4_no_nvs(void);
                                OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL)
 
 extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags);
+extern void acpi_early_init(void);
+
 #else  /* CONFIG_ACPI */
+static inline void acpi_early_init(void) { }
 
 static inline int early_acpi_boot_init(void)
 {
index 4fa2810b675e6a157293a0aa431e1af95d71d99b..3c7a358241a71d78c9aa288f08b1d83a61a1abb8 100644 (file)
@@ -599,6 +599,8 @@ extern void             audit_log_untrustedstring(struct audit_buffer *ab,
 extern void                audit_log_d_path(struct audit_buffer *ab,
                                             const char *prefix,
                                             struct path *path);
+extern void                audit_log_key(struct audit_buffer *ab,
+                                         char *key);
 extern void                audit_log_lost(const char *message);
 extern int                 audit_update_lsm_rules(void);
 
@@ -621,6 +623,7 @@ extern int audit_enabled;
 #define audit_log_n_untrustedstring(a,n,s) do { ; } while (0)
 #define audit_log_untrustedstring(a,s) do { ; } while (0)
 #define audit_log_d_path(b, p, d) do { ; } while (0)
+#define audit_log_key(b, k) do { ; } while (0)
 #define audit_enabled 0
 #endif
 #endif
index b9966e64604e81b965526afcc39b4209e3a8148c..b68d27850d518073d75ff5abfb451263a26803a8 100644 (file)
 #define CN_IDX_BB                      0x5     /* BlackBoard, from the TSP GPL sampling framework */
 #define CN_DST_IDX                     0x6
 #define CN_DST_VAL                     0x1
+#define CN_IDX_DM                      0x7     /* Device Mapper */
+#define CN_VAL_DM_USERSPACE_LOG                0x1
 
-#define CN_NETLINK_USERS               7
+#define CN_NETLINK_USERS               8
 
 /*
  * Maximum connector's message size.
index 49c2362977fde10457c72d00e3672ba71142cd42..0d6310657f32a41cc6f5af7150f13db9c35a51e7 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 
+struct dm_dev;
 struct dm_target;
 struct dm_table;
 struct mapped_device;
@@ -21,6 +22,7 @@ typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
 union map_info {
        void *ptr;
        unsigned long long ll;
+       unsigned flush_request;
 };
 
 /*
@@ -80,6 +82,15 @@ typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd,
 typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm,
                            struct bio_vec *biovec, int max_size);
 
+typedef int (*iterate_devices_callout_fn) (struct dm_target *ti,
+                                          struct dm_dev *dev,
+                                          sector_t physical_start,
+                                          void *data);
+
+typedef int (*dm_iterate_devices_fn) (struct dm_target *ti,
+                                     iterate_devices_callout_fn fn,
+                                     void *data);
+
 /*
  * Returns:
  *    0: The target can handle the next I/O immediately.
@@ -92,7 +103,8 @@ void dm_error(const char *message);
 /*
  * Combine device limits.
  */
-void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev);
+int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
+                        sector_t start, void *data);
 
 struct dm_dev {
        struct block_device *bdev;
@@ -138,23 +150,12 @@ struct target_type {
        dm_ioctl_fn ioctl;
        dm_merge_fn merge;
        dm_busy_fn busy;
+       dm_iterate_devices_fn iterate_devices;
 
        /* For internal device-mapper use. */
        struct list_head list;
 };
 
-struct io_restrictions {
-       unsigned long bounce_pfn;
-       unsigned long seg_boundary_mask;
-       unsigned max_hw_sectors;
-       unsigned max_sectors;
-       unsigned max_segment_size;
-       unsigned short logical_block_size;
-       unsigned short max_hw_segments;
-       unsigned short max_phys_segments;
-       unsigned char no_cluster; /* inverted so that 0 is default */
-};
-
 struct dm_target {
        struct dm_table *table;
        struct target_type *type;
@@ -163,15 +164,18 @@ struct dm_target {
        sector_t begin;
        sector_t len;
 
-       /* FIXME: turn this into a mask, and merge with io_restrictions */
        /* Always a power of 2 */
        sector_t split_io;
 
        /*
-        * These are automatically filled in by
-        * dm_table_get_device.
+        * A number of zero-length barrier requests that will be submitted
+        * to the target for the purpose of flushing cache.
+        *
+        * The request number will be placed in union map_info->flush_request.
+        * It is a responsibility of the target driver to remap these requests
+        * to the real underlying devices.
         */
-       struct io_restrictions limits;
+       unsigned num_flush_requests;
 
        /* target specific data */
        void *private;
@@ -230,6 +234,7 @@ struct gendisk *dm_disk(struct mapped_device *md);
 int dm_suspended(struct mapped_device *md);
 int dm_noflush_suspending(struct dm_target *ti);
 union map_info *dm_get_mapinfo(struct bio *bio);
+union map_info *dm_get_rq_mapinfo(struct request *rq);
 
 /*
  * Geometry functions.
@@ -392,4 +397,12 @@ static inline unsigned long to_bytes(sector_t n)
        return (n << SECTOR_SHIFT);
 }
 
+/*-----------------------------------------------------------------
+ * Helper for block layer and dm core operations
+ *---------------------------------------------------------------*/
+void dm_dispatch_request(struct request *rq);
+void dm_requeue_unmapped_request(struct request *rq);
+void dm_kill_unmapped_request(struct request *rq, int error);
+int dm_underlying_device_busy(struct request_queue *q);
+
 #endif /* _LINUX_DEVICE_MAPPER_H */
index 48e44ee2b466b1a0da649057af1cbb5402dd61b4..2ab84c83c31a9d7d11b8c18ee744cb85ca520fd8 100644 (file)
@@ -123,6 +123,16 @@ struct dm_ioctl {
        __u32 target_count;     /* in/out */
        __s32 open_count;       /* out */
        __u32 flags;            /* in/out */
+
+       /*
+        * event_nr holds either the event number (input and output) or the
+        * udev cookie value (input only).
+        * The DM_DEV_WAIT ioctl takes an event number as input.
+        * The DM_SUSPEND, DM_DEV_REMOVE and DM_DEV_RENAME ioctls
+        * use the field as a cookie to return in the DM_COOKIE
+        * variable with the uevents they issue.
+        * For output, the ioctls return the event number, not the cookie.
+        */
        __u32 event_nr;         /* in/out */
        __u32 padding;
 
@@ -256,9 +266,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY    _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR       4
-#define DM_VERSION_MINOR       14
+#define DM_VERSION_MINOR       15
 #define DM_VERSION_PATCHLEVEL  0
-#define DM_VERSION_EXTRA       "-ioctl (2008-04-23)"
+#define DM_VERSION_EXTRA       "-ioctl (2009-04-01)"
 
 /* Status bits */
 #define DM_READONLY_FLAG       (1 << 0) /* In/Out */
diff --git a/include/linux/dm-log-userspace.h b/include/linux/dm-log-userspace.h
new file mode 100644 (file)
index 0000000..642e301
--- /dev/null
@@ -0,0 +1,386 @@
+/*
+ * Copyright (C) 2006-2009 Red Hat, Inc.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef __DM_LOG_USERSPACE_H__
+#define __DM_LOG_USERSPACE_H__
+
+#include <linux/dm-ioctl.h> /* For DM_UUID_LEN */
+
+/*
+ * The device-mapper userspace log module consists of a kernel component and
+ * a user-space component.  The kernel component implements the API defined
+ * in dm-dirty-log.h.  Its purpose is simply to pass the parameters and
+ * return values of those API functions between kernel and user-space.
+ *
+ * Below are defined the 'request_types' - DM_ULOG_CTR, DM_ULOG_DTR, etc.
+ * These request types represent the different functions in the device-mapper
+ * dirty log API.  Each of these is described in more detail below.
+ *
+ * The user-space program must listen for requests from the kernel (representing
+ * the various API functions) and process them.
+ *
+ * User-space begins by setting up the communication link (error checking
+ * removed for clarity):
+ *     fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+ *     addr.nl_family = AF_NETLINK;
+ *     addr.nl_groups = CN_IDX_DM;
+ *     addr.nl_pid = 0;
+ *     r = bind(fd, (struct sockaddr *) &addr, sizeof(addr));
+ *     opt = addr.nl_groups;
+ *     setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &opt, sizeof(opt));
+ *
+ * User-space will then wait to receive requests from the kernel, which it
+ * will process as described below.  The requests are received in the form,
+ * ((struct dm_ulog_request) + (additional data)).  Depending on the request
+ * type, there may or may not be 'additional data'.  In the descriptions below,
+ * you will see 'Payload-to-userspace' and 'Payload-to-kernel'.  The
+ * 'Payload-to-userspace' is what the kernel sends in 'additional data' as
+ * necessary parameters to complete the request.  The 'Payload-to-kernel' is
+ * the 'additional data' returned to the kernel that contains the necessary
+ * results of the request.  The 'data_size' field in the dm_ulog_request
+ * structure denotes the availability and amount of payload data.
+ */
+
+/*
+ * DM_ULOG_CTR corresponds to (found in dm-dirty-log.h):
+ * int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti,
+ *           unsigned argc, char **argv);
+ *
+ * Payload-to-userspace:
+ *     A single string containing all the argv arguments separated by ' 's
+ * Payload-to-kernel:
+ *     None.  ('data_size' in the dm_ulog_request struct should be 0.)
+ *
+ * The UUID contained in the dm_ulog_request structure is the reference that
+ * will be used by all request types to a specific log.  The constructor must
+ * record this association with the instance created.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_CTR                    1
+
+/*
+ * DM_ULOG_DTR corresponds to (found in dm-dirty-log.h):
+ * void (*dtr)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *     A single string containing all the argv arguments separated by ' 's
+ * Payload-to-kernel:
+ *     None.  ('data_size' in the dm_ulog_request struct should be 0.)
+ *
+ * The UUID contained in the dm_ulog_request structure is all that is
+ * necessary to identify the log instance being destroyed.  There is no
+ * payload data.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_DTR                    2
+
+/*
+ * DM_ULOG_PRESUSPEND corresponds to (found in dm-dirty-log.h):
+ * int (*presuspend)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *     None.
+ * Payload-to-kernel:
+ *     None.
+ *
+ * The UUID contained in the dm_ulog_request structure is all that is
+ * necessary to identify the log instance being presuspended.  There is no
+ * payload data.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_PRESUSPEND             3
+
+/*
+ * DM_ULOG_POSTSUSPEND corresponds to (found in dm-dirty-log.h):
+ * int (*postsuspend)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *     None.
+ * Payload-to-kernel:
+ *     None.
+ *
+ * The UUID contained in the dm_ulog_request structure is all that is
+ * necessary to identify the log instance being postsuspended.  There is no
+ * payload data.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_POSTSUSPEND            4
+
+/*
+ * DM_ULOG_RESUME corresponds to (found in dm-dirty-log.h):
+ * int (*resume)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *     None.
+ * Payload-to-kernel:
+ *     None.
+ *
+ * The UUID contained in the dm_ulog_request structure is all that is
+ * necessary to identify the log instance being resumed.  There is no
+ * payload data.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_RESUME                 5
+
+/*
+ * DM_ULOG_GET_REGION_SIZE corresponds to (found in dm-dirty-log.h):
+ * uint32_t (*get_region_size)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *     None.
+ * Payload-to-kernel:
+ *     uint64_t - contains the region size
+ *
+ * The region size is something that was determined at constructor time.
+ * It is returned in the payload area and 'data_size' is set to
+ * reflect this.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field appropriately.
+ */
+#define DM_ULOG_GET_REGION_SIZE        6
+
+/*
+ * DM_ULOG_IS_CLEAN corresponds to (found in dm-dirty-log.h):
+ * int (*is_clean)(struct dm_dirty_log *log, region_t region);
+ *
+ * Payload-to-userspace:
+ *     uint64_t - the region to get clean status on
+ * Payload-to-kernel:
+ *     int64_t  - 1 if clean, 0 otherwise
+ *
+ * Payload is sizeof(uint64_t) and contains the region for which the clean
+ * status is being requested.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - filling the payload with 0 (not clean) or
+ * 1 (clean), setting 'data_size' and 'error' appropriately.
+ */
+#define DM_ULOG_IS_CLEAN               7
+
+/*
+ * DM_ULOG_IN_SYNC corresponds to (found in dm-dirty-log.h):
+ * int (*in_sync)(struct dm_dirty_log *log, region_t region,
+ *               int can_block);
+ *
+ * Payload-to-userspace:
+ *     uint64_t - the region to get sync status on
+ * Payload-to-kernel:
+ *     int64_t - 1 if in-sync, 0 otherwise
+ *
+ * Exactly the same as 'is_clean' above, except this time asking "has the
+ * region been recovered?" vs. "is the region not being modified?"
+ */
+#define DM_ULOG_IN_SYNC                8
+
+/*
+ * DM_ULOG_FLUSH corresponds to (found in dm-dirty-log.h):
+ * int (*flush)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *     None.
+ * Payload-to-kernel:
+ *     None.
+ *
+ * No incoming or outgoing payload.  Simply flush log state to disk.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_FLUSH                  9
+
+/*
+ * DM_ULOG_MARK_REGION corresponds to (found in dm-dirty-log.h):
+ * void (*mark_region)(struct dm_dirty_log *log, region_t region);
+ *
+ * Payload-to-userspace:
+ *     uint64_t [] - region(s) to mark
+ * Payload-to-kernel:
+ *     None.
+ *
+ * Incoming payload contains the one or more regions to mark dirty.
+ * The number of regions contained in the payload can be determined from
+ * 'data_size/sizeof(uint64_t)'.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_MARK_REGION           10
+
+/*
+ * DM_ULOG_CLEAR_REGION corresponds to (found in dm-dirty-log.h):
+ * void (*clear_region)(struct dm_dirty_log *log, region_t region);
+ *
+ * Payload-to-userspace:
+ *     uint64_t [] - region(s) to clear
+ * Payload-to-kernel:
+ *     None.
+ *
+ * Incoming payload contains the one or more regions to mark clean.
+ * The number of regions contained in the payload can be determined from
+ * 'data_size/sizeof(uint64_t)'.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_CLEAR_REGION          11
+
+/*
+ * DM_ULOG_GET_RESYNC_WORK corresponds to (found in dm-dirty-log.h):
+ * int (*get_resync_work)(struct dm_dirty_log *log, region_t *region);
+ *
+ * Payload-to-userspace:
+ *     None.
+ * Payload-to-kernel:
+ *     {
+ *             int64_t i; -- 1 if recovery necessary, 0 otherwise
+ *             uint64_t r; -- The region to recover if i=1
+ *     }
+ * 'data_size' should be set appropriately.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field appropriately.
+ */
+#define DM_ULOG_GET_RESYNC_WORK       12
+
+/*
+ * DM_ULOG_SET_REGION_SYNC corresponds to (found in dm-dirty-log.h):
+ * void (*set_region_sync)(struct dm_dirty_log *log,
+ *                        region_t region, int in_sync);
+ *
+ * Payload-to-userspace:
+ *     {
+ *             uint64_t - region to set sync state on
+ *             int64_t  - 0 if not-in-sync, 1 if in-sync
+ *     }
+ * Payload-to-kernel:
+ *     None.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_SET_REGION_SYNC       13
+
+/*
+ * DM_ULOG_GET_SYNC_COUNT corresponds to (found in dm-dirty-log.h):
+ * region_t (*get_sync_count)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *     None.
+ * Payload-to-kernel:
+ *     uint64_t - the number of in-sync regions
+ *
+ * No incoming payload.  Kernel-bound payload contains the number of
+ * regions that are in-sync (in a uint64_t).
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_GET_SYNC_COUNT        14
+
+/*
+ * DM_ULOG_STATUS_INFO corresponds to (found in dm-dirty-log.h):
+ * int (*status)(struct dm_dirty_log *log, STATUSTYPE_INFO,
+ *              char *result, unsigned maxlen);
+ *
+ * Payload-to-userspace:
+ *     None.
+ * Payload-to-kernel:
+ *     Character string containing STATUSTYPE_INFO
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_STATUS_INFO           15
+
+/*
+ * DM_ULOG_STATUS_TABLE corresponds to (found in dm-dirty-log.h):
+ * int (*status)(struct dm_dirty_log *log, STATUSTYPE_TABLE,
+ *              char *result, unsigned maxlen);
+ *
+ * Payload-to-userspace:
+ *     None.
+ * Payload-to-kernel:
+ *     Character string containing STATUSTYPE_TABLE
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_STATUS_TABLE          16
+
+/*
+ * DM_ULOG_IS_REMOTE_RECOVERING corresponds to (found in dm-dirty-log.h):
+ * int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region);
+ *
+ * Payload-to-userspace:
+ *     uint64_t - region to determine recovery status on
+ * Payload-to-kernel:
+ *     {
+ *             int64_t is_recovering;  -- 0 if no, 1 if yes
+ *             uint64_t in_sync_hint;  -- lowest region still needing resync
+ *     }
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_IS_REMOTE_RECOVERING  17
+
+/*
+ * (DM_ULOG_REQUEST_MASK & request_type) to get the request type
+ *
+ * Payload-to-userspace:
+ *     A single string containing all the argv arguments separated by ' 's
+ * Payload-to-kernel:
+ *     None.  ('data_size' in the dm_ulog_request struct should be 0.)
+ *
+ * We are reserving 8 bits of the 32-bit 'request_type' field for the
+ * various request types above.  The remaining 24-bits are currently
+ * set to zero and are reserved for future use and compatibility concerns.
+ *
+ * User-space should always use DM_ULOG_REQUEST_TYPE to acquire the
+ * request type from the 'request_type' field to maintain forward compatibility.
+ */
+#define DM_ULOG_REQUEST_MASK 0xFF
+#define DM_ULOG_REQUEST_TYPE(request_type) \
+       (DM_ULOG_REQUEST_MASK & (request_type))
+
+struct dm_ulog_request {
+       char uuid[DM_UUID_LEN]; /* Ties a request to a specific mirror log */
+       char padding[7];        /* Padding because DM_UUID_LEN = 129 */
+
+       int32_t error;          /* Used to report back processing errors */
+
+       uint32_t seq;           /* Sequence number for request */
+       uint32_t request_type;  /* DM_ULOG_* defined above */
+       uint32_t data_size;     /* How much data (not including this struct) */
+
+       char data[0];
+};
+
+#endif /* __DM_LOG_USERSPACE_H__ */
index 1731fb5fd775a3ee312b5c712fa38b33f12c5a9b..4a2b162c256aac373afa166638fbed5938b773db 100644 (file)
@@ -126,6 +126,8 @@ extern int free_irte(int irq);
 extern int irq_remapped(int irq);
 extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev);
 extern struct intel_iommu *map_ioapic_to_ir(int apic);
+extern int set_ioapic_sid(struct irte *irte, int apic);
+extern int set_msi_sid(struct irte *irte, struct pci_dev *dev);
 #else
 static inline int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 {
@@ -156,6 +158,15 @@ static inline struct intel_iommu *map_ioapic_to_ir(int apic)
 {
        return NULL;
 }
+static inline int set_ioapic_sid(struct irte *irte, int apic)
+{
+       return 0;
+}
+static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev)
+{
+       return 0;
+}
+
 #define irq_remapped(irq)              (0)
 #define enable_intr_remapping(mode)    (-1)
 #define disable_intr_remapping()       (0)
index 7894dd0f3b77544a8006ff1d4471554f460be093..ca1bfe90004fa5473f35b884ad0e33abbde6ef25 100644 (file)
@@ -103,10 +103,6 @@ struct ext3_inode_info {
         */
        struct rw_semaphore xattr_sem;
 #endif
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-       struct posix_acl        *i_acl;
-       struct posix_acl        *i_default_acl;
-#endif
 
        struct list_head i_orphan;      /* unlinked but open inodes */
 
index 8e912ab6a072603a8280a252e57ad6c99c129b7b..3c155107d61f3d2f4be840dd2d175651ae1c1aa9 100644 (file)
@@ -3,4 +3,25 @@
 
 #define FALLOC_FL_KEEP_SIZE    0x01 /* default is extend size */
 
+#ifdef __KERNEL__
+
+/*
+ * Space reservation ioctls and argument structure
+ * are designed to be compatible with the legacy XFS ioctls.
+ */
+struct space_resv {
+       __s16           l_type;
+       __s16           l_whence;
+       __s64           l_start;
+       __s64           l_len;          /* len == 0 means until end of file */
+       __s32           l_sysid;
+       __u32           l_pid;
+       __s32           l_pad[4];       /* reserved area */
+};
+
+#define FS_IOC_RESVSP          _IOW('X', 40, struct space_resv)
+#define FS_IOC_RESVSP64                _IOW('X', 42, struct space_resv)
+
+#endif /* __KERNEL__ */
+
 #endif /* _FALLOC_H_ */
index 1ff5e4e019524124ece3f762f36b841b0d2e1d73..0872372184fe159a62475b48a99a1a6a26d8f1cd 100644 (file)
@@ -710,6 +710,9 @@ static inline int mapping_writably_mapped(struct address_space *mapping)
 #define i_size_ordered_init(inode) do { } while (0)
 #endif
 
+struct posix_acl;
+#define ACL_NOT_CACHED ((void *)(-1))
+
 struct inode {
        struct hlist_node       i_hash;
        struct list_head        i_list;
@@ -772,6 +775,10 @@ struct inode {
        atomic_t                i_writecount;
 #ifdef CONFIG_SECURITY
        void                    *i_security;
+#endif
+#ifdef CONFIG_FS_POSIX_ACL
+       struct posix_acl        *i_acl;
+       struct posix_acl        *i_default_acl;
 #endif
        void                    *i_private; /* fs or device private pointer */
 };
@@ -1906,6 +1913,8 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
 
 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
                       struct file *filp);
+extern int do_fallocate(struct file *file, int mode, loff_t offset,
+                       loff_t len);
 extern long do_sys_open(int dfd, const char __user *filename, int flags,
                        int mode);
 extern struct file *filp_open(const char *, int, int);
@@ -1914,6 +1923,10 @@ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
 extern int filp_close(struct file *, fl_owner_t id);
 extern char * getname(const char __user *);
 
+/* fs/ioctl.c */
+
+extern int ioctl_preallocate(struct file *filp, void __user *argp);
+
 /* fs/dcache.c */
 extern void __init vfs_caches_init_early(void);
 extern void __init vfs_caches_init(unsigned long);
index 10d701eec484cb353508afa13efbf63d506e2759..b6a85183c33389e164d7d8f54390501e31c9d415 100644 (file)
@@ -175,16 +175,16 @@ struct icmp6_filter {
 
 
 extern void                            icmpv6_send(struct sk_buff *skb,
-                                                   int type, int code,
+                                                   u8 type, u8 code,
                                                    __u32 info, 
                                                    struct net_device *dev);
 
 extern int                             icmpv6_init(void);
-extern int                             icmpv6_err_convert(int type, int code,
+extern int                             icmpv6_err_convert(u8 type, u8 code,
                                                           int *err);
 extern void                            icmpv6_cleanup(void);
 extern void                            icmpv6_param_prob(struct sk_buff *skb,
-                                                         int code, int pos);
+                                                         u8 code, int pos);
 
 struct flowi;
 struct in6_addr;
index da5a5a1f4cd2ec17fb2a5f544cc71f8e800b2d10..b25d1b53df0dfdefce12db40c2592b3cb1f0f546 100644 (file)
@@ -258,6 +258,16 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
 #define lockdep_set_subclass(lock, sub)        \
                lockdep_init_map(&(lock)->dep_map, #lock, \
                                 (lock)->dep_map.key, sub)
+/*
+ * Compare locking classes
+ */
+#define lockdep_match_class(lock, key) lockdep_match_key(&(lock)->dep_map, key)
+
+static inline int lockdep_match_key(struct lockdep_map *lock,
+                                   struct lock_class_key *key)
+{
+       return lock->key == key;
+}
 
 /*
  * Acquire a lock.
@@ -326,6 +336,11 @@ static inline void lockdep_on(void)
 #define lockdep_set_class_and_subclass(lock, key, sub) \
                do { (void)(key); } while (0)
 #define lockdep_set_subclass(lock, sub)                do { } while (0)
+/*
+ * We don't define lockdep_match_class() and lockdep_match_key() for !LOCKDEP
+ * case since the result is not well defined and the caller should rather
+ * #ifdef the call himself.
+ */
 
 # define INIT_LOCKDEP
 # define lockdep_reset()               do { debug_locks = 1; } while (0)
diff --git a/include/linux/max17040_battery.h b/include/linux/max17040_battery.h
new file mode 100644 (file)
index 0000000..ad97b06
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ *  Copyright (C) 2009 Samsung Electronics
+ *  Minkyu Kang <mk7.kang@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __MAX17040_BATTERY_H_
+#define __MAX17040_BATTERY_H_
+
+struct max17040_platform_data {
+       int (*battery_online)(void);
+       int (*charger_online)(void);
+       int (*charger_enable)(void);
+};
+
+#endif
index b3646cd7fd5af6a823aafce45f2c3d8290689ab8..4391741b99dc15a4083c2b0063efbe77aacc2e61 100644 (file)
@@ -229,7 +229,6 @@ struct hotplug_params {
 extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus,
                                struct hotplug_params *hpp);
 int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags);
-int acpi_root_bridge(acpi_handle handle);
 int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle);
 int acpi_pci_detect_ejectable(struct pci_bus *pbus);
 #endif
index 4bc241290c241e6668faa8debc9d5985b5a2d447..065a3652a3eaf92a8381db3f35e78f86e470a488 100644 (file)
@@ -83,4 +83,78 @@ extern int posix_acl_chmod_masq(struct posix_acl *, mode_t);
 extern struct posix_acl *get_posix_acl(struct inode *, int);
 extern int set_posix_acl(struct inode *, int, struct posix_acl *);
 
+#ifdef CONFIG_FS_POSIX_ACL
+static inline struct posix_acl *get_cached_acl(struct inode *inode, int type)
+{
+       struct posix_acl **p, *acl;
+       switch (type) {
+       case ACL_TYPE_ACCESS:
+               p = &inode->i_acl;
+               break;
+       case ACL_TYPE_DEFAULT:
+               p = &inode->i_default_acl;
+               break;
+       default:
+               return ERR_PTR(-EINVAL);
+       }
+       acl = ACCESS_ONCE(*p);  /* unlocked peek; NULL is returned as-is */
+       if (acl) {
+               spin_lock(&inode->i_lock);
+               acl = *p;       /* re-read the slot with i_lock held */
+               if (acl != ACL_NOT_CACHED)      /* sentinel: returned as-is */
+                       acl = posix_acl_dup(acl);
+               spin_unlock(&inode->i_lock);
+       }
+       return acl;
+}
+
+static inline void set_cached_acl(struct inode *inode,
+                                 int type,
+                                 struct posix_acl *acl)
+{
+       struct posix_acl *old = NULL;
+       spin_lock(&inode->i_lock);
+       switch (type) {
+       case ACL_TYPE_ACCESS:
+               old = inode->i_acl;
+               inode->i_acl = posix_acl_dup(acl);
+               break;
+       case ACL_TYPE_DEFAULT:
+               old = inode->i_default_acl;
+               inode->i_default_acl = posix_acl_dup(acl);
+               break;
+       }
+       spin_unlock(&inode->i_lock);
+       if (old != ACL_NOT_CACHED)
+               posix_acl_release(old);
+}
+
+static inline void forget_cached_acl(struct inode *inode, int type)
+{
+       struct posix_acl *old = NULL;
+       spin_lock(&inode->i_lock);
+       switch (type) {
+       case ACL_TYPE_ACCESS:
+               old = inode->i_acl;
+               inode->i_acl = ACL_NOT_CACHED;
+               break;
+       case ACL_TYPE_DEFAULT:
+               old = inode->i_default_acl;
+               inode->i_default_acl = ACL_NOT_CACHED;
+               break;
+       }
+       spin_unlock(&inode->i_lock);
+       if (old != ACL_NOT_CACHED)
+               posix_acl_release(old);
+}
+#endif
+
+static inline void cache_no_acl(struct inode *inode)
+{
+#ifdef CONFIG_FS_POSIX_ACL
+       inode->i_acl = NULL;
+       inode->i_default_acl = NULL;
+#endif
+}
+
 #endif  /* __LINUX_POSIX_ACL_H */
index 8cc65757e47adfc476ed4b2ca2c1d020c06ab6bf..b4448853900ed85a17ff24a9ce074ab5f890572f 100644 (file)
@@ -56,15 +56,6 @@ int reiserfs_cache_default_acl(struct inode *dir);
 extern struct xattr_handler reiserfs_posix_acl_default_handler;
 extern struct xattr_handler reiserfs_posix_acl_access_handler;
 
-static inline void reiserfs_init_acl_access(struct inode *inode)
-{
-       REISERFS_I(inode)->i_acl_access = NULL;
-}
-
-static inline void reiserfs_init_acl_default(struct inode *inode)
-{
-       REISERFS_I(inode)->i_acl_default = NULL;
-}
 #else
 
 #define reiserfs_cache_default_acl(inode) 0
@@ -86,12 +77,4 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
 {
        return 0;
 }
-
-static inline void reiserfs_init_acl_access(struct inode *inode)
-{
-}
-
-static inline void reiserfs_init_acl_default(struct inode *inode)
-{
-}
 #endif
index 76360b36ac3324040a5066cc15ddca9b5c2e98b0..89f4d3abbf5af501610edeac780448e94cc4de55 100644 (file)
@@ -54,10 +54,6 @@ struct reiserfs_inode_info {
        unsigned int i_trans_id;
        struct reiserfs_journal_list *i_jl;
        struct mutex i_mmap;
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
-       struct posix_acl *i_acl_access;
-       struct posix_acl *i_acl_default;
-#endif
 #ifdef CONFIG_REISERFS_FS_XATTR
        struct rw_semaphore i_xattr_sem;
 #endif
index fd83f2584b157b5e76b593344c55f3381f149014..abff6c9b413c10c118e30dc537ea7b7f5e357c8d 100644 (file)
@@ -19,10 +19,6 @@ struct shmem_inode_info {
        swp_entry_t             i_direct[SHMEM_NR_DIRECT]; /* first blocks */
        struct list_head        swaplist;       /* chain of maybes on swap */
        struct inode            vfs_inode;
-#ifdef CONFIG_TMPFS_POSIX_ACL
-       struct posix_acl        *i_acl;
-       struct posix_acl        *i_default_acl;
-#endif
 };
 
 struct shmem_sb_info {
@@ -45,7 +41,6 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
 #ifdef CONFIG_TMPFS_POSIX_ACL
 int shmem_permission(struct inode *, int);
 int shmem_acl_init(struct inode *, struct inode *);
-void shmem_acl_destroy_inode(struct inode *);
 
 extern struct xattr_handler shmem_xattr_acl_access_handler;
 extern struct xattr_handler shmem_xattr_acl_default_handler;
@@ -57,9 +52,6 @@ static inline int shmem_acl_init(struct inode *inode, struct inode *dir)
 {
        return 0;
 }
-static inline void shmem_acl_destroy_inode(struct inode *inode)
-{
-}
 #endif  /* CONFIG_TMPFS_POSIX_ACL */
 
 #endif
index ffa5b8b1f1df7c1c92d3e679c3ffacde656faae4..1089d5aabd4990f8b028e6fb3c7db8f82195950d 100644 (file)
@@ -53,7 +53,7 @@ struct inet6_protocol
 
        void    (*err_handler)(struct sk_buff *skb,
                               struct inet6_skb_parm *opt,
-                              int type, int code, int offset,
+                              u8 type, u8 code, int offset,
                               __be32 info);
 
        int     (*gso_send_check)(struct sk_buff *skb);
index 8a22599f26ba9ce4fe7b9e649e23321fc359263d..f6b9b830df8c445e3ce88f0ff45d27fc07908c2c 100644 (file)
@@ -6,7 +6,7 @@
 #include <net/protocol.h>
 
 void raw6_icmp_error(struct sk_buff *, int nexthdr,
-               int type, int code, int inner_offset, __be32);
+               u8 type, u8 code, int inner_offset, __be32);
 int raw6_local_deliver(struct sk_buff *, int);
 
 extern int                     rawv6_rcv(struct sock *sk,
index 9f80a766828923a8b41ad0588d91b40fcf12a8be..d16a304cbed4fb74b9de069d5eb41788c7a13d0d 100644 (file)
@@ -448,6 +448,7 @@ static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 {
        struct sctp_ulpevent *event = sctp_skb2event(skb);
 
+       skb_orphan(skb);
        skb->sk = sk;
        skb->destructor = sctp_sock_rfree;
        atomic_add(event->rmem_len, &sk->sk_rmem_alloc);
index 07133c5e986867cd3ab7affbb1a37dd85cb4801c..352f06bbd7a9b14b0257b8014d4ecf3027aad44e 100644 (file)
@@ -1252,6 +1252,7 @@ static inline int sk_has_allocations(const struct sock *sk)
 
 static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
 {
+       skb_orphan(skb);
        skb->sk = sk;
        skb->destructor = sock_wfree;
        /*
@@ -1264,6 +1265,7 @@ static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
 
 static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 {
+       skb_orphan(skb);
        skb->sk = sk;
        skb->destructor = sock_rfree;
        atomic_add(skb->truesize, &sk->sk_rmem_alloc);
index 736bca4508862c668675fff1f03e9d462cb08674..9e3a3f4c1f6041bb3447392695f308cfcc5b2cc0 100644 (file)
@@ -1274,7 +1274,7 @@ struct xfrm_tunnel {
 struct xfrm6_tunnel {
        int (*handler)(struct sk_buff *skb);
        int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
-                          int type, int code, int offset, __be32 info);
+                          u8 type, u8 code, int offset, __be32 info);
        struct xfrm6_tunnel *next;
        int priority;
 };
index 4870dfeb9ee57798abd057eb74a5ac11497ff097..2c5ade79eb81718702dd1fdf44e8b129e69f88a9 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/smp_lock.h>
 #include <linux/initrd.h>
 #include <linux/bootmem.h>
+#include <linux/acpi.h>
 #include <linux/tty.h>
 #include <linux/gfp.h>
 #include <linux/percpu.h>
@@ -88,11 +89,6 @@ extern void sbus_init(void);
 extern void prio_tree_init(void);
 extern void radix_tree_init(void);
 extern void free_initmem(void);
-#ifdef CONFIG_ACPI
-extern void acpi_early_init(void);
-#else
-static inline void acpi_early_init(void) { }
-#endif
 #ifndef CONFIG_DEBUG_RODATA
 static inline void mark_rodata_ro(void) { }
 #endif
index 0a32cb21ec9752fc689020795224280d12eb09a8..780c8dcf45168ece4ae23ca1917afdff935024d9 100644 (file)
@@ -69,7 +69,7 @@ obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
-obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
+obj-$(CONFIG_AUDIT) += audit.o auditfilter.o audit_watch.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
 obj-$(CONFIG_GCOV_KERNEL) += gcov/
 obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
index 9442c3533ba9f8be398333d8ee7b74882b339ae0..defc2e6f1e3bc9742606d7efc0a0af4e79095b14 100644 (file)
@@ -115,9 +115,6 @@ static atomic_t    audit_lost = ATOMIC_INIT(0);
 /* The netlink socket. */
 static struct sock *audit_sock;
 
-/* Inotify handle. */
-struct inotify_handle *audit_ih;
-
 /* Hash for inode-based rules */
 struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
 
@@ -136,7 +133,7 @@ static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
 static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
 
 /* Serialize requests from userspace. */
-static DEFINE_MUTEX(audit_cmd_mutex);
+DEFINE_MUTEX(audit_cmd_mutex);
 
 /* AUDIT_BUFSIZ is the size of the temporary buffer used for formatting
  * audit records.  Since printk uses a 1024 byte buffer, this buffer
@@ -375,6 +372,25 @@ static void audit_hold_skb(struct sk_buff *skb)
                kfree_skb(skb);
 }
 
+/*
+ * For one reason or another this nlh isn't getting delivered to the userspace
+ * audit daemon, just send it to printk.
+ */
+static void audit_printk_skb(struct sk_buff *skb)
+{
+       struct nlmsghdr *nlh = nlmsg_hdr(skb);
+       char *data = NLMSG_DATA(nlh);
+
+       if (nlh->nlmsg_type != AUDIT_EOE) {
+               if (printk_ratelimit())
+                       printk(KERN_NOTICE "type=%d %s\n", nlh->nlmsg_type, data);
+               else
+                       audit_log_lost("printk limit exceeded\n");
+       }
+
+       audit_hold_skb(skb);
+}
+
 static void kauditd_send_skb(struct sk_buff *skb)
 {
        int err;
@@ -427,14 +443,8 @@ static int kauditd_thread(void *dummy)
                if (skb) {
                        if (audit_pid)
                                kauditd_send_skb(skb);
-                       else {
-                               if (printk_ratelimit())
-                                       printk(KERN_NOTICE "%s\n", skb->data + NLMSG_SPACE(0));
-                               else
-                                       audit_log_lost("printk limit exceeded\n");
-
-                               audit_hold_skb(skb);
-                       }
+                       else
+                               audit_printk_skb(skb);
                } else {
                        DECLARE_WAITQUEUE(wait, current);
                        set_current_state(TASK_INTERRUPTIBLE);
@@ -495,42 +505,25 @@ int audit_send_list(void *_dest)
        return 0;
 }
 
-#ifdef CONFIG_AUDIT_TREE
-static int prune_tree_thread(void *unused)
-{
-       mutex_lock(&audit_cmd_mutex);
-       audit_prune_trees();
-       mutex_unlock(&audit_cmd_mutex);
-       return 0;
-}
-
-void audit_schedule_prune(void)
-{
-       kthread_run(prune_tree_thread, NULL, "audit_prune_tree");
-}
-#endif
-
 struct sk_buff *audit_make_reply(int pid, int seq, int type, int done,
                                 int multi, void *payload, int size)
 {
        struct sk_buff  *skb;
        struct nlmsghdr *nlh;
-       int             len = NLMSG_SPACE(size);
        void            *data;
        int             flags = multi ? NLM_F_MULTI : 0;
        int             t     = done  ? NLMSG_DONE  : type;
 
-       skb = alloc_skb(len, GFP_KERNEL);
+       skb = nlmsg_new(size, GFP_KERNEL);
        if (!skb)
                return NULL;
 
-       nlh              = NLMSG_PUT(skb, pid, seq, t, size);
-       nlh->nlmsg_flags = flags;
-       data             = NLMSG_DATA(nlh);
+       nlh     = NLMSG_NEW(skb, pid, seq, t, size, flags);
+       data    = NLMSG_DATA(nlh);
        memcpy(data, payload, size);
        return skb;
 
-nlmsg_failure:                 /* Used by NLMSG_PUT */
+nlmsg_failure:                 /* Used by NLMSG_NEW */
        if (skb)
                kfree_skb(skb);
        return NULL;
@@ -926,28 +919,29 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 }
 
 /*
- * Get message from skb (based on rtnetlink_rcv_skb).  Each message is
- * processed by audit_receive_msg.  Malformed skbs with wrong length are
- * discarded silently.
+ * Get message from skb.  Each message is processed by audit_receive_msg.
+ * Malformed skbs with wrong length are discarded silently.
  */
 static void audit_receive_skb(struct sk_buff *skb)
 {
-       int             err;
-       struct nlmsghdr *nlh;
-       u32             rlen;
+       struct nlmsghdr *nlh;
+       /*
+        * len MUST be signed for NLMSG_NEXT to be able to dec it below 0
+        * if the nlmsg_len was not aligned
+        */
+       int len;
+       int err;
 
-       while (skb->len >= NLMSG_SPACE(0)) {
-               nlh = nlmsg_hdr(skb);
-               if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
-                       return;
-               rlen = NLMSG_ALIGN(nlh->nlmsg_len);
-               if (rlen > skb->len)
-                       rlen = skb->len;
-               if ((err = audit_receive_msg(skb, nlh))) {
+       nlh = nlmsg_hdr(skb);
+       len = skb->len;
+
+       while (NLMSG_OK(nlh, len)) {
+               err = audit_receive_msg(skb, nlh);
+               /* if err or if this message says it wants a response */
+               if (err || (nlh->nlmsg_flags & NLM_F_ACK))
                        netlink_ack(skb, nlh, err);
-               } else if (nlh->nlmsg_flags & NLM_F_ACK)
-                       netlink_ack(skb, nlh, 0);
-               skb_pull(skb, rlen);
+
+               nlh = NLMSG_NEXT(nlh, len);
        }
 }
 
@@ -959,13 +953,6 @@ static void audit_receive(struct sk_buff  *skb)
        mutex_unlock(&audit_cmd_mutex);
 }
 
-#ifdef CONFIG_AUDITSYSCALL
-static const struct inotify_operations audit_inotify_ops = {
-       .handle_event   = audit_handle_ievent,
-       .destroy_watch  = audit_free_parent,
-};
-#endif
-
 /* Initialize audit support at boot time. */
 static int __init audit_init(void)
 {
@@ -991,12 +978,6 @@ static int __init audit_init(void)
 
        audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
 
-#ifdef CONFIG_AUDITSYSCALL
-       audit_ih = inotify_init(&audit_inotify_ops);
-       if (IS_ERR(audit_ih))
-               audit_panic("cannot initialize inotify handle");
-#endif
-
        for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
                INIT_LIST_HEAD(&audit_inode_hash[i]);
 
@@ -1070,18 +1051,20 @@ static struct audit_buffer * audit_buffer_alloc(struct audit_context *ctx,
                        goto err;
        }
 
-       ab->skb = alloc_skb(AUDIT_BUFSIZ, gfp_mask);
-       if (!ab->skb)
-               goto err;
-
        ab->ctx = ctx;
        ab->gfp_mask = gfp_mask;
-       nlh = (struct nlmsghdr *)skb_put(ab->skb, NLMSG_SPACE(0));
-       nlh->nlmsg_type = type;
-       nlh->nlmsg_flags = 0;
-       nlh->nlmsg_pid = 0;
-       nlh->nlmsg_seq = 0;
+
+       ab->skb = nlmsg_new(AUDIT_BUFSIZ, gfp_mask);
+       if (!ab->skb)
+               goto nlmsg_failure;
+
+       nlh = NLMSG_NEW(ab->skb, 0, 0, type, 0, 0);
+
        return ab;
+
+nlmsg_failure:                  /* Used by NLMSG_NEW */
+       kfree_skb(ab->skb);
+       ab->skb = NULL;
 err:
        audit_buffer_free(ab);
        return NULL;
@@ -1452,6 +1435,15 @@ void audit_log_d_path(struct audit_buffer *ab, const char *prefix,
        kfree(pathname);
 }
 
+void audit_log_key(struct audit_buffer *ab, char *key)
+{
+       audit_log_format(ab, " key=");
+       if (key)
+               audit_log_untrustedstring(ab, key);
+       else
+               audit_log_format(ab, "(null)");
+}
+
 /**
  * audit_log_end - end one audit record
  * @ab: the audit_buffer
@@ -1475,15 +1467,7 @@ void audit_log_end(struct audit_buffer *ab)
                        skb_queue_tail(&audit_skb_queue, ab->skb);
                        wake_up_interruptible(&kauditd_wait);
                } else {
-                       if (nlh->nlmsg_type != AUDIT_EOE) {
-                               if (printk_ratelimit()) {
-                                       printk(KERN_NOTICE "type=%d %s\n",
-                                               nlh->nlmsg_type,
-                                               ab->skb->data + NLMSG_SPACE(0));
-                               } else
-                                       audit_log_lost("printk limit exceeded\n");
-                       }
-                       audit_hold_skb(ab->skb);
+                       audit_printk_skb(ab->skb);
                }
                ab->skb = NULL;
        }
index 16f18cac661b68261c20c551caa3a94333df3da0..208687be4f30c8a2ddb432e3c1b8594fb85d2e8e 100644 (file)
@@ -53,18 +53,7 @@ enum audit_state {
 };
 
 /* Rule lists */
-struct audit_parent;
-
-struct audit_watch {
-       atomic_t                count;  /* reference count */
-       char                    *path;  /* insertion path */
-       dev_t                   dev;    /* associated superblock device */
-       unsigned long           ino;    /* associated inode number */
-       struct audit_parent     *parent; /* associated parent */
-       struct list_head        wlist;  /* entry in parent->watches list */
-       struct list_head        rules;  /* associated rules */
-};
-
+struct audit_watch;
 struct audit_tree;
 struct audit_chunk;
 
@@ -108,19 +97,28 @@ struct audit_netlink_list {
 
 int audit_send_list(void *);
 
-struct inotify_watch;
-/* Inotify handle */
-extern struct inotify_handle *audit_ih;
-
-extern void audit_free_parent(struct inotify_watch *);
-extern void audit_handle_ievent(struct inotify_watch *, u32, u32, u32,
-                               const char *, struct inode *);
 extern int selinux_audit_rule_update(void);
 
 extern struct mutex audit_filter_mutex;
 extern void audit_free_rule_rcu(struct rcu_head *);
 extern struct list_head audit_filter_list[];
 
+/* audit watch functions */
+extern unsigned long audit_watch_inode(struct audit_watch *watch);
+extern dev_t audit_watch_dev(struct audit_watch *watch);
+extern void audit_put_watch(struct audit_watch *watch);
+extern void audit_get_watch(struct audit_watch *watch);
+extern int audit_to_watch(struct audit_krule *krule, char *path, int len, u32 op);
+extern int audit_add_watch(struct audit_krule *krule);
+extern void audit_remove_watch(struct audit_watch *watch);
+extern void audit_remove_watch_rule(struct audit_krule *krule, struct list_head *list);
+extern void audit_inotify_unregister(struct list_head *in_list);
+extern char *audit_watch_path(struct audit_watch *watch);
+extern struct list_head *audit_watch_rules(struct audit_watch *watch);
+
+extern struct audit_entry *audit_dupe_rule(struct audit_krule *old,
+                                          struct audit_watch *watch);
+
 #ifdef CONFIG_AUDIT_TREE
 extern struct audit_chunk *audit_tree_lookup(const struct inode *);
 extern void audit_put_chunk(struct audit_chunk *);
@@ -130,10 +128,9 @@ extern int audit_add_tree_rule(struct audit_krule *);
 extern int audit_remove_tree_rule(struct audit_krule *);
 extern void audit_trim_trees(void);
 extern int audit_tag_tree(char *old, char *new);
-extern void audit_schedule_prune(void);
-extern void audit_prune_trees(void);
 extern const char *audit_tree_path(struct audit_tree *);
 extern void audit_put_tree(struct audit_tree *);
+extern void audit_kill_trees(struct list_head *);
 #else
 #define audit_remove_tree_rule(rule) BUG()
 #define audit_add_tree_rule(rule) -EINVAL
@@ -142,6 +139,7 @@ extern void audit_put_tree(struct audit_tree *);
 #define audit_put_tree(tree) (void)0
 #define audit_tag_tree(old, new) -EINVAL
 #define audit_tree_path(rule) ""       /* never called */
+#define audit_kill_trees(list) BUG()
 #endif
 
 extern char *audit_unpack_string(void **, size_t *, size_t);
@@ -160,7 +158,10 @@ static inline int audit_signal_info(int sig, struct task_struct *t)
        return 0;
 }
 extern void audit_filter_inodes(struct task_struct *, struct audit_context *);
+extern struct list_head *audit_killed_trees(void);
 #else
 #define audit_signal_info(s,t) AUDIT_DISABLED
 #define audit_filter_inodes(t,c) AUDIT_DISABLED
 #endif
+
+extern struct mutex audit_cmd_mutex;
index 1f6396d766874b1f28577a44812b0d097106cf88..2451dc6f328211d173747865cde60fd49b6c9bbd 100644 (file)
@@ -2,6 +2,7 @@
 #include <linux/inotify.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
+#include <linux/kthread.h>
 
 struct audit_tree;
 struct audit_chunk;
@@ -441,13 +442,11 @@ static void kill_rules(struct audit_tree *tree)
                if (rule->tree) {
                        /* not a half-baked one */
                        ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
-                       audit_log_format(ab, "op=remove rule dir=");
+                       audit_log_format(ab, "op=");
+                       audit_log_string(ab, "remove rule");
+                       audit_log_format(ab, " dir=");
                        audit_log_untrustedstring(ab, rule->tree->pathname);
-                       if (rule->filterkey) {
-                               audit_log_format(ab, " key=");
-                               audit_log_untrustedstring(ab, rule->filterkey);
-                       } else
-                               audit_log_format(ab, " key=(null)");
+                       audit_log_key(ab, rule->filterkey);
                        audit_log_format(ab, " list=%d res=1", rule->listnr);
                        audit_log_end(ab);
                        rule->tree = NULL;
@@ -519,6 +518,8 @@ static void trim_marked(struct audit_tree *tree)
        }
 }
 
+static void audit_schedule_prune(void);
+
 /* called with audit_filter_mutex */
 int audit_remove_tree_rule(struct audit_krule *rule)
 {
@@ -824,10 +825,11 @@ int audit_tag_tree(char *old, char *new)
 
 /*
  * That gets run when evict_chunk() ends up needing to kill audit_tree.
- * Runs from a separate thread, with audit_cmd_mutex held.
+ * Runs from a separate thread.
  */
-void audit_prune_trees(void)
+static int prune_tree_thread(void *unused)
 {
+       mutex_lock(&audit_cmd_mutex);
        mutex_lock(&audit_filter_mutex);
 
        while (!list_empty(&prune_list)) {
@@ -844,6 +846,40 @@ void audit_prune_trees(void)
        }
 
        mutex_unlock(&audit_filter_mutex);
+       mutex_unlock(&audit_cmd_mutex);
+       return 0;
+}
+
+static void audit_schedule_prune(void)
+{
+       kthread_run(prune_tree_thread, NULL, "audit_prune_tree");
+}
+
+/*
+ * ... and that one is done if evict_chunk() decides to delay until the end
+ * of syscall.  Runs synchronously.
+ */
+void audit_kill_trees(struct list_head *list)
+{
+       mutex_lock(&audit_cmd_mutex);
+       mutex_lock(&audit_filter_mutex);
+
+       while (!list_empty(list)) {
+               struct audit_tree *victim;
+
+               victim = list_entry(list->next, struct audit_tree, list);
+               kill_rules(victim);
+               list_del_init(&victim->list);
+
+               mutex_unlock(&audit_filter_mutex);
+
+               prune_one(victim);
+
+               mutex_lock(&audit_filter_mutex);
+       }
+
+       mutex_unlock(&audit_filter_mutex);
+       mutex_unlock(&audit_cmd_mutex);
 }
 
 /*
@@ -854,6 +890,8 @@ void audit_prune_trees(void)
 static void evict_chunk(struct audit_chunk *chunk)
 {
        struct audit_tree *owner;
+       struct list_head *postponed = audit_killed_trees();
+       int need_prune = 0;
        int n;
 
        if (chunk->dead)
@@ -869,15 +907,21 @@ static void evict_chunk(struct audit_chunk *chunk)
                owner->root = NULL;
                list_del_init(&owner->same_root);
                spin_unlock(&hash_lock);
-               kill_rules(owner);
-               list_move(&owner->list, &prune_list);
-               audit_schedule_prune();
+               if (!postponed) {
+                       kill_rules(owner);
+                       list_move(&owner->list, &prune_list);
+                       need_prune = 1;
+               } else {
+                       list_move(&owner->list, postponed);
+               }
                spin_lock(&hash_lock);
        }
        list_del_rcu(&chunk->hash);
        for (n = 0; n < chunk->count; n++)
                list_del_init(&chunk->owners[n].list);
        spin_unlock(&hash_lock);
+       if (need_prune)
+               audit_schedule_prune();
        mutex_unlock(&audit_filter_mutex);
 }
 
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
new file mode 100644 (file)
index 0000000..0e96dbc
--- /dev/null
@@ -0,0 +1,543 @@
+/* audit_watch.c -- watching inodes
+ *
+ * Copyright 2003-2009 Red Hat, Inc.
+ * Copyright 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright 2005 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/audit.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/netlink.h>
+#include <linux/sched.h>
+#include <linux/inotify.h>
+#include <linux/security.h>
+#include "audit.h"
+
+/*
+ * Reference counting:
+ *
+ * audit_parent: lifetime is from audit_init_parent() to receipt of an IN_IGNORED
+ *     event.  Each audit_watch holds a reference to its associated parent.
+ *
+ * audit_watch: if added to lists, lifetime is from audit_init_watch() to
+ *     audit_remove_watch().  Additionally, an audit_watch may exist
+ *     temporarily to assist in searching existing filter data.  Each
+ *     audit_krule holds a reference to its associated watch.
+ */
+
+struct audit_watch {
+       atomic_t                count;  /* reference count */
+       char                    *path;  /* insertion path */
+       dev_t                   dev;    /* associated superblock device */
+       unsigned long           ino;    /* associated inode number */
+       struct audit_parent     *parent; /* associated parent */
+       struct list_head        wlist;  /* entry in parent->watches list */
+       struct list_head        rules;  /* associated rules */
+};
+
+struct audit_parent {
+       struct list_head        ilist;  /* entry in inotify registration list */
+       struct list_head        watches; /* associated watches */
+       struct inotify_watch    wdata;  /* inotify watch data */
+       unsigned                flags;  /* status flags */
+};
+
+/* Inotify handle. */
+struct inotify_handle *audit_ih;
+
+/*
+ * audit_parent status flags:
+ *
+ * AUDIT_PARENT_INVALID - set anytime rules/watches are auto-removed due to
+ * a filesystem event to ensure we're adding audit watches to a valid parent.
+ * Technically not needed for IN_DELETE_SELF or IN_UNMOUNT events, as we cannot
+ * receive them while we have nameidata, but must be used for IN_MOVE_SELF which
+ * we can receive while holding nameidata.
+ */
+#define AUDIT_PARENT_INVALID   0x001
+
+/* Inotify events we care about; parenthesized so the mask is one operand. */
+#define AUDIT_IN_WATCH (IN_MOVE|IN_CREATE|IN_DELETE|IN_DELETE_SELF|IN_MOVE_SELF)
+
+static void audit_free_parent(struct inotify_watch *i_watch)
+{
+       struct audit_parent *parent;
+
+       parent = container_of(i_watch, struct audit_parent, wdata);
+       WARN_ON(!list_empty(&parent->watches));
+       kfree(parent);
+}
+
+void audit_get_watch(struct audit_watch *watch)
+{
+       atomic_inc(&watch->count);
+}
+
+void audit_put_watch(struct audit_watch *watch)
+{
+       if (atomic_dec_and_test(&watch->count)) {
+               WARN_ON(watch->parent);
+               WARN_ON(!list_empty(&watch->rules));
+               kfree(watch->path);
+               kfree(watch);
+       }
+}
+
+void audit_remove_watch(struct audit_watch *watch)
+{
+       list_del(&watch->wlist);
+       put_inotify_watch(&watch->parent->wdata);
+       watch->parent = NULL;
+       audit_put_watch(watch); /* match initial get */
+}
+
+char *audit_watch_path(struct audit_watch *watch)
+{
+       return watch->path;
+}
+
+struct list_head *audit_watch_rules(struct audit_watch *watch)
+{
+       return &watch->rules;
+}
+
+unsigned long audit_watch_inode(struct audit_watch *watch)
+{
+       return watch->ino;
+}
+
+dev_t audit_watch_dev(struct audit_watch *watch)
+{
+       return watch->dev;
+}
+
+/* Initialize a parent watch entry. */
+static struct audit_parent *audit_init_parent(struct nameidata *ndp)
+{
+       struct audit_parent *parent;
+       s32 wd;
+
+       parent = kzalloc(sizeof(*parent), GFP_KERNEL);
+       if (unlikely(!parent))
+               return ERR_PTR(-ENOMEM);
+
+       INIT_LIST_HEAD(&parent->watches);
+       parent->flags = 0;
+
+       inotify_init_watch(&parent->wdata);
+       /* grab a ref so inotify watch hangs around until we take audit_filter_mutex */
+       get_inotify_watch(&parent->wdata);
+       wd = inotify_add_watch(audit_ih, &parent->wdata,
+                              ndp->path.dentry->d_inode, AUDIT_IN_WATCH);
+       if (wd < 0) {
+               audit_free_parent(&parent->wdata);
+               return ERR_PTR(wd);
+       }
+
+       return parent;
+}
+
+/* Initialize a watch entry. */
+static struct audit_watch *audit_init_watch(char *path)
+{
+       struct audit_watch *watch;
+
+       watch = kzalloc(sizeof(*watch), GFP_KERNEL);
+       if (unlikely(!watch))
+               return ERR_PTR(-ENOMEM);
+
+       INIT_LIST_HEAD(&watch->rules);
+       atomic_set(&watch->count, 1);
+       watch->path = path;
+       watch->dev = (dev_t)-1;
+       watch->ino = (unsigned long)-1;
+
+       return watch;
+}
+
+/* Translate a watch string to kernel representation. */
+int audit_to_watch(struct audit_krule *krule, char *path, int len, u32 op)
+{
+       struct audit_watch *watch;
+
+       if (!audit_ih)
+               return -EOPNOTSUPP;
+
+       if (path[0] != '/' || path[len-1] == '/' ||
+           krule->listnr != AUDIT_FILTER_EXIT ||
+           op != Audit_equal ||
+           krule->inode_f || krule->watch || krule->tree)
+               return -EINVAL;
+
+       watch = audit_init_watch(path);
+       if (IS_ERR(watch))
+               return PTR_ERR(watch);
+
+       audit_get_watch(watch);
+       krule->watch = watch;
+
+       return 0;
+}
+
+/* Duplicate the given audit watch (path copy, dev, ino, parent).  The new
+ * watch's rules list starts out empty and its wlist is left undefined. */
+static struct audit_watch *audit_dupe_watch(struct audit_watch *old)
+{
+       char *path;
+       struct audit_watch *new;
+
+       path = kstrdup(old->path, GFP_KERNEL);
+       if (unlikely(!path))
+               return ERR_PTR(-ENOMEM);
+
+       new = audit_init_watch(path);
+       if (IS_ERR(new)) {
+               kfree(path);    /* init failed before storing path */
+               goto out;
+       }
+
+       new->dev = old->dev;
+       new->ino = old->ino;
+       get_inotify_watch(&old->parent->wdata); /* ref for new->parent */
+       new->parent = old->parent;
+
+out:
+       return new;     /* valid watch or the ERR_PTR from audit_init_watch */
+}
+
+/* Log a change to rule @r on watch @w.
+ *
+ * Does nothing unless audit_enabled is set.  Otherwise emits one
+ * AUDIT_CONFIG_CHANGE record carrying the current task's loginuid and
+ * session id, the operation string @op, the watch path (logged as an
+ * untrusted string), the rule's filter key and its list number. */
+static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watch *w, char *op)
+{
+       if (audit_enabled) {
+               struct audit_buffer *ab;
+               ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
+               audit_log_format(ab, "auid=%u ses=%u op=",
+                                audit_get_loginuid(current),
+                                audit_get_sessionid(current));
+               audit_log_string(ab, op);
+               audit_log_format(ab, " path=");
+               audit_log_untrustedstring(ab, w->path);
+               audit_log_key(ab, r->filterkey);
+               audit_log_format(ab, " list=%d res=1", r->listnr);
+               audit_log_end(ab);
+       }
+}
+
+/* Update inode info in audit rules based on filesystem event.
+ *
+ * Takes audit_filter_mutex for the whole update.  The first watch on @parent
+ * that audit_compare_dname_path() accepts for @dname is replaced by a
+ * duplicate carrying the new @dev/@ino.  Every rule on the old watch is
+ * re-created against the new watch and hashed by the new inode number; the
+ * old entries are freed through RCU.  Only one watch is processed per call.
+ *
+ * @invalidating: when nonzero and the current task has an audit context,
+ *     inode-based filtering is run before the rules are changed. */
+static void audit_update_watch(struct audit_parent *parent,
+                              const char *dname, dev_t dev,
+                              unsigned long ino, unsigned invalidating)
+{
+       struct audit_watch *owatch, *nwatch, *nextw;
+       struct audit_krule *r, *nextr;
+       struct audit_entry *oentry, *nentry;
+
+       mutex_lock(&audit_filter_mutex);
+       list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
+               /* a nonzero result skips this watch */
+               if (audit_compare_dname_path(dname, owatch->path, NULL))
+                       continue;
+
+               /* If the update involves invalidating rules, do the inode-based
+                * filtering now, so we don't omit records. */
+               if (invalidating && current->audit_context)
+                       audit_filter_inodes(current, current->audit_context);
+
+               nwatch = audit_dupe_watch(owatch);
+               if (IS_ERR(nwatch)) {
+                       mutex_unlock(&audit_filter_mutex);
+                       audit_panic("error updating watch, skipping");
+                       return;
+               }
+               /* the duplicate copied the old dev/ino; install the new ones */
+               nwatch->dev = dev;
+               nwatch->ino = ino;
+
+               list_for_each_entry_safe(r, nextr, &owatch->rules, rlist) {
+
+                       /* unhook the old entry from the watch and the RCU list */
+                       oentry = container_of(r, struct audit_entry, rule);
+                       list_del(&oentry->rule.rlist);
+                       list_del_rcu(&oentry->list);
+
+                       nentry = audit_dupe_rule(&oentry->rule, nwatch);
+                       if (IS_ERR(nentry)) {
+                               /* no replacement: the rule is dropped entirely */
+                               list_del(&oentry->rule.list);
+                               audit_panic("error updating watch, removing");
+                       } else {
+                               int h = audit_hash_ino((u32)ino);
+                               list_add(&nentry->rule.rlist, &nwatch->rules);
+                               list_add_rcu(&nentry->list, &audit_inode_hash[h]);
+                               list_replace(&oentry->rule.list,
+                                            &nentry->rule.list);
+                       }
+
+                       /* logged while r is still valid, before the RCU free */
+                       audit_watch_log_rule_change(r, owatch, "updated rules");
+
+                       call_rcu(&oentry->rcu, audit_free_rule_rcu);
+               }
+
+               audit_remove_watch(owatch);
+               goto add_watch_to_parent; /* event applies to a single watch */
+       }
+       mutex_unlock(&audit_filter_mutex);
+       return;
+
+add_watch_to_parent:
+       list_add(&nwatch->wlist, &parent->watches);
+       mutex_unlock(&audit_filter_mutex);
+       return;
+}
+
+/* Remove all watches & rules associated with a parent that is going away.
+ *
+ * Under audit_filter_mutex the parent is flagged AUDIT_PARENT_INVALID, then
+ * every rule of every watch is logged, unlinked from the watch's rule list,
+ * from its rule.list and from its RCU-protected entry list, and freed via
+ * call_rcu().  Each watch is then removed with audit_remove_watch(). */
+static void audit_remove_parent_watches(struct audit_parent *parent)
+{
+       struct audit_watch *w, *nextw;
+       struct audit_krule *r, *nextr;
+       struct audit_entry *e;
+
+       mutex_lock(&audit_filter_mutex);
+       parent->flags |= AUDIT_PARENT_INVALID;
+       list_for_each_entry_safe(w, nextw, &parent->watches, wlist) {
+               list_for_each_entry_safe(r, nextr, &w->rules, rlist) {
+                       e = container_of(r, struct audit_entry, rule);
+                       /* log first, while the rule is still fully linked */
+                       audit_watch_log_rule_change(r, w, "remove rule");
+                       list_del(&r->rlist);
+                       list_del(&r->list);
+                       list_del_rcu(&e->list);
+                       call_rcu(&e->rcu, audit_free_rule_rcu);
+               }
+               audit_remove_watch(w);
+       }
+       mutex_unlock(&audit_filter_mutex);
+}
+
+/* Unregister inotify watches for parents on in_list.
+ * Generates an IN_IGNORED event.
+ *
+ * Each parent is taken off @in_list (linked through its ilist member), its
+ * inotify watch is removed from audit_ih, and the watch is unpinned. */
+void audit_inotify_unregister(struct list_head *in_list)
+{
+       struct audit_parent *p, *n;
+
+       list_for_each_entry_safe(p, n, in_list, ilist) {
+               list_del(&p->ilist);
+               inotify_rm_watch(audit_ih, &p->wdata);
+               /* the unpin matching the pin_inotify_watch() done in
+                * audit_remove_watch_rule() when the parent was queued */
+               unpin_inotify_watch(&p->wdata);
+       }
+}
+
+/* Get path information necessary for adding watches.
+ *
+ * Allocates two nameidata structures and looks @path up twice: once with
+ * LOOKUP_PARENT (result in *@ndp) and once with no flags (result in *@ndw).
+ *
+ * A failed allocation or a failed parent lookup frees everything and
+ * returns the error; *@ndp and *@ndw are not written in that case.  A
+ * failure of the second lookup is not treated as an error: *@ndw is set to
+ * NULL and 0 is still returned.  Results are released with
+ * audit_put_nd(). */
+static int audit_get_nd(char *path, struct nameidata **ndp, struct nameidata **ndw)
+{
+       struct nameidata *ndparent, *ndwatch;
+       int err;
+
+       ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL);
+       if (unlikely(!ndparent))
+               return -ENOMEM;
+
+       ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL);
+       if (unlikely(!ndwatch)) {
+               kfree(ndparent);
+               return -ENOMEM;
+       }
+
+       err = path_lookup(path, LOOKUP_PARENT, ndparent);
+       if (err) {
+               kfree(ndparent);
+               kfree(ndwatch);
+               return err;
+       }
+
+       err = path_lookup(path, 0, ndwatch);
+       if (err) {
+               /* lookup of the path itself failed: report "no watch
+                * nameidata" instead of an error */
+               kfree(ndwatch);
+               ndwatch = NULL;
+       }
+
+       *ndp = ndparent;
+       *ndw = ndwatch;
+
+       return 0;
+}
+
+/* Release resources used for watch path information.
+ *
+ * For each non-NULL argument the path reference is dropped with path_put()
+ * and the nameidata is freed.  Either or both arguments may be NULL. */
+static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
+{
+       if (ndp) {
+               path_put(&ndp->path);
+               kfree(ndp);
+       }
+       if (ndw) {
+               path_put(&ndw->path);
+               kfree(ndw);
+       }
+}
+
+/* Associate the given rule with an existing parent inotify_watch.
+ * Caller must hold audit_filter_mutex.
+ *
+ * If @parent already has a watch with the same path string, the rule's
+ * temporary watch is released and the rule is pointed at the existing watch
+ * (taking a reference on it).  Otherwise the rule's own watch is attached to
+ * @parent, taking a reference on the parent's inotify watch.  In both cases
+ * the rule is then added to the chosen watch's rules list. */
+static void audit_add_to_parent(struct audit_krule *krule,
+                               struct audit_parent *parent)
+{
+       struct audit_watch *w, *watch = krule->watch;
+       int watch_found = 0;
+
+       list_for_each_entry(w, &parent->watches, wlist) {
+               if (strcmp(watch->path, w->path))
+                       continue;
+
+               watch_found = 1;
+
+               /* put krule's and initial refs to temporary watch */
+               audit_put_watch(watch);
+               audit_put_watch(watch);
+
+               /* from here on "watch" refers to the existing entry */
+               audit_get_watch(w);
+               krule->watch = watch = w;
+               break;
+       }
+
+       if (!watch_found) {
+               get_inotify_watch(&parent->wdata);
+               watch->parent = parent;
+
+               list_add(&watch->wlist, &parent->watches);
+       }
+       list_add(&krule->rlist, &watch->rules);
+}
+
+/* Find a matching watch entry, or add this one.
+ * Caller must hold audit_filter_mutex.
+ *
+ * The mutex is dropped on entry and re-taken before every return, so the
+ * caller gets it back locked on success and on failure alike.  In between,
+ * the path is looked up, the watch's dev/ino are filled in when the path
+ * itself resolved, and the parent's inotify watch is found or created.
+ *
+ * Returns 0 on success, the error from audit_get_nd() or
+ * audit_init_parent(), or -ENOENT if the parent was flagged
+ * AUDIT_PARENT_INVALID by the time the mutex was re-taken.  krule->watch may
+ * point to a different watch afterwards (see audit_add_to_parent()). */
+int audit_add_watch(struct audit_krule *krule)
+{
+       struct audit_watch *watch = krule->watch;
+       struct inotify_watch *i_watch;
+       struct audit_parent *parent;
+       struct nameidata *ndp = NULL, *ndw = NULL;
+       int ret = 0;
+
+       mutex_unlock(&audit_filter_mutex);
+
+       /* Avoid calling path_lookup under audit_filter_mutex. */
+       ret = audit_get_nd(watch->path, &ndp, &ndw);
+       if (ret) {
+               /* caller expects mutex locked */
+               mutex_lock(&audit_filter_mutex);
+               goto error;
+       }
+
+       /* update watch filter fields */
+       if (ndw) {
+               watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev;
+               watch->ino = ndw->path.dentry->d_inode->i_ino;
+       }
+
+       /* The audit_filter_mutex must not be held during inotify calls because
+        * we hold it during inotify event callback processing.  If an existing
+        * inotify watch is found, inotify_find_watch() grabs a reference before
+        * returning.
+        */
+       if (inotify_find_watch(audit_ih, ndp->path.dentry->d_inode,
+                              &i_watch) < 0) {
+               parent = audit_init_parent(ndp);
+               if (IS_ERR(parent)) {
+                       /* caller expects mutex locked */
+                       mutex_lock(&audit_filter_mutex);
+                       ret = PTR_ERR(parent);
+                       goto error;
+               }
+       } else
+               parent = container_of(i_watch, struct audit_parent, wdata);
+
+       mutex_lock(&audit_filter_mutex);
+
+       /* parent was moved before we took audit_filter_mutex */
+       if (parent->flags & AUDIT_PARENT_INVALID)
+               ret = -ENOENT;
+       else
+               audit_add_to_parent(krule, parent);
+
+       /* match get in audit_init_parent or inotify_find_watch */
+       put_inotify_watch(&parent->wdata);
+
+error:
+       audit_put_nd(ndp, ndw);         /* NULL args OK */
+       return ret;
+
+}
+
+/* Detach @krule from its watch.
+ *
+ * The rule is unlinked from the watch's rules list.  If that leaves the
+ * watch without rules the watch is removed, and if the parent then has no
+ * watches left the parent's inotify watch is pinned and the parent is queued
+ * on @list (through its ilist member) for later un-registration.
+ *
+ * NOTE(review): the comment below implies the caller holds
+ * audit_filter_mutex and releases it before un-registering - confirm
+ * against the caller. */
+void audit_remove_watch_rule(struct audit_krule *krule, struct list_head *list)
+{
+       struct audit_watch *watch = krule->watch;
+       struct audit_parent *parent = watch->parent;
+
+       list_del(&krule->rlist);
+
+       if (list_empty(&watch->rules)) {
+               audit_remove_watch(watch);
+
+               if (list_empty(&parent->watches)) {
+                       /* Put parent on the inotify un-registration
+                        * list.  Grab a reference before releasing
+                        * audit_filter_mutex, to be released in
+                        * audit_inotify_unregister().
+                        * If filesystem is going away, just leave
+                        * the sucker alone, eviction will take
+                        * care of it. */
+                       if (pin_inotify_watch(&parent->wdata))
+                               list_add(&parent->ilist, list);
+               }
+       }
+}
+
+/* Update watch data in audit rules based on inotify events.
+ *
+ * Dispatches on @mask; the first matching branch wins:
+ *  - IN_CREATE / IN_MOVED_TO with a non-NULL @inode: point the watch for
+ *    @dname at the inode's device and inode number.
+ *  - IN_DELETE / IN_MOVED_FROM: reset the watch for @dname to the -1
+ *    placeholder dev/ino, with the invalidating flag set.
+ *  - IN_DELETE_SELF / IN_UNMOUNT: drop all watches of the parent.
+ *  - IN_MOVE_SELF: drop all watches and remove the inotify watch by hand.
+ *  - IN_IGNORED: drop a reference on the inotify watch. */
+static void audit_handle_ievent(struct inotify_watch *i_watch, u32 wd, u32 mask,
+                        u32 cookie, const char *dname, struct inode *inode)
+{
+       struct audit_parent *parent;
+
+       parent = container_of(i_watch, struct audit_parent, wdata);
+
+       if (mask & (IN_CREATE|IN_MOVED_TO) && inode)
+               audit_update_watch(parent, dname, inode->i_sb->s_dev,
+                                  inode->i_ino, 0);
+       else if (mask & (IN_DELETE|IN_MOVED_FROM))
+               audit_update_watch(parent, dname, (dev_t)-1, (unsigned long)-1, 1);
+       /* inotify automatically removes the watch and sends IN_IGNORED */
+       else if (mask & (IN_DELETE_SELF|IN_UNMOUNT))
+               audit_remove_parent_watches(parent);
+       /* inotify does not remove the watch, so remove it manually */
+       else if(mask & IN_MOVE_SELF) {
+               audit_remove_parent_watches(parent);
+               inotify_remove_watch_locked(audit_ih, i_watch);
+       } else if (mask & IN_IGNORED)
+               put_inotify_watch(i_watch);
+}
+
+/* Callbacks passed to inotify_init() for the audit inotify handle: events
+ * are delivered to audit_handle_ievent(), and audit_free_parent() is the
+ * destroy_watch callback. */
+static const struct inotify_operations audit_inotify_ops = {
+       .handle_event   = audit_handle_ievent,
+       .destroy_watch  = audit_free_parent,
+};
+
+/* Create the inotify handle (audit_ih) used for all audit watches.
+ *
+ * inotify_init() reports failure through an ERR_PTR value.  If
+ * audit_panic() returns in that case, audit_ih is reset to NULL so that the
+ * !audit_ih test in audit_to_watch() rejects watch rules with -EOPNOTSUPP
+ * instead of letting an error pointer reach later inotify calls.
+ *
+ * Always returns 0.  Registered via subsys_initcall(). */
+static int __init audit_watch_init(void)
+{
+       audit_ih = inotify_init(&audit_inotify_ops);
+       if (IS_ERR(audit_ih)) {
+               audit_panic("cannot initialize inotify handle");
+               /* never leave an error pointer in the global handle */
+               audit_ih = NULL;
+       }
+       return 0;
+}
+subsys_initcall(audit_watch_init);
index 713098ee5a0243c61a30e946f6c34be44de653ca..a70604047f3c9061e81622a3b399b085fe5ac815 100644 (file)
@@ -27,7 +27,6 @@
 #include <linux/namei.h>
 #include <linux/netlink.h>
 #include <linux/sched.h>
-#include <linux/inotify.h>
 #include <linux/security.h>
 #include "audit.h"
 
  *             be written directly provided audit_filter_mutex is held.
  */
 
-/*
- * Reference counting:
- *
- * audit_parent: lifetime is from audit_init_parent() to receipt of an IN_IGNORED
- *     event.  Each audit_watch holds a reference to its associated parent.
- *
- * audit_watch: if added to lists, lifetime is from audit_init_watch() to
- *     audit_remove_watch().  Additionally, an audit_watch may exist
- *     temporarily to assist in searching existing filter data.  Each
- *     audit_krule holds a reference to its associated watch.
- */
-
-struct audit_parent {
-       struct list_head        ilist;  /* entry in inotify registration list */
-       struct list_head        watches; /* associated watches */
-       struct inotify_watch    wdata;  /* inotify watch data */
-       unsigned                flags;  /* status flags */
-};
-
-/*
- * audit_parent status flags:
- *
- * AUDIT_PARENT_INVALID - set anytime rules/watches are auto-removed due to
- * a filesystem event to ensure we're adding audit watches to a valid parent.
- * Technically not needed for IN_DELETE_SELF or IN_UNMOUNT events, as we cannot
- * receive them while we have nameidata, but must be used for IN_MOVE_SELF which
- * we can receive while holding nameidata.
- */
-#define AUDIT_PARENT_INVALID   0x001
-
 /* Audit filter lists, defined in <linux/audit.h> */
 struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
        LIST_HEAD_INIT(audit_filter_list[0]),
@@ -97,41 +66,6 @@ static struct list_head audit_rules_list[AUDIT_NR_FILTERS] = {
 
 DEFINE_MUTEX(audit_filter_mutex);
 
-/* Inotify events we care about. */
-#define AUDIT_IN_WATCH IN_MOVE|IN_CREATE|IN_DELETE|IN_DELETE_SELF|IN_MOVE_SELF
-
-void audit_free_parent(struct inotify_watch *i_watch)
-{
-       struct audit_parent *parent;
-
-       parent = container_of(i_watch, struct audit_parent, wdata);
-       WARN_ON(!list_empty(&parent->watches));
-       kfree(parent);
-}
-
-static inline void audit_get_watch(struct audit_watch *watch)
-{
-       atomic_inc(&watch->count);
-}
-
-static void audit_put_watch(struct audit_watch *watch)
-{
-       if (atomic_dec_and_test(&watch->count)) {
-               WARN_ON(watch->parent);
-               WARN_ON(!list_empty(&watch->rules));
-               kfree(watch->path);
-               kfree(watch);
-       }
-}
-
-static void audit_remove_watch(struct audit_watch *watch)
-{
-       list_del(&watch->wlist);
-       put_inotify_watch(&watch->parent->wdata);
-       watch->parent = NULL;
-       audit_put_watch(watch); /* match initial get */
-}
-
 static inline void audit_free_rule(struct audit_entry *e)
 {
        int i;
@@ -156,50 +90,6 @@ void audit_free_rule_rcu(struct rcu_head *head)
        audit_free_rule(e);
 }
 
-/* Initialize a parent watch entry. */
-static struct audit_parent *audit_init_parent(struct nameidata *ndp)
-{
-       struct audit_parent *parent;
-       s32 wd;
-
-       parent = kzalloc(sizeof(*parent), GFP_KERNEL);
-       if (unlikely(!parent))
-               return ERR_PTR(-ENOMEM);
-
-       INIT_LIST_HEAD(&parent->watches);
-       parent->flags = 0;
-
-       inotify_init_watch(&parent->wdata);
-       /* grab a ref so inotify watch hangs around until we take audit_filter_mutex */
-       get_inotify_watch(&parent->wdata);
-       wd = inotify_add_watch(audit_ih, &parent->wdata,
-                              ndp->path.dentry->d_inode, AUDIT_IN_WATCH);
-       if (wd < 0) {
-               audit_free_parent(&parent->wdata);
-               return ERR_PTR(wd);
-       }
-
-       return parent;
-}
-
-/* Initialize a watch entry. */
-static struct audit_watch *audit_init_watch(char *path)
-{
-       struct audit_watch *watch;
-
-       watch = kzalloc(sizeof(*watch), GFP_KERNEL);
-       if (unlikely(!watch))
-               return ERR_PTR(-ENOMEM);
-
-       INIT_LIST_HEAD(&watch->rules);
-       atomic_set(&watch->count, 1);
-       watch->path = path;
-       watch->dev = (dev_t)-1;
-       watch->ino = (unsigned long)-1;
-
-       return watch;
-}
-
 /* Initialize an audit filterlist entry. */
 static inline struct audit_entry *audit_init_entry(u32 field_count)
 {
@@ -260,31 +150,6 @@ static inline int audit_to_inode(struct audit_krule *krule,
        return 0;
 }
 
-/* Translate a watch string to kernel respresentation. */
-static int audit_to_watch(struct audit_krule *krule, char *path, int len,
-                         u32 op)
-{
-       struct audit_watch *watch;
-
-       if (!audit_ih)
-               return -EOPNOTSUPP;
-
-       if (path[0] != '/' || path[len-1] == '/' ||
-           krule->listnr != AUDIT_FILTER_EXIT ||
-           op != Audit_equal ||
-           krule->inode_f || krule->watch || krule->tree)
-               return -EINVAL;
-
-       watch = audit_init_watch(path);
-       if (IS_ERR(watch))
-               return PTR_ERR(watch);
-
-       audit_get_watch(watch);
-       krule->watch = watch;
-
-       return 0;
-}
-
 static __u32 *classes[AUDIT_SYSCALL_CLASSES];
 
 int __init audit_register_class(int class, unsigned *list)
@@ -766,7 +631,8 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
                        break;
                case AUDIT_WATCH:
                        data->buflen += data->values[i] =
-                               audit_pack_string(&bufp, krule->watch->path);
+                               audit_pack_string(&bufp,
+                                                 audit_watch_path(krule->watch));
                        break;
                case AUDIT_DIR:
                        data->buflen += data->values[i] =
@@ -818,7 +684,8 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
                                return 1;
                        break;
                case AUDIT_WATCH:
-                       if (strcmp(a->watch->path, b->watch->path))
+                       if (strcmp(audit_watch_path(a->watch),
+                                  audit_watch_path(b->watch)))
                                return 1;
                        break;
                case AUDIT_DIR:
@@ -844,32 +711,6 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
        return 0;
 }
 
-/* Duplicate the given audit watch.  The new watch's rules list is initialized
- * to an empty list and wlist is undefined. */
-static struct audit_watch *audit_dupe_watch(struct audit_watch *old)
-{
-       char *path;
-       struct audit_watch *new;
-
-       path = kstrdup(old->path, GFP_KERNEL);
-       if (unlikely(!path))
-               return ERR_PTR(-ENOMEM);
-
-       new = audit_init_watch(path);
-       if (IS_ERR(new)) {
-               kfree(path);
-               goto out;
-       }
-
-       new->dev = old->dev;
-       new->ino = old->ino;
-       get_inotify_watch(&old->parent->wdata);
-       new->parent = old->parent;
-
-out:
-       return new;
-}
-
 /* Duplicate LSM field information.  The lsm_rule is opaque, so must be
  * re-initialized. */
 static inline int audit_dupe_lsm_field(struct audit_field *df,
@@ -904,8 +745,8 @@ static inline int audit_dupe_lsm_field(struct audit_field *df,
  * rule with the new rule in the filterlist, then free the old rule.
  * The rlist element is undefined; list manipulations are handled apart from
  * the initial copy. */
-static struct audit_entry *audit_dupe_rule(struct audit_krule *old,
-                                          struct audit_watch *watch)
+struct audit_entry *audit_dupe_rule(struct audit_krule *old,
+                                   struct audit_watch *watch)
 {
        u32 fcount = old->field_count;
        struct audit_entry *entry;
@@ -977,137 +818,6 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old,
        return entry;
 }
 
-/* Update inode info in audit rules based on filesystem event. */
-static void audit_update_watch(struct audit_parent *parent,
-                              const char *dname, dev_t dev,
-                              unsigned long ino, unsigned invalidating)
-{
-       struct audit_watch *owatch, *nwatch, *nextw;
-       struct audit_krule *r, *nextr;
-       struct audit_entry *oentry, *nentry;
-
-       mutex_lock(&audit_filter_mutex);
-       list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
-               if (audit_compare_dname_path(dname, owatch->path, NULL))
-                       continue;
-
-               /* If the update involves invalidating rules, do the inode-based
-                * filtering now, so we don't omit records. */
-               if (invalidating && current->audit_context)
-                       audit_filter_inodes(current, current->audit_context);
-
-               nwatch = audit_dupe_watch(owatch);
-               if (IS_ERR(nwatch)) {
-                       mutex_unlock(&audit_filter_mutex);
-                       audit_panic("error updating watch, skipping");
-                       return;
-               }
-               nwatch->dev = dev;
-               nwatch->ino = ino;
-
-               list_for_each_entry_safe(r, nextr, &owatch->rules, rlist) {
-
-                       oentry = container_of(r, struct audit_entry, rule);
-                       list_del(&oentry->rule.rlist);
-                       list_del_rcu(&oentry->list);
-
-                       nentry = audit_dupe_rule(&oentry->rule, nwatch);
-                       if (IS_ERR(nentry)) {
-                               list_del(&oentry->rule.list);
-                               audit_panic("error updating watch, removing");
-                       } else {
-                               int h = audit_hash_ino((u32)ino);
-                               list_add(&nentry->rule.rlist, &nwatch->rules);
-                               list_add_rcu(&nentry->list, &audit_inode_hash[h]);
-                               list_replace(&oentry->rule.list,
-                                            &nentry->rule.list);
-                       }
-
-                       call_rcu(&oentry->rcu, audit_free_rule_rcu);
-               }
-
-               if (audit_enabled) {
-                       struct audit_buffer *ab;
-                       ab = audit_log_start(NULL, GFP_NOFS,
-                               AUDIT_CONFIG_CHANGE);
-                       audit_log_format(ab, "auid=%u ses=%u",
-                               audit_get_loginuid(current),
-                               audit_get_sessionid(current));
-                       audit_log_format(ab,
-                               " op=updated rules specifying path=");
-                       audit_log_untrustedstring(ab, owatch->path);
-                       audit_log_format(ab, " with dev=%u ino=%lu\n",
-                                dev, ino);
-                       audit_log_format(ab, " list=%d res=1", r->listnr);
-                       audit_log_end(ab);
-               }
-               audit_remove_watch(owatch);
-               goto add_watch_to_parent; /* event applies to a single watch */
-       }
-       mutex_unlock(&audit_filter_mutex);
-       return;
-
-add_watch_to_parent:
-       list_add(&nwatch->wlist, &parent->watches);
-       mutex_unlock(&audit_filter_mutex);
-       return;
-}
-
-/* Remove all watches & rules associated with a parent that is going away. */
-static void audit_remove_parent_watches(struct audit_parent *parent)
-{
-       struct audit_watch *w, *nextw;
-       struct audit_krule *r, *nextr;
-       struct audit_entry *e;
-
-       mutex_lock(&audit_filter_mutex);
-       parent->flags |= AUDIT_PARENT_INVALID;
-       list_for_each_entry_safe(w, nextw, &parent->watches, wlist) {
-               list_for_each_entry_safe(r, nextr, &w->rules, rlist) {
-                       e = container_of(r, struct audit_entry, rule);
-                       if (audit_enabled) {
-                               struct audit_buffer *ab;
-                               ab = audit_log_start(NULL, GFP_NOFS,
-                                       AUDIT_CONFIG_CHANGE);
-                               audit_log_format(ab, "auid=%u ses=%u",
-                                       audit_get_loginuid(current),
-                                       audit_get_sessionid(current));
-                               audit_log_format(ab, " op=remove rule path=");
-                               audit_log_untrustedstring(ab, w->path);
-                               if (r->filterkey) {
-                                       audit_log_format(ab, " key=");
-                                       audit_log_untrustedstring(ab,
-                                                       r->filterkey);
-                               } else
-                                       audit_log_format(ab, " key=(null)");
-                               audit_log_format(ab, " list=%d res=1",
-                                       r->listnr);
-                               audit_log_end(ab);
-                       }
-                       list_del(&r->rlist);
-                       list_del(&r->list);
-                       list_del_rcu(&e->list);
-                       call_rcu(&e->rcu, audit_free_rule_rcu);
-               }
-               audit_remove_watch(w);
-       }
-       mutex_unlock(&audit_filter_mutex);
-}
-
-/* Unregister inotify watches for parents on in_list.
- * Generates an IN_IGNORED event. */
-static void audit_inotify_unregister(struct list_head *in_list)
-{
-       struct audit_parent *p, *n;
-
-       list_for_each_entry_safe(p, n, in_list, ilist) {
-               list_del(&p->ilist);
-               inotify_rm_watch(audit_ih, &p->wdata);
-               /* the unpin matching the pin in audit_do_del_rule() */
-               unpin_inotify_watch(&p->wdata);
-       }
-}
-
 /* Find an existing audit rule.
  * Caller must hold audit_filter_mutex to prevent stale rule data. */
 static struct audit_entry *audit_find_rule(struct audit_entry *entry,
@@ -1145,134 +855,6 @@ out:
        return found;
 }
 
-/* Get path information necessary for adding watches. */
-static int audit_get_nd(char *path, struct nameidata **ndp,
-                       struct nameidata **ndw)
-{
-       struct nameidata *ndparent, *ndwatch;
-       int err;
-
-       ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL);
-       if (unlikely(!ndparent))
-               return -ENOMEM;
-
-       ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL);
-       if (unlikely(!ndwatch)) {
-               kfree(ndparent);
-               return -ENOMEM;
-       }
-
-       err = path_lookup(path, LOOKUP_PARENT, ndparent);
-       if (err) {
-               kfree(ndparent);
-               kfree(ndwatch);
-               return err;
-       }
-
-       err = path_lookup(path, 0, ndwatch);
-       if (err) {
-               kfree(ndwatch);
-               ndwatch = NULL;
-       }
-
-       *ndp = ndparent;
-       *ndw = ndwatch;
-
-       return 0;
-}
-
-/* Release resources used for watch path information. */
-static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
-{
-       if (ndp) {
-               path_put(&ndp->path);
-               kfree(ndp);
-       }
-       if (ndw) {
-               path_put(&ndw->path);
-               kfree(ndw);
-       }
-}
-
-/* Associate the given rule with an existing parent inotify_watch.
- * Caller must hold audit_filter_mutex. */
-static void audit_add_to_parent(struct audit_krule *krule,
-                               struct audit_parent *parent)
-{
-       struct audit_watch *w, *watch = krule->watch;
-       int watch_found = 0;
-
-       list_for_each_entry(w, &parent->watches, wlist) {
-               if (strcmp(watch->path, w->path))
-                       continue;
-
-               watch_found = 1;
-
-               /* put krule's and initial refs to temporary watch */
-               audit_put_watch(watch);
-               audit_put_watch(watch);
-
-               audit_get_watch(w);
-               krule->watch = watch = w;
-               break;
-       }
-
-       if (!watch_found) {
-               get_inotify_watch(&parent->wdata);
-               watch->parent = parent;
-
-               list_add(&watch->wlist, &parent->watches);
-       }
-       list_add(&krule->rlist, &watch->rules);
-}
-
-/* Find a matching watch entry, or add this one.
- * Caller must hold audit_filter_mutex. */
-static int audit_add_watch(struct audit_krule *krule, struct nameidata *ndp,
-                          struct nameidata *ndw)
-{
-       struct audit_watch *watch = krule->watch;
-       struct inotify_watch *i_watch;
-       struct audit_parent *parent;
-       int ret = 0;
-
-       /* update watch filter fields */
-       if (ndw) {
-               watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev;
-               watch->ino = ndw->path.dentry->d_inode->i_ino;
-       }
-
-       /* The audit_filter_mutex must not be held during inotify calls because
-        * we hold it during inotify event callback processing.  If an existing
-        * inotify watch is found, inotify_find_watch() grabs a reference before
-        * returning.
-        */
-       mutex_unlock(&audit_filter_mutex);
-
-       if (inotify_find_watch(audit_ih, ndp->path.dentry->d_inode,
-                              &i_watch) < 0) {
-               parent = audit_init_parent(ndp);
-               if (IS_ERR(parent)) {
-                       /* caller expects mutex locked */
-                       mutex_lock(&audit_filter_mutex);
-                       return PTR_ERR(parent);
-               }
-       } else
-               parent = container_of(i_watch, struct audit_parent, wdata);
-
-       mutex_lock(&audit_filter_mutex);
-
-       /* parent was moved before we took audit_filter_mutex */
-       if (parent->flags & AUDIT_PARENT_INVALID)
-               ret = -ENOENT;
-       else
-               audit_add_to_parent(krule, parent);
-
-       /* match get in audit_init_parent or inotify_find_watch */
-       put_inotify_watch(&parent->wdata);
-       return ret;
-}
-
 static u64 prio_low = ~0ULL/2;
 static u64 prio_high = ~0ULL/2 - 1;
 
@@ -1282,7 +864,6 @@ static inline int audit_add_rule(struct audit_entry *entry)
        struct audit_entry *e;
        struct audit_watch *watch = entry->rule.watch;
        struct audit_tree *tree = entry->rule.tree;
-       struct nameidata *ndp = NULL, *ndw = NULL;
        struct list_head *list;
        int h, err;
 #ifdef CONFIG_AUDITSYSCALL
@@ -1296,8 +877,8 @@ static inline int audit_add_rule(struct audit_entry *entry)
 
        mutex_lock(&audit_filter_mutex);
        e = audit_find_rule(entry, &list);
-       mutex_unlock(&audit_filter_mutex);
        if (e) {
+               mutex_unlock(&audit_filter_mutex);
                err = -EEXIST;
                /* normally audit_add_tree_rule() will free it on failure */
                if (tree)
@@ -1305,22 +886,16 @@ static inline int audit_add_rule(struct audit_entry *entry)
                goto error;
        }
 
-       /* Avoid calling path_lookup under audit_filter_mutex. */
-       if (watch) {
-               err = audit_get_nd(watch->path, &ndp, &ndw);
-               if (err)
-                       goto error;
-       }
-
-       mutex_lock(&audit_filter_mutex);
        if (watch) {
                /* audit_filter_mutex is dropped and re-taken during this call */
-               err = audit_add_watch(&entry->rule, ndp, ndw);
+               err = audit_add_watch(&entry->rule);
                if (err) {
                        mutex_unlock(&audit_filter_mutex);
                        goto error;
                }
-               h = audit_hash_ino((u32)watch->ino);
+               /* entry->rule.watch may have changed during audit_add_watch() */
+               watch = entry->rule.watch;
+               h = audit_hash_ino((u32)audit_watch_inode(watch));
                list = &audit_inode_hash[h];
        }
        if (tree) {
@@ -1358,11 +933,9 @@ static inline int audit_add_rule(struct audit_entry *entry)
 #endif
        mutex_unlock(&audit_filter_mutex);
 
-       audit_put_nd(ndp, ndw);         /* NULL args OK */
        return 0;
 
 error:
-       audit_put_nd(ndp, ndw);         /* NULL args OK */
        if (watch)
                audit_put_watch(watch); /* tmp watch, matches initial get */
        return err;
@@ -1372,7 +945,7 @@ error:
 static inline int audit_del_rule(struct audit_entry *entry)
 {
        struct audit_entry  *e;
-       struct audit_watch *watch, *tmp_watch = entry->rule.watch;
+       struct audit_watch *watch = entry->rule.watch;
        struct audit_tree *tree = entry->rule.tree;
        struct list_head *list;
        LIST_HEAD(inotify_list);
@@ -1394,29 +967,8 @@ static inline int audit_del_rule(struct audit_entry *entry)
                goto out;
        }
 
-       watch = e->rule.watch;
-       if (watch) {
-               struct audit_parent *parent = watch->parent;
-
-               list_del(&e->rule.rlist);
-
-               if (list_empty(&watch->rules)) {
-                       audit_remove_watch(watch);
-
-                       if (list_empty(&parent->watches)) {
-                               /* Put parent on the inotify un-registration
-                                * list.  Grab a reference before releasing
-                                * audit_filter_mutex, to be released in
-                                * audit_inotify_unregister().
-                                * If filesystem is going away, just leave
-                                * the sucker alone, eviction will take
-                                * care of it.
-                                */
-                               if (pin_inotify_watch(&parent->wdata))
-                                       list_add(&parent->ilist, &inotify_list);
-                       }
-               }
-       }
+       if (e->rule.watch)
+               audit_remove_watch_rule(&e->rule, &inotify_list);
 
        if (e->rule.tree)
                audit_remove_tree_rule(&e->rule);
@@ -1438,8 +990,8 @@ static inline int audit_del_rule(struct audit_entry *entry)
                audit_inotify_unregister(&inotify_list);
 
 out:
-       if (tmp_watch)
-               audit_put_watch(tmp_watch); /* match initial get */
+       if (watch)
+               audit_put_watch(watch); /* match initial get */
        if (tree)
                audit_put_tree(tree);   /* that's the temporary one */
 
@@ -1527,11 +1079,9 @@ static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid,
                        security_release_secctx(ctx, len);
                }
        }
-       audit_log_format(ab, " op=%s rule key=", action);
-       if (rule->filterkey)
-               audit_log_untrustedstring(ab, rule->filterkey);
-       else
-               audit_log_format(ab, "(null)");
+       audit_log_format(ab, " op=");
+       audit_log_string(ab, action);
+       audit_log_key(ab, rule->filterkey);
        audit_log_format(ab, " list=%d res=%d", rule->listnr, res);
        audit_log_end(ab);
 }
@@ -1595,7 +1145,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
                        return PTR_ERR(entry);
 
                err = audit_add_rule(entry);
-               audit_log_rule_change(loginuid, sessionid, sid, "add",
+               audit_log_rule_change(loginuid, sessionid, sid, "add rule",
                                      &entry->rule, !err);
 
                if (err)
@@ -1611,7 +1161,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
                        return PTR_ERR(entry);
 
                err = audit_del_rule(entry);
-               audit_log_rule_change(loginuid, sessionid, sid, "remove",
+               audit_log_rule_change(loginuid, sessionid, sid, "remove rule",
                                      &entry->rule, !err);
 
                audit_free_rule(entry);
@@ -1793,7 +1343,7 @@ static int update_lsm_rule(struct audit_krule *r)
                list_del(&r->list);
        } else {
                if (watch) {
-                       list_add(&nentry->rule.rlist, &watch->rules);
+                       list_add(&nentry->rule.rlist, audit_watch_rules(watch));
                        list_del(&r->rlist);
                } else if (tree)
                        list_replace_init(&r->rlist, &nentry->rule.rlist);
@@ -1829,27 +1379,3 @@ int audit_update_lsm_rules(void)
 
        return err;
 }
-
-/* Update watch data in audit rules based on inotify events. */
-void audit_handle_ievent(struct inotify_watch *i_watch, u32 wd, u32 mask,
-                        u32 cookie, const char *dname, struct inode *inode)
-{
-       struct audit_parent *parent;
-
-       parent = container_of(i_watch, struct audit_parent, wdata);
-
-       if (mask & (IN_CREATE|IN_MOVED_TO) && inode)
-               audit_update_watch(parent, dname, inode->i_sb->s_dev,
-                                  inode->i_ino, 0);
-       else if (mask & (IN_DELETE|IN_MOVED_FROM))
-               audit_update_watch(parent, dname, (dev_t)-1, (unsigned long)-1, 1);
-       /* inotify automatically removes the watch and sends IN_IGNORED */
-       else if (mask & (IN_DELETE_SELF|IN_UNMOUNT))
-               audit_remove_parent_watches(parent);
-       /* inotify does not remove the watch, so remove it manually */
-       else if(mask & IN_MOVE_SELF) {
-               audit_remove_parent_watches(parent);
-               inotify_remove_watch_locked(audit_ih, i_watch);
-       } else if (mask & IN_IGNORED)
-               put_inotify_watch(i_watch);
-}
index 7d6ac7c1f414479ada4a58af0c4b9e241051062e..68d3c6a0ecd635bbeba49c5de2d3e89b50315897 100644 (file)
@@ -199,6 +199,7 @@ struct audit_context {
 
        struct audit_tree_refs *trees, *first_trees;
        int tree_count;
+       struct list_head killed_trees;
 
        int type;
        union {
@@ -548,9 +549,9 @@ static int audit_filter_rules(struct task_struct *tsk,
                        }
                        break;
                case AUDIT_WATCH:
-                       if (name && rule->watch->ino != (unsigned long)-1)
-                               result = (name->dev == rule->watch->dev &&
-                                         name->ino == rule->watch->ino);
+                       if (name && audit_watch_inode(rule->watch) != (unsigned long)-1)
+                               result = (name->dev == audit_watch_dev(rule->watch) &&
+                                         name->ino == audit_watch_inode(rule->watch));
                        break;
                case AUDIT_DIR:
                        if (ctx)
@@ -853,6 +854,7 @@ static inline struct audit_context *audit_alloc_context(enum audit_state state)
        if (!(context = kmalloc(sizeof(*context), GFP_KERNEL)))
                return NULL;
        audit_zero_context(context, state);
+       INIT_LIST_HEAD(&context->killed_trees);
        return context;
 }
 
@@ -1024,8 +1026,8 @@ static int audit_log_single_execve_arg(struct audit_context *context,
 {
        char arg_num_len_buf[12];
        const char __user *tmp_p = p;
-       /* how many digits are in arg_num? 3 is the length of " a=" */
-       size_t arg_num_len = snprintf(arg_num_len_buf, 12, "%d", arg_num) + 3;
+       /* how many digits are in arg_num? 5 is the length of ' a=""' */
+       size_t arg_num_len = snprintf(arg_num_len_buf, 12, "%d", arg_num) + 5;
        size_t len, len_left, to_send;
        size_t max_execve_audit_len = MAX_EXECVE_AUDIT_LEN;
        unsigned int i, has_cntl = 0, too_long = 0;
@@ -1137,7 +1139,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
                if (has_cntl)
                        audit_log_n_hex(*ab, buf, to_send);
                else
-                       audit_log_format(*ab, "\"%s\"", buf);
+                       audit_log_string(*ab, buf);
 
                p += to_send;
                len_left -= to_send;
@@ -1372,11 +1374,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 
 
        audit_log_task_info(ab, tsk);
-       if (context->filterkey) {
-               audit_log_format(ab, " key=");
-               audit_log_untrustedstring(ab, context->filterkey);
-       } else
-               audit_log_format(ab, " key=(null)");
+       audit_log_key(ab, context->filterkey);
        audit_log_end(ab);
 
        for (aux = context->aux; aux; aux = aux->next) {
@@ -1549,6 +1547,8 @@ void audit_free(struct task_struct *tsk)
        /* that can happen only if we are called from do_exit() */
        if (context->in_syscall && context->current_state == AUDIT_RECORD_CONTEXT)
                audit_log_exit(context, tsk);
+       if (!list_empty(&context->killed_trees))
+               audit_kill_trees(&context->killed_trees);
 
        audit_free_context(context);
 }
@@ -1692,6 +1692,9 @@ void audit_syscall_exit(int valid, long return_code)
        context->in_syscall = 0;
        context->prio = context->state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0;
 
+       if (!list_empty(&context->killed_trees))
+               audit_kill_trees(&context->killed_trees);
+
        if (context->previous) {
                struct audit_context *new_context = context->previous;
                context->previous  = NULL;
@@ -2525,3 +2528,11 @@ void audit_core_dumps(long signr)
        audit_log_format(ab, " sig=%ld", signr);
        audit_log_end(ab);
 }
+
+struct list_head *audit_killed_trees(void)
+{
+       struct audit_context *ctx = current->audit_context;
+       if (likely(!ctx || !ctx->in_syscall))
+               return NULL;
+       return &ctx->killed_trees;
+}
index 80b5ce716596a95b797a00c417b5a514f9d5d2a2..1c337112335c60d6831d7b5cf1ee9a26cba54a77 100644 (file)
@@ -284,6 +284,25 @@ void put_futex_key(int fshared, union futex_key *key)
        drop_futex_key_refs(key);
 }
 
+/*
+ * fault_in_user_writeable - fault in user address and verify RW access
+ * @uaddr:     pointer to faulting user space address
+ *
+ * Slow path to fixup the fault we just took in the atomic write
+ * access to @uaddr.
+ *
+ * We have no generic implementation of a non destructive write to the
+ * user address. We know that we faulted in the atomic pagefault
+ * disabled section so we can as well avoid the #PF overhead by
+ * calling get_user_pages() right away.
+ */
+static int fault_in_user_writeable(u32 __user *uaddr)
+{
+       int ret = get_user_pages(current, current->mm, (unsigned long)uaddr,
+                                sizeof(*uaddr), 1, 0, NULL, NULL);
+       return ret < 0 ? ret : 0;
+}
+
 /**
  * futex_top_waiter() - Return the highest priority waiter on a futex
  * @hb:     the hash bucket the futex_q's reside in
@@ -896,7 +915,6 @@ retry:
 retry_private:
        op_ret = futex_atomic_op_inuser(op, uaddr2);
        if (unlikely(op_ret < 0)) {
-               u32 dummy;
 
                double_unlock_hb(hb1, hb2);
 
@@ -914,7 +932,7 @@ retry_private:
                        goto out_put_keys;
                }
 
-               ret = get_user(dummy, uaddr2);
+               ret = fault_in_user_writeable(uaddr2);
                if (ret)
                        goto out_put_keys;
 
@@ -1204,7 +1222,7 @@ retry_private:
                        double_unlock_hb(hb1, hb2);
                        put_futex_key(fshared, &key2);
                        put_futex_key(fshared, &key1);
-                       ret = get_user(curval2, uaddr2);
+                       ret = fault_in_user_writeable(uaddr2);
                        if (!ret)
                                goto retry;
                        goto out;
@@ -1482,7 +1500,7 @@ retry:
 handle_fault:
        spin_unlock(q->lock_ptr);
 
-       ret = get_user(uval, uaddr);
+       ret = fault_in_user_writeable(uaddr);
 
        spin_lock(q->lock_ptr);
 
@@ -1807,7 +1825,6 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
 {
        struct hrtimer_sleeper timeout, *to = NULL;
        struct futex_hash_bucket *hb;
-       u32 uval;
        struct futex_q q;
        int res, ret;
 
@@ -1909,16 +1926,9 @@ out:
        return ret != -EINTR ? ret : -ERESTARTNOINTR;
 
 uaddr_faulted:
-       /*
-        * We have to r/w  *(int __user *)uaddr, and we have to modify it
-        * atomically.  Therefore, if we continue to fault after get_user()
-        * below, we need to handle the fault ourselves, while still holding
-        * the mmap_sem.  This can occur if the uaddr is under contention as
-        * we have to drop the mmap_sem in order to call get_user().
-        */
        queue_unlock(&q, hb);
 
-       ret = get_user(uval, uaddr);
+       ret = fault_in_user_writeable(uaddr);
        if (ret)
                goto out_put_key;
 
@@ -2013,17 +2023,10 @@ out:
        return ret;
 
 pi_faulted:
-       /*
-        * We have to r/w  *(int __user *)uaddr, and we have to modify it
-        * atomically.  Therefore, if we continue to fault after get_user()
-        * below, we need to handle the fault ourselves, while still holding
-        * the mmap_sem.  This can occur if the uaddr is under contention as
-        * we have to drop the mmap_sem in order to call get_user().
-        */
        spin_unlock(&hb->lock);
        put_futex_key(fshared, &key);
 
-       ret = get_user(uval, uaddr);
+       ret = fault_in_user_writeable(uaddr);
        if (!ret)
                goto retry;
 
index aecc9cdfdfce9111ef55de91a38afb164d6fad87..5d714f8fb30333f6de336e555f944658bfa1dc16 100644 (file)
@@ -1153,10 +1153,10 @@ again:
                         * properly detect and handle allocation failures.
                         *
                         * We most definitely don't want callers attempting to
-                        * allocate greater than single-page units with
+                        * allocate greater than order-1 page units with
                         * __GFP_NOFAIL.
                         */
-                       WARN_ON_ONCE(order > 0);
+                       WARN_ON_ONCE(order > 1);
                }
                spin_lock_irqsave(&zone->lock, flags);
                page = __rmqueue(zone, order, migratetype);
index e89d7ec18eda46d42abf68c3994db0e33cfded06..d713239ce2ce72be6097a1ad478ce1b04798773d 100644 (file)
@@ -1558,6 +1558,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, int mode,
                spin_lock_init(&info->lock);
                info->flags = flags & VM_NORESERVE;
                INIT_LIST_HEAD(&info->swaplist);
+               cache_no_acl(inode);
 
                switch (mode & S_IFMT) {
                default:
@@ -2388,7 +2389,6 @@ static void shmem_destroy_inode(struct inode *inode)
                /* only struct inode is valid if it's an inline symlink */
                mpol_free_shared_policy(&SHMEM_I(inode)->policy);
        }
-       shmem_acl_destroy_inode(inode);
        kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
 }
 
@@ -2397,10 +2397,6 @@ static void init_once(void *foo)
        struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
 
        inode_init_once(&p->vfs_inode);
-#ifdef CONFIG_TMPFS_POSIX_ACL
-       p->i_acl = NULL;
-       p->i_default_acl = NULL;
-#endif
 }
 
 static int init_inodecache(void)
index 8e5aadd7dcd6aef09f581d3ff925072cc494b0c0..606a8e757a422e83d2c714dfb9423c4f40b727dd 100644 (file)
@@ -22,11 +22,11 @@ shmem_get_acl(struct inode *inode, int type)
        spin_lock(&inode->i_lock);
        switch(type) {
                case ACL_TYPE_ACCESS:
-                       acl = posix_acl_dup(SHMEM_I(inode)->i_acl);
+                       acl = posix_acl_dup(inode->i_acl);
                        break;
 
                case ACL_TYPE_DEFAULT:
-                       acl = posix_acl_dup(SHMEM_I(inode)->i_default_acl);
+                       acl = posix_acl_dup(inode->i_default_acl);
                        break;
        }
        spin_unlock(&inode->i_lock);
@@ -45,13 +45,13 @@ shmem_set_acl(struct inode *inode, int type, struct posix_acl *acl)
        spin_lock(&inode->i_lock);
        switch(type) {
                case ACL_TYPE_ACCESS:
-                       free = SHMEM_I(inode)->i_acl;
-                       SHMEM_I(inode)->i_acl = posix_acl_dup(acl);
+                       free = inode->i_acl;
+                       inode->i_acl = posix_acl_dup(acl);
                        break;
 
                case ACL_TYPE_DEFAULT:
-                       free = SHMEM_I(inode)->i_default_acl;
-                       SHMEM_I(inode)->i_default_acl = posix_acl_dup(acl);
+                       free = inode->i_default_acl;
+                       inode->i_default_acl = posix_acl_dup(acl);
                        break;
        }
        spin_unlock(&inode->i_lock);
@@ -154,23 +154,6 @@ shmem_acl_init(struct inode *inode, struct inode *dir)
        return generic_acl_init(inode, dir, &shmem_acl_ops);
 }
 
-/**
- * shmem_acl_destroy_inode  -  destroy acls hanging off the in-memory inode
- *
- * This is done before destroying the actual inode.
- */
-
-void
-shmem_acl_destroy_inode(struct inode *inode)
-{
-       if (SHMEM_I(inode)->i_acl)
-               posix_acl_release(SHMEM_I(inode)->i_acl);
-       SHMEM_I(inode)->i_acl = NULL;
-       if (SHMEM_I(inode)->i_default_acl)
-               posix_acl_release(SHMEM_I(inode)->i_default_acl);
-       SHMEM_I(inode)->i_default_acl = NULL;
-}
-
 /**
  * shmem_check_acl  -  check_acl() callback for generic_permission()
  */
index ce62b770e2fc5399e53dcc9c91ec5220820fc977..819f056b39c6f27100e8b89f91acc42b3c5f5bd6 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1085,11 +1085,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 {
        struct page *page;
        struct kmem_cache_order_objects oo = s->oo;
+       gfp_t alloc_gfp;
 
        flags |= s->allocflags;
 
-       page = alloc_slab_page(flags | __GFP_NOWARN | __GFP_NORETRY, node,
-                                                                       oo);
+       /*
+        * Let the initial higher-order allocation fail under memory pressure
+        * so we fall-back to the minimum order allocation.
+        */
+       alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
+
+       page = alloc_slab_page(alloc_gfp, node, oo);
        if (unlikely(!page)) {
                oo = s->min;
                /*
index 5f1d2107a1dd70f7628f18d7649d92e2a1bd9d76..de56d3983de098b11d64f8971d85b8ded8187bf2 100644 (file)
@@ -437,8 +437,7 @@ free:
 int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev,
                  struct packet_type *ptype, struct net_device *orig_dev)
 {
-       skb->sk = NULL;         /* Initially we don't know who it's for */
-       skb->destructor = NULL; /* Who initializes this, dammit?! */
+       skb_orphan(skb);
 
        if (!net_eq(dev_net(dev), &init_net)) {
                kfree_skb(skb);
index baf2dc13a34a402aa22af5091bf87d2e1c9f1f16..60b5728122784a66562da3a4aec075692252382a 100644 (file)
@@ -2310,8 +2310,6 @@ ncls:
        if (!skb)
                goto out;
 
-       skb_orphan(skb);
-
        type = skb->protocol;
        list_for_each_entry_rcu(ptype,
                        &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
index 05ea7440d9e51b6f40e29b7335277c4853b8ca7d..3e70faab29897370b815447f6ab937dee12101b9 100644 (file)
@@ -85,7 +85,7 @@ static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb)
 }
 
 static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-                       int type, int code, int offset, __be32 info)
+                       u8 type, u8 code, int offset, __be32 info)
 {
        struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
        const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
index 65b3a8b11a6c3e6699ab426dce59e356da220670..278f46f5011beb2ab85747543f84dfd3ce7c6d1c 100644 (file)
@@ -1093,8 +1093,27 @@ restart:
                 * If we drop it here, the callers have no way to resolve routes
                 * when we're not caching.  Instead, just point *rp at rt, so
                 * the caller gets a single use out of the route
+                * Note that we do rt_free on this new route entry, so that
+                * once its refcount hits zero, we are still able to reap it
+                * (Thanks Alexey)
+                * Note also the rt_free uses call_rcu.  We don't actually
+                * need rcu protection here, this is just our path to get
+                * on the route gc list.
                 */
-               goto report_and_exit;
+
+               if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
+                       int err = arp_bind_neighbour(&rt->u.dst);
+                       if (err) {
+                               if (net_ratelimit())
+                                       printk(KERN_WARNING
+                                           "Neighbour table failure & not caching routes.\n");
+                               rt_drop(rt);
+                               return err;
+                       }
+               }
+
+               rt_free(rt);
+               goto skip_hashing;
        }
 
        rthp = &rt_hash_table[hash].chain;
@@ -1211,7 +1230,8 @@ restart:
 #if RT_CACHE_DEBUG >= 2
        if (rt->u.dst.rt_next) {
                struct rtable *trt;
-               printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst);
+               printk(KERN_DEBUG "rt_cache @%02x: %pI4",
+                      hash, &rt->rt_dst);
                for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next)
                        printk(" . %pI4", &trt->rt_dst);
                printk("\n");
@@ -1226,7 +1246,7 @@ restart:
 
        spin_unlock_bh(rt_hash_lock_addr(hash));
 
-report_and_exit:
+skip_hashing:
        if (rp)
                *rp = rt;
        else
index 52449f7a1b715cf830fb4d051cb31ebd10cc3350..86f42a288c4bb487237ffa6da9944e20d0456efb 100644 (file)
@@ -405,7 +405,7 @@ out:
 }
 
 static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-                   int type, int code, int offset, __be32 info)
+                   u8 type, u8 code, int offset, __be32 info)
 {
        struct net *net = dev_net(skb->dev);
        struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
index c2f250150db10a6d1c68c6904404e258860fc30d..678bb95b1525716f2120db6abe3332a7c4e5edab 100644 (file)
@@ -354,7 +354,7 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
 }
 
 static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-                    int type, int code, int offset, __be32 info)
+                    u8 type, u8 code, int offset, __be32 info)
 {
        struct net *net = dev_net(skb->dev);
        struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
index 36dff8807183c81741baba329cea9d637ff6ebcd..eab62a7a8f06e54fd7e161b8312565f8da59ed60 100644 (file)
@@ -117,7 +117,7 @@ static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
 /*
  * Slightly more convenient version of icmpv6_send.
  */
-void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
+void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
 {
        icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
        kfree_skb(skb);
@@ -161,7 +161,7 @@ static int is_ineligible(struct sk_buff *skb)
 /*
  * Check the ICMP output rate limit
  */
-static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
+static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type,
                                     struct flowi *fl)
 {
        struct dst_entry *dst;
@@ -305,7 +305,7 @@ static inline void mip6_addr_swap(struct sk_buff *skb) {}
 /*
  *     Send an ICMP message in response to a packet in error
  */
-void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
+void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
                 struct net_device *dev)
 {
        struct net *net = dev_net(skb->dev);
@@ -590,7 +590,7 @@ out:
        icmpv6_xmit_unlock(sk);
 }
 
-static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
+static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 {
        struct inet6_protocol *ipprot;
        int inner_offset;
@@ -643,7 +643,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
        struct in6_addr *saddr, *daddr;
        struct ipv6hdr *orig_hdr;
        struct icmp6hdr *hdr;
-       int type;
+       u8 type;
 
        if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
                struct sec_path *sp = skb_sec_path(skb);
@@ -914,7 +914,7 @@ static const struct icmp6_err {
        },
 };
 
-int icmpv6_err_convert(int type, int code, int *err)
+int icmpv6_err_convert(u8 type, u8 code, int *err)
 {
        int fatal = 0;
 
index 404d16a97d5c9d85c119f1853ce23e509c924ed5..51f410e7775a2880981e909817ce9ae5eba68d1f 100644 (file)
@@ -394,13 +394,13 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
 
 static int
 ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
-           int *type, int *code, int *msg, __u32 *info, int offset)
+           u8 *type, u8 *code, int *msg, __u32 *info, int offset)
 {
        struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
        struct ip6_tnl *t;
        int rel_msg = 0;
-       int rel_type = ICMPV6_DEST_UNREACH;
-       int rel_code = ICMPV6_ADDR_UNREACH;
+       u8 rel_type = ICMPV6_DEST_UNREACH;
+       u8 rel_code = ICMPV6_ADDR_UNREACH;
        __u32 rel_info = 0;
        __u16 len;
        int err = -ENOENT;
@@ -488,11 +488,11 @@ out:
 
 static int
 ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-          int type, int code, int offset, __be32 info)
+          u8 type, u8 code, int offset, __be32 info)
 {
        int rel_msg = 0;
-       int rel_type = type;
-       int rel_code = code;
+       u8 rel_type = type;
+       u8 rel_code = code;
        __u32 rel_info = ntohl(info);
        int err;
        struct sk_buff *skb2;
@@ -586,11 +586,11 @@ out:
 
 static int
 ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-          int type, int code, int offset, __be32 info)
+          u8 type, u8 code, int offset, __be32 info)
 {
        int rel_msg = 0;
-       int rel_type = type;
-       int rel_code = code;
+       u8 rel_type = type;
+       u8 rel_code = code;
        __u32 rel_info = ntohl(info);
        int err;
 
index 3a0b3be7ece525d6d7230c2fd2c79340a025c4f2..79c172f1ff012d6b7f577ea20febae037ec96dad 100644 (file)
@@ -51,7 +51,7 @@
 #include <linux/mutex.h>
 
 static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-                               int type, int code, int offset, __be32 info)
+                               u8 type, u8 code, int offset, __be32 info)
 {
        __be32 spi;
        struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
index f995e19c87a96d29995d7de530e0f996160ff980..f797e8c6f3b36de616de064616e43c22fa70a536 100644 (file)
@@ -54,7 +54,7 @@ static inline void *mip6_padn(__u8 *data, __u8 padlen)
        return data + padlen;
 }
 
-static inline void mip6_param_prob(struct sk_buff *skb, int code, int pos)
+static inline void mip6_param_prob(struct sk_buff *skb, u8 code, int pos)
 {
        icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
 }
index 8b0b6f948063607d8d60ec18bde103a36369bc2a..d6c3c1c34b2d0cf1eac94211675800fce65be843 100644 (file)
@@ -310,7 +310,7 @@ out:
 
 static void rawv6_err(struct sock *sk, struct sk_buff *skb,
               struct inet6_skb_parm *opt,
-              int type, int code, int offset, __be32 info)
+              u8 type, u8 code, int offset, __be32 info)
 {
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
@@ -343,7 +343,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
 }
 
 void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
-               int type, int code, int inner_offset, __be32 info)
+               u8 type, u8 code, int inner_offset, __be32 info)
 {
        struct sock *sk;
        int hash;
index 658293ea05ba736d143947ceca915cacdf4e5f89..1473ee0a1f5135d02cd9cc2bab8ae5c8b37e1e88 100644 (file)
@@ -1865,7 +1865,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
  *     Drop the packet on the floor
  */
 
-static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
+static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
 {
        int type;
        struct dst_entry *dst = skb_dst(skb);
index 53b6a4192b161eae3f44b226395e02a691ad120c..58810c65b6359524b171fa9ca0678b9a6a21241c 100644 (file)
@@ -317,7 +317,7 @@ failure:
 }
 
 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-               int type, int code, int offset, __be32 info)
+               u8 type, u8 code, int offset, __be32 info)
 {
        struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
        const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
index 669f280989c351ce6c9b2e29ed934e1c3d02a937..633ad789effc12d0000c9ec3bb2c76192bfcee6e 100644 (file)
@@ -124,7 +124,7 @@ drop:
 }
 
 static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-                       int type, int code, int offset, __be32 info)
+                       u8 type, u8 code, int offset, __be32 info)
 {
        struct xfrm6_tunnel *handler;
 
index 023beda6b22445f52f75e13fc3e0000b78d42342..33b59bd92c4dc18667a6f0bf94fc7ac4282b4323 100644 (file)
@@ -312,7 +312,7 @@ csum_copy_err:
 }
 
 void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-                   int type, int code, int offset, __be32 info,
+                   u8 type, u8 code, int offset, __be32 info,
                    struct udp_table *udptable)
 {
        struct ipv6_pinfo *np;
@@ -346,8 +346,8 @@ out:
 }
 
 static __inline__ void udpv6_err(struct sk_buff *skb,
-                                struct inet6_skb_parm *opt, int type,
-                                int code, int offset, __be32 info     )
+                                struct inet6_skb_parm *opt, u8 type,
+                                u8 code, int offset, __be32 info     )
 {
        __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
 }
index 23779208c334e90aa1bad9100a1d8c84f6b452df..6bb303471e204ade2e31d025cb9d3ec1b80fbd60 100644 (file)
@@ -9,7 +9,7 @@
 
 extern int     __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int );
 extern void    __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
-                              int , int , int , __be32 , struct udp_table *);
+                              u8 , u8 , int , __be32 , struct udp_table *);
 
 extern int     udp_v6_get_port(struct sock *sk, unsigned short snum);
 
index ba162a824585b8fcecd2c1022c0c64d09faa68bc..4818c48688f210b31932329204b9c47159c5e44c 100644 (file)
@@ -20,7 +20,7 @@ static int udplitev6_rcv(struct sk_buff *skb)
 
 static void udplitev6_err(struct sk_buff *skb,
                          struct inet6_skb_parm *opt,
-                         int type, int code, int offset, __be32 info)
+                         u8 type, u8 code, int offset, __be32 info)
 {
        __udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table);
 }
index 80193db224d9070e2ca57982122fdf8384ba5d9f..81a95c00e50355855cd7fdf22ac207b7e7dc1d91 100644 (file)
@@ -262,7 +262,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb)
 }
 
 static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-                           int type, int code, int offset, __be32 info)
+                           u8 type, u8 code, int offset, __be32 info)
 {
        /* xfrm6_tunnel native err handling */
        switch (type) {
index 5922febe25c42dc9a602378a600db8d8da674f85..cb762c8723ea7a90f98b9e34074d98bc3b716742 100644 (file)
@@ -913,9 +913,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
        /* Clean up the original one to keep it in listen state */
        irttp_listen(self->tsap);
 
-       /* Wow ! What is that ? Jean II */
-       skb->sk = NULL;
-       skb->destructor = NULL;
        kfree_skb(skb);
        sk->sk_ack_backlog--;
 
index 67c99d20857fab4e2e078e9c1d5d4a9ee6bbc952..7ba96618660e349de2253473d6701ab8bbad9613 100644 (file)
@@ -196,6 +196,7 @@ static int ircomm_lmp_data_request(struct ircomm_cb *self,
        /* Don't forget to refcount it - see ircomm_tty_do_softint() */
        skb_get(skb);
 
+       skb_orphan(skb);
        skb->destructor = ircomm_lmp_flow_control;
 
        if ((self->pkt_count++ > 7) && (self->flow_status == FLOW_START)) {
index 5f72b94b4918745f3491b48ba06cbc1714890280..7508f11c5b3952a05ed9b41682f50d7827b3c4e6 100644 (file)
@@ -335,7 +335,8 @@ begin:
        h = __nf_conntrack_find(net, tuple);
        if (h) {
                ct = nf_ct_tuplehash_to_ctrack(h);
-               if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
+               if (unlikely(nf_ct_is_dying(ct) ||
+                            !atomic_inc_not_zero(&ct->ct_general.use)))
                        h = NULL;
                else {
                        if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) {
@@ -425,7 +426,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
        /* Remove from unconfirmed list */
        hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
 
-       __nf_conntrack_hash_insert(ct, hash, repl_hash);
        /* Timer relative to confirmation time, not original
           setting time, otherwise we'd get timer wrap in
           weird delay cases. */
@@ -433,8 +433,16 @@ __nf_conntrack_confirm(struct sk_buff *skb)
        add_timer(&ct->timeout);
        atomic_inc(&ct->ct_general.use);
        set_bit(IPS_CONFIRMED_BIT, &ct->status);
+
+       /* Since the lookup is lockless, hash insertion must be done after
+        * starting the timer and setting the CONFIRMED bit. The RCU barriers
+        * guarantee that no other CPU can find the conntrack before the above
+        * stores are visible.
+        */
+       __nf_conntrack_hash_insert(ct, hash, repl_hash);
        NF_CT_STAT_INC(net, insert);
        spin_unlock_bh(&nf_conntrack_lock);
+
        help = nfct_help(ct);
        if (help && help->helper)
                nf_conntrack_event_cache(IPCT_HELPER, ct);
@@ -503,7 +511,8 @@ static noinline int early_drop(struct net *net, unsigned int hash)
                        cnt++;
                }
 
-               if (ct && unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
+               if (ct && unlikely(nf_ct_is_dying(ct) ||
+                                  !atomic_inc_not_zero(&ct->ct_general.use)))
                        ct = NULL;
                if (ct || cnt >= NF_CT_EVICTION_RANGE)
                        break;
@@ -1267,13 +1276,19 @@ err_cache:
        return ret;
 }
 
+/*
+ * We need to use special "null" values, not used in hash table
+ */
+#define UNCONFIRMED_NULLS_VAL  ((1<<30)+0)
+#define DYING_NULLS_VAL                ((1<<30)+1)
+
 static int nf_conntrack_init_net(struct net *net)
 {
        int ret;
 
        atomic_set(&net->ct.count, 0);
-       INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0);
-       INIT_HLIST_NULLS_HEAD(&net->ct.dying, 0);
+       INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL);
+       INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL);
        net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
        if (!net->ct.stat) {
                ret = -ENOMEM;
index 2fefe147750a09b2f8d94f0e78f3b5f35e5b08b0..4e620305f28c765548c5c5b35b1a5b833034bf30 100644 (file)
@@ -47,7 +47,6 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)
        mutex_lock(&nf_log_mutex);
 
        if (pf == NFPROTO_UNSPEC) {
-               int i;
                for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
                        list_add_tail(&(logger->list[i]), &(nf_loggers_l[i]));
        } else {
@@ -216,7 +215,7 @@ static const struct file_operations nflog_file_ops = {
 #endif /* PROC_FS */
 
 #ifdef CONFIG_SYSCTL
-struct ctl_path nf_log_sysctl_path[] = {
+static struct ctl_path nf_log_sysctl_path[] = {
        { .procname = "net", .ctl_name = CTL_NET, },
        { .procname = "netfilter", .ctl_name = NET_NETFILTER, },
        { .procname = "nf_log", .ctl_name = CTL_UNNUMBERED, },
@@ -228,19 +227,26 @@ static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
 static struct ctl_table_header *nf_log_dir_header;
 
 static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp,
-                        void *buffer, size_t *lenp, loff_t *ppos)
+                        void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        const struct nf_logger *logger;
+       char buf[NFLOGGER_NAME_LEN];
+       size_t size = *lenp;
        int r = 0;
        int tindex = (unsigned long)table->extra1;
 
        if (write) {
-               if (!strcmp(buffer, "NONE")) {
+               if (size > sizeof(buf))
+                       size = sizeof(buf);
+               if (copy_from_user(buf, buffer, size))
+                       return -EFAULT;
+
+               if (!strcmp(buf, "NONE")) {
                        nf_log_unbind_pf(tindex);
                        return 0;
                }
                mutex_lock(&nf_log_mutex);
-               logger = __find_logger(tindex, buffer);
+               logger = __find_logger(tindex, buf);
                if (logger == NULL) {
                        mutex_unlock(&nf_log_mutex);
                        return -ENOENT;
index 498b45101df7f820e8bd97d6612406811d59afaf..f28f6a5fc02d96aa79acce6e57a8968026250387 100644 (file)
@@ -40,12 +40,12 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
 static u32 hash_v4(const struct sk_buff *skb)
 {
        const struct iphdr *iph = ip_hdr(skb);
-       u32 ipaddr;
+       __be32 ipaddr;
 
        /* packets in either direction go into same queue */
        ipaddr = iph->saddr ^ iph->daddr;
 
-       return jhash_2words(ipaddr, iph->protocol, jhash_initval);
+       return jhash_2words((__force u32)ipaddr, iph->protocol, jhash_initval);
 }
 
 static unsigned int
@@ -63,14 +63,14 @@ nfqueue_tg4_v1(struct sk_buff *skb, const struct xt_target_param *par)
 static u32 hash_v6(const struct sk_buff *skb)
 {
        const struct ipv6hdr *ip6h = ipv6_hdr(skb);
-       u32 addr[4];
+       __be32 addr[4];
 
        addr[0] = ip6h->saddr.s6_addr32[0] ^ ip6h->daddr.s6_addr32[0];
        addr[1] = ip6h->saddr.s6_addr32[1] ^ ip6h->daddr.s6_addr32[1];
        addr[2] = ip6h->saddr.s6_addr32[2] ^ ip6h->daddr.s6_addr32[2];
        addr[3] = ip6h->saddr.s6_addr32[3] ^ ip6h->daddr.s6_addr32[3];
 
-       return jhash2(addr, ARRAY_SIZE(addr), jhash_initval);
+       return jhash2((__force u32 *)addr, ARRAY_SIZE(addr), jhash_initval);
 }
 
 static unsigned int
index 69a639f354031ffa3288e1e8c6a7929fce784762..225ee3ecd69d63152489a0f47ae76d5cbacb7b8f 100644 (file)
 #include <net/netfilter/nf_conntrack.h>
 #include <linux/netfilter/xt_cluster.h>
 
-static inline u_int32_t nf_ct_orig_ipv4_src(const struct nf_conn *ct)
+static inline u32 nf_ct_orig_ipv4_src(const struct nf_conn *ct)
 {
-       return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
+       return (__force u32)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
 }
 
-static inline const void *nf_ct_orig_ipv6_src(const struct nf_conn *ct)
+static inline const u32 *nf_ct_orig_ipv6_src(const struct nf_conn *ct)
 {
-       return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6;
+       return (__force u32 *)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6;
 }
 
 static inline u_int32_t
index 01dd07b764ec46de6d82baec2625500bee2c714d..98fc190e8f0eed8683e9836b6be1567ecea74382 100644 (file)
@@ -54,6 +54,7 @@ static bool quota_mt_check(const struct xt_mtchk_param *par)
        if (q->master == NULL)
                return -ENOMEM;
 
+       q->master->quota = q->quota;
        return true;
 }
 
index 220a1d588ee0e89813127dc87381accb21b36f88..4fc6a917f6de5b30d55de8dba82531be03e67826 100644 (file)
@@ -66,7 +66,7 @@ xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par)
                if (info->flags & XT_RATEEST_MATCH_BPS)
                        ret &= bps1 == bps2;
                if (info->flags & XT_RATEEST_MATCH_PPS)
-                       ret &= pps2 == pps2;
+                       ret &= pps1 == pps2;
                break;
        }
 
index a63de3f7f1859ccf839893f69b4b7db4ee84b431..6a4b19094143653ab79a43de52d916fd8ed2a0f0 100644 (file)
@@ -133,7 +133,7 @@ static struct notifier_block sctp_inet6addr_notifier = {
 
 /* ICMP error handler. */
 SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-                            int type, int code, int offset, __be32 info)
+                            u8 type, u8 code, int offset, __be32 info)
 {
        struct inet6_dev *idev;
        struct sock *sk;
index 29272f2e95a07945cf805e6c75867f0f1e2cb762..08fe6592ad44cfa138d59ba9fb21c7123d86074f 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/workqueue.h>
 #include <sound/core.h>
 #include "hda_beep.h"
+#include "hda_local.h"
 
 enum {
        DIGBEEP_HZ_STEP = 46875,        /* 46.875 Hz */
@@ -115,6 +116,9 @@ int snd_hda_attach_beep_device(struct hda_codec *codec, int nid)
        struct hda_beep *beep;
        int err;
 
+       if (!snd_hda_get_bool_hint(codec, "beep"))
+               return 0; /* disabled explicitly */
+
        beep = kzalloc(sizeof(*beep), GFP_KERNEL);
        if (beep == NULL)
                return -ENOMEM;